date:20230914

[PATCH 4/4] virtio-blk: don't lock AioContext in the submission code path

2023-09-14 Thread Stefan Hajnoczi

There is no need to acquire the AioContext lock around blk_aio_*() or
blk_get_geometry() anymore. I/O plugging (defer_call()) also does not
require the AioContext lock anymore.

Signed-off-by: Stefan Hajnoczi 
---
 hw/block/virtio-blk.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index f5315df042..e110f9718b 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -,7 +,6 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
 MultiReqBuffer mrb = {};
 bool suppress_notifications = virtio_queue_get_notification(vq);
 
-aio_context_acquire(blk_get_aio_context(s->blk));
 defer_call_begin();
 
 do {
@@ -1137,7 +1136,6 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
 }
 
 defer_call_end();
-aio_context_release(blk_get_aio_context(s->blk));
 }
 
 static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
@@ -1168,7 +1166,6 @@ static void virtio_blk_dma_restart_bh(void *opaque)
 s->rq = NULL;
 }
 
-aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
 while (req) {
 VirtIOBlockReq *next = req->next;
 if (virtio_blk_handle_request(req, )) {
@@ -1192,8 +1189,6 @@ static void virtio_blk_dma_restart_bh(void *opaque)
 
 /* Paired with inc in virtio_blk_dma_restart_cb() */
 blk_dec_in_flight(s->conf.conf.blk);
-
-aio_context_release(blk_get_aio_context(s->conf.conf.blk));
 }
 
 static void virtio_blk_dma_restart_cb(void *opaque, bool running,
-- 
2.41.0

[PATCH 2/4] virtio-blk: add lock to protect s->rq

2023-09-14 Thread Stefan Hajnoczi

s->rq is accessed from IO_CODE and GLOBAL_STATE_CODE. Introduce a lock
to protect s->rq and eliminate reliance on the AioContext lock.

Signed-off-by: Stefan Hajnoczi 
---
 include/hw/virtio/virtio-blk.h |  3 +-
 hw/block/virtio-blk.c  | 67 +++---
 2 files changed, 47 insertions(+), 23 deletions(-)

diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index dafec432ce..9881009c22 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -54,7 +54,8 @@ struct VirtIOBlockReq;
 struct VirtIOBlock {
 VirtIODevice parent_obj;
 BlockBackend *blk;
-void *rq;
+QemuMutex rq_lock;
+void *rq; /* protected by rq_lock */
 VirtIOBlkConf conf;
 unsigned short sector_mask;
 bool original_wce;
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index a1f8e15522..ee38e089bc 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -82,8 +82,11 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, 
int error,
 /* Break the link as the next request is going to be parsed from the
  * ring again. Otherwise we may end up doing a double completion! */
 req->mr_next = NULL;
-req->next = s->rq;
-s->rq = req;
+
+WITH_QEMU_LOCK_GUARD(>rq_lock) {
+req->next = s->rq;
+s->rq = req;
+}
 } else if (action == BLOCK_ERROR_ACTION_REPORT) {
 virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
 if (acct_failed) {
@@ -1183,10 +1186,13 @@ static void virtio_blk_dma_restart_bh(void *opaque)
 {
 VirtIOBlock *s = opaque;
 
-VirtIOBlockReq *req = s->rq;
+VirtIOBlockReq *req;
 MultiReqBuffer mrb = {};
 
-s->rq = NULL;
+WITH_QEMU_LOCK_GUARD(>rq_lock) {
+req = s->rq;
+s->rq = NULL;
+}
 
 aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
 while (req) {
@@ -1238,22 +1244,29 @@ static void virtio_blk_reset(VirtIODevice *vdev)
 AioContext *ctx;
 VirtIOBlockReq *req;
 
+/* Dataplane has stopped... */
+assert(!s->dataplane_started);
+
+/* ...but requests may still be in flight. */
 ctx = blk_get_aio_context(s->blk);
 aio_context_acquire(ctx);
 blk_drain(s->blk);
+aio_context_release(ctx);
 
 /* We drop queued requests after blk_drain() because blk_drain() itself can
  * produce them. */
-while (s->rq) {
-req = s->rq;
-s->rq = req->next;
-virtqueue_detach_element(req->vq, >elem, 0);
-virtio_blk_free_request(req);
+WITH_QEMU_LOCK_GUARD(>rq_lock) {
+while (s->rq) {
+req = s->rq;
+s->rq = req->next;
+
+/* No other threads can access req->vq here */
+virtqueue_detach_element(req->vq, >elem, 0);
+
+virtio_blk_free_request(req);
+}
 }
 
-aio_context_release(ctx);
-
-assert(!s->dataplane_started);
 blk_set_enable_write_cache(s->blk, s->original_wce);
 }
 
@@ -1443,18 +1456,22 @@ static void virtio_blk_set_status(VirtIODevice *vdev, 
uint8_t status)
 static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f)
 {
 VirtIOBlock *s = VIRTIO_BLK(vdev);
-VirtIOBlockReq *req = s->rq;
 
-while (req) {
-qemu_put_sbyte(f, 1);
+WITH_QEMU_LOCK_GUARD(>rq_lock) {
+VirtIOBlockReq *req = s->rq;
 
-if (s->conf.num_queues > 1) {
-qemu_put_be32(f, virtio_get_queue_index(req->vq));
+while (req) {
+qemu_put_sbyte(f, 1);
+
+if (s->conf.num_queues > 1) {
+qemu_put_be32(f, virtio_get_queue_index(req->vq));
+}
+
+qemu_put_virtqueue_element(vdev, f, >elem);
+req = req->next;
 }
-
-qemu_put_virtqueue_element(vdev, f, >elem);
-req = req->next;
 }
+
 qemu_put_sbyte(f, 0);
 }
 
@@ -1480,8 +1497,11 @@ static int virtio_blk_load_device(VirtIODevice *vdev, 
QEMUFile *f,
 
 req = qemu_get_virtqueue_element(vdev, f, sizeof(VirtIOBlockReq));
 virtio_blk_init_request(s, virtio_get_queue(vdev, vq_idx), req);
-req->next = s->rq;
-s->rq = req;
+
+WITH_QEMU_LOCK_GUARD(>rq_lock) {
+req->next = s->rq;
+s->rq = req;
+}
 }
 
 return 0;
@@ -1628,6 +1648,8 @@ static void virtio_blk_device_realize(DeviceState *dev, 
Error **errp)
 s->host_features);
 virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size);
 
+qemu_mutex_init(>rq_lock);
+
 s->blk = conf->conf.blk;
 s->rq = NULL;
 s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1;
@@ -1679,6 +1701,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev)
 virtio_del_queue(vdev, i);
 }
 qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2);
+qemu_mutex_destroy(>rq_lock);
 blk_ram_registrar_destroy(>blk_ram_registrar);

[PATCH 3/4] virtio-blk: don't lock AioContext in the completion code path

2023-09-14 Thread Stefan Hajnoczi

Nothing in the completion code path relies on the AioContext lock
anymore. Virtqueues are only accessed from one thread at any moment and
the s->rq global state is protected by its own lock now.

Signed-off-by: Stefan Hajnoczi 
---
 hw/block/virtio-blk.c | 34 --
 1 file changed, 4 insertions(+), 30 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index ee38e089bc..f5315df042 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -105,7 +105,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
 VirtIOBlock *s = next->dev;
 VirtIODevice *vdev = VIRTIO_DEVICE(s);
 
-aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
 while (next) {
 VirtIOBlockReq *req = next;
 next = req->mr_next;
@@ -138,7 +137,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
 block_acct_done(blk_get_stats(s->blk), >acct);
 virtio_blk_free_request(req);
 }
-aio_context_release(blk_get_aio_context(s->conf.conf.blk));
 }
 
 static void virtio_blk_flush_complete(void *opaque, int ret)
@@ -146,19 +144,13 @@ static void virtio_blk_flush_complete(void *opaque, int 
ret)
 VirtIOBlockReq *req = opaque;
 VirtIOBlock *s = req->dev;
 
-aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
-if (ret) {
-if (virtio_blk_handle_rw_error(req, -ret, 0, true)) {
-goto out;
-}
+if (ret && virtio_blk_handle_rw_error(req, -ret, 0, true)) {
+return;
 }
 
 virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
 block_acct_done(blk_get_stats(s->blk), >acct);
 virtio_blk_free_request(req);
-
-out:
-aio_context_release(blk_get_aio_context(s->conf.conf.blk));
 }
 
 static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret)
@@ -168,11 +160,8 @@ static void virtio_blk_discard_write_zeroes_complete(void 
*opaque, int ret)
 bool is_write_zeroes = (virtio_ldl_p(VIRTIO_DEVICE(s), >out.type) &
 ~VIRTIO_BLK_T_BARRIER) == 
VIRTIO_BLK_T_WRITE_ZEROES;
 
-aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
-if (ret) {
-if (virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) {
-goto out;
-}
+if (ret && virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) {
+return;
 }
 
 virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
@@ -180,9 +169,6 @@ static void virtio_blk_discard_write_zeroes_complete(void 
*opaque, int ret)
 block_acct_done(blk_get_stats(s->blk), >acct);
 }
 virtio_blk_free_request(req);
-
-out:
-aio_context_release(blk_get_aio_context(s->conf.conf.blk));
 }
 
 #ifdef __linux__
@@ -229,10 +215,8 @@ static void virtio_blk_ioctl_complete(void *opaque, int 
status)
 virtio_stl_p(vdev, >data_len, hdr->dxfer_len);
 
 out:
-aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
 virtio_blk_req_complete(req, status);
 virtio_blk_free_request(req);
-aio_context_release(blk_get_aio_context(s->conf.conf.blk));
 g_free(ioctl_req);
 }
 
@@ -672,7 +656,6 @@ static void virtio_blk_zone_report_complete(void *opaque, 
int ret)
 {
 ZoneCmdData *data = opaque;
 VirtIOBlockReq *req = data->req;
-VirtIOBlock *s = req->dev;
 VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
 struct iovec *in_iov = data->in_iov;
 unsigned in_num = data->in_num;
@@ -763,10 +746,8 @@ static void virtio_blk_zone_report_complete(void *opaque, 
int ret)
 }
 
 out:
-aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
 virtio_blk_req_complete(req, err_status);
 virtio_blk_free_request(req);
-aio_context_release(blk_get_aio_context(s->conf.conf.blk));
 g_free(data->zone_report_data.zones);
 g_free(data);
 }
@@ -829,10 +810,8 @@ static void virtio_blk_zone_mgmt_complete(void *opaque, 
int ret)
 err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
 }
 
-aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
 virtio_blk_req_complete(req, err_status);
 virtio_blk_free_request(req);
-aio_context_release(blk_get_aio_context(s->conf.conf.blk));
 }
 
 static int virtio_blk_handle_zone_mgmt(VirtIOBlockReq *req, BlockZoneOp op)
@@ -882,7 +861,6 @@ static void virtio_blk_zone_append_complete(void *opaque, 
int ret)
 {
 ZoneCmdData *data = opaque;
 VirtIOBlockReq *req = data->req;
-VirtIOBlock *s = req->dev;
 VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
 int64_t append_sector, n;
 uint8_t err_status = VIRTIO_BLK_S_OK;
@@ -905,10 +883,8 @@ static void virtio_blk_zone_append_complete(void *opaque, 
int ret)
 trace_virtio_blk_zone_append_complete(vdev, req, append_sector, ret);
 
 out:
-aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
 virtio_blk_req_complete(req, err_status);
 virtio_blk_free_request(req);
-aio_context_release(blk_get_aio_context(s->conf.conf.blk));
 g_free(data);
 }
 
@@

Re: [PATCH v2 00/11] Validate and test qapi examples

2023-09-14 Thread Markus Armbruster

PATCH 01-06,09:
Reviewed-by: Markus Armbruster

Re: [PATCH v2 07/11] qapi: fix example of query-rocker-of-dpa-flows command

2023-09-14 Thread Markus Armbruster

Victor Toso  writes:

> Example output has a comment embedded in the array. Remove it.
> The end result is a list of size 1.
>
> Signed-off-by: Victor Toso 
> ---
>  qapi/rocker.json | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/qapi/rocker.json b/qapi/rocker.json
> index 31ce0b36f6..858e4f4a45 100644
> --- a/qapi/rocker.json
> +++ b/qapi/rocker.json
> @@ -249,8 +249,7 @@
>  #   "cookie": 0,
>  #   "action": {"goto-tbl": 10},
>  #   "mask": {"in-pport": 4294901760}
> -#  },
> -#  {...more...},
> +#  }
>  #]}
>  ##
>  { 'command': 'query-rocker-of-dpa-flows',

The schema patches in this series fix typos, except for this patch and
the next one, which drop "more of the same omitted for brevity" text.  I
believe you drop the text because it doesn't parse as JSON.

Fine if the example still make sense afterwards.  Do they?

Shortening examples is a reasonable thing to do.  Perhaps we should
adopt a conventional way to do it, and teach the proposed generator to
cope with it.  What do you think?

[PATCH v23 13/20] docs/s390x/cpu topology: document s390x cpu topology

2023-09-14 Thread Nina Schoetterl-Glausch

From: Pierre Morel 

Add some basic examples for the definition of cpu topology
in s390x.

Signed-off-by: Pierre Morel 
Co-developed-by: Nina Schoetterl-Glausch 
Reviewed-by: Thomas Huth 
Signed-off-by: Nina Schoetterl-Glausch 
---
 MAINTAINERS|   2 +
 docs/devel/index-internals.rst |   1 +
 docs/devel/s390-cpu-topology.rst   | 170 
 docs/system/s390x/cpu-topology.rst | 242 +
 docs/system/target-s390x.rst   |   1 +
 5 files changed, 416 insertions(+)
 create mode 100644 docs/devel/s390-cpu-topology.rst
 create mode 100644 docs/system/s390x/cpu-topology.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index 17b92fe3ce..0cba0cb2d1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1703,6 +1703,8 @@ S: Supported
 F: include/hw/s390x/cpu-topology.h
 F: hw/s390x/cpu-topology.c
 F: target/s390x/kvm/stsi-topology.c
+F: docs/devel/s390-cpu-topology.rst
+F: docs/system/s390x/cpu-topology.rst
 
 X86 Machines
 
diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst
index e1a93df263..6f81df92bc 100644
--- a/docs/devel/index-internals.rst
+++ b/docs/devel/index-internals.rst
@@ -14,6 +14,7 @@ Details about QEMU's various subsystems including how to add 
features to them.
migration
multi-process
reset
+   s390-cpu-topology
s390-dasd-ipl
tracing
vfio-migration
diff --git a/docs/devel/s390-cpu-topology.rst b/docs/devel/s390-cpu-topology.rst
new file mode 100644
index 00..9eab28d5e5
--- /dev/null
+++ b/docs/devel/s390-cpu-topology.rst
@@ -0,0 +1,170 @@
+QAPI interface for S390 CPU topology
+
+
+The following sections will explain the QAPI interface for S390 CPU topology
+with the help of exemplary output.
+For this, let's assume that QEMU has been started with the following
+command, defining 4 CPUs, where CPU[0] is defined by the -smp argument and will
+have default values:
+
+.. code-block:: bash
+
+ qemu-system-s390x \
+-enable-kvm \
+-cpu z14,ctop=on \
+-smp 1,drawers=3,books=3,sockets=2,cores=2,maxcpus=36 \
+-device z14-s390x-cpu,core-id=19,entitlement=high \
+-device z14-s390x-cpu,core-id=11,entitlement=low \
+-device z14-s390x-cpu,core-id=112,entitlement=high \
+   ...
+
+Additions to query-cpus-fast
+
+
+The command query-cpus-fast allows querying the topology tree and
+modifiers for all configured vCPUs.
+
+.. code-block:: QMP
+
+ { "execute": "query-cpus-fast" }
+ {
+  "return": [
+{
+  "dedicated": false,
+  "thread-id": 536993,
+  "props": {
+"core-id": 0,
+"socket-id": 0,
+"drawer-id": 0,
+"book-id": 0
+  },
+  "cpu-state": "operating",
+  "entitlement": "medium",
+  "qom-path": "/machine/unattached/device[0]",
+  "cpu-index": 0,
+  "target": "s390x"
+},
+{
+  "dedicated": false,
+  "thread-id": 537003,
+  "props": {
+"core-id": 19,
+"socket-id": 1,
+"drawer-id": 0,
+"book-id": 2
+  },
+  "cpu-state": "operating",
+  "entitlement": "high",
+  "qom-path": "/machine/peripheral-anon/device[0]",
+  "cpu-index": 19,
+  "target": "s390x"
+},
+{
+  "dedicated": false,
+  "thread-id": 537004,
+  "props": {
+"core-id": 11,
+"socket-id": 1,
+"drawer-id": 0,
+"book-id": 1
+  },
+  "cpu-state": "operating",
+  "entitlement": "low",
+  "qom-path": "/machine/peripheral-anon/device[1]",
+  "cpu-index": 11,
+  "target": "s390x"
+},
+{
+  "dedicated": true,
+  "thread-id": 537005,
+  "props": {
+"core-id": 112,
+"socket-id": 0,
+"drawer-id": 3,
+"book-id": 2
+  },
+  "cpu-state": "operating",
+  "entitlement": "high",
+  "qom-path": "/machine/peripheral-anon/device[2]",
+  "cpu-index": 112,
+  "target": "s390x"
+}
+  ]
+ }
+
+
+QAPI command: set-cpu-topology
+--
+
+The command set-cpu-topology allows modifying the topology tree
+or the topology modifiers of a vCPU in the configuration.
+
+.. code-block:: QMP
+
+{ "execute": "set-cpu-topology",
+  "arguments": {
+ "core-id": 11,
+ "socket-id": 0,
+ "book-id": 0,
+ "drawer-id": 0,
+ "entitlement": "low",
+ "dedicated": false
+  }
+}
+{"return": {}}
+
+The core-id parameter is the only mandatory parameter and every
+unspecified parameter keeps its previous value.
+
+QAPI event CPU_POLARIZATION_CHANGE
+--
+
+When a guest requests a modification of the polarization,
+QEMU sends a CPU_POLARIZATION_CHANGE event.
+
+When requesting the change, the guest only specifies horizontal or
+vertical polarization.
+It is the job of the entity administrating QEMU to set the dedication and fine
+grained vertical entitlement in response to this event.

Re: [PATCH 02/13] memory: Introduce memory_region_iommu_set_iova_ranges

2023-09-14 Thread David Hildenbrand


On 04.09.23 10:03, Eric Auger wrote:

This helper will allow to convey information about valid
IOVA ranges to virtual IOMMUS.

Signed-off-by: Eric Auger 
---
  include/exec/memory.h | 26 ++
  softmmu/memory.c  | 15 +++
  2 files changed, 41 insertions(+)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 184cb3a01b..f6fb99dd3f 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -519,6 +519,27 @@ struct IOMMUMemoryRegionClass {
   int (*iommu_set_page_size_mask)(IOMMUMemoryRegion *iommu,
   uint64_t page_size_mask,
   Error **errp);
+/**
+ * @iommu_set_iova_ranges:
+ *
+ * Propagate information about the usable IOVA ranges for a given IOMMU
+ * memory region. Used for example to propagate host physical device
+ * reserved memory region constraints to the virtual IOMMU.
+ *
+ * Optional method: if this method is not provided, then the default IOVA
+ * aperture is used.
+ *
+ * @nr_ranges: number of IOVA ranges
+ *
+ * @iova_ranges: an array of @nr_ranges usable IOVA ranges
+ *
+ * Returns 0 on success, or a negative error. In case of failure, the error
+ * object must be created.
+ */
+ int (*iommu_set_iova_ranges)(IOMMUMemoryRegion *iommu,
+  uint32_t nr_ranges,
+  struct Range *iova_ranges,
+  Error **errp);
  };
  
  typedef struct RamDiscardListener RamDiscardListener;

@@ -1845,6 +1866,11 @@ int 
memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr,
 uint64_t page_size_mask,
 Error **errp);
  
+int memory_region_iommu_set_iova_ranges(IOMMUMemoryRegion *iommu,

+uint32_t nr_ranges,
+struct Range *iova_ranges,
+Error **errp);
+
  /**
   * memory_region_name: get a memory region's name
   *
diff --git a/softmmu/memory.c b/softmmu/memory.c
index 7d9494ce70..07499457aa 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -1905,6 +1905,21 @@ int 
memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr,
  return ret;
  }
  
+int memory_region_iommu_set_iova_ranges(IOMMUMemoryRegion *iommu_mr,

+uint32_t nr_ranges,
+struct Range *iova_ranges,
+Error **errp)
+{
+IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
+int ret = 0;
+
+if (imrc->iommu_set_iova_ranges) {
+ret = imrc->iommu_set_iova_ranges(iommu_mr, nr_ranges,
+  iova_ranges, errp);
+}
+return ret;
+}
+
  int memory_region_register_iommu_notifier(MemoryRegion *mr,
IOMMUNotifier *n, Error **errp)
  {


Reviewed-by: David Hildenbrand 

--
Cheers,

David / dhildenb

Re: [PATCH v3 0/2] qemu-img: map: implement support for compressed clusters

2023-09-14 Thread Kevin Wolf

Am 07.09.2023 um 23:02 hat Andrey Drobyshev via geschrieben:
> v2 --> v3:
>   * Make "compressed" field mandatory, not optional;
>   * Adjust field description in qapi/block-core.json;
>   * Squash patch 3 into patch 2 so that failing tests don't break bisect;
>   * Update even more tests' outputs now that the field is mandatory.
> 
> v2: https://lists.nongnu.org/archive/html/qemu-block/2023-07/msg00106.html

Thanks, applied to the block branch.

Kevin

Re: [PATCH v2 04/10] Introduce the CPU address space destruction function

2023-09-14 Thread David Hildenbrand


On 14.09.23 15:00, lixianglai wrote:

Hi David:


Hi!




On 12.09.23 04:11, xianglai li wrote:

Introduce new function to destroy CPU address space resources
for cpu hot-(un)plug.


How do other archs handle that? Or how are they able to get away
without destroying?


They do not remove the cpu address space, taking the X86 architecture as
an example:

1.Start the x86 VM:

./qemu-system-x86_64 \
-machine q35  \
-cpu Broadwell-IBRS \
-smp 1,maxcpus=100,sockets=100,cores=1,threads=1 \
-m 4G \
-drive file=~/anolis-8.8.qcow2  \
-serial stdio   \
-monitor telnet:localhost:4498,server,nowait   \
-nographic

2.Connect the qemu monitor

telnet 127.0.0.1 4498

info mtree

address-space: cpu-memory-0
address-space: memory
    - (prio 0, i/o): system
      -7fff (prio 0, ram): alias ram-below-4g
@pc.ram -7fff
      - (prio -1, i/o): pci
    000a-000b (prio 1, i/o): vga-lowmem

3.Perform cpu hot swap int qemu monitor

device_add
Broadwell-IBRS-x86_64-cpu,socket-id=1,core-id=0,thread-id=0,id=cpu1
device_del cpu1



Hm, doesn't seem to work for me on upstream QEMU for some reason: 
"Error: acpi: device unplug request for not supported device type: 
Broadwell-IBRS-x86_64-cpu"


What happens if you re-add that CPU? Will we reuse the previous address 
space?



info mtree

address-space: cpu-memory-0
address-space: cpu-memory-1
address-space: memory
    - (prio 0, i/o): system
      -7fff (prio 0, ram): alias ram-below-4g
@pc.ram -7fff
      - (prio -1, i/o): pci
    000a-000b (prio 1, i/o): vga-lowmem


  From the above test, you can see whether the address space of cpu1 is
residual after a cpu hot swap, and whether it is reasonable?



Probably we should teach other archs to destroy that address space as well.

Can we do that from the core, instead of having to do that in each CPU 
unrealize function?


--
Cheers,

David / dhildenb

Re: [PATCH v6 09/10] migration/yank: Keep track of registered yank instances

2023-09-14 Thread Fabiano Rosas

Peter Xu  writes:

> On Wed, Sep 13, 2023 at 06:53:20PM -0300, Fabiano Rosas wrote:
>> Peter Xu  writes:
>> 
>> > On Mon, Sep 11, 2023 at 02:13:19PM -0300, Fabiano Rosas wrote:
>> >> The core yank code is strict about balanced registering and
>> >> unregistering of yank functions.
>> >> 
>> >> This creates a difficulty because the migration code registers one
>> >> yank function per QIOChannel, but each QIOChannel can be referenced by
>> >> more than one QEMUFile. The yank function should not be removed until
>> >> all QEMUFiles have been closed.
>> >> 
>> >> Keep a reference count of how many QEMUFiles are using a QIOChannel
>> >> that has a yank function. Only unregister the yank function when all
>> >> QEMUFiles have been closed.
>> >> 
>> >> This improves the current code by removing the need for the programmer
>> >> to know which QEMUFile is the last one to be cleaned up and fixes the
>> >> theoretical issue of removing the yank function while another QEMUFile
>> >> could still be using the ioc and require a yank.
>> >> 
>> >> Signed-off-by: Fabiano Rosas 
>> >> ---
>> >>  migration/yank_functions.c | 81 ++
>> >>  migration/yank_functions.h |  8 
>> >>  2 files changed, 81 insertions(+), 8 deletions(-)
>> >
>> > I worry this over-complicate things.
>> 
>> It does. We ran out of simple options.
>> 
>> > If you prefer the cleaness that we operate always on qemufile level, can we
>> > just register each yank function per-qemufile?
>> 
>> "just" hehe
>> 
>> we could, but:
>> 
>> i) the yank is a per-channel operation, so this is even more unintuitive;
>
> I mean we can provide something like:
>
> void migration_yank_qemufile(void *opaque)
> {
> QEMUFile *file = opaque;
> QIOChannel *ioc = file->ioc;
>
> qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
> }
>
> void migration_qemufile_register_yank(QEMUFile *file)
> {
> if (migration_ioc_yank_supported(file->ioc)) {
> yank_register_function(MIGRATION_YANK_INSTANCE,
>migration_yank_qemufile,
>file);
> }
> }

Sure, this is what I was thinking as well. IMO it will be yet another
operation that happens on the channel, but it performed via the
file. Just like qio_channel_close() at qemu_fclose(). Not the end of the
world, of course, I just find it error-prone.

>> 
>> ii) multifd doesn't have a QEMUFile, so it will have to continue using
>> the ioc;
>
> We can keep using migration_ioc_[un]register_yank() for them if there's no
> qemufile attached.  As long as the function will all be registered under
> MIGRATION_YANK_INSTANCE we should be fine having different yank func.
>

ok

>> 
>> iii) we'll have to add a yank to every new QEMUFile created during the
>>  incoming migration (colo, rdma, etc), otherwise the incoming side
>>  will be left using iocs while the src uses the QEMUFile;
>
> For RDMA, IIUC it'll simply be a noop as migration_ioc_yank_supported()
> will be a noop for it for either reg/unreg.
>
> Currently it seems we will also unreg the ioc even for RDMA (even though we
> don't reg for it).  But since unreg will be a noop it seems all fine even
> if not paired.. maybe we should still try to pair it, e.g. register also in
> rdma_start_outgoing_migration() for the rdma ioc so at least they're paired.
>
> I don't see why COLO is special here, though.  Maybe I missed something.

For colo I was thinking we'd have to register the yank just to be sure
that all paths unregistering it have something to unregister.

Maybe I should move the register into qemu_file_new_impl() with a
matching unregister at qemu_fclose().

>> 
>> iv) this is a functional change of the yank feature for which we have no
>> tests.
>
> Having yank tested should be preferrable.  Lukas is in the loop, let's see
> whether he has something. We can still smoke test it before a selftest
> being there.
>
> Taking one step back.. I doubt whether anyone is using yank for migration?
> Knowing that migration already have migrate-cancel (for precopy) and
> migrate-pause (for postcopy).

Right, both already call qio_channel_shutdown().

> I never used it myself, and I don't think
> it's supported for RHEL.  How's that in suse's case?

Never heard mention of it and I don't see it in our virtualization
documentation.

>
> If no one is using it, maybe we can even avoid registering migration to
> yank?
>

Seems reasonable to me.

>> 
>> If that's all ok to you I'll go ahead and git it a try.
>> 
>> > I think qmp yank will simply fail the 2nd call on the qemufile if the
>> > iochannel is shared with the other one, but that's totally fine, IMHO.
>> >
>> > What do you think?
>> >
>> > In all cases, we should probably at least merge patch 1-8 if that can
>> > resolve the CI issue.  I think all of them are properly reviewed.
>> 
>> I agree. Someone needs to queue this though since Juan has been busy.
>
> Yes, I'll see what I can do.

Thanks. I could

Re: [PATCH] target/mips: Fix TX79 LQ/SQ opcodes

2023-09-14 Thread Richard Henderson


On 9/14/23 02:04, Philippe Mathieu-Daudé wrote:

The base register address offset is*signed*.

Cc:qemu-sta...@nongnu.org
Fixes: 82a9f9 ("target/mips/tx79: Introduce LQ opcode (Load Quadword)")
Signed-off-by: Philippe Mathieu-Daudé
---
  target/mips/tcg/tx79.decode | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH] target/mips: Fix MSA BZ/BNZ opcodes displacement

2023-09-14 Thread Richard Henderson


On 9/14/23 01:58, Philippe Mathieu-Daudé wrote:

The PC offset is*signed*.

Cc:qemu-sta...@nongnu.org
Reported-by: Sergey Evlashev
Resolves:https://gitlab.com/qemu-project/qemu/-/issues/1624
Fixes: c7a9ef7517 ("target/mips: Introduce decode tree bindings for MSA ASE")
Signed-off-by: Philippe Mathieu-Daudé
---
  target/mips/tcg/msa.decode | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH] hw/cxl: Fix out of bound array access

2023-09-14 Thread Michael Tokarev


14.09.2023 15:59, Philippe Mathieu-Daudé wrote:


Cc: qemu-sta...@nongnu.org
for stable-8.1.


[not related to this particular patch]

Maybe this can help if we specify the releases range as a comment
in the Cc tag, for example here:

Cc: qemu-sta...@nongnu.org # v8.1

and if it were a range:

Cc: qemu-sta...@nongnu.org # v6.2 to v8.0

Michael would that help? If so feel free to modify
docs/devel/stable-process.rst :)


I don't think this is necessary so far.

Or, actually it might help for sure, but it is an extra burden
for regular developers.

For quite some things I can see where it is applicable if there's
a proper Fixes: tag already (that's why I've added this Cc), - it's
trivial to run `git describe' for this commit ID, so it doesn't even
need to have a Cc: stable.

In some cases though it is hard to tell how deep a change needs to
be backported. In such cases some hint can help for sure, but when
in doubt I can ask too.  When you're in context of fixing something,
you usually don't want to think about how to backport it or somesuch,
you concentrate on the fix itself. If you're willing to think about
that too, give a small note in the comment, like some authors do.
If not, and it's entirely okay, and it's unclear if the change should
be applied to earlier versions, I can ask (or notify when picking the
change up for stable), and the author might think about this in another
context.  It's not often - so far anyway - when it's unclear if a
change should be propagated to older releases.

Changing stable-process.rst isn't very useful, as most changes are
changing master, - if anything, regular "contribution" docs should
be changed for that.  But since most stuff is going on through various
subsystem maintainers, they don't usually look there anyway :)

For now, it's basically my own whim to provide stable series for
older qemu releases.  Or an experiment.  I don't know how useful it
is (or will be) and how it will go long-term.  We've never had this
before.

In my view it is much more important to either add the Fixes: tag
(which gives me a hint already, as I check for all Fixes: in patches
being applied, and can automate this further by doing git-desc on
the hashes, alerting me if it is before the current release) or realize
something needs to go to stable at all. Even at the cost of sending
extra stuff to stable which is actually not needed at all - this is
entirely okay.  This is why I'm asking about various changes going
in, reminding about -stable existance, - because some stuff isn't
marked as a fix at all but in fact it is an important thing for
stable (one or another).

Thank you for your support Phil! :)

/mjt

Re: [PATCH v2 08/20] asc: generate silence if FIFO empty but engine still running

2023-09-14 Thread Philippe Mathieu-Daudé


On 14/9/23 09:56, Philippe Mathieu-Daudé wrote:


On 9/9/23 11:48, Mark Cave-Ayland wrote:
MacOS (un)helpfully leaves the FIFO engine running even when all the 
samples have
been written to the hardware, and expects the FIFO status flags and 
IRQ to be

updated continuously.

There is an additional problem in that not all audio backends 
guarantee an
all-zero output when there is no FIFO data available, in particular 
the Windows
dsound backend which re-uses its internal circular buffer causing the 
last played

sound to loop indefinitely.

Whilst this is effectively a bug in the Windows dsound backend, work 
around it
for now using a simple heuristic: if the FIFO remains empty for half a 
cycle

(~23ms) then continuously fill the generated buffer with empty silence.

Signed-off-by: Mark Cave-Ayland 
---
  hw/audio/asc.c | 19 +++
  include/hw/audio/asc.h |  2 ++
  2 files changed, 21 insertions(+)

diff --git a/hw/audio/asc.c b/hw/audio/asc.c
index 336ace0cd6..b01b285512 100644
--- a/hw/audio/asc.c
+++ b/hw/audio/asc.c
@@ -334,6 +334,21 @@ static void asc_out_cb(void *opaque, int free_b)
  }
  if (!generated) {
+    /* Workaround for audio underflow bug on Windows dsound 
backend */

+    int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    int silent_samples = muldiv64(now - s->fifo_empty_ns,
+  NANOSECONDS_PER_SECOND, ASC_FREQ);
+
+    if (silent_samples > ASC_FIFO_CYCLE_TIME / 2) {
+    /*
+ * No new FIFO data within half a cycle time (~23ms) so 
fill the
+ * entire available buffer with silence. This prevents an 
issue
+ * with the Windows dsound backend whereby the sound 
appears to

+ * loop because the FIFO has run out of data, and the driver
+ * reuses the stale content in its circular audio buffer.
+ */
+    AUD_write(s->voice, s->silentbuf, samples << s->shift);
+    }
  return;
  }


What about having audio_callback_fn returning a boolean, and using
a flag in backends for that silence case? Roughtly:

-- >8 --
diff --git a/audio/audio.h b/audio/audio.h
index 01bdc567fb..4844771c92 100644
--- a/audio/audio.h
+++ b/audio/audio.h
@@ -30,7 +30,7 @@
  #include "hw/qdev-properties.h"
  #include "hw/qdev-properties-system.h"

-typedef void (*audio_callback_fn) (void *opaque, int avail);
+typedef bool (*audio_callback_fn) (void *opaque, int avail);

  #if HOST_BIG_ENDIAN
  #define AUDIO_HOST_ENDIANNESS 1
diff --git a/audio/audio.c b/audio/audio.c
index 90c7c49d11..5b6e69fbd6 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -1178,8 +1178,11 @@ static void audio_run_out (AudioState *s)
  if (free > sw->resample_buf.pos) {
  free = MIN(free, sw->resample_buf.size)
     - sw->resample_buf.pos;
-    sw->callback.fn(sw->callback.opaque,
-    free * sw->info.bytes_per_frame);
+    if (!sw->callback.fn(sw->callback.opaque,
+ free * sw->info.bytes_per_frame)
+    && unlikely(hw->silentbuf_required)) {
+    /* write silence ... */
+    }
  }
  }
  }
---


(Clarifying, not a blocker for this series, just wondering)

Re: [RFC PATCH 0/3] Refactor PPI logic/definitions for virt/sbsa-ref

2023-09-14 Thread Marcin Juszkiewicz


W dniu 14.09.2023 o 14:01, Leif Lindholm pisze:

While reviewing Marcin's patch this morning, cross referencing different
specifications and looking at various places around the source code in
order to convinced myself he really hadn't missed something out (the
existing plumbing made it *so* clean to add), my brain broke slightly
at keeping track of PPIs/INTIDs between the various sources.

Moreover, I found the PPI() macro in virt.h to be doing the exact
opposite of what I would have expected it to (it converts a PPI to an
INTID rather than the other way around).

So I refactored stuff so that:
- PPIs defined by BSA are moved to a (new) common header.
- The _IRQ definitions for those PPIs refer to the INTIDs.
- sbsa-ref and virt both use these definitions.

This change does objectively add a bit more noise to the code, since it
means more locations need to use the PPI macro than before, but it felt
like a readability improvement to me.


I like how code looks after those changes. No more adding 16 to irq
numbers to fit them into BSA spec numbers is nice to have.

Will rebase my "non-secure EL2 virtual timer" patch on top of it.


Not even compilation tested, just the least confusing way of asking
whether the change could be accepted at all.


There are build warnings and final binary segfaults on start.


[1799/2931] Compiling C object libqemu-aarch64-softmmu.fa.p/hw_arm_sbsa-ref.c.o
../hw/arm/sbsa-ref.c: In function ‘create_gic’:
../hw/arm/sbsa-ref.c:496:13: warning: assignment to ‘int’ from ‘qemu_irq’ {aka 
‘struct IRQState *’} makes integer from pointer without a cast 
[-Wint-conversion]
  496 | irq = qdev_get_gpio_in(sms->gic,
  | ^
../hw/arm/sbsa-ref.c:499:37: warning: passing argument 4 of 
‘qdev_connect_gpio_out_named’ makes pointer from integer without a cast 
[-Wint-conversion]
  499 | irq);
  | ^~~
  | |
  | int
In file included from 
/home/marcin/devel/linaro/sbsa-qemu/code/qemu/include/hw/core/cpu.h:23,
 from ../target/arm/cpu-qom.h:23,
 from ../target/arm/cpu.h:26,
 from 
/home/marcin/devel/linaro/sbsa-qemu/code/qemu/include/sysemu/kvm.h:244,
 from ../hw/arm/sbsa-ref.c:27:
/home/marcin/devel/linaro/sbsa-qemu/code/qemu/include/hw/qdev-core.h:699:43: 
note: expected ‘qemu_irq’ {aka ‘struct IRQState *’} but argument is of type 
‘int’
  699 |  qemu_irq input_pin);
  |  ~^
../hw/arm/sbsa-ref.c:501:13: warning: assignment to ‘int’ from ‘qemu_irq’ {aka 
‘struct IRQState *’} makes integer from pointer without a cast 
[-Wint-conversion]
  501 | irq = qdev_get_gpio_in(sms->gic,
  | ^
../hw/arm/sbsa-ref.c:503:65: warning: passing argument 4 of 
‘qdev_connect_gpio_out_named’ makes pointer from integer without a cast 
[-Wint-conversion]
  503 | qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, irq);
  | ^~~
  | |
  | int
/home/marcin/devel/linaro/sbsa-qemu/code/qemu/include/hw/qdev-core.h:699:43: 
note: expected ‘qemu_irq’ {aka ‘struct IRQState *’} but argument is of type 
‘int’
  699 |  qemu_irq input_pin);
  |  ~^

Re: [PATCH v2 00/21] Graph locking part 4 (node management)

2023-09-14 Thread Kevin Wolf

Am 12.09.2023 um 18:49 hat Stefan Hajnoczi geschrieben:
> On Mon, Sep 11, 2023 at 11:45:59AM +0200, Kevin Wolf wrote:
> > The previous parts of the graph locking changes focussed mostly on the
> > BlockDriver side and taking reader locks while performing I/O. This
> > series focusses more on the functions managing the graph structure, i.e
> > adding, removing and replacing nodes and updating their permissions.
> > 
> > Many of these places actually need to take the writer lock to avoid
> > readers seeing an inconsistent half-updated graph state. Therefore
> > taking the writer lock is now moved from the very low-level function
> > bdrv_replace_child_noperm() into its more high level callers.
> > 
> > v2:
> > - Patch 5: Improved comments, added one for bdrv_schedule_unref()
> > 
> > Kevin Wolf (21):
> >   block: Remove unused BlockReopenQueueEntry.perms_checked
> >   preallocate: Factor out preallocate_truncate_to_real_size()
> >   preallocate: Don't poll during permission updates
> >   block: Take AioContext lock for bdrv_append() more consistently
> >   block: Introduce bdrv_schedule_unref()
> >   block-coroutine-wrapper: Add no_co_wrapper_bdrv_wrlock functions
> >   block-coroutine-wrapper: Allow arbitrary parameter names
> >   block: Mark bdrv_replace_child_noperm() GRAPH_WRLOCK
> >   block: Mark bdrv_replace_child_tran() GRAPH_WRLOCK
> >   block: Mark bdrv_attach_child_common() GRAPH_WRLOCK
> >   block: Call transaction callbacks with lock held
> >   block: Mark bdrv_attach_child() GRAPH_WRLOCK
> >   block: Mark bdrv_parent_perms_conflict() and callers GRAPH_RDLOCK
> >   block: Mark bdrv_get_cumulative_perm() and callers GRAPH_RDLOCK
> >   block: Mark bdrv_child_perm() GRAPH_RDLOCK
> >   block: Mark bdrv_parent_cb_change_media() GRAPH_RDLOCK
> >   block: Take graph rdlock in bdrv_drop_intermediate()
> >   block: Take graph rdlock in bdrv_change_aio_context()
> >   block: Mark bdrv_root_unref_child() GRAPH_WRLOCK
> >   block: Mark bdrv_unref_child() GRAPH_WRLOCK
> >   block: Mark bdrv_add/del_child() and caller GRAPH_WRLOCK
> > 
> >  include/block/block-common.h|   4 +
> >  include/block/block-global-state.h  |  30 +-
> >  include/block/block_int-common.h|  34 +-
> >  include/block/block_int-global-state.h  |  14 +-
> >  include/sysemu/block-backend-global-state.h |   4 +-
> >  block.c | 348 ++--
> >  block/blklogwrites.c|   4 +
> >  block/blkverify.c   |   2 +
> >  block/block-backend.c   |  29 +-
> >  block/copy-before-write.c   |  10 +-
> >  block/crypto.c  |   6 +-
> >  block/graph-lock.c  |  26 +-
> >  block/mirror.c  |   8 +
> >  block/preallocate.c | 133 +---
> >  block/qcow2.c   |   4 +-
> >  block/quorum.c  |  23 +-
> >  block/replication.c |   9 +
> >  block/snapshot.c|   2 +
> >  block/stream.c  |  20 +-
> >  block/vmdk.c|  13 +
> >  blockdev.c  |  23 +-
> >  blockjob.c  |   2 +
> >  tests/unit/test-bdrv-drain.c|  23 +-
> >  tests/unit/test-bdrv-graph-mod.c|  20 ++
> >  tests/unit/test-block-iothread.c|   3 +
> >  scripts/block-coroutine-wrapper.py  |  18 +-
> >  tests/qemu-iotests/051.pc.out   |   6 +-
> >  27 files changed, 591 insertions(+), 227 deletions(-)
> 
> Reviewed-by: Stefan Hajnoczi 

Thanks, applied to the block branch.

Kevin


signature.asc
Description: PGP signature

Re: [RFC PATCH v2 00/21] QEMU gmem implemention

2023-09-14 Thread David Hildenbrand


On 14.09.23 05:50, Xiaoyao Li wrote:

It's the v2 RFC of enabling KVM gmem[1] as the backend for private
memory.

For confidential-computing, KVM provides gmem/guest_mem interfaces for
userspace, like QEMU, to allocate user-unaccesible private memory. This
series aims to add gmem support in QEMU's RAMBlock so that each RAM can
have both hva-based shared memory and gmem_fd based private memory. QEMU
does the shared-private conversion on KVM_MEMORY_EXIT and discards the
memory.

It chooses the design that adds "private" property to hostmeory backend.
If "private" property is set, QEMU will allocate/create KVM gmem when
initialize the RAMbloch of the memory backend.

This sereis also introduces the first user of kvm gmem,
KVM_X86_SW_PROTECTED_VM. A KVM_X86_SW_PROTECTED_VM with private KVM gmem
can be created with

   $qemu -object sw-protected-vm,id=sp-vm0 \
-object memory-backend-ram,id=mem0,size=1G,private=on \
-machine 
q35,kernel_irqchip=split,confidential-guest-support=sp-vm0,memory-backend=mem0 \
...

Unfortunately this patch series fails the boot of OVMF at very early
stage due to triple fault, because KVM doesn't support emulating string IO
to private memory.


Is support being added? Or have we figured out what it would take to 
make it work?


How does this interact with other features (memory ballooning, virtiofs, 
vfio/mdev/...)?




This version still leave some opens to be discussed:
1. whether we need "private" propery to be user-settable?

It seems unnecessary because vm-type is determined. If the VM is
confidential-guest, then the RAM of the guest must be able to be
mapped as private, i.e., have kvm gmem backend. So QEMU can
determine the value of "private" property automatiacally based on vm
type.

This also aligns with the board internal MemoryRegion that needs to
have kvm gmem backend, e.g., TDX requires OVMF to act as private
memory so bios memory region needs to have kvm gmem fd associated.
QEMU no doubt will do it internally automatically.


Would it make sense to have some regions without "pivate" semantics? 
Like NVDIMMs?




2. hugepage support.

KVM gmem can be allocated from hugetlbfs. How does QEMU determine
when to allocate KVM gmem with KVM_GUEST_MEMFD_ALLOW_HUGEPAGE. The
easiest solution is create KVM gmem with KVM_GUEST_MEMFD_ALLOW_HUGEPAGE
only when memory backend is HostMemoryBackendFile of hugetlbfs.


Good question.

Probably "if the memory backend uses huge pages, also use huge pages for 
the private gmem" makes sense.


... but it becomes a mess with preallocation ... which is what people 
should actually be using with hugetlb. Andeventual double 
memory-consumption ... but maybe that's all been taken care of already?


Probably it's best to leave hugetlb support as future work and start 
with something minimal.




3. What is KVM_X86_SW_PROTECTED_VM going to look like? and do we need it?



Why implement it when you have to ask others for a motivation? ;)

Personally, I'm not sure if it is really useful, especially in this state.


This series implements KVM_X86_SW_PROTECTED_VM because it's introduced
with gmem together on KVM side and it's supposed to be the first user
who requires KVM gmem. However the implementation is incomplete and
there lacks the definition of how KVM_X86_SW_PROTECTED_VM works.


Then it should not be included in this series such that you can make 
progress with the gmem implementation for TDX guests instead?


--
Cheers,

David / dhildenb

Re: [PATCH] hw/qxl: move check of slot_id before accessing guest_slots

2023-09-14 Thread Philippe Mathieu-Daudé


Hi Anastasia,

On 14/9/23 11:27, Anastasia Belova wrote:

If slot_id >= NUM_MEMSLOTS, buffer overflow is possible.


overflow: unlikely. Do you mean over-read?

Did you found that by code audit? I can't see where this
function get slot_id >= NUM_MEMSLOTS.

This isn't guest triggerable and seems an API invalid use,
so developer error, which we can catch with assertions:

-- >8 --
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index 7bb00d68f5..443f034168 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -1309,16 +1309,13 @@ static int qxl_add_memslot(PCIQXLDevice *d, 
uint32_t slot_id, uint64_t delta,

 QXLDevMemSlot memslot;
 int i;

+assert(slot_id < NUM_MEMSLOTS);
+
 guest_start = le64_to_cpu(d->guest_slots[slot_id].slot.mem_start);
 guest_end   = le64_to_cpu(d->guest_slots[slot_id].slot.mem_end);

 trace_qxl_memslot_add_guest(d->id, slot_id, guest_start, guest_end);

-if (slot_id >= NUM_MEMSLOTS) {
-qxl_set_guest_bug(d, "%s: slot_id >= NUM_MEMSLOTS %d >= %d", 
__func__,

-  slot_id, NUM_MEMSLOTS);
-return 1;
-}
 if (guest_start > guest_end) {
 qxl_set_guest_bug(d, "%s: guest_start > guest_end 0x%" PRIx64
  " > 0x%" PRIx64, __func__, guest_start, 
guest_end);

---

Note, qxl_add_memslot() return value is ignored...


So the check should be upper than d->guest_slots[slot_id]
where size of d->guest_slots is NUM_MEMSLOTS.

Fixes: e954ea2873 ("qxl: qxl_add_memslot: remove guest trigerrable panics")
Signed-off-by: Anastasia Belova 
---
  hw/display/qxl.c | 11 ++-
  1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index 7bb00d68f5..dc618727c0 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -1309,16 +1309,17 @@ static int qxl_add_memslot(PCIQXLDevice *d, uint32_t 
slot_id, uint64_t delta,
  QXLDevMemSlot memslot;
  int i;
  
-guest_start = le64_to_cpu(d->guest_slots[slot_id].slot.mem_start);

-guest_end   = le64_to_cpu(d->guest_slots[slot_id].slot.mem_end);
-
-trace_qxl_memslot_add_guest(d->id, slot_id, guest_start, guest_end);
-
  if (slot_id >= NUM_MEMSLOTS) {
  qxl_set_guest_bug(d, "%s: slot_id >= NUM_MEMSLOTS %d >= %d", __func__,
slot_id, NUM_MEMSLOTS);
  return 1;
  }
+
+guest_start = le64_to_cpu(d->guest_slots[slot_id].slot.mem_start);
+guest_end   = le64_to_cpu(d->guest_slots[slot_id].slot.mem_end);
+
+trace_qxl_memslot_add_guest(d->id, slot_id, guest_start, guest_end);
+
  if (guest_start > guest_end) {
  qxl_set_guest_bug(d, "%s: guest_start > guest_end 0x%" PRIx64
   " > 0x%" PRIx64, __func__, guest_start, guest_end);

Re: [PATCH v2 04/10] Introduce the CPU address space destruction function

2023-09-14 Thread lixianglai


Hi David:


On 12.09.23 04:11, xianglai li wrote:

Introduce new function to destroy CPU address space resources
for cpu hot-(un)plug.

How do other archs handle that? Or how are they able to get away 
without destroying?


They do not remove the cpu address space, taking the X86 architecture as 
an example:


1.Start the x86 VM:

./qemu-system-x86_64 \
-machine q35  \
-cpu Broadwell-IBRS \
-smp 1,maxcpus=100,sockets=100,cores=1,threads=1 \
-m 4G \
-drive file=~/anolis-8.8.qcow2  \
-serial stdio   \
-monitor telnet:localhost:4498,server,nowait   \
-nographic

2.Connect the qemu monitor

telnet 127.0.0.1 4498

info mtree

address-space: cpu-memory-0
address-space: memory
  - (prio 0, i/o): system
    -7fff (prio 0, ram): alias ram-below-4g 
@pc.ram -7fff

    - (prio -1, i/o): pci
  000a-000b (prio 1, i/o): vga-lowmem

3.Perform cpu hot swap int qemu monitor

device_add 
Broadwell-IBRS-x86_64-cpu,socket-id=1,core-id=0,thread-id=0,id=cpu1

device_del cpu1

info mtree

address-space: cpu-memory-0
address-space: cpu-memory-1
address-space: memory
  - (prio 0, i/o): system
    -7fff (prio 0, ram): alias ram-below-4g 
@pc.ram -7fff

    - (prio -1, i/o): pci
  000a-000b (prio 1, i/o): vga-lowmem


From the above test, you can see whether the address space of cpu1 is 
residual after a cpu hot swap, and whether it is reasonable?


The address space destruction function of the CPU can be used to delete 
the residual address space of the CPU1.


Thanks,

xianglai.

Re: [PATCH] hw/cxl: Fix out of bound array access

2023-09-14 Thread Philippe Mathieu-Daudé


On 14/9/23 14:38, Michael Tokarev wrote:

14.09.2023 15:37, Michael Tokarev:

13.09.2023 13:10, Dmitry Frolov wrote:

According to cxl_interleave_ways_enc(),
fw->num_targets is allowed to be up to 16.
This also corresponds to CXL specs.
So, the fw->target_hbs[] array is iterated from 0 to 15.
But it is staticaly declared of length 8.
Thus, out of bound array access may occur.

Fixes: c28db9e000 ("hw/pci-bridge: Make PCIe and CXL PXB Devices 
inherit from TYPE_PXB_DEV")


Signed-off-by: Dmitry Frolov 


Reviewed-by: Michael Tokarev 
(with the extra empty line removed :)


Also,

Cc: qemu-sta...@nongnu.org

for stable-8.1.


[not related to this particular patch]

Maybe this can help if we specify the releases range as a comment
in the Cc tag, for example here:

Cc: qemu-sta...@nongnu.org # v8.1

and if it were a range:

Cc: qemu-sta...@nongnu.org # v6.2 to v8.0

Michael would that help? If so feel free to modify
docs/devel/stable-process.rst :)

Regards,

Phil.

Re: [PATCH 4/4] hw/cxl: Line length reductions

2023-09-14 Thread Michael Tokarev


13.09.2023 18:05, Jonathan Cameron via wrote:

Michael Tsirkin observed that there were some unnecessarily
long lines in the CXL code in a recent review.
This patch is intended to rectify that where it does not
hurt readability.


Reviewed-by: Michael Tokarev 

This whole series can be picked up into trivial-patches tree
(whcih is getting ready to be pushed to master).

/mjt

Re: [PATCH 2/4] hw/cxl: Use available size parameter to index into register arrays.

2023-09-14 Thread Michael Tokarev


13.09.2023 18:05, Jonathan Cameron via wrote:

Indexing has to be done into an array with the right size elements.
As such, the size parameter always matches the array element size
and can be used in place of the longer sizeof(*array)

Signed-off-by: Jonathan Cameron 
---
  hw/cxl/cxl-component-utils.c | 8 
  1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
index f3bbf0fd13..089e10b232 100644
--- a/hw/cxl/cxl-component-utils.c
+++ b/hw/cxl/cxl-component-utils.c
@@ -76,7 +76,7 @@ static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr 
offset,
  if (cregs->special_ops && cregs->special_ops->read) {
  return cregs->special_ops->read(cxl_cstate, offset, size);
  } else {
-return cregs->cache_mem_registers[offset / 
sizeof(*cregs->cache_mem_registers)];
+return cregs->cache_mem_registers[offset / size];


This is a though one, and smells wrong.

Though because it is not at all obvious where this "size" value comes from,
have to find usage(s) of this function (cache_mem_ops) and think twice about
the other parameters in there.  Also having in mind the previous comparison
with 8.  In this part of the code, size should always be =4, but it takes
hard time to figure this out.

Wrong - no, because of the above - the only 2 possible values are 4 and 8,
but it's difficult to see what's going on, and you're making it worse.

Original code was at least clear you're getting a single register from
an array of registers, with new code it is not clear at all.

What I'd probably use here is to add comment that size can be either 4 or 8,
and use a switch similar to what you've used in first patch in this series.
And have a static_assert(sizeof(register) == 4) or something like that
here in this second branch.

So it is something like:

static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset,
   unsigned size)
{
CXLComponentState *cxl_cstate = opaque;
ComponentRegisters *cregs = _cstate->crb;

switch (size) {
case 8:
qemu_log_mask(LOG_UNIMP,
  "CXL 8 byte cache mem registers not implemented\n");
return 0;

case 4:
if (cregs->special_ops && cregs->special_ops->read) {
return cregs->special_ops->read(cxl_cstate, offset, 4);
} else {
return cregs->cache_mem_registers[offset /
  
sizeof(*cregs->cache_mem_registers)];
}

default:
/* this routine is called with size being either 4 or 8 only */
g_assert_not_reached();
}
}

Note: I especially left the sizeof() here, instead of using a previously
suggested static_assert() - because a register can be implemented using
larger integers on the host, it does not need to be 4 bytes, - but only
low 4 bytes can actually be used.

This does not shorten the line (it does by wrapping it up), but it keep
code correct and more understandable.  Adding size parameter there makes
it much more cryptic.

Here and in other places.

This is just an example, not a suggestion.

/mjt

[sdl-qemu] [PATCH 1/1] No checks, dereferencing possible

2023-09-14 Thread Миронов Сергей Владимирович

No checks, dereferencing possible.


Return value of a function 'virDomainChrSourceDefNew'
is dereferenced at qemu_command.c without checking
for NULL, but it is usually checked for this function.


Found by Linux Verification Center (linuxtesting.org) with SVACE.


Fixes: 1f85f0967b ("ci: jobs.sh: Add back '--no-suite syntax-check 
--print-errorlogs'")

Signed-off-by: Sergey Mironov 

---
src/qemu/qemu_command.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index e84374b4cf..8d11972c88 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -4698,6 +4698,8 @@ qemuBuildVideoCommandLine(virCommand *cmd,
 g_autofree char *name = g_strdup_printf("%s-vhost-user", 
video->info.alias);
 qemuDomainChrSourcePrivate *chrsrcpriv = 
QEMU_DOMAIN_CHR_SOURCE_PRIVATE(chrsrc);

+   if (chrsrc == NULL)
+   return -1;
 chrsrc->type = VIR_DOMAIN_CHR_TYPE_UNIX;
 chrsrcpriv->directfd = qemuFDPassDirectNew(name, 
>vhost_user_fd);
--
2.31.1

[sdl-qemu] [PATCH 0/1] There are no checks, virDomainChrSourceDefNew can return 0

2023-09-14 Thread Миронов Сергей Владимирович

There are no checks, virDomainChrSourceDefNew can return 0.


Return value of a function 'virDomainChrSourceDefNew'

is dereferenced at qemu_hotplug.c without checking for NULL,

but it is usually checked for this function.


Found by Linux Verification Center (linuxtesting.org) with SVACE.


Fixes: 1f85f0967b ("ci: jobs.sh: Add back '--no-suite syntax-check 
--print-errorlogs'")

Signed-off-by: Sergey Mironov 

---
src/qemu/qemu_hotplug.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
index 177ca87d11..09e16c2c7e 100644
--- a/src/qemu/qemu_hotplug.c
+++ b/src/qemu/qemu_hotplug.c
@@ -3207,6 +3207,8 @@ qemuDomainAttachFSDevice(virQEMUDriver *driver,
 qemuAssignDeviceFSAlias(vm->def, fs);

 chardev = virDomainChrSourceDefNew(priv->driver->xmlopt);
+if (chardev == NULL)
+   goto cleanup;
 chardev->type = VIR_DOMAIN_CHR_TYPE_UNIX;
 chardev->data.nix.path = qemuDomainGetVHostUserFSSocketPath(priv, fs);
--
2.31.1

Re: [PATCH] qdev-properties: alias all object class properties

2023-09-14 Thread Stefan Hajnoczi

Paolo: ping?

On Thu, 3 Aug 2023 at 15:51, Stefan Hajnoczi  wrote:
>
> qdev_alias_all_properties() aliases a DeviceState's qdev properties onto
> an Object. This is used for VirtioPCIProxy types so that --device
> virtio-blk-pci has properties of its embedded --device virtio-blk-device
> object.
>
> Currently this function is implemented using qdev properties. Change the
> function to use QOM object class properties instead. This works because
> qdev properties create QOM object class properties, but it also catches
> any QOM object class-only properties that have no qdev properties.
>
> This change ensures that properties of devices are shown with --device
> foo,\? even if they are QOM object class properties.
>
> Signed-off-by: Stefan Hajnoczi 
> ---
>  hw/core/qdev-properties.c | 18 ++
>  1 file changed, 10 insertions(+), 8 deletions(-)
>
> diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c
> index 357b8761b5..fbf3969d3c 100644
> --- a/hw/core/qdev-properties.c
> +++ b/hw/core/qdev-properties.c
> @@ -959,16 +959,18 @@ void device_class_set_props(DeviceClass *dc, Property 
> *props)
>  void qdev_alias_all_properties(DeviceState *target, Object *source)
>  {
>  ObjectClass *class;
> -Property *prop;
> +ObjectPropertyIterator iter;
> +ObjectProperty *prop;
>
>  class = object_get_class(OBJECT(target));
> -do {
> -DeviceClass *dc = DEVICE_CLASS(class);
>
> -for (prop = dc->props_; prop && prop->name; prop++) {
> -object_property_add_alias(source, prop->name,
> -  OBJECT(target), prop->name);
> +object_class_property_iter_init(, class);
> +while ((prop = object_property_iter_next())) {
> +if (object_property_find(source, prop->name)) {
> +continue; /* skip duplicate properties */
>  }
> -class = object_class_get_parent(class);
> -} while (class != object_class_by_name(TYPE_DEVICE));
> +
> +object_property_add_alias(source, prop->name,
> +  OBJECT(target), prop->name);
> +}
>  }
> --
> 2.41.0
>
>

[PATCH v23 08/20] qapi/s390x/cpu topology: set-cpu-topology qmp command

2023-09-14 Thread Nina Schoetterl-Glausch

From: Pierre Morel 

The modification of the CPU attributes are done through a monitor
command.

It allows to move the core inside the topology tree to optimize
the cache usage in the case the host's hypervisor previously
moved the CPU.

The same command allows to modify the CPU attributes modifiers
like polarization entitlement and the dedicated attribute to notify
the guest if the host admin modified scheduling or dedication of a vCPU.

With this knowledge the guest has the possibility to optimize the
usage of the vCPUs.

The command has a feature unstable for the moment.

Signed-off-by: Pierre Morel 
Reviewed-by: Nina Schoetterl-Glausch 
Co-developed-by: Nina Schoetterl-Glausch 
Signed-off-by: Nina Schoetterl-Glausch 
---
 qapi/machine-target.json |  37 +++
 hw/s390x/cpu-topology.c  | 132 +++
 2 files changed, 169 insertions(+)

diff --git a/qapi/machine-target.json b/qapi/machine-target.json
index 0d45a590ce..e47a252bd9 100644
--- a/qapi/machine-target.json
+++ b/qapi/machine-target.json
@@ -4,6 +4,8 @@
 # This work is licensed under the terms of the GNU GPL, version 2 or later.
 # See the COPYING file in the top-level directory.
 
+{ 'include': 'machine-common.json' }
+
 ##
 # @CpuModelInfo:
 #
@@ -375,3 +377,38 @@
   'data': [ 'horizontal', 'vertical' ],
 'if': 'TARGET_S390X'
 }
+
+##
+# @set-cpu-topology:
+#
+# @core-id: the vCPU ID to be moved
+# @socket-id: optional destination socket where to move the vCPU
+# @book-id: optional destination book where to move the vCPU
+# @drawer-id: optional destination drawer where to move the vCPU
+# @entitlement: optional entitlement
+# @dedicated: optional, if the vCPU is dedicated to a real CPU
+#
+# Features:
+# @unstable: This command may still be modified.
+#
+# Modifies the topology by moving the CPU inside the topology
+# tree or by changing a modifier attribute of a CPU.
+# Default value for optional parameter is the current value
+# used by the CPU.
+#
+# Returns: Nothing on success, the reason on failure.
+#
+# Since: 8.2
+##
+{ 'command': 'set-cpu-topology',
+  'data': {
+  'core-id': 'uint16',
+  '*socket-id': 'uint16',
+  '*book-id': 'uint16',
+  '*drawer-id': 'uint16',
+  '*entitlement': 'CpuS390Entitlement',
+  '*dedicated': 'bool'
+  },
+  'features': [ 'unstable' ],
+  'if': { 'all': [ 'TARGET_S390X' , 'CONFIG_KVM' ] }
+}
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
index 13f404a0d7..28adfb3f84 100644
--- a/hw/s390x/cpu-topology.c
+++ b/hw/s390x/cpu-topology.c
@@ -23,6 +23,7 @@
 #include "target/s390x/cpu.h"
 #include "hw/s390x/s390-virtio-ccw.h"
 #include "hw/s390x/cpu-topology.h"
+#include "qapi/qapi-commands-machine-target.h"
 
 /*
  * s390_topology is used to keep the topology information.
@@ -257,6 +258,29 @@ static bool s390_topology_check(uint16_t socket_id, 
uint16_t book_id,
 return true;
 }
 
+/**
+ * s390_topology_need_report
+ * @cpu: Current cpu
+ * @drawer_id: future drawer ID
+ * @book_id: future book ID
+ * @socket_id: future socket ID
+ * @entitlement: future entitlement
+ * @dedicated: future dedicated
+ *
+ * A modified topology change report is needed if the topology
+ * tree or the topology attributes change.
+ */
+static bool s390_topology_need_report(S390CPU *cpu, int drawer_id,
+  int book_id, int socket_id,
+  uint16_t entitlement, bool dedicated)
+{
+return cpu->env.drawer_id != drawer_id ||
+   cpu->env.book_id != book_id ||
+   cpu->env.socket_id != socket_id ||
+   cpu->env.entitlement != entitlement ||
+   cpu->env.dedicated != dedicated;
+}
+
 /**
  * s390_update_cpu_props:
  * @ms: the machine state
@@ -325,3 +349,111 @@ void s390_topology_setup_cpu(MachineState *ms, S390CPU 
*cpu, Error **errp)
 /* topology tree is reflected in props */
 s390_update_cpu_props(ms, cpu);
 }
+
+static void s390_change_topology(uint16_t core_id,
+ bool has_socket_id, uint16_t socket_id,
+ bool has_book_id, uint16_t book_id,
+ bool has_drawer_id, uint16_t drawer_id,
+ bool has_entitlement,
+ CpuS390Entitlement entitlement,
+ bool has_dedicated, bool dedicated,
+ Error **errp)
+{
+MachineState *ms = current_machine;
+int old_socket_entry;
+int new_socket_entry;
+bool report_needed;
+S390CPU *cpu;
+
+cpu = s390_cpu_addr2state(core_id);
+if (!cpu) {
+error_setg(errp, "Core-id %d does not exist!", core_id);
+return;
+}
+
+/* Get attributes not provided from cpu and verify the new topology */
+if (!has_socket_id) {
+socket_id = cpu->env.socket_id;
+}
+if (!has_book_id) {
+book_id = cpu->env.book_id;
+}
+if (!has_drawer_id) {

[PATCH v23 04/20] s390x/sclp: reporting the maximum nested topology entries

2023-09-14 Thread Nina Schoetterl-Glausch

From: Pierre Morel 

The maximum nested topology entries is used by the guest to
know how many nested topology are available on the machine.

Let change the MNEST value from 2 to 4 in the SCLP READ INFO
structure now that we support books and drawers.

Signed-off-by: Pierre Morel 
Reviewed-by: Nina Schoetterl-Glausch 
Reviewed-by: Thomas Huth 
---
 include/hw/s390x/sclp.h | 5 +++--
 hw/s390x/sclp.c | 5 +
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h
index c49051e17e..9aef6d9370 100644
--- a/include/hw/s390x/sclp.h
+++ b/include/hw/s390x/sclp.h
@@ -112,12 +112,13 @@ typedef struct CPUEntry {
 } QEMU_PACKED CPUEntry;
 
 #define SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET 128
-#define SCLP_READ_SCP_INFO_MNEST2
+#define SCLP_READ_SCP_INFO_MNEST4
 typedef struct ReadInfo {
 SCCBHeader h;
 uint16_t rnmax;
 uint8_t rnsize;
-uint8_t  _reserved1[16 - 11];   /* 11-15 */
+uint8_t  _reserved1[15 - 11];   /* 11-14 */
+uint8_t stsi_parm;  /* 15-15 */
 uint16_t entries_cpu;   /* 16-17 */
 uint16_t offset_cpu;/* 18-19 */
 uint8_t  _reserved2[24 - 20];   /* 20-23 */
diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c
index eff74479f4..d339cbb7e4 100644
--- a/hw/s390x/sclp.c
+++ b/hw/s390x/sclp.c
@@ -20,6 +20,7 @@
 #include "hw/s390x/event-facility.h"
 #include "hw/s390x/s390-pci-bus.h"
 #include "hw/s390x/ipl.h"
+#include "hw/s390x/cpu-topology.h"
 
 static inline SCLPDevice *get_sclp_device(void)
 {
@@ -123,6 +124,10 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb)
 return;
 }
 
+if (s390_has_topology()) {
+read_info->stsi_parm = SCLP_READ_SCP_INFO_MNEST;
+}
+
 /* CPU information */
 prepare_cpu_entries(machine, entries_start, _count);
 read_info->entries_cpu = cpu_to_be16(cpu_count);
-- 
2.39.2

[PATCH v23 10/20] machine: adding s390 topology to info hotpluggable-cpus

2023-09-14 Thread Nina Schoetterl-Glausch

From: Pierre Morel 

S390 topology adds books and drawers topology containers.
Let's add these to the HMP information for hotpluggable cpus.

Signed-off-by: Pierre Morel 
Reviewed-by: Nina Schoetterl-Glausch 
Reviewed-by: Thomas Huth 
---
 hw/core/machine-hmp-cmds.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c
index c3e55ef9e9..9a4b59c6f2 100644
--- a/hw/core/machine-hmp-cmds.c
+++ b/hw/core/machine-hmp-cmds.c
@@ -71,6 +71,12 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict)
 if (c->has_node_id) {
 monitor_printf(mon, "node-id: \"%" PRIu64 "\"\n", c->node_id);
 }
+if (c->has_drawer_id) {
+monitor_printf(mon, "drawer-id: \"%" PRIu64 "\"\n", 
c->drawer_id);
+}
+if (c->has_book_id) {
+monitor_printf(mon, "book-id: \"%" PRIu64 "\"\n", c->book_id);
+}
 if (c->has_socket_id) {
 monitor_printf(mon, "socket-id: \"%" PRIu64 "\"\n", 
c->socket_id);
 }
-- 
2.39.2

[PATCH v23 01/20] CPU topology: extend with s390 specifics

2023-09-14 Thread Nina Schoetterl-Glausch

From: Pierre Morel 

S390 adds two new SMP levels, drawers and books to the CPU
topology.
S390 CPUs have specific topology features like dedication and
entitlement. These indicate to the guest information on host
vCPU scheduling and help the guest make better scheduling decisions.

Let us provide the SMP properties with books and drawers levels
and S390 CPU with dedication and entitlement,

Signed-off-by: Pierre Morel 
Reviewed-by: Nina Schoetterl-Glausch 
Co-developed-by: Nina Schoetterl-Glausch 
Signed-off-by: Nina Schoetterl-Glausch 
---
 qapi/machine-common.json| 21 +
 qapi/machine.json   | 19 ++--
 include/hw/boards.h | 10 +-
 include/hw/qdev-properties-system.h |  4 +++
 target/s390x/cpu.h  |  6 
 hw/core/machine-smp.c   | 48 -
 hw/core/machine.c   |  4 +++
 hw/core/qdev-properties-system.c| 13 
 hw/s390x/s390-virtio-ccw.c  |  4 +++
 softmmu/vl.c|  6 
 target/s390x/cpu.c  |  7 +
 qapi/meson.build|  1 +
 qemu-options.hx |  7 +++--
 13 files changed, 137 insertions(+), 13 deletions(-)
 create mode 100644 qapi/machine-common.json

diff --git a/qapi/machine-common.json b/qapi/machine-common.json
new file mode 100644
index 00..e40421bb37
--- /dev/null
+++ b/qapi/machine-common.json
@@ -0,0 +1,21 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+
+##
+# = Machines S390 data types
+##
+
+##
+# @CpuS390Entitlement:
+#
+# An enumeration of cpu entitlements that can be assumed by a virtual
+# S390 CPU
+#
+# Since: 8.2
+##
+{ 'enum': 'CpuS390Entitlement',
+  'prefix': 'S390_CPU_ENTITLEMENT',
+  'data': [ 'auto', 'low', 'medium', 'high' ] }
diff --git a/qapi/machine.json b/qapi/machine.json
index a08b6576ca..a63cb951d2 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -9,6 +9,7 @@
 ##
 
 { 'include': 'common.json' }
+{ 'include': 'machine-common.json' }
 
 ##
 # @SysEmuTarget:
@@ -71,7 +72,7 @@
 #
 # @thread-id: ID of the underlying host thread
 #
-# @props: properties describing to which node/socket/core/thread
+# @props: properties describing to which node/drawer/book/socket/core/thread
 # virtual CPU belongs to, provided if supported by board
 #
 # @target: the QEMU system emulation target, which determines which
@@ -901,7 +902,11 @@
 #
 # @node-id: NUMA node ID the CPU belongs to
 #
-# @socket-id: socket number within node/board the CPU belongs to
+# @drawer-id: drawer number within node/board the CPU belongs to (since 8.2)
+#
+# @book-id: book number within drawer/node/board the CPU belongs to (since 8.2)
+#
+# @socket-id: socket number within book/node/board the CPU belongs to
 #
 # @die-id: die number within socket the CPU belongs to (since 4.1)
 #
@@ -912,7 +917,7 @@
 #
 # @thread-id: thread number within core the CPU belongs to
 #
-# Note: currently there are 6 properties that could be present but
+# Note: currently there are 8 properties that could be present but
 # management should be prepared to pass through other properties
 # with device_add command to allow for future interface extension.
 # This also requires the filed names to be kept in sync with the
@@ -922,6 +927,8 @@
 ##
 { 'struct': 'CpuInstanceProperties',
   'data': { '*node-id': 'int',
+'*drawer-id': 'int',
+'*book-id': 'int',
 '*socket-id': 'int',
 '*die-id': 'int',
 '*cluster-id': 'int',
@@ -1480,6 +1487,10 @@
 #
 # @cpus: number of virtual CPUs in the virtual machine
 #
+# @drawers: number of drawers in the CPU topology (since 8.2)
+#
+# @books: number of books in the CPU topology (since 8.2)
+#
 # @sockets: number of sockets in the CPU topology
 #
 # @dies: number of dies per socket in the CPU topology
@@ -1498,6 +1509,8 @@
 ##
 { 'struct': 'SMPConfiguration', 'data': {
  '*cpus': 'int',
+ '*drawers': 'int',
+ '*books': 'int',
  '*sockets': 'int',
  '*dies': 'int',
  '*clusters': 'int',
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 6c67af196a..6dcfc879eb 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -134,12 +134,16 @@ typedef struct {
  * @clusters_supported - whether clusters are supported by the machine
  * @has_clusters - whether clusters are explicitly specified in the user
  * provided SMP configuration
+ * @books_supported - whether books are supported by the machine
+ * @drawers_supported - whether drawers are supported by the machine
  */
 typedef struct {
 bool prefer_sockets;
 bool dies_supported;
 bool clusters_supported;
 bool has_clusters;
+bool books_supported;
+bool drawers_supported;
 } SMPCompatProps;
 
 /**
@@ -310,7 +314,9 @@ typedef struct

[PATCH v23 11/20] qapi/s390x/cpu topology: CPU_POLARIZATION_CHANGE qapi event

2023-09-14 Thread Nina Schoetterl-Glausch

From: Pierre Morel 

When the guest asks to change the polarization this change
is forwarded to the upper layer using QAPI.
The upper layer is supposed to take according decisions concerning
CPU provisioning.

Signed-off-by: Pierre Morel 
Reviewed-by: Thomas Huth 
Reviewed-by: Nina Schoetterl-Glausch 
Co-developed-by: Nina Schoetterl-Glausch 
Signed-off-by: Nina Schoetterl-Glausch 
---
 qapi/machine-target.json | 33 +
 hw/s390x/cpu-topology.c  |  2 ++
 2 files changed, 35 insertions(+)

diff --git a/qapi/machine-target.json b/qapi/machine-target.json
index e47a252bd9..276c3bf9d1 100644
--- a/qapi/machine-target.json
+++ b/qapi/machine-target.json
@@ -412,3 +412,36 @@
   'features': [ 'unstable' ],
   'if': { 'all': [ 'TARGET_S390X' , 'CONFIG_KVM' ] }
 }
+
+##
+# @CPU_POLARIZATION_CHANGE:
+#
+# Emitted when the guest asks to change the polarization.
+#
+# @polarization: polarization specified by the guest
+#
+# Features:
+# @unstable: This command may still be modified.
+#
+# The guest can tell the host (via the PTF instruction) whether the
+# CPUs should be provisioned using horizontal or vertical polarization.
+#
+# On horizontal polarization the host is expected to provision all vCPUs
+# equally.
+# On vertical polarization the host can provision each vCPU differently.
+# The guest will get information on the details of the provisioning
+# the next time it uses the STSI(15) instruction.
+#
+# Since: 8.2
+#
+# Example:
+#
+# <- { "event": "CPU_POLARIZATION_CHANGE",
+#  "data": { "polarization": "horizontal" },
+#  "timestamp": { "seconds": 1401385907, "microseconds": 422329 } }
+##
+{ 'event': 'CPU_POLARIZATION_CHANGE',
+  'data': { 'polarization': 'CpuS390Polarization' },
+  'features': [ 'unstable' ],
+  'if': { 'all': [ 'TARGET_S390X', 'CONFIG_KVM' ] }
+}
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
index 28adfb3f84..18274db74c 100644
--- a/hw/s390x/cpu-topology.c
+++ b/hw/s390x/cpu-topology.c
@@ -24,6 +24,7 @@
 #include "hw/s390x/s390-virtio-ccw.h"
 #include "hw/s390x/cpu-topology.h"
 #include "qapi/qapi-commands-machine-target.h"
+#include "qapi/qapi-events-machine-target.h"
 
 /*
  * s390_topology is used to keep the topology information.
@@ -136,6 +137,7 @@ void s390_handle_ptf(S390CPU *cpu, uint8_t r1, uintptr_t ra)
 } else {
 s390_topology.polarization = polarization;
 s390_cpu_topology_set_changed(true);
+qapi_event_send_cpu_polarization_change(polarization);
 setcc(cpu, 0);
 }
 break;
-- 
2.39.2

[PATCH v23 02/20] s390x/cpu topology: add topology entries on CPU hotplug

2023-09-14 Thread Nina Schoetterl-Glausch

From: Pierre Morel 

The topology information are attributes of the CPU and are
specified during the CPU device creation.

On hot plug we:
- calculate the default values for the topology for drawers,
  books and sockets in the case they are not specified.
- verify the CPU attributes
- check that we have still room on the desired socket

The possibility to insert a CPU in a mask is dependent on the
number of cores allowed in a socket, a book or a drawer, the
checking is done during the hot plug of the CPU to have an
immediate answer.

If the complete topology is not specified, the core is added
in the physical topology based on its core ID and it gets
defaults values for the modifier attributes.

This way, starting QEMU without specifying the topology can
still get some advantage of the CPU topology.

Signed-off-by: Pierre Morel 
Reviewed-by: Nina Schoetterl-Glausch 
Co-developed-by: Nina Schoetterl-Glausch 
Reviewed-by: Thomas Huth 
Signed-off-by: Nina Schoetterl-Glausch 
---
 MAINTAINERS |   6 +
 include/hw/s390x/cpu-topology.h |  54 +++
 hw/s390x/cpu-topology.c | 259 
 hw/s390x/s390-virtio-ccw.c  |  22 ++-
 hw/s390x/meson.build|   1 +
 5 files changed, 340 insertions(+), 2 deletions(-)
 create mode 100644 include/hw/s390x/cpu-topology.h
 create mode 100644 hw/s390x/cpu-topology.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 00562f924f..9c6599a55b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1697,6 +1697,12 @@ F: hw/s390x/event-facility.c
 F: hw/s390x/sclp*.c
 L: qemu-s3...@nongnu.org
 
+S390 CPU topology
+M: Nina Schoetterl-Glausch 
+S: Supported
+F: include/hw/s390x/cpu-topology.h
+F: hw/s390x/cpu-topology.c
+
 X86 Machines
 
 PC
diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
new file mode 100644
index 00..97b0af2795
--- /dev/null
+++ b/include/hw/s390x/cpu-topology.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * CPU Topology
+ *
+ * Copyright IBM Corp. 2022, 2023
+ * Author(s): Pierre Morel 
+ *
+ */
+#ifndef HW_S390X_CPU_TOPOLOGY_H
+#define HW_S390X_CPU_TOPOLOGY_H
+
+#ifndef CONFIG_USER_ONLY
+
+#include "qemu/queue.h"
+#include "hw/boards.h"
+#include "qapi/qapi-types-machine-target.h"
+
+typedef struct S390Topology {
+uint8_t *cores_per_socket;
+} S390Topology;
+
+#ifdef CONFIG_KVM
+bool s390_has_topology(void);
+void s390_topology_setup_cpu(MachineState *ms, S390CPU *cpu, Error **errp);
+#else
+static inline bool s390_has_topology(void)
+{
+   return false;
+}
+static inline void s390_topology_setup_cpu(MachineState *ms,
+   S390CPU *cpu,
+   Error **errp) {}
+#endif
+
+extern S390Topology s390_topology;
+
+static inline int s390_std_socket(int n, CpuTopology *smp)
+{
+return (n / smp->cores) % smp->sockets;
+}
+
+static inline int s390_std_book(int n, CpuTopology *smp)
+{
+return (n / (smp->cores * smp->sockets)) % smp->books;
+}
+
+static inline int s390_std_drawer(int n, CpuTopology *smp)
+{
+return (n / (smp->cores * smp->sockets * smp->books)) % smp->drawers;
+}
+
+#endif /* CONFIG_USER_ONLY */
+
+#endif
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
new file mode 100644
index 00..189fcc5334
--- /dev/null
+++ b/hw/s390x/cpu-topology.c
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * CPU Topology
+ *
+ * Copyright IBM Corp. 2022, 2023
+ * Author(s): Pierre Morel 
+ *
+ * S390 topology handling can be divided in two parts:
+ *
+ * - The first part in this file is taking care of all common functions
+ *   used by KVM and TCG to create and modify the topology.
+ *
+ * - The second part, building the topology information data for the
+ *   guest with CPU and KVM specificity will be implemented inside
+ *   the target/s390/kvm sub tree.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "hw/qdev-properties.h"
+#include "hw/boards.h"
+#include "target/s390x/cpu.h"
+#include "hw/s390x/s390-virtio-ccw.h"
+#include "hw/s390x/cpu-topology.h"
+
+/*
+ * s390_topology is used to keep the topology information.
+ * .cores_per_socket: tracks information on the count of cores
+ *per socket.
+ */
+S390Topology s390_topology = {
+/* will be initialized after the CPU model is realized */
+.cores_per_socket = NULL,
+};
+
+/**
+ * s390_socket_nb:
+ * @cpu: s390x CPU
+ *
+ * Returns the socket number used inside the cores_per_socket array
+ * for a topology tree entry
+ */
+static int s390_socket_nb_from_ids(int drawer_id, int book_id, int socket_id)
+{
+return (drawer_id * current_machine->smp.books + book_id) *
+   current_machine->smp.sockets + socket_id;
+}
+
+/**
+ * s390_socket_nb:
+ * @cpu: s390x CPU
+ *
+ * Returns the socket number used inside the cores_per_socket array
+ * for a cpu.
+ */
+static int

[PATCH v23 20/20] tests/avocado: s390x cpu topology bad move

2023-09-14 Thread Nina Schoetterl-Glausch

From: Pierre Morel 

This test verifies that QEMU refuses to move a CPU to an
nonexistent location.

Signed-off-by: Pierre Morel 
Reviewed-by: Thomas Huth 
---
 tests/avocado/s390_topology.py | 25 +
 1 file changed, 25 insertions(+)

diff --git a/tests/avocado/s390_topology.py b/tests/avocado/s390_topology.py
index d3e6556c0f..9154ac8776 100644
--- a/tests/avocado/s390_topology.py
+++ b/tests/avocado/s390_topology.py
@@ -412,3 +412,28 @@ def test_dedicated_error(self):
 res = self.vm.qmp('set-cpu-topology',
   {'core-id': 0, 'entitlement': 'medium', 'dedicated': 
False})
 self.assertEqual(res['return'], {})
+
+def test_move_error(self):
+"""
+This test verifies that QEMU refuses to move a CPU to an
+nonexistent location
+
+:avocado: tags=arch:s390x
+:avocado: tags=machine:s390-ccw-virtio
+"""
+self.kernel_init()
+self.vm.launch()
+self.wait_until_booted()
+
+self.system_init()
+
+res = self.vm.qmp('set-cpu-topology', {'core-id': 0, 'drawer-id': 1})
+self.assertEqual(res['error']['class'], 'GenericError')
+
+res = self.vm.qmp('set-cpu-topology', {'core-id': 0, 'book-id': 1})
+self.assertEqual(res['error']['class'], 'GenericError')
+
+res = self.vm.qmp('set-cpu-topology', {'core-id': 0, 'socket-id': 1})
+self.assertEqual(res['error']['class'], 'GenericError')
+
+self.check_topology(0, 0, 0, 0, 'medium', False)
-- 
2.39.2

[PATCH v23 15/20] tests/avocado: s390x cpu topology polarization

2023-09-14 Thread Nina Schoetterl-Glausch

From: Pierre Morel 

Polarization is changed on a request from the guest.
Let's verify the polarization is accordingly set by QEMU.

Signed-off-by: Pierre Morel 
Co-developed-by: Nina Schoetterl-Glausch 
Reviewed-by: Thomas Huth 
Reviewed-by: Nina Schoetterl-Glausch 
Signed-off-by: Nina Schoetterl-Glausch 
---
 tests/avocado/s390_topology.py | 45 ++
 1 file changed, 45 insertions(+)

diff --git a/tests/avocado/s390_topology.py b/tests/avocado/s390_topology.py
index 9078b45281..8166cee134 100644
--- a/tests/avocado/s390_topology.py
+++ b/tests/avocado/s390_topology.py
@@ -41,6 +41,7 @@ class S390CPUTopology(QemuSystemTest):
 The polarization is changed on a request from the guest.
 """
 timeout = 90
+event_timeout = 10
 
 KERNEL_COMMON_COMMAND_LINE = ('printk.time=0 '
   'root=/dev/ram '
@@ -103,6 +104,14 @@ def kernel_init(self):
  '-initrd', initrd_path,
  '-append', kernel_command_line)
 
+def system_init(self):
+self.log.info("System init")
+exec_command_and_wait_for_pattern(self,
+""" mount proc -t proc /proc;
+mount sys -t sysfs /sys;
+cat /sys/devices/system/cpu/dispatching """,
+'0')
+
 def test_single(self):
 """
 This test checks the simplest topology with a single CPU.
@@ -198,3 +207,39 @@ def test_dash_device(self):
 self.check_topology(3, 1, 1, 1, 'high', False)
 self.check_topology(4, 1, 1, 1, 'medium', False)
 self.check_topology(5, 2, 1, 1, 'high', True)
+
+
+def guest_set_dispatching(self, dispatching):
+exec_command(self,
+f'echo {dispatching} > /sys/devices/system/cpu/dispatching')
+self.vm.event_wait('CPU_POLARIZATION_CHANGE', self.event_timeout)
+exec_command_and_wait_for_pattern(self,
+'cat /sys/devices/system/cpu/dispatching', dispatching)
+
+
+def test_polarization(self):
+"""
+This test verifies that QEMU modifies the entitlement change after
+several guest polarization change requests.
+
+:avocado: tags=arch:s390x
+:avocado: tags=machine:s390-ccw-virtio
+"""
+self.kernel_init()
+self.vm.launch()
+self.wait_until_booted()
+
+self.system_init()
+res = self.vm.qmp('query-s390x-cpu-polarization')
+self.assertEqual(res['return']['polarization'], 'horizontal')
+self.check_topology(0, 0, 0, 0, 'medium', False)
+
+self.guest_set_dispatching('1');
+res = self.vm.qmp('query-s390x-cpu-polarization')
+self.assertEqual(res['return']['polarization'], 'vertical')
+self.check_topology(0, 0, 0, 0, 'medium', False)
+
+self.guest_set_dispatching('0');
+res = self.vm.qmp('query-s390x-cpu-polarization')
+self.assertEqual(res['return']['polarization'], 'horizontal')
+self.check_topology(0, 0, 0, 0, 'medium', False)
-- 
2.39.2

Re: [PATCH] hw/cxl: Fix out of bound array access

2023-09-14 Thread Michael Tokarev


14.09.2023 15:37, Michael Tokarev:

13.09.2023 13:10, Dmitry Frolov wrote:

According to cxl_interleave_ways_enc(),
fw->num_targets is allowed to be up to 16.
This also corresponds to CXL specs.
So, the fw->target_hbs[] array is iterated from 0 to 15.
But it is staticaly declared of length 8.
Thus, out of bound array access may occur.

Fixes: c28db9e000 ("hw/pci-bridge: Make PCIe and CXL PXB Devices inherit from 
TYPE_PXB_DEV")

Signed-off-by: Dmitry Frolov 


Reviewed-by: Michael Tokarev 
(with the extra empty line removed :)


Also,

Cc: qemu-sta...@nongnu.org

for stable-8.1.

/mjt

[PATCH v23 00/20] s390x: CPU Topology

2023-09-14 Thread Nina Schoetterl-Glausch

Changes since v22 (range-diff below):

* fix compile issues (thanks Thomas, Cédric)
* incorporate feedback (thanks Thomas!), most notably
 * forbid books and drawers in older machine types
 * changed implementation of TLE entry ordering
* also got rid of another ERRP_GUARD in s390_change_topology
* pick up R-b's (thanks Thomas)
* misc minor stuff

Changes since v21:

* fix ordering of entries in topology list
* don't leak topology list on error condition in insert_stsi_15_1_x
* make entitlement, dedication optional in query-cpu-info-fast
* rename query-cpu-polarization to query-s390x-cpu-polarization
* documentation changes
* add tests of guest view to avocado tests
* require KVM in avocado tests
* increase timeout in tests
* bump version in comments documenting availability to 8.2
* rename some symbols
* picked up R-b's (thanks Thomas)
* minor stuff, typos

Implementation discussions
==

CPU models
--

Since the facility 11, S390_FEAT_CONFIGURATION_TOPOLOGY is already
in the CPU model for old QEMU we could not activate it as usual from
KVM but needed a KVM capability: KVM_CAP_S390_CPU_TOPOLOGY.
Checking and enabling this capability enables facility 11,
S390_FEAT_CONFIGURATION_TOPOLOGY.

It is the responsibility of the admin to ensure the same CPU
model for source and target host in a migration.

Migration
-

When the target guest is started, the Multi-processor Topology Change
Report (MTCR) bit is set during the creation of the vCPU by KVM.
We do not need to migrate its state, in the worst case, the target
guest will see the MTCR and actualize its view of the topology
without necessity, but this will be done only one time.

Reset
-

Reseting the topology is done during subsystem reset, the
polarization is reset to horizontal polarization.

Topology attributes
---

The topology attributes are carried by the CPU object and defined
on object creation.
In the case the new attributes, socket, book, drawer, dedicated,
entitlement are not provided QEMU provides defaults values.

- Geometry defaults
  The geometry default are based on the core-id of the core to
  fill the geometry in a monotone way starting with drawer 0,
  book 0, and filling socket 0 with the number of cores per socket,
  then filling socket 1, socket 2 ... etc until the book is complete
  and all books until the first drawer is complete before starting with
  the next drawer.

  This allows to keep existing start scripts and Libvirt existing
  interface until it is extended.

- Modifiers defaults
  Default entitlement is medium
  Default dedication is not dedicated.

- Machine polarization default to horizontal

Dynamic topology modification
-

QAPI interface is extended with:
- a command: 'set-cpu-topology'
- a query: 'query-cpu-polarization'
- a query: extension of qmp 'query-cpus-fast'
- a query: extension of hmp 'hotpluggable-cpus'
- an event: 'CPU_POLARITY_CHANGE'

New command and interface are specified as unstable.

The admin may use query-cpus-fast to verify the topology provided
to the guest and set-cpu-topology to modify it.

The event CPU_POLARITY_CHANGE is sent when the guest successfuly
uses the PTF(2) instruction to request a polarization change.
In that case, the admin is supposed to modify the CPU provisioning
accordingly.

Testing
===

To use the QEMU patches, you will need Linux V6-rc1 or newer,
or use the following Linux mainline patches:

f5ecfee94493 2022-07-20 KVM: s390: resetting the Topology-Change-Report
24fe0195bc19 2022-07-20 KVM: s390: guest support for topology function
0130337ec45b 2022-07-20 KVM: s390: Cleanup ipte lock access and SIIF fac..

Currently this code is for KVM only, I have no idea if it is interesting
to provide a TCG patch. If ever it will be done in another series.

This series provide 12 avocado tests using Fedora-35 kernel and initrd
image.

Documentation
=

To have a better understanding of the S390x CPU Topology and its
implementation in QEMU you can have a look at the documentation in the
last patch of this series.

The admin will want to match the host and the guest topology, taking
into account that the guest does not recognize multithreading.
Consequently, two vCPU assigned to threads of the same real CPU should
preferably be assigned to the same socket of the guest machine.

Previous changes:
Since v20:

- changed name of target/s390/kvm/cpu_topology to
  target/s390/kvm/stsi-topology
  (Thomas, Cedric)

- moved the TLE list head from a global to a local in
  insert_stsi_15_1_x()
  (Nina)

- cleaning and merging some of the avocado tests
  (Nina)

- Several cleanings
  (Cedric, Thomas, Nina)

- moved setting of entitlement and dedicated from disapeared
  cpustate_to_cpuinfo_s390() to new s390_query_cpu_fast()

- small changes to documentation to reflect last changes using
  enum instead of int for polarization.
  0 -> horizontal and 1 -> vertical

Since v19:

- use enum to specify the

Re: [PATCH] hw/cxl: Fix out of bound array access

2023-09-14 Thread Michael Tokarev


13.09.2023 13:10, Dmitry Frolov wrote:

According to cxl_interleave_ways_enc(),
fw->num_targets is allowed to be up to 16.
This also corresponds to CXL specs.
So, the fw->target_hbs[] array is iterated from 0 to 15.
But it is staticaly declared of length 8.
Thus, out of bound array access may occur.

Fixes: c28db9e000 ("hw/pci-bridge: Make PCIe and CXL PXB Devices inherit from 
TYPE_PXB_DEV")

Signed-off-by: Dmitry Frolov 


Reviewed-by: Michael Tokarev 
(with the extra empty line removed :)

Thanks!

/mjt


---
  include/hw/cxl/cxl.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h
index 56c9e7676e..4944725849 100644
--- a/include/hw/cxl/cxl.h
+++ b/include/hw/cxl/cxl.h
@@ -29,7 +29,7 @@ typedef struct PXBCXLDev PXBCXLDev;
  typedef struct CXLFixedWindow {
  uint64_t size;
  char **targets;
-PXBCXLDev *target_hbs[8];
+PXBCXLDev *target_hbs[16];
  uint8_t num_targets;
  uint8_t enc_int_ways;
  uint8_t enc_int_gran;

Re: [PATCH v3] hw/cxl: Fix out of bound array access

2023-09-14 Thread Michael Tokarev


14.09.2023 10:06, Dmitry Frolov wrote:

According to cxl_interleave_ways_enc(), fw->num_targets is allowed to be up
to 16. This also corresponds to CXL specs. So, the fw->target_hbs[] array
is iterated from 0 to 15. But it is statically declared of length 8. Thus,
out of bound array access may occur.

Found by Linux Verification Center (linuxtesting.org) with SVACE.

v2: assert added
v3: assert removed


So it's the same as the initial submission.

/mjt

Re: [PATCH] hw/qxl: move check of slot_id before accessing guest_slots

2023-09-14 Thread Michael Tokarev


14.09.2023 12:27, Anastasia Belova wrote:

If slot_id >= NUM_MEMSLOTS, buffer overflow is possible.
So the check should be upper than d->guest_slots[slot_id]
where size of d->guest_slots is NUM_MEMSLOTS.

Fixes: e954ea2873 ("qxl: qxl_add_memslot: remove guest trigerrable panics")
Signed-off-by: Anastasia Belova 


Good catch.

Reviewed-by: Michael Tokarev 

The change is trivial enough for trivial-patches tree,
picked it up if Gerd don't mind.

/mjt


  hw/display/qxl.c | 11 ++-
  1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index 7bb00d68f5..dc618727c0 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -1309,16 +1309,17 @@ static int qxl_add_memslot(PCIQXLDevice *d, uint32_t 
slot_id, uint64_t delta,
  QXLDevMemSlot memslot;
  int i;
  
-guest_start = le64_to_cpu(d->guest_slots[slot_id].slot.mem_start);

-guest_end   = le64_to_cpu(d->guest_slots[slot_id].slot.mem_end);
-
-trace_qxl_memslot_add_guest(d->id, slot_id, guest_start, guest_end);
-
  if (slot_id >= NUM_MEMSLOTS) {
  qxl_set_guest_bug(d, "%s: slot_id >= NUM_MEMSLOTS %d >= %d", __func__,
slot_id, NUM_MEMSLOTS);
  return 1;
  }
+
+guest_start = le64_to_cpu(d->guest_slots[slot_id].slot.mem_start);
+guest_end   = le64_to_cpu(d->guest_slots[slot_id].slot.mem_end);
+
+trace_qxl_memslot_add_guest(d->id, slot_id, guest_start, guest_end);
+
  if (guest_start > guest_end) {
  qxl_set_guest_bug(d, "%s: guest_start > guest_end 0x%" PRIx64
   " > 0x%" PRIx64, __func__, guest_start, guest_end);

[PATCH] target/riscv: pmp: Ignore writes when RW=01

2023-09-14 Thread Mayuresh Chitale

As per the Priv spec: "The R, W, and X fields form a collective WARL
field for which the combinations with R=0 and W=1 are reserved."
However currently such writes are not ignored as ought to be. The
combinations with RW=01 are allowed only when the Smepmp extension
is enabled and mseccfg.MML is set.

Signed-off-by: Mayuresh Chitale 
---
 target/riscv/pmp.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c
index f3eb6e6585..5b430be18c 100644
--- a/target/riscv/pmp.c
+++ b/target/riscv/pmp.c
@@ -119,6 +119,14 @@ static bool pmp_write_cfg(CPURISCVState *env, uint32_t 
pmp_index, uint8_t val)
 if (locked) {
 qemu_log_mask(LOG_GUEST_ERROR, "ignoring pmpcfg write - locked\n");
 } else if (env->pmp_state.pmp[pmp_index].cfg_reg != val) {
+/* If !mseccfg.MML then ignore writes with encoding RW=01 */
+if ((val & PMP_WRITE) && !(val & PMP_READ) &&
+(!riscv_cpu_cfg(env)->ext_smepmp ||
+!MSECCFG_MML_ISSET(env))) {
+val &= ~(PMP_WRITE | PMP_READ);
+val |= env->pmp_state.pmp[pmp_index].cfg_reg &
+   (PMP_WRITE | PMP_READ);
+}
 env->pmp_state.pmp[pmp_index].cfg_reg = val;
 pmp_update_rule_addr(env, pmp_index);
 return true;
-- 
2.34.1

Re: [PATCH 3/3] iotests: distinguish 'skipped' and 'not run' states

2023-09-14 Thread Peter Maydell

On Thu, 14 Sept 2023 at 13:29, Kevin Wolf  wrote:
>
> Am 13.09.2023 um 18:31 hat Eric Blake geschrieben:
> > I guess it boils down to whether there is an actionable response in
> > that message.  If a test is skipped because it is the wrong format
> > (for example, ./check -raw skipping a test that only works with
> > qcow2), there's nothing for me to do.  If a test is skipped because my
> > setup didn't permit running the test, but where I could enhance my
> > environment (install more software, pick a different file system,
> > ...), then having the skip message call that out is useful if I want
> > to take action to get more test coverage.
>
> I'm not sure if there is a clear line to define whether there is an
> actionable response or not, because that completely depends on what the
> user considers an acceptable response.
>
> You have the relatively clear cases like the test being for another
> format, though you could argue that the actionable response is running
> ./check -qcow2 if raw doesn't work - in fact, why don't we allow ./check
> -raw -qcow2 to run both and count it as skipped only if it wasn't run at
> all?
>
> The relatively clear case on the other end of the spectrum is if a tool
> is missing on the host that you could possibly install. Though maybe
> your distro doesn't even package it, so is that still a reasonable
> action to take?

I think for that sort of thing you need to actually collect the
reasons so that at the end you can say "skipped 21 tests because
frobnicator not present on system; skipped 4 tests because
bazulator not present on system". Then when you're running the
tests you can either (a) install the listed tools or (b) know
you can ignore those specific skips as ones you're aware you
can't run. But that would be a lot of pain and effort to implement...

-- PMM

[PATCH v23 09/20] machine: adding s390 topology to query-cpu-fast

2023-09-14 Thread Nina Schoetterl-Glausch

From: Pierre Morel 

S390x provides two more topology attributes, entitlement and dedication.

Let's add these CPU attributes to the QAPI command query-cpu-fast.

Signed-off-by: Pierre Morel 
Reviewed-by: Nina Schoetterl-Glausch 
Co-developed-by: Nina Schoetterl-Glausch 
Reviewed-by: Thomas Huth 
Signed-off-by: Nina Schoetterl-Glausch 
---
 qapi/machine.json  | 9 -
 target/s390x/cpu.c | 9 +
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/qapi/machine.json b/qapi/machine.json
index a63cb951d2..be1d70d8cd 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -56,10 +56,17 @@
 # Additional information about a virtual S390 CPU
 #
 # @cpu-state: the virtual CPU's state
+# @dedicated: the virtual CPU's dedication (since 8.2)
+# @entitlement: the virtual CPU's entitlement (since 8.2)
 #
 # Since: 2.12
 ##
-{ 'struct': 'CpuInfoS390', 'data': { 'cpu-state': 'CpuS390State' } }
+{ 'struct': 'CpuInfoS390',
+  'data': { 'cpu-state': 'CpuS390State',
+'*dedicated': 'bool',
+'*entitlement': 'CpuS390Entitlement'
+  }
+}
 
 ##
 # @CpuInfoFast:
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index 74405beb51..5967e34a85 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -38,6 +38,7 @@
 #ifndef CONFIG_USER_ONLY
 #include "sysemu/reset.h"
 #endif
+#include "hw/s390x/cpu-topology.h"
 
 #define CR0_RESET   0xE0UL
 #define CR14_RESET  0xC200UL;
@@ -146,6 +147,14 @@ static void s390_query_cpu_fast(CPUState *cpu, CpuInfoFast 
*value)
 S390CPU *s390_cpu = S390_CPU(cpu);
 
 value->u.s390x.cpu_state = s390_cpu->env.cpu_state;
+#if !defined(CONFIG_USER_ONLY)
+if (s390_has_topology()) {
+value->u.s390x.has_dedicated = true;
+value->u.s390x.dedicated = s390_cpu->env.dedicated;
+value->u.s390x.has_entitlement = true;
+value->u.s390x.entitlement = s390_cpu->env.entitlement;
+}
+#endif
 }
 
 /* S390CPUClass::reset() */
-- 
2.39.2

[PATCH v23 17/20] tests/avocado: s390x cpu topology test dedicated CPU

2023-09-14 Thread Nina Schoetterl-Glausch

From: Pierre Morel 

A dedicated CPU in vertical polarization can only have
a high entitlement.
Let's check this from both host and guest point of view.

Signed-off-by: Pierre Morel 
Co-developed-by: Nina Schoetterl-Glausch 
Reviewed-by: Thomas Huth 
Reviewed-by: Nina Schoetterl-Glausch 
Signed-off-by: Nina Schoetterl-Glausch 
---
 tests/avocado/s390_topology.py | 33 +
 1 file changed, 33 insertions(+)

diff --git a/tests/avocado/s390_topology.py b/tests/avocado/s390_topology.py
index 24fac9a54d..3661048f4c 100644
--- a/tests/avocado/s390_topology.py
+++ b/tests/avocado/s390_topology.py
@@ -305,3 +305,36 @@ def test_entitlement(self):
 self.guest_set_dispatching('0');
 self.check_polarization("horizontal")
 self.check_topology(0, 0, 0, 0, 'high', False)
+
+
+def test_dedicated(self):
+"""
+This test verifies that QEMU adjusts the entitlement correctly when a
+CPU is made dedicated.
+QEMU retains the entitlement value when horizontal polarization is in 
effect.
+For the guest, the field shows the effective value of the entitlement.
+
+:avocado: tags=arch:s390x
+:avocado: tags=machine:s390-ccw-virtio
+"""
+self.kernel_init()
+self.vm.launch()
+self.wait_until_booted()
+
+self.system_init()
+
+self.check_polarization("horizontal")
+
+res = self.vm.qmp('set-cpu-topology',
+  {'core-id': 0, 'dedicated': True})
+self.assertEqual(res['return'], {})
+self.check_topology(0, 0, 0, 0, 'high', True)
+self.check_polarization("horizontal")
+
+self.guest_set_dispatching('1');
+self.check_topology(0, 0, 0, 0, 'high', True)
+self.check_polarization("vertical:high")
+
+self.guest_set_dispatching('0');
+self.check_topology(0, 0, 0, 0, 'high', True)
+self.check_polarization("horizontal")
-- 
2.39.2

[PATCH v23 06/20] s390x/cpu topology: interception of PTF instruction

2023-09-14 Thread Nina Schoetterl-Glausch

From: Pierre Morel 

When the host supports the CPU topology facility, the PTF
instruction with function code 2 is interpreted by the SIE,
provided that the userland hypervisor activates the interpretation
by using the KVM_CAP_S390_CPU_TOPOLOGY KVM extension.

The PTF instructions with function code 0 and 1 are intercepted
and must be emulated by the userland hypervisor.

During RESET all CPU of the configuration are placed in
horizontal polarity.

Signed-off-by: Pierre Morel 
Reviewed-by: Nina Schoetterl-Glausch 
Reviewed-by: Thomas Huth 
Co-developed-by: Nina Schoetterl-Glausch 
Signed-off-by: Nina Schoetterl-Glausch 
---
 include/hw/s390x/s390-virtio-ccw.h |  6 
 hw/s390x/cpu-topology.c| 55 ++
 target/s390x/kvm/kvm.c | 11 ++
 3 files changed, 72 insertions(+)

diff --git a/include/hw/s390x/s390-virtio-ccw.h 
b/include/hw/s390x/s390-virtio-ccw.h
index 9bba21a916..c1d46e78af 100644
--- a/include/hw/s390x/s390-virtio-ccw.h
+++ b/include/hw/s390x/s390-virtio-ccw.h
@@ -30,6 +30,12 @@ struct S390CcwMachineState {
 uint8_t loadparm[8];
 };
 
+#define S390_PTF_REASON_NONE (0x00 << 8)
+#define S390_PTF_REASON_DONE (0x01 << 8)
+#define S390_PTF_REASON_BUSY (0x02 << 8)
+#define S390_TOPO_FC_MASK 0xffUL
+void s390_handle_ptf(S390CPU *cpu, uint8_t r1, uintptr_t ra);
+
 struct S390CcwMachineClass {
 /*< private >*/
 MachineClass parent_class;
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
index 03de3d3d10..c34581964f 100644
--- a/hw/s390x/cpu-topology.c
+++ b/hw/s390x/cpu-topology.c
@@ -90,6 +90,60 @@ static void s390_topology_init(MachineState *ms)
 smp->books * smp->drawers);
 }
 
+/*
+ * s390_handle_ptf:
+ *
+ * @register 1: contains the function code
+ *
+ * Function codes 0 (horizontal) and 1 (vertical) define the CPU
+ * polarization requested by the guest.
+ *
+ * Function code 2 is handling topology changes and is interpreted
+ * by the SIE.
+ */
+void s390_handle_ptf(S390CPU *cpu, uint8_t r1, uintptr_t ra)
+{
+CpuS390Polarization polarization;
+CPUS390XState *env = >env;
+uint64_t reg = env->regs[r1];
+int fc = reg & S390_TOPO_FC_MASK;
+
+if (!s390_has_feat(S390_FEAT_CONFIGURATION_TOPOLOGY)) {
+s390_program_interrupt(env, PGM_OPERATION, ra);
+return;
+}
+
+if (env->psw.mask & PSW_MASK_PSTATE) {
+s390_program_interrupt(env, PGM_PRIVILEGED, ra);
+return;
+}
+
+if (reg & ~S390_TOPO_FC_MASK) {
+s390_program_interrupt(env, PGM_SPECIFICATION, ra);
+return;
+}
+
+polarization = S390_CPU_POLARIZATION_VERTICAL;
+switch (fc) {
+case 0:
+polarization = S390_CPU_POLARIZATION_HORIZONTAL;
+/* fallthrough */
+case 1:
+if (s390_topology.polarization == polarization) {
+env->regs[r1] |= S390_PTF_REASON_DONE;
+setcc(cpu, 2);
+} else {
+s390_topology.polarization = polarization;
+s390_cpu_topology_set_changed(true);
+setcc(cpu, 0);
+}
+break;
+default:
+/* Note that fc == 2 is interpreted by the SIE */
+s390_program_interrupt(env, PGM_SPECIFICATION, ra);
+}
+}
+
 /**
  * s390_topology_reset:
  *
@@ -99,6 +153,7 @@ static void s390_topology_init(MachineState *ms)
 void s390_topology_reset(void)
 {
 s390_cpu_topology_set_changed(false);
+s390_topology.polarization = S390_CPU_POLARIZATION_HORIZONTAL;
 }
 
 /**
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index ee6345ba27..10d66c2b65 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -86,6 +86,7 @@
 
 #define PRIV_B9_EQBS0x9c
 #define PRIV_B9_CLP 0xa0
+#define PRIV_B9_PTF 0xa2
 #define PRIV_B9_PCISTG  0xd0
 #define PRIV_B9_PCILG   0xd2
 #define PRIV_B9_RPCIT   0xd3
@@ -1457,6 +1458,13 @@ static int kvm_mpcifc_service_call(S390CPU *cpu, struct 
kvm_run *run)
 }
 }
 
+static void kvm_handle_ptf(S390CPU *cpu, struct kvm_run *run)
+{
+uint8_t r1 = (run->s390_sieic.ipb >> 20) & 0x0f;
+
+s390_handle_ptf(cpu, r1, RA_IGNORED);
+}
+
 static int handle_b9(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1)
 {
 int r = 0;
@@ -1474,6 +1482,9 @@ static int handle_b9(S390CPU *cpu, struct kvm_run *run, 
uint8_t ipa1)
 case PRIV_B9_RPCIT:
 r = kvm_rpcit_service_call(cpu, run);
 break;
+case PRIV_B9_PTF:
+kvm_handle_ptf(cpu, run);
+break;
 case PRIV_B9_EQBS:
 /* just inject exception */
 r = -1;
-- 
2.39.2

[PATCH v23 14/20] tests/avocado: s390x cpu topology core

2023-09-14 Thread Nina Schoetterl-Glausch

From: Pierre Morel 

Introduction of the s390x cpu topology core functions and
basic tests.

We test the correlation between the command line and
the QMP results in query-cpus-fast for various CPU topology.

Signed-off-by: Pierre Morel 
Reviewed-by: Nina Schoetterl-Glausch 
Co-developed-by: Nina Schoetterl-Glausch 
Signed-off-by: Nina Schoetterl-Glausch 
---
 MAINTAINERS|   1 +
 tests/avocado/s390_topology.py | 200 +
 2 files changed, 201 insertions(+)
 create mode 100644 tests/avocado/s390_topology.py

diff --git a/MAINTAINERS b/MAINTAINERS
index 0cba0cb2d1..405800ac15 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1705,6 +1705,7 @@ F: hw/s390x/cpu-topology.c
 F: target/s390x/kvm/stsi-topology.c
 F: docs/devel/s390-cpu-topology.rst
 F: docs/system/s390x/cpu-topology.rst
+F: tests/avocado/s390_topology.py
 
 X86 Machines
 
diff --git a/tests/avocado/s390_topology.py b/tests/avocado/s390_topology.py
new file mode 100644
index 00..9078b45281
--- /dev/null
+++ b/tests/avocado/s390_topology.py
@@ -0,0 +1,200 @@
+# Functional test that boots a Linux kernel and checks the console
+#
+# Copyright IBM Corp. 2023
+#
+# Author:
+#  Pierre Morel 
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+
+import os
+import shutil
+import time
+
+from avocado_qemu import QemuSystemTest
+from avocado_qemu import exec_command
+from avocado_qemu import exec_command_and_wait_for_pattern
+from avocado_qemu import interrupt_interactive_console_until_pattern
+from avocado_qemu import wait_for_console_pattern
+from avocado.utils import process
+from avocado.utils import archive
+
+
+class S390CPUTopology(QemuSystemTest):
+"""
+S390x CPU topology consists of 4 topology layers, from bottom to top,
+the cores, sockets, books and drawers and 2 modifiers attributes,
+the entitlement and the dedication.
+See: docs/system/s390x/cpu-topology.rst.
+
+S390x CPU topology is setup in different ways:
+- implicitly from the '-smp' argument by completing each topology
+  level one after the other beginning with drawer 0, book 0 and
+  socket 0.
+- explicitly from the '-device' argument on the QEMU command line
+- explicitly by hotplug of a new CPU using QMP or HMP
+- it is modified by using QMP 'set-cpu-topology'
+
+The S390x modifier attribute entitlement depends on the machine
+polarization, which can be horizontal or vertical.
+The polarization is changed on a request from the guest.
+"""
+timeout = 90
+
+KERNEL_COMMON_COMMAND_LINE = ('printk.time=0 '
+  'root=/dev/ram '
+  'selinux=0 '
+  'rdinit=/bin/sh')
+
+def wait_until_booted(self):
+wait_for_console_pattern(self, 'no job control',
+ failure_message='Kernel panic - not syncing',
+ vm=None)
+
+def check_topology(self, c, s, b, d, e, t):
+res = self.vm.qmp('query-cpus-fast')
+cpus =  res['return']
+for cpu in cpus:
+core = cpu['props']['core-id']
+socket = cpu['props']['socket-id']
+book = cpu['props']['book-id']
+drawer = cpu['props']['drawer-id']
+entitlement = cpu.get('entitlement')
+dedicated = cpu.get('dedicated')
+if core == c:
+self.assertEqual(drawer, d)
+self.assertEqual(book, b)
+self.assertEqual(socket, s)
+self.assertEqual(entitlement, e)
+self.assertEqual(dedicated, t)
+
+def kernel_init(self):
+"""
+We need a VM that supports CPU topology,
+currently this only the case when using KVM, not TCG.
+We need a kernel supporting the CPU topology.
+We need a minimal root filesystem with a shell.
+"""
+self.require_accelerator("kvm")
+kernel_url = ('https://archives.fedoraproject.org/pub/archive'
+  '/fedora-secondary/releases/35/Server/s390x/os'
+  '/images/kernel.img')
+kernel_hash = '0d1aaaf303f07cf0160c8c48e56fe638'
+kernel_path = self.fetch_asset(kernel_url, algorithm='md5',
+   asset_hash=kernel_hash)
+
+initrd_url = ('https://archives.fedoraproject.org/pub/archive'
+  '/fedora-secondary/releases/35/Server/s390x/os'
+  '/images/initrd.img')
+initrd_hash = 'a122057d95725ac030e2ec51df46e172'
+initrd_path_xz = self.fetch_asset(initrd_url, algorithm='md5',
+  asset_hash=initrd_hash)
+initrd_path = os.path.join(self.workdir, 'initrd-raw.img')
+archive.lzma_uncompress(initrd_path_xz, initrd_path)
+
+self.vm.set_console()
+

[PATCH v23 18/20] tests/avocado: s390x cpu topology test socket full

2023-09-14 Thread Nina Schoetterl-Glausch

From: Pierre Morel 

This test verifies that QMP set-cpu-topology does not accept
to overload a socket.

Signed-off-by: Pierre Morel 
Reviewed-by: Thomas Huth 
---
 tests/avocado/s390_topology.py | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/tests/avocado/s390_topology.py b/tests/avocado/s390_topology.py
index 3661048f4c..a63c2b2923 100644
--- a/tests/avocado/s390_topology.py
+++ b/tests/avocado/s390_topology.py
@@ -338,3 +338,29 @@ def test_dedicated(self):
 self.guest_set_dispatching('0');
 self.check_topology(0, 0, 0, 0, 'high', True)
 self.check_polarization("horizontal")
+
+
+def test_socket_full(self):
+"""
+This test verifies that QEMU does not accept to overload a socket.
+The socket-id 0 on book-id 0 already contains CPUs 0 and 1 and can
+not accept any new CPU while socket-id 0 on book-id 1 is free.
+
+:avocado: tags=arch:s390x
+:avocado: tags=machine:s390-ccw-virtio
+"""
+self.kernel_init()
+self.vm.add_args('-smp',
+ '3,drawers=2,books=2,sockets=3,cores=2,maxcpus=24')
+self.vm.launch()
+self.wait_until_booted()
+
+self.system_init()
+
+res = self.vm.qmp('set-cpu-topology',
+  {'core-id': 2, 'socket-id': 0, 'book-id': 0})
+self.assertEqual(res['error']['class'], 'GenericError')
+
+res = self.vm.qmp('set-cpu-topology',
+  {'core-id': 2, 'socket-id': 0, 'book-id': 1})
+self.assertEqual(res['return'], {})
-- 
2.39.2

[PATCH v23 05/20] s390x/cpu topology: resetting the Topology-Change-Report

2023-09-14 Thread Nina Schoetterl-Glausch

From: Pierre Morel 

During a subsystem reset the Topology-Change-Report is cleared
by the machine.
Let's ask KVM to clear the Modified Topology Change Report (MTCR)
bit of the SCA in the case of a subsystem reset.

Signed-off-by: Pierre Morel 
Reviewed-by: Thomas Huth 
Reviewed-by: Nina Schoetterl-Glausch 
Co-developed-by: Nina Schoetterl-Glausch 
Signed-off-by: Nina Schoetterl-Glausch 
---
 include/hw/s390x/cpu-topology.h |  1 +
 target/s390x/cpu.h  |  1 +
 target/s390x/kvm/kvm_s390x.h|  1 +
 hw/s390x/cpu-topology.c | 11 +++
 hw/s390x/s390-virtio-ccw.c  |  3 +++
 target/s390x/cpu-sysemu.c   | 13 +
 target/s390x/kvm/kvm.c  | 17 +
 7 files changed, 47 insertions(+)

diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
index 350c7ea8aa..196c9afb3f 100644
--- a/include/hw/s390x/cpu-topology.h
+++ b/include/hw/s390x/cpu-topology.h
@@ -56,6 +56,7 @@ static inline void s390_topology_setup_cpu(MachineState *ms,
 #endif
 
 extern S390Topology s390_topology;
+void s390_topology_reset(void);
 
 static inline int s390_std_socket(int n, CpuTopology *smp)
 {
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index c1ba5c46d6..2722e7c450 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -655,6 +655,7 @@ typedef struct SysIBCPUListEntry {
 QEMU_BUILD_BUG_ON(sizeof(SysIBCPUListEntry) != 16);
 
 void insert_stsi_15_1_x(S390CPU *cpu, int sel2, uint64_t addr, uint8_t ar, 
uintptr_t ra);
+void s390_cpu_topology_set_changed(bool changed);
 
 /* MMU defines */
 #define ASCE_ORIGIN   (~0xfffULL) /* segment table origin 
*/
diff --git a/target/s390x/kvm/kvm_s390x.h b/target/s390x/kvm/kvm_s390x.h
index f9785564d0..649dae5948 100644
--- a/target/s390x/kvm/kvm_s390x.h
+++ b/target/s390x/kvm/kvm_s390x.h
@@ -47,5 +47,6 @@ void kvm_s390_crypto_reset(void);
 void kvm_s390_restart_interrupt(S390CPU *cpu);
 void kvm_s390_stop_interrupt(S390CPU *cpu);
 void kvm_s390_set_diag318(CPUState *cs, uint64_t diag318_info);
+int kvm_s390_topology_set_mtcr(uint64_t attr);
 
 #endif /* KVM_S390X_H */
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
index 9bf13a6ebf..03de3d3d10 100644
--- a/hw/s390x/cpu-topology.c
+++ b/hw/s390x/cpu-topology.c
@@ -90,6 +90,17 @@ static void s390_topology_init(MachineState *ms)
 smp->books * smp->drawers);
 }
 
+/**
+ * s390_topology_reset:
+ *
+ * Generic reset for CPU topology, calls s390_topology_reset()
+ * to reset the kernel Modified Topology Change Record.
+ */
+void s390_topology_reset(void)
+{
+s390_cpu_topology_set_changed(false);
+}
+
 /**
  * s390_topology_cpu_default:
  * @cpu: pointer to a S390CPU
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 4e05a00de8..6c63636eee 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -123,6 +123,9 @@ static void subsystem_reset(void)
 device_cold_reset(dev);
 }
 }
+if (s390_has_topology()) {
+s390_topology_reset();
+}
 }
 
 static int virtio_ccw_hcall_notify(const uint64_t *args)
diff --git a/target/s390x/cpu-sysemu.c b/target/s390x/cpu-sysemu.c
index 8112561e5e..1cd30c1d84 100644
--- a/target/s390x/cpu-sysemu.c
+++ b/target/s390x/cpu-sysemu.c
@@ -307,3 +307,16 @@ void s390_do_cpu_set_diag318(CPUState *cs, run_on_cpu_data 
arg)
 kvm_s390_set_diag318(cs, arg.host_ulong);
 }
 }
+
+void s390_cpu_topology_set_changed(bool changed)
+{
+int ret;
+
+if (kvm_enabled()) {
+ret = kvm_s390_topology_set_mtcr(changed);
+if (ret) {
+error_report("Failed to set Modified Topology Change Report: %s",
+ strerror(-ret));
+}
+}
+}
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 56b31b8aae..ee6345ba27 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -2584,6 +2584,23 @@ int kvm_s390_get_zpci_op(void)
 return cap_zpci_op;
 }
 
+int kvm_s390_topology_set_mtcr(uint64_t attr)
+{
+struct kvm_device_attr attribute = {
+.group = KVM_S390_VM_CPU_TOPOLOGY,
+.attr  = attr,
+};
+
+if (!s390_has_feat(S390_FEAT_CONFIGURATION_TOPOLOGY)) {
+return 0;
+}
+if (!kvm_vm_check_attr(kvm_state, KVM_S390_VM_CPU_TOPOLOGY, attr)) {
+return -ENOTSUP;
+}
+
+return kvm_vm_ioctl(kvm_state, KVM_SET_DEVICE_ATTR, );
+}
+
 void kvm_arch_accel_class_init(ObjectClass *oc)
 {
 }
-- 
2.39.2

Re: [PATCH] gdbstub: Fix SEGFAULT in find_cpu_clusters()

2023-09-14 Thread Philippe Mathieu-Daudé


Hi Nikita,

On 14/9/23 14:25, Nikita Shubin wrote:

From: Nikita Shubin 

target_xml is a dynamic GString, use NULL to initialize it.

Fixes: 56e534bd11 ("gdbstub: refactor get_feature_xml")

Signed-off-by: Nikita Shubin 
---
Observed with:
build-qemu/qemu-system-riscv64 -M sifive_u -bios none -nographic -s
Segmentation fault
---
  gdbstub/softmmu.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gdbstub/softmmu.c b/gdbstub/softmmu.c
index 9f0b8b5497..42645d2220 100644
--- a/gdbstub/softmmu.c
+++ b/gdbstub/softmmu.c
@@ -292,7 +292,7 @@ static int find_cpu_clusters(Object *child, void *opaque)
  assert(cluster->cluster_id != UINT32_MAX);
  process->pid = cluster->cluster_id + 1;
  process->attached = false;
-process->target_xml[0] = '\0';
+process->target_xml = NULL;


Yes, good catch. Akihiko also posted the same fix 2 days ago:
https://lore.kernel.org/qemu-devel/20230912065811.27796-2-akihiko.od...@daynix.com/

  
  return 0;

  }

Re: [PATCH v3 01/12] gdbstub: Fix target_xml initialization

2023-09-14 Thread Philippe Mathieu-Daudé


On 13/9/23 00:40, Akihiko Odaki wrote:

target_xml is no longer a fixed-length array but a pointer to a
variable-length memory.

Fixes: 56e534bd11 ("gdbstub: refactor get_feature_xml")
Signed-off-by: Akihiko Odaki 
---
  gdbstub/softmmu.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gdbstub/softmmu.c b/gdbstub/softmmu.c
index 9f0b8b5497..42645d2220 100644
--- a/gdbstub/softmmu.c
+++ b/gdbstub/softmmu.c
@@ -292,7 +292,7 @@ static int find_cpu_clusters(Object *child, void *opaque)
  assert(cluster->cluster_id != UINT32_MAX);
  process->pid = cluster->cluster_id + 1;
  process->attached = false;
-process->target_xml[0] = '\0';
+process->target_xml = NULL;
  
  return 0;

  }


Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH 3/3] iotests: distinguish 'skipped' and 'not run' states

2023-09-14 Thread Kevin Wolf

Am 13.09.2023 um 18:31 hat Eric Blake geschrieben:
> On Wed, Sep 13, 2023 at 04:47:54PM +0100, Daniel P. Berrangé wrote:
> > On Wed, Sep 06, 2023 at 04:09:17PM +0200, Denis V. Lunev wrote:
> > > Each particular testcase could skipped intentionally and accidentally.
> > > For example the test is not designed for a particular image format or
> > > is not run due to the missed library.
> > > 
> > > The latter case is unwanted in reality. Though the discussion has
> > > revealed that failing the test in such a case would be bad. Thus the
> > > patch tries to do different thing. It adds additional status for
> > > the test case - 'skipped' and bound intentinal cases to that state.
> > 
> > I'm not convinced this distinction makes sense and I fear it is
> > leading us down the same undesirable route as avocado with too
> > many distinct states leading to confusion:
> > 
> >   https://lore.kernel.org/qemu-devel/yy1gb1kb3ysiu...@redhat.com/
> > 
> > If I looked at the output I can't tell you the difference between
> > "not run" and "skipped" - they both sound the same to me.
> > 
> > IMHO there's alot to be said for the simplicity of sticking with
> > nothing more than PASS/FAIL/SKIP as status names.  The descriptive
> > text associated with each SKIP would give more context as to the
> > reason in each case if needed.
> 
> I guess it boils down to whether there is an actionable response in
> that message.  If a test is skipped because it is the wrong format
> (for example, ./check -raw skipping a test that only works with
> qcow2), there's nothing for me to do.  If a test is skipped because my
> setup didn't permit running the test, but where I could enhance my
> environment (install more software, pick a different file system,
> ...), then having the skip message call that out is useful if I want
> to take action to get more test coverage.

I'm not sure if there is a clear line to define whether there is an
actionable response or not, because that completely depends on what the
user considers an acceptable response.

You have the relatively clear cases like the test being for another
format, though you could argue that the actionable response is running
./check -qcow2 if raw doesn't work - in fact, why don't we allow ./check
-raw -qcow2 to run both and count it as skipped only if it wasn't run at
all?

The relatively clear case on the other end of the spectrum is if a tool
is missing on the host that you could possibly install. Though maybe
your distro doesn't even package it, so is that still a reasonable
action to take?

What about cases where a block driver isn't compiled in or not
whitelisted? (We've seen this often enough with quorum.) That could be
an accident, but more likely it was a conscious decision and while just
enabling it might fix the test case, enabling additional features in
your product just to make tests happy might not be considered
acceptable. So should this be "not run" or "skipped"? [1]

You could find other unclear cases like tests depending on a different
target architecture, or testing different code paths on different target
architectures.

> Even if the message is present, we have so many tests intentionally
> skipped that it is hard to see the few tests where a skip could be
> turned into a pass by installing a prerequisite.
> 
> > > +++ b/tests/qemu-iotests/testrunner.py
> > > @@ -200,6 +200,8 @@ def test_print_one_line(self, test: str,
> > >  col = '\033[1m\033[31m'
> > >  elif status == 'not run':
> > >  col = '\033[33m'
> > > +elif status == 'skipped':
> > > +col = '\033[34m'
> > >  else:
> > >  col = ''
> 
> It looks like for now, the only difference in the two designations is
> the output color, and even then, only if you are running the tests in
> an environment where color matters (CI systems may not be showing
> colors as intended).

Well, and the different status text, obviously.

CI will probably use the tap interface, which is not modified at all by
the patch, so no result would be recorded for "skipped" tests (while
"not run" tests will still return "ok ... # SKIP").

Kevin

[1] If you do answer this rhetorical question, please note that I
already forgot which is which, so maybe also specify if you mean the
bad skip or the harmless skip.

Re: [RFC PATCH 3/3] hw/arm/sbsa-ref: use bsa.h for PPI definitions

2023-09-14 Thread Philippe Mathieu-Daudé


On 14/9/23 14:01, Leif Lindholm wrote:

Use the private peripheral interrupt definitions from bsa.h instead of
defining them locally. Refactor to use PPI() to convert from INTID macro
where necessary.

Signed-off-by: Leif Lindholm 
---
  hw/arm/sbsa-ref.c | 24 +++-
  1 file changed, 11 insertions(+), 13 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé

[PATCH] gdbstub: Fix SEGFAULT in find_cpu_clusters()

2023-09-14 Thread Nikita Shubin

From: Nikita Shubin 

target_xml is a dynamic GString, use NULL to initialize it.

Fixes: 56e534bd11 ("gdbstub: refactor get_feature_xml")

Signed-off-by: Nikita Shubin 
---
Observed with:
build-qemu/qemu-system-riscv64 -M sifive_u -bios none -nographic -s
Segmentation fault
---
 gdbstub/softmmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gdbstub/softmmu.c b/gdbstub/softmmu.c
index 9f0b8b5497..42645d2220 100644
--- a/gdbstub/softmmu.c
+++ b/gdbstub/softmmu.c
@@ -292,7 +292,7 @@ static int find_cpu_clusters(Object *child, void *opaque)
 assert(cluster->cluster_id != UINT32_MAX);
 process->pid = cluster->cluster_id + 1;
 process->attached = false;
-process->target_xml[0] = '\0';
+process->target_xml = NULL;
 
 return 0;
 }
-- 
2.39.2

Re: [RFC PATCH 2/3] {include/}hw/arm: refactor BSA/virt PPI logic

2023-09-14 Thread Philippe Mathieu-Daudé


On 14/9/23 14:01, Leif Lindholm wrote:

GIC Private Peripheral Interrupts (PPI) are defined as GIC INTID 16-31.
As in, PPI0 is INTID16 .. PPI15 is INTID31.
Arm's Base System Architecture specification (BSA) lists the mandated and
recommended private interrupt IDs by INTID, not by PPI index. But current
definitions in qemu define them by PPI index, complicating cross
referencing.

Meanwhile, the PPI(x) macro counterintuitively adds 16 to the input value,
converting a PPI index to an INTID.

Resolve this by redefining the BSA-allocated PPIs by their INTIDs,
inverting the logic of the PPI(x) macro and flipping where it is used.

Signed-off-by: Leif Lindholm 
---
  hw/arm/virt-acpi-build.c |  4 ++--
  hw/arm/virt.c|  9 +
  include/hw/arm/bsa.h | 14 +++---
  3 files changed, 14 insertions(+), 13 deletions(-)


Isn't it simpler to reorder patches 1 <-> 2?
(First fix PPI macro use within virt.c, then expose header)

[PATCH v23 12/20] qapi/s390x/cpu topology: query-cpu-polarization qmp command

2023-09-14 Thread Nina Schoetterl-Glausch

From: Pierre Morel 

The query-cpu-polarization qmp command returns the current
CPU polarization of the machine.

Signed-off-by: Pierre Morel 
Reviewed-by: Thomas Huth 
Reviewed-by: Nina Schoetterl-Glausch 
Co-developed-by: Nina Schoetterl-Glausch 
Signed-off-by: Nina Schoetterl-Glausch 
---
 qapi/machine-target.json | 29 +
 hw/s390x/cpu-topology.c  |  8 
 2 files changed, 37 insertions(+)

diff --git a/qapi/machine-target.json b/qapi/machine-target.json
index 276c3bf9d1..58ba28a868 100644
--- a/qapi/machine-target.json
+++ b/qapi/machine-target.json
@@ -445,3 +445,32 @@
   'features': [ 'unstable' ],
   'if': { 'all': [ 'TARGET_S390X', 'CONFIG_KVM' ] }
 }
+
+##
+# @CpuPolarizationInfo:
+#
+# The result of a cpu polarization
+#
+# @polarization: the CPU polarization
+#
+# Since: 8.2
+##
+{ 'struct': 'CpuPolarizationInfo',
+  'data': { 'polarization': 'CpuS390Polarization' },
+  'if': { 'all': [ 'TARGET_S390X', 'CONFIG_KVM' ] }
+}
+
+##
+# @query-s390x-cpu-polarization:
+#
+# Features:
+# @unstable: This command may still be modified.
+#
+# Returns: the machine polarization
+#
+# Since: 8.2
+##
+{ 'command': 'query-s390x-cpu-polarization', 'returns': 'CpuPolarizationInfo',
+  'features': [ 'unstable' ],
+  'if': { 'all': [ 'TARGET_S390X', 'CONFIG_KVM' ] }
+}
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
index 18274db74c..1561a8ab44 100644
--- a/hw/s390x/cpu-topology.c
+++ b/hw/s390x/cpu-topology.c
@@ -459,3 +459,11 @@ void qmp_set_cpu_topology(uint16_t core,
  has_drawer, drawer, has_entitlement, entitlement,
  has_dedicated, dedicated, errp);
 }
+
+CpuPolarizationInfo *qmp_query_s390x_cpu_polarization(Error **errp)
+{
+CpuPolarizationInfo *info = g_new0(CpuPolarizationInfo, 1);
+
+info->polarization = s390_topology.polarization;
+return info;
+}
-- 
2.39.2

[PATCH v23 03/20] target/s390x/cpu topology: handle STSI(15) and build the SYSIB

2023-09-14 Thread Nina Schoetterl-Glausch

From: Pierre Morel 

On interception of STSI(15.1.x) the System Information Block
(SYSIB) is built from the list of pre-ordered topology entries.

Signed-off-by: Pierre Morel 
Reviewed-by: Nina Schoetterl-Glausch 
Co-developed-by: Nina Schoetterl-Glausch 
Signed-off-by: Nina Schoetterl-Glausch 
---
 MAINTAINERS  |   1 +
 qapi/machine-target.json |  14 ++
 include/hw/s390x/cpu-topology.h  |  23 +++
 include/hw/s390x/sclp.h  |   1 +
 target/s390x/cpu.h   |  75 +++
 hw/s390x/cpu-topology.c  |   2 +
 target/s390x/kvm/kvm.c   |   5 +-
 target/s390x/kvm/stsi-topology.c | 338 +++
 target/s390x/kvm/meson.build |   3 +-
 9 files changed, 460 insertions(+), 2 deletions(-)
 create mode 100644 target/s390x/kvm/stsi-topology.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 9c6599a55b..17b92fe3ce 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1702,6 +1702,7 @@ M: Nina Schoetterl-Glausch 
 S: Supported
 F: include/hw/s390x/cpu-topology.h
 F: hw/s390x/cpu-topology.c
+F: target/s390x/kvm/stsi-topology.c
 
 X86 Machines
 
diff --git a/qapi/machine-target.json b/qapi/machine-target.json
index f0a6b72414..0d45a590ce 100644
--- a/qapi/machine-target.json
+++ b/qapi/machine-target.json
@@ -361,3 +361,17 @@
'TARGET_MIPS',
'TARGET_LOONGARCH64',
'TARGET_RISCV' ] } }
+
+##
+# @CpuS390Polarization:
+#
+# An enumeration of cpu polarization that can be assumed by a virtual
+# S390 CPU
+#
+# Since: 8.2
+##
+{ 'enum': 'CpuS390Polarization',
+  'prefix': 'S390_CPU_POLARIZATION',
+  'data': [ 'horizontal', 'vertical' ],
+'if': 'TARGET_S390X'
+}
diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
index 97b0af2795..350c7ea8aa 100644
--- a/include/hw/s390x/cpu-topology.h
+++ b/include/hw/s390x/cpu-topology.h
@@ -15,10 +15,33 @@
 #include "hw/boards.h"
 #include "qapi/qapi-types-machine-target.h"
 
+#define S390_TOPOLOGY_CPU_IFL   0x03
+
+typedef struct s390_topology_id {
+uint8_t sentinel;
+uint8_t drawer;
+uint8_t book;
+uint8_t socket;
+uint8_t type;
+uint8_t vertical:1;
+uint8_t entitlement:2;
+uint8_t dedicated;
+uint8_t origin;
+} s390_topology_id;
+
+typedef struct S390TopologyEntry {
+QTAILQ_ENTRY(S390TopologyEntry) next;
+s390_topology_id id;
+uint64_t mask;
+} S390TopologyEntry;
+
 typedef struct S390Topology {
 uint8_t *cores_per_socket;
+CpuS390Polarization polarization;
 } S390Topology;
 
+typedef QTAILQ_HEAD(, S390TopologyEntry) S390TopologyList;
+
 #ifdef CONFIG_KVM
 bool s390_has_topology(void);
 void s390_topology_setup_cpu(MachineState *ms, S390CPU *cpu, Error **errp);
diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h
index cf1f2efae2..c49051e17e 100644
--- a/include/hw/s390x/sclp.h
+++ b/include/hw/s390x/sclp.h
@@ -112,6 +112,7 @@ typedef struct CPUEntry {
 } QEMU_PACKED CPUEntry;
 
 #define SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET 128
+#define SCLP_READ_SCP_INFO_MNEST2
 typedef struct ReadInfo {
 SCCBHeader h;
 uint16_t rnmax;
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index dfcc1aa1fc..c1ba5c46d6 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -571,6 +571,29 @@ typedef struct SysIB_322 {
 } SysIB_322;
 QEMU_BUILD_BUG_ON(sizeof(SysIB_322) != 4096);
 
+/*
+ * Topology Magnitude fields (MAG) indicates the maximum number of
+ * topology list entries (TLE) at the corresponding nesting level.
+ */
+#define S390_TOPOLOGY_MAG  6
+#define S390_TOPOLOGY_MAG6 0
+#define S390_TOPOLOGY_MAG5 1
+#define S390_TOPOLOGY_MAG4 2
+#define S390_TOPOLOGY_MAG3 3
+#define S390_TOPOLOGY_MAG2 4
+#define S390_TOPOLOGY_MAG1 5
+/* Configuration topology */
+typedef struct SysIB_151x {
+uint8_t  reserved0[2];
+uint16_t length;
+uint8_t  mag[S390_TOPOLOGY_MAG];
+uint8_t  reserved1;
+uint8_t  mnest;
+uint32_t reserved2;
+char tle[];
+} SysIB_151x;
+QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
+
 typedef union SysIB {
 SysIB_111 sysib_111;
 SysIB_121 sysib_121;
@@ -578,9 +601,61 @@ typedef union SysIB {
 SysIB_221 sysib_221;
 SysIB_222 sysib_222;
 SysIB_322 sysib_322;
+SysIB_151x sysib_151x;
 } SysIB;
 QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
 
+/*
+ * CPU Topology List provided by STSI with fc=15 provides a list
+ * of two different Topology List Entries (TLE) types to specify
+ * the topology hierarchy.
+ *
+ * - Container Topology List Entry
+ *   Defines a container to contain other Topology List Entries
+ *   of any type, nested containers or CPU.
+ * - CPU Topology List Entry
+ *   Specifies the CPUs position, type, entitlement and polarization
+ *   of the CPUs contained in the last Container TLE.
+ *
+ * There can be theoretically up to five levels of containers, QEMU
+ * uses only three levels, the drawer's, book's and socket's level.
+ *
+ * A container with a nesting level (NL)

[PATCH v23 16/20] tests/avocado: s390x cpu topology entitlement tests

2023-09-14 Thread Nina Schoetterl-Glausch

From: Pierre Morel 

Test changes in the entitlement from both a guest and a host point of
view, depending on the polarization.

Signed-off-by: Pierre Morel 
Reviewed-by: Nina Schoetterl-Glausch 
Co-developed-by: Nina Schoetterl-Glausch 
Signed-off-by: Nina Schoetterl-Glausch 
---
 tests/avocado/s390_topology.py | 62 ++
 1 file changed, 62 insertions(+)

diff --git a/tests/avocado/s390_topology.py b/tests/avocado/s390_topology.py
index 8166cee134..24fac9a54d 100644
--- a/tests/avocado/s390_topology.py
+++ b/tests/avocado/s390_topology.py
@@ -243,3 +243,65 @@ def test_polarization(self):
 res = self.vm.qmp('query-s390x-cpu-polarization')
 self.assertEqual(res['return']['polarization'], 'horizontal')
 self.check_topology(0, 0, 0, 0, 'medium', False)
+
+
+def check_polarization(self, polarization):
+#We need to wait for the change to have been propagated to the kernel
+exec_command_and_wait_for_pattern(self,
+"\n".join([
+"timeout 1 sh -c 'while true",
+'do',
+'syspath="/sys/devices/system/cpu/cpu0/polarization"',
+'polarization="$(cat "$syspath")" || exit',
+   f'if [ "$polarization" = "{polarization}" ]; then',
+'exit 0',
+'fi',
+'sleep 0.01',
+#searched for strings mustn't show up in command, '' to 
obfuscate
+"done' && echo succ''ess || echo fail''ure",
+]),
+"success", "failure")
+
+
+def test_entitlement(self):
+"""
+This test verifies that QEMU modifies the entitlement
+after a guest request and that the guest sees the change.
+
+:avocado: tags=arch:s390x
+:avocado: tags=machine:s390-ccw-virtio
+"""
+self.kernel_init()
+self.vm.launch()
+self.wait_until_booted()
+
+self.system_init()
+
+self.check_polarization('horizontal')
+self.check_topology(0, 0, 0, 0, 'medium', False)
+
+self.guest_set_dispatching('1')
+self.check_polarization('vertical:medium')
+self.check_topology(0, 0, 0, 0, 'medium', False)
+
+res = self.vm.qmp('set-cpu-topology',
+  {'core-id': 0, 'entitlement': 'low'})
+self.assertEqual(res['return'], {})
+self.check_polarization('vertical:low')
+self.check_topology(0, 0, 0, 0, 'low', False)
+
+res = self.vm.qmp('set-cpu-topology',
+  {'core-id': 0, 'entitlement': 'medium'})
+self.assertEqual(res['return'], {})
+self.check_polarization('vertical:medium')
+self.check_topology(0, 0, 0, 0, 'medium', False)
+
+res = self.vm.qmp('set-cpu-topology',
+  {'core-id': 0, 'entitlement': 'high'})
+self.assertEqual(res['return'], {})
+self.check_polarization('vertical:high')
+self.check_topology(0, 0, 0, 0, 'high', False)
+
+self.guest_set_dispatching('0');
+self.check_polarization("horizontal")
+self.check_topology(0, 0, 0, 0, 'high', False)
-- 
2.39.2

Re: Various changes "backportability"

2023-09-14 Thread Stefan Hajnoczi

On Wed, Sep 13, 2023 at 05:44:38PM +0300, Michael Tokarev wrote:
> 13.09.2023 17:27, Stefan Hajnoczi wrote:
> ...
> > > For example, recent tpm bugfix, which is trivial by its own,
> > > uses RETRY_ON_EINTR helper which were introduced recently and
> > > which is now used everywhere.  coroutine_fn et al markers is
> > > another example, translator_io_start is yet another, and so
> > > on and so on.
> 
> > The general concept makes sense to me but I'm not sure what the
> > specific issue with adding (?) coroutine_fn was. Can you link to the
> > patch that caused difficulties so I can review it?
> 
> There's nothing really exciting here, and coroutine_fn example isn't
> a best one really.  I'm talking about this:
> 
> https://gitlab.com/mjt0k/qemu/-/commit/c5034f827726f5876234bf4c6a0fab648fd8b020
> 
> which is a current back-port of 92e2e6a867334a990f8d29f07ca34e3162fdd6ec
> "virtio: Drop out of coroutine context in virtio_load()":
> 
> https://gitlab.com/mjt0k/qemu/-/commit/92e2e6a867334a990f8d29f07ca34e3162fdd6ec
> 
> This is a bugfix which I tried to cherry-pick (btw, I dunno yet if it should
> go to 8.0 or 7.2 to begin with, asked this in another email, but it still
> serves as an example).  Original patch adds coroutine_mixed_fn to some 
> existing
> functions and to a newly added function.
> 
> The patch introducing coroutine_mixed_fn marker is v7.2.0-909-g0f3de970 .
> This is actually a very good example of a way how things are done best,
> an excellent example of what I'm talking here, - this 0f3de970 only introduces
> the new concept (to be widely used), not converting everything to it
> right away.  So it's a good example of how things can be done right.
> 
> But this 0f3de970 change is based on earlier change which split things up
> and moved stuff from one place to another, and which is too large to
> backport.  So even if 0f3de970 did an excellent job, it is still of no
> use in this context.
> 
> I decided to drop coroutine_mixed_fn markings in the fix for 7.2 in this
> context, - again, if this particular fix is needed there to begin with,
> which is a question unrelated to this topic.
> 
> 
> A better example is a trivial thing with RETRY_ON_EINTR introduction.
> A trivial macro which replaced TFR in
> 
> commit 37b0b24e933c18269dddbf6b83f91823cacf8105
> Author: Nikita Ivanov 
> Date:   Sun Oct 23 12:04:22 2022 +0300
> 
> error handling: Use RETRY_ON_EINTR() macro where applicable
> 
> if this change were split into two, first introducing the new macro
> and second converting existing code & removing old macro, it'd be
> possible to just cherry-pick the first part and thered' be no need
> to modify further cherry-picks which uses RETRY_ON_EINTR.
> 
> But once again, this all is definitely not as important as getting
> good code into main :)

I see, thank you!

Stefan


signature.asc
Description: PGP signature

[PATCH v23 07/20] target/s390x/cpu topology: activate CPU topology

2023-09-14 Thread Nina Schoetterl-Glausch

From: Pierre Morel 

The KVM capability KVM_CAP_S390_CPU_TOPOLOGY is used to
activate the S390_FEAT_CONFIGURATION_TOPOLOGY feature and
the topology facility in the host CPU model for the guest
in the case the topology is available in QEMU and in KVM.

The feature is disabled by default and fenced for SE
(secure execution).

Signed-off-by: Pierre Morel 
Reviewed-by: Thomas Huth 
---
 hw/s390x/cpu-topology.c   | 2 +-
 target/s390x/cpu_models.c | 1 +
 target/s390x/kvm/kvm.c| 9 +
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
index c34581964f..13f404a0d7 100644
--- a/hw/s390x/cpu-topology.c
+++ b/hw/s390x/cpu-topology.c
@@ -69,7 +69,7 @@ static int s390_socket_nb(S390CPU *cpu)
  */
 bool s390_has_topology(void)
 {
-return false;
+return s390_has_feat(S390_FEAT_CONFIGURATION_TOPOLOGY);
 }
 
 /**
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 91ce896491..c67b7eeb84 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -255,6 +255,7 @@ bool s390_has_feat(S390Feat feat)
 case S390_FEAT_SIE_CMMA:
 case S390_FEAT_SIE_PFMFI:
 case S390_FEAT_SIE_IBS:
+case S390_FEAT_CONFIGURATION_TOPOLOGY:
 return false;
 break;
 default:
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 10d66c2b65..42795d3027 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -366,6 +366,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
 kvm_vm_enable_cap(s, KVM_CAP_S390_USER_SIGP, 0);
 kvm_vm_enable_cap(s, KVM_CAP_S390_VECTOR_REGISTERS, 0);
 kvm_vm_enable_cap(s, KVM_CAP_S390_USER_STSI, 0);
+kvm_vm_enable_cap(s, KVM_CAP_S390_CPU_TOPOLOGY, 0);
 if (ri_allowed()) {
 if (kvm_vm_enable_cap(s, KVM_CAP_S390_RI, 0) == 0) {
 cap_ri = 1;
@@ -2462,6 +2463,14 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, 
Error **errp)
 set_bit(S390_FEAT_UNPACK, model->features);
 }
 
+/*
+ * If we have kernel support for CPU Topology indicate the
+ * configuration-topology facility.
+ */
+if (kvm_check_extension(kvm_state, KVM_CAP_S390_CPU_TOPOLOGY)) {
+set_bit(S390_FEAT_CONFIGURATION_TOPOLOGY, model->features);
+}
+
 /* We emulate a zPCI bus and AEN, therefore we don't need HW support */
 set_bit(S390_FEAT_ZPCI, model->features);
 set_bit(S390_FEAT_ADAPTER_EVENT_NOTIFICATION, model->features);
-- 
2.39.2

[PATCH v23 19/20] tests/avocado: s390x cpu topology dedicated errors

2023-09-14 Thread Nina Schoetterl-Glausch

From: Pierre Morel 

Let's test that QEMU refuses to setup a dedicated CPU with
low or medium entitlement.

Signed-off-by: Pierre Morel 
Reviewed-by: Thomas Huth 
---
 tests/avocado/s390_topology.py | 48 ++
 1 file changed, 48 insertions(+)

diff --git a/tests/avocado/s390_topology.py b/tests/avocado/s390_topology.py
index a63c2b2923..d3e6556c0f 100644
--- a/tests/avocado/s390_topology.py
+++ b/tests/avocado/s390_topology.py
@@ -364,3 +364,51 @@ def test_socket_full(self):
 res = self.vm.qmp('set-cpu-topology',
   {'core-id': 2, 'socket-id': 0, 'book-id': 1})
 self.assertEqual(res['return'], {})
+
+def test_dedicated_error(self):
+"""
+This test verifies that QEMU refuses to lower the entitlement
+of a dedicated CPU
+
+:avocado: tags=arch:s390x
+:avocado: tags=machine:s390-ccw-virtio
+"""
+self.kernel_init()
+self.vm.launch()
+self.wait_until_booted()
+
+self.system_init()
+
+res = self.vm.qmp('set-cpu-topology',
+  {'core-id': 0, 'dedicated': True})
+self.assertEqual(res['return'], {})
+
+self.check_topology(0, 0, 0, 0, 'high', True)
+
+self.guest_set_dispatching('1');
+
+self.check_topology(0, 0, 0, 0, 'high', True)
+
+res = self.vm.qmp('set-cpu-topology',
+  {'core-id': 0, 'entitlement': 'low', 'dedicated': 
True})
+self.assertEqual(res['error']['class'], 'GenericError')
+
+res = self.vm.qmp('set-cpu-topology',
+  {'core-id': 0, 'entitlement': 'low'})
+self.assertEqual(res['error']['class'], 'GenericError')
+
+res = self.vm.qmp('set-cpu-topology',
+  {'core-id': 0, 'entitlement': 'medium', 'dedicated': 
True})
+self.assertEqual(res['error']['class'], 'GenericError')
+
+res = self.vm.qmp('set-cpu-topology',
+  {'core-id': 0, 'entitlement': 'medium'})
+self.assertEqual(res['error']['class'], 'GenericError')
+
+res = self.vm.qmp('set-cpu-topology',
+  {'core-id': 0, 'entitlement': 'low', 'dedicated': 
False})
+self.assertEqual(res['return'], {})
+
+res = self.vm.qmp('set-cpu-topology',
+  {'core-id': 0, 'entitlement': 'medium', 'dedicated': 
False})
+self.assertEqual(res['return'], {})
-- 
2.39.2

[RFC PATCH 2/3] {include/}hw/arm: refactor BSA/virt PPI logic

2023-09-14 Thread Leif Lindholm

GIC Private Peripheral Interrupts (PPI) are defined as GIC INTID 16-31.
As in, PPI0 is INTID16 .. PPI15 is INTID31.
Arm's Base System Architecture specification (BSA) lists the mandated and
recommended private interrupt IDs by INTID, not by PPI index. But current
definitions in qemu define them by PPI index, complicating cross
referencing.

Meanwhile, the PPI(x) macro counterintuitively adds 16 to the input value,
converting a PPI index to an INTID.

Resolve this by redefining the BSA-allocated PPIs by their INTIDs,
inverting the logic of the PPI(x) macro and flipping where it is used.

Signed-off-by: Leif Lindholm 
---
 hw/arm/virt-acpi-build.c |  4 ++--
 hw/arm/virt.c|  9 +
 include/hw/arm/bsa.h | 14 +++---
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 6b674231c2..963c58a88a 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -729,9 +729,9 @@ build_madt(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 for (i = 0; i < MACHINE(vms)->smp.cpus; i++) {
 ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(i));
 uint64_t physical_base_address = 0, gich = 0, gicv = 0;
-uint32_t vgic_interrupt = vms->virt ? PPI(ARCH_GIC_MAINT_IRQ) : 0;
+uint32_t vgic_interrupt = vms->virt ? ARCH_GIC_MAINT_IRQ : 0;
 uint32_t pmu_interrupt = arm_feature(>env, ARM_FEATURE_PMU) ?
- PPI(VIRTUAL_PMU_IRQ) : 0;
+ VIRTUAL_PMU_IRQ : 0;
 
 if (vms->gic_version == VIRT_GIC_VERSION_2) {
 physical_base_address = memmap[VIRT_GIC_CPU].base;
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 8ad78b23c2..bb70f3eec8 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -815,23 +815,24 @@ static void create_gic(VirtMachineState *vms, 
MemoryRegion *mem)
 for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) {
 qdev_connect_gpio_out(cpudev, irq,
   qdev_get_gpio_in(vms->gic,
-   ppibase + timer_irq[irq]));
+   ppibase
+   + PPI(timer_irq[irq])));
 }
 
 if (vms->gic_version != VIRT_GIC_VERSION_2) {
 qemu_irq irq = qdev_get_gpio_in(vms->gic,
-ppibase + ARCH_GIC_MAINT_IRQ);
+ppibase + PPI(ARCH_GIC_MAINT_IRQ));
 qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt",
 0, irq);
 } else if (vms->virt) {
 qemu_irq irq = qdev_get_gpio_in(vms->gic,
-ppibase + ARCH_GIC_MAINT_IRQ);
+ppibase + PPI(ARCH_GIC_MAINT_IRQ));
 sysbus_connect_irq(gicbusdev, i + 4 * smp_cpus, irq);
 }
 
 qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0,
 qdev_get_gpio_in(vms->gic, ppibase
- + VIRTUAL_PMU_IRQ));
+ + PPI(VIRTUAL_PMU_IRQ)));
 
 sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, 
ARM_CPU_IRQ));
 sysbus_connect_irq(gicbusdev, i + smp_cpus,
diff --git a/include/hw/arm/bsa.h b/include/hw/arm/bsa.h
index 8277b3a379..b7db1cacf1 100644
--- a/include/hw/arm/bsa.h
+++ b/include/hw/arm/bsa.h
@@ -21,15 +21,15 @@
 #ifndef QEMU_ARM_BSA_H
 #define QEMU_ARM_BSA_H
 
-#define ARCH_GIC_MAINT_IRQ  9
+#define ARCH_GIC_MAINT_IRQ  25
 
-#define ARCH_TIMER_VIRT_IRQ   11
-#define ARCH_TIMER_S_EL1_IRQ  13
-#define ARCH_TIMER_NS_EL1_IRQ 14
-#define ARCH_TIMER_NS_EL2_IRQ 10
+#define ARCH_TIMER_VIRT_IRQ   27
+#define ARCH_TIMER_S_EL1_IRQ  29
+#define ARCH_TIMER_NS_EL1_IRQ 30
+#define ARCH_TIMER_NS_EL2_IRQ 26
 
-#define VIRTUAL_PMU_IRQ 7
+#define VIRTUAL_PMU_IRQ 23
 
-#define PPI(irq) ((irq) + 16)
+#define PPI(irq) ((irq) - 16)
 
 #endif /* QEMU_ARM_BSA_H */
-- 
2.30.2

[RFC PATCH 3/3] hw/arm/sbsa-ref: use bsa.h for PPI definitions

2023-09-14 Thread Leif Lindholm

Use the private peripheral interrupt definitions from bsa.h instead of
defining them locally. Refactor to use PPI() to convert from INTID macro
where necessary.

Signed-off-by: Leif Lindholm 
---
 hw/arm/sbsa-ref.c | 24 +++-
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
index bc89eb4806..589b17e3bc 100644
--- a/hw/arm/sbsa-ref.c
+++ b/hw/arm/sbsa-ref.c
@@ -2,6 +2,7 @@
  * ARM SBSA Reference Platform emulation
  *
  * Copyright (c) 2018 Linaro Limited
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
  * Written by Hongbo Zhang 
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -30,6 +31,7 @@
 #include "exec/hwaddr.h"
 #include "kvm_arm.h"
 #include "hw/arm/boot.h"
+#include "hw/arm/bsa.h"
 #include "hw/arm/fdt.h"
 #include "hw/arm/smmuv3.h"
 #include "hw/block/flash.h"
@@ -55,13 +57,6 @@
 #define NUM_SMMU_IRQS   4
 #define NUM_SATA_PORTS  6
 
-#define VIRTUAL_PMU_IRQ7
-#define ARCH_GIC_MAINT_IRQ 9
-#define ARCH_TIMER_VIRT_IRQ11
-#define ARCH_TIMER_S_EL1_IRQ   13
-#define ARCH_TIMER_NS_EL1_IRQ  14
-#define ARCH_TIMER_NS_EL2_IRQ  10
-
 enum {
 SBSA_FLASH,
 SBSA_MEM,
@@ -494,15 +489,18 @@ static void create_gic(SBSAMachineState *sms, 
MemoryRegion *mem)
 for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) {
 qdev_connect_gpio_out(cpudev, irq,
   qdev_get_gpio_in(sms->gic,
-   ppibase + timer_irq[irq]));
+   ppibase
+   + PPI(timer_irq[irq])));
 }
 
+irq = qdev_get_gpio_in(sms->gic,
+   ppibase + PPI(ARCH_GIC_MAINT_IRQ));
 qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", 0,
-qdev_get_gpio_in(sms->gic, ppibase
- + ARCH_GIC_MAINT_IRQ));
-qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0,
-qdev_get_gpio_in(sms->gic, ppibase
- + VIRTUAL_PMU_IRQ));
+irq);
+
+irq = qdev_get_gpio_in(sms->gic,
+   ppibase + PPI(VIRTUAL_PMU_IRQ));
+qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, irq);
 
 sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, 
ARM_CPU_IRQ));
 sysbus_connect_irq(gicbusdev, i + smp_cpus,
-- 
2.30.2

[RFC PATCH 1/3] include/hw/arm: move BSA definitions to bsa.h

2023-09-14 Thread Leif Lindholm

virt.h defines a number of IRQs that are ultimately described by Arm's
Base System Architecture specification. Move these to a dedicated header
so that they can be reused by other platforms that do the same.
Include that header from virt.h to minimise churn.

Signed-off-by: Leif Lindholm 
---
 include/hw/arm/bsa.h  | 35 +++
 include/hw/arm/virt.h | 12 +---
 2 files changed, 36 insertions(+), 11 deletions(-)
 create mode 100644 include/hw/arm/bsa.h

diff --git a/include/hw/arm/bsa.h b/include/hw/arm/bsa.h
new file mode 100644
index 00..8277b3a379
--- /dev/null
+++ b/include/hw/arm/bsa.h
@@ -0,0 +1,35 @@
+/*
+ * Common definitions for Arm Base System Architecture (BSA) platforms.
+ *
+ * Copyright (c) 2015 Linaro Limited
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see .
+ *
+ */
+
+#ifndef QEMU_ARM_BSA_H
+#define QEMU_ARM_BSA_H
+
+#define ARCH_GIC_MAINT_IRQ  9
+
+#define ARCH_TIMER_VIRT_IRQ   11
+#define ARCH_TIMER_S_EL1_IRQ  13
+#define ARCH_TIMER_NS_EL1_IRQ 14
+#define ARCH_TIMER_NS_EL2_IRQ 10
+
+#define VIRTUAL_PMU_IRQ 7
+
+#define PPI(irq) ((irq) + 16)
+
+#endif /* QEMU_ARM_BSA_H */
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index e1ddbea96b..f69239850e 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -34,6 +34,7 @@
 #include "qemu/notify.h"
 #include "hw/boards.h"
 #include "hw/arm/boot.h"
+#include "hw/arm/bsa.h"
 #include "hw/block/flash.h"
 #include "sysemu/kvm.h"
 #include "hw/intc/arm_gicv3_common.h"
@@ -43,17 +44,6 @@
 #define NUM_VIRTIO_TRANSPORTS 32
 #define NUM_SMMU_IRQS  4
 
-#define ARCH_GIC_MAINT_IRQ  9
-
-#define ARCH_TIMER_VIRT_IRQ   11
-#define ARCH_TIMER_S_EL1_IRQ  13
-#define ARCH_TIMER_NS_EL1_IRQ 14
-#define ARCH_TIMER_NS_EL2_IRQ 10
-
-#define VIRTUAL_PMU_IRQ 7
-
-#define PPI(irq) ((irq) + 16)
-
 /* See Linux kernel arch/arm64/include/asm/pvclock-abi.h */
 #define PVTIME_SIZE_PER_CPU 64
 
-- 
2.30.2

[RFC PATCH 0/3] Refactor PPI logic/definitions for virt/sbsa-ref

2023-09-14 Thread Leif Lindholm

While reviewing Marcin's patch this morning, cross referencing different
specifications and looking at various places around the source code in
order to convinced myself he really hadn't missed something out (the
existing plumbing made it *so* clean to add), my brain broke slightly
at keeping track of PPIs/INTIDs between the various sources.

Moreover, I found the PPI() macro in virt.h to be doing the exact
opposite of what I would have expected it to (it converts a PPI to an
INTID rather than the other way around).

So I refactored stuff so that:
- PPIs defined by BSA are moved to a (new) common header.
- The _IRQ definitions for those PPIs refer to the INTIDs.
- sbsa-ref and virt both use these definitions.

This change does objectively add a bit more noise to the code, since it
means more locations need to use the PPI macro than before, but it felt
like a readability improvement to me.

Not even compilation tested, just the least confusing way of asking
whether the change could be accepted at all.

Leif Lindholm (3):
  include/hw/arm: move BSA definitions to bsa.h
  {include/}hw/arm: refactor BSA/virt PPI logic
  hw/arm/sbsa-ref: use bsa.h for PPI definitions

 hw/arm/sbsa-ref.c| 24 +++-
 hw/arm/virt-acpi-build.c |  4 ++--
 hw/arm/virt.c|  9 +
 include/hw/arm/bsa.h | 35 +++
 include/hw/arm/virt.h| 12 +---
 5 files changed, 54 insertions(+), 30 deletions(-)
 create mode 100644 include/hw/arm/bsa.h

-- 
2.30.2

Re: [PATCH v3 2/5] test-bdrv-drain: avoid race with BH in IOThread drain test

2023-09-14 Thread Stefan Hajnoczi

On Wed, Sep 13, 2023 at 11:08:54AM -0500, Eric Blake wrote:
> On Tue, Sep 12, 2023 at 07:10:34PM -0400, Stefan Hajnoczi wrote:
> > This patch fixes a race condition in test-bdrv-drain that is difficult
> > to reproduce. test-bdrv-drain sometimes fails without an error message
> > on the block pull request sent by Kevin Wolf on Sep 4, 2023. I was able
> > to reproduce it locally and found that "block-backend: process I/O in
> > the current AioContext" (in this patch series) is the first commit where
> > it reproduces.
> > 
> > I do not know why "block-backend: process I/O in the current AioContext"
> > exposes this bug. It might be related to the fact that the test's preadv
> > request runs in the main thread instead of IOThread a after my commit.
> 
> In reading the commit message before the impacted code, my first
> thought was that you had a typo of an extra word (that is, something
> to fix by s/a //), but reading further, a better fix would be calling
> attention to the fact that you are referencing a specific named
> thread, as in s/IOThread a/IOThread A/...
> 
> > That might simply change the timing of the test.
> > 
> > Now on to the race condition in test-bdrv-drain. The main thread
> > schedules a BH in IOThread a and then drains the BDS:
> 
> ...and another spot with the same parse issue...
> 
> > 
> >   aio_bh_schedule_oneshot(ctx_a, test_iothread_main_thread_bh, );
> > 
> >   /* The request is running on the IOThread a. Draining its block device
> 
> ...but here you were quoting from the existing code base, which is
> where I finally realized it was more than just your commit message.
> 
> >* will make sure that it has completed as far as the BDS is concerned,
> >* but the drain in this thread can continue immediately after
> >* bdrv_dec_in_flight() and aio_ret might be assigned only slightly
> >* later. */
> >   do_drain_begin(drain_type, bs);
> > 
> > If the BH completes before do_drain_begin() then there is nothing to
> > worry about.
> > 
> > If the BH invokes bdrv_flush() before do_drain_begin(), then
> > do_drain_begin() waits for it to complete.
> > 
> > The problematic case is when do_drain_begin() runs before the BH enters
> > bdrv_flush(). Then do_drain_begin() misses the BH and the drain
> > mechanism has failed in quiescing I/O.
> > 
> > Fix this by incrementing the in_flight counter so that do_drain_begin()
> > waits for test_iothread_main_thread_bh().
> > 
> > Signed-off-by: Stefan Hajnoczi 
> > ---
> >  tests/unit/test-bdrv-drain.c | 8 
> >  1 file changed, 8 insertions(+)
> > 
> > diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
> > index ccc453c29e..67a79aa3f0 100644
> > --- a/tests/unit/test-bdrv-drain.c
> > +++ b/tests/unit/test-bdrv-drain.c
> > @@ -512,6 +512,7 @@ static void test_iothread_main_thread_bh(void *opaque)
> >   * executed during drain, otherwise this would deadlock. */
> >  aio_context_acquire(bdrv_get_aio_context(data->bs));
> >  bdrv_flush(data->bs);
> > +bdrv_dec_in_flight(data->bs); /* incremented by test_iothread_common() 
> > */
> >  aio_context_release(bdrv_get_aio_context(data->bs));
> >  }
> >  
> > @@ -583,6 +584,13 @@ static void test_iothread_common(enum drain_type 
> > drain_type, int drain_thread)
> >  aio_context_acquire(ctx_a);
> >  }
> >  
> > +/*
> > + * Increment in_flight so that do_drain_begin() waits for
> > + * test_iothread_main_thread_bh(). This prevents the race between
> > + * test_iothread_main_thread_bh() in IOThread a and 
> > do_drain_begin() in
> > + * this thread. test_iothread_main_thread_bh() decrements 
> > in_flight.
> > + */
> > +bdrv_inc_in_flight(bs);
> >  aio_bh_schedule_oneshot(ctx_a, test_iothread_main_thread_bh, 
> > );
> >  
> >  /* The request is running on the IOThread a. Draining its block 
> > device
> 
> and indeed, your commit message is consistent with the current code's
> naming convention.  If you have reason to respin, a pre-req patch to
> change the case before adding more references might be nice, but I
> won't insist.
> 
> Reviewed-by: Eric Blake 

Sorry about that. It is confusing.

Stefan


signature.asc
Description: PGP signature

Re: [PATCH 11/11] qdev: Rework array properties based on list visitor

2023-09-14 Thread Kevin Wolf

Am 14.09.2023 um 12:24 hat Peter Maydell geschrieben:
> On Fri, 8 Sept 2023 at 15:37, Kevin Wolf  wrote:
> >
> > Until now, array properties are actually implemented with a hack that
> > uses multiple properties on the QOM level: a static "foo-len" property
> > and after it is set, dynamically created "foo[i]" properties.
> >
> > In external interfaces (-device on the command line and device_add in
> > QMP), this interface was broken by commit f3558b1b ('qdev: Base object
> > creation on QDict rather than QemuOpts') because QDicts are unordered
> > and therefore it could happen that QEMU tried to set the indexed
> > properties before setting the length, which fails and effectively makes
> > array properties inaccessible. In particular, this affects the 'ports'
> > property of the 'rocker' device.
> >
> > This patch reworks the external interface so that instead of using a
> > separate top-level property for the length and for each element, we use
> > a single true array property that accepts a list value. In the external
> > interfaces, this is naturally expressed as a JSON list and makes array
> > properties accessible again.
> >
> > Creating an array property on the command line without using JSON format
> > is currently not possible. This could be fixed by switching from
> > QemuOpts to a keyval parser, which however requires consideration of the
> > compatibility implications.
> >
> > All internal users of devices with array properties go through
> > qdev_prop_set_array() at this point, so updating it takes care of all of
> > them.
> >
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1090
> > Fixes: f3558b1b763683bb877f7dd5b282469cdadc65c3
> > Signed-off-by: Kevin Wolf 
> 
> I'm hoping that somebody who understands the visitor APIs
> better than me will have a look at this patch, but in the
> meantime, here's my review, which I suspect has a lot of
> comments that mostly reflect that I don't really understand
> the visitor stuff...

I discussed the visitor aspects with Markus before sending the series,
so I think he agrees with the approach. But I wouldn't mind an explicit
Reviewed-by, of course.

> > ---
> >  include/hw/qdev-properties.h |  23 ++--
> >  hw/core/qdev-properties-system.c |   2 +-
> >  hw/core/qdev-properties.c| 204 +++
> >  3 files changed, 133 insertions(+), 96 deletions(-)
> >
> > diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h
> > index 7fa2fdb7c9..9370b36b72 100644
> > --- a/include/hw/qdev-properties.h
> > +++ b/include/hw/qdev-properties.h
> > @@ -61,7 +61,7 @@ extern const PropertyInfo qdev_prop_size;
> >  extern const PropertyInfo qdev_prop_string;
> >  extern const PropertyInfo qdev_prop_on_off_auto;
> >  extern const PropertyInfo qdev_prop_size32;
> > -extern const PropertyInfo qdev_prop_arraylen;
> > +extern const PropertyInfo qdev_prop_array;
> >  extern const PropertyInfo qdev_prop_link;
> >
> >  #define DEFINE_PROP(_name, _state, _field, _prop, _type, ...) {  \
> > @@ -115,8 +115,6 @@ extern const PropertyInfo qdev_prop_link;
> >  .bitmask= (_bitmask), \
> >  .set_default = false)
> >
> > -#define PROP_ARRAY_LEN_PREFIX "len-"
> > -
> >  /**
> >   * DEFINE_PROP_ARRAY:
> >   * @_name: name of the array
> > @@ -127,24 +125,21 @@ extern const PropertyInfo qdev_prop_link;
> >   * @_arrayprop: PropertyInfo defining what property the array elements have
> >   * @_arraytype: C type of the array elements
> >   *
> > - * Define device properties for a variable-length array _name.  A
> > - * static property "len-arrayname" is defined. When the device creator
> > - * sets this property to the desired length of array, further dynamic
> > - * properties "arrayname[0]", "arrayname[1]", ...  are defined so the
> > - * device creator can set the array element values. Setting the
> > - * "len-arrayname" property more than once is an error.
> > + * Define device properties for a variable-length array _name.  The array 
> > is
> > + * represented as a list in the visitor interface.
> > + *
> > + * @_arraytype is required to be movable with memcpy().
> >   *
> > - * When the array length is set, the @_field member of the device
> > + * When the array property is set, the @_field member of the device
> >   * struct is set to the array length, and @_arrayfield is set to point
> > - * to (zero-initialised) memory allocated for the array.  For a zero
> > - * length array, @_field will be set to 0 and @_arrayfield to NULL.
> > + * to the memory allocated for the array.
> > + *
> >   * It is the responsibility of the device deinit code to free the
> >   * @_arrayfield memory.
> >   */
> >  #define DEFINE_PROP_ARRAY(_name, _state, _field,   \
> >_arrayfield, _arrayprop, _arraytype) \
> > -DEFINE_PROP((PROP_ARRAY_LEN_PREFIX _name), \
> > -_state, _field, qdev_prop_arraylen, uint32_t,  \
> > +

Re: [PATCH v2 0/3] docs: update x86 CPU model ABI matrix docs

2023-09-14 Thread Daniel P . Berrangé

Ping for review please. This series still applies to git master.

On Tue, Jul 18, 2023 at 10:26:28AM +0100, Daniel P. Berrangé wrote:
> Changed in v2:
> 
>  - Tweaked commit messages
>  - Also add GraniteRapids CPU model
> 
> Daniel P. Berrangé (3):
>   scripts: drop comment about autogenerated CPU API file
>   docs: fix highlighting of CPU ABI header rows
>   docs: re-generate x86_64 ABI compatibility CSV
> 
>  docs/system/cpu-models-x86-abi.csv | 20 ++--
>  docs/system/cpu-models-x86.rst.inc |  2 +-
>  scripts/cpu-x86-uarch-abi.py   |  1 -
>  3 files changed, 19 insertions(+), 4 deletions(-)
> 
> -- 
> 2.41.0
> 

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PULL 14/14] ui: add precondition for dpy_get_ui_info()

2023-09-14 Thread Daniel P . Berrangé

On Tue, Sep 12, 2023 at 02:46:48PM +0400, marcandre.lur...@redhat.com wrote:
> From: Marc-André Lureau 
> 
> Ensure that it only get called when dpy_ui_info_supported(). The
> function should always return a result. There should be a non-null
> console or active_console.

Empirically that does not appear to be the case. After this patch,
a no-args QEMU launch immediately aborts:

$ ./build/qemu-system-x86_64 
qemu-system-x86_64: ../ui/console.c:818: dpy_get_ui_info: Assertion 
`dpy_ui_info_supported(con)' failed.
Aborted (core dumped)

This ought to be running the GTK UI for me. Manually ask for
SDL instead and it doesn't crash.

> 
> Modify the argument to be const as well.
> 
> Signed-off-by: Marc-André Lureau 
> Reviewed-by: Albert Esteve 
> ---
>  include/ui/console.h | 2 +-
>  ui/console.c | 4 +++-
>  2 files changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/include/ui/console.h b/include/ui/console.h
> index 79e4702912..28882f15a5 100644
> --- a/include/ui/console.h
> +++ b/include/ui/console.h
> @@ -329,7 +329,7 @@ void update_displaychangelistener(DisplayChangeListener 
> *dcl,
>uint64_t interval);
>  void unregister_displaychangelistener(DisplayChangeListener *dcl);
>  
> -bool dpy_ui_info_supported(QemuConsole *con);
> +bool dpy_ui_info_supported(const QemuConsole *con);
>  const QemuUIInfo *dpy_get_ui_info(const QemuConsole *con);
>  int dpy_set_ui_info(QemuConsole *con, QemuUIInfo *info, bool delay);
>  
> diff --git a/ui/console.c b/ui/console.c
> index aa1e09462c..4a4f19ed33 100644
> --- a/ui/console.c
> +++ b/ui/console.c
> @@ -801,7 +801,7 @@ static void dpy_set_ui_info_timer(void *opaque)
>  con->hw_ops->ui_info(con->hw, head, >ui_info);
>  }
>  
> -bool dpy_ui_info_supported(QemuConsole *con)
> +bool dpy_ui_info_supported(const QemuConsole *con)
>  {
>  if (con == NULL) {
>  con = active_console;
> @@ -815,6 +815,8 @@ bool dpy_ui_info_supported(QemuConsole *con)
>  
>  const QemuUIInfo *dpy_get_ui_info(const QemuConsole *con)
>  {
> +assert(dpy_ui_info_supported(con));
> +
>  if (con == NULL) {
>  con = active_console;
>  }
> -- 
> 2.41.0
> 
> 

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH] mem/x86: add processor address space check for VM memory

2023-09-14 Thread David Hildenbrand


We requested a to hotplug a maximum of "8 GiB", and sized the area slightly 
larger to allow for some flexibility
when it comes to placing DIMMs in that "device-memory" area.

Right but here in this example you do not hot plug memory while the VM is 
running. We can hot plug 8G yes, but the memory may not physically exist yet 
(and may never exist). How can we use this math to provision device-memory when 
the memory may not exist physically?


We simply reserve a region in GPA space where we can coldplug and hotplug a
predefined maximum amount of memory we can hotplug.

What do you think is wrong with that?


The only issue I have is that even though we are accounting for it, the memory 
actually might not be physically present.


Not sure if "accounting" is the right word; the memory is not present 
and nowhere indicated as present. It's just a reservation of GPA space, 
like the PCI hole is as well.


[...]



Yes. In this case ms->ram_size == ms->maxram_size and you cannot cold/hotplug 
any memory devices.

See how pc_memory_init() doesn't call machine_memory_devices_init() in that 
case.

That's what the QEMU user asked for when *not* specifying maxmem (e.g., -m 4g).

In order to cold/hotplug any memory devices, you have to tell QEMU ahead of 
time how much memory
you are intending to provide using memory devices (DIMM, NVDIMM, virtio-pmem, 
virtio-mem).


So that means that when we are actually hot plugging the memory, there is no 
need to actually perform additional checks. It can be done statically when -mem 
and -maxmem etc are provided in the command line.


What memory device code does, is find a free location inside the 
reserved GPA space for memory devices. Then, it maps that device at

that location.

[...]


/*
* The 64bit pci hole starts after "above 4G RAM" and
* potentially the space reserved for memory hotplug.
*/

There is the
ROUND_UP(hole64_start, 1 * GiB);
in there that is not really required for the !hole64 case. It
shouldn't matter much in practice I think (besides an aligned value
showing up in the error message).

We could factor out most of that calculation into a
separate function, skipping that alignment to make that
clearer.


Yeah this whole memory segmentation is quite complicated and might benefit from 
a qemu doc or a refactoring.


Absolutely. Do you have time to work on that (including the updated fix?).

--
Cheers,

David / dhildenb

[risu PATCH v3 4/7] s390x: Add basic risugen perl module for s390x

2023-09-14 Thread Thomas Huth

This implements support for simple 16-bit and 32-bit instructions.
Support for 48-bit instructions and support for load/store memory
instructions is not implemented yet.

Signed-off-by: Thomas Huth 
---
 risugen_s390x.pm | 186 +++
 1 file changed, 186 insertions(+)
 create mode 100644 risugen_s390x.pm

diff --git a/risugen_s390x.pm b/risugen_s390x.pm
new file mode 100644
index 000..260e2dd
--- /dev/null
+++ b/risugen_s390x.pm
@@ -0,0 +1,186 @@
+#!/usr/bin/perl -w
+###
+# Copyright 2023 Red Hat Inc.
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+#
+# Contributors:
+# Thomas Huth - initial implementation (based on risugen_ppc64.pm etc.)
+###
+
+# risugen -- generate a test binary file for use with risu
+# See 'risugen --help' for usage information.
+package risugen_s390x;
+
+use strict;
+use warnings;
+
+use risugen_common;
+
+require Exporter;
+
+our @ISA= qw(Exporter);
+our @EXPORT = qw(write_test_code);
+
+my $periodic_reg_random = 1;
+
+# Maximum alignment restriction permitted for a memory op.
+my $MAXALIGN = 64;
+
+sub write_mov_ri($$$)
+{
+my ($r, $imm_h, $imm_l) = @_;
+
+# LGFI
+insn16(0xc0 << 8 | $r << 4 | 0x1);
+insn32($imm_l);
+# IIHF r,imm_high
+insn16(0xc0 << 8 | $r << 4 | 0x8);
+insn32($imm_h);
+}
+
+sub write_mov_fp($$)
+{
+my ($r, $imm) = @_;
+
+write_mov_ri(0, ~$imm, $imm);
+# LDGR
+insn32(0xb3c1 << 16 | $r << 4);
+}
+
+sub write_random_regdata()
+{
+# Floating point registers
+for (my $i = 0; $i < 16; $i++) {
+write_mov_fp($i, rand(0x));
+}
+
+# Load FPC (via r0)
+write_mov_ri(0, 0, (rand(0x) & 0x00fcff77));
+insn32(0xb384);
+
+# general purpose registers
+for (my $i = 0; $i < 16; $i++) {
+write_mov_ri($i, rand(0x), rand(0x));
+}
+}
+
+my $OP_COMPARE = 0;# compare registers
+my $OP_TESTEND = 1;# end of test, stop
+
+sub write_random_register_data()
+{
+write_random_regdata();
+write_risuop($OP_COMPARE);
+}
+
+sub gen_one_insn($$)
+{
+# Given an instruction-details array, generate an instruction
+my $constraintfailures = 0;
+
+INSN: while(1) {
+my ($forcecond, $rec) = @_;
+my $insn = int(rand(0x));
+my $insnname = $rec->{name};
+my $insnwidth = $rec->{width};
+my $fixedbits = $rec->{fixedbits};
+my $fixedbitmask = $rec->{fixedbitmask};
+my $constraint = $rec->{blocks}{"constraints"};
+my $memblock = $rec->{blocks}{"memory"};
+
+$insn &= ~$fixedbitmask;
+$insn |= $fixedbits;
+
+if (defined $constraint) {
+# user-specified constraint: evaluate in an environment
+# with variables set corresponding to the variable fields.
+my $v = eval_with_fields($insnname, $insn, $rec, "constraints", 
$constraint);
+if (!$v) {
+$constraintfailures++;
+if ($constraintfailures > 1) {
+print "1 consecutive constraint failures for $insnname 
constraints string:\n$constraint\n";
+exit (1);
+}
+next INSN;
+}
+}
+
+# OK, we got a good one
+$constraintfailures = 0;
+
+my $basereg;
+
+if (defined $memblock) {
+die "memblock handling has not been implemented yet."
+}
+
+if ($insnwidth == 16) {
+insn16(($insn >> 16) & 0x);
+} else {
+insn32($insn);
+}
+
+return;
+}
+}
+
+sub write_risuop($)
+{
+my ($op) = @_;
+insn32(0x835a0f00 | $op);
+}
+
+sub write_test_code($)
+{
+my ($params) = @_;
+
+my $condprob = $params->{ 'condprob' };
+my $numinsns = $params->{ 'numinsns' };
+my $outfile = $params->{ 'outfile' };
+
+my %insn_details = %{ $params->{ 'details' } };
+my @keys = @{ $params->{ 'keys' } };
+
+set_endian(1);
+
+open_bin($outfile);
+
+# convert from probability that insn will be conditional to
+# probability of forcing insn to unconditional
+$condprob = 1 - $condprob;
+
+# TODO better random number generator?
+srand(0);
+
+print "Generating code using patterns: @keys...\n";
+progress_start(78, $numinsns);
+
+if (grep { defined($insn_details{$_}->{blocks}->{"memory"}) } @keys) {
+write_memblock_setup();
+}
+
+# memblock setup doesn't clean its registers, so this must come afterwards.
+write_random_register_data();
+
+for my $i (1..$numinsns) {
+my $insn_enc =

[risu PATCH v3 1/7] Pass siginfo_t->si_addr to the reginfo_init() function

2023-09-14 Thread Thomas Huth

On s390x, we need the si_addr from the siginfo_t to get to
the address of the illegal instruction (the PSW address in
the ucontext_t is already pointing to the next instruction
there). So let's prepare for that situation and pass the
si_addr to the reginfo_init() function everywhere.

Signed-off-by: Thomas Huth 
---
 risu.c | 12 ++--
 risu.h |  2 +-
 risu_reginfo_aarch64.c |  2 +-
 risu_reginfo_arm.c |  2 +-
 risu_reginfo_i386.c|  2 +-
 risu_reginfo_loongarch64.c |  2 +-
 risu_reginfo_m68k.c|  2 +-
 risu_reginfo_ppc64.c   |  2 +-
 8 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/risu.c b/risu.c
index 714074e..36fc82a 100644
--- a/risu.c
+++ b/risu.c
@@ -106,14 +106,14 @@ static void respond(RisuResult r)
 }
 }
 
-static RisuResult send_register_info(void *uc)
+static RisuResult send_register_info(void *uc, void *siaddr)
 {
 uint64_t paramreg;
 RisuResult res;
 RisuOp op;
 void *extra;
 
-reginfo_init([MASTER], uc);
+reginfo_init([MASTER], uc, siaddr);
 op = get_risuop([MASTER]);
 
 /* Write a header with PC/op to keep in sync */
@@ -178,7 +178,7 @@ static void master_sigill(int sig, siginfo_t *si, void *uc)
 RisuResult r;
 signal_count++;
 
-r = send_register_info(uc);
+r = send_register_info(uc, si->si_addr);
 if (r == RES_OK) {
 advance_pc(uc);
 } else {
@@ -232,13 +232,13 @@ static RisuResult recv_register_info(struct reginfo *ri)
 }
 }
 
-static RisuResult recv_and_compare_register_info(void *uc)
+static RisuResult recv_and_compare_register_info(void *uc, void *siaddr)
 {
 uint64_t paramreg;
 RisuResult res;
 RisuOp op;
 
-reginfo_init([APPRENTICE], uc);
+reginfo_init([APPRENTICE], uc, siaddr);
 
 res = recv_register_info([MASTER]);
 if (res != RES_OK) {
@@ -315,7 +315,7 @@ static void apprentice_sigill(int sig, siginfo_t *si, void 
*uc)
 RisuResult r;
 signal_count++;
 
-r = recv_and_compare_register_info(uc);
+r = recv_and_compare_register_info(uc, si->si_addr);
 if (r == RES_OK) {
 advance_pc(uc);
 } else {
diff --git a/risu.h b/risu.h
index bdb70c1..2c43384 100644
--- a/risu.h
+++ b/risu.h
@@ -115,7 +115,7 @@ RisuOp get_risuop(struct reginfo *ri);
 uintptr_t get_pc(struct reginfo *ri);
 
 /* initialize structure from a ucontext */
-void reginfo_init(struct reginfo *ri, ucontext_t *uc);
+void reginfo_init(struct reginfo *ri, ucontext_t *uc, void *siaddr);
 
 /* return 1 if structs are equal, 0 otherwise. */
 int reginfo_is_eq(struct reginfo *r1, struct reginfo *r2);
diff --git a/risu_reginfo_aarch64.c b/risu_reginfo_aarch64.c
index be47980..1244454 100644
--- a/risu_reginfo_aarch64.c
+++ b/risu_reginfo_aarch64.c
@@ -82,7 +82,7 @@ int reginfo_size(struct reginfo *ri)
 }
 
 /* reginfo_init: initialize with a ucontext */
-void reginfo_init(struct reginfo *ri, ucontext_t *uc)
+void reginfo_init(struct reginfo *ri, ucontext_t *uc, void *siaddr)
 {
 int i;
 struct _aarch64_ctx *ctx, *extra = NULL;
diff --git a/risu_reginfo_arm.c b/risu_reginfo_arm.c
index 202120b..85a39ac 100644
--- a/risu_reginfo_arm.c
+++ b/risu_reginfo_arm.c
@@ -118,7 +118,7 @@ static void reginfo_init_vfp(struct reginfo *ri, ucontext_t 
*uc)
 }
 }
 
-void reginfo_init(struct reginfo *ri, ucontext_t *uc)
+void reginfo_init(struct reginfo *ri, ucontext_t *uc, void *siaddr)
 {
 memset(ri, 0, sizeof(*ri)); /* necessary for memcmp later */
 
diff --git a/risu_reginfo_i386.c b/risu_reginfo_i386.c
index e9730be..834b2ed 100644
--- a/risu_reginfo_i386.c
+++ b/risu_reginfo_i386.c
@@ -102,7 +102,7 @@ static void *xsave_feature_buf(struct _xstate *xs, int 
feature)
 }
 
 /* reginfo_init: initialize with a ucontext */
-void reginfo_init(struct reginfo *ri, ucontext_t *uc)
+void reginfo_init(struct reginfo *ri, ucontext_t *uc, void *siaddr)
 {
 int i, nvecregs;
 struct _fpstate *fp;
diff --git a/risu_reginfo_loongarch64.c b/risu_reginfo_loongarch64.c
index af6ab77..16384f1 100644
--- a/risu_reginfo_loongarch64.c
+++ b/risu_reginfo_loongarch64.c
@@ -81,7 +81,7 @@ static int parse_extcontext(struct sigcontext *sc, struct 
extctx_layout *extctx)
 }
 
 /* reginfo_init: initialize with a ucontext */
-void reginfo_init(struct reginfo *ri, ucontext_t *context)
+void reginfo_init(struct reginfo *ri, ucontext_t *context, void *siaddr)
 {
 int i;
 struct ucontext *uc = (struct ucontext *)context;
diff --git a/risu_reginfo_m68k.c b/risu_reginfo_m68k.c
index 4c25e77..e29da84 100644
--- a/risu_reginfo_m68k.c
+++ b/risu_reginfo_m68k.c
@@ -33,7 +33,7 @@ int reginfo_size(struct reginfo *ri)
 }
 
 /* reginfo_init: initialize with a ucontext */
-void reginfo_init(struct reginfo *ri, ucontext_t *uc)
+void reginfo_init(struct reginfo *ri, ucontext_t *uc, void *siaddr)
 {
 int i;
 memset(ri, 0, sizeof(*ri));
diff --git a/risu_reginfo_ppc64.c b/risu_reginfo_ppc64.c
index 9899b36..bbdd63c 100644

[risu PATCH v3 0/7] Add support for s390x to RISU

2023-09-14 Thread Thomas Huth

 Hi Peter!

Here are some patches that add basic support for s390x to RISU.
It's still quite limited, e.g. no support for load/store memory
operations yet, but the basics with simple 16-bit or 32-bit
instructions work *now* already fine.

(I'm also already experimenting in extending RISU to support
instructions with opcodes lengths > 32-bit, but the patches
are not quite ready yet)

v3:
- Use si_addr to get the address of the faulting instruction
  (since the PSW address in the ucontext already points to the
  next instruction)
- Use the kernel header asm/ucontext.h instead the one from the
  glibc since the latter does not provide information about
  vector registers (VRs are not used yet, but will be added later)
- Disable exceptions in the floating point control register so
  we can also test floating point instructions
- Added some more instructions to s390x.risu
- Added RFC patch to compile test aarch64, ppc64le and s390x on
  Travis-CI

v2:
- Removed the code to avoid r14 (return address) and r15 (stack pointer)
  since it is not necessary anymore since commit ad82a069e8d6a
- Initialize the floating point registers in test_s390x.S, too
- Added Acked-bys and Reviewed-bys from v1

Thomas Huth (7):
  Pass siginfo_t->si_addr to the reginfo_init() function
  s390x: Add basic s390x support to the C code
  s390x: Add simple s390x.risu file
  s390x: Add basic risugen perl module for s390x
  s390x: Update the configure script for s390x support
  build-all-archs: Add s390x to the script that builds all architectures
  Add a travis.yml file for testing RISU in the Travis-CI

 .travis.yml|  37 
 build-all-archs|   3 +-
 configure  |   4 +-
 risu.c |  12 +--
 risu.h |   2 +-
 risu_reginfo_aarch64.c |   2 +-
 risu_reginfo_arm.c |   2 +-
 risu_reginfo_i386.c|   2 +-
 risu_reginfo_loongarch64.c |   2 +-
 risu_reginfo_m68k.c|   2 +-
 risu_reginfo_ppc64.c   |   2 +-
 risu_reginfo_s390x.c   | 140 
 risu_reginfo_s390x.h   |  25 +
 risu_s390x.c   |  51 ++
 risugen_s390x.pm   | 186 +
 s390x.risu |  81 
 test_s390x.S   |  53 +++
 17 files changed, 591 insertions(+), 15 deletions(-)
 create mode 100644 .travis.yml
 create mode 100644 risu_reginfo_s390x.c
 create mode 100644 risu_reginfo_s390x.h
 create mode 100644 risu_s390x.c
 create mode 100644 risugen_s390x.pm
 create mode 100644 s390x.risu
 create mode 100644 test_s390x.S

-- 
2.41.0

[risu PATCH v3 6/7] build-all-archs: Add s390x to the script that builds all architectures

2023-09-14 Thread Thomas Huth

To avoid regressions, let's check s390x also via this file.

Suggested-by: Peter Maydell 
Signed-off-by: Thomas Huth 
---
 build-all-archs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/build-all-archs b/build-all-archs
index e5dcfc8..e89851b 100755
--- a/build-all-archs
+++ b/build-all-archs
@@ -91,7 +91,8 @@ program_exists() {
 for triplet in i386-linux-gnu i686-linux-gnu x86_64-linux-gnu \
aarch64-linux-gnu arm-linux-gnueabihf \
m68k-linux-gnu \
-   powerpc64le-linux-gnu powerpc64-linux-gnu ; do
+   powerpc64le-linux-gnu powerpc64-linux-gnu \
+   s390x-linux-gnu ; do
 
 if ! program_exists "${triplet}-gcc"; then
 echo "Skipping ${triplet}: no compiler found"
-- 
2.41.0

[risu RFC PATCH v3 7/7] Add a travis.yml file for testing RISU in the Travis-CI

2023-09-14 Thread Thomas Huth

Travis-CI offers native build machines for aarch64, ppc64le
and s390x, so this is very useful for testing RISU on these
architectures. While compiling works fine for all architectures,
running the binary currently only works for s390x (the aarch64
runner reports a mismatch when comparing the registers, and
the ppc64le runner simply hangs), so we can only run the
resulting binary on s390x right now.

Signed-off-by: Thomas Huth 
---
 Not sure if this is useful for anybody but me since Travis is
 not that popular anymore these days ... so please feel free
 to ignore this patch.

 .travis.yml | 37 +
 1 file changed, 37 insertions(+)
 create mode 100644 .travis.yml

diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 000..bafa8df
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,37 @@
+dist: focal
+language: c
+compiler:
+  - gcc
+addons:
+  apt:
+packages:
+  - perl
+  - perl-modules
+  - liblist-compare-perl
+
+before_script:
+  - ./configure
+script:
+  - set -e
+  - make -j2
+  - ./risugen --numinsns 1000 ${ARCH}.risu ${ARCH}.bin
+
+matrix:
+  include:
+
+- env:
+- ARCH="aarch64"
+  arch: arm64
+
+- env:
+- ARCH="ppc64"
+  arch: ppc64le
+
+- env:
+- ARCH="s390x"
+  arch: s390x
+  after_script:
+  - ./risu --master ${ARCH}.bin > stdout.txt 2> stderr.txt &
+  - sleep 1
+  - ./risu --host localhost ${ARCH}.bin
+  - cat stdout.txt stderr.txt
-- 
2.41.0

[risu PATCH v3 2/7] s390x: Add basic s390x support to the C code

2023-09-14 Thread Thomas Huth

With these changes, it is now possible to compile the "risu" binary
for s390x hosts.

Signed-off-by: Thomas Huth 
---
 risu_reginfo_s390x.c | 140 +++
 risu_reginfo_s390x.h |  25 
 risu_s390x.c |  51 
 test_s390x.S |  53 
 4 files changed, 269 insertions(+)
 create mode 100644 risu_reginfo_s390x.c
 create mode 100644 risu_reginfo_s390x.h
 create mode 100644 risu_s390x.c
 create mode 100644 test_s390x.S

diff --git a/risu_reginfo_s390x.c b/risu_reginfo_s390x.c
new file mode 100644
index 000..3fd91b9
--- /dev/null
+++ b/risu_reginfo_s390x.c
@@ -0,0 +1,140 @@
+/**
+ * Copyright 2023 Red Hat Inc.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * Thomas Huth - initial implementation
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "risu.h"
+#include "risu_reginfo_s390x.h"
+
+
+const struct option * const arch_long_opts;
+const char * const arch_extra_help;
+
+void process_arch_opt(int opt, const char *arg)
+{
+abort();
+}
+
+void arch_init(void)
+{
+}
+
+int reginfo_size(struct reginfo *ri)
+{
+return sizeof(*ri);
+}
+
+/* reginfo_init: initialize with a ucontext */
+void reginfo_init(struct reginfo *ri, ucontext_t *uc, void *siaddr)
+{
+struct ucontext_extended *uce = (struct ucontext_extended *)uc;
+
+memset(ri, 0, sizeof(*ri));
+
+/*
+ * We can get the size of the instruction by looking at the
+ * first two bits of the instruction
+ */
+switch (*(uint8_t *)siaddr >> 6) {
+case 0:
+ri->faulting_insn = *(uint16_t *)siaddr;
+ri->faulting_insn_len = 2;
+break;
+case 3:
+ri->faulting_insn = ((*(uint32_t *)siaddr) << 16)
+| *(uint16_t *)(siaddr + 4);
+ri->faulting_insn_len = 6;
+break;
+default:
+ri->faulting_insn = *(uint32_t *)siaddr;
+ri->faulting_insn_len = 4;
+}
+
+ri->psw_mask = uce->uc_mcontext.regs.psw.mask;
+ri->pc_offset = (uintptr_t)siaddr - image_start_address;
+
+memcpy(ri->gprs, uce->uc_mcontext.regs.gprs, sizeof(ri->gprs));
+
+ri->fpc = uc->uc_mcontext.fpregs.fpc;
+memcpy(ri->fprs, >uc_mcontext.fpregs.fprs, sizeof(ri->fprs));
+}
+
+/* reginfo_is_eq: compare the reginfo structs, returns nonzero if equal */
+int reginfo_is_eq(struct reginfo *m, struct reginfo *a)
+{
+return m->pc_offset == a->pc_offset &&
+   m->fpc == a->fpc &&
+   memcmp(m->gprs, a->gprs, sizeof(m->gprs)) == 0 &&
+   memcmp(>fprs, >fprs, sizeof(m->fprs)) == 0;
+}
+
+/* reginfo_dump: print state to a stream, returns nonzero on success */
+int reginfo_dump(struct reginfo *ri, FILE * f)
+{
+int i;
+
+fprintf(f, "  faulting insn 0x%" PRIx64 "\n", ri->faulting_insn);
+fprintf(f, "  PSW mask  0x%" PRIx64 "\n", ri->psw_mask);
+fprintf(f, "  PC offset 0x%" PRIx64 "\n\n", ri->pc_offset);
+
+for (i = 0; i < 16/2; i++) {
+fprintf(f, "\tr%d: %16lx\tr%02d: %16lx\n", i, ri->gprs[i],
+i + 8, ri->gprs[i + 8]);
+}
+fprintf(f, "\n");
+
+for (i = 0; i < 16/2; i++) {
+fprintf(f, "\tf%d: %16lx\tf%02d: %16lx\n",
+i, *(uint64_t *)>fprs[i],
+i + 8, *(uint64_t *)>fprs[i + 8]);
+}
+fprintf(f, "\tFPC: %8x\n\n", ri->fpc);
+
+return !ferror(f);
+}
+
+int reginfo_dump_mismatch(struct reginfo *m, struct reginfo *a, FILE *f)
+{
+int i;
+
+if (m->pc_offset != a->pc_offset) {
+fprintf(f, "Mismatch: PC offset master: [%016lx] - PC offset 
apprentice: [%016lx]\n",
+m->pc_offset, a->pc_offset);
+}
+
+for (i = 0; i < 16; i++) {
+if (m->gprs[i] != a->gprs[i]) {
+fprintf(f, "Mismatch: r%d master: [%016lx] - r%d apprentice: 
[%016lx]\n",
+i, m->gprs[i], i, a->gprs[i]);
+}
+}
+
+for (i = 0; i < 16; i++) {
+if (*(uint64_t *)>fprs[i] != *(uint64_t *)>fprs[i]) {
+fprintf(f, "Mismatch: f%d master: [%016lx] - f%d apprentice: 
[%016lx]\n",
+i, *(uint64_t *)>fprs[i],
+i, *(uint64_t *)>fprs[i]);
+}
+}
+
+if (m->fpc != a->fpc) {
+fprintf(f, "Mismatch: FPC master: [%08x] - FPC apprentice: [%08x]\n",
+m->fpc, a->fpc);
+}
+
+return !ferror(f);
+}
diff --git a/risu_reginfo_s390x.h b/risu_reginfo_s390x.h
new file mode 100644
index 000..c65fff7
--- /dev/null
+++ b/risu_reginfo_s390x.h
@@ -0,0 +1,25 @@

[risu PATCH v3 3/7] s390x: Add simple s390x.risu file

2023-09-14 Thread Thomas Huth

This only adds a limited set of s390x instructions for initial testing.
More instructions will be added later.

Signed-off-by: Thomas Huth 
---
 s390x.risu | 81 ++
 1 file changed, 81 insertions(+)
 create mode 100644 s390x.risu

diff --git a/s390x.risu b/s390x.risu
new file mode 100644
index 000..1661be6
--- /dev/null
+++ b/s390x.risu
@@ -0,0 +1,81 @@
+###
+# Copyright 2023 Red Hat Inc.
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+#
+# Contributors:
+# Thomas Huth - initial implementation
+###
+
+.mode s390x
+
+# format:RR Add (register + register, 32 bit)
+AR Z 00011010 r1:4 r2:4
+
+# format:RRE Add (register + register, 64 bit)
+AGR Z 10111001 1000  r1:4 r2:4
+
+# format:RRE Add (register + register, 32 bit to 64 bit)
+AGFR Z 10111001 00011000  r1:4 r2:4
+
+# format:RRF-a Add (three registers, 32 bit)
+ARK STFLE45 10111001 1000 r3:4  r1:4 r2:4
+
+# format:RRF-a Add (three registers, 64 bit)
+AGRK STFLE45 10111001 11101000 r3:4  r1:4 r2:4
+
+
+# format:RRE Add Halfword Immediate (32 bit)
+AHI Z 10100111 r1:4 1010 i2:16
+
+# format:RI Add Halfword Immediate (64 bit)
+AGHI Z 10100111 r1:4 1011 i2:16
+
+
+# format:RR Add Logical (32 bit)
+ALR Z 0000 r1:4 r2:4
+
+# format:RRE Add Logical (64 bit)
+ALGR Z 10111001 1010  r1:4 r2:4
+
+# format:RRE Add Logical (32 bit to 64 bit)
+ALGFR Z 10111001 00011010  r1:4 r2:4
+
+
+# format:RRF-c Population Count
+POPCNT STFLE45 10111001 1111 m3:4  r1:4 r2:4
+
+
+## Binary floating point instructions ##
+
+# format:RRE ADD (short BFP)
+AEBR BFP 10110011 1010  r1:4 r2:4
+
+# format:RRE ADD (long BFP)
+ADBR BFP 10110011 00011010  r1:4 r2:4
+
+# format:RRE ADD (extended BFP)
+AXBR BFP 10110011 01001010  r1:4 r2:4
+
+
+# format:RRE COMPARE (short BFP)
+CEBR BFP 10110011 1001  r1:4 r2:4
+
+# format:RRE COMPARE (long BFP)
+CDBR BFP 10110011 00011001  r1:4 r2:4
+
+# format:RRE COMPARE (extended BFP)
+CXBR BFP 10110011 01001001  r1:4 r2:4
+
+
+# format:RRF-e LOAD FP INTEGER (short BFP)
+FIEBRA BFP 10110011 01010111 m3:4 m4:4 r1:4 r2:4
+
+# format:RRF-e LOAD FP INTEGER (long BFP)
+FIDBRA BFP 10110011 0101 m3:4 m4:4 r1:4 r2:4
+
+# format:RRF-e LOAD FP INTEGER (extended BFP)
+FIXBRA BFP 10110011 01000111 m3:4 m4:4 r1:4 r2:4
+
-- 
2.41.0

[risu PATCH v3 5/7] s390x: Update the configure script for s390x support

2023-09-14 Thread Thomas Huth

Auto-detect s390x hosts and add s390x information to the help text.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Thomas Huth 
---
 configure | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index ca2d7db..2f7c580 100755
--- a/configure
+++ b/configure
@@ -58,6 +58,8 @@ guess_arch() {
 ARCH="m68k"
 elif check_define __powerpc64__ ; then
 ARCH="ppc64"
+elif check_define __s390x__ ; then
+ARCH="s390x"
 else
 echo "This cpu is not supported by risu. Try -h. " >&2
 exit 1
@@ -139,7 +141,7 @@ Some influential environment variables:
prefixed with the given string.
 
   ARCH force target architecture instead of trying to detect it.
-   Valid values=[arm|aarch64|ppc64|ppc64le|m68k]
+   Valid values=[arm|aarch64|m68k|ppc64|ppc64le|s390x]
 
   CC   C compiler command
   CFLAGS   C compiler flags
-- 
2.41.0

Re: [PATCH] mem/x86: add processor address space check for VM memory

2023-09-14 Thread Ani Sinha




> On 14-Sep-2023, at 2:07 PM, David Hildenbrand  wrote:
> 
> On 14.09.23 07:53, Ani Sinha wrote:
>>> On 12-Sep-2023, at 9:04 PM, David Hildenbrand  wrote:
>>> 
>>> [...]
>>> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 54838c0c41..d187890675 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -908,9 +908,12 @@ static hwaddr pc_max_used_gpa(PCMachineState *pcms, 
> uint64_t pci_hole64_size)
> {
> X86CPU *cpu = X86_CPU(first_cpu);
> 
> -/* 32-bit systems don't have hole64 thus return max CPU address */
> -if (cpu->phys_bits <= 32) {
> -return ((hwaddr)1 << cpu->phys_bits) - 1;
> +/*
> + * 32-bit systems don't have hole64, but we might have a region for
> + * memory hotplug.
> + */
> +if (!(cpu->env.features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
> +return pc_pci_hole64_start() - 1;
 Ok this is very confusing! I am looking at pc_pci_hole64_start() function. 
 I have a few questions …
 (a) pc_get_device_memory_range() returns the size of the device memory as 
 the difference between ram_size and maxram_size. But from what I 
 understand, ram_size is the actual size of the ram present and maxram_size 
 is the max size of ram *after* hot plugging additional memory. How can we 
 assume that the additional available space is already occupied by hot 
 plugged memory?
>>> 
>>> Let's take a look at an example:
>>> 
>>> $ ./build/qemu-system-x86_64 -m 8g,maxmem=16g,slots=1 \
>>>  -object memory-backend-ram,id=mem0,size=1g \
>>>  -device pc-dimm,memdev=mem0 \
>>>  -nodefaults -nographic -S -monitor stdio
>>> 
>>> (qemu) info mtree
>>> ...
>>> memory-region: system
>>>  - (prio 0, i/o): system
>>>-bfff (prio 0, ram): alias ram-below-4g 
>>> @pc.ram -bfff
>>>- (prio -1, i/o): pci
>>>  000c-000d (prio 1, rom): pc.rom
>>>  000e-000f (prio 1, rom): alias isa-bios 
>>> @pc.bios 0002-0003
>>>  fffc- (prio 0, rom): pc.bios
>>>000a-000b (prio 1, i/o): alias smram-region @pci 
>>> 000a-000b
>>>000c-000c3fff (prio 1, i/o): alias pam-pci @pci 
>>> 000c-000c3fff
>>>000c4000-000c7fff (prio 1, i/o): alias pam-pci @pci 
>>> 000c4000-000c7fff
>>>000c8000-000cbfff (prio 1, i/o): alias pam-pci @pci 
>>> 000c8000-000cbfff
>>>000cc000-000c (prio 1, i/o): alias pam-pci @pci 
>>> 000cc000-000c
>>>000d-000d3fff (prio 1, i/o): alias pam-pci @pci 
>>> 000d-000d3fff
>>>000d4000-000d7fff (prio 1, i/o): alias pam-pci @pci 
>>> 000d4000-000d7fff
>>>000d8000-000dbfff (prio 1, i/o): alias pam-pci @pci 
>>> 000d8000-000dbfff
>>>000dc000-000d (prio 1, i/o): alias pam-pci @pci 
>>> 000dc000-000d
>>>000e-000e3fff (prio 1, i/o): alias pam-pci @pci 
>>> 000e-000e3fff
>>>000e4000-000e7fff (prio 1, i/o): alias pam-pci @pci 
>>> 000e4000-000e7fff
>>>000e8000-000ebfff (prio 1, i/o): alias pam-pci @pci 
>>> 000e8000-000ebfff
>>>000ec000-000e (prio 1, i/o): alias pam-pci @pci 
>>> 000ec000-000e
>>>000f-000f (prio 1, i/o): alias pam-pci @pci 
>>> 000f-000f
>>>fec0-fec00fff (prio 0, i/o): ioapic
>>>fed0-fed003ff (prio 0, i/o): hpet
>>>fee0-feef (prio 4096, i/o): apic-msi
>>>0001-00023fff (prio 0, ram): alias ram-above-4g 
>>> @pc.ram c000-0001
>>>00024000-00047fff (prio 0, i/o): device-memory
>>>  00024000-00027fff (prio 0, ram): mem0
>>> 
>>> 
>>> We requested 8G of boot memory, which is split between "<4G" memory and 
>>> ">=4G" memory.
>>> 
>>> We only place exactly 3G (0x0->0xbfff) under 4G, starting at address 0.
>> I can’t reconcile this with this code for q35:
>>if (machine->ram_size >= 0xb000) {
>> lowmem = 0x8000; // max memory 0x8fff or 2.25 GiB
>> } else {
>> lowmem = 0xb000; // max memory 0xbfff or 3 GiB
>> }
>> You assigned 8 Gib to ram which is > 0xb000 (2.75 Gib)
> 
> QEMU defaults to the "pc" machine. If you add "-M q35" you get:
> 
> address-space: memory
>  - (prio 0, i/o): system
>-7fff (prio 0,

[PATCH 1/2] block: do not try to list nearly-dropped filters

2023-09-14 Thread Andrey Zhadchenko via

When the block job ends, it removes filter from the tree. However the
last reference to filter bds is dropped when the job is destroyed.
So when we have finalized but not dismissed job, if we try to
'query-named-block-nodes', QEMU will stumble upon a half-dead filter
and crash, since the filter now has no childs, no bd->file, etc.

Example of crash:

Thread 1 "qemu-storage-da" received signal SIGSEGV, Segmentation fault.
0x0048795e in bdrv_refresh_filename (bs=0x7a5280) at ../block.c:7860
7860assert(QLIST_NEXT(child, next) == NULL);
(gdb) bt
0  0x0048795e in bdrv_refresh_filename (bs=0x7a5280) at ../block.c:7860
1  0x004e2804 in bdrv_block_device_info (blk=0x0, bs=0x7a5280, 
flat=false, errp=0x7fffdaa0) at ../block/qapi.c:58
2  0x0049634e in bdrv_named_nodes_list (flat=false, 
errp=0x7fffdaa0) at ../block.c:6098
3  0x0047fe1b in qmp_query_named_block_nodes (has_flat=false, 
flat=false, errp=0x7fffdaa0) at ../blockdev.c:3097
4  0x005a381d in qmp_marshal_query_named_block_nodes 
(args=0x7fffe80075d0, ret=0x76c38e38, errp=0x76c38e40)
at qapi/qapi-commands-block-core.c:554
5  0x005de657 in do_qmp_dispatch_bh (opaque=0x76c38e08) at 
../qapi/qmp-dispatch.c:128
6  0x00603f42 in aio_bh_call (bh=0x773c00) at ../util/async.c:142
7  0x0060407d in aio_bh_poll (ctx=0x75de00) at ../util/async.c:170
8  0x005eb809 in aio_dispatch (ctx=0x75de00) at ../util/aio-posix.c:421
9  0x00604eea in aio_ctx_dispatch (source=0x75de00, callback=0x0, 
user_data=0x0) at ../util/async.c:312
10 0x77c93d6f in g_main_context_dispatch () from /lib64/libglib-2.0.so.0
11 0x00617508 in glib_pollfds_poll () at ../util/main-loop.c:297
12 0x00617360 in os_host_main_loop_wait (timeout=0) at 
../util/main-loop.c:320
13 0x00617263 in main_loop_wait (nonblocking=0) at 
../util/main-loop.c:596
14 0x0046bbd6 in main (argc=13, argv=0x7fffde28) at 
../storage-daemon/qemu-storage-daemon.c:409

Allow bdrv_block_device_info() to return NULL for filters in a such
state. Modify bdrv_named_nodes_list() to skip bds for which it cannot
get an info.

Signed-off-by: Andrey Zhadchenko 
---
 block.c  | 7 +--
 block/qapi.c | 5 +
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index a307c151a8..e559b0aafa 100644
--- a/block.c
+++ b/block.c
@@ -6145,16 +6145,19 @@ BlockDeviceInfoList *bdrv_named_nodes_list(bool flat,
 BlockDeviceInfoList *list;
 BlockDriverState *bs;
 
+ERRP_GUARD();
 GLOBAL_STATE_CODE();
 
 list = NULL;
 QTAILQ_FOREACH(bs, _bdrv_states, node_list) {
 BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, flat, errp);
-if (!info) {
+if (*errp) {
 qapi_free_BlockDeviceInfoList(list);
 return NULL;
 }
-QAPI_LIST_PREPEND(list, info);
+if (info) {
+QAPI_LIST_PREPEND(list, info);
+}
 }
 
 return list;
diff --git a/block/qapi.c b/block/qapi.c
index f34f95e0ef..b7b48f7540 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -57,6 +57,11 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
 return NULL;
 }
 
+/* Do not try to get info from empty filters */
+if (bs->drv->is_filter && !QLIST_FIRST(>children)) {
+return NULL;
+}
+
 bdrv_refresh_filename(bs);
 
 info = g_malloc0(sizeof(*info));
-- 
2.40.1

[PATCH 0/2] block: do not try to list nearly-dropped filters

2023-09-14 Thread Andrey Zhadchenko via

QEMU crashes on QMP command 'query-named-block-nodes' if we have
finalized but not dismissed block job with filter, for example
block-stream.
This happens because the filter no longer has references from which
QEMU can query block info. Skip such filters while listing block nodes.

This patchset also adds simple test for this case.

Andrey Zhadchenko (2):
  block: do not try to list nearly-dropped filters
  iotests: add new test case for image streaming

 block.c|  7 +--
 block/qapi.c   |  5 +
 tests/qemu-iotests/030 | 17 +
 tests/qemu-iotests/030.out |  4 ++--
 4 files changed, 29 insertions(+), 4 deletions(-)

-- 
2.40.1

[PATCH 2/2] iotests: add new test case for image streaming

2023-09-14 Thread Andrey Zhadchenko via

Check if we can list named block nodes when the block-stream is
finalized but not yet dismissed
This previously led to a crash

Signed-off-by: Andrey Zhadchenko 
---
 tests/qemu-iotests/030 | 17 +
 tests/qemu-iotests/030.out |  4 ++--
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index 98595d47fe..2be407a8da 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -192,6 +192,23 @@ test_img = os.path.join(iotests.test_dir, 'test.img')
 self.assert_qmp(result, 'return', {})
 
 
+def test_auto_dismiss(self):
+result = self.vm.qmp('block-stream', device='drive0', 
auto_dismiss=False)
+completed = False
+while not completed:
+for event in self.vm.get_qmp_events(wait=True):
+self.assertNotEqual(event['event'], 'BLOCK_JOB_ERROR')
+if event['event'] == 'BLOCK_JOB_COMPLETED':
+self.assert_qmp(event, 'data/device', 'drive0')
+completed = True
+elif event['event'] == 'JOB_STATUS_CHANGE':
+self.assert_qmp(event, 'data/id', 'drive0')
+
+result = self.vm.qmp('query-named-block-nodes')
+result = self.vm.qmp('job-dismiss', id='drive0')
+self.assert_qmp(result, 'return', {})
+
+
 class TestParallelOps(iotests.QMPTestCase):
 num_ops = 4 # Number of parallel block-stream operations
 num_imgs = num_ops * 2 + 1
diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out
index 6d9bee1a4b..af8dac10f9 100644
--- a/tests/qemu-iotests/030.out
+++ b/tests/qemu-iotests/030.out
@@ -1,5 +1,5 @@
-...
+
 --
-Ran 27 tests
+Ran 28 tests
 
 OK
-- 
2.40.1

Re: [PATCH v3 3/3] linux-user/syscall.c: do_ppoll: eliminate large alloca

2023-09-14 Thread Daniel P . Berrangé

On Thu, Sep 14, 2023 at 02:05:21PM +0300, Michael Tokarev wrote:
> 14.09.2023 11:26, Michael Tokarev wrote:
> > 14.09.2023 11:18, Daniel P. Berrangé wrote:
> ..
> > > > -    struct pollfd *pfd = NULL;
> > > > +    struct pollfd *pfd = NULL, *heap_pfd = NULL;
> > > 
> > > g_autofree struct pollfd *heap_pdf = NULL;
> > > 
> > ...
> > > >   out:
> > > > +    g_free(heap_pfd);
> > > 
> > > This can be dropped with g_autofree usage
> > 
> > Yes, I know this, - this was deliberate choice.
> > Personally I'm just too used to old-school explicit resource deallocations.
> > Here, there's a single place where everything gets freed, so there's little
> > reason to use fancy modern automatic deallocations. To my taste anyway.
> > Maybe some future modifications adding some future ppoll3.. :)
> > 
> > Sure thing I can drop that and change it to autofree.
> 
> Should I? If that's easier in todays world :)

I prefer auto-free, but I'm fine with this commit either way, so

  Reviewed-by: Daniel P. Berrangé 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH v3 3/3] linux-user/syscall.c: do_ppoll: eliminate large alloca

2023-09-14 Thread Michael Tokarev


14.09.2023 11:26, Michael Tokarev wrote:

14.09.2023 11:18, Daniel P. Berrangé wrote:

..

-    struct pollfd *pfd = NULL;
+    struct pollfd *pfd = NULL, *heap_pfd = NULL;


g_autofree struct pollfd *heap_pdf = NULL;


...

  out:
+    g_free(heap_pfd);


This can be dropped with g_autofree usage


Yes, I know this, - this was deliberate choice.
Personally I'm just too used to old-school explicit resource deallocations.
Here, there's a single place where everything gets freed, so there's little
reason to use fancy modern automatic deallocations. To my taste anyway.
Maybe some future modifications adding some future ppoll3.. :)

Sure thing I can drop that and change it to autofree.


Should I? If that's easier in todays world :)

/mjt

Re: [PATCH] vdpa net: zero vhost_vdpa iova_tree pointer at cleanup

2023-09-14 Thread Lei Yang

QE tested this patch with real nic,guest can works well after
cancelling migration.

Tested-by: Lei Yang 

On Thu, Sep 14, 2023 at 11:23 AM Jason Wang  wrote:
>
> On Wed, Sep 13, 2023 at 8:34 PM Eugenio Pérez  wrote:
> >
> > Not zeroing it causes a SIGSEGV if the live migration is cancelled, at
> > net device restart.
> >
> > This is caused because CVQ tries to reuse the iova_tree that is present
> > in the first vhost_vdpa device at the end of vhost_vdpa_net_cvq_start.
> > As a consequence, it tries to access an iova_tree that has been already
> > free.
> >
> > Fixes: 00ef422e9fbf ("vdpa net: move iova tree creation from init to start")
> > Reported-by: Yanhui Ma 
> > Signed-off-by: Eugenio Pérez 
>
> Acked-by: Jason Wang 
>
> Thanks
>
> > ---
> >  net/vhost-vdpa.c | 2 ++
> >  1 file changed, 2 insertions(+)
> >
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > index 34202ca009..1714ff4b11 100644
> > --- a/net/vhost-vdpa.c
> > +++ b/net/vhost-vdpa.c
> > @@ -385,6 +385,8 @@ static void vhost_vdpa_net_client_stop(NetClientState 
> > *nc)
> >  dev = s->vhost_vdpa.dev;
> >  if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
> >  g_clear_pointer(>vhost_vdpa.iova_tree, vhost_iova_tree_delete);
> > +} else {
> > +s->vhost_vdpa.iova_tree = NULL;
> >  }
> >  }
> >
> > --
> > 2.39.3
> >
>

Re: [PATCH v2 02/24] accel/tcg: Move CPUTLB definitions from cpu-defs.h

2023-09-14 Thread Anton Johansson via



On 9/14/23 04:44, Richard Henderson wrote:

Accept that we will consume space in CPUState for CONFIG_USER_ONLY,
since we cannot test CONFIG_SOFTMMU within hw/core/cpu.h.

Signed-off-by: Richard Henderson
---
  include/exec/cpu-defs.h | 150 
  include/hw/core/cpu.h   | 141 +
  2 files changed, 141 insertions(+), 150 deletions(-)

Reviewed-by: Anton Johansson

Re: [PATCH v2 16/24] tcg: Remove TCGContext.tlb_fast_offset

2023-09-14 Thread Anton Johansson via




On 9/14/23 04:44, Richard Henderson wrote:

Now that there is no padding between CPUNegativeOffsetState
and CPUArchState, this value is constant across all targets.

Signed-off-by: Richard Henderson 
---
  include/tcg/tcg.h |  1 -
  accel/tcg/translate-all.c |  2 --
  tcg/tcg.c | 13 +++--
  3 files changed, 7 insertions(+), 9 deletions(-)

Reviewed-by: Anton Johansson

Re: [PATCH v2 15/24] accel/tcg: Remove env_neg()

2023-09-14 Thread Anton Johansson via




On 9/14/23 04:44, Richard Henderson wrote:

Replace the single use within env_tlb() and remove.

Signed-off-by: Richard Henderson 
---
  include/exec/cpu-all.h | 13 +
  1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 9db8544125..af9516654a 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -451,17 +451,6 @@ static inline CPUState *env_cpu(CPUArchState *env)
  return (void *)env - sizeof(CPUState);
  }
  
-/**

- * env_neg(env)
- * @env: The architecture environment
- *
- * Return the CPUNegativeOffsetState associated with the environment.
- */
-static inline CPUNegativeOffsetState *env_neg(CPUArchState *env)
-{
-return (void *)env - sizeof(CPUNegativeOffsetState);
-}
-
  /**
   * env_tlb(env)
   * @env: The architecture environment
@@ -470,7 +459,7 @@ static inline CPUNegativeOffsetState *env_neg(CPUArchState 
*env)
   */
  static inline CPUTLB *env_tlb(CPUArchState *env)
  {
-return _neg(env)->tlb;
+return _cpu(env)->neg.tlb;
  }
  
  #endif /* CPU_ALL_H */

Reviewed-by: Anton Johansson

Re: [PATCH v2 13/24] accel/tcg: Replace CPUState.env_ptr with cpu_env()

2023-09-14 Thread Anton Johansson via




On 9/14/23 04:44, Richard Henderson wrote:

Signed-off-by: Richard Henderson 
---
  include/exec/cpu-all.h   |  1 -
  include/hw/core/cpu.h|  9 ++---
  target/arm/common-semi-target.h  |  2 +-
  accel/tcg/cpu-exec.c |  8 
  accel/tcg/cputlb.c   | 18 +-
  accel/tcg/translate-all.c|  4 ++--
  gdbstub/gdbstub.c|  4 ++--
  gdbstub/user-target.c|  2 +-
  hw/i386/kvm/clock.c  |  2 +-
  hw/intc/mips_gic.c   |  2 +-
  hw/intc/riscv_aclint.c   | 12 ++--
  hw/intc/riscv_imsic.c|  2 +-
  hw/ppc/e500.c|  4 ++--
  hw/ppc/spapr.c   |  2 +-
  linux-user/elfload.c |  4 ++--
  linux-user/i386/cpu_loop.c   |  2 +-
  linux-user/main.c|  4 ++--
  linux-user/signal.c  | 15 +++
  monitor/hmp-cmds-target.c|  2 +-
  semihosting/arm-compat-semi.c|  6 +++---
  semihosting/syscalls.c   | 28 ++--
  target/alpha/translate.c |  4 ++--
  target/arm/cpu.c |  8 
  target/arm/helper.c  |  2 +-
  target/arm/tcg/translate-a64.c   |  4 ++--
  target/arm/tcg/translate.c   |  6 +++---
  target/avr/translate.c   |  2 +-
  target/cris/translate.c  |  4 ++--
  target/hexagon/translate.c   |  4 ++--
  target/hppa/mem_helper.c |  2 +-
  target/hppa/translate.c  |  4 ++--
  target/i386/tcg/sysemu/excp_helper.c |  2 +-
  target/i386/tcg/tcg-cpu.c|  2 +-
  target/i386/tcg/translate.c  |  4 ++--
  target/loongarch/translate.c |  4 ++--
  target/m68k/translate.c  |  4 ++--
  target/microblaze/translate.c|  2 +-
  target/mips/tcg/sysemu/mips-semi.c   |  4 ++--
  target/mips/tcg/translate.c  |  4 ++--
  target/nios2/translate.c |  4 ++--
  target/openrisc/translate.c  |  2 +-
  target/ppc/excp_helper.c | 10 +-
  target/ppc/translate.c   |  4 ++--
  target/riscv/translate.c |  6 +++---
  target/rx/cpu.c  |  3 ---
  target/rx/translate.c|  2 +-
  target/s390x/tcg/translate.c |  2 +-
  target/sh4/op_helper.c   |  2 +-
  target/sh4/translate.c   |  4 ++--
  target/sparc/translate.c |  4 ++--
  target/tricore/translate.c   |  4 ++--
  target/xtensa/translate.c|  4 ++--
  target/i386/tcg/decode-new.c.inc |  2 +-
  53 files changed, 125 insertions(+), 127 deletions(-)

Reviewed-by: Anton Johansson

Re: [PATCH v2 09/24] accel/tcg: Remove CPUState.icount_decr_ptr

2023-09-14 Thread Anton Johansson via




On 9/14/23 04:44, Richard Henderson wrote:

We can now access icount_decr directly.

Signed-off-by: Richard Henderson 
---
  include/exec/cpu-all.h | 1 -
  include/hw/core/cpu.h  | 2 --
  hw/core/cpu-common.c   | 4 ++--
  3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index c3c78ed8ab..3b01e4ee25 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -434,7 +434,6 @@ void tcg_exec_unrealizefn(CPUState *cpu);
  static inline void cpu_set_cpustate_pointers(ArchCPU *cpu)
  {
  cpu->parent_obj.env_ptr = >env;
-cpu->parent_obj.icount_decr_ptr = >parent_obj.neg.icount_decr;
  }
  
  /* Validate correct placement of CPUArchState. */

diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 1f289136ec..44955af3bc 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -440,7 +440,6 @@ struct qemu_work_item;
   * @as: Pointer to the first AddressSpace, for the convenience of targets 
which
   *  only have a single AddressSpace
   * @env_ptr: Pointer to subclass-specific CPUArchState field.
- * @icount_decr_ptr: Pointer to IcountDecr field within subclass.
   * @gdb_regs: Additional GDB registers.
   * @gdb_num_regs: Number of total registers accessible to GDB.
   * @gdb_num_g_regs: Number of registers in GDB 'g' packets.
@@ -512,7 +511,6 @@ struct CPUState {
  MemoryRegion *memory;
  
  CPUArchState *env_ptr;

-IcountDecr *icount_decr_ptr;
  
  CPUJumpCache *tb_jmp_cache;
  
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c

index ced66c2b34..08d5bbc873 100644
--- a/hw/core/cpu-common.c
+++ b/hw/core/cpu-common.c
@@ -86,7 +86,7 @@ void cpu_exit(CPUState *cpu)
  qatomic_set(>exit_request, 1);
  /* Ensure cpu_exec will see the exit request after TCG has exited.  */
  smp_wmb();
-qatomic_set(>icount_decr_ptr->u16.high, -1);
+qatomic_set(>neg.icount_decr.u16.high, -1);
  }
  
  static int cpu_common_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg)

@@ -130,7 +130,7 @@ static void cpu_common_reset_hold(Object *obj)
  cpu->halted = cpu->start_powered_off;
  cpu->mem_io_pc = 0;
  cpu->icount_extra = 0;
-qatomic_set(>icount_decr_ptr->u32, 0);
+qatomic_set(>neg.icount_decr.u32, 0);
  cpu->can_do_io = 1;
  cpu->exception_index = -1;
  cpu->crash_occurred = false;

Reviewed-by: Anton Johansson

Re: [PATCH 11/11] qdev: Rework array properties based on list visitor

2023-09-14 Thread Peter Maydell

On Fri, 8 Sept 2023 at 15:37, Kevin Wolf  wrote:
>
> Until now, array properties are actually implemented with a hack that
> uses multiple properties on the QOM level: a static "foo-len" property
> and after it is set, dynamically created "foo[i]" properties.
>
> In external interfaces (-device on the command line and device_add in
> QMP), this interface was broken by commit f3558b1b ('qdev: Base object
> creation on QDict rather than QemuOpts') because QDicts are unordered
> and therefore it could happen that QEMU tried to set the indexed
> properties before setting the length, which fails and effectively makes
> array properties inaccessible. In particular, this affects the 'ports'
> property of the 'rocker' device.
>
> This patch reworks the external interface so that instead of using a
> separate top-level property for the length and for each element, we use
> a single true array property that accepts a list value. In the external
> interfaces, this is naturally expressed as a JSON list and makes array
> properties accessible again.
>
> Creating an array property on the command line without using JSON format
> is currently not possible. This could be fixed by switching from
> QemuOpts to a keyval parser, which however requires consideration of the
> compatibility implications.
>
> All internal users of devices with array properties go through
> qdev_prop_set_array() at this point, so updating it takes care of all of
> them.
>
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1090
> Fixes: f3558b1b763683bb877f7dd5b282469cdadc65c3
> Signed-off-by: Kevin Wolf 

I'm hoping that somebody who understands the visitor APIs
better than me will have a look at this patch, but in the
meantime, here's my review, which I suspect has a lot of
comments that mostly reflect that I don't really understand
the visitor stuff...

> ---
>  include/hw/qdev-properties.h |  23 ++--
>  hw/core/qdev-properties-system.c |   2 +-
>  hw/core/qdev-properties.c| 204 +++
>  3 files changed, 133 insertions(+), 96 deletions(-)
>
> diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h
> index 7fa2fdb7c9..9370b36b72 100644
> --- a/include/hw/qdev-properties.h
> +++ b/include/hw/qdev-properties.h
> @@ -61,7 +61,7 @@ extern const PropertyInfo qdev_prop_size;
>  extern const PropertyInfo qdev_prop_string;
>  extern const PropertyInfo qdev_prop_on_off_auto;
>  extern const PropertyInfo qdev_prop_size32;
> -extern const PropertyInfo qdev_prop_arraylen;
> +extern const PropertyInfo qdev_prop_array;
>  extern const PropertyInfo qdev_prop_link;
>
>  #define DEFINE_PROP(_name, _state, _field, _prop, _type, ...) {  \
> @@ -115,8 +115,6 @@ extern const PropertyInfo qdev_prop_link;
>  .bitmask= (_bitmask), \
>  .set_default = false)
>
> -#define PROP_ARRAY_LEN_PREFIX "len-"
> -
>  /**
>   * DEFINE_PROP_ARRAY:
>   * @_name: name of the array
> @@ -127,24 +125,21 @@ extern const PropertyInfo qdev_prop_link;
>   * @_arrayprop: PropertyInfo defining what property the array elements have
>   * @_arraytype: C type of the array elements
>   *
> - * Define device properties for a variable-length array _name.  A
> - * static property "len-arrayname" is defined. When the device creator
> - * sets this property to the desired length of array, further dynamic
> - * properties "arrayname[0]", "arrayname[1]", ...  are defined so the
> - * device creator can set the array element values. Setting the
> - * "len-arrayname" property more than once is an error.
> + * Define device properties for a variable-length array _name.  The array is
> + * represented as a list in the visitor interface.
> + *
> + * @_arraytype is required to be movable with memcpy().
>   *
> - * When the array length is set, the @_field member of the device
> + * When the array property is set, the @_field member of the device
>   * struct is set to the array length, and @_arrayfield is set to point
> - * to (zero-initialised) memory allocated for the array.  For a zero
> - * length array, @_field will be set to 0 and @_arrayfield to NULL.
> + * to the memory allocated for the array.
> + *
>   * It is the responsibility of the device deinit code to free the
>   * @_arrayfield memory.
>   */
>  #define DEFINE_PROP_ARRAY(_name, _state, _field,   \
>_arrayfield, _arrayprop, _arraytype) \
> -DEFINE_PROP((PROP_ARRAY_LEN_PREFIX _name), \
> -_state, _field, qdev_prop_arraylen, uint32_t,  \
> +DEFINE_PROP(_name, _state, _field, qdev_prop_array, uint32_t, \
>  .set_default = true,   \
>  .defval.u = 0, \
>  .arrayinfo = &(_arrayprop),\
> diff --git a/hw/core/qdev-properties-system.c 
> b/hw/core/qdev-properties-system.c
> index 6d5d43eda2..f557ee886e 100644
> ---

Re: [PATCH v2 08/24] accel/tcg: Move CPUNegativeOffsetState into CPUState

2023-09-14 Thread Anton Johansson via




On 9/14/23 04:44, Richard Henderson wrote:

Retain the separate structure to emphasize its importance.
Enforce CPUArchState always follows CPUState without padding.

Signed-off-by: Richard Henderson 
---
  include/exec/cpu-all.h| 22 +-
  include/hw/core/cpu.h | 14 --
  target/alpha/cpu.h|  1 -
  target/arm/cpu.h  |  1 -
  target/avr/cpu.h  |  1 -
  target/cris/cpu.h |  1 -
  target/hexagon/cpu.h  |  2 +-
  target/hppa/cpu.h |  1 -
  target/i386/cpu.h |  1 -
  target/loongarch/cpu.h|  1 -
  target/m68k/cpu.h |  1 -
  target/microblaze/cpu.h   |  6 +++---
  target/mips/cpu.h |  4 ++--
  target/nios2/cpu.h|  1 -
  target/openrisc/cpu.h |  1 -
  target/ppc/cpu.h  |  1 -
  target/riscv/cpu.h|  2 +-
  target/rx/cpu.h   |  1 -
  target/s390x/cpu.h|  1 -
  target/sh4/cpu.h  |  1 -
  target/sparc/cpu.h|  1 -
  target/tricore/cpu.h  |  1 -
  target/xtensa/cpu.h   |  3 +--
  accel/tcg/translate-all.c |  4 ++--
  accel/tcg/translator.c|  8 
  25 files changed, 35 insertions(+), 46 deletions(-)


Reviewed-by: Anton Johansson

Re: [PATCH v9 00/12] Add VIRTIO sound card

2023-09-14 Thread Stefano Garzarella


On Thu, Sep 14, 2023 at 01:02:05PM +0300, Manos Pitsidianakis wrote:

On Thu, 14 Sep 2023 12:54, Stefano Garzarella  wrote:

We are seeing something strange with the virtio-sound Linux driver.
It seems that the driver modifies the buffers after exposing them to
the device via the avail ring.


I need more information about this bug. What is the unexpected 
behavior that made you find that the buffer was modified in the first 
place?


CCing Matias for more details, but initially can you just run the test
Matias suggested to check if you experience the same behaviour or not?

Stefano

Re: [sdl-qemu] [PATCH 1/1] No checks, dereferencing possible

2023-09-14 Thread Peter Krempa

On Thu, Sep 14, 2023 at 09:44:16 +, Миронов Сергей Владимирович wrote:
> No checks, dereferencing possible.
> 
> 
> Return value of a function 'virDomainChrSourceDefNew'
> is dereferenced at qemu_command.c without checking
> for NULL, but it is usually checked for this function.

This description here doesn't make sense. You are checking the presence
of 'privateData' in 'virDomainVideoDef'.

> 
> 
> Found by Linux Verification Center (linuxtesting.org) with SVACE.
> 
> 
> Fixes: 1f85f0967b ("ci: jobs.sh: Add back '--no-suite syntax-check 
> --print-errorlogs'")
> 
> Signed-off-by: Sergey Mironov 
> 
> ---
> src/qemu/qemu_command.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
> index e84374b4cf..8d11972c88 100644
> --- a/src/qemu/qemu_command.c
> +++ b/src/qemu/qemu_command.c
> @@ -4698,6 +4698,8 @@ qemuBuildVideoCommandLine(virCommand *cmd,
>  g_autofree char *name = g_strdup_printf("%s-vhost-user", 
> video->info.alias);
>  qemuDomainChrSourcePrivate *chrsrcpriv = 
> QEMU_DOMAIN_CHR_SOURCE_PRIVATE(chrsrc);
> 
> +   if (chrsrc == NULL)
> +   return -1;

This addition doesn't make sense as it's dead code. The private data is
always allocated and checked that it's non-NULL in the qemu driver via
the callback in virDomainVideoDefNew.

Do you have a call trace that would prove me otherwise?

Re: [PATCH v2 07/24] accel/tcg: Validate placement of CPUNegativeOffsetState

2023-09-14 Thread Anton Johansson via




On 9/14/23 04:44, Richard Henderson wrote:

Verify that the distance between CPUNegativeOffsetState and
CPUArchState is no greater than any alignment requirements.

Signed-off-by: Richard Henderson 
---
  include/exec/cpu-all.h | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index c2c62160c6..86a7452b0d 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -459,6 +459,12 @@ static inline CPUState *env_cpu(CPUArchState *env)
  return _archcpu(env)->parent_obj;
  }
  
+/*

+ * Validate placement of CPUNegativeOffsetState.
+ */
+QEMU_BUILD_BUG_ON(offsetof(ArchCPU, env) - offsetof(ArchCPU, neg) >=
+  sizeof(CPUNegativeOffsetState) + __alignof(CPUArchState));
+
  /**
   * env_neg(env)
   * @env: The architecture environment

Reviewed-by: Anton Johansson

Re: [sdl-qemu] [PATCH 0/1] There are no checks, virDomainChrSourceDefNew can return 0

2023-09-14 Thread Peter Krempa

CC-ing qemu-devel with a patch solely for libvirt doesn't make sense.

Also 'libvirt-security' list is private and is is intended as a first
contact list for stuff to be embargoed. It makes little sense to include
it when posting to the public 'libvir-list'.

On Thu, Sep 14, 2023 at 09:44:13 +, Миронов Сергей Владимирович wrote:
> There are no checks, virDomainChrSourceDefNew can return 0.

s/0/NULL

While very technically true, realistically that can't happen any more.

'virObjectNew' always returns a valid pointer or abort()s, and
VIR_CLASS_NEW can return 0 on programming errors.

Thus this is not a security issue.

> Return value of a function 'virDomainChrSourceDefNew'
> 
> is dereferenced at qemu_hotplug.c without checking for NULL,
> 
> but it is usually checked for this function.

Remove the extra empty lines please.

> 
> 
> Found by Linux Verification Center (linuxtesting.org) with SVACE.
> 
> 
> Fixes: 1f85f0967b ("ci: jobs.sh: Add back '--no-suite syntax-check 
> --print-errorlogs'")

^^ This makes no sense. The commit you are referencing is changing a
shell script.

> 
> Signed-off-by: Sergey Mironov 
> 
> ---
> src/qemu/qemu_hotplug.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
> index 177ca87d11..09e16c2c7e 100644
> --- a/src/qemu/qemu_hotplug.c
> +++ b/src/qemu/qemu_hotplug.c
> @@ -3207,6 +3207,8 @@ qemuDomainAttachFSDevice(virQEMUDriver *driver,
>  qemuAssignDeviceFSAlias(vm->def, fs);
> 
>  chardev = virDomainChrSourceDefNew(priv->driver->xmlopt);
> +if (chardev == NULL)
> +   goto cleanup;
>  chardev->type = VIR_DOMAIN_CHR_TYPE_UNIX;
>  chardev->data.nix.path = qemuDomainGetVHostUserFSSocketPath(priv, fs);
> --
> 2.31.1

Re: [PATCH v9 00/12] Add VIRTIO sound card

2023-09-14 Thread Manos Pitsidianakis


On Thu, 14 Sep 2023 12:54, Stefano Garzarella  wrote:

We are seeing something strange with the virtio-sound Linux driver.
It seems that the driver modifies the buffers after exposing them to
the device via the avail ring.


I need more information about this bug. What is the unexpected behavior 
that made you find that the buffer was modified in the first place?


Manos

[PATCH v6 2/3] hw/i2c: add mctp core

2023-09-14 Thread Klaus Jensen

From: Klaus Jensen 

Add an abstract MCTP over I2C endpoint model. This implements MCTP
control message handling as well as handling the actual I2C transport
(packetization).

Devices are intended to derive from this and implement the class
methods.

Parts of this implementation is inspired by code[1] previously posted by
Jonathan Cameron.

Squashed a fix[2] from Matt Johnston.

  [1]: 
https://lore.kernel.org/qemu-devel/20220520170128.4436-1-jonathan.came...@huawei.com/
  [2]: 
https://lore.kernel.org/qemu-devel/20221121080445.ga29...@codeconstruct.com.au/

Tested-by: Jonathan Cameron 
Reviewed-by: Jonathan Cameron 
Signed-off-by: Klaus Jensen 
---
 MAINTAINERS   |   7 +
 hw/arm/Kconfig|   1 +
 hw/i2c/Kconfig|   4 +
 hw/i2c/mctp.c | 432 ++
 hw/i2c/meson.build|   1 +
 hw/i2c/trace-events   |  13 ++
 include/hw/i2c/mctp.h | 125 +++
 include/net/mctp.h|  35 
 8 files changed, 618 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 00562f924f7a..3208ebb1bcde 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3404,6 +3404,13 @@ F: tests/qtest/adm1272-test.c
 F: tests/qtest/max34451-test.c
 F: tests/qtest/isl_pmbus_vr-test.c
 
+MCTP I2C Transport
+M: Klaus Jensen 
+S: Maintained
+F: hw/i2c/mctp.c
+F: include/hw/i2c/mctp.h
+F: include/net/mctp.h
+
 Firmware schema specifications
 M: Philippe Mathieu-Daudé 
 R: Daniel P. Berrange 
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 7e6834844051..5bcb1e0e8a6f 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -541,6 +541,7 @@ config ASPEED_SOC
 select DS1338
 select FTGMAC100
 select I2C
+select I2C_MCTP
 select DPS310
 select PCA9552
 select SERIAL
diff --git a/hw/i2c/Kconfig b/hw/i2c/Kconfig
index 14886b35dac2..2b2a50b83d1e 100644
--- a/hw/i2c/Kconfig
+++ b/hw/i2c/Kconfig
@@ -6,6 +6,10 @@ config I2C_DEVICES
 # to any board's i2c bus
 bool
 
+config I2C_MCTP
+bool
+select I2C
+
 config SMBUS
 bool
 select I2C
diff --git a/hw/i2c/mctp.c b/hw/i2c/mctp.c
new file mode 100644
index ..8d8e74567745
--- /dev/null
+++ b/hw/i2c/mctp.c
@@ -0,0 +1,432 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * SPDX-FileCopyrightText: Copyright (c) 2023 Samsung Electronics Co., Ltd.
+ * SPDX-FileContributor: Klaus Jensen 
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/i2c/i2c.h"
+#include "hw/i2c/smbus_master.h"
+#include "hw/i2c/mctp.h"
+#include "net/mctp.h"
+
+#include "trace.h"
+
+/* DSP0237 1.2.0, Figure 1 */
+typedef struct MCTPI2CPacketHeader {
+uint8_t dest;
+#define MCTP_I2C_COMMAND_CODE 0xf
+uint8_t command_code;
+uint8_t byte_count;
+uint8_t source;
+} MCTPI2CPacketHeader;
+
+typedef struct MCTPI2CPacket {
+MCTPI2CPacketHeader i2c;
+MCTPPacket  mctp;
+} MCTPI2CPacket;
+
+#define i2c_mctp_payload_offset offsetof(MCTPI2CPacket, mctp.payload)
+#define i2c_mctp_payload(buf) (buf + i2c_mctp_payload_offset)
+
+/* DSP0236 1.3.0, Figure 20 */
+typedef struct MCTPControlMessage {
+#define MCTP_MESSAGE_TYPE_CONTROL 0x0
+uint8_t type;
+#define MCTP_CONTROL_FLAGS_RQ   (1 << 7)
+#define MCTP_CONTROL_FLAGS_D(1 << 6)
+uint8_t flags;
+uint8_t command_code;
+uint8_t data[];
+} MCTPControlMessage;
+
+enum MCTPControlCommandCodes {
+MCTP_CONTROL_SET_EID= 0x01,
+MCTP_CONTROL_GET_EID= 0x02,
+MCTP_CONTROL_GET_VERSION= 0x04,
+MCTP_CONTROL_GET_MESSAGE_TYPE_SUPPORT   = 0x05,
+};
+
+#define MCTP_CONTROL_ERROR_UNSUPPORTED_CMD 0x5
+
+#define i2c_mctp_control_data_offset \
+(i2c_mctp_payload_offset + offsetof(MCTPControlMessage, data))
+#define i2c_mctp_control_data(buf) (buf + i2c_mctp_control_data_offset)
+
+/**
+ * The byte count field in the SMBUS Block Write containers the number of bytes
+ * *following* the field itself.
+ *
+ * This is at least 5.
+ *
+ * 1 byte for the MCTP/I2C piggy-backed I2C source address in addition to the
+ * size of the MCTP transport/packet header.
+ */
+#define MCTP_I2C_BYTE_COUNT_OFFSET (sizeof(MCTPPacketHeader) + 1)
+
+void i2c_mctp_schedule_send(MCTPI2CEndpoint *mctp)
+{
+I2CBus *i2c = I2C_BUS(qdev_get_parent_bus(DEVICE(mctp)));
+
+mctp->tx.state = I2C_MCTP_STATE_TX_START_SEND;
+
+i2c_bus_master(i2c, mctp->tx.bh);
+}
+
+static void i2c_mctp_tx(void *opaque)
+{
+DeviceState *dev = DEVICE(opaque);
+I2CBus *i2c = I2C_BUS(qdev_get_parent_bus(dev));
+I2CSlave *slave = I2C_SLAVE(dev);
+MCTPI2CEndpoint *mctp = MCTP_I2C_ENDPOINT(dev);
+MCTPI2CEndpointClass *mc = MCTP_I2C_ENDPOINT_GET_CLASS(mctp);
+MCTPI2CPacket *pkt = (MCTPI2CPacket *)mctp->buffer;
+uint8_t flags = 0;
+
+switch (mctp->tx.state) {
+case I2C_MCTP_STATE_TX_SEND_BYTE:
+if (mctp->pos < mctp->len) {
+uint8_t byte = mctp->buffer[mctp->pos];
+
+

[PATCH v6 1/3] hw/i2c: add smbus pec utility function

2023-09-14 Thread Klaus Jensen

From: Klaus Jensen 

Add i2c_smbus_pec() to calculate the SMBus Packet Error Code for a
message.

Reviewed-by: Jonathan Cameron 
Signed-off-by: Klaus Jensen 
---
 hw/i2c/smbus_master.c | 26 ++
 include/hw/i2c/smbus_master.h |  2 ++
 2 files changed, 28 insertions(+)

diff --git a/hw/i2c/smbus_master.c b/hw/i2c/smbus_master.c
index 6a53c34e70b7..01a8e4700222 100644
--- a/hw/i2c/smbus_master.c
+++ b/hw/i2c/smbus_master.c
@@ -15,6 +15,32 @@
 #include "hw/i2c/i2c.h"
 #include "hw/i2c/smbus_master.h"
 
+static uint8_t crc8(uint16_t data)
+{
+int i;
+
+for (i = 0; i < 8; i++) {
+if (data & 0x8000) {
+data ^= 0x1070U << 3;
+}
+
+data <<= 1;
+}
+
+return (uint8_t)(data >> 8);
+}
+
+uint8_t i2c_smbus_pec(uint8_t crc, uint8_t *buf, size_t len)
+{
+int i;
+
+for (i = 0; i < len; i++) {
+crc = crc8((crc ^ buf[i]) << 8);
+}
+
+return crc;
+}
+
 /* Master device commands.  */
 int smbus_quick_command(I2CBus *bus, uint8_t addr, int read)
 {
diff --git a/include/hw/i2c/smbus_master.h b/include/hw/i2c/smbus_master.h
index bb13bc423c22..d90f81767d86 100644
--- a/include/hw/i2c/smbus_master.h
+++ b/include/hw/i2c/smbus_master.h
@@ -27,6 +27,8 @@
 
 #include "hw/i2c/i2c.h"
 
+uint8_t i2c_smbus_pec(uint8_t crc, uint8_t *buf, size_t len);
+
 /* Master device commands.  */
 int smbus_quick_command(I2CBus *bus, uint8_t addr, int read);
 int smbus_receive_byte(I2CBus *bus, uint8_t addr);

-- 
2.42.0

[PATCH v6 3/3] hw/nvme: add nvme management interface model

2023-09-14 Thread Klaus Jensen

From: Klaus Jensen 

Add the 'nmi-i2c' device that emulates an NVMe Management Interface
controller.

Initial support is very basic (Read NMI DS, Configuration Get).

This is based on previously posted code by Padmakar Kalghatgi, Arun
Kumar Agasar and Saurav Kumar.

Reviewed-by: Jonathan Cameron 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/Kconfig  |   4 +
 hw/nvme/meson.build  |   1 +
 hw/nvme/nmi-i2c.c| 407 +++
 hw/nvme/trace-events |   6 +
 4 files changed, 418 insertions(+)

diff --git a/hw/nvme/Kconfig b/hw/nvme/Kconfig
index cfa2ab0f9d5a..e1f6360c0f4b 100644
--- a/hw/nvme/Kconfig
+++ b/hw/nvme/Kconfig
@@ -2,3 +2,7 @@ config NVME_PCI
 bool
 default y if PCI_DEVICES || PCIE_DEVICES
 depends on PCI
+
+config NVME_NMI_I2C
+bool
+default y if I2C_MCTP
diff --git a/hw/nvme/meson.build b/hw/nvme/meson.build
index 1a6a2ca2f307..7bc85f31c149 100644
--- a/hw/nvme/meson.build
+++ b/hw/nvme/meson.build
@@ -1 +1,2 @@
 system_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 
'ns.c', 'subsys.c'))
+system_ss.add(when: 'CONFIG_NVME_NMI_I2C', if_true: files('nmi-i2c.c'))
diff --git a/hw/nvme/nmi-i2c.c b/hw/nvme/nmi-i2c.c
new file mode 100644
index ..bf4648db0457
--- /dev/null
+++ b/hw/nvme/nmi-i2c.c
@@ -0,0 +1,407 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * SPDX-FileCopyrightText: Copyright (c) 2023 Samsung Electronics Co., Ltd.
+ *
+ * SPDX-FileContributor: Padmakar Kalghatgi 
+ * SPDX-FileContributor: Arun Kumar Agasar 
+ * SPDX-FileContributor: Saurav Kumar 
+ * SPDX-FileContributor: Klaus Jensen 
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/crc32c.h"
+#include "hw/registerfields.h"
+#include "hw/i2c/i2c.h"
+#include "hw/i2c/mctp.h"
+#include "net/mctp.h"
+#include "trace.h"
+
+/* NVM Express Management Interface 1.2c, Section 3.1 */
+#define NMI_MAX_MESSAGE_LENGTH 4224
+
+#define TYPE_NMI_I2C_DEVICE "nmi-i2c"
+OBJECT_DECLARE_SIMPLE_TYPE(NMIDevice, NMI_I2C_DEVICE)
+
+typedef struct NMIDevice {
+MCTPI2CEndpoint mctp;
+
+uint8_t buffer[NMI_MAX_MESSAGE_LENGTH];
+uint8_t scratch[NMI_MAX_MESSAGE_LENGTH];
+
+size_t  len;
+int64_t pos;
+} NMIDevice;
+
+FIELD(NMI_MCTPD, MT, 0, 7)
+FIELD(NMI_MCTPD, IC, 7, 1)
+
+#define NMI_MCTPD_MT_NMI 0x4
+#define NMI_MCTPD_IC_ENABLED 0x1
+
+FIELD(NMI_NMP, ROR, 7, 1)
+FIELD(NMI_NMP, NMIMT, 3, 4)
+
+#define NMI_NMP_NMIMT_NVME_MI 0x1
+#define NMI_NMP_NMIMT_NVME_ADMIN 0x2
+
+typedef struct NMIMessage {
+uint8_t mctpd;
+uint8_t nmp;
+uint8_t rsvd2[2];
+uint8_t payload[]; /* includes the Message Integrity Check */
+} NMIMessage;
+
+typedef struct NMIRequest {
+   uint8_t opc;
+   uint8_t rsvd1[3];
+   uint32_t dw0;
+   uint32_t dw1;
+   uint32_t mic;
+} NMIRequest;
+
+FIELD(NMI_CMD_READ_NMI_DS_DW0, DTYP, 24, 8)
+
+typedef enum NMIReadDSType {
+NMI_CMD_READ_NMI_DS_SUBSYSTEM   = 0x0,
+NMI_CMD_READ_NMI_DS_PORTS   = 0x1,
+NMI_CMD_READ_NMI_DS_CTRL_LIST   = 0x2,
+NMI_CMD_READ_NMI_DS_CTRL_INFO   = 0x3,
+NMI_CMD_READ_NMI_DS_OPT_CMD_SUPPORT = 0x4,
+NMI_CMD_READ_NMI_DS_MEB_CMD_SUPPORT = 0x5,
+} NMIReadDSType;
+
+#define NMI_STATUS_INVALID_PARAMETER 0x4
+
+static void nmi_scratch_append(NMIDevice *nmi, const void *buf, size_t count)
+{
+assert(nmi->pos + count <= NMI_MAX_MESSAGE_LENGTH);
+
+memcpy(nmi->scratch + nmi->pos, buf, count);
+nmi->pos += count;
+}
+
+static void nmi_set_parameter_error(NMIDevice *nmi, uint8_t bit, uint16_t byte)
+{
+/* NVM Express Management Interface 1.2c, Figure 30 */
+struct resp {
+uint8_t  status;
+uint8_t  bit;
+uint16_t byte;
+};
+
+struct resp buf = {
+.status = NMI_STATUS_INVALID_PARAMETER,
+.bit = bit & 0x3,
+.byte = byte,
+};
+
+nmi_scratch_append(nmi, , sizeof(buf));
+}
+
+static void nmi_set_error(NMIDevice *nmi, uint8_t status)
+{
+const uint8_t buf[4] = {status,};
+
+nmi_scratch_append(nmi, buf, sizeof(buf));
+}
+
+static void nmi_handle_mi_read_nmi_ds(NMIDevice *nmi, NMIRequest *request)
+{
+I2CSlave *i2c = I2C_SLAVE(nmi);
+
+uint32_t dw0 = le32_to_cpu(request->dw0);
+uint8_t dtyp = FIELD_EX32(dw0, NMI_CMD_READ_NMI_DS_DW0, DTYP);
+
+trace_nmi_handle_mi_read_nmi_ds(dtyp);
+
+static const uint8_t nmi_ds_subsystem[36] = {
+0x00,   /* success */
+0x20, 0x00, /* response data length */
+0x00,   /* reserved */
+0x00,   /* number of ports */
+0x01,   /* major version */
+0x01,   /* minor version */
+};
+
+/*
+ * Cannot be static (or const) since we need to patch in the i2c
+ * address.
+ */
+const uint8_t nmi_ds_ports[36] = {
+0x00,   /* success */
+0x20, 0x00, /* response data length */
+0x00,   /* reserved */
+0x02,   /* port type (smbus) */
+0x00,   /* reserved */
+0x40, 0x00, /* maximum mctp transission

Re: [PATCH v9 00/12] Add VIRTIO sound card

2023-09-14 Thread Stefano Garzarella


On Wed, Sep 13, 2023 at 10:33:07AM +0300, Emmanouil Pitsidianakis wrote:

This patch series adds an audio device implementing the recent virtio
sound spec (1.2) and a corresponding PCI wrapper device.

v9 can be found online at:

https://gitlab.com/epilys/qemu/-/tree/virtio-snd-v9

Ref 06e6b17186

Main differences with v8 patch series [^v8]
:

- Addressed [^v8] review comments.
- Add cpu_to_le32(_) and le32_to_cpu(_) conversions for messages from/to
 the guest according to the virtio spec.
- Inlined some functions and types to reduce review complexity.
- Corrected the replies to IO messages; now both Playback and Capture
 work correctly for me. (If you hear cracks in pulseaudio+guest, try
 pipewire+guest).


We are seeing something strange with the virtio-sound Linux driver.
It seems that the driver modifies the buffers after exposing them to
the device via the avail ring.

It seems we have this strange behaviour with this built-in QEMU device,
but also with the vhost-device-sound, so it could be some spec
violation in the Linux driver.

Matias also reported on the v8 of this series:
https://lore.kernel.org/qemu-devel/ZPg60lzXWxHPQJEa@fedora/

Can you check if you have the same behaviour?

Nothing that blocks this series of course, but just to confirm that
there may be something to fix in the Linux driver.

Thanks,
Stefano

[PATCH v6 0/3] hw/{i2c,nvme}: mctp endpoint, nvme management interface model

2023-09-14 Thread Klaus Jensen

This adds a generic MCTP endpoint model that other devices may derive
from.

Also included is a very basic implementation of an NVMe-MI device,
supporting only a small subset of the required commands.

Since this all relies on i2c target mode, this can currently only be
used with an SoC that includes the Aspeed I2C controller.

The easiest way to get up and running with this, is to grab my buildroot
overlay[1] (aspeed_ast2600evb_nmi_defconfig). It includes modified a
modified dts as well as a couple of required packages.

QEMU can then be launched along these lines:

  qemu-system-arm \
-nographic \
-M ast2600-evb \
-kernel output/images/zImage \
-initrd output/images/rootfs.cpio \
-dtb output/images/aspeed-ast2600-evb-nmi.dtb \
-nic user,hostfwd=tcp::-:22 \
-device nmi-i2c,address=0x3a \
-serial mon:stdio

>From within the booted system,

  mctp addr add 8 dev mctpi2c15
  mctp link set mctpi2c15 up
  mctp route add 9 via mctpi2c15
  mctp neigh add 9 dev mctpi2c15 lladdr 0x3a
  mi-mctp 1 9 info

Comments are very welcome!

  [1]: https://github.com/birkelund/hwtests/tree/main/br2-external

Signed-off-by: Klaus Jensen 
---
Changes in v6:
- Use nmi_scratch_append() directly where it makes sense. Fixes bug
  observed by Andrew.
- Link to v5: 
https://lore.kernel.org/r/20230905-nmi-i2c-v5-0-0001d372a...@samsung.com

Changes in v5:
- Added a nmi_scratch_append() that asserts available space in the
  scratch buffer. This is a similar defensive strategy as used in
  hw/i2c/mctp.c
- Various small fixups in response to review (Jonathan)
- Link to v4: 
https://lore.kernel.org/r/20230823-nmi-i2c-v4-0-2b0f86e5b...@samsung.com

---
Klaus Jensen (3):
  hw/i2c: add smbus pec utility function
  hw/i2c: add mctp core
  hw/nvme: add nvme management interface model

 MAINTAINERS   |   7 +
 hw/arm/Kconfig|   1 +
 hw/i2c/Kconfig|   4 +
 hw/i2c/mctp.c | 432 ++
 hw/i2c/meson.build|   1 +
 hw/i2c/smbus_master.c |  26 +++
 hw/i2c/trace-events   |  13 ++
 hw/nvme/Kconfig   |   4 +
 hw/nvme/meson.build   |   1 +
 hw/nvme/nmi-i2c.c | 407 +++
 hw/nvme/trace-events  |   6 +
 include/hw/i2c/mctp.h | 125 
 include/hw/i2c/smbus_master.h |   2 +
 include/net/mctp.h|  35 
 14 files changed, 1064 insertions(+)
---
base-commit: 005ad32358f12fe9313a4a01918a55e60d4f39e5
change-id: 20230822-nmi-i2c-d804ed5be7e6

Best regards,
-- 
Klaus Jensen

RE: [PATCH v1 00/22] vfio: Adopt iommufd

2023-09-14 Thread Duan, Zhenzhong

Hi Eric,

>-Original Message-
>From: Eric Auger 
>Sent: Thursday, September 14, 2023 5:04 PM
>To: Duan, Zhenzhong ; qemu-devel@nongnu.org
>Cc: alex.william...@redhat.com; c...@redhat.com; j...@nvidia.com;
>nicol...@nvidia.com; Martins, Joao ;
>pet...@redhat.com; jasow...@redhat.com; Tian, Kevin ;
>Liu, Yi L ; Sun, Yi Y ; Peng, Chao P
>
>Subject: Re: [PATCH v1 00/22] vfio: Adopt iommufd
>
>Hi Zhenzhong
>
>On 8/30/23 12:37, Zhenzhong Duan wrote:
>> Hi All,
>>
>> As the kernel side iommufd cdev and hot reset feature have been queued,
>> also hwpt alloc has been added in Jason's for_next branch [1], I'd like
>> to update a new version matching kernel side update and with rfc flag
>> removed. Qemu code can be found at [2], look forward more comments!
>>
>>
>> We have done wide test with different combinations, e.g:
>>
>> - PCI device were tested
>> - FD passing and hot reset with some trick.
>> - device hotplug test with legacy and iommufd backends
>> - with or without vIOMMU for legacy and iommufd backends
>> - divices linked to different iommufds
>> - VFIO migration with a E800 net card(no dirty sync support) passthrough
>> - platform, ccw and ap were only compile-tested due to environment limit
>>
>>
>> Given some iommufd kernel limitations, the iommufd backend is
>> not yet fully on par with the legacy backend w.r.t. features like:
>> - p2p mappings (you will see related error traces)
>> - dirty page sync
>> - and etc.
>>
>>
>> Changelog:
>> v1:
>> - Alloc hwpt instead of using auto hwpt
>> - elaborate iommufd code per Nicolin
>> - consolidate two patches and drop as.c
>> - typo error fix and function rename
>>
>> I didn't list change log of rfc stage, see [3] if anyone is interested.
>>
>>
>> [1] https://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd.git
>> [2] https://github.com/yiliu1765/qemu/commits/zhenzhong/iommufd_cdev_v1
>> [3] https://lists.nongnu.org/archive/html/qemu-devel/2023-07/msg02529.html
>
>Do you have a branch to share?
>
>It does not apply to upstream

Sure, https://github.com/yiliu1765/qemu/tree/zhenzhong/iommufd_cdev_v1_rebased
I think this one is already based on today's upstream.

Thanks
Zhenzhong

< 1 2 3 4 >

201 - 300 of 374 matches

Mail list logo