Re: [RFC PATCH v2 3/5] virtio-balloon: Add QMP functions for Working Set

2023-05-26 Thread Markus Armbruster
"T.J. Alumbaugh"  writes:

>   - Adds QMP function 'working-set-config'
>   - Adds QMP function 'working-set-request'
>   - Retrieve working set via 'guest-working-set' property on balloon
>
>>> cat script.py
>
> NAME = "name"
> SOCKET = 'vm.sock'
> BALLOON =  "/machine/peripheral/balloon0"
>
> import json
> import asyncio
> from qemu.qmp import QMPClient
>
> async def main():
> client = QMPClient(NAME)
> await client.connect(SOCKET)
> config = { "i0": 200, "i1": 800, "i2": 3000, "refresh": 750, "report": 
> 1000 }
> await client.execute('working-set-config', config)
> await client.execute('working-set-request')
> property = {"path":BALLOON, "property":"guest-working-set"}
> res = await client.execute('qom-get', property)
> return res
>
> if __name__ == "__main__":
> ret = asyncio.run(main())
> print(json.dumps(ret, indent=2))
>
>>> (Execute qemu with flag '-qmp unix:path=vm.sock,server=on,wait=off'
>>> (Perform normal activities on VM to exercise MM code)
>
>>> python3 script.py
> {
>   "working_set": {
> "ws3": {
>   "memory-size-bytes": {
> "anon": 890478592,
> "file": 1285832704
>   },
>   "idle-age": 4294967292
> },
> "ws2": {
>   "memory-size-bytes": {
> "anon": 173465600,
> "file": 83353600
>   },
>   "idle-age": 3000
> },
> "ws1": {
>   "memory-size-bytes": {
> "anon": 44236800,
> "file": 20889600
>   },
>   "idle-age": 800
> },
> "ws0": {
>   "memory-size-bytes": {
> "anon": 14540800,
> "file": 6963200
>   },
>   "idle-age": 200
> }
>   }
> }
>
> Signed-off-by: T.J. Alumbaugh 

[...]

> diff --git a/monitor/monitor.c b/monitor/monitor.c
> index 602535696c..fad1b4aed5 100644
> --- a/monitor/monitor.c
> +++ b/monitor/monitor.c
> @@ -333,6 +333,7 @@ static MonitorQAPIEventConf 
> monitor_qapi_event_conf[QAPI_EVENT__MAX] = {
>  [QAPI_EVENT_QUORUM_FAILURE]= { 1000 * SCALE_MS },
>  [QAPI_EVENT_VSERPORT_CHANGE]   = { 1000 * SCALE_MS },
>  [QAPI_EVENT_MEMORY_DEVICE_SIZE_CHANGE] = { 1000 * SCALE_MS },
> +[QAPI_EVENT_WORKING_SET_EVENT] = { 1000 * SCALE_MS },
>  };
>  
>  /*
> diff --git a/qapi/machine.json b/qapi/machine.json
> index 37660d8f2a..5e03ff21e2 100644
> --- a/qapi/machine.json
> +++ b/qapi/machine.json
> @@ -1055,6 +1055,57 @@
>  ##
>  { 'command': 'balloon', 'data': {'value': 'int'} }
>  
> +##
> +# @working-set-config:
> +#
> +# Specify the config parameters for Working Set reporting.

Don't capitalize "Working Set" unless it is a proper noun.  Is it?

What about "Configure working set reporting"?

> +#
> +# @i0: the endpoint of the first interval (in ms)
> +#
> +# @i1: the endpoint of the second interval (in ms)
> +#
> +# @i2: the endpoint of the third interval (in ms)

An "endpoint" doesn't define an interval.  Also, a "in milliseconds"
suggests relative time.  Relative to what?

What do these intervals do?

> +#
> +# @refresh: the refresh threshold (in ms) for Working Set reporting
> +#
> +# @report: the report threshold (in ms) for Working Set reporting

What do these do?

> +#
> +# Returns: - Nothing on success
> +#  - If no balloon device is present, DeviceNotActive

We use errors other than GenericError only when we have a compelling
reason.  I figure the reason is consistency with other commands
operating on a ballon device.  Correct?

Please format like

   # Returns:
   # - Nothing on success
   # - If no balloon device is present, DeviceNotActive

to blend in with recent commit a937b6aa739 (qapi: Reformat doc comments
to conform to current conventions).

> +#
> +# Example:
> +#
> +# -> { "execute": "working-set-config",
> +# "arguments": { "i0": 100,
> +#"i1": 500,
> +#"i2": 2000,
> +#"refresh": 750,
> +#"report": 1000 } }
> +# <- { "return": {} }
> +#
> +##
> +{ 'command': 'working-set-config', 'data': {'i0': 'uint64',
> +'i1': 'uint64',
> +'i2': 'uint64',
> +'refresh': 'uint64',
> +'report': 'uint64'} }
> +##
> +# @working-set-request:
> +#
> +# Request the Working Set report from the guest.

"Request a"

> +#
> +# Returns: - Nothing on success
> +#  - If no balloon device is present, DeviceNotActive
> +#
> +# Example:
> +#
> +# -> { "execute": "working-set-request", "arguments": {} }
> +# <- { "return": {} }
> +#
> +##
> +{ 'command': 'working-set-request', 'data': {} }
> +
> +
>  ##
>  # @BalloonInfo:
>  #
> @@ -1113,6 +1164,21 @@
>  { 'event': 'BALLOON_CHANGE',
>'data': { 'actual': 'int' } }
>  
> +##
> +# @WORKING_SET_EVENT:
> +#
> +# Emitted when the guest sends a new Working Set report.

Where does it send the report to?

> +#
> +# N

Re: [RFC PATCH v2 2/5] virtio-balloon: device has Working Set Reporting

2023-05-26 Thread Markus Armbruster
"T.J. Alumbaugh"  writes:

>  - working_set_vq to receive Working Set reports from guest
>  - notification_vq to send config or request to guest
>  - add working set as object property on device
>
> Signed-off-by: T.J. Alumbaugh 

[...]

> diff --git a/qapi/misc.json b/qapi/misc.json
> index ff070ec828..4ba8c74b64 100644
> --- a/qapi/misc.json
> +++ b/qapi/misc.json
> @@ -550,6 +550,32 @@
>   'returns': ['CommandLineOptionInfo'],
>   'allow-preconfig': true}
>  
> +##
> +# @MemoryBin:
> +#
> +# A bin of memory with a size in bytes. File-backed and
> +# anonymous memory are tracked separately.
> +#
> +# @anon: number of bytes of anonymous memory
> +# @file: number of bytes of file-backed memory

Please format like

   # A bin of memory with a size in bytes.  File-backed and anonymous
   # memory are tracked separately.
   #
   # @anon: number of bytes of anonymous memory
   #
   # @file: number of bytes of file-backed memory

to blend in with recent commit a937b6aa739 (qapi: Reformat doc comments
to conform to current conventions).

Separating member sections with blank lines helps with catching certain
errors.  rST loves to surprise...

> +##
> +{ 'struct': 'MemoryBin',
> +  'data': { 'anon': 'uint64',
> +'file': 'uint64' } }
> +

[...]




Re: [RFC PATCH v2 2/5] virtio-balloon: device has Working Set Reporting

2023-05-26 Thread Markus Armbruster
"T.J. Alumbaugh"  writes:

>  - working_set_vq to receive Working Set reports from guest
>  - notification_vq to send config or request to guest
>  - add working set as object property on device
>
> Signed-off-by: T.J. Alumbaugh 
> ---
>  hw/virtio/virtio-balloon.c | 164 -
>  include/hw/virtio/virtio-balloon.h |  13 ++-
>  qapi/misc.json |  26 +
>  3 files changed, 200 insertions(+), 3 deletions(-)
>
> diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
> index d004cf29d2..23481e51b8 100644
> --- a/hw/virtio/virtio-balloon.c
> +++ b/hw/virtio/virtio-balloon.c
> @@ -27,6 +27,7 @@
>  #include "exec/address-spaces.h"
>  #include "qapi/error.h"
>  #include "qapi/qapi-events-machine.h"
> +#include "qapi/qapi-visit-misc.h"

Superfluous include.

>  #include "qapi/visitor.h"
>  #include "trace.h"
>  #include "qemu/error-report.h"
> @@ -169,6 +170,119 @@ static void balloon_deflate_page(VirtIOBalloon *balloon,
>  }
>  }
>  
> +/*
> + * reset_working_set - Mark all items in the array as unset
> + *
> + * This function needs to be called at device initialization and
> + * whenever a new Working Set config is specified.
> + */
> +static inline void reset_working_set(VirtIOBalloon *dev)

Why inline?

> +{
> +int i;
> +for (i = 0; i < VIRTIO_BALLOON_WS_NR_BINS; i++) {
> +dev->ws[i].idle_age = 0;
> +if (dev->ws[i].memory_size_bytes) {
> +dev->ws[i].memory_size_bytes->anon = 0;
> +dev->ws[i].memory_size_bytes->file = 0;
> +} else {
> +dev->ws[i].memory_size_bytes = g_malloc0(sizeof(MemoryBin));
> +}
> +}
> +}
> +
> +static void virtio_balloon_receive_working_set(VirtIODevice *vdev,
> +   VirtQueue *vq)
> +{
> +VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
> +VirtQueueElement *elem;
> +VirtIOBalloonWorkingSet ws;
> +size_t offset = 0;
> +int count = 0;
> +
> +elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
> +if (!elem) {
> +return;
> +}
> +
> +if (s->working_set_vq_elem != NULL) {
> +/* This should never happen if the driver follows the spec. */
> +virtqueue_push(vq, s->working_set_vq_elem, 0);
> +virtio_notify(vdev, vq);
> +g_free(s->working_set_vq_elem);
> +}
> +
> +s->working_set_vq_elem = elem;
> +
> +/* Initialize the Working Set to get rid of any stale values. */
> +reset_working_set(s);
> +
> +while (iov_to_buf(elem->out_sg, elem->out_num, offset, &ws,
> +  sizeof(ws)) == sizeof(ws)) {
> +uint64_t idle_age_ms = virtio_tswap64(vdev, ws.idle_age_ms);
> +uint64_t bytes_anon = virtio_tswap64(vdev, ws.memory_size_bytes[0]);
> +uint64_t bytes_file = virtio_tswap64(vdev, ws.memory_size_bytes[1]);
> +s->ws[count].idle_age = idle_age_ms;
> +s->ws[count].memory_size_bytes->anon = bytes_anon;
> +s->ws[count].memory_size_bytes->file = bytes_file;
> +offset += sizeof(ws);
> +count++;
> +}
> +}
> +
> +static __attribute__((unused)) void virtio_balloon_send_working_set_request(
> +VirtIODevice *vdev, VirtQueue *vq)
> +{
> +VirtQueueElement *elem;
> +size_t sz = 0;
> +uint16_t tag = 0;
> +
> +elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
> +if (!elem) {
> +return;
> +}
> +tag = VIRTIO_BALLOON_WS_REQUEST;
> +sz = iov_from_buf(elem->in_sg, elem->in_num, 0, &tag, sizeof(tag));
> +assert(sz == sizeof(tag));
> +virtqueue_push(vq, elem, sz);
> +virtio_notify(vdev, vq);
> +g_free(elem);
> +}
> +
> +static __attribute__((unused)) void virtio_balloon_send_working_set_config(
> +VirtIODevice *vdev, VirtQueue *vq,
> +uint64_t i0, uint64_t i1, uint64_t i2,
> +uint64_t refresh, uint64_t report)
> +{
> +VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
> +VirtQueueElement *elem;
> +uint16_t tag = 0;
> +size_t sz = 0;
> +elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
> +if (!elem) {
> +return;
> +}
> +
> +tag = VIRTIO_BALLOON_WS_CONFIG;
> +s->ws_intervals[0] = i0;
> +s->ws_intervals[1] = i1;
> +s->ws_intervals[2] = i2;
> +s->ws_refresh_threshold = refresh;
> +s->ws_report_threshold = report;
> +
> +sz = iov_from_buf(elem->in_sg, elem->in_num, 0, &tag, sizeof(tag));
> +assert(sz == sizeof(uint16_t));
> +sz += iov_from_buf(elem->in_sg, elem->in_num, sz, s->ws_intervals,
> +   (VIRTIO_BALLOON_WS_NR_BINS - 1) * \
> +   sizeof(s->ws_intervals[0]));
> +sz += iov_from_buf(elem->in_sg, elem->in_num, sz, 
> &s->ws_refresh_threshold,
> +   sizeof(uint64_t));
> +sz += iov_from_buf(elem->in_sg, elem->in_num, sz, 
> &s->ws_report_threshold,
> +   sizeof(uint64_t));
> +virtqueue_push(vq, elem, sz);
> +virtio_notify(vdev, vq);
> +g_free(

Re: [PATCH v4 2/2] qapi: add '@fdset' feature for BlockdevOptionsVirtioBlkVhostVdpa

2023-05-26 Thread Markus Armbruster
Stefano Garzarella  writes:

> The virtio-blk-vhost-vdpa driver in libblkio 1.3.0 supports the fd
> passing through the new 'fd' property.
>
> Since now we are using qemu_open() on '@path' if the virtio-blk driver
> supports the fd passing, let's announce it.
> In this way, the management layer can pass the file descriptor of an
> already opened vhost-vdpa character device. This is useful especially
> when the device can only be accessed with certain privileges.
>
> Add the '@fdset' feature only when the virtio-blk-vhost-vdpa driver
> in libblkio supports it.
>
> Suggested-by: Markus Armbruster 
> Signed-off-by: Stefano Garzarella 
> ---
>
> Notes:
> v4:
> - added this patch to allow libvirt to discover we support fdset [Markus]
>
>  meson.build  | 4 
>  qapi/block-core.json | 8 +++-
>  2 files changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/meson.build b/meson.build
> index 78890f0155..8ea911f7b4 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -2108,6 +2108,10 @@ config_host_data.set('CONFIG_LZO', lzo.found())
>  config_host_data.set('CONFIG_MPATH', mpathpersist.found())
>  config_host_data.set('CONFIG_MPATH_NEW_API', mpathpersist_new_api)
>  config_host_data.set('CONFIG_BLKIO', blkio.found())
> +if blkio.found()
> +  config_host_data.set('CONFIG_BLKIO_VHOST_VDPA_FD',
> +   blkio.version().version_compare('>=1.3.0'))
> +endif
>  config_host_data.set('CONFIG_CURL', curl.found())
>  config_host_data.set('CONFIG_CURSES', curses.found())
>  config_host_data.set('CONFIG_GBM', gbm.found())
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 98d9116dae..1538d84ef4 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -3955,10 +3955,16 @@
>  #
>  # @path: path to the vhost-vdpa character device.
>  #
> +# Features:
> +# @fdset: Member @path supports the special "/dev/fdset/N" path (since 8.1)

Slightly long line, break it like this:

   # @fdset: Member @path supports the special "/dev/fdset/N" path
   # since 8.1)

> +#
>  # Since: 7.2
>  ##
>  { 'struct': 'BlockdevOptionsVirtioBlkVhostVdpa',
> -  'data': { 'path': 'str' },
> +  'data': { 'path': { 'type': 'str',
> +  'features': [ { 'name' :'fdset',
> +  'if': 'CONFIG_BLKIO_VHOST_VDPA_FD' } ]
> +} },
>'if': 'CONFIG_BLKIO' }
>  
>  ##

Tacking the feature to the member works.

For what it's worth, the existing features serving similar purposes are
all tacked to the command or type.

Do libvirt developers have a preference?




Re: [PATCH v8 7/7] hw/cxl/events: Add injection of Memory Module Events

2023-05-26 Thread Markus Armbruster
Jonathan Cameron  writes:

> These events include a copy of the device health information at the
> time of the event. Actually using the emulated device health would
> require a lot of controls to manipulate that state.  Given the aim
> of this injection code is to just test the flows when events occur,
> inject the contents of the device health state as well.
>
> Future work may add more sophisticate device health emulation
> including direct generation of these records when events occur
> (such as a temperature threshold being crossed).  That does not
> reduce the usefulness of this more basic generation of the events.
>
> Acked-by: Markus Armbruster 
> Reviewed-by: Fan Ni 
> Reviewed-by: Ira Weiny 
> Signed-off-by: Jonathan Cameron 
> ---
>  qapi/cxl.json   | 53 +++
>  include/hw/cxl/cxl_events.h | 19 
>  hw/mem/cxl_type3.c  | 62 +
>  hw/mem/cxl_type3_stubs.c| 12 +++
>  4 files changed, 146 insertions(+)
>
> diff --git a/qapi/cxl.json b/qapi/cxl.json
> index 36bf9fa202..a2e1280573 100644
> --- a/qapi/cxl.json
> +++ b/qapi/cxl.json
> @@ -140,6 +140,59 @@
>  '*column': 'uint16', '*correction-mask': [ 'uint64' ]
> }}
>  
> +##
> +# @cxl-inject-memory-module-event:
> +#
> +# Inject an event record for a Memory Module Event (CXL r3.0
> +# 8.2.9.2.1.3). This event includes a copy of the Device Health

Two spaces between sentences for consistency, please.

> +# info at the time of the event.
> +#
> +# @path: CXL type 3 device canonical QOM path
> +#
> +# @log: Event Log to add the event to
> +#
> +# @flags: Event Record Flags.  See CXL r3.0 Table 8-42 Common Event
> +# Record Format, Event Record Flags for subfield definitions.
> +#
> +# @type: Device Event Type.  See CXL r3.0 Table 8-45 Memory Module
> +# Event Record for bit definitions for bit definiions.
> +#
> +# @health-status: Overall health summary bitmap.  See CXL r3.0 Table
> +# 8-100 Get Health Info Output Payload, Health Status for bit
> +# definitions.
> +#
> +# @media-status: Overall media health summary.  See CXL r3.0 Table
> +# 8-100 Get Health Info Output Payload, Media Status for bit
> +# definitions.
> +#
> +# @additional-status: See CXL r3.0 Table 8-100 Get Health Info Output
> +# Payload, Additional Status for subfield definitions.
> +#
> +# @life-used: Percentage (0-100) of factory expected life span.
> +#
> +# @temperature: Device temperature in degrees Celsius.
> +#
> +# @dirty-shutdown-count: Number of times the device has been unable
> +# to determine whether data loss may have occurred.
> +#
> +# @corrected-volatile-error-count: Total number of correctable errors
> +# in volatile memory.
> +#
> +# @corrected-persistent-error-count: Total number correctable errors

Total number of correctable errors

> +# in persistent memory
> +#
> +# Since: 8.1
> +##
> +{ 'command': 'cxl-inject-memory-module-event',
> +  'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags' : 'uint8',
> +'type': 'uint8', 'health-status': 'uint8',
> +'media-status': 'uint8', 'additional-status': 'uint8',
> +'life-used': 'uint8', 'temperature' : 'int16',
> +'dirty-shutdown-count': 'uint32',
> +'corrected-volatile-error-count': 'uint32',
> +'corrected-persistent-error-count': 'uint32'
> +}}
> +
>  ##
>  # @cxl-inject-poison:
>  #

Neither these nitpicks nor the one on PATCH 5 calls for a respin.
Simply fix them in the PR.  Thanks!

[...]




Re: [PATCH v8 5/7] hw/cxl/events: Add injection of General Media Events

2023-05-26 Thread Markus Armbruster
Jonathan Cameron  writes:

> From: Ira Weiny 
>
> To facilitate testing provide a QMP command to inject a general media
> event.  The event can be added to the log specified.
>
> Signed-off-by: Ira Weiny 
> Reviewed-by: Fan Ni 
> Acked-by: Markus Armbruster 
> Signed-off-by: Jonathan Cameron 
> ---
>  qapi/cxl.json   |  74 
>  include/hw/cxl/cxl_events.h |  20 +++
>  hw/mem/cxl_type3.c  | 111 
>  hw/mem/cxl_type3_stubs.c|  10 
>  4 files changed, 215 insertions(+)
>
> diff --git a/qapi/cxl.json b/qapi/cxl.json
> index ed1c7eea3a..7f0b432767 100644
> --- a/qapi/cxl.json
> +++ b/qapi/cxl.json

[...]

> +##
> +# @cxl-inject-general-media-event:
> +#
> +# Inject an event record for a General Media Event (CXL r3.0
> +# 8.2.9.2.1.1).  This event type is reported via one of the event logs
> +# specified via the log parameter.
> +#
> +# @path: CXL type 3 device canonical QOM path
> +#
> +# @log: event log to add the event to
> +#
> +# @flags: Event Record Flags.  See CXL r3.0 Table 8-42 Common Event
> +# Record Format, Event Record Flags for subfield definitions.
> +#
> +# @dpa: Device Physical Address (relative to @path device).  Note
> +# lower bits include some flags. See CXL r3.0 Table 8-43 General

Two spaces between sentences for consistency, please.

> +# Media Event Record, Physical Address.
> +#

[...]




Re: [PULL 00/12] (Mostly) build system patches for 2023-05-26

2023-05-26 Thread Richard Henderson

On 5/26/23 09:08, Paolo Bonzini wrote:

The following changes since commit a3cb6d5004ff638aefe686ecd540718a793bd1b1:

   Merge tag 'pull-tcg-20230525' of https://gitlab.com/rth7680/qemu into 
staging (2023-05-25 11:11:52 -0700)

are available in the Git repository at:

   https://gitlab.com/bonzini/qemu.git tags/for-upstream

for you to fetch changes up to b17bbf835c8998e93fd99b06164f1d63843fe8c9:

   configure: ignore --make (2023-05-26 12:36:20 +0200)


* build system fixes and cleanups
* use subproject() for the dtc and keycodemapdb submodules
* fix virtio memory leak
* update slirp.wrap to latest commit in the master branch


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/8.1 as 
appropriate.


r~





Re: [PATCH v9 0/7] igb: packet-split descriptors support

2023-05-26 Thread Akihiko Odaki

On 2023/05/26 0:37, Tomasz Dzieciol wrote:

Based-on: <20230523024339.50875-1-akihiko.od...@daynix.com>
("[PATCH v5 00/48] igb: Fix for DPDK")

Purposes of this series of patches:
* introduce packet-split RX descriptors support. This feature is used by Linux
   VF driver for MTU values from 2048.
* refactor RX descriptor handling for introduction of packet-split RX
   descriptors support
* fix descriptors flags handling

In addition to comments from previous review endianess issues in
igb_write_adv_ps_rx_descr were fixed.

Tomasz Dzieciol (7):
   igb: remove TCP ACK detection
   igb: rename E1000E_RingInfo_st
   igb: RX descriptors guest writting refactoring
   igb: RX payload guest writting refactoring
   igb: add IPv6 extended headers traffic detection
   igb: packet-split descriptors support
   e1000e: rename e1000e_ba_state and e1000e_write_hdr_to_rx_buffers

  hw/net/e1000e_core.c |  78 +++--
  hw/net/igb_core.c| 740 ---
  hw/net/igb_regs.h|  20 +-
  hw/net/trace-events  |   6 +-
  tests/qtest/libqos/igb.c |   5 +
  5 files changed, 602 insertions(+), 247 deletions(-)



Please rebase to the current master and remove Based-on: tag. This 
series should cleanly apply.


I left some minor comments that does not involve semantic changes so 
check them out. I think I can give Reviewed-by: and Tested-by: for the 
next version. Thanks for keeping working on this.


Regards,
Akihiko Odaki



Re: [PATCH v9 6/7] igb: packet-split descriptors support

2023-05-26 Thread Akihiko Odaki




On 2023/05/26 0:37, Tomasz Dzieciol wrote:

Packet-split descriptors are used by Linux VF driver for MTU values from 2048

Signed-off-by: Tomasz Dzieciol 
---
  hw/net/igb_core.c   | 357 ++--
  hw/net/igb_regs.h   |   9 ++
  hw/net/trace-events |   2 +-
  3 files changed, 325 insertions(+), 43 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 2bfae517fc..c1a8dfbca8 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -267,6 +267,29 @@ igb_rx_use_legacy_descriptor(IGBCore *core)
  return false;
  }
  
+typedef struct E1000ERingInfo {

+int dbah;
+int dbal;
+int dlen;
+int dh;
+int dt;
+int idx;
+} E1000ERingInfo;
+
+static uint32_t
+igb_rx_queue_desctyp_get(IGBCore *core, const E1000ERingInfo *r)
+{
+return core->mac[E1000_SRRCTL(r->idx) >> 2] & E1000_SRRCTL_DESCTYPE_MASK;
+}
+
+static bool
+igb_rx_use_ps_descriptor(IGBCore *core, const E1000ERingInfo *r)
+{
+uint32_t desctyp = igb_rx_queue_desctyp_get(core, r);
+return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT ||
+   desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+}
+
  static inline bool
  igb_rss_enabled(IGBCore *core)
  {
@@ -694,15 +717,6 @@ static uint32_t igb_rx_wb_eic(IGBCore *core, int queue_idx)
  return (ent & E1000_IVAR_VALID) ? BIT(ent & 0x1f) : 0;
  }
  
-typedef struct E1000ERingInfo {

-int dbah;
-int dbal;
-int dlen;
-int dh;
-int dt;
-int idx;
-} E1000ERingInfo;
-
  static inline bool
  igb_ring_empty(IGBCore *core, const E1000ERingInfo *r)
  {
@@ -1233,12 +1247,25 @@ igb_read_lgcy_rx_descr(IGBCore *core, struct 
e1000_rx_desc *desc,
  }
  
  static inline void

-igb_read_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
-  hwaddr *buff_addr)
+igb_read_adv_rx_single_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
+ hwaddr *buff_addr)
  {
  *buff_addr = le64_to_cpu(desc->read.pkt_addr);
  }
  
+static inline void

+igb_read_adv_rx_split_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
+hwaddr *buff_addr)
+{
+buff_addr[0] = le64_to_cpu(desc->read.hdr_addr);
+buff_addr[1] = le64_to_cpu(desc->read.pkt_addr);
+}
+
+typedef struct IGBBAState {
+uint16_t written[IGB_MAX_PS_BUFFERS];
+uint8_t cur_idx;
+} IGBBAState;
+
  typedef struct IGBPacketRxDMAState {
  size_t size;
  size_t total_size;
@@ -1249,20 +1276,41 @@ typedef struct IGBPacketRxDMAState {
  uint32_t rx_desc_header_buf_size;
  struct iovec *iov;
  size_t iov_ofs;
+bool do_ps;
  bool is_first;
-uint16_t written;
-hwaddr ba;
+IGBBAState bastate;
+hwaddr ba[IGB_MAX_PS_BUFFERS];
  } IGBPacketRxDMAState;
  
  static inline void

-igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc,
-  hwaddr *buff_addr)
+igb_read_rx_descr(IGBCore *core,
+  union e1000_rx_desc_union *desc,
+  IGBPacketRxDMAState *pdma_st,
+  const E1000ERingInfo *r)
  {
+uint32_t desc_type;
+
  if (igb_rx_use_legacy_descriptor(core)) {
-igb_read_lgcy_rx_descr(core, &desc->legacy, buff_addr);
-} else {
-igb_read_adv_rx_descr(core, &desc->adv, buff_addr);
+igb_read_lgcy_rx_descr(core, &desc->legacy, &pdma_st->ba[1]);
+pdma_st->ba[0] = 0;
+return;
+}
+
+/* advanced header split descriptor */
+if (igb_rx_use_ps_descriptor(core, r)) {
+igb_read_adv_rx_split_buf_descr(core, &desc->adv, &pdma_st->ba[0]);
+return;
+}
+
+/* descriptor replication modes not supported */
+desc_type = igb_rx_queue_desctyp_get(core, r);
+if (desc_type != E1000_SRRCTL_DESCTYPE_ADV_ONEBUF) {
+trace_igb_wrn_rx_desc_modes_not_supp(desc_type);
  }
+
+/* advanced single buffer descriptor */
+igb_read_adv_rx_single_buf_descr(core, &desc->adv, &pdma_st->ba[1]);
+pdma_st->ba[0] = 0;
  }
  
  static void

@@ -1405,6 +1453,13 @@ igb_write_lgcy_rx_descr(IGBCore *core, struct 
e1000_rx_desc *desc,
  desc->status = (uint8_t) le32_to_cpu(status_flags);
  }
  
+static bool

+igb_rx_ps_descriptor_split_always(IGBCore *core, const E1000ERingInfo *r)
+{
+uint32_t desctyp = igb_rx_queue_desctyp_get(core, r);
+return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+}
+
  static uint16_t
  igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, uint16_t 
etqf)
  {
@@ -1494,16 +1549,64 @@ igb_write_adv_rx_descr(IGBCore *core, union 
e1000_adv_rx_desc *desc,
  desc->wb.upper.status_error |= cpu_to_le32(adv_desc_status_error);
  }
  
+typedef struct IGBSplitDescriptorData {

+bool sph;
+bool hbo;
+size_t hdr_len;
+} IGBSplitDescriptorData;
+
  static inline void
-igb_write_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc,
-   struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info,
-   uint16_t

Re: [PATCH v9 4/7] igb: RX payload guest writting refactoring

2023-05-26 Thread Akihiko Odaki




On 2023/05/26 0:37, Tomasz Dzieciol wrote:

Refactoring is done in preparation for support of multiple advanced
descriptors RX modes, especially packet-split modes.

Signed-off-by: Tomasz Dzieciol 
---
  hw/net/e1000e_core.c |  18 ++--
  hw/net/igb_core.c| 214 +--
  tests/qtest/libqos/igb.c |   5 +
  3 files changed, 151 insertions(+), 86 deletions(-)

diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index f6b628eaef..f29e5e2897 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -1418,11 +1418,11 @@ e1000e_write_hdr_to_rx_buffers(E1000ECore *core,
  }
  
  static void

-e1000e_write_to_rx_buffers(E1000ECore *core,
-   hwaddr ba[MAX_PS_BUFFERS],
-   e1000e_ba_state *bastate,
-   const char *data,
-   dma_addr_t data_len)
+e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core,
+hwaddr ba[MAX_PS_BUFFERS],
+e1000e_ba_state *bastate,
+const char *data,
+dma_addr_t data_len)
  {
  while (data_len > 0) {
  uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx];
@@ -1594,8 +1594,10 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct 
NetRxPkt *pkt,
  while (copy_size) {
  iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
  
-e1000e_write_to_rx_buffers(core, ba, &bastate,

-iov->iov_base + iov_ofs, iov_copy);
+e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate,
+iov->iov_base +
+iov_ofs,
+iov_copy);
  
  copy_size -= iov_copy;

  iov_ofs += iov_copy;
@@ -1607,7 +1609,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct 
NetRxPkt *pkt,
  
  if (desc_offset + desc_size >= total_size) {

  /* Simulate FCS checksum presence in the last descriptor 
*/
-e1000e_write_to_rx_buffers(core, ba, &bastate,
+e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate,
(const char *) &fcs_pad, e1000x_fcs_len(core->mac));
  }
  }
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 3a6df0d55e..8e32f39ece 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -941,6 +941,14 @@ igb_has_rxbufs(IGBCore *core, const E1000ERingInfo *r, 
size_t total_size)
   bufsize;
  }
  
+static uint32_t

+igb_rxhdrbufsize(IGBCore *core, const E1000ERingInfo *r)
+{
+uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2];
+return (srrctl & E1000_SRRCTL_BSIZEHDRSIZE_MASK) >>
+   E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
+}
+
  void
  igb_start_recv(IGBCore *core)
  {
@@ -1231,6 +1239,21 @@ igb_read_adv_rx_descr(IGBCore *core, union 
e1000_adv_rx_desc *desc,
  *buff_addr = le64_to_cpu(desc->read.pkt_addr);
  }
  
+typedef struct IGBPacketRxDMAState {

+size_t size;
+size_t total_size;
+size_t ps_hdr_len;
+size_t desc_size;
+size_t desc_offset;
+uint32_t rx_desc_packet_buf_size;
+uint32_t rx_desc_header_buf_size;
+struct iovec *iov;
+size_t iov_ofs;
+bool is_first;
+uint16_t written;
+hwaddr ba;
+} IGBPacketRxDMAState;
+
  static inline void
  igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc,
hwaddr *buff_addr)
@@ -1514,19 +1537,6 @@ igb_pci_dma_write_rx_desc(IGBCore *core, PCIDevice *dev, 
dma_addr_t addr,
  }
  }
  
-static void

-igb_write_to_rx_buffers(IGBCore *core,
-PCIDevice *d,
-hwaddr ba,
-uint16_t *written,
-const char *data,
-dma_addr_t data_len)
-{
-trace_igb_rx_desc_buff_write(ba, *written, data, data_len);
-pci_dma_write(d, ba + *written, data, data_len);
-*written += data_len;
-}
-
  static void
  igb_update_rx_stats(IGBCore *core, const E1000ERingInfo *rxi,
  size_t pkt_size, size_t pkt_fcs_size)
@@ -1552,6 +1562,93 @@ igb_rx_descr_threshold_hit(IGBCore *core, const 
E1000ERingInfo *rxi)
 ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16;
  }
  
+static void

+igb_truncate_to_descriptor_size(IGBPacketRxDMAState *pdma_st, size_t *size)
+{
+if (*size > pdma_st->rx_desc_packet_buf_size) {
+*size = pdma_st->rx_desc_packet_buf_size;
+}
+}
+
+static void
+igb_write_payload_frag_to_rx_buffers(IGBCore *core,
+ PCIDevice *d,
+ hwaddr ba,
+  

[RFC PATCH v2 2/4] vfio: Implement a common device info helper

2023-05-26 Thread Alex Williamson
A common helper implementing the realloc algorithm for handling
capabilities.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Cédric Le Goater 
Signed-off-by: Alex Williamson 
---
 hw/s390x/s390-pci-vfio.c  | 37 
 hw/vfio/common.c  | 46 ++-
 include/hw/vfio/vfio-common.h |  1 +
 3 files changed, 41 insertions(+), 43 deletions(-)

diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
index f51190d4662f..66e8ec3bdef9 100644
--- a/hw/s390x/s390-pci-vfio.c
+++ b/hw/s390x/s390-pci-vfio.c
@@ -289,38 +289,11 @@ static void s390_pci_read_pfip(S390PCIBusDevice *pbdev,
 memcpy(pbdev->zpci_fn.pfip, cap->pfip, CLP_PFIP_NR_SEGMENTS);
 }
 
-static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev,
-uint32_t argsz)
+static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev);
 {
-struct vfio_device_info *info = g_malloc0(argsz);
-VFIOPCIDevice *vfio_pci;
-int fd;
+VFIOPCIDevice *vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
 
-vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
-fd = vfio_pci->vbasedev.fd;
-
-/*
- * If the specified argsz is not large enough to contain all capabilities
- * it will be updated upon return from the ioctl.  Retry until we have
- * a big enough buffer to hold the entire capability chain.  On error,
- * just exit and rely on CLP defaults.
- */
-retry:
-info->argsz = argsz;
-
-if (ioctl(fd, VFIO_DEVICE_GET_INFO, info)) {
-trace_s390_pci_clp_dev_info(vfio_pci->vbasedev.name);
-g_free(info);
-return NULL;
-}
-
-if (info->argsz > argsz) {
-argsz = info->argsz;
-info = g_realloc(info, argsz);
-goto retry;
-}
-
-return info;
+return vfio_get_device_info(vfio_pci->vbasedev.fd);
 }
 
 /*
@@ -335,7 +308,7 @@ bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t 
*fh)
 
 assert(fh);
 
-info = get_device_info(pbdev, sizeof(*info));
+info = get_device_info(pbdev);
 if (!info) {
 return false;
 }
@@ -356,7 +329,7 @@ void s390_pci_get_clp_info(S390PCIBusDevice *pbdev)
 {
 g_autofree struct vfio_device_info *info = NULL;
 
-info = get_device_info(pbdev, sizeof(*info));
+info = get_device_info(pbdev);
 if (!info) {
 return;
 }
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 78358ede2764..ed142296e9fe 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -2843,11 +2843,35 @@ void vfio_put_group(VFIOGroup *group)
 }
 }
 
+struct vfio_device_info *vfio_get_device_info(int fd)
+{
+struct vfio_device_info *info;
+uint32_t argsz = sizeof(*info);
+
+info = g_malloc0(argsz);
+
+retry:
+info->argsz = argsz;
+
+if (ioctl(fd, VFIO_DEVICE_GET_INFO, info)) {
+g_free(info);
+return NULL;
+}
+
+if (info->argsz > argsz) {
+argsz = info->argsz;
+info = g_realloc(info, argsz);
+goto retry;
+}
+
+return info;
+}
+
 int vfio_get_device(VFIOGroup *group, const char *name,
 VFIODevice *vbasedev, Error **errp)
 {
-struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
-int ret, fd;
+g_autofree struct vfio_device_info *info = NULL;
+int fd;
 
 fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
 if (fd < 0) {
@@ -2859,11 +2883,11 @@ int vfio_get_device(VFIOGroup *group, const char *name,
 return fd;
 }
 
-ret = ioctl(fd, VFIO_DEVICE_GET_INFO, &dev_info);
-if (ret) {
+info = vfio_get_device_info(fd);
+if (!info) {
 error_setg_errno(errp, errno, "error getting device info");
 close(fd);
-return ret;
+return -1;
 }
 
 /*
@@ -2891,14 +2915,14 @@ int vfio_get_device(VFIOGroup *group, const char *name,
 vbasedev->group = group;
 QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);
 
-vbasedev->num_irqs = dev_info.num_irqs;
-vbasedev->num_regions = dev_info.num_regions;
-vbasedev->flags = dev_info.flags;
+vbasedev->num_irqs = info->num_irqs;
+vbasedev->num_regions = info->num_regions;
+vbasedev->flags = info->flags;
+
+trace_vfio_get_device(name, info->flags, info->num_regions, 
info->num_irqs);
 
-trace_vfio_get_device(name, dev_info.flags, dev_info.num_regions,
-  dev_info.num_irqs);
+vbasedev->reset_works = !!(info->flags & VFIO_DEVICE_FLAGS_RESET);
 
-vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET);
 return 0;
 }
 
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index eed244f25f34..a8dcda592c08 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -212,6 +212,7 @@ void vfio_region_finalize(VFIORegion *region);
 void vfio_reset_handler(void *opaque);
 VFIOGroup *vfio_get_group(int groupid, AddressSpace *a

[RFC PATCH v2 1/4] linux-headers: Update for vfio capability reporting AtomicOps

2023-05-26 Thread Alex Williamson
This is a partial linux-headers update for illustrative and testing
purposes only, NOT FOR COMMIT.

Signed-off-by: Alex Williamson 
---
 linux-headers/linux/vfio.h | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 4a534edbdcba..443a8851e156 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -240,6 +240,20 @@ struct vfio_device_info {
 #define VFIO_DEVICE_INFO_CAP_ZPCI_UTIL 3
 #define VFIO_DEVICE_INFO_CAP_ZPCI_PFIP 4
 
+/*
+ * The following VFIO_DEVICE_INFO capability reports support for PCIe AtomicOp
+ * completion to the root bus with supported widths provided via flags.
+ */
+#define VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP   5
+struct vfio_device_info_cap_pci_atomic_comp {
+   struct vfio_info_cap_header header;
+   __u32 flags;
+#define VFIO_PCI_ATOMIC_COMP32 (1 << 0)
+#define VFIO_PCI_ATOMIC_COMP64 (1 << 1)
+#define VFIO_PCI_ATOMIC_COMP128(1 << 2)
+   __u32 reserved;
+};
+
 /**
  * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
  *struct vfio_region_info)
-- 
2.39.2




[RFC PATCH v2 0/4] vfio/pci: Atomic Ops completer support

2023-05-26 Thread Alex Williamson
This RFC proposes to allow a vfio-pci device to manipulate the PCI
Express capability of an associated root port to enable Atomic Op
completer support as equivalent to host capabilities.  This would
dynamically change capability bits in the config space of the root
port on realize and exit of the vfio-pci device under the following
condiations:

 - The vfio-pci device is directly connected to the root port and
   the root port implements a v2 PCIe capability, thereby supporting
   the DEVCAP2 register.

 - The vfio-pci device is exposed as a single-function device.

 - Atomic completer support is not otherwise reported on the root port.

 - The vfio-pci device reports the VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP
   capability with a non-zero set of supported atomic completer widths.

This proposal aims to avoid complications of reporting Atomic Ops
Routing, which can easily escalate into invalid P2P paths.  We also
require a specific VM configuration to avoid dependencies between
devices which may be sourced from dissimilar host paths.

While it's not exactly standard practice to modify root port device
capabilities runtime, it also does not seem to be precluded by the PCIe
Spec (6.0.1).  The Atomic Op completion bits of the DEVCAP2 register
are defined as Read-only:

7.4 Configuration Register Types
 Read-only - Register bits are read-only and cannot be altered by software.
 Where explicitly defined, these bits are used to reflect changing
 hardware state, and as a result bit values can be observed to
 change at run time. Register bit default values and bits that
 cannot change value at run time, are permitted to be hard-coded,
 initialized by system/device firmware, or initialized by hardware
 mechanisms such as pin strapping or nonvolatile storage.
 Initialization by system firmware is permitted only for system-
 integrated devices. If the optional feature that would Set the
 bits is not implemented, the bits must be hardwired to Zero.

Here "altered by software" is relative to guest writes to the config
space register, whereas in this implementation we're acting as hardware
and the bits are changing to reflect a change in runtime capabilities.
The spec does include a HwInit register type which would restrict the
value from changing at runtime outside of resets.  Therefore while it
would not be advised to update these bits arbitrarily, it does seem safe
and compatible with guest software to update the value on device attach
and detach.

Note that of the Linux in-kernel drivers that make use of Atomic Ops,
it's not common for the driver to test Atomic Ops support of the device
itself.  Support is assumed, therefore it's fruitless to provide masking
of support at the device rather than the root port.

Also, by allowing this dynamic support, enabling Atomic Ops becomes
transparent to VM management tools.  There is no requirement to
designate specific Atomic Ops capabilities to a root port and impose a
burden on other userspace utilities.

Feedback welcome.  Thanks,

Alex

v2:
 - Don't require cold-plug device, modify RP bits around realize/exit

Alex Williamson (4):
  linux-headers: Update for vfio capability reporting AtomicOps
  vfio: Implement a common device info helper
  pcie: Add a PCIe capability version helper
  vfio/pci: Enable AtomicOps completers on root ports

 hw/pci/pcie.c |  7 
 hw/s390x/s390-pci-vfio.c  | 37 +++--
 hw/vfio/common.c  | 46 -
 hw/vfio/pci.c | 78 +++
 hw/vfio/pci.h |  1 +
 include/hw/pci/pcie.h |  1 +
 include/hw/vfio/vfio-common.h |  1 +
 linux-headers/linux/vfio.h| 14 +++
 8 files changed, 142 insertions(+), 43 deletions(-)

-- 
2.39.2




[RFC PATCH v2 3/4] pcie: Add a PCIe capability version helper

2023-05-26 Thread Alex Williamson
Report the PCIe capability version for a device

Signed-off-by: Alex Williamson 
---
 hw/pci/pcie.c | 7 +++
 include/hw/pci/pcie.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index b8c24cf45f7e..b7f107ed8dd4 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -274,6 +274,13 @@ uint8_t pcie_cap_get_type(const PCIDevice *dev)
 PCI_EXP_FLAGS_TYPE) >> PCI_EXP_FLAGS_TYPE_SHIFT;
 }
 
+uint8_t pcie_cap_get_version(const PCIDevice *dev)
+{
+uint32_t pos = dev->exp.exp_cap;
+assert(pos > 0);
+return pci_get_word(dev->config + pos + PCI_EXP_FLAGS) & 
PCI_EXP_FLAGS_VERS;
+}
+
 /* MSI/MSI-X */
 /* pci express interrupt message number */
 /* 7.8.2 PCI Express Capabilities Register: Interrupt Message Number */
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index 3cc2b159570f..51ab57bc3c50 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -93,6 +93,7 @@ void pcie_cap_exit(PCIDevice *dev);
 int pcie_endpoint_cap_v1_init(PCIDevice *dev, uint8_t offset);
 void pcie_cap_v1_exit(PCIDevice *dev);
 uint8_t pcie_cap_get_type(const PCIDevice *dev);
+uint8_t pcie_cap_get_version(const PCIDevice *dev);
 void pcie_cap_flags_set_vector(PCIDevice *dev, uint8_t vector);
 uint8_t pcie_cap_flags_get_vector(PCIDevice *dev);
 
-- 
2.39.2




[RFC PATCH v2 4/4] vfio/pci: Enable AtomicOps completers on root ports

2023-05-26 Thread Alex Williamson
Dynamically enable Atomic Ops completer support around realize/exit of
vfio-pci devices reporting host support for these accesses and adhering
to a minimal configuration standard.  While the Atomic Ops completer
bits in the root port device capabilities2 register are read-only, the
PCIe spec does allow RO bits to change to reflect hardware state.  We
take advantage of that here around the realize and exit functions of
the vfio-pci device.

Signed-off-by: Alex Williamson 
---
 hw/vfio/pci.c | 78 +++
 hw/vfio/pci.h |  1 +
 2 files changed, 79 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index bf27a3990564..d8a0fd595560 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1826,6 +1826,81 @@ static void vfio_add_emulated_long(VFIOPCIDevice *vdev, 
int pos,
 vfio_set_long_bits(vdev->emulated_config_bits + pos, mask, mask);
 }
 
+static void vfio_pci_enable_rp_atomics(VFIOPCIDevice *vdev)
+{
+struct vfio_device_info_cap_pci_atomic_comp *cap;
+g_autofree struct vfio_device_info *info = NULL;
+PCIBus *bus = pci_get_bus(&vdev->pdev);
+PCIDevice *parent = bus->parent_dev;
+struct vfio_info_cap_header *hdr;
+uint32_t mask = 0;
+uint8_t *pos;
+
+/*
+ * PCIe Atomic Ops completer support is only added automatically for single
+ * function devices downstream of a root port supporting DEVCAP2.  Support
+ * is added during realize and, if added, removed during device exit.  The
+ * single function requirement avoids conflicting requirements should a
+ * slot be composed of multiple devices with differing capabilities.
+ */
+if (pci_bus_is_root(bus) || !parent || !parent->exp.exp_cap ||
+pcie_cap_get_type(parent) != PCI_EXP_TYPE_ROOT_PORT ||
+pcie_cap_get_version(parent) != PCI_EXP_FLAGS_VER2 ||
+vdev->pdev.devfn ||
+vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+return;
+}
+
+pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2;
+
+/* Abort if there'a already an Atomic Ops configuration on the root port */
+if (pci_get_long(pos) & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+ PCI_EXP_DEVCAP2_ATOMIC_COMP64 |
+ PCI_EXP_DEVCAP2_ATOMIC_COMP128)) {
+return;
+}
+
+info = vfio_get_device_info(vdev->vbasedev.fd);
+if (!info) {
+return;
+}
+
+hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP);
+if (!hdr) {
+return;
+}
+
+cap = (void *)hdr;
+if (cap->flags & VFIO_PCI_ATOMIC_COMP32) {
+mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP32;
+}
+if (cap->flags & VFIO_PCI_ATOMIC_COMP64) {
+mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP64;
+}
+if (cap->flags & VFIO_PCI_ATOMIC_COMP128) {
+mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP128;
+}
+
+if (!mask) {
+return;
+}
+
+pci_long_test_and_set_mask(pos, mask);
+vdev->clear_parent_atomics_on_exit = true;
+}
+
+static void vfio_pci_disable_rp_atomics(VFIOPCIDevice *vdev)
+{
+if (vdev->clear_parent_atomics_on_exit) {
+PCIDevice *parent = pci_get_bus(&vdev->pdev)->parent_dev;
+uint8_t *pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2;
+
+pci_long_test_and_clear_mask(pos, PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+  PCI_EXP_DEVCAP2_ATOMIC_COMP64 |
+  PCI_EXP_DEVCAP2_ATOMIC_COMP128);
+}
+}
+
 static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size,
Error **errp)
 {
@@ -1929,6 +2004,8 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int 
pos, uint8_t size,
QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_2_5GT), 
~0);
 vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKCTL, 0, ~0);
 }
+
+vfio_pci_enable_rp_atomics(vdev);
 }
 
 /*
@@ -3265,6 +3342,7 @@ static void vfio_exitfn(PCIDevice *pdev)
 timer_free(vdev->intx.mmap_timer);
 }
 vfio_teardown_msi(vdev);
+vfio_pci_disable_rp_atomics(vdev);
 vfio_bars_exit(vdev);
 vfio_migration_exit(&vdev->vbasedev);
 }
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 2674476d6c77..a2771b9ff3cc 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -174,6 +174,7 @@ struct VFIOPCIDevice {
 bool no_vfio_ioeventfd;
 bool enable_ramfb;
 bool defer_kvm_irq_routing;
+bool clear_parent_atomics_on_exit;
 VFIODisplay *dpy;
 Notifier irqchip_change_notifier;
 };
-- 
2.39.2




Re: [PATCH 4/4] tests/tcg/s390x: Test LOCFHR

2023-05-26 Thread Richard Henderson

On 5/26/23 11:12, Ilya Leoshkevich wrote:

Add a small test to prevent regressions.

Cc:qemu-sta...@nongnu.org
Signed-off-by: Ilya Leoshkevich
---
  tests/tcg/s390x/Makefile.target |  1 +
  tests/tcg/s390x/locfhr.c| 29 +
  2 files changed, 30 insertions(+)
  create mode 100644 tests/tcg/s390x/locfhr.c


Reviewed-by: Richard Henderson 

r~



Re: [PATCH 2/4] tests/tcg/s390x: Test LCBB

2023-05-26 Thread Richard Henderson

On 5/26/23 11:12, Ilya Leoshkevich wrote:

Add a test to prevent regressions.

Cc:qemu-sta...@nongnu.org
Signed-off-by: Ilya Leoshkevich
---
  tests/tcg/s390x/Makefile.target |  1 +
  tests/tcg/s390x/lcbb.c  | 51 +
  2 files changed, 52 insertions(+)
  create mode 100644 tests/tcg/s390x/lcbb.c

Acked-by: Richard Henderson 

r~



Re: [PATCH 1/4] target/s390x: Fix LCBB overwriting the top 32 bits

2023-05-26 Thread Richard Henderson

On 5/26/23 11:12, Ilya Leoshkevich wrote:

LCBB is supposed to overwrite only the bottom 32 bits, but QEMU
erroneously overwrites the entire register.

Fixes: 6d9303322ed9 ("s390x/tcg: Implement LOAD COUNT TO BLOCK BOUNDARY")
Cc: qemu-sta...@nongnu.org
Signed-off-by: Ilya Leoshkevich 


Reviewed-by: Richard Henderson 


r~


---
  target/s390x/tcg/insn-data.h.inc | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
index bcc70d99ba2..e41672684aa 100644
--- a/target/s390x/tcg/insn-data.h.inc
+++ b/target/s390x/tcg/insn-data.h.inc
@@ -486,7 +486,7 @@
  F(0xb343, LCXBR,   RRE,   Z,   x2h, x2l, new_P, x1_P, negf128, f128, 
IF_BFP)
  F(0xb373, LCDFR,   RRE,   FPSSH, 0, f2, new, f1, negf64, 0, IF_AFP1 | 
IF_AFP2)
  /* LOAD COUNT TO BLOCK BOUNDARY */
-C(0xe727, LCBB,RXE,   V,   la2, 0, r1, 0, lcbb, 0)
+C(0xe727, LCBB,RXE,   V,   la2, 0, new, r1_32, lcbb, 0)
  /* LOAD HALFWORD */
  C(0xb927, LHR, RRE,   EI,  0, r2_16s, 0, r1_32, mov2, 0)
  C(0xb907, LGHR,RRE,   EI,  0, r2_16s, 0, r1, mov2, 0)





Re: [PATCH 3/4] target/s390x: Fix LOCFHR taking the wrong half of R2

2023-05-26 Thread Richard Henderson

On 5/26/23 11:12, Ilya Leoshkevich wrote:

LOCFHR should write top-to-top, but QEMU erroneously writes
bottom-to-top.

Fixes: 45aa9aa3b773 ("target/s390x: Implement load-on-condition-2 insns")
Cc:qemu-sta...@nongnu.org
Reported-by: Mikhail Mitskevich
Closes:https://gitlab.com/qemu-project/qemu/-/issues/1668
Signed-off-by: Ilya Leoshkevich
---
  target/s390x/tcg/insn-data.h.inc | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH] migration: stop tracking ram writes when cancelling background migration

2023-05-26 Thread Peter Xu
On Fri, May 26, 2023 at 01:59:08PM +0200, Fiona Ebner wrote:
> Currently, it is only done when the iteration finishes successfully.
> Not cleaning up the userfaultfd write protection can lead to
> symptoms/issues such as the process hanging in memmove or GDB not
> being able to attach.
> 
> Signed-off-by: Fiona Ebner 

Reviewed-by: Peter Xu 

> For the success case, the stuff in between the old call and new call
> site should not depend on tracking to already be stopped, right?

Yes I think so.  There's only device states to be flushed and no guest
memory should be touched.

Even if we'll touch them, since we've finished migrating all of them so
they're already unprotected anyway (actually, even during ram save it's
read-only here from bg thread), so not any problem I can see.

Thanks,

-- 
Peter Xu




Re: [PATCH v1] migration: fix migrate_params_test_apply to set the dest param correctly

2023-05-26 Thread Peter Xu
On Wed, May 24, 2023 at 04:01:57PM +0800, Wei Wang wrote:
> qmp_migrate_set_parameters expects to use tmp for parameters check,
> so migrate_params_test_apply is expected to copy the related fields from
> params to tmp. So fix migrate_params_test_apply to use the function
> parameter, *dest, rather than the global one. The dest->has_xxx (xxx is
> the feature name) related fields need to be set, as they will be checked
> by migrate_params_check.

I think it's fine to do as what you suggested, but I don't see much benefit
either.. the old code IIUC will check all params even if 1 param changed,
while after your change it only checks the modified ones.

There's slight benefits but not so much, especially "22+, 2-" LOCs, because
we don't really do this a lot; some more sanity check also makes sense to
me even if everything is always checked, so we'll hit errors if anything
accidentally goes wrong too.

Is there a real bug somewhere?

> 
> Signed-off-by: Wei Wang 
> ---
>  migration/options.c | 24 ++--
>  1 file changed, 22 insertions(+), 2 deletions(-)
> 
> diff --git a/migration/options.c b/migration/options.c
> index b62ab30cd5..a560483871 100644
> --- a/migration/options.c
> +++ b/migration/options.c
> @@ -1089,39 +1089,45 @@ bool migrate_params_check(MigrationParameters 
> *params, Error **errp)
>  static void migrate_params_test_apply(MigrateSetParameters *params,
>MigrationParameters *dest)
>  {
> -*dest = migrate_get_current()->parameters;
> -
>  /* TODO use QAPI_CLONE() instead of duplicating it inline */
>  
>  if (params->has_compress_level) {
> +dest->has_compress_level = true;
>  dest->compress_level = params->compress_level;
>  }
>  
>  if (params->has_compress_threads) {
> +dest->has_compress_threads = true;
>  dest->compress_threads = params->compress_threads;
>  }
>  
>  if (params->has_compress_wait_thread) {
> +dest->has_compress_wait_thread = true;
>  dest->compress_wait_thread = params->compress_wait_thread;
>  }
>  
>  if (params->has_decompress_threads) {
> +dest->has_decompress_threads = true;
>  dest->decompress_threads = params->decompress_threads;
>  }
>  
>  if (params->has_throttle_trigger_threshold) {
> +dest->has_throttle_trigger_threshold = true;
>  dest->throttle_trigger_threshold = 
> params->throttle_trigger_threshold;
>  }
>  
>  if (params->has_cpu_throttle_initial) {
> +dest->has_cpu_throttle_initial = true;
>  dest->cpu_throttle_initial = params->cpu_throttle_initial;
>  }
>  
>  if (params->has_cpu_throttle_increment) {
> +dest->has_cpu_throttle_increment = true;
>  dest->cpu_throttle_increment = params->cpu_throttle_increment;
>  }
>  
>  if (params->has_cpu_throttle_tailslow) {
> +dest->has_cpu_throttle_tailslow = true;
>  dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow;
>  }
>  
> @@ -1136,45 +1142,58 @@ static void 
> migrate_params_test_apply(MigrateSetParameters *params,
>  }
>  
>  if (params->has_max_bandwidth) {
> +dest->has_max_bandwidth = true;
>  dest->max_bandwidth = params->max_bandwidth;
>  }
>  
>  if (params->has_downtime_limit) {
> +dest->has_downtime_limit = true;
>  dest->downtime_limit = params->downtime_limit;
>  }
>  
>  if (params->has_x_checkpoint_delay) {
> +dest->has_x_checkpoint_delay = true;
>  dest->x_checkpoint_delay = params->x_checkpoint_delay;
>  }
>  
>  if (params->has_block_incremental) {
> +dest->has_block_incremental = true;
>  dest->block_incremental = params->block_incremental;
>  }
>  if (params->has_multifd_channels) {
> +dest->has_multifd_channels = true;
>  dest->multifd_channels = params->multifd_channels;
>  }
>  if (params->has_multifd_compression) {
> +dest->has_multifd_compression = true;
>  dest->multifd_compression = params->multifd_compression;
>  }
>  if (params->has_xbzrle_cache_size) {
> +dest->has_xbzrle_cache_size = true;
>  dest->xbzrle_cache_size = params->xbzrle_cache_size;
>  }
>  if (params->has_max_postcopy_bandwidth) {
> +dest->has_max_postcopy_bandwidth = true;
>  dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth;
>  }
>  if (params->has_max_cpu_throttle) {
> +dest->has_max_cpu_throttle = true;
>  dest->max_cpu_throttle = params->max_cpu_throttle;
>  }
>  if (params->has_announce_initial) {
> +dest->has_announce_initial = true;
>  dest->announce_initial = params->announce_initial;
>  }
>  if (params->has_announce_max) {
> +dest->has_announce_max = true;
>  dest->announce_max = params->announce_max;
>  }
>  if (params->has_announce_rounds) {
> +dest->has_

Re: [PATCH] decodetree: Do not remove output_file from /dev

2023-05-26 Thread Richard Henderson

On 5/26/23 12:52, Peter Maydell wrote:

On Fri, 26 May 2023 at 18:40, Richard Henderson
 wrote:


Nor report any PermissionError on remove.

Previously we were testing with "> /dev/null", but that's not easy
to do with meson test(), so we want to use '-o /dev/null' instead.
That works fine for all of the existing tests, where all errors are
diagnosed before opening the output file.  However, PMM's named field
patch set diagnoses cycle errors during output.  This is fair, but
we need to be more careful with the remove.

Signed-off-by: Richard Henderson 
---
  scripts/decodetree.py | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index e4ef0a03cc..a9a0cd0fa3 100644
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -71,7 +71,12 @@ def error_with_file(file, lineno, *args):

  if output_file and output_fd:
  output_fd.close()
-os.remove(output_file)
+# Do not try to remove e.g. -o /dev/null
+if not output_file.startswith("/dev"):
+try:
+os.remove(output_file)
+except PermissionError:
+pass


Maybe rather than hardcoding /dev, only try to delete the file
if it's a normal file, i.e.:
if os.path.isfile(output_file):
os.remove(output_file)


Good idea.


r~




Re: [PATCH v3 00/10] migration: Remove QEMUFileHooks

2023-05-26 Thread Peter Xu
On Tue, May 09, 2023 at 02:06:50PM +0200, Juan Quintela wrote:
> Hi
> 
> Changes in v3:
> - fix rdma_migration to reset clearly (thanks danp)
> - redo the cherks for migration/rdma
> - rebased on top of the counters series:
> [PATCH 00/21] Migration: More migration atomic counters
> Based-on: Message-Id: <20230508130909.65420-1-quint...@redhat.com>

Patch 9 should logically be split and spread into previous patches, but not
a big issue.

For the series:

Reviewed-by: Peter Xu 

-- 
Peter Xu




Re: [PULL 0/5] Hexagon update

2023-05-26 Thread Richard Henderson

On 5/26/23 07:20, Taylor Simpson wrote:

The following changes since commit a3cb6d5004ff638aefe686ecd540718a793bd1b1:

   Merge tag 'pull-tcg-20230525' ofhttps://gitlab.com/rth7680/qemu  into 
staging (2023-05-25 11:11:52 -0700)

are available in the Git repository at:

   https://github.com/quic/qemu  tags/pull-hex-20230526

for you to fetch changes up to 7d196e2196d50e0dda0f87f396d4f4a7ad9aafbe:

   Hexagon (target/hexagon) Change Hexagon maintainer (2023-05-26 07:03:41 
-0700)


Hexagon update


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/8.1 as 
appropriate.


r~




Re: [PATCH v4 2/2] qapi: add '@fdset' feature for BlockdevOptionsVirtioBlkVhostVdpa

2023-05-26 Thread Jonathon Jongsma

On 5/26/23 10:03 AM, Stefano Garzarella wrote:

The virtio-blk-vhost-vdpa driver in libblkio 1.3.0 supports the fd
passing through the new 'fd' property.

Since now we are using qemu_open() on '@path' if the virtio-blk driver
supports the fd passing, let's announce it.
In this way, the management layer can pass the file descriptor of an
already opened vhost-vdpa character device. This is useful especially
when the device can only be accessed with certain privileges.

Add the '@fdset' feature only when the virtio-blk-vhost-vdpa driver
in libblkio supports it.

Suggested-by: Markus Armbruster 
Signed-off-by: Stefano Garzarella 
---

Notes:
 v4:
 - added this patch to allow libvirt to discover we support fdset [Markus]

  meson.build  | 4 
  qapi/block-core.json | 8 +++-
  2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/meson.build b/meson.build
index 78890f0155..8ea911f7b4 100644
--- a/meson.build
+++ b/meson.build
@@ -2108,6 +2108,10 @@ config_host_data.set('CONFIG_LZO', lzo.found())
  config_host_data.set('CONFIG_MPATH', mpathpersist.found())
  config_host_data.set('CONFIG_MPATH_NEW_API', mpathpersist_new_api)
  config_host_data.set('CONFIG_BLKIO', blkio.found())
+if blkio.found()
+  config_host_data.set('CONFIG_BLKIO_VHOST_VDPA_FD',
+   blkio.version().version_compare('>=1.3.0'))
+endif
  config_host_data.set('CONFIG_CURL', curl.found())
  config_host_data.set('CONFIG_CURSES', curses.found())
  config_host_data.set('CONFIG_GBM', gbm.found())
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 98d9116dae..1538d84ef4 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3955,10 +3955,16 @@
  #
  # @path: path to the vhost-vdpa character device.
  #
+# Features:
+# @fdset: Member @path supports the special "/dev/fdset/N" path (since 8.1)
+#
  # Since: 7.2
  ##
  { 'struct': 'BlockdevOptionsVirtioBlkVhostVdpa',
-  'data': { 'path': 'str' },
+  'data': { 'path': { 'type': 'str',
+  'features': [ { 'name' :'fdset',
+  'if': 'CONFIG_BLKIO_VHOST_VDPA_FD' } ]
+} },
'if': 'CONFIG_BLKIO' }
  
  ##



Take this for what it's worth and do what's best for qemu, but... It's 
easier for libvirt if the 'features' are on the object rather than on 
the 'path' member. Our schema parsing code already supports object 
features but does not yet support features on builtin types.


i.e. something like this just works without any refactoring in libvirt:

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 1538d84ef4..78cfd10cdb 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3961,11 +3961,11 @@
 # Since: 7.2
 ##
 { 'struct': 'BlockdevOptionsVirtioBlkVhostVdpa',
-  'data': { 'path': { 'type': 'str',
-  'features': [ { 'name' :'fdset',
-  'if': 
'CONFIG_BLKIO_VHOST_VDPA_FD' } ]

-} },
-  'if': 'CONFIG_BLKIO' }
+  'data': { 'path': 'str' },
+  'features': [ { 'name' :'fdset',
+'if': 'CONFIG_BLKIO_VHOST_VDPA_FD' } ],
+  'if': 'CONFIG_BLKIO'
+ }

 ##
 # @IscsiTransport:





Re: [PATCH 4/5] migration/rdma: It makes no sense to recive that flag without RDMA

2023-05-26 Thread Peter Xu
On Thu, May 04, 2023 at 01:44:42PM +0200, Juan Quintela wrote:
> This could only happen if the source send
> RAM_SAVE_FLAG_HOOK (i.e. rdma) and destination don't have CONFIG_RDMA.
> 
> Signed-off-by: Juan Quintela 

Bah, first patch to start reading the master code and it's already merged.
I'll stop here then... :)

-- 
Peter Xu




Re: [PATCH 3/5] migration/rdma: We can calculate the rioc from the QEMUFile

2023-05-26 Thread Peter Xu
On Thu, May 04, 2023 at 01:44:41PM +0200, Juan Quintela wrote:
> Signed-off-by: Juan Quintela 

Reviewed-by: Peter Xu 

-- 
Peter Xu




Re: [PATCH 2/5] migration/rdma: simplify ram_control_load_hook()

2023-05-26 Thread Peter Xu
On Thu, May 04, 2023 at 01:44:40PM +0200, Juan Quintela wrote:
> Signed-off-by: Juan Quintela 

Reviewed-by: Peter Xu 

-- 
Peter Xu




Re: [PATCH 1/5] migration: Make RAM_SAVE_FLAG_HOOK a normal case entry

2023-05-26 Thread Peter Xu
On Thu, May 04, 2023 at 01:44:39PM +0200, Juan Quintela wrote:
> Fixes this commit, clearly a bad merge after a rebase or similar, it
> should have been its own case since that point.
> 
> commit 5b0e9dd46fbda5152566a4a26fd96bc0d0452bf7
> Author: Peter Lieven 
> Date:   Tue Jun 24 11:32:36 2014 +0200
> 
> migration: catch unknown flag combinations in ram_load
> 
> Signed-off-by: Juan Quintela 

Reviewed-by: Peter Xu 

-- 
Peter Xu




[PATCH] Add virtio-sound and virtio-sound-pci devices

2023-05-26 Thread Manos Pitsidianakis
This patch adds an audio device implementing the recent virtio sound
spec (1.2) and a corresponding PCI wrapper device.

PCM functionality is implemented, and jack[0], chmaps[1] messages are
at the moment ignored.

To test this, you'll need a >6.0 kernel compiled with the virtio-snd
flag enabled, which distros have off by default.

Use with following flags in the invocation:

  -device virtio-sound-pci,disable-legacy=on

And an audio backend listed with `-audio driver=help` that works on
your host machine, e.g.:

Pulseaudio:
  -audio driver=pa,model=virtio-sound,server=/run/user/1000/pulse/native
sdl:
  -audio driver=sdl,model=virtio-sound
coreaudio:
  -audio driver=coreaudio,model=virtio-sound
etc.

You can use speaker-test from alsa-tools to play noise, sines, or
WAV files.

PS2: This patch was based on a draft patch posted by opensynergy a few
 years ago.

[0]: https://www.kernel.org/doc/html/latest/sound/designs/jack-controls.html
[1]: 
https://www.kernel.org/doc/html/latest/sound/designs/channel-mapping-api.html

Signed-off-by: Emmanouil Pitsidianakis 
---
 hw/audio/Kconfig   |5 +
 hw/audio/meson.build   |1 +
 hw/audio/trace-events  |   23 +
 hw/audio/virtio-snd.c  | 1139 
 hw/virtio/Kconfig  |5 +
 hw/virtio/meson.build  |1 +
 hw/virtio/virtio-snd-pci.c |  104 +++
 include/hw/pci/pci.h   |1 +
 include/hw/virtio/virtio-snd.h |  193 ++
 softmmu/qdev-monitor.c |1 +
 10 files changed, 1473 insertions(+)
 create mode 100644 hw/audio/virtio-snd.c
 create mode 100644 hw/virtio/virtio-snd-pci.c
 create mode 100644 include/hw/virtio/virtio-snd.h

diff --git a/hw/audio/Kconfig b/hw/audio/Kconfig
index e76c69ca7e..74afa21c50 100644
--- a/hw/audio/Kconfig
+++ b/hw/audio/Kconfig
@@ -47,3 +47,8 @@ config PL041
 
 config CS4231
 bool
+
+config VIRTIO_SND
+bool
+default y
+depends on VIRTIO
diff --git a/hw/audio/meson.build b/hw/audio/meson.build
index e48a9fc73d..455e6a1501 100644
--- a/hw/audio/meson.build
+++ b/hw/audio/meson.build
@@ -12,3 +12,4 @@ softmmu_ss.add(when: 'CONFIG_PL041', if_true: 
files('pl041.c', 'lm4549.c'))
 softmmu_ss.add(when: 'CONFIG_SB16', if_true: files('sb16.c'))
 softmmu_ss.add(when: 'CONFIG_VT82C686', if_true: files('via-ac97.c'))
 softmmu_ss.add(when: 'CONFIG_WM8750', if_true: files('wm8750.c'))
+softmmu_ss.add(when: 'CONFIG_VIRTIO_SND', if_true: files('virtio-snd.c'))
diff --git a/hw/audio/trace-events b/hw/audio/trace-events
index 4dec48a4fd..d8ade63f13 100644
--- a/hw/audio/trace-events
+++ b/hw/audio/trace-events
@@ -17,3 +17,26 @@ via_ac97_codec_write(uint8_t addr, uint16_t val) "0x%x <- 
0x%x"
 via_ac97_sgd_fetch(uint32_t curr, uint32_t addr, char stop, char eol, char 
flag, uint32_t len) "curr=0x%x addr=0x%x %c%c%c len=%d"
 via_ac97_sgd_read(uint64_t addr, unsigned size, uint64_t val) "0x%"PRIx64" %d 
-> 0x%"PRIx64
 via_ac97_sgd_write(uint64_t addr, unsigned size, uint64_t val) "0x%"PRIx64" %d 
<- 0x%"PRIx64
+
+#virtio-snd.c
+virtio_snd_pcm_stream_flush(int stream) "flushing stream %d"
+virtio_snd_handle_ctrl(void *vdev, void *vq) "snd %p: handle ctrl event for 
queue %p"
+#virtio_snd_handle_jack_info(int jack) "VIRTIO_SND_JACK_INFO called for jack 
%d"
+#virtio_snd_handle_jack_remap(void) "VIRTIO_SND_PCM_JACK_REMAP called"
+virtio_snd_handle_pcm_info(int stream) "VIRTIO_SND_R_PCM_INFO called for 
stream %d"
+virtio_snd_handle_pcm_set_params(int stream) "VIRTIO_SND_PCM_SET_PARAMS called 
for stream %d"
+virtio_snd_handle_pcm_start_stop(const char *code, int stream) "%s called for 
stream %d"
+virtio_snd_handle_pcm_release(int stream) "VIRTIO_SND_PCM_RELEASE called for 
stream %id"
+virtio_snd_handle_chmap_info(void) "VIRTIO_SND_CHMAP_INFO called"
+virtio_snd_handle_xfer(void) "tx/rx queue callback called"
+virtio_snd_handle_xfer_elem(const char * k) "xfer handled in virtio_snd_pcm_%s"
+virtio_snd_handle_event(void) "event queue callback called"
+virtio_snd_cpu_is_stopped(void *snd, int size) "snd %p: cpu is stopped, 
dropping %d bytes"
+virtio_snd_realize(void *snd) "snd %p: realize"
+virtio_snd_unrealize(void *snd) "snd %p: realize"
+virtio_snd_get_features(void *vdev, uint64_t features) "snd %p: get_features 
0x%"PRIx64
+virtio_snd_get_config(void *vdev, uint32_t jacks, uint32_t streams, uint32_t 
chmaps) "snd %p: get_config jacks=%d streams=%d chmaps=%d"
+virtio_snd_set_config(void *vdev, uint32_t jacks, uint32_t new_jacks, uint32_t 
streams, uint32_t new_streams, uint32_t chmaps, uint32_t new_chmaps) "snd %p: 
set_config jacks from %d->%d, streams from %d->%d, chmaps from %d->%d"
+virtio_snd_vm_state_running(void) "vm state running"
+virtio_snd_vm_state_stopped(void) "vm state stopped"
+virtio_snd_handle_code(int val, const char *code) "ctrl code msg val = %d == 
%s"
diff --git a/hw/audio/virtio-snd.c b/hw/audio/virtio-snd.c
new file mode 100644
index 00..3bf657f368
--- /dev/null
+++ b/hw/audio/virtio-snd.c
@@ -0,0 

Support for multiple UARTS on qemu for Arm64

2023-05-26 Thread Anton Dimov Hristozov



Re: [PATCH v7 7/7] hw/cxl/events: Add injection of Memory Module Events

2023-05-26 Thread Markus Armbruster
Jonathan Cameron  writes:

>> > +# @temperature: Device temperature in degrees Celsius.
>> > +#
>> > +# @dirty-shutdown-count: Number of time the device has been unable to  
>> 
>> Number of times
>> 
>> > +#determine whether data loss may have occurred.
>> > +#
>> > +# @corrected-volatile-error-count: Total number of correctable errors in
>> > +#  volatile memory.
>> > +#
>> > +# @corrected-persistent-error-count: Total number correctable errors in
>> > +#persistent memory  
>> 
>> Please format like
>> 
>># @flags: Event Record Flags.  See CXL r3.0 Table 8-42 Common Event
>># Record Format, Event Record Flags for subfield definitions.
>>#
>># @type: Device Event Type.  See CXL r3.0 Table 8-45 Memory Module
>># Event Record for bit definitions for bit definiions.
>>#
>># @health-status: Overall health summary bitmap.  See CXL r3.0 Table
>># 8-100 Get Health Info Output Payload, Health Status for bit
>># definitions.
>>#
>># @media-status: Overall media health summary.  See CXL r3.0 Table
>># 8-100 Get Health Info Output Payload, Media Status for bit
>># definitions.
>>#
>># @additional-status: See CXL r3.0 Table 8-100 Get Health Info Output
>># Payload, Additional Status for subfield definitions.
>>#
>># @life-used: Percentage (0-100) of factory expected life span.
>>#
>># @temperature: Device temperature in degrees Celsius.
>>#
>># @dirty-shutdown-count: Number of time the device has been unable to
>># determine whether data loss may have occurred.
>
> With "Number of times" this runs to 71 chars. reflowed appropriately for v8

Appreciated!

>>#
>># @corrected-volatile-error-count: Total number of correctable errors
>># in volatile memory.
>>#
>># @corrected-persistent-error-count: Total number correctable errors
>># in persistent memory
>> 
>> to blend in with recent commit a937b6aa739 (qapi: Reformat doc comments
>> to conform to current conventions).
>> 
>> 
>> > +#




Re: [PATCH] decodetree: Do not remove output_file from /dev

2023-05-26 Thread Peter Maydell
On Fri, 26 May 2023 at 18:40, Richard Henderson
 wrote:
>
> Nor report any PermissionError on remove.
>
> Previously we were testing with "> /dev/null", but that's not easy
> to do with meson test(), so we want to use '-o /dev/null' instead.
> That works fine for all of the existing tests, where all errors are
> diagnosed before opening the output file.  However, PMM's named field
> patch set diagnoses cycle errors during output.  This is fair, but
> we need to be more careful with the remove.
>
> Signed-off-by: Richard Henderson 
> ---
>  scripts/decodetree.py | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/scripts/decodetree.py b/scripts/decodetree.py
> index e4ef0a03cc..a9a0cd0fa3 100644
> --- a/scripts/decodetree.py
> +++ b/scripts/decodetree.py
> @@ -71,7 +71,12 @@ def error_with_file(file, lineno, *args):
>
>  if output_file and output_fd:
>  output_fd.close()
> -os.remove(output_file)
> +# Do not try to remove e.g. -o /dev/null
> +if not output_file.startswith("/dev"):
> +try:
> +os.remove(output_file)
> +except PermissionError:
> +pass

Maybe rather than hardcoding /dev, only try to delete the file
if it's a normal file, i.e.:
   if os.path.isfile(output_file):
   os.remove(output_file)

?

-- PMM



Re: [PATCH 6/6] tests/decode: Add tests for various named-field cases

2023-05-26 Thread Peter Maydell
On Fri, 26 May 2023 at 18:07, Richard Henderson
 wrote:
>
> On 5/24/23 03:26, Peter Maydell wrote:
> > On Tue, 23 May 2023 at 13:04, Peter Maydell  
> > wrote:
> >>
> >> Add some tests for various cases of named-field use, both ones that
> >> should work and ones that should be diagnosed as errors.
> >>
> >> Signed-off-by: Peter Maydell 
> >> ---
> >>   tests/decode/err_field1.decode   |  2 +-
> >>   tests/decode/err_field10.decode  |  7 +++
> >>   tests/decode/err_field7.decode   |  7 +++
> >>   tests/decode/err_field8.decode   |  8 
> >>   tests/decode/err_field9.decode   | 14 ++
> >>   tests/decode/succ_named_field.decode | 19 +++
> >>   6 files changed, 56 insertions(+), 1 deletion(-)
> >>   create mode 100644 tests/decode/err_field10.decode
> >>   create mode 100644 tests/decode/err_field7.decode
> >>   create mode 100644 tests/decode/err_field8.decode
> >>   create mode 100644 tests/decode/err_field9.decode
> >>   create mode 100644 tests/decode/succ_named_field.decode
> >>
> >> diff --git a/tests/decode/err_field1.decode 
> >> b/tests/decode/err_field1.decode
> >> index e07a5a73e0e..85c3f326d07 100644
> >> --- a/tests/decode/err_field1.decode
> >> +++ b/tests/decode/err_field1.decode
> >> @@ -2,4 +2,4 @@
> >>   # See the COPYING.LIB file in the top-level directory.
> >>
> >>   # Diagnose invalid field syntax
> >> -%field asdf
> >> +%field 1asdf
> >
> > I just realized that this specific change needs to go before patch 5:
> > it's updating an existing test because "asdf" used to be invalid
> > syntax and now is not. Otherwise bisection will break.
>
> Really?  The test still fails here at patch 5:
>
> /home/rth/qemu/bld/../src/tests/decode/err_field1.decode:5: error: invalid 
> field token "asdf"

Oh, right, because there's no trailing size specification
so it doesn't get recognized as a named field.

thanks
-- PMM



Re: [RESEND PATCH v2 0/3] Enable -cpu ,help

2023-05-26 Thread Peter Maydell
On Fri, 26 May 2023 at 15:28, Igor Mammedov  wrote:
>
> On Mon,  3 Apr 2023 21:19:53 -0400
> Dinah Baum  wrote:
>
> > Part 1 is a refactor/code motion patch for
> > qapi/machine target required for setup of
> >
> > Part 2 which enables query-cpu-model-expansion
> > on all architectures
> >
> > Part 3 implements the ',help' feature
> >
> > Limitations:
> > Currently only 'FULL' expansion queries are implemented since
> > that's the only type enabled on the architectures that
> > allow feature probing
> >
> > Unlike the 'device,help' command, default values aren't
> > printed
>
> what's wrong with 'device,help' if it's used for cpu devices?

Nothing, but almost no creation/configuration of CPUs
is done with -device. -cpu is by far the more usual way,
so '-cpu foo,help' should work...

-- PMM



Re: [PATCH v2 16/16] migration/multifd: Compute transferred bytes correctly

2023-05-26 Thread Leonardo Bras Soares Passos
On Fri, May 26, 2023 at 5:24 AM Juan Quintela  wrote:
>
> Leonardo Brás  wrote:
> > On Mon, 2023-05-15 at 21:57 +0200, Juan Quintela wrote:
> >> In the past, we had to put the in the main thread all the operations
> >> related with sizes due to qemu_file not beeing thread safe.  As now
> >> all counters are atomic, we can update the counters just after the
> >> do the write.  As an aditional bonus, we are able to use the right
> >> value for the compression methods.  Right now we were assuming that
> >> there were no compression at all.
> >>
> >> Signed-off-by: Juan Quintela 
> >> ---
> >>  migration/multifd.c | 13 -
> >>  1 file changed, 8 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/migration/multifd.c b/migration/multifd.c
> >> index aabf9b6d98..0bf5958a9c 100644
> >> --- a/migration/multifd.c
> >> +++ b/migration/multifd.c
> >> @@ -175,6 +175,7 @@ void multifd_register_ops(int method, MultiFDMethods 
> >> *ops)
> >>  static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
> >>  {
> >>  MultiFDInit_t msg = {};
> >> +size_t size = sizeof(msg);
> >>  int ret;
> >>
> >>  msg.magic = cpu_to_be32(MULTIFD_MAGIC);
> >> @@ -182,10 +183,12 @@ static int 
> >> multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
> >>  msg.id = p->id;
> >>  memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid));
> >>
> >> -ret = qio_channel_write_all(p->c, (char *)&msg, sizeof(msg), errp);
> >> +ret = qio_channel_write_all(p->c, (char *)&msg, size, errp);
> >>  if (ret != 0) {
> >>  return -1;
> >>  }
> >> +stat64_add(&mig_stats.multifd_bytes, size);
> >> +stat64_add(&mig_stats.transferred, size);
> >>  return 0;
> >>  }
> >
> > Humm, those are atomic ops, right?
> >
> > You think we could have 'multifd_bytes' and 'transferred' in the same 
> > cacheline,
> > to avoid 2 cacheline bounces?
>
> Don't matter on next series.
>
> mig_stats.transferred is dropped.
>
> And transferred becomes:
>
> qemu_file_transferred + multifd_bytes + rdma_bytes.
>
> So everytime that we do a write, we only update one counter.

That's even better :)

Thanks!

>
> > Well, it's unrelated to this patchset, so:
> >
> > Reviewed-by: Leonardo Bras 
>




Re: [PATCH v2 15/16] migration/rdma: Simplify the function that saves a page

2023-05-26 Thread Leonardo Bras Soares Passos
On Fri, May 26, 2023 at 5:21 AM Juan Quintela  wrote:
>
> Leonardo Brás  wrote:
> > On Mon, 2023-05-15 at 21:57 +0200, Juan Quintela wrote:
> >> When we sent a page through QEMUFile hooks (RDMA) there are three
> >> posiblities:
> >> - We are not using RDMA. return RAM_SAVE_CONTROL_DELAYED and
> >>   control_save_page() returns false to let anything else to proceed.
> >> - There is one error but we are using RDMA.  Then we return a negative
> >>   value, control_save_page() needs to return true.
> >> - Everything goes well and RDMA start the sent of the page
> >>   asynchronously.  It returns RAM_SAVE_CONTROL_DELAYED and we need to
> >>   return 1 for ram_save_page_legacy.
> >>
> >> Clear?
> >>
> >> I know, I know, the interfaz is as bad as it gets.  I think that now
> >> it is a bit clearer, but this needs to be done some other way.
> >
> > interface?
>
> Yeap.  I used the Spanish spelling, that, you know, in English is wrong O:-)

Happens to me all the time :)

>
> Thanks.
> >> diff --git a/migration/rdma.c b/migration/rdma.c
> >> index 416dec00a2..12d3c23fdc 100644
> >> --- a/migration/rdma.c
> >> +++ b/migration/rdma.c
> >> @@ -3239,13 +3239,12 @@ qio_channel_rdma_shutdown(QIOChannel *ioc,
> >>   *
> >>   *@size : Number of bytes to transfer
> >>   *
> >> - *@bytes_sent : User-specificed pointer to indicate how many bytes 
> >> were
> >> + *@pages_sent : User-specificed pointer to indicate how many pages 
> >> were
> >>   *  sent. Usually, this will not be more than a few bytes 
> >> of
> >>   *  the protocol because most transfers are sent 
> >> asynchronously.
> >>   */
> >
> > There is new doc to pages_sent but the parameter is not added to the 
> > signature
> > bellow. Am I missing something?
>
> Good catch.

:)

>
> I redid this patch several times.  And it appears that I forgot some 
> leftovers.
>
> >
> >> -static size_t qemu_rdma_save_page(QEMUFile *f,
> >> -  ram_addr_t block_offset, ram_addr_t 
> >> offset,
> >> -  size_t size, uint64_t *bytes_sent)
> >> +static int qemu_rdma_save_page(QEMUFile *f, ram_addr_t block_offset,
> >> +   ram_addr_t offset, size_t size)
> >>  {
> >>  QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));
> >>  RDMAContext *rdma;
> >> @@ -3277,18 +3276,6 @@ static size_t qemu_rdma_save_page(QEMUFile *f,
> >>  goto err;
> >>  }
> >>
> >> -/*
> >> - * We always return 1 bytes because the RDMA
> >> - * protocol is completely asynchronous. We do not yet know
> >> - * whether an  identified chunk is zero or not because we're
> >> - * waiting for other pages to potentially be merged with
> >> - * the current chunk. So, we have to call qemu_update_position()
> >> - * later on when the actual write occurs.
> >> - */
> >> -if (bytes_sent) {
> >> -*bytes_sent = 1;
> >> -}
> >> -
> >>  /*
> >>   * Drain the Completion Queue if possible, but do not block,
> >>   * just poll.
> >
> > Oh, so this one complements 13/16.
> > Since it doesn't do imaginary transfers anymore, there is no need to use
> > bytes_sent pointer to keep track of them anymore.
> >
> > Other than the pages_sent above that I couldn't understand:
> > Reviewed-by: Leonardo Bras 
>
> Dropping that bit.
>
> Thanks.
>




Re: [PATCH v2 09/16] migration: We don't need the field rate_limit_used anymore

2023-05-26 Thread Leonardo Bras Soares Passos
On Fri, May 26, 2023 at 5:18 AM Juan Quintela  wrote:
>
> Leonardo Brás  wrote:
> > On Mon, 2023-05-15 at 21:57 +0200, Juan Quintela wrote:
> >> Since previous commit, we calculate how much data we have send with
> >> migration_transferred_bytes() so no need to maintain this counter and
> >> remember to always update it.
> >>
> >> Signed-off-by: Juan Quintela 
> >> Reviewed-by: Cédric Le Goater 
>
> > I reviewed this one together with 8/16.
> > It makes sense for me to squash them together, but anyway:
>
> Already in tree.
>
> See explanation for the split on previous patch.
>
> The other reason for the split is that this part of the patch is trivial
> to review O:-)

:-)

>
> Later, Juan.
>
> >
> > Reviewed-by: Leonardo Bras 
>




Re: [PATCH v2 08/16] migration: Use migration_transferred_bytes() to calculate rate_limit

2023-05-26 Thread Leonardo Bras Soares Passos
On Fri, May 26, 2023 at 5:17 AM Juan Quintela  wrote:
>
> Leonardo Brás  wrote:
> > On Mon, 2023-05-15 at 21:57 +0200, Juan Quintela wrote:
> >> Signed-off-by: Juan Quintela 
> >> Reviewed-by: Cédric Le Goater 
> >> ---
> >>  migration/migration-stats.h | 8 +++-
> >>  migration/migration-stats.c | 7 +--
> >>  migration/migration.c   | 2 +-
> >>  3 files changed, 13 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/migration/migration-stats.h b/migration/migration-stats.h
> >> index 91fda378d3..f1465c2ebe 100644
> >> --- a/migration/migration-stats.h
> >> +++ b/migration/migration-stats.h
> >> @@ -81,6 +81,10 @@ typedef struct {
> >>   * Number of bytes sent during precopy stage.
> >>   */
> >>  Stat64 precopy_bytes;
> >> +/*
> >> + * Amount of transferred data at the start of current cycle.
> >> + */
> >> +Stat64 rate_limit_start;
> >>  /*
> >>   * Maximum amount of data we can send in a cycle.
> >>   */
> >> @@ -136,8 +140,10 @@ uint64_t migration_rate_get(void);
> >>   * migration_rate_reset: Reset the rate limit counter.
> >>   *
> >>   * This is called when we know we start a new transfer cycle.
> >> + *
> >> + * @f: QEMUFile used for main migration channel
> >>   */
> >> -void migration_rate_reset(void);
> >> +void migration_rate_reset(QEMUFile *f);
> >>
> >>  /**
> >>   * migration_rate_set: Set the maximum amount that can be transferred.
> >> diff --git a/migration/migration-stats.c b/migration/migration-stats.c
> >> index 301392d208..da2bb69a15 100644
> >> --- a/migration/migration-stats.c
> >> +++ b/migration/migration-stats.c
> >> @@ -31,7 +31,9 @@ bool migration_rate_exceeded(QEMUFile *f)
> >>  return true;
> >>  }
> >>
> >> -uint64_t rate_limit_used = stat64_get(&mig_stats.rate_limit_used);
> >> +uint64_t rate_limit_start = stat64_get(&mig_stats.rate_limit_start);
> >> +uint64_t rate_limit_current = migration_transferred_bytes(f);
> >> +uint64_t rate_limit_used = rate_limit_current - rate_limit_start;
> >>  uint64_t rate_limit_max = stat64_get(&mig_stats.rate_limit_max);
> >
> > So, IIUC, instead of updating mig_stats.rate_limit_used every time data is 
> > sent,
> > the idea is to 'reset' it to migration_transferred_bytes() at the beginning 
> > of a
> > cycle, and read migration_transferred_bytes() again for checking if the 
> > limit
> > was not crossed.
> >
> > Its a nice change since there is no need to update 2 counters, when 1 is 
> > enough.
> >
> > I think it would look nicer if squashed with 9/16, though. It would make it 
> > more
> > clear this is being added to replace migration_rate_account() strategy.
> >
> > What do you think?
>
> Already in tree.

My bad.
After I ended up reviewing the patchset I noticed a lot of it was
already in the PULL request.

>
> Done this way because on my tree there was an intermediate patch that
> did something like:
>
>
> uint64_t rate_limit_used = stat64_get(&mig_stats.rate_limit_used);
> uint64_t rate_limit_start = stat64_get(&mig_stats.rate_limit_start);
> uint64_t rate_limit_current = migration_transferred_bytes(f);
> uint64_t rate_limit_used_new = rate_limit_current - rate_limit_start;
>
> if (rate_limit_used_new != rate_limit_used) {
> printf("rate_limit old %lu new %lu\n", ...);
> }
>
> So I was sure that the counter that I was replacing had the same value
> that the new one.

Oh, I see.
You kept both to verify the implementation.
Makes sense

>
> This is the reason why I fixed transferred atomic in the previous patch,
> not because it mattered on the big scheme of things (migration_test was
> missing something like 100KB for the normal stage when I started, that
> for calculations don't matter).  But to check if I was doing the things
> right it mattered.  With that patch my replacement counter was exact,
> and none of the if's triggered.
>
> Except for the device transffer stages, there I missed something like
> 900KB, but it made no sense to go all over the tree to fix a counter
> that I was going to remove later.

Yeah, it makes no sense to invest time on stuff that will be removed later.

Thanks for helping me understand this :)

>
> Regards, Juan.
>




Re: [PATCH v2 04/16] qemu-file: Account for rate_limit usage on qemu_fflush()

2023-05-26 Thread Leonardo Bras Soares Passos
On Fri, May 26, 2023 at 5:09 AM Juan Quintela  wrote:
>
> Leonardo Brás  wrote:
> > On Mon, 2023-05-15 at 21:56 +0200, Juan Quintela wrote:
> >> That is the moment we know we have transferred something.
> >>
> >> Signed-off-by: Juan Quintela 
> >> Reviewed-by: Cédric Le Goater 
> >> ---
> >>  migration/qemu-file.c | 7 +++
> >>  1 file changed, 3 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> >> index 4bc875b452..956bd2a580 100644
> >> --- a/migration/qemu-file.c
> >> +++ b/migration/qemu-file.c
> >> @@ -302,7 +302,9 @@ void qemu_fflush(QEMUFile *f)
> >> &local_error) < 0) {
> >>  qemu_file_set_error_obj(f, -EIO, local_error);
> >>  } else {
> >> -f->total_transferred += iov_size(f->iov, f->iovcnt);
> >> +uint64_t size = iov_size(f->iov, f->iovcnt);
> >> +qemu_file_acct_rate_limit(f, size);
> >> +f->total_transferred += size;
> >>  }
> >>
> >>  qemu_iovec_release_ram(f);
> >> @@ -519,7 +521,6 @@ void qemu_put_buffer_async(QEMUFile *f, const uint8_t 
> >> *buf, size_t size,
> >>  return;
> >>  }
> >>
> >> -f->rate_limit_used += size;
> >>  add_to_iovec(f, buf, size, may_free);
> >>  }
> >>
> >> @@ -537,7 +538,6 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, 
> >> size_t size)
> >>  l = size;
> >>  }
> >>  memcpy(f->buf + f->buf_index, buf, l);
> >> -f->rate_limit_used += l;
> >>  add_buf_to_iovec(f, l);
> >>  if (qemu_file_get_error(f)) {
> >>  break;
> >> @@ -554,7 +554,6 @@ void qemu_put_byte(QEMUFile *f, int v)
> >>  }
> >>
> >>  f->buf[f->buf_index] = v;
> >> -f->rate_limit_used++;
> >>  add_buf_to_iovec(f, 1);
> >>  }
> >>
> >
> > If we are counting transferred data at fflush, it makes sense to increase 
> > rate-
> > limit accounting at the same place. It may be less granular, but is more
> > efficient.
>
> Yeap, the whole point is that in my next series, rate_limit_used
> dissapear, we just use transferred for both things(*).
>
> Later, Juan.
>
> *: It is a bit more complicated than that, but we go from three counters
>  to a single counter.
>

Seems great to simplify stuff.
Thanks!
Leo




Re: [PATCH v2 03/16] migration: Move setup_time to mig_stats

2023-05-26 Thread Leonardo Bras Soares Passos
On Fri, May 26, 2023 at 5:07 AM Juan Quintela  wrote:
>
> Leonardo Brás  wrote:
> > On Mon, 2023-05-15 at 21:56 +0200, Juan Quintela wrote:
> >> It is a time that needs to be cleaned each time cancel migration.
> >> Once there create migration_time_since() to calculate how time since a
> >> time in the past.
> >>
> >> Signed-off-by: Juan Quintela 
> >>
> >> ---
> >>
> >> Rename to migration_time_since (cédric)
> >> ---
> >>  migration/migration-stats.h | 13 +
> >>  migration/migration.h   |  1 -
> >>  migration/migration-stats.c |  7 +++
> >>  migration/migration.c   |  9 -
> >>  4 files changed, 24 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/migration/migration-stats.h b/migration/migration-stats.h
> >> index e782f1b0df..21402af9e4 100644
> >> --- a/migration/migration-stats.h
> >> +++ b/migration/migration-stats.h
> >> @@ -75,6 +75,10 @@ typedef struct {
> >>   * Number of bytes sent during precopy stage.
> >>   */
> >>  Stat64 precopy_bytes;
> >> +/*
> >> + * How long has the setup stage took.
> >> + */
> >> +Stat64 setup_time;
> >>  /*
> >>   * Total number of bytes transferred.
> >>   */
> >> @@ -87,4 +91,13 @@ typedef struct {
> >>
> >>  extern MigrationAtomicStats mig_stats;
> >>
> >> +/**
> >> + * migration_time_since: Calculate how much time has passed
> >> + *
> >> + * @stats: migration stats
> >> + * @since: reference time since we want to calculate
> >> + *
> >> + * Returns: Nothing.  The time is stored in val.
> >> + */
> >> +void migration_time_since(MigrationAtomicStats *stats, int64_t since);
> >>  #endif
> >> diff --git a/migration/migration.h b/migration/migration.h
> >> index 48a46123a0..27aa3b1035 100644
> >> --- a/migration/migration.h
> >> +++ b/migration/migration.h
> >> @@ -316,7 +316,6 @@ struct MigrationState {
> >>  int64_t downtime;
> >>  int64_t expected_downtime;
> >>  bool capabilities[MIGRATION_CAPABILITY__MAX];
> >> -int64_t setup_time;
> >>  /*
> >>   * Whether guest was running when we enter the completion stage.
> >>   * If migration is interrupted by any reason, we need to continue
> >> diff --git a/migration/migration-stats.c b/migration/migration-stats.c
> >> index 2f2cea965c..3431453c90 100644
> >> --- a/migration/migration-stats.c
> >> +++ b/migration/migration-stats.c
> >> @@ -12,6 +12,13 @@
> >>
> >>  #include "qemu/osdep.h"
> >>  #include "qemu/stats64.h"
> >> +#include "qemu/timer.h"
> >>  #include "migration-stats.h"
> >>
> >>  MigrationAtomicStats mig_stats;
> >> +
> >> +void migration_time_since(MigrationAtomicStats *stats, int64_t since)
> >> +{
> >> +int64_t now = qemu_clock_get_ms(QEMU_CLOCK_HOST);
> >> +stat64_set(&stats->setup_time, now - since);
> >> +}
> >
> > IIUC this calculates a time delta and saves on stats->setup_time, is that 
> > right?
> >
> > It took me some time to understand that, since the function name is
> > migration_time_since(), which seems more generic.
> >
> > Would not be more intuitive to name it migration_setup_time_set() or so?
>
> Dropped this.
> Other reviewer commented that this was not a counter, what is right.  So
> I left the times for future work (it don't interfere with current
> cleanups).

Oh, it makes sense.

>
>
> > I could not see MigrationState->setup_time being initialized as 0 in this 
> > patch.
> > In a quick look in the code I noticed there is no initialization of this 
> > struct,
> > but on qemu_savevm_state() and migrate_prepare() we have:
> >
> > memset(&mig_stats, 0, sizeof(mig_stats));
> >
> > I suppose this is enough, right?
>
> Yeap.  All migration_stats() are initialized to zero at the start of
> qemu, or when we start a migration.
>
> After a migration, it don't matter if it finished with/without error,
> they are there with the right value until we start another migration (in
> the case of error, of course).

That's great to simplify the code.
Thanks!

>
> Later, Juan.
>




Re: [PATCH v2 02/16] migration: Correct transferred bytes value

2023-05-26 Thread Leonardo Bras Soares Passos
On Fri, May 26, 2023 at 5:04 AM Juan Quintela  wrote:
>
> Leonardo Brás  wrote:
> > On Mon, 2023-05-15 at 21:56 +0200, Juan Quintela wrote:
> >> We forget several places to add to trasferred amount of data.  With
> >> this fixes I get:
> >>
> >>qemu_file_transferred() + multifd_bytes == transferred
> >>
> >> The only place whrer this is not true is during devices sending.  But
> >> going all through the full tree searching for devices that use
> >> QEMUFile directly is a bit too much.
> >>
> >> Multifd, precopy and xbzrle work as expected. Postocpy still misses 35
> >> bytes, but searching for them is getting complicated, so I stop here.
> >>
> >> Signed-off-by: Juan Quintela 
> >> ---
> >>  migration/ram.c   | 14 ++
> >>  migration/savevm.c| 19 +--
> >>  migration/vmstate.c   |  3 +++
> >>  migration/meson.build |  2 +-
> >>  4 files changed, 35 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/migration/ram.c b/migration/ram.c
> >> index f69d8d42b0..fd5a8db0f8 100644
> >> --- a/migration/ram.c
> >> +++ b/migration/ram.c
> >> @@ -337,6 +337,7 @@ int64_t ramblock_recv_bitmap_send(QEMUFile *file,
> >>
> >>  g_free(le_bitmap);
> >>
> >> +stat64_add(&mig_stats.transferred, 8 + size + 8);
> >>  if (qemu_file_get_error(file)) {
> >>  return qemu_file_get_error(file);
> >>  }
> >> @@ -1392,6 +1393,7 @@ static int find_dirty_block(RAMState *rs, 
> >> PageSearchStatus *pss)
> >>  return ret;
> >>  }
> >>  qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
> >> +stat64_add(&mig_stats.transferred, 8);
> >>  qemu_fflush(f);
> >>  }
> >>  /*
> >> @@ -3020,6 +3022,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
> >>  RAMState **rsp = opaque;
> >>  RAMBlock *block;
> >>  int ret;
> >> +size_t size = 0;
> >>
> >>  if (compress_threads_save_setup()) {
> >>  return -1;
> >> @@ -3038,16 +3041,20 @@ static int ram_save_setup(QEMUFile *f, void 
> >> *opaque)
> >>  qemu_put_be64(f, ram_bytes_total_with_ignored()
> >>   | RAM_SAVE_FLAG_MEM_SIZE);
> >>
> >> +size += 8;
> >>  RAMBLOCK_FOREACH_MIGRATABLE(block) {
> >>  qemu_put_byte(f, strlen(block->idstr));
> >>  qemu_put_buffer(f, (uint8_t *)block->idstr, 
> >> strlen(block->idstr));
> >>  qemu_put_be64(f, block->used_length);
> >> +size += 1 + strlen(block->idstr) + 8;
> >
> > I was thinking some of them would look better with sizeof()s instead of 
> > given
> > literal number, such as:
> >
> > size += sizeof(Byte) + strlen(block->idstr) + sizeof(block->used_length);
> >
> > Maybe too much?
>
> I dropped this patch for two reasons:
>
> - reviewers gave me a bad time with it O:-)
> - it was there only so if anyone was meassuring that new counters are
>   the same that old counters.
>
> But as I have already checked that, we don't need it.
>
> I drop it on the next round that I send.
> > Maybe, it would be nice to have qemu_put_* to return the value, and in this
> > case:
> >
> > size += qemu_put_be64(...)
> >
> > What do you think?
>
> Even more important than that is to return an error value, but that
> is a very long project.
>
> See on my next series that qemu_fflush() return errors, so code gets
> simplifed:
>
> qemu_fflush(file);
> if (qemu_file_get_error(file)) {
> handle error;
> }
>
> to:
>
> qemu_fflush(file);
> if (qemu_file_get_error(file)) {
> handle error;
> }
>

They look the same to me, what changed?

> We need to do basically all qemu_put_*() and qemu_get_*() functions, but
> it is a step on the right direction.
>
> Later, Juan.
>




[PATCH 3/4] target/s390x: Fix LOCFHR taking the wrong half of R2

2023-05-26 Thread Ilya Leoshkevich
LOCFHR should write top-to-top, but QEMU erroneously writes
bottom-to-top.

Fixes: 45aa9aa3b773 ("target/s390x: Implement load-on-condition-2 insns")
Cc: qemu-sta...@nongnu.org
Reported-by: Mikhail Mitskevich 
Closes: https://gitlab.com/qemu-project/qemu/-/issues/1668
Signed-off-by: Ilya Leoshkevich 
---
 target/s390x/tcg/insn-data.h.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
index e41672684aa..937e18ea9d9 100644
--- a/target/s390x/tcg/insn-data.h.inc
+++ b/target/s390x/tcg/insn-data.h.inc
@@ -564,7 +564,7 @@
 C(0xec46, LOCGHI,  RIE_g, LOC2, r1, i2, r1, 0, loc, 0)
 C(0xec4e, LOCHHI,  RIE_g, LOC2, r1_sr32, i2, new, r1_32h, loc, 0)
 /* LOAD HIGH ON CONDITION */
-C(0xb9e0, LOCFHR,  RRF_c, LOC2, r1_sr32, r2, new, r1_32h, loc, 0)
+C(0xb9e0, LOCFHR,  RRF_c, LOC2, r1_sr32, r2_sr32, new, r1_32h, loc, 0)
 C(0xebe0, LOCFH,   RSY_b, LOC2, r1_sr32, m2_32u, new, r1_32h, loc, 0)
 /* LOAD PAIR DISJOINT */
 D(0xc804, LPD, SSF,   ILA, 0, 0, new_P, r3_P32, lpd, 0, MO_TEUL)
-- 
2.40.1




[PATCH 4/4] tests/tcg/s390x: Test LOCFHR

2023-05-26 Thread Ilya Leoshkevich
Add a small test to prevent regressions.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Ilya Leoshkevich 
---
 tests/tcg/s390x/Makefile.target |  1 +
 tests/tcg/s390x/locfhr.c| 29 +
 2 files changed, 30 insertions(+)
 create mode 100644 tests/tcg/s390x/locfhr.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index c48de439625..3c239fdd082 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -48,6 +48,7 @@ TESTS += $(PGM_SPECIFICATION_TESTS)
 
 Z13_TESTS=vistr
 Z13_TESTS+=lcbb
+Z13_TESTS+=locfhr
 $(Z13_TESTS): CFLAGS+=-march=z13 -O2
 TESTS+=$(Z13_TESTS)
 
diff --git a/tests/tcg/s390x/locfhr.c b/tests/tcg/s390x/locfhr.c
new file mode 100644
index 000..ab9ff6e4490
--- /dev/null
+++ b/tests/tcg/s390x/locfhr.c
@@ -0,0 +1,29 @@
+/*
+ * Test the LOCFHR instruction.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include 
+#include 
+
+static inline __attribute__((__always_inline__)) long
+locfhr(long r1, long r2, int m3, int cc)
+{
+cc <<= 28;
+asm("spm %[cc]\n"
+"locfhr %[r1],%[r2],%[m3]\n"
+: [r1] "+r" (r1)
+: [cc] "r" (cc), [r2] "r" (r2), [m3] "i" (m3)
+: "cc");
+return r1;
+}
+
+int main(void)
+{
+assert(locfhr(0x, 0x, 8, 0) ==
+   0x);
+assert(locfhr(0x, 0x, 11, 1) ==
+   0x);
+
+return EXIT_SUCCESS;
+}
-- 
2.40.1




[PATCH 2/4] tests/tcg/s390x: Test LCBB

2023-05-26 Thread Ilya Leoshkevich
Add a test to prevent regressions.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Ilya Leoshkevich 
---
 tests/tcg/s390x/Makefile.target |  1 +
 tests/tcg/s390x/lcbb.c  | 51 +
 2 files changed, 52 insertions(+)
 create mode 100644 tests/tcg/s390x/lcbb.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 73f7cb828e3..c48de439625 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -47,6 +47,7 @@ $(PGM_SPECIFICATION_TESTS): LDFLAGS+=pgm-specification-user.o
 TESTS += $(PGM_SPECIFICATION_TESTS)
 
 Z13_TESTS=vistr
+Z13_TESTS+=lcbb
 $(Z13_TESTS): CFLAGS+=-march=z13 -O2
 TESTS+=$(Z13_TESTS)
 
diff --git a/tests/tcg/s390x/lcbb.c b/tests/tcg/s390x/lcbb.c
new file mode 100644
index 000..8d368e0998d
--- /dev/null
+++ b/tests/tcg/s390x/lcbb.c
@@ -0,0 +1,51 @@
+/*
+ * Test the LCBB instruction.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include 
+#include 
+
+static inline __attribute__((__always_inline__)) void
+lcbb(long *r1, void *dxb2, int m3, int *cc)
+{
+asm("lcbb %[r1],%[dxb2],%[m3]\n"
+"ipm %[cc]"
+: [r1] "+r" (*r1), [cc] "=r" (*cc)
+: [dxb2] "R" (*(char *)dxb2), [m3] "i" (m3)
+: "cc");
+*cc = (*cc >> 28) & 3;
+}
+
+static char buf[0x1000] __attribute__((aligned(0x1000)));
+
+static inline __attribute__((__always_inline__)) void
+test_lcbb(void *p, int m3, int exp_r1, int exp_cc)
+{
+long r1 = 0xfedcba9876543210;
+int cc;
+
+lcbb(&r1, p, m3, &cc);
+assert(r1 == (0xfedcba98 | exp_r1));
+assert(cc == exp_cc);
+}
+
+int main(void)
+{
+test_lcbb(&buf[0],0, 16, 0);
+test_lcbb(&buf[63],   0,  1, 3);
+test_lcbb(&buf[0],1, 16, 0);
+test_lcbb(&buf[127],  1,  1, 3);
+test_lcbb(&buf[0],2, 16, 0);
+test_lcbb(&buf[255],  2,  1, 3);
+test_lcbb(&buf[0],3, 16, 0);
+test_lcbb(&buf[511],  3,  1, 3);
+test_lcbb(&buf[0],4, 16, 0);
+test_lcbb(&buf[1023], 4,  1, 3);
+test_lcbb(&buf[0],5, 16, 0);
+test_lcbb(&buf[2047], 5,  1, 3);
+test_lcbb(&buf[0],6, 16, 0);
+test_lcbb(&buf[4095], 6,  1, 3);
+
+return EXIT_SUCCESS;
+}
-- 
2.40.1




[PATCH 0/4] Fix Fedora 38 Clang on s390x

2023-05-26 Thread Ilya Leoshkevich
Hi,

It was reported that Fedora 38 Clang does not run correctly under
qemu-s390x [1]. Comparing qemu and real s390x instruction traces has
shown that the implementations of LCBB and LOCFHR were not fully
correct.

This series fixes the issues and adds tests. I can now run Fedora 38
Clang under s390x emulation and compile "hello world" with it.

Best regards,
Ilya

[1] https://bugzilla.redhat.com/show_bug.cgi?id=2209635

Ilya Leoshkevich (4):
  target/s390x: Fix LCBB overwriting the top 32 bits
  tests/tcg/s390x: Test LCBB
  target/s390x: Fix LOCFHR taking the wrong half of R2
  tests/tcg/s390x: Test LOCFHR

 target/s390x/tcg/insn-data.h.inc |  4 +--
 tests/tcg/s390x/Makefile.target  |  2 ++
 tests/tcg/s390x/lcbb.c   | 51 
 tests/tcg/s390x/locfhr.c | 29 ++
 4 files changed, 84 insertions(+), 2 deletions(-)
 create mode 100644 tests/tcg/s390x/lcbb.c
 create mode 100644 tests/tcg/s390x/locfhr.c

-- 
2.40.1




[PATCH 1/4] target/s390x: Fix LCBB overwriting the top 32 bits

2023-05-26 Thread Ilya Leoshkevich
LCBB is supposed to overwrite only the bottom 32 bits, but QEMU
erroneously overwrites the entire register.

Fixes: 6d9303322ed9 ("s390x/tcg: Implement LOAD COUNT TO BLOCK BOUNDARY")
Cc: qemu-sta...@nongnu.org
Signed-off-by: Ilya Leoshkevich 
---
 target/s390x/tcg/insn-data.h.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
index bcc70d99ba2..e41672684aa 100644
--- a/target/s390x/tcg/insn-data.h.inc
+++ b/target/s390x/tcg/insn-data.h.inc
@@ -486,7 +486,7 @@
 F(0xb343, LCXBR,   RRE,   Z,   x2h, x2l, new_P, x1_P, negf128, f128, 
IF_BFP)
 F(0xb373, LCDFR,   RRE,   FPSSH, 0, f2, new, f1, negf64, 0, IF_AFP1 | 
IF_AFP2)
 /* LOAD COUNT TO BLOCK BOUNDARY */
-C(0xe727, LCBB,RXE,   V,   la2, 0, r1, 0, lcbb, 0)
+C(0xe727, LCBB,RXE,   V,   la2, 0, new, r1_32, lcbb, 0)
 /* LOAD HALFWORD */
 C(0xb927, LHR, RRE,   EI,  0, r2_16s, 0, r1_32, mov2, 0)
 C(0xb907, LGHR,RRE,   EI,  0, r2_16s, 0, r1, mov2, 0)
-- 
2.40.1




Re: [PATCH v2 4/6] tests/qtest: make more migration pre-copy scenarios run non-live

2023-05-26 Thread Daniel P . Berrangé
On Mon, Apr 24, 2023 at 06:01:36PM -0300, Fabiano Rosas wrote:
> Daniel P. Berrangé  writes:
> 
> > There are 27 pre-copy live migration scenarios being tested. In all of
> > these we force non-convergance and run for one iteration, then let it
> > converge and wait for completion during the second (or following)
> > iterations. At 3 mbps bandwidth limit the first iteration takes a very
> > long time (~30 seconds).
> >
> > While it is important to test the migration passes and convergance
> > logic, it is overkill to do this for all 27 pre-copy scenarios. The
> > TLS migration scenarios in particular are merely exercising different
> > code paths during connection establishment.
> >
> > To optimize time taken, switch most of the test scenarios to run
> > non-live (ie guest CPUs paused) with no bandwidth limits. This gives
> > a massive speed up for most of the test scenarios.
> >
> > For test coverage the following scenarios are unchanged
> >
> >  * Precopy with UNIX sockets
> >  * Precopy with UNIX sockets and dirty ring tracking
> >  * Precopy with XBZRLE
> >  * Precopy with multifd
> >
> > Signed-off-by: Daniel P. Berrangé 
> > ---
> >  tests/qtest/migration-test.c | 60 ++--
> >  1 file changed, 50 insertions(+), 10 deletions(-)
> >
> > diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
> > index 6492ffa7fe..40d0f75480 100644
> > --- a/tests/qtest/migration-test.c
> > +++ b/tests/qtest/migration-test.c
> > @@ -568,6 +568,9 @@ typedef struct {
> >  MIG_TEST_FAIL_DEST_QUIT_ERR,
> >  } result;
> >  
> > +/* Whether the guest CPUs should be running during migration */
> > +bool live;
> > +
> >  /* Postcopy specific fields */
> >  void *postcopy_data;
> >  bool postcopy_preempt;
> > @@ -1324,8 +1327,6 @@ static void test_precopy_common(MigrateCommon *args)
> >  return;
> >  }
> >  
> > -migrate_ensure_non_converge(from);
> > -
> >  if (args->start_hook) {
> >  data_hook = args->start_hook(from, to);
> >  }
> > @@ -1335,6 +1336,31 @@ static void test_precopy_common(MigrateCommon *args)
> >  wait_for_serial("src_serial");
> >  }
> >  
> > +if (args->live) {
> > +/*
> > + * Testing live migration, we want to ensure that some
> > + * memory is re-dirtied after being transferred, so that
> > + * we exercise logic for dirty page handling. We achieve
> > + * this with a ridiculosly low bandwidth that guarantees
> > + * non-convergance.
> > + */
> > +migrate_ensure_non_converge(from);
> > +} else {
> > +/*
> > + * Testing non-live migration, we allow it to run at
> > + * full speed to ensure short test case duration.
> > + * For tests expected to fail, we don't need to
> > + * change anything.
> > + */
> > +if (args->result == MIG_TEST_SUCCEED) {
> > +qtest_qmp_assert_success(from, "{ 'execute' : 'stop'}");
> > +if (!got_stop) {
> > +qtest_qmp_eventwait(from, "STOP");
> > +}
> > +migrate_ensure_converge(from);
> > +}
> > +}
> > +
> >  if (!args->connect_uri) {
> >  g_autofree char *local_connect_uri =
> >  migrate_get_socket_address(to, "socket-address");
> > @@ -1352,19 +1378,29 @@ static void test_precopy_common(MigrateCommon *args)
> >  qtest_set_expected_status(to, EXIT_FAILURE);
> >  }
> >  } else {
> > -wait_for_migration_pass(from);
> > +if (args->live) {
> > +wait_for_migration_pass(from);
> >  
> > -migrate_ensure_converge(from);
> > +migrate_ensure_converge(from);
> >  
> > -/* We do this first, as it has a timeout to stop us
> > - * hanging forever if migration didn't converge */
> > -wait_for_migration_complete(from);
> > +/*
> > + * We do this first, as it has a timeout to stop us
> > + * hanging forever if migration didn't converge
> > + */
> > +wait_for_migration_complete(from);
> > +
> > +if (!got_stop) {
> > +qtest_qmp_eventwait(from, "STOP");
> > +}
> > +} else {
> > +wait_for_migration_complete(from);
> >  
> > -if (!got_stop) {
> > -qtest_qmp_eventwait(from, "STOP");
> > +qtest_qmp_assert_success(to, "{ 'execute' : 'cont'}");
> 
> I retested and the problem still persists. The issue is with this wait +
> cont sequence:
> 
> wait_for_migration_complete(from);
> qtest_qmp_assert_success(to, "{ 'execute' : 'cont'}");
> 
> We wait for the source to finish but by the time qmp_cont executes, the
> dst is still INMIGRATE, autostart gets set and I never see the RESUME
> event.

This is ultimately caused by the broken logic in the previous
patch 3 that looked for RESUME. The loooking for the STOP would
discard 

Multiple UART for virt platform on qemu

2023-05-26 Thread Anton Dimov Hristozov



[PATCH] decodetree: Do not remove output_file from /dev

2023-05-26 Thread Richard Henderson
Nor report any PermissionError on remove.

Previously we were testing with "> /dev/null", but that's not easy
to do with meson test(), so we want to use '-o /dev/null' instead.
That works fine for all of the existing tests, where all errors are
diagnosed before opening the output file.  However, PMM's named field
patch set diagnoses cycle errors during output.  This is fair, but
we need to be more careful with the remove.

Signed-off-by: Richard Henderson 
---
 scripts/decodetree.py | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index e4ef0a03cc..a9a0cd0fa3 100644
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -71,7 +71,12 @@ def error_with_file(file, lineno, *args):
 
 if output_file and output_fd:
 output_fd.close()
-os.remove(output_file)
+# Do not try to remove e.g. -o /dev/null
+if not output_file.startswith("/dev"):
+try:
+os.remove(output_file)
+except PermissionError:
+pass
 exit(0 if testforerror else 1)
 # end error_with_file
 
-- 
2.34.1




[PATCH] igb: Add Function Level Reset to PF and VF

2023-05-26 Thread Cédric Le Goater
The Intel 82576EB GbE Controller say that the Physical and Virtual
Functions support Function Level Reset. Add the capability to each
device model.

Cc: Akihiko Odaki 
Fixes: 3a977deebe6b ("Intrdocue igb device emulation")
Signed-off-by: Cédric Le Goater 
---
 hw/net/igb.c   | 3 +++
 hw/net/igbvf.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/hw/net/igb.c b/hw/net/igb.c
index 1c989d767725..08e389338dca 100644
--- a/hw/net/igb.c
+++ b/hw/net/igb.c
@@ -101,6 +101,7 @@ static void igb_write_config(PCIDevice *dev, uint32_t addr,
 
 trace_igb_write_config(addr, val, len);
 pci_default_write_config(dev, addr, val, len);
+pcie_cap_flr_write_config(dev, addr, val, len);
 
 if (range_covers_byte(addr, len, PCI_COMMAND) &&
 (dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
@@ -427,6 +428,8 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error 
**errp)
 }
 
 /* PCIe extended capabilities (in order) */
+pcie_cap_flr_init(pci_dev);
+
 if (pcie_aer_init(pci_dev, 1, 0x100, 0x40, errp) < 0) {
 hw_error("Failed to initialize AER capability");
 }
diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c
index 284ea611848b..0a58dad06802 100644
--- a/hw/net/igbvf.c
+++ b/hw/net/igbvf.c
@@ -204,6 +204,7 @@ static void igbvf_write_config(PCIDevice *dev, uint32_t 
addr, uint32_t val,
 {
 trace_igbvf_write_config(addr, val, len);
 pci_default_write_config(dev, addr, val, len);
+pcie_cap_flr_write_config(dev, addr, val, len);
 }
 
 static uint64_t igbvf_mmio_read(void *opaque, hwaddr addr, unsigned size)
@@ -266,6 +267,8 @@ static void igbvf_pci_realize(PCIDevice *dev, Error **errp)
 hw_error("Failed to initialize PCIe capability");
 }
 
+pcie_cap_flr_init(dev);
+
 if (pcie_aer_init(dev, 1, 0x100, 0x40, errp) < 0) {
 hw_error("Failed to initialize AER capability");
 }
-- 
2.40.1




Re: [PATCH] hw/ppc/mac_newworld: Check for the availability of pci-ohci before using it

2023-05-26 Thread Mark Cave-Ayland

On 26/05/2023 14:30, BALATON Zoltan wrote:


On Fri, 26 May 2023, Thomas Huth wrote:

pci-ohci might habe been disabled in the QEMU binary (e.g. when "configure"
has been run with "--without-default-devices"). Thus we should check
for its availability before blindly using it.

Signed-off-by: Thomas Huth 
---
hw/ppc/mac_newworld.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 535710314a..c7cca430e1 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -349,7 +349,8 @@ static void ppc_core99_init(MachineState *machine)
    sysbus_mmio_get_region(s, 3));
    }

-    machine->usb |= defaults_enabled() && !machine->usb_disabled;
+    machine->usb |= defaults_enabled() && !machine->usb_disabled &&
+    module_object_class_by_name("pci-ohci") != 0;


Considering that PowerMacs have an OHCI controller built in soldered to the 
motherboard should this depend on it instead and not rely on pulling it in with 
PCI_DEVICES and --without-default-devices disabling it?


Currently it's not quite emulating a real Mac but I think we should aim for going 
that way rather than to keep emulating random Mac hardware.


Indeed that's correct: New World Macs should always have USB ports built-in. I guess 
the problem here is that this isn't being indicated correctly via Kconfig and/or the 
machine->usb_disabled logic?



Regards,
BALATON Zoltan


    has_pmu = (core99_machine->via_config != CORE99_VIA_CONFIG_CUDA);
    has_adb = (core99_machine->via_config == CORE99_VIA_CONFIG_CUDA ||
   core99_machine->via_config == CORE99_VIA_CONFIG_PMU_ADB);



ATB,

Mark.




[PATCH v8 7/7] hw/cxl/events: Add injection of Memory Module Events

2023-05-26 Thread Jonathan Cameron via
These events include a copy of the device health information at the
time of the event. Actually using the emulated device health would
require a lot of controls to manipulate that state.  Given the aim
of this injection code is to just test the flows when events occur,
inject the contents of the device health state as well.

Future work may add more sophisticate device health emulation
including direct generation of these records when events occur
(such as a temperature threshold being crossed).  That does not
reduce the usefulness of this more basic generation of the events.

Acked-by: Markus Armbruster 
Reviewed-by: Fan Ni 
Reviewed-by: Ira Weiny 
Signed-off-by: Jonathan Cameron 
---
 qapi/cxl.json   | 53 +++
 include/hw/cxl/cxl_events.h | 19 
 hw/mem/cxl_type3.c  | 62 +
 hw/mem/cxl_type3_stubs.c| 12 +++
 4 files changed, 146 insertions(+)

diff --git a/qapi/cxl.json b/qapi/cxl.json
index 36bf9fa202..a2e1280573 100644
--- a/qapi/cxl.json
+++ b/qapi/cxl.json
@@ -140,6 +140,59 @@
 '*column': 'uint16', '*correction-mask': [ 'uint64' ]
}}
 
+##
+# @cxl-inject-memory-module-event:
+#
+# Inject an event record for a Memory Module Event (CXL r3.0
+# 8.2.9.2.1.3). This event includes a copy of the Device Health
+# info at the time of the event.
+#
+# @path: CXL type 3 device canonical QOM path
+#
+# @log: Event Log to add the event to
+#
+# @flags: Event Record Flags.  See CXL r3.0 Table 8-42 Common Event
+# Record Format, Event Record Flags for subfield definitions.
+#
+# @type: Device Event Type.  See CXL r3.0 Table 8-45 Memory Module
+# Event Record for bit definitions for bit definiions.
+#
+# @health-status: Overall health summary bitmap.  See CXL r3.0 Table
+# 8-100 Get Health Info Output Payload, Health Status for bit
+# definitions.
+#
+# @media-status: Overall media health summary.  See CXL r3.0 Table
+# 8-100 Get Health Info Output Payload, Media Status for bit
+# definitions.
+#
+# @additional-status: See CXL r3.0 Table 8-100 Get Health Info Output
+# Payload, Additional Status for subfield definitions.
+#
+# @life-used: Percentage (0-100) of factory expected life span.
+#
+# @temperature: Device temperature in degrees Celsius.
+#
+# @dirty-shutdown-count: Number of times the device has been unable
+# to determine whether data loss may have occurred.
+#
+# @corrected-volatile-error-count: Total number of correctable errors
+# in volatile memory.
+#
+# @corrected-persistent-error-count: Total number correctable errors
+# in persistent memory
+#
+# Since: 8.1
+##
+{ 'command': 'cxl-inject-memory-module-event',
+  'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags' : 'uint8',
+'type': 'uint8', 'health-status': 'uint8',
+'media-status': 'uint8', 'additional-status': 'uint8',
+'life-used': 'uint8', 'temperature' : 'int16',
+'dirty-shutdown-count': 'uint32',
+'corrected-volatile-error-count': 'uint32',
+'corrected-persistent-error-count': 'uint32'
+}}
+
 ##
 # @cxl-inject-poison:
 #
diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h
index a39e30d973..089ba2091f 100644
--- a/include/hw/cxl/cxl_events.h
+++ b/include/hw/cxl/cxl_events.h
@@ -146,4 +146,23 @@ typedef struct CXLEventDram {
 uint8_t reserved[0x17];
 } QEMU_PACKED CXLEventDram;
 
+/*
+ * Memory Module Event Record
+ * CXL Rev 3.0 Section 8.2.9.2.1.3: Table 8-45
+ * All fields little endian.
+ */
+typedef struct CXLEventMemoryModule {
+CXLEventRecordHdr hdr;
+uint8_t type;
+uint8_t health_status;
+uint8_t media_status;
+uint8_t additional_status;
+uint8_t life_used;
+int16_t temperature;
+uint32_t dirty_shutdown_count;
+uint32_t corrected_volatile_error_count;
+uint32_t corrected_persistent_error_count;
+uint8_t reserved[0x3d];
+} QEMU_PACKED CXLEventMemoryModule;
+
 #endif /* CXL_EVENTS_H */
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 3c07b1b7a3..4e314748d3 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1201,6 +1201,11 @@ static const QemuUUID dram_uuid = {
  0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24),
 };
 
+static const QemuUUID memory_module_uuid = {
+.data = UUID(0xfe927475, 0xdd59, 0x4339, 0xa5, 0x86,
+ 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74),
+};
+
 #define CXL_GMER_VALID_CHANNEL  BIT(0)
 #define CXL_GMER_VALID_RANK BIT(1)
 #define CXL_GMER_VALID_DEVICE   BIT(2)
@@ -1408,6 +1413,63 @@ void qmp_cxl_inject_dram_event(const char *path, 
CxlEventLog log, uint8_t flags,
 return;
 }
 
+void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
+uint8_t flags, uint8_t type,
+uint8_t health_status,
+   

[PATCH v8 6/7] hw/cxl/events: Add injection of DRAM events

2023-05-26 Thread Jonathan Cameron via
Defined in CXL r3.0 8.2.9.2.1.2 DRAM Event Record, this event
provides information related to DRAM devices.

Example injection command in QMP:

{ "execute": "cxl-inject-dram-event",
"arguments": {
"path": "/machine/peripheral/cxl-mem0",
"log": "informational",
"flags": 1,
"dpa": 1000,
"descriptor": 3,
"type": 3,
"transaction-type": 192,
"channel": 3,
"rank": 17,
"nibble-mask": 37421234,
"bank-group": 7,
"bank": 11,
"row": 2,
"column": 77,
"correction-mask": [33, 44, 55,66]
}}

Acked-by: Markus Armbruster 
Reviewed-by: Fan Ni 
Reviewed-by: Ira Weiny 
Signed-off-by: Jonathan Cameron 
---
 qapi/cxl.json   |  61 +++
 include/hw/cxl/cxl_events.h |  23 +++
 hw/mem/cxl_type3.c  | 116 
 hw/mem/cxl_type3_stubs.c|  13 
 4 files changed, 213 insertions(+)

diff --git a/qapi/cxl.json b/qapi/cxl.json
index 7f0b432767..36bf9fa202 100644
--- a/qapi/cxl.json
+++ b/qapi/cxl.json
@@ -79,6 +79,67 @@
 '*channel': 'uint8', '*rank': 'uint8',
 '*device': 'uint32', '*component-id': 'str' } }
 
+##
+# @cxl-inject-dram-event:
+#
+# Inject an event record for a DRAM Event (CXL r3.0 8.2.9.2.1.2).
+# This event type is reported via one of the event logs specified via
+# the log parameter.
+#
+# @path: CXL type 3 device canonical QOM path
+#
+# @log: Event log to add the event to
+#
+# @flags: Event Record Flags.  See CXL r3.0 Table 8-42 Common Event
+# Record Format, Event Record Flags for subfield definitions.
+#
+# @dpa: Device Physical Address (relative to @path device).  Note
+# lower bits include some flags.  See CXL r3.0 Table 8-44 DRAM
+# Event Record, Physical Address.
+#
+# @descriptor: Memory Event Descriptor with additional memory event
+# information.  See CXL r3.0 Table 8-44 DRAM Event Record, Memory
+# Event Descriptor for bit definitions.
+#
+# @type: Type of memory event that occurred.  See CXL r3.0 Table 8-44
+# DRAM Event Record, Memory Event Type for possible values.
+#
+# @transaction-type: Type of first transaction that caused the event
+# to occur.  See CXL r3.0 Table 8-44 DRAM Event Record,
+# Transaction Type for possible values.
+#
+# @channel: The channel of the memory event location.  A channel is an
+# interface that can be independently accessed for a transaction.
+#
+# @rank: The rank of the memory event location.  A rank is a set of
+# memory devices on a channel that together execute a transaction.
+#
+# @nibble-mask: Identifies one or more nibbles that the error affects
+#
+# @bank-group: Bank group of the memory event location, incorporating
+# a number of Banks.
+#
+# @bank: Bank of the memory event location.  A single bank is accessed
+# per read or write of the memory.
+#
+# @row: Row address within the DRAM.
+#
+# @column: Column address within the DRAM.
+#
+# @correction-mask: Bits within each nibble.  Used in order of bits
+# set in the nibble-mask.  Up to 4 nibbles may be covered.
+#
+# Since: 8.1
+##
+{ 'command': 'cxl-inject-dram-event',
+  'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint8',
+'dpa': 'uint64', 'descriptor': 'uint8',
+'type': 'uint8', 'transaction-type': 'uint8',
+'*channel': 'uint8', '*rank': 'uint8', '*nibble-mask': 'uint32',
+'*bank-group': 'uint8', '*bank': 'uint8', '*row': 'uint32',
+'*column': 'uint16', '*correction-mask': [ 'uint64' ]
+   }}
+
 ##
 # @cxl-inject-poison:
 #
diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h
index b189193f4c..a39e30d973 100644
--- a/include/hw/cxl/cxl_events.h
+++ b/include/hw/cxl/cxl_events.h
@@ -123,4 +123,27 @@ typedef struct CXLEventGenMedia {
 uint8_t reserved[CXL_EVENT_GEN_MED_RES_SIZE];
 } QEMU_PACKED CXLEventGenMedia;
 
+/*
+ * DRAM Event Record
+ * CXL Rev 3.0 Section 8.2.9.2.1.2: Table 8-44
+ * All fields little endian.
+ */
+typedef struct CXLEventDram {
+CXLEventRecordHdr hdr;
+uint64_t phys_addr;
+uint8_t descriptor;
+uint8_t type;
+uint8_t transaction_type;
+uint16_t validity_flags;
+uint8_t channel;
+uint8_t rank;
+uint8_t nibble_mask[3];
+uint8_t bank_group;
+uint8_t bank;
+uint8_t row[3];
+uint16_t column;
+uint64_t correction_mask[4];
+uint8_t reserved[0x17];
+} QEMU_PACKED CXLEventDram;
+
 #endif /* CXL_EVENTS_H */
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index b1618779d2..3c07b1b7a3 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1196,6 +1196,11 @@ static const QemuUUID gen_media_uuid = {
  0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6),
 };
 
+static const QemuUUID dram_uuid = {
+.data = UUID(0x601dcbb3, 0x9c06, 0x4eab, 0xb8, 0xaf,
+ 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24),
+};
+
 #define CXL_GMER_VALID_CHANNEL   

[PATCH v8 5/7] hw/cxl/events: Add injection of General Media Events

2023-05-26 Thread Jonathan Cameron via
From: Ira Weiny 

To facilitate testing provide a QMP command to inject a general media
event.  The event can be added to the log specified.

Signed-off-by: Ira Weiny 
Reviewed-by: Fan Ni 
Acked-by: Markus Armbruster 
Signed-off-by: Jonathan Cameron 
---
 qapi/cxl.json   |  74 
 include/hw/cxl/cxl_events.h |  20 +++
 hw/mem/cxl_type3.c  | 111 
 hw/mem/cxl_type3_stubs.c|  10 
 4 files changed, 215 insertions(+)

diff --git a/qapi/cxl.json b/qapi/cxl.json
index ed1c7eea3a..7f0b432767 100644
--- a/qapi/cxl.json
+++ b/qapi/cxl.json
@@ -5,6 +5,80 @@
 # = CXL devices
 ##
 
+##
+# @CxlEventLog:
+#
+# CXL has a number of separate event logs for different types of
+# events.  Each such event log is handled and signaled independently.
+#
+# @informational: Information Event Log
+#
+# @warning: Warning Event Log
+#
+# @failure: Failure Event Log
+#
+# @fatal: Fatal Event Log
+#
+# Since: 8.1
+##
+{ 'enum': 'CxlEventLog',
+  'data': ['informational',
+   'warning',
+   'failure',
+   'fatal']
+ }
+
+##
+# @cxl-inject-general-media-event:
+#
+# Inject an event record for a General Media Event (CXL r3.0
+# 8.2.9.2.1.1).  This event type is reported via one of the event logs
+# specified via the log parameter.
+#
+# @path: CXL type 3 device canonical QOM path
+#
+# @log: event log to add the event to
+#
+# @flags: Event Record Flags.  See CXL r3.0 Table 8-42 Common Event
+# Record Format, Event Record Flags for subfield definitions.
+#
+# @dpa: Device Physical Address (relative to @path device).  Note
+# lower bits include some flags. See CXL r3.0 Table 8-43 General
+# Media Event Record, Physical Address.
+#
+# @descriptor: Memory Event Descriptor with additional memory event
+# information.  See CXL r3.0 Table 8-43 General Media Event
+# Record, Memory Event Descriptor for bit definitions.
+#
+# @type: Type of memory event that occurred.  See CXL r3.0 Table 8-43
+# General Media Event Record, Memory Event Type for possible
+# values.
+#
+# @transaction-type: Type of first transaction that caused the event
+# to occur.  See CXL r3.0 Table 8-43 General Media Event Record,
+# Transaction Type for possible values.
+#
+# @channel: The channel of the memory event location.  A channel is an
+# interface that can be independently accessed for a transaction.
+#
+# @rank: The rank of the memory event location.  A rank is a set of
+# memory devices on a channel that together execute a transaction.
+#
+# @device: Bitmask that represents all devices in the rank associated
+# with the memory event location.
+#
+# @component-id: Device specific component identifier for the event.
+# May describe a field replaceable sub-component of the device.
+#
+# Since: 8.1
+##
+{ 'command': 'cxl-inject-general-media-event',
+  'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint8',
+'dpa': 'uint64', 'descriptor': 'uint8',
+'type': 'uint8', 'transaction-type': 'uint8',
+'*channel': 'uint8', '*rank': 'uint8',
+'*device': 'uint32', '*component-id': 'str' } }
+
 ##
 # @cxl-inject-poison:
 #
diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h
index 4bf8b7aa08..b189193f4c 100644
--- a/include/hw/cxl/cxl_events.h
+++ b/include/hw/cxl/cxl_events.h
@@ -103,4 +103,24 @@ typedef struct CXLEventInterruptPolicy {
 /* DCD is optional but other fields are not */
 #define CXL_EVENT_INT_SETTING_MIN_LEN 4
 
+/*
+ * General Media Event Record
+ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ */
+#define CXL_EVENT_GEN_MED_COMP_ID_SIZE  0x10
+#define CXL_EVENT_GEN_MED_RES_SIZE  0x2e
+typedef struct CXLEventGenMedia {
+CXLEventRecordHdr hdr;
+uint64_t phys_addr;
+uint8_t descriptor;
+uint8_t type;
+uint8_t transaction_type;
+uint16_t validity_flags;
+uint8_t channel;
+uint8_t rank;
+uint8_t device[3];
+uint8_t component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE];
+uint8_t reserved[CXL_EVENT_GEN_MED_RES_SIZE];
+} QEMU_PACKED CXLEventGenMedia;
+
 #endif /* CXL_EVENTS_H */
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index c9e347f42b..b1618779d2 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1181,6 +1181,117 @@ void qmp_cxl_inject_correctable_error(const char *path, 
CxlCorErrorType type,
 pcie_aer_inject_error(PCI_DEVICE(obj), &err);
 }
 
+static void cxl_assign_event_header(CXLEventRecordHdr *hdr,
+const QemuUUID *uuid, uint32_t flags,
+uint8_t length, uint64_t timestamp)
+{
+st24_le_p(&hdr->flags, flags);
+hdr->length = length;
+memcpy(&hdr->id, uuid, sizeof(hdr->id));
+stq_le_p(&hdr->timestamp, timestamp);
+}
+
+static const QemuUUID gen_media_uuid = {
+.data = UUID(0xfbcd0a77, 0xc260, 0x417f,
+ 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6),
+};
+
+#defin

[PATCH v8 4/7] hw/cxl/events: Add event interrupt support

2023-05-26 Thread Jonathan Cameron via
From: Ira Weiny 

Replace the stubbed out CXL Get/Set Event interrupt policy mailbox
commands.  Enable those commands to control interrupts for each of the
event log types.

Skip the standard input mailbox length on the Set command due to DCD
being optional.  Perform the checks separately.

Signed-off-by: Ira Weiny 
Reviewed-by: Fan Ni 
Reviewed-by: Davidlohr Bueso 
Signed-off-by: Jonathan Cameron 
---
 include/hw/cxl/cxl_device.h |   6 +-
 include/hw/cxl/cxl_events.h |  23 
 hw/cxl/cxl-events.c |  33 ++-
 hw/cxl/cxl-mailbox-utils.c  | 106 +---
 hw/mem/cxl_type3.c  |   4 +-
 5 files changed, 147 insertions(+), 25 deletions(-)

diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index d3aec1bc0e..1978730fba 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -121,6 +121,8 @@ typedef struct CXLEventLog {
 uint16_t overflow_err_count;
 uint64_t first_overflow_timestamp;
 uint64_t last_overflow_timestamp;
+bool irq_enabled;
+int irq_vec;
 QemuMutex lock;
 QSIMPLEQ_HEAD(, CXLEvent) events;
 } CXLEventLog;
@@ -369,7 +371,7 @@ MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, 
uint64_t data,
 
 uint64_t cxl_device_get_timestamp(CXLDeviceState *cxlds);
 
-void cxl_event_init(CXLDeviceState *cxlds);
+void cxl_event_init(CXLDeviceState *cxlds, int start_msg_num);
 bool cxl_event_insert(CXLDeviceState *cxlds, CXLEventLogType log_type,
   CXLEventRecordRaw *event);
 CXLRetCode cxl_event_get_records(CXLDeviceState *cxlds, CXLGetEventPayload *pl,
@@ -378,6 +380,8 @@ CXLRetCode cxl_event_get_records(CXLDeviceState *cxlds, 
CXLGetEventPayload *pl,
 CXLRetCode cxl_event_clear_records(CXLDeviceState *cxlds,
CXLClearEventPayload *pl);
 
+void cxl_event_irq_assert(CXLType3Dev *ct3d);
+
 void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d);
 
 #endif
diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h
index d4aaa894f1..4bf8b7aa08 100644
--- a/include/hw/cxl/cxl_events.h
+++ b/include/hw/cxl/cxl_events.h
@@ -80,4 +80,27 @@ typedef struct CXLClearEventPayload {
 uint16_t handle[];
 } CXLClearEventPayload;
 
+/**
+ * Event Interrupt Policy
+ *
+ * CXL rev 3.0 section 8.2.9.2.4; Table 8-52
+ */
+typedef enum CXLEventIntMode {
+CXL_INT_NONE = 0x00,
+CXL_INT_MSI_MSIX = 0x01,
+CXL_INT_FW   = 0x02,
+CXL_INT_RES  = 0x03,
+} CXLEventIntMode;
+#define CXL_EVENT_INT_MODE_MASK 0x3
+#define CXL_EVENT_INT_SETTING(vector) uint8_t)vector & 0xf) << 4) | 
CXL_INT_MSI_MSIX)
+typedef struct CXLEventInterruptPolicy {
+uint8_t info_settings;
+uint8_t warn_settings;
+uint8_t failure_settings;
+uint8_t fatal_settings;
+uint8_t dyn_cap_settings;
+} QEMU_PACKED CXLEventInterruptPolicy;
+/* DCD is optional but other fields are not */
+#define CXL_EVENT_INT_SETTING_MIN_LEN 4
+
 #endif /* CXL_EVENTS_H */
diff --git a/hw/cxl/cxl-events.c b/hw/cxl/cxl-events.c
index 5da1b76b97..d161d57456 100644
--- a/hw/cxl/cxl-events.c
+++ b/hw/cxl/cxl-events.c
@@ -13,6 +13,8 @@
 #include "qemu/bswap.h"
 #include "qemu/typedefs.h"
 #include "qemu/error-report.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
 #include "hw/cxl/cxl.h"
 #include "hw/cxl/cxl_events.h"
 
@@ -26,7 +28,7 @@ static void reset_overflow(CXLEventLog *log)
 log->last_overflow_timestamp = 0;
 }
 
-void cxl_event_init(CXLDeviceState *cxlds)
+void cxl_event_init(CXLDeviceState *cxlds, int start_msg_num)
 {
 CXLEventLog *log;
 int i;
@@ -37,9 +39,16 @@ void cxl_event_init(CXLDeviceState *cxlds)
 log->overflow_err_count = 0;
 log->first_overflow_timestamp = 0;
 log->last_overflow_timestamp = 0;
+log->irq_enabled = false;
+log->irq_vec = start_msg_num++;
 qemu_mutex_init(&log->lock);
 QSIMPLEQ_INIT(&log->events);
 }
+
+/* Override -- Dynamic Capacity uses the same vector as info */
+cxlds->event_logs[CXL_EVENT_TYPE_DYNAMIC_CAP].irq_vec =
+  cxlds->event_logs[CXL_EVENT_TYPE_INFO].irq_vec;
+
 }
 
 static CXLEvent *cxl_event_get_head(CXLEventLog *log)
@@ -215,3 +224,25 @@ CXLRetCode cxl_event_clear_records(CXLDeviceState *cxlds, 
CXLClearEventPayload *
 
 return CXL_MBOX_SUCCESS;
 }
+
+void cxl_event_irq_assert(CXLType3Dev *ct3d)
+{
+CXLDeviceState *cxlds = &ct3d->cxl_dstate;
+PCIDevice *pdev = &ct3d->parent_obj;
+int i;
+
+for (i = 0; i < CXL_EVENT_TYPE_MAX; i++) {
+CXLEventLog *log = &cxlds->event_logs[i];
+
+if (!log->irq_enabled || cxl_event_empty(log)) {
+continue;
+}
+
+/*  Notifies interrupt, legacy IRQ is not supported */
+if (msix_enabled(pdev)) {
+msix_notify(pdev, log->irq_vec);
+} else if (msi_enabled(pdev)) {
+msi_notify(pdev, log->irq_vec);
+}
+}
+}
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mail

[PATCH v8 3/7] hw/cxl/events: Wire up get/clear event mailbox commands

2023-05-26 Thread Jonathan Cameron via
From: Ira Weiny 

CXL testing is benefited from an artificial event log injection
mechanism.

Add an event log infrastructure to insert, get, and clear events from
the various logs available on a device.

Replace the stubbed out CXL Get/Clear Event mailbox commands with
commands that operate on the new infrastructure.

Signed-off-by: Ira Weiny 
Reviewed-by: Fan Ni 
Signed-off-by: Jonathan Cameron 
---
 include/hw/cxl/cxl_device.h |  25 +
 include/hw/cxl/cxl_events.h |  55 +
 hw/cxl/cxl-events.c | 217 
 hw/cxl/cxl-mailbox-utils.c  |  40 ++-
 hw/mem/cxl_type3.c  |   1 +
 hw/cxl/meson.build  |   1 +
 6 files changed, 337 insertions(+), 2 deletions(-)

diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 9f8ee85f8a..d3aec1bc0e 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -111,6 +111,20 @@ typedef enum {
 CXL_MBOX_MAX = 0x17
 } CXLRetCode;
 
+typedef struct CXLEvent {
+CXLEventRecordRaw data;
+QSIMPLEQ_ENTRY(CXLEvent) node;
+} CXLEvent;
+
+typedef struct CXLEventLog {
+uint16_t next_handle;
+uint16_t overflow_err_count;
+uint64_t first_overflow_timestamp;
+uint64_t last_overflow_timestamp;
+QemuMutex lock;
+QSIMPLEQ_HEAD(, CXLEvent) events;
+} CXLEventLog;
+
 typedef struct cxl_device_state {
 MemoryRegion device_registers;
 
@@ -161,6 +175,8 @@ typedef struct cxl_device_state {
 uint64_t mem_size;
 uint64_t pmem_size;
 uint64_t vmem_size;
+
+CXLEventLog event_logs[CXL_EVENT_TYPE_MAX];
 } CXLDeviceState;
 
 /* Initialize the register block for a device */
@@ -353,6 +369,15 @@ MemTxResult cxl_type3_write(PCIDevice *d, hwaddr 
host_addr, uint64_t data,
 
 uint64_t cxl_device_get_timestamp(CXLDeviceState *cxlds);
 
+void cxl_event_init(CXLDeviceState *cxlds);
+bool cxl_event_insert(CXLDeviceState *cxlds, CXLEventLogType log_type,
+  CXLEventRecordRaw *event);
+CXLRetCode cxl_event_get_records(CXLDeviceState *cxlds, CXLGetEventPayload *pl,
+ uint8_t log_type, int max_recs,
+ uint16_t *len);
+CXLRetCode cxl_event_clear_records(CXLDeviceState *cxlds,
+   CXLClearEventPayload *pl);
+
 void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d);
 
 #endif
diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h
index aeb3b0590e..d4aaa894f1 100644
--- a/include/hw/cxl/cxl_events.h
+++ b/include/hw/cxl/cxl_events.h
@@ -10,6 +10,8 @@
 #ifndef CXL_EVENTS_H
 #define CXL_EVENTS_H
 
+#include "qemu/uuid.h"
+
 /*
  * CXL rev 3.0 section 8.2.9.2.2; Table 8-49
  *
@@ -25,4 +27,57 @@ typedef enum CXLEventLogType {
 CXL_EVENT_TYPE_MAX
 } CXLEventLogType;
 
+/*
+ * Common Event Record Format
+ * CXL rev 3.0 section 8.2.9.2.1; Table 8-42
+ */
+#define CXL_EVENT_REC_HDR_RES_LEN 0xf
+typedef struct CXLEventRecordHdr {
+QemuUUID id;
+uint8_t length;
+uint8_t flags[3];
+uint16_t handle;
+uint16_t related_handle;
+uint64_t timestamp;
+uint8_t maint_op_class;
+uint8_t reserved[CXL_EVENT_REC_HDR_RES_LEN];
+} QEMU_PACKED CXLEventRecordHdr;
+
+#define CXL_EVENT_RECORD_DATA_LENGTH 0x50
+typedef struct CXLEventRecordRaw {
+CXLEventRecordHdr hdr;
+uint8_t data[CXL_EVENT_RECORD_DATA_LENGTH];
+} QEMU_PACKED CXLEventRecordRaw;
+#define CXL_EVENT_RECORD_SIZE (sizeof(CXLEventRecordRaw))
+
+/*
+ * Get Event Records output payload
+ * CXL rev 3.0 section 8.2.9.2.2; Table 8-50
+ */
+#define CXL_GET_EVENT_FLAG_OVERFLOW BIT(0)
+#define CXL_GET_EVENT_FLAG_MORE_RECORDS BIT(1)
+typedef struct CXLGetEventPayload {
+uint8_t flags;
+uint8_t reserved1;
+uint16_t overflow_err_count;
+uint64_t first_overflow_timestamp;
+uint64_t last_overflow_timestamp;
+uint16_t record_count;
+uint8_t reserved2[0xa];
+CXLEventRecordRaw records[];
+} QEMU_PACKED CXLGetEventPayload;
+#define CXL_EVENT_PAYLOAD_HDR_SIZE (sizeof(CXLGetEventPayload))
+
+/*
+ * Clear Event Records input payload
+ * CXL rev 3.0 section 8.2.9.2.3; Table 8-51
+ */
+typedef struct CXLClearEventPayload {
+uint8_t event_log;  /* CXLEventLogType */
+uint8_t clear_flags;
+uint8_t nr_recs;
+uint8_t reserved[3];
+uint16_t handle[];
+} CXLClearEventPayload;
+
 #endif /* CXL_EVENTS_H */
diff --git a/hw/cxl/cxl-events.c b/hw/cxl/cxl-events.c
new file mode 100644
index 00..5da1b76b97
--- /dev/null
+++ b/hw/cxl/cxl-events.c
@@ -0,0 +1,217 @@
+/*
+ * CXL Event processing
+ *
+ * Copyright(C) 2023 Intel Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include 
+
+#include "qemu/osdep.h"
+#include "qemu/bswap.h"
+#include "qemu/typedefs.h"
+#include "qemu/error-report.h"
+#include "hw/cxl/cxl.h"
+#include "hw/cxl/cxl_events.h"
+
+/* Artificial limit on the number of events a log can hold */
+#define CXL_T

[PATCH v8 2/7] hw/cxl: Move CXLRetCode definition to cxl_device.h

2023-05-26 Thread Jonathan Cameron via
Following patches will need access to the mailbox return code
type so move it to the header.

Reviewed-by: Ira Weiny 
Reviewed-by: Fan Ni 
Signed-off-by: Jonathan Cameron 
---
 include/hw/cxl/cxl_device.h | 28 
 hw/cxl/cxl-mailbox-utils.c  | 28 
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 16993f7098..9f8ee85f8a 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -83,6 +83,34 @@
 (CXL_DEVICE_CAP_REG_SIZE + CXL_DEVICE_STATUS_REGISTERS_LENGTH + \
  CXL_MAILBOX_REGISTERS_LENGTH + CXL_MEMORY_DEVICE_REGISTERS_LENGTH)
 
+/* 8.2.8.4.5.1 Command Return Codes */
+typedef enum {
+CXL_MBOX_SUCCESS = 0x0,
+CXL_MBOX_BG_STARTED = 0x1,
+CXL_MBOX_INVALID_INPUT = 0x2,
+CXL_MBOX_UNSUPPORTED = 0x3,
+CXL_MBOX_INTERNAL_ERROR = 0x4,
+CXL_MBOX_RETRY_REQUIRED = 0x5,
+CXL_MBOX_BUSY = 0x6,
+CXL_MBOX_MEDIA_DISABLED = 0x7,
+CXL_MBOX_FW_XFER_IN_PROGRESS = 0x8,
+CXL_MBOX_FW_XFER_OUT_OF_ORDER = 0x9,
+CXL_MBOX_FW_AUTH_FAILED = 0xa,
+CXL_MBOX_FW_INVALID_SLOT = 0xb,
+CXL_MBOX_FW_ROLLEDBACK = 0xc,
+CXL_MBOX_FW_REST_REQD = 0xd,
+CXL_MBOX_INVALID_HANDLE = 0xe,
+CXL_MBOX_INVALID_PA = 0xf,
+CXL_MBOX_INJECT_POISON_LIMIT = 0x10,
+CXL_MBOX_PERMANENT_MEDIA_FAILURE = 0x11,
+CXL_MBOX_ABORTED = 0x12,
+CXL_MBOX_INVALID_SECURITY_STATE = 0x13,
+CXL_MBOX_INCORRECT_PASSPHRASE = 0x14,
+CXL_MBOX_UNSUPPORTED_MAILBOX = 0x15,
+CXL_MBOX_INVALID_PAYLOAD_LENGTH = 0x16,
+CXL_MBOX_MAX = 0x17
+} CXLRetCode;
+
 typedef struct cxl_device_state {
 MemoryRegion device_registers;
 
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index e3401b6be8..d7e114aaae 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -68,34 +68,6 @@ enum {
 #define CLEAR_POISON   0x2
 };
 
-/* 8.2.8.4.5.1 Command Return Codes */
-typedef enum {
-CXL_MBOX_SUCCESS = 0x0,
-CXL_MBOX_BG_STARTED = 0x1,
-CXL_MBOX_INVALID_INPUT = 0x2,
-CXL_MBOX_UNSUPPORTED = 0x3,
-CXL_MBOX_INTERNAL_ERROR = 0x4,
-CXL_MBOX_RETRY_REQUIRED = 0x5,
-CXL_MBOX_BUSY = 0x6,
-CXL_MBOX_MEDIA_DISABLED = 0x7,
-CXL_MBOX_FW_XFER_IN_PROGRESS = 0x8,
-CXL_MBOX_FW_XFER_OUT_OF_ORDER = 0x9,
-CXL_MBOX_FW_AUTH_FAILED = 0xa,
-CXL_MBOX_FW_INVALID_SLOT = 0xb,
-CXL_MBOX_FW_ROLLEDBACK = 0xc,
-CXL_MBOX_FW_REST_REQD = 0xd,
-CXL_MBOX_INVALID_HANDLE = 0xe,
-CXL_MBOX_INVALID_PA = 0xf,
-CXL_MBOX_INJECT_POISON_LIMIT = 0x10,
-CXL_MBOX_PERMANENT_MEDIA_FAILURE = 0x11,
-CXL_MBOX_ABORTED = 0x12,
-CXL_MBOX_INVALID_SECURITY_STATE = 0x13,
-CXL_MBOX_INCORRECT_PASSPHRASE = 0x14,
-CXL_MBOX_UNSUPPORTED_MAILBOX = 0x15,
-CXL_MBOX_INVALID_PAYLOAD_LENGTH = 0x16,
-CXL_MBOX_MAX = 0x17
-} CXLRetCode;
-
 struct cxl_cmd;
 typedef CXLRetCode (*opcode_handler)(struct cxl_cmd *cmd,
CXLDeviceState *cxl_dstate, uint16_t *len);
-- 
2.39.2




[PATCH v8 1/7] hw/cxl/events: Add event status register

2023-05-26 Thread Jonathan Cameron via
From: Ira Weiny 

The device status register block was defined.  However, there were no
individual registers nor any data wired up.

Define the event status register [CXL 3.0; 8.2.8.3.1] as part of the
device status register block.  Wire up the register and initialize the
event status for each log.

To support CXL 3.0 the version of the device status register block needs
to be 2.  Change the macro to allow for setting the version.

Signed-off-by: Ira Weiny 
Reviewed-by: Fan Ni 
Signed-off-by: Jonathan Cameron 
---
 include/hw/cxl/cxl_device.h | 23 +---
 include/hw/cxl/cxl_events.h | 28 
 hw/cxl/cxl-device-utils.c   | 43 -
 3 files changed, 86 insertions(+), 8 deletions(-)

diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 73328a52cf..16993f7098 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -13,6 +13,7 @@
 #include "hw/cxl/cxl_component.h"
 #include "hw/pci/pci_device.h"
 #include "hw/register.h"
+#include "hw/cxl/cxl_events.h"
 
 /*
  * The following is how a CXL device's Memory Device registers are laid out.
@@ -86,7 +87,16 @@ typedef struct cxl_device_state {
 MemoryRegion device_registers;
 
 /* mmio for device capabilities array - 8.2.8.2 */
-MemoryRegion device;
+struct {
+MemoryRegion device;
+union {
+uint8_t dev_reg_state[CXL_DEVICE_STATUS_REGISTERS_LENGTH];
+uint16_t dev_reg_state16[CXL_DEVICE_STATUS_REGISTERS_LENGTH / 2];
+uint32_t dev_reg_state32[CXL_DEVICE_STATUS_REGISTERS_LENGTH / 4];
+uint64_t dev_reg_state64[CXL_DEVICE_STATUS_REGISTERS_LENGTH / 8];
+};
+uint64_t event_status;
+};
 MemoryRegion memory_device;
 struct {
 MemoryRegion caps;
@@ -141,6 +151,9 @@ REG64(CXL_DEV_CAP_ARRAY, 0) /* Documented as 128 bit 
register but 64 byte access
 FIELD(CXL_DEV_CAP_ARRAY, CAP_VERSION, 16, 8)
 FIELD(CXL_DEV_CAP_ARRAY, CAP_COUNT, 32, 16)
 
+void cxl_event_set_status(CXLDeviceState *cxl_dstate, CXLEventLogType log_type,
+  bool available);
+
 /*
  * Helper macro to initialize capability headers for CXL devices.
  *
@@ -175,7 +188,7 @@ CXL_DEVICE_CAPABILITY_HEADER_REGISTER(MEMORY_DEVICE,
 void cxl_initialize_mailbox(CXLDeviceState *cxl_dstate);
 void cxl_process_mailbox(CXLDeviceState *cxl_dstate);
 
-#define cxl_device_cap_init(dstate, reg, cap_id)   \
+#define cxl_device_cap_init(dstate, reg, cap_id, ver)  \
 do {   \
 uint32_t *cap_hdrs = dstate->caps_reg_state32; \
 int which = R_CXL_DEV_##reg##_CAP_HDR0;\
@@ -183,7 +196,7 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate);
 FIELD_DP32(cap_hdrs[which], CXL_DEV_##reg##_CAP_HDR0,  \
CAP_ID, cap_id);\
 cap_hdrs[which] = FIELD_DP32(  \
-cap_hdrs[which], CXL_DEV_##reg##_CAP_HDR0, CAP_VERSION, 1);\
+cap_hdrs[which], CXL_DEV_##reg##_CAP_HDR0, CAP_VERSION, ver);  \
 cap_hdrs[which + 1] =  \
 FIELD_DP32(cap_hdrs[which + 1], CXL_DEV_##reg##_CAP_HDR1,  \
CAP_OFFSET, CXL_##reg##_REGISTERS_OFFSET);  \
@@ -192,6 +205,10 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate);
CAP_LENGTH, CXL_##reg##_REGISTERS_LENGTH);  \
 } while (0)
 
+/* CXL 3.0 8.2.8.3.1 Event Status Register */
+REG64(CXL_DEV_EVENT_STATUS, 0)
+FIELD(CXL_DEV_EVENT_STATUS, EVENT_STATUS, 0, 32)
+
 /* CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register */
 REG32(CXL_DEV_MAILBOX_CAP, 0)
 FIELD(CXL_DEV_MAILBOX_CAP, PAYLOAD_SIZE, 0, 5)
diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h
new file mode 100644
index 00..aeb3b0590e
--- /dev/null
+++ b/include/hw/cxl/cxl_events.h
@@ -0,0 +1,28 @@
+/*
+ * QEMU CXL Events
+ *
+ * Copyright (c) 2022 Intel
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef CXL_EVENTS_H
+#define CXL_EVENTS_H
+
+/*
+ * CXL rev 3.0 section 8.2.9.2.2; Table 8-49
+ *
+ * Define these as the bit position for the event status register for ease of
+ * setting the status.
+ */
+typedef enum CXLEventLogType {
+CXL_EVENT_TYPE_INFO  = 0,
+CXL_EVENT_TYPE_WARN  = 1,
+CXL_EVENT_TYPE_FAIL  = 2,
+CXL_EVENT_TYPE_FATAL = 3,
+CXL_EVENT_TYPE_DYNAMIC_CAP   = 4,
+CXL_EVENT_TYPE_MAX
+} CXLEventLogType;
+
+#endif /* CXL_EVENTS_H */
diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c
index 86e1cea8ce..517f06d869 100644
--- a/hw/cxl/cxl-device-utils.c
+++ b/hw/cxl/cxl-device-

[PATCH v8 0/7] QEMU CXL Provide mock CXL events and irq support

2023-05-26 Thread Jonathan Cameron via
v8: QMP documentation formatting fixes from Markus.
Gathered tags.

Based on:
[PATCH v8 0/4] hw/cxl: Poison get, inject, clear

Based on: Message-ID: 20230526170010.574-1-jonathan.came...@huawei.com

Updated cover letter from earlier versions:

One challenge here is striking the right balance between lots of constraints
in the injection code to enforce particular reserved bits etc by breaking
out all the flags as individual parameters vs having a reasonably concise
API.  I think this set strikes the right balance but others may well
disagree :)   Note that Ira raised the question of whether we should be
automatically establishing the volatile flag based on the Device Physical
Address of the injected error. My proposal is to not do so for now, but
to possibly revisit tightening the checking of injected errors in future.
Whilst the volatile flag is straight forwards, some of the other flags that
could be automatically set (or perhaps checked for validiaty) are much more
complex. Adding verification at this stage would greatly increase the
complexity of the patch + we are missing other elements that would interact
with this.  I'm not concerned about potential breaking of backwards 
compatibility
if it only related to the injection of errors that make no sense for a real
device.

CXL Event records inform the OS of various CXL device events.  Thus far CXL
memory devices are emulated and therefore don't naturally generate events.

Add an event infrastructure and mock event injection.  Previous versions
included a bulk insertion of lots of events.  However, this series focuses on
providing the ability to inject individual events through QMP.  Only the
General Media Event is included in this series as an example.  Other events can
be added pretty easily once the infrastructure is acceptable.

In addition, this version updates the code to be in line with the
specification based on discussions around the kernel patches.

Injection examples;

{ "execute": "cxl-inject-general-media-event",
"arguments": {
"path": "/machine/peripheral/cxl-mem0",
"log": "informational",
"flags": 1,
"dpa": 1000,
"descriptor": 3,
"type": 3,
"transaction-type": 192,
"channel": 3,
"device": 5,
"component-id": "iras mem"
}}


{ "execute": "cxl-inject-dram-event",
"arguments": {
"path": "/machine/peripheral/cxl-mem0",
"log": "informational",
"flags": 1,
"dpa": 1000,
"descriptor": 3,
"type": 3,
"transaction-type": 192,
"channel": 3,
"rank": 17,
"nibble-mask": 37421234,
"bank-group": 7,
"bank": 11,
"row": 2,
"column": 77,
"correction-mask": [33, 44, 55, 66]
}}

{ "execute": "cxl-inject-memory-module-event",
  "arguments": {
"path": "/machine/peripheral/cxl-mem0",
"log": "informational",
"flags": 1,
"type": 3,
"health-status": 3,
"media-status": 7,
"additional-status": 33,
"life-used": 30,
"temperature": -15,
"dirty-shutdown-count": 4,
"corrected-volatile-error-count": 3233,
"corrected-persistent-error-count": 1300
  }}

Ira Weiny (4):
  hw/cxl/events: Add event status register
  hw/cxl/events: Wire up get/clear event mailbox commands
  hw/cxl/events: Add event interrupt support
  hw/cxl/events: Add injection of General Media Events

Jonathan Cameron (3):
  hw/cxl: Move CXLRetCode definition to cxl_device.h
  hw/cxl/events: Add injection of DRAM events
  hw/cxl/events: Add injection of Memory Module Events

 qapi/cxl.json   | 188 +++
 include/hw/cxl/cxl_device.h |  80 +-
 include/hw/cxl/cxl_events.h | 168 +
 hw/cxl/cxl-device-utils.c   |  43 +-
 hw/cxl/cxl-events.c | 248 ++
 hw/cxl/cxl-mailbox-utils.c  | 166 ++--
 hw/mem/cxl_type3.c  | 292 +++-
 hw/mem/cxl_type3_stubs.c|  35 +
 hw/cxl/meson.build  |   1 +
 9 files changed, 1165 insertions(+), 56 deletions(-)
 create mode 100644 include/hw/cxl/cxl_events.h
 create mode 100644 hw/cxl/cxl-events.c

-- 
2.39.2




Re: [PATCH v7 7/7] hw/cxl/events: Add injection of Memory Module Events

2023-05-26 Thread Jonathan Cameron via


> > +# @temperature: Device temperature in degrees Celsius.
> > +#
> > +# @dirty-shutdown-count: Number of time the device has been unable to  
> 
> Number of times
> 
> > +#determine whether data loss may have occurred.
> > +#
> > +# @corrected-volatile-error-count: Total number of correctable errors in
> > +#  volatile memory.
> > +#
> > +# @corrected-persistent-error-count: Total number correctable errors in
> > +#persistent memory  
> 
> Please format like
> 
># @flags: Event Record Flags.  See CXL r3.0 Table 8-42 Common Event
># Record Format, Event Record Flags for subfield definitions.
>#
># @type: Device Event Type.  See CXL r3.0 Table 8-45 Memory Module
># Event Record for bit definitions for bit definiions.
>#
># @health-status: Overall health summary bitmap.  See CXL r3.0 Table
># 8-100 Get Health Info Output Payload, Health Status for bit
># definitions.
>#
># @media-status: Overall media health summary.  See CXL r3.0 Table
># 8-100 Get Health Info Output Payload, Media Status for bit
># definitions.
>#
># @additional-status: See CXL r3.0 Table 8-100 Get Health Info Output
># Payload, Additional Status for subfield definitions.
>#
># @life-used: Percentage (0-100) of factory expected life span.
>#
># @temperature: Device temperature in degrees Celsius.
>#
># @dirty-shutdown-count: Number of time the device has been unable to
># determine whether data loss may have occurred.

With "Number of times" this runs to 71 chars. reflowed appropriately for v8

>#
># @corrected-volatile-error-count: Total number of correctable errors
># in volatile memory.
>#
># @corrected-persistent-error-count: Total number correctable errors
># in persistent memory
> 
> to blend in with recent commit a937b6aa739 (qapi: Reformat doc comments
> to conform to current conventions).
> 
> 
> > +#



Re: [PATCH 6/6] tests/decode: Add tests for various named-field cases

2023-05-26 Thread Richard Henderson

On 5/24/23 03:26, Peter Maydell wrote:

On Tue, 23 May 2023 at 13:04, Peter Maydell  wrote:


Add some tests for various cases of named-field use, both ones that
should work and ones that should be diagnosed as errors.

Signed-off-by: Peter Maydell 
---
  tests/decode/err_field1.decode   |  2 +-
  tests/decode/err_field10.decode  |  7 +++
  tests/decode/err_field7.decode   |  7 +++
  tests/decode/err_field8.decode   |  8 
  tests/decode/err_field9.decode   | 14 ++
  tests/decode/succ_named_field.decode | 19 +++
  6 files changed, 56 insertions(+), 1 deletion(-)
  create mode 100644 tests/decode/err_field10.decode
  create mode 100644 tests/decode/err_field7.decode
  create mode 100644 tests/decode/err_field8.decode
  create mode 100644 tests/decode/err_field9.decode
  create mode 100644 tests/decode/succ_named_field.decode

diff --git a/tests/decode/err_field1.decode b/tests/decode/err_field1.decode
index e07a5a73e0e..85c3f326d07 100644
--- a/tests/decode/err_field1.decode
+++ b/tests/decode/err_field1.decode
@@ -2,4 +2,4 @@
  # See the COPYING.LIB file in the top-level directory.

  # Diagnose invalid field syntax
-%field asdf
+%field 1asdf


I just realized that this specific change needs to go before patch 5:
it's updating an existing test because "asdf" used to be invalid
syntax and now is not. Otherwise bisection will break.


Really?  The test still fails here at patch 5:

/home/rth/qemu/bld/../src/tests/decode/err_field1.decode:5: error: invalid field token 
"asdf"


r~




[PATCH v8 4/4] hw/cxl: Add clear poison mailbox command support.

2023-05-26 Thread Jonathan Cameron via
Current implementation is very simple so many of the corner
cases do not exist (e.g. fragmenting larger poison list entries)

Reviewed-by: Fan Ni 
Reviewed-by: Ira Weiny 
Signed-off-by: Jonathan Cameron 
---
 include/hw/cxl/cxl_device.h |  1 +
 hw/cxl/cxl-mailbox-utils.c  | 82 +
 hw/mem/cxl_type3.c  | 37 +
 3 files changed, 120 insertions(+)

diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 32c234ea91..73328a52cf 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -298,6 +298,7 @@ struct CXLType3Class {
 uint64_t offset);
 void (*set_lsa)(CXLType3Dev *ct3d, const void *buf, uint64_t size,
 uint64_t offset);
+bool (*set_cacheline)(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t 
*data);
 };
 
 MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 6c476ad7f4..e3401b6be8 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -65,6 +65,7 @@ enum {
 MEDIA_AND_POISON = 0x43,
 #define GET_POISON_LIST0x0
 #define INJECT_POISON  0x1
+#define CLEAR_POISON   0x2
 };
 
 /* 8.2.8.4.5.1 Command Return Codes */
@@ -512,6 +513,85 @@ static CXLRetCode cmd_media_inject_poison(struct cxl_cmd 
*cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+static CXLRetCode cmd_media_clear_poison(struct cxl_cmd *cmd,
+ CXLDeviceState *cxl_dstate,
+ uint16_t *len_unused)
+{
+CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
+CXLPoisonList *poison_list = &ct3d->poison_list;
+CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
+struct clear_poison_pl {
+uint64_t dpa;
+uint8_t data[64];
+};
+CXLPoison *ent;
+uint64_t dpa;
+
+struct clear_poison_pl *in = (void *)cmd->payload;
+
+dpa = ldq_le_p(&in->dpa);
+if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->mem_size) {
+return CXL_MBOX_INVALID_PA;
+}
+
+/* Clearing a region with no poison is not an error so always do so */
+if (cvc->set_cacheline) {
+if (!cvc->set_cacheline(ct3d, dpa, in->data)) {
+return CXL_MBOX_INTERNAL_ERROR;
+}
+}
+
+QLIST_FOREACH(ent, poison_list, node) {
+/*
+ * Test for contained in entry. Simpler than general case
+ * as clearing 64 bytes and entries 64 byte aligned
+ */
+if ((dpa >= ent->start) && (dpa < ent->start + ent->length)) {
+break;
+}
+}
+if (!ent) {
+return CXL_MBOX_SUCCESS;
+}
+
+QLIST_REMOVE(ent, node);
+ct3d->poison_list_cnt--;
+
+if (dpa > ent->start) {
+CXLPoison *frag;
+/* Cannot overflow as replacing existing entry */
+
+frag = g_new0(CXLPoison, 1);
+
+frag->start = ent->start;
+frag->length = dpa - ent->start;
+frag->type = ent->type;
+
+QLIST_INSERT_HEAD(poison_list, frag, node);
+ct3d->poison_list_cnt++;
+}
+
+if (dpa + CXL_CACHE_LINE_SIZE < ent->start + ent->length) {
+CXLPoison *frag;
+
+if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) {
+cxl_set_poison_list_overflowed(ct3d);
+} else {
+frag = g_new0(CXLPoison, 1);
+
+frag->start = dpa + CXL_CACHE_LINE_SIZE;
+frag->length = ent->start + ent->length - frag->start;
+frag->type = ent->type;
+QLIST_INSERT_HEAD(poison_list, frag, node);
+ct3d->poison_list_cnt++;
+}
+}
+/* Any fragments have been added, free original entry */
+g_free(ent);
+
+return CXL_MBOX_SUCCESS;
+}
+
 #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
 #define IMMEDIATE_DATA_CHANGE (1 << 2)
 #define IMMEDIATE_POLICY_CHANGE (1 << 3)
@@ -543,6 +623,8 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
 cmd_media_get_poison_list, 16, 0 },
 [MEDIA_AND_POISON][INJECT_POISON] = { "MEDIA_AND_POISON_INJECT_POISON",
 cmd_media_inject_poison, 8, 0 },
+[MEDIA_AND_POISON][CLEAR_POISON] = { "MEDIA_AND_POISON_CLEAR_POISON",
+cmd_media_clear_poison, 72, 0 },
 };
 
 void cxl_process_mailbox(CXLDeviceState *cxl_dstate)
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index ab600735eb..d751803188 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -947,6 +947,42 @@ static void set_lsa(CXLType3Dev *ct3d, const void *buf, 
uint64_t size,
  */
 }
 
+static bool set_cacheline(CXLType3Dev *ct3d, uint64_t dpa_offset, uint8_t 
*data)
+{
+MemoryRegion *vmr = NULL, *pmr = NULL;
+AddressSpace *as;
+
+if (ct3d->hostvmem) {
+vmr = host_memory_backend_get_memory(ct3d->hostvmem);
+}
+if (ct3d->hostpmem) {
+pmr = host_memory_backend_get_memory(ct3d->hostpmem);
+}
+
+if (!vm

[PATCH v8 3/4] hw/cxl: Add poison injection via the mailbox.

2023-05-26 Thread Jonathan Cameron via
Very simple implementation to allow testing of corresponding
kernel code. Note that for now we track each 64 byte section
independently.  Whilst a valid implementation choice, it may
make sense to fuse entries so as to prove out more complex
corners of the kernel code.

Reviewed-by: Ira Weiny 
Reviewed-by: Fan Ni 
Signed-off-by: Jonathan Cameron 
---
 hw/cxl/cxl-mailbox-utils.c | 42 ++
 1 file changed, 42 insertions(+)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 1f74b26ea2..6c476ad7f4 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -64,6 +64,7 @@ enum {
 #define SET_LSA   0x3
 MEDIA_AND_POISON = 0x43,
 #define GET_POISON_LIST0x0
+#define INJECT_POISON  0x1
 };
 
 /* 8.2.8.4.5.1 Command Return Codes */
@@ -472,6 +473,45 @@ static CXLRetCode cmd_media_get_poison_list(struct cxl_cmd 
*cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+static CXLRetCode cmd_media_inject_poison(struct cxl_cmd *cmd,
+  CXLDeviceState *cxl_dstate,
+  uint16_t *len_unused)
+{
+CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
+CXLPoisonList *poison_list = &ct3d->poison_list;
+CXLPoison *ent;
+struct inject_poison_pl {
+uint64_t dpa;
+};
+struct inject_poison_pl *in = (void *)cmd->payload;
+uint64_t dpa = ldq_le_p(&in->dpa);
+CXLPoison *p;
+
+QLIST_FOREACH(ent, poison_list, node) {
+if (dpa >= ent->start &&
+dpa + CXL_CACHE_LINE_SIZE <= ent->start + ent->length) {
+return CXL_MBOX_SUCCESS;
+}
+}
+
+if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) {
+return CXL_MBOX_INJECT_POISON_LIMIT;
+}
+p = g_new0(CXLPoison, 1);
+
+p->length = CXL_CACHE_LINE_SIZE;
+p->start = dpa;
+p->type = CXL_POISON_TYPE_INJECTED;
+
+/*
+ * Possible todo: Merge with existing entry if next to it and if same type
+ */
+QLIST_INSERT_HEAD(poison_list, p, node);
+ct3d->poison_list_cnt++;
+
+return CXL_MBOX_SUCCESS;
+}
+
 #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
 #define IMMEDIATE_DATA_CHANGE (1 << 2)
 #define IMMEDIATE_POLICY_CHANGE (1 << 3)
@@ -501,6 +541,8 @@ static struct cxl_cmd cxl_cmd_set[256][256] = {
 ~0, IMMEDIATE_CONFIG_CHANGE | IMMEDIATE_DATA_CHANGE },
 [MEDIA_AND_POISON][GET_POISON_LIST] = { "MEDIA_AND_POISON_GET_POISON_LIST",
 cmd_media_get_poison_list, 16, 0 },
+[MEDIA_AND_POISON][INJECT_POISON] = { "MEDIA_AND_POISON_INJECT_POISON",
+cmd_media_inject_poison, 8, 0 },
 };
 
 void cxl_process_mailbox(CXLDeviceState *cxl_dstate)
-- 
2.39.2




[PATCH v8 2/4] hw/cxl: QMP based poison injection support

2023-05-26 Thread Jonathan Cameron via
Inject poison using QMP command cxl-inject-poison to add an entry to the
poison list.

For now, the poison is not returned CXL.mem reads, but only via the
mailbox command Get Poison List. So a normal memory read to an address
that is on the poison list will not yet result in a synchronous exception
(and similar for partial cacheline writes).
That is left for a future patch.

See CXL rev 3.0, sec 8.2.9.8.4.1 Get Poison list (Opcode 4300h)

Kernel patches to use this interface here:
https://lore.kernel.org/linux-cxl/cover.1665606782.git.alison.schofi...@intel.com/

To inject poison using QMP (telnet to the QMP port)
{ "execute": "qmp_capabilities" }

{ "execute": "cxl-inject-poison",
"arguments": {
 "path": "/machine/peripheral/cxl-pmem0",
 "start": 2048,
 "length": 256
}
}

Adjusted to select a device on your machine.

Note that the poison list supported is kept short enough to avoid the
complexity of state machine that is needed to handle the MORE flag.

Reviewed-by: Fan Ni 
Reviewed-by: Ira Weiny 
Acked-by: Markus Armbruster 
Signed-off-by: Jonathan Cameron 
---
 qapi/cxl.json   | 21 +
 include/hw/cxl/cxl.h|  1 +
 include/hw/cxl/cxl_device.h | 20 +
 hw/cxl/cxl-mailbox-utils.c  | 90 +
 hw/mem/cxl_type3.c  | 56 +++
 hw/mem/cxl_type3_stubs.c|  6 +++
 6 files changed, 194 insertions(+)

diff --git a/qapi/cxl.json b/qapi/cxl.json
index b21c9b4c1c..ed1c7eea3a 100644
--- a/qapi/cxl.json
+++ b/qapi/cxl.json
@@ -5,6 +5,27 @@
 # = CXL devices
 ##
 
+##
+# @cxl-inject-poison:
+#
+# Poison records indicate that a CXL memory device knows that a
+# particular memory region may be corrupted.  This may be because of
+# locally detected errors (e.g. ECC failure) or poisoned writes
+# received from other components in the system.  This injection
+# mechanism enables testing of the OS handling of poison records which
+# may be queried via the CXL mailbox.
+#
+# @path: CXL type 3 device canonical QOM path
+#
+# @start: Start address; must be 64 byte aligned.
+#
+# @length: Length of poison to inject; must be a multiple of 64 bytes.
+#
+# Since: 8.1
+##
+{ 'command': 'cxl-inject-poison',
+  'data': { 'path': 'str', 'start': 'uint64', 'length': 'size' }}
+
 ##
 # @CxlUncorErrorType:
 #
diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h
index c453983e83..56c9e7676e 100644
--- a/include/hw/cxl/cxl.h
+++ b/include/hw/cxl/cxl.h
@@ -18,6 +18,7 @@
 #include "cxl_component.h"
 #include "cxl_device.h"
 
+#define CXL_CACHE_LINE_SIZE 64
 #define CXL_COMPONENT_REG_BAR_IDX 0
 #define CXL_DEVICE_REG_BAR_IDX 2
 
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 02befda0f6..32c234ea91 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -242,6 +242,18 @@ typedef struct CXLError {
 
 typedef QTAILQ_HEAD(, CXLError) CXLErrorList;
 
+typedef struct CXLPoison {
+uint64_t start, length;
+uint8_t type;
+#define CXL_POISON_TYPE_EXTERNAL 0x1
+#define CXL_POISON_TYPE_INTERNAL 0x2
+#define CXL_POISON_TYPE_INJECTED 0x3
+QLIST_ENTRY(CXLPoison) node;
+} CXLPoison;
+
+typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
+#define CXL_POISON_LIST_LIMIT 256
+
 struct CXLType3Dev {
 /* Private */
 PCIDevice parent_obj;
@@ -264,6 +276,12 @@ struct CXLType3Dev {
 
 /* Error injection */
 CXLErrorList error_list;
+
+/* Poison Injection - cache */
+CXLPoisonList poison_list;
+unsigned int poison_list_cnt;
+bool poison_list_overflowed;
+uint64_t poison_list_overflow_ts;
 };
 
 #define TYPE_CXL_TYPE3 "cxl-type3"
@@ -289,4 +307,6 @@ MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, 
uint64_t data,
 
 uint64_t cxl_device_get_timestamp(CXLDeviceState *cxlds);
 
+void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d);
+
 #endif
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 702e16ca20..1f74b26ea2 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -62,6 +62,8 @@ enum {
 #define GET_PARTITION_INFO 0x0
 #define GET_LSA   0x2
 #define SET_LSA   0x3
+MEDIA_AND_POISON = 0x43,
+#define GET_POISON_LIST0x0
 };
 
 /* 8.2.8.4.5.1 Command Return Codes */
@@ -295,6 +297,10 @@ static CXLRetCode cmd_identify_memory_device(struct 
cxl_cmd *cmd,
 stq_le_p(&id->persistent_capacity, cxl_dstate->pmem_size / 
CXL_CAPACITY_MULTIPLIER);
 stq_le_p(&id->volatile_capacity, cxl_dstate->vmem_size / 
CXL_CAPACITY_MULTIPLIER);
 stl_le_p(&id->lsa_size, cvc->get_lsa_size(ct3d));
+/* 256 poison records */
+st24_le_p(id->poison_list_max_mer, 256);
+/* No limit - so limited by main poison record limit */
+stw_le_p(&id->inject_poison_limit, 0);
 
 *len = sizeof(*id);
 return CXL_MBOX_SUCCESS;
@@ -384,6 +390,88 @@ static CXLRetCode cmd_ccls_set_lsa(struct cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * This is ve

[PATCH v8 0/4] hw/cxl: Poison get, inject, clear

2023-05-26 Thread Jonathan Cameron via
v8: Formatting fixes for QMP docs from Markus Armbruster (thanks!)

The bswap naming discussions seems to have died down, so I'll stick
with this version (24)

Precursors now all upstream which make this email easier to write :)

The kernel support for Poison handling is now upstream.

This code has been very useful for testing and helped identify various
corner cases.

Updated cover letter.

The series supports:
1) Injection of variable length poison regions via QMP (to fake real
   memory corruption and ensure we deal with odd overflow corner cases
   such as clearing the middle of a large region making the list overflow
   as we go from one long entry to two smaller entries.
2) Read of poison list via the CXL mailbox.
3) Injection via the poison injection mailbox command (limited to 64 byte
   entries - spec constraint)
4) Clearing of poison injected via either method.

The implementation is meant to be a valid combination of impdef choices
based on what the spec allowed. There are a number of places where it could
be made more sophisticated that we might consider in future:
* Fusing adjacent poison entries if the types match.
* Separate injection list and main poison list, to test out limits on
  injected poison list being smaller than the main list.
* Poison list overflow event (needs event log support in general)
* Connecting up to the poison list error record generation (rather complex
  and not needed for currently kernel handling testing).
* Triggering the synchronous and asynchronous errors that occur on reads
  and writes of the memory when the host receives poison.

As the kernel code is currently fairly simple, it is likely that the above
does not yet matter but who knows what will turn up in future!

Ira Weiny (1):
  bswap: Add the ability to store to an unaligned 24 bit field

Jonathan Cameron (3):
  hw/cxl: QMP based poison injection support
  hw/cxl: Add poison injection via the mailbox.
  hw/cxl: Add clear poison mailbox command support.

 docs/devel/loads-stores.rst |   2 +
 qapi/cxl.json   |  21 
 include/hw/cxl/cxl.h|   1 +
 include/hw/cxl/cxl_device.h |  21 
 include/qemu/bswap.h|  25 +
 hw/cxl/cxl-mailbox-utils.c  | 214 
 hw/mem/cxl_type3.c  |  93 
 hw/mem/cxl_type3_stubs.c|   6 +
 8 files changed, 383 insertions(+)

-- 
2.39.2




[PATCH v8 1/4] bswap: Add the ability to store to an unaligned 24 bit field

2023-05-26 Thread Jonathan Cameron via
From: Ira Weiny 

CXL has 24 bit unaligned fields which need to be stored to.  CXL is
specified as little endian.

Define st24_le_p() and the supporting functions to store such a field
from a 32 bit host native value.

The use of b, w, l, q as the size specifier is limiting.  So "24" was
used for the size part of the function name.

Reviewed-by: Fan Ni 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Ira Weiny 
Signed-off-by: Jonathan Cameron 
---
 docs/devel/loads-stores.rst |  2 ++
 include/qemu/bswap.h| 25 +
 2 files changed, 27 insertions(+)

diff --git a/docs/devel/loads-stores.rst b/docs/devel/loads-stores.rst
index d2cefc77a2..dab6dfa0ac 100644
--- a/docs/devel/loads-stores.rst
+++ b/docs/devel/loads-stores.rst
@@ -36,6 +36,7 @@ store: ``st{size}_{endian}_p(ptr, val)``
 ``size``
  - ``b`` : 8 bits
  - ``w`` : 16 bits
+ - ``24`` : 24 bits
  - ``l`` : 32 bits
  - ``q`` : 64 bits
 
@@ -65,6 +66,7 @@ of size ``sz`` bytes.
 Regexes for git grep
  - ``\``
  - ``\``
+ - ``\``
  - ``\``
  - ``\``
 
diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h
index 15a78c0db5..933a66ee87 100644
--- a/include/qemu/bswap.h
+++ b/include/qemu/bswap.h
@@ -8,11 +8,23 @@
 #undef  bswap64
 #define bswap64(_x) __builtin_bswap64(_x)
 
+static inline uint32_t bswap24(uint32_t x)
+{
+return (((x & 0x00ffU) << 16) |
+((x & 0xff00U) <<  0) |
+((x & 0x00ffU) >> 16));
+}
+
 static inline void bswap16s(uint16_t *s)
 {
 *s = __builtin_bswap16(*s);
 }
 
+static inline void bswap24s(uint32_t *s)
+{
+*s = bswap24(*s & 0x00ffU);
+}
+
 static inline void bswap32s(uint32_t *s)
 {
 *s = __builtin_bswap32(*s);
@@ -26,11 +38,13 @@ static inline void bswap64s(uint64_t *s)
 #if HOST_BIG_ENDIAN
 #define be_bswap(v, size) (v)
 #define le_bswap(v, size) glue(__builtin_bswap, size)(v)
+#define le_bswap24(v) bswap24(v)
 #define be_bswaps(v, size)
 #define le_bswaps(p, size) \
 do { *p = glue(__builtin_bswap, size)(*p); } while (0)
 #else
 #define le_bswap(v, size) (v)
+#define le_bswap24(v) (v)
 #define be_bswap(v, size) glue(__builtin_bswap, size)(v)
 #define le_bswaps(v, size)
 #define be_bswaps(p, size) \
@@ -176,6 +190,7 @@ CPU_CONVERT(le, 64, uint64_t)
  * size is:
  *   b: 8 bits
  *   w: 16 bits
+ *   24: 24 bits
  *   l: 32 bits
  *   q: 64 bits
  *
@@ -248,6 +263,11 @@ static inline void stw_he_p(void *ptr, uint16_t v)
 __builtin_memcpy(ptr, &v, sizeof(v));
 }
 
+static inline void st24_he_p(void *ptr, uint32_t v)
+{
+__builtin_memcpy(ptr, &v, 3);
+}
+
 static inline int ldl_he_p(const void *ptr)
 {
 int32_t r;
@@ -297,6 +317,11 @@ static inline void stw_le_p(void *ptr, uint16_t v)
 stw_he_p(ptr, le_bswap(v, 16));
 }
 
+static inline void st24_le_p(void *ptr, uint32_t v)
+{
+st24_he_p(ptr, le_bswap24(v));
+}
+
 static inline void stl_le_p(void *ptr, uint32_t v)
 {
 stl_he_p(ptr, le_bswap(v, 32));
-- 
2.39.2




[PATCH v6 10/11] hw/9pfs: use qemu_xxhash4

2023-05-26 Thread Alex Bennée
No need to pass zeros as we have helpers that do that for us.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Christian Schoenebeck 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Richard Henderson 
Signed-off-by: Alex Bennée 
Message-Id: <20230524133952.3971948-10-alex.ben...@linaro.org>
---
 hw/9pfs/9p.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 9621ec1341..991645adca 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -738,15 +738,14 @@ static VariLenAffix affixForIndex(uint64_t index)
 return invertAffix(&prefix); /* convert prefix to suffix */
 }
 
-/* creative abuse of tb_hash_func7, which is based on xxhash */
 static uint32_t qpp_hash(QppEntry e)
 {
-return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0);
+return qemu_xxhash4(e.ino_prefix, e.dev);
 }
 
 static uint32_t qpf_hash(QpfEntry e)
 {
-return qemu_xxhash7(e.ino, e.dev, 0, 0, 0);
+return qemu_xxhash4(e.ino, e.dev);
 }
 
 static bool qpd_cmp_func(const void *obj, const void *userp)
-- 
2.39.2




[PATCH v6 07/11] trace: remove code that depends on setting vcpu

2023-05-26 Thread Alex Bennée
Now we no longer have any events that are for vcpus we can start
excising the code from the trace control. As the vcpu parameter is
encoded as part of QMP we just stub out the has_vcpu/vcpu parameters
rather than alter the API.

Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Richard Henderson 
Signed-off-by: Alex Bennée 
Message-Id: <20230524133952.3971948-7-alex.ben...@linaro.org>
---
 trace/control-internal.h |  10 
 trace/control-vcpu.h |  16 --
 trace/control.h  |  48 -
 hw/core/cpu-common.c |   2 -
 stubs/trace-control.c|  13 -
 trace/control-target.c   | 108 ---
 trace/control.c  |  16 --
 trace/qmp.c  |  74 +++
 trace/trace-hmp-cmds.c   |  18 ++-
 9 files changed, 20 insertions(+), 285 deletions(-)

diff --git a/trace/control-internal.h b/trace/control-internal.h
index 0178121720..8d818d359b 100644
--- a/trace/control-internal.h
+++ b/trace/control-internal.h
@@ -25,16 +25,6 @@ static inline uint32_t trace_event_get_id(TraceEvent *ev)
 return ev->id;
 }
 
-static inline uint32_t trace_event_get_vcpu_id(TraceEvent *ev)
-{
-return 0;
-}
-
-static inline bool trace_event_is_vcpu(TraceEvent *ev)
-{
-return false;
-}
-
 static inline const char * trace_event_get_name(TraceEvent *ev)
 {
 assert(ev != NULL);
diff --git a/trace/control-vcpu.h b/trace/control-vcpu.h
index 0f98ebe7b5..800fc5a219 100644
--- a/trace/control-vcpu.h
+++ b/trace/control-vcpu.h
@@ -30,13 +30,6 @@
  trace_event_get_vcpu_state_dynamic_by_vcpu_id( \
  vcpu, _ ## id ## _EVENT.vcpu_id))
 
-/**
- * trace_event_get_vcpu_state_dynamic:
- *
- * Get the dynamic tracing state of an event for the given vCPU.
- */
-static bool trace_event_get_vcpu_state_dynamic(CPUState *vcpu, TraceEvent *ev);
-
 #include "control-internal.h"
 
 static inline bool
@@ -51,13 +44,4 @@ trace_event_get_vcpu_state_dynamic_by_vcpu_id(CPUState *vcpu,
 }
 }
 
-static inline bool trace_event_get_vcpu_state_dynamic(CPUState *vcpu,
-  TraceEvent *ev)
-{
-uint32_t vcpu_id;
-assert(trace_event_is_vcpu(ev));
-vcpu_id = trace_event_get_vcpu_id(ev);
-return trace_event_get_vcpu_state_dynamic_by_vcpu_id(vcpu, vcpu_id);
-}
-
 #endif
diff --git a/trace/control.h b/trace/control.h
index 23b8393b29..dfd209edd8 100644
--- a/trace/control.h
+++ b/trace/control.h
@@ -89,23 +89,6 @@ static bool trace_event_is_pattern(const char *str);
  */
 static uint32_t trace_event_get_id(TraceEvent *ev);
 
-/**
- * trace_event_get_vcpu_id:
- *
- * Get the per-vCPU identifier of an event.
- *
- * Special value #TRACE_VCPU_EVENT_NONE means the event is not vCPU-specific
- * (does not have the "vcpu" property).
- */
-static uint32_t trace_event_get_vcpu_id(TraceEvent *ev);
-
-/**
- * trace_event_is_vcpu:
- *
- * Whether this is a per-vCPU event.
- */
-static bool trace_event_is_vcpu(TraceEvent *ev);
-
 /**
  * trace_event_get_name:
  *
@@ -172,21 +155,6 @@ static bool trace_event_get_state_dynamic(TraceEvent *ev);
  */
 void trace_event_set_state_dynamic(TraceEvent *ev, bool state);
 
-/**
- * trace_event_set_vcpu_state_dynamic:
- *
- * Set the dynamic tracing state of an event for the given vCPU.
- *
- * Pre-condition: trace_event_get_vcpu_state_static(ev) == true
- *
- * Note: Changes for execution-time events with the 'tcg' property will not be
- *   propagated until the next TB is executed (iff executing in TCG mode).
- */
-void trace_event_set_vcpu_state_dynamic(CPUState *vcpu,
-TraceEvent *ev, bool state);
-
-
-
 /**
  * trace_init_backends:
  *
@@ -205,22 +173,6 @@ bool trace_init_backends(void);
  */
 void trace_init_file(void);
 
-/**
- * trace_init_vcpu:
- * @vcpu: Added vCPU.
- *
- * Set initial dynamic event state for a hot-plugged vCPU.
- */
-void trace_init_vcpu(CPUState *vcpu);
-
-/**
- * trace_fini_vcpu:
- * @vcpu: Removed vCPU.
- *
- * Disable dynamic event state for a hot-unplugged vCPU.
- */
-void trace_fini_vcpu(CPUState *vcpu);
-
 /**
  * trace_list_events:
  * @f: Where to send output.
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
index 951477a7fd..f4e51c8a1b 100644
--- a/hw/core/cpu-common.c
+++ b/hw/core/cpu-common.c
@@ -211,7 +211,6 @@ static void cpu_common_realizefn(DeviceState *dev, Error 
**errp)
 }
 
 /* NOTE: latest generic point where the cpu is fully realized */
-trace_init_vcpu(cpu);
 }
 
 static void cpu_common_unrealizefn(DeviceState *dev)
@@ -219,7 +218,6 @@ static void cpu_common_unrealizefn(DeviceState *dev)
 CPUState *cpu = CPU(dev);
 
 /* NOTE: latest generic point before the cpu is fully unrealized */
-trace_fini_vcpu(cpu);
 cpu_exec_unrealizefn(cpu);
 }
 
diff --git a/stubs/trace-control.c b/stubs/trace-control.c
index 7f856e5c24..b428f34c87 100644
--- a/stubs/trace-control.c
+++ b/stubs/trace-control.c
@@ -36,16 +36,3 @@ void tra

[PATCH v6 11/11] accel/tcg: include cs_base in our hash calculations

2023-05-26 Thread Alex Bennée
We weren't using cs_base in the hash calculations before. Since the
arm front end moved a chunk of flags in a378206a20 (target/arm: Move
mode specific TB flags to tb->cs_base) they comprise of an important
part of the execution state.

Widen the tb_hash_func to include cs_base and expand to qemu_xxhash8()
to accommodate it.

My initial benchmark shows very little difference in the
runtime.

Before:

armhf

➜  hyperfine -w 2 -m 20 "./arm-softmmu/qemu-system-arm -cpu cortex-a15 -machine 
type=virt,highmem=off -display none -m 2048 -serial mon:stdio -netdev 
user,id=unet,hostfwd=tcp::-:22 -device virtio-net-pci,netdev=unet -device 
virtio-scsi-pci -blockdev 
driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-armhf
 -device scsi-hd,drive=hd -smp 4 -kernel 
/home/alex/lsrc/linux.git/builds/arm/arch/arm/boot/zImage -append 
'console=ttyAMA0 root=/dev/sda2 systemd.unit=benchmark.service' -snapshot"
Benchmark 1: ./arm-softmmu/qemu-system-arm -cpu cortex-a15 -machine 
type=virt,highmem=off -display none -m 2048 -serial mon:stdio -netdev 
user,id=unet,hostfwd=tcp::-:22 -device virtio-net-pci,netdev=unet -device 
virtio-scsi-pci -blockdev 
driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-armhf
 -device scsi-hd,drive=hd -smp 4 -kernel 
/home/alex/lsrc/linux.git/builds/arm/arch/arm/boot/zImage -append 
'console=ttyAMA0 root=/dev/sda2 systemd.unit=benchmark.service' -snapshot
  Time (mean ± σ): 24.627 s ±  2.708 s[User: 34.309 s, System: 1.797 s]
  Range (min … max):   22.345 s … 29.864 s20 runs

arm64

➜  hyperfine -w 2 -n 20 "./qemu-system-aarch64 -cpu max,pauth-impdef=on 
-machine type=virt,virtualization=on,gic-version=3 -display none -serial 
mon:stdio -netdev user,id=unet,hostfwd=tcp::-:22,hostfwd=tcp::1234-:1234 
-device virtio-net-pci,netdev=unet -device virtio-scsi-pci -blockdev 
driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-arm64
 -device scsi-hd,drive=hd -smp 4 -kernel 
~/lsrc/linux.git/builds/arm64/arch/arm64/boot/Image.gz -append 'console=ttyAMA0 
root=/dev/sda2 systemd.unit=benchmark-pigz.service' -snapshot"
Benchmark 1: 20
  Time (mean ± σ): 62.559 s ±  2.917 s[User: 189.115 s, System: 4.089 s]
  Range (min … max):   59.997 s … 70.153 s10 runs

After:

armhf

Benchmark 1: ./arm-softmmu/qemu-system-arm -cpu cortex-a15 -machine 
type=virt,highmem=off -display none -m 2048 -serial mon:stdio -netdev 
user,id=unet,hostfwd=tcp::-:22 -device virtio-net-pci,netdev=unet -device 
virtio-scsi-pci -blockdev 
driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-armhf
 -device scsi-hd,drive=hd -smp 4 -kernel 
/home/alex/lsrc/linux.git/builds/arm/arch/arm/boot/zImage -append 
'console=ttyAMA0 root=/dev/sda2 systemd.unit=benchmark.service' -snapshot
  Time (mean ± σ): 24.223 s ±  2.151 s[User: 34.284 s, System: 1.906 s]
  Range (min … max):   22.000 s … 28.476 s20 runs

arm64

hyperfine -w 2 -n 20 "./qemu-system-aarch64 -cpu max,pauth-impdef=on -machine 
type=virt,virtualization=on,gic-version=3 -display none -serial mon:stdio 
-netdev user,id=unet,hostfwd=tcp::-:22,hostfwd=tcp::1234-:1234 -device 
virtio-net-pci,netdev=unet -device virtio-scsi-pci -blockdev 
driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-arm64
 -device scsi-hd,drive=hd -smp 4 -kernel 
~/lsrc/linux.git/builds/arm64/arch/arm64/boot/Image.gz -append 'console=ttyAMA0 
root=/dev/sda2 systemd.unit=benchmark-pigz.service' -snapshot"
Benchmark 1: 20
  Time (mean ± σ): 62.769 s ±  1.978 s[User: 188.431 s, System: 5.269 s]
  Range (min … max):   60.285 s … 66.868 s10 runs

Signed-off-by: Alex Bennée 
Reviewed-by: Richard Henderson 
Message-Id: <20230524133952.3971948-11-alex.ben...@linaro.org>
---
 accel/tcg/tb-hash.h   |  4 ++--
 include/qemu/xxhash.h | 23 +--
 accel/tcg/cpu-exec.c  |  2 +-
 accel/tcg/tb-maint.c  |  4 ++--
 util/qsp.c|  2 +-
 5 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h
index 1d19c69caa..2ba2193731 100644
--- a/accel/tcg/tb-hash.h
+++ b/accel/tcg/tb-hash.h
@@ -62,9 +62,9 @@ static inline unsigned int 
tb_jmp_cache_hash_func(target_ulong pc)
 
 static inline
 uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc,
-  uint32_t flags, uint32_t cf_mask)
+  uint32_t flags, uint64_t flags2, uint32_t cf_mask)
 {
-return qemu_xxhash6(phys_pc, pc, flags, cf_mask);
+return qemu_xxhash8(phys_pc, pc, flags2, flags, cf_mask);
 }
 
 #endif
diff --git a/include/qemu/xxhash.h b/include/qemu/xxhash.h
index c2dcccadbf..0259bbef18 100644
--- a/include/qemu/xxhash.h
+++ b/include/qemu/xxhash.h
@@ -48,8 +48,8 @@
  * xxhash32, customized for input variables that are not guara

[PATCH v6 02/11] trace-events: remove the remaining vcpu trace events

2023-05-26 Thread Alex Bennée
While these are all in helper functions being designated vcpu events
complicates the removal of the dynamic vcpu state code. TCG plugins
allow you to instrument vcpu_[init|exit|idle].

We rename cpu_reset and make it a normal trace point.

Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Alex Bennée 
Message-Id: <20230524133952.3971948-3-alex.ben...@linaro.org>
---
 hw/core/cpu-common.c   |  4 ++--
 trace/control-target.c |  1 -
 trace/control.c|  2 --
 hw/core/trace-events   |  3 +++
 trace-events   | 31 ---
 5 files changed, 5 insertions(+), 36 deletions(-)

diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
index 5ccc3837b6..951477a7fd 100644
--- a/hw/core/cpu-common.c
+++ b/hw/core/cpu-common.c
@@ -32,7 +32,7 @@
 #include "sysemu/tcg.h"
 #include "hw/boards.h"
 #include "hw/qdev-properties.h"
-#include "trace/trace-root.h"
+#include "trace.h"
 #include "qemu/plugin.h"
 
 CPUState *cpu_by_arch_id(int64_t id)
@@ -113,7 +113,7 @@ void cpu_reset(CPUState *cpu)
 {
 device_cold_reset(DEVICE(cpu));
 
-trace_guest_cpu_reset(cpu);
+trace_cpu_reset(cpu->cpu_index);
 }
 
 static void cpu_common_reset_hold(Object *obj)
diff --git a/trace/control-target.c b/trace/control-target.c
index c0c1e2310a..a10752924b 100644
--- a/trace/control-target.c
+++ b/trace/control-target.c
@@ -144,5 +144,4 @@ void trace_init_vcpu(CPUState *vcpu)
 }
 }
 }
-trace_guest_cpu_enter(vcpu);
 }
diff --git a/trace/control.c b/trace/control.c
index 6c77cc6318..d24af91004 100644
--- a/trace/control.c
+++ b/trace/control.c
@@ -277,8 +277,6 @@ void trace_fini_vcpu(CPUState *vcpu)
 TraceEventIter iter;
 TraceEvent *ev;
 
-trace_guest_cpu_exit(vcpu);
-
 trace_event_iter_init_all(&iter);
 while ((ev = trace_event_iter_next(&iter)) != NULL) {
 if (trace_event_is_vcpu(ev) &&
diff --git a/hw/core/trace-events b/hw/core/trace-events
index 56da55bd71..2cf085ac66 100644
--- a/hw/core/trace-events
+++ b/hw/core/trace-events
@@ -29,3 +29,6 @@ clock_set(const char *clk, uint64_t old, uint64_t new) "'%s', 
%"PRIu64"Hz->%"PRI
 clock_propagate(const char *clk) "'%s'"
 clock_update(const char *clk, const char *src, uint64_t hz, int cb) "'%s', 
src='%s', val=%"PRIu64"Hz cb=%d"
 clock_set_mul_div(const char *clk, uint32_t oldmul, uint32_t mul, uint32_t 
olddiv, uint32_t div) "'%s', mul: %u -> %u, div: %u -> %u"
+
+# cpu-common.c
+cpu_reset(int cpu_index) "%d"
diff --git a/trace-events b/trace-events
index 691c3533e4..dd318ed1af 100644
--- a/trace-events
+++ b/trace-events
@@ -54,34 +54,3 @@ qmp_job_resume(void *job) "job %p"
 qmp_job_complete(void *job) "job %p"
 qmp_job_finalize(void *job) "job %p"
 qmp_job_dismiss(void *job) "job %p"
-
-
-### Guest events, keep at bottom
-
-
-## vCPU
-
-# trace/control-target.c
-
-# Hot-plug a new virtual (guest) CPU
-#
-# Mode: user, softmmu
-# Targets: all
-vcpu guest_cpu_enter(void)
-
-# trace/control.c
-
-# Hot-unplug a virtual (guest) CPU
-#
-# Mode: user, softmmu
-# Targets: all
-vcpu guest_cpu_exit(void)
-
-# hw/core/cpu.c
-
-# Reset the state of a virtual (guest) CPU
-#
-# Mode: user, softmmu
-# Targets: all
-vcpu guest_cpu_reset(void)
-
-- 
2.39.2




[PATCH v6 01/11] *-user: remove the guest_user_syscall tracepoints

2023-05-26 Thread Alex Bennée
This is pure duplication now. Both bsd-user and linux-user have
builtin strace support and we can also track syscalls via the plugins
system.

Reviewed-by: Warner Losh 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Alex Bennée 
Message-Id: <20230524133952.3971948-2-alex.ben...@linaro.org>
---
 include/user/syscall-trace.h  |  4 
 bsd-user/freebsd/os-syscall.c |  2 --
 trace-events  | 19 ---
 3 files changed, 25 deletions(-)

diff --git a/include/user/syscall-trace.h b/include/user/syscall-trace.h
index 90bda7631c..557f881a79 100644
--- a/include/user/syscall-trace.h
+++ b/include/user/syscall-trace.h
@@ -26,9 +26,6 @@ static inline void record_syscall_start(void *cpu, int num,
 abi_long arg5, abi_long arg6,
 abi_long arg7, abi_long arg8)
 {
-trace_guest_user_syscall(cpu, num,
- arg1, arg2, arg3, arg4,
- arg5, arg6, arg7, arg8);
 qemu_plugin_vcpu_syscall(cpu, num,
  arg1, arg2, arg3, arg4,
  arg5, arg6, arg7, arg8);
@@ -36,7 +33,6 @@ static inline void record_syscall_start(void *cpu, int num,
 
 static inline void record_syscall_return(void *cpu, int num, abi_long ret)
 {
-trace_guest_user_syscall_ret(cpu, num, ret);
 qemu_plugin_vcpu_syscall_ret(cpu, num, ret);
 }
 
diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index c8f998ecec..b0ae43766f 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -531,7 +531,6 @@ abi_long do_freebsd_syscall(void *cpu_env, int num, 
abi_long arg1,
 CPUState *cpu = env_cpu(cpu_env);
 abi_long ret;
 
-trace_guest_user_syscall(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, 
arg7, arg8);
 if (do_strace) {
 print_freebsd_syscall(num, arg1, arg2, arg3, arg4, arg5, arg6);
 }
@@ -541,7 +540,6 @@ abi_long do_freebsd_syscall(void *cpu_env, int num, 
abi_long arg1,
 if (do_strace) {
 print_freebsd_syscall_ret(num, ret);
 }
-trace_guest_user_syscall_ret(cpu, num, ret);
 
 return ret;
 }
diff --git a/trace-events b/trace-events
index b6b84b175e..691c3533e4 100644
--- a/trace-events
+++ b/trace-events
@@ -85,22 +85,3 @@ vcpu guest_cpu_exit(void)
 # Targets: all
 vcpu guest_cpu_reset(void)
 
-# include/user/syscall-trace.h
-
-# @num: System call number.
-# @arg*: System call argument value.
-#
-# Start executing a guest system call in syscall emulation mode.
-#
-# Mode: user
-# Targets: TCG(all)
-vcpu guest_user_syscall(uint64_t num, uint64_t arg1, uint64_t arg2, uint64_t 
arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, uint64_t arg7, uint64_t 
arg8) "num=0x%016"PRIx64" arg1=0x%016"PRIx64" arg2=0x%016"PRIx64" 
arg3=0x%016"PRIx64" arg4=0x%016"PRIx64" arg5=0x%016"PRIx64" arg6=0x%016"PRIx64" 
arg7=0x%016"PRIx64" arg8=0x%016"PRIx64
-
-# @num: System call number.
-# @ret: System call result value.
-#
-# Finish executing a guest system call in syscall emulation mode.
-#
-# Mode: user
-# Targets: TCG(all)
-vcpu guest_user_syscall_ret(uint64_t num, uint64_t ret) "num=0x%016"PRIx64" 
ret=0x%016"PRIx64
-- 
2.39.2




[PATCH v6 04/11] scripts/qapi: document the tool that generated the file

2023-05-26 Thread Alex Bennée
This makes it a little easier for developers to find where things
where being generated.

Reviewed-by: Richard Henderson 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Markus Armbruster 
Signed-off-by: Alex Bennée 
Message-Id: <20230524133952.3971948-5-alex.ben...@linaro.org>

---
v6
  - use Markus' suggestion for prettier wrapping lines
  - drop un-needed str()
---
 scripts/qapi/gen.py | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/scripts/qapi/gen.py b/scripts/qapi/gen.py
index 8f8f784f4a..70bc576a10 100644
--- a/scripts/qapi/gen.py
+++ b/scripts/qapi/gen.py
@@ -13,6 +13,7 @@
 
 from contextlib import contextmanager
 import os
+import sys
 import re
 from typing import (
 Dict,
@@ -162,7 +163,7 @@ def __init__(self, fname: str, blurb: str, pydoc: str):
 
 def _top(self) -> str:
 return mcgen('''
-/* AUTOMATICALLY GENERATED, DO NOT MODIFY */
+/* AUTOMATICALLY GENERATED by %(tool)s DO NOT MODIFY */
 
 /*
 %(blurb)s
@@ -174,6 +175,7 @@ def _top(self) -> str:
  */
 
 ''',
+ tool=os.path.basename(sys.argv[0]),
  blurb=self._blurb, copyright=self._copyright)
 
 def _bottom(self) -> str:
@@ -195,7 +197,10 @@ def _bottom(self) -> str:
 
 class QAPIGenTrace(QAPIGen):
 def _top(self) -> str:
-return super()._top() + '# AUTOMATICALLY GENERATED, DO NOT MODIFY\n\n'
+return (super()._top()
++ '# AUTOMATICALLY GENERATED by '
++ os.path.basename(sys.argv[0])
++ ', DO NOT MODIFY\n\n')
 
 
 @contextmanager
-- 
2.39.2




[PATCH v6 09/11] tcg: remove the final vestiges of dstate

2023-05-26 Thread Alex Bennée
Now we no longer have dynamic state affecting things we can remove the
additional fields in cpu.h and simplify the TB hash calculation.

For the benchmark:

hyperfine -w 2 -m 20 \
  "./arm-softmmu/qemu-system-arm -cpu cortex-a15 \
-machine type=virt,highmem=off \
-display none -m 2048 \
-serial mon:stdio \
-netdev user,id=unet,hostfwd=tcp::-:22 \
-device virtio-net-pci,netdev=unet \
-device virtio-scsi-pci \
-blockdev 
driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-armhf
 \
-device scsi-hd,drive=hd -smp 4 \
-kernel /home/alex/lsrc/linux.git/builds/arm/arch/arm/boot/zImage \
-append 'console=ttyAMA0 root=/dev/sda2 systemd.unit=benchmark.service' 
\
-snapshot"

It has a marginal effect on runtime, before:

  Time (mean ± σ): 26.279 s ±  2.438 s[User: 41.113 s, System: 1.843 s]
  Range (min … max):   24.420 s … 32.565 s20 runs

after:

  Time (mean ± σ): 24.440 s ±  2.885 s[User: 34.474 s, System: 2.028 s]
  Range (min … max):   21.663 s … 29.937 s20 runs

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1358
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Alex Bennée 
Message-Id: <20230524133952.3971948-9-alex.ben...@linaro.org>
---
 accel/tcg/tb-hash.h   | 6 +++---
 include/exec/exec-all.h   | 3 ---
 include/hw/core/cpu.h | 5 -
 accel/tcg/cpu-exec.c  | 7 +--
 accel/tcg/tb-maint.c  | 5 ++---
 accel/tcg/translate-all.c | 6 --
 6 files changed, 6 insertions(+), 26 deletions(-)

diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h
index 83dc610e4c..1d19c69caa 100644
--- a/accel/tcg/tb-hash.h
+++ b/accel/tcg/tb-hash.h
@@ -61,10 +61,10 @@ static inline unsigned int 
tb_jmp_cache_hash_func(target_ulong pc)
 #endif /* CONFIG_SOFTMMU */
 
 static inline
-uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags,
-  uint32_t cf_mask, uint32_t trace_vcpu_dstate)
+uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc,
+  uint32_t flags, uint32_t cf_mask)
 {
-return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
+return qemu_xxhash6(phys_pc, pc, flags, cf_mask);
 }
 
 #endif
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 4d2b151986..3b1b57f6ad 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -545,9 +545,6 @@ struct TranslationBlock {
 #define CF_CLUSTER_MASK  0xff00 /* Top 8 bits are cluster ID */
 #define CF_CLUSTER_SHIFT 24
 
-/* Per-vCPU dynamic tracing state used to generate this TB */
-uint32_t trace_vcpu_dstate;
-
 /*
  * Above fields used for comparing
  */
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 39150cf8f8..383456d1b3 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -266,7 +266,6 @@ typedef void (*run_on_cpu_func)(CPUState *cpu, 
run_on_cpu_data data);
 struct qemu_work_item;
 
 #define CPU_UNSET_NUMA_NODE_ID -1
-#define CPU_TRACE_DSTATE_MAX_EVENTS 32
 
 /**
  * CPUState:
@@ -407,10 +406,6 @@ struct CPUState {
 /* Use by accel-block: CPU is executing an ioctl() */
 QemuLockCnt in_ioctl_lock;
 
-/* Used for events with 'vcpu' and *without* the 'disabled' properties */
-DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS);
-DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS);
-
 DECLARE_BITMAP(plugin_mask, QEMU_PLUGIN_EV_MAX);
 
 #ifdef CONFIG_PLUGIN
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 0e741960da..4a1dce98ff 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -175,7 +175,6 @@ struct tb_desc {
 tb_page_addr_t page_addr0;
 uint32_t flags;
 uint32_t cflags;
-uint32_t trace_vcpu_dstate;
 };
 
 static bool tb_lookup_cmp(const void *p, const void *d)
@@ -187,7 +186,6 @@ static bool tb_lookup_cmp(const void *p, const void *d)
 tb_page_addr0(tb) == desc->page_addr0 &&
 tb->cs_base == desc->cs_base &&
 tb->flags == desc->flags &&
-tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
 tb_cflags(tb) == desc->cflags) {
 /* check next page if needed */
 tb_page_addr_t tb_phys_page1 = tb_page_addr1(tb);
@@ -228,7 +226,6 @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, 
target_ulong pc,
 desc.cs_base = cs_base;
 desc.flags = flags;
 desc.cflags = cflags;
-desc.trace_vcpu_dstate = *cpu->trace_dstate;
 desc.pc = pc;
 phys_pc = get_page_addr_code(desc.env, pc);
 if (phys_pc == -1) {
@@ -236,7 +233,7 @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, 
target_ulong pc,
 }
 desc.page_addr0 = phys_pc;
 h = tb_hash_func(phys_pc, (cflags & CF_PCREL ? 0 : pc),
- flags, cflags, *cpu->trace_dstate);
+ flags, cflags)

[PATCH v6 08/11] trace: remove control-vcpu.h

2023-05-26 Thread Alex Bennée
Now we no longer have vcpu controlled trace events we can excise the
code that allows us to query its status.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Richard Henderson 
Signed-off-by: Alex Bennée 
Message-Id: <20230524133952.3971948-8-alex.ben...@linaro.org>
---
 trace/control-vcpu.h  | 47 ---
 trace/qmp.c   |  2 +-
 scripts/tracetool/format/h.py |  5 +---
 3 files changed, 2 insertions(+), 52 deletions(-)
 delete mode 100644 trace/control-vcpu.h

diff --git a/trace/control-vcpu.h b/trace/control-vcpu.h
deleted file mode 100644
index 800fc5a219..00
--- a/trace/control-vcpu.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Interface for configuring and controlling the state of tracing events.
- *
- * Copyright (C) 2011-2016 Lluís Vilanova 
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#ifndef TRACE__CONTROL_VCPU_H
-#define TRACE__CONTROL_VCPU_H
-
-#include "control.h"
-#include "event-internal.h"
-#include "hw/core/cpu.h"
-
-/**
- * trace_event_get_vcpu_state:
- * @vcpu: Target vCPU.
- * @id: Event identifier name.
- *
- * Get the tracing state of an event (both static and dynamic) for the given
- * vCPU.
- *
- * If the event has the disabled property, the check will have no performance
- * impact.
- */
-#define trace_event_get_vcpu_state(vcpu, id)\
-((id ##_ENABLED) && \
- trace_event_get_vcpu_state_dynamic_by_vcpu_id( \
- vcpu, _ ## id ## _EVENT.vcpu_id))
-
-#include "control-internal.h"
-
-static inline bool
-trace_event_get_vcpu_state_dynamic_by_vcpu_id(CPUState *vcpu,
-  uint32_t vcpu_id)
-{
-/* it's on fast path, avoid consistency checks (asserts) */
-if (unlikely(trace_events_enabled_count)) {
-return test_bit(vcpu_id, vcpu->trace_dstate);
-} else {
-return false;
-}
-}
-
-#endif
diff --git a/trace/qmp.c b/trace/qmp.c
index aa760f1fc4..3e3971c6a8 100644
--- a/trace/qmp.c
+++ b/trace/qmp.c
@@ -10,7 +10,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-trace.h"
-#include "control-vcpu.h"
+#include "control.h"
 
 
 static bool check_events(bool ignore_unavailable, bool is_pattern,
diff --git a/scripts/tracetool/format/h.py b/scripts/tracetool/format/h.py
index 285d7b03a9..ea126b07ea 100644
--- a/scripts/tracetool/format/h.py
+++ b/scripts/tracetool/format/h.py
@@ -16,10 +16,7 @@
 
 
 def generate(events, backend, group):
-if group == "root":
-header = "trace/control-vcpu.h"
-else:
-header = "trace/control.h"
+header = "trace/control.h"
 
 out('/* This file is autogenerated by tracetool, do not edit. */',
 '',
-- 
2.39.2




[PATCH v6 00/11] tracing: remove dynamic vcpu state

2023-05-26 Thread Alex Bennée
Hi Stefan,

The references dynamic vcpu tracing support was removed when the
original TCG trace points where removed. However there was still a
legacy of dynamic trace state to track this in cpu.h and extra hash
variables to track TBs. While the removed vcpu tracepoints are not in
generated code (or helpers) they still bring in a bunch of machinery
to manage the state so I've pulled them out. We keep and rename one
(cpu_reset) to a static trace points which dump vcpu->index as it is
useful to f4bug.

v6 new patch to shuffle deprecated, added rth's rb, qapi doc cleanups

Please queue into your tree.

Alex Bennée (11):
  *-user: remove the guest_user_syscall tracepoints
  trace-events: remove the remaining vcpu trace events
  trace: remove vcpu_id from the TraceEvent structure
  scripts/qapi: document the tool that generated the file
  docs/deprecated: move QMP events bellow QMP command section
  qapi: make the vcpu parameters deprecated for 8.1
  trace: remove code that depends on setting vcpu
  trace: remove control-vcpu.h
  tcg: remove the final vestiges of dstate
  hw/9pfs: use qemu_xxhash4
  accel/tcg: include cs_base in our hash calculations

 docs/about/deprecated.rst |  25 +---
 qapi/trace.json   |  40 ++---
 accel/tcg/tb-hash.h   |   6 +-
 include/exec/exec-all.h   |   3 -
 include/hw/core/cpu.h |   5 --
 include/qemu/xxhash.h |  23 +--
 include/user/syscall-trace.h  |   4 --
 trace/control-internal.h  |  10 
 trace/control-vcpu.h  |  63 
 trace/control.h   |  48 ---
 trace/event-internal.h|   2 -
 accel/tcg/cpu-exec.c  |   7 +--
 accel/tcg/tb-maint.c  |   5 +-
 accel/tcg/translate-all.c |   6 --
 bsd-user/freebsd/os-syscall.c |   2 -
 hw/9pfs/9p.c  |   5 +-
 hw/core/cpu-common.c  |   6 +-
 stubs/trace-control.c |  13 
 trace/control-target.c| 109 +++---
 trace/control.c   |  28 -
 trace/qmp.c   |  76 +++-
 trace/trace-hmp-cmds.c|  18 +-
 util/qsp.c|   2 +-
 hw/core/trace-events  |   3 +
 scripts/qapi/gen.py   |   9 ++-
 scripts/tracetool/format/c.py |   6 --
 scripts/tracetool/format/h.py |  16 +
 trace-events  |  50 
 28 files changed, 94 insertions(+), 496 deletions(-)
 delete mode 100644 trace/control-vcpu.h

-- 
2.39.2




[PATCH v6 03/11] trace: remove vcpu_id from the TraceEvent structure

2023-05-26 Thread Alex Bennée
This does involve temporarily stubbing out some helper functions
before we excise the rest of the code.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Richard Henderson 
Signed-off-by: Alex Bennée 
Message-Id: <20230524133952.3971948-4-alex.ben...@linaro.org>
---
 trace/control-internal.h  |  4 ++--
 trace/event-internal.h|  2 --
 trace/control.c   | 10 --
 scripts/tracetool/format/c.py |  6 --
 scripts/tracetool/format/h.py | 11 +--
 5 files changed, 3 insertions(+), 30 deletions(-)

diff --git a/trace/control-internal.h b/trace/control-internal.h
index 8b2b50a7cf..0178121720 100644
--- a/trace/control-internal.h
+++ b/trace/control-internal.h
@@ -27,12 +27,12 @@ static inline uint32_t trace_event_get_id(TraceEvent *ev)
 
 static inline uint32_t trace_event_get_vcpu_id(TraceEvent *ev)
 {
-return ev->vcpu_id;
+return 0;
 }
 
 static inline bool trace_event_is_vcpu(TraceEvent *ev)
 {
-return ev->vcpu_id != TRACE_VCPU_EVENT_NONE;
+return false;
 }
 
 static inline const char * trace_event_get_name(TraceEvent *ev)
diff --git a/trace/event-internal.h b/trace/event-internal.h
index f63500b37e..0c24e01b52 100644
--- a/trace/event-internal.h
+++ b/trace/event-internal.h
@@ -19,7 +19,6 @@
 /**
  * TraceEvent:
  * @id: Unique event identifier.
- * @vcpu_id: Unique per-vCPU event identifier.
  * @name: Event name.
  * @sstate: Static tracing state.
  * @dstate: Dynamic tracing state
@@ -33,7 +32,6 @@
  */
 typedef struct TraceEvent {
 uint32_t id;
-uint32_t vcpu_id;
 const char * name;
 const bool sstate;
 uint16_t *dstate;
diff --git a/trace/control.c b/trace/control.c
index d24af91004..5dfb609954 100644
--- a/trace/control.c
+++ b/trace/control.c
@@ -68,16 +68,6 @@ void trace_event_register_group(TraceEvent **events)
 size_t i;
 for (i = 0; events[i] != NULL; i++) {
 events[i]->id = next_id++;
-if (events[i]->vcpu_id == TRACE_VCPU_EVENT_NONE) {
-continue;
-}
-
-if (likely(next_vcpu_id < CPU_TRACE_DSTATE_MAX_EVENTS)) {
-events[i]->vcpu_id = next_vcpu_id++;
-} else {
-warn_report("too many vcpu trace events; dropping '%s'",
-events[i]->name);
-}
 }
 event_groups = g_renew(TraceEventGroup, event_groups, nevent_groups + 1);
 event_groups[nevent_groups].events = events;
diff --git a/scripts/tracetool/format/c.py b/scripts/tracetool/format/c.py
index c390c1844a..69edf0d588 100644
--- a/scripts/tracetool/format/c.py
+++ b/scripts/tracetool/format/c.py
@@ -32,19 +32,13 @@ def generate(events, backend, group):
 out('uint16_t %s;' % e.api(e.QEMU_DSTATE))
 
 for e in events:
-if "vcpu" in e.properties:
-vcpu_id = 0
-else:
-vcpu_id = "TRACE_VCPU_EVENT_NONE"
 out('TraceEvent %(event)s = {',
 '.id = 0,',
-'.vcpu_id = %(vcpu_id)s,',
 '.name = \"%(name)s\",',
 '.sstate = %(sstate)s,',
 '.dstate = &%(dstate)s ',
 '};',
 event = e.api(e.QEMU_EVENT),
-vcpu_id = vcpu_id,
 name = e.name,
 sstate = "TRACE_%s_ENABLED" % e.name.upper(),
 dstate = e.api(e.QEMU_DSTATE))
diff --git a/scripts/tracetool/format/h.py b/scripts/tracetool/format/h.py
index e94f0be7da..285d7b03a9 100644
--- a/scripts/tracetool/format/h.py
+++ b/scripts/tracetool/format/h.py
@@ -74,16 +74,7 @@ def generate(events, backend, group):
 
 out('}')
 
-# tracer wrapper with checks (per-vCPU tracing)
-if "vcpu" in e.properties:
-trace_cpu = next(iter(e.args))[1]
-cond = "trace_event_get_vcpu_state(%(cpu)s,"\
-   " TRACE_%(id)s)"\
-   % dict(
-   cpu=trace_cpu,
-   id=e.name.upper())
-else:
-cond = "true"
+cond = "true"
 
 out('',
 'static inline void %(api)s(%(args)s)',
-- 
2.39.2




[PATCH v6 05/11] docs/deprecated: move QMP events bellow QMP command section

2023-05-26 Thread Alex Bennée
Also rename the section to make the fact this is part of the
management protocol even clearer.

Suggested-by: Markus Armbruster 
Signed-off-by: Alex Bennée 
---
 docs/about/deprecated.rst | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index e934e0a13a..7c45a64363 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -218,6 +218,15 @@ instruction per translated block" mode (which can be set 
on the
 command line or via the HMP, but not via QMP). The information remains
 available via the HMP 'info jit' command.
 
+QEMU Machine Protocol (QMP) events
+--
+
+``MEM_UNPLUG_ERROR`` (since 6.2)
+
+
+Use the more generic event ``DEVICE_UNPLUG_GUEST_ERROR`` instead.
+
+
 Human Monitor Protocol (HMP) commands
 -
 
@@ -251,15 +260,6 @@ it. Since all recent x86 hardware from the past >10 years 
is capable of the
 64-bit x86 extensions, a corresponding 64-bit OS should be used instead.
 
 
-QEMU API (QAPI) events
---
-
-``MEM_UNPLUG_ERROR`` (since 6.2)
-
-
-Use the more generic event ``DEVICE_UNPLUG_GUEST_ERROR`` instead.
-
-
 System emulator machines
 
 
-- 
2.39.2




[PATCH v6 06/11] qapi: make the vcpu parameters deprecated for 8.1

2023-05-26 Thread Alex Bennée
I don't think I can remove the parameters directly but certainly mark
them as deprecated.

Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Alex Bennée 
Message-Id: <20230524133952.3971948-6-alex.ben...@linaro.org>

---
v6
  - s/QAPI/QMP/
  - /and always false./and always ignored./
---
 docs/about/deprecated.rst |  7 +++
 qapi/trace.json   | 40 +--
 2 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index 7c45a64363..0743459862 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -226,6 +226,13 @@ QEMU Machine Protocol (QMP) events
 
 Use the more generic event ``DEVICE_UNPLUG_GUEST_ERROR`` instead.
 
+``vcpu`` trace events (since 8.1)
+'
+
+The ability to instrument QEMU helper functions with vCPU-aware trace
+points was removed in 7.0. However QMP still exposed the vcpu
+parameter. This argument has now been deprecated and the remaining
+remaining trace points that used it are selected just by name.
 
 Human Monitor Protocol (HMP) commands
 -
diff --git a/qapi/trace.json b/qapi/trace.json
index 6bf0af0946..39b752fc88 100644
--- a/qapi/trace.json
+++ b/qapi/trace.json
@@ -37,13 +37,14 @@
 #
 # @vcpu: Whether this is a per-vCPU event (since 2.7).
 #
-# An event is per-vCPU if it has the "vcpu" property in the
-# "trace-events" files.
+# Features:
+# @deprecated: Member @vcpu is deprecated, and always ignored.
 #
 # Since: 2.2
 ##
 { 'struct': 'TraceEventInfo',
-  'data': {'name': 'str', 'state': 'TraceEventState', 'vcpu': 'bool'} }
+  'data': {'name': 'str', 'state': 'TraceEventState',
+   'vcpu': { 'type': 'bool', 'features': ['deprecated'] } } }
 
 ##
 # @trace-event-get-state:
@@ -52,19 +53,15 @@
 #
 # @name: Event name pattern (case-sensitive glob).
 #
-# @vcpu: The vCPU to query (any by default; since 2.7).
+# @vcpu: The vCPU to query (since 2.7).
 #
-# Returns: a list of @TraceEventInfo for the matching events
-#
-# An event is returned if:
+# Features:
+# @deprecated: Member @vcpu is deprecated, and always ignored.
 #
-# - its name matches the @name pattern, and
-# - if @vcpu is given, the event has the "vcpu" property.
+# Returns: a list of @TraceEventInfo for the matching events
 #
-# Therefore, if @vcpu is given, the operation will only match per-vCPU
-# events, returning their state on the specified vCPU. Special case:
-# if @name is an exact match, @vcpu is given and the event does not
-# have the "vcpu" property, an error is returned.
+# An event is returned if its name matches the @name pattern
+# (There are no longer any per-vCPU events).
 #
 # Since: 2.2
 #
@@ -75,7 +72,8 @@
 # <- { "return": [ { "name": "qemu_memalign", "state": "disabled", "vcpu": 
false } ] }
 ##
 { 'command': 'trace-event-get-state',
-  'data': {'name': 'str', '*vcpu': 'int'},
+  'data': {'name': 'str',
+   '*vcpu': {'type': 'int', 'features': ['deprecated'] } },
   'returns': ['TraceEventInfo'] }
 
 ##
@@ -91,15 +89,11 @@
 #
 # @vcpu: The vCPU to act upon (all by default; since 2.7).
 #
-# An event's state is modified if:
-#
-# - its name matches the @name pattern, and
-# - if @vcpu is given, the event has the "vcpu" property.
+# Features:
+# @deprecated: Member @vcpu is deprecated, and always ignored.
 #
-# Therefore, if @vcpu is given, the operation will only match per-vCPU
-# events, setting their state on the specified vCPU. Special case: if
-# @name is an exact match, @vcpu is given and the event does not have
-# the "vcpu" property, an error is returned.
+# An event is enabled if its name matches the @name pattern
+# (There are no longer any per-vCPU events).
 #
 # Since: 2.2
 #
@@ -111,4 +105,4 @@
 ##
 { 'command': 'trace-event-set-state',
   'data': {'name': 'str', 'enable': 'bool', '*ignore-unavailable': 'bool',
-   '*vcpu': 'int'} }
+   '*vcpu': {'type': 'int', 'features': ['deprecated'] } } }
-- 
2.39.2




Re: [PATCH v1 6/9] KVM: x86: Add Heki hypervisor support

2023-05-26 Thread Mickaël Salaün



On 08/05/2023 23:18, Wei Liu wrote:

On Fri, May 05, 2023 at 05:20:43PM +0200, Mickaël Salaün wrote:

From: Madhavan T. Venkataraman 

Each supported hypervisor in x86 implements a struct x86_hyper_init to
define the init functions for the hypervisor.  Define a new init_heki()
entry point in struct x86_hyper_init.  Hypervisors that support Heki
must define this init_heki() function.  Call init_heki() of the chosen
hypervisor in init_hypervisor_platform().

Create a heki_hypervisor structure that each hypervisor can fill
with its data and functions. This will allow the Heki feature to work
in a hypervisor agnostic way.

Declare and initialize a "heki_hypervisor" structure for KVM so KVM can
support Heki.  Define the init_heki() function for KVM.  In init_heki(),
set the hypervisor field in the generic "heki" structure to the KVM
"heki_hypervisor".  After this point, generic Heki code can access the
KVM Heki data and functions.


[...]

+static void kvm_init_heki(void)
+{
+   long err;
+
+   if (!kvm_para_available())
+   /* Cannot make KVM hypercalls. */
+   return;
+
+   err = kvm_hypercall3(KVM_HC_LOCK_MEM_PAGE_RANGES, -1, -1, -1);


Why not do a proper version check or capability check here? If the ABI
or supported features ever change then we have something to rely on?


The attributes will indeed get extended, but I wanted to have a simple 
proposal for now.


Do you mean to get the version of this hypercall e.g., with a dedicated 
flag, like with the 
landlock_create_ruleset/LANDLOCK_CREATE_RULESET_VERSION syscall?





Thanks,
Wei.




Re: [PATCH 6/6] target/riscv: Add HS-mode virtual interrupt and IRQ filtering support.

2023-05-26 Thread Rajnesh Kanwal
On Mon, May 22, 2023 at 6:18 PM Daniel Henrique Barboza
 wrote:
>
>
>
> On 5/18/23 08:38, Rajnesh Kanwal wrote:
> > This change adds support for inserting virtual interrupts from HS-mode
> > into VS-mode using hvien and hvip csrs. This also allows for IRQ filtering
> > from HS-mode.
> >
> > Also, the spec doesn't mandate the interrupt to be actually supported
> > in hardware. Which allows HS-mode to assert virtual interrupts to VS-mode
> > that have no connection to any real interrupt events.
> >
> > This is defined as part of the AIA specification [0], "6.3.2 Virtual
> > interrupts for VS level".
> >
> > [0]: 
> > https://github.com/riscv/riscv-aia/releases/download/1.0-RC4/riscv-interrupts-1.0-RC4.pdf
> >
> > Signed-off-by: Rajnesh Kanwal 
> > ---
> >   target/riscv/cpu.c|   3 +-
> >   target/riscv/cpu.h|  14 +++
> >   target/riscv/cpu_helper.c |  48 +++---
> >   target/riscv/csr.c| 196 ++
> >   target/riscv/machine.c|   3 +
> >   5 files changed, 234 insertions(+), 30 deletions(-)
> >
> > diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> > index 9557194a21..c2b05d4c37 100644
> > --- a/target/riscv/cpu.c
> > +++ b/target/riscv/cpu.c
> > @@ -713,7 +713,8 @@ static bool riscv_cpu_has_work(CPUState *cs)
> >* mode and delegation registers, but respect individual enables
> >*/
> >   return riscv_cpu_all_pending(env) != 0 ||
> > -riscv_cpu_sirq_pending(env) != RISCV_EXCP_NONE;
> > +riscv_cpu_sirq_pending(env) != RISCV_EXCP_NONE ||
> > +riscv_cpu_vsirq_pending(env) != RISCV_EXCP_NONE;
> >   #else
> >   return true;
> >   #endif
> > diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> > index 07cf656471..3e10eee38f 100644
> > --- a/target/riscv/cpu.h
> > +++ b/target/riscv/cpu.h
> > @@ -196,6 +196,12 @@ struct CPUArchState {
> >*/
> >   uint64_t sie;
> >
> > +/*
> > + * When hideleg[i]=0 and hvien[i]=1, vsie[i] is no more
> > + * alias of sie[i] (mie[i]) and needs to be maintained separatly.
> > + */
> > +uint64_t vsie;
> > +
> >   target_ulong satp;   /* since: priv-1.10.0 */
> >   target_ulong stval;
> >   target_ulong medeleg;
> > @@ -230,6 +236,14 @@ struct CPUArchState {
> >   target_ulong hgeie;
> >   target_ulong hgeip;
> >   uint64_t htimedelta;
> > +uint64_t hvien;
> > +
> > +/*
> > + * Bits VSSIP, VSTIP and VSEIP in hvip are maintained in mip. Other 
> > bits
> > + * from 0:12 are reserved. Bits 13:63 are not aliased and must be 
> > separately
> > + * maintain in hvip.
> > + */
> > +uint64_t hvip;
> >
> >   /* Hypervisor controlled virtual interrupt priorities */
> >   target_ulong hvictl;
> > diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> > index 681b4ae811..80bdd4cf5a 100644
> > --- a/target/riscv/cpu_helper.c
> > +++ b/target/riscv/cpu_helper.c
> > @@ -366,8 +366,9 @@ static int riscv_cpu_pending_to_irq(CPURISCVState *env,
> >   }
> >
> >   /*
> > - * Doesn't report interrupts inserted using mvip from M-mode firmware. 
> > Those
> > - * are returned in riscv_cpu_sirq_pending().
> > + * Doesn't report interrupts inserted using mvip from M-mode firmware or
> > + * using hvip bits 13:63 from HS-mode. Those are returned in
> > + * riscv_cpu_sirq_pending() and riscv_cpu_vsirq_pending().
> >*/
> >   uint64_t riscv_cpu_all_pending(CPURISCVState *env)
> >   {
> > @@ -399,16 +400,23 @@ int riscv_cpu_sirq_pending(CPURISCVState *env)
> >
> >   int riscv_cpu_vsirq_pending(CPURISCVState *env)
> >   {
> > -uint64_t irqs = riscv_cpu_all_pending(env) & env->mideleg &
> > -(MIP_VSSIP | MIP_VSTIP | MIP_VSEIP);
> > +uint64_t irqs = riscv_cpu_all_pending(env) & env->mideleg & 
> > env->hideleg;
> > +uint64_t irqs_f_vs = env->hvip & env->hvien & ~env->hideleg & 
> > env->vsie;
> > +uint64_t vsbits;
> > +
> > +/* Bring VS-level bits to correct position */
> > +vsbits = irqs & VS_MODE_INTERRUPTS;
> > +irqs &= ~VS_MODE_INTERRUPTS;
> > +irqs |= vsbits >> 1;
> >
> >   return riscv_cpu_pending_to_irq(env, IRQ_S_EXT, IPRIO_DEFAULT_S,
> > -irqs >> 1, env->hviprio);
> > +(irqs | irqs_f_vs), env->hviprio);
> >   }
> >
> >   static int riscv_cpu_local_irq_pending(CPURISCVState *env)
> >   {
> > -uint64_t irqs, pending, mie, hsie, vsie, irqs_f;
> > +uint64_t irqs, pending, mie, hsie, vsie, irqs_f, irqs_f_vs;
> > +uint64_t vsbits, irq_delegated;
> >   int virq;
> >
> >   /* Determine interrupt enable state of all privilege modes */
> > @@ -445,12 +453,26 @@ static int riscv_cpu_local_irq_pending(CPURISCVState 
> > *env)
> >   irqs, env->siprio);
> >   }
> >
> > +/* Check for virtual VS-mode interrupts. */
> > +irqs_f_vs = env->hvip & env->hvien & ~env->hideleg & env->vsie;
> > +
> >   /* Check V

[PATCH v2 6/6] target/riscv: Add HS-mode virtual interrupt and IRQ filtering support.

2023-05-26 Thread Rajnesh Kanwal
This change adds support for inserting virtual interrupts from HS-mode
into VS-mode using hvien and hvip csrs. This also allows for IRQ filtering
from HS-mode.

Also, the spec doesn't mandate the interrupt to be actually supported
in hardware. Which allows HS-mode to assert virtual interrupts to VS-mode
that have no connection to any real interrupt events.

This is defined as part of the AIA specification [0], "6.3.2 Virtual
interrupts for VS level".

[0]: 
https://github.com/riscv/riscv-aia/releases/download/1.0-RC4/riscv-interrupts-1.0-RC4.pdf

Signed-off-by: Rajnesh Kanwal 
---
 target/riscv/cpu.c|   3 +-
 target/riscv/cpu.h|  14 +++
 target/riscv/cpu_helper.c |  48 +++---
 target/riscv/csr.c| 196 ++
 target/riscv/machine.c|   3 +
 5 files changed, 234 insertions(+), 30 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 7c4999431a..6f2f8f21cc 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -713,7 +713,8 @@ static bool riscv_cpu_has_work(CPUState *cs)
  * mode and delegation registers, but respect individual enables
  */
 return riscv_cpu_all_pending(env) != 0 ||
-riscv_cpu_sirq_pending(env) != RISCV_EXCP_NONE;
+riscv_cpu_sirq_pending(env) != RISCV_EXCP_NONE ||
+riscv_cpu_vsirq_pending(env) != RISCV_EXCP_NONE;
 #else
 return true;
 #endif
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 07cf656471..3e10eee38f 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -196,6 +196,12 @@ struct CPUArchState {
  */
 uint64_t sie;
 
+/*
+ * When hideleg[i]=0 and hvien[i]=1, vsie[i] is no more
+ * alias of sie[i] (mie[i]) and needs to be maintained separatly.
+ */
+uint64_t vsie;
+
 target_ulong satp;   /* since: priv-1.10.0 */
 target_ulong stval;
 target_ulong medeleg;
@@ -230,6 +236,14 @@ struct CPUArchState {
 target_ulong hgeie;
 target_ulong hgeip;
 uint64_t htimedelta;
+uint64_t hvien;
+
+/*
+ * Bits VSSIP, VSTIP and VSEIP in hvip are maintained in mip. Other bits
+ * from 0:12 are reserved. Bits 13:63 are not aliased and must be 
separately
+ * maintain in hvip.
+ */
+uint64_t hvip;
 
 /* Hypervisor controlled virtual interrupt priorities */
 target_ulong hvictl;
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 6567ddef7b..fd7dae9b68 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -366,8 +366,9 @@ static int riscv_cpu_pending_to_irq(CPURISCVState *env,
 }
 
 /*
- * Doesn't report interrupts inserted using mvip from M-mode firmware. Those
- * are returned in riscv_cpu_sirq_pending().
+ * Doesn't report interrupts inserted using mvip from M-mode firmware or
+ * using hvip bits 13:63 from HS-mode. Those are returned in
+ * riscv_cpu_sirq_pending() and riscv_cpu_vsirq_pending().
  */
 uint64_t riscv_cpu_all_pending(CPURISCVState *env)
 {
@@ -399,16 +400,23 @@ int riscv_cpu_sirq_pending(CPURISCVState *env)
 
 int riscv_cpu_vsirq_pending(CPURISCVState *env)
 {
-uint64_t irqs = riscv_cpu_all_pending(env) & env->mideleg &
-(MIP_VSSIP | MIP_VSTIP | MIP_VSEIP);
+uint64_t irqs = riscv_cpu_all_pending(env) & env->mideleg & env->hideleg;
+uint64_t irqs_f_vs = env->hvip & env->hvien & ~env->hideleg & env->vsie;
+uint64_t vsbits;
+
+/* Bring VS-level bits to correct position */
+vsbits = irqs & VS_MODE_INTERRUPTS;
+irqs &= ~VS_MODE_INTERRUPTS;
+irqs |= vsbits >> 1;
 
 return riscv_cpu_pending_to_irq(env, IRQ_S_EXT, IPRIO_DEFAULT_S,
-irqs >> 1, env->hviprio);
+(irqs | irqs_f_vs), env->hviprio);
 }
 
 static int riscv_cpu_local_irq_pending(CPURISCVState *env)
 {
-uint64_t irqs, pending, mie, hsie, vsie, irqs_f;
+uint64_t irqs, pending, mie, hsie, vsie, irqs_f, irqs_f_vs;
+uint64_t vsbits, irq_delegated;
 int virq;
 
 /* Determine interrupt enable state of all privilege modes */
@@ -445,12 +453,26 @@ static int riscv_cpu_local_irq_pending(CPURISCVState *env)
 irqs, env->siprio);
 }
 
+/* Check for virtual VS-mode interrupts. */
+irqs_f_vs = env->hvip & env->hvien & ~env->hideleg & env->vsie;
+
 /* Check VS-mode interrupts */
-irqs = pending & env->mideleg & env->hideleg & -vsie;
+irq_delegated = pending & env->mideleg & env->hideleg;
+
+/* Bring VS-level bits to correct position */
+vsbits = irq_delegated & VS_MODE_INTERRUPTS;
+irq_delegated &= ~VS_MODE_INTERRUPTS;
+irq_delegated |= vsbits >> 1;
+
+irqs = (irq_delegated | irqs_f_vs) & -vsie;
 if (irqs) {
 virq = riscv_cpu_pending_to_irq(env, IRQ_S_EXT, IPRIO_DEFAULT_S,
-irqs >> 1, env->hviprio);
-return (virq <= 0) ? virq : virq + 1;
+irqs, env->hviprio);
+  

[PATCH v2 4/6] target/riscv: Split interrupt logic from riscv_cpu_update_mip.

2023-05-26 Thread Rajnesh Kanwal
This is to allow virtual interrupts to be inserted into S and VS
modes. Given virtual interrupts will be maintained in separate
mvip and hvip CSRs, riscv_cpu_update_mip will no longer be in the
path and interrupts need to be triggered for these cases from
rmw_hvip64 and rmw_mvip64 functions.

Signed-off-by: Rajnesh Kanwal 
---
 target/riscv/cpu.h|  1 +
 target/riscv/cpu_helper.c | 25 ++---
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index de7e43126a..de55bfb775 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -562,6 +562,7 @@ void riscv_cpu_swap_hypervisor_regs(CPURISCVState *env);
 int riscv_cpu_claim_interrupts(RISCVCPU *cpu, uint64_t interrupts);
 uint64_t riscv_cpu_update_mip(CPURISCVState *env, uint64_t mask,
   uint64_t value);
+void riscv_cpu_interrupt(CPURISCVState *env);
 #define BOOL_TO_MASK(x) (-!!(x)) /* helper for riscv_cpu_update_mip value */
 void riscv_cpu_set_rdtime_fn(CPURISCVState *env, uint64_t (*fn)(void *),
  void *arg);
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index b25ee179e9..c79ec4db76 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -609,11 +609,12 @@ int riscv_cpu_claim_interrupts(RISCVCPU *cpu, uint64_t 
interrupts)
 }
 }
 
-uint64_t riscv_cpu_update_mip(CPURISCVState *env, uint64_t mask,
-  uint64_t value)
+void riscv_cpu_interrupt(CPURISCVState *env)
 {
+uint64_t gein, vsgein = 0, vstip = 0;
 CPUState *cs = env_cpu(env);
-uint64_t gein, vsgein = 0, vstip = 0, old = env->mip;
+
+QEMU_IOTHREAD_LOCK_GUARD();
 
 if (env->virt_enabled) {
 gein = get_field(env->hstatus, HSTATUS_VGEIN);
@@ -622,15 +623,25 @@ uint64_t riscv_cpu_update_mip(CPURISCVState *env, 
uint64_t mask,
 
 vstip = env->vstime_irq ? MIP_VSTIP : 0;
 
-QEMU_IOTHREAD_LOCK_GUARD();
-
-env->mip = (env->mip & ~mask) | (value & mask);
-
 if (env->mip | vsgein | vstip) {
 cpu_interrupt(cs, CPU_INTERRUPT_HARD);
 } else {
 cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
 }
+}
+
+uint64_t riscv_cpu_update_mip(CPURISCVState *env, uint64_t mask, uint64_t 
value)
+{
+uint64_t old = env->mip;
+
+/* No need to update mip for VSTIP */
+mask = ((mask == MIP_VSTIP) && env->vstime_irq) ? 0 : mask;
+
+QEMU_IOTHREAD_LOCK_GUARD();
+
+env->mip = (env->mip & ~mask) | (value & mask);
+
+riscv_cpu_interrupt(env);
 
 return old;
 }
-- 
2.25.1




[PATCH v2 0/6] target/riscv: Add RISC-V Virtual IRQs and IRQ filtering support

2023-05-26 Thread Rajnesh Kanwal
This series adds M and HS-mode virtual interrupt and IRQ filtering support.
This allows inserting virtual interrupts from M/HS-mode into S/VS-mode
using mvien/hvien and mvip/hvip csrs. IRQ filtering is a use case of
this change, i-e M-mode can stop delegating an interrupt to S-mode and 
instead enable it in MIE and receive those interrupts in M-mode and then
selectively inject the interrupt using mvien and mvip.

Also, the spec doesn't mandate the interrupt to be actually supported
in hardware. Which allows M/HS-mode to assert virtual interrupts to
S/VS-mode that have no connection to any real interrupt events.
 
This is defined as part of the AIA specification [0], "5.3 Interrupt
filtering and virtual interrupts for supervisor level" and "6.3.2 Virtual
interrupts for VS level".

Most of the testing is done by hacking around OpenSBI and linux host.
The changes for those can be found at [1] and [2].

It's my first touch on RISC-V qemu IRQ subsystem. Any feedback would
be much appreciated.

The change can also be found on github [3].

TODO: This change doesn't support delegating virtual interrupts injected 
by M-mode to VS-mode by the Hypervisor. This is true for bits 13:63 only.

Thanks
Rajnesh

[0]: 
https://github.com/riscv/riscv-aia/releases/download/1.0-RC4/riscv-interrupts-1.0-RC4.pdf
[1]: https://github.com/rajnesh-kanwal/opensbi/tree/dev/rkanwal/irq_filter
[2]: https://github.com/rajnesh-kanwal/linux/commits/dev/rkanwal/aia_irq_filter
[3]: https://github.com/rajnesh-kanwal/qemu/tree/dev/rkanwal/riscv_irq_filter

v2:
 * Move RISCV_EXCP_SEMIHOST to switch case and remove special handling.
 * Fix linux-user build.

Rajnesh Kanwal (6):
  target/riscv: Without H-mode mask all HS mode inturrupts in mie.
  target/riscv: Check for async flag in case of RISCV_EXCP_SEMIHOST.
  target/riscv: Set VS* bits to one in mideleg when H-Ext is enabled
  target/riscv: Split interrupt logic from riscv_cpu_update_mip.
  target/riscv: Add M-mode virtual interrupt and IRQ filtering support.
  target/riscv: Add HS-mode virtual interrupt and IRQ filtering support.

 target/riscv/cpu.c|   9 +-
 target/riscv/cpu.h|  23 ++
 target/riscv/cpu_bits.h   |   6 +
 target/riscv/cpu_helper.c |  99 +---
 target/riscv/csr.c| 477 ++
 target/riscv/machine.c|   6 +
 6 files changed, 546 insertions(+), 74 deletions(-)

-- 
2.25.1




[PATCH v2 5/6] target/riscv: Add M-mode virtual interrupt and IRQ filtering support.

2023-05-26 Thread Rajnesh Kanwal
This change adds support for inserting virtual interrupts from M-mode
into S-mode using mvien and mvip csrs. IRQ filtering is a use case of
this change, i-e M-mode can stop delegating an interrupt to S-mode and
instead enable it in MIE and receive those interrupts in M-mode and then
selectively inject the interrupt using mvien and mvip.

Also, the spec doesn't mandate the interrupt to be actually supported
in hardware. Which allows M-mode to assert virtual interrupts to S-mode
that have no connection to any real interrupt events.

This is defined as part of the AIA specification [0], "5.3 Interrupt
filtering and virtual interrupts for supervisor level".

[0]: 
https://github.com/riscv/riscv-aia/releases/download/1.0-RC4/riscv-interrupts-1.0-RC4.pdf

Signed-off-by: Rajnesh Kanwal 
---
 target/riscv/cpu.c|   3 +-
 target/riscv/cpu.h|   8 ++
 target/riscv/cpu_bits.h   |   6 +
 target/riscv/cpu_helper.c |  26 +++-
 target/riscv/csr.c| 279 ++
 target/riscv/machine.c|   3 +
 6 files changed, 289 insertions(+), 36 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 269a094f42..7c4999431a 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -712,7 +712,8 @@ static bool riscv_cpu_has_work(CPUState *cs)
  * Definition of the WFI instruction requires it to ignore the privilege
  * mode and delegation registers, but respect individual enables
  */
-return riscv_cpu_all_pending(env) != 0;
+return riscv_cpu_all_pending(env) != 0 ||
+riscv_cpu_sirq_pending(env) != RISCV_EXCP_NONE;
 #else
 return true;
 #endif
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index de55bfb775..07cf656471 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -190,6 +190,12 @@ struct CPUArchState {
 uint64_t mie;
 uint64_t mideleg;
 
+/*
+ * When mideleg[i]=0 and mvien[i]=1, sie[i] is no more
+ * alias of mie[i] and needs to be maintained separatly.
+ */
+uint64_t sie;
+
 target_ulong satp;   /* since: priv-1.10.0 */
 target_ulong stval;
 target_ulong medeleg;
@@ -210,6 +216,8 @@ struct CPUArchState {
 /* AIA CSRs */
 target_ulong miselect;
 target_ulong siselect;
+uint64_t mvien;
+uint64_t mvip;
 
 /* Hypervisor CSRs */
 target_ulong hstatus;
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index 59f0ffd9e1..0d32d2a5a7 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -735,6 +735,12 @@ typedef enum RISCVException {
 #define MIE_SSIE   (1 << IRQ_S_SOFT)
 #define MIE_USIE   (1 << IRQ_U_SOFT)
 
+/* Machine constants */
+#define M_MODE_INTERRUPTS  ((uint64_t)(MIP_MSIP | MIP_MTIP | MIP_MEIP))
+#define S_MODE_INTERRUPTS  ((uint64_t)(MIP_SSIP | MIP_STIP | MIP_SEIP))
+#define VS_MODE_INTERRUPTS ((uint64_t)(MIP_VSSIP | MIP_VSTIP | MIP_VSEIP))
+#define HS_MODE_INTERRUPTS ((uint64_t)(MIP_SGEIP | VS_MODE_INTERRUPTS))
+
 /* General PointerMasking CSR bits */
 #define PM_ENABLE   0x0001ULL
 #define PM_CURRENT  0x0002ULL
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index c79ec4db76..6567ddef7b 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -365,6 +365,10 @@ static int riscv_cpu_pending_to_irq(CPURISCVState *env,
 return best_irq;
 }
 
+/*
+ * Doesn't report interrupts inserted using mvip from M-mode firmware. Those
+ * are returned in riscv_cpu_sirq_pending().
+ */
 uint64_t riscv_cpu_all_pending(CPURISCVState *env)
 {
 uint32_t gein = get_field(env->hstatus, HSTATUS_VGEIN);
@@ -387,9 +391,10 @@ int riscv_cpu_sirq_pending(CPURISCVState *env)
 {
 uint64_t irqs = riscv_cpu_all_pending(env) & env->mideleg &
 ~(MIP_VSSIP | MIP_VSTIP | MIP_VSEIP);
+uint64_t irqs_f = env->mvip & env->mvien & ~env->mideleg & env->sie;
 
 return riscv_cpu_pending_to_irq(env, IRQ_S_EXT, IPRIO_DEFAULT_S,
-irqs, env->siprio);
+irqs | irqs_f, env->siprio);
 }
 
 int riscv_cpu_vsirq_pending(CPURISCVState *env)
@@ -403,8 +408,8 @@ int riscv_cpu_vsirq_pending(CPURISCVState *env)
 
 static int riscv_cpu_local_irq_pending(CPURISCVState *env)
 {
+uint64_t irqs, pending, mie, hsie, vsie, irqs_f;
 int virq;
-uint64_t irqs, pending, mie, hsie, vsie;
 
 /* Determine interrupt enable state of all privilege modes */
 if (env->virt_enabled) {
@@ -430,8 +435,11 @@ static int riscv_cpu_local_irq_pending(CPURISCVState *env)
 irqs, env->miprio);
 }
 
+/* Check for virtual S-mode interrupts. */
+irqs_f = env->mvip & (env->mvien & ~env->mideleg) & env->sie;
+
 /* Check HS-mode interrupts */
-irqs = pending & env->mideleg & ~env->hideleg & -hsie;
+irqs =  ((pending & env->mideleg & ~env->hideleg) | irqs_f) & -hsie;
 if (irqs) {
 return riscv_cpu_pending_to_

[PATCH v2 3/6] target/riscv: Set VS* bits to one in mideleg when H-Ext is enabled

2023-05-26 Thread Rajnesh Kanwal
With H-Ext supported, VS bits are all hardwired to one in MIDELEG
denoting always delegated interrupts. This is being done in rmw_mideleg
but given mideleg is used in other places when routing interrupts
this change initializes it in riscv_cpu_realize to be on the safe side.

Signed-off-by: Rajnesh Kanwal 
---
 target/riscv/cpu.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index db0875fb43..269a094f42 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1280,6 +1280,11 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
   riscv_pmu_timer_cb, cpu);
 }
  }
+
+/* With H-Ext, VSSIP, VSTIP, VSEIP and SGEIP are hardwired to one. */
+if (riscv_has_ext(env, RVH)) {
+env->mideleg = MIP_VSSIP | MIP_VSTIP | MIP_VSEIP | MIP_SGEIP;
+}
 #endif
 
 riscv_cpu_finalize_features(cpu, &local_err);
-- 
2.25.1




[PATCH v2 2/6] target/riscv: Check for async flag in case of RISCV_EXCP_SEMIHOST.

2023-05-26 Thread Rajnesh Kanwal
RISCV_EXCP_SEMIHOST is set to 0x10, which can be a local interrupt id
as well. This change moves RISCV_EXCP_SEMIHOST to switch case so that
async flag check is performed before invoking semihosting logic.

Signed-off-by: Rajnesh Kanwal 
---
 target/riscv/cpu_helper.c | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 57d04385f1..b25ee179e9 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -1602,15 +1602,13 @@ void riscv_cpu_do_interrupt(CPUState *cs)
 target_ulong htval = 0;
 target_ulong mtval2 = 0;
 
-if  (cause == RISCV_EXCP_SEMIHOST) {
-do_common_semihosting(cs);
-env->pc += 4;
-return;
-}
-
 if (!async) {
 /* set tval to badaddr for traps with address information */
 switch (cause) {
+case RISCV_EXCP_SEMIHOST:
+do_common_semihosting(cs);
+env->pc += 4;
+return;
 case RISCV_EXCP_LOAD_GUEST_ACCESS_FAULT:
 case RISCV_EXCP_STORE_GUEST_AMO_ACCESS_FAULT:
 case RISCV_EXCP_LOAD_ADDR_MIS:
-- 
2.25.1




[PATCH v2 1/6] target/riscv: Without H-mode mask all HS mode inturrupts in mie.

2023-05-26 Thread Rajnesh Kanwal
Signed-off-by: Rajnesh Kanwal 
---
 target/riscv/csr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 4451bd1263..041f0b3e2e 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -1522,7 +1522,7 @@ static RISCVException rmw_mie64(CPURISCVState *env, int 
csrno,
 env->mie = (env->mie & ~mask) | (new_val & mask);
 
 if (!riscv_has_ext(env, RVH)) {
-env->mie &= ~((uint64_t)MIP_SGEIP);
+env->mie &= ~((uint64_t)HS_MODE_INTERRUPTS);
 }
 
 return RISCV_EXCP_NONE;
-- 
2.25.1




Re: [PULL 00/15] Improve --without-default-devices testing, fix CVE-2023-0330

2023-05-26 Thread Richard Henderson

On 5/26/23 02:08, Thomas Huth wrote:

The following changes since commit a3cb6d5004ff638aefe686ecd540718a793bd1b1:

   Merge tag 'pull-tcg-20230525' ofhttps://gitlab.com/rth7680/qemu  into 
staging (2023-05-25 11:11:52 -0700)

are available in the Git repository at:

   https://gitlab.com/thuth/qemu.git  tags/pull-request-2023-05-26

for you to fetch changes up to b987718bbb1d0eabf95499b976212dd5f0120d75:

   hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI controller 
(CVE-2023-0330) (2023-05-26 09:37:04 +0200)


* Use MachineClass->default_nic in more machines to allow running them
   without "--nodefaults" in builds that used "--without-default-devices"
* Improve qtests for such builds
* Add up-/downsampling qtest
* Avoid crash if default RAM backend name has been stolen
* Fix reentrant DMA problem in the lsi53c895a device (CVE-2023-0330)


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/8.1 as 
appropriate.


r~




Re: [PULL 0/2] loongarch-to-apply queue

2023-05-26 Thread Richard Henderson

On 5/26/23 02:27, Song Gao wrote:

The following changes since commit a3cb6d5004ff638aefe686ecd540718a793bd1b1:

   Merge tag 'pull-tcg-20230525' ofhttps://gitlab.com/rth7680/qemu  into 
staging (2023-05-25 11:11:52 -0700)

are available in the Git repository at:

   https://gitlab.com/gaosong/qemu.git  tags/pull-loongarch-20230526

for you to fetch changes up to 65bfaaae6ac79ebc623acc0ce28cc3bd4fe8b5e5:

   target/loongarch: Fix the vinsgr2vr/vpickve2gr instructions cause system 
coredump (2023-05-26 17:21:16 +0800)


pull-loongarch-20230526


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/8.1 as 
appropriate.


r~




Re: [PATCH v5 10/10] accel/tcg: include cs_base in our hash calculations

2023-05-26 Thread Richard Henderson

On 5/24/23 06:39, Alex Bennée wrote:

We weren't using cs_base in the hash calculations before. Since the
arm front end moved a chunk of flags in a378206a20 (target/arm: Move
mode specific TB flags to tb->cs_base) they comprise of an important
part of the execution state.

Widen the tb_hash_func to include cs_base and expand to qemu_xxhash8()
to accommodate it.

My initial benchmark shows very little difference in the
runtime.

Before:

armhf

➜  hyperfine -w 2 -m 20 "./arm-softmmu/qemu-system-arm -cpu cortex-a15 -machine 
type=virt,highmem=off -display none -m 2048 -serial mon:stdio -netdev 
user,id=unet,hostfwd=tcp::-:22 -device virtio-net-pci,netdev=unet -device 
virtio-scsi-pci -blockdev 
driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-armhf
 -device scsi-hd,drive=hd -smp 4 -kernel 
/home/alex/lsrc/linux.git/builds/arm/arch/arm/boot/zImage -append 'console=ttyAMA0 
root=/dev/sda2 systemd.unit=benchmark.service' -snapshot"
Benchmark 1: ./arm-softmmu/qemu-system-arm -cpu cortex-a15 -machine 
type=virt,highmem=off -display none -m 2048 -serial mon:stdio -netdev 
user,id=unet,hostfwd=tcp::-:22 -device virtio-net-pci,netdev=unet -device 
virtio-scsi-pci -blockdev 
driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-armhf
 -device scsi-hd,drive=hd -smp 4 -kernel 
/home/alex/lsrc/linux.git/builds/arm/arch/arm/boot/zImage -append 
'console=ttyAMA0 root=/dev/sda2 systemd.unit=benchmark.service' -snapshot
   Time (mean ± σ): 24.627 s ±  2.708 s[User: 34.309 s, System: 1.797 s]
   Range (min … max):   22.345 s … 29.864 s20 runs

arm64

➜  hyperfine -w 2 -n 20 "./qemu-system-aarch64 -cpu max,pauth-impdef=on -machine 
type=virt,virtualization=on,gic-version=3 -display none -serial mon:stdio -netdev 
user,id=unet,hostfwd=tcp::-:22,hostfwd=tcp::1234-:1234 -device 
virtio-net-pci,netdev=unet -device virtio-scsi-pci -blockdev 
driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-arm64
 -device scsi-hd,drive=hd -smp 4 -kernel 
~/lsrc/linux.git/builds/arm64/arch/arm64/boot/Image.gz -append 'console=ttyAMA0 
root=/dev/sda2 systemd.unit=benchmark-pigz.service' -snapshot"
Benchmark 1: 20
   Time (mean ± σ): 62.559 s ±  2.917 s[User: 189.115 s, System: 4.089 
s]
   Range (min … max):   59.997 s … 70.153 s10 runs

After:

armhf

Benchmark 1: ./arm-softmmu/qemu-system-arm -cpu cortex-a15 -machine 
type=virt,highmem=off -display none -m 2048 -serial mon:stdio -netdev 
user,id=unet,hostfwd=tcp::-:22 -device virtio-net-pci,netdev=unet -device 
virtio-scsi-pci -blockdev 
driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-armhf
 -device scsi-hd,drive=hd -smp 4 -kernel 
/home/alex/lsrc/linux.git/builds/arm/arch/arm/boot/zImage -append 
'console=ttyAMA0 root=/dev/sda2 systemd.unit=benchmark.service' -snapshot
   Time (mean ± σ): 24.223 s ±  2.151 s[User: 34.284 s, System: 1.906 s]
   Range (min … max):   22.000 s … 28.476 s20 runs

arm64

hyperfine -w 2 -n 20 "./qemu-system-aarch64 -cpu max,pauth-impdef=on -machine 
type=virt,virtualization=on,gic-version=3 -display none -serial mon:stdio -netdev 
user,id=unet,hostfwd=tcp::-:22,hostfwd=tcp::1234-:1234 -device 
virtio-net-pci,netdev=unet -device virtio-scsi-pci -blockdev 
driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-arm64
 -device scsi-hd,drive=hd -smp 4 -kernel 
~/lsrc/linux.git/builds/arm64/arch/arm64/boot/Image.gz -append 'console=ttyAMA0 
root=/dev/sda2 systemd.unit=benchmark-pigz.service' -snapshot"
Benchmark 1: 20
   Time (mean ± σ): 62.769 s ±  1.978 s[User: 188.431 s, System: 5.269 
s]
   Range (min … max):   60.285 s … 66.868 s10 runs

Signed-off-by: Alex Bennée
Message-Id:<20230523125000.3674739-11-alex.ben...@linaro.org>


Reviewed-by: Richard Henderson 


r~



[PULL 07/12] slirp: update wrap to latest master

2023-05-26 Thread Paolo Bonzini
It is recommended to use SSIZE_T for ssize_t on win32, but the commit
that is being used for slirp.wrap uses int.  Update to include the fix
as well as the other bugfix commit "ip: Enforce strict aliasing".

Reported-by: Michael Tokarev 
Signed-off-by: Paolo Bonzini 
---
 subprojects/slirp.wrap | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subprojects/slirp.wrap b/subprojects/slirp.wrap
index ace4f26102f5..08291a4cf99a 100644
--- a/subprojects/slirp.wrap
+++ b/subprojects/slirp.wrap
@@ -1,6 +1,6 @@
 [wrap-git]
 url = https://gitlab.freedesktop.org/slirp/libslirp
-revision = 15c52d697529eb3e78c5d8aa324d61715bce33b6
+revision = 26be815b86e8d49add8c9a8b320239b9594ff03d
 
 [provide]
 slirp = libslirp_dep
-- 
2.40.1




[PULL 04/12] configure: unset harmful environment variables

2023-05-26 Thread Paolo Bonzini
Apart from CLICOLOR_FORCE and GREP_OPTIONS, there are other variables
that are listed in the Autoconf manual.  While Autoconf neutralizes them
very early, and assumes it does not (yet) run in a shell that has "unset",
QEMU assumes that the user invoked configure under a POSIX shell, and
therefore can simply use "unset" to clear them.

CDPATH is particularly nasty because it messes up "cd ... && pwd".

Reported-by: Juan Quintela 
Cc: qemu-sta...@nongnu.org
Signed-off-by: Paolo Bonzini 
---
 configure | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/configure b/configure
index 80ca1c922151..9cdce69b7852 100755
--- a/configure
+++ b/configure
@@ -4,9 +4,8 @@
 #
 
 # Unset some variables known to interfere with behavior of common tools,
-# just as autoconf does.
-CLICOLOR_FORCE= GREP_OPTIONS=
-unset CLICOLOR_FORCE GREP_OPTIONS
+# just as autoconf does.  Unlike autoconf, we assume that unset exists.
+unset CLICOLOR_FORCE GREP_OPTIONS BASH_ENV ENV MAIL MAILPATH CDPATH
 
 # Don't allow CCACHE, if present, to use cached results of compile tests!
 export CCACHE_RECACHE=yes
-- 
2.40.1




[PULL 12/12] configure: ignore --make

2023-05-26 Thread Paolo Bonzini
Setting the MAKE variable to a GNU Make executable does not really have
any effect: if a non-GNU Make is used, the QEMU Makefile will fail to
parse.  Just remove everything related to --make and $make as dead code.

Signed-off-by: Paolo Bonzini 
---
 configure   | 18 +-
 meson.build |  1 -
 2 files changed, 1 insertion(+), 18 deletions(-)

diff --git a/configure b/configure
index 0e9305848955..d674a9667310 100755
--- a/configure
+++ b/configure
@@ -400,20 +400,16 @@ gnu/kfreebsd)
 ;;
 freebsd)
   bsd="yes"
-  make="${MAKE-gmake}"
   # needed for kinfo_getvmmap(3) in libutil.h
 ;;
 dragonfly)
   bsd="yes"
-  make="${MAKE-gmake}"
 ;;
 netbsd)
   bsd="yes"
-  make="${MAKE-gmake}"
 ;;
 openbsd)
   bsd="yes"
-  make="${MAKE-gmake}"
 ;;
 darwin)
   bsd="yes"
@@ -421,7 +417,6 @@ darwin)
 ;;
 sunos)
   solaris="yes"
-  make="${MAKE-gmake}"
 ;;
 haiku)
   pie="no"
@@ -525,9 +520,6 @@ case "$cpu" in
 CPU_CFLAGS="-m64 -mcpu=ultrasparc" ;;
 esac
 
-: ${make=${MAKE-make}}
-
-
 check_py_version() {
 # We require python >= 3.7.
 # NB: a True python conditional creates a non-zero return code (Failure)
@@ -630,7 +622,7 @@ for opt do
   ;;
   --objcc=*)
   ;;
-  --make=*) make="$optarg"
+  --make=*)
   ;;
   --install=*)
   ;;
@@ -897,7 +889,6 @@ Advanced options (experts only):
   --cross-cc-ARCH=CC   use compiler when building ARCH guest test cases
   --cross-cc-cflags-ARCH=  use compiler flags when building ARCH guest tests
   --cross-prefix-ARCH=PREFIX cross compiler prefix when building ARCH guest 
test cases
-  --make=MAKE  use specified make [$make]
   --python=PYTHON  use specified python [$python]
   --ninja=NINJAuse specified ninja [$ninja]
   --smbd=SMBD  use specified smbd [$smbd]
@@ -950,11 +941,6 @@ then
 fi
 fi
 
-if ! has "$make"
-then
-error_exit "GNU make ($make) not found"
-fi
-
 if ! check_py_version "$python"; then
   error_exit "Cannot use '$python', Python >= 3.7 is required." \
  "Use --python=/path/to/python to specify a supported Python." \
@@ -1777,7 +1763,6 @@ if test "$container" != no; then
 echo "RUNC=$runc" >> $config_host_mak
 fi
 echo "ROMS=$roms" >> $config_host_mak
-echo "MAKE=$make" >> $config_host_mak
 echo "PYTHON=$python" >> $config_host_mak
 echo "GENISOIMAGE=$genisoimage" >> $config_host_mak
 echo "MESON=$meson" >> $config_host_mak
@@ -2030,7 +2015,6 @@ preserve_env CXXFLAGS
 preserve_env LD
 preserve_env LDFLAGS
 preserve_env LD_LIBRARY_PATH
-preserve_env MAKE
 preserve_env NM
 preserve_env OBJCFLAGS
 preserve_env OBJCOPY
diff --git a/meson.build b/meson.build
index 884b16c74962..2d48aa1e2ef3 100644
--- a/meson.build
+++ b/meson.build
@@ -4028,7 +4028,6 @@ summary(summary_info, bool_yn: true, section: 
'Directories')
 # Host binaries
 summary_info = {}
 summary_info += {'git':   config_host['GIT']}
-summary_info += {'make':  config_host['MAKE']}
 summary_info += {'python':'@0@ (version: 
@1@)'.format(python.full_path(), python.language_version())}
 summary_info += {'sphinx-build':  sphinx_build}
 if config_host.has_key('HAVE_GDB_BIN')
-- 
2.40.1




[PULL 05/12] meson: Remove leftover comment

2023-05-26 Thread Paolo Bonzini
From: Fabiano Rosas 

Commit d2e6f9272d ("fuzz: remove fork-fuzzing scaffolding") removed
the linker script and forgot to remove the comment.

Signed-off-by: Fabiano Rosas 
Message-Id: <20230525212044.30222-2-faro...@suse.de>
Signed-off-by: Paolo Bonzini 
---
 meson.build | 2 --
 1 file changed, 2 deletions(-)

diff --git a/meson.build b/meson.build
index 78890f01550a..ee1b7dac730b 100644
--- a/meson.build
+++ b/meson.build
@@ -404,8 +404,6 @@ if targetos != 'sunos' and not get_option('tsan')
   qemu_ldflags += cc.get_supported_link_arguments('-Wl,--warn-common')
 endif
 
-# Specify linker-script with add_project_link_arguments so that it is not 
placed
-# within a linker --start-group/--end-group pair
 if get_option('fuzzing')
   # Specify a filter to only instrument code that is directly related to
   # virtual-devices.
-- 
2.40.1




Re: [ANNOUNCE] KVM Microconference at LPC 2023

2023-05-26 Thread Mickaël Salaün
See James Morris's proposal here: 
https://lore.kernel.org/all/17f62cb1-a5de-2020-2041-359b8e96b...@linux.microsoft.com/


On 26/05/2023 04:36, James Morris wrote:
> [Side topic]
>
> Would folks be interested in a Linux Plumbers Conference MC on this
> topic generally, across different hypervisors, VMMs, and architectures?
>
> If so, please let me know who the key folk would be and we can try 
writing

> up an MC proposal.

The fine-grain memory management proposal from James Gowans looks 
interesting, especially the "side-car" virtual machines: 
https://lore.kernel.org/all/88db2d9cb42e471692ff1feb0b9ca855906a9d95.ca...@amazon.com/



On 09/05/2023 11:55, Paolo Bonzini wrote:

Hi all!

We are planning on submitting a CFP to host a KVM Microconference at
Linux Plumbers Conference 2023. To help justify the proposal, we would
like to gather a list of folks that would likely attend, and crowdsource
a list of topics to include in the proposal.

For both this year and future years, the intent is that a KVM
Microconference will complement KVM Forum, *NOT* supplant it. As you
probably noticed, KVM Forum is going through a somewhat radical change in
how it's organized; the conference is now free and (with some help from
Red Hat) organized directly by the KVM and QEMU communities. Despite the
unexpected changes and some teething pains, community response to KVM
Forum continues to be overwhelmingly positive! KVM Forum will remain
the venue of choice for KVM/userspace collaboration, for educational
content covering both KVM and userspace, and to discuss new features in
QEMU and other userspace projects.

At least on the x86 side, however, the success of KVM Forum led us
virtualization folks to operate in relative isolation. KVM depends on
and impacts multiple subsystems (MM, scheduler, perf) in profound ways,
and recently we’ve seen more and more ideas/features that require
non-trivial changes outside KVM and buy-in from stakeholders that
(typically) do not attend KVM Forum. Linux Plumbers Conference is a
natural place to establish such collaboration within the kernel.

Therefore, the aim of the KVM Microconference will be:
* to provide a setting in which to discuss KVM and kernel internals
* to increase collaboration and reduce friction with other subsystems
* to discuss system virtualization issues that require coordination with
other subsystems (such as VFIO, or guest support in arch/)

Below is a rough draft of the planned CFP submission.

Thanks!

Paolo Bonzini (KVM Maintainer)
Sean Christopherson (KVM x86 Co-Maintainer)
Marc Zyngier (KVM ARM Co-Maintainer)


===
KVM Microconference
===

KVM (Kernel-based Virtual Machine) enables the use of hardware features
to improve the efficiency, performance, and security of virtual machines
created and managed by userspace.  KVM was originally developed to host
and accelerate "full" virtual machines running a traditional kernel and
operating system, but has long since expanded to cover a wide array of use
cases, e.g. hosting real time workloads, sandboxing untrusted workloads,
deprivileging third party code, reducing the trusted computed base of
security sensitive workloads, etc.  As KVM's use cases have grown, so too
have the requirements placed on KVM and the interactions between it and
other kernel subsystems.

The KVM Microconference will focus on how to evolve KVM and adjacent
subsystems in order to satisfy new and upcoming requirements: serving
guest memory that cannot be accessed by host userspace[1], providing
accurate, feature-rich PMU/perf virtualization in cloud VMs[2], etc.


Potential Topics:
- Serving inaccessible/unmappable memory for KVM guests (protected VMs)
- Optimizing mmu_notifiers, e.g. reducing TLB flushes and spurious zapping
- Supporting multiple KVM modules (for non-disruptive upgrades)
- Improving and hardening KVM+perf interactions
- Implementing arch-agnostic abstractions in KVM (e.g. MMU)
- Defining KVM requirements for hardware vendors
- Utilizing "fault" injection to increase test coverage of edge cases
- KVM vs VFIO (e.g. memory types, a rather hot topic on the ARM side)


Key Attendees:
- Paolo Bonzini  (KVM Maintainer)
- Sean Christopherson   (KVM x86 Co-Maintainer)
- Your name could be here!

[1] 
https://lore.kernel.org/all/20221202061347.1070246-1-chao.p.p...@linux.intel.com
[2] 
https://lore.kernel.org/all/CALMp9eRBOmwz=mspp0m5q093k3rmueasf3vel39mgv5br9w...@mail.gmail.com






[PULL 00/12] (Mostly) build system patches for 2023-05-26

2023-05-26 Thread Paolo Bonzini
The following changes since commit a3cb6d5004ff638aefe686ecd540718a793bd1b1:

  Merge tag 'pull-tcg-20230525' of https://gitlab.com/rth7680/qemu into staging 
(2023-05-25 11:11:52 -0700)

are available in the Git repository at:

  https://gitlab.com/bonzini/qemu.git tags/for-upstream

for you to fetch changes up to b17bbf835c8998e93fd99b06164f1d63843fe8c9:

  configure: ignore --make (2023-05-26 12:36:20 +0200)


* build system fixes and cleanups
* use subproject() for the dtc and keycodemapdb submodules
* fix virtio memory leak
* update slirp.wrap to latest commit in the master branch


Fabiano Rosas (2):
  meson: Remove leftover comment
  meson: Add static glib dependency for initrd-stress.img

Paolo Bonzini (10):
  tests/docker: simplify HOST_ARCH definition
  tests/vm: fix and simplify HOST_ARCH definition
  Makefile: remove $(TESTS_PYTHON)
  configure: unset harmful environment variables
  slirp: update wrap to latest master
  virtio: qmp: fix memory leak
  meson: simplify logic for -Dfdt
  meson: use subproject for internal libfdt
  meson: use subproject for keycodemapdb
  configure: ignore --make

 .gitmodules  |  8 +++---
 configure| 29 +
 hw/virtio/virtio-qmp.c   | 11 
 meson.build  | 54 ++--
 scripts/archive-source.sh|  2 +-
 dtc => subprojects/dtc   |  0
 {ui => subprojects}/keycodemapdb |  0
 subprojects/slirp.wrap   |  2 +-
 tests/Makefile.include   |  8 +++---
 tests/docker/Makefile.include|  2 +-
 tests/migration/meson.build  |  4 ++-
 tests/vm/Makefile.include|  7 +++---
 ui/meson.build   |  8 +++---
 13 files changed, 50 insertions(+), 85 deletions(-)
 rename dtc => subprojects/dtc (100%)
 rename {ui => subprojects}/keycodemapdb (100%)
-- 
2.40.1




[PULL 08/12] virtio: qmp: fix memory leak

2023-05-26 Thread Paolo Bonzini
The VirtioInfoList is already allocated by QAPI_LIST_PREPEND and
need not be allocated by the caller.

Fixes Coverity CID 1508724.

Reviewed-by: Daniel P. Berrangé 
Signed-off-by: Paolo Bonzini 
---
 hw/virtio/virtio-qmp.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/hw/virtio/virtio-qmp.c b/hw/virtio/virtio-qmp.c
index e84316dcfd21..b5e183529971 100644
--- a/hw/virtio/virtio-qmp.c
+++ b/hw/virtio/virtio-qmp.c
@@ -668,7 +668,7 @@ VirtioDeviceFeatures *qmp_decode_features(uint16_t 
device_id, uint64_t bitmap)
 VirtioInfoList *qmp_x_query_virtio(Error **errp)
 {
 VirtioInfoList *list = NULL;
-VirtioInfoList *node;
+VirtioInfo *node;
 VirtIODevice *vdev;
 
 QTAILQ_FOREACH(vdev, &virtio_list, next) {
@@ -682,11 +682,10 @@ VirtioInfoList *qmp_x_query_virtio(Error **errp)
 if (!strncmp(is_realized->str, "false", 4)) {
 QTAILQ_REMOVE(&virtio_list, vdev, next);
 } else {
-node = g_new0(VirtioInfoList, 1);
-node->value = g_new(VirtioInfo, 1);
-node->value->path = g_strdup(dev->canonical_path);
-node->value->name = g_strdup(vdev->name);
-QAPI_LIST_PREPEND(list, node->value);
+node = g_new(VirtioInfo, 1);
+node->path = g_strdup(dev->canonical_path);
+node->name = g_strdup(vdev->name);
+QAPI_LIST_PREPEND(list, node);
 }
g_string_free(is_realized, true);
 }
-- 
2.40.1




[PULL 10/12] meson: use subproject for internal libfdt

2023-05-26 Thread Paolo Bonzini
Recent dtc/libfdt can use either Make or meson as the build system.
By using a subproject, our own meson.build can remove the hard
coded list of source files.

This is also the first step towards managing downloads with .wrap
files instead of submodule.

Reviewed-by: Daniel P. Berrangé 
Signed-off-by: Paolo Bonzini 
---
 .gitmodules   |  4 ++--
 configure |  2 +-
 meson.build   | 24 
 scripts/archive-source.sh |  2 +-
 dtc => subprojects/dtc|  0
 5 files changed, 8 insertions(+), 24 deletions(-)
 rename dtc => subprojects/dtc (100%)

diff --git a/.gitmodules b/.gitmodules
index 2a3a12033c4b..3ed5d073d630 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -13,8 +13,8 @@
 [submodule "roms/qemu-palcode"]
path = roms/qemu-palcode
url = https://gitlab.com/qemu-project/qemu-palcode.git
-[submodule "dtc"]
-   path = dtc
+[submodule "subprojects/dtc"]
+   path = subprojects/dtc
url = https://gitlab.com/qemu-project/dtc.git
 [submodule "roms/u-boot"]
path = roms/u-boot
diff --git a/configure b/configure
index 9cdce69b7852..d42bbd07310b 100755
--- a/configure
+++ b/configure
@@ -1187,7 +1187,7 @@ fi
 case "$fdt" in
   auto | enabled | internal)
 # Simpler to always update submodule, even if not needed.
-git_submodules="${git_submodules} dtc"
+git_submodules="${git_submodules} subprojects/dtc"
 ;;
 esac
 
diff --git a/meson.build b/meson.build
index 218428841d36..884b16c74962 100644
--- a/meson.build
+++ b/meson.build
@@ -3088,26 +3088,10 @@ if fdt_required.length() > 0 or fdt_opt == 'enabled'
   error('libfdt source not found - please pull git submodule')
 endif
 
-fdt_files = files(
-  'dtc/libfdt/fdt.c',
-  'dtc/libfdt/fdt_ro.c',
-  'dtc/libfdt/fdt_wip.c',
-  'dtc/libfdt/fdt_sw.c',
-  'dtc/libfdt/fdt_rw.c',
-  'dtc/libfdt/fdt_strerror.c',
-  'dtc/libfdt/fdt_empty_tree.c',
-  'dtc/libfdt/fdt_addresses.c',
-  'dtc/libfdt/fdt_overlay.c',
-  'dtc/libfdt/fdt_check.c',
-)
-
-fdt_inc = include_directories('dtc/libfdt')
-libfdt = static_library('fdt',
-build_by_default: false,
-sources: fdt_files,
-include_directories: fdt_inc)
-fdt = declare_dependency(link_with: libfdt,
- include_directories: fdt_inc)
+libfdt_proj = subproject('dtc', required: true,
+ default_options: ['tools=false',  'yaml=disabled',
+   'python=disabled', 
'default_library=static'])
+fdt = libfdt_proj.get_variable('libfdt_dep')
   endif
 else
   fdt_opt = 'disabled'
diff --git a/scripts/archive-source.sh b/scripts/archive-source.sh
index c03532915471..a7c2886334f0 100755
--- a/scripts/archive-source.sh
+++ b/scripts/archive-source.sh
@@ -26,7 +26,7 @@ sub_file="${sub_tdir}/submodule.tar"
 # independent of what the developer currently has initialized
 # in their checkout, because the build environment is completely
 # different to the host OS.
-submodules="dtc ui/keycodemapdb"
+submodules="subprojects/dtc ui/keycodemapdb"
 submodules="$submodules tests/fp/berkeley-softfloat-3 
tests/fp/berkeley-testfloat-3"
 sub_deinit=""
 
diff --git a/dtc b/subprojects/dtc
similarity index 100%
rename from dtc
rename to subprojects/dtc
-- 
2.40.1




[PULL 09/12] meson: simplify logic for -Dfdt

2023-05-26 Thread Paolo Bonzini
fdt_opt == 'disabled' is going to give an error if libfdt is required
by any target, so catch that immediately.  For fdt_opt == 'enabled',
instead, do not check immediately whether the internal libfdt is present.
Instead do the check after ascertaining that libfdt is absent or too old.

Reviewed-by: Daniel P. Berrangé 
Signed-off-by: Paolo Bonzini 
---
 meson.build | 29 -
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/meson.build b/meson.build
index ee1b7dac730b..218428841d36 100644
--- a/meson.build
+++ b/meson.build
@@ -3059,13 +3059,14 @@ if have_system and vfio_user_server_allowed
 endif
 
 fdt = not_found
-if have_system
-  fdt_opt = get_option('fdt')
+fdt_opt = get_option('fdt')
+if fdt_required.length() > 0 or fdt_opt == 'enabled'
+  if fdt_opt == 'disabled'
+error('fdt disabled but required by targets ' + ', '.join(fdt_required))
+  endif
+
   if fdt_opt in ['enabled', 'auto', 'system']
-have_internal = fs.exists(meson.current_source_dir() / 
'dtc/libfdt/Makefile.libfdt')
-fdt = cc.find_library('fdt',
-  required: fdt_opt == 'system' or
-fdt_opt == 'enabled' and not have_internal)
+fdt = cc.find_library('fdt', required: fdt_opt == 'system')
 if fdt.found() and cc.links('''
#include 
#include 
@@ -3074,14 +3075,19 @@ if have_system
   fdt_opt = 'system'
 elif fdt_opt == 'system'
error('system libfdt requested, but it is too old (1.5.1 or newer 
required)')
-elif have_internal
-  fdt_opt = 'internal'
 else
-  fdt_opt = 'disabled'
+  fdt_opt = 'internal'
   fdt = not_found
 endif
   endif
-  if fdt_opt == 'internal'
+  if not fdt.found()
+assert(fdt_opt == 'internal')
+have_internal = fs.exists(meson.current_source_dir() / 
'subprojects/dtc/meson.build')
+
+if not have_internal
+  error('libfdt source not found - please pull git submodule')
+endif
+
 fdt_files = files(
   'dtc/libfdt/fdt.c',
   'dtc/libfdt/fdt_ro.c',
@@ -3106,9 +3112,6 @@ if have_system
 else
   fdt_opt = 'disabled'
 endif
-if not fdt.found() and fdt_required.length() > 0
-  error('fdt not available but required by targets ' + ', '.join(fdt_required))
-endif
 
 config_host_data.set('CONFIG_CAPSTONE', capstone.found())
 config_host_data.set('CONFIG_FDT', fdt.found())
-- 
2.40.1




  1   2   3   >