date:20220523

Re: [RFC PATCH v4 11/36] i386/tdx: Initialize TDX before creating TD vcpus

2022-05-23 Thread Gerd Hoffmann

  Hi,

> > Hmm, hooking *vm* initialization into *vcpu* creation looks wrong to me.
> 
> That's because for TDX, it has to do VM-scope (feature) initialization
> before creating vcpu. This is new to KVM and QEMU, that every feature is
> vcpu-scope and configured per-vcpu before.
> 
> To minimize the change to QEMU, we want to utilize @cpu and @cpu->env to
> grab the configuration info. That's why it goes this way.
> 
> Do you have any better idea on it?

Maybe it's a bit more work to add VM-scope initialization support to
qemu.  But I expect that approach will work better long-term.  You need
this mutex and the 'initialized' variable in your code to make sure it
runs only once because the way you hook it in is not ideal ...

[ disclaimer: I'm not that familiar with the kvm interface in qemu ]

take care,
  Gerd

[RFC PATCH 12/13] vfio/migration: add some trace-events for vfio migration plugin

2022-05-23 Thread Lei Rao

Add some trace-events, including trace_vfio_migration_probe_plugin,
trace_vfio_save_buffer_plugin, trace_vfio_load_state_device_data_plugin,
trace_vfio_update_pending, and trace_vfio_migration_set_state, to make
debugging easier.

Signed-off-by: Lei Rao 
Reviewed-by: Eddie Dong 
---
 hw/vfio/migration-plugin.c | 10 +++---
 hw/vfio/trace-events   |  3 +++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/hw/vfio/migration-plugin.c b/hw/vfio/migration-plugin.c
index 63124e1571..c545cbe334 100644
--- a/hw/vfio/migration-plugin.c
+++ b/hw/vfio/migration-plugin.c
@@ -19,6 +19,7 @@
 #include "qapi/error.h"
 #include "hw/vfio/vfio-migration-plugin.h"
 #include "sysemu/sysemu.h"
+#include "trace.h"
 
 #define CHUNK_SIZE (1024 * 1024)
 
@@ -128,7 +129,7 @@ static int vfio_migration_update_pending_plugin(VFIODevice 
*vbasedev)
 return ret;
 }
 migration->pending_bytes = pending_bytes;
-
+trace_vfio_update_pending(vbasedev->name, pending_bytes);
 return 0;
 }
 
@@ -159,7 +160,7 @@ static int vfio_migration_set_state_plugin(VFIODevice 
*vbasedev, uint32_t mask,
 }
 
 vbasedev->migration->device_state = device_state;
-
+trace_vfio_migration_set_state(vbasedev->name, device_state);
 return 0;
 }
 
@@ -179,6 +180,7 @@ static int vfio_migration_save_buffer_plugin(QEMUFile *f, 
VFIODevice *vbasedev,
 qemu_put_be64(f, data_size);
 tmp_size = data_size;
 
+trace_vfio_save_buffer_plugin(vbasedev->name, data_size);
 while (tmp_size) {
 uint64_t sz = tmp_size <= CHUNK_SIZE ? tmp_size : CHUNK_SIZE;
 void *buf = g_try_malloc(sz);
@@ -214,6 +216,7 @@ static int vfio_migration_load_buffer_plugin(QEMUFile *f, 
VFIODevice *vbasedev,
 int ret = 0;
 VFIOMigrationPlugin *plugin = vbasedev->migration->plugin;
 
+trace_vfio_load_state_device_data_plugin(vbasedev->name, data_size);
 while (data_size) {
 uint64_t sz = data_size <= CHUNK_SIZE ? data_size : CHUNK_SIZE;
 void *buf = g_try_malloc(sz);
@@ -257,6 +260,7 @@ int vfio_migration_probe_plugin(VFIODevice *vbasedev)
 }
 
 migration->ops = &vfio_plugin_method;
-
+trace_vfio_migration_probe_plugin(vbasedev->name, vbasedev->desc.path,
+  vbasedev->desc.arg);
 return 0;
 }
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index ca85edeb11..6c2cba29fd 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -149,12 +149,14 @@ vfio_display_edid_write_error(void) ""
 
 # migration.c
 vfio_migration_probe_local(const char *name, uint32_t index) " (%s) Region %d"
+vfio_migration_probe_plugin(const char *name, const char *path, const char 
*arg) " (%s) Plugin path=%s arg=%s"
 vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d"
 vfio_vmstate_change(const char *name, int running, const char *reason, 
uint32_t dev_state) " (%s) running %d reason %s device state %d"
 vfio_migration_state_notifier(const char *name, const char *state) " (%s) 
state %s"
 vfio_save_setup(const char *name) " (%s)"
 vfio_save_cleanup(const char *name) " (%s)"
 vfio_save_buffer_local(const char *name, uint64_t data_offset, uint64_t 
data_size, uint64_t pending) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64" pending 
0x%"PRIx64
+vfio_save_buffer_plugin(const char *name, uint64_t data_size) " (%s) data size 
0x%"PRIx64
 vfio_update_pending(const char *name, uint64_t pending) " (%s) pending 
0x%"PRIx64
 vfio_save_device_config_state(const char *name) " (%s)"
 vfio_save_pending(const char *name, uint64_t precopy, uint64_t postcopy, 
uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 
0x%"PRIx64
@@ -163,6 +165,7 @@ vfio_save_complete_precopy(const char *name) " (%s)"
 vfio_load_device_config_state(const char *name) " (%s)"
 vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
 vfio_load_state_device_data_local(const char *name, uint64_t data_offset, 
uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64
+vfio_load_state_device_data_plugin(const char *name, uint64_t data_size) " 
(%s) data size 0x%"PRIx64
 vfio_load_cleanup(const char *name) " (%s)"
 vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t 
bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 
0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64
 vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu 
dirty @ 0x%"PRIx64" - 0x%"PRIx64
-- 
2.32.0

Re: [RFC PATCH v4 07/36] i386/tdx: Introduce is_tdx_vm() helper and cache tdx_guest object

2022-05-23 Thread Gerd Hoffmann

> > > +#ifdef CONFIG_TDX
> > > +bool is_tdx_vm(void);
> > > +#else
> > > +#define is_tdx_vm() 0
> > 
> > Just add that to the tdx-stubs.c file you already created in one of the
> > previous patches and drop this #ifdef mess ;)
> 
> This is for consistency with SEV.  Anyway Either way is okay.

> From target/i386/sev.h
>   ...
>   #ifdef CONFIG_SEV
>   bool sev_enabled(void);
>   bool sev_es_enabled(void);
>   #else
>   #define sev_enabled() 0
>   #define sev_es_enabled() 0
>   #endif

Hmm, not sure why sev did it this way.  One possible reason is that the
compiler optimizer can see sev_enabled() evaluates to 0 and throw away
the dead code branches then.

So, yes, maybe it makes sense to stick to the #ifdef in this specific
case.

take care,
  Gerd

[RFC PATCH 10/13] vfio/pci: introduce command-line parameters to specify migration method

2022-05-23 Thread Lei Rao

Add command-line parameters (x-plugin-path and x-plugin-arg) for the migration
plugin of VFIO PCI functions. x-plugin-path is the path of a dynamically
loadable library, and x-plugin-arg is the argument needed to load and use it.
For example, if the plugin communicates over the network with an agent running
on the IPU/DPU backend SoC, the argument should be the IP address and port of
that agent. The usage is as follows:

-device vfio-pci,id=$ID,host=$bdf,x-enable-migration,\
x-plugin-path=$plugin_path,x-plugin-arg=$plugin_arg

Signed-off-by: Lei Rao 
Reviewed-by: Eddie Dong 
---
 hw/vfio/pci.c | 2 ++
 include/hw/vfio/vfio-common.h | 6 ++
 2 files changed, 8 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 939dcc3d4a..1553ba7116 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3309,6 +3309,8 @@ static Property vfio_pci_dev_properties[] = {
qdev_prop_nv_gpudirect_clique, uint8_t),
 DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo,
 OFF_AUTOPCIBAR_OFF),
+DEFINE_PROP_STRING("x-plugin-path", VFIOPCIDevice, vbasedev.desc.path),
+DEFINE_PROP_STRING("x-plugin-arg", VFIOPCIDevice, vbasedev.desc.arg),
 /*
  * TODO - support passed fds... is this necessary?
  * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index be8adf890f..45d6d75284 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -58,6 +58,11 @@ typedef struct VFIORegion {
 uint8_t nr; /* cache the region number for debug */
 } VFIORegion;
 
+struct vfio_migration_plugin_desc {
+char *path;
+char *arg;
+};
+
 typedef struct VFIOMigrationOps VFIOMigrationOps;
 
 typedef struct VFIOMigration {
@@ -144,6 +149,7 @@ typedef struct VFIODevice {
 unsigned int num_regions;
 unsigned int flags;
 VFIOMigration *migration;
+struct vfio_migration_plugin_desc desc;
 Error *migration_blocker;
 OnOffAuto pre_copy_dirty_page_tracking;
 } VFIODevice;
-- 
2.32.0

[RFC PATCH 07/13] vfio/migration: move the statistics of bytes_transferred to generic VFIO migration layer

2022-05-23 Thread Lei Rao

The statistics of bytes transferred conceptually belong to the VFIO live
migration framework rather than to any specific implementation such as the
In-Band approach, so move the accounting out of
vfio_migration_save_buffer_local() and into its callers, which makes it easier
to add other implementations.

Signed-off-by: Lei Rao 
Reviewed-by: Eddie Dong 
---
 hw/vfio/migration.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 4736af90e7..c114fab3a2 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -257,7 +257,6 @@ static int vfio_migration_save_buffer_local(QEMUFile *f, 
VFIODevice *vbasedev,
 *size = data_size;
 }
 
-bytes_transferred += data_size;
 return ret;
 }
 
@@ -540,6 +539,7 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
  vbasedev->name, strerror(errno));
 return ret;
 }
+bytes_transferred += data_size;
 }
 
 qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
@@ -592,6 +592,7 @@ static int vfio_save_complete_precopy(QEMUFile *f, void 
*opaque)
 error_report("%s: Failed to save buffer", vbasedev->name);
 return ret;
 }
+bytes_transferred += data_size;
 }
 
 if (data_size == 0) {
-- 
2.32.0

[RFC PATCH 09/13] vfio/migration: move the functions of In-Band approach to a new file

2022-05-23 Thread Lei Rao

Move the functions of the In-Band approach into a new file to match the new
abstraction layer of migration ops.

Signed-off-by: Lei Rao 
Reviewed-by: Eddie Dong 
---
 hw/vfio/meson.build   |   1 +
 hw/vfio/migration-local.c | 453 ++
 hw/vfio/migration.c   | 421 ---
 include/hw/vfio/vfio-common.h |   1 +
 4 files changed, 455 insertions(+), 421 deletions(-)
 create mode 100644 hw/vfio/migration-local.c

diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
index da9af297a0..5a72b8c349 100644
--- a/hw/vfio/meson.build
+++ b/hw/vfio/meson.build
@@ -3,6 +3,7 @@ vfio_ss.add(files(
   'common.c',
   'spapr.c',
   'migration.c',
+  'migration-local.c',
 ))
 vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
   'display.c',
diff --git a/hw/vfio/migration-local.c b/hw/vfio/migration-local.c
new file mode 100644
index 00..46c8baed50
--- /dev/null
+++ b/hw/vfio/migration-local.c
@@ -0,0 +1,453 @@
+/*
+ * QEMU VFIO Migration Support
+ *
+ * Copyright NVIDIA, Inc. 2020
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qemu/cutils.h"
+#include 
+#include 
+
+#include "sysemu/runstate.h"
+#include "hw/vfio/vfio-common.h"
+#include "migration/migration.h"
+#include "migration/vmstate.h"
+#include "migration/qemu-file.h"
+#include "migration/register.h"
+#include "migration/blocker.h"
+#include "migration/misc.h"
+#include "qapi/error.h"
+#include "exec/ramlist.h"
+#include "exec/ram_addr.h"
+#include "pci.h"
+#include "trace.h"
+#include "hw/hw.h"
+#include "ui/console.h"
+
+static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
+  off_t off, bool iswrite)
+{
+int ret;
+
+ret = iswrite ? pwrite(vbasedev->fd, val, count, off) :
+pread(vbasedev->fd, val, count, off);
+if (ret < count) {
+error_report("vfio_mig_%s %d byte %s: failed at offset 0x%"
+ HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count,
+ vbasedev->name, off, strerror(errno));
+return (ret < 0) ? ret : -EINVAL;
+}
+return 0;
+}
+
+static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count,
+   off_t off, bool iswrite)
+{
+int ret, done = 0;
+__u8 *tbuf = buf;
+
+while (count) {
+int bytes = 0;
+
+if (count >= 8 && !(off % 8)) {
+bytes = 8;
+} else if (count >= 4 && !(off % 4)) {
+bytes = 4;
+} else if (count >= 2 && !(off % 2)) {
+bytes = 2;
+} else {
+bytes = 1;
+}
+
+ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite);
+if (ret) {
+return ret;
+}
+
+count -= bytes;
+done += bytes;
+off += bytes;
+tbuf += bytes;
+}
+return done;
+}
+
+#define vfio_mig_read(f, v, c, o)   vfio_mig_rw(f, (__u8 *)v, c, o, false)
+#define vfio_mig_write(f, v, c, o)  vfio_mig_rw(f, (__u8 *)v, c, o, true)
+
+#define VFIO_MIG_STRUCT_OFFSET(f)   \
+ offsetof(struct vfio_device_migration_info, f)
+/*
+ * Change the device_state register for device @vbasedev. Bits set in @mask
+ * are preserved, bits set in @value are set, and bits not set in either @mask
+ * or @value are cleared in device_state. If the register cannot be accessed,
+ * the resulting state would be invalid, or the device enters an error state,
+ * an error is returned.
+ */
+
+static int vfio_migration_set_state_local(VFIODevice *vbasedev, uint32_t mask,
+  uint32_t value)
+{
+VFIOMigration *migration = vbasedev->migration;
+VFIORegion *region = &migration->region;
+off_t dev_state_off = region->fd_offset +
+  VFIO_MIG_STRUCT_OFFSET(device_state);
+uint32_t device_state;
+int ret;
+
+ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
+dev_state_off);
+if (ret < 0) {
+return ret;
+}
+
+device_state = (device_state & mask) | value;
+
+if (!VFIO_DEVICE_STATE_VALID(device_state)) {
+return -EINVAL;
+}
+
+ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state),
+ dev_state_off);
+if (ret < 0) {
+int rret;
+
+rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
+ dev_state_off);
+
+if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) {
+hw_error("%s: Device in error state 0x%x", vbasedev->name,
+ device_state);
+

[RFC PATCH 11/13] vfio/migration: add a plugin layer to support out-of-band live migration

2022-05-23 Thread Lei Rao

Introduce a plugin mechanism under the VFIOMigrationOps layer. Each vendor can
provide a dynamically loadable library that implements the communication driver
used to talk with the IPU/DPU backend agent for saving and restoring device
state during live migration.

There are three interfaces between QEMU VFIO and a migration plugin:

- VFIOLMPluginGetVersion:
This is a function type. Plugin must expose a function symbol named
"vfio_lm_get_plugin_version" with this function type to return the
interface version supported by the plugin.
- VFIOLMPluginGetOps:
This is a function type. Plugin must expose a function symbol named
"vfio_lm_get_plugin_ops" with this function type to return a pointer to
VFIOMigrationPluginOps struct.
- VFIOMigrationPluginOps:
This is a struct type containing a set of callbacks that plugin
exposes. The callbacks will be invoked by QEMU VFIO during live
migration for saving and restoring device states.

The interfaces are defined in include/hw/vfio/vfio-migration-plugin.h.

When QEMU loads a migration plugin, it first looks up and invokes the function
symbol named "vfio_lm_get_plugin_version" to check the interface version that
the plugin supports. It then looks up and invokes the function symbol named
"vfio_lm_get_plugin_ops" to get the vendor-specific VFIOMigrationPluginOps,
which are used for saving and restoring device state during live migration.

Signed-off-by: Lei Rao 
Reviewed-by: Eddie Dong 
---
 docs/devel/vfio-migration-plugin.rst| 165 +++
 hw/vfio/meson.build |   1 +
 hw/vfio/migration-plugin.c  | 262 
 hw/vfio/migration.c |  13 +-
 include/hw/vfio/vfio-common.h   |  12 ++
 include/hw/vfio/vfio-migration-plugin.h |  21 ++
 6 files changed, 471 insertions(+), 3 deletions(-)
 create mode 100644 docs/devel/vfio-migration-plugin.rst
 create mode 100644 hw/vfio/migration-plugin.c
 create mode 100644 include/hw/vfio/vfio-migration-plugin.h

diff --git a/docs/devel/vfio-migration-plugin.rst 
b/docs/devel/vfio-migration-plugin.rst
new file mode 100644
index 00..800d1bac0a
--- /dev/null
+++ b/docs/devel/vfio-migration-plugin.rst
@@ -0,0 +1,165 @@
+
+VFIO Device Migration Plugins
+
+
+Contents:
+=
+* Introduction
+* Usage
+* Plugin based VFIO Live Migration Flow
+* Interface Description between QEMU and Plugins
+
+Introduction:
+
+
+Plugin based VFIO live migration is an extension to VFIO live migration
+mechanism, which is described in ``docs/devel/vfio-migration.rst``. It provides
+an out-of-band migration solution for PCIe functions exposed by Infrastructure
+Processing Units (IPU) and Data Processing Units (DPU).
+
+IPU/DPU usually has an SoC in the backend where a Linux system usually runs
+out-of-band agents to provision and configure the interfaces and communicate
+with a host management stack such as gRPC or JSON-RPC. Plugin based VFIO live
+migration leverages the agents in the SoC to save/restore PCIe device states.
+
+This is a new feature for VFIO live migration and it allows device vendors to
+develop out-of-tree plugins that can be dynamically loaded into a running QEMU
+process during VFIO passthrough devices live migration.
+
+This document describes the interfaces between QEMU VFIO live migration
+framework and the plugins.
+
+Usage:
+==
+
+An example to use VFIO migration plugin is as the following command line:
+
+-device 
vfio-pci-emu,x-enable-migration=on,x-plugin-path=$plugin_path,x-plugin-arg=$plugin_arg
+
+Where,
+
+- the 'x-enable-migration' controls whether the VFIO device supports live
+  migration (Not supported by default).
+
+- 'x-plugin-path' indicates the path of the plugin on the host.
+
+- 'x-plugin-arg' is a parameter required by QEMU to load and use the 
out-of-tree
+  plugin, if the plugin communicates with the backend on IPU/DPU by network,
+  this parameter should be the IP address and port of the backend agent.
+
+Plugin based VFIO Live Migration Flow:
+==
+
+The following ASCII graph describes the overall component relationship:
+
+ ++
+ | QEMU   |
+ | ++ |
+ | |VFIO Live Migration Framework   | |
+ | |+--+| |
+ | || VFIOMigrationOps || |
+ | |+---^-^+| |
+ | || | | |
+ | |+---v---+ +---v+| |
+ | || VFIO LM Based | | VFIO LM Based  || |
+ | ||On Local Region| |   On Plugin|| |
+ | |+---^---+ | +--+| |
+ | || | |Plugin Ops++-++
+ | || +-+--+

[RFC PATCH 13/13] vfio/migration: make the region and plugin member of struct VFIOMigration to be a union

2022-05-23 Thread Lei Rao

Since a VFIO device uses either In-Band or Out-of-Band live migration, but
never both, the region and plugin members of VFIOMigration can be put into a
union.

Signed-off-by: Lei Rao 
Reviewed-by: Eddie Dong 
---
 hw/vfio/migration-local.c | 33 ++---
 include/hw/vfio/vfio-common.h |  6 --
 2 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/hw/vfio/migration-local.c b/hw/vfio/migration-local.c
index 46c8baed50..13d1abee5a 100644
--- a/hw/vfio/migration-local.c
+++ b/hw/vfio/migration-local.c
@@ -98,7 +98,7 @@ static int vfio_migration_set_state_local(VFIODevice 
*vbasedev, uint32_t mask,
   uint32_t value)
 {
 VFIOMigration *migration = vbasedev->migration;
-VFIORegion *region = &migration->region;
+VFIORegion *region = migration->region;
 off_t dev_state_off = region->fd_offset +
   VFIO_MIG_STRUCT_OFFSET(device_state);
 uint32_t device_state;
@@ -184,7 +184,7 @@ static int vfio_migration_save_buffer_local(QEMUFile *f, 
VFIODevice *vbasedev,
 uint64_t *size)
 {
 VFIOMigration *migration = vbasedev->migration;
-VFIORegion *region = &migration->region;
+VFIORegion *region = migration->region;
 uint64_t data_offset = 0, data_size = 0, sz;
 int ret;
 
@@ -250,7 +250,7 @@ static int vfio_migration_save_buffer_local(QEMUFile *f, 
VFIODevice *vbasedev,
 static int vfio_migration_load_buffer_local(QEMUFile *f, VFIODevice *vbasedev,
 uint64_t data_size)
 {
-VFIORegion *region = &vbasedev->migration->region;
+VFIORegion *region = vbasedev->migration->region;
 uint64_t data_offset = 0, size, report_size;
 int ret;
 
@@ -322,7 +322,7 @@ static int vfio_migration_load_buffer_local(QEMUFile *f, 
VFIODevice *vbasedev,
 static int vfio_migration_update_pending_local(VFIODevice *vbasedev)
 {
 VFIOMigration *migration = vbasedev->migration;
-VFIORegion *region = &migration->region;
+VFIORegion *region = migration->region;
 uint64_t pending_bytes = 0;
 int ret;
 
@@ -342,8 +342,8 @@ static void vfio_migration_cleanup_local(VFIODevice 
*vbasedev)
 {
 VFIOMigration *migration = vbasedev->migration;
 
-if (migration->region.mmaps) {
-vfio_region_unmap(&migration->region);
+if (migration->region->mmaps) {
+vfio_region_unmap(migration->region);
 }
 }
 
@@ -352,14 +352,14 @@ static int vfio_migration_save_setup_local(VFIODevice 
*vbasedev)
 VFIOMigration *migration = vbasedev->migration;
 int ret = -1;
 
-if (migration->region.mmaps) {
+if (migration->region->mmaps) {
 /*
  * Calling vfio_region_mmap() from migration thread. Memory API called
  * from this function require locking the iothread when called from
  * outside the main loop thread.
  */
 qemu_mutex_lock_iothread();
-ret = vfio_region_mmap(&migration->region);
+ret = vfio_region_mmap(migration->region);
 qemu_mutex_unlock_iothread();
 if (ret) {
 error_report("%s: Failed to mmap VFIO migration region: %s",
@@ -375,11 +375,11 @@ static int vfio_migration_load_setup_local(VFIODevice 
*vbasedev)
 VFIOMigration *migration = vbasedev->migration;
 int ret = -1;
 
-if (migration->region.mmaps) {
-ret = vfio_region_mmap(&migration->region);
+if (migration->region->mmaps) {
+ret = vfio_region_mmap(migration->region);
 if (ret) {
 error_report("%s: Failed to mmap VFIO migration region %d: %s",
- vbasedev->name, migration->region.nr,
+ vbasedev->name, migration->region->nr,
  strerror(-ret));
 error_report("%s: Falling back to slow path", vbasedev->name);
 }
@@ -391,8 +391,10 @@ static void vfio_migration_exit_local(VFIODevice *vbasedev)
 {
 VFIOMigration *migration = vbasedev->migration;
 
-vfio_region_exit(&migration->region);
-vfio_region_finalize(&migration->region);
+vfio_region_exit(migration->region);
+vfio_region_finalize(migration->region);
+g_free(migration->region);
+migration->region = NULL;
 }
 
 static VFIOMigrationOps vfio_local_method = {
@@ -426,7 +428,8 @@ int vfio_migration_probe_local(VFIODevice *vbasedev)
 return -EINVAL;
 }
 
-ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region,
+migration->region = g_new0(VFIORegion, 1);
+ret = vfio_region_setup(obj, vbasedev, vbasedev->migration->region,
 info->index, "migration");
 if (ret) {
 error_report("%s: Failed to setup VFIO migration region %d: %s",
@@ -434,7 +437,7 @@ int vfio_migration_probe_local(VFIODevice *vbasedev)
 goto err;
 }
 
-if (!vbasedev->migration->region.size) {
+if (!vbasedev->migration->region->size) {
 error_report("%s: Invalid zer

[RFC PATCH 06/13] vfio/migration: introduce VFIOMigrationOps layer in VFIO live migration framework

2022-05-23 Thread Lei Rao

Add an abstraction layer, VFIOMigrationOps, to the VFIO live migration
framework. Also adapt the In-Band approach to this abstraction layer by defining
its own VFIOMigrationOps callbacks.

Signed-off-by: Lei Rao 
Reviewed-by: Eddie Dong 
---
 hw/vfio/migration.c   | 203 +-
 include/hw/vfio/vfio-common.h |  14 +++
 2 files changed, 142 insertions(+), 75 deletions(-)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 04360e1f17..4736af90e7 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -407,7 +407,11 @@ static void vfio_migration_cleanup_local(VFIODevice 
*vbasedev)
 
 static void vfio_migration_cleanup(VFIODevice *vbasedev)
 {
-vfio_migration_cleanup_local(vbasedev);
+VFIOMigration *migration = vbasedev->migration;
+
+if (migration->ops->cleanup) {
+migration->ops->cleanup(vbasedev);
+}
 }
 
 /* -- */
@@ -438,24 +442,29 @@ static int vfio_migration_save_setup_local(VFIODevice 
*vbasedev)
 static int vfio_save_setup(QEMUFile *f, void *opaque)
 {
 VFIODevice *vbasedev = opaque;
+VFIOMigration *migration = vbasedev->migration;
 int ret;
 
 trace_vfio_save_setup(vbasedev->name);
 
 qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
 
-ret = vfio_migration_save_setup_local(vbasedev);
-if (ret) {
-error_report("%s: Failed to vfio lm save setup:%s",
- vbasedev->name, strerror(-ret));
-return ret;
+if (migration->ops->save_setup) {
+ret = migration->ops->save_setup(vbasedev);
+if (ret) {
+error_report("%s: Failed to vfio lm save setup:%s",
+ vbasedev->name, strerror(-ret));
+return ret;
+}
 }
 
-ret = vfio_migration_set_state_local(vbasedev, VFIO_DEVICE_STATE_MASK,
- VFIO_DEVICE_STATE_V1_SAVING);
-if (ret) {
-error_report("%s: Failed to set state SAVING", vbasedev->name);
-return ret;
+if (migration->ops->set_state) {
+ret = migration->ops->set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
+VFIO_DEVICE_STATE_V1_SAVING);
+if (ret) {
+error_report("%s: Failed to set state SAVING", vbasedev->name);
+return ret;
+}
 }
 
 qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
@@ -486,9 +495,11 @@ static void vfio_save_pending(QEMUFile *f, void *opaque,
 VFIOMigration *migration = vbasedev->migration;
 int ret;
 
-ret = vfio_migration_update_pending_local(vbasedev);
-if (ret) {
-return;
+if (migration->ops->update_pending) {
+ret = migration->ops->update_pending(vbasedev);
+if (ret) {
+return;
+}
 }
 
 *res_precopy_only += migration->pending_bytes;
@@ -507,9 +518,11 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
 qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
 
 if (migration->pending_bytes == 0) {
-ret = vfio_migration_update_pending_local(vbasedev);
-if (ret) {
-return ret;
+if (migration->ops->update_pending) {
+ret = migration->ops->update_pending(vbasedev);
+if (ret) {
+return ret;
+}
 }
 
 if (migration->pending_bytes == 0) {
@@ -520,11 +533,13 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
 }
 }
 
-ret = vfio_migration_save_buffer_local(f, vbasedev, &data_size);
-if (ret) {
-error_report("%s: vfio_miragion_save_buffer_local failed %s",
- vbasedev->name, strerror(errno));
-return ret;
+if (migration->ops->save_buffer) {
+ret = migration->ops->save_buffer(f, vbasedev, &data_size);
+if (ret) {
+error_report("%s: vfio_miragion_save_buffer_local failed %s",
+ vbasedev->name, strerror(errno));
+return ret;
+}
 }
 
 qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
@@ -551,35 +566,43 @@ static int vfio_save_complete_precopy(QEMUFile *f, void 
*opaque)
 uint64_t data_size;
 int ret;
 
-ret = vfio_migration_set_state_local(vbasedev,
- ~VFIO_DEVICE_STATE_V1_RUNNING,
- VFIO_DEVICE_STATE_V1_SAVING);
-if (ret) {
-error_report("%s: Failed to set state STOP and SAVING",
- vbasedev->name);
-return ret;
+if (migration->ops->set_state) {
+ret = migration->ops->set_state(vbasedev,
+~VFIO_DEVICE_STATE_V1_RUNNING,
+VFIO_DEVICE_STATE_V1_SAVING);
+if (ret) {
+error_report("%s: Failed to set state STOP and SAVING",
+ vbasedev->name);
+return ret;
+}
 }
 
-ret = vfio_mi

[RFC PATCH 05/13] vfio/migration: rename functions that relate to the In-Band approach

2022-05-23 Thread Lei Rao

Rename the functions that are specific to the In-Band approach to facilitate
introducing a generic VFIO live migration layer.

Rename vfio_migration_set_state to vfio_migration_set_state_local,
vfio_save_buffer to vfio_migration_save_buffer_local,
vfio_load_buffer to vfio_migration_load_buffer_local,
vfio_update_pending to vfio_migration_update_pending_local,
vfio_migration_init to vfio_migration_probe_local,
vfio_migration_exit to vfio_migration_exit_local.

Signed-off-by: Lei Rao 
Reviewed-by: Eddie Dong 
---
 hw/vfio/migration.c  | 74 +++-
 hw/vfio/trace-events |  6 ++--
 2 files changed, 42 insertions(+), 38 deletions(-)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index c2df2caae6..04360e1f17 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -107,8 +107,8 @@ static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, 
size_t count,
  * an error is returned.
  */
 
-static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
-uint32_t value)
+static int vfio_migration_set_state_local(VFIODevice *vbasedev, uint32_t mask,
+  uint32_t value)
 {
 VFIOMigration *migration = vbasedev->migration;
 VFIORegion *region = &migration->region;
@@ -193,7 +193,8 @@ static void *get_data_section_size(VFIORegion *region, 
uint64_t data_offset,
 return ptr;
 }
 
-static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size)
+static int vfio_migration_save_buffer_local(QEMUFile *f, VFIODevice *vbasedev,
+uint64_t *size)
 {
 VFIOMigration *migration = vbasedev->migration;
 VFIORegion *region = &migration->region;
@@ -212,8 +213,8 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice 
*vbasedev, uint64_t *size)
 return ret;
 }
 
-trace_vfio_save_buffer(vbasedev->name, data_offset, data_size,
-   migration->pending_bytes);
+trace_vfio_save_buffer_local(vbasedev->name, data_offset, data_size,
+ migration->pending_bytes);
 
 qemu_put_be64(f, data_size);
 sz = data_size;
@@ -260,8 +261,8 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice 
*vbasedev, uint64_t *size)
 return ret;
 }
 
-static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
-uint64_t data_size)
+static int vfio_migration_load_buffer_local(QEMUFile *f, VFIODevice *vbasedev,
+uint64_t data_size)
 {
 VFIORegion *region = &vbasedev->migration->region;
 uint64_t data_offset = 0, size, report_size;
@@ -288,7 +289,8 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice 
*vbasedev,
 data_size = 0;
 }
 
-trace_vfio_load_state_device_data(vbasedev->name, data_offset, size);
+trace_vfio_load_state_device_data_local(vbasedev->name, data_offset,
+size);
 
 while (size) {
 void *buf;
@@ -331,7 +333,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice 
*vbasedev,
 return 0;
 }
 
-static int vfio_update_pending(VFIODevice *vbasedev)
+static int vfio_migration_update_pending_local(VFIODevice *vbasedev)
 {
 VFIOMigration *migration = vbasedev->migration;
 VFIORegion *region = &migration->region;
@@ -449,8 +451,8 @@ static int vfio_save_setup(QEMUFile *f, void *opaque)
 return ret;
 }
 
-ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
-   VFIO_DEVICE_STATE_V1_SAVING);
+ret = vfio_migration_set_state_local(vbasedev, VFIO_DEVICE_STATE_MASK,
+ VFIO_DEVICE_STATE_V1_SAVING);
 if (ret) {
 error_report("%s: Failed to set state SAVING", vbasedev->name);
 return ret;
@@ -484,7 +486,7 @@ static void vfio_save_pending(QEMUFile *f, void *opaque,
 VFIOMigration *migration = vbasedev->migration;
 int ret;
 
-ret = vfio_update_pending(vbasedev);
+ret = vfio_migration_update_pending_local(vbasedev);
 if (ret) {
 return;
 }
@@ -505,7 +507,7 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
 qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
 
 if (migration->pending_bytes == 0) {
-ret = vfio_update_pending(vbasedev);
+ret = vfio_migration_update_pending_local(vbasedev);
 if (ret) {
 return ret;
 }
@@ -518,10 +520,10 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
 }
 }
 
-ret = vfio_save_buffer(f, vbasedev, &data_size);
+ret = vfio_migration_save_buffer_local(f, vbasedev, &data_size);
 if (ret) {
-error_report("%s: vfio_save_buffer failed %s", vbasedev->name,
- strerror(errno));
+error_report("%s: vfio_miragion_save_buffer_local failed %s",
+ vbasedev->name, strerror(errno));

[RFC PATCH 08/13] vfio/migration: split migration handler registering from vfio_migration_init

2022-05-23 Thread Lei Rao

vfio_migration_init() is mainly concerned with initializing the In-Band
approach, but migration handler registration may also be needed by other
approaches, so split it out of vfio_migration_init() and move it to
vfio_migration_probe().

Signed-off-by: Lei Rao 
Reviewed-by: Eddie Dong 
---
 hw/vfio/migration.c | 56 ++---
 1 file changed, 37 insertions(+), 19 deletions(-)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index c114fab3a2..0c67ed85f3 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -882,6 +882,38 @@ static int vfio_migration_check(VFIODevice *vbasedev)
 return 0;
 }
 
+static int vfio_migration_register_handlers(VFIODevice *vbasedev)
+{
+Object *obj;
+char id[256] = "";
+g_autofree char *path = NULL, *oid = NULL;
+VFIOMigration *migration = vbasedev->migration;
+
+obj = vbasedev->ops->vfio_get_object(vbasedev);
+if (!obj) {
+return -EINVAL;
+}
+
+oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj)));
+if (oid) {
+path = g_strdup_printf("%s/vfio", oid);
+} else {
+path = g_strdup("vfio");
+}
+strpadcpy(id, sizeof(id), path, '\0');
+
+register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
+ vbasedev);
+
+migration->vm_state = qdev_add_vm_change_state_handler(vbasedev->dev,
+   vfio_vmstate_change,
+   vbasedev);
+migration->migration_state.notify = vfio_migration_state_notifier;
+add_migration_state_change_notifier(&migration->migration_state);
+
+return 0;
+}
+
 static VFIOMigrationOps vfio_local_method = {
 .save_setup = vfio_migration_save_setup_local,
 .load_setup = vfio_migration_load_setup_local,
@@ -897,9 +929,7 @@ static int vfio_migration_probe_local(VFIODevice *vbasedev)
 {
 int ret;
 Object *obj;
-char id[256] = "";
 struct vfio_region_info *info = NULL;
-g_autofree char *path = NULL, *oid = NULL;
 VFIOMigration *migration = vbasedev->migration;
 
 obj = vbasedev->ops->vfio_get_object(vbasedev);
@@ -930,23 +960,6 @@ static int vfio_migration_probe_local(VFIODevice *vbasedev)
 goto err;
 }
 
-oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj)));
-if (oid) {
-path = g_strdup_printf("%s/vfio", oid);
-} else {
-path = g_strdup("vfio");
-}
-strpadcpy(id, sizeof(id), path, '\0');
-
-register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
- vbasedev);
-
-migration->vm_state = qdev_add_vm_change_state_handler(vbasedev->dev,
-   vfio_vmstate_change,
-   vbasedev);
-migration->migration_state.notify = vfio_migration_state_notifier;
-add_migration_state_change_notifier(&migration->migration_state);
-
 trace_vfio_migration_probe_local(vbasedev->name, info->index);
 migration->ops = &vfio_local_method;
 g_free(info);
@@ -982,6 +995,11 @@ int vfio_migration_probe(VFIODevice *vbasedev, Error 
**errp)
 goto add_blocker;
 }
 
+ret = vfio_migration_register_handlers(vbasedev);
+if (ret) {
+goto add_blocker;
+}
+
 return 0;
 
 add_blocker:
-- 
2.32.0

[RFC PATCH 04/13] vfio/migration: Separated functions that relate to the In-Band approach

2022-05-23 Thread Lei Rao

Split the functions of the In-Band approach out of the common functions, to prepare
for the introduction of a generic VFIO live migration layer and another set of Sub-Ops.

Signed-off-by: Lei Rao 
Reviewed-by: Eddie Dong 
---
 hw/vfio/migration.c | 64 ++---
 1 file changed, 49 insertions(+), 15 deletions(-)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index e61c19171a..c2df2caae6 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -394,7 +394,7 @@ static int vfio_load_device_config_state(QEMUFile *f, void 
*opaque)
 return qemu_file_get_error(f);
 }
 
-static void vfio_migration_cleanup(VFIODevice *vbasedev)
+static void vfio_migration_cleanup_local(VFIODevice *vbasedev)
 {
 VFIOMigration *migration = vbasedev->migration;
 
@@ -403,17 +403,17 @@ static void vfio_migration_cleanup(VFIODevice *vbasedev)
 }
 }
 
+static void vfio_migration_cleanup(VFIODevice *vbasedev)
+{
+vfio_migration_cleanup_local(vbasedev);
+}
+
 /* -- */
 
-static int vfio_save_setup(QEMUFile *f, void *opaque)
+static int vfio_migration_save_setup_local(VFIODevice *vbasedev)
 {
-VFIODevice *vbasedev = opaque;
 VFIOMigration *migration = vbasedev->migration;
-int ret;
-
-trace_vfio_save_setup(vbasedev->name);
-
-qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
+int ret = -1;
 
 if (migration->region.mmaps) {
 /*
@@ -430,6 +430,24 @@ static int vfio_save_setup(QEMUFile *f, void *opaque)
 error_report("%s: Falling back to slow path", vbasedev->name);
 }
 }
+return ret;
+}
+
+static int vfio_save_setup(QEMUFile *f, void *opaque)
+{
+VFIODevice *vbasedev = opaque;
+int ret;
+
+trace_vfio_save_setup(vbasedev->name);
+
+qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
+
+ret = vfio_migration_save_setup_local(vbasedev);
+if (ret) {
+error_report("%s: Failed to vfio lm save setup:%s",
+ vbasedev->name, strerror(-ret));
+return ret;
+}
 
 ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
VFIO_DEVICE_STATE_V1_SAVING);
@@ -592,11 +610,10 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
 }
 }
 
-static int vfio_load_setup(QEMUFile *f, void *opaque)
+static int vfio_migration_load_setup_local(VFIODevice *vbasedev)
 {
-VFIODevice *vbasedev = opaque;
 VFIOMigration *migration = vbasedev->migration;
-int ret = 0;
+int ret = -1;
 
 if (migration->region.mmaps) {
 ret = vfio_region_mmap(&migration->region);
@@ -607,14 +624,26 @@ static int vfio_load_setup(QEMUFile *f, void *opaque)
 error_report("%s: Falling back to slow path", vbasedev->name);
 }
 }
+return ret;
+}
+
+static int vfio_load_setup(QEMUFile *f, void *opaque)
+{
+VFIODevice *vbasedev = opaque;
+int ret = 0;
+
+ret = vfio_migration_load_setup_local(vbasedev);
+if (ret < 0) {
+error_report("%s: Failed to migration load setup", vbasedev->name);
+return ret;
+}
 
 ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK,
VFIO_DEVICE_STATE_V1_RESUMING);
 if (ret) {
 error_report("%s: Failed to set state RESUMING", vbasedev->name);
-if (migration->region.mmaps) {
-vfio_region_unmap(&migration->region);
-}
+vfio_migration_cleanup(vbasedev);
+return ret;
 }
 return ret;
 }
@@ -777,12 +806,17 @@ static void vfio_migration_state_notifier(Notifier 
*notifier, void *data)
 }
 }
 
-static void vfio_migration_exit(VFIODevice *vbasedev)
+static void vfio_migration_exit_local(VFIODevice *vbasedev)
 {
 VFIOMigration *migration = vbasedev->migration;
 
 vfio_region_exit(&migration->region);
 vfio_region_finalize(&migration->region);
+}
+
+static void vfio_migration_exit(VFIODevice *vbasedev)
+{
+vfio_migration_exit_local(vbasedev);
 g_free(vbasedev->migration);
 vbasedev->migration = NULL;
 }
-- 
2.32.0

[RFC PATCH 01/13] vfio/migration: put together checks of migration initialization conditions

2022-05-23 Thread Lei Rao

Current VFIO live migration initialization code is tightly coupled with
local migration region handling. It is necessary to decouple it to
facilitate the introduction of a generic VFIO live migration framework so
that other approaches can be possible besides the In-Band approach.

This patch puts various checks of migration initialization conditions into
one function vfio_migration_check().

Signed-off-by: Lei Rao 
Reviewed-by: Eddie Dong 
---
 hw/vfio/migration.c | 23 +--
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index a6ad1f8945..770f535e81 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -787,6 +787,21 @@ static void vfio_migration_exit(VFIODevice *vbasedev)
 vbasedev->migration = NULL;
 }
 
+static int vfio_migration_check(VFIODevice *vbasedev)
+{
+VFIOContainer *container = vbasedev->group->container;
+
+if (!vbasedev->enable_migration || !container->dirty_pages_supported) {
+return -EINVAL;
+}
+
+if (!vbasedev->ops->vfio_get_object) {
+return -EINVAL;
+}
+
+return 0;
+}
+
 static int vfio_migration_init(VFIODevice *vbasedev,
struct vfio_region_info *info)
 {
@@ -796,10 +811,6 @@ static int vfio_migration_init(VFIODevice *vbasedev,
 char id[256] = "";
 g_autofree char *path = NULL, *oid = NULL;
 
-if (!vbasedev->ops->vfio_get_object) {
-return -EINVAL;
-}
-
 obj = vbasedev->ops->vfio_get_object(vbasedev);
 if (!obj) {
 return -EINVAL;
@@ -857,11 +868,11 @@ int64_t vfio_mig_bytes_transferred(void)
 
 int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
 {
-VFIOContainer *container = vbasedev->group->container;
 struct vfio_region_info *info = NULL;
 int ret = -ENOTSUP;
 
-if (!vbasedev->enable_migration || !container->dirty_pages_supported) {
+ret = vfio_migration_check(vbasedev);
+if (ret) {
 goto add_blocker;
 }
 
-- 
2.32.0

[RFC PATCH 02/13] vfio/migration: move migration struct allocation out of vfio_migration_init

2022-05-23 Thread Lei Rao

Migration struct is a common data structure. Memory allocation of migration
struct is not unique to In-Band approach. So, move it from vfio_migration_init()
to vfio_migration_probe().

Signed-off-by: Lei Rao 
---
 hw/vfio/migration.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 770f535e81..11ce87bb1a 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -807,17 +807,15 @@ static int vfio_migration_init(VFIODevice *vbasedev,
 {
 int ret;
 Object *obj;
-VFIOMigration *migration;
 char id[256] = "";
 g_autofree char *path = NULL, *oid = NULL;
+VFIOMigration *migration = vbasedev->migration;
 
 obj = vbasedev->ops->vfio_get_object(vbasedev);
 if (!obj) {
 return -EINVAL;
 }
 
-vbasedev->migration = g_new0(VFIOMigration, 1);
-
 ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region,
 info->index, "migration");
 if (ret) {
@@ -833,9 +831,6 @@ static int vfio_migration_init(VFIODevice *vbasedev,
 goto err;
 }
 
-migration = vbasedev->migration;
-migration->vbasedev = vbasedev;
-
 oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj)));
 if (oid) {
 path = g_strdup_printf("%s/vfio", oid);
@@ -876,6 +871,9 @@ int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
 goto add_blocker;
 }
 
+vbasedev->migration = g_new0(VFIOMigration, 1);
+vbasedev->migration->vbasedev = vbasedev;
+
 ret = vfio_get_dev_region_info(vbasedev,
VFIO_REGION_TYPE_MIGRATION_DEPRECATED,
VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED,
@@ -903,6 +901,8 @@ add_blocker:
 error_free(vbasedev->migration_blocker);
 vbasedev->migration_blocker = NULL;
 }
+g_free(vbasedev->migration);
+vbasedev->migration = NULL;
 return ret;
 }
 
-- 
2.32.0

[RFC PATCH 00/13] Add a plugin to support out-of-band live migration for VFIO pass-through device

2022-05-23 Thread Lei Rao

Migration of a VFIO passthrough device can be supported by using a
device-specific kernel driver to save/restore the device state through
device-specific interfaces. But this approach doesn't work for devices that
lack a state migration interface, e.g. NVMe.

On the other hand, Infrastructure Processing Unit (IPU) or Data Processing Unit
(DPU) vendors may choose to implement an out-of-band interface from the SoC to
help manage the state of such non-migratable devices, e.g. via gRPC or JSON-RPC
protocols.

This RFC attempts to support such out-of-band migration interface by introducing
the concept of migration backends in vfio. The existing logic around vfio 
migration uAPI is now called the 'local' backend while a new 'out-of-band' 
backend is further introduced allowing vfio to redirect VMState ops to an 
external plugin.

Currently, the backend migration Ops are defined close to SaveVMHandlers. We also
considered whether there is value in abstracting them at a lower level, e.g. close
to the vfio migration uAPI, but reached no clear conclusion. Hence this is one part
on which we'd like to hear suggestions.

This proposal adopts a plugin mechanism (an example can be found in [1]) given
that IPU/DPU vendors usually implement proprietary migration interfaces without
a standard. But we are also open to an alternative option if it makes better sense,
e.g. via loadable modules (with QEMU supporting gRPC or JSON-RPC) or an
IPC mechanism similar to vhost-user.

The following graph describes the overall component relationship:

 ++
 | QEMU   |
 | ++ |
 | |VFIO Live Migration Framework   | |
 | |+--+| |
 | || VFIOMigrationOps || |
 | |+---^-^+| |
 | || | | |
 | |+---v---+ +---v+| |
 | || LM Backend Via| | LM Backend Via || |
 | ||   Device Fd   | |Plugins || |
 | |+---^---+ | +--+| |
 | || | |Plugin Ops++-++
 | || +-+--+| ||
 | ||   | |  +-v--+
 | ++---+ |  |  Vendor Specific   |
 |  | |  |Plugins(.so)|
 +--+-+  +--+-+
  UserSpace |   |
+-  |
  Kernel|   |
|   |
 +--v--+|
 |Kernel VFIO Driver   ||
 |+-+  ||
 || |  || Network
 || Vendor-Specific Driver  |  ||
 || |  ||
 |+--^--+  ||
 |   | ||
 +---+-+|
 |  |
 |  |
-+- |
  Hardware   |  |
 |+-+-+-++-+|
  +--v--+ | VF0 | VF1 | VF2 | ...| VFn ||
  |   Traditional   | +-+-+-++-+|
  |  PCIe Devices   | |||
  +-+ |   +++  ||
  |   ||   Agent|<-++
  |   |++  |
  |   | |  |
  |   | SOC |  |
  |   +-+  |
  | IPU|
  ++

Two command-line parameters (x-plugin-path and x-plugin-arg) are introduced to 
enable the out-of-band backend. If specified, vfio will attempt to use the 
out-of-band backend.

The following is an example of VFIO command-line parameters for OOB-Approach:

  -device 
vfio-pci,id=$ID,host=$bdf,x-enable-migration,x-plugin-path=$pl

[RFC PATCH 03/13] vfio/migration: move vfio_get_dev_region_info out of vfio_migration_probe

2022-05-23 Thread Lei Rao

vfio_get_dev_region_info() in vfio_migration_probe() is an operation specific to
the In-Band approach.  So, it's better to put it in vfio_migration_init() because
most of the setup of the In-Band approach is handled there. vfio_migration_init()
will be renamed to vfio_migration_probe_local().

Signed-off-by: Lei Rao 
---
 hw/vfio/migration.c | 30 +++---
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 11ce87bb1a..e61c19171a 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -802,12 +802,12 @@ static int vfio_migration_check(VFIODevice *vbasedev)
 return 0;
 }
 
-static int vfio_migration_init(VFIODevice *vbasedev,
-   struct vfio_region_info *info)
+static int vfio_migration_init(VFIODevice *vbasedev)
 {
 int ret;
 Object *obj;
 char id[256] = "";
+struct vfio_region_info *info = NULL;
 g_autofree char *path = NULL, *oid = NULL;
 VFIOMigration *migration = vbasedev->migration;
 
@@ -816,6 +816,14 @@ static int vfio_migration_init(VFIODevice *vbasedev,
 return -EINVAL;
 }
 
+ret = vfio_get_dev_region_info(vbasedev,
+   VFIO_REGION_TYPE_MIGRATION_DEPRECATED,
+   VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED,
+   &info);
+if (ret) {
+return -EINVAL;
+}
+
 ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region,
 info->index, "migration");
 if (ret) {
@@ -847,10 +855,14 @@ static int vfio_migration_init(VFIODevice *vbasedev,
vbasedev);
 migration->migration_state.notify = vfio_migration_state_notifier;
 add_migration_state_change_notifier(&migration->migration_state);
+
+trace_vfio_migration_probe(vbasedev->name, info->index);
+g_free(info);
 return 0;
 
 err:
 vfio_migration_exit(vbasedev);
+g_free(info);
 return ret;
 }
 
@@ -863,7 +875,6 @@ int64_t vfio_mig_bytes_transferred(void)
 
 int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
 {
-struct vfio_region_info *info = NULL;
 int ret = -ENOTSUP;
 
 ret = vfio_migration_check(vbasedev);
@@ -874,27 +885,16 @@ int vfio_migration_probe(VFIODevice *vbasedev, Error 
**errp)
 vbasedev->migration = g_new0(VFIOMigration, 1);
 vbasedev->migration->vbasedev = vbasedev;
 
-ret = vfio_get_dev_region_info(vbasedev,
-   VFIO_REGION_TYPE_MIGRATION_DEPRECATED,
-   VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED,
-   &info);
-if (ret) {
-goto add_blocker;
-}
-
-ret = vfio_migration_init(vbasedev, info);
+ret = vfio_migration_init(vbasedev);
 if (ret) {
 goto add_blocker;
 }
 
-trace_vfio_migration_probe(vbasedev->name, info->index);
-g_free(info);
 return 0;
 
 add_blocker:
 error_setg(&vbasedev->migration_blocker,
"VFIO device doesn't support migration");
-g_free(info);
 
 ret = migrate_add_blocker(vbasedev->migration_blocker, errp);
 if (ret < 0) {
-- 
2.32.0

[PATCH v2] target/riscv: add support for zmmul extension v0.1

2022-05-23 Thread Weiwei Li

 - includes all multiplication operations for M extension

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 

v2:
* disable M when both M and Zmmul are enabled
 
---
 target/riscv/cpu.c  |  7 +++
 target/riscv/cpu.h  |  1 +
 target/riscv/insn_trans/trans_rvm.c.inc | 18 --
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index e373c61ba2..aec6882c5f 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -598,6 +598,11 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
 cpu->cfg.ext_ifencei = true;
 }
 
+if (cpu->cfg.ext_m && cpu->cfg.ext_zmmul) {
+warn_report("Zmmul will override M");
+cpu->cfg.ext_m = false;
+}
+
 if (cpu->cfg.ext_i && cpu->cfg.ext_e) {
 error_setg(errp,
"I and E extensions are incompatible");
@@ -903,6 +908,7 @@ static Property riscv_cpu_properties[] = {
 
 /* These are experimental so mark with 'x-' */
 DEFINE_PROP_BOOL("x-j", RISCVCPU, cfg.ext_j, false),
+DEFINE_PROP_BOOL("x-zmmul", RISCVCPU, cfg.ext_zmmul, false),
 /* ePMP 0.9.3 */
 DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false),
 DEFINE_PROP_BOOL("x-aia", RISCVCPU, cfg.aia, false),
@@ -1027,6 +1033,7 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char 
**isa_str, int max_str_len)
  *extensions by an underscore.
  */
 struct isa_ext_data isa_edata_arr[] = {
+ISA_EDATA_ENTRY(zmmul, ext_zmmul),
 ISA_EDATA_ENTRY(zfh, ext_zfh),
 ISA_EDATA_ENTRY(zfhmin, ext_zfhmin),
 ISA_EDATA_ENTRY(zfinx, ext_zfinx),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index f5ff7294c6..68177eae12 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -405,6 +405,7 @@ struct RISCVCPUConfig {
 bool ext_zhinxmin;
 bool ext_zve32f;
 bool ext_zve64f;
+bool ext_zmmul;
 
 uint32_t mvendorid;
 uint64_t marchid;
diff --git a/target/riscv/insn_trans/trans_rvm.c.inc 
b/target/riscv/insn_trans/trans_rvm.c.inc
index 16b029edf0..ec7f705aab 100644
--- a/target/riscv/insn_trans/trans_rvm.c.inc
+++ b/target/riscv/insn_trans/trans_rvm.c.inc
@@ -18,6 +18,12 @@
  * this program.  If not, see .
  */
 
+#define REQUIRE_M_OR_ZMMUL(ctx) do {  \
+if (!ctx->cfg_ptr->ext_zmmul && !has_ext(ctx, RVM)) { \
+return false; \
+} \
+} while (0)
+
 static void gen_mulhu_i128(TCGv r2, TCGv r3, TCGv al, TCGv ah, TCGv bl, TCGv 
bh)
 {
 TCGv tmpl = tcg_temp_new();
@@ -65,7 +71,7 @@ static void gen_mul_i128(TCGv rl, TCGv rh,
 
 static bool trans_mul(DisasContext *ctx, arg_mul *a)
 {
-REQUIRE_EXT(ctx, RVM);
+REQUIRE_M_OR_ZMMUL(ctx);
 return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, gen_mul_i128);
 }
 
@@ -109,7 +115,7 @@ static void gen_mulh_w(TCGv ret, TCGv s1, TCGv s2)
 
 static bool trans_mulh(DisasContext *ctx, arg_mulh *a)
 {
-REQUIRE_EXT(ctx, RVM);
+REQUIRE_M_OR_ZMMUL(ctx);
 return gen_arith_per_ol(ctx, a, EXT_SIGN, gen_mulh, gen_mulh_w,
 gen_mulh_i128);
 }
@@ -161,7 +167,7 @@ static void gen_mulhsu_w(TCGv ret, TCGv arg1, TCGv arg2)
 
 static bool trans_mulhsu(DisasContext *ctx, arg_mulhsu *a)
 {
-REQUIRE_EXT(ctx, RVM);
+REQUIRE_M_OR_ZMMUL(ctx);
 return gen_arith_per_ol(ctx, a, EXT_NONE, gen_mulhsu, gen_mulhsu_w,
 gen_mulhsu_i128);
 }
@@ -176,7 +182,7 @@ static void gen_mulhu(TCGv ret, TCGv s1, TCGv s2)
 
 static bool trans_mulhu(DisasContext *ctx, arg_mulhu *a)
 {
-REQUIRE_EXT(ctx, RVM);
+REQUIRE_M_OR_ZMMUL(ctx);
 /* gen_mulh_w works for either sign as input. */
 return gen_arith_per_ol(ctx, a, EXT_ZERO, gen_mulhu, gen_mulh_w,
 gen_mulhu_i128);
@@ -349,7 +355,7 @@ static bool trans_remu(DisasContext *ctx, arg_remu *a)
 static bool trans_mulw(DisasContext *ctx, arg_mulw *a)
 {
 REQUIRE_64_OR_128BIT(ctx);
-REQUIRE_EXT(ctx, RVM);
+REQUIRE_M_OR_ZMMUL(ctx);
 ctx->ol = MXL_RV32;
 return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, NULL);
 }
@@ -389,7 +395,7 @@ static bool trans_remuw(DisasContext *ctx, arg_remuw *a)
 static bool trans_muld(DisasContext *ctx, arg_muld *a)
 {
 REQUIRE_128BIT(ctx);
-REQUIRE_EXT(ctx, RVM);
+REQUIRE_M_OR_ZMMUL(ctx);
 ctx->ol = MXL_RV64;
 return gen_arith(ctx, a, EXT_SIGN, tcg_gen_mul_tl, NULL);
 }
-- 
2.17.1

Re: [PATCH] target/riscv: add support for zmmul extension v0.1

2022-05-23 Thread Alistair Francis

On Tue, May 24, 2022 at 12:25 PM Weiwei Li  wrote:
>
>
> 在 2022/5/24 上午5:15, Alistair Francis 写道:
>
> On Mon, May 23, 2022 at 6:10 PM Weiwei Li  wrote:
>
> 在 2022/5/23 下午2:34, Alistair Francis 写道:
>
> On Wed, May 18, 2022 at 11:54 AM Weiwei Li  wrote:
>
>   - includes all multiplication operations for M extension
>
> Signed-off-by: Weiwei Li 
> Signed-off-by: Junqiang Wang 
> ---
>   target/riscv/cpu.c  |  2 ++
>   target/riscv/cpu.h  |  1 +
>   target/riscv/insn_trans/trans_rvm.c.inc | 18 --
>   3 files changed, 15 insertions(+), 6 deletions(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index e373c61ba2..01b57d3784 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -903,6 +903,7 @@ static Property riscv_cpu_properties[] = {
>
>   /* These are experimental so mark with 'x-' */
>   DEFINE_PROP_BOOL("x-j", RISCVCPU, cfg.ext_j, false),
> +DEFINE_PROP_BOOL("x-zmmul", RISCVCPU, cfg.ext_zmmul, false),
>
> Is this really experimental?
>
> Alistair
>
> I think it's experimental currently. The zmmul version in latest riscv
> spec is v0.1, even though described as  v1.0 in spike README.
>
> Hmm... You're right that it is only v0.1, but there is no indication of
> draft state in the RISC-V spec chapter on Zmmul
>
> Its specification status
> (https://wiki.riscv.org/display/home/specification+status) is Freeze
> Complete and TSC Sign-Off Voting.
>
> And It's not in the ratified extension
> list(https://wiki.riscv.org/display/home/recently+ratified+extensions).
>
> Any status update I missed?
>
> Confusing. Ok, I guess let's leave it as experimental, we can always
> remove the `x-` easily :)
>
> Regards,
>
> Weiwei Li
>
>   /* ePMP 0.9.3 */
>   DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false),
>   DEFINE_PROP_BOOL("x-aia", RISCVCPU, cfg.aia, false),
> @@ -1027,6 +1028,7 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char 
> **isa_str, int max_str_len)
>*extensions by an underscore.
>*/
>   struct isa_ext_data isa_edata_arr[] = {
> +ISA_EDATA_ENTRY(zmmul, ext_zmmul),
>
> We should have some checks though. We don't want users to enable this
> and the multiply (M) extension
>
>
> Alistair
>
> Maybe we can add a check like this:
>
> /* M + Zmmul = Zmmul */
> if (cpu->cfg.ext_m && cpu->cfg.ext_zmmul) {
> warn_report("Zmmul will override M");
> cpu->cfg.ext_m = false;
> }

Yep, looks good

Alistair

>
> It seems OK to enable both M and Zmmul in gnu toolchain. However, divide 
> operations
>
> will be disabled when Zmmul is enabled.
>
> Regards,
>
> Weiwei Li
>
>   ISA_EDATA_ENTRY(zfh, ext_zfh),
>   ISA_EDATA_ENTRY(zfhmin, ext_zfhmin),
>   ISA_EDATA_ENTRY(zfinx, ext_zfinx),
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index f5ff7294c6..68177eae12 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -405,6 +405,7 @@ struct RISCVCPUConfig {
>   bool ext_zhinxmin;
>   bool ext_zve32f;
>   bool ext_zve64f;
> +bool ext_zmmul;
>
>   uint32_t mvendorid;
>   uint64_t marchid;
> diff --git a/target/riscv/insn_trans/trans_rvm.c.inc 
> b/target/riscv/insn_trans/trans_rvm.c.inc
> index 16b029edf0..ec7f705aab 100644
> --- a/target/riscv/insn_trans/trans_rvm.c.inc
> +++ b/target/riscv/insn_trans/trans_rvm.c.inc
> @@ -18,6 +18,12 @@
>* this program.  If not, see .
>*/
>
> +#define REQUIRE_M_OR_ZMMUL(ctx) do {  \
> +if (!ctx->cfg_ptr->ext_zmmul && !has_ext(ctx, RVM)) { \
> +return false; \
> +} \
> +} while (0)
> +
>   static void gen_mulhu_i128(TCGv r2, TCGv r3, TCGv al, TCGv ah, TCGv bl, 
> TCGv bh)
>   {
>   TCGv tmpl = tcg_temp_new();
> @@ -65,7 +71,7 @@ static void gen_mul_i128(TCGv rl, TCGv rh,
>
>   static bool trans_mul(DisasContext *ctx, arg_mul *a)
>   {
> -REQUIRE_EXT(ctx, RVM);
> +REQUIRE_M_OR_ZMMUL(ctx);
>   return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, gen_mul_i128);
>   }
>
> @@ -109,7 +115,7 @@ static void gen_mulh_w(TCGv ret, TCGv s1, TCGv s2)
>
>   static bool trans_mulh(DisasContext *ctx, arg_mulh *a)
>   {
> -REQUIRE_EXT(ctx, RVM);
> +REQUIRE_M_OR_ZMMUL(ctx);
>   return gen_arith_per_ol(ctx, a, EXT_SIGN, gen_mulh, gen_mulh_w,
>   gen_mulh_i128);
>   }
> @@ -161,7 +167,7 @@ static void gen_mulhsu_w(TCGv ret, TCGv arg1, TCGv arg2)
>
>   static bool trans_mulhsu(DisasContext *ctx, arg_mulhsu *a)
>   {
> -REQUIRE_EXT(ctx, RVM);
> +REQUIRE_M_OR_ZMMUL(ctx);
>   return gen_arith_per_ol(ctx, a, EXT_NONE, gen_mulhsu, gen_mulhsu_w,
>   gen_mulhsu_i128);
>   }
> @@ -176,7 +182,7 @@ static void gen_mulhu(TCGv ret, TCGv s1, TCGv s2)
>
>   static bool trans_mulhu(DisasContext *ctx, arg_mulhu *a)
>   {
> -REQUIRE_EXT(ctx, RVM);
> +REQUIRE_M_OR_ZMMUL(ct

Re: [RFC PATCH v4 13/36] i386/tdx: Validate TD attributes

2022-05-23 Thread Xiaoyao Li


On 5/23/2022 5:39 PM, Gerd Hoffmann wrote:

Validate TD attributes with tdx_caps that fixed-0 bits must be zero and
fixed-1 bits must be set.



-static void setup_td_guest_attributes(X86CPU *x86cpu)
+static int tdx_validate_attributes(TdxGuest *tdx)
+{
+if (((tdx->attributes & tdx_caps->attrs_fixed0) | tdx_caps->attrs_fixed1) 
!=
+tdx->attributes) {
+error_report("Invalid attributes 0x%lx for TDX VM (fixed0 0x%llx, 
fixed1 0x%llx)",
+  tdx->attributes, tdx_caps->attrs_fixed0, 
tdx_caps->attrs_fixed1);
+return -EINVAL;
+}


So, how is this supposed to work?  Patch #2 introduces attributes as
user-settable property.  So do users have to manually figure and pass
the correct value, so the check passes?  Specifically the fixed1 check?

I think 'attributes' should not be user-settable in the first place.
Each feature-bit which is actually user-settable (and not already
covered by another option like pmu) should be a separate attribute for
tdx-object.  Then the tdx code can create attributes from hardware
capabilities and user settings.


In patch #2, tdx-guest.attributes is defined as a field to hold a 64-bit
value of attributes, but it doesn't provide any getter/setter for
it. So it's *not* user-settable.


Did I miss something? (I'm not good at QEMU object)


When user-settable options might not be available depending on hardware
capabilities best practice is to create them as OnOffAuto properties.

   Auto == qemu can pick the value, typical behavior is to enable the
   feature if the hardware supports it.
   On == must enable, if it isn't possible throw an error and exit.
   Off == must disable, if it isn't possible throw an error and exit.

take care,
   Gerd

Re: [RFC 0/3] Introduce a new Qemu machine for RISC-V

2022-05-23 Thread Atish Patra

On Sun, May 22, 2022 at 10:59 PM Alistair Francis  wrote:
>
> On Wed, May 18, 2022 at 4:38 PM Atish Patra  wrote:
> >
> > On Tue, May 17, 2022 at 1:54 PM Alistair Francis  
> > wrote:
> > >
> > > On Tue, May 17, 2022 at 6:52 PM Daniel P. Berrangé  
> > > wrote:
> > > >
> > > > On Tue, May 17, 2022 at 03:03:38PM +1000, Alistair Francis wrote:
> > > > > On Sat, May 7, 2022 at 6:30 AM Atish Kumar Patra 
> > > > >  wrote:
> > > > > >
> > > > > > On Fri, May 6, 2022 at 4:00 AM Peter Maydell 
> > > > > >  wrote:
> > > > > > >
> > > > > > > On Fri, 6 May 2022 at 09:18, Daniel P. Berrangé 
> > > > > > >  wrote:
> > > > > > > >
> > > > > > > > On Fri, May 06, 2022 at 06:34:47AM +1000, Alistair Francis 
> > > > > > > > wrote:
> > > > > > > > > Even if we didn't worry about backwards compatibility the 
> > > > > > > > > current virt
> > > > > > > > > machine would still be what most users want. It's just a 
> > > > > > > > > small number
> > > > > > > > > of users who don't want MMIO devices and instead want to use 
> > > > > > > > > PCIe for
> > > > > > > > > everything. Realistically it's only HPC users who would want 
> > > > > > > > > this type
> > > > > > > > > of machine, at least that's my understanding.
> > > > > > > >
> > > > > > > > I'm not so sure about that. Every other architecture has ended 
> > > > > > > > up
> > > > > > > > standardizing on PCI for general purpose virtual machines. IIRC,
> > > > > > > > aarch64 started off with MMIO, but switched to PCI as it 
> > > > > > > > matured.
> > > > > > > >
> > > > > > > > In terms of having VM mgmt tools "just work" for risc-v, I think
> > > > > > > > it will be very compelling for the general 'virt' machine to be
> > > > > > > > PCI based, otherwise all the assumptions about PCI in mgmt apps
> > > > > > > > are going to break requiring never ending riscv fixes.
> > > > > > >
> > > > > > > Mmm, my experience with aarch64 virt is that PCI is much nicer
> > > > > > > as a general preference. aarch64 virt has some MMIO devices
> > > > > > > for historical reasons and some because you can't reasonably
> > > > > > > do the necessary things with PCI, but I'm actively trying to
> > > > > > > push people who submit new MMIO device features for virt to
> > > > > > > try to use a PCI-based solution instead if they possibly can.
> > > > >
> > > > > Interesting...
> > > > >
> > > > > Ok, maybe calling this "virt-pcie" might be a good start, with the
> > > > > expectation to eventually replace the current virt with the new
> > > > > virt-pcie at some point.
> > > >
> > > > Delaying the inevitable by leaving PCIE support in a separate
> > > > machine type initially is going to be more painful long term.
> > > >
> > > > > The other option would be to try and gradually change from the current
> > > > > virt machine to this new virt machine
> > > >
> > > > Yes, I really think the 'virt' machine type needs to aim for PCIE
> > > > support sooner rather than later, if RISC-V wants to get on part
> > > > with other architectures. The best time to have added PCIE support
> > > > to 'virt' was when it was first created, the next best time is now.
> > >
> > > So maybe instead we lock in the current virt machine as the 7.1 virt
> > > machine for QEMU 7.1, then work on migrating to a PCIe only machine
> > > with versions (similar to the other archs)
> > >
> >
> > I am not quite sure what exactly you mean here. Do you mean to modify
> > the current virt
> > machine to be PCIE only after QEMU 7.1 or the new PCIE only machine
> > (with the versioning)
> > which will be the default machine in the future
>
> I mean that we call the current virt machine the virt machine for QEMU
> 7.1. Then for future releases we can make breaking changes, where we
> have the old 7.1 virt machine for backwards compatibility.
>
> >
> > If you intend to say the former, few issues with that approach.
> >
> > 1. virt machine is not well documented and already bloated. There is
> > no specification for virt machine as such.
> > Putting restrictions after a certain release will lead to confusion.
> > 2. Do we support existing MMIO devices after that specific version or not ?
>
> Yeah, so I guess this doesn't achieve the same outcome you want. I
> would say we would still include some MMIO devices, like UART for
> example.
>

Why? Relying on a PCIe-based UART (virtio-serial-pci or
serial-pci) should be enough.
The only MMIO devices that should be allowed are the ones that can't
be behind pcie.

> But we could simplify things a bit. So for example maybe we could use
> AIA by default and then remove the PLIC code. That would help cleanup
> the board file. Then we could add a `msi-only` option that would be
> similar to 
> https://github.com/atishp04/qemu/commit/d7fc1c6aa7855b414b3484672a076140166a2dcd.
> But without the PLIC it should hopefully be cleaner
>
> We would need AIA ratified before we could remove the PLIC though.
>

And AIA patches available in the upstream Linux kernel.
Even after that, ca

Npcm7xx emac tap networking hitting 75MiB limit

2022-05-23 Thread Patrick Venture

Hey;

I wanted to ask if anyone has seen this before.  When we try to download a
file larger than 75MB from within an Npcm7xx board using TAP networking
(versus user networking), it just fails there.

using wget it reports null 75.0M  - stalled -\rnull
75.0M  - stalled -\rnull 75.0M  - stalled -\rnull
  75.0M  - stalled -\rnull 75.0M  - stalled
-\rnull 75.0M  -

I haven't yet dug into what could be going wrong here, but I wanted to ask
if any of y'all have seen some weird stuff here?

Thanks,
Patrick

Re: [PATCH] target/riscv: add support for zmmul extension v0.1

2022-05-23 Thread Weiwei Li



在 2022/5/24 上午5:15, Alistair Francis 写道:

On Mon, May 23, 2022 at 6:10 PM Weiwei Li  wrote:


在 2022/5/23 下午2:34, Alistair Francis 写道:

On Wed, May 18, 2022 at 11:54 AM Weiwei Li  wrote:

   - includes all multiplication operations for M extension

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
---
   target/riscv/cpu.c  |  2 ++
   target/riscv/cpu.h  |  1 +
   target/riscv/insn_trans/trans_rvm.c.inc | 18 --
   3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index e373c61ba2..01b57d3784 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -903,6 +903,7 @@ static Property riscv_cpu_properties[] = {

   /* These are experimental so mark with 'x-' */
   DEFINE_PROP_BOOL("x-j", RISCVCPU, cfg.ext_j, false),
+DEFINE_PROP_BOOL("x-zmmul", RISCVCPU, cfg.ext_zmmul, false),

Is this really experimental?

Alistair

I think it's experimental currently. The zmmul version in latest riscv
spec is v0.1, even though described as  v1.0 in spike README.

Hmm... You're right that it is only v0.1, but there is no indication of
draft state in the RISC-V spec chapter on Zmmul


Its specification status
(https://wiki.riscv.org/display/home/specification+status) is Freeze
Complete and TSC Sign-Off Voting.

And it's not in the ratified extension
list (https://wiki.riscv.org/display/home/recently+ratified+extensions).

Any status update I missed?

Confusing. Ok, I guess let's leave it as experimental, we can always
remove the `x-` easily :)


Regards,

Weiwei Li


   /* ePMP 0.9.3 */
   DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false),
   DEFINE_PROP_BOOL("x-aia", RISCVCPU, cfg.aia, false),
@@ -1027,6 +1028,7 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char 
**isa_str, int max_str_len)
*extensions by an underscore.
*/
   struct isa_ext_data isa_edata_arr[] = {
+ISA_EDATA_ENTRY(zmmul, ext_zmmul),

We should have some checks though. We don't want users to enable this
and the multiply (M) extension


Alistair


Maybe we can add a check like this:

/* M + Zmmul = Zmmul */
if (cpu->cfg.ext_m && cpu->cfg.ext_zmmul) {
warn_report("Zmmul will override M");
cpu->cfg.ext_m = false;
}

It seems OK to enable both M and Zmmul in the GNU toolchain. However, divide
operations will be disabled when Zmmul is enabled.

Regards,

Weiwei Li


   ISA_EDATA_ENTRY(zfh, ext_zfh),
   ISA_EDATA_ENTRY(zfhmin, ext_zfhmin),
   ISA_EDATA_ENTRY(zfinx, ext_zfinx),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index f5ff7294c6..68177eae12 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -405,6 +405,7 @@ struct RISCVCPUConfig {
   bool ext_zhinxmin;
   bool ext_zve32f;
   bool ext_zve64f;
+bool ext_zmmul;

   uint32_t mvendorid;
   uint64_t marchid;
diff --git a/target/riscv/insn_trans/trans_rvm.c.inc 
b/target/riscv/insn_trans/trans_rvm.c.inc
index 16b029edf0..ec7f705aab 100644
--- a/target/riscv/insn_trans/trans_rvm.c.inc
+++ b/target/riscv/insn_trans/trans_rvm.c.inc
@@ -18,6 +18,12 @@
* this program.  If not, see .
*/

+#define REQUIRE_M_OR_ZMMUL(ctx) do {  \
+if (!ctx->cfg_ptr->ext_zmmul && !has_ext(ctx, RVM)) { \
+return false; \
+} \
+} while (0)
+
   static void gen_mulhu_i128(TCGv r2, TCGv r3, TCGv al, TCGv ah, TCGv bl, TCGv 
bh)
   {
   TCGv tmpl = tcg_temp_new();
@@ -65,7 +71,7 @@ static void gen_mul_i128(TCGv rl, TCGv rh,

   static bool trans_mul(DisasContext *ctx, arg_mul *a)
   {
-REQUIRE_EXT(ctx, RVM);
+REQUIRE_M_OR_ZMMUL(ctx);
   return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, gen_mul_i128);
   }

@@ -109,7 +115,7 @@ static void gen_mulh_w(TCGv ret, TCGv s1, TCGv s2)

   static bool trans_mulh(DisasContext *ctx, arg_mulh *a)
   {
-REQUIRE_EXT(ctx, RVM);
+REQUIRE_M_OR_ZMMUL(ctx);
   return gen_arith_per_ol(ctx, a, EXT_SIGN, gen_mulh, gen_mulh_w,
   gen_mulh_i128);
   }
@@ -161,7 +167,7 @@ static void gen_mulhsu_w(TCGv ret, TCGv arg1, TCGv arg2)

   static bool trans_mulhsu(DisasContext *ctx, arg_mulhsu *a)
   {
-REQUIRE_EXT(ctx, RVM);
+REQUIRE_M_OR_ZMMUL(ctx);
   return gen_arith_per_ol(ctx, a, EXT_NONE, gen_mulhsu, gen_mulhsu_w,
   gen_mulhsu_i128);
   }
@@ -176,7 +182,7 @@ static void gen_mulhu(TCGv ret, TCGv s1, TCGv s2)

   static bool trans_mulhu(DisasContext *ctx, arg_mulhu *a)
   {
-REQUIRE_EXT(ctx, RVM);
+REQUIRE_M_OR_ZMMUL(ctx);
   /* gen_mulh_w works for either sign as input. */
   return gen_arith_per_ol(ctx, a, EXT_ZERO, gen_mulhu, gen_mulh_w,
   gen_mulhu_i128);
@@ -349,7 +355,7 @@ static bool trans_remu(DisasContext *ctx, arg_remu *a)
   static bool trans_mulw(DisasContext *ctx, arg_m

[PATCH v9 07/12] target/riscv: Support mcycle/minstret write operation

2022-05-23 Thread Atish Patra

From: Atish Patra 

mcycle/minstret are actually WARL registers and can be written with any
given value. With the SBI PMU extension, they will be used to store an initial
value provided by the supervisor OS. QEMU also needs to prohibit the counter
increment if mcountinhibit is set.

Support mcycle/minstret through generic counter infrastructure.

Reviewed-by: Alistair Francis 
Signed-off-by: Atish Patra 
Signed-off-by: Atish Patra 
---
 target/riscv/cpu.h   |  23 --
 target/riscv/csr.c   | 155 ---
 target/riscv/machine.c   |  25 ++-
 target/riscv/meson.build |   3 +-
 target/riscv/pmu.c   |  32 
 target/riscv/pmu.h   |  28 +++
 6 files changed, 213 insertions(+), 53 deletions(-)
 create mode 100644 target/riscv/pmu.c
 create mode 100644 target/riscv/pmu.h

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 32cdd9070be5..f60072e0fd3d 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -111,7 +111,7 @@ typedef struct CPUArchState CPURISCVState;
 #endif
 
 #define RV_VLEN_MAX 1024
-#define RV_MAX_MHPMEVENTS 29
+#define RV_MAX_MHPMEVENTS 32
 #define RV_MAX_MHPMCOUNTERS 32
 
 FIELD(VTYPE, VLMUL, 0, 3)
@@ -121,6 +121,18 @@ FIELD(VTYPE, VMA, 7, 1)
 FIELD(VTYPE, VEDIV, 8, 2)
 FIELD(VTYPE, RESERVED, 10, sizeof(target_ulong) * 8 - 11)
 
+typedef struct PMUCTRState {
+/* Current value of a counter */
+target_ulong mhpmcounter_val;
+/* Current value of a counter in RV32*/
+target_ulong mhpmcounterh_val;
+/* Snapshot values of counter */
+target_ulong mhpmcounter_prev;
+/* Snapshort value of a counter in RV32 */
+target_ulong mhpmcounterh_prev;
+bool started;
+} PMUCTRState;
+
 struct CPUArchState {
 target_ulong gpr[32];
 target_ulong gprh[32]; /* 64 top bits of the 128-bit registers */
@@ -273,13 +285,10 @@ struct CPUArchState {
 
 target_ulong mcountinhibit;
 
-/* PMU counter configured values */
-target_ulong mhpmcounter_val[RV_MAX_MHPMCOUNTERS];
-
-/* for RV32 */
-target_ulong mhpmcounterh_val[RV_MAX_MHPMCOUNTERS];
+/* PMU counter state */
+PMUCTRState pmu_ctrs[RV_MAX_MHPMCOUNTERS];
 
-/* PMU event selector configured values */
+/* PMU event selector configured values. First three are unused*/
 target_ulong mhpmevent_val[RV_MAX_MHPMEVENTS];
 
 target_ulong sscratch;
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 605591072784..d109f329ce73 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -21,6 +21,7 @@
 #include "qemu/log.h"
 #include "qemu/timer.h"
 #include "cpu.h"
+#include "pmu.h"
 #include "qemu/main-loop.h"
 #include "exec/exec-all.h"
 #include "sysemu/cpu-timers.h"
@@ -597,34 +598,28 @@ static int write_vcsr(CPURISCVState *env, int csrno, 
target_ulong val)
 }
 
 /* User Timers and Counters */
-static RISCVException read_instret(CPURISCVState *env, int csrno,
-   target_ulong *val)
+static target_ulong get_ticks(bool shift)
 {
+int64_t val;
+target_ulong result;
+
 #if !defined(CONFIG_USER_ONLY)
 if (icount_enabled()) {
-*val = icount_get();
+val = icount_get();
 } else {
-*val = cpu_get_host_ticks();
+val = cpu_get_host_ticks();
 }
 #else
-*val = cpu_get_host_ticks();
+val = cpu_get_host_ticks();
 #endif
-return RISCV_EXCP_NONE;
-}
 
-static RISCVException read_instreth(CPURISCVState *env, int csrno,
-target_ulong *val)
-{
-#if !defined(CONFIG_USER_ONLY)
-if (icount_enabled()) {
-*val = icount_get() >> 32;
+if (shift) {
+result = val >> 32;
 } else {
-*val = cpu_get_host_ticks() >> 32;
+result = val;
 }
-#else
-*val = cpu_get_host_ticks() >> 32;
-#endif
-return RISCV_EXCP_NONE;
+
+return result;
 }
 
 #if defined(CONFIG_USER_ONLY)
@@ -642,11 +637,23 @@ static RISCVException read_timeh(CPURISCVState *env, int 
csrno,
 return RISCV_EXCP_NONE;
 }
 
+static int read_hpmcounter(CPURISCVState *env, int csrno, target_ulong *val)
+{
+*val = get_ticks(false);
+return RISCV_EXCP_NONE;
+}
+
+static int read_hpmcounterh(CPURISCVState *env, int csrno, target_ulong *val)
+{
+*val = get_ticks(true);
+return RISCV_EXCP_NONE;
+}
+
 #else /* CONFIG_USER_ONLY */
 
 static int read_mhpmevent(CPURISCVState *env, int csrno, target_ulong *val)
 {
-int evt_index = csrno - CSR_MHPMEVENT3;
+int evt_index = csrno - CSR_MCOUNTINHIBIT;
 
 *val = env->mhpmevent_val[evt_index];
 
@@ -655,7 +662,7 @@ static int read_mhpmevent(CPURISCVState *env, int csrno, 
target_ulong *val)
 
 static int write_mhpmevent(CPURISCVState *env, int csrno, target_ulong val)
 {
-int evt_index = csrno - CSR_MHPMEVENT3;
+int evt_index = csrno - CSR_MCOUNTINHIBIT;
 
 env->mhpmevent_val[evt_index] = val;
 
@@ -664,55 +671,105 @@ static int write_mhpmevent(CPURISCVState *env, int 
csrno, target_ulong val)
 
 static int write_mhpmcounter(CPURISCVSt

[PATCH v9 11/12] hw/riscv: virt: Add PMU DT node to the device tree

2022-05-23 Thread Atish Patra

The QEMU virt machine can support a few cache events and cycle/instret counters.
It also supports counter overflow for these events.

Add a DT node so that OpenSBI/Linux kernel is aware of the virt machine
capabilities. There are some dummy nodes added for testing as well.

Signed-off-by: Atish Patra 
Signed-off-by: Atish Patra 
---
 hw/riscv/virt.c| 28 +++
 target/riscv/cpu.c |  1 +
 target/riscv/pmu.c | 57 ++
 target/riscv/pmu.h |  1 +
 4 files changed, 87 insertions(+)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 3326f4db96a2..1b17ba7f8059 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -29,6 +29,7 @@
 #include "hw/char/serial.h"
 #include "target/riscv/cpu.h"
 #include "hw/core/sysbus-fdt.h"
+#include "target/riscv/pmu.h"
 #include "hw/riscv/riscv_hart.h"
 #include "hw/riscv/virt.h"
 #include "hw/riscv/boot.h"
@@ -715,6 +716,32 @@ static void create_fdt_socket_aplic(RISCVVirtState *s,
 aplic_phandles[socket] = aplic_s_phandle;
 }
 
+static void create_fdt_socket_pmu(RISCVVirtState *s,
+  int socket, uint32_t *phandle,
+  uint32_t *intc_phandles)
+{
+int cpu;
+char *pmu_name;
+uint32_t *pmu_cells;
+MachineState *mc = MACHINE(s);
+RISCVCPU hart = s->soc[socket].harts[0];
+
+pmu_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2);
+
+for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) {
+pmu_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
+pmu_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_PMU_OVF);
+}
+
+pmu_name = g_strdup_printf("/soc/pmu");
+qemu_fdt_add_subnode(mc->fdt, pmu_name);
+qemu_fdt_setprop_string(mc->fdt, pmu_name, "compatible", "riscv,pmu");
+riscv_pmu_generate_fdt_node(mc->fdt, hart.cfg.pmu_num, pmu_name);
+
+g_free(pmu_name);
+g_free(pmu_cells);
+}
+
 static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap,
bool is_32_bit, uint32_t *phandle,
uint32_t *irq_mmio_phandle,
@@ -760,6 +787,7 @@ static void create_fdt_sockets(RISCVVirtState *s, const 
MemMapEntry *memmap,
 &intc_phandles[phandle_pos]);
 }
 }
+create_fdt_socket_pmu(s, socket, phandle, intc_phandles);
 }
 
 if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) {
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index a8f156a66eba..b51ad7496f71 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1032,6 +1032,7 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char 
**isa_str, int max_str_len)
 ISA_EDATA_ENTRY(zkt, ext_zkt),
 ISA_EDATA_ENTRY(zve32f, ext_zve32f),
 ISA_EDATA_ENTRY(zve64f, ext_zve64f),
+ISA_EDATA_ENTRY(sscofpmf, ext_sscofpmf),
 ISA_EDATA_ENTRY(svinval, ext_svinval),
 ISA_EDATA_ENTRY(svnapot, ext_svnapot),
 ISA_EDATA_ENTRY(svpbmt, ext_svpbmt),
diff --git a/target/riscv/pmu.c b/target/riscv/pmu.c
index 7bb85d8d6ad7..0163758297c4 100644
--- a/target/riscv/pmu.c
+++ b/target/riscv/pmu.c
@@ -20,11 +20,68 @@
 #include "cpu.h"
 #include "pmu.h"
 #include "sysemu/cpu-timers.h"
+#include "sysemu/device_tree.h"
 
 #define RISCV_TIMEBASE_FREQ 10 /* 1Ghz */
 #define MAKE_32BIT_MASK(shift, length) \
 (((uint32_t)(~0UL) >> (32 - (length))) << (shift))
 
+/**
+ * To keep it simple, any event can be mapped to any programmable counters in
+ * QEMU. The generic cycle & instruction count events can also be monitored
+ * using programmable counters. In that case, mcycle & minstret must continue
+ * to provide the correct value as well. Heterogeneous PMU per hart is not
+ * supported yet. Thus, number of counters are same across all harts.
+ */
+void riscv_pmu_generate_fdt_node(void *fdt, int num_ctrs, char *pmu_name)
+{
+uint32_t fdt_event_ctr_map[20] = {};
+uint32_t cmask;
+
+/* All the programmable counters can map to any event */
+cmask = MAKE_32BIT_MASK(3, num_ctrs);
+
+   /**
+* The event encoding is specified in the SBI specification
+* Event idx is a 20bits wide number encoded as follows:
+* event_idx[19:16] = type
+* event_idx[15:0] = code
+* The code field in cache events are encoded as follows:
+* event_idx.code[15:3] = cache_id
+* event_idx.code[2:1] = op_id
+* event_idx.code[0:0] = result_id
+*/
+
+   /* SBI_PMU_HW_CPU_CYCLES: 0x01 : type(0x00) */
+   fdt_event_ctr_map[0] = cpu_to_be32(0x0001);
+   fdt_event_ctr_map[1] = cpu_to_be32(0x0001);
+   fdt_event_ctr_map[2] = cpu_to_be32(cmask | 1 << 0);
+
+   /* SBI_PMU_HW_INSTRUCTIONS: 0x02 : type(0x00) */
+   fdt_event_ctr_map[3] = cpu_to_be32(0x0002);
+   fdt_event_ctr_map[4] = cpu_to_be32(0x0002);
+   fdt_event_ctr_map[5] = cpu_to_be32(cmask | 1 << 2);
+
+   /* SBI_PMU_HW_CACHE_DTLB : 0x03 READ : 0x00 MISS : 0x00 type(0x01) */
+   fdt_event_ctr_map[6] = cpu_to_be32(0x00010019);
+   fdt_e

[PATCH v9 06/12] target/riscv: Add support for hpmcounters/hpmevents

2022-05-23 Thread Atish Patra

From: Atish Patra 

With SBI PMU extension, user can use any of the available hpmcounters to
track any perf events based on the value written to mhpmevent csr.
Add read/write functionality for these csrs.

Reviewed-by: Alistair Francis 
Reviewed-by: Bin Meng 
Signed-off-by: Atish Patra 
Signed-off-by: Atish Patra 
---
 target/riscv/cpu.h |  11 +
 target/riscv/csr.c | 469 -
 target/riscv/machine.c |   3 +
 3 files changed, 331 insertions(+), 152 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 45ac0f2d2614..32cdd9070be5 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -111,6 +111,8 @@ typedef struct CPUArchState CPURISCVState;
 #endif
 
 #define RV_VLEN_MAX 1024
+#define RV_MAX_MHPMEVENTS 29
+#define RV_MAX_MHPMCOUNTERS 32
 
 FIELD(VTYPE, VLMUL, 0, 3)
 FIELD(VTYPE, VSEW, 3, 3)
@@ -271,6 +273,15 @@ struct CPUArchState {
 
 target_ulong mcountinhibit;
 
+/* PMU counter configured values */
+target_ulong mhpmcounter_val[RV_MAX_MHPMCOUNTERS];
+
+/* for RV32 */
+target_ulong mhpmcounterh_val[RV_MAX_MHPMCOUNTERS];
+
+/* PMU event selector configured values */
+target_ulong mhpmevent_val[RV_MAX_MHPMEVENTS];
+
 target_ulong sscratch;
 target_ulong mscratch;
 
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index ea1cde68610c..605591072784 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -230,6 +230,15 @@ static RISCVException mctr(CPURISCVState *env, int csrno)
 return RISCV_EXCP_NONE;
 }
 
+static RISCVException mctr32(CPURISCVState *env, int csrno)
+{
+if (riscv_cpu_mxl(env) != MXL_RV32) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+
+return mctr(env, csrno);
+}
+
 static RISCVException any(CPURISCVState *env, int csrno)
 {
 return RISCV_EXCP_NONE;
@@ -635,6 +644,75 @@ static RISCVException read_timeh(CPURISCVState *env, int 
csrno,
 
 #else /* CONFIG_USER_ONLY */
 
+static int read_mhpmevent(CPURISCVState *env, int csrno, target_ulong *val)
+{
+int evt_index = csrno - CSR_MHPMEVENT3;
+
+*val = env->mhpmevent_val[evt_index];
+
+return RISCV_EXCP_NONE;
+}
+
+static int write_mhpmevent(CPURISCVState *env, int csrno, target_ulong val)
+{
+int evt_index = csrno - CSR_MHPMEVENT3;
+
+env->mhpmevent_val[evt_index] = val;
+
+return RISCV_EXCP_NONE;
+}
+
+static int write_mhpmcounter(CPURISCVState *env, int csrno, target_ulong val)
+{
+int ctr_index = csrno - CSR_MHPMCOUNTER3 + 3;
+
+env->mhpmcounter_val[ctr_index] = val;
+
+return RISCV_EXCP_NONE;
+}
+
+static int write_mhpmcounterh(CPURISCVState *env, int csrno, target_ulong val)
+{
+int ctr_index = csrno - CSR_MHPMCOUNTER3H + 3;
+
+env->mhpmcounterh_val[ctr_index] = val;
+
+return RISCV_EXCP_NONE;
+}
+
+static int read_hpmcounter(CPURISCVState *env, int csrno, target_ulong *val)
+{
+int ctr_index;
+
+if (csrno >= CSR_MCYCLE && csrno <= CSR_MHPMCOUNTER31) {
+ctr_index = csrno - CSR_MHPMCOUNTER3 + 3;
+} else if (csrno >= CSR_CYCLE && csrno <= CSR_HPMCOUNTER31) {
+ctr_index = csrno - CSR_HPMCOUNTER3 + 3;
+} else {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+*val = env->mhpmcounter_val[ctr_index];
+
+return RISCV_EXCP_NONE;
+}
+
+static int read_hpmcounterh(CPURISCVState *env, int csrno, target_ulong *val)
+{
+int ctr_index;
+
+if (csrno >= CSR_MCYCLEH && csrno <= CSR_MHPMCOUNTER31H) {
+ctr_index = csrno - CSR_MHPMCOUNTER3H + 3;
+} else if (csrno >= CSR_CYCLEH && csrno <= CSR_HPMCOUNTER31H) {
+ctr_index = csrno - CSR_HPMCOUNTER3H + 3;
+} else {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+*val = env->mhpmcounterh_val[ctr_index];
+
+return RISCV_EXCP_NONE;
+}
+
+
 static RISCVException read_time(CPURISCVState *env, int csrno,
 target_ulong *val)
 {
@@ -3703,157 +3781,244 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
 [CSR_SPMBASE] ={ "spmbase", pointer_masking, read_spmbase, 
write_spmbase },
 
 /* Performance Counters */
-[CSR_HPMCOUNTER3]= { "hpmcounter3",ctr,read_zero },
-[CSR_HPMCOUNTER4]= { "hpmcounter4",ctr,read_zero },
-[CSR_HPMCOUNTER5]= { "hpmcounter5",ctr,read_zero },
-[CSR_HPMCOUNTER6]= { "hpmcounter6",ctr,read_zero },
-[CSR_HPMCOUNTER7]= { "hpmcounter7",ctr,read_zero },
-[CSR_HPMCOUNTER8]= { "hpmcounter8",ctr,read_zero },
-[CSR_HPMCOUNTER9]= { "hpmcounter9",ctr,read_zero },
-[CSR_HPMCOUNTER10]   = { "hpmcounter10",   ctr,read_zero },
-[CSR_HPMCOUNTER11]   = { "hpmcounter11",   ctr,read_zero },
-[CSR_HPMCOUNTER12]   = { "hpmcounter12",   ctr,read_zero },
-[CSR_HPMCOUNTER13]   = { "hpmcounter13",   ctr,read_zero },
-[CSR_HPMCOUNTER14]   = { "hpmcounter14",   ctr,read_zero },
-[CSR_HPMCOUNTER15]   = { "hpmcounter15",   ctr,read_zero },
-[CSR_HPMCOUNTER16]   = { "hpmco

[PATCH v9 10/12] target/riscv: Add few cache related PMU events

2022-05-23 Thread Atish Patra

From: Atish Patra 

Qemu can monitor the following cache related PMU events through
tlb_fill functions.

1. DTLB load/store miss
2. ITLB prefetch miss

Increment the PMU counter in tlb_fill function.

Reviewed-by: Alistair Francis 
Signed-off-by: Atish Patra 
Signed-off-by: Atish Patra 
---
 target/riscv/cpu_helper.c | 25 +
 1 file changed, 25 insertions(+)

diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index e1aa4f2097c1..004cef0febad 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -21,10 +21,12 @@
 #include "qemu/log.h"
 #include "qemu/main-loop.h"
 #include "cpu.h"
+#include "pmu.h"
 #include "exec/exec-all.h"
 #include "tcg/tcg-op.h"
 #include "trace.h"
 #include "semihosting/common-semi.h"
+#include "cpu_bits.h"
 
 int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch)
 {
@@ -1178,6 +1180,28 @@ void riscv_cpu_do_unaligned_access(CPUState *cs, vaddr 
addr,
 cpu_loop_exit_restore(cs, retaddr);
 }
 
+
+static void pmu_tlb_fill_incr_ctr(RISCVCPU *cpu, MMUAccessType access_type)
+{
+enum riscv_pmu_event_idx pmu_event_type;
+
+switch (access_type) {
+case MMU_INST_FETCH:
+pmu_event_type = RISCV_PMU_EVENT_CACHE_ITLB_PREFETCH_MISS;
+break;
+case MMU_DATA_LOAD:
+pmu_event_type = RISCV_PMU_EVENT_CACHE_DTLB_READ_MISS;
+break;
+case MMU_DATA_STORE:
+pmu_event_type = RISCV_PMU_EVENT_CACHE_DTLB_WRITE_MISS;
+break;
+default:
+return;
+}
+
+riscv_pmu_incr_ctr(cpu, pmu_event_type);
+}
+
 bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
 MMUAccessType access_type, int mmu_idx,
 bool probe, uintptr_t retaddr)
@@ -1274,6 +1298,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int 
size,
 }
 }
 } else {
+pmu_tlb_fill_incr_ctr(cpu, access_type);
 /* Single stage lookup */
 ret = get_physical_address(env, &pa, &prot, address, NULL,
access_type, mmu_idx, true, false, false);
-- 
2.25.1

[PATCH v9 12/12] target/riscv: Update the privilege field for sscofpmf CSRs

2022-05-23 Thread Atish Patra

The sscofpmf extension was ratified as a part of priv spec v1.12.
Mark the csr_ops accordingly.

Signed-off-by: Atish Patra 
---
 target/riscv/csr.c | 90 ++
 1 file changed, 60 insertions(+), 30 deletions(-)

diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index e229f53c674d..c6105edd7a1a 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -4012,63 +4012,92 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
write_mhpmevent },
 
 [CSR_MHPMEVENT3H]= { "mhpmevent3h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+  write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT4H]= { "mhpmevent4h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+  write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT5H]= { "mhpmevent5h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+  write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT6H]= { "mhpmevent6h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+  write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT7H]= { "mhpmevent7h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+  write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT8H]= { "mhpmevent8h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+  write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT9H]= { "mhpmevent9h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+  write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT10H]   = { "mhpmevent10h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+   write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT11H]   = { "mhpmevent11h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+   write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT12H]   = { "mhpmevent12h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+   write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT13H]   = { "mhpmevent13h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+   write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT14H]   = { "mhpmevent14h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+   write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT15H]   = { "mhpmevent15h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+   write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT16H]   = { "mhpmevent16h",sscofpmf,  read_mhpmeventh,
-   write_mhpmeventh},
+   write_mhpmeventh,
+ .min_priv_ver = PRIV_VERSION_1_12_0 },
 [CSR_MHPMEVENT17H]   = { "mhpmevent17h",sscofpmf,  read_mhpmeventh,
-

[PATCH v9 05/12] target/riscv: Implement mcountinhibit CSR

2022-05-23 Thread Atish Patra

From: Atish Patra 

As per the privileged specification v1.11, mcountinhibit allows a PMU counter
to be started/stopped selectively.

Reviewed-by: Bin Meng 
Reviewed-by: Alistair Francis 
Signed-off-by: Atish Patra 
Signed-off-by: Atish Patra 
---
 target/riscv/cpu.h  |  2 ++
 target/riscv/cpu_bits.h |  4 
 target/riscv/csr.c  | 25 +
 target/riscv/machine.c  |  1 +
 4 files changed, 32 insertions(+)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 7cbcd8d62fc1..45ac0f2d2614 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -269,6 +269,8 @@ struct CPUArchState {
 target_ulong scounteren;
 target_ulong mcounteren;
 
+target_ulong mcountinhibit;
+
 target_ulong sscratch;
 target_ulong mscratch;
 
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index 4d04b20d064e..b3f7fa713000 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -367,6 +367,10 @@
 #define CSR_MHPMCOUNTER29   0xb1d
 #define CSR_MHPMCOUNTER30   0xb1e
 #define CSR_MHPMCOUNTER31   0xb1f
+
+/* Machine counter-inhibit register */
+#define CSR_MCOUNTINHIBIT   0x320
+
 #define CSR_MHPMEVENT3  0x323
 #define CSR_MHPMEVENT4  0x324
 #define CSR_MHPMEVENT5  0x325
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 7e14f7685fb9..ea1cde68610c 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -1475,6 +1475,28 @@ static RISCVException write_mtvec(CPURISCVState *env, 
int csrno,
 return RISCV_EXCP_NONE;
 }
 
+static RISCVException read_mcountinhibit(CPURISCVState *env, int csrno,
+ target_ulong *val)
+{
+if (env->priv_ver < PRIV_VERSION_1_11_0) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+
+*val = env->mcountinhibit;
+return RISCV_EXCP_NONE;
+}
+
+static RISCVException write_mcountinhibit(CPURISCVState *env, int csrno,
+  target_ulong val)
+{
+if (env->priv_ver < PRIV_VERSION_1_11_0) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+
+env->mcountinhibit = val;
+return RISCV_EXCP_NONE;
+}
+
 static RISCVException read_mcounteren(CPURISCVState *env, int csrno,
   target_ulong *val)
 {
@@ -3741,6 +3763,9 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
 [CSR_MHPMCOUNTER30]  = { "mhpmcounter30",  mctr,   read_zero },
 [CSR_MHPMCOUNTER31]  = { "mhpmcounter31",  mctr,   read_zero },
 
+[CSR_MCOUNTINHIBIT]  = { "mcountinhibit",   any,read_mcountinhibit,
+   write_mcountinhibit },
+
 [CSR_MHPMEVENT3] = { "mhpmevent3", any,read_zero },
 [CSR_MHPMEVENT4] = { "mhpmevent4", any,read_zero },
 [CSR_MHPMEVENT5] = { "mhpmevent5", any,read_zero },
diff --git a/target/riscv/machine.c b/target/riscv/machine.c
index 2a437b29a1ce..87cd55bfd3a7 100644
--- a/target/riscv/machine.c
+++ b/target/riscv/machine.c
@@ -330,6 +330,7 @@ const VMStateDescription vmstate_riscv_cpu = {
 VMSTATE_UINTTL(env.siselect, RISCVCPU),
 VMSTATE_UINTTL(env.scounteren, RISCVCPU),
 VMSTATE_UINTTL(env.mcounteren, RISCVCPU),
+VMSTATE_UINTTL(env.mcountinhibit, RISCVCPU),
 VMSTATE_UINTTL(env.sscratch, RISCVCPU),
 VMSTATE_UINTTL(env.mscratch, RISCVCPU),
 VMSTATE_UINT64(env.mfromhost, RISCVCPU),
-- 
2.25.1

[PATCH v9 09/12] target/riscv: Simplify counter predicate function

2022-05-23 Thread Atish Patra

All the hpmcounters and the fixed counters (CY, IR, TM) can be represented
as a unified counter. Thus, the predicate function doesn't need to handle each
case separately.

Simplify the predicate function so that we just handle things differently
between RV32/RV64 and S/HS mode.

Reviewed-by: Bin Meng 
Acked-by: Alistair Francis 
Signed-off-by: Atish Patra 
---
 target/riscv/csr.c | 111 -
 1 file changed, 10 insertions(+), 101 deletions(-)

diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 723b52d836d3..e229f53c674d 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -74,6 +74,7 @@ static RISCVException ctr(CPURISCVState *env, int csrno)
 CPUState *cs = env_cpu(env);
 RISCVCPU *cpu = RISCV_CPU(cs);
 int ctr_index;
+target_ulong ctr_mask;
 int base_csrno = CSR_CYCLE;
 bool rv32 = riscv_cpu_mxl(env) == MXL_RV32 ? true : false;
 
@@ -82,122 +83,30 @@ static RISCVException ctr(CPURISCVState *env, int csrno)
 base_csrno += 0x80;
 }
 ctr_index = csrno - base_csrno;
+ctr_mask = BIT(ctr_index);
 
 if ((csrno >= CSR_CYCLE && csrno <= CSR_INSTRET) ||
 (csrno >= CSR_CYCLEH && csrno <= CSR_INSTRETH)) {
 goto skip_ext_pmu_check;
 }
 
-if ((!cpu->cfg.pmu_num || !(cpu->pmu_avail_ctrs & BIT(ctr_index)))) {
+if ((!cpu->cfg.pmu_num || !(cpu->pmu_avail_ctrs & ctr_mask))) {
 /* No counter is enabled in PMU or the counter is out of range */
 return RISCV_EXCP_ILLEGAL_INST;
 }
 
 skip_ext_pmu_check:
 
-if (env->priv == PRV_S) {
-switch (csrno) {
-case CSR_CYCLE:
-if (!get_field(env->mcounteren, COUNTEREN_CY)) {
-return RISCV_EXCP_ILLEGAL_INST;
-}
-break;
-case CSR_TIME:
-if (!get_field(env->mcounteren, COUNTEREN_TM)) {
-return RISCV_EXCP_ILLEGAL_INST;
-}
-break;
-case CSR_INSTRET:
-if (!get_field(env->mcounteren, COUNTEREN_IR)) {
-return RISCV_EXCP_ILLEGAL_INST;
-}
-break;
-case CSR_HPMCOUNTER3...CSR_HPMCOUNTER31:
-if (!get_field(env->mcounteren, 1 << ctr_index)) {
-return RISCV_EXCP_ILLEGAL_INST;
-}
-break;
-}
-if (rv32) {
-switch (csrno) {
-case CSR_CYCLEH:
-if (!get_field(env->mcounteren, COUNTEREN_CY)) {
-return RISCV_EXCP_ILLEGAL_INST;
-}
-break;
-case CSR_TIMEH:
-if (!get_field(env->mcounteren, COUNTEREN_TM)) {
-return RISCV_EXCP_ILLEGAL_INST;
-}
-break;
-case CSR_INSTRETH:
-if (!get_field(env->mcounteren, COUNTEREN_IR)) {
-return RISCV_EXCP_ILLEGAL_INST;
-}
-break;
-case CSR_HPMCOUNTER3H...CSR_HPMCOUNTER31H:
-if (!get_field(env->mcounteren, 1 << ctr_index)) {
-return RISCV_EXCP_ILLEGAL_INST;
-}
-break;
-}
-}
+if ((env->priv == PRV_S) && (!get_field(env->mcounteren, ctr_mask))) {
+return RISCV_EXCP_ILLEGAL_INST;
 }
 
 if (riscv_cpu_virt_enabled(env)) {
-switch (csrno) {
-case CSR_CYCLE:
-if (!get_field(env->hcounteren, COUNTEREN_CY) &&
-get_field(env->mcounteren, COUNTEREN_CY)) {
-return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
-}
-break;
-case CSR_TIME:
-if (!get_field(env->hcounteren, COUNTEREN_TM) &&
-get_field(env->mcounteren, COUNTEREN_TM)) {
-return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
-}
-break;
-case CSR_INSTRET:
-if (!get_field(env->hcounteren, COUNTEREN_IR) &&
-get_field(env->mcounteren, COUNTEREN_IR)) {
-return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
-}
-break;
-case CSR_HPMCOUNTER3...CSR_HPMCOUNTER31:
-if (!get_field(env->hcounteren, 1 << ctr_index) &&
- get_field(env->mcounteren, 1 << ctr_index)) {
-return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
-}
-break;
-}
-if (rv32) {
-switch (csrno) {
-case CSR_CYCLEH:
-if (!get_field(env->hcounteren, COUNTEREN_CY) &&
-get_field(env->mcounteren, COUNTEREN_CY)) {
-return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
-}
-break;
-case CSR_TIMEH:
-if (!get_field(env->hcounteren, COUNTEREN_TM) &&
-get_field(env->mcounteren, COUNTEREN_TM)) {
-return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
-}
-break;
-

[PATCH v9 08/12] target/riscv: Add sscofpmf extension support

2022-05-23 Thread Atish Patra

The Sscofpmf ('Ss' for Privileged arch and Supervisor-level extensions,
and 'cofpmf' for Count OverFlow and Privilege Mode Filtering)
extension allows the perf to handle overflow interrupts and filtering
support. This patch provides a framework for programmable
counters to leverage the extension. As the extension doesn't have any
provision for the overflow bit for fixed counters, the fixed events
can also be monitored using programmable counters. The underlying
counters for cycle and instruction counters are always running. Thus,
a separate timer device is programmed to handle the overflow.

Signed-off-by: Atish Patra 
Signed-off-by: Atish Patra 
---
 target/riscv/cpu.c  |  11 ++
 target/riscv/cpu.h  |  32 
 target/riscv/cpu_bits.h |  55 +++
 target/riscv/csr.c  | 180 +++--
 target/riscv/machine.c  |   4 +
 target/riscv/pmu.c  | 347 +++-
 target/riscv/pmu.h  |   7 +
 7 files changed, 625 insertions(+), 11 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 2dc4b500797d..a8f156a66eba 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -22,6 +22,7 @@
 #include "qemu/ctype.h"
 #include "qemu/log.h"
 #include "cpu.h"
+#include "pmu.h"
 #include "internals.h"
 #include "exec/exec-all.h"
 #include "qapi/error.h"
@@ -724,6 +725,15 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
 set_misa(env, env->misa_mxl, ext);
 }
 
+#ifndef CONFIG_USER_ONLY
+if (cpu->cfg.pmu_num) {
+if (!riscv_pmu_init(cpu, cpu->cfg.pmu_num) && cpu->cfg.ext_sscofpmf) {
+cpu->pmu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+  riscv_pmu_timer_cb, cpu);
+}
+ }
+#endif
+
 riscv_cpu_register_gdb_regs_for_features(cs);
 
 qemu_init_vcpu(cs);
@@ -823,6 +833,7 @@ static Property riscv_cpu_properties[] = {
 DEFINE_PROP_BOOL("v", RISCVCPU, cfg.ext_v, false),
 DEFINE_PROP_BOOL("h", RISCVCPU, cfg.ext_h, true),
 DEFINE_PROP_UINT8("pmu-num", RISCVCPU, cfg.pmu_num, 16),
+DEFINE_PROP_BOOL("sscofpmf", RISCVCPU, cfg.ext_sscofpmf, false),
 DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true),
 DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true),
 DEFINE_PROP_BOOL("Zfh", RISCVCPU, cfg.ext_zfh, false),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index f60072e0fd3d..c997384a74c1 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -114,6 +114,10 @@ typedef struct CPUArchState CPURISCVState;
 #define RV_MAX_MHPMEVENTS 32
 #define RV_MAX_MHPMCOUNTERS 32
 
+#define RV32_CSR_LHALF_WRITE 0x01
+#define RV32_CSR_UHALF_WRITE 0x02
+#define RV32_CSR_WRITE_DONE 0x03
+
 FIELD(VTYPE, VLMUL, 0, 3)
 FIELD(VTYPE, VSEW, 3, 3)
 FIELD(VTYPE, VTA, 6, 1)
@@ -130,7 +134,11 @@ typedef struct PMUCTRState {
 target_ulong mhpmcounter_prev;
 /* Snapshort value of a counter in RV32 */
 target_ulong mhpmcounterh_prev;
+/* To track if both lower & upper half of the counter is written */
+uint8_t write_done;
 bool started;
+/* Value beyond UINT32_MAX/UINT64_MAX before overflow interrupt trigger */
+target_ulong irq_overflow_left;
 } PMUCTRState;
 
 struct CPUArchState {
@@ -291,6 +299,10 @@ struct CPUArchState {
 /* PMU event selector configured values. First three are unused*/
 target_ulong mhpmevent_val[RV_MAX_MHPMEVENTS];
 
+/* PMU event selector configured values for RV32*/
+target_ulong mhpmeventh_val[RV_MAX_MHPMEVENTS];
+uint8_t mhpmevent_write_done[RV_MAX_MHPMEVENTS];
+
 target_ulong sscratch;
 target_ulong mscratch;
 
@@ -426,6 +438,7 @@ struct RISCVCPUConfig {
 bool ext_zhinxmin;
 bool ext_zve32f;
 bool ext_zve64f;
+bool ext_sscofpmf;
 
 uint32_t mvendorid;
 uint64_t marchid;
@@ -469,6 +482,12 @@ struct ArchCPU {
 
 /* Configuration Settings */
 RISCVCPUConfig cfg;
+
+QEMUTimer *pmu_timer;
+/* A bitmask of Available programmable counters */
+uint32_t pmu_avail_ctrs;
+/* Mapping of events to counters */
+GHashTable *pmu_event_ctr_map;
 };
 
 static inline int riscv_has_ext(CPURISCVState *env, target_ulong ext)
@@ -726,6 +745,19 @@ enum {
 CSR_TABLE_SIZE = 0x1000
 };
 
+/**
+ * The event id are encoded based on the encoding specified in the
+ * SBI specification v0.3
+ */
+
+enum riscv_pmu_event_idx {
+RISCV_PMU_EVENT_HW_CPU_CYCLES = 0x01,
+RISCV_PMU_EVENT_HW_INSTRUCTIONS = 0x02,
+RISCV_PMU_EVENT_CACHE_DTLB_READ_MISS = 0x10019,
+RISCV_PMU_EVENT_CACHE_DTLB_WRITE_MISS = 0x1001B,
+RISCV_PMU_EVENT_CACHE_ITLB_PREFETCH_MISS = 0x10021,
+};
+
 /* CSR function table */
 extern riscv_csr_operations csr_ops[CSR_TABLE_SIZE];
 
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index b3f7fa713000..d94abefdaa0f 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -400,6 +400,37 @@
 #define CSR_MHPMEVENT29 0x33d
 #define CSR_MHPMEVENT30 0x33e
 #define CSR_MHPME

[PATCH v9 03/12] target/riscv: pmu: Rename the counters extension to pmu

2022-05-23 Thread Atish Patra

From: Atish Patra 

The PMU counters are supported via cpu config "Counters" which doesn't
indicate the correct purpose of those counters.

Rename the config property to pmu to indicate that these counters
are performance monitoring counters. This aligns with cpu options for
ARM architecture as well.

Reviewed-by: Bin Meng 
Reviewed-by: Alistair Francis 
Signed-off-by: Atish Patra 
Signed-off-by: Atish Patra 
---
 target/riscv/cpu.c | 2 +-
 target/riscv/cpu.h | 2 +-
 target/riscv/csr.c | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index ccacdee21575..5ad17b40189f 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -822,7 +822,7 @@ static Property riscv_cpu_properties[] = {
 DEFINE_PROP_BOOL("u", RISCVCPU, cfg.ext_u, true),
 DEFINE_PROP_BOOL("v", RISCVCPU, cfg.ext_v, false),
 DEFINE_PROP_BOOL("h", RISCVCPU, cfg.ext_h, true),
-DEFINE_PROP_BOOL("Counters", RISCVCPU, cfg.ext_counters, true),
+DEFINE_PROP_BOOL("pmu", RISCVCPU, cfg.ext_pmu, true),
 DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true),
 DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true),
 DEFINE_PROP_BOOL("Zfh", RISCVCPU, cfg.ext_zfh, false),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index fe6c9a2c9238..09a0c71093c5 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -391,7 +391,7 @@ struct RISCVCPUConfig {
 bool ext_zksed;
 bool ext_zksh;
 bool ext_zkt;
-bool ext_counters;
+bool ext_pmu;
 bool ext_ifencei;
 bool ext_icsr;
 bool ext_svinval;
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index d175fe3f1af3..c625b17dd58e 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -74,8 +74,8 @@ static RISCVException ctr(CPURISCVState *env, int csrno)
 RISCVCPU *cpu = RISCV_CPU(cs);
 int ctr_index;
 
-if (!cpu->cfg.ext_counters) {
-/* The Counters extensions is not enabled */
+if (!cpu->cfg.ext_pmu) {
+/* The PMU extension is not enabled */
 return RISCV_EXCP_ILLEGAL_INST;
 }
 
-- 
2.25.1

[PATCH v9 04/12] target/riscv: pmu: Make number of counters configurable

2022-05-23 Thread Atish Patra

The RISC-V privilege specification provides flexibility to implement
any number of counters from 29 programmable counters. However, QEMU
implements all the counters.

Make it configurable through pmu config parameter which now will indicate
how many programmable counters should be implemented by the cpu.

Reviewed-by: Bin Meng 
Reviewed-by: Alistair Francis 
Signed-off-by: Atish Patra 
Signed-off-by: Atish Patra 
---
 target/riscv/cpu.c |  2 +-
 target/riscv/cpu.h |  2 +-
 target/riscv/csr.c | 94 ++
 3 files changed, 63 insertions(+), 35 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 5ad17b40189f..2dc4b500797d 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -822,7 +822,7 @@ static Property riscv_cpu_properties[] = {
 DEFINE_PROP_BOOL("u", RISCVCPU, cfg.ext_u, true),
 DEFINE_PROP_BOOL("v", RISCVCPU, cfg.ext_v, false),
 DEFINE_PROP_BOOL("h", RISCVCPU, cfg.ext_h, true),
-DEFINE_PROP_BOOL("pmu", RISCVCPU, cfg.ext_pmu, true),
+DEFINE_PROP_UINT8("pmu-num", RISCVCPU, cfg.pmu_num, 16),
 DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true),
 DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true),
 DEFINE_PROP_BOOL("Zfh", RISCVCPU, cfg.ext_zfh, false),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 09a0c71093c5..7cbcd8d62fc1 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -391,7 +391,6 @@ struct RISCVCPUConfig {
 bool ext_zksed;
 bool ext_zksh;
 bool ext_zkt;
-bool ext_pmu;
 bool ext_ifencei;
 bool ext_icsr;
 bool ext_svinval;
@@ -413,6 +412,7 @@ struct RISCVCPUConfig {
 /* Vendor-specific custom extensions */
 bool ext_XVentanaCondOps;
 
+uint8_t pmu_num;
 char *priv_spec;
 char *user_spec;
 char *bext_spec;
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index c625b17dd58e..7e14f7685fb9 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -73,9 +73,17 @@ static RISCVException ctr(CPURISCVState *env, int csrno)
 CPUState *cs = env_cpu(env);
 RISCVCPU *cpu = RISCV_CPU(cs);
 int ctr_index;
+int base_csrno = CSR_HPMCOUNTER3;
+bool rv32 = riscv_cpu_mxl(env) == MXL_RV32 ? true : false;
 
-if (!cpu->cfg.ext_pmu) {
-/* The PMU extension is not enabled */
+if (rv32 && csrno >= CSR_CYCLEH) {
+/* Offset for RV32 hpmcounternh counters */
+base_csrno += 0x80;
+}
+ctr_index = csrno - base_csrno;
+
+if (!cpu->cfg.pmu_num || ctr_index >= (cpu->cfg.pmu_num)) {
+/* No counter is enabled in PMU or the counter is out of range */
 return RISCV_EXCP_ILLEGAL_INST;
 }
 
@@ -103,7 +111,7 @@ static RISCVException ctr(CPURISCVState *env, int csrno)
 }
 break;
 }
-if (riscv_cpu_mxl(env) == MXL_RV32) {
+if (rv32) {
 switch (csrno) {
 case CSR_CYCLEH:
 if (!get_field(env->mcounteren, COUNTEREN_CY)) {
@@ -158,7 +166,7 @@ static RISCVException ctr(CPURISCVState *env, int csrno)
 }
 break;
 }
-if (riscv_cpu_mxl(env) == MXL_RV32) {
+if (rv32) {
 switch (csrno) {
 case CSR_CYCLEH:
 if (!get_field(env->hcounteren, COUNTEREN_CY) &&
@@ -202,6 +210,26 @@ static RISCVException ctr32(CPURISCVState *env, int csrno)
 }
 
 #if !defined(CONFIG_USER_ONLY)
+static RISCVException mctr(CPURISCVState *env, int csrno)
+{
+CPUState *cs = env_cpu(env);
+RISCVCPU *cpu = RISCV_CPU(cs);
+int ctr_index;
+int base_csrno = CSR_MHPMCOUNTER3;
+
+if ((riscv_cpu_mxl(env) == MXL_RV32) && csrno >= CSR_MCYCLEH) {
+/* Offset for RV32 mhpmcounternh counters */
+base_csrno += 0x80;
+}
+ctr_index = csrno - base_csrno;
+if (!cpu->cfg.pmu_num || ctr_index >= cpu->cfg.pmu_num) {
+/* The PMU is not enabled or counter is out of range*/
+return RISCV_EXCP_ILLEGAL_INST;
+}
+
+return RISCV_EXCP_NONE;
+}
+
 static RISCVException any(CPURISCVState *env, int csrno)
 {
 return RISCV_EXCP_NONE;
@@ -3683,35 +3711,35 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
 [CSR_HPMCOUNTER30]   = { "hpmcounter30",   ctr,read_zero },
 [CSR_HPMCOUNTER31]   = { "hpmcounter31",   ctr,read_zero },
 
-[CSR_MHPMCOUNTER3]   = { "mhpmcounter3",   any,read_zero },
-[CSR_MHPMCOUNTER4]   = { "mhpmcounter4",   any,read_zero },
-[CSR_MHPMCOUNTER5]   = { "mhpmcounter5",   any,read_zero },
-[CSR_MHPMCOUNTER6]   = { "mhpmcounter6",   any,read_zero },
-[CSR_MHPMCOUNTER7]   = { "mhpmcounter7",   any,read_zero },
-[CSR_MHPMCOUNTER8]   = { "mhpmcounter8",   any,read_zero },
-[CSR_MHPMCOUNTER9]   = { "mhpmcounter9",   any,read_zero },
-[CSR_MHPMCOUNTER10]  = { "mhpmcounter10",  any,read_zero },
-[CSR_MHPMCOUNTER11]  = { "mhpmcounter11",  any,read_zero },
-[CSR_MHPMCOUNTER12]  = { "mhpmcounter

[PATCH v9 01/12] target/riscv: Fix PMU CSR predicate function

2022-05-23 Thread Atish Patra

From: Atish Patra 

The predicate function calculates the counter index incorrectly for
hpmcounterx. Fix the counter index to reflect correct CSR number.

Fixes: e39a8320b088 ("target/riscv: Support the Virtual Instruction fault")

Reviewed-by: Alistair Francis 
Reviewed-by: Bin Meng 
Signed-off-by: Atish Patra 
Signed-off-by: Atish Patra 
---
 target/riscv/csr.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 3500e07f92e1..ee3a35afa256 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -72,6 +72,7 @@ static RISCVException ctr(CPURISCVState *env, int csrno)
 #if !defined(CONFIG_USER_ONLY)
 CPUState *cs = env_cpu(env);
 RISCVCPU *cpu = RISCV_CPU(cs);
+int ctr_index;
 
 if (!cpu->cfg.ext_counters) {
 /* The Counters extensions is not enabled */
@@ -99,8 +100,9 @@ static RISCVException ctr(CPURISCVState *env, int csrno)
 }
 break;
 case CSR_HPMCOUNTER3...CSR_HPMCOUNTER31:
-if (!get_field(env->hcounteren, 1 << (csrno - CSR_HPMCOUNTER3)) &&
-get_field(env->mcounteren, 1 << (csrno - CSR_HPMCOUNTER3))) {
+ctr_index = csrno - CSR_CYCLE;
+if (!get_field(env->hcounteren, 1 << ctr_index) &&
+ get_field(env->mcounteren, 1 << ctr_index)) {
 return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
 }
 break;
@@ -126,8 +128,9 @@ static RISCVException ctr(CPURISCVState *env, int csrno)
 }
 break;
 case CSR_HPMCOUNTER3H...CSR_HPMCOUNTER31H:
-if (!get_field(env->hcounteren, 1 << (csrno - 
CSR_HPMCOUNTER3H)) &&
-get_field(env->mcounteren, 1 << (csrno - 
CSR_HPMCOUNTER3H))) {
+ctr_index = csrno - CSR_CYCLEH;
+if (!get_field(env->hcounteren, 1 << ctr_index) &&
+ get_field(env->mcounteren, 1 << ctr_index)) {
 return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
 }
 break;
-- 
2.25.1

[PATCH v9 00/12] Improve PMU support

2022-05-23 Thread Atish Patra

The latest version of the SBI specification includes a Performance Monitoring
Unit(PMU) extension[1] which allows the supervisor to start/stop/configure
various PMU events. The Sscofpmf ('Ss' for Privileged arch and Supervisor-level
extensions, and 'cofpmf' for Count OverFlow and Privilege Mode Filtering)
extension[2] allows the perf like tool to handle overflow interrupts and
filtering support.

This series implements full PMU infrastructure to support
PMU in virt machine. This will allow us to add any PMU events in future.

Currently, this series enables the following PMU events.
1. cycle count
2. instruction count
3. DTLB load/store miss
4. ITLB prefetch miss

The first two are computed using host ticks while last three are counted during
cpu_tlb_fill. We can do both sampling and count from guest userspace.
This series has been tested on both RV64 and RV32. Both Linux[3] and Opensbi[4]
patches are required to get the perf working.

Here is an output of perf stat/report while running hackbench with latest
OpenSBI & Linux kernel.

Perf stat:
==
[root@fedora-riscv ~]# perf stat -e cycles -e instructions -e dTLB-load-misses 
-e dTLB-store-misses -e iTLB-load-misses \
> perf bench sched messaging -g 1 -l 10
# Running 'sched/messaging' benchmark:
# 20 sender and receiver processes per group
# 1 groups == 40 processes run

 Total time: 0.265 [sec]

 Performance counter stats for 'perf bench sched messaging -g 1 -l 10':

 4,167,825,362  cycles  

 4,166,609,256  instructions  #1.00  insn per cycle 

 3,092,026  dTLB-load-misses

   258,280  dTLB-store-misses   

 2,068,966  iTLB-load-misses


   0.585791767 seconds time elapsed

   0.373802000 seconds user
   1.042359000 seconds sys

Perf record:

[root@fedora-riscv ~]# perf record -e cycles -e instructions \
> -e dTLB-load-misses -e dTLB-store-misses -e iTLB-load-misses -c 1 \
> perf bench sched messaging -g 1 -l 10
# Running 'sched/messaging' benchmark:
# 20 sender and receiver processes per group
# 1 groups == 40 processes run

 Total time: 1.397 [sec]
[ perf record: Woken up 10 times to write data ]
Check IO/CPU overload!
[ perf record: Captured and wrote 8.211 MB perf.data (214486 samples) ]

[root@fedora-riscv riscv]# perf report
Available samples   
107K cycles◆
107K instructions  ▒
250 dTLB-load-misses   ▒
13 dTLB-store-misses   ▒
172 iTLB-load-misses  
..

Changes from v8->v9:
1. Added the write_done flags to the vmstate.
2. Fixed the hpmcounter read access from M-mode.

Changes from v7->v8:
1. Removed ordering constraints for mhpmcounter & mhpmevent.

Changes from v6->v7:
1. Fixed all the compilation errors for the usermode.

Changes from v5->v6:
1. Fixed compilation issue with PATCH 1.
2. Addressed other comments.

Changes from v4->v5:
1. Rebased on top of the -next with following patches.
   - isa extension
   - priv 1.12 spec
2. Addressed all the comments on v4
3. Removed additional isa-ext DT node in favor of riscv,isa string update

Changes from v3->v4:
1. Removed the dummy events from pmu DT node.
2. Fixed pmu_avail_counters mask generation.
3. Added a patch to simplify the predicate function for counters. 

Changes from v2->v3:
1. Addressed all the comments on PATCH1-4.
2. Split patch1 into two separate patches.
3. Added explicit comments to explain the event types in DT node.
4. Rebased on latest Qemu.

Changes from v1->v2:
1. Dropped the ACKs from v1 as significant changes happened after v1.
2. sscofpmf support.
3. A generic counter management framework.

[1] https://github.com/riscv-non-isa/riscv-sbi-doc/blob/master/riscv-sbi.adoc
[2] https://drive.google.com/file/d/171j4jFjIkKdj5LWcExphq4xG_2sihbfd/edit
[3] https://github.com/atishp04/qemu/tree/riscv_pmu_v9

Atish Patra (12):
target/riscv: Fix PMU CSR predicate function
target/riscv: Implement PMU CSR predicate function for S-mode
target/riscv: pmu: Rename the counters extension to pmu
target/riscv: pmu: Make number of counters configurable
target/riscv: Implement mcountinhibit CSR
target/riscv: Add support for hpmcounters/hpmevents
target/riscv: Support mcycle/minstret write operation
target/riscv: Add sscofpmf extension support
target/riscv: Simplify counter predicate function
target/riscv: Add few cache related PMU events
hw/riscv: virt: Add PMU DT node to the device tree
target/riscv: Update the privilege field for sscofpmf CSRs

hw/riscv/virt.c   |  28 ++
target/riscv/cpu.c|

[PATCH v9 02/12] target/riscv: Implement PMU CSR predicate function for S-mode

2022-05-23 Thread Atish Patra

From: Atish Patra 

Currently, the predicate function for PMU related CSRs only works if
virtualization is enabled. It also does not check mcounteren bits before
cycle/minstret/hpmcounterx access.

Support supervisor mode access in the predicate function as well.

Reviewed-by: Alistair Francis 
Reviewed-by: Bin Meng 
Signed-off-by: Atish Patra 
Signed-off-by: Atish Patra 
---
 target/riscv/csr.c | 51 ++
 1 file changed, 51 insertions(+)

diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index ee3a35afa256..d175fe3f1af3 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -79,6 +79,57 @@ static RISCVException ctr(CPURISCVState *env, int csrno)
 return RISCV_EXCP_ILLEGAL_INST;
 }
 
+if (env->priv == PRV_S) {
+switch (csrno) {
+case CSR_CYCLE:
+if (!get_field(env->mcounteren, COUNTEREN_CY)) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+break;
+case CSR_TIME:
+if (!get_field(env->mcounteren, COUNTEREN_TM)) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+break;
+case CSR_INSTRET:
+if (!get_field(env->mcounteren, COUNTEREN_IR)) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+break;
+case CSR_HPMCOUNTER3...CSR_HPMCOUNTER31:
+ctr_index = csrno - CSR_CYCLE;
+if (!get_field(env->mcounteren, 1 << ctr_index)) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+break;
+}
+if (riscv_cpu_mxl(env) == MXL_RV32) {
+switch (csrno) {
+case CSR_CYCLEH:
+if (!get_field(env->mcounteren, COUNTEREN_CY)) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+break;
+case CSR_TIMEH:
+if (!get_field(env->mcounteren, COUNTEREN_TM)) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+break;
+case CSR_INSTRETH:
+if (!get_field(env->mcounteren, COUNTEREN_IR)) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+break;
+case CSR_HPMCOUNTER3H...CSR_HPMCOUNTER31H:
+ctr_index = csrno - CSR_CYCLEH;
+if (!get_field(env->mcounteren, 1 << ctr_index)) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+break;
+}
+}
+}
+
 if (riscv_cpu_virt_enabled(env)) {
 switch (csrno) {
 case CSR_CYCLE:
-- 
2.25.1

Re: [PATCH] target/riscv: add zicsr/zifencei to isa_string

2022-05-23 Thread Alistair Francis

On Wed, May 18, 2022 at 10:50 PM Hongren (Zenithal) Zheng  
wrote:
>
> Zicsr/Zifencei is not in 'I' since ISA version 20190608,
> thus to fully express the capability of the CPU,
> they should be exposed in isa_string.
>
> Signed-off-by: Hongren (Zenithal) Zheng 

Thanks!

Applied to riscv-to-apply.next

Alistair

> ---
>  target/riscv/cpu.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index 6d01569cad..61fa9b97a4 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -1027,6 +1027,8 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char 
> **isa_str, int max_str_len)
>   *extensions by an underscore.
>   */
>  struct isa_ext_data isa_edata_arr[] = {
> +ISA_EDATA_ENTRY(zicsr, ext_icsr),
> +ISA_EDATA_ENTRY(zifencei, ext_ifencei),
>  ISA_EDATA_ENTRY(zfh, ext_zfh),
>  ISA_EDATA_ENTRY(zfhmin, ext_zfhmin),
>  ISA_EDATA_ENTRY(zfinx, ext_zfinx),
> --
> 2.35.1
>
>

Re: [PATCH v2 06/12] target/ppc: implement xscvspdpn with helper_todouble

2022-05-23 Thread Daniel Henrique Barboza





On 5/23/22 12:54, Richard Henderson wrote:

On 5/23/22 06:48, Daniel Henrique Barboza wrote:

Checking 0006-target-ppc-declare-xscvspdpn-helper-with-call-flags.patch...
ERROR: spaces required around that '*' (ctx:WxV)
#69: FILE: target/ppc/translate/vsx-impl.c.inc:1049:
+bool trans_XSCVSPDPN(DisasContext *ctx, arg_XX2 *a)
  ^

My guess is that since the var 'arg_XX2' ends with a numeral the script
thinks that the following '*' is an arithmetic operation. Problem is that
we have other examples of this kind of declaration in the same file, e.g.:


static bool trans_XVCVBF16SPN(DisasContext *ctx, arg_XX2 *a)



Is there a way to convince checkpatch.pl that this is an okay format?


Not that I know of.  I just ignore these parsing errors.




Works for me. We should be aware that gitlab will complain about it when pushing
this to master though. E.g. https://gitlab.com/danielhb/qemu/-/jobs/2496047821 .

Thanks,


Daniel




r~

Re: [RFC PATCH] mpqemu: Remove unlock/lock of iothread in mpqemu-link send and recv functions

2022-05-23 Thread Jag Raman



> On May 23, 2022, at 11:09 AM, Alexander Duyck  
> wrote:
> 
> From: Alexander Duyck 
> 
> When I run Multi-process QEMU with an e1000 as the remote device and SMP
> enabled I see the combination lock up and become unresponsive. The QEMU build
> is a fairly standard x86_64-softmmu setup. After doing some digging I tracked
> the lockup down to what appears to be a race with the mpqemu-link msg_send
> and msg_receive functions and the reacquisition of the lock.
> 
> I am assuming the issue is some sort of lock inversion though I haven't
> identified exactly what the other lock involved is yet. For now removing
> the logic to unlock the iothread and then reacquire the lock seems to
> resolve the issue. I am assuming the releasing of the lock was some form of
> optimization but I am not certain so I am submitting this as an RFC.

Hi Alexander,

We are working on moving away from Multi-process QEMU and to using a vfio-user
based approach. The vfio-user patches are under review. I believe we would drop
the Multi-process support once vfio-user is merged.

We release the lock here while communicating with the remote process via the
QIOChannel. It is to prevent lockup of the VM in case the QIOChannel hangs.

I was able to reproduce this issue at my end. There is a deadlock between
"mpqemu_msg_send() -> qemu_mutex_lock_iothread()" and
"mpqemu_msg_send_and_await_reply() -> QEMU_LOCK_GUARD(&pdev->io_mutex)".

From what I can tell, as soon as one vcpu thread drops the iothread lock,
another thread running mpqemu_msg_send_and_await_reply() holds on to it.
That prevents the first thread from completing. Attaching backtrace below.

To avoid the deadlock, I think we should drop both the iothread lock and
io_mutex and reacquire them in the correct order - first iothread and then
io_mutex. Given multiprocess QEMU would be dropped in the near future, I
suppose we don't have to proceed further along these lines.

I tested your patch, and that fixes the e1000 issue at my end also. I believe we
could adopt it.

Thank you!
--
Jag

Thread 6 (Thread 0x7f2d12281700 (LWP 31758)):
#0  0x7f2d9b7ac54d in __lll_lock_wait () at /lib64/libpthread.so.0
#1  0x7f2d9b7a7e9b in _L_lock_883 () at /lib64/libpthread.so.0
#2  0x7f2d9b7a7d68 in pthread_mutex_lock () at /lib64/libpthread.so.0
#3  0x55bdeb48663f in qemu_mutex_lock_impl (mutex=0x55bdebf68800 
, file=0x55bdeb5c5c5a "../hw/remote/mpqemu-link.c", line=79) 
at ../util/qemu-thread-posix.c:88
#4  0x55bdeb006546 in qemu_mutex_lock_iothread_impl (file=0x55bdeb5c5c5a 
"../hw/remote/mpqemu-link.c", line=79) at ../softmmu/cpus.c:502
#5  0x55bdeafed3ff in mpqemu_msg_send (msg=0x7f2d12280430, 
ioc=0x55bdeeb02600, errp=0x7f2d12280420) at ../hw/remote/mpqemu-link.c:79
#6  0x55bdeafed93c in mpqemu_msg_send_and_await_reply (msg=0x7f2d12280430, 
pdev=0x55bdeeaff8e0, errp=0x7f2d12280420) at ../hw/remote/mpqemu-link.c:198
#7  0x55bdeafefe0e in send_bar_access_msg (pdev=0x55bdeeaff8e0, 
mr=0x55bdeeb00460, write=false, addr=192, val=0x7f2d12280578, size=4, 
memory=true) at ../hw/remote/proxy.c:256
#8  0x55bdeafeff3e in proxy_bar_read (opaque=0x55bdeeb00450, addr=192, 
size=4) at ../hw/remote/proxy.c:280
#9  0x55bdeb1f3759 in memory_region_read_accessor (mr=0x55bdeeb00460, 
addr=192, value=0x7f2d12280750, size=4, shift=0, mask=4294967295, attrs=...) at 
../softmmu/memory.c:440
#10 0x55bdeb1f3c8e in access_with_adjusted_size (addr=192, 
value=0x7f2d12280750, size=4, access_size_min=1, access_size_max=8, 
access_fn=0x55bdeb1f3716 , mr=0x55bdeeb00460, 
attrs=...) at ../softmmu/memory.c:554
#11 0x55bdeb1f695f in memory_region_dispatch_read1 (mr=0x55bdeeb00460, 
addr=192, pval=0x7f2d12280750, size=4, attrs=...) at ../softmmu/memory.c:1424
#12 0x55bdeb1f6a79 in memory_region_dispatch_read (mr=0x55bdeeb00460, 
addr=192, pval=0x7f2d12280750, op=MO_32, attrs=...) at ../softmmu/memory.c:1457
#13 0x55bdeb20451a in flatview_read_continue (fv=0x7f2d0c30ef50, 
addr=4273602752, attrs=..., ptr=0x7f2d9d988028, len=4, addr1=192, l=4, 
mr=0x55bdeeb00460) at ../softmmu/physmem.c:2881
#14 0x55bdeb204692 in flatview_read (fv=0x7f2d0c30ef50, addr=4273602752, 
attrs=..., buf=0x7f2d9d988028, len=4) at ../softmmu/physmem.c:2923
#15 0x55bdeb20471b in address_space_read_full (as=0x55bdebf705e0 
, addr=4273602752, attrs=..., buf=0x7f2d9d988028, len=4) 
at ../softmmu/physmem.c:2936
#16 0x55bdeb20483f in address_space_rw (as=0x55bdebf705e0 
, addr=4273602752, attrs=..., buf=0x7f2d9d988028, len=4, 
is_write=false) at ../softmmu/physmem.c:2964
#17 0x55bdeb29a60a in kvm_cpu_exec (cpu=0x55bdedcb0410) at 
../accel/kvm/kvm-all.c:2929
#18 0x55bdeb29c3fc in kvm_vcpu_thread_fn (arg=0x55bdedcb0410) at 
../accel/kvm/kvm-accel-ops.c:49
#19 0x55bdeb4872f8 in qemu_thread_start (args=0x55bdedcbf700) at 
../util/qemu-thread-posix.c:504
#20 0x7f2d9b7a5ea5 in start_thread () at /lib64/libpthread.so.0
#21 0x7f2d9b4ceb0d in clone () at /lib64/libc.so.6

Thread 3 (

Re: [PATCH] target/riscv: add zicsr/zifencei to isa_string

2022-05-23 Thread Alistair Francis

On Tue, May 24, 2022 at 2:51 AM Hongren (Zenithal) Zheng  
wrote:
>
> On Mon, May 23, 2022 at 09:22:15AM +1000, Alistair Francis wrote:
> > On Wed, May 18, 2022 at 10:50 PM Hongren (Zenithal) Zheng 
> >  wrote:
> > >
> > > Zicsr/Zifencei is not in 'I' since ISA version 20190608,
> > > thus to fully express the capability of the CPU,
> > > they should be exposed in isa_string.
> > >
> > > Signed-off-by: Hongren (Zenithal) Zheng 
> > > ---
> > >  target/riscv/cpu.c | 2 ++
> > >  1 file changed, 2 insertions(+)
> > >
> > > diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> > > index 6d01569cad..61fa9b97a4 100644
> > > --- a/target/riscv/cpu.c
> > > +++ b/target/riscv/cpu.c
> > > @@ -1027,6 +1027,8 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, 
> > > char **isa_str, int max_str_len)
> > >   *extensions by an underscore.
> > >   */
> > >  struct isa_ext_data isa_edata_arr[] = {
> > > +ISA_EDATA_ENTRY(zicsr, ext_icsr),
> > > +ISA_EDATA_ENTRY(zifencei, ext_ifencei),
> >
> > Shouldn't we have a spec version check here?
>
> I think that can be done, but most of the time it is not necessary.
> For old specs, the "short-isa-string" can be a workaround.
>
> This patch is actually a follow-up of "target/riscv: Change "G" expansion"
> https://github.com/alistair23/qemu/commit/72bd25b7b88d0536bfb5666990e296587d4057a5
> where "G" is expanded with "zicsr"/"zifencei" without checking
> unpriv spec version.
>
> From the summary from Kito from gnu toolchain
> https://lkml.org/lkml/2022/1/24/537
> we know that there are at least 3 formally released unpriv specs:
> 2.2, 20190608 and 20191213, and frequent informal release from
> https://github.com/riscv/riscv-isa-manual/releases
>
> If we add a spec check, we need to add a PROP_STRING and
> parse the version. We then need an enum like
> PRIV_VERSION_1_12_0 and we need to g_strcmp0
> in riscv_cpu_realize, just as what priv spec had done.
>
> And we need to define a default unpriv spec version, which
> should be 20191213.
>
> I can add a separate patch for it if you do think it is necessary.
>
> If the guest does want old spec version, I think the recently added
> "short-isa-string" option from Tsukasa OI is suitable.
> Instead of -cpu rv64,unpriv_spec=2.2 they can just use
> -cpu rv64,short-isa-string=true to solve the issue.

Ok, fair point

Reviewed-by: Alistair Francis 

Alistair

>
> Cc Tsukasa OI
>
> >
> > Alistair
> >
> > >  ISA_EDATA_ENTRY(zfh, ext_zfh),
> > >  ISA_EDATA_ENTRY(zfhmin, ext_zfhmin),
> > >  ISA_EDATA_ENTRY(zfinx, ext_zfinx),
> > > --
> > > 2.35.1
> > >
> > >

Re: [PATCH v2 7/8] target/riscv: Force disable extensions if priv spec version does not match

2022-05-23 Thread Alistair Francis

On Fri, May 20, 2022 at 1:07 AM Anup Patel  wrote:
>
> On Tue, May 17, 2022 at 5:46 AM Alistair Francis  wrote:
> >
> > On Thu, May 12, 2022 at 12:52 AM Anup Patel  wrote:
> > >
> > > We should disable extensions in riscv_cpu_realize() if minimum required
> > > priv spec version is not satisfied. This also ensures that machines with
> > > priv spec v1.11 (or lower) cannot enable H, V, and various multi-letter
> > > extensions.
> > >
> > > Fixes: a775398be2e ("target/riscv: Add isa extenstion strings to the
> > > device tree")
> > > Signed-off-by: Anup Patel 
> >
> > This will potentially confuse users as we just disable the extension
> > without telling them.
> >
> > Could we not just leave this as is and let users specify the
> > extensions they want? Then it's up to them to specify the correct
> > combinations
>
> The ISA extensions are not independent of the Priv spec version.
>
> For example, we have bits for Sstc, Svpbmt, and Zicbo[m|p|z] extensions
> in xenvcfg CSRs which are only available for Priv v1.12 spec.
>
> We can't allow users to enable extensions which don't meet
> the Priv spec version requirements.

Fair point. Ok we should at least report a warning if any of these are
set though

Alistair

>
> Regards,
> Anup
>
> >
> > Alistair
> >
> > > ---
> > >  target/riscv/cpu.c | 34 ++
> > >  1 file changed, 34 insertions(+)
> > >
> > > diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> > > index f3b61dfd63..25a4ba3e22 100644
> > > --- a/target/riscv/cpu.c
> > > +++ b/target/riscv/cpu.c
> > > @@ -541,6 +541,40 @@ static void riscv_cpu_realize(DeviceState *dev, 
> > > Error **errp)
> > >  set_priv_version(env, priv_version);
> > >  }
> > >
> > > +/* Force disable extensions if priv spec version does not match */
> > > +if (env->priv_ver < PRIV_VERSION_1_12_0) {
> > > +cpu->cfg.ext_h = false;
> > > +cpu->cfg.ext_v = false;
> > > +cpu->cfg.ext_zfh = false;
> > > +cpu->cfg.ext_zfhmin = false;
> > > +cpu->cfg.ext_zfinx = false;
> > > +cpu->cfg.ext_zhinx = false;
> > > +cpu->cfg.ext_zhinxmin = false;
> > > +cpu->cfg.ext_zdinx = false;
> > > +cpu->cfg.ext_zba = false;
> > > +cpu->cfg.ext_zbb = false;
> > > +cpu->cfg.ext_zbc = false;
> > > +cpu->cfg.ext_zbkb = false;
> > > +cpu->cfg.ext_zbkc = false;
> > > +cpu->cfg.ext_zbkx = false;
> > > +cpu->cfg.ext_zbs = false;
> > > +cpu->cfg.ext_zk = false;
> > > +cpu->cfg.ext_zkn = false;
> > > +cpu->cfg.ext_zknd = false;
> > > +cpu->cfg.ext_zkne = false;
> > > +cpu->cfg.ext_zknh = false;
> > > +cpu->cfg.ext_zkr = false;
> > > +cpu->cfg.ext_zks = false;
> > > +cpu->cfg.ext_zksed = false;
> > > +cpu->cfg.ext_zksh = false;
> > > +cpu->cfg.ext_zkt = false;
> > > +cpu->cfg.ext_zve32f = false;
> > > +cpu->cfg.ext_zve64f = false;
> > > +cpu->cfg.ext_svinval = false;
> > > +cpu->cfg.ext_svnapot = false;
> > > +cpu->cfg.ext_svpbmt = false;
> > > +}
> > > +
> > >  if (cpu->cfg.mmu) {
> > >  riscv_set_feature(env, RISCV_FEATURE_MMU);
> > >  }
> > > --
> > > 2.34.1
> > >
> > >

Re: [RESEND PATCH v2] target/riscv: Fix typo of mimpid cpu option

2022-05-23 Thread Alistair Francis

On Tue, May 24, 2022 at 1:36 AM  wrote:
>
> From: Frank Chang 
>
> "mimpid" cpu option was mistyped to "mipid".
>
> Fixes: 9951ba94 ("target/riscv: Support configuarable marchid, mvendorid, 
> mipid CSR values")
> Signed-off-by: Frank Chang 
> Reviewed-by: Alistair Francis 

Thanks!

Applied to riscv-to-apply.next

Alistair

> ---
>  target/riscv/cpu.c | 4 ++--
>  target/riscv/cpu.h | 2 +-
>  target/riscv/csr.c | 8 
>  3 files changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index 6d01569cad..a1f847176e 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -37,7 +37,7 @@
>  #define RISCV_CPU_MARCHID   ((QEMU_VERSION_MAJOR << 16) | \
>   (QEMU_VERSION_MINOR << 8)  | \
>   (QEMU_VERSION_MICRO))
> -#define RISCV_CPU_MIPID RISCV_CPU_MARCHID
> +#define RISCV_CPU_MIMPIDRISCV_CPU_MARCHID
>
>  static const char riscv_single_letter_exts[] = "IEMAFDQCPVH";
>
> @@ -869,7 +869,7 @@ static Property riscv_cpu_properties[] = {
>
>  DEFINE_PROP_UINT32("mvendorid", RISCVCPU, cfg.mvendorid, 0),
>  DEFINE_PROP_UINT64("marchid", RISCVCPU, cfg.marchid, RISCV_CPU_MARCHID),
> -DEFINE_PROP_UINT64("mipid", RISCVCPU, cfg.mipid, RISCV_CPU_MIPID),
> +DEFINE_PROP_UINT64("mimpid", RISCVCPU, cfg.mimpid, RISCV_CPU_MIMPID),
>
>  DEFINE_PROP_BOOL("svinval", RISCVCPU, cfg.ext_svinval, false),
>  DEFINE_PROP_BOOL("svnapot", RISCVCPU, cfg.ext_svnapot, false),
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index f5ff7294c6..44975e3e5a 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -408,7 +408,7 @@ struct RISCVCPUConfig {
>
>  uint32_t mvendorid;
>  uint64_t marchid;
> -uint64_t mipid;
> +uint64_t mimpid;
>
>  /* Vendor-specific custom extensions */
>  bool ext_XVentanaCondOps;
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index 4ea7df02c9..0d5bc2f41d 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -674,13 +674,13 @@ static RISCVException read_marchid(CPURISCVState *env, 
> int csrno,
>  return RISCV_EXCP_NONE;
>  }
>
> -static RISCVException read_mipid(CPURISCVState *env, int csrno,
> - target_ulong *val)
> +static RISCVException read_mimpid(CPURISCVState *env, int csrno,
> +  target_ulong *val)
>  {
>  CPUState *cs = env_cpu(env);
>  RISCVCPU *cpu = RISCV_CPU(cs);
>
> -*val = cpu->cfg.mipid;
> +*val = cpu->cfg.mimpid;
>  return RISCV_EXCP_NONE;
>  }
>
> @@ -3372,7 +3372,7 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
>  /* Machine Information Registers */
>  [CSR_MVENDORID] = { "mvendorid", any,   read_mvendorid },
>  [CSR_MARCHID]   = { "marchid",   any,   read_marchid   },
> -[CSR_MIMPID]= { "mimpid",any,   read_mipid },
> +[CSR_MIMPID]= { "mimpid",any,   read_mimpid},
>  [CSR_MHARTID]   = { "mhartid",   any,   read_mhartid   },
>
>  [CSR_MCONFIGPTR]  = { "mconfigptr", any,   read_zero,
> --
> 2.35.1
>
>

Re: [PATCH qemu v18 02/16] target/riscv: rvv: Prune redundant access_type parameter passed

2022-05-23 Thread Alistair Francis

On Fri, May 13, 2022 at 9:50 PM ~eopxd  wrote:
>
> From: eopXD 
>
> No functional change intended in this commit.
>
> Signed-off-by: eop Chen 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/vector_helper.c | 35 ---
>  1 file changed, 16 insertions(+), 19 deletions(-)
>
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index 85dd611cd9..60840325c4 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -231,7 +231,7 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
>   target_ulong stride, CPURISCVState *env,
>   uint32_t desc, uint32_t vm,
>   vext_ldst_elem_fn *ldst_elem,
> - uint32_t esz, uintptr_t ra, MMUAccessType access_type)
> + uint32_t esz, uintptr_t ra)
>  {
>  uint32_t i, k;
>  uint32_t nf = vext_nf(desc);
> @@ -259,7 +259,7 @@ void HELPER(NAME)(void *vd, void * v0, target_ulong base, 
>   \
>  {   \
>  uint32_t vm = vext_vm(desc);\
>  vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,  \
> - ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);  \
> + ctzl(sizeof(ETYPE)), GETPC()); \
>  }
>
>  GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
> @@ -274,7 +274,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong base,  
>   \
>  {   \
>  uint32_t vm = vext_vm(desc);\
>  vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
> - ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \
> + ctzl(sizeof(ETYPE)), GETPC()); \
>  }
>
>  GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
> @@ -290,7 +290,7 @@ GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
>  static void
>  vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
>   vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
> - uintptr_t ra, MMUAccessType access_type)
> + uintptr_t ra)
>  {
>  uint32_t i, k;
>  uint32_t nf = vext_nf(desc);
> @@ -319,14 +319,14 @@ void HELPER(NAME##_mask)(void *vd, void *v0, 
> target_ulong base, \
>  {   \
>  uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
>  vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
> - ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);  \
> + ctzl(sizeof(ETYPE)), GETPC()); \
>  }   \
>  \
>  void HELPER(NAME)(void *vd, void *v0, target_ulong base,\
>CPURISCVState *env, uint32_t desc)\
>  {   \
>  vext_ldst_us(vd, base, env, desc, LOAD_FN,  \
> - ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \
> + ctzl(sizeof(ETYPE)), env->vl, GETPC());\
>  }
>
>  GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
> @@ -340,14 +340,14 @@ void HELPER(NAME##_mask)(void *vd, void *v0, 
> target_ulong base,  \
>  {\
>  uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));  \
>  vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,   \
> - ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);  \
> + ctzl(sizeof(ETYPE)), GETPC());  \
>  }\
>   \
>  void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
>CPURISCVState *env, uint32_t desc) \
>  {\
>  vext_ldst_us(vd, base, env, desc, STORE_FN,  \
> - ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \
> + ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
>  }
>
>  GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
> @@ -364,7 +364,7 @@ void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
>  /* evl = ceil(vl/8) */
>  uint8_t evl = (env->vl + 7) >> 3;
>  vext_ldst_us(vd, base, env, desc, lde_b,
> - 0, evl, GETPC(), MMU_DATA_LOAD);
> + 0, evl, GETPC()

Re: [PATCH v2 8/8] hw/riscv: virt: Fix interrupt parent for dynamic platform devices

2022-05-23 Thread Alistair Francis

On Thu, May 12, 2022 at 12:53 AM Anup Patel  wrote:
>
> When both APLIC and IMSIC are present in virt machine, the APLIC should
> be used as parent interrupt controller for dynamic platform devices.
>
> In case of  multiple sockets, we should prefer interrupt controller of
> socket0 for dynamic platform devices.
>
> Fixes: 3029fab64309 ("hw/riscv: virt: Add support for generating
> platform FDT entries")
> Signed-off-by: Anup Patel 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  hw/riscv/virt.c | 25 -
>  1 file changed, 12 insertions(+), 13 deletions(-)
>
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index 3326f4db96..c576173815 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -478,10 +478,12 @@ static void create_fdt_socket_plic(RISCVVirtState *s,
>  qemu_fdt_setprop_cell(mc->fdt, plic_name, "phandle",
>  plic_phandles[socket]);
>
> -platform_bus_add_all_fdt_nodes(mc->fdt, plic_name,
> -   memmap[VIRT_PLATFORM_BUS].base,
> -   memmap[VIRT_PLATFORM_BUS].size,
> -   VIRT_PLATFORM_BUS_IRQ);
> +if (!socket) {
> +platform_bus_add_all_fdt_nodes(mc->fdt, plic_name,
> +   memmap[VIRT_PLATFORM_BUS].base,
> +   memmap[VIRT_PLATFORM_BUS].size,
> +   VIRT_PLATFORM_BUS_IRQ);
> +}
>
>  g_free(plic_name);
>
> @@ -561,11 +563,6 @@ static void create_fdt_imsic(RISCVVirtState *s, const 
> MemMapEntry *memmap,
>  }
>  qemu_fdt_setprop_cell(mc->fdt, imsic_name, "phandle", *msi_m_phandle);
>
> -platform_bus_add_all_fdt_nodes(mc->fdt, imsic_name,
> -   memmap[VIRT_PLATFORM_BUS].base,
> -   memmap[VIRT_PLATFORM_BUS].size,
> -   VIRT_PLATFORM_BUS_IRQ);
> -
>  g_free(imsic_name);
>
>  /* S-level IMSIC node */
> @@ -704,10 +701,12 @@ static void create_fdt_socket_aplic(RISCVVirtState *s,
>  riscv_socket_fdt_write_id(mc, mc->fdt, aplic_name, socket);
>  qemu_fdt_setprop_cell(mc->fdt, aplic_name, "phandle", aplic_s_phandle);
>
> -platform_bus_add_all_fdt_nodes(mc->fdt, aplic_name,
> -   memmap[VIRT_PLATFORM_BUS].base,
> -   memmap[VIRT_PLATFORM_BUS].size,
> -   VIRT_PLATFORM_BUS_IRQ);
> +if (!socket) {
> +platform_bus_add_all_fdt_nodes(mc->fdt, aplic_name,
> +   memmap[VIRT_PLATFORM_BUS].base,
> +   memmap[VIRT_PLATFORM_BUS].size,
> +   VIRT_PLATFORM_BUS_IRQ);
> +}
>
>  g_free(aplic_name);
>
> --
> 2.34.1
>
>

Re: [PATCH v2 4/8] target/riscv: Update [m|h]tinst CSR in riscv_cpu_do_interrupt()

2022-05-23 Thread Alistair Francis

On Thu, May 12, 2022 at 12:47 AM Anup Patel  wrote:
>
> We should write transformed instruction encoding of the trapped
> instruction in [m|h]tinst CSR at time of taking trap as defined
> by the RISC-V privileged specification v1.12.
>
> Signed-off-by: Anup Patel 
> ---
>  target/riscv/cpu_helper.c | 168 +-
>  target/riscv/instmap.h|  41 ++
>  2 files changed, 205 insertions(+), 4 deletions(-)
>
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index d99fac9d2d..b24652eb8d 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -22,6 +22,7 @@
>  #include "qemu/main-loop.h"
>  #include "cpu.h"
>  #include "exec/exec-all.h"
> +#include "instmap.h"
>  #include "tcg/tcg-op.h"
>  #include "trace.h"
>  #include "semihosting/common-semi.h"
> @@ -1318,6 +1319,158 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, 
> int size,
>  }
>  #endif /* !CONFIG_USER_ONLY */
>
> +static target_ulong riscv_transformed_insn(CPURISCVState *env,
> +   target_ulong insn)
> +{
> +target_ulong xinsn = 0;
> +
> +/*
> + * Only Quadrant 0 and Quadrant 2 of RVC instruction space need to
> + * be uncompressed. The Quadrant 1 of RVC instruction space need
> + * not be transformed because these instructions won't generate
> + * any load/store trap.
> + */
> +
> +if ((insn & 0x3) != 0x3) {
> +/* Transform 16bit instruction into 32bit instruction */
> +switch (GET_C_OP(insn)) {
> +case OPC_RISC_C_OP_QUAD0: /* Quadrant 0 */
> +switch (GET_C_FUNC(insn)) {
> +case OPC_RISC_C_FUNC_FLD_LQ:
> +if (riscv_cpu_xlen(env) != 128) { /* C.FLD (RV32/64) */
> +xinsn = OPC_RISC_FLD;
> +xinsn = SET_RD(xinsn, GET_C_RS2S(insn));
> +xinsn = SET_RS1(xinsn, GET_C_RS1S(insn));
> +xinsn = SET_I_IMM(xinsn, GET_C_LD_IMM(insn));
> +}
> +break;
> +case OPC_RISC_C_FUNC_LW: /* C.LW */
> +xinsn = OPC_RISC_LW;
> +xinsn = SET_RD(xinsn, GET_C_RS2S(insn));
> +xinsn = SET_RS1(xinsn, GET_C_RS1S(insn));
> +xinsn = SET_I_IMM(xinsn, GET_C_LW_IMM(insn));
> +break;
> +case OPC_RISC_C_FUNC_FLW_LD:
> +if (riscv_cpu_xlen(env) == 32) { /* C.FLW (RV32) */
> +xinsn = OPC_RISC_FLW;
> +xinsn = SET_RD(xinsn, GET_C_RS2S(insn));
> +xinsn = SET_RS1(xinsn, GET_C_RS1S(insn));
> +xinsn = SET_I_IMM(xinsn, GET_C_LW_IMM(insn));
> +} else { /* C.LD (RV64/RV128) */
> +xinsn = OPC_RISC_LD;
> +xinsn = SET_RD(xinsn, GET_C_RS2S(insn));
> +xinsn = SET_RS1(xinsn, GET_C_RS1S(insn));
> +xinsn = SET_I_IMM(xinsn, GET_C_LD_IMM(insn));
> +}
> +break;
> +case OPC_RISC_C_FUNC_FSD_SQ:
> +if (riscv_cpu_xlen(env) != 128) { /* C.FSD (RV32/64) */
> +xinsn = OPC_RISC_FSD;
> +xinsn = SET_RS2(xinsn, GET_C_RS2S(insn));
> +xinsn = SET_RS1(xinsn, GET_C_RS1S(insn));
> +xinsn = SET_S_IMM(xinsn, GET_C_SD_IMM(insn));
> +}
> +break;
> +case OPC_RISC_C_FUNC_SW: /* C.SW */
> +xinsn = OPC_RISC_SW;
> +xinsn = SET_RS2(xinsn, GET_C_RS2S(insn));
> +xinsn = SET_RS1(xinsn, GET_C_RS1S(insn));
> +xinsn = SET_S_IMM(xinsn, GET_C_SW_IMM(insn));
> +break;
> +case OPC_RISC_C_FUNC_FSW_SD:
> +if (riscv_cpu_xlen(env) == 32) { /* C.FSW (RV32) */
> +xinsn = OPC_RISC_FSW;
> +xinsn = SET_RS2(xinsn, GET_C_RS2S(insn));
> +xinsn = SET_RS1(xinsn, GET_C_RS1S(insn));
> +xinsn = SET_S_IMM(xinsn, GET_C_SW_IMM(insn));
> +} else { /* C.SD (RV64/RV128) */
> +xinsn = OPC_RISC_SD;
> +xinsn = SET_RS2(xinsn, GET_C_RS2S(insn));
> +xinsn = SET_RS1(xinsn, GET_C_RS1S(insn));
> +xinsn = SET_S_IMM(xinsn, GET_C_SD_IMM(insn));
> +}
> +break;
> +default:
> +break;
> +}
> +break;
> +case OPC_RISC_C_OP_QUAD2: /* Quadrant 2 */
> +switch (GET_C_FUNC(insn)) {
> +case OPC_RISC_C_FUNC_FLDSP_LQSP:
> +if (riscv_cpu_xlen(env) != 128) { /* C.FLDSP (RV32/64) */
> +xinsn = OPC_RISC_FLD;
> +xinsn = SET_RD(xinsn, GET_C_RD(insn));
> +xinsn = SET_RS1(xinsn, 2);

Is this right?

Shouldn't this be the offset "between t

Re: [PATCH] linux-user/host/s390: Treat EX and EXRL as writes

2022-05-23 Thread Laurent Vivier


Le 04/05/2022 à 13:48, Ilya Leoshkevich a écrit :

clang-built s390x branch-relative-long test fails on clang-built s390x
QEMU due to the following sequence of events:

- The test zeroes out a code page, clang generates exrl+xc for this.

- do_helper_xc() is called. Clang generates exrl+xc there as well.

- Since there already exists a TB for the code in question, its page is
   read-only and SIGSEGV is raised.

- host_signal_handler() calls host_signal_write() and the latter does
   not recognize exrl as a write. Therefore page_unprotect() is not
   called and the signal is forwarded to the test.

Fix by treating EXRL (and EX, just in case) as writes. There may be
false positives, but they will lead only to an extra page_unprotect()
call.

Reported-by: Thomas Huth 
Signed-off-by: Ilya Leoshkevich 
---
  linux-user/include/host/s390/host-signal.h | 7 +++
  1 file changed, 7 insertions(+)

diff --git a/linux-user/include/host/s390/host-signal.h 
b/linux-user/include/host/s390/host-signal.h
index 6f191e64d7..25fefa00bd 100644
--- a/linux-user/include/host/s390/host-signal.h
+++ b/linux-user/include/host/s390/host-signal.h
@@ -50,6 +50,7 @@ static inline bool host_signal_write(siginfo_t *info, 
host_sigcontext *uc)
  case 0x50: /* ST */
  case 0x42: /* STC */
  case 0x40: /* STH */
+case 0x44: /* EX */
  case 0xba: /* CS */
  case 0xbb: /* CDS */
  return true;
@@ -61,6 +62,12 @@ static inline bool host_signal_write(siginfo_t *info, 
host_sigcontext *uc)
  return true;
  }
  break;
+case 0xc6: /* RIL-b format insns */
+switch (pinsn[0] & 0xf) {
+case 0x0: /* EXRL */
+return true;
+}
+break;
  case 0xc8: /* SSF format insns */
  switch (pinsn[0] & 0xf) {
  case 0x2: /* CSST */


Applied to my linux-user-for-7.1 branch.

Thanks,
Laurent

Fast usermode networking with QEMU

2022-05-23 Thread Anders Pitman

I came across this blog post[0] concerning passt, which is an alternative 
usermode networking implementation for QEMU.

I'm working on a project that uses QEMU on Windows hosts running Linux guests. 
I'm trying to get faster usermode networking than is available with libslirp. 
My performance target is 200Mbps even on older or less powerful hardware, such 
as Celeron mini PCs. Currently I'm seeing 15-30MBps with libslirp.

It appears that passt doesn't currently support Windows, correct? Is there a 
guess as to how difficult that might be to implement? If the speedup is 
significant, I would be interested in taking a crack at adding Windows support.

Thanks,
//anders

[0]: http://blog.vmsplice.net/2021/10/a-new-approach-to-usermode-networking.html

Re: [PATCH] target/riscv: add support for zmmul extension v0.1

2022-05-23 Thread Alistair Francis

On Mon, May 23, 2022 at 6:10 PM Weiwei Li  wrote:
>
>
> 在 2022/5/23 下午2:34, Alistair Francis 写道:
> > On Wed, May 18, 2022 at 11:54 AM Weiwei Li  wrote:
> >>   - includes all multiplication operations for M extension
> >>
> >> Signed-off-by: Weiwei Li 
> >> Signed-off-by: Junqiang Wang 
> >> ---
> >>   target/riscv/cpu.c  |  2 ++
> >>   target/riscv/cpu.h  |  1 +
> >>   target/riscv/insn_trans/trans_rvm.c.inc | 18 --
> >>   3 files changed, 15 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> >> index e373c61ba2..01b57d3784 100644
> >> --- a/target/riscv/cpu.c
> >> +++ b/target/riscv/cpu.c
> >> @@ -903,6 +903,7 @@ static Property riscv_cpu_properties[] = {
> >>
> >>   /* These are experimental so mark with 'x-' */
> >>   DEFINE_PROP_BOOL("x-j", RISCVCPU, cfg.ext_j, false),
> >> +DEFINE_PROP_BOOL("x-zmmul", RISCVCPU, cfg.ext_zmmul, false),
> > Is this really experimental?
> >
> > Alistair
>
> I think it's experimental currently. The zmmul version in latest riscv
> spec is v0.1, even though described as  v1.0 in spike README.

Hmm... You're right that it is only v0.1, but there is no indication of
draft state in the RISC-V spec chapter on Zmmul.

>
> Its specification status
> (https://wiki.riscv.org/display/home/specification+status) is Freeze
> Complete and TSC Sign-Off Voting.
>
> And It's not in the ratified extension
> list(https://wiki.riscv.org/display/home/recently+ratified+extensions).
>
> Any status update I missed?

Confusing. Ok, I guess let's leave it as experimental, we can always
remove the `x-` easily :)

>
> Regards,
>
> Weiwei Li
>
> >>   /* ePMP 0.9.3 */
> >>   DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false),
> >>   DEFINE_PROP_BOOL("x-aia", RISCVCPU, cfg.aia, false),
> >> @@ -1027,6 +1028,7 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char 
> >> **isa_str, int max_str_len)
> >>*extensions by an underscore.
> >>*/
> >>   struct isa_ext_data isa_edata_arr[] = {
> >> +ISA_EDATA_ENTRY(zmmul, ext_zmmul),

We should have some checks though. We don't want users to enable both
this and the multiply (M) extension at the same time.


Alistair

> >>   ISA_EDATA_ENTRY(zfh, ext_zfh),
> >>   ISA_EDATA_ENTRY(zfhmin, ext_zfhmin),
> >>   ISA_EDATA_ENTRY(zfinx, ext_zfinx),
> >> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> >> index f5ff7294c6..68177eae12 100644
> >> --- a/target/riscv/cpu.h
> >> +++ b/target/riscv/cpu.h
> >> @@ -405,6 +405,7 @@ struct RISCVCPUConfig {
> >>   bool ext_zhinxmin;
> >>   bool ext_zve32f;
> >>   bool ext_zve64f;
> >> +bool ext_zmmul;
> >>
> >>   uint32_t mvendorid;
> >>   uint64_t marchid;
> >> diff --git a/target/riscv/insn_trans/trans_rvm.c.inc 
> >> b/target/riscv/insn_trans/trans_rvm.c.inc
> >> index 16b029edf0..ec7f705aab 100644
> >> --- a/target/riscv/insn_trans/trans_rvm.c.inc
> >> +++ b/target/riscv/insn_trans/trans_rvm.c.inc
> >> @@ -18,6 +18,12 @@
> >>* this program.  If not, see .
> >>*/
> >>
> >> +#define REQUIRE_M_OR_ZMMUL(ctx) do {  \
> >> +if (!ctx->cfg_ptr->ext_zmmul && !has_ext(ctx, RVM)) { \
> >> +return false; \
> >> +} \
> >> +} while (0)
> >> +
> >>   static void gen_mulhu_i128(TCGv r2, TCGv r3, TCGv al, TCGv ah, TCGv bl, 
> >> TCGv bh)
> >>   {
> >>   TCGv tmpl = tcg_temp_new();
> >> @@ -65,7 +71,7 @@ static void gen_mul_i128(TCGv rl, TCGv rh,
> >>
> >>   static bool trans_mul(DisasContext *ctx, arg_mul *a)
> >>   {
> >> -REQUIRE_EXT(ctx, RVM);
> >> +REQUIRE_M_OR_ZMMUL(ctx);
> >>   return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, gen_mul_i128);
> >>   }
> >>
> >> @@ -109,7 +115,7 @@ static void gen_mulh_w(TCGv ret, TCGv s1, TCGv s2)
> >>
> >>   static bool trans_mulh(DisasContext *ctx, arg_mulh *a)
> >>   {
> >> -REQUIRE_EXT(ctx, RVM);
> >> +REQUIRE_M_OR_ZMMUL(ctx);
> >>   return gen_arith_per_ol(ctx, a, EXT_SIGN, gen_mulh, gen_mulh_w,
> >>   gen_mulh_i128);
> >>   }
> >> @@ -161,7 +167,7 @@ static void gen_mulhsu_w(TCGv ret, TCGv arg1, TCGv 
> >> arg2)
> >>
> >>   static bool trans_mulhsu(DisasContext *ctx, arg_mulhsu *a)
> >>   {
> >> -REQUIRE_EXT(ctx, RVM);
> >> +REQUIRE_M_OR_ZMMUL(ctx);
> >>   return gen_arith_per_ol(ctx, a, EXT_NONE, gen_mulhsu, gen_mulhsu_w,
> >>   gen_mulhsu_i128);
> >>   }
> >> @@ -176,7 +182,7 @@ static void gen_mulhu(TCGv ret, TCGv s1, TCGv s2)
> >>
> >>   static bool trans_mulhu(DisasContext *ctx, arg_mulhu *a)
> >>   {
> >> -REQUIRE_EXT(ctx, RVM);
> >> +REQUIRE_M_OR_ZMMUL(ctx);
> >>   /* gen_mulh_w works for either sign as input. */
> >>   return gen_arith_per_ol(ctx, a, EXT_ZERO, gen_mulhu, gen_mulh_w,
> >>   gen_mulhu_i128);
> >> @@

[PATCH 13/18] target/arm: Move MDCR_TDE test into exception_target_el

2022-05-23 Thread Richard Henderson

Add a debug parameter, and when true test MDCR_EL2.TDE.
Use this in arm_debug_target_el.

Signed-off-by: Richard Henderson 
---
 target/arm/internals.h  | 20 +++-
 target/arm/op_helper.c  | 12 ++--
 target/arm/tlb_helper.c |  4 ++--
 3 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/target/arm/internals.h b/target/arm/internals.h
index 6df38db836..fbb69e6919 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1074,27 +1074,13 @@ typedef struct ARMVAParameters {
 ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
ARMMMUIdx mmu_idx, bool data);
 
-int exception_target_el(CPUARMState *env, int cur_el, uint32_t *psyn);
+int exception_target_el(CPUARMState *env, int cur_el,
+uint32_t *psyn, bool debug);
 
 /* Return the Exception Level targeted by debug exceptions. */
 static inline int arm_debug_target_el(CPUARMState *env)
 {
-bool secure = arm_is_secure(env);
-bool route_to_el2 = false;
-
-if (arm_is_el2_enabled(env)) {
-route_to_el2 = env->cp15.hcr_el2 & HCR_TGE ||
-   env->cp15.mdcr_el2 & MDCR_TDE;
-}
-
-if (route_to_el2) {
-return 2;
-} else if (arm_feature(env, ARM_FEATURE_EL3) &&
-   !arm_el_is_aa64(env, 3) && secure) {
-return 3;
-} else {
-return 1;
-}
+return exception_target_el(env, 0, NULL, true);
 }
 
 /* Determine if allocation tags are available.  */
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index c4988b6c41..9fc9ab3d20 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -28,7 +28,8 @@
 #define SIGNBIT (uint32_t)0x8000
 #define SIGNBIT64 ((uint64_t)1 << 63)
 
-int exception_target_el(CPUARMState *env, int cur_el, uint32_t *psyn)
+int exception_target_el(CPUARMState *env, int cur_el,
+uint32_t *psyn, bool debug)
 {
 /*
  * FIXME: The following tests really apply to an EL0 origin,
@@ -62,6 +63,12 @@ int exception_target_el(CPUARMState *env, int cur_el, 
uint32_t *psyn)
 return 2;
 }
 
+if (debug
+&& (env->cp15.mdcr_el2 & MDCR_TDE)
+&& arm_is_el2_enabled(env)) {
+return 2;
+}
+
 return 1;
 }
 
@@ -83,7 +90,8 @@ void raise_exception(CPUARMState *env, uint32_t excp, 
uint32_t syndrome,
 {
 int target_el = cur_or_target_el;
 if (cur_or_target_el <= 1) {
-target_el = exception_target_el(env, cur_or_target_el, &syndrome);
+target_el = exception_target_el(env, cur_or_target_el,
+&syndrome, false);
 }
 raise_exception_int(env, excp, syndrome, target_el);
 }
diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c
index 573e18f830..3bf4107faa 100644
--- a/target/arm/tlb_helper.c
+++ b/target/arm/tlb_helper.c
@@ -90,7 +90,7 @@ void arm_deliver_fault(ARMCPU *cpu, vaddr addr,
 uint32_t syn, exc, fsr, fsc;
 
 cur_el = arm_current_el(env);
-target_el = exception_target_el(env, cur_el, NULL);
+target_el = exception_target_el(env, cur_el, NULL, false);
 
 if (fi->stage2) {
 target_el = 2;
@@ -141,7 +141,7 @@ void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
 void helper_exception_pc_alignment(CPUARMState *env, target_ulong pc)
 {
 ARMMMUFaultInfo fi = { .type = ARMFault_Alignment };
-int target_el = exception_target_el(env, arm_current_el(env), NULL);
+int target_el = exception_target_el(env, arm_current_el(env), NULL, false);
 int mmu_idx = cpu_mmu_index(env, true);
 uint32_t fsc;
 
-- 
2.34.1

Re: [PULL 2/3] qga-win32: Add support for NVME bus type

2022-05-23 Thread Richard Henderson


On 5/23/22 12:41, Konstantin Kostiuk wrote:

Bus type spaces (Indicates a storage spaces bus) is not
supported, so return it as unknown.

Signed-off-by: Konstantin Kostiuk 
Message-Id: <20220520201401.706630-1-kkost...@redhat.com>
Reviewed-by: Marc-André Lureau 
Signed-off-by: Konstantin Kostiuk 
---
  qga/commands-win32.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/qga/commands-win32.c b/qga/commands-win32.c
index dcdeb76a68..36f94c0f9c 100644
--- a/qga/commands-win32.c
+++ b/qga/commands-win32.c
@@ -490,6 +490,11 @@ static GuestDiskBusType win2qemu[] = {
  #if (_WIN32_WINNT >= 0x0601)
  [BusTypeVirtual] = GUEST_DISK_BUS_TYPE_VIRTUAL,
  [BusTypeFileBackedVirtual] = GUEST_DISK_BUS_TYPE_FILE_BACKED_VIRTUAL,
+/*
+ * BusTypeSpaces currently is not supported
+ */
+[BusTypeSpaces] = GUEST_DISK_BUS_TYPE_UNKNOWN,
+[BusTypeNvme] = GUEST_DISK_BUS_TYPE_NVME,
  #endif
  };
  


Build fails:

../qga/commands-win32.c:496:6: error: 'BusTypeSpaces' undeclared here (not in a function); 
did you mean 'BusTypeSas'?

  496 | [BusTypeSpaces] = GUEST_DISK_BUS_TYPE_UNKNOWN,
  |  ^
  |  BusTypeSas
../qga/commands-win32.c:496:6: error: array index in initializer not of integer 
type
../qga/commands-win32.c:496:6: note: (near initialization for 'win2qemu')
../qga/commands-win32.c:497:6: error: 'BusTypeNvme' undeclared here (not in a function); 
did you mean 'BusTypeMmc'?

  497 | [BusTypeNvme] = GUEST_DISK_BUS_TYPE_NVME,
  |  ^~~
  |  BusTypeMmc
../qga/commands-win32.c:497:6: error: array index in initializer not of integer 
type
../qga/commands-win32.c:497:6: note: (near initialization for 'win2qemu')


r~

[PATCH 14/18] target/arm: Mark exception helpers as noreturn

2022-05-23 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 target/arm/helper.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index b1334e0c42..5161cdf73d 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -44,9 +44,9 @@ DEF_HELPER_FLAGS_2(usad8, TCG_CALL_NO_RWG_SE, i32, i32, i32)
 
 DEF_HELPER_FLAGS_3(sel_flags, TCG_CALL_NO_RWG_SE,
i32, i32, i32, i32)
-DEF_HELPER_2(exception_internal, void, env, i32)
-DEF_HELPER_4(exception_with_syndrome, void, env, i32, i32, i32)
-DEF_HELPER_2(exception_bkpt_insn, void, env, i32)
+DEF_HELPER_2(exception_internal, noreturn, env, i32)
+DEF_HELPER_4(exception_with_syndrome, noreturn, env, i32, i32, i32)
+DEF_HELPER_2(exception_bkpt_insn, noreturn, env, i32)
 DEF_HELPER_2(exception_pc_alignment, noreturn, env, tl)
 DEF_HELPER_1(setend, void, env)
 DEF_HELPER_2(wfi, void, env, i32)
-- 
2.34.1

[PATCH 10/18] target/arm: Move arm_debug_exception_fsr to debug_helper.c

2022-05-23 Thread Richard Henderson

This function is now only used in debug_helper.c, so there is
no reason to have a declaration in a header.

Signed-off-by: Richard Henderson 
---
 target/arm/internals.h| 25 -
 target/arm/debug_helper.c | 26 ++
 2 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/target/arm/internals.h b/target/arm/internals.h
index 91702b3ff7..bb45100f06 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -794,31 +794,6 @@ static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx 
mmu_idx)
 return &env->cp15.tcr_el[regime_el(env, mmu_idx)];
 }
 
-/* Return the FSR value for a debug exception (watchpoint, hardware
- * breakpoint or BKPT insn) targeting the specified exception level.
- */
-static inline uint32_t arm_debug_exception_fsr(CPUARMState *env)
-{
-ARMMMUFaultInfo fi = { .type = ARMFault_Debug };
-int target_el = arm_debug_target_el(env);
-bool using_lpae = false;
-
-if (target_el == 2 || arm_el_is_aa64(env, target_el)) {
-using_lpae = true;
-} else {
-if (arm_feature(env, ARM_FEATURE_LPAE) &&
-(env->cp15.tcr_el[target_el].raw_tcr & TTBCR_EAE)) {
-using_lpae = true;
-}
-}
-
-if (using_lpae) {
-return arm_fi_to_lfsc(&fi);
-} else {
-return arm_fi_to_sfsc(&fi);
-}
-}
-
 /**
  * arm_num_brps: Return number of implemented breakpoints.
  * Note that the ID register BRPS field is "number of bps - 1",
diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c
index bdcd5f36d6..08d461fd19 100644
--- a/target/arm/debug_helper.c
+++ b/target/arm/debug_helper.c
@@ -378,6 +378,32 @@ bool arm_debug_check_watchpoint(CPUState *cs, 
CPUWatchpoint *wp)
 return check_watchpoints(cpu);
 }
 
+/*
+ * Return the FSR value for a debug exception (watchpoint, hardware
+ * breakpoint or BKPT insn) targeting the specified exception level.
+ */
+static uint32_t arm_debug_exception_fsr(CPUARMState *env)
+{
+ARMMMUFaultInfo fi = { .type = ARMFault_Debug };
+int target_el = arm_debug_target_el(env);
+bool using_lpae = false;
+
+if (target_el == 2 || arm_el_is_aa64(env, target_el)) {
+using_lpae = true;
+} else {
+if (arm_feature(env, ARM_FEATURE_LPAE) &&
+(env->cp15.tcr_el[target_el].raw_tcr & TTBCR_EAE)) {
+using_lpae = true;
+}
+}
+
+if (using_lpae) {
+return arm_fi_to_lfsc(&fi);
+} else {
+return arm_fi_to_sfsc(&fi);
+}
+}
+
 void arm_debug_excp_handler(CPUState *cs)
 {
 /*
-- 
2.34.1

[PATCH 16/18] target/arm: Remove TBFLAG_ANY.DEBUG_TARGET_EL

2022-05-23 Thread Richard Henderson

We no longer need this value during translation,
as it is now handled within the helpers.

Signed-off-by: Richard Henderson 
---
 target/arm/cpu.h   |  6 ++
 target/arm/translate.h |  2 --
 target/arm/helper.c| 12 ++--
 target/arm/translate-a64.c |  1 -
 target/arm/translate.c |  1 -
 5 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 90cdc7b1de..5bc6382fce 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3064,11 +3064,9 @@ FIELD(TBFLAG_ANY, BE_DATA, 3, 1)
 FIELD(TBFLAG_ANY, MMUIDX, 4, 4)
 /* Target EL if we take a floating-point-disabled exception */
 FIELD(TBFLAG_ANY, FPEXC_EL, 8, 2)
-/* For A-profile only, target EL for debug exceptions.  */
-FIELD(TBFLAG_ANY, DEBUG_TARGET_EL, 10, 2)
 /* Memory operations require alignment: SCTLR_ELx.A or CCR.UNALIGN_TRP */
-FIELD(TBFLAG_ANY, ALIGN_MEM, 12, 1)
-FIELD(TBFLAG_ANY, PSTATE__IL, 13, 1)
+FIELD(TBFLAG_ANY, ALIGN_MEM, 10, 1)
+FIELD(TBFLAG_ANY, PSTATE__IL, 11, 1)
 
 /*
  * Bit usage when in AArch32 state, both A- and M-profile.
diff --git a/target/arm/translate.h b/target/arm/translate.h
index c03dbfb618..cd9ee41bbd 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -59,8 +59,6 @@ typedef struct DisasContext {
  */
 uint32_t svc_imm;
 int current_el;
-/* Debug target exception level for single-step exceptions */
-int debug_target_el;
 GHashTable *cp_regs;
 uint64_t features; /* CPU features bits */
 bool aarch64;
diff --git a/target/arm/helper.c b/target/arm/helper.c
index e0be96b988..63c3fee5ff 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -13626,18 +13626,10 @@ static CPUARMTBFlags rebuild_hflags_m32(CPUARMState 
*env, int fp_el,
 return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
 }
 
-static CPUARMTBFlags rebuild_hflags_aprofile(CPUARMState *env)
-{
-CPUARMTBFlags flags = {};
-
-DP_TBFLAG_ANY(flags, DEBUG_TARGET_EL, arm_debug_target_el(env));
-return flags;
-}
-
 static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el,
 ARMMMUIdx mmu_idx)
 {
-CPUARMTBFlags flags = rebuild_hflags_aprofile(env);
+CPUARMTBFlags flags = {};
 int el = arm_current_el(env);
 
 if (arm_sctlr(env, el) & SCTLR_A) {
@@ -13663,7 +13655,7 @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState 
*env, int fp_el,
 static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
 ARMMMUIdx mmu_idx)
 {
-CPUARMTBFlags flags = rebuild_hflags_aprofile(env);
+CPUARMTBFlags flags = {};
 ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx);
 uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr;
 uint64_t sctlr;
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index f502545307..cc9344b015 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -14645,7 +14645,6 @@ static void 
aarch64_tr_init_disas_context(DisasContextBase *dcbase,
 dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
 dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
 dc->is_ldex = false;
-dc->debug_target_el = EX_TBFLAG_ANY(tb_flags, DEBUG_TARGET_EL);
 
 /* Bound the number of insns to execute to those left on the page.  */
 bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 87a899d638..59d7542a48 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -9354,7 +9354,6 @@ static void arm_tr_init_disas_context(DisasContextBase 
*dcbase, CPUState *cs)
 dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
 dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
 } else {
-dc->debug_target_el = EX_TBFLAG_ANY(tb_flags, DEBUG_TARGET_EL);
 dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
 dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
 dc->ns = EX_TBFLAG_A32(tb_flags, NS);
-- 
2.34.1

Re: [PATCH 0/2] linux-user/s390x: Fix unwinding from signal handlers

2022-05-23 Thread Laurent Vivier


Le 04/05/2022 à 00:51, Ilya Leoshkevich a écrit :

Hi,

This is the fix for the issue discussed in [1].
Patch 1 fixes the issue itself, patch 2 adds a test.

[1] https://lists.nongnu.org/archive/html/qemu-devel/2022-04/msg05127.html

Best regards,
Ilya

Ilya Leoshkevich (2):
   linux-user/s390x: Fix unwinding from signal handlers
   tests/tcg/s390x: Test unwinding from signal handlers

  linux-user/s390x/signal.c   |  5 +++
  tests/tcg/s390x/signals-s390x.c | 69 ++---
  2 files changed, 60 insertions(+), 14 deletions(-)



Series applied to my linux-user-for-7.1 branch.

Thanks,
Laurent

[PATCH 18/18] target/arm: Remove route_to_el2 case from sve_exception_el

2022-05-23 Thread Richard Henderson

This adjustment is handled by exception_target_el.

Signed-off-by: Richard Henderson 
---
 target/arm/helper.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index 63c3fee5ff..5c875927cf 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -6155,8 +6155,7 @@ int sve_exception_el(CPUARMState *env, int el)
 /* fall through */
 case 0:
 case 2:
-/* route_to_el2 */
-return hcr_el2 & HCR_TGE ? 2 : 1;
+return 1;
 }
 
 /* Check CPACR.FPEN.  */
-- 
2.34.1

[PATCH 12/18] target/arm: Create raise_exception_debug

2022-05-23 Thread Richard Henderson

Handle EL testing for debug exceptions in a single place.
Split out raise_exception_int as a common helper.

Signed-off-by: Richard Henderson 
---
 target/arm/internals.h|  8 
 target/arm/debug_helper.c | 27 
 target/arm/op_helper.c| 43 ---
 3 files changed, 48 insertions(+), 30 deletions(-)

diff --git a/target/arm/internals.h b/target/arm/internals.h
index 685214503b..6df38db836 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -125,6 +125,14 @@ G_NORETURN void raise_exception_ra(CPUARMState *env, 
uint32_t excp,
uint32_t syndrome,
uint32_t cur_or_target_el, uintptr_t ra);
 
+/**
+ * raise_exception_debug:
+ * Similarly.  If @excp != EXCP_BKPT, modify syndrome to indicate
+ * when origin and target EL are the same.
+ */
+G_NORETURN void raise_exception_debug(CPUARMState *env, uint32_t excp,
+  uint32_t syndrome);
+
 /*
  * For AArch64, map a given EL to an index in the banked_spsr array.
  * Note that this mapping and the AArch32 mapping defined in bank_number()
diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c
index 08d461fd19..181ba7b042 100644
--- a/target/arm/debug_helper.c
+++ b/target/arm/debug_helper.c
@@ -417,19 +417,16 @@ void arm_debug_excp_handler(CPUState *cs)
 if (wp_hit) {
 if (wp_hit->flags & BP_CPU) {
 bool wnr = (wp_hit->flags & BP_WATCHPOINT_HIT_WRITE) != 0;
-bool same_el = arm_debug_target_el(env) == arm_current_el(env);
 
 cs->watchpoint_hit = NULL;
 
 env->exception.fsr = arm_debug_exception_fsr(env);
 env->exception.vaddress = wp_hit->hitaddr;
-raise_exception(env, EXCP_DATA_ABORT,
-syn_watchpoint(same_el, 0, wnr),
-arm_debug_target_el(env));
+raise_exception_debug(env, EXCP_DATA_ABORT,
+  syn_watchpoint(0, 0, wnr));
 }
 } else {
 uint64_t pc = is_a64(env) ? env->pc : env->regs[15];
-bool same_el = (arm_debug_target_el(env) == arm_current_el(env));
 
 /*
  * (1) GDB breakpoints should be handled first.
@@ -449,9 +446,7 @@ void arm_debug_excp_handler(CPUState *cs)
  * exception/security level.
  */
 env->exception.vaddress = 0;
-raise_exception(env, EXCP_PREFETCH_ABORT,
-syn_breakpoint(same_el),
-arm_debug_target_el(env));
+raise_exception_debug(env, EXCP_PREFETCH_ABORT, syn_breakpoint(0));
 }
 }
 
@@ -461,9 +456,6 @@ void arm_debug_excp_handler(CPUState *cs)
  */
 void HELPER(exception_bkpt_insn)(CPUARMState *env, uint32_t syndrome)
 {
-int debug_el = arm_debug_target_el(env);
-int cur_el = arm_current_el(env);
-
 /* FSR will only be used if the debug target EL is AArch32. */
 env->exception.fsr = arm_debug_exception_fsr(env);
 /*
@@ -472,18 +464,7 @@ void HELPER(exception_bkpt_insn)(CPUARMState *env, 
uint32_t syndrome)
  * exception/security level.
  */
 env->exception.vaddress = 0;
-/*
- * Other kinds of architectural debug exception are ignored if
- * they target an exception level below the current one (in QEMU
- * this is checked by arm_generate_debug_exceptions()). Breakpoint
- * instructions are special because they always generate an exception
- * to somewhere: if they can't go to the configured debug exception
- * level they are taken to the current exception level.
- */
-if (debug_el < cur_el) {
-debug_el = cur_el;
-}
-raise_exception(env, EXCP_BKPT, syndrome, debug_el);
+raise_exception_debug(env, EXCP_BKPT, syndrome);
 }
 
 #if !defined(CONFIG_USER_ONLY)
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index 0a50dbf274..c4988b6c41 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -65,15 +65,11 @@ int exception_target_el(CPUARMState *env, int cur_el, 
uint32_t *psyn)
 return 1;
 }
 
-void raise_exception(CPUARMState *env, uint32_t excp, uint32_t syndrome,
- uint32_t cur_or_target_el)
+G_NORETURN static
+void raise_exception_int(CPUARMState *env, uint32_t excp,
+ uint32_t syndrome, uint32_t target_el)
 {
 CPUState *cs = env_cpu(env);
-int target_el = cur_or_target_el;
-
-if (cur_or_target_el <= 1) {
-target_el = exception_target_el(env, cur_or_target_el, &syndrome);
-}
 
 assert(!excp_is_internal(excp));
 cs->exception_index = excp;
@@ -82,6 +78,39 @@ void raise_exception(CPUARMState *env, uint32_t excp, 
uint32_t syndrome,
 cpu_loop_exit(cs);
 }
 
+void raise_exception(CPUARMState *env, uint32_t excp, uint32_t syndrome,
+ uint32_t cur_or_target_el)
+{
+int target_el = cur_or_target_el;
+if (cur_or_target_el <= 1) {
+target_el

[PATCH 08/18] target/arm: Use is_a64 in arm_generate_debug_exceptions

2022-05-23 Thread Richard Henderson

Use the accessor rather than the raw structure member.

Signed-off-by: Richard Henderson 
---
 target/arm/debug_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c
index 2bbf065b3a..3a86901779 100644
--- a/target/arm/debug_helper.c
+++ b/target/arm/debug_helper.c
@@ -98,7 +98,7 @@ bool arm_generate_debug_exceptions(CPUARMState *env)
 {
 int cur_el = arm_current_el(env);
 
-if (env->aarch64) {
+if (is_a64(env)) {
 return aa64_generate_debug_exceptions(env, cur_el);
 } else {
 return aa32_generate_debug_exceptions(env, cur_el);
-- 
2.34.1

[PATCH 17/18] target/arm: Add cur_el parameter to arm_generate_debug_exceptions

2022-05-23 Thread Richard Henderson

We often have this value already handy in the caller.

Signed-off-by: Richard Henderson 
---
 target/arm/internals.h|  2 +-
 target/arm/debug_helper.c | 11 +--
 target/arm/helper-a64.c   |  2 +-
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/target/arm/internals.h b/target/arm/internals.h
index fbb69e6919..09d25612af 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1308,6 +1308,6 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu);
 
 void aa32_max_features(ARMCPU *cpu);
 bool arm_singlestep_active(CPUARMState *env);
-bool arm_generate_debug_exceptions(CPUARMState *env);
+bool arm_generate_debug_exceptions(CPUARMState *env, int cur_el);
 
 #endif
diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c
index 8d87b65a8d..a5363a5048 100644
--- a/target/arm/debug_helper.c
+++ b/target/arm/debug_helper.c
@@ -94,10 +94,8 @@ static bool aa32_generate_debug_exceptions(CPUARMState *env, 
int cur_el)
  * CheckSoftwareStep(), where it is elided because both branches would
  * always return the same value.
  */
-bool arm_generate_debug_exceptions(CPUARMState *env)
+bool arm_generate_debug_exceptions(CPUARMState *env, int cur_el)
 {
-int cur_el = arm_current_el(env);
-
 if (is_a64(env)) {
 return aa64_generate_debug_exceptions(env, cur_el);
 } else {
@@ -111,9 +109,10 @@ bool arm_generate_debug_exceptions(CPUARMState *env)
  */
 bool arm_singlestep_active(CPUARMState *env)
 {
+int cur_el = arm_current_el(env);
 return extract32(env->cp15.mdscr_el1, 0, 1)
 && arm_el_is_aa64(env, arm_debug_target_el(env))
-&& arm_generate_debug_exceptions(env);
+&& arm_generate_debug_exceptions(env, cur_el);
 }
 
 /* Return true if the linked breakpoint entry lbn passes its checks */
@@ -309,7 +308,7 @@ static bool check_watchpoints(ARMCPU *cpu)
  * exceptions here then watchpoint firings are ignored.
  */
 if (extract32(env->cp15.mdscr_el1, 15, 1) == 0
-|| !arm_generate_debug_exceptions(env)) {
+|| !arm_generate_debug_exceptions(env, arm_current_el(env))) {
 return false;
 }
 
@@ -333,7 +332,7 @@ bool arm_debug_check_breakpoint(CPUState *cs)
  * exceptions here then breakpoint firings are ignored.
  */
 if (extract32(env->cp15.mdscr_el1, 15, 1) == 0
-|| !arm_generate_debug_exceptions(env)) {
+|| !arm_generate_debug_exceptions(env, arm_current_el(env))) {
 return false;
 }
 
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 22db213aab..fe2a0aa261 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -924,7 +924,7 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t 
new_pc)
  * We check 1 here and 2 after we've done the pstate/cpsr write() to
  * transition to the EL we're going to.
  */
-if (arm_generate_debug_exceptions(env)) {
+if (arm_generate_debug_exceptions(env, cur_el)) {
 spsr &= ~PSTATE_SS;
 }
 
-- 
2.34.1

[PATCH 11/18] target/arm: Move arm_debug_target_el to internals.h

2022-05-23 Thread Richard Henderson

This function is private to the implementation.

Signed-off-by: Richard Henderson 
---
 target/arm/cpu.h   | 21 -
 target/arm/internals.h | 21 +
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 92c9758e86..90cdc7b1de 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -2967,27 +2967,6 @@ typedef enum ARMASIdx {
 ARMASIdx_TagS = 3,
 } ARMASIdx;
 
-/* Return the Exception Level targeted by debug exceptions. */
-static inline int arm_debug_target_el(CPUARMState *env)
-{
-bool secure = arm_is_secure(env);
-bool route_to_el2 = false;
-
-if (arm_is_el2_enabled(env)) {
-route_to_el2 = env->cp15.hcr_el2 & HCR_TGE ||
-   env->cp15.mdcr_el2 & MDCR_TDE;
-}
-
-if (route_to_el2) {
-return 2;
-} else if (arm_feature(env, ARM_FEATURE_EL3) &&
-   !arm_el_is_aa64(env, 3) && secure) {
-return 3;
-} else {
-return 1;
-}
-}
-
 static inline bool arm_v7m_csselr_razwi(ARMCPU *cpu)
 {
 /* If all the CLIDR.Ctypem bits are 0 there are no caches, and
diff --git a/target/arm/internals.h b/target/arm/internals.h
index bb45100f06..685214503b 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1068,6 +1068,27 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, 
uint64_t va,
 
 int exception_target_el(CPUARMState *env, int cur_el, uint32_t *psyn);
 
+/* Return the Exception Level targeted by debug exceptions. */
+static inline int arm_debug_target_el(CPUARMState *env)
+{
+bool secure = arm_is_secure(env);
+bool route_to_el2 = false;
+
+if (arm_is_el2_enabled(env)) {
+route_to_el2 = env->cp15.hcr_el2 & HCR_TGE ||
+   env->cp15.mdcr_el2 & MDCR_TDE;
+}
+
+if (route_to_el2) {
+return 2;
+} else if (arm_feature(env, ARM_FEATURE_EL3) &&
+   !arm_el_is_aa64(env, 3) && secure) {
+return 3;
+} else {
+return 1;
+}
+}
+
 /* Determine if allocation tags are available.  */
 static inline bool allocation_tag_access_enabled(CPUARMState *env, int el,
  uint64_t sctlr)
-- 
2.34.1

[PATCH 15/18] target/arm: Create helper_exception_swstep

2022-05-23 Thread Richard Henderson

Move the computation from gen_swstep_exception into a helper.
The assert removed here is present in raise_exception_debug.

Signed-off-by: Richard Henderson 
---
 target/arm/helper.h   |  1 +
 target/arm/translate.h| 12 +++-
 target/arm/debug_helper.c |  5 +
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index 5161cdf73d..f3fd53f3f9 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -47,6 +47,7 @@ DEF_HELPER_FLAGS_3(sel_flags, TCG_CALL_NO_RWG_SE,
 DEF_HELPER_2(exception_internal, noreturn, env, i32)
 DEF_HELPER_4(exception_with_syndrome, noreturn, env, i32, i32, i32)
 DEF_HELPER_2(exception_bkpt_insn, noreturn, env, i32)
+DEF_HELPER_2(exception_swstep, noreturn, env, i32)
 DEF_HELPER_2(exception_pc_alignment, noreturn, env, tl)
 DEF_HELPER_1(setend, void, env)
 DEF_HELPER_2(wfi, void, env, i32)
diff --git a/target/arm/translate.h b/target/arm/translate.h
index 6f0ebdc88e..c03dbfb618 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -340,15 +340,9 @@ static inline void gen_exception(int excp, uint32_t 
syndrome,
 /* Generate an architectural singlestep exception */
 static inline void gen_swstep_exception(DisasContext *s, int isv, int ex)
 {
-bool same_el = (s->debug_target_el == s->current_el);
-
-/*
- * If singlestep is targeting a lower EL than the current one,
- * then s->ss_active must be false and we can never get here.
- */
-assert(s->debug_target_el >= s->current_el);
-
-gen_exception(EXCP_UDEF, syn_swstep(same_el, isv, ex), s->debug_target_el);
+/* Fill in the same_el field of the syndrome in the helper. */
+uint32_t syn = syn_swstep(false, isv, ex);
+gen_helper_exception_swstep(cpu_env, tcg_constant_i32(syn));
 }
 
 /*
diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c
index 181ba7b042..8d87b65a8d 100644
--- a/target/arm/debug_helper.c
+++ b/target/arm/debug_helper.c
@@ -467,6 +467,11 @@ void HELPER(exception_bkpt_insn)(CPUARMState *env, 
uint32_t syndrome)
 raise_exception_debug(env, EXCP_BKPT, syndrome);
 }
 
+void HELPER(exception_swstep)(CPUARMState *env, uint32_t syndrome)
+{
+raise_exception_debug(env, EXCP_UDEF, syndrome);
+}
+
 #if !defined(CONFIG_USER_ONLY)
 
 vaddr arm_adjust_watchpoint_address(CPUState *cs, vaddr addr, int len)
-- 
2.34.1

Re: [PATCH v2 0/3] linux-user: Use CPUArchState* instead of void* when possible

2022-05-23 Thread Laurent Vivier


Le 09/05/2022 à 22:57, Philippe Mathieu-Daudé a écrit :

From: Philippe Mathieu-Daudé 

Since v1:
- Rebased

v1: 
https://lore.kernel.org/qemu-devel/20220306234005.52511-1-philippe.mathieu.da...@gmail.com/

Philippe Mathieu-Daudé (3):
   linux-user/elfload: Remove pointless non-const CPUArchState cast
   linux-user: Have do_syscall() use CPUArchState* instead of void*
   linux-user: Remove pointless CPU{ARCH}State casts

  linux-user/elfload.c|   2 +-
  linux-user/strace.c | 202 ++--
  linux-user/strace.h |   4 +-
  linux-user/syscall.c|  81 +++
  linux-user/uname.c  |   4 +-
  linux-user/uname.h  |   2 +-
  linux-user/user-internals.h |  18 ++--
  7 files changed, 155 insertions(+), 158 deletions(-)



Series applied to my linux-user-for-7.1 branch.

Thanks,
Laurent

[PATCH 06/18] target/arm: Move arm_generate_debug_exceptions out of line

2022-05-23 Thread Richard Henderson

Move arm_generate_debug_exceptions and its two subroutines,
{aa32,aa64}_generate_debug_exceptions into debug_helper.c,
and the one interface declaration to internals.h.

Signed-off-by: Richard Henderson 
---
 target/arm/cpu.h  | 91 -
 target/arm/internals.h|  1 +
 target/arm/debug_helper.c | 94 +++
 3 files changed, 95 insertions(+), 91 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 2e115a0281..92c9758e86 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -2996,97 +2996,6 @@ static inline bool arm_v7m_csselr_razwi(ARMCPU *cpu)
 return (cpu->clidr & R_V7M_CLIDR_CTYPE_ALL_MASK) != 0;
 }
 
-/* See AArch64.GenerateDebugExceptionsFrom() in ARM ARM pseudocode */
-static inline bool aa64_generate_debug_exceptions(CPUARMState *env)
-{
-int cur_el = arm_current_el(env);
-int debug_el;
-
-if (cur_el == 3) {
-return false;
-}
-
-/* MDCR_EL3.SDD disables debug events from Secure state */
-if (arm_is_secure_below_el3(env)
-&& extract32(env->cp15.mdcr_el3, 16, 1)) {
-return false;
-}
-
-/*
- * Same EL to same EL debug exceptions need MDSCR_KDE enabled
- * while not masking the (D)ebug bit in DAIF.
- */
-debug_el = arm_debug_target_el(env);
-
-if (cur_el == debug_el) {
-return extract32(env->cp15.mdscr_el1, 13, 1)
-&& !(env->daif & PSTATE_D);
-}
-
-/* Otherwise the debug target needs to be a higher EL */
-return debug_el > cur_el;
-}
-
-static inline bool aa32_generate_debug_exceptions(CPUARMState *env)
-{
-int el = arm_current_el(env);
-
-if (el == 0 && arm_el_is_aa64(env, 1)) {
-return aa64_generate_debug_exceptions(env);
-}
-
-if (arm_is_secure(env)) {
-int spd;
-
-if (el == 0 && (env->cp15.sder & 1)) {
-/* SDER.SUIDEN means debug exceptions from Secure EL0
- * are always enabled. Otherwise they are controlled by
- * SDCR.SPD like those from other Secure ELs.
- */
-return true;
-}
-
-spd = extract32(env->cp15.mdcr_el3, 14, 2);
-switch (spd) {
-case 1:
-/* SPD == 0b01 is reserved, but behaves as 0b00. */
-case 0:
-/* For 0b00 we return true if external secure invasive debug
- * is enabled. On real hardware this is controlled by external
- * signals to the core. QEMU always permits debug, and behaves
- * as if DBGEN, SPIDEN, NIDEN and SPNIDEN are all tied high.
- */
-return true;
-case 2:
-return false;
-case 3:
-return true;
-}
-}
-
-return el != 2;
-}
-
-/* Return true if debugging exceptions are currently enabled.
- * This corresponds to what in ARM ARM pseudocode would be
- *if UsingAArch32() then
- *return AArch32.GenerateDebugExceptions()
- *else
- *return AArch64.GenerateDebugExceptions()
- * We choose to push the if() down into this function for clarity,
- * since the pseudocode has it at all callsites except for the one in
- * CheckSoftwareStep(), where it is elided because both branches would
- * always return the same value.
- */
-static inline bool arm_generate_debug_exceptions(CPUARMState *env)
-{
-if (env->aarch64) {
-return aa64_generate_debug_exceptions(env);
-} else {
-return aa32_generate_debug_exceptions(env);
-}
-}
-
 static inline bool arm_sctlr_b(CPUARMState *env)
 {
 return
diff --git a/target/arm/internals.h b/target/arm/internals.h
index b447d850ae..91702b3ff7 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1318,5 +1318,6 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu);
 
 void aa32_max_features(ARMCPU *cpu);
 bool arm_singlestep_active(CPUARMState *env);
+bool arm_generate_debug_exceptions(CPUARMState *env);
 
 #endif
diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c
index 1abf41c5f8..20a0e4261a 100644
--- a/target/arm/debug_helper.c
+++ b/target/arm/debug_helper.c
@@ -12,6 +12,100 @@
 #include "exec/helper-proto.h"
 
 
+/* See AArch64.GenerateDebugExceptionsFrom() in ARM ARM pseudocode */
+static bool aa64_generate_debug_exceptions(CPUARMState *env)
+{
+int cur_el = arm_current_el(env);
+int debug_el;
+
+if (cur_el == 3) {
+return false;
+}
+
+/* MDCR_EL3.SDD disables debug events from Secure state */
+if (arm_is_secure_below_el3(env)
+&& extract32(env->cp15.mdcr_el3, 16, 1)) {
+return false;
+}
+
+/*
+ * Same EL to same EL debug exceptions need MDSCR_KDE enabled
+ * while not masking the (D)ebug bit in DAIF.
+ */
+debug_el = arm_debug_target_el(env);
+
+if (cur_el == debug_el) {
+return extract32(env->cp15.mdscr_el1, 13, 1)
+&& !(env->daif & PSTATE_D);
+}
+
+/* Otherwise the debug target needs t

[PATCH 07/18] target/arm: Hoist arm_current_el in arm_generate_debug_exceptions

2022-05-23 Thread Richard Henderson

Read this value once in the main function, and pass it
around between the subroutines.

Signed-off-by: Richard Henderson 
---
 target/arm/debug_helper.c | 21 ++---
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c
index 20a0e4261a..2bbf065b3a 100644
--- a/target/arm/debug_helper.c
+++ b/target/arm/debug_helper.c
@@ -13,9 +13,8 @@
 
 
 /* See AArch64.GenerateDebugExceptionsFrom() in ARM ARM pseudocode */
-static bool aa64_generate_debug_exceptions(CPUARMState *env)
+static bool aa64_generate_debug_exceptions(CPUARMState *env, int cur_el)
 {
-int cur_el = arm_current_el(env);
 int debug_el;
 
 if (cur_el == 3) {
@@ -43,18 +42,16 @@ static bool aa64_generate_debug_exceptions(CPUARMState *env)
 return debug_el > cur_el;
 }
 
-static bool aa32_generate_debug_exceptions(CPUARMState *env)
+static bool aa32_generate_debug_exceptions(CPUARMState *env, int cur_el)
 {
-int el = arm_current_el(env);
-
-if (el == 0 && arm_el_is_aa64(env, 1)) {
-return aa64_generate_debug_exceptions(env);
+if (cur_el == 0 && arm_el_is_aa64(env, 1)) {
+return aa64_generate_debug_exceptions(env, cur_el);
 }
 
 if (arm_is_secure(env)) {
 int spd;
 
-if (el == 0 && (env->cp15.sder & 1)) {
+if (cur_el == 0 && (env->cp15.sder & 1)) {
 /*
  * SDER.SUIDEN means debug exceptions from Secure EL0
  * are always enabled. Otherwise they are controlled by
@@ -82,7 +79,7 @@ static bool aa32_generate_debug_exceptions(CPUARMState *env)
 }
 }
 
-return el != 2;
+return cur_el != 2;
 }
 
 /*
@@ -99,10 +96,12 @@ static bool aa32_generate_debug_exceptions(CPUARMState *env)
  */
 bool arm_generate_debug_exceptions(CPUARMState *env)
 {
+int cur_el = arm_current_el(env);
+
 if (env->aarch64) {
-return aa64_generate_debug_exceptions(env);
+return aa64_generate_debug_exceptions(env, cur_el);
 } else {
-return aa32_generate_debug_exceptions(env);
+return aa32_generate_debug_exceptions(env, cur_el);
 }
 }
 
-- 
2.34.1

[PATCH 02/18] target/arm: Use arm_current_el for simple exceptions

2022-05-23 Thread Richard Henderson

For these cases, the syndrome does not depend on the
origin or target EL, so we can simply defer selection
of the target EL to raise_exception.

Signed-off-by: Richard Henderson 
---
 target/arm/helper-a64.c |  5 +++--
 target/arm/helper.c | 10 +++---
 target/arm/mte_helper.c |  7 +++
 target/arm/op_helper.c  | 13 ++---
 4 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 77a8502b6b..22db213aab 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -70,12 +70,13 @@ static void daif_check(CPUARMState *env, uint32_t op,
uint32_t imm, uintptr_t ra)
 {
 /* DAIF update to PSTATE. This is OK from EL0 only if UMA is set.  */
-if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) {
+int el = arm_current_el(env);
+if (el == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) {
 raise_exception_ra(env, EXCP_UDEF,
syn_aa64_sysregtrap(0, extract32(op, 0, 3),
extract32(op, 3, 3), 4,
imm, 0x1f, 0),
-   exception_target_el(env), ra);
+   el, ra);
 }
 }
 
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 40da63913c..e0be96b988 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -3232,14 +3232,10 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t 
value,
  * Synchronous external aborts during a translation table walk
  * are taken as Data Abort exceptions.
  */
-if (fi.stage2) {
-if (current_el == 3) {
-target_el = 3;
-} else {
-target_el = 2;
-}
+if (fi.stage2 && current_el < 2) {
+target_el = 2;
 } else {
-target_el = exception_target_el(env);
+target_el = current_el;
 }
 take_exc = true;
 }
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index d11a8c70d0..98f2a3215d 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -540,14 +540,13 @@ void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, 
uint64_t val)
 static void mte_sync_check_fail(CPUARMState *env, uint32_t desc,
 uint64_t dirty_ptr, uintptr_t ra)
 {
-int is_write, syn;
+int is_write, syn, el = arm_current_el(env);
 
 env->exception.vaddress = dirty_ptr;
 
 is_write = FIELD_EX32(desc, MTEDESC, WRITE);
-syn = syn_data_abort_no_iss(arm_current_el(env) != 0, 0, 0, 0, 0, is_write,
-0x11);
-raise_exception_ra(env, EXCP_DATA_ABORT, syn, exception_target_el(env), 
ra);
+syn = syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, is_write, 0x11);
+raise_exception_ra(env, EXCP_DATA_ABORT, syn, el, ra);
 g_assert_not_reached();
 }
 
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index 6b9141b79a..61e9c1d903 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -503,7 +503,7 @@ uint32_t HELPER(get_r13_banked)(CPUARMState *env, uint32_t 
mode)
  * Other UNPREDICTABLE and UNDEF cases were caught at translate time.
  */
 raise_exception(env, EXCP_UDEF, syn_uncategorized(),
-exception_target_el(env));
+arm_current_el(env));
 }
 
 if ((env->uncached_cpsr & CPSR_M) == mode) {
@@ -567,8 +567,7 @@ static void msr_mrs_banked_exc_checks(CPUARMState *env, 
uint32_t tgtmode,
 return;
 
 undef:
-raise_exception(env, EXCP_UDEF, syn_uncategorized(),
-exception_target_el(env));
+raise_exception(env, EXCP_UDEF, syn_uncategorized(), arm_current_el(env));
 }
 
 void HELPER(msr_banked)(CPUARMState *env, uint32_t value, uint32_t tgtmode,
@@ -697,7 +696,7 @@ void HELPER(access_check_cp_reg)(CPUARMState *env, void 
*rip, uint32_t syndrome,
 target_el = res & CP_ACCESS_EL_MASK;
 switch (target_el) {
 case 0:
-target_el = exception_target_el(env);
+target_el = arm_current_el(env);
 break;
 case 2:
 assert(arm_current_el(env) != 3);
@@ -808,7 +807,7 @@ void HELPER(pre_hvc)(CPUARMState *env)
 
 if (undef) {
 raise_exception(env, EXCP_UDEF, syn_uncategorized(),
-exception_target_el(env));
+arm_current_el(env));
 }
 }
 
@@ -870,7 +869,7 @@ void HELPER(pre_smc)(CPUARMState *env, uint32_t syndrome)
  * This handles the very last line of the previous table.
  */
 raise_exception(env, EXCP_UDEF, syn_uncategorized(),
-exception_target_el(env));
+arm_current_el(env));
 }
 
 if (cur_el == 1 && (arm_hcr_el2_eff(env) & HCR_TSC)) {
@@ -889,7 +888,7 @@ void HELPER(pre_smc)(CPUARMStat

[PATCH 09/18] target/arm: Move exception_bkpt_insn to debug_helper.c

2022-05-23 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 target/arm/debug_helper.c | 31 +++
 target/arm/op_helper.c| 29 -
 2 files changed, 31 insertions(+), 29 deletions(-)

diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c
index 3a86901779..bdcd5f36d6 100644
--- a/target/arm/debug_helper.c
+++ b/target/arm/debug_helper.c
@@ -429,6 +429,37 @@ void arm_debug_excp_handler(CPUState *cs)
 }
 }
 
+/*
+ * Raise an EXCP_BKPT with the specified syndrome register value,
+ * targeting the correct exception level for debug exceptions.
+ */
+void HELPER(exception_bkpt_insn)(CPUARMState *env, uint32_t syndrome)
+{
+int debug_el = arm_debug_target_el(env);
+int cur_el = arm_current_el(env);
+
+/* FSR will only be used if the debug target EL is AArch32. */
+env->exception.fsr = arm_debug_exception_fsr(env);
+/*
+ * FAR is UNKNOWN: clear vaddress to avoid potentially exposing
+ * values to the guest that it shouldn't be able to see at its
+ * exception/security level.
+ */
+env->exception.vaddress = 0;
+/*
+ * Other kinds of architectural debug exception are ignored if
+ * they target an exception level below the current one (in QEMU
+ * this is checked by arm_generate_debug_exceptions()). Breakpoint
+ * instructions are special because they always generate an exception
+ * to somewhere: if they can't go to the configured debug exception
+ * level they are taken to the current exception level.
+ */
+if (debug_el < cur_el) {
+debug_el = cur_el;
+}
+raise_exception(env, EXCP_BKPT, syndrome, debug_el);
+}
+
 #if !defined(CONFIG_USER_ONLY)
 
 vaddr arm_adjust_watchpoint_address(CPUState *cs, vaddr addr, int len)
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index 55440dfa84..0a50dbf274 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -413,35 +413,6 @@ void HELPER(exception_with_syndrome)(CPUARMState *env, 
uint32_t excp,
 raise_exception(env, excp, syndrome, target_el);
 }
 
-/* Raise an EXCP_BKPT with the specified syndrome register value,
- * targeting the correct exception level for debug exceptions.
- */
-void HELPER(exception_bkpt_insn)(CPUARMState *env, uint32_t syndrome)
-{
-int debug_el = arm_debug_target_el(env);
-int cur_el = arm_current_el(env);
-
-/* FSR will only be used if the debug target EL is AArch32. */
-env->exception.fsr = arm_debug_exception_fsr(env);
-/* FAR is UNKNOWN: clear vaddress to avoid potentially exposing
- * values to the guest that it shouldn't be able to see at its
- * exception/security level.
- */
-env->exception.vaddress = 0;
-/*
- * Other kinds of architectural debug exception are ignored if
- * they target an exception level below the current one (in QEMU
- * this is checked by arm_generate_debug_exceptions()). Breakpoint
- * instructions are special because they always generate an exception
- * to somewhere: if they can't go to the configured debug exception
- * level they are taken to the current exception level.
- */
-if (debug_el < cur_el) {
-debug_el = cur_el;
-}
-raise_exception(env, EXCP_BKPT, syndrome, debug_el);
-}
-
 uint32_t HELPER(cpsr_read)(CPUARMState *env)
 {
 return cpsr_read(env) & ~CPSR_EXEC;
-- 
2.34.1

[PATCH 04/18] target/arm: Move HCR_TGE check into exception_target_el

2022-05-23 Thread Richard Henderson

Move the TGE test from raise_exception into
exception_target_el to consolidate tests in one place.
Note that this ought to apply only to origin of EL0,
but that cannot be confirmed at this time.
Update the AdvSIMDFPAccessTrap doc reference to DDI0487H.a.

Signed-off-by: Richard Henderson 
---
 target/arm/op_helper.c | 47 +-
 1 file changed, 28 insertions(+), 19 deletions(-)

diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index 6858b8980d..55440dfa84 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -30,17 +30,39 @@
 
 int exception_target_el(CPUARMState *env, int cur_el, uint32_t *psyn)
 {
-int target_el = MAX(1, cur_el);
+/*
+ * FIXME: The following tests really apply to an EL0 origin,
+ * not to a target of EL1.  However, the origin will never be
+ * EL1 for these cases (no aa32 secure EL1, can't enter EL1
+ * with TGE set).  Delay fixing this until all places that
+ * might perform MAX(cur_el, 1) are audited.
+ */
+if (cur_el >= 2) {
+return 2;
+}
 
 /*
  * No such thing as secure EL1 if EL3 is aarch32,
  * so update the target EL to EL3 in this case.
  */
-if (arm_is_secure(env) && !arm_el_is_aa64(env, 3) && target_el == 1) {
-target_el = 3;
+if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) {
+return 3;
 }
 
-return target_el;
+if (arm_hcr_el2_eff(env) & HCR_TGE) {
+/*
+ * Redirect NS EL1 exceptions to NS EL2. These are reported with
+ * their original syndrome register value, with the exception of
+ * SIMD/FP access traps, which are reported as uncategorized
+ * (see DDI0487 H.a rule RJNBTN).
+ */
+if (psyn && syn_get_ec(*psyn) == EC_ADVSIMDFPACCESSTRAP) {
+*psyn = syn_uncategorized();
+}
+return 2;
+}
+
+return 1;
 }
 
 void raise_exception(CPUARMState *env, uint32_t excp, uint32_t syndrome,
@@ -49,21 +71,8 @@ void raise_exception(CPUARMState *env, uint32_t excp, 
uint32_t syndrome,
 CPUState *cs = env_cpu(env);
 int target_el = cur_or_target_el;
 
-if (cur_or_target_el == 0) {
-target_el = exception_target_el(env, 0, &syndrome);
-}
-
-if (target_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
-/*
- * Redirect NS EL1 exceptions to NS EL2. These are reported with
- * their original syndrome register value, with the exception of
- * SIMD/FP access traps, which are reported as uncategorized
- * (see DDI0478C.a D1.10.4)
- */
-target_el = 2;
-if (syn_get_ec(syndrome) == EC_ADVSIMDFPACCESSTRAP) {
-syndrome = syn_uncategorized();
-}
+if (cur_or_target_el <= 1) {
+target_el = exception_target_el(env, cur_or_target_el, &syndrome);
 }
 
 assert(!excp_is_internal(excp));
-- 
2.34.1

[PATCH 01/18] target/arm: Allow raise_exception to handle finding target EL

2022-05-23 Thread Richard Henderson

The work of finding the correct target EL for an exception is
currently split between raise_exception and exception_target_el.
Begin merging these by allowing the input to raise_exception
to be zero and use exception_target_el for that case.

Signed-off-by: Richard Henderson 
---
 target/arm/internals.h | 11 ++-
 target/arm/op_helper.c | 13 +
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/target/arm/internals.h b/target/arm/internals.h
index b654bee468..03363b0f32 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -111,18 +111,19 @@ FIELD(DBGWCR, SSCE, 29, 1)
 /**
  * raise_exception: Raise the specified exception.
  * Raise a guest exception with the specified value, syndrome register
- * and target exception level. This should be called from helper functions,
- * and never returns because we will longjump back up to the CPU main loop.
+ * and the current or target exception level. This should be called from
+ * helper functions, and never returns because we will longjump back up
+ * to the CPU main loop.
  */
 G_NORETURN void raise_exception(CPUARMState *env, uint32_t excp,
-uint32_t syndrome, uint32_t target_el);
+uint32_t syndrome, uint32_t cur_or_target_el);
 
 /*
  * Similarly, but also use unwinding to restore cpu state.
  */
 G_NORETURN void raise_exception_ra(CPUARMState *env, uint32_t excp,
-  uint32_t syndrome, uint32_t target_el,
-  uintptr_t ra);
+   uint32_t syndrome,
+   uint32_t cur_or_target_el, uintptr_t ra);
 
 /*
  * For AArch64, map a given EL to an index in the banked_spsr array.
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index c4bd668870..6b9141b79a 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -28,10 +28,15 @@
 #define SIGNBIT (uint32_t)0x8000
 #define SIGNBIT64 ((uint64_t)1 << 63)
 
-void raise_exception(CPUARMState *env, uint32_t excp,
- uint32_t syndrome, uint32_t target_el)
+void raise_exception(CPUARMState *env, uint32_t excp, uint32_t syndrome,
+ uint32_t cur_or_target_el)
 {
 CPUState *cs = env_cpu(env);
+int target_el = cur_or_target_el;
+
+if (cur_or_target_el == 0) {
+target_el = exception_target_el(env);
+}
 
 if (target_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
 /*
@@ -54,7 +59,7 @@ void raise_exception(CPUARMState *env, uint32_t excp,
 }
 
 void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome,
-uint32_t target_el, uintptr_t ra)
+uint32_t cur_or_target_el, uintptr_t ra)
 {
 CPUState *cs = env_cpu(env);
 
@@ -64,7 +69,7 @@ void raise_exception_ra(CPUARMState *env, uint32_t excp, 
uint32_t syndrome,
  * the caller passed us, and cannot use cpu_loop_exit_restore().
  */
 cpu_restore_state(cs, ra, true);
-raise_exception(env, excp, syndrome, target_el);
+raise_exception(env, excp, syndrome, cur_or_target_el);
 }
 
 uint64_t HELPER(neon_tbl)(CPUARMState *env, uint32_t desc,
-- 
2.34.1

[PATCH 05/18] target/arm: Move arm_singlestep_active out of line

2022-05-23 Thread Richard Henderson

Move the function to debug_helper.c, and the
declaration to internals.h.

Signed-off-by: Richard Henderson 
---
 target/arm/cpu.h  | 10 --
 target/arm/internals.h|  1 +
 target/arm/debug_helper.c | 12 
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index c1865ad5da..2e115a0281 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3087,16 +3087,6 @@ static inline bool 
arm_generate_debug_exceptions(CPUARMState *env)
 }
 }
 
-/* Is single-stepping active? (Note that the "is EL_D AArch64?" check
- * implicitly means this always returns false in pre-v8 CPUs.)
- */
-static inline bool arm_singlestep_active(CPUARMState *env)
-{
-return extract32(env->cp15.mdscr_el1, 0, 1)
-&& arm_el_is_aa64(env, arm_debug_target_el(env))
-&& arm_generate_debug_exceptions(env);
-}
-
 static inline bool arm_sctlr_b(CPUARMState *env)
 {
 return
diff --git a/target/arm/internals.h b/target/arm/internals.h
index a71f795628..b447d850ae 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1317,5 +1317,6 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu);
 #endif
 
 void aa32_max_features(ARMCPU *cpu);
+bool arm_singlestep_active(CPUARMState *env);
 
 #endif
diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c
index 46893697cc..1abf41c5f8 100644
--- a/target/arm/debug_helper.c
+++ b/target/arm/debug_helper.c
@@ -11,6 +11,18 @@
 #include "exec/exec-all.h"
 #include "exec/helper-proto.h"
 
+
+/*
+ * Is single-stepping active? (Note that the "is EL_D AArch64?" check
+ * implicitly means this always returns false in pre-v8 CPUs.)
+ */
+bool arm_singlestep_active(CPUARMState *env)
+{
+return extract32(env->cp15.mdscr_el1, 0, 1)
+&& arm_el_is_aa64(env, arm_debug_target_el(env))
+&& arm_generate_debug_exceptions(env);
+}
+
 /* Return true if the linked breakpoint entry lbn passes its checks */
 static bool linked_bp_matches(ARMCPU *cpu, int lbn)
 {
-- 
2.34.1

[PATCH 03/18] target/arm: Move and expand parameters to exception_target_el

2022-05-23 Thread Richard Henderson

Move exception_target_el out of line.
Add cur_el parameter, because 2 of 3 users already have that handy.
Add psyn parameter in preparation for more code movement.

Signed-off-by: Richard Henderson 
---
 target/arm/internals.h  | 15 +--
 target/arm/op_helper.c  | 17 -
 target/arm/tlb_helper.c | 10 ++
 3 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/target/arm/internals.h b/target/arm/internals.h
index 03363b0f32..a71f795628 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1091,20 +1091,7 @@ typedef struct ARMVAParameters {
 ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
ARMMMUIdx mmu_idx, bool data);
 
-static inline int exception_target_el(CPUARMState *env)
-{
-int target_el = MAX(1, arm_current_el(env));
-
-/*
- * No such thing as secure EL1 if EL3 is aarch32,
- * so update the target EL to EL3 in this case.
- */
-if (arm_is_secure(env) && !arm_el_is_aa64(env, 3) && target_el == 1) {
-target_el = 3;
-}
-
-return target_el;
-}
+int exception_target_el(CPUARMState *env, int cur_el, uint32_t *psyn);
 
 /* Determine if allocation tags are available.  */
 static inline bool allocation_tag_access_enabled(CPUARMState *env, int el,
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index 61e9c1d903..6858b8980d 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -28,6 +28,21 @@
 #define SIGNBIT (uint32_t)0x8000
 #define SIGNBIT64 ((uint64_t)1 << 63)
 
+int exception_target_el(CPUARMState *env, int cur_el, uint32_t *psyn)
+{
+int target_el = MAX(1, cur_el);
+
+/*
+ * No such thing as secure EL1 if EL3 is aarch32,
+ * so update the target EL to EL3 in this case.
+ */
+if (arm_is_secure(env) && !arm_el_is_aa64(env, 3) && target_el == 1) {
+target_el = 3;
+}
+
+return target_el;
+}
+
 void raise_exception(CPUARMState *env, uint32_t excp, uint32_t syndrome,
  uint32_t cur_or_target_el)
 {
@@ -35,7 +50,7 @@ void raise_exception(CPUARMState *env, uint32_t excp, 
uint32_t syndrome,
 int target_el = cur_or_target_el;
 
 if (cur_or_target_el == 0) {
-target_el = exception_target_el(env);
+target_el = exception_target_el(env, 0, &syndrome);
 }
 
 if (target_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c
index 6421e16202..573e18f830 100644
--- a/target/arm/tlb_helper.c
+++ b/target/arm/tlb_helper.c
@@ -85,11 +85,13 @@ void arm_deliver_fault(ARMCPU *cpu, vaddr addr,
int mmu_idx, ARMMMUFaultInfo *fi)
 {
 CPUARMState *env = &cpu->env;
-int target_el;
+int cur_el, target_el;
 bool same_el;
 uint32_t syn, exc, fsr, fsc;
 
-target_el = exception_target_el(env);
+cur_el = arm_current_el(env);
+target_el = exception_target_el(env, cur_el, NULL);
+
 if (fi->stage2) {
 target_el = 2;
 env->cp15.hpfar_el2 = extract64(fi->s2addr, 12, 47) << 4;
@@ -97,7 +99,7 @@ void arm_deliver_fault(ARMCPU *cpu, vaddr addr,
 env->cp15.hpfar_el2 |= HPFAR_NS;
 }
 }
-same_el = (arm_current_el(env) == target_el);
+same_el = cur_el == target_el;
 
 fsr = compute_fsr_fsc(env, fi, target_el, mmu_idx, &fsc);
 
@@ -139,7 +141,7 @@ void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
 void helper_exception_pc_alignment(CPUARMState *env, target_ulong pc)
 {
 ARMMMUFaultInfo fi = { .type = ARMFault_Alignment };
-int target_el = exception_target_el(env);
+int target_el = exception_target_el(env, arm_current_el(env), NULL);
 int mmu_idx = cpu_mmu_index(env, true);
 uint32_t fsc;
 
-- 
2.34.1

[PATCH 00/18] target/arm: tidy exception routing

2022-05-23 Thread Richard Henderson

The target el for raising an exception currently lives in at
least 3 places: exception_target_el, arm_debug_target_el, and
in {sve,fp}_exception_el.

This patch set aims to put all of the routing into the same place.

For the purposes of prep for SME, the goal is the last patch,
where we do not confuse the level at which SVE exceptions are
trapped with the level to which exceptions are delivered.

I suspect that the existing SME prep patch where I remove the
fp checks and then compare fp vs sve el, is flawed while this
route_to_el2 line is still present.

The end result for debug exceptions isn't quite as clean as I
was hoping, but hopefully it's still better than before.


r~


Richard Henderson (18):
  target/arm: Allow raise_exception to handle finding target EL
  target/arm: Use arm_current_el for simple exceptions
  target/arm: Move and expand parameters to exception_target_el
  target/arm: Move HCR_TGE check into exception_target_el
  target/arm: Move arm_singlestep_active out of line
  target/arm: Move arm_generate_debug_exceptions out of line
  target/arm: Hoist arm_current_el in arm_generate_debug_exceptions
  target/arm: Use is_a64 in arm_generate_debug_exceptions
  target/arm: Move exception_bkpt_insn to debug_helper.c
  target/arm: Move arm_debug_exception_fsr to debug_helper.c
  target/arm: Move arm_debug_target_el to internals.h
  target/arm: Create raise_exception_debug
  target/arm: Move MDCR_TDE test into exception_target_el
  target/arm: Mark exception helpers as noreturn
  target/arm: Create helper_exception_swstep
  target/arm: Remove TBFLAG_ANY.DEBUG_TARGET_EL
  target/arm: Add cur_el parameter to arm_generate_debug_exceptions
  target/arm: Remove route_to_el2 case from sve_exception_el

 target/arm/cpu.h   | 128 +---
 target/arm/helper.h|   7 +-
 target/arm/internals.h |  64 +-
 target/arm/translate.h |  14 +---
 target/arm/debug_helper.c  | 167 ++---
 target/arm/helper-a64.c|   7 +-
 target/arm/helper.c|  25 ++
 target/arm/mte_helper.c|   7 +-
 target/arm/op_helper.c | 128 ++--
 target/arm/tlb_helper.c|  10 ++-
 target/arm/translate-a64.c |   1 -
 target/arm/translate.c |   1 -
 12 files changed, 289 insertions(+), 270 deletions(-)

-- 
2.34.1

Re: [PATCH v2] linux-user/syscall.c: fix build without RLIMIT_RTTIME

2022-05-23 Thread Laurent Vivier


Le 23/05/2022 à 12:52, Fabrice Fontaine a écrit :

RLIMIT_RTTIME is not provided by uclibc-ng or by musl prior to version
1.2.0 and
https://github.com/bminor/musl/commit/2507e7f5312e79620f6337935d0a6c9045ccba09
resulting in the following build failure since
https://git.qemu.org/?p=qemu.git;a=commit;h=244fd08323088db73590ff2317dfe86f810b51d7:

../linux-user/syscall.c: In function 'target_to_host_resource':
../linux-user/syscall.c:1057:16: error: 'RLIMIT_RTTIME' undeclared (first use 
in this function); did you mean 'RLIMIT_NOFILE'?
  1057 | return RLIMIT_RTTIME;
   |^
   |RLIMIT_NOFILE

Fixes:
  - 
http://autobuild.buildroot.org/results/22d3b584b704613d030e1ea9e6b709b713e4cc26

Signed-off-by: Fabrice Fontaine 
---
Changes v1 -> v2 (after review of Laurent Vivier):
  - Use an ifdef block instead of defining RLIMIT_RTTIME

  linux-user/syscall.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index dd0d92ba4e..488facb356 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -1053,8 +1053,10 @@ static inline int target_to_host_resource(int code)
  return RLIMIT_RSS;
  case TARGET_RLIMIT_RTPRIO:
  return RLIMIT_RTPRIO;
+#ifdef RLIMIT_RTTIME
  case TARGET_RLIMIT_RTTIME:
  return RLIMIT_RTTIME;
+#endif
  case TARGET_RLIMIT_SIGPENDING:
  return RLIMIT_SIGPENDING;
  case TARGET_RLIMIT_STACK:



Applied to my linux-user-for-7.1 branch.

Thanks,
Laurent

Re: [PATCH 2/5] machine.py: add default pseries params in machine.py

2022-05-23 Thread Matheus K. Ferst


On 19/05/2022 20:18, John Snow wrote:
On Mon, May 16, 2022, 12:53 PM Daniel Henrique Barboza 
mailto:danielhb...@gmail.com>> wrote:


pSeries guests set a handful of machine capabilities on by default, all
of them related to security mitigations, that aren't always available in
the host.

This means that, as is today, running avocado in a Power9 server without
the proper firmware support, and with --disable-tcg, this error will
occur:

  (1/1) tests/avocado/info_usernet.py:InfoUsernet.test_hostfwd:
ERROR: ConnectError:
Failed to establish session: EOFError\n  Exit code: 1\n  (...)
(...)
         Command: ./qemu-system-ppc64 -display none -vga none (...)
         Output: qemu-system-ppc64: warning: netdev vnet has no peer
qemu-system-ppc64: Requested safe cache capability level not
supported by KVM
Try appending -machine cap-cfpc=broken

info_usernet.py happens to trigger this error first, but all tests would
fail in this configuration because the host does not support the default
'cap-cfpc' capability.

A similar situation was already fixed a couple of years ago by Greg Kurz
(commit 63d57c8f91d0) but it was focused on TCG warnings for these same
capabilities and running C qtests. This commit ended up preventing the
problem we're facing with avocado when running qtests with KVM support.

This patch takes a similar approach by amending machine.py to disable
these security capabilities in case we're running a pseries guest. The
change is made in the _launch() callback to be sure that we're already
committed to launching the guest. It's also worth noticing that we're
relying on self._machine being set accordingly (i.e. via tag:machine),
which is currently the case for all ppc64 related avocado tests.

Signed-off-by: Daniel Henrique Barboza mailto:danielhb...@gmail.com>>
---
  python/qemu/machine/machine.py | 13 +
  1 file changed, 13 insertions(+)

diff --git a/python/qemu/machine/machine.py
b/python/qemu/machine/machine.py
index 07ac5a710b..12e5e37bff 100644
--- a/python/qemu/machine/machine.py
+++ b/python/qemu/machine/machine.py
@@ -51,6 +51,11 @@


  LOG = logging.getLogger(__name__)
+PSERIES_DEFAULT_CAPABILITIES = ("cap-cfpc=broken,"
+                                "cap-sbbc=broken,"
+                                "cap-ibs=broken,"
+                                "cap-ccf-assist=off,"
+                                "cap-fwnmi=off")


  class QEMUMachineError(Exception):
@@ -447,6 +452,14 @@ def _launch(self) -> None:
          """
          Launch the VM and establish a QMP connection
          """
+
+        # pseries needs extra machine options to disable
Spectre/Meltdown
+        # KVM related capabilities that might not be available in the
+        # host.
+        if "qemu-system-ppc64" in self._binary:
+            if self._machine is None or "pseries" in self._machine:
+                self._args.extend(['-machine',
PSERIES_DEFAULT_CAPABILITIES])
+
          self._pre_launch()
          LOG.debug('VM launch command: %r', '
'.join(self._qemu_full_args))

-- 
2.32.0



Hm, okay.

I have plans to try and factor the machine appliance out and into an 
upstream package in the near future, so I want to avoid more hardcoding 
of defaults.


Does avocado have a subclass of QEMUMachine where it might be more 
appropriate to stick this bandaid? Can we make one?


(I don't think iotests runs into this problem because we always use 
machine:none there, I think. VM tests might have a similar problem 
though, and then it'd be reasonable to want the bandaid here in 
machine.py ... well, boo. okay.)


My verdict is that it's a bandaid, but I'll accept it if the avocado 
folks agree to it and I'll sort it out later when I do my rewrite.


I don't think I have access to a power9 machine to test this with 
either, so I might want a tested-by from someone who does.


--js



Unfortunately, none of our POWER9 machines had a firmware old enough to 
be affected by this issue. The closest I can test is a nested KVM-HV 
with L0 using cap-cfpc=broken, so the L1 receives the quoted message 
when running 'make check-avocado'.


With this setup I can confirm that the patch fixes this error, so
Tested-by: Matheus Ferst 

Thanks,
Matheus K. Ferst
Instituto de Pesquisas ELDORADO 
Analista de Software
Aviso Legal - Disclaimer

[PULL 2/3] qga-win32: Add support for NVME bus type

2022-05-23 Thread Konstantin Kostiuk

Bus type spaces (Indicates a storage spaces bus) is not
supported, so return it as unknown.

Signed-off-by: Konstantin Kostiuk 
Message-Id: <20220520201401.706630-1-kkost...@redhat.com>
Reviewed-by: Marc-André Lureau 
Signed-off-by: Konstantin Kostiuk 
---
 qga/commands-win32.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/qga/commands-win32.c b/qga/commands-win32.c
index dcdeb76a68..36f94c0f9c 100644
--- a/qga/commands-win32.c
+++ b/qga/commands-win32.c
@@ -490,6 +490,11 @@ static GuestDiskBusType win2qemu[] = {
 #if (_WIN32_WINNT >= 0x0601)
 [BusTypeVirtual] = GUEST_DISK_BUS_TYPE_VIRTUAL,
 [BusTypeFileBackedVirtual] = GUEST_DISK_BUS_TYPE_FILE_BACKED_VIRTUAL,
+/*
+ * BusTypeSpaces currently is not supported
+ */
+[BusTypeSpaces] = GUEST_DISK_BUS_TYPE_UNKNOWN,
+[BusTypeNvme] = GUEST_DISK_BUS_TYPE_NVME,
 #endif
 };
 
-- 
2.25.1

[PULL 1/3] qga: add guest-get-diskstats command for Linux guests

2022-05-23 Thread Konstantin Kostiuk

From: luzhipeng 

Add a new 'guest-get-diskstats' command to report disk I/O statistics
for Linux guests. This can be useful for monitoring I/O flow or diagnosing
I/O faults without needing to log in to the guest.

Signed-off-by: luzhipeng 
Message-Id: <20220520021935.676-1-luzhip...@cestc.cn>
Reviewed-by: Marc-André Lureau 
Reviewed-by: Konstantin Kostiuk 
Signed-off-by: Konstantin Kostiuk 
---
 qga/commands-posix.c | 123 +++
 qga/commands-win32.c |   6 +++
 qga/qapi-schema.json |  86 ++
 3 files changed, 215 insertions(+)

diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index 69f209af87..12b50b7124 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -2783,6 +2783,122 @@ GuestMemoryBlockInfo 
*qmp_guest_get_memory_block_info(Error **errp)
 return info;
 }
 
+#define MAX_NAME_LEN 128
+static GuestDiskStatsInfoList *guest_get_diskstats(Error **errp)
+{
+#ifdef CONFIG_LINUX
+GuestDiskStatsInfoList *head = NULL, **tail = &head;
+const char *diskstats = "/proc/diskstats";
+FILE *fp;
+size_t n;
+char *line = NULL;
+
+fp = fopen(diskstats, "r");
+if (fp  == NULL) {
+error_setg_errno(errp, errno, "open(\"%s\")", diskstats);
+return NULL;
+}
+
+while (getline(&line, &n, fp) != -1) {
+g_autofree GuestDiskStatsInfo *diskstatinfo = NULL;
+g_autofree GuestDiskStats *diskstat = NULL;
+char dev_name[MAX_NAME_LEN];
+unsigned int ios_pgr, tot_ticks, rq_ticks, wr_ticks, dc_ticks, 
fl_ticks;
+unsigned long rd_ios, rd_merges_or_rd_sec, rd_ticks_or_wr_sec, wr_ios;
+unsigned long wr_merges, rd_sec_or_wr_ios, wr_sec;
+unsigned long dc_ios, dc_merges, dc_sec, fl_ios;
+unsigned int major, minor;
+int i;
+
+i = sscanf(line, "%u %u %s %lu %lu %lu"
+   "%lu %lu %lu %lu %u %u %u %u"
+   "%lu %lu %lu %u %lu %u",
+   &major, &minor, dev_name,
+   &rd_ios, &rd_merges_or_rd_sec, &rd_sec_or_wr_ios,
+   &rd_ticks_or_wr_sec, &wr_ios, &wr_merges, &wr_sec,
+   &wr_ticks, &ios_pgr, &tot_ticks, &rq_ticks,
+   &dc_ios, &dc_merges, &dc_sec, &dc_ticks,
+   &fl_ios, &fl_ticks);
+
+if (i < 7) {
+continue;
+}
+
+diskstatinfo = g_new0(GuestDiskStatsInfo, 1);
+diskstatinfo->name = g_strdup(dev_name);
+diskstatinfo->major = major;
+diskstatinfo->minor = minor;
+
+diskstat = g_new0(GuestDiskStats, 1);
+if (i == 7) {
+diskstat->has_read_ios = true;
+diskstat->read_ios = rd_ios;
+diskstat->has_read_sectors = true;
+diskstat->read_sectors = rd_merges_or_rd_sec;
+diskstat->has_write_ios = true;
+diskstat->write_ios = rd_sec_or_wr_ios;
+diskstat->has_write_sectors = true;
+diskstat->write_sectors = rd_ticks_or_wr_sec;
+}
+if (i >= 14) {
+diskstat->has_read_ios = true;
+diskstat->read_ios = rd_ios;
+diskstat->has_read_sectors = true;
+diskstat->read_sectors = rd_sec_or_wr_ios;
+diskstat->has_read_merges = true;
+diskstat->read_merges = rd_merges_or_rd_sec;
+diskstat->has_read_ticks = true;
+diskstat->read_ticks = rd_ticks_or_wr_sec;
+diskstat->has_write_ios = true;
+diskstat->write_ios = wr_ios;
+diskstat->has_write_sectors = true;
+diskstat->write_sectors = wr_sec;
+diskstat->has_write_merges = true;
+diskstat->write_merges = wr_merges;
+diskstat->has_write_ticks = true;
+diskstat->write_ticks = wr_ticks;
+diskstat->has_ios_pgr = true;
+diskstat->ios_pgr = ios_pgr;
+diskstat->has_total_ticks = true;
+diskstat->total_ticks = tot_ticks;
+diskstat->has_weight_ticks = true;
+diskstat->weight_ticks = rq_ticks;
+}
+if (i >= 18) {
+diskstat->has_discard_ios = true;
+diskstat->discard_ios = dc_ios;
+diskstat->has_discard_merges = true;
+diskstat->discard_merges = dc_merges;
+diskstat->has_discard_sectors = true;
+diskstat->discard_sectors = dc_sec;
+diskstat->has_discard_ticks = true;
+diskstat->discard_ticks = dc_ticks;
+}
+if (i >= 20) {
+diskstat->has_flush_ios = true;
+diskstat->flush_ios = fl_ios;
+diskstat->has_flush_ticks = true;
+diskstat->flush_ticks = fl_ticks;
+}
+
+diskstatinfo->stats = g_steal_pointer(&diskstat);
+QAPI_LIST_APPEND(tail, diskstatinfo);
+diskstatinfo = NULL;
+}
+free(line);
+fclose(fp);
+return head;
+#else
+g_debug("disk stats reporting available

Re: [PATCH] block: drop unused bdrv_co_drain() API

2022-05-23 Thread Alberto Faria

On Sat, May 21, 2022 at 1:27 PM Stefan Hajnoczi  wrote:
> bdrv_co_drain() has not been used since commit 9a0cec664eef ("mirror:
> use bdrv_drained_begin/bdrv_drained_end") in 2016. Remove it so there
> are fewer drain scenarios to worry about.
>
> Use bdrv_drained_begin()/bdrv_drained_end() instead. They are "mixed"
> functions that can be called from coroutine context. Unlike
> bdrv_co_drain(), these functions provide control of the length of the
> drained section, which is usually the right thing.
>
> Signed-off-by: Stefan Hajnoczi 

Reviewed-by: Alberto Faria

[PULL 3/3] trivial: qga: Log version on start

2022-05-23 Thread Konstantin Kostiuk

Signed-off-by: Konstantin Kostiuk 
Reviewed-by: Marc-André Lureau 
Message-Id: <20220523191644.823726-2-kkost...@redhat.com>
Signed-off-by: Konstantin Kostiuk 
---
 qga/main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/qga/main.c b/qga/main.c
index 3b9546c185..c373fec3ee 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -1271,6 +1271,8 @@ static GAState *initialize_agent(GAConfig *config, int 
socket_activation)
 g_log_set_fatal_mask(NULL, G_LOG_LEVEL_ERROR);
 ga_enable_logging(s);
 
+g_debug("Guest agent version %s started", QEMU_FULL_VERSION);
+
 #ifdef _WIN32
 /* On win32 the state directory is application specific (be it the default
  * or a user override). We got past the command line parsing; let's create
-- 
2.25.1

[PULL 0/3] qemu-ga patches

2022-05-23 Thread Konstantin Kostiuk

The following changes since commit 3757b0d08b399c609954cf57f273b1167e5d7a8d:

  Merge tag 'pull-request-2022-05-18' of https://gitlab.com/thuth/qemu into 
staging (2022-05-20 08:04:30 -0700)

are available in the Git repository at:

  g...@github.com:kostyanf14/qemu.git tags/qga-win32-pull-2022-05-23

for you to fetch changes up to cb69e5d06bb8ba4929f277daa87160bf2f54c51e:

  trivial: qga: Log version on start (2022-05-23 22:27:15 +0300)


qga-win32-pull-2022-05-23


Konstantin Kostiuk (2):
  qga-win32: Add support for NVME bus type
  trivial: qga: Log version on start

luzhipeng (1):
  qga: add guest-get-diskstats command for Linux guests

 qga/commands-posix.c | 123 
+++
 qga/commands-win32.c |  11 +++
 qga/main.c   |   2 ++
 qga/qapi-schema.json |  86 
++
 4 files changed, 222 insertions(+)


--
2.25.1

Re: can QEMU's arm smmu model handle non-PCI devices?

2022-05-23 Thread Eric Auger

Hi Peter,
On 5/23/22 16:52, Peter Maydell wrote:
> Hi; in real hardware an SMMUv3 might be sat in front of any
> DMA-capable device. Putting one in front of a PCI bus is common
> but not the only use case. Does QEMU's SMMUv3 model handle that,
> or is the implementation currently restricted to only handling DMA
> from PCI devices?
only PCI is supported at the moment. IOMMU MR regions are only created
for PCI devices
(smmu_find_add_as() in hw/arm/smmu-common.c)

As a reminder, Chunming Li submitted several iterations of a series dedicated 
to that. See

https://patchew.org/QEMU/1629878922-173270-1-git-send-email-chunming_li1...@163.com/

It is not upstream though.

Thanks

Eric




>
> I ask because for the Realm Management Extension (aka Confidential
> Compute Architecture) we're going to need to put SMMUs in front of
> other devices (such as the GIC), so that their DMA is checked against
> the RME Granule Protection Tables. So if we don't currently handle
> non-PCI devices then that's a bit of extra preliminary work that we'll
> want to do :-)
>
> thanks
> -- PMM
>

Re: [PATCH v2 00/10] Random cleanup patches

2022-05-23 Thread Daniel Henrique Barboza





On 5/21/22 06:55, Mark Cave-Ayland wrote:

On 20/05/2022 19:00, Bernhard Beschow wrote:


v2:
* Omit removal of isa_connect_gpio_out() (Mark)

v1:
This patch series contains random cleanups that I made while studying the code.

Bernhard Beschow (10):
   hw: Reuse TYPE_I8042 define
   hw/audio/cs4231a: Const'ify global tables
   hw/i386/pc: Unexport PC_CPU_MODEL_IDS macro
   hw/i386/pc: Unexport functions used only internally
   hw/i386/pc: Remove orphan declarations
   hw/ppc/e500: Remove unused BINARY_DEVICE_TREE_FILE
   hw/net/fsl_etsec/etsec: Remove obsolete and unused etsec_create()
   accel/tcg/cpu-exec: Unexport dump_drift_info()
   accel/tcg: Inline dump_opcount_info() and remove it
   docs/devel: Fix link to developer mailing lists

  accel/tcg/cpu-exec.c  |  4 ++--
  accel/tcg/translate-all.c |  5 -
  docs/devel/submitting-a-patch.rst |  6 +++---
  hw/audio/cs4231a.c    |  8 
  hw/i386/pc.c  | 17 +
  hw/net/fsl_etsec/etsec.c  | 23 ---
  hw/net/fsl_etsec/etsec.h  |  7 ---
  hw/ppc/e500.c |  1 -
  hw/sparc64/sun4u.c    |  2 +-
  include/exec/cpu-all.h    |  3 ---
  include/hw/i386/pc.h  | 14 --
  11 files changed, 23 insertions(+), 67 deletions(-)


In general these changes look okay, so I'd be fine to give an:

Acked-by: Mark Cave-Ayland 

for those I haven't already given a Reviewed-by tag for.

Laurent, are you happy to take these patches with their current tags via 
qemu-trivial? Or would you prefer an extra set of eyes on the two accel/tcg 
ones first?



BTW I'll send a PR later this week and I was about to queue patch 06
(hw/ppc/e500 change) via the ppc tree. Let me know if you want to queue
the whole series via qemu-trivial instead.


Thanks,


Daniel






ATB,

Mark.

Re: proposed 7.1 release schedule

2022-05-23 Thread Peter Maydell

On Mon, 23 May 2022 at 16:08, Richard Henderson
 wrote:
>
> On 5/23/22 02:53, Peter Maydell wrote:
> > I just put some proposed dates into the 7.1 schedule page:
> > https://wiki.qemu.org/Planning/7.1#Release_Schedule
> >
> > * 2022-07-12  Softfreeze
> > * 2022-07-19  Hardfreeze. Tag rc0
> > * 2022-07-26  Tag rc1
> > * 2022-08-02  Tag rc2
> > * 2022-08-09  Tag rc3
> > * 2022-08-16  Release; or tag rc4 if needed
> > * 2022-08-23  Release if we needed an rc4
> >
> > Does this work for people? I just worked backwards
> > from a final release date about 4 months after 7.0; easy
> > enough to shift it forward or back by a week or so if
> > that works better for some reason.
>
> Shifting later a week or so would be better. I'm on holiday from 13-22 July 
> -- unless
> someone would like to manage merges in that period?

I can cover merges during that period, yes.

-- PMM

Re: [PATCH 0/4] ppc: improve some memory ordering issues

2022-05-23 Thread Daniel Henrique Barboza


Queued in gitlab.com/danielhb/qemu/tree/ppc-next. Thanks,


Daniel

On 5/19/22 10:59, Nicholas Piggin wrote:

Since RFC[*], this fixes a compile issue noticed by Richard,
and has survived some basic stressing with mttcg.

Thanks,
Nick

[*] https://lists.nongnu.org/archive/html/qemu-ppc/2022-05/msg00046.html

Nicholas Piggin (4):
   target/ppc: Fix eieio memory ordering semantics
   tcg/ppc: ST_ST memory ordering is not provided with eieio
   tcg/ppc: Optimize memory ordering generation with lwsync
   target/ppc: Implement lwsync with weaker memory ordering

  target/ppc/cpu.h |  4 +++-
  target/ppc/cpu_init.c| 13 +++--
  target/ppc/machine.c |  3 ++-
  target/ppc/translate.c   | 35 +--
  tcg/ppc/tcg-target.c.inc | 11 ++-
  5 files changed, 51 insertions(+), 15 deletions(-)

Re: [PATCH v2 00/12] Change helper declarations to use call flags

2022-05-23 Thread Daniel Henrique Barboza


Queued in gitlab.com/danielhb/qemu/tree/ppc-next. Thanks,


Daniel

On 5/19/22 17:18, matheus.fe...@eldorado.org.br wrote:

From: Matheus Ferst 

In our "PowerISA Vector/VSX instruction batch" patch series, rth noted[1]
that helpers that only access vector registers should be declared with
DEF_HELPER_FLAGS_* and TCG_CALL_NO_RWG. We fixed helpers in that series,
but there are older helpers that could use the same optimization.

Guided by the presence of env as the first argument, in patches 1~4 we
change helpers that do not have access to the cpu_env pointer to modify
any globals. Then, we change other helpers that receive cpu_env but do
not use it and apply the same fix, taking the opportunity to move them
to decodetree.

[1] https://lists.gnu.org/archive/html/qemu-ppc/2022-02/msg00568.html

Patches without review: 06.

v2:
  - darn32/darn64 helpers declared with TCG_CALL_NO_RWG_SE;
  - xscvspdpn implemented with helper_todouble, dropped helper_XSCVSPDPN;
  - vmsumuhs and vmsumshs helpers declared with TCG_CALL_NO_RWG;
  - Link to v1: 
https://lists.gnu.org/archive/html/qemu-ppc/2022-05/msg00287.html

Matheus Ferst (12):
   target/ppc: declare darn32/darn64 helpers with TCG_CALL_NO_RWG_SE
   target/ppc: use TCG_CALL_NO_RWG in vector helpers without env
   target/ppc: use TCG_CALL_NO_RWG in BCD helpers
   target/ppc: use TCG_CALL_NO_RWG in VSX helpers without env
   target/ppc: Use TCG_CALL_NO_RWG_SE in fsel helper
   target/ppc: implement xscvspdpn with helper_todouble
   target/ppc: declare xvxsigsp helper with call flags
   target/ppc: declare xxextractuw and xxinsertw helpers with call flags
   target/ppc: introduce do_va_helper
   target/ppc: declare vmsum[um]bm helpers with call flags
   target/ppc: declare vmsumuh[ms] helper with call flags
   target/ppc: declare vmsumsh[ms] helper with call flags

  target/ppc/fpu_helper.c |  22 +--
  target/ppc/helper.h | 225 ++--
  target/ppc/insn32.decode|  28 +++-
  target/ppc/int_helper.c |  22 +--
  target/ppc/translate/fp-impl.c.inc  |  30 +++-
  target/ppc/translate/fp-ops.c.inc   |   1 -
  target/ppc/translate/vmx-impl.c.inc |  62 
  target/ppc/translate/vmx-ops.c.inc  |   4 -
  target/ppc/translate/vsx-impl.c.inc | 107 -
  target/ppc/translate/vsx-ops.c.inc  |   4 -
  10 files changed, 284 insertions(+), 221 deletions(-)

Re: [PATCH v3 2/3] ui: Switch "-display sdl" to use the QAPI parser

2022-05-23 Thread Thomas Huth


On 23/05/2022 15.45, Markus Armbruster wrote:

Thomas Huth  writes:


The "-display sdl" option still uses a hand-crafted parser for its
parameters since we didn't want to drag an interface we considered
somewhat flawed into the QAPI schema. Since the flaws are gone now,
it's time to QAPIfy.

This introduces the new "DisplaySDL" QAPI struct that is used to hold
the parameters that are unique to the SDL display. The only specific
parameter is currently "grab-mod" that is used to specify the required
modifier keys to escape from the mouse grabbing mode.

Signed-off-by: Thomas Huth 
---
  qapi/ui.json| 26 ++-
  include/sysemu/sysemu.h |  2 --
  softmmu/globals.c   |  2 --
  softmmu/vl.c| 70 +
  ui/sdl2.c   | 10 ++
  5 files changed, 36 insertions(+), 74 deletions(-)

diff --git a/qapi/ui.json b/qapi/ui.json
index 11a827d10f..413371d5e8 100644
--- a/qapi/ui.json
+++ b/qapi/ui.json
@@ -1295,6 +1295,29 @@
'*swap-opt-cmd': 'bool'
} }
  
+##

+# @HotKeyMod:
+#
+# Set of modifier keys that need to be held for shortcut key actions.
+#
+# Since: 7.1
+##
+{ 'enum'  : 'HotKeyMod',
+  'data'  : [ 'lctrl-lalt', 'lshift-lctrl-lalt', 'rctrl' ] }


I have a somewhat uneasy feeling about encoding what is essentially a
subset of the sets of modifier keys as an enumeration, but it's what we
have to do to QAPIfy existing grab-mod.


Well, that's exactly what you suggested here:

 https://lists.gnu.org/archive/html/qemu-devel/2022-05/msg03401.html

So I really don't understand your uneasy feeling now?

...

diff --git a/softmmu/vl.c b/softmmu/vl.c
index 57ab9d5322..484e9d9921 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -1056,75 +1056,7 @@ static void parse_display(const char *p)
  exit(0);
  }
  
-if (strstart(p, "sdl", &opts)) {

-/*
- * sdl DisplayType needs hand-crafted parser instead of
- * parse_display_qapi() due to some options not in
- * DisplayOptions, specifically:
- *   - ctrl_grab + alt_grab
- * They can't be moved into the QAPI since they use underscores,
- * thus they will get replaced by "grab-mod" in the long term
- */
-#if defined(CONFIG_SDL)
-dpy.type = DISPLAY_TYPE_SDL;
-while (*opts) {
-const char *nextopt;
-
-if (strstart(opts, ",grab-mod=", &nextopt)) {
-opts = nextopt;
-if (strstart(opts, "lshift-lctrl-lalt", &nextopt)) {
-alt_grab = 1;
-} else if (strstart(opts, "rctrl", &nextopt)) {
-ctrl_grab = 1;
-} else {
-goto invalid_sdl_args;
-}
-} else if (strstart(opts, ",window-close=", &nextopt)) {
-opts = nextopt;
-dpy.has_window_close = true;
-if (strstart(opts, "on", &nextopt)) {
-dpy.window_close = true;
-} else if (strstart(opts, "off", &nextopt)) {
-dpy.window_close = false;
-} else {
-goto invalid_sdl_args;
-}
-} else if (strstart(opts, ",show-cursor=", &nextopt)) {
-opts = nextopt;
-dpy.has_show_cursor = true;
-if (strstart(opts, "on", &nextopt)) {
-dpy.show_cursor = true;
-} else if (strstart(opts, "off", &nextopt)) {
-dpy.show_cursor = false;
-} else {
-goto invalid_sdl_args;
-}
-} else if (strstart(opts, ",gl=", &nextopt)) {
-opts = nextopt;
-dpy.has_gl = true;
-if (strstart(opts, "on", &nextopt)) {
-dpy.gl = DISPLAYGL_MODE_ON;
-} else if (strstart(opts, "core", &nextopt)) {
-dpy.gl = DISPLAYGL_MODE_CORE;
-} else if (strstart(opts, "es", &nextopt)) {
-dpy.gl = DISPLAYGL_MODE_ES;
-} else if (strstart(opts, "off", &nextopt)) {
-dpy.gl = DISPLAYGL_MODE_OFF;
-} else {
-goto invalid_sdl_args;
-}
-} else {
-invalid_sdl_args:
-error_report("invalid SDL option string");
-exit(1);
-}
-opts = nextopt;
-}
-#else
-error_report("SDL display supported is not available in this binary");
-exit(1);
-#endif


When CONFIG_SDL is off, the error message changes from

 qemu-system-x86_64: -display sdl: SDL display supported is not available 
in this binary

to

 qemu-system-x86_64: -display sdl: Parameter 'type' does not accept value 
'sdl'

I don't mind, but I'd suggest mentioning it in the commit message.


I can do that if I have to respin this series for some other reas

Re: [PATCH] pnv/xive2: Don't overwrite PC registers when writing TCTXT registers

2022-05-23 Thread Daniel Henrique Barboza


Queued in gitlab.com/danielhb/qemu/tree/ppc-next. Thanks,


Daniel

On 5/23/22 12:18, Frederic Barrat wrote:

When writing a register from the TCTXT memory region (4th page within
the IC BAR), we were overwriting the Presentation Controller (PC)
register at the same offset. It looks like a silly cut and paste
error.

We were somehow lucky: the TCTXT registers being touched are
TCTXT_ENx/_SET/_RESET to enable physical threads and the PC registers
at the same offset are either not used by our model or the update was
harmless.

Found through code inspection.

Signed-off-by: Frederic Barrat 
---
  hw/intc/pnv_xive2.c | 3 ---
  1 file changed, 3 deletions(-)

diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c
index 87303b4064..a39e070e82 100644
--- a/hw/intc/pnv_xive2.c
+++ b/hw/intc/pnv_xive2.c
@@ -1295,7 +1295,6 @@ static void pnv_xive2_ic_tctxt_write(void *opaque, hwaddr 
offset,
   uint64_t val, unsigned size)
  {
  PnvXive2 *xive = PNV_XIVE2(opaque);
-uint32_t reg = offset >> 3;
  
  switch (offset) {

  /*
@@ -1322,8 +1321,6 @@ static void pnv_xive2_ic_tctxt_write(void *opaque, hwaddr 
offset,
  xive2_error(xive, "TCTXT: invalid write @%"HWADDR_PRIx, offset);
  return;
  }
-
-xive->pc_regs[reg] = val;
  }
  
  static const MemoryRegionOps pnv_xive2_ic_tctxt_ops = {

Re: [PATCH] trivial: qga: Log version on start

2022-05-23 Thread Konstantin Kostiuk

On Mon, May 23, 2022 at 8:35 PM Marc-André Lureau <
marcandre.lur...@redhat.com> wrote:

> On Mon, May 23, 2022 at 4:56 PM Konstantin Kostiuk 
> wrote:
> >
> > Signed-off-by: Konstantin Kostiuk 
>
> why not, and why not QEMU_FULL_VERSION? :)
>

QEMU_FULL_VERSION is good too, no specific reason to use QEMU_VERSION.


> Reviewed-by: Marc-André Lureau 
>
> > ---
> >  qga/main.c | 2 ++
> >  1 file changed, 2 insertions(+)
> >
> > diff --git a/qga/main.c b/qga/main.c
> > index 3b9546c185..a58368c75e 100644
> > --- a/qga/main.c
> > +++ b/qga/main.c
> > @@ -1271,6 +1271,8 @@ static GAState *initialize_agent(GAConfig *config,
> int socket_activation)
> >  g_log_set_fatal_mask(NULL, G_LOG_LEVEL_ERROR);
> >  ga_enable_logging(s);
> >
> > +g_debug("Guest agent version %s started", QEMU_VERSION);
> > +
> >  #ifdef _WIN32
> >  /* On win32 the state directory is application specific (be it the
> default
> >   * or a user override). We got past the command line parsing; let's
> create
> > --
> > 2.25.1
> >
>
>

Re: [PATCH v5] qga: add guest-get-diskstats command for Linux guests

2022-05-23 Thread Konstantin Kostiuk

Queued, thanks.

On Fri, May 20, 2022 at 1:19 PM Konstantin Kostiuk 
wrote:

> Reviewed-by: Konstantin Kostiuk 
>
> On Fri, May 20, 2022 at 5:20 AM luzhipeng  wrote:
>
>> Add a new 'guest-get-diskstats' command to report disk I/O statistics
>> for Linux guests. This can be useful for getting I/O flow or handling
>> I/O faults without needing to enter the guest.
>>
>> Signed-off-by: luzhipeng 
>> Reviewed-by: Marc-André Lureau 
>> ---
>>  Changes v4->v5: fix typo and adjust fields order in qapi-schema
>>  Changes v3->v4:
>> https://patchew.org/QEMU/20220515095437.1291-1-luzhip...@cestc.cn/
>>  Changes v2->v3: bugfix for memory leak
>>  Changes v1->v2: v1:
>> https://patchew.org/QEMU/20220512011930.214-1-luzhip...@cestc.cn/
>>
>>  qga/commands-posix.c | 123 +++
>>  qga/commands-win32.c |   6 +++
>>  qga/qapi-schema.json |  86 ++
>>  3 files changed, 215 insertions(+)
>>
>> diff --git a/qga/commands-posix.c b/qga/commands-posix.c
>> index 69f209af87..12b50b7124 100644
>> --- a/qga/commands-posix.c
>> +++ b/qga/commands-posix.c
>> @@ -2783,6 +2783,122 @@ GuestMemoryBlockInfo
>> *qmp_guest_get_memory_block_info(Error **errp)
>>  return info;
>>  }
>>
>> +#define MAX_NAME_LEN 128
>> +static GuestDiskStatsInfoList *guest_get_diskstats(Error **errp)
>> +{
>> +#ifdef CONFIG_LINUX
>> +GuestDiskStatsInfoList *head = NULL, **tail = &head;
>> +const char *diskstats = "/proc/diskstats";
>> +FILE *fp;
>> +size_t n;
>> +char *line = NULL;
>> +
>> +fp = fopen(diskstats, "r");
>> +if (fp  == NULL) {
>> +error_setg_errno(errp, errno, "open(\"%s\")", diskstats);
>> +return NULL;
>> +}
>> +
>> +while (getline(&line, &n, fp) != -1) {
>> +g_autofree GuestDiskStatsInfo *diskstatinfo = NULL;
>> +g_autofree GuestDiskStats *diskstat = NULL;
>> +char dev_name[MAX_NAME_LEN];
>> +unsigned int ios_pgr, tot_ticks, rq_ticks, wr_ticks, dc_ticks,
>> fl_ticks;
>> +unsigned long rd_ios, rd_merges_or_rd_sec, rd_ticks_or_wr_sec,
>> wr_ios;
>> +unsigned long wr_merges, rd_sec_or_wr_ios, wr_sec;
>> +unsigned long dc_ios, dc_merges, dc_sec, fl_ios;
>> +unsigned int major, minor;
>> +int i;
>> +
>> +i = sscanf(line, "%u %u %s %lu %lu %lu"
>> +   "%lu %lu %lu %lu %u %u %u %u"
>> +   "%lu %lu %lu %u %lu %u",
>> +   &major, &minor, dev_name,
>> +   &rd_ios, &rd_merges_or_rd_sec, &rd_sec_or_wr_ios,
>> +   &rd_ticks_or_wr_sec, &wr_ios, &wr_merges, &wr_sec,
>> +   &wr_ticks, &ios_pgr, &tot_ticks, &rq_ticks,
>> +   &dc_ios, &dc_merges, &dc_sec, &dc_ticks,
>> +   &fl_ios, &fl_ticks);
>> +
>> +if (i < 7) {
>> +continue;
>> +}
>> +
>> +diskstatinfo = g_new0(GuestDiskStatsInfo, 1);
>> +diskstatinfo->name = g_strdup(dev_name);
>> +diskstatinfo->major = major;
>> +diskstatinfo->minor = minor;
>> +
>> +diskstat = g_new0(GuestDiskStats, 1);
>> +if (i == 7) {
>> +diskstat->has_read_ios = true;
>> +diskstat->read_ios = rd_ios;
>> +diskstat->has_read_sectors = true;
>> +diskstat->read_sectors = rd_merges_or_rd_sec;
>> +diskstat->has_write_ios = true;
>> +diskstat->write_ios = rd_sec_or_wr_ios;
>> +diskstat->has_write_sectors = true;
>> +diskstat->write_sectors = rd_ticks_or_wr_sec;
>> +}
>> +if (i >= 14) {
>> +diskstat->has_read_ios = true;
>> +diskstat->read_ios = rd_ios;
>> +diskstat->has_read_sectors = true;
>> +diskstat->read_sectors = rd_sec_or_wr_ios;
>> +diskstat->has_read_merges = true;
>> +diskstat->read_merges = rd_merges_or_rd_sec;
>> +diskstat->has_read_ticks = true;
>> +diskstat->read_ticks = rd_ticks_or_wr_sec;
>> +diskstat->has_write_ios = true;
>> +diskstat->write_ios = wr_ios;
>> +diskstat->has_write_sectors = true;
>> +diskstat->write_sectors = wr_sec;
>> +diskstat->has_write_merges = true;
>> +diskstat->write_merges = wr_merges;
>> +diskstat->has_write_ticks = true;
>> +diskstat->write_ticks = wr_ticks;
>> +diskstat->has_ios_pgr = true;
>> +diskstat->ios_pgr = ios_pgr;
>> +diskstat->has_total_ticks = true;
>> +diskstat->total_ticks = tot_ticks;
>> +diskstat->has_weight_ticks = true;
>> +diskstat->weight_ticks = rq_ticks;
>> +}
>> +if (i >= 18) {
>> +diskstat->has_discard_ios = true;
>> +diskstat->discard_ios = dc_ios;
>> +diskstat->has_discard_merges = true;
>> +diskstat->discard_merges = dc_merges;
>> +diskstat->has

[PATCH v2 0/1] trivial: qga: Log version on start

2022-05-23 Thread Konstantin Kostiuk

v1 -> v2: Use QEMU_FULL_VERSION instead of QEMU_VERSION
v1: https://patchew.org/QEMU/20220523145610.809663-1-kkost...@redhat.com/

Konstantin Kostiuk (1):
  trivial: qga: Log version on start

 qga/main.c | 2 ++
 1 file changed, 2 insertions(+)

--
2.25.1

[PATCH v2 1/1] trivial: qga: Log version on start

2022-05-23 Thread Konstantin Kostiuk

Signed-off-by: Konstantin Kostiuk 
Reviewed-by: Marc-André Lureau 
---
 qga/main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/qga/main.c b/qga/main.c
index 3b9546c185..c373fec3ee 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -1271,6 +1271,8 @@ static GAState *initialize_agent(GAConfig *config, int 
socket_activation)
 g_log_set_fatal_mask(NULL, G_LOG_LEVEL_ERROR);
 ga_enable_logging(s);

+g_debug("Guest agent version %s started", QEMU_FULL_VERSION);
+
 #ifdef _WIN32
 /* On win32 the state directory is application specific (be it the default
  * or a user override). We got past the command line parsing; let's create
--
2.25.1

Re: [PATCH v3 05/15] osdep: export qemu_open_cloexec()

2022-05-23 Thread Marc-André Lureau

Hi

On Mon, May 23, 2022 at 8:11 PM Daniel P. Berrangé 
wrote:

> On Mon, May 23, 2022 at 08:02:45PM +0200, Marc-André Lureau wrote:
> > Hi
> >
> > On Mon, May 23, 2022 at 7:56 PM Daniel P. Berrangé 
> > wrote:
> >
> > > On Mon, May 23, 2022 at 07:30:42PM +0200, Marc-André Lureau wrote:
> > > > Hi
> > > >
> > > > On Mon, May 23, 2022 at 2:43 PM Daniel P. Berrangé <
> berra...@redhat.com>
> > > > wrote:
> > > >
> > > > > On Fri, May 13, 2022 at 08:08:11PM +0200,
> marcandre.lur...@redhat.com
> > > > > wrote:
> > > > > > From: Marc-André Lureau 
> > > > > >
> > > > > > Used in the next patch, to simplify qga code.
> > > > > >
> > > > > > Signed-off-by: Marc-André Lureau 
> > > > > > ---
> > > > > >  include/qemu/osdep.h |  1 +
> > > > > >  util/osdep.c | 10 --
> > > > > >  2 files changed, 9 insertions(+), 2 deletions(-)
> > > > > >
> > > > > > diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
> > > > > > index 67cc465416..64f51cfb7a 100644
> > > > > > --- a/include/qemu/osdep.h
> > > > > > +++ b/include/qemu/osdep.h
> > > > > > @@ -489,6 +489,7 @@ void sigaction_invoke(struct sigaction
> *action,
> > > > > >   */
> > > > > >  int qemu_open_old(const char *name, int flags, ...);
> > > > > >  int qemu_open(const char *name, int flags, Error **errp);
> > > > > > +int qemu_open_cloexec(const char *name, int flags, mode_t mode,
> > > Error
> > > > > **errp);
> > > > >
> > > > > I don't think we should be exporting this - it is just a variant
> of the
> > > > > 'qemu_open_old' method that we wanted callers to stop using in
> favour
> > > > > of explicitly deciding between 'qemu_open' and 'qemu_create'.
> > > > >
> > > >
> > > >
> > > > qemu_open() has "/dev/fdset" handling, which qemu-ga and other tools
> > > don't
> > > > need.
> > >
> > > Right, but exporting this as 'qemu_open_cloexec' is going to mislead
> > > people into thinking it is a better version of 'qemu_open'. This will
> > > cause us to lose support for /dev/fdset in places where we actually
> > > need it.
> > >
> >
> > > It is pretty harmless to have /dev/fdset there, even if the tool does
> > > not need it - that's been the case with many QEMU tools for many years.
> > > If we think it is actually a real problem though, we should just have
> > > a way to toggle it on/off from the existing APIs.
> > >
> > >
> > It's a bit problematic to make qemu-ga standalone, and have a common
> shared
> > subproject/library.
> >
> > Maybe introduce a callback for QEMU/QMP "/dev/fdset" handling ? any
> better
> > idea ?
>
> If we want to make qemu-ga standalone, then IMHO we should be
> aggressively switching it to use as many GLib APIs as possible,
> eliminating its reliance on any of QEMU's home-grown portability
> functions. All the 'FILE *' / 'open' scenarios could be replaced
> with GIO's GFile/GInputStream/GOutputStream for example.
>

I am not too eager to do that kind of refactoring. Even rewriting in Rust
seems a bit pointless to me, even if I would have more motivation.

Also there are times you do open() for things that are not stream-related.
And glib sadly doesn't really offer a solution for open(CLOEXEC).

I guess I can simply add an open_cloexec() helper function in qemu-ga alone
for now.

Re: [PATCH v2] target/i386/kvm: Fix disabling MPX on "-cpu host" with MPX-capable host

2022-05-23 Thread Paolo Bonzini

Queued, thanks.

Paolo

Re: TianoCore "Add QEMU support to MinPlatform (OpenQEMUBoardPkg)" GSoC project

2022-05-23 Thread Stefan Hajnoczi

On Mon, 23 May 2022 at 19:00, Pedro Falcato  wrote:
>
> Hi Stefan, Gerd,
>
> Some questions: Is emulation of the current boards ever going to be expanded? 
> For instance, can FW rely on the emulation being relatively simple or do you 
> actually need to look at chipset docs?
> For example, I was looking at (most? all?) of the current chipset emulation 
> [1] [2] and it looks relatively simple, such that writing something that 
> directly interfaces with it isn't particularly hard.

I suggest a mix of referencing the hardware datasheets and open source
drivers or firmware code when developing new guest code.

Firmware should follow hardware datasheets and avoid relying on
QEMU implementation details.

fw_cfg and other paravirt interfaces that are documented won't change
in backwards incompatible ways. It's fine to rely on them.
QEMU-specific hardware interfaces are documented in docs/specs/ (e.g.
acpi_pci_hotplug.rst).

Anything that isn't documented may not be a stable interface. I
recommend discussing stabilization on qemu-devel before relying on it.

New QEMU versions do not change the hardware interfaces when launched
with a specific machine type version (e.g. -M pc-q35-6.2), so old
firmware should continue working under new QEMU versions as long as a
versioned machine type is specified on the command-line. But
undocumented QEMU hardware interfaces could change in new machine
types, so it's risky to rely on them.

> I've been trying to figure out exactly what one needs to do in FW to get a 
> completely set up virtual machine environment. Are there good docs on that or 
> do you need to just read OVMF/SeaBios code? Have interfaces changed 
> significantly over the years?
> As an example, I remember seeing some OVMF CMOS memory detection shenanigans 
> in the EDK2 mailing list but in my QEMU (7.0.0) you seem to just get the 
> memory map straight from fw_cfg. Also, do you get ALL the ACPI tables from 
> fw_cfg, or do you need to modify them/generate them dynamically in firmware?

Gerd can answer these questions. Please clarify which machine types
you are interested in (see the list from "qemu-system-x86_64 -machine
\?").

Stefan

> Hopefully my questions make sense. Feel free to CC qemu-devel if you think 
> these questions are better suited there.
>
> Thanks,
> Pedro
>
> [1] https://github.com/qemu/qemu/blob/master/hw/isa/lpc_ich9.c
> [2] https://github.com/qemu/qemu/blob/master/hw/isa/piix4.c
>
> On Sat, May 21, 2022 at 8:58 PM Stefan Hajnoczi  wrote:
>>
>> Hi,
>> I am a QEMU developer and saw the "Add QEMU support to MinPlatform
>> (OpenQEMUBoardPkg)" TianoCore GSoC project:
>> https://summerofcode.withgoogle.com/programs/2022/projects/s892c1ox
>>
>> You may already know each other from edk2, but in case not, I wanted
>> to introduce Gerd. He's doing edk2 work for QEMU and has been an
>> active QEMU developer for many years.
>>
>> Feel free to CC qemu-devel@nongnu.org if you ever want input or help
>> from the QEMU community.
>>
>> Have a great summer!
>>
>> Stefan
>
>
>
> --
> Pedro Falcato

Re: [PATCH v3 05/15] osdep: export qemu_open_cloexec()

2022-05-23 Thread Marc-André Lureau

Hi

On Mon, May 23, 2022 at 7:56 PM Daniel P. Berrangé 
wrote:

> On Mon, May 23, 2022 at 07:30:42PM +0200, Marc-André Lureau wrote:
> > Hi
> >
> > On Mon, May 23, 2022 at 2:43 PM Daniel P. Berrangé 
> > wrote:
> >
> > > On Fri, May 13, 2022 at 08:08:11PM +0200, marcandre.lur...@redhat.com
> > > wrote:
> > > > From: Marc-André Lureau 
> > > >
> > > > Used in the next patch, to simplify qga code.
> > > >
> > > > Signed-off-by: Marc-André Lureau 
> > > > ---
> > > >  include/qemu/osdep.h |  1 +
> > > >  util/osdep.c | 10 --
> > > >  2 files changed, 9 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
> > > > index 67cc465416..64f51cfb7a 100644
> > > > --- a/include/qemu/osdep.h
> > > > +++ b/include/qemu/osdep.h
> > > > @@ -489,6 +489,7 @@ void sigaction_invoke(struct sigaction *action,
> > > >   */
> > > >  int qemu_open_old(const char *name, int flags, ...);
> > > >  int qemu_open(const char *name, int flags, Error **errp);
> > > > +int qemu_open_cloexec(const char *name, int flags, mode_t mode,
> Error
> > > **errp);
> > >
> > > I don't think we should be exporting this - it is just a variant of the
> > > 'qemu_open_old' method that we wanted callers to stop using in favour
> > > of explicitly deciding between 'qemu_open' and 'qemu_create'.
> > >
> >
> >
> > qemu_open() has "/dev/fdset" handling, which qemu-ga and other tools
> don't
> > need.
>
> Right, but exporting this as 'qemu_open_cloexec' is going to mislead
> people into thinking it is a better version of 'qemu_open'. This will
> cause us to lose support for /dev/fdset in places where we actually
> need it.
>

> It is pretty harmless to have /dev/fdset there, even if the tool does
> not need it - that's been the case with many QEMU tools for many years.
> If we think it is actually a real problem though, we should just have
> a way to toggle it on/off from the existing APIs.
>
>
It's a bit problematic to make qemu-ga standalone, and have a common shared
subproject/library.

Maybe introduce a callback for QEMU/QMP "/dev/fdset" handling ? any better
idea ?

> eg put 'bool allow_fdset = true' in softmmu/vl.c, and
> 'bool allow_fdset = false' in stubs/open.c, and then make
> qemu_open_internal conditionalize itself on this global
> variable, so only the system emulators get fdset support
> activated.
>
> With regards,
> Daniel
> --
> |: https://berrange.com  -o-
> https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-
> https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-
> https://www.instagram.com/dberrange :|
>
>

-- 
Marc-André Lureau

Re: [PATCH v3 23/49] semihosting: Split out semihost_sys_open

2022-05-23 Thread Richard Henderson


On 5/23/22 09:54, Peter Maydell wrote:

On Mon, 23 May 2022 at 16:46, Richard Henderson
 wrote:

Also, I think I mentioned this in the v2 cover but not here, that having done 
the errno
conversion here for arm semihosting, it worked less well for mips and xtensa, 
which have a
rather better defined set of errnos.

My question from v2 was: should we in fact convert back from gdb's errno to 
host errno in
gdbstub.c handle_file_io(), and then let each semihosting backend convert from 
host to guest?


That sounds like it's probably a better idea (though I'm not sure
what host errno we use for the gdb "unknown errno" case)...


An excellent question.  I note that both mips and xtensa use EINVAL when there is no exact 
match for the guest.  It does seem to be the least bad option.



r~

[PATCH v2 09/11] target/ppc: implement addg6s

2022-05-23 Thread Víctor Colombo

From: Matheus Ferst 

Implements the following Power ISA v2.06 instruction:
addg6s: Add and Generate Sixes

Signed-off-by: Matheus Ferst 
Signed-off-by: Víctor Colombo 
---
 target/ppc/insn32.decode   |  4 +++
 target/ppc/translate/fixedpoint-impl.c.inc | 37 ++
 2 files changed, 41 insertions(+)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index a333f33a55..f16f123843 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -296,6 +296,10 @@ CNTTZDM 01 . . . 1000111011 -   @X
 PDEPD   01 . . . 0010011100 -   @X
 PEXTD   01 . . . 001000 -   @X
 
+## BCD Assist
+
+ADDG6S  01 . . . - 001001010 -  @X
+
 ### Float-Point Load Instructions
 
 LFS 11 . .  @D
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc 
b/target/ppc/translate/fixedpoint-impl.c.inc
index 1aab32be03..490e49cfc7 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -492,3 +492,40 @@ static bool trans_PEXTD(DisasContext *ctx, arg_X *a)
 #endif
 return true;
 }
+
+static bool trans_ADDG6S(DisasContext *ctx, arg_X *a)
+{
+const uint64_t carry_bits = 0xULL;
+TCGv t0, t1, carry, zero = tcg_constant_tl(0);
+
+REQUIRE_INSNS_FLAGS2(ctx, BCDA_ISA206);
+
+t0 = tcg_temp_new();
+t1 = tcg_const_tl(0);
+carry = tcg_const_tl(0);
+
+for (int i = 0; i < 16; i++) {
+tcg_gen_shri_tl(t0, cpu_gpr[a->ra], i * 4);
+tcg_gen_andi_tl(t0, t0, 0xf);
+tcg_gen_add_tl(t1, t1, t0);
+
+tcg_gen_shri_tl(t0, cpu_gpr[a->rb], i * 4);
+tcg_gen_andi_tl(t0, t0, 0xf);
+tcg_gen_add_tl(t1, t1, t0);
+
+tcg_gen_andi_tl(t1, t1, 0x10);
+tcg_gen_setcond_tl(TCG_COND_NE, t1, t1, zero);
+
+tcg_gen_shli_tl(t0, t1, i * 4);
+tcg_gen_or_tl(carry, carry, t0);
+}
+
+tcg_gen_xori_tl(carry, carry, (target_long)carry_bits);
+tcg_gen_muli_tl(cpu_gpr[a->rt], carry, 6);
+
+tcg_temp_free(t0);
+tcg_temp_free(t1);
+tcg_temp_free(carry);
+
+return true;
+}
-- 
2.25.1

Re: [PATCH v3 0/3] target/ppc: Fix FPSCR.FI bit

2022-05-23 Thread Daniel Henrique Barboza


Queued in gitlab.com/danielhb/qemu/tree/ppc-next. Thanks,


Daniel

On 5/17/22 13:15, Víctor Colombo wrote:

Hello everyone,

According to Power ISA, the FI bit in FPSCR is non-sticky.
This means that if an instruction is said to modify the FI bit, then
it should be set or cleared depending on the result of the
instruction. Otherwise, it should be kept as was before.

This patch set fixes inconsistencies found in QEMU's handling of the
FPSCR.FI bit, where it treats all instructions as if they are supposed
to change FI.

Thanks!

v2:
- move the FI change from float_inexact_excp to do_float_check_status
- remove the setting of FI from float_overflow_excp, making
   do_float_check_status() the only function responsible for it.
- make float_overflow_excp() return float_flag_inexact if it should
   update the inexact flags.
- Add patch 3, moving the renaming of sfprf to sfifprf to it
   (previously on patch 1)

v3:
- rewrite patch 1 commit message
- fix missing comment change in patch 3
- add all R-b

Víctor Colombo (3):
   target/ppc: Fix FPSCR.FI bit being cleared when it shouldn't
   target/ppc: Fix FPSCR.FI changing in float_overflow_excp()
   target/ppc: Rename sfprf to sfifprf where it's also used as set fi
 flag

  target/ppc/cpu.h|   2 +
  target/ppc/fpu_helper.c | 223 +---
  2 files changed, 117 insertions(+), 108 deletions(-)

[PATCH v2 08/11] target/ppc: Add flag for ISA v2.06 BCDA instructions

2022-05-23 Thread Víctor Colombo

From: Matheus Ferst 

Adds an insns_flags2 for the BCD assist instructions introduced in
Power ISA 2.06. These instructions are not listed in the manuals for
e5500[1] and e6500[2], so the flag is only added for POWER7/8/9/10
models.

[1] https://www.nxp.com/files-static/32bit/doc/ref_manual/EREF_RM.pdf
[2] https://www.nxp.com/docs/en/reference-manual/E6500RM.pdf

Signed-off-by: Matheus Ferst 
Signed-off-by: Víctor Colombo 
Reviewed-by: Richard Henderson 
---
 target/ppc/cpu.h  | 4 +++-
 target/ppc/cpu_init.c | 9 +
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 48596cfb25..8d31e9578e 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -2271,6 +2271,8 @@ enum {
 PPC2_ISA300= 0x0008ULL,
 /* POWER ISA 3.1 */
 PPC2_ISA310= 0x0010ULL,
+/* ISA 2.06 BCD assist instructions  */
+PPC2_BCDA_ISA206   = 0x0020ULL,
 
 #define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_VSX | PPC2_PRCNTL | PPC2_DBRX | \
 PPC2_ISA205 | PPC2_VSX207 | PPC2_PERM_ISA206 | \
@@ -2279,7 +2281,7 @@ enum {
 PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \
 PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP | \
 PPC2_FP_CVT_S64 | PPC2_TM | PPC2_PM_ISA206 | \
-PPC2_ISA300 | PPC2_ISA310)
+PPC2_ISA300 | PPC2_ISA310 | PPC2_BCDA_ISA206)
 };
 
 /*/
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 527ad40fcb..e35d0c06ba 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -5985,7 +5985,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
 PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 |
 PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
 PPC2_FP_TST_ISA206 | PPC2_FP_CVT_S64 |
-PPC2_PM_ISA206;
+PPC2_PM_ISA206 | PPC2_BCDA_ISA206;
 pcc->msr_mask = (1ull << MSR_SF) |
 (1ull << MSR_VR) |
 (1ull << MSR_VSX) |
@@ -6159,7 +6159,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
 PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
 PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
 PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 |
-PPC2_TM | PPC2_PM_ISA206;
+PPC2_TM | PPC2_PM_ISA206 | PPC2_BCDA_ISA206;
 pcc->msr_mask = (1ull << MSR_SF) |
 (1ull << MSR_HV) |
 (1ull << MSR_TM) |
@@ -6379,7 +6379,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data)
 PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
 PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
 PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 |
-PPC2_TM | PPC2_ISA300 | PPC2_PRCNTL;
+PPC2_TM | PPC2_ISA300 | PPC2_PRCNTL | PPC2_BCDA_ISA206;
 pcc->msr_mask = (1ull << MSR_SF) |
 (1ull << MSR_HV) |
 (1ull << MSR_TM) |
@@ -6596,7 +6596,8 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data)
 PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
 PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
 PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 |
-PPC2_TM | PPC2_ISA300 | PPC2_PRCNTL | PPC2_ISA310;
+PPC2_TM | PPC2_ISA300 | PPC2_PRCNTL | PPC2_ISA310 |
+PPC2_BCDA_ISA206;
 pcc->msr_mask = (1ull << MSR_SF) |
 (1ull << MSR_HV) |
 (1ull << MSR_TM) |
-- 
2.25.1

Re: [PATCH v3 05/15] osdep: export qemu_open_cloexec()

2022-05-23 Thread Daniel P . Berrangé

On Mon, May 23, 2022 at 08:02:45PM +0200, Marc-André Lureau wrote:
> Hi
> 
> On Mon, May 23, 2022 at 7:56 PM Daniel P. Berrangé 
> wrote:
> 
> > On Mon, May 23, 2022 at 07:30:42PM +0200, Marc-André Lureau wrote:
> > > Hi
> > >
> > > On Mon, May 23, 2022 at 2:43 PM Daniel P. Berrangé 
> > > wrote:
> > >
> > > > On Fri, May 13, 2022 at 08:08:11PM +0200, marcandre.lur...@redhat.com
> > > > wrote:
> > > > > From: Marc-André Lureau 
> > > > >
> > > > > Used in the next patch, to simplify qga code.
> > > > >
> > > > > Signed-off-by: Marc-André Lureau 
> > > > > ---
> > > > >  include/qemu/osdep.h |  1 +
> > > > >  util/osdep.c | 10 --
> > > > >  2 files changed, 9 insertions(+), 2 deletions(-)
> > > > >
> > > > > diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
> > > > > index 67cc465416..64f51cfb7a 100644
> > > > > --- a/include/qemu/osdep.h
> > > > > +++ b/include/qemu/osdep.h
> > > > > @@ -489,6 +489,7 @@ void sigaction_invoke(struct sigaction *action,
> > > > >   */
> > > > >  int qemu_open_old(const char *name, int flags, ...);
> > > > >  int qemu_open(const char *name, int flags, Error **errp);
> > > > > +int qemu_open_cloexec(const char *name, int flags, mode_t mode,
> > Error
> > > > **errp);
> > > >
> > > > I don't think we should be exporting this - it is just a variant of the
> > > > 'qemu_open_old' method that we wanted callers to stop using in favour
> > > > of explicitly deciding between 'qemu_open' and 'qemu_create'.
> > > >
> > >
> > >
> > > qemu_open() has "/dev/fdset" handling, which qemu-ga and other tools
> > don't
> > > need.
> >
> > Right, but exporting this as 'qemu_open_cloexec' is going to mislead
> > people into thinking it is a better version of 'qemu_open'. This will
> > cause us to loose support for /dev/fdset in places where we actually
> > need it.
> >
> 
> > It is pretty harmless to have /dev/fdset there, even if the tool does
> > not need it - that's been the case with many QEMU tools for many years.
> > If we think it is actually a real problem though, we should just have
> > a way to toggle it on/off from the existing APIs.
> >
> >
> It's a bit problematic to make qemu-ga standalone, and have a common shared
> subproject/library.
> 
> Maybe introduce a callback for QEMU/QMP "/dev/fdset" handling ? any better
> idea ?

If we want to make qemu-ga standalone, then IMHO we should be
aggressively switching it to use as many GLib APIs as possible,
eliminating its reliance on any of QEMU's home-grown portability
functions. All the 'FILE *' / 'open' scenarios could be replaced
with GIO's GFile/GInputStream/GOutputStream for example.

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH 6/9] vfio/migration: Implement VFIO migration protocol v2

2022-05-23 Thread Joao Martins

On 5/12/22 16:43, Avihai Horon wrote:
> Replace the current VFIO migration protocol v1 implementation with a new
> implementation corresponding to VFIO migration protocol v2.
> 
> The main changes are:
> - VFIO device state is now represented as a finite state machine instead
>   of a bitmap.
> 
> - Migration interface with kernel is now done using VFIO_DEVICE_FEATURE
>   ioctl and normal read() and write() instead of the migration region.
> 
> - As VFIO migration protocol v2 currently doesn't support the pre-copy
>   phase of migration, .save_live_pending and .save_live_iterate handlers
>   plus pre-copy relevant code are removed.
> 
> Detailed information about VFIO migration protocol v2 and difference
> compared to v1 can be found here [1].
> 
> [1]
> https://lore.kernel.org/all/20220224142024.147653-10-yish...@nvidia.com/
> 
> Signed-off-by: Avihai Horon 
> ---
>  hw/vfio/common.c  |  21 +-
>  hw/vfio/migration.c   | 628 +++---
>  hw/vfio/trace-events  |   9 +-
>  include/hw/vfio/vfio-common.h |   8 +-
>  4 files changed, 153 insertions(+), 513 deletions(-)
> 
This looks like a fairly big patch, though more than 70% of it is removing
code. Perhaps you could split it into adding v2 first and removing v1
afterwards, rather than a single replacement patch? It's just a suggestion
anyhow, to hopefully ease analysis of the new additions. The removal seems
to muddle things a tiny bit.

I don't wanna throw you into potentially unnecessary work should maintainers 
disagree,
so here's an attempt:

https://github.com/jpemartins/qemu/commits/for-avihai

If you apply your series on top of the same base commit (78ac2eebbab9) you 
should be able
to compare both branches.

I haven't yet found any particular flaws in your new logic (but will reply
back if I find any).

[PATCH v2 06/11] target/ppc: Implement mffscdrn[i] instructions

2022-05-23 Thread Víctor Colombo

Signed-off-by: Víctor Colombo 
---
 target/ppc/insn32.decode   |  5 
 target/ppc/translate/fp-impl.c.inc | 41 ++
 2 files changed, 46 insertions(+)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 76bd9e4f57..a333f33a55 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -130,6 +130,9 @@
 &X_imm2 rt imm
 @X_imm2 .. rt:5 . ... imm:2 .. .&X_imm2
 
+&X_imm3 rt imm
+@X_imm3 .. rt:5 . .. imm:3 .. . &X_imm3
+
 %x_xt   0:1 21:5
 &X_imm5 xt imm:uint8_t vrb
 @X_imm5 .. . imm:5 vrb:5 .. .   &X_imm5 
xt=%x_xt
@@ -329,7 +332,9 @@ SETNBCR 01 . . - 00 -   
@X_bi
 MFFS11 . 0 - 1001000111 .   @X_t_rc
 MFFSCE  11 . 1 - 1001000111 -   @X_t
 MFFSCRN 11 . 10110 . 1001000111 -   @X_tb
+MFFSCDRN11 . 10100 . 1001000111 -   @X_tb
 MFFSCRNI11 . 10111 ---.. 1001000111 -   @X_imm2
+MFFSCDRNI   11 . 10101 --... 1001000111 -   @X_imm3
 MFFSL   11 . 11000 - 1001000111 -   @X_t
 
 ### Decimal Floating-Point Arithmetic Instructions
diff --git a/target/ppc/translate/fp-impl.c.inc 
b/target/ppc/translate/fp-impl.c.inc
index 24adf0ad15..734c960436 100644
--- a/target/ppc/translate/fp-impl.c.inc
+++ b/target/ppc/translate/fp-impl.c.inc
@@ -670,6 +670,27 @@ static bool trans_MFFSCRN(DisasContext *ctx, arg_X_tb *a)
 return true;
 }
 
+static bool trans_MFFSCDRN(DisasContext *ctx, arg_X_tb *a)
+{
+TCGv_i64 t1, fpscr;
+
+REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+REQUIRE_FPU(ctx);
+
+t1 = tcg_temp_new_i64();
+get_fpr(t1, a->rb);
+tcg_gen_andi_i64(t1, t1, FP_DRN);
+
+gen_reset_fpstatus();
+fpscr = place_from_fpscr(a->rt, FP_DRN | FP_ENABLES | FP_NI | FP_RN);
+store_fpscr_masked(fpscr, FP_DRN, t1, 0x0100);
+
+tcg_temp_free_i64(t1);
+tcg_temp_free_i64(fpscr);
+
+return true;
+}
+
 static bool trans_MFFSCRNI(DisasContext *ctx, arg_X_imm2 *a)
 {
 TCGv_i64 t1, fpscr;
@@ -690,6 +711,26 @@ static bool trans_MFFSCRNI(DisasContext *ctx, arg_X_imm2 
*a)
 return true;
 }
 
+static bool trans_MFFSCDRNI(DisasContext *ctx, arg_X_imm3 *a)
+{
+TCGv_i64 t1, fpscr;
+
+REQUIRE_INSNS_FLAGS2(ctx, ISA300);
+REQUIRE_FPU(ctx);
+
+t1 = tcg_temp_new_i64();
+tcg_gen_movi_i64(t1, (uint64_t)a->imm << FPSCR_DRN0);
+
+gen_reset_fpstatus();
+fpscr = place_from_fpscr(a->rt, FP_DRN | FP_ENABLES | FP_NI | FP_RN);
+store_fpscr_masked(fpscr, FP_DRN, t1, 0x0100);
+
+tcg_temp_free_i64(t1);
+tcg_temp_free_i64(fpscr);
+
+return true;
+}
+
 static bool trans_MFFSL(DisasContext *ctx, arg_X_t *a)
 {
 TCGv_i64 fpscr;
-- 
2.25.1

Re: [RFC PATCH] 9p: case-insensitive host filesystems

2022-05-23 Thread Christian Schoenebeck

On Freitag, 22. April 2022 21:57:40 CEST Dominique Martinet wrote:
> Christian Schoenebeck wrote on Fri, Apr 22, 2022 at 08:02:46PM +0200:
> > So maybe it's better to handle case-insensitivity entirely on client side?
> > I've read that some generic "case fold" code has landed in the Linux
> > kernel
> > recently that might do the trick?
> 
> I haven't tried, but setting S_CASEFOLD on every inode's i_flags might do
> what you want client-side.
> That's easy enough to test and could be a mount option

I just made a quick test using:

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 08f48b70a741..5d8e77daed53 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -257,6 +257,7 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
inode->i_atime = inode->i_mtime = inode->i_ctime = 
current_time(inode);
inode->i_mapping->a_ops = &v9fs_addr_operations;
inode->i_private = NULL;
+   inode->i_flags |= S_CASEFOLD;
 
switch (mode & S_IFMT) {
case S_IFIFO:

Unfortunately that did not help much. I still get an EEXIST error, e.g. when
trying 'ln -s foo FOO'.

I am not sure though whether there would be more places in the code to touch,
or whether that's even the expected behaviour with S_CASEFOLD for some reason.

> Even with that it's possible to do a direct open without readdir first
> if one knows the path, and that would only be case-insensitive if the
> backing server is case insensitive though, so just setting the option
> and expecting it to work all the time might be a little bit
> optimistic... I guess that should be an optimization at best.
> 
> Ideally the server should tell the client they are casefolded somehow,
> but 9p doesn't have any capability/mount time negotiation besides msize
> so that's difficult with the current protocol.

[PATCH v2 05/11] target/ppc: Move mffs[.] to decodetree

2022-05-23 Thread Víctor Colombo

Signed-off-by: Víctor Colombo 
---
 target/ppc/insn32.decode   |  4 
 target/ppc/translate/fp-impl.c.inc | 35 +++---
 target/ppc/translate/fp-ops.c.inc  |  1 -
 3 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 68ea34d608..76bd9e4f57 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -97,6 +97,9 @@
 &X_tb   rt rb
 @X_tb   .. rt:5 . rb:5 .. . &X_tb
 
+&X_t_rc rt rc:bool
+@X_t_rc .. rt:5 . . .. rc:1 &X_t_rc
+
 &X_tb_rcrt rb rc:bool
 @X_tb_rc.. rt:5 . rb:5 .. rc:1  &X_tb_rc
 
@@ -323,6 +326,7 @@ SETNBCR 01 . . - 00 -   
@X_bi
 
 ### Move To/From FPSCR
 
+MFFS11 . 0 - 1001000111 .   @X_t_rc
 MFFSCE  11 . 1 - 1001000111 -   @X_t
 MFFSCRN 11 . 10110 . 1001000111 -   @X_tb
 MFFSCRNI11 . 10111 ---.. 1001000111 -   @X_imm2
diff --git a/target/ppc/translate/fp-impl.c.inc 
b/target/ppc/translate/fp-impl.c.inc
index e602cbf0a5..24adf0ad15 100644
--- a/target/ppc/translate/fp-impl.c.inc
+++ b/target/ppc/translate/fp-impl.c.inc
@@ -589,24 +589,6 @@ static void gen_mcrfs(DisasContext *ctx)
 tcg_temp_free_i64(tnew_fpscr);
 }
 
-/* mffs */
-static void gen_mffs(DisasContext *ctx)
-{
-TCGv_i64 t0;
-if (unlikely(!ctx->fpu_enabled)) {
-gen_exception(ctx, POWERPC_EXCP_FPU);
-return;
-}
-t0 = tcg_temp_new_i64();
-gen_reset_fpstatus();
-tcg_gen_extu_tl_i64(t0, cpu_fpscr);
-set_fpr(rD(ctx->opcode), t0);
-if (unlikely(Rc(ctx->opcode))) {
-gen_set_cr1_from_fpscr(ctx);
-}
-tcg_temp_free_i64(t0);
-}
-
 static TCGv_i64 place_from_fpscr(int rt, uint64_t mask)
 {
 TCGv_i64 fpscr = tcg_temp_new_i64();
@@ -634,6 +616,23 @@ static void store_fpscr_masked(TCGv_i64 fpscr, uint64_t 
clear_mask,
 tcg_temp_free_i64(fpscr_masked);
 }
 
+static bool trans_MFFS(DisasContext *ctx, arg_X_t_rc *a)
+{
+TCGv_i64 fpscr;
+
+REQUIRE_FPU(ctx);
+
+gen_reset_fpstatus();
+fpscr = place_from_fpscr(a->rt, UINT64_MAX);
+if (a->rc) {
+gen_set_cr1_from_fpscr(ctx);
+}
+
+tcg_temp_free_i64(fpscr);
+
+return true;
+}
+
 static bool trans_MFFSCE(DisasContext *ctx, arg_X_t *a)
 {
 TCGv_i64 fpscr;
diff --git a/target/ppc/translate/fp-ops.c.inc 
b/target/ppc/translate/fp-ops.c.inc
index f7ca1cc8b8..81640553e1 100644
--- a/target/ppc/translate/fp-ops.c.inc
+++ b/target/ppc/translate/fp-ops.c.inc
@@ -75,7 +75,6 @@ GEN_HANDLER_E(fcpsgn, 0x3F, 0x08, 0x00, 0x, PPC_NONE, 
PPC2_ISA205),
 GEN_HANDLER_E(fmrgew, 0x3F, 0x06, 0x1E, 0x0001, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER_E(fmrgow, 0x3F, 0x06, 0x1A, 0x0001, PPC_NONE, PPC2_VSX207),
 GEN_HANDLER(mcrfs, 0x3F, 0x00, 0x02, 0x0063F801, PPC_FLOAT),
-GEN_HANDLER_E_2(mffs, 0x3F, 0x07, 0x12, 0x00, 0x, PPC_FLOAT, PPC_NONE),
 GEN_HANDLER(mtfsb0, 0x3F, 0x06, 0x02, 0x001FF800, PPC_FLOAT),
 GEN_HANDLER(mtfsb1, 0x3F, 0x06, 0x01, 0x001FF800, PPC_FLOAT),
 GEN_HANDLER(mtfsf, 0x3F, 0x07, 0x16, 0x, PPC_FLOAT),
-- 
2.25.1

1 2 3 >

1 - 100 of 235 matches

Mail list logo