Re: [PATCH 1/3] qdev-properties: Add DEFINE_PROP_ON_OFF_AUTO_BIT64()

2024-04-30 Thread Akihiko Odaki

On 2024/04/30 23:41, Yuri Benditovich wrote:

On Sun, Apr 28, 2024 at 10:21 AM Akihiko Odaki  wrote:


DEFINE_PROP_ON_OFF_AUTO_BIT64() corresponds to DEFINE_PROP_ON_OFF_AUTO()
as DEFINE_PROP_BIT64() corresponds to DEFINE_PROP_BOOL(). The difference
is that DEFINE_PROP_ON_OFF_AUTO_BIT64() exposes OnOffAuto instead of
bool.

Signed-off-by: Akihiko Odaki 
---
  include/hw/qdev-properties.h | 18 
  hw/core/qdev-properties.c| 65 +++-
  2 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h
index 09aa04ca1e27..afec53a48470 100644
--- a/include/hw/qdev-properties.h
+++ b/include/hw/qdev-properties.h
@@ -43,11 +43,22 @@ struct PropertyInfo {
  ObjectPropertyRelease *release;
  };

+/**
+ * struct OnOffAutoBit64 - OnOffAuto storage with 64 elements.
+ * @on_bits: Bitmap of elements with "on".
+ * @auto_bits: Bitmap of elements with "auto".
+ */
+typedef struct OnOffAutoBit64 {
+uint64_t on_bits;
+uint64_t auto_bits;
+} OnOffAutoBit64;
+

  /*** qdev-properties.c ***/

  extern const PropertyInfo qdev_prop_bit;
  extern const PropertyInfo qdev_prop_bit64;
+extern const PropertyInfo qdev_prop_on_off_auto_bit64;
  extern const PropertyInfo qdev_prop_bool;
  extern const PropertyInfo qdev_prop_enum;
  extern const PropertyInfo qdev_prop_uint8;
@@ -100,6 +111,13 @@ extern const PropertyInfo qdev_prop_link;
  .set_default = true,  \
  .defval.u  = (bool)_defval)

+#define DEFINE_PROP_ON_OFF_AUTO_BIT64(_name, _state, _field, _bit, _defval) \
+DEFINE_PROP(_name, _state, _field, qdev_prop_on_off_auto_bit64, \
+OnOffAutoBit64, \
+.bitnr= (_bit), \
+.set_default = true,\
+.defval.i = (OnOffAuto)_defval)
+
  #define DEFINE_PROP_BOOL(_name, _state, _field, _defval) \
  DEFINE_PROP(_name, _state, _field, qdev_prop_bool, bool, \
  .set_default = true, \
diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c
index 7d6fa726fdf2..b96f54a1b912 100644
--- a/hw/core/qdev-properties.c
+++ b/hw/core/qdev-properties.c
@@ -188,7 +188,8 @@ const PropertyInfo qdev_prop_bit = {

  static uint64_t qdev_get_prop_mask64(Property *prop)
  {
-assert(prop->info == &qdev_prop_bit64);
+assert(prop->info == &qdev_prop_bit64 ||
+   prop->info == &qdev_prop_on_off_auto_bit64);
  return 0x1ull << prop->bitnr;
  }

@@ -233,6 +234,68 @@ const PropertyInfo qdev_prop_bit64 = {
  .set_default_value = set_default_value_bool,
  };

+static void prop_get_on_off_auto_bit64(Object *obj, Visitor *v,
+   const char *name, void *opaque,
+   Error **errp)
+{
+Property *prop = opaque;
+OnOffAutoBit64 *p = object_field_prop_ptr(obj, prop);
+int value;
+uint64_t mask = qdev_get_prop_mask64(prop);
+
+if (p->auto_bits & mask) {
+value = ON_OFF_AUTO_AUTO;
+} else if (p->on_bits & mask) {
+value = ON_OFF_AUTO_ON;
+} else {
+value = ON_OFF_AUTO_OFF;
+}
+
+visit_type_enum(v, name, &value, &OnOffAuto_lookup, errp);
+}
+
+static void prop_set_on_off_auto_bit64(Object *obj, Visitor *v,
+   const char *name, void *opaque,
+   Error **errp)
+{
+Property *prop = opaque;
+OnOffAutoBit64 *p = object_field_prop_ptr(obj, prop);
+int value;
+uint64_t mask = qdev_get_prop_mask64(prop);
+
+if (!visit_type_enum(v, name, &value, &OnOffAuto_lookup, errp)) {
+return;
+}
+
+switch (value) {
+case ON_OFF_AUTO_AUTO:
+p->on_bits &= ~mask;
+p->auto_bits |= mask;
+break;
+
+case ON_OFF_AUTO_ON:
+p->on_bits |= mask;
+p->auto_bits &= ~mask;
+break;
+
+case ON_OFF_AUTO_OFF:
+p->on_bits &= ~mask;
+p->auto_bits &= ~mask;
+break;
+}
+}
+
+const PropertyInfo qdev_prop_on_off_auto_bit64 = {
+.name  = "bool",


Does it mean that the name of this tristate type is "bool"? Or am I missing something?


No, this should be OnOffAuto. Thanks for pointing this out; I'll fix 
it in the next version.




Re: [PATCH 00/24] Misc PPC exception and BookE MMU clean ups

2024-04-30 Thread Nicholas Piggin
On Wed Apr 24, 2024 at 8:31 AM AEST, BALATON Zoltan wrote:
> This series does some further clean up mostly around BookE MMU to
> untangle it from other MMU models. It also contains some other changes
> that I've come up with while working on this. The first 3 patches are
> from the last exception handling clean up series that were dropped due
> to some error on CI but I'm not sure if that was because of some CI
> infrastructure problem or some problem with the patches as the error
> did not make much sense. So these patches are only rebased now, I made
> no other change to them until the issue is understood better. The rest
> are new patches I've added since the last series. Please review.

Hey, I'm just back from vacation trying to catch up. Looks like a pretty
nice series. Will take some time to review it.

Thanks,
Nick



[PATCH] ui/gtk: Explicitly set the default size of new window when untabifying

2024-04-30 Thread dongwon . kim
From: Dongwon Kim 

When untabifying, the default size of the new window was inadvertently
set to a size smaller than a quarter of the primary window size due
to the lack of explicit configuration. This commit addresses the issue by
ensuring that the size of untabified windows is set to match the surface
size.

Cc: Gerd Hoffmann 
Cc: Marc-André Lureau 
Cc: Vivek Kasireddy 
Signed-off-by: Dongwon Kim 
---
 ui/gtk.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ui/gtk.c b/ui/gtk.c
index 810d7fc796..269b8207d7 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -1395,6 +1395,9 @@ static void gd_menu_untabify(GtkMenuItem *item, void 
*opaque)
 if (!vc->window) {
 gtk_widget_set_sensitive(vc->menu_item, false);
 vc->window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
+gtk_window_set_default_size(GTK_WINDOW(vc->window),
+surface_width(vc->gfx.ds),
+surface_height(vc->gfx.ds));
 #if defined(CONFIG_OPENGL)
 if (vc->gfx.esurface) {
 eglDestroySurface(qemu_egl_display, vc->gfx.esurface);
-- 
2.34.1




Re: [PULL 00/21] target-arm queue

2024-04-30 Thread Richard Henderson

On 4/30/24 09:48, Peter Maydell wrote:

Here's another arm pullreq; nothing too exciting in here I think.

thanks
-- PMM

The following changes since commit 5fee33d97a7f2e95716417bd164f2f5264acd976:

   Merge tag 'samuel-thibault' of https://people.debian.org/~sthibault/qemu
into staging (2024-04-29 14:34:25 -0700)

are available in the Git repository at:

   https://git.linaro.org/people/pmaydell/qemu-arm.git  
tags/pull-target-arm-20240430

for you to fetch changes up to a0c325c4b05cf7815739d6a84e567b95c8c5be7e:

   tests/qtest : Add testcase for DM163 (2024-04-30 16:05:08 +0100)


target-arm queue:
  * hw/core/clock: allow clock_propagate on child clocks
  * hvf: arm: Remove unused PL1_WRITE_MASK define
  * target/arm: Restrict translation disabled alignment check to VMSA
  * docs/system/arm/emulation.rst: Add missing implemented features
  * target/arm: Enable FEAT_CSV2_3, FEAT_ETS2, FEAT_Spec_FPACC for 'max'
  * tests/avocado: update sunxi kernel from armbian to 6.6.16
  * target/arm: Make new CPUs default to 1GHz generic timer
  * hw/dmax/xlnx_dpdma: fix handling of address_extension descriptor fields
  * hw/char/stm32l4x5_usart: Fix memory corruption by adding correct class_size
  * hw/arm/npcm7xx: Store derivative OTP fuse key in little endian
  * hw/arm: Add DM163 display to B-L475E-IOT01A board


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/9.1 as 
appropriate.


r~




Re: [PULL 00/19] Misc patches (s390x clean-ups, fixes for crashes, ...)

2024-04-30 Thread Richard Henderson

On 4/30/24 00:13, Thomas Huth wrote:

  Hi Richard!

The following changes since commit 5fee33d97a7f2e95716417bd164f2f5264acd976:

   Merge tag 'samuel-thibault' of https://people.debian.org/~sthibault/qemu
into staging (2024-04-29 14:34:25 -0700)

are available in the Git repository at:

   https://gitlab.com/thuth/qemu.git  tags/pull-request-2024-04-30

for you to fetch changes up to cc6cb422e09592158586279fddeef107df05ecbb:

   .gitlab-ci.d/cirrus: Remove the netbsd and openbsd jobs (2024-04-30 07:09:22 
+0200)


* Clean-ups for "errp" handling in s390x cpu_model code
* Fix a possible abort in the "edu" device
* Add missing qga stubs for stand-alone qga builds and re-enable qga-ssh-test


Just an fyi, this test failed the first time around in the gcov job, but passed on the 
second attempt.  I'll keep an eye on it, but it may not be stable enough.




* Fix memory corruption caused by the stm32l4x5 uart device
* Update the s390x custom runner to Ubuntu 22.04
* Fix READ NATIVE MAX ADDRESS IDE commands to avoid a possible crash
* Shorten the runtime of Cirrus-CI jobs


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/9.1 as 
appropriate.


r~




Re: [PATCH v3 00/13] exec: Rework around CPUState user fields (part 2)

2024-04-30 Thread Ilya Leoshkevich
On Tue, Apr 30, 2024 at 09:00:17PM +0200, Philippe Mathieu-Daudé wrote:
> On 30/4/24 20:45, Philippe Mathieu-Daudé wrote:
> > Hi Ilya,
> > 
> > On 30/4/24 19:55, Ilya Leoshkevich wrote:
> > > On Tue, Apr 30, 2024 at 02:27:54PM +0200, Philippe Mathieu-Daudé wrote:
> > > > Missing WASM testing by Ilya (branch available at
> > > > https://gitlab.com/philmd/qemu/-/commits/tcg_flush_jmp_cache)
> > > 
> > > Hmm, it dies very early now:
> > > 
> > >    # gdb --args ./qemu-s390x -L /usr/s390x-linux-gnu 
> > > /build/wasmtime/target/s390x-unknown-linux-gnu/debug/deps/component_fuzz_util-d10a3a6b4ad8af47
> > > 
> > >    Thread 1 "qemu-s390x" received signal SIGSEGV, Segmentation fault.
> > >    0x5559b718 in cpu_common_realizefn (dev=0x557c28c0,
> > > errp=) at
> > > ../home/iii/myrepos/qemu/hw/core/cpu-common.c:217
> > >    217 cpu->accel->plugin_state =
> > > qemu_plugin_create_vcpu_state();
> > > 
> > >    (gdb) bt
> > >    #0  0x5559b718 in cpu_common_realizefn
> > > (dev=0x557c28c0, errp=) at
> > > ../home/iii/myrepos/qemu/hw/core/cpu-common.c:217
> > >    #1  0x5559f59a in s390_cpu_realizefn (dev=0x557c28c0,
> > > errp=0x7fffe1a0) at
> > > ../home/iii/myrepos/qemu/target/s390x/cpu.c:284
> > >    #2  0x5563f76b in device_set_realized (obj=<optimized out>, value=<optimized out>, errp=0x7fffe2e0) at
> > > ../home/iii/myrepos/qemu/hw/core/qdev.c:510
> > >    #3  0x5564363d in property_set_bool (obj=0x557c28c0,
> > > v=, name=, opaque=0x557a9140,
> > > errp=0x7fffe2e0) at ../home/iii/myrepos/qemu/qom/object.c:2362
> > >    #4  0x55646b9b in object_property_set
> > > (obj=obj@entry=0x557c28c0, name=name@entry=0x556e8ae2
> > > "realized", v=v@entry=0x557c6650,
> > > errp=errp@entry=0x7fffe2e0)
> > >    at ../home/iii/myrepos/qemu/qom/object.c:1471
> > >    #5  0x5564a43f in object_property_set_qobject
> > > (obj=obj@entry=0x557c28c0, name=name@entry=0x556e8ae2
> > > "realized", value=value@entry=0x557a7a90,
> > > errp=errp@entry=0x7fffe2e0)
> > >    at ../home/iii/myrepos/qemu/qom/qom-qobject.c:28
> > >    #6  0x55647204 in object_property_set_bool
> > > (obj=0x557c28c0, name=name@entry=0x556e8ae2 "realized",
> > > value=value@entry=true, errp=errp@entry=0x7fffe2e0)
> > >    at ../home/iii/myrepos/qemu/qom/object.c:1541
> > >    #7  0x5564025c in qdev_realize (dev=,
> > > bus=bus@entry=0x0, errp=errp@entry=0x7fffe2e0) at
> > > ../home/iii/myrepos/qemu/hw/core/qdev.c:291
> > >    #8  0x5559bbb4 in cpu_create (typename=)
> > > at ../home/iii/myrepos/qemu/hw/core/cpu-common.c:61
> > >    #9  0x5559a467 in main (argc=4, argv=0x7fffeaa8,
> > > envp=) at
> > > ../home/iii/myrepos/qemu/linux-user/main.c:811
> > > 
> > >    (gdb) p cpu
> > >    $1 = (CPUState *) 0x557c28c0
> > >    (gdb) p cpu->accel
> > >    $2 = (AccelCPUState *) 0x0
> > > 
> > > Configured with: '/home/iii/myrepos/qemu/configure'
> > > '--target-list=s390x-linux-user' '--disable-tools' '--disable-slirp'
> > > '--disable-fdt' '--disable-capstone' '--disable-docs'
> > > 
> > > If you don't see what can be wrong here right away, I can debug this.
> 
> I added this commit in the same branch:
> 
> -- >8 --
> Author: Philippe Mathieu-Daudé 
> Date:   Tue Apr 30 20:57:15 2024 +0200
> 
> accel/tcg: Initialize TCG plugins in cpu-target.c
> 
> Signed-off-by: Philippe Mathieu-Daudé 
> 
> diff --git a/cpu-target.c b/cpu-target.c
> index 5af120e8aa..585533cfa3 100644
> --- a/cpu-target.c
> +++ b/cpu-target.c
> @@ -46,6 +46,10 @@
>  #include "hw/core/accel-cpu.h"
>  #include "trace/trace-root.h"
>  #include "qemu/accel.h"
> +#ifdef CONFIG_PLUGIN
> +#include "accel/tcg/vcpu-state.h"
> +#include "qemu/plugin.h"
> +#endif
> 
>  #ifndef CONFIG_USER_ONLY
>  static int cpu_common_post_load(void *opaque, int version_id)
> @@ -131,6 +135,13 @@ const VMStateDescription vmstate_cpu_common = {
>  };
>  #endif
> 
> +#ifdef CONFIG_PLUGIN
> +static void qemu_plugin_vcpu_init__async(CPUState *cpu, run_on_cpu_data
> unused)
> +{
> +qemu_plugin_vcpu_init_hook(cpu);
> +}
> +#endif
> +
>  bool cpu_exec_realizefn(CPUState *cpu, Error **errp)
>  {
>  /* cache the cpu class for the hotpath */
> @@ -143,6 +154,15 @@ bool cpu_exec_realizefn(CPUState *cpu, Error **errp)
>  /* Wait until cpu initialization complete before exposing cpu. */
>  cpu_list_add(cpu);
> 
> +#ifdef CONFIG_PLUGIN
> +assert(cpu->accel);
> +/* Plugin initialization must wait until the cpu start executing code
> */
> +if (tcg_enabled()) {
> +cpu->accel->plugin_state = qemu_plugin_create_vcpu_state();
> +async_run_on_cpu(cpu, qemu_plugin_vcpu_init__async,
> RUN_ON_CPU_NULL);
> +}
> +#endif
> +
>  #ifdef CONFIG_USER_ONLY
>  assert(qdev_get_vmsd(DEVICE(cpu)) == NULL ||
> qdev_get_vmsd(DEVICE(cpu))->unmigratable);
> @@ -171,6 +191,13 @@ void cpu_exec_unrealizefn(CPUState *cpu)
>   

Re: [PATCH v10 0/2] tpm: add mssim backend

2024-04-30 Thread Stefan Berger




On 4/30/24 15:08, James Bottomley wrote:

The requested feedback was to convert the tpmdev handler to being json
based, which requires rethreading all the backends.  The good news is
this reduced quite a bit of code (especially as I converted it to
error_fatal handling as well, which removes the return status
threading).  The bad news is I can't test any of the conversions.
swtpm still isn't building on opensuse and, apparently, passthrough


It does build and packages are available:
- 
https://app.travis-ci.com/github/stefanberger/swtpm-distro-compile/jobs/621150390

- https://software.opensuse.org/package/swtpm


doesn't like my native TPM because it doesn't allow cancellation.

v3 pulls out more unneeded code in the visitor conversion, makes
migration work on external state preservation of the simulator and
adds documentation

v4 puts back the wrapper options (but doesn't add any for mssim since
it post dates the necessity)

v5 rebases to the latest master branch and adjusts for removed use_FOO ptrs

v6 updates help to exit zero; does some checkpatch tidying

v7 merge review feedback and add acks.

v8 adds better error handling, more code tidies and adds command
socket disconnection/reconnection (instead of trying to keep the
socket open the whole time).  This adds overhead, but makes
debugging guest kernel TPM issues much easier.

v9 Fix merge conflict with optarg->optstr conversion

v10 Fix more merge conflicts and update API versions

James

---

James Bottomley (2):
   tpm: convert tpmdev options processing to new visitor format
   tpm: add backend for mssim

  MAINTAINERS|   6 +
  backends/tpm/Kconfig   |   5 +
  backends/tpm/meson.build   |   1 +
  backends/tpm/tpm_emulator.c|  25 ++-
  backends/tpm/tpm_mssim.c   | 319 +
  backends/tpm/tpm_mssim.h   |  44 +
  backends/tpm/tpm_passthrough.c |  23 +--
  docs/specs/tpm.rst |  39 
  include/sysemu/tpm.h   |   5 +-
  include/sysemu/tpm_backend.h   |   2 +-
  qapi/tpm.json  |  50 +-
  system/tpm-hmp-cmds.c  |   9 +
  system/tpm.c   |  91 --
  system/vl.c|  19 +-
  14 files changed, 530 insertions(+), 108 deletions(-)
  create mode 100644 backends/tpm/tpm_mssim.c
  create mode 100644 backends/tpm/tpm_mssim.h





Re: [PATCH v10 2/2] tpm: add backend for mssim

2024-04-30 Thread Stefan Berger




On 4/30/24 15:08, James Bottomley wrote:

The Microsoft Simulator (mssim) is the reference emulation platform
for the TCG TPM 2.0 specification.

https://github.com/Microsoft/ms-tpm-20-ref.git

It exports a fairly simple network socket based protocol on two
sockets, one for command (default 2321) and one for control (default
2322).  This patch adds a simple backend that can speak the mssim
protocol over the network.  It also allows the two sockets to be
specified on the command line.  The benefits are twofold: firstly it
gives us a backend that actually speaks a standard TPM emulation
protocol instead of the linux specific TPM driver format of the
current emulated TPM backend and secondly, using the microsoft
protocol, the end point of the emulator can be anywhere on the
network, facilitating the cloud use case where a central TPM service
can be used over a control network.

The implementation does basic control commands like power off/on, but
doesn't implement cancellation or startup.  The former because
cancellation is pretty much useless on a fast operating TPM emulator
and the latter because this emulator is designed to be used with OVMF
which itself does TPM startup and I wanted to validate that.

To run this, simply download an emulator based on the MS specification
(package ibmswtpm2 on openSUSE) and run it, then add these two lines
to the qemu command and it will use the emulator.

 -tpmdev mssim,id=tpm0 \
 -device tpm-crb,tpmdev=tpm0 \

to use a remote emulator replace the first line with

 -tpmdev 
"{'type':'mssim','id':'tpm0','command':{'type':'inet','host':'remote','port':'2321'}}"

tpm-tis also works as the backend.

Signed-off-by: James Bottomley 
Acked-by: Markus Armbruster 

---

v2: convert to SocketAddr json and use qio_channel_socket_connect_sync()
v3: gate control power off by migration state keep control socket disconnected
 to test outside influence and add docs.
v7: TPMmssim -> TPMMssim; doc and json fixes
 Make command socket open each time (makes OS debugging easier)
---
  MAINTAINERS  |   6 +
  backends/tpm/Kconfig |   5 +
  backends/tpm/meson.build |   1 +
  backends/tpm/tpm_mssim.c | 319 +++
  backends/tpm/tpm_mssim.h |  44 ++
  docs/specs/tpm.rst   |  39 +
  qapi/tpm.json|  31 +++-
  system/tpm-hmp-cmds.c|   9 ++
  8 files changed, 450 insertions(+), 4 deletions(-)
  create mode 100644 backends/tpm/tpm_mssim.c
  create mode 100644 backends/tpm/tpm_mssim.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 302b6fd00c..6bd7e82d1b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3386,10 +3386,16 @@ F: include/hw/acpi/tpm.h
  F: include/sysemu/tpm*
  F: qapi/tpm.json
  F: backends/tpm/
+X: backends/tpm/tpm_mssim.*
  F: tests/qtest/*tpm*
  F: docs/specs/tpm.rst
  T: git https://github.com/stefanberger/qemu-tpm.git tpm-next
  
+MSSIM TPM Backend

+M: James Bottomley 
+S: Maintained
+F: backends/tpm/tpm_mssim.*
+
  Checkpatch
  S: Odd Fixes
  F: scripts/checkpatch.pl
diff --git a/backends/tpm/Kconfig b/backends/tpm/Kconfig
index 5d91eb89c2..d6d6fa53e9 100644
--- a/backends/tpm/Kconfig
+++ b/backends/tpm/Kconfig
@@ -12,3 +12,8 @@ config TPM_EMULATOR
  bool
  default y
  depends on TPM_BACKEND
+
+config TPM_MSSIM
+bool
+default y
+depends on TPM_BACKEND
diff --git a/backends/tpm/meson.build b/backends/tpm/meson.build
index 0bfa6c422b..c6f7c24cb1 100644
--- a/backends/tpm/meson.build
+++ b/backends/tpm/meson.build
@@ -3,4 +3,5 @@ if have_tpm
system_ss.add(files('tpm_util.c'))
system_ss.add(when: 'CONFIG_TPM_PASSTHROUGH', if_true: 
files('tpm_passthrough.c'))
system_ss.add(when: 'CONFIG_TPM_EMULATOR', if_true: files('tpm_emulator.c'))
+  system_ss.add(when: 'CONFIG_TPM_MSSIM', if_true: files('tpm_mssim.c'))
  endif
diff --git a/backends/tpm/tpm_mssim.c b/backends/tpm/tpm_mssim.c
new file mode 100644
index 00..962ad340c3
--- /dev/null
+++ b/backends/tpm/tpm_mssim.c
@@ -0,0 +1,319 @@
+/*
+ * Emulator TPM driver which connects over the mssim protocol
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Copyright (c) 2022
+ * Author: James Bottomley 
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qemu/sockets.h"
+
+#include "qapi/clone-visitor.h"
+#include "qapi/qapi-visit-tpm.h"
+
+#include "io/channel-socket.h"
+
+#include "sysemu/runstate.h"
+#include "sysemu/tpm_backend.h"
+#include "sysemu/tpm_util.h"
+
+#include "qom/object.h"
+
+#include "tpm_int.h"
+#include "tpm_mssim.h"
+
+#define ERROR_PREFIX "TPM mssim Emulator: "
+
+#define TYPE_TPM_MSSIM "tpm-mssim"
+OBJECT_DECLARE_SIMPLE_TYPE(TPMMssim, TPM_MSSIM)
+
+struct TPMMssim {
+TPMBackend parent;
+
+TPMMssimOptions opts;
+
+QIOChannelSocket *cmd_qc, *ctrl_qc;
+};
+
+static int tpm_send_ctrl(TPMMssim *t, uint32_t cmd, Error **errp)
+{
+int ret, retc;
+Error *local_err = NULL;
+
+ret = qio_channel_socket_connect_sync(t->ctrl_qc, t->opts.control, errp)

Re: [PATCH v7 08/10] util/bufferiszero: Simplify test_buffer_is_zero_next_accel

2024-04-30 Thread Philippe Mathieu-Daudé

On 30/4/24 21:42, Richard Henderson wrote:

Because the three alternatives are monotonic, we don't need
to keep a couple of bitmasks, just identify the strongest
alternative at startup.

Generalize test_buffer_is_zero_next_accel and init_accel
by always defining an accel_table array.

Signed-off-by: Richard Henderson 
---
  util/bufferiszero.c | 81 -
  1 file changed, 35 insertions(+), 46 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH v3 1/2] cxl/core: correct length of DPA field masks

2024-04-30 Thread Alison Schofield
On Wed, Apr 17, 2024 at 03:50:52PM +0800, Shiyang Ruan wrote:
> The length of Physical Address in General Media Event Record/DRAM Event
> Record is 64-bit, so the field mask should be defined as such length.
> Otherwise, this causes cxl_general_media and cxl_dram tracepoints to
> mask off the upper-32-bits of DPA addresses. The cxl_poison event is
> unaffected.
> 
> If userspace was doing its own DPA-to-HPA translation this could lead to
> incorrect page retirement decisions, but there is no known consumer
> (like rasdaemon) of this event today.
> 
> Fixes: d54a531a430b ("cxl/mem: Trace General Media Event Record")
> Cc: 
> Cc: Dan Williams 
> Cc: Davidlohr Bueso 
> Cc: Jonathan Cameron 
> Cc: Ira Weiny 
> Signed-off-by: Shiyang Ruan 

Hi Ruan,

This fixup is important for the Event DPA->HPA translation work, so I
grabbed it, updated it with most* of the review comments, and posted
with that set. I expect you saw that in your mailbox.

DaveJ queued it in a topic branch for 6.10 here:
https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git/log/?h=for-6.10/dpa-to-hpa

*I did not create a common mask for events and poison because I wanted to
limit the changes. If you'd like to make that change it would be welcomed.

-- Alison

> ---
>  drivers/cxl/core/trace.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
> index e5f13260fc52..cdfce932d5b1 100644
> --- a/drivers/cxl/core/trace.h
> +++ b/drivers/cxl/core/trace.h
> @@ -253,7 +253,7 @@ TRACE_EVENT(cxl_generic_event,
>   * DRAM Event Record
>   * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
>   */
> -#define CXL_DPA_FLAGS_MASK   0x3F
> +#define CXL_DPA_FLAGS_MASK   0x3FULL
>  #define CXL_DPA_MASK (~CXL_DPA_FLAGS_MASK)
>  
>  #define CXL_DPA_VOLATILE BIT(0)
> -- 
> 2.34.1
> 



[PATCH] hw/display: Add SSD1306 dot matrix display controller support

2024-04-30 Thread Ryan Mamone
From 617b2d92085d03524dcf5c223568a4856cdff47f Mon Sep 17 00:00:00 2001
From: Ryan Mamone 
Date: Tue, 30 Apr 2024 13:20:50 -0400
Subject: [PATCH] hw/display: Add SSD1306 dot matrix display controller support

Signed-off-by: Ryan Mamone 
---
hw/display/Kconfig |   5 +
hw/display/meson.build |   1 +
hw/display/ssd1306.c   | 612 +
3 files changed, 618 insertions(+)
create mode 100644 hw/display/ssd1306.c

diff --git a/hw/display/Kconfig b/hw/display/Kconfig
index 234c7de027..58afe4048b 100644
--- a/hw/display/Kconfig
+++ b/hw/display/Kconfig
@@ -37,6 +37,11 @@ config SSD0303
config SSD0323
 bool
+config SSD1306
+bool
+depends on I2C
+default y if I2C_DEVICES
+
config VGA_PCI
 bool
 default y if PCI_DEVICES
diff --git a/hw/display/meson.build b/hw/display/meson.build
index 4751aab3ba..39f0724e02 100644
--- a/hw/display/meson.build
+++ b/hw/display/meson.build
@@ -14,6 +14,7 @@ system_ss.add(when: 'CONFIG_PL110', if_true: files('pl110.c'))
system_ss.add(when: 'CONFIG_SII9022', if_true: files('sii9022.c'))
system_ss.add(when: 'CONFIG_SSD0303', if_true: files('ssd0303.c'))
system_ss.add(when: 'CONFIG_SSD0323', if_true: files('ssd0323.c'))
+system_ss.add(when: 'CONFIG_SSD1306', if_true: files('ssd1306.c'))
system_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xenfb.c'))
 system_ss.add(when: 'CONFIG_VGA_PCI', if_true: files('vga-pci.c'))
diff --git a/hw/display/ssd1306.c b/hw/display/ssd1306.c
new file mode 100644
index 00..f7314efddb
--- /dev/null
+++ b/hw/display/ssd1306.c
@@ -0,0 +1,612 @@
+/*
+ * SSD1306 Dot Matrix Display Controller.
+ *
+ * The SSD1306 controller can support a variety of different displays up to
+ * 128 x 64. The dimensions of the emulated display can be configured by the
+ * 'width' and 'height' properties and has been tested using the most common
+ * display dimensions of 128x64 and 128x32. A 'scaling' property has also
+ * been provided to perform integer pixel scaling of the output image to make
+ * it more viewable on pc displays. While the SSD1306 controller supports
+ * multiple physical interfaces this implementation only supports the I2C
+ * interface. Most of the commands relating to physical control, scrolling,
+ * multiplexing, and scanning direction are ignored.
+ *
+ * Copyright (C) 2024 Cambridge Consultants.
+ * Written by Ryan Mamone
+ *
+ * This code is licensed under the GPL.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "hw/i2c/i2c.h"
+#include "hw/qdev-properties.h"
+#include "migration/vmstate.h"
+#include "qemu/module.h"
+#include "ui/console.h"
+#include "qom/object.h"
+
+/*#define DEBUG_SSD1306 1*/
+
+#ifdef DEBUG_SSD1306
+#define DPRINTF(fmt, ...) \
+do { error_printf("ssd1306: " fmt , ## __VA_ARGS__); } while (0)
+#define BADF(fmt, ...) \
+do { error_printf("ssd1306: error: " fmt , ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) do {} while (0)
+#define BADF(fmt, ...) \
+do { error_printf("ssd1306: error: " fmt , ## __VA_ARGS__); } while (0)
+#endif
+
+
+/* Max supported display dimensions of the SSD1306 controller */
+#define MAX_WIDTH 128
+#define MAX_HEIGHT 64
+/* Max supported color depth 32bit */
+#define MAX_BPP 32
+
+enum ssd1306_addr_mode {
+ssd1306_ADDR_MODE_HORIZ = 0,
+ssd1306_ADDR_MODE_VERT = 1,
+ssd1306_ADDR_MODE_PAGE = 2,
+ssd1306_ADDR_MODE_INVALID
+};
+
+enum ssd1306_mode {
+ssd1306_IDLE,
+ssd1306_DATA,
+ssd1306_CMD,
+ssd1306_CMD_DATA
+};
+
+#define TYPE_SSD1306 "ssd1306"
+OBJECT_DECLARE_SIMPLE_TYPE(ssd1306_state, SSD1306)
+
+struct ssd1306_state {
+I2CSlave parent_obj;
+
+QemuConsole *con;
+/* Emulated display dimensions */
+uint8_t width;
+uint8_t height;
+/* Integer scaling factor to enlarge pixels for better viewing on PC 
displays */
+uint8_t scaling_factor;
+uint8_t addr_mode;
+uint8_t col;
+uint8_t col_start;
+uint8_t col_end;
+uint8_t page;
+uint8_t page_start;
+uint8_t page_end;
+int mirror;
+int flash;
+int enabled;
+int inverse;
+int redraw;
+enum ssd1306_mode mode;
+uint8_t cmd; /* Command ID byte */
+uint8_t cmd_byte_num; /* Command data parameter number */
+uint8_t mono_framebuffer[MAX_WIDTH * MAX_HEIGHT];
+uint8_t color_framebuffer[MAX_WIDTH * MAX_HEIGHT * (MAX_BPP / 8)];
+};
+
+/* Handler for I2C data transferred from SSD1306 controller */
+static uint8_t ssd1306_recv(I2CSlave *i2c)
+{
+BADF("Reads not implemented\n");
+return 0xff;
+}
+
+/* Handler for I2C data transferred to SSD1306 controller */
+static int ssd1306_send(I2CSlave *i2c, uint8_t data)
+{
+ssd1306_state *s = SSD1306(i2c);
+
+switch (s->mode) {
+case ssd1306_IDLE:
+s->mode = ((data & 0x40) == 0x40) ? ssd1306_DATA : ssd1306_CMD;
+break;
+case ssd1306_DATA:
+/*
+ * Map incoming data to pixels at correct location in framebuffer.
+ * Notably every 8 pixels are mapped ve

Re: [PATCH] system/qdev-monitor: move drain_call_rcu call under if (!dev) in qmp_device_add()

2024-04-30 Thread boris . ostrovsky




On 4/30/24 10:27 AM, Igor Mammedov wrote:

On Fri,  3 Nov 2023 13:56:02 +0300
Dmitrii Gavrilov  wrote:

Seems related to cpu hotplug issues,
CCing Boris for awareness.


Thank you Igor.

This patch appears to change timing in my test which makes the problem 
much more difficult to reproduce. However, it can still be triggered if 
I insert a delay after qdev_device_add() which is roughly equivalent to 
what was happening in drain_call_rcu().


(https://lore.kernel.org/kvm/534247e4-76d6-41d2-86c7-0155406cc...@oracle.com/ 
for context)




-boris



[PATCH v7 07/10] util/bufferiszero: Introduce biz_accel_fn typedef

2024-04-30 Thread Richard Henderson
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 util/bufferiszero.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index c9a7ded016..f9af7841ba 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -26,7 +26,8 @@
 #include "qemu/bswap.h"
 #include "host/cpuinfo.h"
 
-static bool (*buffer_is_zero_accel)(const void *, size_t);
+typedef bool (*biz_accel_fn)(const void *, size_t);
+static biz_accel_fn buffer_is_zero_accel;
 
 static bool buffer_is_zero_int_lt256(const void *buf, size_t len)
 {
@@ -184,7 +185,7 @@ select_accel_cpuinfo(unsigned info)
 /* Array is sorted in order of algorithm preference. */
 static const struct {
 unsigned bit;
-bool (*fn)(const void *, size_t);
+biz_accel_fn fn;
 } all[] = {
 #ifdef CONFIG_AVX2_OPT
 { CPUINFO_AVX2,buffer_zero_avx2 },
@@ -231,7 +232,7 @@ bool test_buffer_is_zero_next_accel(void)
 #define INIT_ACCEL buffer_is_zero_int_ge256
 #endif
 
-static bool (*buffer_is_zero_accel)(const void *, size_t) = INIT_ACCEL;
+static biz_accel_fn buffer_is_zero_accel = INIT_ACCEL;
 
 bool buffer_is_zero_ool(const void *buf, size_t len)
 {
-- 
2.34.1




[PATCH v7 02/10] util/bufferiszero: Remove AVX512 variant

2024-04-30 Thread Richard Henderson
From: Alexander Monakov 

Thanks to early checks in the inline buffer_is_zero wrapper, the SIMD
routines are invoked much more rarely in normal use when most buffers
are non-zero. This makes use of AVX512 unprofitable, as it incurs extra
frequency and voltage transition periods during which the CPU operates
at reduced performance, as described in
https://travisdowns.github.io/blog/2020/01/17/avxfreq1.html

Signed-off-by: Mikhail Romanov 
Signed-off-by: Alexander Monakov 
Reviewed-by: Richard Henderson 
Message-Id: <20240206204809.9859-4-amona...@ispras.ru>
Signed-off-by: Richard Henderson 
---
 util/bufferiszero.c | 38 +++---
 1 file changed, 3 insertions(+), 35 deletions(-)

diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index f5a3634f9a..641d5f9b9e 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -64,7 +64,7 @@ buffer_zero_int(const void *buf, size_t len)
 }
 }
 
-#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT) || 
defined(__SSE2__)
+#if defined(CONFIG_AVX2_OPT) || defined(__SSE2__)
 #include 
 
 /* Note that each of these vectorized functions require len >= 64.  */
@@ -128,41 +128,12 @@ buffer_zero_avx2(const void *buf, size_t len)
 }
 #endif /* CONFIG_AVX2_OPT */
 
-#ifdef CONFIG_AVX512F_OPT
-static bool __attribute__((target("avx512f")))
-buffer_zero_avx512(const void *buf, size_t len)
-{
-/* Begin with an unaligned head of 64 bytes.  */
-__m512i t = _mm512_loadu_si512(buf);
-__m512i *p = (__m512i *)(((uintptr_t)buf + 5 * 64) & -64);
-__m512i *e = (__m512i *)(((uintptr_t)buf + len) & -64);
-
-/* Loop over 64-byte aligned blocks of 256.  */
-while (p <= e) {
-__builtin_prefetch(p);
-if (unlikely(_mm512_test_epi64_mask(t, t))) {
-return false;
-}
-t = p[-4] | p[-3] | p[-2] | p[-1];
-p += 4;
-}
-
-t |= _mm512_loadu_si512(buf + len - 4 * 64);
-t |= _mm512_loadu_si512(buf + len - 3 * 64);
-t |= _mm512_loadu_si512(buf + len - 2 * 64);
-t |= _mm512_loadu_si512(buf + len - 1 * 64);
-
-return !_mm512_test_epi64_mask(t, t);
-
-}
-#endif /* CONFIG_AVX512F_OPT */
-
 /*
  * Make sure that these variables are appropriately initialized when
  * SSE2 is enabled on the compiler command-line, but the compiler is
  * too old to support CONFIG_AVX2_OPT.
  */
-#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
+#if defined(CONFIG_AVX2_OPT)
 # define INIT_USED 0
 # define INIT_LENGTH   0
 # define INIT_ACCELbuffer_zero_int
@@ -188,9 +159,6 @@ select_accel_cpuinfo(unsigned info)
 unsigned len;
 bool (*fn)(const void *, size_t);
 } all[] = {
-#ifdef CONFIG_AVX512F_OPT
-{ CPUINFO_AVX512F, 256, buffer_zero_avx512 },
-#endif
 #ifdef CONFIG_AVX2_OPT
 { CPUINFO_AVX2,128, buffer_zero_avx2 },
 #endif
@@ -208,7 +176,7 @@ select_accel_cpuinfo(unsigned info)
 return 0;
 }
 
-#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
+#if defined(CONFIG_AVX2_OPT)
 static void __attribute__((constructor)) init_accel(void)
 {
 used_accel = select_accel_cpuinfo(cpuinfo_init());
-- 
2.34.1




[PATCH v7 08/10] util/bufferiszero: Simplify test_buffer_is_zero_next_accel

2024-04-30 Thread Richard Henderson
Because the three alternatives are monotonic, we don't need
to keep a couple of bitmasks, just identify the strongest
alternative at startup.

Generalize test_buffer_is_zero_next_accel and init_accel
by always defining an accel_table array.

Signed-off-by: Richard Henderson 
---
 util/bufferiszero.c | 81 -
 1 file changed, 35 insertions(+), 46 deletions(-)

diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index f9af7841ba..7218154a13 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -27,7 +27,6 @@
 #include "host/cpuinfo.h"
 
 typedef bool (*biz_accel_fn)(const void *, size_t);
-static biz_accel_fn buffer_is_zero_accel;
 
 static bool buffer_is_zero_int_lt256(const void *buf, size_t len)
 {
@@ -179,60 +178,35 @@ buffer_zero_avx2(const void *buf, size_t len)
 }
 #endif /* CONFIG_AVX2_OPT */
 
-static unsigned __attribute__((noinline))
-select_accel_cpuinfo(unsigned info)
-{
-/* Array is sorted in order of algorithm preference. */
-static const struct {
-unsigned bit;
-biz_accel_fn fn;
-} all[] = {
+static biz_accel_fn const accel_table[] = {
+buffer_is_zero_int_ge256,
+buffer_zero_sse2,
 #ifdef CONFIG_AVX2_OPT
-{ CPUINFO_AVX2,buffer_zero_avx2 },
+buffer_zero_avx2,
 #endif
-{ CPUINFO_SSE2,buffer_zero_sse2 },
-{ CPUINFO_ALWAYS,  buffer_is_zero_int_ge256 },
-};
+};
 
-for (unsigned i = 0; i < ARRAY_SIZE(all); ++i) {
-if (info & all[i].bit) {
-buffer_is_zero_accel = all[i].fn;
-return all[i].bit;
-}
+static unsigned best_accel(void)
+{
+unsigned info = cpuinfo_init();
+
+#ifdef CONFIG_AVX2_OPT
+if (info & CPUINFO_AVX2) {
+return 2;
 }
-return 0;
+#endif
+return info & CPUINFO_SSE2 ? 1 : 0;
 }
 
-static unsigned used_accel;
-
-static void __attribute__((constructor)) init_accel(void)
-{
-used_accel = select_accel_cpuinfo(cpuinfo_init());
-}
-
-#define INIT_ACCEL NULL
-
-bool test_buffer_is_zero_next_accel(void)
-{
-/*
- * Accumulate the accelerators that we've already tested, and
- * remove them from the set to test this round.  We'll get back
- * a zero from select_accel_cpuinfo when there are no more.
- */
-unsigned used = select_accel_cpuinfo(cpuinfo & ~used_accel);
-used_accel |= used;
-return used;
-}
 #else
-bool test_buffer_is_zero_next_accel(void)
-{
-return false;
-}
-
-#define INIT_ACCEL buffer_is_zero_int_ge256
+#define best_accel() 0
+static biz_accel_fn const accel_table[1] = {
+buffer_is_zero_int_ge256
+};
 #endif
 
-static biz_accel_fn buffer_is_zero_accel = INIT_ACCEL;
+static biz_accel_fn buffer_is_zero_accel;
+static unsigned accel_index;
 
 bool buffer_is_zero_ool(const void *buf, size_t len)
 {
@@ -257,3 +231,18 @@ bool buffer_is_zero_ge256(const void *buf, size_t len)
 {
 return buffer_is_zero_accel(buf, len);
 }
+
+bool test_buffer_is_zero_next_accel(void)
+{
+if (accel_index != 0) {
+buffer_is_zero_accel = accel_table[--accel_index];
+return true;
+}
+return false;
+}
+
+static void __attribute__((constructor)) init_accel(void)
+{
+accel_index = best_accel();
+buffer_is_zero_accel = accel_table[accel_index];
+}
-- 
2.34.1




[PATCH v7 04/10] util/bufferiszero: Remove useless prefetches

2024-04-30 Thread Richard Henderson
From: Alexander Monakov 

Use of prefetching in bufferiszero.c is quite questionable:

- prefetches are issued just a few CPU cycles before the corresponding
  line would be hit by demand loads;

- they are done for simple access patterns, i.e. where hardware
  prefetchers can perform better;

- they compete for load ports in loops that should be limited by load
  port throughput rather than ALU throughput.

Signed-off-by: Alexander Monakov 
Signed-off-by: Mikhail Romanov 
Reviewed-by: Richard Henderson 
Message-Id: <20240206204809.9859-5-amona...@ispras.ru>
---
 util/bufferiszero.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index 972f394cbd..00118d649e 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -50,7 +50,6 @@ static bool buffer_is_zero_integer(const void *buf, size_t len)
 const uint64_t *e = (uint64_t *)(((uintptr_t)buf + len) & -8);
 
 for (; p + 8 <= e; p += 8) {
-__builtin_prefetch(p + 8);
 if (t) {
 return false;
 }
@@ -80,7 +79,6 @@ buffer_zero_sse2(const void *buf, size_t len)
 
 /* Loop over 16-byte aligned blocks of 64.  */
 while (likely(p <= e)) {
-__builtin_prefetch(p);
 t = _mm_cmpeq_epi8(t, zero);
 if (unlikely(_mm_movemask_epi8(t) != 0xFFFF)) {
 return false;
@@ -111,7 +109,6 @@ buffer_zero_avx2(const void *buf, size_t len)
 
 /* Loop over 32-byte aligned blocks of 128.  */
 while (p <= e) {
-__builtin_prefetch(p);
 if (unlikely(!_mm256_testz_si256(t, t))) {
 return false;
 }
-- 
2.34.1




[PATCH v7 09/10] util/bufferiszero: Add simd acceleration for aarch64

2024-04-30 Thread Richard Henderson
Because non-embedded aarch64 is expected to have AdvSIMD enabled, merely
double-check with the compiler flags for __ARM_NEON and don't bother with
a runtime check.  Otherwise, model the loop after the x86 SSE2 function.

Use UMAXV for the vector reduction.  This is 3 cycles on cortex-a76 and
2 cycles on neoverse-n1.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 util/bufferiszero.c | 67 +
 1 file changed, 67 insertions(+)

diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index 7218154a13..74864f7b78 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -198,6 +198,73 @@ static unsigned best_accel(void)
 return info & CPUINFO_SSE2 ? 1 : 0;
 }
 
+#elif defined(__aarch64__) && defined(__ARM_NEON)
+#include <arm_neon.h>
+
+/*
+ * Helper for preventing the compiler from reassociating
+ * chains of binary vector operations.
+ */
+#define REASSOC_BARRIER(vec0, vec1) asm("" : "+w"(vec0), "+w"(vec1))
+
+static bool buffer_is_zero_simd(const void *buf, size_t len)
+{
+uint32x4_t t0, t1, t2, t3;
+
+/* Align head/tail to 16-byte boundaries.  */
+const uint32x4_t *p = QEMU_ALIGN_PTR_DOWN(buf + 16, 16);
+const uint32x4_t *e = QEMU_ALIGN_PTR_DOWN(buf + len - 1, 16);
+
+/* Unaligned loads at head/tail.  */
+t0 = vld1q_u32(buf) | vld1q_u32(buf + len - 16);
+
+/* Collect a partial block at tail end.  */
+t1 = e[-7] | e[-6];
+t2 = e[-5] | e[-4];
+t3 = e[-3] | e[-2];
+t0 |= e[-1];
+REASSOC_BARRIER(t0, t1);
+REASSOC_BARRIER(t2, t3);
+t0 |= t1;
+t2 |= t3;
+REASSOC_BARRIER(t0, t2);
+t0 |= t2;
+
+/*
+ * Loop over complete 128-byte blocks.
+ * With the head and tail removed, e - p >= 14, so the loop
+ * must iterate at least once.
+ */
+do {
+/*
+ * Reduce via UMAXV.  Whatever the actual result,
+ * it will only be zero if all input bytes are zero.
+ */
+if (unlikely(vmaxvq_u32(t0) != 0)) {
+return false;
+}
+
+t0 = p[0] | p[1];
+t1 = p[2] | p[3];
+t2 = p[4] | p[5];
+t3 = p[6] | p[7];
+REASSOC_BARRIER(t0, t1);
+REASSOC_BARRIER(t2, t3);
+t0 |= t1;
+t2 |= t3;
+REASSOC_BARRIER(t0, t2);
+t0 |= t2;
+p += 8;
+} while (p < e - 7);
+
+return vmaxvq_u32(t0) == 0;
+}
+
+#define best_accel() 1
+static biz_accel_fn const accel_table[] = {
+buffer_is_zero_int_ge256,
+buffer_is_zero_simd,
+};
 #else
 #define best_accel() 0
 static biz_accel_fn const accel_table[1] = {
-- 
2.34.1




[PATCH v7 00/10]

2024-04-30 Thread Richard Henderson
v3: https://patchew.org/QEMU/20240206204809.9859-1-amona...@ispras.ru/
v6: 
https://patchew.org/QEMU/20240424225705.929812-1-richard.hender...@linaro.org/

Changes for v7:
  - Generalize test_buffer_is_zero_next_accel and initialization (phil)


r~


Alexander Monakov (5):
  util/bufferiszero: Remove SSE4.1 variant
  util/bufferiszero: Remove AVX512 variant
  util/bufferiszero: Reorganize for early test for acceleration
  util/bufferiszero: Remove useless prefetches
  util/bufferiszero: Optimize SSE2 and AVX2 variants

Richard Henderson (5):
  util/bufferiszero: Improve scalar variant
  util/bufferiszero: Introduce biz_accel_fn typedef
  util/bufferiszero: Simplify test_buffer_is_zero_next_accel
  util/bufferiszero: Add simd acceleration for aarch64
  tests/bench: Add bufferiszero-bench

 include/qemu/cutils.h|  32 ++-
 tests/bench/bufferiszero-bench.c |  47 
 util/bufferiszero.c  | 465 ---
 tests/bench/meson.build  |   1 +
 4 files changed, 324 insertions(+), 221 deletions(-)
 create mode 100644 tests/bench/bufferiszero-bench.c

-- 
2.34.1




[PATCH v7 06/10] util/bufferiszero: Improve scalar variant

2024-04-30 Thread Richard Henderson
Split less-than and greater-than 256 cases.
Use unaligned accesses for head and tail.
Avoid using out-of-bounds pointers in loop boundary conditions.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 util/bufferiszero.c | 85 +++--
 1 file changed, 51 insertions(+), 34 deletions(-)

diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index 02df82b4ff..c9a7ded016 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -28,40 +28,57 @@
 
 static bool (*buffer_is_zero_accel)(const void *, size_t);
 
-static bool buffer_is_zero_integer(const void *buf, size_t len)
+static bool buffer_is_zero_int_lt256(const void *buf, size_t len)
 {
-if (unlikely(len < 8)) {
-/* For a very small buffer, simply accumulate all the bytes.  */
-const unsigned char *p = buf;
-const unsigned char *e = buf + len;
-unsigned char t = 0;
+uint64_t t;
+const uint64_t *p, *e;
 
-do {
-t |= *p++;
-} while (p < e);
-
-return t == 0;
-} else {
-/* Otherwise, use the unaligned memory access functions to
-   handle the beginning and end of the buffer, with a couple
-   of loops handling the middle aligned section.  */
-uint64_t t = ldq_he_p(buf);
-const uint64_t *p = (uint64_t *)(((uintptr_t)buf + 8) & -8);
-const uint64_t *e = (uint64_t *)(((uintptr_t)buf + len) & -8);
-
-for (; p + 8 <= e; p += 8) {
-if (t) {
-return false;
-}
-t = p[0] | p[1] | p[2] | p[3] | p[4] | p[5] | p[6] | p[7];
-}
-while (p < e) {
-t |= *p++;
-}
-t |= ldq_he_p(buf + len - 8);
-
-return t == 0;
+/*
+ * Use unaligned memory access functions to handle
+ * the beginning and end of the buffer.
+ */
+if (unlikely(len <= 8)) {
+return (ldl_he_p(buf) | ldl_he_p(buf + len - 4)) == 0;
 }
+
+t = ldq_he_p(buf) | ldq_he_p(buf + len - 8);
+p = QEMU_ALIGN_PTR_DOWN(buf + 8, 8);
+e = QEMU_ALIGN_PTR_DOWN(buf + len - 1, 8);
+
+/* Read 0 to 31 aligned words from the middle. */
+while (p < e) {
+t |= *p++;
+}
+return t == 0;
+}
+
+static bool buffer_is_zero_int_ge256(const void *buf, size_t len)
+{
+/*
+ * Use unaligned memory access functions to handle
+ * the beginning and end of the buffer.
+ */
+uint64_t t = ldq_he_p(buf) | ldq_he_p(buf + len - 8);
+const uint64_t *p = QEMU_ALIGN_PTR_DOWN(buf + 8, 8);
+const uint64_t *e = QEMU_ALIGN_PTR_DOWN(buf + len - 1, 8);
+
+/* Collect a partial block at the tail end. */
+t |= e[-7] | e[-6] | e[-5] | e[-4] | e[-3] | e[-2] | e[-1];
+
+/*
+ * Loop over 64 byte blocks.
+ * With the head and tail removed, e - p >= 30,
+ * so the loop must iterate at least 3 times.
+ */
+do {
+if (t) {
+return false;
+}
+t = p[0] | p[1] | p[2] | p[3] | p[4] | p[5] | p[6] | p[7];
+p += 8;
+} while (p < e - 7);
+
+return t == 0;
 }
 
 #if defined(CONFIG_AVX2_OPT) || defined(__SSE2__)
@@ -173,7 +190,7 @@ select_accel_cpuinfo(unsigned info)
 { CPUINFO_AVX2,buffer_zero_avx2 },
 #endif
 { CPUINFO_SSE2,buffer_zero_sse2 },
-{ CPUINFO_ALWAYS,  buffer_is_zero_integer },
+{ CPUINFO_ALWAYS,  buffer_is_zero_int_ge256 },
 };
 
 for (unsigned i = 0; i < ARRAY_SIZE(all); ++i) {
@@ -211,7 +228,7 @@ bool test_buffer_is_zero_next_accel(void)
 return false;
 }
 
-#define INIT_ACCEL buffer_is_zero_integer
+#define INIT_ACCEL buffer_is_zero_int_ge256
 #endif
 
 static bool (*buffer_is_zero_accel)(const void *, size_t) = INIT_ACCEL;
@@ -232,7 +249,7 @@ bool buffer_is_zero_ool(const void *buf, size_t len)
 if (likely(len >= 256)) {
 return buffer_is_zero_accel(buf, len);
 }
-return buffer_is_zero_integer(buf, len);
+return buffer_is_zero_int_lt256(buf, len);
 }
 
 bool buffer_is_zero_ge256(const void *buf, size_t len)
-- 
2.34.1




[PATCH v7 03/10] util/bufferiszero: Reorganize for early test for acceleration

2024-04-30 Thread Richard Henderson
From: Alexander Monakov 

Test for length >= 256 inline, where it is often a constant.
Before calling into the accelerated routine, sample three bytes
from the buffer, which handles most non-zero buffers.

Signed-off-by: Alexander Monakov 
Signed-off-by: Mikhail Romanov 
Message-Id: <20240206204809.9859-3-amona...@ispras.ru>
[rth: Use __builtin_constant_p; move the indirect call out of line.]
Signed-off-by: Richard Henderson 
---
 include/qemu/cutils.h | 32 -
 util/bufferiszero.c   | 84 +--
 2 files changed, 63 insertions(+), 53 deletions(-)

diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h
index 92c927a6a3..741dade7cf 100644
--- a/include/qemu/cutils.h
+++ b/include/qemu/cutils.h
@@ -187,9 +187,39 @@ char *freq_to_str(uint64_t freq_hz);
 /* used to print char* safely */
 #define STR_OR_NULL(str) ((str) ? (str) : "null")
 
-bool buffer_is_zero(const void *buf, size_t len);
+/*
+ * Check if a buffer is all zeroes.
+ */
+
+bool buffer_is_zero_ool(const void *vbuf, size_t len);
+bool buffer_is_zero_ge256(const void *vbuf, size_t len);
 bool test_buffer_is_zero_next_accel(void);
 
+static inline bool buffer_is_zero_sample3(const char *buf, size_t len)
+{
+/*
+ * For any reasonably sized buffer, these three samples come from
+ * three different cachelines.  In qemu-img usage, we find that
+ * each byte eliminates more than half of all buffer testing.
+ * It is therefore critical to performance that the byte tests
+ * short-circuit, so that we do not pull in additional cache lines.
+ * Do not "optimize" this to !(a | b | c).
+ */
+return !buf[0] && !buf[len - 1] && !buf[len / 2];
+}
+
+#ifdef __OPTIMIZE__
+static inline bool buffer_is_zero(const void *buf, size_t len)
+{
+return (__builtin_constant_p(len) && len >= 256
+? buffer_is_zero_sample3(buf, len) &&
+  buffer_is_zero_ge256(buf, len)
+: buffer_is_zero_ool(buf, len));
+}
+#else
+#define buffer_is_zero  buffer_is_zero_ool
+#endif
+
 /*
  * Implementation of ULEB128 (http://en.wikipedia.org/wiki/LEB128)
  * Input is limited to 14-bit numbers
diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index 641d5f9b9e..972f394cbd 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -26,8 +26,9 @@
 #include "qemu/bswap.h"
 #include "host/cpuinfo.h"
 
-static bool
-buffer_zero_int(const void *buf, size_t len)
+static bool (*buffer_is_zero_accel)(const void *, size_t);
+
+static bool buffer_is_zero_integer(const void *buf, size_t len)
 {
 if (unlikely(len < 8)) {
 /* For a very small buffer, simply accumulate all the bytes.  */
@@ -128,60 +129,38 @@ buffer_zero_avx2(const void *buf, size_t len)
 }
 #endif /* CONFIG_AVX2_OPT */
 
-/*
- * Make sure that these variables are appropriately initialized when
- * SSE2 is enabled on the compiler command-line, but the compiler is
- * too old to support CONFIG_AVX2_OPT.
- */
-#if defined(CONFIG_AVX2_OPT)
-# define INIT_USED 0
-# define INIT_LENGTH   0
-# define INIT_ACCELbuffer_zero_int
-#else
-# ifndef __SSE2__
-#  error "ISA selection confusion"
-# endif
-# define INIT_USED CPUINFO_SSE2
-# define INIT_LENGTH   64
-# define INIT_ACCELbuffer_zero_sse2
-#endif
-
-static unsigned used_accel = INIT_USED;
-static unsigned length_to_accel = INIT_LENGTH;
-static bool (*buffer_accel)(const void *, size_t) = INIT_ACCEL;
-
 static unsigned __attribute__((noinline))
 select_accel_cpuinfo(unsigned info)
 {
 /* Array is sorted in order of algorithm preference. */
 static const struct {
 unsigned bit;
-unsigned len;
 bool (*fn)(const void *, size_t);
 } all[] = {
 #ifdef CONFIG_AVX2_OPT
-{ CPUINFO_AVX2,128, buffer_zero_avx2 },
+{ CPUINFO_AVX2,buffer_zero_avx2 },
 #endif
-{ CPUINFO_SSE2, 64, buffer_zero_sse2 },
-{ CPUINFO_ALWAYS,0, buffer_zero_int },
+{ CPUINFO_SSE2,buffer_zero_sse2 },
+{ CPUINFO_ALWAYS,  buffer_is_zero_integer },
 };
 
 for (unsigned i = 0; i < ARRAY_SIZE(all); ++i) {
 if (info & all[i].bit) {
-length_to_accel = all[i].len;
-buffer_accel = all[i].fn;
+buffer_is_zero_accel = all[i].fn;
 return all[i].bit;
 }
 }
 return 0;
 }
 
-#if defined(CONFIG_AVX2_OPT)
+static unsigned used_accel;
+
 static void __attribute__((constructor)) init_accel(void)
 {
 used_accel = select_accel_cpuinfo(cpuinfo_init());
 }
-#endif /* CONFIG_AVX2_OPT */
+
+#define INIT_ACCEL NULL
 
 bool test_buffer_is_zero_next_accel(void)
 {
@@ -194,36 +173,37 @@ bool test_buffer_is_zero_next_accel(void)
 used_accel |= used;
 return used;
 }
-
-static bool select_accel_fn(const void *buf, size_t len)
-{
-if (likely(len >= length_to_accel)) {
-return buffer_accel(buf, len);
-}
-return buffer_zero_int(buf, len);
-}
-
 #else
-#define select_accel_fn  buffer_zero_int
 bool test_

[PATCH v7 05/10] util/bufferiszero: Optimize SSE2 and AVX2 variants

2024-04-30 Thread Richard Henderson
From: Alexander Monakov 

Increase unroll factor in SIMD loops from 4x to 8x in order to move
their bottlenecks from ALU port contention to load issue rate (two loads
per cycle on popular x86 implementations).

Avoid using out-of-bounds pointers in loop boundary conditions.

Follow SSE2 implementation strategy in the AVX2 variant. Avoid use of
PTEST, which is not profitable there (like in the removed SSE4 variant).

Signed-off-by: Alexander Monakov 
Signed-off-by: Mikhail Romanov 
Reviewed-by: Richard Henderson 
Message-Id: <20240206204809.9859-6-amona...@ispras.ru>
---
 util/bufferiszero.c | 111 +---
 1 file changed, 73 insertions(+), 38 deletions(-)

diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index 00118d649e..02df82b4ff 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -67,62 +67,97 @@ static bool buffer_is_zero_integer(const void *buf, size_t len)
 #if defined(CONFIG_AVX2_OPT) || defined(__SSE2__)
 #include <immintrin.h>
 
-/* Note that each of these vectorized functions require len >= 64.  */
+/* Helper for preventing the compiler from reassociating
+   chains of binary vector operations.  */
+#define SSE_REASSOC_BARRIER(vec0, vec1) asm("" : "+x"(vec0), "+x"(vec1))
+
+/* Note that these vectorized functions may assume len >= 256.  */
 
 static bool __attribute__((target("sse2")))
 buffer_zero_sse2(const void *buf, size_t len)
 {
-__m128i t = _mm_loadu_si128(buf);
-__m128i *p = (__m128i *)(((uintptr_t)buf + 5 * 16) & -16);
-__m128i *e = (__m128i *)(((uintptr_t)buf + len) & -16);
-__m128i zero = _mm_setzero_si128();
+/* Unaligned loads at head/tail.  */
+__m128i v = *(__m128i_u *)(buf);
+__m128i w = *(__m128i_u *)(buf + len - 16);
+/* Align head/tail to 16-byte boundaries.  */
+const __m128i *p = QEMU_ALIGN_PTR_DOWN(buf + 16, 16);
+const __m128i *e = QEMU_ALIGN_PTR_DOWN(buf + len - 1, 16);
+__m128i zero = { 0 };
 
-/* Loop over 16-byte aligned blocks of 64.  */
-while (likely(p <= e)) {
-t = _mm_cmpeq_epi8(t, zero);
-if (unlikely(_mm_movemask_epi8(t) != 0xFFFF)) {
+/* Collect a partial block at tail end.  */
+v |= e[-1]; w |= e[-2];
+SSE_REASSOC_BARRIER(v, w);
+v |= e[-3]; w |= e[-4];
+SSE_REASSOC_BARRIER(v, w);
+v |= e[-5]; w |= e[-6];
+SSE_REASSOC_BARRIER(v, w);
+v |= e[-7]; v |= w;
+
+/*
+ * Loop over complete 128-byte blocks.
+ * With the head and tail removed, e - p >= 14, so the loop
+ * must iterate at least once.
+ */
+do {
+v = _mm_cmpeq_epi8(v, zero);
+if (unlikely(_mm_movemask_epi8(v) != 0xFFFF)) {
 return false;
 }
-t = p[-4] | p[-3] | p[-2] | p[-1];
-p += 4;
-}
+v = p[0]; w = p[1];
+SSE_REASSOC_BARRIER(v, w);
+v |= p[2]; w |= p[3];
+SSE_REASSOC_BARRIER(v, w);
+v |= p[4]; w |= p[5];
+SSE_REASSOC_BARRIER(v, w);
+v |= p[6]; w |= p[7];
+SSE_REASSOC_BARRIER(v, w);
+v |= w;
+p += 8;
+} while (p < e - 7);
 
-/* Finish the aligned tail.  */
-t |= e[-3];
-t |= e[-2];
-t |= e[-1];
-
-/* Finish the unaligned tail.  */
-t |= _mm_loadu_si128(buf + len - 16);
-
-return _mm_movemask_epi8(_mm_cmpeq_epi8(t, zero)) == 0xFFFF;
+return _mm_movemask_epi8(_mm_cmpeq_epi8(v, zero)) == 0xFFFF;
 }
 
 #ifdef CONFIG_AVX2_OPT
 static bool __attribute__((target("avx2")))
 buffer_zero_avx2(const void *buf, size_t len)
 {
-/* Begin with an unaligned head of 32 bytes.  */
-__m256i t = _mm256_loadu_si256(buf);
-__m256i *p = (__m256i *)(((uintptr_t)buf + 5 * 32) & -32);
-__m256i *e = (__m256i *)(((uintptr_t)buf + len) & -32);
+/* Unaligned loads at head/tail.  */
+__m256i v = *(__m256i_u *)(buf);
+__m256i w = *(__m256i_u *)(buf + len - 32);
+/* Align head/tail to 32-byte boundaries.  */
+const __m256i *p = QEMU_ALIGN_PTR_DOWN(buf + 32, 32);
+const __m256i *e = QEMU_ALIGN_PTR_DOWN(buf + len - 1, 32);
+__m256i zero = { 0 };
 
-/* Loop over 32-byte aligned blocks of 128.  */
-while (p <= e) {
-if (unlikely(!_mm256_testz_si256(t, t))) {
+/* Collect a partial block at tail end.  */
+v |= e[-1]; w |= e[-2];
+SSE_REASSOC_BARRIER(v, w);
+v |= e[-3]; w |= e[-4];
+SSE_REASSOC_BARRIER(v, w);
+v |= e[-5]; w |= e[-6];
+SSE_REASSOC_BARRIER(v, w);
+v |= e[-7]; v |= w;
+
+/* Loop over complete 256-byte blocks.  */
+for (; p < e - 7; p += 8) {
+/* PTEST is not profitable here.  */
+v = _mm256_cmpeq_epi8(v, zero);
+if (unlikely(_mm256_movemask_epi8(v) != 0xFFFFFFFF)) {
 return false;
 }
-t = p[-4] | p[-3] | p[-2] | p[-1];
-p += 4;
-} ;
+v = p[0]; w = p[1];
+SSE_REASSOC_BARRIER(v, w);
+v |= p[2]; w |= p[3];
+SSE_REASSOC_BARRIER(v, w);
+v |= p[4]; w |= p[5];
+SSE_REASSOC_BARRIER(v, w);
+

[PATCH v7 01/10] util/bufferiszero: Remove SSE4.1 variant

2024-04-30 Thread Richard Henderson
From: Alexander Monakov 

The SSE4.1 variant is virtually identical to the SSE2 variant, except
for using 'PTEST+JNZ' in place of 'PCMPEQB+PMOVMSKB+CMP+JNE' for testing
if an SSE register is all zeroes. The PTEST instruction decodes to two
uops, so it can be handled only by the complex decoder, and since
CMP+JNE are macro-fused, both sequences decode to three uops. The uops
comprising the PTEST instruction dispatch to p0 and p5 on Intel CPUs, so
PCMPEQB+PMOVMSKB is comparatively more flexible from dispatch
standpoint.

Hence, the use of PTEST brings no benefit from throughput standpoint.
Its latency is not important, since it feeds only a conditional jump,
which terminates the dependency chain.

I never observed PTEST variants to be faster on real hardware.

Signed-off-by: Alexander Monakov 
Signed-off-by: Mikhail Romanov 
Reviewed-by: Richard Henderson 
Message-Id: <20240206204809.9859-2-amona...@ispras.ru>
---
 util/bufferiszero.c | 29 -
 1 file changed, 29 deletions(-)

diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index 3e6a5dfd63..f5a3634f9a 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -100,34 +100,6 @@ buffer_zero_sse2(const void *buf, size_t len)
 }
 
 #ifdef CONFIG_AVX2_OPT
-static bool __attribute__((target("sse4")))
-buffer_zero_sse4(const void *buf, size_t len)
-{
-__m128i t = _mm_loadu_si128(buf);
-__m128i *p = (__m128i *)(((uintptr_t)buf + 5 * 16) & -16);
-__m128i *e = (__m128i *)(((uintptr_t)buf + len) & -16);
-
-/* Loop over 16-byte aligned blocks of 64.  */
-while (likely(p <= e)) {
-__builtin_prefetch(p);
-if (unlikely(!_mm_testz_si128(t, t))) {
-return false;
-}
-t = p[-4] | p[-3] | p[-2] | p[-1];
-p += 4;
-}
-
-/* Finish the aligned tail.  */
-t |= e[-3];
-t |= e[-2];
-t |= e[-1];
-
-/* Finish the unaligned tail.  */
-t |= _mm_loadu_si128(buf + len - 16);
-
-return _mm_testz_si128(t, t);
-}
-
 static bool __attribute__((target("avx2")))
 buffer_zero_avx2(const void *buf, size_t len)
 {
@@ -221,7 +193,6 @@ select_accel_cpuinfo(unsigned info)
 #endif
 #ifdef CONFIG_AVX2_OPT
 { CPUINFO_AVX2,128, buffer_zero_avx2 },
-{ CPUINFO_SSE4, 64, buffer_zero_sse4 },
 #endif
 { CPUINFO_SSE2, 64, buffer_zero_sse2 },
 { CPUINFO_ALWAYS,0, buffer_zero_int },
-- 
2.34.1




[PATCH v7 10/10] tests/bench: Add bufferiszero-bench

2024-04-30 Thread Richard Henderson
Benchmark each acceleration function vs an aligned buffer of zeros.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tests/bench/bufferiszero-bench.c | 47 
 tests/bench/meson.build  |  1 +
 2 files changed, 48 insertions(+)
 create mode 100644 tests/bench/bufferiszero-bench.c

diff --git a/tests/bench/bufferiszero-bench.c b/tests/bench/bufferiszero-bench.c
new file mode 100644
index 00..222695c1fa
--- /dev/null
+++ b/tests/bench/bufferiszero-bench.c
@@ -0,0 +1,47 @@
+/*
+ * QEMU buffer_is_zero speed benchmark
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "qemu/units.h"
+
+static void test(const void *opaque)
+{
+size_t max = 64 * KiB;
+void *buf = g_malloc0(max);
+int accel_index = 0;
+
+do {
+if (accel_index != 0) {
+g_test_message("%s", "");  /* gnu_printf Werror for simple "" */
+}
+for (size_t len = 1 * KiB; len <= max; len *= 4) {
+double total = 0.0;
+
+g_test_timer_start();
+do {
+buffer_is_zero_ge256(buf, len);
+total += len;
+} while (g_test_timer_elapsed() < 0.5);
+
+total /= MiB;
+g_test_message("buffer_is_zero #%d: %2zuKB %8.0f MB/sec",
+   accel_index, len / (size_t)KiB,
+   total / g_test_timer_last());
+}
+accel_index++;
+} while (test_buffer_is_zero_next_accel());
+
+g_free(buf);
+}
+
+int main(int argc, char **argv)
+{
+g_test_init(&argc, &argv, NULL);
+g_test_add_data_func("/cutils/bufferiszero/speed", NULL, test);
+return g_test_run();
+}
diff --git a/tests/bench/meson.build b/tests/bench/meson.build
index 7e76338a52..4cd7a2f6b5 100644
--- a/tests/bench/meson.build
+++ b/tests/bench/meson.build
@@ -21,6 +21,7 @@ benchs = {}
 
 if have_block
   benchs += {
+ 'bufferiszero-bench': [],
  'benchmark-crypto-hash': [crypto],
  'benchmark-crypto-hmac': [crypto],
  'benchmark-crypto-cipher': [crypto],
-- 
2.34.1




RE: [PATCH v3] Hexagon: add PC alignment check and exception

2024-04-30 Thread ltaylorsimpson



> -Original Message-
> From: Matheus Tavares Bernardino 
> Sent: Tuesday, April 30, 2024 9:25 AM
> To: qemu-devel@nongnu.org
> Cc: bc...@quicinc.com; sidn...@quicinc.com; a...@rev.ng; a...@rev.ng;
> ltaylorsimp...@gmail.com; richard.hender...@linaro.org; Laurent Vivier
> 
> Subject: [PATCH v3] Hexagon: add PC alignment check and exception
> 
> The Hexagon Programmer's Reference Manual says that the exception 0x1e
> should be raised upon an unaligned program counter. Let's implement that
> and also add some tests.
> 
> Signed-off-by: Matheus Tavares Bernardino 
> ---
> v2: https://lore.kernel.org/qemu-
> devel/e559b521d1920f804df10244c8c07564431aeba5.1714419461.git.quic_ma
> thb...@quicinc.com/
> 
> Thanks for the comments, Richard and Taylor!
> 
> Changed in v3:
> - Removed now unnecessary pkt_raises_exception addition.
> - Added HEX_EXCP_PC_NOT_ALIGNED handling at
>   linux-user/hexagon/cpu_loop.c.
> - Merged all tests into a C file that uses signal handler to check
>   that the exception was raised.
> 
>  target/hexagon/cpu.h   |  7 ++
>  target/hexagon/cpu_bits.h  |  4 +
>  target/hexagon/macros.h|  3 -
>  linux-user/hexagon/cpu_loop.c  |  4 +
>  target/hexagon/op_helper.c |  9 +--
>  tests/tcg/hexagon/unaligned_pc.c   | 85 ++
>  tests/tcg/hexagon/Makefile.target  |  4 +
>  tests/tcg/hexagon/unaligned_pc_multi_cof.S |  5 ++
>  8 files changed, 113 insertions(+), 8 deletions(-)  create mode 100644
> tests/tcg/hexagon/unaligned_pc.c  create mode 100644
> tests/tcg/hexagon/unaligned_pc_multi_cof.S
> 



> a/tests/tcg/hexagon/unaligned_pc.c b/tests/tcg/hexagon/unaligned_pc.c
> new file mode 100644
> index 00..1add2d0d99
> --- /dev/null
> +++ b/tests/tcg/hexagon/unaligned_pc.c
> @@ -0,0 +1,85 @@
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +/* will be changed in signal handler */ volatile sig_atomic_t
> +completed_tests; static jmp_buf after_test; static int nr_tests;
> +
> +void __attribute__((naked)) test_return(void) {
> +asm volatile(
> +"allocframe(#0x8)\n"
> +"r0 = #0x\n"
> +"framekey = r0\n"
> +"dealloc_return\n"
> +: : : "r0");

Add r29, r30, r31 to clobbers list.
Add framekey to clobbers list (assuming the compiler will take it).

> +}
> +
> +void test_endloop(void)
> +{
> +asm volatile(
> +"loop0(1f, #2)\n"
> +"1: r0 = #0x3\n"
> +"sa0 = r0\n"
> +"{ nop }:endloop0\n"
> +: : : "r0");
> +}

Add sa0, lc0, usr to the clobbers list.

> +
> +void test_multi_cof(void)
> +{
> +asm volatile(
> +"p0 = cmp.eq(r0, r0)\n"
> +"{\n"
> +"if (p0) jump test_multi_cof_unaligned\n"
> +"jump 1f\n"
> +"}\n"
> +"1: nop\n"
> +: : : "p0");
> +}
> +
> +void sigbus_handler(int signum)
> +{
> +/* restore framekey after test_return */
> +asm volatile(
> +"r0 = #0\n"
> +"framekey = r0\n"
> +: : : "r0");

Add framekey to the clobbers list.

> +printf("Test %d complete\n", completed_tests);
> +completed_tests++;
> +siglongjmp(after_test, 1);
> +}
> +
> +void test_done(void)
> +{
> +int err = (completed_tests != nr_tests);
> +puts(err ? "FAIL" : "PASS");
> +exit(err);
> +}
> +
> +typedef void (*test_fn)(void);
> +
> +int main()
> +{
> +test_fn tests[] = { test_return, test_endloop, test_multi_cof,
test_done
> };
> +nr_tests = (sizeof(tests) / sizeof(tests[0])) - 1;
> +
> +struct sigaction sa = {
> +.sa_sigaction = sigbus_handler,
> +.sa_flags = SA_SIGINFO
> +};
> +
> +if (sigaction(SIGBUS, &sa, NULL) < 0) {
> +perror("sigaction");
> +return EXIT_FAILURE;
> +}
> +
> +sigsetjmp(after_test, 1);
> +tests[completed_tests]();
> +
> +/* should never get here */
> +puts("FAIL");
> +return 1;
> +}
> diff --git a/tests/tcg/hexagon/Makefile.target
> b/tests/tcg/hexagon/Makefile.target
> index f839b2c0d5..75139e731c 100644
> --- a/tests/tcg/hexagon/Makefile.target
> +++ b/tests/tcg/hexagon/Makefile.target
> @@ -51,6 +51,7 @@ HEX_TESTS += scatter_gather  HEX_TESTS += hvx_misc
> HEX_TESTS += hvx_histogram  HEX_TESTS += invalid-slots
> +HEX_TESTS += unaligned_pc
> 
>  run-and-check-exception = $(call run-test,$2,$3 2>$2.stderr; \
>   test $$? -eq 1 && grep -q "exception $(strip $1)" $2.stderr) @@ -
> 108,6 +109,9 @@ preg_alias: preg_alias.c hex_test.h
>  read_write_overlap: read_write_overlap.c hex_test.h
>  reg_mut: reg_mut.c hex_test.h
> 
> +unaligned_pc: unaligned_pc.c unaligned_pc_multi_cof.S
> + $(CC) $(CFLAGS) $(CROSS_CC_GUEST_CFLAGS) -mv73 $^ -o $@
> $(LDFLAGS)
> +
>  # This test has to be compiled for the -mv67t target
>  usr: usr.c hex_test.h
>   $(CC) $(CFLAGS) -mv67t -O2 -Wno-inline-asm -Wno-expansion-to-
> defined $< -o $@ $(LDFLAGS) diff --git
> a/tests/tcg/hexagon/unaligned_pc_multi

Re: [PATCH v2] hw/s390x: Attach the sclpconsole to /machine/sclp/s390-sclp-event-facility

2024-04-30 Thread Philippe Mathieu-Daudé

On 30/4/24 21:08, Thomas Huth wrote:

The sclpconsole currently does not have a proper parent in the QOM
tree, so it shows up under /machine/unattached - which is somewhat
ugly. We should rather attach it to /machine/sclp/s390-sclp-event-facility
where the other devices of type TYPE_SCLP_EVENT already reside.

Signed-off-by: Thomas Huth 
---
  hw/s390x/s390-virtio-ccw.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)


Reviewed-by: Philippe Mathieu-Daudé 





Re: [PATCH v2] hw/s390x: Attach the sclpconsole to /machine/sclp/s390-sclp-event-facility

2024-04-30 Thread David Hildenbrand

On 30.04.24 21:08, Thomas Huth wrote:

The sclpconsole currently does not have a proper parent in the QOM
tree, so it shows up under /machine/unattached - which is somewhat
ugly. We should rather attach it to /machine/sclp/s390-sclp-event-facility
where the other devices of type TYPE_SCLP_EVENT already reside.

Signed-off-by: Thomas Huth 
---
  hw/s390x/s390-virtio-ccw.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 5c83d1ea17..41be8bf857 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -242,11 +242,13 @@ static void s390_create_virtio_net(BusState *bus, const 
char *name)
  
  static void s390_create_sclpconsole(const char *type, Chardev *chardev)

  {
+BusState *ev_fac_bus = sclp_get_event_facility_bus();
  DeviceState *dev;
  
  dev = qdev_new(type);

+object_property_add_child(OBJECT(ev_fac_bus->parent), type, OBJECT(dev));
  qdev_prop_set_chr(dev, "chardev", chardev);
-qdev_realize_and_unref(dev, sclp_get_event_facility_bus(), &error_fatal);
+qdev_realize_and_unref(dev, ev_fac_bus, &error_fatal);
  }
  
  static void ccw_init(MachineState *machine)


Reviewed-by: David Hildenbrand 

--
Cheers,

David / dhildenb




[PATCH v10 2/2] tpm: add backend for mssim

2024-04-30 Thread James Bottomley
The Microsoft Simulator (mssim) is the reference emulation platform
for the TCG TPM 2.0 specification.

https://github.com/Microsoft/ms-tpm-20-ref.git

It exports a fairly simple network socket based protocol on two
sockets, one for command (default 2321) and one for control (default
2322).  This patch adds a simple backend that can speak the mssim
protocol over the network.  It also allows the two sockets to be
specified on the command line.  The benefits are twofold: firstly, it
gives us a backend that actually speaks a standard TPM emulation
protocol instead of the Linux-specific TPM driver format of the
current emulated TPM backend; and secondly, using the Microsoft
protocol, the end point of the emulator can be anywhere on the
network, facilitating the cloud use case where a central TPM service
can be used over a control network.

The implementation does basic control commands like power off/on, but
doesn't implement cancellation or startup.  The former because
cancellation is pretty much useless on a fast operating TPM emulator
and the latter because this emulator is designed to be used with OVMF
which itself does TPM startup and I wanted to validate that.

To run this, simply download an emulator based on the MS specification
(package ibmswtpm2 on openSUSE) and run it, then add these two lines
to the qemu command and it will use the emulator.

-tpmdev mssim,id=tpm0 \
-device tpm-crb,tpmdev=tpm0 \

To use a remote emulator, replace the first line with

-tpmdev "{'type':'mssim','id':'tpm0','command':{'type':'inet','host':'remote','port':'2321'}}"

tpm-tis also works as the backend.

Signed-off-by: James Bottomley 
Acked-by: Markus Armbruster 

---

v2: convert to SocketAddr json and use qio_channel_socket_connect_sync()
v3: gate control power off by migration state keep control socket disconnected
to test outside influence and add docs.
v7: TPMmssim -> TPMMssim; doc and json fixes
Make command socket open each time (makes OS debugging easier)
---
 MAINTAINERS  |   6 +
 backends/tpm/Kconfig |   5 +
 backends/tpm/meson.build |   1 +
 backends/tpm/tpm_mssim.c | 319 +++
 backends/tpm/tpm_mssim.h |  44 ++
 docs/specs/tpm.rst   |  39 +
 qapi/tpm.json|  31 +++-
 system/tpm-hmp-cmds.c|   9 ++
 8 files changed, 450 insertions(+), 4 deletions(-)
 create mode 100644 backends/tpm/tpm_mssim.c
 create mode 100644 backends/tpm/tpm_mssim.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 302b6fd00c..6bd7e82d1b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3386,10 +3386,16 @@ F: include/hw/acpi/tpm.h
 F: include/sysemu/tpm*
 F: qapi/tpm.json
 F: backends/tpm/
+X: backends/tpm/tpm_mssim.*
 F: tests/qtest/*tpm*
 F: docs/specs/tpm.rst
 T: git https://github.com/stefanberger/qemu-tpm.git tpm-next
 
+MSSIM TPM Backend
+M: James Bottomley 
+S: Maintained
+F: backends/tpm/tpm_mssim.*
+
 Checkpatch
 S: Odd Fixes
 F: scripts/checkpatch.pl
diff --git a/backends/tpm/Kconfig b/backends/tpm/Kconfig
index 5d91eb89c2..d6d6fa53e9 100644
--- a/backends/tpm/Kconfig
+++ b/backends/tpm/Kconfig
@@ -12,3 +12,8 @@ config TPM_EMULATOR
 bool
 default y
 depends on TPM_BACKEND
+
+config TPM_MSSIM
+bool
+default y
+depends on TPM_BACKEND
diff --git a/backends/tpm/meson.build b/backends/tpm/meson.build
index 0bfa6c422b..c6f7c24cb1 100644
--- a/backends/tpm/meson.build
+++ b/backends/tpm/meson.build
@@ -3,4 +3,5 @@ if have_tpm
   system_ss.add(files('tpm_util.c'))
   system_ss.add(when: 'CONFIG_TPM_PASSTHROUGH', if_true: 
files('tpm_passthrough.c'))
   system_ss.add(when: 'CONFIG_TPM_EMULATOR', if_true: files('tpm_emulator.c'))
+  system_ss.add(when: 'CONFIG_TPM_MSSIM', if_true: files('tpm_mssim.c'))
 endif
diff --git a/backends/tpm/tpm_mssim.c b/backends/tpm/tpm_mssim.c
new file mode 100644
index 00..962ad340c3
--- /dev/null
+++ b/backends/tpm/tpm_mssim.c
@@ -0,0 +1,319 @@
+/*
+ * Emulator TPM driver which connects over the mssim protocol
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Copyright (c) 2022
+ * Author: James Bottomley 
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qemu/sockets.h"
+
+#include "qapi/clone-visitor.h"
+#include "qapi/qapi-visit-tpm.h"
+
+#include "io/channel-socket.h"
+
+#include "sysemu/runstate.h"
+#include "sysemu/tpm_backend.h"
+#include "sysemu/tpm_util.h"
+
+#include "qom/object.h"
+
+#include "tpm_int.h"
+#include "tpm_mssim.h"
+
+#define ERROR_PREFIX "TPM mssim Emulator: "
+
+#define TYPE_TPM_MSSIM "tpm-mssim"
+OBJECT_DECLARE_SIMPLE_TYPE(TPMMssim, TPM_MSSIM)
+
+struct TPMMssim {
+TPMBackend parent;
+
+TPMMssimOptions opts;
+
+QIOChannelSocket *cmd_qc, *ctrl_qc;
+};
+
+static int tpm_send_ctrl(TPMMssim *t, uint32_t cmd, Error **errp)
+{
+int ret, retc;
+Error *local_err = NULL;
+
+ret = qio_channel_socket_connect_sync(t->ctrl_qc, t->opts.control, errp);
+if (ret != 0) {
+return ret;
+}
+cmd = htonl(cmd);
+r

[PATCH v10 1/2] tpm: convert tpmdev options processing to new visitor format

2024-04-30 Thread James Bottomley
Instead of processing the tpmdev options using the old qemu options,
convert to the new visitor format which also allows the passing of
json on the command line.

Signed-off-by: James Bottomley 
Tested-by: Stefan Berger 
Reviewed-by: Stefan Berger 

---
v4: add TpmConfiOptions
v5: exit(0) for help
v7: adjust line lengths, free options
v8: minor updates; add tested/reviewed-by
v9: optarg->optstr
---
 backends/tpm/tpm_emulator.c| 25 --
 backends/tpm/tpm_passthrough.c | 23 +++--
 include/sysemu/tpm.h   |  5 +-
 include/sysemu/tpm_backend.h   |  2 +-
 qapi/tpm.json  | 21 
 system/tpm.c   | 91 ++
 system/vl.c| 19 +--
 7 files changed, 81 insertions(+), 105 deletions(-)

diff --git a/backends/tpm/tpm_emulator.c b/backends/tpm/tpm_emulator.c
index 5a8fba9bde..99ab0019cc 100644
--- a/backends/tpm/tpm_emulator.c
+++ b/backends/tpm/tpm_emulator.c
@@ -580,33 +580,29 @@ err_exit:
 return -1;
 }
 
-static int tpm_emulator_handle_device_opts(TPMEmulator *tpm_emu, QemuOpts 
*opts)
+static int tpm_emulator_handle_device_opts(TPMEmulator *tpm_emu,
+   TpmCreateOptions *opts)
 {
-const char *value;
 Error *err = NULL;
 Chardev *dev;
 
-value = qemu_opt_get(opts, "chardev");
-if (!value) {
-error_report("tpm-emulator: parameter 'chardev' is missing");
-goto err;
-}
+tpm_emu->options = QAPI_CLONE(TPMEmulatorOptions, &opts->u.emulator);
+tpm_emu->data_ioc = NULL;
 
-dev = qemu_chr_find(value);
+dev = qemu_chr_find(opts->u.emulator.chardev);
 if (!dev) {
-error_report("tpm-emulator: tpm chardev '%s' not found", value);
+error_report("tpm-emulator: tpm chardev '%s' not found",
+ opts->u.emulator.chardev);
 goto err;
 }
 
 if (!qemu_chr_fe_init(&tpm_emu->ctrl_chr, dev, &err)) {
 error_prepend(&err, "tpm-emulator: No valid chardev found at '%s':",
-  value);
+  opts->u.emulator.chardev);
 error_report_err(err);
 goto err;
 }
 
-tpm_emu->options->chardev = g_strdup(value);
-
 if (tpm_emulator_prepare_data_fd(tpm_emu) < 0) {
 goto err;
 }
@@ -645,7 +641,7 @@ err:
 return -1;
 }
 
-static TPMBackend *tpm_emulator_create(QemuOpts *opts)
+static TPMBackend *tpm_emulator_create(TpmCreateOptions *opts)
 {
 TPMBackend *tb = TPM_BACKEND(object_new(TYPE_TPM_EMULATOR));
 
@@ -968,7 +964,6 @@ static void tpm_emulator_inst_init(Object *obj)
 
 trace_tpm_emulator_inst_init();
 
-tpm_emu->options = g_new0(TPMEmulatorOptions, 1);
 tpm_emu->cur_locty_number = ~0;
 qemu_mutex_init(&tpm_emu->mutex);
 tpm_emu->vmstate =
@@ -985,7 +980,7 @@ static void tpm_emulator_shutdown(TPMEmulator *tpm_emu)
 {
 ptm_res res;
 
-if (!tpm_emu->options->chardev) {
+if (!tpm_emu->data_ioc) {
 /* was never properly initialized */
 return;
 }
diff --git a/backends/tpm/tpm_passthrough.c b/backends/tpm/tpm_passthrough.c
index 179697a3a9..54183b89a4 100644
--- a/backends/tpm/tpm_passthrough.c
+++ b/backends/tpm/tpm_passthrough.c
@@ -252,21 +252,13 @@ static int 
tpm_passthrough_open_sysfs_cancel(TPMPassthruState *tpm_pt)
 }
 
 static int
-tpm_passthrough_handle_device_opts(TPMPassthruState *tpm_pt, QemuOpts *opts)
+tpm_passthrough_handle_device_opts(TPMPassthruState *tpm_pt,
+   TpmCreateOptions *opts)
 {
-const char *value;
+tpm_pt->options = QAPI_CLONE(TPMPassthroughOptions, &opts->u.passthrough);
 
-value = qemu_opt_get(opts, "cancel-path");
-if (value) {
-tpm_pt->options->cancel_path = g_strdup(value);
-}
-
-value = qemu_opt_get(opts, "path");
-if (value) {
-tpm_pt->options->path = g_strdup(value);
-}
-
-tpm_pt->tpm_dev = value ? value : TPM_PASSTHROUGH_DEFAULT_DEVICE;
+tpm_pt->tpm_dev = opts->u.passthrough.path ? opts->u.passthrough.path :
+TPM_PASSTHROUGH_DEFAULT_DEVICE;
 tpm_pt->tpm_fd = qemu_open_old(tpm_pt->tpm_dev, O_RDWR);
 if (tpm_pt->tpm_fd < 0) {
 error_report("Cannot access TPM device using '%s': %s",
@@ -288,11 +280,11 @@ tpm_passthrough_handle_device_opts(TPMPassthruState 
*tpm_pt, QemuOpts *opts)
 return 0;
 }
 
-static TPMBackend *tpm_passthrough_create(QemuOpts *opts)
+static TPMBackend *tpm_passthrough_create(TpmCreateOptions *tco)
 {
 Object *obj = object_new(TYPE_TPM_PASSTHROUGH);
 
-if (tpm_passthrough_handle_device_opts(TPM_PASSTHROUGH(obj), opts)) {
+if (tpm_passthrough_handle_device_opts(TPM_PASSTHROUGH(obj), tco)) {
 object_unref(obj);
 return NULL;
 }
@@ -344,7 +336,6 @@ static void tpm_passthrough_inst_init(Object *obj)
 {
 TPMPassthruState *tpm_pt = TPM_PASSTHROUGH(obj);
 
-tpm_pt->options = g_new0(TPMPassthroughOptions, 1);
 tpm_pt->tpm_fd = -1;
 tpm_pt->cancel

[PATCH v10 0/2] tpm: add mssim backend

2024-04-30 Thread James Bottomley
The requested feedback was to convert the tpmdev handler to being json
based, which requires rethreading all the backends.  The good news is
this reduced quite a bit of code (especially as I converted it to
error_fatal handling as well, which removes the return status
threading).  The bad news is I can't test any of the conversions.
swtpm still isn't building on opensuse and, apparently, passthrough
doesn't like my native TPM because it doesn't allow cancellation.

v3 pulls out more unneeded code in the visitor conversion, makes
migration work on external state preservation of the simulator and
adds documentation

v4 puts back the wrapper options (but doesn't add any for mssim since
it postdates the necessity)

v5 rebases to the latest master branch and adjusts for removed use_FOO ptrs

v5 updates help to exit zero; does some checkpatch tidying

v7 merge review feedback and add acks.

v8 adds better error handling, more code tidies and adds command
   socket disconnection/reconnection (instead of trying to keep the
   socket open the whole time).  This adds overhead, but makes
   debugging guest kernel TPM issues much easier.

v9 Fix merge conflict with optarg->optstr conversion

v10 Fix more merge conflicts and update API versions

James

---

James Bottomley (2):
  tpm: convert tpmdev options processing to new visitor format
  tpm: add backend for mssim

 MAINTAINERS|   6 +
 backends/tpm/Kconfig   |   5 +
 backends/tpm/meson.build   |   1 +
 backends/tpm/tpm_emulator.c|  25 ++-
 backends/tpm/tpm_mssim.c   | 319 +
 backends/tpm/tpm_mssim.h   |  44 +
 backends/tpm/tpm_passthrough.c |  23 +--
 docs/specs/tpm.rst |  39 
 include/sysemu/tpm.h   |   5 +-
 include/sysemu/tpm_backend.h   |   2 +-
 qapi/tpm.json  |  50 +-
 system/tpm-hmp-cmds.c  |   9 +
 system/tpm.c   |  91 --
 system/vl.c|  19 +-
 14 files changed, 530 insertions(+), 108 deletions(-)
 create mode 100644 backends/tpm/tpm_mssim.c
 create mode 100644 backends/tpm/tpm_mssim.h

-- 
2.35.3




[PATCH v2] hw/s390x: Attach the sclpconsole to /machine/sclp/s390-sclp-event-facility

2024-04-30 Thread Thomas Huth
The sclpconsole currently does not have a proper parent in the QOM
tree, so it shows up under /machine/unattached - which is somewhat
ugly. We should rather attach it to /machine/sclp/s390-sclp-event-facility
where the other devices of type TYPE_SCLP_EVENT already reside.

Signed-off-by: Thomas Huth 
---
 hw/s390x/s390-virtio-ccw.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 5c83d1ea17..41be8bf857 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -242,11 +242,13 @@ static void s390_create_virtio_net(BusState *bus, const 
char *name)
 
 static void s390_create_sclpconsole(const char *type, Chardev *chardev)
 {
+BusState *ev_fac_bus = sclp_get_event_facility_bus();
 DeviceState *dev;
 
 dev = qdev_new(type);
+object_property_add_child(OBJECT(ev_fac_bus->parent), type, OBJECT(dev));
 qdev_prop_set_chr(dev, "chardev", chardev);
-qdev_realize_and_unref(dev, sclp_get_event_facility_bus(), &error_fatal);
+qdev_realize_and_unref(dev, ev_fac_bus, &error_fatal);
 }
 
 static void ccw_init(MachineState *machine)
-- 
2.44.0




Re: [PULL 55/63] kvm: handle KVM_EXIT_MEMORY_FAULT

2024-04-30 Thread Paolo Bonzini
On Fri, Apr 26, 2024 at 3:40 PM Peter Maydell  wrote:
> > +addr = memory_region_get_ram_ptr(mr) + section.offset_within_region;
> > +rb = qemu_ram_block_from_host(addr, false, &offset);
>
> ...and this call to qemu_ram_block_from_host() will only initialize
> offset if it does not fail (i.e. doesn't return NULL)...
>
> I think this code should either handle the case where
> qemu_ram_block_from_host() fails, or, if it is impossible
> for it to fail in this situation, add an assert() and a
> comment about why we know it can't fail.

The assertion is in memory_region_get_ram_ptr(), but Coverity
understandably cannot see it.

Similar to other code in hw/virtio/virtio-balloon.c, this code is
using memory_region_get_ram_ptr() as a roundabout way to go from
MemoryRegion (in this case MemoryRegionSection) to RAMBlock.  The
right fix is to introduce memory_region_get_ram_block() and use it.

Paolo




Re: [PATCH] hw/s390x: Attach the sclpconsole to the /machine/sclp node

2024-04-30 Thread Thomas Huth

On 30/04/2024 16.24, Thomas Huth wrote:

On 30/04/2024 13.58, Cédric Le Goater wrote:

On 4/30/24 10:04, Thomas Huth wrote:

The sclpconsole currently does not have a proper parent in the QOM
tree, so it shows up under /machine/unattached - which is somewhat
ugly. Let's attach it to /machine/sclp instead.

Signed-off-by: Thomas Huth 
---
  include/hw/s390x/sclp.h    |  2 +-
  hw/s390x/s390-virtio-ccw.c | 11 +++
  hw/s390x/sclp.c    |  4 +++-
  3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h
index b405a387b6..abfd6d8868 100644
--- a/include/hw/s390x/sclp.h
+++ b/include/hw/s390x/sclp.h
@@ -222,7 +222,7 @@ static inline int sccb_data_len(SCCB *sccb)
  }
-void s390_sclp_init(void);
+Object *s390_sclp_init(void);
  void sclp_service_interrupt(uint32_t sccb);
  void raise_irq_cpu_hotplug(void);
  int sclp_service_call(S390CPU *cpu, uint64_t sccb, uint32_t code);
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 4dcc213820..e2f9206ded 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -237,11 +237,13 @@ static void s390_create_virtio_net(BusState *bus, 
const char *name)

  }
  }
-static void s390_create_sclpconsole(const char *type, Chardev *chardev)
+static void s390_create_sclpconsole(Object *sclp, const char *type,
+    Chardev *chardev)
  {
  DeviceState *dev;
  dev = qdev_new(type);
+    object_property_add_child(sclp, type, OBJECT(dev));
  qdev_prop_set_chr(dev, "chardev", chardev);
  qdev_realize_and_unref(dev, sclp_get_event_facility_bus(), 
&error_fatal);

  }
@@ -252,8 +254,9 @@ static void ccw_init(MachineState *machine)
  int ret;
  VirtualCssBus *css_bus;
  DeviceState *dev;
+    Object *sclp;
-    s390_sclp_init();
+    sclp = s390_sclp_init();


I would simply drop s390_sclp_init(), same for :

   void s390_init_tod(void);
   void s390_init_ap(void);
   void s390_stattrib_init(void);
   void s390_skeys_init(void);
   void s390_flic_init(void);

These routines all do the same and are not very useful TBH, and I would
add pointers under the s390x MachineState possibly.


Some of them seem to do a little bit more things, like checking whether the 
feature is available or not, e.g. s390_init_ap() ... IMHO it makes sense to 
keep at least those?


But for s390_sclp_init ... it could be inlined, indeed, especially if we 
also switch the object_unref + qdev_realize in there into 
qdev_realize_and_unref. Let me try to do that in a v2 ...


Actually, after looking at the code a little bit longer, it seems to me like 
the sclpconsole should be attached to /machine/sclp/s390-sclp-event-facility
instead of just /machine/sclp, since the other devices of type 
TYPE_SCLP_EVENT are also located there. That makes the patch even easier 
since we already have the pointer from sclp_get_event_facility_bus() in that 
function.


 Thomas





Re: [PATCH v3 00/13] exec: Rework around CPUState user fields (part 2)

2024-04-30 Thread Philippe Mathieu-Daudé

On 30/4/24 20:45, Philippe Mathieu-Daudé wrote:

Hi Ilya,

On 30/4/24 19:55, Ilya Leoshkevich wrote:

On Tue, Apr 30, 2024 at 02:27:54PM +0200, Philippe Mathieu-Daudé wrote:

Missing WASM testing by Ilya (branch available at
https://gitlab.com/philmd/qemu/-/commits/tcg_flush_jmp_cache)


Hmm, it dies very early now:

   # gdb --args ./qemu-s390x -L /usr/s390x-linux-gnu 
/build/wasmtime/target/s390x-unknown-linux-gnu/debug/deps/component_fuzz_util-d10a3a6b4ad8af47


   Thread 1 "qemu-s390x" received signal SIGSEGV, Segmentation fault.
   0x5559b718 in cpu_common_realizefn (dev=0x557c28c0, 
errp=<optimized out>) at 
../home/iii/myrepos/qemu/hw/core/cpu-common.c:217
   217 cpu->accel->plugin_state = 
qemu_plugin_create_vcpu_state();


   (gdb) bt
   #0  0x5559b718 in cpu_common_realizefn (dev=0x557c28c0, 
errp=<optimized out>) at 
../home/iii/myrepos/qemu/hw/core/cpu-common.c:217
   #1  0x5559f59a in s390_cpu_realizefn (dev=0x557c28c0, 
errp=0x7fffe1a0) at ../home/iii/myrepos/qemu/target/s390x/cpu.c:284
   #2  0x5563f76b in device_set_realized (obj=<optimized out>, 
value=<optimized out>, errp=0x7fffe2e0) at 
../home/iii/myrepos/qemu/hw/core/qdev.c:510
   #3  0x5564363d in property_set_bool (obj=0x557c28c0, 
v=<optimized out>, name=<optimized out>, opaque=0x557a9140, 
errp=0x7fffe2e0) at ../home/iii/myrepos/qemu/qom/object.c:2362
   #4  0x55646b9b in object_property_set 
(obj=obj@entry=0x557c28c0, name=name@entry=0x556e8ae2 
"realized", v=v@entry=0x557c6650, errp=errp@entry=0x7fffe2e0)

   at ../home/iii/myrepos/qemu/qom/object.c:1471
   #5  0x5564a43f in object_property_set_qobject 
(obj=obj@entry=0x557c28c0, name=name@entry=0x556e8ae2 
"realized", value=value@entry=0x557a7a90, 
errp=errp@entry=0x7fffe2e0)

   at ../home/iii/myrepos/qemu/qom/qom-qobject.c:28
   #6  0x55647204 in object_property_set_bool 
(obj=0x557c28c0, name=name@entry=0x556e8ae2 "realized", 
value=value@entry=true, errp=errp@entry=0x7fffe2e0)

   at ../home/iii/myrepos/qemu/qom/object.c:1541
   #7  0x5564025c in qdev_realize (dev=<optimized out>, 
bus=bus@entry=0x0, errp=errp@entry=0x7fffe2e0) at 
../home/iii/myrepos/qemu/hw/core/qdev.c:291
   #8  0x5559bbb4 in cpu_create (typename=<optimized out>) at 
../home/iii/myrepos/qemu/hw/core/cpu-common.c:61
   #9  0x5559a467 in main (argc=4, argv=0x7fffeaa8, 
envp=<optimized out>) at ../home/iii/myrepos/qemu/linux-user/main.c:811


   (gdb) p cpu
   $1 = (CPUState *) 0x557c28c0
   (gdb) p cpu->accel
   $2 = (AccelCPUState *) 0x0

Configured with: '/home/iii/myrepos/qemu/configure' 
'--target-list=s390x-linux-user' '--disable-tools' '--disable-slirp' 
'--disable-fdt' '--disable-capstone' '--disable-docs'


If you don't see what can be wrong here right away, I can debug this.


I added this commit in the same branch:

-- >8 --
Author: Philippe Mathieu-Daudé 
Date:   Tue Apr 30 20:57:15 2024 +0200

accel/tcg: Initialize TCG plugins in cpu-target.c

Signed-off-by: Philippe Mathieu-Daudé 

diff --git a/cpu-target.c b/cpu-target.c
index 5af120e8aa..585533cfa3 100644
--- a/cpu-target.c
+++ b/cpu-target.c
@@ -46,6 +46,10 @@
 #include "hw/core/accel-cpu.h"
 #include "trace/trace-root.h"
 #include "qemu/accel.h"
+#ifdef CONFIG_PLUGIN
+#include "accel/tcg/vcpu-state.h"
+#include "qemu/plugin.h"
+#endif

 #ifndef CONFIG_USER_ONLY
 static int cpu_common_post_load(void *opaque, int version_id)
@@ -131,6 +135,13 @@ const VMStateDescription vmstate_cpu_common = {
 };
 #endif

+#ifdef CONFIG_PLUGIN
+static void qemu_plugin_vcpu_init__async(CPUState *cpu, run_on_cpu_data 
unused)

+{
+qemu_plugin_vcpu_init_hook(cpu);
+}
+#endif
+
 bool cpu_exec_realizefn(CPUState *cpu, Error **errp)
 {
 /* cache the cpu class for the hotpath */
@@ -143,6 +154,15 @@ bool cpu_exec_realizefn(CPUState *cpu, Error **errp)
 /* Wait until cpu initialization complete before exposing cpu. */
 cpu_list_add(cpu);

+#ifdef CONFIG_PLUGIN
+assert(cpu->accel);
+/* Plugin initialization must wait until the cpu start executing 
code */

+if (tcg_enabled()) {
+cpu->accel->plugin_state = qemu_plugin_create_vcpu_state();
+async_run_on_cpu(cpu, qemu_plugin_vcpu_init__async, 
RUN_ON_CPU_NULL);

+}
+#endif
+
 #ifdef CONFIG_USER_ONLY
 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL ||
qdev_get_vmsd(DEVICE(cpu))->unmigratable);
@@ -171,6 +191,13 @@ void cpu_exec_unrealizefn(CPUState *cpu)
 }
 #endif

+#ifdef CONFIG_PLUGIN
+/* Call the plugin hook before clearing the cpu is fully unrealized */
+if (tcg_enabled()) {
+qemu_plugin_vcpu_exit_hook(cpu);
+}
+#endif
+
 cpu_list_remove(cpu);
 /*
  * Now that the vCPU has been removed from the RCU list, we can call
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
index e03d31876f..cd8bd99131 100644
--- a/hw/core/cpu-common.c
+++ b/hw/core/cpu-common.c
@@ -30,10 +30,6 @@
 #include "hw/boards.h"
 #include "hw/qdev-properties.h"
 #include "trace

Re: [PATCH v3 00/13] exec: Rework around CPUState user fields (part 2)

2024-04-30 Thread Philippe Mathieu-Daudé

Hi Ilya,

On 30/4/24 19:55, Ilya Leoshkevich wrote:

On Tue, Apr 30, 2024 at 02:27:54PM +0200, Philippe Mathieu-Daudé wrote:

Missing WASM testing by Ilya (branch available at
https://gitlab.com/philmd/qemu/-/commits/tcg_flush_jmp_cache)


Hmm, it dies very early now:

   # gdb --args ./qemu-s390x -L /usr/s390x-linux-gnu 
/build/wasmtime/target/s390x-unknown-linux-gnu/debug/deps/component_fuzz_util-d10a3a6b4ad8af47

   Thread 1 "qemu-s390x" received signal SIGSEGV, Segmentation fault.
   0x5559b718 in cpu_common_realizefn (dev=0x557c28c0, errp=<optimized out>) at ../home/iii/myrepos/qemu/hw/core/cpu-common.c:217
   217 cpu->accel->plugin_state = qemu_plugin_create_vcpu_state();

   (gdb) bt
   #0  0x5559b718 in cpu_common_realizefn (dev=0x557c28c0, 
errp=<optimized out>) at ../home/iii/myrepos/qemu/hw/core/cpu-common.c:217
   #1  0x5559f59a in s390_cpu_realizefn (dev=0x557c28c0, 
errp=0x7fffe1a0) at ../home/iii/myrepos/qemu/target/s390x/cpu.c:284
   #2  0x5563f76b in device_set_realized (obj=<optimized out>, 
value=<optimized out>, errp=0x7fffe2e0) at 
../home/iii/myrepos/qemu/hw/core/qdev.c:510
   #3  0x5564363d in property_set_bool (obj=0x557c28c0, v=<optimized out>, name=<optimized out>, opaque=0x557a9140, errp=0x7fffe2e0) at 
../home/iii/myrepos/qemu/qom/object.c:2362
   #4  0x55646b9b in object_property_set (obj=obj@entry=0x557c28c0, 
name=name@entry=0x556e8ae2 "realized", v=v@entry=0x557c6650, 
errp=errp@entry=0x7fffe2e0)
   at ../home/iii/myrepos/qemu/qom/object.c:1471
   #5  0x5564a43f in object_property_set_qobject (obj=obj@entry=0x557c28c0, 
name=name@entry=0x556e8ae2 "realized", value=value@entry=0x557a7a90, 
errp=errp@entry=0x7fffe2e0)
   at ../home/iii/myrepos/qemu/qom/qom-qobject.c:28
   #6  0x55647204 in object_property_set_bool (obj=0x557c28c0, 
name=name@entry=0x556e8ae2 "realized", value=value@entry=true, 
errp=errp@entry=0x7fffe2e0)
   at ../home/iii/myrepos/qemu/qom/object.c:1541
   #7  0x5564025c in qdev_realize (dev=<optimized out>, 
bus=bus@entry=0x0, errp=errp@entry=0x7fffe2e0) at 
../home/iii/myrepos/qemu/hw/core/qdev.c:291
   #8  0x5559bbb4 in cpu_create (typename=<optimized out>) at 
../home/iii/myrepos/qemu/hw/core/cpu-common.c:61
   #9  0x5559a467 in main (argc=4, argv=0x7fffeaa8, envp=<optimized out>) at ../home/iii/myrepos/qemu/linux-user/main.c:811

   (gdb) p cpu
   $1 = (CPUState *) 0x557c28c0
   (gdb) p cpu->accel
   $2 = (AccelCPUState *) 0x0

Configured with: '/home/iii/myrepos/qemu/configure' 
'--target-list=s390x-linux-user' '--disable-tools' '--disable-slirp' 
'--disable-fdt' '--disable-capstone' '--disable-docs'

If you don't see what can be wrong here right away, I can debug this.


Useful enough I guess, but I'll ask you to test again later.

Does it work without the last patch?

Is it possible to share component_fuzz_util-d10a3a6b4ad8af47?

Thanks for the testing,

Phil.



Re: [PATCH 1/2] accel/tcg: Make TCGCPUOps::cpu_exec_halt return bool for whether to halt

2024-04-30 Thread Peter Maydell
On Tue, 30 Apr 2024 at 18:15, Alex Bennée  wrote:
>
> Peter Maydell  writes:
>
> > The TCGCPUOps::cpu_exec_halt method is called from cpu_handle_halt()
> > when the CPU is halted, so that a target CPU emulation can do
> > anything target-specific it needs to do.  (At the moment we only use
> > this on i386.)
> >
> > The current specification of the method doesn't allow the target
> > specific code to do something different if the CPU is about to come
> > out of the halt state, because cpu_handle_halt() only determines this
> > after the method has returned.  (If the method called cpu_has_work()
> > itself this would introduce a potential race if an interrupt arrived
> > between the target's method implementation checking and
> > cpu_handle_halt() repeating the check.)
> >
> > Change the definition of the method so that it returns a bool to
> > tell cpu_handle_halt() whether to stay in halt or not.
> >
> > We will want this for the Arm target, where FEAT_WFxT wants to do
> > some work only for the case where the CPU is in halt but about to
> > leave it.
> >
> > Signed-off-by: Peter Maydell 
> > ---
> >  include/hw/core/tcg-cpu-ops.h   | 11 +--
> >  target/i386/tcg/helper-tcg.h|  2 +-
> >  accel/tcg/cpu-exec.c|  7 +--
> >  target/i386/tcg/sysemu/seg_helper.c |  3 ++-
> >  4 files changed, 17 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h
> > index dc1f16a9777..f3ac76e6f6d 100644
> > --- a/include/hw/core/tcg-cpu-ops.h
> > +++ b/include/hw/core/tcg-cpu-ops.h
> > @@ -111,8 +111,15 @@ struct TCGCPUOps {
> >  void (*do_interrupt)(CPUState *cpu);
> >  /** @cpu_exec_interrupt: Callback for processing interrupts in 
> > cpu_exec */
> >  bool (*cpu_exec_interrupt)(CPUState *cpu, int interrupt_request);
> > -/** @cpu_exec_halt: Callback for handling halt in cpu_exec */
> > -void (*cpu_exec_halt)(CPUState *cpu);
> > +/**
> > + * @cpu_exec_halt: Callback for handling halt in cpu_exec.
> > + *
> > + * Return true to indicate that the CPU should now leave halt, false
> > + * if it should remain in the halted state.
> > + * If this method is not provided, the default is to leave halt
> > + * if cpu_has_work() returns true.
> > + */
> > +bool (*cpu_exec_halt)(CPUState *cpu);
>
> Would it be too much to rename the method to cpu_exec_leave_halt() to
> make it clearer on use the sense of the return value?

We could, but that makes it sound like it's a method to say
"should we leave halt?", which ...

> > -void x86_cpu_exec_halt(CPUState *cpu)
> > +bool x86_cpu_exec_halt(CPUState *cpu)
> >  {
> >  if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
> >  X86CPU *x86_cpu = X86_CPU(cpu);
> > @@ -138,6 +138,7 @@ void x86_cpu_exec_halt(CPUState *cpu)
> >  cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
> >  bql_unlock();
> >  }
> > +return cpu_has_work(cpu);
>
> The x86 version is essentially being called for side effects. Do we want
> to document this usage in the method?

...is not how the x86 target is using it, as you note.

thanks
-- PMM



Re: [PATCH 2/2] target/arm: Implement FEAT WFxT and enable for '-cpu max'

2024-04-30 Thread Peter Maydell
On Tue, 30 Apr 2024 at 18:31, Richard Henderson
 wrote:
>
> On 4/30/24 07:00, Peter Maydell wrote:
> > +if (uadd64_overflow(timeout, offset, &nexttick)) {
> > +nexttick = UINT64_MAX;
> > +}
> > +if (nexttick > INT64_MAX / gt_cntfrq_period_ns(cpu)) {
> > +/*
> > + * If the timeout is too long for the signed 64-bit range
> > + * of a QEMUTimer, let it expire early.
> > + */
> > +timer_mod_ns(cpu->wfxt_timer, INT64_MAX);
> > +} else {
> > +timer_mod(cpu->wfxt_timer, nexttick);
> > +}
>
> The use of both UINT64_MAX and INT64_MAX is confusing.  Perhaps
>
>  if (uadd64_overflow(timeout, offset, &nexttick) ||
>  nexttick > INT64_MAX / gt_cntfrq_period_ns(cpu)) {
>  nexttick = INT64_MAX;
>  }
>  timer_mod(cpu->wfxt_timer, nexttick);

I'm following here the pattern of the logic in gt_recalc_timer()
(which could admittedly also be considered confusing...).

Also note that timer_mod_ns() and timer_mod() aren't the
same thing. The latter calls timer_mod_ns() on its argument
multiplied by ts->scale, so if you pass it INT64_MAX
the multiply is liable to overflow.

thanks
-- PMM



RE: [PATCH v3] Hexagon: add PC alignment check and exception

2024-04-30 Thread Brian Cain


> -Original Message-
> From: Richard Henderson 
> Sent: Tuesday, April 30, 2024 10:53 AM
> To: Matheus Bernardino (QUIC) ; qemu-
> de...@nongnu.org
> Cc: Brian Cain ; Sid Manning ;
> a...@rev.ng; a...@rev.ng; ltaylorsimp...@gmail.com; Laurent Vivier
> 
> Subject: Re: [PATCH v3] Hexagon: add PC alignment check and exception
> 
> WARNING: This email originated from outside of Qualcomm. Please be wary of
> any links or attachments, and do not enable macros.
> 
> On 4/30/24 07:25, Matheus Tavares Bernardino wrote:
> > +void test_multi_cof(void)
> > +{
> > +asm volatile(
> > +"p0 = cmp.eq(r0, r0)\n"
> > +"{\n"
> > +"if (p0) jump test_multi_cof_unaligned\n"
> > +"jump 1f\n"
> > +"}\n"
> > +"1: nop\n"
> > +: : : "p0");
> > +}
> 
> I will say you could just add the label to the end of the asm here, like
> 
> .byte 0
> test_multi_cof_unaligned:
> 
> rather than use a separate source file.

Agreed: that would simplify this test case definition and the patch a bit.

-Brian


Re: [PATCH 1/1] tests/fp/meson: don't build fp-bench test if fenv.h is missing

2024-04-30 Thread Richard Henderson

On 4/30/24 09:47, Dario Binacchi wrote:

The fp-bench test (i.e. tests/fp/fp-bench.c) uses fenv.h, which is not
always provided by the libc (e.g. uClibc). The patch disables its compilation
in case the header is not available.


Since uclibc has had fenv.h since 2008, are you sure this isn't simply a case of a corrupt 
installation?



r~



Re: [RFC 1/2] iova_tree: add an id member to DMAMap

2024-04-30 Thread Eugenio Perez Martin
On Mon, Apr 29, 2024 at 1:19 PM Jonah Palmer  wrote:
>
>
>
> On 4/29/24 4:14 AM, Eugenio Perez Martin wrote:
> > On Thu, Apr 25, 2024 at 7:44 PM Si-Wei Liu  wrote:
> >>
> >>
> >>
> >> On 4/24/2024 12:33 AM, Eugenio Perez Martin wrote:
> >>> On Wed, Apr 24, 2024 at 12:21 AM Si-Wei Liu  wrote:
> 
> 
>  On 4/22/2024 1:49 AM, Eugenio Perez Martin wrote:
> > On Sat, Apr 20, 2024 at 1:50 AM Si-Wei Liu  
> > wrote:
> >>
> >> On 4/19/2024 1:29 AM, Eugenio Perez Martin wrote:
> >>> On Thu, Apr 18, 2024 at 10:46 PM Si-Wei Liu  
> >>> wrote:
>  On 4/10/2024 3:03 AM, Eugenio Pérez wrote:
> > IOVA tree is also used to track the mappings of virtio-net shadow
> > virtqueue.  This mappings may not match with the GPA->HVA ones.
> >
> > This causes a problem when overlapped regions (different GPA but 
> > same
> > translated HVA) exists in the tree, as looking them by HVA will 
> > return
> > them twice.  To solve this, create an id member so we can assign 
> > unique
> > identifiers (GPA) to the maps.
> >
> > Signed-off-by: Eugenio Pérez 
> > ---
> >   include/qemu/iova-tree.h | 5 +++--
> >   util/iova-tree.c | 3 ++-
> >   2 files changed, 5 insertions(+), 3 deletions(-)
> >
> > diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h
> > index 2a10a7052e..34ee230e7d 100644
> > --- a/include/qemu/iova-tree.h
> > +++ b/include/qemu/iova-tree.h
> > @@ -36,6 +36,7 @@ typedef struct DMAMap {
> >   hwaddr iova;
> >   hwaddr translated_addr;
> >   hwaddr size;/* Inclusive */
> > +uint64_t id;
> >   IOMMUAccessFlags perm;
> >   } QEMU_PACKED DMAMap;
> >   typedef gboolean (*iova_tree_iterator)(DMAMap *map);
> > @@ -100,8 +101,8 @@ const DMAMap *iova_tree_find(const IOVATree 
> > *tree, const DMAMap *map);
> >* @map: the mapping to search
> >*
> >* Search for a mapping in the iova tree that translated_addr 
> > overlaps with the
> > - * mapping range specified.  Only the first found mapping will be
> > - * returned.
> > + * mapping range specified and map->id is equal.  Only the first 
> > found
> > + * mapping will be returned.
> >*
> >* Return: DMAMap pointer if found, or NULL if not found.  
> > Note that
> >* the returned DMAMap pointer is maintained internally.  
> > User should
> > diff --git a/util/iova-tree.c b/util/iova-tree.c
> > index 536789797e..0863e0a3b8 100644
> > --- a/util/iova-tree.c
> > +++ b/util/iova-tree.c
> > @@ -97,7 +97,8 @@ static gboolean 
> > iova_tree_find_address_iterator(gpointer key, gpointer value,
> >
> >   needle = args->needle;
> >   if (map->translated_addr + map->size < 
> > needle->translated_addr ||
> > -needle->translated_addr + needle->size < 
> > map->translated_addr) {
> > +needle->translated_addr + needle->size < 
> > map->translated_addr ||
> > +needle->id != map->id) {
>  It looks this iterator can also be invoked by SVQ from
>  vhost_svq_translate_addr() -> iova_tree_find_iova(), where guest GPA
>  space will be searched on without passing in the ID (GPA), and exact
>  match for the same GPA range is not actually needed unlike the 
>  mapping
>  removal case. Could we create an API variant, for the SVQ lookup case
>  specifically? Or alternatively, add a special flag, say 
>  skip_id_match to
>  DMAMap, and the id match check may look like below:
> 
>  (!needle->skip_id_match && needle->id != map->id)
> 
>  I think vhost_svq_translate_addr() could just call the API variant or
>  pass DMAmap with skip_id_match set to true to 
>  svq_iova_tree_find_iova().
> 
> >>> I think you're totally right. But I'd really like to not complicate
> >>> the API of the iova_tree more.
> >>>
> >>> I think we can look for the hwaddr using memory_region_from_host and
> >>> then get the hwaddr. It is another lookup though...
> >> Yeah, that will be another means of doing translation without having to
> >> complicate the API around iova_tree. I wonder how the lookup through
> >> memory_region_from_host() may perform compared to the iova tree one, 
> >> the
> >> former looks to be an O(N) linear search on a linked list while the
> >> latter would be roughly O(log N) on an AVL tree?
> > Even worse, as the reverse lookup (from QEMU vaddr to SVQ IOVA) 

Re: [PATCH 2/3] target/riscv: Enforce WARL behavior for scounteren/hcounteren

2024-04-30 Thread Daniel Henrique Barboza




On 4/29/24 16:28, Atish Patra wrote:

scounteren/hcounteren are also WARL registers, similar to mcounteren.
Only set the bits for the available counters during the write to
preserve the WARL behavior.

Signed-off-by: Atish Patra 
---


Reviewed-by: Daniel Henrique Barboza 


  target/riscv/csr.c | 12 ++--
  1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 68ca31aff47d..a01911541d67 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -2843,7 +2843,11 @@ static RISCVException read_scounteren(CPURISCVState 
*env, int csrno,
  static RISCVException write_scounteren(CPURISCVState *env, int csrno,
 target_ulong val)
  {
-env->scounteren = val;
+RISCVCPU *cpu = env_archcpu(env);
+
+/* WARL register - disable unavailable counters */
+env->scounteren = val & (cpu->pmu_avail_ctrs | COUNTEREN_CY | COUNTEREN_TM 
|
+ COUNTEREN_IR);
  return RISCV_EXCP_NONE;
  }
  
@@ -3475,7 +3479,11 @@ static RISCVException read_hcounteren(CPURISCVState *env, int csrno,

  static RISCVException write_hcounteren(CPURISCVState *env, int csrno,
 target_ulong val)
  {
-env->hcounteren = val;
+RISCVCPU *cpu = env_archcpu(env);
+
+/* WARL register - disable unavailable counters */
+env->hcounteren = val & (cpu->pmu_avail_ctrs | COUNTEREN_CY | COUNTEREN_TM 
|
+ COUNTEREN_IR);
  return RISCV_EXCP_NONE;
  }
  





Re: [PATCH 1/3] target/riscv: Save counter values during countinhibit update

2024-04-30 Thread Daniel Henrique Barboza




On 4/29/24 16:28, Atish Patra wrote:

Currently, if a counter monitoring cycle/instret is stopped via
mcountinhibit we just update the state while the value is saved
during the next read. This is not accurate as the read may happen
many cycles after the counter is stopped. Ideally, the read should
return the value saved when the counter is stopped.

Thus, save the value of the counter during the inhibit update
operation and return that value during the read if corresponding bit
in mcountinhibit is set.

Signed-off-by: Atish Patra 
---
  target/riscv/cpu.h |  1 -
  target/riscv/csr.c | 32 
  target/riscv/machine.c |  1 -
  3 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 3b1a02b9449a..09bbf7ce9880 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -153,7 +153,6 @@ typedef struct PMUCTRState {
  target_ulong mhpmcounter_prev;
  /* Snapshort value of a counter in RV32 */
  target_ulong mhpmcounterh_prev;
-bool started;
  /* Value beyond UINT32_MAX/UINT64_MAX before overflow interrupt trigger */
  target_ulong irq_overflow_left;
  } PMUCTRState;
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 726096444fae..68ca31aff47d 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -929,17 +929,11 @@ static RISCVException riscv_pmu_read_ctr(CPURISCVState 
*env, target_ulong *val,
  
  if (get_field(env->mcountinhibit, BIT(ctr_idx))) {

  /*
- * Counter should not increment if inhibit bit is set. We can't really
- * stop the icount counting. Just return the counter value written by
- * the supervisor to indicate that counter was not incremented.
+ * Counter should not increment if inhibit bit is set. Just return the
+ * current counter value.
   */
-if (!counter->started) {
-*val = ctr_val;
-return RISCV_EXCP_NONE;
-} else {
-/* Mark that the counter has been stopped */
-counter->started = false;
-}
+ *val = ctr_val;
+ return RISCV_EXCP_NONE;
  }
  
  /*

@@ -1973,9 +1967,23 @@ static RISCVException write_mcountinhibit(CPURISCVState 
*env, int csrno,
  
  /* Check if any other counter is also monitoring cycles/instructions */

  for (cidx = 0; cidx < RV_MAX_MHPMCOUNTERS; cidx++) {
-if (!get_field(env->mcountinhibit, BIT(cidx))) {
  counter = &env->pmu_ctrs[cidx];
-counter->started = true;
+if (get_field(env->mcountinhibit, BIT(cidx)) && (val & BIT(cidx))) {
+   /*
+ * Update the counter value for cycle/instret as we can't stop the
+ * host ticks. But we should show the current value at this moment.
+ */
+if (riscv_pmu_ctr_monitor_cycles(env, cidx) ||
+riscv_pmu_ctr_monitor_instructions(env, cidx)) {
+counter->mhpmcounter_val = get_ticks(false) -
+   counter->mhpmcounter_prev +
+   counter->mhpmcounter_val;
+if (riscv_cpu_mxl(env) == MXL_RV32) {
+counter->mhpmcounterh_val = get_ticks(false) -
+counter->mhpmcounterh_prev +
+counter->mhpmcounterh_val;
+   }
+}
  }
  }
  
diff --git a/target/riscv/machine.c b/target/riscv/machine.c

index 76f2150f78b5..3e0f2dd2ce2a 100644
--- a/target/riscv/machine.c
+++ b/target/riscv/machine.c
@@ -328,7 +328,6 @@ static const VMStateDescription vmstate_pmu_ctr_state = {
  VMSTATE_UINTTL(mhpmcounterh_val, PMUCTRState),
  VMSTATE_UINTTL(mhpmcounter_prev, PMUCTRState),
  VMSTATE_UINTTL(mhpmcounterh_prev, PMUCTRState),
-VMSTATE_BOOL(started, PMUCTRState),


Unfortunately we can't remove fields from the VMStateDescription without breaking
migration backward compatibility. Older QEMUs will attempt to read a field that
doesn't exist and migration will fail.

I'm assuming that we care about backward compat. If we're not up to this point yet
then we can just bump the version_id of vmstate_pmu_ctr_state and be done with it.
This is fine to do unless someone jumps in and complains that we broke a migration
case for the 'virt' board. Granted, we don't have versioned boards yet so I'm unsure
if someone would actually have a basis to complain. Alistair, Drew, care to comment?


Now, if we care about backward migration compat, we'll need to do as described in
devel/migration/main.rst, section "Not sending existing elements". An example of
how we need to proceed can also be seen in commit 6cc88d6bf9. But in short we
would need to:

- add a dummy property, e.g. a 'mig_started' bool

- use a slightly different macro in vmstate:


-VMSTATE_BOOL(started, PMUCTRState),
+VMSTA

Re: [PATCH v3 00/13] exec: Rework around CPUState user fields (part 2)

2024-04-30 Thread Ilya Leoshkevich
On Tue, Apr 30, 2024 at 02:27:54PM +0200, Philippe Mathieu-Daudé wrote:
> Missing WASM testing by Ilya (branch available at
> https://gitlab.com/philmd/qemu/-/commits/tcg_flush_jmp_cache)

Hmm, it dies very early now:

  # gdb --args ./qemu-s390x -L /usr/s390x-linux-gnu 
/build/wasmtime/target/s390x-unknown-linux-gnu/debug/deps/component_fuzz_util-d10a3a6b4ad8af47

  Thread 1 "qemu-s390x" received signal SIGSEGV, Segmentation fault.
  0x5559b718 in cpu_common_realizefn (dev=0x557c28c0, 
errp=) at ../home/iii/myrepos/qemu/hw/core/cpu-common.c:217
  217 cpu->accel->plugin_state = qemu_plugin_create_vcpu_state();

  (gdb) bt
  #0  0x5559b718 in cpu_common_realizefn (dev=0x557c28c0, 
errp=) at ../home/iii/myrepos/qemu/hw/core/cpu-common.c:217
  #1  0x5559f59a in s390_cpu_realizefn (dev=0x557c28c0, 
errp=0x7fffe1a0) at ../home/iii/myrepos/qemu/target/s390x/cpu.c:284
  #2  0x5563f76b in device_set_realized (obj=, 
value=, errp=0x7fffe2e0) at 
../home/iii/myrepos/qemu/hw/core/qdev.c:510
  #3  0x5564363d in property_set_bool (obj=0x557c28c0, v=, name=, opaque=0x557a9140, errp=0x7fffe2e0) at 
../home/iii/myrepos/qemu/qom/object.c:2362
  #4  0x55646b9b in object_property_set (obj=obj@entry=0x557c28c0, 
name=name@entry=0x556e8ae2 "realized", v=v@entry=0x557c6650, 
errp=errp@entry=0x7fffe2e0)
  at ../home/iii/myrepos/qemu/qom/object.c:1471
  #5  0x5564a43f in object_property_set_qobject 
(obj=obj@entry=0x557c28c0, name=name@entry=0x556e8ae2 "realized", 
value=value@entry=0x557a7a90, errp=errp@entry=0x7fffe2e0)
  at ../home/iii/myrepos/qemu/qom/qom-qobject.c:28
  #6  0x55647204 in object_property_set_bool (obj=0x557c28c0, 
name=name@entry=0x556e8ae2 "realized", value=value@entry=true, 
errp=errp@entry=0x7fffe2e0)
  at ../home/iii/myrepos/qemu/qom/object.c:1541
  #7  0x5564025c in qdev_realize (dev=, 
bus=bus@entry=0x0, errp=errp@entry=0x7fffe2e0) at 
../home/iii/myrepos/qemu/hw/core/qdev.c:291
  #8  0x5559bbb4 in cpu_create (typename=) at 
../home/iii/myrepos/qemu/hw/core/cpu-common.c:61
  #9  0x5559a467 in main (argc=4, argv=0x7fffeaa8, envp=) at ../home/iii/myrepos/qemu/linux-user/main.c:811

  (gdb) p cpu
  $1 = (CPUState *) 0x557c28c0
  (gdb) p cpu->accel
  $2 = (AccelCPUState *) 0x0

Configured with: '/home/iii/myrepos/qemu/configure' 
'--target-list=s390x-linux-user' '--disable-tools' '--disable-slirp' 
'--disable-fdt' '--disable-capstone' '--disable-docs'

If you don't see what can be wrong here right away, I can debug this.

> Since v2:
> - Move cpu_loop_exit_requested() to "exec/cpu-loop.h"
> - Added R-b tags
> 
> Since v1:
> - First 13 patches queued
> - Restrict qemu_plugin_vcpu_exit_hook() to (TCG) plugins
> - Restrict cpu_plugin_mem_cbs_enabled() to TCG (plugins)
> - Addressed Richard review comments on the others:
>   - Move cpu_plugin_mem_cbs_enabled()
>   - Do not move mem_io_pc, waiting for [*]
>   - Mention can_do_io restricted
> 
> Finish extracting TCG fields from CPUState:
> - Extract tcg_cpu_exit() from cpu_exit()
> - Introduce AccelOpsClass::exit_vcpu_thread()
> - cpu_exit() calls exit_vcpu_thread=tcg_cpu_exit for TCG
> - Forward declare TaskState and more uses of get_task_state()
> - Introduce TCG AccelCPUState
> - Move TCG specific fields from CPUState to AccelCPUState
> - Restrict "exec/tlb-common.h" to TCG
> - Restrict iommu_notifiers, icount to system emulation
> 
> [*] 
> https://lore.kernel.org/qemu-devel/20240416040609.1313605-3-richard.hender...@linaro.org/
> 
> Based-on: https://gitlab.com/philmd/qemu/-/commits/accel-next
> 
> Philippe Mathieu-Daudé (13):
>   accel/tcg: Restrict qemu_plugin_vcpu_exit_hook() to TCG plugins
>   accel/tcg: Restrict cpu_plugin_mem_cbs_enabled() to TCG
>   accel/tcg: Move @plugin_mem_cbs from CPUState to
> CPUNegativeOffsetState
>   accel/tcg: Move @plugin_state from CPUState to TCG AccelCPUState
>   accel/tcg: Restrict cpu_loop_exit_requested() to TCG
>   accel/tcg: Restrict IcountDecr / can_do_io / CPUTLB to TCG
>   accel/tcg: Move @jmp_env from CPUState to TCG AccelCPUState
>   accel/tcg: Move @cflags_next_tb from CPUState to TCG AccelCPUState
>   accel/tcg: Move @iommu_notifiers from CPUState to TCG AccelCPUState
>   accel/tcg: Move @tcg_cflags from CPUState to TCG AccelCPUState
>   accel/tcg: Restrict icount to system emulation
>   accel/tcg: Move icount fields from CPUState to TCG AccelCPUState
>   accel/tcg: Move @tb_jmp_cache from CPUState to TCG AccelCPUState
> 
>  accel/tcg/internal-common.h  | 18 ++
>  accel/tcg/tb-jmp-cache.h |  4 +--
>  accel/tcg/tcg-accel-ops.h|  1 +
>  accel/tcg/vcpu-state.h   | 20 +++
>  include/exec/cpu-loop.h  | 35 +++
>  include/exec/exec-all.h  | 17 --
>  include/exec/tlb-common.h|  4 +++
>  include/hw/core/cpu.h| 58 ++

Re: [PATCH v7 09/12] hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents

2024-04-30 Thread fan
On Mon, Apr 29, 2024 at 09:58:42AM +0200, Markus Armbruster wrote:
> fan  writes:
> 
> > On Fri, Apr 26, 2024 at 11:12:50AM +0200, Markus Armbruster wrote:
> >> nifan@gmail.com writes:
> 
> [...]
> 
> >> > diff --git a/qapi/cxl.json b/qapi/cxl.json
> >> > index 4281726dec..2dcf03d973 100644
> >> > --- a/qapi/cxl.json
> >> > +++ b/qapi/cxl.json
> >> > @@ -361,3 +361,72 @@
> >> >  ##
> >> >  {'command': 'cxl-inject-correctable-error',
> >> >   'data': {'path': 'str', 'type': 'CxlCorErrorType'}}
> >> > +
> >> > +##
> >> > +# @CXLDCExtentRecord:
> >> 
> >> Such traffic jams of capital letters are hard to read.  What about
> >> CxlDynamicCapacityExtent?
> >> 
> >> > +#
> >> > +# Record of a single extent to add/release
> >> 
> >> Suggest "A dynamic capacity extent."
> >> 
> >> > +#
> >> > +# @offset: offset to the start of the region where the extent to be 
> >> > operated
> >> 
> >> Blank line here, please.
> >> 
> >> 
> >> 
> >> > +# @len: length of the extent
> >> > +#
> >> > +# Since: 9.1
> >> > +##
> >> > +{ 'struct': 'CXLDCExtentRecord',
> >> > +  'data': {
> >> > +  'offset':'uint64',
> >> > +  'len': 'uint64'
> >> > +  }
> >> > +}
> >> > +
> >> > +##
> >> > +# @cxl-add-dynamic-capacity:
> >> > +#
> >> > +# Command to start add dynamic capacity extents flow. The device will
> >> > +# have to acknowledged the acceptance of the extents before they are 
> >> > usable.
> >> 
> >> This text needs work.  More on that at the end of my review.
> >
> > > Yes. I will work on it for the next version once all the feedback
> > > is collected and comments are resolved.
> >
> > See below.
> >
> >> 
> >> docs/devel/qapi-code-gen.rst:
> >> 
> >> For legibility, wrap text paragraphs so every line is at most 70
> >> characters long.
> >> 
> >> Separate sentences with two spaces.
> >> 
> >> More elsewhere.
> >> 
> >> > +#
> >> > +# @path: CXL DCD canonical QOM path
> >> 
> >> I'd prefer @qom-path, unless you can make a consistency argument for
> >> @path.
> >> 
> >> Sure the QOM path needs to be canonical?
> >> 
> >> If not, what about "path to the CXL dynamic capacity device in the QOM
> >> tree".  Intentionally close to existing descriptions of @qom-path
> >> elsewhere.
> >
> > From the same file, I saw "path" was used for other commands, like
> > "cxl-inject-memory-module-event", so I followed it.
> > > DCD is nothing different from "type 3 device" except it can dynamically
> > change capacity. 
> > Renaming it to "qom-path" is no problem for me, just want to make sure it
> > will not break the naming consistency.
> 
> Both @path and @qom-path are used (sadly).  @path is used for all kinds
> of paths, whereas @qom-path is only used for QOM paths.  That's why I
> prefer it.
> 
> However, you're making a compelling local consistency argument: cxl.json
> uses only @path.  Sticking to that makes sense.
> 
> >> > +# @hid: host id
> >> 
> >> @host-id, unless "HID" is established terminology in CXL DCD land.
> >
> > host-id works.
> >> 
> >> What is a host ID?
> >
> > It is an id identifying the host to which the capacity is being added.
> 
> How are these IDs assigned?

All the arguments passed to the command here are defined in CXL spec. I
will add reference to the spec.

Based on the spec, for LD-FAM (Fabric attached memory represented as
logical device), host id is the LD-ID of the host interface to which
the capacity is being added. LD-ID is a unique number (16-bit) assigned
to a host interface.

> 
> >> > +# @selection-policy: policy to use for selecting extents for adding 
> >> > capacity
> >> 
> >> Where are selection policies defined?
> >
> > It is defined in CXL specification: Specifies the policy to use for 
> > selecting
> > which extents comprise the added capacity
> 
> Include a reference to the spec here?
Will do.
> 
> >> > +# @region-id: id of the region where the extent to add
> >> 
> >> Is "region ID" the established terminology in CXL DCD land?  Or is
> >> "region number" also used?  I'm asking because "ID" in this QEMU device
> >> context suggests a connection to a qdev ID.
> >> 
> >> If region number is fine, I'd rename to just @region, and rephrase the
> >> description to avoid "ID".  Perhaps "number of the region the extent is
> >> to be added to".  Not entirely happy with the phrasing, doesn't exactly
> >> roll off the tongue, but "where the extent to add" sounds worse to my
> >> ears.  Mind, I'm not a native speaker.
> >
> > Yes. region number is fine. Will rename it as "region"
> >
> >> 
> >> > +# @tag: Context field
> >> 
> >> What is this about?
> >
> > Based on the specification, it is "Context field utilized by implementations
> > that make use of the Dynamic Capacity feature.". Basically, it is a
> > > string (label) attached to a dynamic capacity extent so we can achieve
> > > a specific purpose, like identifying or grouping extents.
> 
> Include a reference to the spec here?
Will do.
> 
> >> > +# @extents: Extents to add
> >> 
> >> Blank lines between argument descriptions, please.

Re: [PATCH 1/2] accel/tcg: Make TCGCPUOps::cpu_exec_halt return bool for whether to halt

2024-04-30 Thread Richard Henderson

On 4/30/24 07:00, Peter Maydell wrote:

The TCGCPUOps::cpu_exec_halt method is called from cpu_handle_halt()
when the CPU is halted, so that a target CPU emulation can do
anything target-specific it needs to do.  (At the moment we only use
this on i386.)

The current specification of the method doesn't allow the target
specific code to do something different if the CPU is about to come
out of the halt state, because cpu_handle_halt() only determines this
after the method has returned.  (If the method called cpu_has_work()
itself this would introduce a potential race if an interrupt arrived
between the target's method implementation checking and
cpu_handle_halt() repeating the check.)

Change the definition of the method so that it returns a bool to
tell cpu_handle_halt() whether to stay in halt or not.

We will want this for the Arm target, where FEAT_WFxT wants to do
some work only for the case where the CPU is in halt but about to
leave it.

Signed-off-by: Peter Maydell 
---
  include/hw/core/tcg-cpu-ops.h   | 11 +--
  target/i386/tcg/helper-tcg.h|  2 +-
  accel/tcg/cpu-exec.c|  7 +--
  target/i386/tcg/sysemu/seg_helper.c |  3 ++-
  4 files changed, 17 insertions(+), 6 deletions(-)


Reviewed-by: Richard Henderson 

I like Alex's suggested rename.


--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -669,11 +669,14 @@ static inline bool cpu_handle_halt(CPUState *cpu)
  #ifndef CONFIG_USER_ONLY
  if (cpu->halted) {
  const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
+bool leave_halt;
  
  if (tcg_ops->cpu_exec_halt) {

-tcg_ops->cpu_exec_halt(cpu);
+leave_halt = tcg_ops->cpu_exec_halt(cpu);
+} else {
+leave_halt = cpu_has_work(cpu);
  }
-if (!cpu_has_work(cpu)) {
+if (!leave_halt) {
  return true;
  }


As a followup, I would also suggest making implementation of the hook mandatory.
We already require the has_work hook to be set; it would simply be a matter of copying the 
function pointer to the second slot.


Also, the assert in cpu_has_work could be moved to startup, as Phil has started to do with 
some of the other hooks.



r~




Re: [PATCH v4 0/2] query-cpu-model-expansion: report deprecated features

2024-04-30 Thread Collin Walling
[...]

Thank you all for the valuable feedback.  Since the QEMU interface seems
stable, I will rework my libvirt (not upstream) and post as an RFC.

-- 
Regards,
  Collin




[PATCH v2 1/3] hw/xen/xen_pt: Save back data only for declared registers

2024-04-30 Thread Marek Marczykowski-Górecki
Call pci_default_write_config() in xen_pt_pci_write_config() only for
registers that have matching XenPTRegInfo structure, and do that only after
resolving any custom handlers. This is important for two reasons:
1. XenPTRegInfo has ro_mask which needs to be enforced - Xen-specific
   hooks do that on their own (especially xen_pt_*_reg_write()).
2. Not setting value early allows hooks to see the old value too.

If it would be only about the first point, setting PCIDevice.wmask would
probably be sufficient, but given the second point, restructure those
writes.

Signed-off-by: Marek Marczykowski-Górecki 
---
v2:
 - rewrite commit message, previous one was very misleading
 - fix loop saving register values
 - fix int overflow when calculating write mask
---
 hw/xen/xen_pt.c | 24 +++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index 3635d1b..cea2e18 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -256,6 +256,7 @@ static void xen_pt_pci_write_config(PCIDevice *d, uint32_t 
addr,
 uint32_t find_addr = addr;
 XenPTRegInfo *reg = NULL;
 bool wp_flag = false;
+uint32_t emul_mask = 0, write_val;
 
 if (xen_pt_pci_config_access_check(d, addr, len)) {
 return;
@@ -311,7 +312,6 @@ static void xen_pt_pci_write_config(PCIDevice *d, uint32_t 
addr,
 }
 
 memory_region_transaction_begin();
-pci_default_write_config(d, addr, val, len);
 
 /* adjust the read and write value to appropriate CFC-CFF window */
 read_val <<= (addr & 3) << 3;
@@ -371,6 +371,9 @@ static void xen_pt_pci_write_config(PCIDevice *d, uint32_t 
addr,
 return;
 }
 
+emul_mask |= ((1L << (reg->size * 8)) - 1)
+ << ((find_addr & 3) * 8);
+
 /* calculate next address to find */
 emul_len -= reg->size;
 if (emul_len > 0) {
@@ -397,6 +400,25 @@ static void xen_pt_pci_write_config(PCIDevice *d, uint32_t 
addr,
 /* need to shift back before passing them to xen_host_pci_set_block. */
 val >>= (addr & 3) << 3;
 
+/* store emulated registers after calling their handlers */
+write_val = val;
+for (index = 0; index < len; index += emul_len) {
+emul_len = 0;
+while (emul_mask & 0xff) {
+emul_len++;
+emul_mask >>= 8;
+}
+if (emul_len) {
+uint32_t mask = ((1L << (emul_len * 8)) - 1);
+pci_default_write_config(d, addr + index, write_val & mask,
+ emul_len);
+} else {
+emul_mask >>= 8;
+emul_len = 1;
+}
+write_val >>= emul_len * 8;
+}
+
 memory_region_transaction_commit();
 
 out:
-- 
git-series 0.9.1



[PATCH v2 3/3] Do not access /dev/mem in MSI-X PCI passthrough on Xen

2024-04-30 Thread Marek Marczykowski-Górecki
The /dev/mem is used for two purposes:
 - reading PCI_MSIX_ENTRY_CTRL_MASKBIT
 - reading Pending Bit Array (PBA)

The first one was originally done because Xen did not send all
vector ctrl writes to the device model, so QEMU might have an outdated
register value. If Xen is new enough, this has been changed, so QEMU can
now use its cached value of the register instead. Detect the "new
enough" based on XENFEAT_dm_msix_all_writes bit in XENVER_get_features.

The Pending Bit Array (PBA) handling is for the case where it lives on
the same page as the MSI-X table itself. Xen has been extended to handle
this case too (as well as other registers that may live on those pages),
so QEMU handling is not necessary anymore.

Additionally, reading from /dev/mem is trapped and emulated by Xen, so
QEMU doesn't see real values anyway. And if it did, this method is prone
to race conditions. Removing /dev/mem access is useful to work within
stubdomain (avoids emulated reads and potential races), and necessary
when dom0 kernel runs in lockdown mode (where /dev/mem is unavailable at
all).

Signed-off-by: Marek Marczykowski-Górecki 
---
Changes in v2:
- Make change conditional on new Xen version (tested via
  XENFEAT_dm_msix_all_writes)
- add few comments
---
 hw/xen/xen_pt_msi.c | 94 --
 1 file changed, 59 insertions(+), 35 deletions(-)

diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c
index 09cca4e..836cc9c 100644
--- a/hw/xen/xen_pt_msi.c
+++ b/hw/xen/xen_pt_msi.c
@@ -460,15 +460,23 @@ static void pci_msix_write(void *opaque, hwaddr addr,
 entry->updated = true;
 } else if (msix->enabled && entry->updated &&
!(val & PCI_MSIX_ENTRY_CTRL_MASKBIT)) {
-const volatile uint32_t *vec_ctrl;
-
 /*
- * If Xen intercepts the mask bit access, entry->vec_ctrl may not be
- * up-to-date. Read from hardware directly.
+ * Reading mask bit from hardware directly is needed on older Xen only.
  */
-vec_ctrl = s->msix->phys_iomem_base + entry_nr * PCI_MSIX_ENTRY_SIZE
-+ PCI_MSIX_ENTRY_VECTOR_CTRL;
-xen_pt_msix_update_one(s, entry_nr, *vec_ctrl);
+if (s->msix->phys_iomem_base) {
+/* Memory mapped registers */
+const volatile uint32_t *vec_ctrl;
+
+/*
+ * If Xen intercepts the mask bit access, entry->vec_ctrl may not 
be
+ * up-to-date. Read from hardware directly.
+ */
+vec_ctrl = s->msix->phys_iomem_base + entry_nr * 
PCI_MSIX_ENTRY_SIZE
++ PCI_MSIX_ENTRY_VECTOR_CTRL;
+xen_pt_msix_update_one(s, entry_nr, *vec_ctrl);
+} else {
+xen_pt_msix_update_one(s, entry_nr, entry->latch(VECTOR_CTRL));
+}
 }
 
 set_entry_value(entry, offset, val);
@@ -493,7 +501,12 @@ static uint64_t pci_msix_read(void *opaque, hwaddr addr,
 return get_entry_value(&msix->msix_entry[entry_nr], offset);
 } else {
 /* Pending Bit Array (PBA) */
-return *(uint32_t *)(msix->phys_iomem_base + addr);
+if (s->msix->phys_iomem_base) {
+return *(uint32_t *)(msix->phys_iomem_base + addr);
+}
+XEN_PT_LOG(&s->dev, "reading PBA, addr 0x%lx, offset 0x%lx\n",
+   addr, addr - msix->total_entries * PCI_MSIX_ENTRY_SIZE);
+return 0x;
 }
 }
 
@@ -528,8 +541,8 @@ int xen_pt_msix_init(XenPCIPassthroughState *s, uint32_t 
base)
 uint32_t table_off = 0;
 int i, total_entries, bar_index;
 XenHostPCIDevice *hd = &s->real_device;
+xen_feature_info_t xc_version_info = { 0 };
 PCIDevice *d = &s->dev;
-int fd = -1;
 XenPTMSIX *msix = NULL;
 int rc = 0;
 
@@ -543,6 +556,10 @@ int xen_pt_msix_init(XenPCIPassthroughState *s, uint32_t 
base)
 return -1;
 }
 
+if (xc_version(xen_xc, XENVER_get_features, &xc_version_info) < 0) {
+return -1;
+}
+
 rc = xen_host_pci_get_word(hd, base + PCI_MSIX_FLAGS, &control);
 if (rc) {
 XEN_PT_ERR(d, "Failed to read PCI_MSIX_FLAGS field\n");
@@ -576,33 +593,40 @@ int xen_pt_msix_init(XenPCIPassthroughState *s, uint32_t 
base)
 msix->table_base = s->real_device.io_regions[bar_index].base_addr;
 XEN_PT_LOG(d, "get MSI-X table BAR base 0x%"PRIx64"\n", msix->table_base);
 
-fd = open("/dev/mem", O_RDWR);
-if (fd == -1) {
-rc = -errno;
-XEN_PT_ERR(d, "Can't open /dev/mem: %s\n", strerror(errno));
-goto error_out;
-}
-XEN_PT_LOG(d, "table_off = 0x%x, total_entries = %d\n",
-   table_off, total_entries);
-msix->table_offset_adjust = table_off & 0x0fff;
-msix->phys_iomem_base =
-mmap(NULL,
- total_entries * PCI_MSIX_ENTRY_SIZE + msix->table_offset_adjust,
- PROT_READ,
- MAP_SHARED | MAP_LOCKED,
- fd,
- msix->table_base + table_off - msix->table_offset_adjust);
-cl

[PATCH v2 0/3] Fix MSI-X handling for Xen HVM

2024-04-30 Thread Marek Marczykowski-Górecki
This series fixes handling MSI-X when device model is running in a stubdomain.
The main part is to avoid accessing /dev/mem, which also fixes running dom0
with lockdown enabled.

It depends on a behavior change of Xen that was just committed, and signaled
with a feature flag. If Xen is too old (and XENFEAT_dm_msix_all_writes flag is
not set), fallback to the old behavior.

The other part is a fix to enforce read-only registers in the config space.
This fixes MSI-X setup for the iwlwifi Linux driver, as it happens to write to the MSI-X
capability id reg (as a workaround for some older device which has another
register there). It should be no-op, but due to a bug in xen_pt code,
it broke MSI-X detection.

All those patches have been shipped in Qubes OS 4.2 already, and have proven to fix
the issue.

See individual commit messages for details.

Marek Marczykowski-Górecki (3):
  hw/xen/xen_pt: Save back data only for declared registers
  Update Xen's features.h header
  Do not access /dev/mem in MSI-X PCI passthrough on Xen

 hw/xen/xen_pt.c | 24 +++-
 hw/xen/xen_pt_msi.c | 94 ++
 include/hw/xen/interface/features.h | 17 +-
 3 files changed, 99 insertions(+), 36 deletions(-)

base-commit: 2358f1b60f73287fe606c7ff48043b4f9e1c2d0f
-- 
git-series 0.9.1



[PATCH v2 2/3] Update Xen's features.h header

2024-04-30 Thread Marek Marczykowski-Górecki
Update it to get XENFEAT_dm_msix_all_writes for the next patch.

Signed-off-by: Marek Marczykowski-Górecki 
---
 include/hw/xen/interface/features.h | 17 +
 1 file changed, 17 insertions(+)

diff --git a/include/hw/xen/interface/features.h 
b/include/hw/xen/interface/features.h
index d2a9175..8801930 100644
--- a/include/hw/xen/interface/features.h
+++ b/include/hw/xen/interface/features.h
@@ -111,6 +111,23 @@
 #define XENFEAT_not_direct_mapped 16
 #define XENFEAT_direct_mapped 17
 
+/*
+ * Signal whether the domain is able to use the following hypercalls:
+ *
+ * VCPUOP_register_runstate_phys_area
+ * VCPUOP_register_vcpu_time_phys_area
+ */
+#define XENFEAT_runstate_phys_area18
+#define XENFEAT_vcpu_time_phys_area   19
+
+/*
+ * If set, Xen will passthrough all MSI-X vector ctrl writes to device model,
+ * not only those unmasking an entry. This allows device model to properly keep
+ * track of the MSI-X table without having to read it from the device behind
+ * Xen's backs. This information is relevant only for device models.
+ */
+#define XENFEAT_dm_msix_all_writes20
+
 #define XENFEAT_NR_SUBMAPS 1
 
 #endif /* __XEN_PUBLIC_FEATURES_H__ */
-- 
git-series 0.9.1



Re: [PATCH 2/2] target/arm: Implement FEAT WFxT and enable for '-cpu max'

2024-04-30 Thread Richard Henderson

On 4/30/24 07:00, Peter Maydell wrote:

+if (uadd64_overflow(timeout, offset, &nexttick)) {
+nexttick = UINT64_MAX;
+}
+if (nexttick > INT64_MAX / gt_cntfrq_period_ns(cpu)) {
+/*
+ * If the timeout is too long for the signed 64-bit range
+ * of a QEMUTimer, let it expire early.
+ */
+timer_mod_ns(cpu->wfxt_timer, INT64_MAX);
+} else {
+timer_mod(cpu->wfxt_timer, nexttick);
+}


The use of both UINT64_MAX and INT64_MAX is confusing.  Perhaps

if (uadd64_overflow(timeout, offset, &nexttick) ||
nexttick > INT64_MAX / gt_cntfrq_period_ns(cpu)) {
nexttick = INT64_MAX;
}
timer_mod(cpu->wfxt_timer, nexttick);


Anyway,
Reviewed-by: Richard Henderson 


r~



Re: [PATCH v4] fix endianness bug

2024-04-30 Thread Alex Bennée
Alexandra Diupina  writes:

As the subject is what ends up in the shortlog it is useful to prefix
the subsystem to make it easier to see what was touched when reviewing
log files. So maybe:

xlnx_dpdma: fix endianness bug

or even:

xlnx_dpdma: fix descriptor endianness bug

as we have space within the 60 or so chars recommended for subject lines ;-)



-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro



[PATCH 1/1] prealloc: add truncate mode for prealloc filter

2024-04-30 Thread Denis V. Lunev
Preallocate filter allows to implement really interesting setups.

Assume that we have
* shared block device, f.e. iSCSI LUN, implemented with some HW device
* clustered LVM on top of it
* QCOW2 image stored inside LVM volume

This allows very cheap clustered setups with all QCOW2 features intact.
Currently supported setups using QCOW2 with data_file option are not
so cool as snapshots are not allowed, QCOW2 should be placed into some
additional distributed storage and so on.

However, QCOW2 inside an LVM volume has a drawback. The image grows, and
in order to accommodate it the LVM volume has to be resized. This
could be done externally using the ENOSPC event/condition, but this is
cumbersome.

This patch introduces a native implementation for such a setup. We should
just put the prealloc filter in between the QCOW2 format and file nodes. In that
case the LVM volume will be resized at the proper moment, and that is done efficiently
as resizing is done in chunks.

The patch adds allocation mode for this purpose in order to distinguish
'fallocate' for ordinary file system and 'truncate'.

Signed-off-by: Denis V. Lunev 
CC: Alexander Ivanov 
CC: Kevin Wolf 
CC: Hanna Reitz 
CC: Vladimir Sementsov-Ogievskiy 
---
 block/preallocate.c | 50 +++--
 1 file changed, 48 insertions(+), 2 deletions(-)

diff --git a/block/preallocate.c b/block/preallocate.c
index 4d82125036..6d31627325 100644
--- a/block/preallocate.c
+++ b/block/preallocate.c
@@ -33,10 +33,24 @@
 #include "block/block-io.h"
 #include "block/block_int.h"
 
+typedef enum PreallocateMode {
+PREALLOCATE_MODE_FALLOCATE = 0,
+PREALLOCATE_MODE_TRUNCATE = 1,
+PREALLOCATE_MODE__MAX = 2,
+} PreallocateMode;
+
+static QEnumLookup prealloc_mode_lookup = {
+.array = (const char *const[]) {
+"falloc",
+"truncate",
+},
+.size = PREALLOCATE_MODE__MAX,
+};
 
 typedef struct PreallocateOpts {
 int64_t prealloc_size;
 int64_t prealloc_align;
+PreallocateMode prealloc_mode;
 } PreallocateOpts;
 
 typedef struct BDRVPreallocateState {
@@ -79,6 +93,7 @@ typedef struct BDRVPreallocateState {
 
 #define PREALLOCATE_OPT_PREALLOC_ALIGN "prealloc-align"
 #define PREALLOCATE_OPT_PREALLOC_SIZE "prealloc-size"
+#define PREALLOCATE_OPT_MODE "mode"
 static QemuOptsList runtime_opts = {
 .name = "preallocate",
 .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
@@ -94,7 +109,14 @@ static QemuOptsList runtime_opts = {
 .type = QEMU_OPT_SIZE,
 .help = "how much to preallocate, default 128M",
 },
-{ /* end of list */ }
+{
+.name = PREALLOCATE_OPT_MODE,
+.type = QEMU_OPT_STRING,
+.help = "Preallocation mode on image expansion "
+"(allowed values: falloc, truncate)",
+.def_value_str = "falloc",
+},
+{ /* end of list */ },
 },
 };
 
@@ -102,6 +124,8 @@ static bool preallocate_absorb_opts(PreallocateOpts *dest, 
QDict *options,
 BlockDriverState *child_bs, Error **errp)
 {
 QemuOpts *opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+Error *local_err = NULL;
+char *buf;
 
 if (!qemu_opts_absorb_qdict(opts, options, errp)) {
 return false;
@@ -112,6 +136,17 @@ static bool preallocate_absorb_opts(PreallocateOpts *dest, 
QDict *options,
 dest->prealloc_size =
 qemu_opt_get_size(opts, PREALLOCATE_OPT_PREALLOC_SIZE, 128 * MiB);
 
+buf = qemu_opt_get_del(opts, PREALLOCATE_OPT_MODE);
+/* prealloc_mode can be downgraded later during allocate_clusters */
+dest->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf,
+  PREALLOCATE_MODE_FALLOCATE,
+  &local_err);
+g_free(buf);
+if (local_err != NULL) {
+error_propagate(errp, local_err);
+return false;
+}
+
 qemu_opts_del(opts);
 
 if (!QEMU_IS_ALIGNED(dest->prealloc_align, BDRV_SECTOR_SIZE)) {
@@ -335,9 +370,20 @@ handle_write(BlockDriverState *bs, int64_t offset, int64_t 
bytes,
 
 want_merge_zero = want_merge_zero && (prealloc_start <= offset);
 
-ret = bdrv_co_pwrite_zeroes(
+switch (s->opts.prealloc_mode) {
+case PREALLOCATE_MODE_FALLOCATE:
+ret = bdrv_co_pwrite_zeroes(
 bs->file, prealloc_start, prealloc_end - prealloc_start,
 BDRV_REQ_NO_FALLBACK | BDRV_REQ_SERIALISING | BDRV_REQ_NO_WAIT);
+break;
+case PREALLOCATE_MODE_TRUNCATE:
+ret = bdrv_co_truncate(bs->file, prealloc_end, false,
+   PREALLOC_MODE_OFF, 0, NULL);
+break;
+default:
+return false;
+}
+
 if (ret < 0) {
 s->file_end = ret;
 return false;
-- 
2.39.3




Re: [PATCH v7 09/12] hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents

2024-04-30 Thread Jonathan Cameron via
On Mon, 29 Apr 2024 09:58:42 +0200
Markus Armbruster  wrote:

> fan  writes:
> 
> > On Fri, Apr 26, 2024 at 11:12:50AM +0200, Markus Armbruster wrote:  
> >> nifan@gmail.com writes:  
> 
> [...]
> 
> >> > diff --git a/qapi/cxl.json b/qapi/cxl.json
> >> > index 4281726dec..2dcf03d973 100644
> >> > --- a/qapi/cxl.json
> >> > +++ b/qapi/cxl.json
> >> > @@ -361,3 +361,72 @@
> >> >  ##
> >> >  {'command': 'cxl-inject-correctable-error',
> >> >   'data': {'path': 'str', 'type': 'CxlCorErrorType'}}
> >> > +
> >> > +##
> >> > +# @CXLDCExtentRecord:  
> >> 
> >> Such traffic jams of capital letters are hard to read.  What about
> >> CxlDynamicCapacityExtent?
> >>   
> >> > +#
> >> > +# Record of a single extent to add/release  
> >> 
> >> Suggest "A dynamic capacity extent."
> >>   
> >> > +#
> >> > +# @offset: offset to the start of the region where the extent to be 
> >> > operated  
> >> 
> >> Blank line here, please.
> >> 
> >> 
> >>   
> >> > +# @len: length of the extent
> >> > +#
> >> > +# Since: 9.1
> >> > +##
> >> > +{ 'struct': 'CXLDCExtentRecord',
> >> > +  'data': {
> >> > +  'offset':'uint64',
> >> > +  'len': 'uint64'
> >> > +  }
> >> > +}
> >> > +
> >> > +##
> >> > +# @cxl-add-dynamic-capacity:
> >> > +#
> >> > +# Command to start add dynamic capacity extents flow. The device will
> >> > +# have to acknowledged the acceptance of the extents before they are 
> >> > usable.  
> >> 
> >> This text needs work.  More on that at the end of my review.  
> >
> > Yes. I will work on it for the next version once all the feedbacks
> > are collected and comments are resolved.
> >
> > See below.
> >  
> >> 
> >> docs/devel/qapi-code-gen.rst:
> >> 
> >> For legibility, wrap text paragraphs so every line is at most 70
> >> characters long.
> >> 
> >> Separate sentences with two spaces.
> >> 
> >> More elsewhere.
> >>   
> >> > +#
> >> > +# @path: CXL DCD canonical QOM path  
> >> 
> >> I'd prefer @qom-path, unless you can make a consistency argument for
> >> @path.
> >> 
> >> Sure the QOM path needs to be canonical?
> >> 
> >> If not, what about "path to the CXL dynamic capacity device in the QOM
> >> tree".  Intentionally close to existing descriptions of @qom-path
> >> elsewhere.  
> >
> > From the same file, I saw "path" was used for other commands, like
> > "cxl-inject-memory-module-event", so I followed it.
> > DCD is nothing different from "type 3 device" expect it can dynamically
> > change capacity. 
> > Renaming it to "qom-path" is no problem for me, just want to make sure it
> > will not break the naming consistency.  
> 
> Both @path and @qom-path are used (sadly).  @path is used for all kinds
> of paths, whereas @qom-path is only used for QOM paths.  That's why I
> prefer it.
> 
> However, you're making a compelling local consistency argument: cxl.json
> uses only @path.  Sticking to that makes sense.
> 
> >> > +# @hid: host id  
> >> 
> >> @host-id, unless "HID" is established terminology in CXL DCD land.  
> >
> > host-id works.  
> >> 
> >> What is a host ID?  
> >
> > It is an id identifying the host to which the capacity is being added.  
> 
> How are these IDs assigned?

Right now there is only 1 option.  We can drop this for now and introduce
it when needed (Default of 0 will be fine).  Multi head device patches
that will need this are on list though I haven't read them yet :(

> 
> >> > +# @selection-policy: policy to use for selecting extents for adding 
> >> > capacity  
> >> 
> >> Where are selection policies defined?  
> >
> > It is defined in CXL specification: Specifies the policy to use for 
> > selecting
> > which extents comprise the added capacity  
> 
> Include a reference to the spec here?
> 
> >> > +# @region-id: id of the region where the extent to add  
> >> 
> >> Is "region ID" the established terminology in CXL DCD land?  Or is
> >> "region number" also used?  I'm asking because "ID" in this QEMU device
> >> context suggests a connection to a qdev ID.
> >> 
> >> If region number is fine, I'd rename to just @region, and rephrase the
> >> description to avoid "ID".  Perhaps "number of the region the extent is
> >> to be added to".  Not entirely happy with the phrasing, doesn't exactly
> >> roll off the tongue, but "where the extent to add" sounds worse to my
> >> ears.  Mind, I'm not a native speaker.  
> >
> > Yes. region number is fine. Will rename it as "region"
> >  
> >>   
> >> > +# @tag: Context field  
> >> 
> >> What is this about?  
> >
> > Based on the specification, it is "Context field utilized by implementations
> > that make use of the Dynamic Capacity feature.". Basically, it is a
> > string (label) attached to an dynamic capacity extent so we can achieve
> > specific purpose, like identifying or grouping extents.  
> 
> Include a reference to the spec here?

Agreed - that is the best we can do. It's a magic value.

> 
> >> > +# @extents: Extents to add  
> >> 
> >> Blank lines between argument descriptions, please.
> >>   
> >> > +#
> >> > 

Re: [RFC 1/2] iova_tree: add an id member to DMAMap

2024-04-30 Thread Eugenio Perez Martin
On Tue, Apr 30, 2024 at 7:55 AM Si-Wei Liu  wrote:
>
>
>
> On 4/29/2024 1:14 AM, Eugenio Perez Martin wrote:
> > On Thu, Apr 25, 2024 at 7:44 PM Si-Wei Liu  wrote:
> >>
> >>
> >> On 4/24/2024 12:33 AM, Eugenio Perez Martin wrote:
> >>> On Wed, Apr 24, 2024 at 12:21 AM Si-Wei Liu  wrote:
> 
>  On 4/22/2024 1:49 AM, Eugenio Perez Martin wrote:
> > On Sat, Apr 20, 2024 at 1:50 AM Si-Wei Liu  
> > wrote:
> >> On 4/19/2024 1:29 AM, Eugenio Perez Martin wrote:
> >>> On Thu, Apr 18, 2024 at 10:46 PM Si-Wei Liu  
> >>> wrote:
>  On 4/10/2024 3:03 AM, Eugenio Pérez wrote:
> > IOVA tree is also used to track the mappings of virtio-net shadow
> > virtqueue.  This mappings may not match with the GPA->HVA ones.
> >
> > This causes a problem when overlapped regions (different GPA but 
> > same
> > translated HVA) exists in the tree, as looking them by HVA will 
> > return
> > them twice.  To solve this, create an id member so we can assign 
> > unique
> > identifiers (GPA) to the maps.
> >
> > Signed-off-by: Eugenio Pérez 
> > ---
> >   include/qemu/iova-tree.h | 5 +++--
> >   util/iova-tree.c | 3 ++-
> >   2 files changed, 5 insertions(+), 3 deletions(-)
> >
> > diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h
> > index 2a10a7052e..34ee230e7d 100644
> > --- a/include/qemu/iova-tree.h
> > +++ b/include/qemu/iova-tree.h
> > @@ -36,6 +36,7 @@ typedef struct DMAMap {
> >   hwaddr iova;
> >   hwaddr translated_addr;
> >   hwaddr size;/* Inclusive */
> > +uint64_t id;
> >   IOMMUAccessFlags perm;
> >   } QEMU_PACKED DMAMap;
> >   typedef gboolean (*iova_tree_iterator)(DMAMap *map);
> > @@ -100,8 +101,8 @@ const DMAMap *iova_tree_find(const IOVATree 
> > *tree, const DMAMap *map);
> >* @map: the mapping to search
> >*
> >* Search for a mapping in the iova tree that translated_addr 
> > overlaps with the
> > - * mapping range specified.  Only the first found mapping will be
> > - * returned.
> > + * mapping range specified and map->id is equal.  Only the first 
> > found
> > + * mapping will be returned.
> >*
> >* Return: DMAMap pointer if found, or NULL if not found.  
> > Note that
> >* the returned DMAMap pointer is maintained internally.  
> > User should
> > diff --git a/util/iova-tree.c b/util/iova-tree.c
> > index 536789797e..0863e0a3b8 100644
> > --- a/util/iova-tree.c
> > +++ b/util/iova-tree.c
> > @@ -97,7 +97,8 @@ static gboolean 
> > iova_tree_find_address_iterator(gpointer key, gpointer value,
> >
> >   needle = args->needle;
> >   if (map->translated_addr + map->size < 
> > needle->translated_addr ||
> > -needle->translated_addr + needle->size < 
> > map->translated_addr) {
> > +needle->translated_addr + needle->size < 
> > map->translated_addr ||
> > +needle->id != map->id) {
>  It looks this iterator can also be invoked by SVQ from
>  vhost_svq_translate_addr() -> iova_tree_find_iova(), where guest GPA
>  space will be searched on without passing in the ID (GPA), and exact
>  match for the same GPA range is not actually needed unlike the 
>  mapping
>  removal case. Could we create an API variant, for the SVQ lookup case
>  specifically? Or alternatively, add a special flag, say 
>  skip_id_match to
>  DMAMap, and the id match check may look like below:
> 
>  (!needle->skip_id_match && needle->id != map->id)
> 
>  I think vhost_svq_translate_addr() could just call the API variant or
>  pass DMAmap with skip_id_match set to true to 
>  svq_iova_tree_find_iova().
> 
> >>> I think you're totally right. But I'd really like to not complicate
> >>> the API of the iova_tree more.
> >>>
> >>> I think we can look for the hwaddr using memory_region_from_host and
> >>> then get the hwaddr. It is another lookup though...
> >> Yeah, that will be another means of doing translation without having to
> >> complicate the API around iova_tree. I wonder how the lookup through
> >> memory_region_from_host() may perform compared to the iova tree one, 
> >> the
> >> former looks to be an O(N) linear search on a linked list while the
> >> latter would be roughly O(log N) on an AVL tree?
> > Even worse, as the reverse lookup (from QEMU vaddr to SVQ IOVA) is
> > linear too

Re: [PATCH 1/2] accel/tcg: Make TCGCPUOps::cpu_exec_halt return bool for whether to halt

2024-04-30 Thread Alex Bennée
Peter Maydell  writes:

> The TCGCPUOps::cpu_exec_halt method is called from cpu_handle_halt()
> when the CPU is halted, so that a target CPU emulation can do
> anything target-specific it needs to do.  (At the moment we only use
> this on i386.)
>
> The current specification of the method doesn't allow the target
> specific code to do something different if the CPU is about to come
> out of the halt state, because cpu_handle_halt() only determines this
> after the method has returned.  (If the method called cpu_has_work()
> itself this would introduce a potential race if an interrupt arrived
> between the target's method implementation checking and
> cpu_handle_halt() repeating the check.)
>
> Change the definition of the method so that it returns a bool to
> tell cpu_handle_halt() whether to stay in halt or not.
>
> We will want this for the Arm target, where FEAT_WFxT wants to do
> some work only for the case where the CPU is in halt but about to
> leave it.
>
> Signed-off-by: Peter Maydell 
> ---
>  include/hw/core/tcg-cpu-ops.h   | 11 +--
>  target/i386/tcg/helper-tcg.h|  2 +-
>  accel/tcg/cpu-exec.c|  7 +--
>  target/i386/tcg/sysemu/seg_helper.c |  3 ++-
>  4 files changed, 17 insertions(+), 6 deletions(-)
>
> diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h
> index dc1f16a9777..f3ac76e6f6d 100644
> --- a/include/hw/core/tcg-cpu-ops.h
> +++ b/include/hw/core/tcg-cpu-ops.h
> @@ -111,8 +111,15 @@ struct TCGCPUOps {
>  void (*do_interrupt)(CPUState *cpu);
>  /** @cpu_exec_interrupt: Callback for processing interrupts in cpu_exec 
> */
>  bool (*cpu_exec_interrupt)(CPUState *cpu, int interrupt_request);
> -/** @cpu_exec_halt: Callback for handling halt in cpu_exec */
> -void (*cpu_exec_halt)(CPUState *cpu);
> +/**
> + * @cpu_exec_halt: Callback for handling halt in cpu_exec.
> + *
> + * Return true to indicate that the CPU should now leave halt, false
> + * if it should remain in the halted state.
> + * If this method is not provided, the default is to leave halt
> + * if cpu_has_work() returns true.
> + */
> +bool (*cpu_exec_halt)(CPUState *cpu);

Would it be too much to rename the method to cpu_exec_leave_halt(), to
make the sense of the return value clearer at the point of use?

>  /**
>   * @tlb_fill: Handle a softmmu tlb miss
>   *
> diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h
> index effc2c1c984..85957943bf3 100644
> --- a/target/i386/tcg/helper-tcg.h
> +++ b/target/i386/tcg/helper-tcg.h
> @@ -39,7 +39,7 @@ QEMU_BUILD_BUG_ON(TCG_PHYS_ADDR_BITS > 
> TARGET_PHYS_ADDR_SPACE_BITS);
>   */
>  void x86_cpu_do_interrupt(CPUState *cpu);
>  #ifndef CONFIG_USER_ONLY
> -void x86_cpu_exec_halt(CPUState *cpu);
> +bool x86_cpu_exec_halt(CPUState *cpu);
>  bool x86_need_replay_interrupt(int interrupt_request);
>  bool x86_cpu_exec_interrupt(CPUState *cpu, int int_req);
>  #endif
> diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
> index 5c70748060a..550f93b19ce 100644
> --- a/accel/tcg/cpu-exec.c
> +++ b/accel/tcg/cpu-exec.c
> @@ -669,11 +669,14 @@ static inline bool cpu_handle_halt(CPUState *cpu)
>  #ifndef CONFIG_USER_ONLY
>  if (cpu->halted) {
>  const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
> +bool leave_halt;
>  
>  if (tcg_ops->cpu_exec_halt) {
> -tcg_ops->cpu_exec_halt(cpu);
> +leave_halt = tcg_ops->cpu_exec_halt(cpu);
> +} else {
> +leave_halt = cpu_has_work(cpu);
>  }
> -if (!cpu_has_work(cpu)) {
> +if (!leave_halt) {
>  return true;
>  }
>  
> diff --git a/target/i386/tcg/sysemu/seg_helper.c 
> b/target/i386/tcg/sysemu/seg_helper.c
> index 2db8083748e..9ba94deb3aa 100644
> --- a/target/i386/tcg/sysemu/seg_helper.c
> +++ b/target/i386/tcg/sysemu/seg_helper.c
> @@ -128,7 +128,7 @@ void x86_cpu_do_interrupt(CPUState *cs)
>  }
>  }
>  
> -void x86_cpu_exec_halt(CPUState *cpu)
> +bool x86_cpu_exec_halt(CPUState *cpu)
>  {
>  if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
>  X86CPU *x86_cpu = X86_CPU(cpu);
> @@ -138,6 +138,7 @@ void x86_cpu_exec_halt(CPUState *cpu)
>  cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
>  bql_unlock();
>  }
> +return cpu_has_work(cpu);

The x86 version is essentially being called for side effects. Do we want
to document this usage in the method?

>  }
>  
>  bool x86_need_replay_interrupt(int interrupt_request)

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro



Re: [PATCH 1/1] tests/fp/meson: don't build fp-bench test if fenv.h is missing

2024-04-30 Thread Alex Bennée
Dario Binacchi  writes:

> The fp-bench test (i. e. tests/fp/fp-bench.c) use fenv.h that is not
> always provided by the libc (uClibc). The patch disables its compilation
> in case the header is not available.
>
> The patch is based on a suggestion from Paolo Bonzini, which you can
> find at the following link.
>
> Link: https://lists.nongnu.org/archive/html/qemu-devel/2021-03/msg00492.html
> Suggested-by: Paolo Bonzini 
> Signed-off-by: Dario Binacchi 

Acked-by: Alex Bennée 

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro



[PATCH 1/1] block: drop force_dup parameter of raw_reconfigure_getfd()

2024-04-30 Thread Denis V. Lunev
This parameter is always passed as 'false' from the caller.

Signed-off-by: Denis V. Lunev 
CC: Andrey Zhadchenko 
CC: Kevin Wolf 
CC: Hanna Reitz 
---
 block/file-posix.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 35684f7e21..5c46938936 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1039,8 +1039,7 @@ static int fcntl_setfl(int fd, int flag)
 }
 
 static int raw_reconfigure_getfd(BlockDriverState *bs, int flags,
- int *open_flags, uint64_t perm, bool 
force_dup,
- Error **errp)
+ int *open_flags, uint64_t perm, Error **errp)
 {
 BDRVRawState *s = bs->opaque;
 int fd = -1;
@@ -1068,7 +1067,7 @@ static int raw_reconfigure_getfd(BlockDriverState *bs, 
int flags,
 assert((s->open_flags & O_ASYNC) == 0);
 #endif
 
-if (!force_dup && *open_flags == s->open_flags) {
+if (*open_flags == s->open_flags) {
 /* We're lucky, the existing fd is fine */
 return s->fd;
 }
@@ -3748,8 +3747,7 @@ static int raw_check_perm(BlockDriverState *bs, uint64_t 
perm, uint64_t shared,
 int ret;
 
 /* We may need a new fd if auto-read-only switches the mode */
-ret = raw_reconfigure_getfd(bs, input_flags, &open_flags, perm,
-false, errp);
+ret = raw_reconfigure_getfd(bs, input_flags, &open_flags, perm, errp);
 if (ret < 0) {
 return ret;
 } else if (ret != s->fd) {
-- 
2.40.1




Re: [PATCH] docs/about: Automatically deprecate versioned machine types older than 6 years

2024-04-30 Thread Daniel P . Berrangé
On Tue, Apr 30, 2024 at 12:29:14PM +0200, Thomas Huth wrote:
> On 30/04/2024 11.55, Daniel P. Berrangé wrote:
> > On Tue, Apr 30, 2024 at 08:45:29AM +0200, Thomas Huth wrote:
> > > Old machine types often have bugs or work-arounds that affect our
> > > possibilities to move forward with the QEMU code base (see for example
> > > https://gitlab.com/qemu-project/qemu/-/issues/2213 for a bug that likely
> > > cannot be fixed without breaking live migration with old machine types,
> > > or https://lists.gnu.org/archive/html/qemu-devel/2018-12/msg04516.html or
> > > commit ea985d235b86). So instead of going through the process of manually
> > > deprecating old machine types again and again, let's rather add an entry
> > > that can stay, which declares that machine types older than 6 years are
> > > considered as deprecated automatically. Six years should be sufficient to
> > > support the release cycles of most Linux distributions.
> > 
> > Reading this again, I think we're mixing two concepts here.
> > 
> > With this 6 year cut off, we're declaring the actual *removal* date,
> > not the deprecation date.
> > 
> > A deprecation is something that happens prior to removal normally,
> > to give people a warning of /future/ removal, as a suggestion
> > that they stop using it.
> > 
> > If we never set the 'deprecation_reason' on a machine type, then
> > unless someone reads this doc, they'll never realize they are on
> > a deprecated machine.
> > 
> > When it comes to machine types, I see deprecation as a way to tell
> > people they should not deploy a /new/ VM on a machine type, only
> > use it for back compat (incoming migration / restore from saved
> > image) with existing deployed VMs.
> > 
> > If we delete a machine on the 6 year anniversary, then users
> > don't want to be deploying /new/ VMs using that on the
> > 5 year anniversary as it only gives a 1 year upgrade window.
> > 
> > So how long far back do we consider it reasonable for a user
> > to deploy a /new/ VM on an old machine type ? 1 year, 2 years,
> > 3 years ?
> > 
> > 
> > How about picking the half way point ?  3 years ?
> > 
> > ie, set deprecation_reason for any machine that is 3 years
> > old, but declare that our deprecation cycle lasts for
> > 3 years, instead of the normal 1 year, when applied to
> > machine types.
> > 
> > This would give a strong hint that users should get off the
> > old machine type, several years before its finally deleted.
> 
> Sounds like a good idea, too! Since I have to drop this patch here anyway,
> could you maybe write such a new patch? (or do you want me to try to
> formulate this?)

Yes, I'll send something for discussion soon.

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[PATCH v4 14/17] xen: Add xen_mr_is_memory()

2024-04-30 Thread Edgar E. Iglesias
From: "Edgar E. Iglesias" 

Add xen_mr_is_memory() to abstract away tests for the
xen_memory MR.

Signed-off-by: Edgar E. Iglesias 
---
 hw/xen/xen-hvm-common.c | 8 +++-
 include/sysemu/xen.h| 8 
 system/physmem.c| 2 +-
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c
index 1627da7398..0267b88d26 100644
--- a/hw/xen/xen-hvm-common.c
+++ b/hw/xen/xen-hvm-common.c
@@ -12,6 +12,12 @@
 
 MemoryRegion xen_memory;
 
+/* Check for xen memory.  */
+bool xen_mr_is_memory(MemoryRegion *mr)
+{
+return mr == &xen_memory;
+}
+
 void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr,
Error **errp)
 {
@@ -28,7 +34,7 @@ void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, 
MemoryRegion *mr,
 return;
 }
 
-if (mr == &xen_memory) {
+if (xen_mr_is_memory(mr)) {
 return;
 }
 
diff --git a/include/sysemu/xen.h b/include/sysemu/xen.h
index 754ec2e6cb..dc72f83bcb 100644
--- a/include/sysemu/xen.h
+++ b/include/sysemu/xen.h
@@ -34,6 +34,8 @@ void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t 
length);
 void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size,
struct MemoryRegion *mr, Error **errp);
 
+bool xen_mr_is_memory(MemoryRegion *mr);
+
 #else /* !CONFIG_XEN_IS_POSSIBLE */
 
 #define xen_enabled() 0
@@ -47,6 +49,12 @@ static inline void xen_ram_alloc(ram_addr_t ram_addr, 
ram_addr_t size,
 g_assert_not_reached();
 }
 
+static inline bool xen_mr_is_memory(MemoryRegion *mr)
+{
+g_assert_not_reached();
+return false;
+}
+
 #endif /* CONFIG_XEN_IS_POSSIBLE */
 
 #endif
diff --git a/system/physmem.c b/system/physmem.c
index ad7a8c7d95..1a5ffcba2a 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -2227,7 +2227,7 @@ static void *qemu_ram_ptr_length(RAMBlock *block, 
ram_addr_t addr,
  * because we don't want to map the entire memory in QEMU.
  * In that case just map the requested area.
  */
-if (block->offset == 0) {
+if (xen_mr_is_memory(block->mr)) {
 return xen_map_cache(block->mr, addr, len, lock, lock,
  is_write);
 }
-- 
2.40.1




[PATCH v4 13/17] softmmu: Pass RAM MemoryRegion and is_write xen_map_cache()

2024-04-30 Thread Edgar E. Iglesias
From: "Edgar E. Iglesias" 

Propagate MR and is_write to xen_map_cache().
This is in preparation for adding support for grant mappings.

No functional change.

Signed-off-by: Edgar E. Iglesias 
---
 hw/xen/xen-mapcache.c | 10 ++
 include/sysemu/xen-mapcache.h | 11 +++
 system/physmem.c  | 31 +++
 3 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index 0365311788..09b5f36d9c 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -264,7 +264,7 @@ static void xen_remap_bucket(MapCache *mc,
 
 static uint8_t *xen_map_cache_unlocked(MapCache *mc,
hwaddr phys_addr, hwaddr size,
-   uint8_t lock, bool dma)
+   uint8_t lock, bool dma, bool is_write)
 {
 MapCacheEntry *entry, *pentry = NULL,
   *free_entry = NULL, *free_pentry = NULL;
@@ -387,13 +387,15 @@ tryagain:
 return mc->last_entry->vaddr_base + address_offset;
 }
 
-uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size,
-   uint8_t lock, bool dma)
+uint8_t *xen_map_cache(MemoryRegion *mr,
+   hwaddr phys_addr, hwaddr size,
+   uint8_t lock, bool dma,
+   bool is_write)
 {
 uint8_t *p;
 
 mapcache_lock(mapcache);
-p = xen_map_cache_unlocked(mapcache, phys_addr, size, lock, dma);
+p = xen_map_cache_unlocked(mapcache, phys_addr, size, lock, dma, is_write);
 mapcache_unlock(mapcache);
 return p;
 }
diff --git a/include/sysemu/xen-mapcache.h b/include/sysemu/xen-mapcache.h
index 10c2e3082a..1ec9e66752 100644
--- a/include/sysemu/xen-mapcache.h
+++ b/include/sysemu/xen-mapcache.h
@@ -18,8 +18,9 @@ typedef hwaddr (*phys_offset_to_gaddr_t)(hwaddr phys_offset,
 
 void xen_map_cache_init(phys_offset_to_gaddr_t f,
 void *opaque);
-uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size,
-   uint8_t lock, bool dma);
+uint8_t *xen_map_cache(MemoryRegion *mr, hwaddr phys_addr, hwaddr size,
+   uint8_t lock, bool dma,
+   bool is_write);
 ram_addr_t xen_ram_addr_from_mapcache(void *ptr);
 void xen_invalidate_map_cache_entry(uint8_t *buffer);
 void xen_invalidate_map_cache(void);
@@ -33,10 +34,12 @@ static inline void 
xen_map_cache_init(phys_offset_to_gaddr_t f,
 {
 }
 
-static inline uint8_t *xen_map_cache(hwaddr phys_addr,
+static inline uint8_t *xen_map_cache(MemoryRegion *mr,
+ hwaddr phys_addr,
  hwaddr size,
  uint8_t lock,
- bool dma)
+ bool dma,
+ bool is_write)
 {
 abort();
 }
diff --git a/system/physmem.c b/system/physmem.c
index f114b972a5..ad7a8c7d95 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -2190,11 +2190,22 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
 
 /*
  * Return a host pointer to guest's ram.
+ * For Xen, foreign mappings get created if they don't already exist.
+ *
+ * @block: block for the RAM to lookup (optional and may be NULL).
+ * @addr: address within the memory region.
+ * @size: pointer to requested size (optional and may be NULL).
+ *size may get modified and return a value smaller than
+ *what was requested.
+ * @lock: whether to lock the mapping in xen-mapcache until invalidated.
+ * @is_write: hint whether to map RW or RO in the xen-mapcache.
+ *(optional and may always be set to true).
  *
  * Called within RCU critical section.
  */
 static void *qemu_ram_ptr_length(RAMBlock *block, ram_addr_t addr,
- hwaddr *size, bool lock)
+ hwaddr *size, bool lock,
+ bool is_write)
 {
 hwaddr len = 0;
 
@@ -2217,10 +2228,13 @@ static void *qemu_ram_ptr_length(RAMBlock *block, 
ram_addr_t addr,
  * In that case just map the requested area.
  */
 if (block->offset == 0) {
-return xen_map_cache(addr, len, lock, lock);
+return xen_map_cache(block->mr, addr, len, lock, lock,
+ is_write);
 }
 
-block->host = xen_map_cache(block->offset, block->max_length, 1, lock);
+block->host = xen_map_cache(block->mr, block->offset,
+block->max_length, 1,
+lock, is_write);
 }
 
 return ramblock_ptr(block, addr);
@@ -2236,7 +2250,7 @@ static void *qemu_ram_ptr_length(RAMBlock *block, 
ram_addr_t addr,
  */
 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
 {
-return qemu_ram_ptr_length(ram_block, addr, NULL, false);
+return qemu_ram_ptr_length(ram_block, ad

[PATCH v4 04/17] xen: mapcache: Refactor xen_map_cache for multi-instance

2024-04-30 Thread Edgar E. Iglesias
From: "Edgar E. Iglesias" 

Make xen_map_cache take a MapCache as argument. This is in
preparation to support multiple map caches.

No functional changes.

Signed-off-by: Edgar E. Iglesias 
---
 hw/xen/xen-mapcache.c | 35 ++-
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index 3f11562075..896021d86f 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -240,7 +240,8 @@ static void xen_remap_bucket(MapCacheEntry *entry,
 g_free(err);
 }
 
-static uint8_t *xen_map_cache_unlocked(hwaddr phys_addr, hwaddr size,
+static uint8_t *xen_map_cache_unlocked(MapCache *mc,
+   hwaddr phys_addr, hwaddr size,
uint8_t lock, bool dma)
 {
 MapCacheEntry *entry, *pentry = NULL,
@@ -269,16 +270,16 @@ tryagain:
 test_bit_size = XC_PAGE_SIZE;
 }
 
-if (mapcache->last_entry != NULL &&
-mapcache->last_entry->paddr_index == address_index &&
+if (mc->last_entry != NULL &&
+mc->last_entry->paddr_index == address_index &&
 !lock && !size &&
 test_bits(address_offset >> XC_PAGE_SHIFT,
   test_bit_size >> XC_PAGE_SHIFT,
-  mapcache->last_entry->valid_mapping)) {
+  mc->last_entry->valid_mapping)) {
 trace_xen_map_cache_return(
-mapcache->last_entry->vaddr_base + address_offset
+mc->last_entry->vaddr_base + address_offset
 );
-return mapcache->last_entry->vaddr_base + address_offset;
+return mc->last_entry->vaddr_base + address_offset;
 }
 
 /* size is always a multiple of MCACHE_BUCKET_SIZE */
@@ -291,7 +292,7 @@ tryagain:
 cache_size = MCACHE_BUCKET_SIZE;
 }
 
-entry = &mapcache->entry[address_index % mapcache->nr_buckets];
+entry = &mc->entry[address_index % mc->nr_buckets];
 
 while (entry && (lock || entry->lock) && entry->vaddr_base &&
 (entry->paddr_index != address_index || entry->size != cache_size 
||
@@ -326,10 +327,10 @@ tryagain:
 if(!test_bits(address_offset >> XC_PAGE_SHIFT,
 test_bit_size >> XC_PAGE_SHIFT,
 entry->valid_mapping)) {
-mapcache->last_entry = NULL;
+mc->last_entry = NULL;
 #ifdef XEN_COMPAT_PHYSMAP
-if (!translated && mapcache->phys_offset_to_gaddr) {
-phys_addr = mapcache->phys_offset_to_gaddr(phys_addr, size);
+if (!translated && mc->phys_offset_to_gaddr) {
+phys_addr = mc->phys_offset_to_gaddr(phys_addr, size);
 translated = true;
 goto tryagain;
 }
@@ -342,7 +343,7 @@ tryagain:
 return NULL;
 }
 
-mapcache->last_entry = entry;
+mc->last_entry = entry;
 if (lock) {
 MapCacheRev *reventry = g_new0(MapCacheRev, 1);
 entry->lock++;
@@ -352,16 +353,16 @@ tryagain:
 abort();
 }
 reventry->dma = dma;
-reventry->vaddr_req = mapcache->last_entry->vaddr_base + 
address_offset;
-reventry->paddr_index = mapcache->last_entry->paddr_index;
+reventry->vaddr_req = mc->last_entry->vaddr_base + address_offset;
+reventry->paddr_index = mc->last_entry->paddr_index;
 reventry->size = entry->size;
-QTAILQ_INSERT_HEAD(&mapcache->locked_entries, reventry, next);
+QTAILQ_INSERT_HEAD(&mc->locked_entries, reventry, next);
 }
 
 trace_xen_map_cache_return(
-mapcache->last_entry->vaddr_base + address_offset
+mc->last_entry->vaddr_base + address_offset
 );
-return mapcache->last_entry->vaddr_base + address_offset;
+return mc->last_entry->vaddr_base + address_offset;
 }
 
 uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size,
@@ -370,7 +371,7 @@ uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size,
 uint8_t *p;
 
 mapcache_lock(mapcache);
-p = xen_map_cache_unlocked(phys_addr, size, lock, dma);
+p = xen_map_cache_unlocked(mapcache, phys_addr, size, lock, dma);
 mapcache_unlock(mapcache);
 return p;
 }
-- 
2.40.1




[PULL 11/21] hw/arm/sbsa-ref: Force CPU generic timer to 62.5MHz

2024-04-30 Thread Peter Maydell
Currently QEMU CPUs always run with a generic timer counter frequency
of 62.5MHz, but ARMv8.6 CPUs will run at 1GHz.  For older versions of
the TF-A firmware that sbsa-ref runs, the frequency of the generic
timer is hardcoded into the firmware, and so if the CPU actually has
a different frequency then timers in the guest will be set
incorrectly.

The default frequency used by the 'max' CPU is about to change, so
make the sbsa-ref board force the CPU frequency to the value which
the firmware expects.

Newer versions of TF-A will read the frequency from the CPU's
CNTFRQ_EL0 register:
 
https://github.com/ARM-software/arm-trusted-firmware/commit/4c77fac98dac0bebc63798aae9101ac865b87148
so in the longer term we could make this board use the 1GHz
frequency. We will need to make sure we update the binaries used
by our avocado test
 Aarch64SbsarefMachine.test_sbsaref_alpine_linux_max_pauth_impdef
before we can do that.

Signed-off-by: Peter Maydell 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Marcin Juszkiewicz 
Message-id: 20240426122913.3427983-3-peter.mayd...@linaro.org
---
 hw/arm/sbsa-ref.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
index f5709d6c141..36f6f717b4b 100644
--- a/hw/arm/sbsa-ref.c
+++ b/hw/arm/sbsa-ref.c
@@ -60,6 +60,19 @@
 #define NUM_SMMU_IRQS   4
 #define NUM_SATA_PORTS  6
 
+/*
+ * Generic timer frequency in Hz (which drives both the CPU generic timers
+ * and the SBSA watchdog-timer). Older versions of the TF-A firmware
+ * typically used with sbsa-ref (including the binaries in our Avocado test
+ * Aarch64SbsarefMachine.test_sbsaref_alpine_linux_max_pauth_impdef
+ * assume it is this value.
+ *
+ * TODO: this value is not architecturally correct for an Armv8.6 or
+ * better CPU, so we should move to 1GHz once the TF-A fix above has
+ * made it into a release and into our Avocado test.
+ */
+#define SBSA_GTIMER_HZ 62500000
+
 enum {
 SBSA_FLASH,
 SBSA_MEM,
@@ -767,6 +780,8 @@ static void sbsa_ref_init(MachineState *machine)
 &error_abort);
 }
 
+object_property_set_int(cpuobj, "cntfrq", SBSA_GTIMER_HZ, 
&error_abort);
+
 object_property_set_link(cpuobj, "memory", OBJECT(sysmem),
  &error_abort);
 
-- 
2.34.1




[PATCH v4 02/17] xen: let xen_ram_addr_from_mapcache() return -1 in case of not found entry

2024-04-30 Thread Edgar E. Iglesias
From: Juergen Gross 

Today xen_ram_addr_from_mapcache() will either abort() or return 0 in
case it can't find a matching entry for a pointer value. Both cases
are bad, so change that to return an invalid address instead.

Signed-off-by: Juergen Gross 
Signed-off-by: Edgar E. Iglesias 
Reviewed-by: Stefano Stabellini 
Reviewed-by: Alex Bennée 
Reviewed-by: Edgar E. Iglesias 
---
 hw/xen/xen-mapcache.c | 11 +++
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index 7f59080ba7..b7cefb78f7 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -394,13 +394,8 @@ ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
 }
 }
 if (!found) {
-trace_xen_ram_addr_from_mapcache_not_found(ptr);
-QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
-trace_xen_ram_addr_from_mapcache_found(reventry->paddr_index,
-   reventry->vaddr_req);
-}
-abort();
-return 0;
+mapcache_unlock();
+return RAM_ADDR_INVALID;
 }
 
 entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
@@ -409,7 +404,7 @@ ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
 }
 if (!entry) {
 trace_xen_ram_addr_from_mapcache_not_in_cache(ptr);
-raddr = 0;
+raddr = RAM_ADDR_INVALID;
 } else {
 raddr = (reventry->paddr_index << MCACHE_BUCKET_SHIFT) +
  ((unsigned long) ptr - (unsigned long) entry->vaddr_base);
-- 
2.40.1




[PULL 13/21] target/arm: Default to 1GHz cntfrq for 'max' and new CPUs

2024-04-30 Thread Peter Maydell
In previous versions of the Arm architecture, the frequency of the
generic timers as reported in CNTFRQ_EL0 could be any IMPDEF value,
and for QEMU we picked 62.5MHz, giving a timer tick period of 16ns.
In Armv8.6, the architecture standardized this frequency to 1GHz.

Because there is no ID register feature field that indicates whether
a CPU is v8.6 or that it ought to have this counter frequency, we
implement this by changing our default CNTFRQ value for all CPUs,
with exceptions for backwards compatibility:

 * CPU types which we already implement will retain the old
   default value. None of these are v8.6 CPUs, so this is
   architecturally OK.
 * CPUs used in versioned machine types with a version of 9.0
   or earlier will retain the old default value.

The upshot is that the only CPU type that changes is 'max'; but any
new type we add in future (whether v8.6 or not) will also get the new
1GHz default.

It remains the case that the machine model can override the default
value via the 'cntfrq' QOM property (regardless of the CPU type).

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 20240426122913.3427983-5-peter.mayd...@linaro.org
---
 target/arm/cpu.h   | 11 +++
 target/arm/internals.h | 12 ++--
 hw/core/machine.c  |  4 +++-
 target/arm/cpu.c   | 23 +--
 target/arm/cpu64.c |  2 ++
 target/arm/tcg/cpu32.c |  4 
 target/arm/tcg/cpu64.c | 18 ++
 7 files changed, 65 insertions(+), 9 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 1f90590f937..a550bcd25fe 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -956,6 +956,9 @@ struct ArchCPU {
  */
 bool host_cpu_probe_failed;
 
+/* QOM property to indicate we should use the back-compat CNTFRQ default */
+bool backcompat_cntfrq;
+
 /* Specify the number of cores in this CPU cluster. Used for the L2CTLR
  * register.
  */
@@ -2373,6 +2376,14 @@ enum arm_features {
 ARM_FEATURE_M_SECURITY, /* M profile Security Extension */
 ARM_FEATURE_M_MAIN, /* M profile Main Extension */
 ARM_FEATURE_V8_1M, /* M profile extras only in v8.1M and later */
+/*
+ * ARM_FEATURE_BACKCOMPAT_CNTFRQ makes the CPU default cntfrq be 62.5MHz
+ * if the board doesn't set a value, instead of 1GHz. It is for backwards
+ * compatibility and used only with CPU definitions that were already
+ * in QEMU before we changed the default. It should not be set on any
+ * CPU types added in future.
+ */
+ARM_FEATURE_BACKCOMPAT_CNTFRQ, /* 62.5MHz timer default */
 };
 
 static inline int arm_feature(CPUARMState *env, int feature)
diff --git a/target/arm/internals.h b/target/arm/internals.h
index b6c78db0243..ee3ebd383e1 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -62,9 +62,17 @@ static inline bool excp_is_internal(int excp)
 
 /*
  * Default frequency for the generic timer, in Hz.
- * This is 62.5MHz, which gives a 16 ns tick period.
+ * ARMv8.6 and later CPUs architecturally must use a 1GHz timer; before
+ * that it was an IMPDEF choice, and QEMU initially picked 62.5MHz,
+ * which gives a 16ns tick period.
+ *
+ * We will use the back-compat value:
+ *  - for QEMU CPU types added before we standardized on 1GHz
+ *  - for versioned machine types with a version of 9.0 or earlier
+ * In any case, the machine model may override via the cntfrq property.
  */
-#define GTIMER_DEFAULT_HZ 62500000
+#define GTIMER_DEFAULT_HZ 1000000000
+#define GTIMER_BACKCOMPAT_HZ 62500000
 
 /* Bit definitions for the v7M CONTROL register */
 FIELD(V7M_CONTROL, NPRIV, 0, 1)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 0dec48e8021..4ff60911e74 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -33,7 +33,9 @@
 #include "hw/virtio/virtio-iommu.h"
 #include "audio/audio.h"
 
-GlobalProperty hw_compat_9_0[] = {};
+GlobalProperty hw_compat_9_0[] = {
+{"arm-cpu", "backcompat-cntfrq", "true" },
+};
 const size_t hw_compat_9_0_len = G_N_ELEMENTS(hw_compat_9_0);
 
 GlobalProperty hw_compat_8_2[] = {
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 9f2ca6633a1..fdc3eda318a 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1959,13 +1959,22 @@ static void arm_cpu_realizefn(DeviceState *dev, Error 
**errp)
 
 if (!cpu->gt_cntfrq_hz) {
 /*
- * 0 means "the board didn't set a value, use the default".
- * The default value of the generic timer frequency (as seen in
- * CNTFRQ_EL0) is 62.5MHz, which corresponds to a period of 16ns.
- * This is what you get (a) for a CONFIG_USER_ONLY CPU (b) if the
- * board doesn't set it.
+ * 0 means "the board didn't set a value, use the default". (We also
+ * get here for the CONFIG_USER_ONLY case.)
+ * ARMv8.6 and later CPUs architecturally must use a 1GHz timer; before
+ * that it was an IMPDEF choice, and QEMU initially picked 62.

[PATCH v4 11/17] xen: mapcache: Make MCACHE_BUCKET_SHIFT runtime configurable

2024-04-30 Thread Edgar E. Iglesias
From: "Edgar E. Iglesias" 

Make MCACHE_BUCKET_SHIFT runtime configurable per cache instance.

Signed-off-by: Edgar E. Iglesias 
---
 hw/xen/xen-mapcache.c | 52 ++-
 1 file changed, 31 insertions(+), 21 deletions(-)

diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index 72a7e25e3e..4f98d284dd 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -23,13 +23,10 @@
 
 
 #if HOST_LONG_BITS == 32
-#  define MCACHE_BUCKET_SHIFT 16
 #  define MCACHE_MAX_SIZE (1UL<<31) /* 2GB Cap */
 #else
-#  define MCACHE_BUCKET_SHIFT 20
 #  define MCACHE_MAX_SIZE (1UL<<35) /* 32GB Cap */
 #endif
-#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
 
 /* This is the size of the virtual address space reserve to QEMU that will not
  * be use by MapCache.
@@ -65,7 +62,8 @@ typedef struct MapCache {
 /* For most cases (>99.9%), the page address is the same. */
 MapCacheEntry *last_entry;
 unsigned long max_mcache_size;
-unsigned int mcache_bucket_shift;
+unsigned int bucket_shift;
+unsigned long bucket_size;
 
 phys_offset_to_gaddr_t phys_offset_to_gaddr;
 QemuMutex lock;
@@ -95,6 +93,7 @@ static inline int test_bits(int nr, int size, const unsigned 
long *addr)
 
 static MapCache *xen_map_cache_init_single(phys_offset_to_gaddr_t f,
void *opaque,
+   unsigned int bucket_shift,
unsigned long max_size)
 {
 unsigned long size;
@@ -108,12 +107,14 @@ static MapCache 
*xen_map_cache_init_single(phys_offset_to_gaddr_t f,
 
 QTAILQ_INIT(&mc->locked_entries);
 
+mc->bucket_shift = bucket_shift;
+mc->bucket_size = 1UL << bucket_shift;
 mc->max_mcache_size = max_size;
 
 mc->nr_buckets =
 (((mc->max_mcache_size >> XC_PAGE_SHIFT) +
-  (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >>
- (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT));
+  (1UL << (bucket_shift - XC_PAGE_SHIFT)) - 1) >>
+ (bucket_shift - XC_PAGE_SHIFT));
 
 size = mc->nr_buckets * sizeof(MapCacheEntry);
 size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
@@ -126,6 +127,13 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void 
*opaque)
 {
 struct rlimit rlimit_as;
 unsigned long max_mcache_size;
+unsigned int bucket_shift;
+
+if (HOST_LONG_BITS == 32) {
+bucket_shift = 16;
+} else {
+bucket_shift = 20;
+}
 
 if (geteuid() == 0) {
 rlimit_as.rlim_cur = RLIM_INFINITY;
@@ -146,7 +154,9 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void 
*opaque)
 }
 }
 
-mapcache = xen_map_cache_init_single(f, opaque, max_mcache_size);
+mapcache = xen_map_cache_init_single(f, opaque,
+ bucket_shift,
+ max_mcache_size);
 setrlimit(RLIMIT_AS, &rlimit_as);
 }
 
@@ -195,7 +205,7 @@ static void xen_remap_bucket(MapCache *mc,
 entry->valid_mapping = NULL;
 
 for (i = 0; i < nb_pfn; i++) {
-pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i;
+pfns[i] = (address_index << (mc->bucket_shift - XC_PAGE_SHIFT)) + i;
 }
 
 /*
@@ -266,8 +276,8 @@ static uint8_t *xen_map_cache_unlocked(MapCache *mc,
 bool dummy = false;
 
 tryagain:
-address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
-address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);
+address_index  = phys_addr >> mc->bucket_shift;
+address_offset = phys_addr & (mc->bucket_size - 1);
 
 trace_xen_map_cache(phys_addr);
 
@@ -294,14 +304,14 @@ tryagain:
 return mc->last_entry->vaddr_base + address_offset;
 }
 
-/* size is always a multiple of MCACHE_BUCKET_SIZE */
+/* size is always a multiple of mc->bucket_size */
 if (size) {
 cache_size = size + address_offset;
-if (cache_size % MCACHE_BUCKET_SIZE) {
-cache_size += MCACHE_BUCKET_SIZE - (cache_size % 
MCACHE_BUCKET_SIZE);
+if (cache_size % mc->bucket_size) {
+cache_size += mc->bucket_size - (cache_size % mc->bucket_size);
 }
 } else {
-cache_size = MCACHE_BUCKET_SIZE;
+cache_size = mc->bucket_size;
 }
 
 entry = &mc->entry[address_index % mc->nr_buckets];
@@ -419,7 +429,7 @@ static ram_addr_t 
xen_ram_addr_from_mapcache_single(MapCache *mc, void *ptr)
 trace_xen_ram_addr_from_mapcache_not_in_cache(ptr);
 raddr = RAM_ADDR_INVALID;
 } else {
-raddr = (reventry->paddr_index << MCACHE_BUCKET_SHIFT) +
+raddr = (reventry->paddr_index << mc->bucket_shift) +
  ((unsigned long) ptr - (unsigned long) entry->vaddr_base);
 }
 mapcache_unlock(mc);
@@ -582,8 +592,8 @@ static uint8_t *xen_replace_cache_entry_unlocked(MapCache 
*mc,
 hwaddr address_index, address_offset;
 hwaddr test_bit_size, cache_size = size;
 
-a

[PATCH v4 01/17] softmmu: let qemu_map_ram_ptr() use qemu_ram_ptr_length()

2024-04-30 Thread Edgar E. Iglesias
From: Juergen Gross 

qemu_map_ram_ptr() and qemu_ram_ptr_length() share quite some code, so
modify qemu_ram_ptr_length() a little bit and use it for
qemu_map_ram_ptr(), too.

Signed-off-by: Juergen Gross 
Signed-off-by: Vikram Garhwal 
Signed-off-by: Edgar E. Iglesias 
Reviewed-by: Stefano Stabellini 
Reviewed-by: Alex Bennée 
Reviewed-by: Edgar E. Iglesias 
---
 system/physmem.c | 56 
 1 file changed, 23 insertions(+), 33 deletions(-)

diff --git a/system/physmem.c b/system/physmem.c
index 1a81c226ba..f114b972a5 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -2188,43 +2188,17 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
 }
 #endif /* !_WIN32 */
 
-/* Return a host pointer to ram allocated with qemu_ram_alloc.
- * This should not be used for general purpose DMA.  Use address_space_map
- * or address_space_rw instead. For local memory (e.g. video ram) that the
- * device owns, use memory_region_get_ram_ptr.
- *
- * Called within RCU critical section.
- */
-void *qemu_map_ram_ptr(RAMBlock *block, ram_addr_t addr)
-{
-if (block == NULL) {
-block = qemu_get_ram_block(addr);
-addr -= block->offset;
-}
-
-if (xen_enabled() && block->host == NULL) {
-/* We need to check if the requested address is in the RAM
- * because we don't want to map the entire memory in QEMU.
- * In that case just map until the end of the page.
- */
-if (block->offset == 0) {
-return xen_map_cache(addr, 0, 0, false);
-}
-
-block->host = xen_map_cache(block->offset, block->max_length, 1, 
false);
-}
-return ramblock_ptr(block, addr);
-}
-
-/* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
- * but takes a size argument.
+/*
+ * Return a host pointer to guest's ram.
  *
  * Called within RCU critical section.
  */
 static void *qemu_ram_ptr_length(RAMBlock *block, ram_addr_t addr,
  hwaddr *size, bool lock)
 {
-if (*size == 0) {
+hwaddr len = 0;
+
+if (size && *size == 0) {
 return NULL;
 }
 
@@ -2232,7 +2206,10 @@ static void *qemu_ram_ptr_length(RAMBlock *block, 
ram_addr_t addr,
 block = qemu_get_ram_block(addr);
 addr -= block->offset;
 }
-*size = MIN(*size, block->max_length - addr);
+if (size) {
+*size = MIN(*size, block->max_length - addr);
+len = *size;
+}
 
 if (xen_enabled() && block->host == NULL) {
 /* We need to check if the requested address is in the RAM
@@ -2240,7 +2217,7 @@ static void *qemu_ram_ptr_length(RAMBlock *block, 
ram_addr_t addr,
  * In that case just map the requested area.
  */
 if (block->offset == 0) {
-return xen_map_cache(addr, *size, lock, lock);
+return xen_map_cache(addr, len, lock, lock);
 }
 
 block->host = xen_map_cache(block->offset, block->max_length, 1, lock);
@@ -2249,6 +2226,19 @@ static void *qemu_ram_ptr_length(RAMBlock *block, 
ram_addr_t addr,
 return ramblock_ptr(block, addr);
 }
 
+/*
+ * Return a host pointer to ram allocated with qemu_ram_alloc.
+ * This should not be used for general purpose DMA.  Use address_space_map
+ * or address_space_rw instead. For local memory (e.g. video ram) that the
+ * device owns, use memory_region_get_ram_ptr.
+ *
+ * Called within RCU critical section.
+ */
+void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
+{
+return qemu_ram_ptr_length(ram_block, addr, NULL, false);
+}
+
 /* Return the offset of a hostpointer within a ramblock */
 ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host)
 {
-- 
2.40.1




[PULL 10/21] target/arm: Refactor default generic timer frequency handling

2024-04-30 Thread Peter Maydell
The generic timer frequency is settable by board code via a QOM
property "cntfrq", but otherwise defaults to 62.5MHz.  The way this
is done includes some complication resulting from how this was
originally a fixed value with no QOM property.  Clean it up:

 * always set cpu->gt_cntfrq_hz to some sensible value, whether
   the CPU has the generic timer or not, and whether it's system
   or user-only emulation
 * this means we can always use gt_cntfrq_hz, and never need
   the old GTIMER_SCALE define
 * set the default value in exactly one place, in the realize fn

The aim here is to pave the way for handling the ARMv8.6 requirement
that the generic timer frequency is always 1GHz.  We're going to do
that by having old CPU types keep their legacy-in-QEMU behaviour and
having the default for any new CPU types be a 1GHz rather than 62.5MHz
cntfrq, so we want the point where the default is decided to be in
one place, and in code, not in a DEFINE_PROP_UINT64() initializer.

This commit should have no behavioural changes.

Signed-off-by: Peter Maydell 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
Message-id: 20240426122913.3427983-2-peter.mayd...@linaro.org
---
 target/arm/internals.h |  7 ---
 target/arm/cpu.c   | 31 +--
 target/arm/helper.c| 16 
 3 files changed, 29 insertions(+), 25 deletions(-)

diff --git a/target/arm/internals.h b/target/arm/internals.h
index e40ec453d56..b6c78db0243 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -60,10 +60,11 @@ static inline bool excp_is_internal(int excp)
 || excp == EXCP_SEMIHOST;
 }
 
-/* Scale factor for generic timers, ie number of ns per tick.
- * This gives a 62.5MHz timer.
+/*
+ * Default frequency for the generic timer, in Hz.
+ * This is 62.5MHz, which gives a 16 ns tick period.
  */
-#define GTIMER_SCALE 16
+#define GTIMER_DEFAULT_HZ 62500000
 
 /* Bit definitions for the v7M CONTROL register */
 FIELD(V7M_CONTROL, NPRIV, 0, 1)
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index a152def2413..9f2ca6633a1 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1506,9 +1506,12 @@ static void arm_cpu_initfn(Object *obj)
 }
 }
 
+/*
+ * 0 means "unset, use the default value". That default might vary depending
+ * on the CPU type, and is set in the realize fn.
+ */
 static Property arm_cpu_gt_cntfrq_property =
-DEFINE_PROP_UINT64("cntfrq", ARMCPU, gt_cntfrq_hz,
-   NANOSECONDS_PER_SECOND / GTIMER_SCALE);
+DEFINE_PROP_UINT64("cntfrq", ARMCPU, gt_cntfrq_hz, 0);
 
 static Property arm_cpu_reset_cbar_property =
 DEFINE_PROP_UINT64("reset-cbar", ARMCPU, reset_cbar, 0);
@@ -1954,6 +1957,17 @@ static void arm_cpu_realizefn(DeviceState *dev, Error 
**errp)
 return;
 }
 
+if (!cpu->gt_cntfrq_hz) {
+/*
+ * 0 means "the board didn't set a value, use the default".
+ * The default value of the generic timer frequency (as seen in
+ * CNTFRQ_EL0) is 62.5MHz, which corresponds to a period of 16ns.
+ * This is what you get (a) for a CONFIG_USER_ONLY CPU (b) if the
+ * board doesn't set it.
+ */
+cpu->gt_cntfrq_hz = GTIMER_DEFAULT_HZ;
+}
+
 #ifndef CONFIG_USER_ONLY
 /* The NVIC and M-profile CPU are two halves of a single piece of
  * hardware; trying to use one without the other is a command line
@@ -2002,18 +2016,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error 
**errp)
 }
 
 {
-uint64_t scale;
-
-if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) {
-if (!cpu->gt_cntfrq_hz) {
-error_setg(errp, "Invalid CNTFRQ: %"PRId64"Hz",
-   cpu->gt_cntfrq_hz);
-return;
-}
-scale = gt_cntfrq_period_ns(cpu);
-} else {
-scale = GTIMER_SCALE;
-}
+uint64_t scale = gt_cntfrq_period_ns(cpu);
 
 cpu->gt_timer[GTIMER_PHYS] = timer_new(QEMU_CLOCK_VIRTUAL, scale,
arm_gt_ptimer_cb, cpu);
diff --git a/target/arm/helper.c b/target/arm/helper.c
index bb0e1baf628..75876359608 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -2474,6 +2474,13 @@ static const ARMCPRegInfo v6k_cp_reginfo[] = {
   .resetvalue = 0 },
 };
 
+static void arm_gt_cntfrq_reset(CPUARMState *env, const ARMCPRegInfo *opaque)
+{
+ARMCPU *cpu = env_archcpu(env);
+
+cpu->env.cp15.c14_cntfrq = cpu->gt_cntfrq_hz;
+}
+
 #ifndef CONFIG_USER_ONLY
 
 static CPAccessResult gt_cntfrq_access(CPUARMState *env, const ARMCPRegInfo 
*ri,
@@ -3228,13 +3235,6 @@ void arm_gt_hvtimer_cb(void *opaque)
 gt_recalc_timer(cpu, GTIMER_HYPVIRT);
 }
 
-static void arm_gt_cntfrq_reset(CPUARMState *env, const ARMCPRegInfo *opaque)
-{
-ARMCPU *cpu = env_archcpu(env);
-
-cpu->env.cp15.c14_cntfrq = cpu->gt_cntfrq_hz;
-}
-
 static const ARMCPRegInfo generic_tim

[PATCH v4 12/17] xen: mapcache: Unmap first entries in buckets

2024-04-30 Thread Edgar E. Iglesias
From: "Edgar E. Iglesias" 

When invalidating memory ranges, if we happen to hit the first
entry in a bucket we were never unmapping it. This was harmless
for foreign mappings but now that we're looking to reuse the
mapcache for transient grant mappings, we must unmap entries
when invalidated.

Signed-off-by: Edgar E. Iglesias 
---
 hw/xen/xen-mapcache.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index 4f98d284dd..0365311788 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -486,18 +486,22 @@ static void 
xen_invalidate_map_cache_entry_unlocked(MapCache *mc,
 return;
 }
 entry->lock--;
-if (entry->lock > 0 || pentry == NULL) {
+if (entry->lock > 0) {
 return;
 }
 
-pentry->next = entry->next;
 ram_block_notify_remove(entry->vaddr_base, entry->size, entry->size);
 if (munmap(entry->vaddr_base, entry->size) != 0) {
 perror("unmap fails");
 exit(-1);
 }
-g_free(entry->valid_mapping);
-g_free(entry);
+if (pentry) {
+pentry->next = entry->next;
+g_free(entry->valid_mapping);
+g_free(entry);
+} else {
+memset(entry, 0, sizeof *entry);
+}
 }
 
 typedef struct XenMapCacheData {
-- 
2.40.1




[PATCH v4 03/17] xen: mapcache: Refactor lock functions for multi-instance

2024-04-30 Thread Edgar E. Iglesias
From: "Edgar E. Iglesias" 

Make the lock functions take MapCache * as argument. This is
in preparation for supporting multiple caches.

No functional changes.

Signed-off-by: Edgar E. Iglesias 
---
 hw/xen/xen-mapcache.c | 34 +-
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index b7cefb78f7..3f11562075 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -74,14 +74,14 @@ typedef struct MapCache {
 
 static MapCache *mapcache;
 
-static inline void mapcache_lock(void)
+static inline void mapcache_lock(MapCache *mc)
 {
-qemu_mutex_lock(&mapcache->lock);
+qemu_mutex_lock(&mc->lock);
 }
 
-static inline void mapcache_unlock(void)
+static inline void mapcache_unlock(MapCache *mc)
 {
-qemu_mutex_unlock(&mapcache->lock);
+qemu_mutex_unlock(&mc->lock);
 }
 
 static inline int test_bits(int nr, int size, const unsigned long *addr)
@@ -369,9 +369,9 @@ uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size,
 {
 uint8_t *p;
 
-mapcache_lock();
+mapcache_lock(mapcache);
 p = xen_map_cache_unlocked(phys_addr, size, lock, dma);
-mapcache_unlock();
+mapcache_unlock(mapcache);
 return p;
 }
 
@@ -384,7 +384,7 @@ ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
 ram_addr_t raddr;
 int found = 0;
 
-mapcache_lock();
+mapcache_lock(mapcache);
 QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
 if (reventry->vaddr_req == ptr) {
 paddr_index = reventry->paddr_index;
@@ -394,7 +394,7 @@ ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
 }
 }
 if (!found) {
-mapcache_unlock();
+mapcache_unlock(mapcache);
 return RAM_ADDR_INVALID;
 }
 
@@ -409,7 +409,7 @@ ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
 raddr = (reventry->paddr_index << MCACHE_BUCKET_SHIFT) +
  ((unsigned long) ptr - (unsigned long) entry->vaddr_base);
 }
-mapcache_unlock();
+mapcache_unlock(mapcache);
 return raddr;
 }
 
@@ -480,9 +480,9 @@ static void xen_invalidate_map_cache_entry_bh(void *opaque)
 {
 XenMapCacheData *data = opaque;
 
-mapcache_lock();
+mapcache_lock(mapcache);
 xen_invalidate_map_cache_entry_unlocked(data->buffer);
-mapcache_unlock();
+mapcache_unlock(mapcache);
 
 aio_co_wake(data->co);
 }
@@ -498,9 +498,9 @@ void coroutine_mixed_fn 
xen_invalidate_map_cache_entry(uint8_t *buffer)
 xen_invalidate_map_cache_entry_bh, &data);
 qemu_coroutine_yield();
 } else {
-mapcache_lock();
+mapcache_lock(mapcache);
 xen_invalidate_map_cache_entry_unlocked(buffer);
-mapcache_unlock();
+mapcache_unlock(mapcache);
 }
 }
 
@@ -512,7 +512,7 @@ void xen_invalidate_map_cache(void)
 /* Flush pending AIO before destroying the mapcache */
 bdrv_drain_all();
 
-mapcache_lock();
+mapcache_lock(mapcache);
 
 QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
 if (!reventry->dma) {
@@ -546,7 +546,7 @@ void xen_invalidate_map_cache(void)
 
 mapcache->last_entry = NULL;
 
-mapcache_unlock();
+mapcache_unlock(mapcache);
 }
 
 static uint8_t *xen_replace_cache_entry_unlocked(hwaddr old_phys_addr,
@@ -606,8 +606,8 @@ uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr,
 {
 uint8_t *p;
 
-mapcache_lock();
+mapcache_lock(mapcache);
 p = xen_replace_cache_entry_unlocked(old_phys_addr, new_phys_addr, size);
-mapcache_unlock();
+mapcache_unlock(mapcache);
 return p;
 }
-- 
2.40.1




[PATCH v4 09/17] xen: mapcache: Break out xen_invalidate_map_cache_single()

2024-04-30 Thread Edgar E. Iglesias
From: "Edgar E. Iglesias" 

Break out xen_invalidate_map_cache_single().

No functional changes.

Signed-off-by: Edgar E. Iglesias 
---
 hw/xen/xen-mapcache.c | 25 +++--
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index 34454da2f6..dd08cd296b 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -512,17 +512,14 @@ void coroutine_mixed_fn 
xen_invalidate_map_cache_entry(uint8_t *buffer)
 }
 }
 
-void xen_invalidate_map_cache(void)
+static void xen_invalidate_map_cache_single(MapCache *mc)
 {
 unsigned long i;
 MapCacheRev *reventry;
 
-/* Flush pending AIO before destroying the mapcache */
-bdrv_drain_all();
-
-mapcache_lock(mapcache);
+mapcache_lock(mc);
 
-QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+QTAILQ_FOREACH(reventry, &mc->locked_entries, next) {
 if (!reventry->dma) {
 continue;
 }
@@ -530,8 +527,8 @@ void xen_invalidate_map_cache(void)
reventry->vaddr_req);
 }
 
-for (i = 0; i < mapcache->nr_buckets; i++) {
-MapCacheEntry *entry = &mapcache->entry[i];
+for (i = 0; i < mc->nr_buckets; i++) {
+MapCacheEntry *entry = &mc->entry[i];
 
 if (entry->vaddr_base == NULL) {
 continue;
@@ -552,9 +549,17 @@ void xen_invalidate_map_cache(void)
 entry->valid_mapping = NULL;
 }
 
-mapcache->last_entry = NULL;
+mc->last_entry = NULL;
 
-mapcache_unlock(mapcache);
+mapcache_unlock(mc);
+}
+
+void xen_invalidate_map_cache(void)
+{
+/* Flush pending AIO before destroying the mapcache */
+bdrv_drain_all();
+
+xen_invalidate_map_cache_single(mapcache);
 }
 
 static uint8_t *xen_replace_cache_entry_unlocked(MapCache *mc,
-- 
2.40.1




[PULL 17/21] hw/display : Add device DM163

2024-04-30 Thread Peter Maydell
From: Inès Varhol 

This device implements the IM120417002 colors shield v1.1 for Arduino
(which relies on the DM163 8x3-channel led driving logic) and features
a simple display of an 8x8 RGB matrix. The columns of the matrix are
driven by the DM163 and the rows are driven externally.

Acked-by: Alistair Francis 
Signed-off-by: Arnaud Minier 
Signed-off-by: Inès Varhol 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 20240424200929.240921-2-ines.var...@telecom-paris.fr
[PMM: updated to new reset hold method prototype]
Signed-off-by: Peter Maydell 
---
 docs/system/arm/b-l475e-iot01a.rst |   3 +-
 include/hw/display/dm163.h |  59 +
 hw/display/dm163.c | 349 +
 hw/display/Kconfig |   3 +
 hw/display/meson.build |   1 +
 hw/display/trace-events|  14 ++
 6 files changed, 428 insertions(+), 1 deletion(-)
 create mode 100644 include/hw/display/dm163.h
 create mode 100644 hw/display/dm163.c

diff --git a/docs/system/arm/b-l475e-iot01a.rst 
b/docs/system/arm/b-l475e-iot01a.rst
index a76c9976c50..2adcc4b4c16 100644
--- a/docs/system/arm/b-l475e-iot01a.rst
+++ b/docs/system/arm/b-l475e-iot01a.rst
@@ -12,7 +12,7 @@ USART, I2C, SPI, CAN and USB OTG, as well as a variety of 
sensors.
 Supported devices
 """""""""""""""""
 
-Currently B-L475E-IOT01A machine's only supports the following devices:
+Currently B-L475E-IOT01A machines support the following devices:
 
 - Cortex-M4F based STM32L4x5 SoC
 - STM32L4x5 EXTI (Extended interrupts and events controller)
@@ -20,6 +20,7 @@ Currently B-L475E-IOT01A machine's only supports the 
following devices:
 - STM32L4x5 RCC (Reset and clock control)
 - STM32L4x5 GPIOs (General-purpose I/Os)
 - STM32L4x5 USARTs, UARTs and LPUART (Serial ports)
+- optional 8x8 led display (based on DM163 driver)
 
 Missing devices
 """""""""""""""
diff --git a/include/hw/display/dm163.h b/include/hw/display/dm163.h
new file mode 100644
index 00000000000..4377f77bb75
--- /dev/null
+++ b/include/hw/display/dm163.h
@@ -0,0 +1,59 @@
+/*
+ * QEMU DM163 8x3-channel constant current led driver
+ * driving columns of associated 8x8 RGB matrix.
+ *
+ * Copyright (C) 2024 Samuel Tardieu 
+ * Copyright (C) 2024 Arnaud Minier 
+ * Copyright (C) 2024 Inès Varhol 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_DISPLAY_DM163_H
+#define HW_DISPLAY_DM163_H
+
+#include "qom/object.h"
+#include "hw/qdev-core.h"
+
+#define TYPE_DM163 "dm163"
+OBJECT_DECLARE_SIMPLE_TYPE(DM163State, DM163);
+
+#define RGB_MATRIX_NUM_ROWS 8
+#define RGB_MATRIX_NUM_COLS 8
+#define DM163_NUM_LEDS (RGB_MATRIX_NUM_COLS * 3)
+/* The last row is filled with 0 (turned off row) */
+#define COLOR_BUFFER_SIZE (RGB_MATRIX_NUM_ROWS + 1)
+
+typedef struct DM163State {
+DeviceState parent_obj;
+
+/* DM163 driver */
+uint64_t bank0_shift_register[3];
+uint64_t bank1_shift_register[3];
+uint16_t latched_outputs[DM163_NUM_LEDS];
+uint16_t outputs[DM163_NUM_LEDS];
+qemu_irq sout;
+
+uint8_t sin;
+uint8_t dck;
+uint8_t rst_b;
+uint8_t lat_b;
+uint8_t selbk;
+uint8_t en_b;
+
+/* IM120417002 colors shield */
+uint8_t activated_rows;
+
+/* 8x8 RGB matrix */
+QemuConsole *console;
+uint8_t redraw;
+/* Rows currently being displayed on the matrix. */
+/* The last row is filled with 0 (turned off row) */
+uint32_t buffer[COLOR_BUFFER_SIZE][RGB_MATRIX_NUM_COLS];
+uint8_t last_buffer_idx;
+uint8_t buffer_idx_of_row[RGB_MATRIX_NUM_ROWS];
+/* Used to simulate retinal persistence of rows */
+uint8_t row_persistence_delay[RGB_MATRIX_NUM_ROWS];
+} DM163State;
+
+#endif /* HW_DISPLAY_DM163_H */
diff --git a/hw/display/dm163.c b/hw/display/dm163.c
new file mode 100644
index 00000000000..f92aee371d9
--- /dev/null
+++ b/hw/display/dm163.c
@@ -0,0 +1,349 @@
+/*
+ * QEMU DM163 8x3-channel constant current led driver
+ * driving columns of associated 8x8 RGB matrix.
+ *
+ * Copyright (C) 2024 Samuel Tardieu 
+ * Copyright (C) 2024 Arnaud Minier 
+ * Copyright (C) 2024 Inès Varhol 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+/*
+ * The reference used for the DM163 is the following :
+ * http://www.siti.com.tw/product/spec/LED/DM163.pdf
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "migration/vmstate.h"
+#include "hw/irq.h"
+#include "hw/qdev-properties.h"
+#include "hw/display/dm163.h"
+#include "ui/console.h"
+#include "trace.h"
+
+#define LED_SQUARE_SIZE 100
+/* Number of frames a row stays visible after being turned off. */
+#define ROW_PERSISTENCE 3
+#define TURNED_OFF_ROW (COLOR_BUFFER_SIZE - 1)
+
+static const VMStateDescription vmstate_dm163 = {
+.name = TYPE_DM163,
+.version_id = 1,
+.minimum_version_id = 1,
+.fields = (const VMStateField[]) {
+VMSTATE_UINT64_ARRAY(bank0_shift_register, DM163State, 3),
+VMSTATE_UINT64_ARRAY(bank1_shift_register, DM163State, 3),
+VMSTATE_UINT16_ARRAY

[PULL 00/21] target-arm queue

2024-04-30 Thread Peter Maydell
Here's another arm pullreq; nothing too exciting in here I think.

thanks
-- PMM

The following changes since commit 5fee33d97a7f2e95716417bd164f2f5264acd976:

  Merge tag 'samuel-thibault' of https://people.debian.org/~sthibault/qemu into 
staging (2024-04-29 14:34:25 -0700)

are available in the Git repository at:

  https://git.linaro.org/people/pmaydell/qemu-arm.git 
tags/pull-target-arm-20240430

for you to fetch changes up to a0c325c4b05cf7815739d6a84e567b95c8c5be7e:

  tests/qtest : Add testcase for DM163 (2024-04-30 16:05:08 +0100)


target-arm queue:
 * hw/core/clock: allow clock_propagate on child clocks
 * hvf: arm: Remove unused PL1_WRITE_MASK define
 * target/arm: Restrict translation disabled alignment check to VMSA
 * docs/system/arm/emulation.rst: Add missing implemented features
 * target/arm: Enable FEAT_CSV2_3, FEAT_ETS2, FEAT_Spec_FPACC for 'max'
 * tests/avocado: update sunxi kernel from armbian to 6.6.16
 * target/arm: Make new CPUs default to 1GHz generic timer
 * hw/dmax/xlnx_dpdma: fix handling of address_extension descriptor fields
 * hw/char/stm32l4x5_usart: Fix memory corruption by adding correct class_size
 * hw/arm/npcm7xx: Store derivative OTP fuse key in little endian
 * hw/arm: Add DM163 display to B-L475E-IOT01A board


Alexandra Diupina (1):
  hw/dmax/xlnx_dpdma: fix handling of address_extension descriptor fields

Inès Varhol (5):
  hw/display : Add device DM163
  hw/arm : Pass STM32L4x5 SYSCFG gpios to STM32L4x5 SoC
  hw/arm : Create Bl475eMachineState
  hw/arm : Connect DM163 to B-L475E-IOT01A
  tests/qtest : Add testcase for DM163

Peter Maydell (10):
  docs/system/arm/emulation.rst: Add missing implemented features
  target/arm: Enable FEAT_CSV2_3 for -cpu max
  target/arm: Enable FEAT_ETS2 for -cpu max
  target/arm: Implement ID_AA64MMFR3_EL1
  target/arm: Enable FEAT_Spec_FPACC for -cpu max
  tests/avocado: update sunxi kernel from armbian to 6.6.16
  target/arm: Refactor default generic timer frequency handling
  hw/arm/sbsa-ref: Force CPU generic timer to 62.5MHz
  hw/watchdog/sbsa_gwdt: Make watchdog timer frequency a QOM property
  target/arm: Default to 1GHz cntfrq for 'max' and new CPUs

Philippe Mathieu-Daudé (1):
  hw/arm/npcm7xx: Store derivative OTP fuse key in little endian

Raphael Poggi (1):
  hw/core/clock: allow clock_propagate on child clocks

Richard Henderson (1):
  target/arm: Restrict translation disabled alignment check to VMSA

Thomas Huth (1):
  hw/char/stm32l4x5_usart: Fix memory corruption by adding correct 
class_size

Zenghui Yu (1):
  hvf: arm: Remove PL1_WRITE_MASK

 docs/system/arm/b-l475e-iot01a.rst  |   3 +-
 docs/system/arm/emulation.rst   |  42 -
 include/hw/display/dm163.h  |  59 ++
 include/hw/watchdog/sbsa_gwdt.h |   3 +-
 target/arm/cpu.h|  28 +++
 target/arm/internals.h  |  15 +-
 hw/arm/b-l475e-iot01a.c | 105 +--
 hw/arm/npcm7xx.c|   3 +-
 hw/arm/sbsa-ref.c   |  16 ++
 hw/arm/stm32l4x5_soc.c  |   6 +-
 hw/char/stm32l4x5_usart.c   |   1 +
 hw/core/clock.c |   1 -
 hw/core/machine.c   |   4 +-
 hw/display/dm163.c  | 349 
 hw/dma/xlnx_dpdma.c |  20 +--
 hw/watchdog/sbsa_gwdt.c |  15 +-
 target/arm/cpu.c|  42 +++--
 target/arm/cpu64.c  |   2 +
 target/arm/helper.c |  22 +--
 target/arm/hvf/hvf.c|   3 +-
 target/arm/kvm.c|   2 +
 target/arm/tcg/cpu32.c  |   6 +-
 target/arm/tcg/cpu64.c  |  28 ++-
 target/arm/tcg/hflags.c |  12 +-
 tests/qtest/dm163-test.c| 194 
 tests/qtest/stm32l4x5_gpio-test.c   |  13 +-
 tests/qtest/stm32l4x5_syscfg-test.c |  17 +-
 hw/arm/Kconfig  |   1 +
 hw/display/Kconfig  |   3 +
 hw/display/meson.build  |   1 +
 hw/display/trace-events |  14 ++
 tests/avocado/boot_linux_console.py |  70 
 tests/avocado/replay_kernel.py  |   8 +-
 tests/qtest/meson.build |   2 +
 34 files changed, 987 insertions(+), 123 deletions(-)
 create mode 100644 include/hw/display/dm163.h
 create mode 100644 hw/display/dm163.c
 create mode 100644 tests/qtest/dm163-test.c



[PULL 01/21] hw/core/clock: allow clock_propagate on child clocks

2024-04-30 Thread Peter Maydell
From: Raphael Poggi 

clock_propagate() has an assert that clk->source is NULL, i.e. that
you are calling it on a clock which has no source clock.  This made
sense in the original design where the only way for a clock's
frequency to change if it had a source clock was when that source
clock changed.  However, we subsequently added multiplier/divider
support, but didn't look at what that meant for propagation.

If a clock-management device changes the multiplier or divider value
on a clock, it needs to propagate that change down to child clocks,
even if the clock has a source clock set.  So the assertion is now
incorrect.

Remove the assertion.

Signed-off-by: Raphael Poggi 
Message-id: 20240419162951.23558-1-raphael.po...@lynxleap.co.uk
Reviewed-by: Peter Maydell 
[PMM: Rewrote the commit message]
Signed-off-by: Peter Maydell 
---
 hw/core/clock.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hw/core/clock.c b/hw/core/clock.c
index a19c7db7df9..e212865307b 100644
--- a/hw/core/clock.c
+++ b/hw/core/clock.c
@@ -108,7 +108,6 @@ static void clock_propagate_period(Clock *clk, bool 
call_callbacks)
 
 void clock_propagate(Clock *clk)
 {
-assert(clk->source == NULL);
 trace_clock_propagate(CLOCK_PATH(clk));
 clock_propagate_period(clk, true);
 }
-- 
2.34.1




[PATCH v4 00/17] xen: Support grant mappings

2024-04-30 Thread Edgar E. Iglesias
From: "Edgar E. Iglesias" 

Hi,

This is a follow-up to Vikram's v3:
http://next.patchew.org/QEMU/20240227223501.28475-1-vikram.garh...@amd.com/

Grant mappings are a mechanism in Xen for guests to grant each other
permissions to map and share pages. These grants can be temporary
so both maps and unmaps must be respected. See here for more info:
https://github.com/xen-project/xen/blob/master/docs/misc/grant-tables.txt

Currently, the primary use-case for this is with QEMU's VirtIO backends.
Grant mappings will only work with models that use the address_space_map/unmap
interfaces, any other access will fail with appropriate error messages.

In response to feedback we got on v3, this version switches approach
from adding new MemoryRegion types and map/unmap hooks to instead reusing
the existing xen_map_cache() hooks (with extensions). Almost all of the
changes are now contained to the Xen modules.

This approach also refactors the mapcache to support multiple instances
(one for existing foreign mappings and another for grant mappings).

Patch 1 - 10 are refactorings with minimal functional changes.
Patch 3 - 10 could possibly get squashed into one but I've left them
separate to make them easier to review.

I've only enabled grants for the ARM PVH machine since that is what
I can currently test on.

Cheers,
Edgar

ChangeLog:

v3 -> v4:
* Major changes.
* Reuse existing xen_map_cache hooks.
* Reuse existing map-cache for both foreign and grant mappings.
* Only enable grants for the ARM PVH machine (removed i386).

v2 -> v3:
* Drop patch 1/7. This was done because device unplug is an x86-only case.
* Add missing qemu_mutex_unlock() before return.

v1 -> v2:
* Split patch 2/7 to keep physmem.c changes in a separate patch.
* In patch "xen: add map and unmap callbacks for grant" add check for total
  allowed grant < XEN_MAX_VIRTIO_GRANTS.
* Fix formatting issues and re-based with master latest.

Edgar E. Iglesias (15):
  xen: mapcache: Refactor lock functions for multi-instance
  xen: mapcache: Refactor xen_map_cache for multi-instance
  xen: mapcache: Refactor xen_remap_bucket for multi-instance
  xen: mapcache: Break out xen_ram_addr_from_mapcache_single
  xen: mapcache: Refactor xen_replace_cache_entry_unlocked
  xen: mapcache: Refactor xen_invalidate_map_cache_entry_unlocked
  xen: mapcache: Break out xen_invalidate_map_cache_single()
  xen: mapcache: Break out xen_map_cache_init_single()
  xen: mapcache: Make MCACHE_BUCKET_SHIFT runtime configurable
  xen: mapcache: Unmap first entries in buckets
  softmmu: Pass RAM MemoryRegion and is_write xen_map_cache()
  xen: Add xen_mr_is_memory()
  xen: mapcache: Remove assumption of RAMBlock with 0 offset
  xen: mapcache: Add support for grant mappings
  hw/arm: xen: Enable use of grant mappings

Juergen Gross (2):
  softmmu: let qemu_map_ram_ptr() use qemu_ram_ptr_length()
  xen: let xen_ram_addr_from_mapcache() return -1 in case of not found
entry

 hw/arm/xen_arm.c|   5 +
 hw/xen/xen-hvm-common.c |  16 +-
 hw/xen/xen-mapcache.c   | 408 +---
 include/hw/xen/xen-hvm-common.h |   3 +
 include/sysemu/xen-mapcache.h   |  13 +-
 include/sysemu/xen.h|  15 ++
 system/physmem.c|  83 ---
 7 files changed, 365 insertions(+), 178 deletions(-)

-- 
2.40.1




[PATCH v4 05/17] xen: mapcache: Refactor xen_remap_bucket for multi-instance

2024-04-30 Thread Edgar E. Iglesias
From: "Edgar E. Iglesias" 

Add MapCache argument to xen_remap_bucket in preparation
to support multiple map caches.

No functional changes.

Signed-off-by: Edgar E. Iglesias 
---
 hw/xen/xen-mapcache.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index 896021d86f..326a9b61ca 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -139,7 +139,8 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void 
*opaque)
 mapcache->entry = g_malloc0(size);
 }
 
-static void xen_remap_bucket(MapCacheEntry *entry,
+static void xen_remap_bucket(MapCache *mc,
+ MapCacheEntry *entry,
  void *vaddr,
  hwaddr size,
  hwaddr address_index,
@@ -313,14 +314,14 @@ tryagain:
 if (!entry) {
 entry = g_new0(MapCacheEntry, 1);
 pentry->next = entry;
-xen_remap_bucket(entry, NULL, cache_size, address_index, dummy);
+xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy);
 } else if (!entry->lock) {
 if (!entry->vaddr_base || entry->paddr_index != address_index ||
 entry->size != cache_size ||
 !test_bits(address_offset >> XC_PAGE_SHIFT,
 test_bit_size >> XC_PAGE_SHIFT,
 entry->valid_mapping)) {
-xen_remap_bucket(entry, NULL, cache_size, address_index, dummy);
+xen_remap_bucket(mc, entry, NULL, cache_size, address_index, 
dummy);
 }
 }
 
@@ -587,7 +588,7 @@ static uint8_t *xen_replace_cache_entry_unlocked(hwaddr 
old_phys_addr,
 
 trace_xen_replace_cache_entry_dummy(old_phys_addr, new_phys_addr);
 
-xen_remap_bucket(entry, entry->vaddr_base,
+xen_remap_bucket(mapcache, entry, entry->vaddr_base,
  cache_size, address_index, false);
 if (!test_bits(address_offset >> XC_PAGE_SHIFT,
 test_bit_size >> XC_PAGE_SHIFT,
-- 
2.40.1




[PATCH v4 17/17] hw/arm: xen: Enable use of grant mappings

2024-04-30 Thread Edgar E. Iglesias
From: "Edgar E. Iglesias" 

Signed-off-by: Edgar E. Iglesias 
---
 hw/arm/xen_arm.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/hw/arm/xen_arm.c b/hw/arm/xen_arm.c
index 15fa7dfa84..6fad829ede 100644
--- a/hw/arm/xen_arm.c
+++ b/hw/arm/xen_arm.c
@@ -125,6 +125,11 @@ static void xen_init_ram(MachineState *machine)
  GUEST_RAM1_BASE, ram_size[1]);
 memory_region_add_subregion(sysmem, GUEST_RAM1_BASE, &ram_hi);
 }
+
+/* Setup support for grants.  */
+memory_region_init_ram(&xen_grants, NULL, "xen.grants", block_len,
+   &error_fatal);
+memory_region_add_subregion(sysmem, XEN_GRANT_ADDR_OFF, &xen_grants);
 }
 
 void arch_handle_ioreq(XenIOState *state, ioreq_t *req)
-- 
2.40.1




[PATCH v4 08/17] xen: mapcache: Refactor xen_invalidate_map_cache_entry_unlocked

2024-04-30 Thread Edgar E. Iglesias
From: "Edgar E. Iglesias" 

Add MapCache argument to xen_invalidate_map_cache_entry_unlocked.
This is in preparation for supporting multiple map caches.

No functional changes.

Signed-off-by: Edgar E. Iglesias 
---
 hw/xen/xen-mapcache.c | 21 +++--
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index 6e758eff94..34454da2f6 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -420,7 +420,8 @@ ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
 return xen_ram_addr_from_mapcache_single(mapcache, ptr);
 }
 
-static void xen_invalidate_map_cache_entry_unlocked(uint8_t *buffer)
+static void xen_invalidate_map_cache_entry_unlocked(MapCache *mc,
+uint8_t *buffer)
 {
 MapCacheEntry *entry = NULL, *pentry = NULL;
 MapCacheRev *reventry;
@@ -428,7 +429,7 @@ static void xen_invalidate_map_cache_entry_unlocked(uint8_t 
*buffer)
 hwaddr size;
 int found = 0;
 
-QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+QTAILQ_FOREACH(reventry, &mc->locked_entries, next) {
 if (reventry->vaddr_req == buffer) {
 paddr_index = reventry->paddr_index;
 size = reventry->size;
@@ -438,7 +439,7 @@ static void xen_invalidate_map_cache_entry_unlocked(uint8_t 
*buffer)
 }
 if (!found) {
 trace_xen_invalidate_map_cache_entry_unlocked_not_found(buffer);
-QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+QTAILQ_FOREACH(reventry, &mc->locked_entries, next) {
 trace_xen_invalidate_map_cache_entry_unlocked_found(
 reventry->paddr_index,
 reventry->vaddr_req
@@ -446,15 +447,15 @@ static void 
xen_invalidate_map_cache_entry_unlocked(uint8_t *buffer)
 }
 return;
 }
-QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next);
+QTAILQ_REMOVE(&mc->locked_entries, reventry, next);
 g_free(reventry);
 
-if (mapcache->last_entry != NULL &&
-mapcache->last_entry->paddr_index == paddr_index) {
-mapcache->last_entry = NULL;
+if (mc->last_entry != NULL &&
+mc->last_entry->paddr_index == paddr_index) {
+mc->last_entry = NULL;
 }
 
-entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
+entry = &mc->entry[paddr_index % mc->nr_buckets];
 while (entry && (entry->paddr_index != paddr_index || entry->size != 
size)) {
 pentry = entry;
 entry = entry->next;
@@ -488,7 +489,7 @@ static void xen_invalidate_map_cache_entry_bh(void *opaque)
 XenMapCacheData *data = opaque;
 
 mapcache_lock(mapcache);
-xen_invalidate_map_cache_entry_unlocked(data->buffer);
+xen_invalidate_map_cache_entry_unlocked(mapcache, data->buffer);
 mapcache_unlock(mapcache);
 
 aio_co_wake(data->co);
@@ -506,7 +507,7 @@ void coroutine_mixed_fn 
xen_invalidate_map_cache_entry(uint8_t *buffer)
 qemu_coroutine_yield();
 } else {
 mapcache_lock(mapcache);
-xen_invalidate_map_cache_entry_unlocked(buffer);
+xen_invalidate_map_cache_entry_unlocked(mapcache, buffer);
 mapcache_unlock(mapcache);
 }
 }
-- 
2.40.1




[PATCH v4 16/17] xen: mapcache: Add support for grant mappings

2024-04-30 Thread Edgar E. Iglesias
From: "Edgar E. Iglesias" 

Add a second mapcache for grant mappings. The mapcache for
grants needs to work with XC_PAGE_SIZE granularity since
we can't map larger ranges than what has been granted to us.

Like with foreign mappings (xen_memory), machines using grants
are expected to initialize the xen_grants MR and map it
into their address-map accordingly.

Signed-off-by: Edgar E. Iglesias 
---
 hw/xen/xen-hvm-common.c |  12 ++-
 hw/xen/xen-mapcache.c   | 158 +---
 include/hw/xen/xen-hvm-common.h |   3 +
 include/sysemu/xen.h|   7 ++
 4 files changed, 145 insertions(+), 35 deletions(-)

diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c
index 0267b88d26..fdec400491 100644
--- a/hw/xen/xen-hvm-common.c
+++ b/hw/xen/xen-hvm-common.c
@@ -10,12 +10,18 @@
 #include "hw/boards.h"
 #include "hw/xen/arch_hvm.h"
 
-MemoryRegion xen_memory;
+MemoryRegion xen_memory, xen_grants;
 
-/* Check for xen memory.  */
+/* Check for any kind of xen memory, foreign mappings or grants.  */
 bool xen_mr_is_memory(MemoryRegion *mr)
 {
-return mr == &xen_memory;
+return mr == &xen_memory || mr == &xen_grants;
+}
+
+/* Check specifically for grants.  */
+bool xen_mr_is_grants(MemoryRegion *mr)
+{
+return mr == &xen_grants;
 }
 
 void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr,
diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index 1b32d0c003..96cd68e28d 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -14,6 +14,7 @@
 
 #include 
 
+#include "hw/xen/xen-hvm-common.h"
 #include "hw/xen/xen_native.h"
 #include "qemu/bitmap.h"
 
@@ -21,6 +22,8 @@
 #include "sysemu/xen-mapcache.h"
 #include "trace.h"
 
+#include 
+#include 
 
 #if HOST_LONG_BITS == 32
 #  define MCACHE_MAX_SIZE (1UL<<31) /* 2GB Cap */
@@ -41,6 +44,7 @@ typedef struct MapCacheEntry {
 unsigned long *valid_mapping;
 uint32_t lock;
 #define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0)
+#define XEN_MAPCACHE_ENTRY_GRANT (1 << 1)
 uint8_t flags;
 hwaddr size;
 
@@ -74,6 +78,8 @@ typedef struct MapCache {
 } MapCache;
 
 static MapCache *mapcache;
+static MapCache *mapcache_grants;
+static xengnttab_handle *xen_region_gnttabdev;
 
 static inline void mapcache_lock(MapCache *mc)
 {
@@ -132,6 +138,12 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void 
*opaque)
 unsigned long max_mcache_size;
 unsigned int bucket_shift;
 
+xen_region_gnttabdev = xengnttab_open(NULL, 0);
+if (xen_region_gnttabdev == NULL) {
+error_report("mapcache: Failed to open gnttab device");
+exit(EXIT_FAILURE);
+}
+
 if (HOST_LONG_BITS == 32) {
 bucket_shift = 16;
 } else {
@@ -160,6 +172,15 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void 
*opaque)
 mapcache = xen_map_cache_init_single(f, opaque,
  bucket_shift,
  max_mcache_size);
+
+/*
+ * Grant mappings must use XC_PAGE_SIZE granularity since we can't
+ * map anything beyond the number of pages granted to us.
+ */
+mapcache_grants = xen_map_cache_init_single(f, opaque,
+XC_PAGE_SHIFT,
+max_mcache_size);
+
 setrlimit(RLIMIT_AS, &rlimit_as);
 }
 
@@ -169,17 +190,25 @@ static void xen_remap_bucket(MapCache *mc,
  hwaddr size,
  hwaddr address_index,
  bool dummy,
+ bool grant,
+ bool grant_is_write,
+ hwaddr grant_ref,
  ram_addr_t ram_offset)
 {
 uint8_t *vaddr_base;
-xen_pfn_t *pfns;
+uint32_t *refs = NULL;
+xen_pfn_t *pfns = NULL;
 int *err;
 unsigned int i;
 hwaddr nb_pfn = size >> XC_PAGE_SHIFT;
 
 trace_xen_remap_bucket(address_index);
 
-pfns = g_new0(xen_pfn_t, nb_pfn);
+if (grant) {
+refs = g_new0(uint32_t, nb_pfn);
+} else {
+pfns = g_new0(xen_pfn_t, nb_pfn);
+}
 err = g_new0(int, nb_pfn);
 
 if (entry->vaddr_base != NULL) {
@@ -208,21 +237,45 @@ static void xen_remap_bucket(MapCache *mc,
 g_free(entry->valid_mapping);
 entry->valid_mapping = NULL;
 
-for (i = 0; i < nb_pfn; i++) {
-pfns[i] = (address_index << (mc->bucket_shift - XC_PAGE_SHIFT)) + i;
+if (grant) {
+for (i = 0; i < nb_pfn; i++) {
+refs[i] = grant_ref + i;
+}
+} else {
+for (i = 0; i < nb_pfn; i++) {
+pfns[i] = (address_index << (mc->bucket_shift - XC_PAGE_SHIFT)) + 
i;
+}
 }
 
-/*
- * If the caller has requested the mapping at a specific address use
- * MAP_FIXED to make sure it's honored.
- */
+entry->flags &= ~XEN_MAPCACHE_ENTRY_GRANT;
+
 if (!dummy) {
-vaddr_base = xenforeignmemory_map2(xen

[PULL 04/21] docs/system/arm/emulation.rst: Add missing implemented features

2024-04-30 Thread Peter Maydell
As of version DDI0487K.a of the Arm ARM, some architectural features
which previously didn't have official names have been named.  Add
these to the list of features which QEMU's TCG emulation supports.
Mostly these are features which we thought of as part of baseline 8.0
support.  For SVE and SVE2, the names have been brought into line
with the FEAT_* naming convention of other extensions, and some
sub-components split into separate FEAT_ items.  In a few cases (eg
FEAT_CCIDX, FEAT_DPB2) the omission from our list was just an oversight.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Message-id: 20240418152004.2106516-2-peter.mayd...@linaro.org
---
 docs/system/arm/emulation.rst | 38 +--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
index a9ae7ede9fc..5fdc64a944f 100644
--- a/docs/system/arm/emulation.rst
+++ b/docs/system/arm/emulation.rst
@@ -8,13 +8,26 @@ Armv8 versions of the A-profile architecture. It also has 
support for
 the following architecture extensions:
 
 - FEAT_AA32BF16 (AArch32 BFloat16 instructions)
+- FEAT_AA32EL0 (Support for AArch32 at EL0)
+- FEAT_AA32EL1 (Support for AArch32 at EL1)
+- FEAT_AA32EL2 (Support for AArch32 at EL2)
+- FEAT_AA32EL3 (Support for AArch32 at EL3)
 - FEAT_AA32HPD (AArch32 hierarchical permission disables)
 - FEAT_AA32I8MM (AArch32 Int8 matrix multiplication instructions)
+- FEAT_AA64EL0 (Support for AArch64 at EL0)
+- FEAT_AA64EL1 (Support for AArch64 at EL1)
+- FEAT_AA64EL2 (Support for AArch64 at EL2)
+- FEAT_AA64EL3 (Support for AArch64 at EL3)
+- FEAT_AdvSIMD (Advanced SIMD Extension)
 - FEAT_AES (AESD and AESE instructions)
+- FEAT_Armv9_Crypto (Armv9 Cryptographic Extension)
+- FEAT_ASID16 (16 bit ASID)
 - FEAT_BBM at level 2 (Translation table break-before-make levels)
 - FEAT_BF16 (AArch64 BFloat16 instructions)
 - FEAT_BTI (Branch Target Identification)
+- FEAT_CCIDX (Extended cache index)
 - FEAT_CRC32 (CRC32 instructions)
+- FEAT_Crypto (Cryptographic Extension)
 - FEAT_CSV2 (Cache speculation variant 2)
 - FEAT_CSV2_1p1 (Cache speculation variant 2, version 1.1)
 - FEAT_CSV2_1p2 (Cache speculation variant 2, version 1.2)
@@ -23,18 +36,27 @@ the following architecture extensions:
 - FEAT_DGH (Data gathering hint)
 - FEAT_DIT (Data Independent Timing instructions)
 - FEAT_DPB (DC CVAP instruction)
+- FEAT_DPB2 (DC CVADP instruction)
+- FEAT_Debugv8p1 (Debug with VHE)
 - FEAT_Debugv8p2 (Debug changes for v8.2)
 - FEAT_Debugv8p4 (Debug changes for v8.4)
 - FEAT_DotProd (Advanced SIMD dot product instructions)
 - FEAT_DoubleFault (Double Fault Extension)
 - FEAT_E0PD (Preventing EL0 access to halves of address maps)
 - FEAT_ECV (Enhanced Counter Virtualization)
+- FEAT_EL0 (Support for execution at EL0)
+- FEAT_EL1 (Support for execution at EL1)
+- FEAT_EL2 (Support for execution at EL2)
+- FEAT_EL3 (Support for execution at EL3)
 - FEAT_EPAC (Enhanced pointer authentication)
 - FEAT_ETS (Enhanced Translation Synchronization)
 - FEAT_EVT (Enhanced Virtualization Traps)
+- FEAT_F32MM (Single-precision Matrix Multiplication)
+- FEAT_F64MM (Double-precision Matrix Multiplication)
 - FEAT_FCMA (Floating-point complex number instructions)
 - FEAT_FGT (Fine-Grained Traps)
 - FEAT_FHM (Floating-point half-precision multiplication instructions)
+- FEAT_FP (Floating Point extensions)
 - FEAT_FP16 (Half-precision floating-point data processing)
 - FEAT_FPAC (Faulting on AUT* instructions)
 - FEAT_FPACCOMBINE (Faulting on combined pointer authentication instructions)
@@ -60,10 +82,13 @@ the following architecture extensions:
 - FEAT_LSE (Large System Extensions)
 - FEAT_LSE2 (Large System Extensions v2)
 - FEAT_LVA (Large Virtual Address space)
+- FEAT_MixedEnd (Mixed-endian support)
+- FEAT_MixedEndEL0 (Mixed-endian support at EL0)
 - FEAT_MOPS (Standardization of memory operations)
 - FEAT_MTE (Memory Tagging Extension)
 - FEAT_MTE2 (Memory Tagging Extension)
 - FEAT_MTE3 (MTE Asymmetric Fault Handling)
+- FEAT_MTE_ASYM_FAULT (Memory tagging asymmetric faults)
 - FEAT_NMI (Non-maskable Interrupt)
 - FEAT_NV (Nested Virtualization)
 - FEAT_NV2 (Enhanced nested virtualization support)
@@ -76,6 +101,7 @@ the following architecture extensions:
 - FEAT_PAuth (Pointer authentication)
 - FEAT_PAuth2 (Enhancements to pointer authentication)
 - FEAT_PMULL (PMULL, PMULL2 instructions)
+- FEAT_PMUv3 (PMU extension version 3)
 - FEAT_PMUv3p1 (PMU Extensions v3.1)
 - FEAT_PMUv3p4 (PMU Extensions v3.4)
 - FEAT_PMUv3p5 (PMU Extensions v3.5)
@@ -97,8 +123,18 @@ the following architecture extensions:
 - FEAT_SME_FA64 (Full A64 instruction set in Streaming SVE mode)
 - FEAT_SME_F64F64 (Double-precision floating-point outer product instructions)
 - FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product 
instructions)
+- FEAT_SVE (Scalable Vector Extension)
+- FEAT_SVE_AES (Scalable Vector AES instructions)
+- FEAT_SVE_BitPerm (Scalable Vector Bit 

[PATCH v4 07/17] xen: mapcache: Refactor xen_replace_cache_entry_unlocked

2024-04-30 Thread Edgar E. Iglesias
From: "Edgar E. Iglesias" 

Add MapCache argument to xen_replace_cache_entry_unlocked in
preparation for supporting multiple map caches.

No functional change.

Signed-off-by: Edgar E. Iglesias 
---
 hw/xen/xen-mapcache.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index d2deff70c8..6e758eff94 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -556,7 +556,8 @@ void xen_invalidate_map_cache(void)
 mapcache_unlock(mapcache);
 }
 
-static uint8_t *xen_replace_cache_entry_unlocked(hwaddr old_phys_addr,
+static uint8_t *xen_replace_cache_entry_unlocked(MapCache *mc,
+ hwaddr old_phys_addr,
  hwaddr new_phys_addr,
  hwaddr size)
 {
@@ -578,7 +579,7 @@ static uint8_t *xen_replace_cache_entry_unlocked(hwaddr 
old_phys_addr,
 cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE);
 }
 
-entry = &mapcache->entry[address_index % mapcache->nr_buckets];
+entry = &mc->entry[address_index % mc->nr_buckets];
 while (entry && !(entry->paddr_index == address_index &&
   entry->size == cache_size)) {
 entry = entry->next;
@@ -614,7 +615,8 @@ uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr,
 uint8_t *p;
 
 mapcache_lock(mapcache);
-p = xen_replace_cache_entry_unlocked(old_phys_addr, new_phys_addr, size);
+p = xen_replace_cache_entry_unlocked(mapcache, old_phys_addr,
+ new_phys_addr, size);
 mapcache_unlock(mapcache);
 return p;
 }
-- 
2.40.1




[PATCH v4 15/17] xen: mapcache: Remove assumption of RAMBlock with 0 offset

2024-04-30 Thread Edgar E. Iglesias
From: "Edgar E. Iglesias" 

The current mapcache assumes that all memory is mapped
in a single RAM MR (the first one with offset 0). Remove
this assumption and propagate the offset to the mapcache
so it can do reverse mappings (from hostptr -> ram_addr).

This is in preparation for adding grant mappings.

Signed-off-by: Edgar E. Iglesias 
---
 hw/xen/xen-mapcache.c | 25 ++---
 include/sysemu/xen-mapcache.h |  2 ++
 system/physmem.c  |  8 
 3 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index 09b5f36d9c..1b32d0c003 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -43,6 +43,9 @@ typedef struct MapCacheEntry {
 #define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0)
 uint8_t flags;
 hwaddr size;
+
+/* Keep ram_addr offset for reverse mappings (hostptr -> ram_addr).  */
+ram_addr_t ram_offset;
 struct MapCacheEntry *next;
 } MapCacheEntry;
 
@@ -165,7 +168,8 @@ static void xen_remap_bucket(MapCache *mc,
  void *vaddr,
  hwaddr size,
  hwaddr address_index,
- bool dummy)
+ bool dummy,
+ ram_addr_t ram_offset)
 {
 uint8_t *vaddr_base;
 xen_pfn_t *pfns;
@@ -244,6 +248,7 @@ static void xen_remap_bucket(MapCache *mc,
 entry->size = size;
 entry->valid_mapping = g_new0(unsigned long,
   BITS_TO_LONGS(size >> XC_PAGE_SHIFT));
+entry->ram_offset = ram_offset;
 
 if (dummy) {
 entry->flags |= XEN_MAPCACHE_ENTRY_DUMMY;
@@ -264,6 +269,7 @@ static void xen_remap_bucket(MapCache *mc,
 
 static uint8_t *xen_map_cache_unlocked(MapCache *mc,
hwaddr phys_addr, hwaddr size,
+   ram_addr_t ram_offset,
uint8_t lock, bool dma, bool is_write)
 {
 MapCacheEntry *entry, *pentry = NULL,
@@ -335,14 +341,16 @@ tryagain:
 if (!entry) {
 entry = g_new0(MapCacheEntry, 1);
 pentry->next = entry;
-xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy);
+xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy,
+ ram_offset);
 } else if (!entry->lock) {
 if (!entry->vaddr_base || entry->paddr_index != address_index ||
 entry->size != cache_size ||
 !test_bits(address_offset >> XC_PAGE_SHIFT,
 test_bit_size >> XC_PAGE_SHIFT,
 entry->valid_mapping)) {
-xen_remap_bucket(mc, entry, NULL, cache_size, address_index, 
dummy);
+xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy,
+ ram_offset);
 }
 }
 
@@ -389,13 +397,15 @@ tryagain:
 
 uint8_t *xen_map_cache(MemoryRegion *mr,
hwaddr phys_addr, hwaddr size,
+   ram_addr_t ram_addr_offset,
uint8_t lock, bool dma,
bool is_write)
 {
 uint8_t *p;
 
 mapcache_lock(mapcache);
-p = xen_map_cache_unlocked(mapcache, phys_addr, size, lock, dma, is_write);
+p = xen_map_cache_unlocked(mapcache, phys_addr, size, ram_addr_offset,
+   lock, dma, is_write);
 mapcache_unlock(mapcache);
 return p;
 }
@@ -432,7 +442,8 @@ static ram_addr_t 
xen_ram_addr_from_mapcache_single(MapCache *mc, void *ptr)
 raddr = RAM_ADDR_INVALID;
 } else {
 raddr = (reventry->paddr_index << mc->bucket_shift) +
- ((unsigned long) ptr - (unsigned long) entry->vaddr_base);
+ ((unsigned long) ptr - (unsigned long) entry->vaddr_base) +
+ entry->ram_offset;
 }
 mapcache_unlock(mc);
 return raddr;
@@ -627,8 +638,8 @@ static uint8_t *xen_replace_cache_entry_unlocked(MapCache 
*mc,
 
 trace_xen_replace_cache_entry_dummy(old_phys_addr, new_phys_addr);
 
-xen_remap_bucket(mapcache, entry, entry->vaddr_base,
- cache_size, address_index, false);
+xen_remap_bucket(mc, entry, entry->vaddr_base,
+ cache_size, address_index, false, entry->ram_offset);
 if (!test_bits(address_offset >> XC_PAGE_SHIFT,
 test_bit_size >> XC_PAGE_SHIFT,
 entry->valid_mapping)) {
diff --git a/include/sysemu/xen-mapcache.h b/include/sysemu/xen-mapcache.h
index 1ec9e66752..b5e3ea1bc0 100644
--- a/include/sysemu/xen-mapcache.h
+++ b/include/sysemu/xen-mapcache.h
@@ -19,6 +19,7 @@ typedef hwaddr (*phys_offset_to_gaddr_t)(hwaddr phys_offset,
 void xen_map_cache_init(phys_offset_to_gaddr_t f,
 void *opaque);
 uint8_t *xen_map_cache(MemoryRegion *mr, hwaddr phys_addr, hwaddr size,
+   ram_addr_t ram_addr_offset,
   

Re: [PULL v2 00/17] loongarch-to-apply queue

2024-04-30 Thread Richard Henderson

On 4/28/24 19:30, Song Gao wrote:

The following changes since commit fd87be1dada5672f877e03c2ca8504458292c479:

  Merge tag 'accel-20240426' of https://github.com/philmd/qemu into staging
(2024-04-26 15:28:13 -0700)

are available in the Git repository at:

   https://gitlab.com/gaosong/qemu.git  tags/pull-loongarch-20240429

for you to fetch changes up to 841ef2c9df5317e32de590424b0c5c36fbb4de78:

   hw/loongarch: Add cells missing from rtc node (2024-04-29 10:25:56 +0800)


Add boot LoongArch elf kernel with FDT

v2: Fix 'make check-tcg' failed.


Reviewed-by: Richard Henderson 

r~



[PULL 03/21] target/arm: Restrict translation disabled alignment check to VMSA

2024-04-30 Thread Peter Maydell
From: Richard Henderson 

For cpus using PMSA, when the MPU is disabled, the default memory
type is Normal, Non-cacheable. This means that it should not
have alignment restrictions enforced.

Cc: qemu-sta...@nongnu.org
Fixes: 59754f85ed3 ("target/arm: Do memory type alignment check when 
translation disabled")
Reported-by: Clément Chigot 
Signed-off-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
Tested-by: Clément Chigot 
Message-id: 20240422170722.117409-1-richard.hender...@linaro.org
[PMM: trivial comment, commit message tweaks]
Signed-off-by: Peter Maydell 
---
 target/arm/tcg/hflags.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
index 5da1b0fc1d4..f03977b4b00 100644
--- a/target/arm/tcg/hflags.c
+++ b/target/arm/tcg/hflags.c
@@ -38,8 +38,16 @@ static bool aprofile_require_alignment(CPUARMState *env, int 
el, uint64_t sctlr)
 }
 
 /*
- * If translation is disabled, then the default memory type is
- * Device(-nGnRnE) instead of Normal, which requires that alignment
+ * With PMSA, when the MPU is disabled, all memory types in the
+ * default map are Normal, so don't need alignment enforcing.
+ */
+if (arm_feature(env, ARM_FEATURE_PMSA)) {
+return false;
+}
+
+/*
+ * With VMSA, if translation is disabled, then the default memory type
+ * is Device(-nGnRnE) instead of Normal, which requires that alignment
  * be enforced.  Since this affects all ram, it is most efficient
  * to handle this during translation.
  */
-- 
2.34.1




[PULL 20/21] hw/arm : Connect DM163 to B-L475E-IOT01A

2024-04-30 Thread Peter Maydell
From: Inès Varhol 

Signed-off-by: Arnaud Minier 
Signed-off-by: Inès Varhol 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 20240424200929.240921-5-ines.var...@telecom-paris.fr
Signed-off-by: Peter Maydell 
---
 hw/arm/b-l475e-iot01a.c | 59 +++--
 hw/arm/Kconfig  |  1 +
 2 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/hw/arm/b-l475e-iot01a.c b/hw/arm/b-l475e-iot01a.c
index 970c637ce61..5002a40f06d 100644
--- a/hw/arm/b-l475e-iot01a.c
+++ b/hw/arm/b-l475e-iot01a.c
@@ -27,10 +27,37 @@
 #include "hw/boards.h"
 #include "hw/qdev-properties.h"
 #include "qemu/error-report.h"
-#include "hw/arm/stm32l4x5_soc.h"
 #include "hw/arm/boot.h"
+#include "hw/core/split-irq.h"
+#include "hw/arm/stm32l4x5_soc.h"
+#include "hw/gpio/stm32l4x5_gpio.h"
+#include "hw/display/dm163.h"
 
-/* B-L475E-IOT01A implementation is derived from netduinoplus2 */
+/* B-L475E-IOT01A implementation is inspired by netduinoplus2 and arduino */
+
+/*
+ * There are actually 14 input pins in the DM163 device.
+ * Here the DM163 input pin EN isn't connected to the STM32L4x5
+ * GPIOs as the IM120417002 colors shield doesn't actually use
+ * this pin to drive the RGB matrix.
+ */
+#define NUM_DM163_INPUTS 13
+
+static const unsigned dm163_input[NUM_DM163_INPUTS] = {
+1 * GPIO_NUM_PINS + 2,  /* ROW0  PB2   */
+0 * GPIO_NUM_PINS + 15, /* ROW1  PA15  */
+0 * GPIO_NUM_PINS + 2,  /* ROW2  PA2   */
+0 * GPIO_NUM_PINS + 7,  /* ROW3  PA7   */
+0 * GPIO_NUM_PINS + 6,  /* ROW4  PA6   */
+0 * GPIO_NUM_PINS + 5,  /* ROW5  PA5   */
+1 * GPIO_NUM_PINS + 0,  /* ROW6  PB0   */
+0 * GPIO_NUM_PINS + 3,  /* ROW7  PA3   */
+0 * GPIO_NUM_PINS + 4,  /* SIN (SDA) PA4   */
+1 * GPIO_NUM_PINS + 1,  /* DCK (SCK) PB1   */
+2 * GPIO_NUM_PINS + 3,  /* RST_B (RST) PC3 */
+2 * GPIO_NUM_PINS + 4,  /* LAT_B (LAT) PC4 */
+2 * GPIO_NUM_PINS + 5,  /* SELBK (SB)  PC5 */
+};
 
 #define TYPE_B_L475E_IOT01A MACHINE_TYPE_NAME("b-l475e-iot01a")
 OBJECT_DECLARE_SIMPLE_TYPE(Bl475eMachineState, B_L475E_IOT01A)
@@ -39,12 +66,16 @@ typedef struct Bl475eMachineState {
 MachineState parent_obj;
 
 Stm32l4x5SocState soc;
+SplitIRQ gpio_splitters[NUM_DM163_INPUTS];
+DM163State dm163;
 } Bl475eMachineState;
 
 static void bl475e_init(MachineState *machine)
 {
 Bl475eMachineState *s = B_L475E_IOT01A(machine);
 const Stm32l4x5SocClass *sc;
+DeviceState *dev, *gpio_out_splitter;
+unsigned gpio, pin;
 
 object_initialize_child(OBJECT(machine), "soc", &s->soc,
 TYPE_STM32L4X5XG_SOC);
@@ -53,6 +84,30 @@ static void bl475e_init(MachineState *machine)
 sc = STM32L4X5_SOC_GET_CLASS(&s->soc);
 armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename, 0,
sc->flash_size);
+
+if (object_class_by_name(TYPE_DM163)) {
+object_initialize_child(OBJECT(machine), "dm163",
+&s->dm163, TYPE_DM163);
+dev = DEVICE(&s->dm163);
+qdev_realize(dev, NULL, &error_abort);
+
+for (unsigned i = 0; i < NUM_DM163_INPUTS; i++) {
+object_initialize_child(OBJECT(machine), "gpio-out-splitters[*]",
+&s->gpio_splitters[i], TYPE_SPLIT_IRQ);
+gpio_out_splitter = DEVICE(&s->gpio_splitters[i]);
+qdev_prop_set_uint32(gpio_out_splitter, "num-lines", 2);
+qdev_realize(gpio_out_splitter, NULL, &error_fatal);
+
+qdev_connect_gpio_out(gpio_out_splitter, 0,
+qdev_get_gpio_in(DEVICE(&s->soc), dm163_input[i]));
+qdev_connect_gpio_out(gpio_out_splitter, 1,
+qdev_get_gpio_in(dev, i));
+gpio = dm163_input[i] / GPIO_NUM_PINS;
+pin = dm163_input[i] % GPIO_NUM_PINS;
+qdev_connect_gpio_out(DEVICE(&s->soc.gpio[gpio]), pin,
+qdev_get_gpio_in(DEVICE(gpio_out_splitter), 0));
+}
+}
 }
 
 static void bl475e_machine_init(ObjectClass *oc, void *data)
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index e8b6e5e5ebc..fe1f9643bd9 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -468,6 +468,7 @@ config B_L475E_IOT01A
 default y
 depends on TCG && ARM
 select STM32L4X5_SOC
+imply DM163
 
 config STM32L4X5_SOC
 bool
-- 
2.34.1




[PATCH v4 10/17] xen: mapcache: Break out xen_map_cache_init_single()

2024-04-30 Thread Edgar E. Iglesias
From: "Edgar E. Iglesias" 

Break out xen_map_cache_init_single() in preparation for
adding multiple map caches.

Signed-off-by: Edgar E. Iglesias 
---
 hw/xen/xen-mapcache.c | 53 ++-
 1 file changed, 32 insertions(+), 21 deletions(-)

diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index dd08cd296b..72a7e25e3e 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -93,23 +93,44 @@ static inline int test_bits(int nr, int size, const 
unsigned long *addr)
 return 0;
 }
 
-void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque)
+static MapCache *xen_map_cache_init_single(phys_offset_to_gaddr_t f,
+   void *opaque,
+   unsigned long max_size)
 {
 unsigned long size;
-struct rlimit rlimit_as;
+MapCache *mc;
+
+mc = g_new0(MapCache, 1);
+
+mc->phys_offset_to_gaddr = f;
+mc->opaque = opaque;
+qemu_mutex_init(&mc->lock);
+
+QTAILQ_INIT(&mc->locked_entries);
 
-mapcache = g_new0(MapCache, 1);
+mc->max_mcache_size = max_size;
 
-mapcache->phys_offset_to_gaddr = f;
-mapcache->opaque = opaque;
-qemu_mutex_init(&mapcache->lock);
+mc->nr_buckets =
+(((mc->max_mcache_size >> XC_PAGE_SHIFT) +
+  (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >>
+ (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT));
 
-QTAILQ_INIT(&mapcache->locked_entries);
+size = mc->nr_buckets * sizeof(MapCacheEntry);
+size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
+trace_xen_map_cache_init(mc->nr_buckets, size);
+mc->entry = g_malloc0(size);
+return mc;
+}
+
+void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque)
+{
+struct rlimit rlimit_as;
+unsigned long max_mcache_size;
 
 if (geteuid() == 0) {
 rlimit_as.rlim_cur = RLIM_INFINITY;
 rlimit_as.rlim_max = RLIM_INFINITY;
-mapcache->max_mcache_size = MCACHE_MAX_SIZE;
+max_mcache_size = MCACHE_MAX_SIZE;
 } else {
 getrlimit(RLIMIT_AS, &rlimit_as);
 rlimit_as.rlim_cur = rlimit_as.rlim_max;
@@ -119,24 +140,14 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void 
*opaque)
 " memory is not infinity");
 }
 if (rlimit_as.rlim_max < MCACHE_MAX_SIZE + NON_MCACHE_MEMORY_SIZE) {
-mapcache->max_mcache_size = rlimit_as.rlim_max -
-NON_MCACHE_MEMORY_SIZE;
+max_mcache_size = rlimit_as.rlim_max - NON_MCACHE_MEMORY_SIZE;
 } else {
-mapcache->max_mcache_size = MCACHE_MAX_SIZE;
+max_mcache_size = MCACHE_MAX_SIZE;
 }
 }
 
+mapcache = xen_map_cache_init_single(f, opaque, max_mcache_size);
 setrlimit(RLIMIT_AS, &rlimit_as);
-
-mapcache->nr_buckets =
-(((mapcache->max_mcache_size >> XC_PAGE_SHIFT) +
-  (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >>
- (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT));
-
-size = mapcache->nr_buckets * sizeof (MapCacheEntry);
-size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
-trace_xen_map_cache_init(mapcache->nr_buckets, size);
-mapcache->entry = g_malloc0(size);
 }
 
 static void xen_remap_bucket(MapCache *mc,
-- 
2.40.1




Re: [PULL v2 00/17] loongarch-to-apply queue

2024-04-30 Thread Richard Henderson

On 4/28/24 19:30, Song Gao wrote:

The following changes since commit fd87be1dada5672f877e03c2ca8504458292c479:

   Merge tag 'accel-20240426' of https://github.com/philmd/qemu  into staging 
(2024-04-26 15:28:13 -0700)

are available in the Git repository at:

   https://gitlab.com/gaosong/qemu.git  tags/pull-loongarch-20240429

for you to fetch changes up to 841ef2c9df5317e32de590424b0c5c36fbb4de78:

   hw/loongarch: Add cells missing from rtc node (2024-04-29 10:25:56 +0800)


Add boot LoongArch elf kernel with FDT

v2: Fix 'make check-tcg' failed.


Gah, wrong hotkey: Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/9.1 as
appropriate.




r~




[PULL 14/21] hw/dma/xlnx_dpdma: fix handling of address_extension descriptor fields

2024-04-30 Thread Peter Maydell
From: Alexandra Diupina 

The DMA descriptor structures for this device have
a set of "address extension" fields which extend the 32
bit source addresses with an extra 16 bits to give a
48 bit address:
 https://docs.amd.com/r/en-US/ug1085-zynq-ultrascale-trm/ADDR_EXT-Field

However, we misimplemented this address extension in several ways:
 * we only extracted 12 bits of the extension fields, not 16
 * we didn't shift the extension field up far enough
 * we accidentally did the shift as 32-bit arithmetic, which
   meant that we would have an overflow instead of setting
   bits [47:32] of the resulting 64-bit address

Add a type cast and use extract64() instead of extract32()
to avoid integer overflow on addition. Fix bit fields
extraction according to documentation.

Found by Linux Verification Center (linuxtesting.org) with SVACE.

Cc: qemu-sta...@nongnu.org
Fixes: d3c6369a96 ("introduce xlnx-dpdma")
Signed-off-by: Alexandra Diupina 
Message-id: 20240428181131.23801-1-adiup...@astralinux.ru
[PMM: adjusted commit message]
Reviewed-by: Peter Maydell 
Signed-off-by: Peter Maydell 
---
 hw/dma/xlnx_dpdma.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/hw/dma/xlnx_dpdma.c b/hw/dma/xlnx_dpdma.c
index 1f5cd64ed10..530717d1885 100644
--- a/hw/dma/xlnx_dpdma.c
+++ b/hw/dma/xlnx_dpdma.c
@@ -175,24 +175,24 @@ static uint64_t 
xlnx_dpdma_desc_get_source_address(DPDMADescriptor *desc,
 
 switch (frag) {
 case 0:
-addr = desc->source_address
-+ (extract32(desc->address_extension, 16, 12) << 20);
+addr = (uint64_t)desc->source_address
++ (extract64(desc->address_extension, 16, 16) << 32);
 break;
 case 1:
-addr = desc->source_address2
-+ (extract32(desc->address_extension_23, 0, 12) << 8);
+addr = (uint64_t)desc->source_address2
++ (extract64(desc->address_extension_23, 0, 16) << 32);
 break;
 case 2:
-addr = desc->source_address3
-+ (extract32(desc->address_extension_23, 16, 12) << 20);
+addr = (uint64_t)desc->source_address3
++ (extract64(desc->address_extension_23, 16, 16) << 32);
 break;
 case 3:
-addr = desc->source_address4
-+ (extract32(desc->address_extension_45, 0, 12) << 8);
+addr = (uint64_t)desc->source_address4
++ (extract64(desc->address_extension_45, 0, 16) << 32);
 break;
 case 4:
-addr = desc->source_address5
-+ (extract32(desc->address_extension_45, 16, 12) << 20);
+addr = (uint64_t)desc->source_address5
++ (extract64(desc->address_extension_45, 16, 16) << 32);
 break;
 default:
 addr = 0;
-- 
2.34.1




[PATCH v4 06/17] xen: mapcache: Break out xen_ram_addr_from_mapcache_single

2024-04-30 Thread Edgar E. Iglesias
From: "Edgar E. Iglesias" 

Break out xen_ram_addr_from_mapcache_single(), a multi-cache
aware version of xen_ram_addr_from_mapcache.

No functional changes.

Signed-off-by: Edgar E. Iglesias 
---
 hw/xen/xen-mapcache.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index 326a9b61ca..d2deff70c8 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -377,7 +377,7 @@ uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size,
 return p;
 }
 
-ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
+static ram_addr_t xen_ram_addr_from_mapcache_single(MapCache *mc, void *ptr)
 {
 MapCacheEntry *entry = NULL;
 MapCacheRev *reventry;
@@ -386,8 +386,8 @@ ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
 ram_addr_t raddr;
 int found = 0;
 
-mapcache_lock(mapcache);
-QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
+mapcache_lock(mc);
+QTAILQ_FOREACH(reventry, &mc->locked_entries, next) {
 if (reventry->vaddr_req == ptr) {
 paddr_index = reventry->paddr_index;
 size = reventry->size;
@@ -396,11 +396,11 @@ ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
 }
 }
 if (!found) {
-mapcache_unlock(mapcache);
+mapcache_unlock(mc);
 return RAM_ADDR_INVALID;
 }
 
-entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
+entry = &mc->entry[paddr_index % mc->nr_buckets];
 while (entry && (entry->paddr_index != paddr_index || entry->size != 
size)) {
 entry = entry->next;
 }
@@ -411,10 +411,15 @@ ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
 raddr = (reventry->paddr_index << MCACHE_BUCKET_SHIFT) +
  ((unsigned long) ptr - (unsigned long) entry->vaddr_base);
 }
-mapcache_unlock(mapcache);
+mapcache_unlock(mc);
 return raddr;
 }
 
+ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
+{
+return xen_ram_addr_from_mapcache_single(mapcache, ptr);
+}
+
 static void xen_invalidate_map_cache_entry_unlocked(uint8_t *buffer)
 {
 MapCacheEntry *entry = NULL, *pentry = NULL;
-- 
2.40.1




[PULL 21/21] tests/qtest : Add testcase for DM163

2024-04-30 Thread Peter Maydell
From: Inès Varhol 

`test_dm163_bank()`
Checks that the pin "sout" of the DM163 led driver outputs the values
received on pin "sin" with the expected latency (depending on the bank).

`test_dm163_gpio_connection()`
Check that changes to relevant STM32L4x5 GPIO pins are propagated to the
DM163 device.

Signed-off-by: Arnaud Minier 
Signed-off-by: Inès Varhol 
Acked-by: Thomas Huth 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 20240424200929.240921-6-ines.var...@telecom-paris.fr
Signed-off-by: Peter Maydell 
---
 tests/qtest/dm163-test.c | 194 +++
 tests/qtest/meson.build  |   2 +
 2 files changed, 196 insertions(+)
 create mode 100644 tests/qtest/dm163-test.c

diff --git a/tests/qtest/dm163-test.c b/tests/qtest/dm163-test.c
new file mode 100644
index 00000000000..3161c9208d8
--- /dev/null
+++ b/tests/qtest/dm163-test.c
@@ -0,0 +1,194 @@
+/*
+ * QTest testcase for DM163
+ *
+ * Copyright (C) 2024 Samuel Tardieu 
+ * Copyright (C) 2024 Arnaud Minier 
+ * Copyright (C) 2024 Inès Varhol 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest.h"
+
+enum DM163_INPUTS {
+SIN = 8,
+DCK = 9,
+RST_B = 10,
+LAT_B = 11,
+SELBK = 12,
+EN_B = 13
+};
+
+#define DEVICE_NAME "/machine/dm163"
+#define GPIO_OUT(name, value) qtest_set_irq_in(qts, DEVICE_NAME, NULL, name,   \
+                                               value)
+#define GPIO_PULSE(name)                                                       \
+  do {                                                                         \
+    GPIO_OUT(name, 1);                                                         \
+    GPIO_OUT(name, 0);                                                         \
+  } while (0)
+
+
+static void rise_gpio_pin_dck(QTestState *qts)
+{
+/* Configure output mode for pin PB1 */
+qtest_writel(qts, 0x48000400, 0xFFFFFEB7);
+/* Write 1 in ODR for PB1 */
+qtest_writel(qts, 0x48000414, 0x00000002);
+}
+
+static void lower_gpio_pin_dck(QTestState *qts)
+{
+/* Configure output mode for pin PB1 */
+qtest_writel(qts, 0x48000400, 0xFFFFFEB7);
+/* Write 0 in ODR for PB1 */
+qtest_writel(qts, 0x48000414, 0x00000000);
+}
+
+static void rise_gpio_pin_selbk(QTestState *qts)
+{
+/* Configure output mode for pin PC5 */
+qtest_writel(qts, 0x48000800, 0xFFFFF7FF);
+/* Write 1 in ODR for PC5 */
+qtest_writel(qts, 0x48000814, 0x00000020);
+}
+
+static void lower_gpio_pin_selbk(QTestState *qts)
+{
+/* Configure output mode for pin PC5 */
+qtest_writel(qts, 0x48000800, 0xFFFFF7FF);
+/* Write 0 in ODR for PC5 */
+qtest_writel(qts, 0x48000814, 0x00000000);
+}
+
+static void rise_gpio_pin_lat_b(QTestState *qts)
+{
+/* Configure output mode for pin PC4 */
+qtest_writel(qts, 0x48000800, 0xFFFFFDFF);
+/* Write 1 in ODR for PC4 */
+qtest_writel(qts, 0x48000814, 0x00000010);
+}
+
+static void lower_gpio_pin_lat_b(QTestState *qts)
+{
+/* Configure output mode for pin PC4 */
+qtest_writel(qts, 0x48000800, 0xFFFFFDFF);
+/* Write 0 in ODR for PC4 */
+qtest_writel(qts, 0x48000814, 0x00000000);
+}
+
+static void rise_gpio_pin_rst_b(QTestState *qts)
+{
+/* Configure output mode for pin PC3 */
+qtest_writel(qts, 0x48000800, 0xFFFFFF7F);
+/* Write 1 in ODR for PC3 */
+qtest_writel(qts, 0x48000814, 0x00000008);
+}
+
+static void lower_gpio_pin_rst_b(QTestState *qts)
+{
+/* Configure output mode for pin PC3 */
+qtest_writel(qts, 0x48000800, 0xFFFFFF7F);
+/* Write 0 in ODR for PC3 */
+qtest_writel(qts, 0x48000814, 0x00000000);
+}
+
+static void rise_gpio_pin_sin(QTestState *qts)
+{
+/* Configure output mode for pin PA4 */
+qtest_writel(qts, 0x48000000, 0xFFFFFDFF);
+/* Write 1 in ODR for PA4 */
+qtest_writel(qts, 0x48000014, 0x00000010);
+}
+
+static void lower_gpio_pin_sin(QTestState *qts)
+{
+/* Configure output mode for pin PA4 */
+qtest_writel(qts, 0x48000000, 0xFFFFFDFF);
+/* Write 0 in ODR for PA4 */
+qtest_writel(qts, 0x48000014, 0x00000000);
+}
+
+static void test_dm163_bank(const void *opaque)
+{
+const unsigned bank = (uintptr_t) opaque;
+const int width = bank ? 192 : 144;
+
+QTestState *qts = qtest_initf("-M b-l475e-iot01a");
+qtest_irq_intercept_out_named(qts, DEVICE_NAME, "sout");
+GPIO_OUT(RST_B, 1);
+GPIO_OUT(EN_B, 0);
+GPIO_OUT(DCK, 0);
+GPIO_OUT(SELBK, bank);
+GPIO_OUT(LAT_B, 1);
+
+/* Fill bank with zeroes */
+GPIO_OUT(SIN, 0);
+for (int i = 0; i < width; i++) {
+GPIO_PULSE(DCK);
+}
+/* Fill bank with ones, check that we get the previous zeroes */
+GPIO_OUT(SIN, 1);
+for (int i = 0; i < width; i++) {
+GPIO_PULSE(DCK);
+g_assert(!qtest_get_irq(qts, 0));
+}
+
+/* Pulse one more bit in the bank, check that we get a one */
+GPIO_PULSE(DCK);
+g_assert(qtest_get_irq(qts, 0));
+
+qtest_quit(qts);
+}

[PULL 15/21] hw/char/stm32l4x5_usart: Fix memory corruption by adding correct class_size

2024-04-30 Thread Peter Maydell
From: Thomas Huth 

"make check-qtest-aarch64" recently started failing on FreeBSD builds,
and valgrind on Linux also detected that there is something fishy with
the new stm32l4x5-usart: The code forgot to set the correct class_size
here, so the various class_init functions in this file wrote beyond
the allocated buffer when setting the subc->type field.

Fixes: 4fb37aea7e ("hw/char: Implement STM32L4x5 USART skeleton")
Signed-off-by: Thomas Huth 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 20240429075908.36302-1-th...@redhat.com
Signed-off-by: Peter Maydell 
---
 hw/char/stm32l4x5_usart.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/char/stm32l4x5_usart.c b/hw/char/stm32l4x5_usart.c
index 2627aab8324..02f666308c0 100644
--- a/hw/char/stm32l4x5_usart.c
+++ b/hw/char/stm32l4x5_usart.c
@@ -617,6 +617,7 @@ static const TypeInfo stm32l4x5_usart_types[] = {
 .parent = TYPE_SYS_BUS_DEVICE,
 .instance_size  = sizeof(Stm32l4x5UsartBaseState),
 .instance_init  = stm32l4x5_usart_base_init,
+.class_size = sizeof(Stm32l4x5UsartBaseClass),
 .class_init = stm32l4x5_usart_base_class_init,
 .abstract   = true,
 }, {
-- 
2.34.1




[PULL 07/21] target/arm: Implement ID_AA64MMFR3_EL1

2024-04-30 Thread Peter Maydell
Newer versions of the Arm ARM (e.g.  rev K.a) now define fields for
ID_AA64MMFR3_EL1.  Implement this register, so that we can set the
fields if we need to.  There's no behaviour change here since we
don't currently set the register value to non-zero.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 20240418152004.2106516-5-peter.mayd...@linaro.org
---
 target/arm/cpu.h | 17 +
 target/arm/helper.c  |  6 --
 target/arm/hvf/hvf.c |  2 ++
 target/arm/kvm.c |  2 ++
 4 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 17efc5d565a..1f90590f937 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1011,6 +1011,7 @@ struct ArchCPU {
 uint64_t id_aa64mmfr0;
 uint64_t id_aa64mmfr1;
 uint64_t id_aa64mmfr2;
+uint64_t id_aa64mmfr3;
 uint64_t id_aa64dfr0;
 uint64_t id_aa64dfr1;
 uint64_t id_aa64zfr0;
@@ -2206,6 +2207,22 @@ FIELD(ID_AA64MMFR2, BBM, 52, 4)
 FIELD(ID_AA64MMFR2, EVT, 56, 4)
 FIELD(ID_AA64MMFR2, E0PD, 60, 4)
 
+FIELD(ID_AA64MMFR3, TCRX, 0, 4)
+FIELD(ID_AA64MMFR3, SCTLRX, 4, 4)
+FIELD(ID_AA64MMFR3, S1PIE, 8, 4)
+FIELD(ID_AA64MMFR3, S2PIE, 12, 4)
+FIELD(ID_AA64MMFR3, S1POE, 16, 4)
+FIELD(ID_AA64MMFR3, S2POE, 20, 4)
+FIELD(ID_AA64MMFR3, AIE, 24, 4)
+FIELD(ID_AA64MMFR3, MEC, 28, 4)
+FIELD(ID_AA64MMFR3, D128, 32, 4)
+FIELD(ID_AA64MMFR3, D128_2, 36, 4)
+FIELD(ID_AA64MMFR3, SNERR, 40, 4)
+FIELD(ID_AA64MMFR3, ANERR, 44, 4)
+FIELD(ID_AA64MMFR3, SDERR, 52, 4)
+FIELD(ID_AA64MMFR3, ADERR, 56, 4)
+FIELD(ID_AA64MMFR3, SPEC_FPACC, 60, 4)
+
 FIELD(ID_AA64DFR0, DEBUGVER, 0, 4)
 FIELD(ID_AA64DFR0, TRACEVER, 4, 4)
 FIELD(ID_AA64DFR0, PMUVER, 8, 4)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 6b224826fbb..bb0e1baf628 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -9004,11 +9004,11 @@ void register_cp_regs_for_features(ARMCPU *cpu)
   .access = PL1_R, .type = ARM_CP_CONST,
   .accessfn = access_aa64_tid3,
   .resetvalue = cpu->isar.id_aa64mmfr2 },
-{ .name = "ID_AA64MMFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
+{ .name = "ID_AA64MMFR3_EL1", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 3,
   .access = PL1_R, .type = ARM_CP_CONST,
   .accessfn = access_aa64_tid3,
-  .resetvalue = 0 },
+  .resetvalue = cpu->isar.id_aa64mmfr3 },
 { .name = "ID_AA64MMFR4_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 4,
   .access = PL1_R, .type = ARM_CP_CONST,
@@ -9165,6 +9165,8 @@ void register_cp_regs_for_features(ARMCPU *cpu)
   .exported_bits = R_ID_AA64MMFR1_AFP_MASK },
 { .name = "ID_AA64MMFR2_EL1",
   .exported_bits = R_ID_AA64MMFR2_AT_MASK },
+{ .name = "ID_AA64MMFR3_EL1",
+  .exported_bits = 0 },
 { .name = "ID_AA64MMFR*_EL1_RESERVED",
   .is_glob = true },
 { .name = "ID_AA64DFR0_EL1",
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 8e942f89b35..08d0757438c 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -497,6 +497,7 @@ static struct hvf_sreg_match hvf_sreg_match[] = {
 #endif
 { HV_SYS_REG_ID_AA64MMFR1_EL1, HVF_SYSREG(0, 7, 3, 0, 1) },
 { HV_SYS_REG_ID_AA64MMFR2_EL1, HVF_SYSREG(0, 7, 3, 0, 2) },
+/* Add ID_AA64MMFR3_EL1 here when HVF supports it */
 
 { HV_SYS_REG_MDSCR_EL1, HVF_SYSREG(0, 2, 2, 0, 2) },
 { HV_SYS_REG_SCTLR_EL1, HVF_SYSREG(1, 0, 3, 0, 0) },
@@ -855,6 +856,7 @@ static bool 
hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
 { HV_SYS_REG_ID_AA64MMFR0_EL1, &host_isar.id_aa64mmfr0 },
 { HV_SYS_REG_ID_AA64MMFR1_EL1, &host_isar.id_aa64mmfr1 },
 { HV_SYS_REG_ID_AA64MMFR2_EL1, &host_isar.id_aa64mmfr2 },
+/* Add ID_AA64MMFR3_EL1 here when HVF supports it */
 };
 hv_vcpu_t fd;
 hv_return_t r = HV_SUCCESS;
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 21ebbf3b8f8..7cf5cf31dec 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -331,6 +331,8 @@ static bool 
kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
   ARM64_SYS_REG(3, 0, 0, 7, 1));
 err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr2,
   ARM64_SYS_REG(3, 0, 0, 7, 2));
+err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr3,
+  ARM64_SYS_REG(3, 0, 0, 7, 3));
 
 /*
  * Note that if AArch32 support is not present in the host,
-- 
2.34.1




[PULL 18/21] hw/arm : Pass STM32L4x5 SYSCFG gpios to STM32L4x5 SoC

2024-04-30 Thread Peter Maydell
From: Inès Varhol 

Exposing SYSCFG inputs to the SoC is practical in order to wire the SoC
to the optional DM163 display from the board code (GPIO outputs need
to be connected to both SYSCFG inputs and DM163 inputs).

STM32L4x5 SYSCFG in-irq interception needed to be changed accordingly.

Signed-off-by: Arnaud Minier 
Signed-off-by: Inès Varhol 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 20240424200929.240921-3-ines.var...@telecom-paris.fr
Signed-off-by: Peter Maydell 
---
 hw/arm/stm32l4x5_soc.c  |  6 --
 tests/qtest/stm32l4x5_gpio-test.c   | 13 -
 tests/qtest/stm32l4x5_syscfg-test.c | 17 ++---
 3 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/hw/arm/stm32l4x5_soc.c b/hw/arm/stm32l4x5_soc.c
index 39924822f3d..38f7a2d5d9f 100644
--- a/hw/arm/stm32l4x5_soc.c
+++ b/hw/arm/stm32l4x5_soc.c
@@ -1,8 +1,8 @@
 /*
  * STM32L4x5 SoC family
  *
- * Copyright (c) 2023 Arnaud Minier 
- * Copyright (c) 2023 Inès Varhol 
+ * Copyright (c) 2023-2024 Arnaud Minier 
+ * Copyright (c) 2023-2024 Inès Varhol 
  *
  * SPDX-License-Identifier: GPL-2.0-or-later
  *
@@ -250,6 +250,8 @@ static void stm32l4x5_soc_realize(DeviceState *dev_soc, 
Error **errp)
 }
 }
 
+qdev_pass_gpios(DEVICE(&s->syscfg), dev_soc, NULL);
+
 /* EXTI device */
 busdev = SYS_BUS_DEVICE(&s->exti);
 if (!sysbus_realize(busdev, errp)) {
diff --git a/tests/qtest/stm32l4x5_gpio-test.c 
b/tests/qtest/stm32l4x5_gpio-test.c
index 0f6bda54d3c..72a78234066 100644
--- a/tests/qtest/stm32l4x5_gpio-test.c
+++ b/tests/qtest/stm32l4x5_gpio-test.c
@@ -43,6 +43,9 @@
 #define OTYPER_PUSH_PULL 0
 #define OTYPER_OPEN_DRAIN 1
 
+/* SoC forwards GPIOs to SysCfg */
+#define SYSCFG "/machine/soc"
+
 const uint32_t moder_reset[NUM_GPIOS] = {
 0xABFFFFFF,
 0xFFFFFEBF,
@@ -284,7 +287,7 @@ static void test_gpio_output_mode(const void *data)
 uint32_t gpio = test_gpio_addr(data);
 unsigned int gpio_id = get_gpio_id(gpio);
 
-qtest_irq_intercept_in(global_qtest, "/machine/soc/syscfg");
+qtest_irq_intercept_in(global_qtest, SYSCFG);
 
 /* Set a bit in ODR and check nothing happens */
 gpio_set_bit(gpio, ODR, pin, 1);
@@ -319,7 +322,7 @@ static void test_gpio_input_mode(const void *data)
 uint32_t gpio = test_gpio_addr(data);
 unsigned int gpio_id = get_gpio_id(gpio);
 
-qtest_irq_intercept_in(global_qtest, "/machine/soc/syscfg");
+qtest_irq_intercept_in(global_qtest, SYSCFG);
 
 /* Configure a line as input, raise it, and check that the pin is high */
 gpio_set_2bits(gpio, MODER, pin, MODER_INPUT);
@@ -348,7 +351,7 @@ static void test_pull_up_pull_down(const void *data)
 uint32_t gpio = test_gpio_addr(data);
 unsigned int gpio_id = get_gpio_id(gpio);
 
-qtest_irq_intercept_in(global_qtest, "/machine/soc/syscfg");
+qtest_irq_intercept_in(global_qtest, SYSCFG);
 
 /* Configure a line as input with pull-up, check the line is set high */
 gpio_set_2bits(gpio, MODER, pin, MODER_INPUT);
@@ -378,7 +381,7 @@ static void test_push_pull(const void *data)
 uint32_t gpio = test_gpio_addr(data);
 uint32_t gpio2 = GPIO_BASE_ADDR + (GPIO_H - gpio);
 
-qtest_irq_intercept_in(global_qtest, "/machine/soc/syscfg");
+qtest_irq_intercept_in(global_qtest, SYSCFG);
 
 /* Setting a line high externally, configuring it in push-pull output */
 /* And checking the pin was disconnected */
@@ -425,7 +428,7 @@ static void test_open_drain(const void *data)
 uint32_t gpio = test_gpio_addr(data);
 uint32_t gpio2 = GPIO_BASE_ADDR + (GPIO_H - gpio);
 
-qtest_irq_intercept_in(global_qtest, "/machine/soc/syscfg");
+qtest_irq_intercept_in(global_qtest, SYSCFG);
 
 /* Setting a line high externally, configuring it in open-drain output */
 /* And checking the pin was disconnected */
diff --git a/tests/qtest/stm32l4x5_syscfg-test.c 
b/tests/qtest/stm32l4x5_syscfg-test.c
index 59bac829b7d..506ca08bc24 100644
--- a/tests/qtest/stm32l4x5_syscfg-test.c
+++ b/tests/qtest/stm32l4x5_syscfg-test.c
@@ -1,8 +1,8 @@
 /*
  * QTest testcase for STM32L4x5_SYSCFG
  *
- * Copyright (c) 2023 Arnaud Minier 
- * Copyright (c) 2023 Inès Varhol 
+ * Copyright (c) 2024 Arnaud Minier 
+ * Copyright (c) 2024 Inès Varhol 
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -25,6 +25,10 @@
 #define SYSCFG_SWPR2 0x28
 #define INVALID_ADDR 0x2C
 
+/* SoC forwards GPIOs to SysCfg */
+#define SYSCFG "/machine/soc"
+#define EXTI "/machine/soc/exti"
+
 static void syscfg_writel(unsigned int offset, uint32_t value)
 {
 writel(SYSCFG_BASE_ADDR + offset, value);
@@ -37,8 +41,7 @@ static uint32_t syscfg_readl(unsigned int offset)
 
 static void syscfg_set_irq(int num, int level)
 {
-   qtest_set_irq_in(global_qtest, "/machine/soc/syscfg",
-NULL, num, level);
+   qtest_set_irq_in(global_qtest, SYSCFG, NULL, num, level);
 }
 
 static void s

[PULL 12/21] hw/watchdog/sbsa_gwdt: Make watchdog timer frequency a QOM property

2024-04-30 Thread Peter Maydell
Currently the sbsa_gwdt watchdog device hardcodes its frequency at
62.5MHz. In real hardware, this watchdog is supposed to be driven
from the system counter, which also drives the CPU generic timers.
Newer CPU types (in particular from Armv8.6) should have a CPU
generic timer frequency of 1GHz, so we can't leave the watchdog
on the old QEMU default of 62.5MHz.

Make the frequency a QOM property so it can be set by the board,
and have our only board that uses this device set that frequency
to the same value it sets the CPU frequency.

Signed-off-by: Peter Maydell 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 20240426122913.3427983-4-peter.mayd...@linaro.org
---
 include/hw/watchdog/sbsa_gwdt.h |  3 +--
 hw/arm/sbsa-ref.c   |  1 +
 hw/watchdog/sbsa_gwdt.c | 15 ++-
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/include/hw/watchdog/sbsa_gwdt.h b/include/hw/watchdog/sbsa_gwdt.h
index 70b137de301..4bdc6c6fdb6 100644
--- a/include/hw/watchdog/sbsa_gwdt.h
+++ b/include/hw/watchdog/sbsa_gwdt.h
@@ -55,8 +55,6 @@
 #define SBSA_GWDT_RMMIO_SIZE 0x1000
 #define SBSA_GWDT_CMMIO_SIZE 0x1000
 
-#define SBSA_TIMER_FREQ  62500000 /* Hz */
-
 typedef struct SBSA_GWDTState {
 /* <private> */
 SysBusDevice parent_obj;
@@ -67,6 +65,7 @@ typedef struct SBSA_GWDTState {
 qemu_irq irq;
 
 QEMUTimer *timer;
+uint64_t freq;
 
 uint32_t id;
 uint32_t wcs;
diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
index 36f6f717b4b..57c337fd92a 100644
--- a/hw/arm/sbsa-ref.c
+++ b/hw/arm/sbsa-ref.c
@@ -543,6 +543,7 @@ static void create_wdt(const SBSAMachineState *sms)
 SysBusDevice *s = SYS_BUS_DEVICE(dev);
 int irq = sbsa_ref_irqmap[SBSA_GWDT_WS0];
 
+qdev_prop_set_uint64(dev, "clock-frequency", SBSA_GTIMER_HZ);
 sysbus_realize_and_unref(s, &error_fatal);
 sysbus_mmio_map(s, 0, rbase);
 sysbus_mmio_map(s, 1, cbase);
diff --git a/hw/watchdog/sbsa_gwdt.c b/hw/watchdog/sbsa_gwdt.c
index 96895d76369..d437535cc66 100644
--- a/hw/watchdog/sbsa_gwdt.c
+++ b/hw/watchdog/sbsa_gwdt.c
@@ -18,6 +18,7 @@
 #include "qemu/osdep.h"
 #include "sysemu/reset.h"
 #include "sysemu/watchdog.h"
+#include "hw/qdev-properties.h"
 #include "hw/watchdog/sbsa_gwdt.h"
 #include "qemu/timer.h"
 #include "migration/vmstate.h"
@@ -109,7 +110,7 @@ static void sbsa_gwdt_update_timer(SBSA_GWDTState *s, 
WdtRefreshType rtype)
 timeout = s->woru;
 timeout <<= 32;
 timeout |= s->worl;
-timeout = muldiv64(timeout, NANOSECONDS_PER_SECOND, SBSA_TIMER_FREQ);
+timeout = muldiv64(timeout, NANOSECONDS_PER_SECOND, s->freq);
 timeout += qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
 
 if ((rtype == EXPLICIT_REFRESH) || ((rtype == TIMEOUT_REFRESH) &&
@@ -261,6 +262,17 @@ static void wdt_sbsa_gwdt_realize(DeviceState *dev, Error 
**errp)
 dev);
 }
 
+static Property wdt_sbsa_gwdt_props[] = {
+/*
+ * Timer frequency in Hz. This must match the frequency used by
+ * the CPU's generic timer. Default 62.5MHz matches QEMU's legacy
+ * CPU timer frequency default.
+ */
+DEFINE_PROP_UINT64("clock-frequency", struct SBSA_GWDTState, freq,
+   62500000),
+DEFINE_PROP_END_OF_LIST(),
+};
+
 static void wdt_sbsa_gwdt_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
@@ -271,6 +283,7 @@ static void wdt_sbsa_gwdt_class_init(ObjectClass *klass, 
void *data)
 set_bit(DEVICE_CATEGORY_WATCHDOG, dc->categories);
 dc->vmsd = &vmstate_sbsa_gwdt;
 dc->desc = "SBSA-compliant generic watchdog device";
+device_class_set_props(dc, wdt_sbsa_gwdt_props);
 }
 
 static const TypeInfo wdt_sbsa_gwdt_info = {
-- 
2.34.1




[PULL 08/21] target/arm: Enable FEAT_Spec_FPACC for -cpu max

2024-04-30 Thread Peter Maydell
FEAT_Spec_FPACC is a feature describing speculative behaviour in the
event of a PAC authentication failure when FEAT_FPACCOMBINE is
implemented.  FEAT_Spec_FPACC means that the speculative use of
pointers processed by a PAC Authentication is not materially
different in terms of the impact on cached microarchitectural state
(caches, TLBs, etc) between passing and failing of the PAC
Authentication.

QEMU doesn't do speculative execution, so we can advertise
this feature.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 20240418152004.2106516-6-peter.mayd...@linaro.org
---
 docs/system/arm/emulation.rst | 1 +
 target/arm/tcg/cpu64.c| 4 
 2 files changed, 5 insertions(+)

diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
index 307539cff91..7fcea54d8db 100644
--- a/docs/system/arm/emulation.rst
+++ b/docs/system/arm/emulation.rst
@@ -61,6 +61,7 @@ the following architecture extensions:
 - FEAT_FP16 (Half-precision floating-point data processing)
 - FEAT_FPAC (Faulting on AUT* instructions)
 - FEAT_FPACCOMBINE (Faulting on combined pointer authentication instructions)
+- FEAT_FPACC_SPEC (Speculative behavior of combined pointer authentication 
instructions)
 - FEAT_FRINTTS (Floating-point to integer instructions)
 - FEAT_FlagM (Flag manipulation instructions v2)
 - FEAT_FlagM2 (Enhancements to flag manipulation instructions)
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
index ebb585afd85..443cffe3a85 100644
--- a/target/arm/tcg/cpu64.c
+++ b/target/arm/tcg/cpu64.c
@@ -1217,6 +1217,10 @@ void aarch64_max_tcg_initfn(Object *obj)
 t = FIELD_DP64(t, ID_AA64MMFR2, E0PD, 1); /* FEAT_E0PD */
 cpu->isar.id_aa64mmfr2 = t;
 
+t = cpu->isar.id_aa64mmfr3;
+t = FIELD_DP64(t, ID_AA64MMFR3, SPEC_FPACC, 1); /* FEAT_FPACC_SPEC */
+cpu->isar.id_aa64mmfr3 = t;
+
 t = cpu->isar.id_aa64zfr0;
 t = FIELD_DP64(t, ID_AA64ZFR0, SVEVER, 1);
 t = FIELD_DP64(t, ID_AA64ZFR0, AES, 2);   /* FEAT_SVE_PMULL128 */
-- 
2.34.1




[PULL 19/21] hw/arm : Create Bl475eMachineState

2024-04-30 Thread Peter Maydell
From: Inès Varhol 

Signed-off-by: Arnaud Minier 
Signed-off-by: Inès Varhol 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 20240424200929.240921-4-ines.var...@telecom-paris.fr
Signed-off-by: Peter Maydell 
---
 hw/arm/b-l475e-iot01a.c | 46 -
 1 file changed, 32 insertions(+), 14 deletions(-)

diff --git a/hw/arm/b-l475e-iot01a.c b/hw/arm/b-l475e-iot01a.c
index d862aa43fc3..970c637ce61 100644
--- a/hw/arm/b-l475e-iot01a.c
+++ b/hw/arm/b-l475e-iot01a.c
@@ -2,8 +2,8 @@
  * B-L475E-IOT01A Discovery Kit machine
  * (B-L475E-IOT01A IoT Node)
  *
- * Copyright (c) 2023 Arnaud Minier 
- * Copyright (c) 2023 Inès Varhol 
+ * Copyright (c) 2023-2024 Arnaud Minier 
+ * Copyright (c) 2023-2024 Inès Varhol 
  *
  * SPDX-License-Identifier: GPL-2.0-or-later
  *
@@ -32,33 +32,51 @@
 
 /* B-L475E-IOT01A implementation is derived from netduinoplus2 */
 
-static void b_l475e_iot01a_init(MachineState *machine)
+#define TYPE_B_L475E_IOT01A MACHINE_TYPE_NAME("b-l475e-iot01a")
+OBJECT_DECLARE_SIMPLE_TYPE(Bl475eMachineState, B_L475E_IOT01A)
+
+typedef struct Bl475eMachineState {
+MachineState parent_obj;
+
+Stm32l4x5SocState soc;
+} Bl475eMachineState;
+
+static void bl475e_init(MachineState *machine)
 {
+Bl475eMachineState *s = B_L475E_IOT01A(machine);
 const Stm32l4x5SocClass *sc;
-DeviceState *dev;
 
-dev = qdev_new(TYPE_STM32L4X5XG_SOC);
-object_property_add_child(OBJECT(machine), "soc", OBJECT(dev));
-sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+object_initialize_child(OBJECT(machine), "soc", &s->soc,
+TYPE_STM32L4X5XG_SOC);
+sysbus_realize(SYS_BUS_DEVICE(&s->soc), &error_fatal);
 
-sc = STM32L4X5_SOC_GET_CLASS(dev);
-armv7m_load_kernel(ARM_CPU(first_cpu),
-   machine->kernel_filename,
-   0, sc->flash_size);
+sc = STM32L4X5_SOC_GET_CLASS(&s->soc);
+armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename, 0,
+   sc->flash_size);
 }
 
-static void b_l475e_iot01a_machine_init(MachineClass *mc)
+static void bl475e_machine_init(ObjectClass *oc, void *data)
 {
+MachineClass *mc = MACHINE_CLASS(oc);
 static const char *machine_valid_cpu_types[] = {
 ARM_CPU_TYPE_NAME("cortex-m4"),
 NULL
 };
 mc->desc = "B-L475E-IOT01A Discovery Kit (Cortex-M4)";
-mc->init = b_l475e_iot01a_init;
+mc->init = bl475e_init;
 mc->valid_cpu_types = machine_valid_cpu_types;
 
 /* SRAM pre-allocated as part of the SoC instantiation */
 mc->default_ram_size = 0;
 }
 
-DEFINE_MACHINE("b-l475e-iot01a", b_l475e_iot01a_machine_init)
+static const TypeInfo bl475e_machine_type[] = {
+{
+.name   = TYPE_B_L475E_IOT01A,
+.parent = TYPE_MACHINE,
+.instance_size  = sizeof(Bl475eMachineState),
+.class_init = bl475e_machine_init,
+}
+};
+
+DEFINE_TYPES(bl475e_machine_type)
-- 
2.34.1




[PULL 06/21] target/arm: Enable FEAT_ETS2 for -cpu max

2024-04-30 Thread Peter Maydell
FEAT_ETS2 is a tighter set of guarantees about memory ordering
involving translation table walks than the old FEAT_ETS; FEAT_ETS has
been retired from the Arm ARM and the old ID_AA64MMFR1.ETS == 1
now gives no greater guarantees than ETS == 0.

FEAT_ETS2 requires:
 * the virtual address of a load or store that appears in program
   order after a DSB cannot be translated until after the DSB
   completes (section B2.10.9)
 * TLB maintenance operations that only affect translations without
   execute permission are guaranteed complete after a DSB
   (R_BLDZX)
 * if a memory access RW1 is ordered-before memory access RW2,
   then RW1 is also ordered-before any translation table walk
   generated by RW2 that generates a Translation, Address size
   or Access flag fault (R_NNFPF, I_CLGHP)

As with FEAT_ETS, QEMU is already compliant, because we do not
reorder translation table walk memory accesses relative to other
memory accesses, and we always guarantee to have finished TLB
maintenance as soon as the TLB op is done.

Update the documentation to list FEAT_ETS2 instead of the
no-longer-existent FEAT_ETS, and update the 'max' CPU ID registers.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 20240418152004.2106516-4-peter.mayd...@linaro.org
---
 docs/system/arm/emulation.rst | 2 +-
 target/arm/tcg/cpu32.c| 2 +-
 target/arm/tcg/cpu64.c| 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
index d70b66f7530..307539cff91 100644
--- a/docs/system/arm/emulation.rst
+++ b/docs/system/arm/emulation.rst
@@ -50,7 +50,7 @@ the following architecture extensions:
 - FEAT_EL2 (Support for execution at EL2)
 - FEAT_EL3 (Support for execution at EL3)
 - FEAT_EPAC (Enhanced pointer authentication)
-- FEAT_ETS (Enhanced Translation Synchronization)
+- FEAT_ETS2 (Enhanced Translation Synchronization)
 - FEAT_EVT (Enhanced Virtualization Traps)
 - FEAT_F32MM (Single-precision Matrix Multiplication)
 - FEAT_F64MM (Double-precision Matrix Multiplication)
diff --git a/target/arm/tcg/cpu32.c b/target/arm/tcg/cpu32.c
index de8f2be9416..b5a60682fa6 100644
--- a/target/arm/tcg/cpu32.c
+++ b/target/arm/tcg/cpu32.c
@@ -67,7 +67,7 @@ void aa32_max_features(ARMCPU *cpu)
 cpu->isar.id_mmfr4 = t;
 
 t = cpu->isar.id_mmfr5;
-t = FIELD_DP32(t, ID_MMFR5, ETS, 1);  /* FEAT_ETS */
+t = FIELD_DP32(t, ID_MMFR5, ETS, 2);  /* FEAT_ETS2 */
 cpu->isar.id_mmfr5 = t;
 
 t = cpu->isar.id_pfr0;
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
index 8ad05c53e8d..ebb585afd85 100644
--- a/target/arm/tcg/cpu64.c
+++ b/target/arm/tcg/cpu64.c
@@ -1196,7 +1196,7 @@ void aarch64_max_tcg_initfn(Object *obj)
 t = FIELD_DP64(t, ID_AA64MMFR1, LO, 1);   /* FEAT_LOR */
 t = FIELD_DP64(t, ID_AA64MMFR1, PAN, 3);  /* FEAT_PAN3 */
 t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1);  /* FEAT_XNX */
-t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 1);  /* FEAT_ETS */
+t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 2);  /* FEAT_ETS2 */
 t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1);  /* FEAT_HCX */
 t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1);   /* FEAT_TIDCP1 */
 cpu->isar.id_aa64mmfr1 = t;
-- 
2.34.1




[PULL 09/21] tests/avocado: update sunxi kernel from armbian to 6.6.16

2024-04-30 Thread Peter Maydell
The Linux kernel 5.10.16 binary for sunxi has been removed from
apt.armbian.com. This means that the avocado tests for these machines
will be skipped (status CANCEL) if the old binary isn't present in
the avocado cache.

Update to 6.6.16, in the same way we did in commit e384db41d8661
when we moved to 5.10.16 in 2021.

Cc: qemu-sta...@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2284
Signed-off-by: Peter Maydell 
Reviewed-by: Strahinja Jankovic 
Reviewed-by: Niek Linnenbank 
Tested-by: Niek Linnenbank 
Message-id: 20240415151845.1564201-1-peter.mayd...@linaro.org
---
 tests/avocado/boot_linux_console.py | 70 ++---
 tests/avocado/replay_kernel.py  |  8 ++--
 2 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/tests/avocado/boot_linux_console.py 
b/tests/avocado/boot_linux_console.py
index 180ac17326e..c35fc5e9ba2 100644
--- a/tests/avocado/boot_linux_console.py
+++ b/tests/avocado/boot_linux_console.py
@@ -646,12 +646,12 @@ def test_arm_cubieboard_initrd(self):
 :avocado: tags=accel:tcg
 """
 deb_url = ('https://apt.armbian.com/pool/main/l/'
-   
'linux-5.10.16-sunxi/linux-image-current-sunxi_21.02.2_armhf.deb')
-deb_hash = '9fa84beda245cabf0b4fa84cf6eaa7738ead1da0'
+   
'linux-6.6.16/linux-image-current-sunxi_24.2.1_armhf__6.6.16-Seb3e-D6b4a-P2359-Ce96bHfe66-HK01ba-V014b-B067e-R448a.deb')
+deb_hash = 'f7c3c8c5432f765445dc6e7eab02f3bbe668256b'
 deb_path = self.fetch_asset(deb_url, asset_hash=deb_hash)
 kernel_path = self.extract_from_deb(deb_path,
-'/boot/vmlinuz-5.10.16-sunxi')
-dtb_path = 
'/usr/lib/linux-image-current-sunxi/sun4i-a10-cubieboard.dtb'
+
'/boot/vmlinuz-6.6.16-current-sunxi')
+dtb_path = 
'/usr/lib/linux-image-6.6.16-current-sunxi/sun4i-a10-cubieboard.dtb'
 dtb_path = self.extract_from_deb(deb_path, dtb_path)
 initrd_url = ('https://github.com/groeck/linux-build-test/raw/'
   '2eb0a73b5d5a28df3170c546ddaaa9757e1e0848/rootfs/'
@@ -690,12 +690,12 @@ def test_arm_cubieboard_sata(self):
 :avocado: tags=accel:tcg
 """
 deb_url = ('https://apt.armbian.com/pool/main/l/'
-   
'linux-5.10.16-sunxi/linux-image-current-sunxi_21.02.2_armhf.deb')
-deb_hash = '9fa84beda245cabf0b4fa84cf6eaa7738ead1da0'
+   
'linux-6.6.16/linux-image-current-sunxi_24.2.1_armhf__6.6.16-Seb3e-D6b4a-P2359-Ce96bHfe66-HK01ba-V014b-B067e-R448a.deb')
+deb_hash = 'f7c3c8c5432f765445dc6e7eab02f3bbe668256b'
 deb_path = self.fetch_asset(deb_url, asset_hash=deb_hash)
 kernel_path = self.extract_from_deb(deb_path,
-'/boot/vmlinuz-5.10.16-sunxi')
-dtb_path = 
'/usr/lib/linux-image-current-sunxi/sun4i-a10-cubieboard.dtb'
+
'/boot/vmlinuz-6.6.16-current-sunxi')
+dtb_path = 
'/usr/lib/linux-image-6.6.16-current-sunxi/sun4i-a10-cubieboard.dtb'
 dtb_path = self.extract_from_deb(deb_path, dtb_path)
 rootfs_url = ('https://github.com/groeck/linux-build-test/raw/'
   '2eb0a73b5d5a28df3170c546ddaaa9757e1e0848/rootfs/'
@@ -872,13 +872,13 @@ def test_arm_bpim2u(self):
 :avocado: tags=machine:bpim2u
 :avocado: tags=accel:tcg
 """
-deb_url = ('https://apt.armbian.com/pool/main/l/linux-5.10.16-sunxi/'
-   'linux-image-current-sunxi_21.02.2_armhf.deb')
-deb_hash = '9fa84beda245cabf0b4fa84cf6eaa7738ead1da0'
+deb_url = ('https://apt.armbian.com/pool/main/l/'
+   
'linux-6.6.16/linux-image-current-sunxi_24.2.1_armhf__6.6.16-Seb3e-D6b4a-P2359-Ce96bHfe66-HK01ba-V014b-B067e-R448a.deb')
+deb_hash = 'f7c3c8c5432f765445dc6e7eab02f3bbe668256b'
 deb_path = self.fetch_asset(deb_url, asset_hash=deb_hash)
 kernel_path = self.extract_from_deb(deb_path,
-'/boot/vmlinuz-5.10.16-sunxi')
-dtb_path = ('/usr/lib/linux-image-current-sunxi/'
+
'/boot/vmlinuz-6.6.16-current-sunxi')
+dtb_path = ('/usr/lib/linux-image-6.6.16-current-sunxi/'
 'sun8i-r40-bananapi-m2-ultra.dtb')
 dtb_path = self.extract_from_deb(deb_path, dtb_path)
 
@@ -899,13 +899,13 @@ def test_arm_bpim2u_initrd(self):
 :avocado: tags=accel:tcg
 :avocado: tags=machine:bpim2u
 """
-deb_url = ('https://apt.armbian.com/pool/main/l/linux-5.10.16-sunxi/'
-   'linux-image-current-sunxi_21.02.2_armhf.deb')
-deb_hash = '9fa84beda245cabf0b4fa84cf6eaa7738ead1da0'
+deb_url = ('https://apt.armbian.com/pool/main/l/'
+   
'linux-6.6.16/linux-image-current-sunxi_24.2.1_armhf__6.6.16-Seb3e-D6b4a-P2359-Ce96bH

[PULL 16/21] hw/arm/npcm7xx: Store derivative OTP fuse key in little endian

2024-04-30 Thread Peter Maydell
From: Philippe Mathieu-Daudé 

Use little endian for derivative OTP fuse key.

Cc: qemu-sta...@nongnu.org
Fixes: c752bb079b ("hw/nvram: NPCM7xx OTP device model")
Suggested-by: Avi Fishman 
Signed-off-by: Philippe Mathieu-Daudé 
Message-id: 20240422125813.1403-1-phi...@linaro.org
Reviewed-by: Peter Maydell 
Signed-off-by: Peter Maydell 
---
 hw/arm/npcm7xx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c
index cc68b5d8f12..9f2d96c733a 100644
--- a/hw/arm/npcm7xx.c
+++ b/hw/arm/npcm7xx.c
@@ -24,6 +24,7 @@
 #include "hw/qdev-clock.h"
 #include "hw/qdev-properties.h"
 #include "qapi/error.h"
+#include "qemu/bswap.h"
 #include "qemu/units.h"
 #include "sysemu/sysemu.h"
 #include "target/arm/cpu-qom.h"
@@ -386,7 +387,7 @@ static void npcm7xx_init_fuses(NPCM7xxState *s)
  * The initial mask of disabled modules indicates the chip derivative (e.g.
  * NPCM750 or NPCM730).
  */
-value = tswap32(nc->disabled_modules);
+value = cpu_to_le32(nc->disabled_modules);
 npcm7xx_otp_array_write(&s->fuse_array, &value, NPCM7XX_FUSE_DERIVATIVE,
 sizeof(value));
 }
-- 
2.34.1




[PULL 05/21] target/arm: Enable FEAT_CSV2_3 for -cpu max

2024-04-30 Thread Peter Maydell
FEAT_CSV2_3 adds a mechanism to identify if hardware cannot disclose
information about whether branch targets and branch history trained
in one hardware described context can control speculative execution
in a different hardware context.

There is no branch prediction in TCG, so we don't need to do anything
to be compliant with this.  Update the '-cpu max' ID registers to
advertise the feature.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 20240418152004.2106516-3-peter.mayd...@linaro.org
---
 docs/system/arm/emulation.rst | 1 +
 target/arm/tcg/cpu64.c| 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
index 5fdc64a944f..d70b66f7530 100644
--- a/docs/system/arm/emulation.rst
+++ b/docs/system/arm/emulation.rst
@@ -32,6 +32,7 @@ the following architecture extensions:
 - FEAT_CSV2_1p1 (Cache speculation variant 2, version 1.1)
 - FEAT_CSV2_1p2 (Cache speculation variant 2, version 1.2)
 - FEAT_CSV2_2 (Cache speculation variant 2, version 2)
+- FEAT_CSV2_3 (Cache speculation variant 2, version 3)
 - FEAT_CSV3 (Cache speculation variant 3)
 - FEAT_DGH (Data gathering hint)
 - FEAT_DIT (Data Independent Timing instructions)
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
index 62c4663512b..8ad05c53e8d 100644
--- a/target/arm/tcg/cpu64.c
+++ b/target/arm/tcg/cpu64.c
@@ -1159,7 +1159,7 @@ void aarch64_max_tcg_initfn(Object *obj)
 t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1);
 t = FIELD_DP64(t, ID_AA64PFR0, SEL2, 1);  /* FEAT_SEL2 */
 t = FIELD_DP64(t, ID_AA64PFR0, DIT, 1);   /* FEAT_DIT */
-t = FIELD_DP64(t, ID_AA64PFR0, CSV2, 2);  /* FEAT_CSV2_2 */
+t = FIELD_DP64(t, ID_AA64PFR0, CSV2, 3);  /* FEAT_CSV2_3 */
 t = FIELD_DP64(t, ID_AA64PFR0, CSV3, 1);  /* FEAT_CSV3 */
 cpu->isar.id_aa64pfr0 = t;
 
@@ -1174,7 +1174,7 @@ void aarch64_max_tcg_initfn(Object *obj)
 t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3);   /* FEAT_MTE3 */
 t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 0);  /* FEAT_RASv1p1 + 
FEAT_DoubleFault */
 t = FIELD_DP64(t, ID_AA64PFR1, SME, 1);   /* FEAT_SME */
-t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_2 */
+t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_3 */
 t = FIELD_DP64(t, ID_AA64PFR1, NMI, 1);   /* FEAT_NMI */
 cpu->isar.id_aa64pfr1 = t;
 
-- 
2.34.1




[PULL 02/21] hvf: arm: Remove PL1_WRITE_MASK

2024-04-30 Thread Peter Maydell
From: Zenghui Yu 

PL1_WRITE_MASK has never been used since the first commit a1477da3ddeb
("hvf: Add Apple Silicon support"), so remove it.

Signed-off-by: Zenghui Yu 
Message-id: 20240422092715.71973-1-zenghui...@linux.dev
Reviewed-by: Peter Maydell 
Signed-off-by: Peter Maydell 
---
 target/arm/hvf/hvf.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index db628c1cba7..8e942f89b35 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -150,7 +150,6 @@ void hvf_arm_init_debug(void)
 
 #define HVF_SYSREG(crn, crm, op0, op1, op2) \
 ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, crn, crm, op0, op1, op2)
-#define PL1_WRITE_MASK 0x4
 
 #define SYSREG_OP0_SHIFT  20
 #define SYSREG_OP0_MASK   0x3
-- 
2.34.1




  1   2   3   >