date:20240509


On 08/05/2024 09.44, Stefano Garzarella wrote:

`memory-backend-shm` can be used with vhost-user devices, so let's
add a new test case for it.

Signed-off-by: Stefano Garzarella 
---
  tests/qtest/vhost-user-test.c | 23 +++
  1 file changed, 23 insertions(+)

diff --git a/tests/qtest/vhost-user-test.c b/tests/qtest/vhost-user-test.c
index d4e437265f..8c1d903b2a 100644
--- a/tests/qtest/vhost-user-test.c
+++ b/tests/qtest/vhost-user-test.c
@@ -44,6 +44,8 @@
  "mem-path=%s,share=on -numa node,memdev=mem"
  #define QEMU_CMD_MEMFD  " -m %d -object 
memory-backend-memfd,id=mem,size=%dM," \
  " -numa node,memdev=mem"
+#define QEMU_CMD_SHM" -m %d -object memory-backend-shm,id=mem,size=%dM," \
+" -numa node,memdev=mem"
  #define QEMU_CMD_CHR" -chardev socket,id=%s,path=%s%s"
  #define QEMU_CMD_NETDEV " -netdev vhost-user,id=hs0,chardev=%s,vhostforce=on"
  
@@ -195,6 +197,7 @@ enum test_memfd {

  TEST_MEMFD_AUTO,
  TEST_MEMFD_YES,
  TEST_MEMFD_NO,
+TEST_MEMFD_SHM,
  };
  
  static void append_vhost_net_opts(TestServer *s, GString *cmd_line,

@@ -228,6 +231,8 @@ static void append_mem_opts(TestServer *server, GString 
*cmd_line,
  
  if (memfd == TEST_MEMFD_YES) {

  g_string_append_printf(cmd_line, QEMU_CMD_MEMFD, size, size);
+} else if (memfd == TEST_MEMFD_SHM) {
+g_string_append_printf(cmd_line, QEMU_CMD_SHM, size, size);
  } else {
  const char *root = init_hugepagefs() ? : server->tmpfs;
  
@@ -788,6 +793,19 @@ static void *vhost_user_test_setup_memfd(GString *cmd_line, void *arg)

  return server;
  }
  
+static void *vhost_user_test_setup_shm(GString *cmd_line, void *arg)

+{
+TestServer *server = test_server_new("vhost-user-test", arg);
+test_server_listen(server);
+
+append_mem_opts(server, cmd_line, 256, TEST_MEMFD_SHM);
+server->vu_ops->append_opts(server, cmd_line, "");
+
+g_test_queue_destroy(vhost_user_test_cleanup, server);
+
+return server;
+}
+
  static void test_read_guest_mem(void *obj, void *arg, QGuestAllocator *alloc)
  {
  TestServer *server = arg;
@@ -1081,6 +1099,11 @@ static void register_vhost_user_test(void)
   "virtio-net",
   test_read_guest_mem, );
  
+opts.before = vhost_user_test_setup_shm;

+qos_add_test("vhost-user/read-guest-mem/shm",
+ "virtio-net",
+ test_read_guest_mem, );
+
  if (qemu_memfd_check(MFD_ALLOW_SEALING)) {
  opts.before = vhost_user_test_setup_memfd;
  qos_add_test("vhost-user/read-guest-mem/memfd",


Acked-by: Thomas Huth

Re: [PATCH v4 11/12] tests/qtest/vhost-user-blk-test: use memory-backend-shm


On 08/05/2024 09.44, Stefano Garzarella wrote:

`memory-backend-memfd` is available only on Linux while the new
`memory-backend-shm` can be used on any POSIX-compliant operating
system. Let's use it so we can run the test in multiple environments.

Signed-off-by: Stefano Garzarella 
---
  tests/qtest/vhost-user-blk-test.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qtest/vhost-user-blk-test.c 
b/tests/qtest/vhost-user-blk-test.c
index 117b9acd10..e945f6abf2 100644
--- a/tests/qtest/vhost-user-blk-test.c
+++ b/tests/qtest/vhost-user-blk-test.c
@@ -906,7 +906,7 @@ static void start_vhost_user_blk(GString *cmd_line, int 
vus_instances,
 vhost_user_blk_bin);
  
  g_string_append_printf(cmd_line,

-" -object memory-backend-memfd,id=mem,size=256M,share=on "
+" -object memory-backend-shm,id=mem,size=256M,share=on "
  " -M memory-backend=mem -m 256M ");
  
  for (i = 0; i < vus_instances; i++) {


Acked-by: Thomas Huth

Re: [PATCH 13/13] tests/qtest: arm: fix operation in a build without any boards or devices


On 09/05/2024 19.00, Paolo Bonzini wrote:

ARM/aarch64 are easy to fix because they already have to pass a machine
type by hand.  Just guard the tests with a check that the machine actually
exists.

Signed-off-by: Paolo Bonzini 
---
  tests/qtest/arm-cpu-features.c | 4 
  tests/qtest/migration-test.c   | 6 ++
  tests/qtest/numa-test.c| 4 
  3 files changed, 14 insertions(+)



Reviewed-by: Thomas Huth

Re: [PATCH 03/13] s390: move css_migration_enabled from machine to css.c


On 09/05/2024 19.00, Paolo Bonzini wrote:

The CSS subsystem uses global variables, just face the truth and use
a variable also for whether the CSS vmstate is in use; remove the
indirection of fetching it from the machine type, which makes the
TCG code depend unnecessarily on the virtio-ccw machine.

Signed-off-by: Paolo Bonzini 
---
  include/hw/s390x/css.h |  6 ++
  include/hw/s390x/s390-virtio-ccw.h |  7 ---
  hw/s390x/css.c | 10 +++---
  hw/s390x/s390-virtio-ccw.c | 15 +++
  4 files changed, 16 insertions(+), 22 deletions(-)

diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h
index ba72ee3dd20..8289e458370 100644
--- a/include/hw/s390x/css.h
+++ b/include/hw/s390x/css.h
@@ -333,4 +333,10 @@ static inline int ccw_dstream_read_buf(CcwDataStream *cds, 
void *buff, int len)
  #define ccw_dstream_read(cds, v) ccw_dstream_read_buf((cds), &(v), sizeof(v))
  #define ccw_dstream_write(cds, v) ccw_dstream_write_buf((cds), &(v), 
sizeof(v))
  
+/**

+ * true if (vmstate based) migration of the channel subsystem
+ * is enabled, false if it is disabled.
+ */
+extern bool css_migration_enabled;
+
  #endif
diff --git a/include/hw/s390x/s390-virtio-ccw.h 
b/include/hw/s390x/s390-virtio-ccw.h
index c1d46e78af8..c0494e511cb 100644
--- a/include/hw/s390x/s390-virtio-ccw.h
+++ b/include/hw/s390x/s390-virtio-ccw.h
@@ -43,7 +43,6 @@ struct S390CcwMachineClass {
  /*< public >*/
  bool ri_allowed;
  bool cpu_model_allowed;
-bool css_migration_enabled;
  bool hpage_1m_allowed;
  int max_threads;
  };
@@ -55,10 +54,4 @@ bool cpu_model_allowed(void);
  /* 1M huge page mappings allowed by the machine */
  bool hpage_1m_allowed(void);
  
-/**

- * Returns true if (vmstate based) migration of the channel subsystem
- * is enabled, false if it is disabled.
- */
-bool css_migration_enabled(void);
-
  #endif
diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index 295530963a6..b2d5327dbf4 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -23,6 +23,8 @@
  #include "hw/s390x/s390-virtio-ccw.h"
  #include "hw/s390x/s390-ccw.h"
  
+bool css_migration_enabled = true;

+
  typedef struct CrwContainer {
  CRW crw;
  QTAILQ_ENTRY(CrwContainer) sibling;
@@ -180,7 +182,7 @@ static const VMStateDescription vmstate_orb = {
  
  static bool vmstate_schdev_orb_needed(void *opaque)

  {
-return css_migration_enabled();
+return css_migration_enabled;
  }
  
  static const VMStateDescription vmstate_schdev_orb = {

@@ -388,7 +390,7 @@ static int subch_dev_post_load(void *opaque, int version_id)
  css_subch_assign(s->cssid, s->ssid, s->schid, s->devno, s);
  }
  
-if (css_migration_enabled()) {

+if (css_migration_enabled) {
  /* No compat voodoo to do ;) */
  return 0;
  }
@@ -412,7 +414,9 @@ static int subch_dev_post_load(void *opaque, int version_id)
  
  void css_register_vmstate(void)

  {
-vmstate_register(NULL, 0, _css, _subsys);
+if (css_migration_enabled) {
+vmstate_register(NULL, 0, _css, _subsys);
+}
  }
  
  IndAddr *get_indicator(hwaddr ind_addr, int len)

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 1383e47eeb5..aa90703d518 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -275,11 +275,9 @@ static void ccw_init(MachineState *machine)
  s390_enable_css_support(s390_cpu_addr2state(0));
  
  ret = css_create_css_image(VIRTUAL_CSSID, true);

-
  assert(ret == 0);
-if (css_migration_enabled()) {
-css_register_vmstate();
-}
+
+css_register_vmstate();
  
  /* Create VirtIO network adapters */

  s390_create_virtio_net(BUS(css_bus), mc->default_nic);
@@ -741,7 +739,6 @@ static void ccw_machine_class_init(ObjectClass *oc, void 
*data)
  
  s390mc->ri_allowed = true;

  s390mc->cpu_model_allowed = true;
-s390mc->css_migration_enabled = true;
  s390mc->hpage_1m_allowed = true;
  s390mc->max_threads = 1;
  mc->init = ccw_init;
@@ -811,11 +808,6 @@ static const TypeInfo ccw_machine_info = {
  },
  };
  
-bool css_migration_enabled(void)

-{
-return get_machine_class()->css_migration_enabled;
-}
-
  #define DEFINE_CCW_MACHINE(suffix, verstr, latest)
\
  static void ccw_machine_##suffix##_class_init(ObjectClass *oc,
\
void *data) 
\
@@ -1171,7 +1163,6 @@ static void ccw_machine_2_9_instance_options(MachineState 
*machine)
  
  static void ccw_machine_2_9_class_options(MachineClass *mc)

  {
-S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc);
  static GlobalProperty compat[] = {
  { TYPE_S390_STATTRIB, "migration-enabled", "off", },
  { TYPE_S390_FLIC_COMMON, "migration-enabled", "off", },
@@ -1180,7 +1171,7 @@ static void ccw_machine_2_9_class_options(MachineClass 
*mc)
  ccw_machine_2_10_class_options(mc);

Re: [PULL 0/3] loongarch-to-apply queue

2024-05-09 Thread Richard Henderson


On 5/9/24 10:06, Song Gao wrote:

The following changes since commit 4e66a08546a2588a4667766a1edab9caccf24ce3:

   Merge tag 'for-upstream' ofhttps://gitlab.com/bonzini/qemu  into staging 
(2024-05-07 09:26:30 -0700)

are available in the Git repository at:

   https://gitlab.com/gaosong/qemu.git  tags/pull-loongarch-20240509

for you to fetch changes up to 5872966db7abaa7f8753541b7a9f242df9752b50:

   target/loongarch: Put cpucfg operation before CSR register (2024-05-09 
15:19:22 +0800)


pull-loongarch-20240509


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/9.1 as 
appropriate.


r~

Re: [PULL v2 00/13] Migration patches for 2024-05-08

2024-05-09 Thread Richard Henderson


On 5/9/24 01:35, Fabiano Rosas wrote:

The following changes since commit 4e66a08546a2588a4667766a1edab9caccf24ce3:

   Merge tag 'for-upstream' ofhttps://gitlab.com/bonzini/qemu  into staging 
(2024-05-07 09:26:30 -0700)

are available in the Git repository at:

   https://gitlab.com/farosas/qemu.git  tags/migration-20240508-pull-request

for you to fetch changes up to db8cb7b6e73690233bc7c6abbb90979af3a18143:

   hmp/migration: Fix "migrate" command's documentation (2024-05-08 09:22:37 
-0300)


Migration pull request

- Will's WITH_QEMU_LOCK_GUARD cleanup
- Vladimir's new exit-on-error parameter
- Fabiano's removals and deprecations series
   (block migration and non-multifd compression removed)
- Peter's documentation fix for HMP migrate command

v2:
- updated Peter's documentation fix.


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/9.1 as 
appropriate.


r~

Re: [PATCH 05/13] tests/qtest: s390x: fix operation in a build without any boards or devices


On 09/05/2024 19.00, Paolo Bonzini wrote:

Do the bare minimum to ensure that at least a vanilla
--without-default-devices build works for all targets except i386,
x86_64 and ppc64.  In particular this fixes s390x-softmmu; i386 and
x86_64 have about a dozen failing tests that do not pass -M and therefore
require a default machine type; ppc64 has the same issue, though only
with numa-test.

If we can for now ignore the cases where boards and devices are picked
by hand, drive_del-test however can be fixed easily; almost all tests
check for the virtio-blk or virtio-scsi device that they use, and are
already skipped.  Only one didn't get the memo; plus another one does
not need a machine at all and can be run with -M none.

Signed-off-by: Paolo Bonzini 
---
  tests/qtest/drive_del-test.c | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tests/qtest/drive_del-test.c b/tests/qtest/drive_del-test.c
index 8a6f3ac963d..7b67a4bbee4 100644
--- a/tests/qtest/drive_del-test.c
+++ b/tests/qtest/drive_del-test.c
@@ -173,7 +173,7 @@ static void test_drive_without_dev(void)
  QTestState *qts;
  
  /* Start with an empty drive */

-qts = qtest_init("-drive if=none,id=drive0");
+qts = qtest_init("-drive if=none,id=drive0 -M none");
  
  /* Delete the drive */

  drive_del(qts);
@@ -192,6 +192,11 @@ static void test_after_failed_device_add(void)
  QDict *response;
  QTestState *qts;
  
+if (!has_device_builtin("virtio-blk")) {

+g_test_skip("Device virtio-blk is not available");
+return;
+}
+
  snprintf(driver, sizeof(driver), "virtio-blk-%s",
   qvirtio_get_dev_type());
  


Reviewed-by: Thomas Huth

Re: [PATCH 04/13] s390x: select correct components for no-board build


On 09/05/2024 19.00, Paolo Bonzini wrote:

Signed-off-by: Paolo Bonzini 
---
  .gitlab-ci.d/buildtest.yml | 4 ++--
  target/s390x/Kconfig   | 2 ++
  2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml
index 13afd0df1f0..f8502905203 100644
--- a/.gitlab-ci.d/buildtest.yml
+++ b/.gitlab-ci.d/buildtest.yml
@@ -650,7 +650,7 @@ build-tci:
  # Check our reduced build configurations
  # requires libfdt: aarch64, arm, i386, loongarch64, microblaze, microblazeel,
  #   mips64el, or1k, ppc, ppc64, riscv32, riscv64, rx, x86_64
-# does not build without boards: i386, s390x, x86_64
+# does not build without boards: i386, x86_64
  build-without-defaults:
extends: .native_build_job_template
needs:
@@ -666,7 +666,7 @@ build-without-defaults:
--disable-strip
  TARGETS: alpha-softmmu avr-softmmu cris-softmmu hppa-softmmu m68k-softmmu
mips-softmmu mips64-softmmu mipsel-softmmu
-  sh4-softmmu sh4eb-softmmu sparc-softmmu
+  s390x-softmmu sh4-softmmu sh4eb-softmmu sparc-softmmu
sparc64-softmmu tricore-softmmu xtensa-softmmu xtensaeb-softmmu
hexagon-linux-user i386-linux-user s390x-linux-user
  MAKE_CHECK_ARGS: check
diff --git a/target/s390x/Kconfig b/target/s390x/Kconfig
index 72da48136c6..d886be48b47 100644
--- a/target/s390x/Kconfig
+++ b/target/s390x/Kconfig
@@ -1,2 +1,4 @@
  config S390X
  bool
+select PCI
+select S390_FLIC


Reviewed-by: Thomas Huth

Re: [PATCH 02/13] s390_flic: add migration-enabled property


On 09/05/2024 19.00, Paolo Bonzini wrote:

Instead of mucking with css_migration_enabled(), add a property specific to
the FLIC device, similar to what is done for TYPE_S390_STATTRIB.

Signed-off-by: Paolo Bonzini 
---
  include/hw/s390x/s390_flic.h | 1 +
  hw/intc/s390_flic.c  | 6 +-
  hw/s390x/s390-virtio-ccw.c   | 1 +
  3 files changed, 7 insertions(+), 1 deletion(-)


Reviewed-by: Thomas Huth 

(BTW: It's really good that Daniel's patch series is going to mark the old 
machine types as deprecated, too ... migration stuff has been so hacky in 
the 2.x days...)



diff --git a/include/hw/s390x/s390_flic.h b/include/hw/s390x/s390_flic.h
index 3907a13d076..bcb081def58 100644
--- a/include/hw/s390x/s390_flic.h
+++ b/include/hw/s390x/s390_flic.h
@@ -47,6 +47,7 @@ struct S390FLICState {
  /* to limit AdapterRoutes.num_routes for compat */
  uint32_t adapter_routes_max_batch;
  bool ais_supported;
+bool migration_enabled;
  };
  
  
diff --git a/hw/intc/s390_flic.c b/hw/intc/s390_flic.c

index f4a848460b8..7f930800877 100644
--- a/hw/intc/s390_flic.c
+++ b/hw/intc/s390_flic.c
@@ -405,6 +405,8 @@ static void qemu_s390_flic_class_init(ObjectClass *oc, void 
*data)
  static Property s390_flic_common_properties[] = {
  DEFINE_PROP_UINT32("adapter_routes_max_batch", S390FLICState,
 adapter_routes_max_batch, ADAPTER_ROUTES_MAX_GSI),
+DEFINE_PROP_BOOL("migration-enabled", S390FLICState,
+ migration_enabled, true),
  DEFINE_PROP_END_OF_LIST(),
  };
  
@@ -457,7 +459,9 @@ type_init(qemu_s390_flic_register_types)
  
  static bool adapter_info_so_needed(void *opaque)

  {
-return css_migration_enabled();
+S390FLICState *fs = S390_FLIC_COMMON(opaque);
+
+return fs->migration_enabled;
  }
  
  const VMStateDescription vmstate_adapter_info_so = {

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index feabc173eb3..1383e47eeb5 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -1174,6 +1174,7 @@ static void ccw_machine_2_9_class_options(MachineClass 
*mc)
  S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc);
  static GlobalProperty compat[] = {
  { TYPE_S390_STATTRIB, "migration-enabled", "off", },
+{ TYPE_S390_FLIC_COMMON, "migration-enabled", "off", },
  };
  
  ccw_machine_2_10_class_options(mc);

Re: [PATCH 01/13] s390x: move s390_cpu_addr2state to target/s390x/sigp.c


On 09/05/2024 19.00, Paolo Bonzini wrote:

This function has no dependency on the virtio-ccw machine type, though it
assumes that the CPU address corresponds to the core_id and the index.

If there is any need of something different or more fancy (unlikely)
S390 can include a MachineClass subclass and implement it there.  For
now, move it to sigp.c for simplicity.

Signed-off-by: Paolo Bonzini 
---
  hw/s390x/s390-virtio-ccw.c | 16 
  target/s390x/sigp.c| 17 +
  2 files changed, 17 insertions(+), 16 deletions(-)


Reviewed-by: Thomas Huth 




diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 4dcc2138200..feabc173eb3 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -50,22 +50,6 @@
  
  static Error *pv_mig_blocker;
  
-S390CPU *s390_cpu_addr2state(uint16_t cpu_addr)

-{
-static MachineState *ms;
-
-if (!ms) {
-ms = MACHINE(qdev_get_machine());
-g_assert(ms->possible_cpus);
-}
-
-/* CPU address corresponds to the core_id and the index */
-if (cpu_addr >= ms->possible_cpus->len) {
-return NULL;
-}
-return S390_CPU(ms->possible_cpus->cpus[cpu_addr].cpu);
-}
-
  static S390CPU *s390x_new_cpu(const char *typename, uint32_t core_id,
Error **errp)
  {
diff --git a/target/s390x/sigp.c b/target/s390x/sigp.c
index 9dd977349ab..ad0ad61177d 100644
--- a/target/s390x/sigp.c
+++ b/target/s390x/sigp.c
@@ -11,6 +11,7 @@
  #include "qemu/osdep.h"
  #include "cpu.h"
  #include "s390x-internal.h"
+#include "hw/boards.h"
  #include "sysemu/hw_accel.h"
  #include "sysemu/runstate.h"
  #include "exec/address-spaces.h"
@@ -435,6 +436,22 @@ static int sigp_set_architecture(S390CPU *cpu, uint32_t 
param,
  return SIGP_CC_STATUS_STORED;
  }
  
+S390CPU *s390_cpu_addr2state(uint16_t cpu_addr)

+{
+static MachineState *ms;
+
+if (!ms) {
+ms = MACHINE(qdev_get_machine());
+g_assert(ms->possible_cpus);
+}
+
+/* CPU address corresponds to the core_id and the index */
+if (cpu_addr >= ms->possible_cpus->len) {
+return NULL;
+}
+return S390_CPU(ms->possible_cpus->cpus[cpu_addr].cpu);
+}
+
  int handle_sigp(CPUS390XState *env, uint8_t order, uint64_t r1, uint64_t r3)
  {
  uint64_t *status_reg = >regs[r1];

Re: [PATCH v2] tests/qtest: Add some test cases support on LoongArch


On 09/05/2024 10.47, Bibo Mao wrote:

Add boot-serial-test and filter test cases support on LoongArch system.

Signed-off-by: Bibo Mao 
---
v1 ... v2:
  1. Refresh the changelog, adding filter test case support also.
  2. Adjust order of loongarch qtest in alphabetical order.
---



Reviewed-by: Thomas Huth

Re: [RFC 0/2] Identify aliased maps in vdpa SVQ iova_tree

2024-05-09 Thread Jason Wang

On Thu, May 9, 2024 at 3:10 PM Eugenio Perez Martin  wrote:
>
> On Thu, May 9, 2024 at 8:27 AM Jason Wang  wrote:
> >
> > On Thu, May 9, 2024 at 1:16 AM Eugenio Perez Martin  
> > wrote:
> > >
> > > On Wed, May 8, 2024 at 4:29 AM Jason Wang  wrote:
> > > >
> > > > On Tue, May 7, 2024 at 6:57 PM Eugenio Perez Martin 
> > > >  wrote:
> > > > >
> > > > > On Tue, May 7, 2024 at 9:29 AM Jason Wang  wrote:
> > > > > >
> > > > > > On Fri, Apr 12, 2024 at 3:56 PM Eugenio Perez Martin
> > > > > >  wrote:
> > > > > > >
> > > > > > > On Fri, Apr 12, 2024 at 8:47 AM Jason Wang  
> > > > > > > wrote:
> > > > > > > >
> > > > > > > > On Wed, Apr 10, 2024 at 6:03 PM Eugenio Pérez 
> > > > > > > >  wrote:
> > > > > > > > >
> > > > > > > > > The guest may have overlapped memory regions, where different 
> > > > > > > > > GPA leads
> > > > > > > > > to the same HVA.  This causes a problem when overlapped 
> > > > > > > > > regions
> > > > > > > > > (different GPA but same translated HVA) exists in the tree, 
> > > > > > > > > as looking
> > > > > > > > > them by HVA will return them twice.
> > > > > > > >
> > > > > > > > I think I don't understand if there's any side effect for 
> > > > > > > > shadow virtqueue?
> > > > > > > >
> > > > > > >
> > > > > > > My bad, I totally forgot to put a reference to where this comes 
> > > > > > > from.
> > > > > > >
> > > > > > > Si-Wei found that during initialization this sequences of maps /
> > > > > > > unmaps happens [1]:
> > > > > > >
> > > > > > > HVAGPAIOVA
> > > > > > > -
> > > > > > > Map
> > > > > > > [0x7f7903e0, 0x7f7983e0)[0x0, 0x8000) [0x1000, 
> > > > > > > 0x8000)
> > > > > > > [0x7f7983e0, 0x7f9903e0)[0x1, 0x208000)
> > > > > > > [0x80001000, 0x201000)
> > > > > > > [0x7f7903ea, 0x7f7903ec)[0xfeda, 0xfedc)
> > > > > > > [0x201000, 0x221000)
> > > > > > >
> > > > > > > Unmap
> > > > > > > [0x7f7903ea, 0x7f7903ec)[0xfeda, 0xfedc) 
> > > > > > > [0x1000,
> > > > > > > 0x2) ???
> > > > > > >
> > > > > > > The third HVA range is contained in the first one, but exposed 
> > > > > > > under a
> > > > > > > different GVA (aliased). This is not "flattened" by QEMU, as GPA 
> > > > > > > does
> > > > > > > not overlap, only HVA.
> > > > > > >
> > > > > > > At the third chunk unmap, the current algorithm finds the first 
> > > > > > > chunk,
> > > > > > > not the second one. This series is the way to tell the difference 
> > > > > > > at
> > > > > > > unmap time.
> > > > > > >
> > > > > > > [1] 
> > > > > > > https://lists.nongnu.org/archive/html/qemu-devel/2024-04/msg00079.html
> > > > > > >
> > > > > > > Thanks!
> > > > > >
> > > > > > Ok, I was wondering if we need to store GPA(GIOVA) to HVA mappings 
> > > > > > in
> > > > > > the iova tree to solve this issue completely. Then there won't be
> > > > > > aliasing issues.
> > > > > >
> > > > >
> > > > > I'm ok to explore that route but this has another problem. Both SVQ
> > > > > vrings and CVQ buffers also need to be addressable by VhostIOVATree,
> > > > > and they do not have GPA.
> > > > >
> > > > > At this moment vhost_svq_translate_addr is able to handle this
> > > > > transparently as we translate vaddr to SVQ IOVA. How can we store
> > > > > these new entries? Maybe a (hwaddr)-1 GPA to signal it has no GPA and
> > > > > then a list to go through other entries (SVQ vaddr and CVQ buffers).
> > > >
> > > > This seems to be tricky.
> > > >
> > > > As discussed, it could be another iova tree.
> > > >
> > >
> > > Yes but there are many ways to add another IOVATree. Let me expand & 
> > > recap.
> > >
> > > Option 1 is to simply add another iova tree to VhostShadowVirtqueue.
> > > Let's call it gpa_iova_tree, as opposed to the current iova_tree that
> > > translates from vaddr to SVQ IOVA. To know which one to use is easy at
> > > adding or removing, like in the memory listener, but how to know at
> > > vhost_svq_translate_addr?
> >
> > Then we won't use virtqueue_pop() at all, we need a SVQ version of
> > virtqueue_pop() to translate GPA to SVQ IOVA directly?
> >
>
> The problem is not virtqueue_pop, that's out of the
> vhost_svq_translate_addr. The problem is the need of adding
> conditionals / complexity in all the callers of
>
> > >
> > > The easiest way for me is to rely on memory_region_from_host(). When
> > > vaddr is from the guest, it returns a valid MemoryRegion. When it is
> > > not, it returns NULL. I'm not sure if this is a valid use case, it
> > > just worked in my tests so far.
> > >
> > > Now we have the second problem: The GPA values of the regions of the
> > > two IOVA tree must be unique. We need to be able to find unallocated
> > > regions in SVQ IOVA. At this moment there is only one IOVATree, so
> > > this is done easily by vhost_iova_tree_map_alloc. But

[PATCH] fix: correct typo in ext_zvkb configuration variable Signed-off-by: Kaiyao Duan

2024-05-09 Thread inspireMeNow

---
 target/riscv/cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index eb1a2e7d6d..13cd34adbd 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1535,7 +1535,7 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
 /* Vector cryptography extensions */
 MULTI_EXT_CFG_BOOL("zvbb", ext_zvbb, false),
 MULTI_EXT_CFG_BOOL("zvbc", ext_zvbc, false),
-MULTI_EXT_CFG_BOOL("zvkb", ext_zvkg, false),
+MULTI_EXT_CFG_BOOL("zvkb", ext_zvkb, false),
 MULTI_EXT_CFG_BOOL("zvkg", ext_zvkg, false),
 MULTI_EXT_CFG_BOOL("zvkned", ext_zvkned, false),
 MULTI_EXT_CFG_BOOL("zvknha", ext_zvknha, false),
-- 
2.45.0

Re: [PATCH] target/i386: add feature dependency for XSAVE

2024-05-09 Thread Zhao Liu

On Thu, May 09, 2024 at 05:39:52PM +0200, Paolo Bonzini wrote:
> Date: Thu,  9 May 2024 17:39:52 +0200
> From: Paolo Bonzini 
> Subject: [PATCH] target/i386: add feature dependency for XSAVE
> X-Mailer: git-send-email 2.45.0
> 
> The XSAVEOPT, XSAVEC, XGETBV1, XSAVES features make no sense if you
> cannot enable XSAVE in the first place.
> 
> Signed-off-by: Paolo Bonzini 
> ---
>  target/i386/cpu.c | 4 
>  1 file changed, 4 insertions(+)

Reviewed-by: Zhao Liu

Re: [PATCH] target/i386: fix feature dependency for WAITPKG

2024-05-09 Thread Zhao Liu

On Thu, May 09, 2024 at 05:39:50PM +0200, Paolo Bonzini wrote:
> Date: Thu,  9 May 2024 17:39:50 +0200
> From: Paolo Bonzini 
> Subject: [PATCH] target/i386: fix feature dependency for WAITPKG
> X-Mailer: git-send-email 2.45.0
> 
> The VMX feature bit depends on general availability of WAITPKG,
> not the other way round.
> 
> Fixes: 33cc88261c3 ("target/i386: add support for 
> VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE", 2023-08-28)
> Cc: qemu-sta...@nongnu.org
> Signed-off-by: Paolo Bonzini 
> ---
>  target/i386/cpu.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)

Reviewed-by: Zhao Liu

Re: Re: [PATCH 1/9] block: add persistent reservation in/out api

2024-05-09 Thread zhenwei pi





On 5/10/24 02:22, Stefan Hajnoczi wrote:

On Wed, May 08, 2024 at 05:36:21PM +0800, Changqi Lu wrote:


[SNIP]


+
+/**
+ * Persist Through Power Loss(PTPL) is considered as required in QEMU
+ * block layer, the block driver need always enable PTPL.
+ */


What is the reasoning behind this? Will applications that rely on PTPL=0
work?



Hi Stefan,

PTPL needs to be supported at QEMU block layer in theory, include 
reporting PTPL capability and PTPL flag in PR OUT command. However, in 
the real production environment, both SCSI driver and NVMe driver, even 
linux block layer always enable PTPL nnconditionally on a linux platform.


Ref the latest code:
1, SCSI:
https://github.com/torvalds/linux/blob/master/drivers/scsi/sd.c#L1978
static int sd_pr_register(struct block_device *bdev, u64 old_key, u64 
new_key,

u32 flags)
{
if (flags & ~PR_FL_IGNORE_KEY)
return -EOPNOTSUPP;
return sd_pr_out_command(bdev, (flags & PR_FL_IGNORE_KEY) ? 0x06 : 0x00,
old_key, new_key, 0,
(1 << 0) /* APTPL */);
}

2, NVMe:
https://github.com/torvalds/linux/blob/master/drivers/nvme/host/pr.c#L127
static int nvme_pr_register(struct block_device *bdev, u64 old,
u64 new, unsigned flags)
{
u32 cdw10;

if (flags & ~PR_FL_IGNORE_KEY)
return -EOPNOTSUPP;

cdw10 = old ? 2 : 0;
cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
}

3, linux block layers also hides PTPL flag:
https://github.com/torvalds/linux/blob/master/block/ioctl.c#L283
static int blkdev_pr_register(struct block_device *bdev, blk_mode_t mode,
struct pr_registration __user *arg)
{
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_registration reg;

if (!blkdev_pr_allowed(bdev, mode))
return -EPERM;
if (!ops || !ops->pr_register)
return -EOPNOTSUPP;
if (copy_from_user(, arg, sizeof(reg)))
return -EFAULT;

if (reg.flags & ~PR_FL_IGNORE_KEY)
return -EOPNOTSUPP;
return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags);
}


So we(Changqi and me) wanted to keep PR a bit simple in QEMU block layer:
- consider PTPL is required in QEMU block, then we don't need an extra flag
- the block backend driver always request PR OUT with PTPL flag

Then: Will applications that rely on PTPL=0 work?
Yes, a guest PR out without PTPL will work, but the backend uses PTPL=1 
instead.


Will this request succeed?
If the backend driver' supports PTPL capability, it will succeed. 
Otherwise it will fail.


--
zhenwei pi

RE: [PATCH] vfio: container: Fix missing allocation of VFIOSpaprContainer

2024-05-09 Thread Duan, Zhenzhong



>-Original Message-
>From: Shivaprasad G Bhat 
>Subject: [PATCH] vfio: container: Fix missing allocation of
>VFIOSpaprContainer
>
>The commit 6ad359ec29 "(vfio/spapr: Move prereg_listener into
>spapr container)" began to use the newly introduced VFIOSpaprContainer
>structure.
>
>After several refactors, today the container_of(container,
>VFIOSpaprContainer, ABC) is used when VFIOSpaprContainer is actually
>not allocated. On PPC64 systems, this dereference is leading to corruption
>showing up as glibc malloc assertion during guest start when using vfio.
>
>Patch adds the missing allocation while also making the structure movement
>to vfio common header file.
>
>Fixes: 6ad359ec29 "(vfio/spapr: Move prereg_listener into spapr container)"
>Signed-off-by: Shivaprasad G Bhat 

Reviewed-by: Zhenzhong Duan 

An alternative way is to introduce a VFIOIOMMUClass::create or
VFIOIOMMUClass::get_container_size.
But that needs some refactor to vfio_connect_container().

Thanks
Zhenzhong

>---
> hw/vfio/container.c   |6 --
> hw/vfio/spapr.c   |6 --
> include/hw/vfio/vfio-common.h |6 ++
> 3 files changed, 10 insertions(+), 8 deletions(-)
>
>diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>index 77bdec276e..ecaf5786d9 100644
>--- a/hw/vfio/container.c
>+++ b/hw/vfio/container.c
>@@ -539,6 +539,7 @@ static int vfio_connect_container(VFIOGroup *group,
>AddressSpace *as,
> {
> VFIOContainer *container;
> VFIOContainerBase *bcontainer;
>+VFIOSpaprContainer *scontainer;
> int ret, fd;
> VFIOAddressSpace *space;
>
>@@ -611,7 +612,8 @@ static int vfio_connect_container(VFIOGroup *group,
>AddressSpace *as,
> goto close_fd_exit;
> }
>
>-container = g_malloc0(sizeof(*container));
>+scontainer = g_malloc0(sizeof(*scontainer));
>+container = >container;
> container->fd = fd;
> bcontainer = >bcontainer;
>
>@@ -675,7 +677,7 @@ unregister_container_exit:
> vfio_cpr_unregister_container(bcontainer);
>
> free_container_exit:
>-g_free(container);
>+g_free(scontainer);
>
> close_fd_exit:
> close(fd);
>diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
>index 0d949bb728..78d218b7e7 100644
>--- a/hw/vfio/spapr.c
>+++ b/hw/vfio/spapr.c
>@@ -24,12 +24,6 @@
> #include "qapi/error.h"
> #include "trace.h"
>
>-typedef struct VFIOSpaprContainer {
>-VFIOContainer container;
>-MemoryListener prereg_listener;
>-QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
>-} VFIOSpaprContainer;
>-
> static bool vfio_prereg_listener_skipped_section(MemoryRegionSection
>*section)
> {
> if (memory_region_is_iommu(section->mr)) {
>diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-
>common.h
>index b9da6c08ef..010fa68ac6 100644
>--- a/include/hw/vfio/vfio-common.h
>+++ b/include/hw/vfio/vfio-common.h
>@@ -82,6 +82,12 @@ typedef struct VFIOContainer {
> QLIST_HEAD(, VFIOGroup) group_list;
> } VFIOContainer;
>
>+typedef struct VFIOSpaprContainer {
>+VFIOContainer container;
>+MemoryListener prereg_listener;
>+QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
>+} VFIOSpaprContainer;
>+
> typedef struct VFIOHostDMAWindow {
> hwaddr min_iova;
> hwaddr max_iova;
>

Re: hw/usb/hcd-ohci: Fix #1510, #303: pid not IN or OUT

On Thu, 9 May 2024, Cord Amfmgm wrote:

On Thu, May 9, 2024 at 12:48 PM Peter Maydell
wrote:

On Wed, 8 May 2024 at 16:29, Cord Amfmgm wrote:

On Wed, May 8, 2024 at 3:45 AM Thomas Huth wrote:

Your Signed-off-by line does not match the From: line ... could you

please

fix this? (see

https://www.qemu.org/docs/master/devel/submitting-a-patch.html#patch-emails-must-include-a-signed-off-by-line

, too)

I'll submit the new patch request with my pseudonym in the From: and

Signed-off-by: lines, per your request. Doesn't matter to me. However, this
arises simply because I don't give gmail my real name -
https://en.wikipedia.org/wiki/Nymwars

I'm confused now. Of the two names you've used in this
patch (Cord Amfmgm and David Hubbard), are they both
pseudonyms, or is one a pseudonym and one your real name?

Hi Peter,

I am attempting to submit a small patch. For context, I'm getting broader
attention now because apparently OHCI is one of the less used components of
qemu and maybe the review process was taking a while. That's relevant
because I wasn't able to get prompt feedback and am now choosing what
appears to be the most expeditious approach -- all I want is to get this
patch done and be out of your hair. If Thomas Huth wants me to use a
consistent name, have I not complied? Are you asking out of curiosity or is
there a valid reason why I should answer your question in order to get the
patch submitted? Would you like to have a friendly chat over virtual coffee
sometime (but off-list)?

See here:
https://www.qemu.org/docs/master/devel/submitting-a-patch.html#patch-emails-must-include-a-signed-off-by-line
and also the document linked from there:
http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/SubmittingPatches?id=f6f94e2ab1b33f0082ac22d71f66385a60d8157f#n297

As for getting the patch reviewed, it may be difficult as the USB
maintainer is practically absent and has no time for QEMU for a while and
as OHCI as you said is not odten used there aren't many people who could
review it. Getting at least the formal stuff out of the way may help
though to get somebody to try to review the patch.

Regards,
BALATON Zoltan

[PATCH v5 23/32] target/ppc/mmu_common.c: Move mmu_ctx_t type to mmu_common.c

Remove mmu_ctx_t definition from internal.h as this type is only used
within mmu_common.c.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/internal.h   | 12 
 target/ppc/mmu_common.c | 11 +++
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/target/ppc/internal.h b/target/ppc/internal.h
index 5b28e8f3b0..46176c4711 100644
--- a/target/ppc/internal.h
+++ b/target/ppc/internal.h
@@ -292,8 +292,6 @@ static inline int ppc_hash32_pp_prot(int key, int pp, int 
nx)
 return nx ? prot : prot | PAGE_EXEC;
 }
 
-typedef struct mmu_ctx_t mmu_ctx_t;
-
 bool ppc_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
   hwaddr *raddrp, int *psizep, int *protp,
   int mmu_idx, bool guest_visible);
@@ -301,16 +299,6 @@ bool ppc_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType 
access_type,
 /* Software driven TLB helpers */
 int ppc6xx_tlb_getnum(CPUPPCState *env, target_ulong eaddr,
 int way, int is_code);
-/* Context used internally during MMU translations */
-struct mmu_ctx_t {
-hwaddr raddr;  /* Real address  */
-hwaddr eaddr;  /* Effective address */
-int prot;  /* Protection bits   */
-hwaddr hash[2];/* Pagetable hash values */
-target_ulong ptem; /* Virtual segment ID | API  */
-int key;   /* Access key*/
-int nx;/* Non-execute area  */
-};
 
 #endif /* !CONFIG_USER_ONLY */
 
diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index d7e4baa9cf..1ff2afdea7 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -36,6 +36,17 @@
 
 /* #define DUMP_PAGE_TABLES */
 
+/* Context used internally during MMU translations */
+typedef struct {
+hwaddr raddr;  /* Real address */
+hwaddr eaddr;  /* Effective address*/
+int prot;  /* Protection bits  */
+hwaddr hash[2];/* Pagetable hash values*/
+target_ulong ptem; /* Virtual segment ID | API */
+int key;   /* Access key   */
+int nx;/* Non-execute area */
+} mmu_ctx_t;
+
 void ppc_store_sdr1(CPUPPCState *env, target_ulong value)
 {
 PowerPCCPU *cpu = env_archcpu(env);
-- 
2.30.9

[PATCH v5 17/32] target/ppc/mmu_common.c: Don't use mmu_ctx_t in mmubooke_get_physical_address()

mmubooke_get_physical_address() only uses the raddr and prot fields
from mmu_ctx_t. Pass these directly instead of using a ctx struct.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/mmu_common.c | 30 ++
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index adce6cceb8..12dac9e63a 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -634,36 +634,25 @@ static int mmubooke_check_tlb(CPUPPCState *env, 
ppcemb_tlb_t *tlb,
 return access_type == MMU_INST_FETCH ? -3 : -2;
 }
 
-static int mmubooke_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx,
- target_ulong address,
+static int mmubooke_get_physical_address(CPUPPCState *env, hwaddr *raddr,
+ int *prot, target_ulong address,
  MMUAccessType access_type)
 {
 ppcemb_tlb_t *tlb;
-hwaddr raddr;
-int i, ret;
+int i, ret = -1;
 
-ret = -1;
-raddr = (hwaddr)-1ULL;
 for (i = 0; i < env->nb_tlb; i++) {
 tlb = >tlb.tlbe[i];
-ret = mmubooke_check_tlb(env, tlb, , >prot, address,
+ret = mmubooke_check_tlb(env, tlb, raddr, prot, address,
  access_type, i);
 if (ret != -1) {
 break;
 }
 }
-
-if (ret >= 0) {
-ctx->raddr = raddr;
-qemu_log_mask(CPU_LOG_MMU, "%s: access granted " TARGET_FMT_lx
-  " => " HWADDR_FMT_plx " %d %d\n", __func__,
-  address, ctx->raddr, ctx->prot, ret);
-} else {
- qemu_log_mask(CPU_LOG_MMU, "%s: access refused " TARGET_FMT_lx
-   " => " HWADDR_FMT_plx " %d %d\n", __func__,
-   address, raddr, ctx->prot, ret);
-}
-
+qemu_log_mask(CPU_LOG_MMU,
+  "%s: access %s " TARGET_FMT_lx " => " HWADDR_FMT_plx
+  " %d %d\n", __func__, ret < 0 ? "refused" : "granted",
+  address, ret < 0 ? -1 : *raddr, ret == -1 ? 0 : *prot, ret);
 return ret;
 }
 
@@ -1143,7 +1132,8 @@ int get_physical_address_wtlb(CPUPPCState *env, mmu_ctx_t 
*ctx,
 bool real_mode;
 
 if (env->mmu_model == POWERPC_MMU_BOOKE) {
-return mmubooke_get_physical_address(env, ctx, eaddr, access_type);
+return mmubooke_get_physical_address(env, >raddr, >prot,
+ eaddr, access_type);
 } else if (env->mmu_model == POWERPC_MMU_BOOKE206) {
 return mmubooke206_get_physical_address(env, ctx, eaddr, access_type,
 mmu_idx);
-- 
2.30.9

[PATCH v5 16/32] target/ppc/mmu_common.c: Don't use mmu_ctx_t for mmu40x_get_physical_address()

mmu40x_get_physical_address() only uses the raddr and prot fields from
mmu_ctx_t. Pass these directly instead of using a ctx struct.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/mmu_common.c | 37 +++--
 1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index 6570b280ca..adce6cceb8 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -519,20 +519,18 @@ int ppcemb_tlb_search(CPUPPCState *env, target_ulong 
address, uint32_t pid)
 return -1;
 }
 
-static int mmu40x_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx,
-   target_ulong address,
+static int mmu40x_get_physical_address(CPUPPCState *env, hwaddr *raddr,
+   int *prot, target_ulong address,
MMUAccessType access_type)
 {
 ppcemb_tlb_t *tlb;
-hwaddr raddr;
 int i, ret, zsel, zpr, pr;
 
 ret = -1;
-raddr = (hwaddr)-1ULL;
 pr = FIELD_EX64(env->msr, MSR, PR);
 for (i = 0; i < env->nb_tlb; i++) {
 tlb = >tlb.tlbe[i];
-if (!ppcemb_tlb_check(env, tlb, , address,
+if (!ppcemb_tlb_check(env, tlb, raddr, address,
   env->spr[SPR_40x_PID], i)) {
 continue;
 }
@@ -550,40 +548,34 @@ static int mmu40x_get_physical_address(CPUPPCState *env, 
mmu_ctx_t *ctx,
 /* fall through */
 case 0x3:
 /* All accesses granted */
-ctx->prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+*prot = PAGE_RWX;
 ret = 0;
 break;
+
 case 0x0:
 if (pr != 0) {
 /* Raise Zone protection fault.  */
 env->spr[SPR_40x_ESR] = 1 << 22;
-ctx->prot = 0;
+*prot = 0;
 ret = -2;
 break;
 }
 /* fall through */
 case 0x1:
-check_perms:
+check_perms:
 /* Check from TLB entry */
-ctx->prot = tlb->prot;
-ret = check_prot(ctx->prot, access_type);
+*prot = tlb->prot;
+ret = check_prot(*prot, access_type);
 if (ret == -2) {
 env->spr[SPR_40x_ESR] = 0;
 }
 break;
 }
-if (ret >= 0) {
-ctx->raddr = raddr;
-qemu_log_mask(CPU_LOG_MMU, "%s: access granted " TARGET_FMT_lx
-  " => " HWADDR_FMT_plx
-  " %d %d\n", __func__, address, ctx->raddr, ctx->prot,
-  ret);
-return 0;
-}
 }
-qemu_log_mask(CPU_LOG_MMU, "%s: access refused " TARGET_FMT_lx
-  " => " HWADDR_FMT_plx " %d %d\n",
-  __func__, address, raddr, ctx->prot, ret);
+qemu_log_mask(CPU_LOG_MMU, "%s: access %s " TARGET_FMT_lx " => "
+  HWADDR_FMT_plx " %d %d\n",  __func__,
+  ret < 0 ? "refused" : "granted", address,
+  ret < 0 ? 0 : *raddr, *prot, ret);
 
 return ret;
 }
@@ -1171,7 +1163,8 @@ int get_physical_address_wtlb(CPUPPCState *env, mmu_ctx_t 
*ctx,
 case POWERPC_MMU_SOFT_6xx:
 return mmu6xx_get_physical_address(env, ctx, eaddr, access_type, type);
 case POWERPC_MMU_SOFT_4xx:
-return mmu40x_get_physical_address(env, ctx, eaddr, access_type);
+return mmu40x_get_physical_address(env, >raddr, >prot, eaddr,
+   access_type);
 case POWERPC_MMU_REAL:
 cpu_abort(env_cpu(env),
   "PowerPC in real mode do not do any translation\n");
-- 
2.30.9

[PATCH v5 20/32] target/ppc/mmu_common.c: Remove BookE from direct store handling

As BookE never returns -4 we can drop BookE from the direct store case
in ppc_jumbo_xlate().

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/mmu_common.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index ab082bd12d..14c0305abf 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -1258,12 +1258,7 @@ static bool ppc_jumbo_xlate(PowerPCCPU *cpu, vaddr eaddr,
 /* Direct store exception */
 /* No code fetch is allowed in direct-store areas */
 cs->exception_index = POWERPC_EXCP_ISI;
-if ((env->mmu_model == POWERPC_MMU_BOOKE) ||
-(env->mmu_model == POWERPC_MMU_BOOKE206)) {
-env->error_code = 0;
-} else {
-env->error_code = 0x1000;
-}
+env->error_code = 0x1000;
 break;
 }
 } else {
-- 
2.30.9

[PATCH v5 27/32] target/ppc: Split off common embedded TLB init

Several 4xx CPUs and e200 share the same TLB settings enclosed in an
ifdef. Split it off in a common function to reduce code duplication
and the number of ifdefs.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/cpu_init.c | 46 ---
 1 file changed, 17 insertions(+), 29 deletions(-)

diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 07ad788e54..92c71b2a09 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -2107,18 +2107,22 @@ static int check_pow_hid0_74xx(CPUPPCState *env)
 return 0;
 }
 
+static void init_tlbs_emb(CPUPPCState *env)
+{
+#ifndef CONFIG_USER_ONLY
+env->nb_tlb = 64;
+env->nb_ways = 1;
+env->tlb_type = TLB_EMB;
+#endif
+}
+
 static void init_proc_405(CPUPPCState *env)
 {
 register_40x_sprs(env);
 register_405_sprs(env);
 register_usprgh_sprs(env);
 
-/* Memory management */
-#if !defined(CONFIG_USER_ONLY)
-env->nb_tlb = 64;
-env->nb_ways = 1;
-env->tlb_type = TLB_EMB;
-#endif
+init_tlbs_emb(env);
 init_excp_4xx(env);
 env->dcache_line_size = 32;
 env->icache_line_size = 32;
@@ -2185,12 +2189,8 @@ static void init_proc_440EP(CPUPPCState *env)
  SPR_NOACCESS, SPR_NOACCESS,
  _read_generic, _write_generic,
  0x);
-/* Memory management */
-#if !defined(CONFIG_USER_ONLY)
-env->nb_tlb = 64;
-env->nb_ways = 1;
-env->tlb_type = TLB_EMB;
-#endif
+
+init_tlbs_emb(env);
 init_excp_BookE(env);
 env->dcache_line_size = 32;
 env->icache_line_size = 32;
@@ -2282,12 +2282,7 @@ static void init_proc_440GP(CPUPPCState *env)
 register_440_sprs(env);
 register_usprgh_sprs(env);
 
-/* Memory management */
-#if !defined(CONFIG_USER_ONLY)
-env->nb_tlb = 64;
-env->nb_ways = 1;
-env->tlb_type = TLB_EMB;
-#endif
+init_tlbs_emb(env);
 init_excp_BookE(env);
 env->dcache_line_size = 32;
 env->icache_line_size = 32;
@@ -2355,12 +2350,8 @@ static void init_proc_440x5(CPUPPCState *env)
  SPR_NOACCESS, SPR_NOACCESS,
  _read_generic, _write_generic,
  0x);
-/* Memory management */
-#if !defined(CONFIG_USER_ONLY)
-env->nb_tlb = 64;
-env->nb_ways = 1;
-env->tlb_type = TLB_EMB;
-#endif
+
+init_tlbs_emb(env);
 init_excp_BookE(env);
 env->dcache_line_size = 32;
 env->icache_line_size = 32;
@@ -2717,11 +2708,8 @@ static void init_proc_e200(CPUPPCState *env)
  SPR_NOACCESS, SPR_NOACCESS,
  _read_generic, _write_generic,
  0x);
-#if !defined(CONFIG_USER_ONLY)
-env->nb_tlb = 64;
-env->nb_ways = 1;
-env->tlb_type = TLB_EMB;
-#endif
+
+init_tlbs_emb(env);
 init_excp_e200(env, 0xUL);
 env->dcache_line_size = 32;
 env->icache_line_size = 32;
-- 
2.30.9

[PATCH v5 08/32] target/ppc/mmu_common.c: Move some debug logging

Move the debug logging within ppc6xx_tlb_check() from after its only
call to simplify the caller.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/mmu_common.c | 54 ++---
 1 file changed, 24 insertions(+), 30 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index ba60b4902b..89bfd9aa45 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -225,17 +225,14 @@ static int ppc6xx_tlb_check(CPUPPCState *env, mmu_ctx_t 
*ctx,
   access_type == MMU_INST_FETCH ? 'I' : 'D');
 switch (ppc6xx_tlb_pte_check(ctx, tlb->pte0, tlb->pte1,
  0, access_type)) {
-case -3:
-/* TLB inconsistency */
-return -1;
 case -2:
 /* Access violation */
 ret = -2;
 best = nr;
 break;
-case -1:
+case -1: /* No match */
+case -3: /* TLB inconsistency */
 default:
-/* No match */
 break;
 case 0:
 /* access granted */
@@ -251,14 +248,34 @@ static int ppc6xx_tlb_check(CPUPPCState *env, mmu_ctx_t 
*ctx,
 }
 }
 if (best != -1) {
-done:
+done:
 qemu_log_mask(CPU_LOG_MMU, "found TLB at addr " HWADDR_FMT_plx
   " prot=%01x ret=%d\n",
   ctx->raddr & TARGET_PAGE_MASK, ctx->prot, ret);
 /* Update page flags */
 pte_update_flags(ctx, >tlb.tlb6[best].pte1, ret, access_type);
 }
-
+#if defined(DUMP_PAGE_TABLES)
+if (qemu_loglevel_mask(CPU_LOG_MMU)) {
+CPUState *cs = env_cpu(env);
+hwaddr base = ppc_hash32_hpt_base(env_archcpu(env));
+hwaddr len = ppc_hash32_hpt_mask(env_archcpu(env)) + 0x80;
+uint32_t a0, a1, a2, a3;
+
+qemu_log("Page table: " HWADDR_FMT_plx " len " HWADDR_FMT_plx "\n",
+ base, len);
+for (hwaddr curaddr = base; curaddr < base + len; curaddr += 16) {
+a0 = ldl_phys(cs->as, curaddr);
+a1 = ldl_phys(cs->as, curaddr + 4);
+a2 = ldl_phys(cs->as, curaddr + 8);
+a3 = ldl_phys(cs->as, curaddr + 12);
+if (a0 != 0 || a1 != 0 || a2 != 0 || a3 != 0) {
+qemu_log(HWADDR_FMT_plx ": %08x %08x %08x %08x\n",
+ curaddr, a0, a1, a2, a3);
+}
+}
+}
+#endif
 return ret;
 }
 
@@ -420,29 +437,6 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
mmu_ctx_t *ctx,
 ctx->raddr = (hwaddr)-1ULL;
 /* Software TLB search */
 ret = ppc6xx_tlb_check(env, ctx, eaddr, access_type);
-#if defined(DUMP_PAGE_TABLES)
-if (qemu_loglevel_mask(CPU_LOG_MMU)) {
-CPUState *cs = env_cpu(env);
-hwaddr curaddr;
-uint32_t a0, a1, a2, a3;
-
-qemu_log("Page table: " HWADDR_FMT_plx " len " HWADDR_FMT_plx "\n",
- ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu) + 
0x80);
-for (curaddr = ppc_hash32_hpt_base(cpu);
- curaddr < (ppc_hash32_hpt_base(cpu)
-+ ppc_hash32_hpt_mask(cpu) + 0x80);
- curaddr += 16) {
-a0 = ldl_phys(cs->as, curaddr);
-a1 = ldl_phys(cs->as, curaddr + 4);
-a2 = ldl_phys(cs->as, curaddr + 8);
-a3 = ldl_phys(cs->as, curaddr + 12);
-if (a0 != 0 || a1 != 0 || a2 != 0 || a3 != 0) {
-qemu_log(HWADDR_FMT_plx ": %08x %08x %08x %08x\n",
- curaddr, a0, a1, a2, a3);
-}
-}
-}
-#endif
 } else {
 qemu_log_mask(CPU_LOG_MMU, "direct store...\n");
 /* Direct-store segment : absolutely *BUGGY* for now */
-- 
2.30.9

[PATCH v5 32/32] target/ppc/mmu_common.c: Remove work around for spurious warnings

Now that some functions are external again the warnings don't appear
any more so the work around added earlier can be removed again.

Signed-off-by: BALATON Zoltan 
---
 target/ppc/mmu_common.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index 9ab67141b9..c559ad2d94 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -720,8 +720,6 @@ static bool ppc_jumbo_xlate(PowerPCCPU *cpu, vaddr eaddr,
 ret = mmu6xx_get_physical_address(env, , eaddr, access_type, type);
 break;
 case POWERPC_MMU_SOFT_4xx:
-/* avoid maybe used uninitialized warnings for unused fields in ctx */
-memset(, 0, sizeof(ctx));
 ret = mmu40x_get_physical_address(env, , , eaddr,
   access_type);
 break;
-- 
2.30.9

[PATCH v5 06/32] target/ppc/mmu_common.c: Introduce mmu6xx_get_physical_address()

Repurpose get_segment_6xx_tlb() to do the whole address translation
for POWERPC_MMU_SOFT_6xx MMU model by moving the BAT check there and
renaming it to match other similar functions. These are only called
once together so no need to keep these separate functions and
combining them simplifies the caller allowing further restructuring.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/mmu_common.c | 26 --
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index 3391df61cb..b7c07cf515 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -360,19 +360,23 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t 
*ctx,
 return ret;
 }
 
-/* Perform segment based translation */
-static int get_segment_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
-   target_ulong eaddr, MMUAccessType access_type,
-   int type)
+static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx,
+   target_ulong eaddr,
+   MMUAccessType access_type, int type)
 {
 PowerPCCPU *cpu = env_archcpu(env);
 hwaddr hash;
-target_ulong vsid;
+target_ulong vsid, sr, pgidx;
 int ds, target_page_bits;
 bool pr;
 int ret;
-target_ulong sr, pgidx;
 
+/* First try to find a BAT entry if there are any */
+if (env->nb_BATs && get_bat_6xx_tlb(env, ctx, eaddr, access_type) == 0) {
+return 0;
+}
+
+/* Perform segment based translation when no BATs matched */
 pr = FIELD_EX64(env->msr, MSR, PR);
 ctx->eaddr = eaddr;
 
@@ -1194,14 +1198,8 @@ int get_physical_address_wtlb(CPUPPCState *env, 
mmu_ctx_t *ctx,
 if (real_mode) {
 ret = check_physical(env, ctx, eaddr, access_type);
 } else {
-/* Try to find a BAT */
-if (env->nb_BATs != 0) {
-ret = get_bat_6xx_tlb(env, ctx, eaddr, access_type);
-}
-if (ret < 0) {
-/* We didn't match any BAT entry or don't have BATs */
-ret = get_segment_6xx_tlb(env, ctx, eaddr, access_type, type);
-}
+ret = mmu6xx_get_physical_address(env, ctx, eaddr, access_type,
+  type);
 }
 break;
 
-- 
2.30.9

[PATCH v5 24/32] target/ppc/mmu_common.c: Simplify ppc_booke_xlate() part 1

Move setting error_code that appears in every case out in front and
hoist the common fall through case for BOOKE206 as well which allows
removing the nested switches.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/mmu_common.c | 41 -
 1 file changed, 12 insertions(+), 29 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index 1ff2afdea7..87f28efbf9 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -1164,58 +1164,41 @@ static bool ppc_booke_xlate(PowerPCCPU *cpu, vaddr 
eaddr,
 }
 
 log_cpu_state_mask(CPU_LOG_MMU, cs, 0);
+env->error_code = 0;
+if (ret == -1) {
+if (env->mmu_model == POWERPC_MMU_BOOKE206) {
+booke206_update_mas_tlb_miss(env, eaddr, access_type, mmu_idx);
+}
+}
 if (access_type == MMU_INST_FETCH) {
 switch (ret) {
 case -1:
 /* No matches in page tables or TLB */
-switch (env->mmu_model) {
-case POWERPC_MMU_BOOKE206:
-booke206_update_mas_tlb_miss(env, eaddr, access_type, mmu_idx);
-/* fall through */
-case POWERPC_MMU_BOOKE:
-cs->exception_index = POWERPC_EXCP_ITLB;
-env->error_code = 0;
-env->spr[SPR_BOOKE_DEAR] = eaddr;
-env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, 
access_type);
-break;
-default:
-g_assert_not_reached();
-}
+cs->exception_index = POWERPC_EXCP_ITLB;
+env->spr[SPR_BOOKE_DEAR] = eaddr;
+env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, access_type);
 break;
 case -2:
 /* Access rights violation */
 cs->exception_index = POWERPC_EXCP_ISI;
-env->error_code = 0;
 break;
 case -3:
 /* No execute protection violation */
 cs->exception_index = POWERPC_EXCP_ISI;
 env->spr[SPR_BOOKE_ESR] = 0;
-env->error_code = 0;
 break;
 }
 } else {
 switch (ret) {
 case -1:
 /* No matches in page tables or TLB */
-switch (env->mmu_model) {
-case POWERPC_MMU_BOOKE206:
-booke206_update_mas_tlb_miss(env, eaddr, access_type, mmu_idx);
-/* fall through */
-case POWERPC_MMU_BOOKE:
-cs->exception_index = POWERPC_EXCP_DTLB;
-env->error_code = 0;
-env->spr[SPR_BOOKE_DEAR] = eaddr;
-env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, 
access_type);
-break;
-default:
-g_assert_not_reached();
-}
+cs->exception_index = POWERPC_EXCP_DTLB;
+env->spr[SPR_BOOKE_DEAR] = eaddr;
+env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, access_type);
 break;
 case -2:
 /* Access rights violation */
 cs->exception_index = POWERPC_EXCP_DSI;
-env->error_code = 0;
 env->spr[SPR_BOOKE_DEAR] = eaddr;
 env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, access_type);
 break;
-- 
2.30.9

[PATCH v5 13/32] target/ppc/mmu_common.c: Fix misindented qemu_log_mask() calls

Fix several qemu_log_mask() calls that are misindented.

Signed-off-by: BALATON Zoltan 
Acked-by: Nicholas Piggin 
---
 target/ppc/mmu_common.c | 42 -
 1 file changed, 20 insertions(+), 22 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index 2f412dd7c5..124148b3da 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -315,8 +315,8 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
 int ret = -1;
 bool ifetch = access_type == MMU_INST_FETCH;
 
- qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT v " TARGET_FMT_lx "\n", __func__,
- ifetch ? 'I' : 'D', virtual);
+qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT v " TARGET_FMT_lx "\n", __func__,
+  ifetch ? 'I' : 'D', virtual);
 if (ifetch) {
 BATlt = env->IBAT[1];
 BATut = env->IBAT[0];
@@ -330,9 +330,9 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
 BEPIu = *BATu & 0xF000;
 BEPIl = *BATu & 0x0FFE;
 bat_size_prot(env, , , , BATu, BATl);
- qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT%d v " TARGET_FMT_lx " BATu "
-   TARGET_FMT_lx " BATl " TARGET_FMT_lx "\n", __func__,
-   ifetch ? 'I' : 'D', i, virtual, *BATu, *BATl);
+qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT%d v " TARGET_FMT_lx " BATu "
+  TARGET_FMT_lx " BATl " TARGET_FMT_lx "\n", __func__,
+  ifetch ? 'I' : 'D', i, virtual, *BATu, *BATl);
 if ((virtual & 0xF000) == BEPIu &&
 ((virtual & 0x0FFE) & ~bl) == BEPIl) {
 /* BAT matches */
@@ -364,12 +364,11 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t 
*ctx,
 BEPIu = *BATu & 0xF000;
 BEPIl = *BATu & 0x0FFE;
 bl = (*BATu & 0x1FFC) << 15;
- qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT%d v "
-   TARGET_FMT_lx " BATu " TARGET_FMT_lx
-   " BATl " TARGET_FMT_lx "\n\t" TARGET_FMT_lx " "
-   TARGET_FMT_lx " " TARGET_FMT_lx "\n",
-   __func__, ifetch ? 'I' : 'D', i, virtual,
-   *BATu, *BATl, BEPIu, BEPIl, bl);
+qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT%d v " TARGET_FMT_lx
+  " BATu " TARGET_FMT_lx " BATl " TARGET_FMT_lx
+  "\n\t" TARGET_FMT_lx " " TARGET_FMT_lx " "
+  TARGET_FMT_lx "\n", __func__, ifetch ? 'I' : 'D',
+  i, virtual, *BATu, *BATl, BEPIu, BEPIl, bl);
 }
 }
 }
@@ -415,9 +414,8 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
mmu_ctx_t *ctx,
 hash = vsid ^ pgidx;
 ctx->ptem = (vsid << 7) | (pgidx >> 10);
 
-qemu_log_mask(CPU_LOG_MMU,
-"pte segment: key=%d ds %d nx %d vsid " TARGET_FMT_lx "\n",
-ctx->key, ds, ctx->nx, vsid);
+qemu_log_mask(CPU_LOG_MMU, "pte segment: key=%d ds %d nx %d vsid "
+  TARGET_FMT_lx "\n", ctx->key, ds, ctx->nx, vsid);
 if (!ds) {
 /* Check if instruction fetch is allowed, if needed */
 if (type == ACCESS_CODE && ctx->nx) {
@@ -583,9 +581,9 @@ static int mmu40x_get_physical_address(CPUPPCState *env, 
mmu_ctx_t *ctx,
 return 0;
 }
 }
- qemu_log_mask(CPU_LOG_MMU, "%s: access refused " TARGET_FMT_lx
-   " => " HWADDR_FMT_plx
-   " %d %d\n", __func__, address, raddr, ctx->prot, ret);
+qemu_log_mask(CPU_LOG_MMU, "%s: access refused " TARGET_FMT_lx
+  " => " HWADDR_FMT_plx " %d %d\n",
+  __func__, address, raddr, ctx->prot, ret);
 
 return ret;
 }
@@ -704,11 +702,11 @@ int ppcmas_tlb_check(CPUPPCState *env, ppcmas_tlb_t *tlb, 
hwaddr *raddrp,
 }
 
 mask = ~(booke206_tlb_to_page_size(env, tlb) - 1);
- qemu_log_mask(CPU_LOG_MMU, "%s: TLB ADDR=0x" TARGET_FMT_lx
-   " PID=0x%x MAS1=0x%x MAS2=0x%" PRIx64 " mask=0x%"
-   HWADDR_PRIx " MAS7_3=0x%" PRIx64 " MAS8=0x%" PRIx32 "\n",
-   __func__, address, pid, tlb->mas1, tlb->mas2, mask,
-   tlb->mas7_3, tlb->mas8);
+qemu_log_mask(CPU_LOG_MMU, "%s: TLB ADDR=0x" TARGET_FMT_lx
+  " PID=0x%x MAS1=0x%x MAS2=0x%" PRIx64 " mask=0x%"
+  HWADDR_PRIx " MAS7_3=0x%" PRIx64 " MAS8=0x%" PRIx32 "\n",
+  __func__, address, pid, tlb->mas1, tlb->mas2, mask,
+  tlb->mas7_3, tlb->mas8);
 
 /* Check PID */
 tlb_pid = (tlb->mas1 & MAS1_TID_MASK) >> MAS1_TID_SHIFT;
-- 
2.30.9

[PATCH v5 25/32] target/ppc/mmu_common.c: Simplify ppc_booke_xlate() part 2

Merge the code fetch and data access cases in a common switch.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/mmu_common.c | 52 -
 1 file changed, 20 insertions(+), 32 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index 87f28efbf9..5f5ac1f2e9 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -1165,45 +1165,33 @@ static bool ppc_booke_xlate(PowerPCCPU *cpu, vaddr 
eaddr,
 
 log_cpu_state_mask(CPU_LOG_MMU, cs, 0);
 env->error_code = 0;
-if (ret == -1) {
+switch (ret) {
+case -1:
+/* No matches in page tables or TLB */
 if (env->mmu_model == POWERPC_MMU_BOOKE206) {
 booke206_update_mas_tlb_miss(env, eaddr, access_type, mmu_idx);
 }
-}
-if (access_type == MMU_INST_FETCH) {
-switch (ret) {
-case -1:
-/* No matches in page tables or TLB */
-cs->exception_index = POWERPC_EXCP_ITLB;
-env->spr[SPR_BOOKE_DEAR] = eaddr;
-env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, access_type);
-break;
-case -2:
-/* Access rights violation */
-cs->exception_index = POWERPC_EXCP_ISI;
-break;
-case -3:
-/* No execute protection violation */
-cs->exception_index = POWERPC_EXCP_ISI;
-env->spr[SPR_BOOKE_ESR] = 0;
-break;
-}
-} else {
-switch (ret) {
-case -1:
-/* No matches in page tables or TLB */
-cs->exception_index = POWERPC_EXCP_DTLB;
-env->spr[SPR_BOOKE_DEAR] = eaddr;
-env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, access_type);
-break;
-case -2:
-/* Access rights violation */
-cs->exception_index = POWERPC_EXCP_DSI;
+cs->exception_index = (access_type == MMU_INST_FETCH) ?
+  POWERPC_EXCP_ITLB : POWERPC_EXCP_DTLB;
+env->spr[SPR_BOOKE_DEAR] = eaddr;
+env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, access_type);
+break;
+case -2:
+/* Access rights violation */
+cs->exception_index = (access_type == MMU_INST_FETCH) ?
+  POWERPC_EXCP_ISI : POWERPC_EXCP_DSI;
+if (access_type != MMU_INST_FETCH) {
 env->spr[SPR_BOOKE_DEAR] = eaddr;
 env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, access_type);
-break;
 }
+break;
+case -3:
+/* No execute protection violation */
+cs->exception_index = POWERPC_EXCP_ISI;
+env->spr[SPR_BOOKE_ESR] = 0;
+break;
 }
+
 return false;
 }
 
-- 
2.30.9

[PATCH v5 26/32] target/ppc: Remove id_tlbs flag from CPU env

This flag for split instruction/data TLBs is only set for 6xx soft TLB
MMU model and not used otherwise so no need to have a separate flag
for that.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 hw/ppc/pegasos2.c|  2 +-
 target/ppc/cpu.h |  5 -
 target/ppc/cpu_init.c| 19 +--
 target/ppc/helper_regs.c |  1 -
 target/ppc/mmu_common.c  | 10 ++
 target/ppc/mmu_helper.c  | 12 ++--
 6 files changed, 14 insertions(+), 35 deletions(-)

diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c
index 04d6decb2b..750f439c5d 100644
--- a/hw/ppc/pegasos2.c
+++ b/hw/ppc/pegasos2.c
@@ -984,7 +984,7 @@ static void *build_fdt(MachineState *machine, int *fdt_size)
   cpu->env.icache_line_size);
 qemu_fdt_setprop_cell(fdt, cp, "i-cache-line-size",
   cpu->env.icache_line_size);
-if (cpu->env.id_tlbs) {
+if (ppc_is_split_tlb(cpu)) {
 qemu_fdt_setprop_cell(fdt, cp, "i-tlb-sets", cpu->env.nb_ways);
 qemu_fdt_setprop_cell(fdt, cp, "i-tlb-size", cpu->env.tlb_per_way);
 qemu_fdt_setprop_cell(fdt, cp, "d-tlb-sets", cpu->env.nb_ways);
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 0ac55d6b25..cfb3ba5ac8 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1260,7 +1260,6 @@ struct CPUArchState {
 int tlb_per_way; /* Speed-up helper: used to avoid divisions at run time */
 int nb_ways; /* Number of ways in the TLB set */
 int last_way;/* Last used way used to allocate TLB in a LRU way */
-int id_tlbs; /* If 1, MMU has separated TLBs for instructions & data */
 int nb_pids; /* Number of available PID registers */
 int tlb_type;/* Type of TLB we're dealing with */
 ppc_tlb_t tlb;   /* TLB is optional. Allocate them only if needed */
@@ -2856,6 +2855,10 @@ static inline void booke206_fixed_size_tlbn(CPUPPCState 
*env, const int tlbn,
 tlb->mas1 |= ((uint32_t)tsize) << MAS1_TSIZE_SHIFT;
 }
 
+static inline bool ppc_is_split_tlb(PowerPCCPU *cpu)
+{
+return cpu->env.tlb_type == TLB_6XX;
+}
 #endif
 
 static inline bool msr_is_64bit(CPUPPCState *env, target_ulong msr)
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index c11a69fd90..07ad788e54 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -2117,7 +2117,6 @@ static void init_proc_405(CPUPPCState *env)
 #if !defined(CONFIG_USER_ONLY)
 env->nb_tlb = 64;
 env->nb_ways = 1;
-env->id_tlbs = 0;
 env->tlb_type = TLB_EMB;
 #endif
 init_excp_4xx(env);
@@ -2190,7 +2189,6 @@ static void init_proc_440EP(CPUPPCState *env)
 #if !defined(CONFIG_USER_ONLY)
 env->nb_tlb = 64;
 env->nb_ways = 1;
-env->id_tlbs = 0;
 env->tlb_type = TLB_EMB;
 #endif
 init_excp_BookE(env);
@@ -2288,7 +2286,6 @@ static void init_proc_440GP(CPUPPCState *env)
 #if !defined(CONFIG_USER_ONLY)
 env->nb_tlb = 64;
 env->nb_ways = 1;
-env->id_tlbs = 0;
 env->tlb_type = TLB_EMB;
 #endif
 init_excp_BookE(env);
@@ -2362,7 +2359,6 @@ static void init_proc_440x5(CPUPPCState *env)
 #if !defined(CONFIG_USER_ONLY)
 env->nb_tlb = 64;
 env->nb_ways = 1;
-env->id_tlbs = 0;
 env->tlb_type = TLB_EMB;
 #endif
 init_excp_BookE(env);
@@ -2724,7 +2720,6 @@ static void init_proc_e200(CPUPPCState *env)
 #if !defined(CONFIG_USER_ONLY)
 env->nb_tlb = 64;
 env->nb_ways = 1;
-env->id_tlbs = 0;
 env->tlb_type = TLB_EMB;
 #endif
 init_excp_e200(env, 0xUL);
@@ -2843,7 +2838,6 @@ static void init_proc_e500(CPUPPCState *env, int version)
 /* Memory management */
 env->nb_pids = 3;
 env->nb_ways = 2;
-env->id_tlbs = 0;
 switch (version) {
 case fsl_e500v1:
 tlbncfg[0] = register_tlbncfg(2, 1, 1, 0, 256);
@@ -6800,20 +6794,17 @@ static void init_ppc_proc(PowerPCCPU *cpu)
 }
 /* Allocate TLBs buffer when needed */
 #if !defined(CONFIG_USER_ONLY)
-if (env->nb_tlb != 0) {
-int nb_tlb = env->nb_tlb;
-if (env->id_tlbs != 0) {
-nb_tlb *= 2;
-}
+if (env->nb_tlb) {
 switch (env->tlb_type) {
 case TLB_6XX:
-env->tlb.tlb6 = g_new0(ppc6xx_tlb_t, nb_tlb);
+/* 6xx has separate TLBs for instructions and data hence times 2 */
+env->tlb.tlb6 = g_new0(ppc6xx_tlb_t, 2 * env->nb_tlb);
 break;
 case TLB_EMB:
-env->tlb.tlbe = g_new0(ppcemb_tlb_t, nb_tlb);
+env->tlb.tlbe = g_new0(ppcemb_tlb_t, env->nb_tlb);
 break;
 case TLB_MAS:
-env->tlb.tlbm = g_new0(ppcmas_tlb_t, nb_tlb);
+env->tlb.tlbm = g_new0(ppcmas_tlb_t, env->nb_tlb);
 break;
 }
 /* Pre-compute some useful values */
diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
index 25258986e3..ed583fe9b3 100644
--- a/target/ppc/helper_regs.c
+++ b/target/ppc/helper_regs.c
@@ -693,7 +693,6 @@ void

[PATCH v5 29/32] target/ppc/mmu-radix64.c: Drop a local variable

The value is only used once so no need to introduce a local variable
for it.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/mmu-radix64.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 8daf71d2db..395ce3b782 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -185,7 +185,6 @@ static bool ppc_radix64_check_prot(PowerPCCPU *cpu, 
MMUAccessType access_type,
int mmu_idx, bool partition_scoped)
 {
 CPUPPCState *env = >env;
-int need_prot;
 
 /* Check Page Attributes (pte58:59) */
 if ((pte & R_PTE_ATT) == R_PTE_ATT_NI_IO && access_type == MMU_INST_FETCH) 
{
@@ -210,8 +209,8 @@ static bool ppc_radix64_check_prot(PowerPCCPU *cpu, 
MMUAccessType access_type,
 }
 
 /* Check if requested access type is allowed */
-need_prot = prot_for_access_type(access_type);
-if (need_prot & ~*prot) { /* Page Protected for that Access */
+if (prot_for_access_type(access_type) & ~*prot) {
+/* Page Protected for that Access */
 *fault_cause |= access_type == MMU_INST_FETCH ? SRR1_NOEXEC_GUARD :
 DSISR_PROTFAULT;
 return true;
-- 
2.30.9

[PATCH v5 19/32] target/ppc: Remove pp_check() and reuse ppc_hash32_pp_prot()

The ppc_hash32_pp_prot() function in mmu-hash32.c is the same as
pp_check() in mmu_common.c, merge these to remove duplicated code.
Define the common function in internal.h as static lnline otherwise
exporting the function from mmu-hash32.c would stop the compiler
inlining it which results in slightly lower performance.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/internal.h   | 35 
 target/ppc/mmu-hash32.c | 45 -
 target/ppc/mmu_common.c | 44 ++--
 3 files changed, 37 insertions(+), 87 deletions(-)

diff --git a/target/ppc/internal.h b/target/ppc/internal.h
index 98b41a970c..ffdf6c075d 100644
--- a/target/ppc/internal.h
+++ b/target/ppc/internal.h
@@ -256,6 +256,41 @@ static inline int prot_for_access_type(MMUAccessType 
access_type)
 #ifndef CONFIG_USER_ONLY
 
 /* PowerPC MMU emulation */
+static inline int ppc_hash32_pp_prot(int key, int pp, int nx)
+{
+int prot;
+
+if (key == 0) {
+switch (pp) {
+case 0x0:
+case 0x1:
+case 0x2:
+prot = PAGE_READ | PAGE_WRITE;
+break;
+case 0x3:
+prot = PAGE_READ;
+break;
+default:
+g_assert_not_reached();
+}
+} else {
+switch (pp) {
+case 0x0:
+prot = 0;
+break;
+case 0x1:
+case 0x3:
+prot = PAGE_READ;
+break;
+case 0x2:
+prot = PAGE_READ | PAGE_WRITE;
+break;
+default:
+g_assert_not_reached();
+}
+}
+return nx ? prot : prot | PAGE_EXEC;
+}
 
 typedef struct mmu_ctx_t mmu_ctx_t;
 
diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
index 6dfedab11d..960751a50e 100644
--- a/target/ppc/mmu-hash32.c
+++ b/target/ppc/mmu-hash32.c
@@ -43,51 +43,6 @@ struct mmu_ctx_hash32 {
 int key;   /* Access key*/
 };
 
-static int ppc_hash32_pp_prot(int key, int pp, int nx)
-{
-int prot;
-
-if (key == 0) {
-switch (pp) {
-case 0x0:
-case 0x1:
-case 0x2:
-prot = PAGE_READ | PAGE_WRITE;
-break;
-
-case 0x3:
-prot = PAGE_READ;
-break;
-
-default:
-abort();
-}
-} else {
-switch (pp) {
-case 0x0:
-prot = 0;
-break;
-
-case 0x1:
-case 0x3:
-prot = PAGE_READ;
-break;
-
-case 0x2:
-prot = PAGE_READ | PAGE_WRITE;
-break;
-
-default:
-abort();
-}
-}
-if (nx == 0) {
-prot |= PAGE_EXEC;
-}
-
-return prot;
-}
-
 static int ppc_hash32_pte_prot(int mmu_idx,
target_ulong sr, ppc_hash_pte32_t pte)
 {
diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index 004ea2111d..ab082bd12d 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -65,44 +65,6 @@ void ppc_store_sdr1(CPUPPCState *env, target_ulong value)
 /*/
 /* PowerPC MMU emulation */
 
-static int pp_check(int key, int pp, int nx)
-{
-int access;
-
-/* Compute access rights */
-access = 0;
-if (key == 0) {
-switch (pp) {
-case 0x0:
-case 0x1:
-case 0x2:
-access |= PAGE_WRITE;
-/* fall through */
-case 0x3:
-access |= PAGE_READ;
-break;
-}
-} else {
-switch (pp) {
-case 0x0:
-access = 0;
-break;
-case 0x1:
-case 0x3:
-access = PAGE_READ;
-break;
-case 0x2:
-access = PAGE_READ | PAGE_WRITE;
-break;
-}
-}
-if (nx == 0) {
-access |= PAGE_EXEC;
-}
-
-return access;
-}
-
 static int check_prot(int prot, MMUAccessType access_type)
 {
 return prot & prot_for_access_type(access_type) ? 0 : -2;
@@ -130,7 +92,7 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong 
pte0,
 MMUAccessType access_type)
 {
 target_ulong ptem, mmask;
-int access, ret, pteh, ptev, pp;
+int ret, pteh, ptev, pp;
 
 ret = -1;
 /* Check validity and table match */
@@ -149,11 +111,9 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, 
target_ulong pte0,
 return -3;
 }
 }
-/* Compute access rights */
-access = pp_check(ctx->key, pp, ctx->nx);
 /* Keep the matching PTE information */
 ctx->raddr = pte1;
-ctx->prot = access;
+ctx->prot = ppc_hash32_pp_prot(ctx->key, pp, ctx->nx);
 ret = check_prot(ctx->prot, access_type);
 if (ret == 0) {

[PATCH v5 21/32] target/ppc/mmu_common.c: Split off BookE handling from ppc_jumbo_xlate()

Introduce ppc_booke_xlate() to handle BookE and BookE 2.06 cases to
reduce ppc_jumbo_xlate() further.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/mmu_common.c | 148 ++--
 1 file changed, 98 insertions(+), 50 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index 14c0305abf..d0d49c696b 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -1077,21 +1077,9 @@ int get_physical_address_wtlb(CPUPPCState *env, 
mmu_ctx_t *ctx,
  MMUAccessType access_type, int type,
  int mmu_idx)
 {
-bool real_mode;
-
-if (env->mmu_model == POWERPC_MMU_BOOKE) {
-return mmubooke_get_physical_address(env, >raddr, >prot,
- eaddr, access_type);
-} else if (env->mmu_model == POWERPC_MMU_BOOKE206) {
-return mmubooke206_get_physical_address(env, >raddr, >prot,
-eaddr, access_type, mmu_idx);
-}
-
-real_mode = (type == ACCESS_CODE) ? !FIELD_EX64(env->msr, MSR, IR)
-  : !FIELD_EX64(env->msr, MSR, DR);
-if (real_mode && (env->mmu_model == POWERPC_MMU_SOFT_6xx ||
-  env->mmu_model == POWERPC_MMU_SOFT_4xx ||
-  env->mmu_model == POWERPC_MMU_REAL)) {
+bool real_mode = (type == ACCESS_CODE) ? !FIELD_EX64(env->msr, MSR, IR)
+   : !FIELD_EX64(env->msr, MSR, DR);
+if (real_mode) {
 ctx->raddr = eaddr;
 ctx->prot = PAGE_RWX;
 return 0;
@@ -1101,6 +1089,8 @@ int get_physical_address_wtlb(CPUPPCState *env, mmu_ctx_t 
*ctx,
 case POWERPC_MMU_SOFT_6xx:
 return mmu6xx_get_physical_address(env, ctx, eaddr, access_type, type);
 case POWERPC_MMU_SOFT_4xx:
+/* avoid maybe used uninitialized warnings for unused fields in ctx */
+memset(ctx, 0, sizeof(*ctx));
 return mmu40x_get_physical_address(env, >raddr, >prot, eaddr,
access_type);
 case POWERPC_MMU_REAL:
@@ -1165,6 +1155,93 @@ static void booke206_update_mas_tlb_miss(CPUPPCState 
*env, target_ulong address,
 env->spr[SPR_BOOKE_MAS0] |= env->last_way << MAS0_NV_SHIFT;
 }
 
+static bool ppc_booke_xlate(PowerPCCPU *cpu, vaddr eaddr,
+MMUAccessType access_type,
+hwaddr *raddrp, int *psizep, int *protp,
+int mmu_idx, bool guest_visible)
+{
+CPUState *cs = CPU(cpu);
+CPUPPCState *env = >env;
+hwaddr raddr;
+int prot, ret;
+
+if (env->mmu_model == POWERPC_MMU_BOOKE206) {
+ret = mmubooke206_get_physical_address(env, , , eaddr,
+   access_type, mmu_idx);
+} else {
+ret = mmubooke_get_physical_address(env, , , eaddr,
+access_type);
+}
+if (ret == 0) {
+*raddrp = raddr;
+*protp = prot;
+*psizep = TARGET_PAGE_BITS;
+return true;
+} else if (!guest_visible) {
+return false;
+}
+
+log_cpu_state_mask(CPU_LOG_MMU, cs, 0);
+if (access_type == MMU_INST_FETCH) {
+switch (ret) {
+case -1:
+/* No matches in page tables or TLB */
+switch (env->mmu_model) {
+case POWERPC_MMU_BOOKE206:
+booke206_update_mas_tlb_miss(env, eaddr, access_type, mmu_idx);
+/* fall through */
+case POWERPC_MMU_BOOKE:
+cs->exception_index = POWERPC_EXCP_ITLB;
+env->error_code = 0;
+env->spr[SPR_BOOKE_DEAR] = eaddr;
+env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, 
access_type);
+break;
+default:
+g_assert_not_reached();
+}
+break;
+case -2:
+/* Access rights violation */
+cs->exception_index = POWERPC_EXCP_ISI;
+env->error_code = 0;
+break;
+case -3:
+/* No execute protection violation */
+cs->exception_index = POWERPC_EXCP_ISI;
+env->spr[SPR_BOOKE_ESR] = 0;
+env->error_code = 0;
+break;
+}
+} else {
+switch (ret) {
+case -1:
+/* No matches in page tables or TLB */
+switch (env->mmu_model) {
+case POWERPC_MMU_BOOKE206:
+booke206_update_mas_tlb_miss(env, eaddr, access_type, mmu_idx);
+/* fall through */
+case POWERPC_MMU_BOOKE:
+cs->exception_index = POWERPC_EXCP_DTLB;
+env->error_code = 0;
+env->spr[SPR_BOOKE_DEAR] = eaddr;
+env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, 
access_type);
+break;
+

[PATCH v5 15/32] target/ppc/mmu_common.c: Replace hard coded constants in ppc_jumbo_xlate()

The "2" in booke206_update_mas_tlb_miss() call corresponds to
MMU_INST_FETCH which is the value of access_type in this branch;
mmubooke206_esr() only checks for MMU_DATA_STORE and it's called from
code access so using MMU_DATA_LOAD here seems wrong so replace it with
access_type here as well that yields the same result. This also makes
these calls the same as the data access branch further down.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/mmu_common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index f40481b4b1..6570b280ca 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -1287,13 +1287,13 @@ static bool ppc_jumbo_xlate(PowerPCCPU *cpu, vaddr 
eaddr,
 env->spr[SPR_40x_ESR] = 0x;
 break;
 case POWERPC_MMU_BOOKE206:
-booke206_update_mas_tlb_miss(env, eaddr, 2, mmu_idx);
+booke206_update_mas_tlb_miss(env, eaddr, access_type, mmu_idx);
 /* fall through */
 case POWERPC_MMU_BOOKE:
 cs->exception_index = POWERPC_EXCP_ITLB;
 env->error_code = 0;
 env->spr[SPR_BOOKE_DEAR] = eaddr;
-env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, 
MMU_DATA_LOAD);
+env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, 
access_type);
 break;
 case POWERPC_MMU_REAL:
 cpu_abort(cs, "PowerPC in real mode should never raise "
-- 
2.30.9

[PATCH v5 11/32] target/ppc/mmu_common.c: Split off real mode cases in get_physical_address_wtlb()

The real mode handling is identical in the remaining switch cases.
Split off these common real mode cases into a separate conditional to
leave only the else branches in the switch that are different.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/mmu_common.c | 34 +-
 1 file changed, 9 insertions(+), 25 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index 9f177b6976..b13150ce23 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -1172,7 +1172,6 @@ int get_physical_address_wtlb(CPUPPCState *env, mmu_ctx_t 
*ctx,
  MMUAccessType access_type, int type,
  int mmu_idx)
 {
-int ret = -1;
 bool real_mode;
 
 if (env->mmu_model == POWERPC_MMU_BOOKE) {
@@ -1184,38 +1183,23 @@ int get_physical_address_wtlb(CPUPPCState *env, 
mmu_ctx_t *ctx,
 
 real_mode = (type == ACCESS_CODE) ? !FIELD_EX64(env->msr, MSR, IR)
   : !FIELD_EX64(env->msr, MSR, DR);
+if (real_mode && (env->mmu_model == POWERPC_MMU_SOFT_6xx ||
+  env->mmu_model == POWERPC_MMU_SOFT_4xx ||
+  env->mmu_model == POWERPC_MMU_REAL)) {
+return check_physical(env, ctx, eaddr, access_type);
+}
 
 switch (env->mmu_model) {
 case POWERPC_MMU_SOFT_6xx:
-if (real_mode) {
-ret = check_physical(env, ctx, eaddr, access_type);
-} else {
-ret = mmu6xx_get_physical_address(env, ctx, eaddr, access_type,
-  type);
-}
-break;
-
+return mmu6xx_get_physical_address(env, ctx, eaddr, access_type, type);
 case POWERPC_MMU_SOFT_4xx:
-if (real_mode) {
-ret = check_physical(env, ctx, eaddr, access_type);
-} else {
-ret = mmu40x_get_physical_address(env, ctx, eaddr, access_type);
-}
-break;
+return mmu40x_get_physical_address(env, ctx, eaddr, access_type);
 case POWERPC_MMU_REAL:
-if (real_mode) {
-ret = check_physical(env, ctx, eaddr, access_type);
-} else {
-cpu_abort(env_cpu(env),
-  "PowerPC in real mode do not do any translation\n");
-}
-return -1;
+cpu_abort(env_cpu(env),
+  "PowerPC in real mode do not do any translation\n");
 default:
 cpu_abort(env_cpu(env), "Unknown or invalid MMU model\n");
-return -1;
 }
-
-return ret;
 }
 
 static void booke206_update_mas_tlb_miss(CPUPPCState *env, target_ulong 
address,
-- 
2.30.9

[PATCH v5 05/32] target/ppc/mmu_common.c: Drop cases for unimplemented MPC8xx MMU

Drop MPC8xx cases from get_physical_address_wtlb() and ppc_jumbo_xlate().
The default case would still catch this and abort the same way and
there is still a warning about it in ppc_tlb_invalidate_all() which is
called in ppc_cpu_reset_hold() so likely we never get here but to make
sure add a case to ppc_xlate() to the same effect.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/mmu_common.c | 12 ++--
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index 886fb6a657..3391df61cb 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -1219,10 +1219,6 @@ int get_physical_address_wtlb(CPUPPCState *env, 
mmu_ctx_t *ctx,
 ret = mmubooke206_get_physical_address(env, ctx, eaddr, access_type,
mmu_idx);
 break;
-case POWERPC_MMU_MPC8xx:
-/* XXX: TODO */
-cpu_abort(env_cpu(env), "MPC8xx MMU model is not implemented\n");
-break;
 case POWERPC_MMU_REAL:
 if (real_mode) {
 ret = check_physical(env, ctx, eaddr, access_type);
@@ -1353,8 +1349,6 @@ static bool ppc_jumbo_xlate(PowerPCCPU *cpu, vaddr eaddr,
 env->spr[SPR_BOOKE_DEAR] = eaddr;
 env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, 
MMU_DATA_LOAD);
 break;
-case POWERPC_MMU_MPC8xx:
-cpu_abort(cs, "MPC8xx MMU model is not implemented\n");
 case POWERPC_MMU_REAL:
 cpu_abort(cs, "PowerPC in real mode should never raise "
   "any MMU exceptions\n");
@@ -1427,9 +1421,6 @@ static bool ppc_jumbo_xlate(PowerPCCPU *cpu, vaddr eaddr,
 env->spr[SPR_40x_ESR] = 0x;
 }
 break;
-case POWERPC_MMU_MPC8xx:
-/* XXX: TODO */
-cpu_abort(cs, "MPC8xx MMU model is not implemented\n");
 case POWERPC_MMU_BOOKE206:
 booke206_update_mas_tlb_miss(env, eaddr, access_type, 
mmu_idx);
 /* fall through */
@@ -1539,7 +1530,8 @@ bool ppc_xlate(PowerPCCPU *cpu, vaddr eaddr, 
MMUAccessType access_type,
 case POWERPC_MMU_32B:
 return ppc_hash32_xlate(cpu, eaddr, access_type, raddrp,
psizep, protp, mmu_idx, guest_visible);
-
+case POWERPC_MMU_MPC8xx:
+cpu_abort(env_cpu(>env), "MPC8xx MMU model is not implemented\n");
 default:
 return ppc_jumbo_xlate(cpu, eaddr, access_type, raddrp,
psizep, protp, mmu_idx, guest_visible);
-- 
2.30.9

[PATCH v5 03/32] target/ppc/mmu_common.c: Remove unneeded local variable

In mmubooke_check_tlb() and mmubooke206_check_tlb() we can assign the
value of prot2 directly to the destination, no need to have a separate
local variable for it.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/mmu_common.c | 30 +-
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index f79e390306..09cbeb0052 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -628,8 +628,6 @@ static int mmubooke_check_tlb(CPUPPCState *env, 
ppcemb_tlb_t *tlb,
   hwaddr *raddr, int *prot, target_ulong address,
   MMUAccessType access_type, int i)
 {
-int prot2;
-
 if (!mmubooke_check_pid(env, tlb, raddr, address, i)) {
 qemu_log_mask(CPU_LOG_MMU, "%s: TLB entry not found\n", __func__);
 return -1;
@@ -644,17 +642,16 @@ static int mmubooke_check_tlb(CPUPPCState *env, 
ppcemb_tlb_t *tlb,
 }
 
 if (FIELD_EX64(env->msr, MSR, PR)) {
-prot2 = tlb->prot & 0xF;
+*prot = tlb->prot & 0xF;
 } else {
-prot2 = (tlb->prot >> 4) & 0xF;
+*prot = (tlb->prot >> 4) & 0xF;
 }
-*prot = prot2;
-if (prot2 & prot_for_access_type(access_type)) {
+if (*prot & prot_for_access_type(access_type)) {
 qemu_log_mask(CPU_LOG_MMU, "%s: good TLB!\n", __func__);
 return 0;
 }
 
-qemu_log_mask(CPU_LOG_MMU, "%s: no prot match: %x\n", __func__, prot2);
+qemu_log_mask(CPU_LOG_MMU, "%s: no prot match: %x\n", __func__, *prot);
 return access_type == MMU_INST_FETCH ? -3 : -2;
 }
 
@@ -795,7 +792,6 @@ static int mmubooke206_check_tlb(CPUPPCState *env, 
ppcmas_tlb_t *tlb,
  target_ulong address,
  MMUAccessType access_type, int mmu_idx)
 {
-int prot2 = 0;
 uint32_t epid;
 bool as, pr;
 bool use_epid = mmubooke206_get_as(env, mmu_idx, , , );
@@ -841,34 +837,34 @@ found_tlb:
 return -1;
 }
 
+*prot = 0;
 if (pr) {
 if (tlb->mas7_3 & MAS3_UR) {
-prot2 |= PAGE_READ;
+*prot |= PAGE_READ;
 }
 if (tlb->mas7_3 & MAS3_UW) {
-prot2 |= PAGE_WRITE;
+*prot |= PAGE_WRITE;
 }
 if (tlb->mas7_3 & MAS3_UX) {
-prot2 |= PAGE_EXEC;
+*prot |= PAGE_EXEC;
 }
 } else {
 if (tlb->mas7_3 & MAS3_SR) {
-prot2 |= PAGE_READ;
+*prot |= PAGE_READ;
 }
 if (tlb->mas7_3 & MAS3_SW) {
-prot2 |= PAGE_WRITE;
+*prot |= PAGE_WRITE;
 }
 if (tlb->mas7_3 & MAS3_SX) {
-prot2 |= PAGE_EXEC;
+*prot |= PAGE_EXEC;
 }
 }
-*prot = prot2;
-if (prot2 & prot_for_access_type(access_type)) {
+if (*prot & prot_for_access_type(access_type)) {
 qemu_log_mask(CPU_LOG_MMU, "%s: good TLB!\n", __func__);
 return 0;
 }
 
-qemu_log_mask(CPU_LOG_MMU, "%s: no prot match: %x\n", __func__, prot2);
+qemu_log_mask(CPU_LOG_MMU, "%s: no prot match: %x\n", __func__, *prot);
 return access_type == MMU_INST_FETCH ? -3 : -2;
 }
 
-- 
2.30.9

[PATCH v5 07/32] target/ppc/mmu_common.c: Move else branch to avoid large if block

In mmu6xx_get_physical_address() we have a large if block with a two
line else branch that effectively returns. Invert the condition and
move the else there to allow deindenting the large if block to make
the flow easier to follow.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/mmu_common.c | 71 -
 1 file changed, 34 insertions(+), 37 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index b7c07cf515..ba60b4902b 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -405,47 +405,44 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
mmu_ctx_t *ctx,
 ret = -1;
 if (!ds) {
 /* Check if instruction fetch is allowed, if needed */
-if (type != ACCESS_CODE || ctx->nx == 0) {
-/* Page address translation */
-qemu_log_mask(CPU_LOG_MMU, "htab_base " HWADDR_FMT_plx
-" htab_mask " HWADDR_FMT_plx
-" hash " HWADDR_FMT_plx "\n",
-ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu), hash);
-ctx->hash[0] = hash;
-ctx->hash[1] = ~hash;
-
-/* Initialize real address with an invalid value */
-ctx->raddr = (hwaddr)-1ULL;
-/* Software TLB search */
-ret = ppc6xx_tlb_check(env, ctx, eaddr, access_type);
+if (type == ACCESS_CODE && ctx->nx) {
+qemu_log_mask(CPU_LOG_MMU, "No access allowed\n");
+return -3;
+}
+/* Page address translation */
+qemu_log_mask(CPU_LOG_MMU, "htab_base " HWADDR_FMT_plx " htab_mask "
+  HWADDR_FMT_plx " hash " HWADDR_FMT_plx "\n",
+  ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu), 
hash);
+ctx->hash[0] = hash;
+ctx->hash[1] = ~hash;
+
+/* Initialize real address with an invalid value */
+ctx->raddr = (hwaddr)-1ULL;
+/* Software TLB search */
+ret = ppc6xx_tlb_check(env, ctx, eaddr, access_type);
 #if defined(DUMP_PAGE_TABLES)
-if (qemu_loglevel_mask(CPU_LOG_MMU)) {
-CPUState *cs = env_cpu(env);
-hwaddr curaddr;
-uint32_t a0, a1, a2, a3;
-
-qemu_log("Page table: " HWADDR_FMT_plx " len " HWADDR_FMT_plx
- "\n", ppc_hash32_hpt_base(cpu),
- ppc_hash32_hpt_mask(cpu) + 0x80);
-for (curaddr = ppc_hash32_hpt_base(cpu);
- curaddr < (ppc_hash32_hpt_base(cpu)
-+ ppc_hash32_hpt_mask(cpu) + 0x80);
- curaddr += 16) {
-a0 = ldl_phys(cs->as, curaddr);
-a1 = ldl_phys(cs->as, curaddr + 4);
-a2 = ldl_phys(cs->as, curaddr + 8);
-a3 = ldl_phys(cs->as, curaddr + 12);
-if (a0 != 0 || a1 != 0 || a2 != 0 || a3 != 0) {
-qemu_log(HWADDR_FMT_plx ": %08x %08x %08x %08x\n",
- curaddr, a0, a1, a2, a3);
-}
+if (qemu_loglevel_mask(CPU_LOG_MMU)) {
+CPUState *cs = env_cpu(env);
+hwaddr curaddr;
+uint32_t a0, a1, a2, a3;
+
+qemu_log("Page table: " HWADDR_FMT_plx " len " HWADDR_FMT_plx "\n",
+ ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu) + 
0x80);
+for (curaddr = ppc_hash32_hpt_base(cpu);
+ curaddr < (ppc_hash32_hpt_base(cpu)
++ ppc_hash32_hpt_mask(cpu) + 0x80);
+ curaddr += 16) {
+a0 = ldl_phys(cs->as, curaddr);
+a1 = ldl_phys(cs->as, curaddr + 4);
+a2 = ldl_phys(cs->as, curaddr + 8);
+a3 = ldl_phys(cs->as, curaddr + 12);
+if (a0 != 0 || a1 != 0 || a2 != 0 || a3 != 0) {
+qemu_log(HWADDR_FMT_plx ": %08x %08x %08x %08x\n",
+ curaddr, a0, a1, a2, a3);
 }
 }
-#endif
-} else {
-qemu_log_mask(CPU_LOG_MMU, "No access allowed\n");
-ret = -3;
 }
+#endif
 } else {
 qemu_log_mask(CPU_LOG_MMU, "direct store...\n");
 /* Direct-store segment : absolutely *BUGGY* for now */
-- 
2.30.9

[PATCH v5 18/32] target/ppc/mmu_common.c: Don't use mmu_ctx_t in mmubooke206_get_physical_address()

mmubooke206_get_physical_address() only uses the raddr and prot fields
from mmu_ctx_t. Pass these directly instead of using a ctx struct.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/mmu_common.c | 32 ++--
 1 file changed, 10 insertions(+), 22 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index 12dac9e63a..004ea2111d 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -836,27 +836,22 @@ found_tlb:
 return access_type == MMU_INST_FETCH ? -3 : -2;
 }
 
-static int mmubooke206_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx,
-target_ulong address,
+static int mmubooke206_get_physical_address(CPUPPCState *env, hwaddr *raddr,
+int *prot, target_ulong address,
 MMUAccessType access_type,
 int mmu_idx)
 {
 ppcmas_tlb_t *tlb;
-hwaddr raddr;
-int i, j, ret;
-
-ret = -1;
-raddr = (hwaddr)-1ULL;
+int i, j, ret = -1;
 
 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 int ways = booke206_tlb_ways(env, i);
-
 for (j = 0; j < ways; j++) {
 tlb = booke206_get_tlbm(env, i, address, j);
 if (!tlb) {
 continue;
 }
-ret = mmubooke206_check_tlb(env, tlb, , >prot, address,
+ret = mmubooke206_check_tlb(env, tlb, raddr, prot, address,
 access_type, mmu_idx);
 if (ret != -1) {
 goto found_tlb;
@@ -866,17 +861,10 @@ static int mmubooke206_get_physical_address(CPUPPCState 
*env, mmu_ctx_t *ctx,
 
 found_tlb:
 
-if (ret >= 0) {
-ctx->raddr = raddr;
- qemu_log_mask(CPU_LOG_MMU, "%s: access granted " TARGET_FMT_lx
-   " => " HWADDR_FMT_plx " %d %d\n", __func__, address,
-   ctx->raddr, ctx->prot, ret);
-} else {
- qemu_log_mask(CPU_LOG_MMU, "%s: access refused " TARGET_FMT_lx
-   " => " HWADDR_FMT_plx " %d %d\n", __func__, address,
-   raddr, ctx->prot, ret);
-}
-
+qemu_log_mask(CPU_LOG_MMU, "%s: access %s " TARGET_FMT_lx " => "
+  HWADDR_FMT_plx " %d %d\n", __func__,
+  ret < 0 ? "refused" : "granted", address,
+  ret < 0 ? -1 : *raddr, ret == -1 ? 0 : *prot, ret);
 return ret;
 }
 
@@ -1135,8 +1123,8 @@ int get_physical_address_wtlb(CPUPPCState *env, mmu_ctx_t 
*ctx,
 return mmubooke_get_physical_address(env, >raddr, >prot,
  eaddr, access_type);
 } else if (env->mmu_model == POWERPC_MMU_BOOKE206) {
-return mmubooke206_get_physical_address(env, ctx, eaddr, access_type,
-mmu_idx);
+return mmubooke206_get_physical_address(env, >raddr, >prot,
+eaddr, access_type, mmu_idx);
 }
 
 real_mode = (type == ACCESS_CODE) ? !FIELD_EX64(env->msr, MSR, IR)
-- 
2.30.9

[PATCH v5 10/32] target/ppc/mmu_common.c: Split out BookE cases before checking real mode

BookE does not have real mode so split off and handle it first in
get_physical_address_wtlb() before checking for real mode for other
MMU models.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/mmu_common.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index 03d9e6bfda..9f177b6976 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -1175,6 +1175,13 @@ int get_physical_address_wtlb(CPUPPCState *env, 
mmu_ctx_t *ctx,
 int ret = -1;
 bool real_mode;
 
+if (env->mmu_model == POWERPC_MMU_BOOKE) {
+return mmubooke_get_physical_address(env, ctx, eaddr, access_type);
+} else if (env->mmu_model == POWERPC_MMU_BOOKE206) {
+return mmubooke206_get_physical_address(env, ctx, eaddr, access_type,
+mmu_idx);
+}
+
 real_mode = (type == ACCESS_CODE) ? !FIELD_EX64(env->msr, MSR, IR)
   : !FIELD_EX64(env->msr, MSR, DR);
 
@@ -1195,13 +1202,6 @@ int get_physical_address_wtlb(CPUPPCState *env, 
mmu_ctx_t *ctx,
 ret = mmu40x_get_physical_address(env, ctx, eaddr, access_type);
 }
 break;
-case POWERPC_MMU_BOOKE:
-ret = mmubooke_get_physical_address(env, ctx, eaddr, access_type);
-break;
-case POWERPC_MMU_BOOKE206:
-ret = mmubooke206_get_physical_address(env, ctx, eaddr, access_type,
-   mmu_idx);
-break;
 case POWERPC_MMU_REAL:
 if (real_mode) {
 ret = check_physical(env, ctx, eaddr, access_type);
-- 
2.30.9

[PATCH v5 12/32] target/ppc/mmu_common.c: Inline and remove check_physical()

This function just does two assignments and and unnecessary check that
is always true so inline it in the only caller left and remove it.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/mmu_common.c | 26 +++---
 1 file changed, 3 insertions(+), 23 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index b13150ce23..2f412dd7c5 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -1145,28 +1145,6 @@ void dump_mmu(CPUPPCState *env)
 }
 }
 
-static int check_physical(CPUPPCState *env, mmu_ctx_t *ctx, target_ulong eaddr,
-  MMUAccessType access_type)
-{
-ctx->raddr = eaddr;
-ctx->prot = PAGE_READ | PAGE_EXEC;
-
-switch (env->mmu_model) {
-case POWERPC_MMU_SOFT_6xx:
-case POWERPC_MMU_SOFT_4xx:
-case POWERPC_MMU_REAL:
-case POWERPC_MMU_BOOKE:
-ctx->prot |= PAGE_WRITE;
-break;
-
-default:
-/* Caller's checks mean we should never get here for other models */
-g_assert_not_reached();
-}
-
-return 0;
-}
-
 int get_physical_address_wtlb(CPUPPCState *env, mmu_ctx_t *ctx,
  target_ulong eaddr,
  MMUAccessType access_type, int type,
@@ -1186,7 +1164,9 @@ int get_physical_address_wtlb(CPUPPCState *env, mmu_ctx_t 
*ctx,
 if (real_mode && (env->mmu_model == POWERPC_MMU_SOFT_6xx ||
   env->mmu_model == POWERPC_MMU_SOFT_4xx ||
   env->mmu_model == POWERPC_MMU_REAL)) {
-return check_physical(env, ctx, eaddr, access_type);
+ctx->raddr = eaddr;
+ctx->prot = PAGE_RWX;
+return 0;
 }
 
 switch (env->mmu_model) {
-- 
2.30.9

[PATCH v5 30/32] target/ppc: Add a function to check for page protection bit

Checking if a page protection bit is set for a given access type is a
common operation. Add a function to avoid repeating the same check at
multiple places. As this relies on access type and page protection bit
values having certain relation also add an assert to ensure that this
assumption holds.

Signed-off-by: BALATON Zoltan 
---
 target/ppc/cpu_init.c|  5 +
 target/ppc/internal.h| 23 +--
 target/ppc/mmu-hash32.c  |  6 +++---
 target/ppc/mmu-hash64.c  |  2 +-
 target/ppc/mmu-radix64.c |  2 +-
 target/ppc/mmu_common.c  | 26 +-
 6 files changed, 28 insertions(+), 36 deletions(-)

diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 92c71b2a09..d3b92d9f0e 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -7388,6 +7388,11 @@ static void ppc_cpu_class_init(ObjectClass *oc, void 
*data)
 #ifndef CONFIG_USER_ONLY
 cc->sysemu_ops = _sysemu_ops;
 INTERRUPT_STATS_PROVIDER_CLASS(oc)->get_statistics = ppc_get_irq_stats;
+
+/* check_prot_access_type relies on MMU access and PAGE bits relations */
+qemu_build_assert(MMU_DATA_LOAD == 0 && MMU_DATA_STORE == 1 &&
+  MMU_INST_FETCH == 2 && PAGE_READ == 1 &&
+  PAGE_WRITE == 2 && PAGE_EXEC == 4);
 #endif
 
 cc->gdb_num_core_regs = 71;
diff --git a/target/ppc/internal.h b/target/ppc/internal.h
index 46176c4711..9cdb797dc0 100644
--- a/target/ppc/internal.h
+++ b/target/ppc/internal.h
@@ -234,27 +234,14 @@ void destroy_ppc_opcodes(PowerPCCPU *cpu);
 void ppc_gdb_init(CPUState *cs, PowerPCCPUClass *ppc);
 const gchar *ppc_gdb_arch_name(CPUState *cs);
 
-/**
- * prot_for_access_type:
- * @access_type: Access type
- *
- * Return the protection bit required for the given access type.
- */
-static inline int prot_for_access_type(MMUAccessType access_type)
+#ifndef CONFIG_USER_ONLY
+
+/* Check if permission bit required for the access_type is set in prot */
+static inline int check_prot_access_type(int prot, MMUAccessType access_type)
 {
-switch (access_type) {
-case MMU_INST_FETCH:
-return PAGE_EXEC;
-case MMU_DATA_LOAD:
-return PAGE_READ;
-case MMU_DATA_STORE:
-return PAGE_WRITE;
-}
-g_assert_not_reached();
+return prot & (1 << access_type);
 }
 
-#ifndef CONFIG_USER_ONLY
-
 /* PowerPC MMU emulation */
 static inline int ppc_hash32_pp_prot(int key, int pp, int nx)
 {
diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
index b5d7aeed4e..0b54f923d0 100644
--- a/target/ppc/mmu-hash32.c
+++ b/target/ppc/mmu-hash32.c
@@ -213,7 +213,7 @@ static bool ppc_hash32_direct_store(PowerPCCPU *cpu, 
target_ulong sr,
 }
 
 *prot = key ? PAGE_READ | PAGE_WRITE : PAGE_READ;
-if (*prot & prot_for_access_type(access_type)) {
+if (check_prot_access_type(*prot, access_type)) {
 *raddr = eaddr;
 return true;
 }
@@ -364,7 +364,7 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, 
MMUAccessType access_type,
 if (env->nb_BATs != 0) {
 raddr = ppc_hash32_bat_lookup(cpu, eaddr, access_type, protp, mmu_idx);
 if (raddr != -1) {
-if (prot_for_access_type(access_type) & ~*protp) {
+if (!check_prot_access_type(*protp, access_type)) {
 if (guest_visible) {
 if (access_type == MMU_INST_FETCH) {
 cs->exception_index = POWERPC_EXCP_ISI;
@@ -432,7 +432,7 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, 
MMUAccessType access_type,
 
 prot = ppc_hash32_pte_prot(mmu_idx, sr, pte);
 
-if (prot_for_access_type(access_type) & ~prot) {
+if (!check_prot_access_type(prot, access_type)) {
 /* Access right violation */
 qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n");
 if (guest_visible) {
diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
index 0966422a55..d9626f6aab 100644
--- a/target/ppc/mmu-hash64.c
+++ b/target/ppc/mmu-hash64.c
@@ -1097,7 +1097,7 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, 
MMUAccessType access_type,
 amr_prot = ppc_hash64_amr_prot(cpu, pte);
 prot = exec_prot & pp_prot & amr_prot;
 
-need_prot = prot_for_access_type(access_type);
+need_prot = check_prot_access_type(PAGE_RWX, access_type);
 if (need_prot & ~prot) {
 /* Access right violation */
 qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n");
diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 395ce3b782..2c5ade5cea 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -209,7 +209,7 @@ static bool ppc_radix64_check_prot(PowerPCCPU *cpu, 
MMUAccessType access_type,
 }
 
 /* Check if requested access type is allowed */
-if (prot_for_access_type(access_type) & ~*prot) {
+if (!check_prot_access_type(*prot, access_type)) {
 /* Page Protected for that Access */
 *fault_cause |= access_type == MMU_INST_FETCH ? SRR1_NOEXEC_GUARD :

Re: [PATCH 1/3] target/riscv: Save counter values during countinhibit update

2024-05-09 Thread Atish Kumar Patra

On Thu, May 2, 2024 at 5:39 AM Andrew Jones  wrote:
>
> On Tue, Apr 30, 2024 at 03:00:45PM GMT, Daniel Henrique Barboza wrote:
> >
> >
> > On 4/29/24 16:28, Atish Patra wrote:
> > > Currently, if a counter monitoring cycle/instret is stopped via
> > > mcountinhibit we just update the state while the value is saved
> > > during the next read. This is not accurate as the read may happen
> > > many cycles after the counter is stopped. Ideally, the read should
> > > return the value saved when the counter is stopped.
> > >
> > > Thus, save the value of the counter during the inhibit update
> > > operation and return that value during the read if corresponding bit
> > > in mcountihibit is set.
> > >
> > > Signed-off-by: Atish Patra 
> > > ---
> > >   target/riscv/cpu.h |  1 -
> > >   target/riscv/csr.c | 32 
> > >   target/riscv/machine.c |  1 -
> > >   3 files changed, 20 insertions(+), 14 deletions(-)
> > >
> > > diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> > > index 3b1a02b9449a..09bbf7ce9880 100644
> > > --- a/target/riscv/cpu.h
> > > +++ b/target/riscv/cpu.h
> > > @@ -153,7 +153,6 @@ typedef struct PMUCTRState {
> > >   target_ulong mhpmcounter_prev;
> > >   /* Snapshort value of a counter in RV32 */
> > >   target_ulong mhpmcounterh_prev;
> > > -bool started;
> > >   /* Value beyond UINT32_MAX/UINT64_MAX before overflow interrupt 
> > > trigger */
> > >   target_ulong irq_overflow_left;
> > >   } PMUCTRState;
> > > diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> > > index 726096444fae..68ca31aff47d 100644
> > > --- a/target/riscv/csr.c
> > > +++ b/target/riscv/csr.c
> > > @@ -929,17 +929,11 @@ static RISCVException 
> > > riscv_pmu_read_ctr(CPURISCVState *env, target_ulong *val,
> > >   if (get_field(env->mcountinhibit, BIT(ctr_idx))) {
> > >   /*
> > > - * Counter should not increment if inhibit bit is set. We can't 
> > > really
> > > - * stop the icount counting. Just return the counter value 
> > > written by
> > > - * the supervisor to indicate that counter was not incremented.
> > > + * Counter should not increment if inhibit bit is set. Just 
> > > return the
> > > + * current counter value.
> > >*/
> > > -if (!counter->started) {
> > > -*val = ctr_val;
> > > -return RISCV_EXCP_NONE;
> > > -} else {
> > > -/* Mark that the counter has been stopped */
> > > -counter->started = false;
> > > -}
> > > + *val = ctr_val;
> > > + return RISCV_EXCP_NONE;
> > >   }
> > >   /*
> > > @@ -1973,9 +1967,23 @@ static RISCVException 
> > > write_mcountinhibit(CPURISCVState *env, int csrno,
> > >   /* Check if any other counter is also monitoring 
> > > cycles/instructions */
> > >   for (cidx = 0; cidx < RV_MAX_MHPMCOUNTERS; cidx++) {
> > > -if (!get_field(env->mcountinhibit, BIT(cidx))) {
> > >   counter = >pmu_ctrs[cidx];
> > > -counter->started = true;
> > > +if (get_field(env->mcountinhibit, BIT(cidx)) && (val & 
> > > BIT(cidx))) {
> > > +   /*
> > > + * Update the counter value for cycle/instret as we can't 
> > > stop the
> > > + * host ticks. But we should show the current value at this 
> > > moment.
> > > + */
> > > +if (riscv_pmu_ctr_monitor_cycles(env, cidx) ||
> > > +riscv_pmu_ctr_monitor_instructions(env, cidx)) {
> > > +counter->mhpmcounter_val = get_ticks(false) -
> > > +   counter->mhpmcounter_prev +
> > > +   counter->mhpmcounter_val;
> > > +if (riscv_cpu_mxl(env) == MXL_RV32) {
> > > +counter->mhpmcounterh_val = get_ticks(false) -
> > > +
> > > counter->mhpmcounterh_prev +
> > > +
> > > counter->mhpmcounterh_val;
> > > +   }
> > > +}
> > >   }
> > >   }
> > > diff --git a/target/riscv/machine.c b/target/riscv/machine.c
> > > index 76f2150f78b5..3e0f2dd2ce2a 100644
> > > --- a/target/riscv/machine.c
> > > +++ b/target/riscv/machine.c
> > > @@ -328,7 +328,6 @@ static const VMStateDescription vmstate_pmu_ctr_state 
> > > = {
> > >   VMSTATE_UINTTL(mhpmcounterh_val, PMUCTRState),
> > >   VMSTATE_UINTTL(mhpmcounter_prev, PMUCTRState),
> > >   VMSTATE_UINTTL(mhpmcounterh_prev, PMUCTRState),
> > > -VMSTATE_BOOL(started, PMUCTRState),
> >
> > Unfortunately we can't remove fields from the VMStateDescription without 
> > breaking
> > migration backward compatibility. Older QEMUs will attempt to read a field 
> > that
> > doesn't exist and migration will fail.
> >
> > I'm assuming that we care about backward compat. If we're not up to this 
> > point yet
> > then we

[PATCH v5 22/32] target/ppc/mmu_common.c: Eliminate get_physical_address_wtlb()

Inline get_physical_address_wtlb() in its only caller and remove it.

Signed-off-by: BALATON Zoltan 
---
 target/ppc/internal.h   |  5 +---
 target/ppc/mmu_common.c | 66 +
 2 files changed, 28 insertions(+), 43 deletions(-)

diff --git a/target/ppc/internal.h b/target/ppc/internal.h
index ffdf6c075d..5b28e8f3b0 100644
--- a/target/ppc/internal.h
+++ b/target/ppc/internal.h
@@ -297,10 +297,7 @@ typedef struct mmu_ctx_t mmu_ctx_t;
 bool ppc_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
   hwaddr *raddrp, int *psizep, int *protp,
   int mmu_idx, bool guest_visible);
-int get_physical_address_wtlb(CPUPPCState *env, mmu_ctx_t *ctx,
- target_ulong eaddr,
- MMUAccessType access_type, int type,
- int mmu_idx);
+
 /* Software driven TLB helpers */
 int ppc6xx_tlb_getnum(CPUPPCState *env, target_ulong eaddr,
 int way, int is_code);
diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index d0d49c696b..d7e4baa9cf 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -1072,35 +1072,6 @@ void dump_mmu(CPUPPCState *env)
 }
 }
 
-int get_physical_address_wtlb(CPUPPCState *env, mmu_ctx_t *ctx,
- target_ulong eaddr,
- MMUAccessType access_type, int type,
- int mmu_idx)
-{
-bool real_mode = (type == ACCESS_CODE) ? !FIELD_EX64(env->msr, MSR, IR)
-   : !FIELD_EX64(env->msr, MSR, DR);
-if (real_mode) {
-ctx->raddr = eaddr;
-ctx->prot = PAGE_RWX;
-return 0;
-}
-
-switch (env->mmu_model) {
-case POWERPC_MMU_SOFT_6xx:
-return mmu6xx_get_physical_address(env, ctx, eaddr, access_type, type);
-case POWERPC_MMU_SOFT_4xx:
-/* avoid maybe used uninitialized warnings for unused fields in ctx */
-memset(ctx, 0, sizeof(*ctx));
-return mmu40x_get_physical_address(env, >raddr, >prot, eaddr,
-   access_type);
-case POWERPC_MMU_REAL:
-cpu_abort(env_cpu(env),
-  "PowerPC in real mode do not do any translation\n");
-default:
-cpu_abort(env_cpu(env), "Unknown or invalid MMU model\n");
-}
-}
-
 static void booke206_update_mas_tlb_miss(CPUPPCState *env, target_ulong 
address,
  MMUAccessType access_type, int 
mmu_idx)
 {
@@ -1255,6 +1226,15 @@ static bool ppc_jumbo_xlate(PowerPCCPU *cpu, vaddr eaddr,
 int type;
 int ret;
 
+/* If in real_mode then no translation */
+if (access_type == MMU_INST_FETCH ? !FIELD_EX64(env->msr, MSR, IR)
+  : !FIELD_EX64(env->msr, MSR, DR)) {
+*raddrp = eaddr;
+*protp = PAGE_RWX;
+*psizep = TARGET_PAGE_BITS;
+return true;
+}
+
 if (access_type == MMU_INST_FETCH) {
 /* code access */
 type = ACCESS_CODE;
@@ -1265,8 +1245,22 @@ static bool ppc_jumbo_xlate(PowerPCCPU *cpu, vaddr eaddr,
 type = ACCESS_INT;
 }
 
-ret = get_physical_address_wtlb(env, , eaddr, access_type,
-type, mmu_idx);
+switch (env->mmu_model) {
+case POWERPC_MMU_SOFT_6xx:
+ret = mmu6xx_get_physical_address(env, , eaddr, access_type, type);
+break;
+case POWERPC_MMU_SOFT_4xx:
+/* avoid maybe used uninitialized warnings for unused fields in ctx */
+memset(, 0, sizeof(ctx));
+ret = mmu40x_get_physical_address(env, , , eaddr,
+  access_type);
+break;
+case POWERPC_MMU_REAL:
+cpu_abort(cs, "PowerPC in real mode do not do any translation\n");
+default:
+cpu_abort(cs, "Unknown or invalid MMU model\n");
+}
+
 if (ret == 0) {
 *raddrp = ctx.raddr;
 *protp = ctx.prot;
@@ -1294,11 +1288,8 @@ static bool ppc_jumbo_xlate(PowerPCCPU *cpu, vaddr eaddr,
 env->spr[SPR_40x_DEAR] = eaddr;
 env->spr[SPR_40x_ESR] = 0x;
 break;
-case POWERPC_MMU_REAL:
-cpu_abort(cs, "PowerPC in real mode should never raise "
-  "any MMU exceptions\n");
 default:
-cpu_abort(cs, "Unknown or invalid MMU model\n");
+g_assert_not_reached();
 }
 break;
 case -2:
@@ -1350,11 +1341,8 @@ static bool ppc_jumbo_xlate(PowerPCCPU *cpu, vaddr eaddr,
 env->spr[SPR_40x_ESR] = 0x;
 }
 break;
-case POWERPC_MMU_REAL:
-cpu_abort(cs, "PowerPC in real mode should never raise "
-  "any MMU

[PATCH v5 28/32] target/ppc/mmu-hash32.c: Drop a local variable

In ppc_hash32_xlate() the value of need_prop is checked in two places
but precalculating it does not help because when we reach the first
check we always return and not reach the second place so the value
will only be used once. We can drop the local variable and calculate
it when needed, which makes these checks using it similar to other
places with such checks.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/mmu-hash32.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
index 960751a50e..b5d7aeed4e 100644
--- a/target/ppc/mmu-hash32.c
+++ b/target/ppc/mmu-hash32.c
@@ -347,7 +347,6 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, 
MMUAccessType access_type,
 hwaddr pte_offset;
 ppc_hash_pte32_t pte;
 int prot;
-int need_prot;
 hwaddr raddr;
 
 /* There are no hash32 large pages. */
@@ -361,13 +360,11 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, 
MMUAccessType access_type,
 return true;
 }
 
-need_prot = prot_for_access_type(access_type);
-
 /* 2. Check Block Address Translation entries (BATs) */
 if (env->nb_BATs != 0) {
 raddr = ppc_hash32_bat_lookup(cpu, eaddr, access_type, protp, mmu_idx);
 if (raddr != -1) {
-if (need_prot & ~*protp) {
+if (prot_for_access_type(access_type) & ~*protp) {
 if (guest_visible) {
 if (access_type == MMU_INST_FETCH) {
 cs->exception_index = POWERPC_EXCP_ISI;
@@ -435,7 +432,7 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, 
MMUAccessType access_type,
 
 prot = ppc_hash32_pte_prot(mmu_idx, sr, pte);
 
-if (need_prot & ~prot) {
+if (prot_for_access_type(access_type) & ~prot) {
 /* Access right violation */
 qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n");
 if (guest_visible) {
-- 
2.30.9

[PATCH v5 14/32] target/ppc/mmu_common.c: Deindent ppc_jumbo_xlate()

Instead of putting a large block of code in an if, invert the
condition and return early to be able to deindent the code block.

Signed-off-by: BALATON Zoltan 
Acked-by: Nicholas Piggin 
---
 target/ppc/mmu_common.c | 319 
 1 file changed, 159 insertions(+), 160 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index 124148b3da..f40481b4b1 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -1264,187 +1264,186 @@ static bool ppc_jumbo_xlate(PowerPCCPU *cpu, vaddr 
eaddr,
 *protp = ctx.prot;
 *psizep = TARGET_PAGE_BITS;
 return true;
+} else if (!guest_visible) {
+return false;
 }
 
-if (guest_visible) {
-log_cpu_state_mask(CPU_LOG_MMU, cs, 0);
-if (type == ACCESS_CODE) {
-switch (ret) {
-case -1:
-/* No matches in page tables or TLB */
-switch (env->mmu_model) {
-case POWERPC_MMU_SOFT_6xx:
-cs->exception_index = POWERPC_EXCP_IFTLB;
-env->error_code = 1 << 18;
-env->spr[SPR_IMISS] = eaddr;
-env->spr[SPR_ICMP] = 0x8000 | ctx.ptem;
-goto tlb_miss;
-case POWERPC_MMU_SOFT_4xx:
-cs->exception_index = POWERPC_EXCP_ITLB;
-env->error_code = 0;
-env->spr[SPR_40x_DEAR] = eaddr;
-env->spr[SPR_40x_ESR] = 0x;
-break;
-case POWERPC_MMU_BOOKE206:
-booke206_update_mas_tlb_miss(env, eaddr, 2, mmu_idx);
-/* fall through */
-case POWERPC_MMU_BOOKE:
-cs->exception_index = POWERPC_EXCP_ITLB;
-env->error_code = 0;
-env->spr[SPR_BOOKE_DEAR] = eaddr;
-env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, 
MMU_DATA_LOAD);
-break;
-case POWERPC_MMU_REAL:
-cpu_abort(cs, "PowerPC in real mode should never raise "
-  "any MMU exceptions\n");
-default:
-cpu_abort(cs, "Unknown or invalid MMU model\n");
-}
+log_cpu_state_mask(CPU_LOG_MMU, cs, 0);
+if (type == ACCESS_CODE) {
+switch (ret) {
+case -1:
+/* No matches in page tables or TLB */
+switch (env->mmu_model) {
+case POWERPC_MMU_SOFT_6xx:
+cs->exception_index = POWERPC_EXCP_IFTLB;
+env->error_code = 1 << 18;
+env->spr[SPR_IMISS] = eaddr;
+env->spr[SPR_ICMP] = 0x8000 | ctx.ptem;
+goto tlb_miss;
+case POWERPC_MMU_SOFT_4xx:
+cs->exception_index = POWERPC_EXCP_ITLB;
+env->error_code = 0;
+env->spr[SPR_40x_DEAR] = eaddr;
+env->spr[SPR_40x_ESR] = 0x;
 break;
-case -2:
-/* Access rights violation */
-cs->exception_index = POWERPC_EXCP_ISI;
-if ((env->mmu_model == POWERPC_MMU_BOOKE) ||
-(env->mmu_model == POWERPC_MMU_BOOKE206)) {
-env->error_code = 0;
-} else {
-env->error_code = 0x0800;
-}
+case POWERPC_MMU_BOOKE206:
+booke206_update_mas_tlb_miss(env, eaddr, 2, mmu_idx);
+/* fall through */
+case POWERPC_MMU_BOOKE:
+cs->exception_index = POWERPC_EXCP_ITLB;
+env->error_code = 0;
+env->spr[SPR_BOOKE_DEAR] = eaddr;
+env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, 
MMU_DATA_LOAD);
 break;
-case -3:
-/* No execute protection violation */
-if ((env->mmu_model == POWERPC_MMU_BOOKE) ||
-(env->mmu_model == POWERPC_MMU_BOOKE206)) {
-env->spr[SPR_BOOKE_ESR] = 0x;
-env->error_code = 0;
+case POWERPC_MMU_REAL:
+cpu_abort(cs, "PowerPC in real mode should never raise "
+  "any MMU exceptions\n");
+default:
+cpu_abort(cs, "Unknown or invalid MMU model\n");
+}
+break;
+case -2:
+/* Access rights violation */
+cs->exception_index = POWERPC_EXCP_ISI;
+if ((env->mmu_model == POWERPC_MMU_BOOKE) ||
+(env->mmu_model == POWERPC_MMU_BOOKE206)) {
+env->error_code = 0;
+} else {
+env->error_code = 0x0800;
+}
+break;
+case -3:
+/* No execute protection violation */
+if ((env->mmu_model ==

[PATCH v5 31/32] target/ppc: Move out BookE and related MMU functions from mmu_common.c

Add a new mmu-booke.c file for BookE and related MMU bits from
mmu_common.c.

Signed-off-by: BALATON Zoltan 
Acked-by: Nicholas Piggin 
---
 target/ppc/cpu.h|   4 -
 target/ppc/meson.build  |   1 +
 target/ppc/mmu-booke.c  | 531 
 target/ppc/mmu-booke.h  |  17 ++
 target/ppc/mmu_common.c | 508 +-
 target/ppc/mmu_helper.c |   1 +
 6 files changed, 551 insertions(+), 511 deletions(-)
 create mode 100644 target/ppc/mmu-booke.c
 create mode 100644 target/ppc/mmu-booke.h

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index cfb3ba5ac8..92b50a1be2 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1606,10 +1606,6 @@ void ppc_tlb_invalidate_all(CPUPPCState *env);
 void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr);
 void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp);
 void cpu_ppc_set_1lpar(PowerPCCPU *cpu);
-int ppcmas_tlb_check(CPUPPCState *env, ppcmas_tlb_t *tlb, hwaddr *raddrp,
- target_ulong address, uint32_t pid);
-int ppcemb_tlb_search(CPUPPCState *env, target_ulong address, uint32_t pid);
-hwaddr booke206_tlb_to_page_size(CPUPPCState *env, ppcmas_tlb_t *tlb);
 #endif
 
 void ppc_store_fpscr(CPUPPCState *env, target_ulong val);
diff --git a/target/ppc/meson.build b/target/ppc/meson.build
index 0b89f9b89f..db3b7a0c33 100644
--- a/target/ppc/meson.build
+++ b/target/ppc/meson.build
@@ -37,6 +37,7 @@ ppc_system_ss.add(files(
   'arch_dump.c',
   'machine.c',
   'mmu-hash32.c',
+  'mmu-booke.c',
   'mmu_common.c',
   'ppc-qmp-cmds.c',
 ))
diff --git a/target/ppc/mmu-booke.c b/target/ppc/mmu-booke.c
new file mode 100644
index 00..55e5dd7c6b
--- /dev/null
+++ b/target/ppc/mmu-booke.c
@@ -0,0 +1,531 @@
+/*
+ *  PowerPC BookE MMU, TLB emulation helpers for QEMU.
+ *
+ *  Copyright (c) 2003-2007 Jocelyn Mayer
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+
+#include "qemu/osdep.h"
+#include "exec/page-protection.h"
+#include "exec/log.h"
+#include "cpu.h"
+#include "internal.h"
+#include "mmu-booke.h"
+
+/* Generic TLB check function for embedded PowerPC implementations */
+static bool ppcemb_tlb_check(CPUPPCState *env, ppcemb_tlb_t *tlb,
+ hwaddr *raddrp,
+ target_ulong address, uint32_t pid, int i)
+{
+target_ulong mask;
+
+/* Check valid flag */
+if (!(tlb->prot & PAGE_VALID)) {
+return false;
+}
+mask = ~(tlb->size - 1);
+qemu_log_mask(CPU_LOG_MMU, "%s: TLB %d address " TARGET_FMT_lx
+  " PID %u <=> " TARGET_FMT_lx " " TARGET_FMT_lx " %u %x\n",
+  __func__, i, address, pid, tlb->EPN,
+  mask, (uint32_t)tlb->PID, tlb->prot);
+/* Check PID */
+if (tlb->PID != 0 && tlb->PID != pid) {
+return false;
+}
+/* Check effective address */
+if ((address & mask) != tlb->EPN) {
+return false;
+}
+*raddrp = (tlb->RPN & mask) | (address & ~mask);
+return true;
+}
+
+/* Generic TLB search function for PowerPC embedded implementations */
+int ppcemb_tlb_search(CPUPPCState *env, target_ulong address, uint32_t pid)
+{
+ppcemb_tlb_t *tlb;
+hwaddr raddr;
+int i;
+
+for (i = 0; i < env->nb_tlb; i++) {
+tlb = >tlb.tlbe[i];
+if (ppcemb_tlb_check(env, tlb, , address, pid, i)) {
+return i;
+}
+}
+return -1;
+}
+
+int mmu40x_get_physical_address(CPUPPCState *env, hwaddr *raddr, int *prot,
+target_ulong address,
+MMUAccessType access_type)
+{
+ppcemb_tlb_t *tlb;
+int i, ret, zsel, zpr, pr;
+
+ret = -1;
+pr = FIELD_EX64(env->msr, MSR, PR);
+for (i = 0; i < env->nb_tlb; i++) {
+tlb = >tlb.tlbe[i];
+if (!ppcemb_tlb_check(env, tlb, raddr, address,
+  env->spr[SPR_40x_PID], i)) {
+continue;
+}
+zsel = (tlb->attr >> 4) & 0xF;
+zpr = (env->spr[SPR_40x_ZPR] >> (30 - (2 * zsel))) & 0x3;
+qemu_log_mask(CPU_LOG_MMU,
+  "%s: TLB %d zsel %d zpr %d ty %d attr %08x\n",
+  __func__, i, zsel, zpr, access_type, tlb->attr);
+/* Check execute enable bit */
+switch (zpr) {
+case 0x2:
+if (pr != 0)

[PATCH v5 01/32] target/ppc: Remove unused helper

The helper_rac function is defined but not used, remove it.

Fixes: 005b69fdcc (target/ppc: Remove PowerPC 601 CPUs)
Signed-off-by: BALATON Zoltan 
Reviwed-by: Nicholas Piggin 
---
 target/ppc/helper.h |  2 --
 target/ppc/mmu_helper.c | 24 
 2 files changed, 26 deletions(-)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 86f97ee1e7..f769e01c3d 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -700,8 +700,6 @@ DEF_HELPER_2(book3s_msgclr, void, env, tl)
 
 DEF_HELPER_4(dlmzb, tl, env, tl, tl, i32)
 #if !defined(CONFIG_USER_ONLY)
-DEF_HELPER_2(rac, tl, env, tl)
-
 DEF_HELPER_2(load_dcr, tl, env, tl)
 DEF_HELPER_3(store_dcr, void, env, tl, tl)
 #endif
diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c
index b35a93c198..421e777ee6 100644
--- a/target/ppc/mmu_helper.c
+++ b/target/ppc/mmu_helper.c
@@ -596,30 +596,6 @@ void helper_6xx_tlbi(CPUPPCState *env, target_ulong EPN)
 do_6xx_tlb(env, EPN, 1);
 }
 
-/*/
-/* PowerPC 601 specific instructions (POWER bridge) */
-
-target_ulong helper_rac(CPUPPCState *env, target_ulong addr)
-{
-mmu_ctx_t ctx;
-int nb_BATs;
-target_ulong ret = 0;
-
-/*
- * We don't have to generate many instances of this instruction,
- * as rac is supervisor only.
- *
- * XXX: FIX THIS: Pretend we have no BAT
- */
-nb_BATs = env->nb_BATs;
-env->nb_BATs = 0;
-if (get_physical_address_wtlb(env, , addr, 0, ACCESS_INT, 0) == 0) {
-ret = ctx.raddr;
-}
-env->nb_BATs = nb_BATs;
-return ret;
-}
-
 static inline target_ulong booke_tlb_to_page_size(int size)
 {
 return 1024 << (2 * size);
-- 
2.30.9

[PATCH v5 02/32] target/ppc/mmu_common.c: Move calculation of a value closer to its usage

In mmubooke_check_tlb() and mmubooke206_check_tlb() prot2 is
calculated first but only used after an unrelated check that can
return before tha value is used. Move the calculation after the check,
closer to where it is used, to keep them together and avoid computing
it when not needed.

Signed-off-by: BALATON Zoltan 
Reviwed-by: Nicholas Piggin 
---
 target/ppc/mmu_common.c | 36 +---
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index 4fde7fd3bf..f79e390306 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -635,12 +635,6 @@ static int mmubooke_check_tlb(CPUPPCState *env, 
ppcemb_tlb_t *tlb,
 return -1;
 }
 
-if (FIELD_EX64(env->msr, MSR, PR)) {
-prot2 = tlb->prot & 0xF;
-} else {
-prot2 = (tlb->prot >> 4) & 0xF;
-}
-
 /* Check the address space */
 if ((access_type == MMU_INST_FETCH ?
 FIELD_EX64(env->msr, MSR, IR) :
@@ -649,6 +643,11 @@ static int mmubooke_check_tlb(CPUPPCState *env, 
ppcemb_tlb_t *tlb,
 return -1;
 }
 
+if (FIELD_EX64(env->msr, MSR, PR)) {
+prot2 = tlb->prot & 0xF;
+} else {
+prot2 = (tlb->prot >> 4) & 0xF;
+}
 *prot = prot2;
 if (prot2 & prot_for_access_type(access_type)) {
 qemu_log_mask(CPU_LOG_MMU, "%s: good TLB!\n", __func__);
@@ -830,6 +829,18 @@ static int mmubooke206_check_tlb(CPUPPCState *env, 
ppcmas_tlb_t *tlb,
 
 found_tlb:
 
+/* Check the address space and permissions */
+if (access_type == MMU_INST_FETCH) {
+/* There is no way to fetch code using epid load */
+assert(!use_epid);
+as = FIELD_EX64(env->msr, MSR, IR);
+}
+
+if (as != ((tlb->mas1 & MAS1_TS) >> MAS1_TS_SHIFT)) {
+qemu_log_mask(CPU_LOG_MMU, "%s: AS doesn't match\n", __func__);
+return -1;
+}
+
 if (pr) {
 if (tlb->mas7_3 & MAS3_UR) {
 prot2 |= PAGE_READ;
@@ -851,19 +862,6 @@ found_tlb:
 prot2 |= PAGE_EXEC;
 }
 }
-
-/* Check the address space and permissions */
-if (access_type == MMU_INST_FETCH) {
-/* There is no way to fetch code using epid load */
-assert(!use_epid);
-as = FIELD_EX64(env->msr, MSR, IR);
-}
-
-if (as != ((tlb->mas1 & MAS1_TS) >> MAS1_TS_SHIFT)) {
-qemu_log_mask(CPU_LOG_MMU, "%s: AS doesn't match\n", __func__);
-return -1;
-}
-
 *prot = prot2;
 if (prot2 & prot_for_access_type(access_type)) {
 qemu_log_mask(CPU_LOG_MMU, "%s: good TLB!\n", __func__);
-- 
2.30.9

[PATCH v5 04/32] target/ppc/mmu_common.c: Simplify checking for real mode

In get_physical_address_wtlb() the real_mode flag depends on either
the MSR[IR] or MSR[DR] bit depending on access_type. Extract just the
needed bit in a more straight forward way instead of doing unnecessary
computation.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/mmu_common.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index 09cbeb0052..886fb6a657 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -1184,8 +1184,10 @@ int get_physical_address_wtlb(CPUPPCState *env, 
mmu_ctx_t *ctx,
  int mmu_idx)
 {
 int ret = -1;
-bool real_mode = (type == ACCESS_CODE && !FIELD_EX64(env->msr, MSR, IR)) ||
- (type != ACCESS_CODE && !FIELD_EX64(env->msr, MSR, DR));
+bool real_mode;
+
+real_mode = (type == ACCESS_CODE) ? !FIELD_EX64(env->msr, MSR, IR)
+  : !FIELD_EX64(env->msr, MSR, DR);
 
 switch (env->mmu_model) {
 case POWERPC_MMU_SOFT_6xx:
-- 
2.30.9

[PATCH v5 00/32] Misc PPC exception and BookE MMU clean ups

This series does some further clean up mostly around BookE MMU to
untangle it from other MMU models. It also contains some other changes
that I've come up with while working on this. The Simplify
ppc_booke_xlate() part 1 and part 2 patches could be squashed together
but left them separate for easier review.

v5:
- drop sc patches from this series
- eliminate uninit warning work arounds and also get rid of
get_physical_address_wtlb() (one memset is still needed temporarily
but can be removed at the end)
- use function instead of macro

v4:
- Add a (probably redundant) check for MPC8xx case in ppc_xlate so we
don't have to care about it in lower levels
- Detangle BookE related functions from mmu_ctx_t to avoid some used
uninit work arounds and allow these to be moved out to mmu-booke.c
- Some other tweaks asked during review

v3:
- Address review comments from Nick
- Rebase on master
- Squashed some patches together
- Add some more patches I've done since last version

v2:
- Fix user mode issue in patch 1 by keeping old behaviour for user mode
- Add some more MMU clean up patches

Regards,
BALATON Zoltan


BALATON Zoltan (32):
  target/ppc: Remove unused helper
  target/ppc/mmu_common.c: Move calculation of a value closer to its
usage
  target/ppc/mmu_common.c: Remove unneeded local variable
  target/ppc/mmu_common.c: Simplify checking for real mode
  target/ppc/mmu_common.c: Drop cases for unimplemented MPC8xx MMU
  target/ppc/mmu_common.c: Introduce mmu6xx_get_physical_address()
  target/ppc/mmu_common.c: Move else branch to avoid large if block
  target/ppc/mmu_common.c: Move some debug logging
  target/ppc/mmu_common.c: Eliminate ret from
mmu6xx_get_physical_address()
  target/ppc/mmu_common.c: Split out BookE cases before checking real
mode
  target/ppc/mmu_common.c: Split off real mode cases in
get_physical_address_wtlb()
  target/ppc/mmu_common.c: Inline and remove check_physical()
  target/ppc/mmu_common.c: Fix misindented qemu_log_mask() calls
  target/ppc/mmu_common.c: Deindent ppc_jumbo_xlate()
  target/ppc/mmu_common.c: Replace hard coded constants in
ppc_jumbo_xlate()
  target/ppc/mmu_common.c: Don't use mmu_ctx_t for
mmu40x_get_physical_address()
  target/ppc/mmu_common.c: Don't use mmu_ctx_t in
mmubooke_get_physical_address()
  target/ppc/mmu_common.c: Don't use mmu_ctx_t in
mmubooke206_get_physical_address()
  target/ppc: Remove pp_check() and reuse ppc_hash32_pp_prot()
  target/ppc/mmu_common.c: Remove BookE from direct store handling
  target/ppc/mmu_common.c: Split off BookE handling from
ppc_jumbo_xlate()
  target/ppc/mmu_common.c: Eliminate get_physical_address_wtlb()
  target/ppc/mmu_common.c: Move mmu_ctx_t type to mmu_common.c
  target/ppc/mmu_common.c: Simplify ppc_booke_xlate() part 1
  target/ppc/mmu_common.c: Simplify ppc_booke_xlate() part 2
  target/ppc: Remove id_tlbs flag from CPU env
  target/ppc: Split off common embedded TLB init
  target/ppc/mmu-hash32.c: Drop a local variable
  target/ppc/mmu-radix64.c: Drop a local variable
  target/ppc: Add a function to check for page protection bit
  target/ppc: Move out BookE and related MMU functions from mmu_common.c
  target/ppc/mmu_common.c: Remove work around for spurious warnings

 hw/ppc/pegasos2.c|2 +-
 target/ppc/cpu.h |9 +-
 target/ppc/cpu_init.c|   70 +--
 target/ppc/helper.h  |2 -
 target/ppc/helper_regs.c |1 -
 target/ppc/internal.h|   75 +--
 target/ppc/meson.build   |1 +
 target/ppc/mmu-booke.c   |  531 +
 target/ppc/mmu-booke.h   |   17 +
 target/ppc/mmu-hash32.c  |   54 +-
 target/ppc/mmu-hash64.c  |2 +-
 target/ppc/mmu-radix64.c |5 +-
 target/ppc/mmu_common.c  | 1158 +-
 target/ppc/mmu_helper.c  |   37 +-
 14 files changed, 891 insertions(+), 1073 deletions(-)
 create mode 100644 target/ppc/mmu-booke.c
 create mode 100644 target/ppc/mmu-booke.h

-- 
2.30.9

[PATCH v5 09/32] target/ppc/mmu_common.c: Eliminate ret from mmu6xx_get_physical_address()

Return directly, which is simpler than dragging a return value through
multpile if and else blocks.

Signed-off-by: BALATON Zoltan 
Reviewed-by: Nicholas Piggin 
---
 target/ppc/mmu_common.c | 84 +++--
 1 file changed, 39 insertions(+), 45 deletions(-)

diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
index 89bfd9aa45..03d9e6bfda 100644
--- a/target/ppc/mmu_common.c
+++ b/target/ppc/mmu_common.c
@@ -386,7 +386,6 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
mmu_ctx_t *ctx,
 target_ulong vsid, sr, pgidx;
 int ds, target_page_bits;
 bool pr;
-int ret;
 
 /* First try to find a BAT entry if there are any */
 if (env->nb_BATs && get_bat_6xx_tlb(env, ctx, eaddr, access_type) == 0) {
@@ -419,7 +418,6 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
mmu_ctx_t *ctx,
 qemu_log_mask(CPU_LOG_MMU,
 "pte segment: key=%d ds %d nx %d vsid " TARGET_FMT_lx "\n",
 ctx->key, ds, ctx->nx, vsid);
-ret = -1;
 if (!ds) {
 /* Check if instruction fetch is allowed, if needed */
 if (type == ACCESS_CODE && ctx->nx) {
@@ -436,51 +434,47 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
mmu_ctx_t *ctx,
 /* Initialize real address with an invalid value */
 ctx->raddr = (hwaddr)-1ULL;
 /* Software TLB search */
-ret = ppc6xx_tlb_check(env, ctx, eaddr, access_type);
-} else {
-qemu_log_mask(CPU_LOG_MMU, "direct store...\n");
-/* Direct-store segment : absolutely *BUGGY* for now */
-
-switch (type) {
-case ACCESS_INT:
-/* Integer load/store : only access allowed */
-break;
-case ACCESS_CODE:
-/* No code fetch is allowed in direct-store areas */
-return -4;
-case ACCESS_FLOAT:
-/* Floating point load/store */
-return -4;
-case ACCESS_RES:
-/* lwarx, ldarx or srwcx. */
-return -4;
-case ACCESS_CACHE:
-/*
- * dcba, dcbt, dcbtst, dcbf, dcbi, dcbst, dcbz, or icbi
- *
- * Should make the instruction do no-op.  As it already do
- * no-op, it's quite easy :-)
- */
-ctx->raddr = eaddr;
-return 0;
-case ACCESS_EXT:
-/* eciwx or ecowx */
-return -4;
-default:
-qemu_log_mask(CPU_LOG_MMU, "ERROR: instruction should not need "
-  "address translation\n");
-return -4;
-}
-if ((access_type == MMU_DATA_STORE || ctx->key != 1) &&
-(access_type == MMU_DATA_LOAD || ctx->key != 0)) {
-ctx->raddr = eaddr;
-ret = 2;
-} else {
-ret = -2;
-}
+return ppc6xx_tlb_check(env, ctx, eaddr, access_type);
 }
 
-return ret;
+/* Direct-store segment : absolutely *BUGGY* for now */
+qemu_log_mask(CPU_LOG_MMU, "direct store...\n");
+switch (type) {
+case ACCESS_INT:
+/* Integer load/store : only access allowed */
+break;
+case ACCESS_CODE:
+/* No code fetch is allowed in direct-store areas */
+return -4;
+case ACCESS_FLOAT:
+/* Floating point load/store */
+return -4;
+case ACCESS_RES:
+/* lwarx, ldarx or srwcx. */
+return -4;
+case ACCESS_CACHE:
+/*
+ * dcba, dcbt, dcbtst, dcbf, dcbi, dcbst, dcbz, or icbi
+ *
+ * Should make the instruction do no-op.  As it already do
+ * no-op, it's quite easy :-)
+ */
+ctx->raddr = eaddr;
+return 0;
+case ACCESS_EXT:
+/* eciwx or ecowx */
+return -4;
+default:
+qemu_log_mask(CPU_LOG_MMU, "ERROR: instruction should not need address"
+   " translation\n");
+return -4;
+}
+if ((access_type == MMU_DATA_STORE || ctx->key != 1) &&
+(access_type == MMU_DATA_LOAD || ctx->key != 0)) {
+ctx->raddr = eaddr;
+return 2;
+}
+return -2;
 }
 
 /* Generic TLB check function for embedded PowerPC implementations */
-- 
2.30.9

Re: [PATCH v4 00/12] vhost-user: support any POSIX system (tested on macOS, FreeBSD, OpenBSD)

On Wed, May 08, 2024 at 09:44:44AM +0200, Stefano Garzarella wrote:
> v1: https://patchew.org/QEMU/20240228114759.44758-1-sgarz...@redhat.com/
> v2: https://patchew.org/QEMU/20240326133936.125332-1-sgarz...@redhat.com/
> v3: https://patchew.org/QEMU/20240404122330.92710-1-sgarz...@redhat.com/
> v4:
>   - rebased on master (commit e116b92d01c2cd75957a9f8ad1d4932292867b81)
>   - added patch 6 to move using QEMU bswap helper functions in a separate
> patch (Phil)
>   - fail if we find "share=off" in shm_backend_memory_alloc() (David)
>   - added Phil's R-b and David's A-b
> 
> The vhost-user protocol is not really Linux-specific, so let's try support
> QEMU's frontends and backends (including libvhost-user) in any POSIX system
> with this series. The main use case is to be able to use virtio devices that
> we don't have built-in in QEMU (e.g. virtiofsd, vhost-user-vsock, etc.) even
> in non-Linux systems.
> 
> The first 5 patches are more like fixes discovered at runtime on macOS or
> FreeBSD that could go even independently of this series.
> 
> Patches 6, 7, 8, and 9 enable building of frontends and backends (including
> libvhost-user) with associated code changes to succeed in compilation.
> 
> Patch 10 adds `memory-backend-shm` that uses the POSIX shm_open() API to
> create shared memory which is identified by an fd that can be shared with
> vhost-user backends. This is useful on those systems (like macOS) where
> we don't have memfd_create() or special filesystems like "/dev/shm".
> 
> Patches 11 and 12 use `memory-backend-shm` in some vhost-user tests.
> 
> Maybe the first 5 patches can go separately, but I only discovered those
> problems after testing patches 6 - 9, so I have included them in this series
> for now. Please let me know if you prefer that I send them separately.
> 
> I tested this series using vhost-user-blk and QSD on macOS Sonoma 14.4
> (aarch64), FreeBSD 14 (x86_64), OpenBSD 7.4 (x86_64), and Fedora 39 (x86_64)
> in this way:
> 
> - Start vhost-user-blk or QSD (same commands for all systems)
> 
>   vhost-user-blk -s /tmp/vhost.socket \
> -b Fedora-Cloud-Base-39-1.5.x86_64.raw
> 
>   qemu-storage-daemon \
> --blockdev 
> file,filename=Fedora-Cloud-Base-39-1.5.x86_64.qcow2,node-name=file \
> --blockdev qcow2,file=file,node-name=qcow2 \
> --export 
> vhost-user-blk,addr.type=unix,addr.path=/tmp/vhost.socket,id=vub,num-queues=1,node-name=qcow2,writable=on
> 
> - macOS (aarch64): start QEMU (using hvf accelerator)
> 
>   qemu-system-aarch64 -smp 2 -cpu host -M virt,accel=hvf,memory-backend=mem \
> -drive 
> file=./build/pc-bios/edk2-aarch64-code.fd,if=pflash,format=raw,readonly=on \
> -device virtio-net-device,netdev=net0 -netdev user,id=net0 \
> -device ramfb -device usb-ehci -device usb-kbd \
> -object memory-backend-shm,id=mem,size=512M \
> -device vhost-user-blk-pci,num-queues=1,disable-legacy=on,chardev=char0 \
> -chardev socket,id=char0,path=/tmp/vhost.socket
> 
> - FreeBSD/OpenBSD (x86_64): start QEMU (no accelerators available)
> 
>   qemu-system-x86_64 -smp 2 -M q35,memory-backend=mem \
> -object memory-backend-shm,id=mem,size="512M" \
> -device vhost-user-blk-pci,num-queues=1,chardev=char0 \
> -chardev socket,id=char0,path=/tmp/vhost.socket
> 
> - Fedora (x86_64): start QEMU (using kvm accelerator)
> 
>   qemu-system-x86_64 -smp 2 -M q35,accel=kvm,memory-backend=mem \
> -object memory-backend-shm,size="512M" \
> -device vhost-user-blk-pci,num-queues=1,chardev=char0 \
> -chardev socket,id=char0,path=/tmp/vhost.socket
> 
> Branch pushed (and CI started) at 
> https://gitlab.com/sgarzarella/qemu/-/tree/macos-vhost-user?ref_type=heads
> 
> Thanks,
> Stefano
> 
> Stefano Garzarella (12):
>   libvhost-user: set msg.msg_control to NULL when it is empty
>   libvhost-user: fail vu_message_write() if sendmsg() is failing
>   libvhost-user: mask F_INFLIGHT_SHMFD if memfd is not supported
>   vhost-user-server: do not set memory fd non-blocking
>   contrib/vhost-user-blk: fix bind() using the right size of the address
>   contrib/vhost-user-*: use QEMU bswap helper functions
>   vhost-user: enable frontends on any POSIX system
>   libvhost-user: enable it on any POSIX system
>   contrib/vhost-user-blk: enable it on any POSIX system
>   hostmem: add a new memory backend based on POSIX shm_open()
>   tests/qtest/vhost-user-blk-test: use memory-backend-shm
>   tests/qtest/vhost-user-test: add a test case for memory-backend-shm
> 
>  docs/system/devices/vhost-user.rst|   5 +-
>  meson.build   |   5 +-
>  qapi/qom.json |  17 +++
>  subprojects/libvhost-user/libvhost-user.h |   2 +-
>  backends/hostmem-shm.c| 123 ++
>  contrib/vhost-user-blk/vhost-user-blk.c   |  27 +++--
>  contrib/vhost-user-input/main.c   |  16 +--
>  hw/net/vhost_net.c|   5 +
>  subprojects/libvhost-user/libvhost-user.c |  76

Re: [PATCH V1 26/26] migration: only-migratable-modes

Steve Sistare  writes:

> Add the only-migratable-modes option as a generalization of only-migratable.
> Only devices that support all requested modes are allowed.
>
> Signed-off-by: Steve Sistare 
> ---
>  include/migration/misc.h   |  3 +++
>  include/sysemu/sysemu.h|  1 -
>  migration/migration-hmp-cmds.c | 26 +-
>  migration/migration.c  | 22 +-
>  migration/savevm.c |  2 +-
>  qemu-options.hx| 16 ++--
>  system/globals.c   |  1 -
>  system/vl.c| 13 -
>  target/s390x/cpu_models.c  |  4 +++-
>  9 files changed, 75 insertions(+), 13 deletions(-)
>
> diff --git a/include/migration/misc.h b/include/migration/misc.h
> index 5b963ba..3ad2cd9 100644
> --- a/include/migration/misc.h
> +++ b/include/migration/misc.h
> @@ -119,6 +119,9 @@ bool migration_incoming_postcopy_advised(void);
>  /* True if background snapshot is active */
>  bool migration_in_bg_snapshot(void);
>  
> +void migration_set_required_mode(MigMode mode);
> +bool migration_mode_required(MigMode mode);
> +
>  /* migration/block-dirty-bitmap.c */
>  void dirty_bitmap_mig_init(void);
>  
> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
> index 5b4397e..0a9c4b4 100644
> --- a/include/sysemu/sysemu.h
> +++ b/include/sysemu/sysemu.h
> @@ -8,7 +8,6 @@
>  
>  /* vl.c */
>  
> -extern int only_migratable;
>  extern const char *qemu_name;
>  extern QemuUUID qemu_uuid;
>  extern bool qemu_uuid_set;
> diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
> index 414c7e8..ca913b7 100644
> --- a/migration/migration-hmp-cmds.c
> +++ b/migration/migration-hmp-cmds.c
> @@ -16,6 +16,7 @@
>  #include "qemu/osdep.h"
>  #include "block/qapi.h"
>  #include "migration/snapshot.h"
> +#include "migration/misc.h"
>  #include "monitor/hmp.h"
>  #include "monitor/monitor.h"
>  #include "qapi/error.h"
> @@ -33,6 +34,28 @@
>  #include "options.h"
>  #include "migration.h"
>  
> +static void migration_dump_modes(Monitor *mon)
> +{
> +int mode, n = 0;
> +
> +monitor_printf(mon, "only-migratable-modes: ");
> +
> +for (mode = 0; mode < MIG_MODE__MAX; mode++) {
> +if (migration_mode_required(mode)) {
> +if (n++) {
> +monitor_printf(mon, ",");
> +}
> +monitor_printf(mon, "%s", MigMode_str(mode));
> +}
> +}
> +
> +if (!n) {
> +monitor_printf(mon, "none\n");
> +} else {
> +monitor_printf(mon, "\n");
> +}
> +}
> +
>  static void migration_global_dump(Monitor *mon)
>  {
>  MigrationState *ms = migrate_get_current();
> @@ -41,7 +64,7 @@ static void migration_global_dump(Monitor *mon)
>  monitor_printf(mon, "store-global-state: %s\n",
> ms->store_global_state ? "on" : "off");
>  monitor_printf(mon, "only-migratable: %s\n",
> -   only_migratable ? "on" : "off");
> +   migration_mode_required(MIG_MODE_NORMAL) ? "on" : "off");
>  monitor_printf(mon, "send-configuration: %s\n",
> ms->send_configuration ? "on" : "off");
>  monitor_printf(mon, "send-section-footer: %s\n",
> @@ -50,6 +73,7 @@ static void migration_global_dump(Monitor *mon)
> ms->decompress_error_check ? "on" : "off");
>  monitor_printf(mon, "clear-bitmap-shift: %u\n",
> ms->clear_bitmap_shift);
> +migration_dump_modes(mon);
>  }
>  
>  void hmp_info_migrate(Monitor *mon, const QDict *qdict)
> diff --git a/migration/migration.c b/migration/migration.c
> index 4984dee..5535b84 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -1719,17 +1719,29 @@ static bool is_busy(Error **reasonp, Error **errp)
>  return false;
>  }
>  
> -static bool is_only_migratable(Error **reasonp, Error **errp, int modes)
> +static int migration_modes_required;
> +
> +void migration_set_required_mode(MigMode mode)
> +{
> +migration_modes_required |= BIT(mode);
> +}
> +
> +bool migration_mode_required(MigMode mode)
> +{
> +return !!(migration_modes_required & BIT(mode));
> +}
> +
> +static bool modes_are_required(Error **reasonp, Error **errp, int modes)
>  {
>  ERRP_GUARD();
>  
> -if (only_migratable && (modes & BIT(MIG_MODE_NORMAL))) {
> +if (migration_modes_required & modes) {
>  error_propagate_prepend(errp, *reasonp,
> -"disallowing migration blocker "
> -"(--only-migratable) for: ");
> +"-only-migratable{-modes}  specified, but: 
> ");

extra space before 'specified'

>  *reasonp = NULL;
>  return true;
>  }
> +
>  return false;
>  }
>  
> @@ -1783,7 +1795,7 @@ int migrate_add_blocker_modes(Error **reasonp, Error 
> **errp, MigMode mode, ...)
>  modes = get_modes(mode, ap);
>  va_end(ap);
>  
> -if (is_only_migratable(reasonp,

[PATCH] vfio: container: Fix missing allocation of VFIOSpaprContainer

2024-05-09 Thread Shivaprasad G Bhat

The commit 6ad359ec29 "(vfio/spapr: Move prereg_listener into
spapr container)" began to use the newly introduced VFIOSpaprContainer
structure.

After several refactors, today the container_of(container,
VFIOSpaprContainer, ABC) is used when VFIOSpaprContainer is actually
not allocated. On PPC64 systems, this dereference is leading to corruption
showing up as glibc malloc assertion during guest start when using vfio.

Patch adds the missing allocation while also making the structure movement
to vfio common header file.

Fixes: 6ad359ec29 "(vfio/spapr: Move prereg_listener into spapr container)"
Signed-off-by: Shivaprasad G Bhat 
---
 hw/vfio/container.c   |6 --
 hw/vfio/spapr.c   |6 --
 include/hw/vfio/vfio-common.h |6 ++
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 77bdec276e..ecaf5786d9 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -539,6 +539,7 @@ static int vfio_connect_container(VFIOGroup *group, 
AddressSpace *as,
 {
 VFIOContainer *container;
 VFIOContainerBase *bcontainer;
+VFIOSpaprContainer *scontainer;
 int ret, fd;
 VFIOAddressSpace *space;

@@ -611,7 +612,8 @@ static int vfio_connect_container(VFIOGroup *group, 
AddressSpace *as,
 goto close_fd_exit;
 }

-container = g_malloc0(sizeof(*container));
+scontainer = g_malloc0(sizeof(*scontainer));
+container = >container;
 container->fd = fd;
 bcontainer = >bcontainer;

@@ -675,7 +677,7 @@ unregister_container_exit:
 vfio_cpr_unregister_container(bcontainer);

 free_container_exit:
-g_free(container);
+g_free(scontainer);

 close_fd_exit:
 close(fd);
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
index 0d949bb728..78d218b7e7 100644
--- a/hw/vfio/spapr.c
+++ b/hw/vfio/spapr.c
@@ -24,12 +24,6 @@
 #include "qapi/error.h"
 #include "trace.h"

-typedef struct VFIOSpaprContainer {
-VFIOContainer container;
-MemoryListener prereg_listener;
-QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
-} VFIOSpaprContainer;
-
 static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section)
 {
 if (memory_region_is_iommu(section->mr)) {
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index b9da6c08ef..010fa68ac6 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -82,6 +82,12 @@ typedef struct VFIOContainer {
 QLIST_HEAD(, VFIOGroup) group_list;
 } VFIOContainer;

+typedef struct VFIOSpaprContainer {
+VFIOContainer container;
+MemoryListener prereg_listener;
+QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
+} VFIOSpaprContainer;
+
 typedef struct VFIOHostDMAWindow {
 hwaddr min_iova;
 hwaddr max_iova;

Re: [PATCH 5/9] hw/scsi: add persistent reservation in/out api for scsi device

On Wed, May 08, 2024 at 05:36:25PM +0800, Changqi Lu wrote:
> Add persistent reservation in/out operations in the
> SCSI device layer. By introducing the persistent
> reservation in/out api, this enables the SCSI device
> to perform reservation-related tasks, including querying
> keys, querying reservation status, registering reservation
> keys, initiating and releasing reservations, as well as
> clearing and preempting reservations held by other keys.
> 
> These operations are crucial for management and control of
> shared storage resources in a persistent manner.
> 
> Signed-off-by: Changqi Lu 
> Signed-off-by: zhenwei pi 
> ---
>  hw/scsi/scsi-disk.c | 302 
>  1 file changed, 302 insertions(+)

Does sg_persist --report-capabilities
(https://linux.die.net/man/8/sg_persist) show that persistent
reservations are supported? Maybe some INQUIRY data still needs to be
added.

> 
> diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
> index 4bd7af9d0c..bdd66c4026 100644
> --- a/hw/scsi/scsi-disk.c
> +++ b/hw/scsi/scsi-disk.c
> @@ -32,6 +32,7 @@
>  #include "migration/vmstate.h"
>  #include "hw/scsi/emulation.h"
>  #include "scsi/constants.h"
> +#include "scsi/utils.h"
>  #include "sysemu/block-backend.h"
>  #include "sysemu/blockdev.h"
>  #include "hw/block/block.h"
> @@ -1474,6 +1475,296 @@ static void scsi_disk_emulate_read_data(SCSIRequest 
> *req)
>  scsi_req_complete(>req, GOOD);
>  }
>  
> +typedef struct SCSIPrReadKeys {
> +uint32_t generation;
> +uint32_t num_keys;
> +uint64_t *keys;
> +void *req;
> +} SCSIPrReadKeys;
> +
> +typedef struct SCSIPrReadReservation {
> +uint32_t generation;
> +uint64_t key;
> +BlockPrType type;
> +void *req;
> +} SCSIPrReadReservation;
> +
> +static void scsi_pr_read_keys_complete(void *opaque, int ret)
> +{
> +int num_keys;
> +uint8_t *buf;
> +SCSIPrReadKeys *blk_keys = (SCSIPrReadKeys *)opaque;
> +SCSIDiskReq *r = (SCSIDiskReq *)blk_keys->req;
> +SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> +assert(blk_get_aio_context(s->qdev.conf.blk) ==
> +qemu_get_current_aio_context());
> +
> +assert(r->req.aiocb != NULL);
> +r->req.aiocb = NULL;
> +
> +if (scsi_disk_req_check_error(r, ret, true)) {
> +goto done;
> +}
> +
> +buf = scsi_req_get_buf(>req);
> +num_keys = MIN(blk_keys->num_keys, ret);
> +blk_keys->generation = cpu_to_be32(blk_keys->generation);
> +memcpy([0], _keys->generation, 4);
> +for (int i = 0; i < num_keys; i++) {
> +blk_keys->keys[i] = cpu_to_be64(blk_keys->keys[i]);
> +memcpy([8 + i * 8], _keys->keys[i], 8);
> +}
> +num_keys = cpu_to_be32(num_keys * 8);
> +memcpy([4], _keys, 4);
> +
> +scsi_req_data(>req, r->buflen);
> +done:
> +scsi_req_unref(>req);
> +g_free(blk_keys->keys);
> +g_free(blk_keys);
> +}
> +
> +static int scsi_disk_emulate_pr_read_keys(SCSIRequest *req)
> +{
> +SCSIPrReadKeys *blk_keys;
> +SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> +SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +int buflen = MIN(r->req.cmd.xfer, r->buflen);
> +int num_keys = (buflen - sizeof(uint32_t) * 2) / sizeof(uint64_t);
> +
> +blk_keys = g_new0(SCSIPrReadKeys, 1);
> +blk_keys->generation = 0;
> +/* num_keys is the maximum number of keys that can be transmitted */
> +blk_keys->num_keys = num_keys;
> +blk_keys->keys = g_malloc(sizeof(uint64_t) * num_keys);
> +blk_keys->req = r;
> +
> +scsi_req_ref(>req);
> +r->req.aiocb = blk_aio_pr_read_keys(s->qdev.conf.blk, 
> _keys->generation,
> +blk_keys->num_keys, blk_keys->keys,
> +scsi_pr_read_keys_complete, 
> blk_keys);
> +return 0;
> +}
> +
> +static void scsi_pr_read_reservation_complete(void *opaque, int ret)
> +{
> +uint8_t *buf;
> +uint32_t num_keys = 0;
> +SCSIPrReadReservation *blk_rsv = (SCSIPrReadReservation *)opaque;
> +SCSIDiskReq *r = (SCSIDiskReq *)blk_rsv->req;
> +SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> +assert(blk_get_aio_context(s->qdev.conf.blk) ==
> +qemu_get_current_aio_context());
> +
> +assert(r->req.aiocb != NULL);
> +r->req.aiocb = NULL;
> +
> +if (scsi_disk_req_check_error(r, ret, true)) {
> +goto done;
> +}
> +
> +buf = scsi_req_get_buf(>req);
> +blk_rsv->generation = cpu_to_be32(blk_rsv->generation);
> +memcpy([0], _rsv->generation, 4);
> +if (ret) {
> +num_keys = cpu_to_be32(16);
> +blk_rsv->key = cpu_to_be64(blk_rsv->key);
> +memcpy([8], _rsv->key, 8);
> +buf[21] = block_pr_type_to_scsi(blk_rsv->type) & 0xf;
> +} else {
> +num_keys = cpu_to_be32(0);
> +}
> +
> +memcpy([4], _keys, 4);
> +scsi_req_data(>req, r->buflen);
> +
> +done:
> +

Re: [PATCH 0/9] Support persistent reservation operations

On Wed, May 08, 2024 at 05:36:20PM +0800, Changqi Lu wrote:
> Hi,
> 
> I am going to introduce persistent reservation for QEMU block.
> There are three parts in this series:
> 
> Firstly, at the block layer, the commit abstracts seven APIs related to
> the persistent reservation command. These APIs including reading keys,
> reading reservations, registering, reserving, releasing, clearing and 
> preempting.
> 
> Next, the commit implements the necessary pr-related operation APIs for both 
> the
> SCSI protocol and NVMe protocol at the device layer. This ensures that the 
> necessary
> functionality is available for handling persistent reservations in these 
> protocols.
> 
> Finally, the commit includes adaptations to the iscsi driver at the driver 
> layer
> to verify the correct implementation and functionality of the changes.
> 
> With these changes, GFS works fine in the guest. Also, sg-utils(for SCSI 
> block) and
> nvme-cli(for NVMe block) work fine too.

What is the relationship to the existing PRManager functionality
(docs/interop/pr-helper.rst) where block/file-posix.c interprets SCSI
ioctls and sends persistent reservation requests to an external helper
process?

I wonder if block/file-posix.c can implement the new block driver
callbacks using pr_mgr (while keeping the existing scsi-generic
support).

Stefan


signature.asc
Description: PGP signature

Re: [PATCH 7/9] hw/nvme: add helper functions for converting reservation types

On Wed, May 08, 2024 at 05:36:27PM +0800, Changqi Lu wrote:
> This commit introduces two helper functions
> that facilitate the conversion between the
> reservation types used in the NVME protocol
> and those used in the block layer.
> 
> Signed-off-by: Changqi Lu 
> Signed-off-by: zhenwei pi 
> ---
>  hw/nvme/nvme.h | 40 
>  1 file changed, 40 insertions(+)

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature

Re: [PATCH 6/9] block/nvme: add reservation command protocol constants

On Wed, May 08, 2024 at 05:36:26PM +0800, Changqi Lu wrote:
> Add constants for the NVMe persistent command protocol.
> The constants include the reservation command opcode and
> reservation type values defined in section 7 of the NVMe
> 2.0 specification.
> 
> Signed-off-by: Changqi Lu 
> Signed-off-by: zhenwei pi 
> ---
>  include/block/nvme.h | 30 ++
>  1 file changed, 30 insertions(+)

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature

Re: [PATCH 5/9] hw/scsi: add persistent reservation in/out api for scsi device

On Wed, May 08, 2024 at 05:36:25PM +0800, Changqi Lu wrote:
> Add persistent reservation in/out operations in the
> SCSI device layer. By introducing the persistent
> reservation in/out api, this enables the SCSI device
> to perform reservation-related tasks, including querying
> keys, querying reservation status, registering reservation
> keys, initiating and releasing reservations, as well as
> clearing and preempting reservations held by other keys.
> 
> These operations are crucial for management and control of
> shared storage resources in a persistent manner.
> 
> Signed-off-by: Changqi Lu 
> Signed-off-by: zhenwei pi 
> ---
>  hw/scsi/scsi-disk.c | 302 
>  1 file changed, 302 insertions(+)

Paolo: Please review for buffer overflows. I find the buffer size
assumption in the SCSI layer mysterious (e.g. scsi_req_get_buf() returns
a void* and it's not obvious how large the buffer is), so I didn't check
for buffer overflows.

> 
> diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
> index 4bd7af9d0c..bdd66c4026 100644
> --- a/hw/scsi/scsi-disk.c
> +++ b/hw/scsi/scsi-disk.c
> @@ -32,6 +32,7 @@
>  #include "migration/vmstate.h"
>  #include "hw/scsi/emulation.h"
>  #include "scsi/constants.h"
> +#include "scsi/utils.h"
>  #include "sysemu/block-backend.h"
>  #include "sysemu/blockdev.h"
>  #include "hw/block/block.h"
> @@ -1474,6 +1475,296 @@ static void scsi_disk_emulate_read_data(SCSIRequest 
> *req)
>  scsi_req_complete(>req, GOOD);
>  }
>  
> +typedef struct SCSIPrReadKeys {
> +uint32_t generation;
> +uint32_t num_keys;
> +uint64_t *keys;
> +void *req;
> +} SCSIPrReadKeys;
> +
> +typedef struct SCSIPrReadReservation {
> +uint32_t generation;
> +uint64_t key;
> +BlockPrType type;
> +void *req;
> +} SCSIPrReadReservation;
> +
> +static void scsi_pr_read_keys_complete(void *opaque, int ret)
> +{
> +int num_keys;
> +uint8_t *buf;
> +SCSIPrReadKeys *blk_keys = (SCSIPrReadKeys *)opaque;
> +SCSIDiskReq *r = (SCSIDiskReq *)blk_keys->req;
> +SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> +assert(blk_get_aio_context(s->qdev.conf.blk) ==
> +qemu_get_current_aio_context());
> +
> +assert(r->req.aiocb != NULL);
> +r->req.aiocb = NULL;
> +
> +if (scsi_disk_req_check_error(r, ret, true)) {
> +goto done;
> +}
> +
> +buf = scsi_req_get_buf(>req);
> +num_keys = MIN(blk_keys->num_keys, ret);
> +blk_keys->generation = cpu_to_be32(blk_keys->generation);
> +memcpy([0], _keys->generation, 4);
> +for (int i = 0; i < num_keys; i++) {
> +blk_keys->keys[i] = cpu_to_be64(blk_keys->keys[i]);
> +memcpy([8 + i * 8], _keys->keys[i], 8);
> +}
> +num_keys = cpu_to_be32(num_keys * 8);
> +memcpy([4], _keys, 4);
> +
> +scsi_req_data(>req, r->buflen);
> +done:
> +scsi_req_unref(>req);
> +g_free(blk_keys->keys);
> +g_free(blk_keys);
> +}
> +
> +static int scsi_disk_emulate_pr_read_keys(SCSIRequest *req)
> +{
> +SCSIPrReadKeys *blk_keys;
> +SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> +SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +int buflen = MIN(r->req.cmd.xfer, r->buflen);
> +int num_keys = (buflen - sizeof(uint32_t) * 2) / sizeof(uint64_t);
> +
> +blk_keys = g_new0(SCSIPrReadKeys, 1);
> +blk_keys->generation = 0;
> +/* num_keys is the maximum number of keys that can be transmitted */
> +blk_keys->num_keys = num_keys;
> +blk_keys->keys = g_malloc(sizeof(uint64_t) * num_keys);
> +blk_keys->req = r;
> +
> +scsi_req_ref(>req);
> +r->req.aiocb = blk_aio_pr_read_keys(s->qdev.conf.blk, 
> _keys->generation,
> +blk_keys->num_keys, blk_keys->keys,
> +scsi_pr_read_keys_complete, 
> blk_keys);
> +return 0;
> +}
> +
> +static void scsi_pr_read_reservation_complete(void *opaque, int ret)
> +{
> +uint8_t *buf;
> +uint32_t num_keys = 0;
> +SCSIPrReadReservation *blk_rsv = (SCSIPrReadReservation *)opaque;
> +SCSIDiskReq *r = (SCSIDiskReq *)blk_rsv->req;
> +SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> +assert(blk_get_aio_context(s->qdev.conf.blk) ==
> +qemu_get_current_aio_context());
> +
> +assert(r->req.aiocb != NULL);
> +r->req.aiocb = NULL;
> +
> +if (scsi_disk_req_check_error(r, ret, true)) {
> +goto done;
> +}
> +
> +buf = scsi_req_get_buf(>req);
> +blk_rsv->generation = cpu_to_be32(blk_rsv->generation);
> +memcpy([0], _rsv->generation, 4);
> +if (ret) {
> +num_keys = cpu_to_be32(16);

The SPC-6 calls this field Additional Length, which is clearer than
num_keys (there is only 1 key here!). Could you rename it to
additional_len to avoid confusion?

> +blk_rsv->key = cpu_to_be64(blk_rsv->key);
> +memcpy([8], _rsv->key, 8);
> +

Re: [PATCH V1 25/26] migration: fix mismatched GPAs during cpr-exec

Steve Sistare  writes:

> For cpr-exec mode, ramblock_is_ignored is always true, and the address of
> each migrated memory region must match the address of the statically
> initialized region on the target.  However, for a PCI rom block, the region
> address is set when the guest writes to a BAR on the source, which does not
> occur on the target, causing a "Mismatched GPAs" error during cpr-exec
> migration.
>
> To fix, unconditionally set the target's address to the source's address
> if the region does not have an address yet.
>
> Signed-off-by: Steve Sistare 

Just a detail below.

Reviewed-by: Fabiano Rosas 

> ---
>  include/exec/memory.h | 12 
>  migration/ram.c   | 15 +--
>  system/memory.c   | 10 --
>  3 files changed, 29 insertions(+), 8 deletions(-)
>
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index d337737..4f654b0 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -801,6 +801,7 @@ struct MemoryRegion {
>  bool unmergeable;
>  uint8_t dirty_log_mask;
>  bool is_iommu;
> +bool has_addr;

This field is not used during memory access, maybe move it down below to
preserve the hole for future usage.

>  RAMBlock *ram_block;
>  Object *owner;
>  /* owner as TYPE_DEVICE. Used for re-entrancy checks in MR access 
> hotpath */
> @@ -2402,6 +2403,17 @@ void memory_region_set_enabled(MemoryRegion *mr, bool 
> enabled);
>  void memory_region_set_address(MemoryRegion *mr, hwaddr addr);
>  
>  /*
> + * memory_region_set_address_only: set the address of a region.
> + *
> + * Same as memory_region_set_address, but without causing transaction side
> + * effects.
> + *
> + * @mr: the region to be updated
> + * @addr: new address, relative to container region
> + */
> +void memory_region_set_address_only(MemoryRegion *mr, hwaddr addr);
> +
> +/*
>   * memory_region_set_size: dynamically update the size of a region.
>   *
>   * Dynamically updates the size of a region.
> diff --git a/migration/ram.c b/migration/ram.c
> index add285b..7b8d7f6 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -4196,12 +4196,15 @@ static int parse_ramblock(QEMUFile *f, RAMBlock 
> *block, ram_addr_t length)
>  }
>  if (migrate_ignore_shared()) {
>  hwaddr addr = qemu_get_be64(f);
> -if (migrate_ram_is_ignored(block) &&
> -block->mr->addr != addr) {
> -error_report("Mismatched GPAs for block %s "
> - "%" PRId64 "!= %" PRId64, block->idstr,
> - (uint64_t)addr, (uint64_t)block->mr->addr);
> -return -EINVAL;
> +if (migrate_ram_is_ignored(block)) {
> +if (!block->mr->has_addr) {
> +memory_region_set_address_only(block->mr, addr);
> +} else if (block->mr->addr != addr) {
> +error_report("Mismatched GPAs for block %s "
> + "%" PRId64 "!= %" PRId64, block->idstr,
> + (uint64_t)addr, (uint64_t)block->mr->addr);
> +return -EINVAL;
> +}
>  }
>  }
>  ret = rdma_block_notification_handle(f, block->idstr);
> diff --git a/system/memory.c b/system/memory.c
> index ca04a0e..3c72504 100644
> --- a/system/memory.c
> +++ b/system/memory.c
> @@ -2665,7 +2665,7 @@ static void 
> memory_region_add_subregion_common(MemoryRegion *mr,
>  for (alias = subregion->alias; alias; alias = alias->alias) {
>  alias->mapped_via_alias++;
>  }
> -subregion->addr = offset;
> +memory_region_set_address_only(subregion, offset);
>  memory_region_update_container_subregions(subregion);
>  }
>  
> @@ -2745,10 +2745,16 @@ static void 
> memory_region_readd_subregion(MemoryRegion *mr)
>  }
>  }
>  
> +void memory_region_set_address_only(MemoryRegion *mr, hwaddr addr)
> +{
> +mr->addr = addr;
> +mr->has_addr = true;
> +}
> +
>  void memory_region_set_address(MemoryRegion *mr, hwaddr addr)
>  {
>  if (addr != mr->addr) {
> -mr->addr = addr;
> +memory_region_set_address_only(mr, addr);
>  memory_region_readd_subregion(mr);
>  }
>  }

Re: [PATCH 4/9] scsi/util: add helper functions for persistent reservation types conversion

On Wed, May 08, 2024 at 05:36:24PM +0800, Changqi Lu wrote:
> This commit introduces two helper functions
> that facilitate the conversion between the
> persistent reservation types used in the SCSI
> protocol and those used in the block layer.
> 
> Signed-off-by: Changqi Lu 
> Signed-off-by: zhenwei pi 
> ---
>  include/scsi/utils.h |  5 +
>  scsi/utils.c | 40 
>  2 files changed, 45 insertions(+)

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature

Re: [PATCH 3/9] scsi/constant: add persistent reservation in/out protocol constants

On Wed, May 08, 2024 at 05:36:23PM +0800, Changqi Lu wrote:
> Add constants for the persistent reservation in/out protocol
> in the scsi/constant module. The constants include the persistent
> reservation command, type, and scope values defined in sections
> 6.13 and 6.14 of the SCSI Primary Commands-4 (SPC-4) specification.
> 
> Signed-off-by: Changqi Lu 
> Signed-off-by: zhenwei pi 
> ---
>  include/scsi/constants.h | 29 +
>  1 file changed, 29 insertions(+)

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature

Re: [PATCH 2/9] block/raw: add persistent reservation in/out driver

On Wed, May 08, 2024 at 05:36:22PM +0800, Changqi Lu wrote:
> Add persistent reservation in/out operations for raw driver.
> The following methods are implemented: bdrv_co_pr_read_keys,
> bdrv_co_pr_read_reservation, bdrv_co_pr_register, bdrv_co_pr_reserve,
> bdrv_co_pr_release, bdrv_co_pr_clear and bdrv_co_pr_preempt.
> 
> Signed-off-by: Changqi Lu 
> Signed-off-by: zhenwei pi 
> ---
>  block/raw-format.c | 55 ++
>  1 file changed, 55 insertions(+)

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature

Re: [PATCH 1/9] block: add persistent reservation in/out api

On Wed, May 08, 2024 at 05:36:21PM +0800, Changqi Lu wrote:
> Add persistent reservation in/out operations
> at the block level. The following operations
> are included:
> 
> - read_keys:retrieves the list of registered keys.
> - read_reservation: retrieves the current reservation status.
> - register: registers a new reservation key.
> - reserve:  initiates a reservation for a specific key.
> - release:  releases a reservation for a specific key.
> - clear:clears all existing reservations.
> - preempt:  preempts a reservation held by another key.
> 
> Signed-off-by: Changqi Lu 
> Signed-off-by: zhenwei pi 
> ---
>  block/block-backend.c | 386 ++
>  block/io.c| 161 +
>  include/block/block-common.h  |   9 +
>  include/block/block-io.h  |  19 ++
>  include/block/block_int-common.h  |  31 +++
>  include/sysemu/block-backend-io.h |  22 ++
>  6 files changed, 628 insertions(+)
> 
> diff --git a/block/block-backend.c b/block/block-backend.c
> index db6f9b92a3..ec67937d28 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -1770,6 +1770,392 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned 
> long int req, void *buf,
>  return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, 
> opaque);
>  }
>  
> +typedef struct BlkPrInCo {
> +BlockBackend *blk;
> +uint32_t *generation;
> +uint32_t num_keys;
> +BlockPrType *type;
> +uint64_t *keys;
> +int ret;
> +} BlkPrInCo;
> +
> +typedef struct BlkPrInCB {
> +BlockAIOCB common;
> +BlkPrInCo prco;
> +bool has_returned;
> +} BlkPrInCB;
> +
> +static const AIOCBInfo blk_pr_in_aiocb_info = {
> +.aiocb_size = sizeof(BlkPrInCB),
> +};
> +
> +static void blk_pr_in_complete(BlkPrInCB *acb)
> +{
> +if (acb->has_returned) {
> +acb->common.cb(acb->common.opaque, acb->prco.ret);
> +blk_dec_in_flight(acb->prco.blk);

Please add a comment identifying which blk_inc_in_flight() call this dec
is paired with. That makes it easier for people trying to understand
in-flight reference counting.

> +qemu_aio_unref(acb);
> +}
> +}
> +
> +static void blk_pr_in_complete_bh(void *opaque)
> +{
> +BlkPrInCB *acb = opaque;
> +assert(acb->has_returned);
> +blk_pr_in_complete(acb);
> +}
> +
> +static BlockAIOCB *blk_aio_pr_in(BlockBackend *blk, uint32_t *generation,
> + uint32_t num_keys, BlockPrType *type,
> + uint64_t *keys, CoroutineEntry co_entry,
> + BlockCompletionFunc *cb, void *opaque)
> +{
> +BlkPrInCB *acb;
> +Coroutine *co;
> +
> +blk_inc_in_flight(blk);

Please add a comment identifying which blk_dec_in_flight() call this inc
is paired with.

> +acb = blk_aio_get(_pr_in_aiocb_info, blk, cb, opaque);
> +acb->prco = (BlkPrInCo) {
> +.blk= blk,
> +.generation = generation,
> +.num_keys   = num_keys,
> +.type   = type,
> +.ret= NOT_DONE,
> +.keys   = keys,
> +};
> +acb->has_returned = false;
> +
> +co = qemu_coroutine_create(co_entry, acb);
> +aio_co_enter(qemu_get_current_aio_context(), co);
> +
> +acb->has_returned = true;
> +if (acb->prco.ret != NOT_DONE) {
> +replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
> + blk_pr_in_complete_bh, acb);
> +}
> +
> +return >common;
> +}
> +
> +
> +static int coroutine_fn
> +blk_aio_pr_do_read_keys(BlockBackend *blk, uint32_t *generation,
> +uint32_t num_keys, uint64_t *keys)
> +{
> +IO_CODE();
> +
> +blk_wait_while_drained(blk);
> +GRAPH_RDLOCK_GUARD();
> +
> +if (!blk_co_is_available(blk)) {
> +return -ENOMEDIUM;
> +}
> +
> +return bdrv_co_pr_read_keys(blk_bs(blk), generation, num_keys, keys);
> +}
> +
> +static void coroutine_fn blk_aio_pr_read_keys_entry(void *opaque)
> +{
> +BlkPrInCB *acb = opaque;
> +BlkPrInCo *prco = >prco;
> +
> +prco->ret = blk_aio_pr_do_read_keys(prco->blk, prco->generation,
> +prco->num_keys, prco->keys);
> +blk_pr_in_complete(acb);
> +}
> +
> +BlockAIOCB *blk_aio_pr_read_keys(BlockBackend *blk, uint32_t *generation,
> + uint32_t num_keys, uint64_t *keys,
> + BlockCompletionFunc *cb, void *opaque)
> +{
> +IO_CODE();
> +return blk_aio_pr_in(blk, generation, num_keys, NULL, keys,
> + blk_aio_pr_read_keys_entry, cb, opaque);
> +}
> +
> +
> +static int coroutine_fn
> +blk_aio_pr_do_read_reservation(BlockBackend *blk, uint32_t *generation,
> +   uint64_t *key, BlockPrType *type)
> +{
> +IO_CODE();
> +
> +blk_wait_while_drained(blk);
> +

Re: hw/usb/hcd-ohci: Fix #1510, #303: pid not IN or OUT

2024-05-09 Thread Cord Amfmgm

On Thu, May 9, 2024 at 12:48 PM Peter Maydell 
wrote:

> On Wed, 8 May 2024 at 16:29, Cord Amfmgm  wrote:
> > On Wed, May 8, 2024 at 3:45 AM Thomas Huth  wrote:
> >>
> >> Your Signed-off-by line does not match the From: line ... could you
> please
> >> fix this? (see
> >>
> https://www.qemu.org/docs/master/devel/submitting-a-patch.html#patch-emails-must-include-a-signed-off-by-line
> >> , too)
> >
> >
> > I'll submit the new patch request with my pseudonym in the From: and
> Signed-off-by: lines, per your request. Doesn't matter to me. However, this
> arises simply because I don't give gmail my real name -
> https://en.wikipedia.org/wiki/Nymwars
>
> I'm confused now. Of the two names you've used in this
> patch (Cord Amfmgm and David Hubbard), are they both
> pseudonyms, or is one a pseudonym and one your real name?
>
>
Hi Peter,

I am attempting to submit a small patch. For context, I'm getting broader
attention now because apparently OHCI is one of the less used components of
qemu and maybe the review process was taking a while. That's relevant
because I wasn't able to get prompt feedback and am now choosing what
appears to be the most expeditious approach -- all I want is to get this
patch done and be out of your hair. If Thomas Huth wants me to use a
consistent name, have I not complied? Are you asking out of curiosity or is
there a valid reason why I should answer your question in order to get the
patch submitted? Would you like to have a friendly chat over virtual coffee
sometime (but off-list)?

If you could please clarify I'm sure the answer is an easy one.

Thanks

Re: [PATCH V1 24/26] seccomp: cpr-exec blocker

Steve Sistare  writes:

> cpr-exec mode needs permission to exec.  Block it if permission is denied.
>
> Signed-off-by: Steve Sistare 

Reviewed-by: Fabiano Rosas

Re: [PATCH V1 23/26] migration: misc cpr-exec blockers

Steve Sistare  writes:

> Add blockers for cpr-exec migration mode for devices and options that do
> not support it.
>
> Signed-off-by: Steve Sistare 

Reviewed-by: Fabiano Rosas

Re: [PATCH V1 22/26] migration: ram block cpr-exec blockers

Steve Sistare  writes:

> Unlike cpr-reboot mode, cpr-exec mode cannot save volatile ram blocks in the
> migration stream file and recreate them later, because the physical memory for
> the blocks is pinned and registered for vfio.  Add an exec-mode blocker for
> volatile ram blocks.
>
> Also add a blocker for RAM_GUEST_MEMFD.  Preserving guest_memfd may be
> sufficient for cpr-exec, but it has not been tested yet.
>
> - Steve

extra text here

>
> Signed-off-by: Steve Sistare 

Reviewed-by: Fabiano Rosas

Re: hw/usb/hcd-ohci: Fix #1510, #303: pid not IN or OUT

2024-05-09 Thread Peter Maydell

On Wed, 8 May 2024 at 16:29, Cord Amfmgm  wrote:
> On Wed, May 8, 2024 at 3:45 AM Thomas Huth  wrote:
>>
>> Your Signed-off-by line does not match the From: line ... could you please
>> fix this? (see
>> https://www.qemu.org/docs/master/devel/submitting-a-patch.html#patch-emails-must-include-a-signed-off-by-line
>> , too)
>
>
> I'll submit the new patch request with my pseudonym in the From: and 
> Signed-off-by: lines, per your request. Doesn't matter to me. However, this 
> arises simply because I don't give gmail my real name - 
> https://en.wikipedia.org/wiki/Nymwars

I'm confused now. Of the two names you've used in this
patch (Cord Amfmgm and David Hubbard), are they both
pseudonyms, or is one a pseudonym and one your real name?

thanks
-- PMM

Re: [PATCH V1 21/26] migration: migrate_add_blocker_mode

Steve Sistare  writes:

> Define a convenience function to add a migration blocker for a single mode.
>
> Signed-off-by: Steve Sistare 

Reviewed-by: Fabiano Rosas

[PATCH v5 5/8] softmmu: Replace check for RAMBlock offset 0 with xen_mr_is_memory

From: "Edgar E. Iglesias" 

For xen, when checking for the first RAM (xen_memory), use
xen_mr_is_memory() rather than checking for a RAMBlock with
offset 0.

All Xen machines create xen_memory first so this has no
functional change for existing machines.

Signed-off-by: Edgar E. Iglesias 
---
 system/physmem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/system/physmem.c b/system/physmem.c
index 5e6257ef65..b7847db1a2 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -2229,7 +2229,7 @@ static void *qemu_ram_ptr_length(RAMBlock *block, 
ram_addr_t addr,
  * because we don't want to map the entire memory in QEMU.
  * In that case just map the requested area.
  */
-if (block->offset == 0) {
+if (xen_mr_is_memory(block->mr)) {
 return xen_map_cache(block->mr, block->offset + addr,
  len, lock, lock,
  is_write);
-- 
2.40.1

[PATCH v5 6/8] xen: mapcache: Pass the ram_addr offset to xen_map_cache()

From: "Edgar E. Iglesias" 

Pass the ram_addr offset to xen_map_cache.
This is in preparation for adding grant mappings that need
to compute the address within the RAMBlock.

No functional changes.

Signed-off-by: Edgar E. Iglesias 
---
 hw/xen/xen-mapcache.c | 16 +++-
 include/sysemu/xen-mapcache.h |  2 ++
 system/physmem.c  |  9 +
 3 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index ec95445696..26bc38a9e3 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -167,7 +167,8 @@ static void xen_remap_bucket(MapCache *mc,
  void *vaddr,
  hwaddr size,
  hwaddr address_index,
- bool dummy)
+ bool dummy,
+ ram_addr_t ram_offset)
 {
 uint8_t *vaddr_base;
 xen_pfn_t *pfns;
@@ -266,6 +267,7 @@ static void xen_remap_bucket(MapCache *mc,
 
 static uint8_t *xen_map_cache_unlocked(MapCache *mc,
hwaddr phys_addr, hwaddr size,
+   ram_addr_t ram_offset,
uint8_t lock, bool dma, bool is_write)
 {
 MapCacheEntry *entry, *pentry = NULL,
@@ -337,14 +339,16 @@ tryagain:
 if (!entry) {
 entry = g_new0(MapCacheEntry, 1);
 pentry->next = entry;
-xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy);
+xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy,
+ ram_offset);
 } else if (!entry->lock) {
 if (!entry->vaddr_base || entry->paddr_index != address_index ||
 entry->size != cache_size ||
 !test_bits(address_offset >> XC_PAGE_SHIFT,
 test_bit_size >> XC_PAGE_SHIFT,
 entry->valid_mapping)) {
-xen_remap_bucket(mc, entry, NULL, cache_size, address_index, 
dummy);
+xen_remap_bucket(mc, entry, NULL, cache_size, address_index, dummy,
+ ram_offset);
 }
 }
 
@@ -391,13 +395,15 @@ tryagain:
 
 uint8_t *xen_map_cache(MemoryRegion *mr,
hwaddr phys_addr, hwaddr size,
+   ram_addr_t ram_addr_offset,
uint8_t lock, bool dma,
bool is_write)
 {
 uint8_t *p;
 
 mapcache_lock(mapcache);
-p = xen_map_cache_unlocked(mapcache, phys_addr, size, lock, dma, is_write);
+p = xen_map_cache_unlocked(mapcache, phys_addr, size, ram_addr_offset,
+   lock, dma, is_write);
 mapcache_unlock(mapcache);
 return p;
 }
@@ -632,7 +638,7 @@ static uint8_t *xen_replace_cache_entry_unlocked(MapCache 
*mc,
 trace_xen_replace_cache_entry_dummy(old_phys_addr, new_phys_addr);
 
 xen_remap_bucket(mc, entry, entry->vaddr_base,
- cache_size, address_index, false);
+ cache_size, address_index, false, new_phys_addr);
 if (!test_bits(address_offset >> XC_PAGE_SHIFT,
 test_bit_size >> XC_PAGE_SHIFT,
 entry->valid_mapping)) {
diff --git a/include/sysemu/xen-mapcache.h b/include/sysemu/xen-mapcache.h
index 1ec9e66752..b5e3ea1bc0 100644
--- a/include/sysemu/xen-mapcache.h
+++ b/include/sysemu/xen-mapcache.h
@@ -19,6 +19,7 @@ typedef hwaddr (*phys_offset_to_gaddr_t)(hwaddr phys_offset,
 void xen_map_cache_init(phys_offset_to_gaddr_t f,
 void *opaque);
 uint8_t *xen_map_cache(MemoryRegion *mr, hwaddr phys_addr, hwaddr size,
+   ram_addr_t ram_addr_offset,
uint8_t lock, bool dma,
bool is_write);
 ram_addr_t xen_ram_addr_from_mapcache(void *ptr);
@@ -37,6 +38,7 @@ static inline void xen_map_cache_init(phys_offset_to_gaddr_t 
f,
 static inline uint8_t *xen_map_cache(MemoryRegion *mr,
  hwaddr phys_addr,
  hwaddr size,
+ ram_addr_t ram_addr_offset,
  uint8_t lock,
  bool dma,
  bool is_write)
diff --git a/system/physmem.c b/system/physmem.c
index b7847db1a2..33d09f7571 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -2231,13 +2231,14 @@ static void *qemu_ram_ptr_length(RAMBlock *block, 
ram_addr_t addr,
  */
 if (xen_mr_is_memory(block->mr)) {
 return xen_map_cache(block->mr, block->offset + addr,
- len, lock, lock,
- is_write);
+ len, block->offset,
+ lock, lock, is_write);
 }
 
 block->host = xen_map_cache(block->mr, block->offset,
-

[PATCH v5 2/8] xen: mapcache: Unmap first entries in buckets

From: "Edgar E. Iglesias" 

When invalidating memory ranges, if we happen to hit the first
entry in a bucket we were never unmapping it. This was harmless
for foreign mappings but now that we're looking to reuse the
mapcache for transient grant mappings, we must unmap entries
when invalidated.

Signed-off-by: Edgar E. Iglesias 
Reviewed-by: Stefano Stabellini 
---
 hw/xen/xen-mapcache.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index bc860f4373..ec95445696 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -491,18 +491,23 @@ static void 
xen_invalidate_map_cache_entry_unlocked(MapCache *mc,
 return;
 }
 entry->lock--;
-if (entry->lock > 0 || pentry == NULL) {
+if (entry->lock > 0) {
 return;
 }
 
-pentry->next = entry->next;
 ram_block_notify_remove(entry->vaddr_base, entry->size, entry->size);
 if (munmap(entry->vaddr_base, entry->size) != 0) {
 perror("unmap fails");
 exit(-1);
 }
+
 g_free(entry->valid_mapping);
-g_free(entry);
+if (pentry) {
+pentry->next = entry->next;
+g_free(entry);
+} else {
+memset(entry, 0, sizeof *entry);
+}
 }
 
 typedef struct XenMapCacheData {
-- 
2.40.1

[PATCH v5 7/8] xen: mapcache: Add support for grant mappings

From: "Edgar E. Iglesias" 

Add a second mapcache for grant mappings. The mapcache for
grants needs to work with XC_PAGE_SIZE granularity since
we can't map larger ranges than what has been granted to us.

Like with foreign mappings (xen_memory), machines using grants
are expected to initialize the xen_grants MR and map it
into their address-map accordingly.

Signed-off-by: Edgar E. Iglesias 
---
 hw/xen/xen-hvm-common.c |  12 ++-
 hw/xen/xen-mapcache.c   | 163 ++--
 include/hw/xen/xen-hvm-common.h |   3 +
 include/sysemu/xen.h|   7 ++
 4 files changed, 152 insertions(+), 33 deletions(-)

diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c
index c94f1990c5..7a1e2ce4b3 100644
--- a/hw/xen/xen-hvm-common.c
+++ b/hw/xen/xen-hvm-common.c
@@ -10,12 +10,18 @@
 #include "hw/boards.h"
 #include "hw/xen/arch_hvm.h"
 
-MemoryRegion xen_memory;
+MemoryRegion xen_memory, xen_grants;
 
-/* Check for xen memory.  */
+/* Check for any kind of xen memory, foreign mappings or grants.  */
 bool xen_mr_is_memory(MemoryRegion *mr)
 {
-return mr == _memory;
+return mr == _memory || mr == _grants;
+}
+
+/* Check specifically for grants.  */
+bool xen_mr_is_grants(MemoryRegion *mr)
+{
+return mr == _grants;
 }
 
 void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr,
diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index 26bc38a9e3..25041ab02d 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -14,6 +14,7 @@
 
 #include 
 
+#include "hw/xen/xen-hvm-common.h"
 #include "hw/xen/xen_native.h"
 #include "qemu/bitmap.h"
 
@@ -21,6 +22,8 @@
 #include "sysemu/xen-mapcache.h"
 #include "trace.h"
 
+#include 
+#include 
 
 #if HOST_LONG_BITS == 32
 #  define MCACHE_MAX_SIZE (1UL<<31) /* 2GB Cap */
@@ -41,6 +44,7 @@ typedef struct MapCacheEntry {
 unsigned long *valid_mapping;
 uint32_t lock;
 #define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0)
+#define XEN_MAPCACHE_ENTRY_GRANT (1 << 1)
 uint8_t flags;
 hwaddr size;
 struct MapCacheEntry *next;
@@ -71,6 +75,8 @@ typedef struct MapCache {
 } MapCache;
 
 static MapCache *mapcache;
+static MapCache *mapcache_grants;
+static xengnttab_handle *xen_region_gnttabdev;
 
 static inline void mapcache_lock(MapCache *mc)
 {
@@ -131,6 +137,12 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void 
*opaque)
 unsigned long max_mcache_size;
 unsigned int bucket_shift;
 
+xen_region_gnttabdev = xengnttab_open(NULL, 0);
+if (xen_region_gnttabdev == NULL) {
+error_report("mapcache: Failed to open gnttab device");
+exit(EXIT_FAILURE);
+}
+
 if (HOST_LONG_BITS == 32) {
 bucket_shift = 16;
 } else {
@@ -159,6 +171,15 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void 
*opaque)
 mapcache = xen_map_cache_init_single(f, opaque,
  bucket_shift,
  max_mcache_size);
+
+/*
+ * Grant mappings must use XC_PAGE_SIZE granularity since we can't
+ * map anything beyond the number of pages granted to us.
+ */
+mapcache_grants = xen_map_cache_init_single(f, opaque,
+XC_PAGE_SHIFT,
+max_mcache_size);
+
 setrlimit(RLIMIT_AS, _as);
 }
 
@@ -168,17 +189,24 @@ static void xen_remap_bucket(MapCache *mc,
  hwaddr size,
  hwaddr address_index,
  bool dummy,
+ bool grant,
+ bool is_write,
  ram_addr_t ram_offset)
 {
 uint8_t *vaddr_base;
-xen_pfn_t *pfns;
+uint32_t *refs = NULL;
+xen_pfn_t *pfns = NULL;
 int *err;
 unsigned int i;
 hwaddr nb_pfn = size >> XC_PAGE_SHIFT;
 
 trace_xen_remap_bucket(address_index);
 
-pfns = g_new0(xen_pfn_t, nb_pfn);
+if (grant) {
+refs = g_new0(uint32_t, nb_pfn);
+} else {
+pfns = g_new0(xen_pfn_t, nb_pfn);
+}
 err = g_new0(int, nb_pfn);
 
 if (entry->vaddr_base != NULL) {
@@ -207,21 +235,51 @@ static void xen_remap_bucket(MapCache *mc,
 g_free(entry->valid_mapping);
 entry->valid_mapping = NULL;
 
-for (i = 0; i < nb_pfn; i++) {
-pfns[i] = (address_index << (mc->bucket_shift - XC_PAGE_SHIFT)) + i;
+if (grant) {
+hwaddr grant_base = address_index - (ram_offset >> XC_PAGE_SHIFT);
+
+for (i = 0; i < nb_pfn; i++) {
+refs[i] = grant_base + i;
+}
+} else {
+for (i = 0; i < nb_pfn; i++) {
+pfns[i] = (address_index << (mc->bucket_shift - XC_PAGE_SHIFT)) + 
i;
+}
 }
 
-/*
- * If the caller has requested the mapping at a specific address use
- * MAP_FIXED to make sure it's honored.
- */
+entry->flags &= ~XEN_MAPCACHE_ENTRY_GRANT;
+
 if (!dummy) {
-

[PATCH v5 3/8] xen: Add xen_mr_is_memory()

From: "Edgar E. Iglesias" 

Add xen_mr_is_memory() to abstract away tests for the
xen_memory MR.

No functional changes.

Signed-off-by: Edgar E. Iglesias 
Reviewed-by: Stefano Stabellini 
Acked-by: David Hildenbrand 
---
 hw/xen/xen-hvm-common.c | 10 --
 include/sysemu/xen.h|  8 
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c
index 1627da7398..c94f1990c5 100644
--- a/hw/xen/xen-hvm-common.c
+++ b/hw/xen/xen-hvm-common.c
@@ -12,6 +12,12 @@
 
 MemoryRegion xen_memory;
 
+/* Check for xen memory.  */
+bool xen_mr_is_memory(MemoryRegion *mr)
+{
+return mr == _memory;
+}
+
 void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr,
Error **errp)
 {
@@ -28,7 +34,7 @@ void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, 
MemoryRegion *mr,
 return;
 }
 
-if (mr == _memory) {
+if (xen_mr_is_memory(mr)) {
 return;
 }
 
@@ -55,7 +61,7 @@ static void xen_set_memory(struct MemoryListener *listener,
 {
 XenIOState *state = container_of(listener, XenIOState, memory_listener);
 
-if (section->mr == _memory) {
+if (xen_mr_is_memory(section->mr)) {
 return;
 } else {
 if (add) {
diff --git a/include/sysemu/xen.h b/include/sysemu/xen.h
index 754ec2e6cb..dc72f83bcb 100644
--- a/include/sysemu/xen.h
+++ b/include/sysemu/xen.h
@@ -34,6 +34,8 @@ void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t 
length);
 void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size,
struct MemoryRegion *mr, Error **errp);
 
+bool xen_mr_is_memory(MemoryRegion *mr);
+
 #else /* !CONFIG_XEN_IS_POSSIBLE */
 
 #define xen_enabled() 0
@@ -47,6 +49,12 @@ static inline void xen_ram_alloc(ram_addr_t ram_addr, 
ram_addr_t size,
 g_assert_not_reached();
 }
 
+static inline bool xen_mr_is_memory(MemoryRegion *mr)
+{
+g_assert_not_reached();
+return false;
+}
+
 #endif /* CONFIG_XEN_IS_POSSIBLE */
 
 #endif
-- 
2.40.1

[PATCH v5 0/8] xen: Support grant mappings

From: "Edgar E. Iglesias" 

Hi,

Grant mappings are a mechanism in Xen for guests to grant each other
permissions to map and share pages. These grants can be temporary
so both map and unmaps must be respected. See here for more info:
https://github.com/xen-project/xen/blob/master/docs/misc/grant-tables.txt

Currently, the primary use-case for grants in QEMU, is with VirtIO backends.
Grant mappings will only work with models that use the address_space_map/unmap
interfaces, any other access will fail with appropriate error messages.

In response to feedback we got on v3, later version switch approach
from adding new MemoryRegion types and map/unmap hooks to instead reusing
the existing xen_map_cache() hooks (with extensions). Almost all of the
changes are now contained to the Xen modules.

This approach also refactors the mapcache to support multiple instances
(one for existing foreign mappings and another for grant mappings).

I've only enabled grants for the ARM PVH machine since that is what
I can currently test on.

Cheers,
Edgar

ChangeLog:

v4 -> v5:
* Compute grant_ref from address_index to xen_remap_bucket().
* Rename grant_is_write to is_write and comment on why foreign
  mappings don't yet use it.
* Remove unnecessary + mc->bucket_size - 1 in
  xen_invalidate_map_cache_entry_unlocked().
* Remove use of global mapcache in refactor of
  xen_replace_cache_entry_unlocked().
* Add error checking for xengnttab_unmap().
* Add assert in xen_replace_cache_entry_unlocked() against grant mappings.
* Fix memory leak when freeing first entry in mapcache buckets.
* Assert that bucket_shift is >= XC_PAGE_SHIFT when creating mapcache.
* Add missing use of xen_mr_is_memory() in hw/xen/xen-hvm-common.c.
* Rebase with master.

v3 -> v4:
* Reuse existing xen_map_cache hooks.
* Reuse existing map-cache for both foreign and grant mappings.
* Only enable grants for the ARM PVH machine (removed i386).

v2 -> v3:
* Drop patch 1/7. This was done because device unplug is an x86-only case.
* Add missing qemu_mutex_unlock() before return.

v1 -> v2:
* Split patch 2/7 to keep phymem.c changes in a separate.
* In patch "xen: add map and unmap callbacks for grant" add check for total
  allowed grant < XEN_MAX_VIRTIO_GRANTS.
* Fix formatting issues and re-based with master latest.


Edgar E. Iglesias (8):
  xen: mapcache: Make MCACHE_BUCKET_SHIFT runtime configurable
  xen: mapcache: Unmap first entries in buckets
  xen: Add xen_mr_is_memory()
  softmmu: xen: Always pass offset + addr to xen_map_cache
  softmmu: Replace check for RAMBlock offset 0 with xen_mr_is_memory
  xen: mapcache: Pass the ram_addr offset to xen_map_cache()
  xen: mapcache: Add support for grant mappings
  hw/arm: xen: Enable use of grant mappings

 hw/arm/xen_arm.c|   5 +
 hw/xen/xen-hvm-common.c |  18 ++-
 hw/xen/xen-mapcache.c   | 232 
 include/hw/xen/xen-hvm-common.h |   3 +
 include/sysemu/xen-mapcache.h   |   2 +
 include/sysemu/xen.h|  15 +++
 system/physmem.c|  12 +-
 7 files changed, 226 insertions(+), 61 deletions(-)

-- 
2.40.1

[PATCH v5 4/8] softmmu: xen: Always pass offset + addr to xen_map_cache

From: "Edgar E. Iglesias" 

Always pass address with offset to xen_map_cache().
This is in preparation for support for grant mappings.

Since this is within a block that checks for offset == 0,
this has no functional changes.

Signed-off-by: Edgar E. Iglesias 
---
 system/physmem.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/system/physmem.c b/system/physmem.c
index 342b7a8fd4..5e6257ef65 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -2230,7 +2230,8 @@ static void *qemu_ram_ptr_length(RAMBlock *block, 
ram_addr_t addr,
  * In that case just map the requested area.
  */
 if (block->offset == 0) {
-return xen_map_cache(block->mr, addr, len, lock, lock,
+return xen_map_cache(block->mr, block->offset + addr,
+ len, lock, lock,
  is_write);
 }
 
-- 
2.40.1

[PATCH v5 1/8] xen: mapcache: Make MCACHE_BUCKET_SHIFT runtime configurable

From: "Edgar E. Iglesias" 

Make MCACHE_BUCKET_SHIFT runtime configurable per cache instance.

Signed-off-by: Edgar E. Iglesias 
Reviewed-by: Stefano Stabellini 
---
 hw/xen/xen-mapcache.c | 54 ++-
 1 file changed, 33 insertions(+), 21 deletions(-)

diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index fa6813b1ad..bc860f4373 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -23,13 +23,10 @@
 
 
 #if HOST_LONG_BITS == 32
-#  define MCACHE_BUCKET_SHIFT 16
 #  define MCACHE_MAX_SIZE (1UL<<31) /* 2GB Cap */
 #else
-#  define MCACHE_BUCKET_SHIFT 20
 #  define MCACHE_MAX_SIZE (1UL<<35) /* 32GB Cap */
 #endif
-#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
 
 /* This is the size of the virtual address space reserve to QEMU that will not
  * be use by MapCache.
@@ -65,7 +62,8 @@ typedef struct MapCache {
 /* For most cases (>99.9%), the page address is the same. */
 MapCacheEntry *last_entry;
 unsigned long max_mcache_size;
-unsigned int mcache_bucket_shift;
+unsigned int bucket_shift;
+unsigned long bucket_size;
 
 phys_offset_to_gaddr_t phys_offset_to_gaddr;
 QemuMutex lock;
@@ -95,11 +93,14 @@ static inline int test_bits(int nr, int size, const 
unsigned long *addr)
 
 static MapCache *xen_map_cache_init_single(phys_offset_to_gaddr_t f,
void *opaque,
+   unsigned int bucket_shift,
unsigned long max_size)
 {
 unsigned long size;
 MapCache *mc;
 
+assert(bucket_shift >= XC_PAGE_SHIFT);
+
 mc = g_new0(MapCache, 1);
 
 mc->phys_offset_to_gaddr = f;
@@ -108,12 +109,14 @@ static MapCache 
*xen_map_cache_init_single(phys_offset_to_gaddr_t f,
 
 QTAILQ_INIT(>locked_entries);
 
+mc->bucket_shift = bucket_shift;
+mc->bucket_size = 1UL << bucket_shift;
 mc->max_mcache_size = max_size;
 
 mc->nr_buckets =
 (((mc->max_mcache_size >> XC_PAGE_SHIFT) +
-  (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >>
- (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT));
+  (1UL << (bucket_shift - XC_PAGE_SHIFT)) - 1) >>
+ (bucket_shift - XC_PAGE_SHIFT));
 
 size = mc->nr_buckets * sizeof(MapCacheEntry);
 size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
@@ -126,6 +129,13 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void 
*opaque)
 {
 struct rlimit rlimit_as;
 unsigned long max_mcache_size;
+unsigned int bucket_shift;
+
+if (HOST_LONG_BITS == 32) {
+bucket_shift = 16;
+} else {
+bucket_shift = 20;
+}
 
 if (geteuid() == 0) {
 rlimit_as.rlim_cur = RLIM_INFINITY;
@@ -146,7 +156,9 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void 
*opaque)
 }
 }
 
-mapcache = xen_map_cache_init_single(f, opaque, max_mcache_size);
+mapcache = xen_map_cache_init_single(f, opaque,
+ bucket_shift,
+ max_mcache_size);
 setrlimit(RLIMIT_AS, _as);
 }
 
@@ -195,7 +207,7 @@ static void xen_remap_bucket(MapCache *mc,
 entry->valid_mapping = NULL;
 
 for (i = 0; i < nb_pfn; i++) {
-pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i;
+pfns[i] = (address_index << (mc->bucket_shift - XC_PAGE_SHIFT)) + i;
 }
 
 /*
@@ -266,8 +278,8 @@ static uint8_t *xen_map_cache_unlocked(MapCache *mc,
 bool dummy = false;
 
 tryagain:
-address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
-address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);
+address_index  = phys_addr >> mc->bucket_shift;
+address_offset = phys_addr & (mc->bucket_size - 1);
 
 trace_xen_map_cache(phys_addr);
 
@@ -294,14 +306,14 @@ tryagain:
 return mc->last_entry->vaddr_base + address_offset;
 }
 
-/* size is always a multiple of MCACHE_BUCKET_SIZE */
+/* size is always a multiple of mc->bucket_size */
 if (size) {
 cache_size = size + address_offset;
-if (cache_size % MCACHE_BUCKET_SIZE) {
-cache_size += MCACHE_BUCKET_SIZE - (cache_size % 
MCACHE_BUCKET_SIZE);
+if (cache_size % mc->bucket_size) {
+cache_size += mc->bucket_size - (cache_size % mc->bucket_size);
 }
 } else {
-cache_size = MCACHE_BUCKET_SIZE;
+cache_size = mc->bucket_size;
 }
 
 entry = >entry[address_index % mc->nr_buckets];
@@ -422,7 +434,7 @@ static ram_addr_t 
xen_ram_addr_from_mapcache_single(MapCache *mc, void *ptr)
 trace_xen_ram_addr_from_mapcache_not_in_cache(ptr);
 raddr = RAM_ADDR_INVALID;
 } else {
-raddr = (reventry->paddr_index << MCACHE_BUCKET_SHIFT) +
+raddr = (reventry->paddr_index << mc->bucket_shift) +
  ((unsigned long) ptr - (unsigned long) entry->vaddr_base);
 }
 mapcache_unlock(mc);
@@ -585,8 +597,8 @@

[PATCH v5 8/8] hw/arm: xen: Enable use of grant mappings

From: "Edgar E. Iglesias" 

Signed-off-by: Edgar E. Iglesias 
Reviewed-by: Stefano Stabellini 
---
 hw/arm/xen_arm.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/hw/arm/xen_arm.c b/hw/arm/xen_arm.c
index 15fa7dfa84..6fad829ede 100644
--- a/hw/arm/xen_arm.c
+++ b/hw/arm/xen_arm.c
@@ -125,6 +125,11 @@ static void xen_init_ram(MachineState *machine)
  GUEST_RAM1_BASE, ram_size[1]);
 memory_region_add_subregion(sysmem, GUEST_RAM1_BASE, _hi);
 }
+
+/* Setup support for grants.  */
+memory_region_init_ram(_grants, NULL, "xen.grants", block_len,
+   _fatal);
+memory_region_add_subregion(sysmem, XEN_GRANT_ADDR_OFF, _grants);
 }
 
 void arch_handle_ioreq(XenIOState *state, ioreq_t *req)
-- 
2.40.1

Re: [PATCH] target/riscv: Remove experimental prefix from "B" extension

2024-05-09 Thread Daniel Henrique Barboza





On 5/8/24 08:22, Andrew Jones wrote:

On Tue, May 07, 2024 at 11:27:21AM GMT, Rob Bradford wrote:

This extension has now been ratified:
https://jira.riscv.org/browse/RVS-2006 so the "x-" prefix can be
removed.

Signed-off-by: Rob Bradford 
---
  target/riscv/cpu.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index eb1a2e7d6d..861d9f4350 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1396,7 +1396,7 @@ static const MISAExtInfo misa_ext_info_arr[] = {
  MISA_EXT_INFO(RVJ, "x-j", "Dynamic translated languages"),
  MISA_EXT_INFO(RVV, "v", "Vector operations"),
  MISA_EXT_INFO(RVG, "g", "General purpose (IMAFD_Zicsr_Zifencei)"),
-MISA_EXT_INFO(RVB, "x-b", "Bit manipulation (Zba_Zbb_Zbs)")
+MISA_EXT_INFO(RVB, "b", "Bit manipulation (Zba_Zbb_Zbs)")
  };
  
  static void riscv_cpu_validate_misa_mxl(RISCVCPUClass *mcc)

--
2.44.0




Reviewed-by: Andrew Jones 

I think we should also either change the false to true for RVB in
misa_ext_cfgs[] or at least ensure RVB is set for the 'max' cpu
type in riscv_init_max_cpu_extensions().


I prefer if we keep misa_ext_cfgs[] as is. Changing the defaults in this array
will also change the defaults for rv64. IMO we should enable RVB manually in
riscv_init_max_cpu_extensions().

We already have some precedence for it: RVV is enabled in 'max' while is default
'false' for rv64.


Thanks,

Daniel




Thanks,
drew

[PATCH 13/13] tests/qtest: arm: fix operation in a build without any boards or devices

ARM/aarch64 are easy to fix because they already have to pass a machine
type by hand.  Just guard the tests with a check that the machine actually
exists.

Signed-off-by: Paolo Bonzini 
---
 tests/qtest/arm-cpu-features.c | 4 
 tests/qtest/migration-test.c   | 6 ++
 tests/qtest/numa-test.c| 4 
 3 files changed, 14 insertions(+)

diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c
index 9d6e6190d55..966c65d5c3e 100644
--- a/tests/qtest/arm-cpu-features.c
+++ b/tests/qtest/arm-cpu-features.c
@@ -632,6 +632,10 @@ int main(int argc, char **argv)
 {
 g_test_init(, , NULL);
 
+if (!qtest_has_machine("virt")) {
+goto out;
+}
+
 if (qtest_has_accel("tcg")) {
 qtest_add_data_func("/arm/query-cpu-model-expansion",
 NULL, test_query_cpu_model_expansion);
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 5d6d8cd6343..31045b69fa7 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -813,6 +813,12 @@ static int test_migrate_start(QTestState **from, 
QTestState **to,
 kvm_opts = ",dirty-ring-size=4096";
 }
 
+if (!qtest_has_machine(machine_alias)) {
+g_autofree char *msg = g_strdup_printf("machine %s not supported", 
machine_alias);
+g_test_skip(msg);
+return -1;
+}
+
 machine = resolve_machine_version(machine_alias, QEMU_ENV_SRC,
   QEMU_ENV_DST);
 
diff --git a/tests/qtest/numa-test.c b/tests/qtest/numa-test.c
index 4f4404a4b14..7aa262dbb99 100644
--- a/tests/qtest/numa-test.c
+++ b/tests/qtest/numa-test.c
@@ -558,6 +558,9 @@ int main(int argc, char **argv)
 }
 
 if (g_str_equal(arch, "aarch64")) {
+if (!qtest_has_machine("virt")) {
+goto out;
+}
 g_string_append(args, " -machine virt");
 }
 
@@ -590,5 +593,6 @@ int main(int argc, char **argv)
 aarch64_numa_cpu);
 }
 
+out:
 return g_test_run();
 }
-- 
2.45.0

[PATCH 01/13] s390x: move s390_cpu_addr2state to target/s390x/sigp.c

This function has no dependency on the virtio-ccw machine type, though it
assumes that the CPU address corresponds to the core_id and the index.

If there is any need of something different or more fancy (unlikely)
S390 can include a MachineClass subclass and implement it there.  For
now, move it to sigp.c for simplicity.

Signed-off-by: Paolo Bonzini 
---
 hw/s390x/s390-virtio-ccw.c | 16 
 target/s390x/sigp.c| 17 +
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 4dcc2138200..feabc173eb3 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -50,22 +50,6 @@
 
 static Error *pv_mig_blocker;
 
-S390CPU *s390_cpu_addr2state(uint16_t cpu_addr)
-{
-static MachineState *ms;
-
-if (!ms) {
-ms = MACHINE(qdev_get_machine());
-g_assert(ms->possible_cpus);
-}
-
-/* CPU address corresponds to the core_id and the index */
-if (cpu_addr >= ms->possible_cpus->len) {
-return NULL;
-}
-return S390_CPU(ms->possible_cpus->cpus[cpu_addr].cpu);
-}
-
 static S390CPU *s390x_new_cpu(const char *typename, uint32_t core_id,
   Error **errp)
 {
diff --git a/target/s390x/sigp.c b/target/s390x/sigp.c
index 9dd977349ab..ad0ad61177d 100644
--- a/target/s390x/sigp.c
+++ b/target/s390x/sigp.c
@@ -11,6 +11,7 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "s390x-internal.h"
+#include "hw/boards.h"
 #include "sysemu/hw_accel.h"
 #include "sysemu/runstate.h"
 #include "exec/address-spaces.h"
@@ -435,6 +436,22 @@ static int sigp_set_architecture(S390CPU *cpu, uint32_t 
param,
 return SIGP_CC_STATUS_STORED;
 }
 
+S390CPU *s390_cpu_addr2state(uint16_t cpu_addr)
+{
+static MachineState *ms;
+
+if (!ms) {
+ms = MACHINE(qdev_get_machine());
+g_assert(ms->possible_cpus);
+}
+
+/* CPU address corresponds to the core_id and the index */
+if (cpu_addr >= ms->possible_cpus->len) {
+return NULL;
+}
+return S390_CPU(ms->possible_cpus->cpus[cpu_addr].cpu);
+}
+
 int handle_sigp(CPUS390XState *env, uint8_t order, uint64_t r1, uint64_t r3)
 {
 uint64_t *status_reg = >regs[r1];
-- 
2.45.0

[PATCH 08/13] i386: correctly select code in hw/i386 that depends on other components

fw_cfg.c and vapic.c are currently included unconditionally but
depend on other components.  vapic.c depends on the local APIC,
while fw_cfg.c includes a piece of AML builder code that depends
on CONFIG_ACPI.

Signed-off-by: Paolo Bonzini 
---
 hw/i386/fw_cfg.c| 2 ++
 hw/i386/meson.build | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c
index d802d2787f0..6e0d9945d07 100644
--- a/hw/i386/fw_cfg.c
+++ b/hw/i386/fw_cfg.c
@@ -203,6 +203,7 @@ void fw_cfg_build_feature_control(MachineState *ms, 
FWCfgState *fw_cfg)
 fw_cfg_add_file(fw_cfg, "etc/msr_feature_control", val, sizeof(*val));
 }
 
+#ifdef CONFIG_ACPI
 void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg)
 {
 /*
@@ -229,3 +230,4 @@ void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg)
 aml_append(dev, aml_name_decl("_CRS", crs));
 aml_append(scope, dev);
 }
+#endif
diff --git a/hw/i386/meson.build b/hw/i386/meson.build
index d8b70ef3e9c..d9da676038c 100644
--- a/hw/i386/meson.build
+++ b/hw/i386/meson.build
@@ -1,12 +1,12 @@
 i386_ss = ss.source_set()
 i386_ss.add(files(
   'fw_cfg.c',
-  'vapic.c',
   'e820_memory_layout.c',
   'multiboot.c',
   'x86.c',
 ))
 
+i386_ss.add(when: 'CONFIG_APIC', if_true: files('vapic.c'))
 i386_ss.add(when: 'CONFIG_X86_IOMMU', if_true: files('x86-iommu.c'),
   if_false: files('x86-iommu-stub.c'))
 i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c'),
-- 
2.45.0

[PATCH 12/13] i386: select correct components for no-board build

The local APIC is a part of the CPU and has callbacks that are invoked
from multiple accelerators.

The IOAPIC on the other hand is optional, but ioapic_eoi_broadcast is
used by common x86 code to implement the IOAPIC's implicit EOI mode.
Add a stub in case the IOAPIC device is not included but the APIC is.

Signed-off-by: Paolo Bonzini 
---
 hw/intc/ioapic-stub.c  | 29 +
 .gitlab-ci.d/buildtest.yml |  2 +-
 hw/intc/meson.build|  2 +-
 target/i386/Kconfig|  1 +
 4 files changed, 32 insertions(+), 2 deletions(-)
 create mode 100644 hw/intc/ioapic-stub.c

diff --git a/hw/intc/ioapic-stub.c b/hw/intc/ioapic-stub.c
new file mode 100644
index 000..4dcd86248da
--- /dev/null
+++ b/hw/intc/ioapic-stub.c
@@ -0,0 +1,29 @@
+/*
+ *  ioapic.c IOAPIC emulation logic
+ *
+ *  Copyright (c) 2004-2005 Fabrice Bellard
+ *
+ *  Split the ioapic logic from apic.c
+ *  Xiantao Zhang 
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/intc/ioapic.h"
+
+void ioapic_eoi_broadcast(int vector)
+{
+}
diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml
index f8502905203..62616157206 100644
--- a/.gitlab-ci.d/buildtest.yml
+++ b/.gitlab-ci.d/buildtest.yml
@@ -650,7 +650,7 @@ build-tci:
 # Check our reduced build configurations
 # requires libfdt: aarch64, arm, i386, loongarch64, microblaze, microblazeel,
 #   mips64el, or1k, ppc, ppc64, riscv32, riscv64, rx, x86_64
-# does not build without boards: i386, x86_64
+# fails qtest without boards: i386, x86_64
 build-without-defaults:
   extends: .native_build_job_template
   needs:
diff --git a/hw/intc/meson.build b/hw/intc/meson.build
index f4b540e6a8b..0d1b7d0a432 100644
--- a/hw/intc/meson.build
+++ b/hw/intc/meson.build
@@ -20,7 +20,7 @@ system_ss.add(when: 'CONFIG_GOLDFISH_PIC', if_true: 
files('goldfish_pic.c'))
 system_ss.add(when: 'CONFIG_HEATHROW_PIC', if_true: files('heathrow_pic.c'))
 system_ss.add(when: 'CONFIG_I8259', if_true: files('i8259_common.c', 
'i8259.c'))
 system_ss.add(when: 'CONFIG_IMX', if_true: files('imx_avic.c', 'imx_gpcv2.c'))
-system_ss.add(when: 'CONFIG_IOAPIC', if_true: files('ioapic_common.c'))
+system_ss.add(when: 'CONFIG_IOAPIC', if_true: files('ioapic_common.c'), 
if_false: files('ioapic-stub.c'))
 system_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_intc.c'))
 system_ss.add(when: 'CONFIG_OPENPIC', if_true: files('openpic.c'))
 system_ss.add(when: 'CONFIG_PL190', if_true: files('pl190.c'))
diff --git a/target/i386/Kconfig b/target/i386/Kconfig
index ad9291d3b8f..6b0feef0299 100644
--- a/target/i386/Kconfig
+++ b/target/i386/Kconfig
@@ -1,5 +1,6 @@
 config I386
 bool
+select APIC
 # kvm_arch_fixup_msi_route() needs to access PCIDevice
 select PCI if KVM
 
-- 
2.45.0

[PATCH 07/13] xen: register legacy backends via xen_backend_init

It is okay to register legacy backends in the middle of xen_bus_init().
All that the registration does is record the existence of the backend
in xenstore.

This makes it possible to remove them from the build without introducing
undefined symbols in xen_be_init().  It also removes the need for the
backend_register callback, whose only purpose is to avoid registering
nonfunctional backends.

Signed-off-by: Paolo Bonzini 
---
 include/hw/xen/xen-legacy-backend.h | 14 ++
 include/hw/xen/xen_pvdev.h  |  1 -
 hw/9pfs/xen-9p-backend.c|  8 +++-
 hw/display/xenfb.c  |  8 +++-
 hw/usb/xen-usb.c| 14 --
 hw/xen/xen-legacy-backend.c | 16 
 6 files changed, 20 insertions(+), 41 deletions(-)

diff --git a/include/hw/xen/xen-legacy-backend.h 
b/include/hw/xen/xen-legacy-backend.h
index 2cca1747786..979c4ea04c5 100644
--- a/include/hw/xen/xen-legacy-backend.h
+++ b/include/hw/xen/xen-legacy-backend.h
@@ -66,18 +66,8 @@ static inline void xen_be_unmap_grant_ref(struct 
XenLegacyDevice *xendev,
 return xen_be_unmap_grant_refs(xendev, ptr, , 1);
 }
 
-/* actual backend drivers */
-extern struct XenDevOps xen_console_ops;  /* xen_console.c */
-extern struct XenDevOps xen_kbdmouse_ops; /* xen_framebuffer.c */
-extern struct XenDevOps xen_framebuffer_ops;  /* xen_framebuffer.c */
-extern struct XenDevOps xen_blkdev_ops;   /* xen_disk.c*/
-#ifdef CONFIG_VIRTFS
-extern struct XenDevOps xen_9pfs_ops;   /* xen-9p-backend.c*/
-#endif
-extern struct XenDevOps xen_netdev_ops;   /* xen_nic.c */
-#ifdef CONFIG_USB_LIBUSB
-extern struct XenDevOps xen_usb_ops;  /* xen-usb.c */
-#endif
+/* backend drivers not included in all machines */
+extern struct XenDevOps xen_framebuffer_ops;  /* xenfb.c */
 
 /* configuration (aka xenbus setup) */
 void xen_config_cleanup(void);
diff --git a/include/hw/xen/xen_pvdev.h b/include/hw/xen/xen_pvdev.h
index ddad4b9f36a..fdf84f47af1 100644
--- a/include/hw/xen/xen_pvdev.h
+++ b/include/hw/xen/xen_pvdev.h
@@ -29,7 +29,6 @@ struct XenDevOps {
  const char *node);
 void  (*frontend_changed)(struct XenLegacyDevice *xendev,
   const char *node);
-int   (*backend_register)(void);
 };
 
 struct XenLegacyDevice {
diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
index 4aa9c8c736d..a3ac53f989e 100644
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c
@@ -513,7 +513,7 @@ static void xen_9pfs_alloc(struct XenLegacyDevice *xendev)
 xenstore_write_be_int(xendev, "max-ring-page-order", MAX_RING_ORDER);
 }
 
-struct XenDevOps xen_9pfs_ops = {
+static struct XenDevOps xen_9pfs_ops = {
 .size   = sizeof(Xen9pfsDev),
 .flags  = DEVOPS_FLAG_NEED_GNTDEV,
 .alloc  = xen_9pfs_alloc,
@@ -522,3 +522,9 @@ struct XenDevOps xen_9pfs_ops = {
 .disconnect = xen_9pfs_disconnect,
 .free   = xen_9pfs_free,
 };
+
+static void xen_9pfs_register_backend(void)
+{
+xen_be_register("9pfs", _9pfs_ops);
+}
+xen_backend_init(xen_9pfs_register_backend);
diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c
index b2130a0d700..27536bfce0c 100644
--- a/hw/display/xenfb.c
+++ b/hw/display/xenfb.c
@@ -972,7 +972,7 @@ static void fb_event(struct XenLegacyDevice *xendev)
 
 /*  */
 
-struct XenDevOps xen_kbdmouse_ops = {
+static struct XenDevOps xen_kbdmouse_ops = {
 .size   = sizeof(struct XenInput),
 .init   = input_init,
 .initialise = input_initialise,
@@ -995,3 +995,9 @@ static const GraphicHwOps xenfb_ops = {
 .gfx_update  = xenfb_update,
 .ui_info = xenfb_ui_info,
 };
+
+static void xen_vkbd_register_backend(void)
+{
+xen_be_register("vkbd", _kbdmouse_ops);
+}
+xen_backend_init(xen_vkbd_register_backend);
diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c
index 09ec326aeae..416623f956a 100644
--- a/hw/usb/xen-usb.c
+++ b/hw/usb/xen-usb.c
@@ -1083,7 +1083,7 @@ static void usbback_event(struct XenLegacyDevice *xendev)
 qemu_bh_schedule(usbif->bh);
 }
 
-struct XenDevOps xen_usb_ops = {
+static struct XenDevOps xen_usb_ops = {
 .size= sizeof(struct usbback_info),
 .flags   = DEVOPS_FLAG_NEED_GNTDEV,
 .init= usbback_init,
@@ -1095,15 +1095,9 @@ struct XenDevOps xen_usb_ops = {
 .event   = usbback_event,
 };
 
-#else /* USBIF_SHORT_NOT_OK */
-
-static int usbback_not_supported(void)
+static void xen_usb_register_backend(void)
 {
-return -EINVAL;
+xen_be_register("qusb", _usb_ops);
 }
-
-struct XenDevOps xen_usb_ops = {
-.backend_register = usbback_not_supported,
-};
-
+xen_backend_init(xen_usb_register_backend);
 #endif
diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c
index 124dd5f3d68..6f0b300a421 100644
---

[PATCH 02/13] s390_flic: add migration-enabled property

Instead of mucking with css_migration_enabled(), add a property specific to
the FLIC device, similar to what is done for TYPE_S390_STATTRIB.

Signed-off-by: Paolo Bonzini 
---
 include/hw/s390x/s390_flic.h | 1 +
 hw/intc/s390_flic.c  | 6 +-
 hw/s390x/s390-virtio-ccw.c   | 1 +
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/hw/s390x/s390_flic.h b/include/hw/s390x/s390_flic.h
index 3907a13d076..bcb081def58 100644
--- a/include/hw/s390x/s390_flic.h
+++ b/include/hw/s390x/s390_flic.h
@@ -47,6 +47,7 @@ struct S390FLICState {
 /* to limit AdapterRoutes.num_routes for compat */
 uint32_t adapter_routes_max_batch;
 bool ais_supported;
+bool migration_enabled;
 };
 
 
diff --git a/hw/intc/s390_flic.c b/hw/intc/s390_flic.c
index f4a848460b8..7f930800877 100644
--- a/hw/intc/s390_flic.c
+++ b/hw/intc/s390_flic.c
@@ -405,6 +405,8 @@ static void qemu_s390_flic_class_init(ObjectClass *oc, void 
*data)
 static Property s390_flic_common_properties[] = {
 DEFINE_PROP_UINT32("adapter_routes_max_batch", S390FLICState,
adapter_routes_max_batch, ADAPTER_ROUTES_MAX_GSI),
+DEFINE_PROP_BOOL("migration-enabled", S390FLICState,
+ migration_enabled, true),
 DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -457,7 +459,9 @@ type_init(qemu_s390_flic_register_types)
 
 static bool adapter_info_so_needed(void *opaque)
 {
-return css_migration_enabled();
+S390FLICState *fs = S390_FLIC_COMMON(opaque);
+
+return fs->migration_enabled;
 }
 
 const VMStateDescription vmstate_adapter_info_so = {
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index feabc173eb3..1383e47eeb5 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -1174,6 +1174,7 @@ static void ccw_machine_2_9_class_options(MachineClass 
*mc)
 S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc);
 static GlobalProperty compat[] = {
 { TYPE_S390_STATTRIB, "migration-enabled", "off", },
+{ TYPE_S390_FLIC_COMMON, "migration-enabled", "off", },
 };
 
 ccw_machine_2_10_class_options(mc);
-- 
2.45.0

[PATCH 03/13] s390: move css_migration_enabled from machine to css.c

The CSS subsystem uses global variables, just face the truth and use
a variable also for whether the CSS vmstate is in use; remove the
indirection of fetching it from the machine type, which makes the
TCG code depend unnecessarily on the virtio-ccw machine.

Signed-off-by: Paolo Bonzini 
---
 include/hw/s390x/css.h |  6 ++
 include/hw/s390x/s390-virtio-ccw.h |  7 ---
 hw/s390x/css.c | 10 +++---
 hw/s390x/s390-virtio-ccw.c | 15 +++
 4 files changed, 16 insertions(+), 22 deletions(-)

diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h
index ba72ee3dd20..8289e458370 100644
--- a/include/hw/s390x/css.h
+++ b/include/hw/s390x/css.h
@@ -333,4 +333,10 @@ static inline int ccw_dstream_read_buf(CcwDataStream *cds, 
void *buff, int len)
 #define ccw_dstream_read(cds, v) ccw_dstream_read_buf((cds), &(v), sizeof(v))
 #define ccw_dstream_write(cds, v) ccw_dstream_write_buf((cds), &(v), sizeof(v))
 
+/**
+ * true if (vmstate based) migration of the channel subsystem
+ * is enabled, false if it is disabled.
+ */
+extern bool css_migration_enabled;
+
 #endif
diff --git a/include/hw/s390x/s390-virtio-ccw.h 
b/include/hw/s390x/s390-virtio-ccw.h
index c1d46e78af8..c0494e511cb 100644
--- a/include/hw/s390x/s390-virtio-ccw.h
+++ b/include/hw/s390x/s390-virtio-ccw.h
@@ -43,7 +43,6 @@ struct S390CcwMachineClass {
 /*< public >*/
 bool ri_allowed;
 bool cpu_model_allowed;
-bool css_migration_enabled;
 bool hpage_1m_allowed;
 int max_threads;
 };
@@ -55,10 +54,4 @@ bool cpu_model_allowed(void);
 /* 1M huge page mappings allowed by the machine */
 bool hpage_1m_allowed(void);
 
-/**
- * Returns true if (vmstate based) migration of the channel subsystem
- * is enabled, false if it is disabled.
- */
-bool css_migration_enabled(void);
-
 #endif
diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index 295530963a6..b2d5327dbf4 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -23,6 +23,8 @@
 #include "hw/s390x/s390-virtio-ccw.h"
 #include "hw/s390x/s390-ccw.h"
 
+bool css_migration_enabled = true;
+
 typedef struct CrwContainer {
 CRW crw;
 QTAILQ_ENTRY(CrwContainer) sibling;
@@ -180,7 +182,7 @@ static const VMStateDescription vmstate_orb = {
 
 static bool vmstate_schdev_orb_needed(void *opaque)
 {
-return css_migration_enabled();
+return css_migration_enabled;
 }
 
 static const VMStateDescription vmstate_schdev_orb = {
@@ -388,7 +390,7 @@ static int subch_dev_post_load(void *opaque, int version_id)
 css_subch_assign(s->cssid, s->ssid, s->schid, s->devno, s);
 }
 
-if (css_migration_enabled()) {
+if (css_migration_enabled) {
 /* No compat voodoo to do ;) */
 return 0;
 }
@@ -412,7 +414,9 @@ static int subch_dev_post_load(void *opaque, int version_id)
 
 void css_register_vmstate(void)
 {
-vmstate_register(NULL, 0, _css, _subsys);
+if (css_migration_enabled) {
+vmstate_register(NULL, 0, _css, _subsys);
+}
 }
 
 IndAddr *get_indicator(hwaddr ind_addr, int len)
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 1383e47eeb5..aa90703d518 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -275,11 +275,9 @@ static void ccw_init(MachineState *machine)
 s390_enable_css_support(s390_cpu_addr2state(0));
 
 ret = css_create_css_image(VIRTUAL_CSSID, true);
-
 assert(ret == 0);
-if (css_migration_enabled()) {
-css_register_vmstate();
-}
+
+css_register_vmstate();
 
 /* Create VirtIO network adapters */
 s390_create_virtio_net(BUS(css_bus), mc->default_nic);
@@ -741,7 +739,6 @@ static void ccw_machine_class_init(ObjectClass *oc, void 
*data)
 
 s390mc->ri_allowed = true;
 s390mc->cpu_model_allowed = true;
-s390mc->css_migration_enabled = true;
 s390mc->hpage_1m_allowed = true;
 s390mc->max_threads = 1;
 mc->init = ccw_init;
@@ -811,11 +808,6 @@ static const TypeInfo ccw_machine_info = {
 },
 };
 
-bool css_migration_enabled(void)
-{
-return get_machine_class()->css_migration_enabled;
-}
-
 #define DEFINE_CCW_MACHINE(suffix, verstr, latest)\
 static void ccw_machine_##suffix##_class_init(ObjectClass *oc,\
   void *data) \
@@ -1171,7 +1163,6 @@ static void ccw_machine_2_9_instance_options(MachineState 
*machine)
 
 static void ccw_machine_2_9_class_options(MachineClass *mc)
 {
-S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc);
 static GlobalProperty compat[] = {
 { TYPE_S390_STATTRIB, "migration-enabled", "off", },
 { TYPE_S390_FLIC_COMMON, "migration-enabled", "off", },
@@ -1180,7 +1171,7 @@ static void ccw_machine_2_9_class_options(MachineClass 
*mc)
 ccw_machine_2_10_class_options(mc);
 compat_props_add(mc->compat_props, hw_compat_2_9, hw_compat_2_9_len);
 compat_props_add(mc->compat_props, compat,

[PATCH 06/13] xen: initialize legacy backends from xen_bus_init()

Prepare for moving the calls to xen_be_register() under the
control of xen_bus_init(), using the normal xen_backend_init()
method that is used by the "modern" backends.

This requires the xenstore global variable to be initialized,
which is done by xen_be_init().  To ensure that everything is
ready at the time the xen_backend_init() functions are called,
remove the xen_be_init() function from all the boards and
place it directly in xen_bus_init().

Signed-off-by: Paolo Bonzini 
---
 hw/i386/pc.c  | 1 -
 hw/xen/xen-bus.c  | 4 
 hw/xen/xen-hvm-common.c   | 2 --
 hw/xenpv/xen_machine_pv.c | 5 +
 4 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 505ea750f4d..19f21953b4a 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1250,7 +1250,6 @@ void pc_basic_device_init(struct PCMachineState *pcms,
 pci_create_simple(pcms->pcibus, -1, "xen-platform");
 }
 xen_bus_init();
-xen_be_init();
 }
 #endif
 
diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index fb82cc33e48..95b207ac8b4 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -13,6 +13,7 @@
 #include "hw/sysbus.h"
 #include "hw/xen/xen.h"
 #include "hw/xen/xen-backend.h"
+#include "hw/xen/xen-legacy-backend.h" /* xen_be_init() */
 #include "hw/xen/xen-bus.h"
 #include "hw/xen/xen-bus-helper.h"
 #include "monitor/monitor.h"
@@ -329,6 +330,9 @@ static void xen_bus_realize(BusState *bus, Error **errp)
 goto fail;
 }
 
+/* Initialize legacy backend core & drivers */
+xen_be_init();
+
 if (xs_node_scanf(xenbus->xsh, XBT_NULL, "", /* domain root node */
   "domid", NULL, "%u", ) == 1) {
 xenbus->backend_id = domid;
diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c
index 1627da73982..2d1b0321214 100644
--- a/hw/xen/xen-hvm-common.c
+++ b/hw/xen/xen-hvm-common.c
@@ -872,8 +872,6 @@ void xen_register_ioreq(XenIOState *state, unsigned int 
max_cpus,
 
 xen_bus_init();
 
-xen_be_init();
-
 return;
 
 err:
diff --git a/hw/xenpv/xen_machine_pv.c b/hw/xenpv/xen_machine_pv.c
index 1130d1a1479..b500ce09891 100644
--- a/hw/xenpv/xen_machine_pv.c
+++ b/hw/xenpv/xen_machine_pv.c
@@ -34,8 +34,7 @@ static void xen_init_pv(MachineState *machine)
 {
 setup_xen_backend_ops();
 
-/* Initialize backend core & drivers */
-xen_be_init();
+xen_bus_init();
 
 switch (xen_mode) {
 case XEN_ATTACH:
@@ -60,8 +59,6 @@ static void xen_init_pv(MachineState *machine)
 vga_interface_created = true;
 }
 
-xen_bus_init();
-
 /* config cleanup hook */
 atexit(xen_config_cleanup);
 }
-- 
2.45.0

[PATCH 00/13] fix --without-default-devices build and (mostly) tests

The recent change to make boards "default y" made them go away from
a --without-default-devices build, because the boards are not anymore
enabled explicitly in configs/devices/.

This is a problem for some targets that were not fully ready for this
and have generic target code that needs symbols from boards or devices.
The more complicated ones are s390x and i386.  In some cases some
components simply have to be required by target/ARCH/, but often
it is better to move some code around, associating it with boards
instead of targets or vice versa.

--without-default-devices builds in practice will always use a
custom config, but let's keep things tidy.  This series does
this for s390x (patches 1 to 5) and x86 (patches 6 to 12).  As a
small addendum, patch 13 fixes qtest for ARM (32- and 64-bit) on a
--without-default-devices build.

The series seems huge, but it's mostly code movement.  Patch 10
in particular moves board building code, which is unrelated to the
X86MachineState superclass and has many dependencies on NUMA or
hw/i386/pc-sysfw.c.

I suspect that there are more issues, for example when building
a CONFIG_MICROVM-only binary.  Fixing builds without boards on vanilla
upstream configs is the more pressing problem, though.

Patches 6 and 7 were tested with the Avocado Xen-on-KVM tests.

Paolo


Paolo Bonzini (13):
  s390x: move s390_cpu_addr2state to target/s390x/sigp.c
  s390_flic: add migration-enabled property
  s390: move css_migration_enabled from machine to css.c
  s390x: select correct components for no-board build
  tests/qtest: s390x: fix operation in a build without any boards or
devices
  xen: initialize legacy backends from xen_bus_init()
  xen: register legacy backends via xen_backend_init
  i386: correctly select code in hw/i386 that depends on other
components
  i386: pc: remove unnecessary MachineClass overrides
  hw/i386: split x86.c in multiple parts
  hw/i386: move rtc-reset-reinjection command out of hw/rtc
  i386: select correct components for no-board build
  tests/qtest: arm: fix operation in a build without any boards or
devices

 include/hw/i386/x86.h   |   10 +-
 include/hw/rtc/mc146818rtc.h|2 +-
 include/hw/s390x/css.h  |6 +
 include/hw/s390x/s390-virtio-ccw.h  |7 -
 include/hw/s390x/s390_flic.h|1 +
 include/hw/xen/xen-legacy-backend.h |   14 +-
 include/hw/xen/xen_pvdev.h  |1 -
 hw/9pfs/xen-9p-backend.c|8 +-
 hw/display/xenfb.c  |8 +-
 hw/i386/fw_cfg.c|2 +
 hw/i386/monitor.c   |   46 ++
 hw/i386/pc.c|4 -
 hw/i386/x86-common.c| 1007 +
 hw/i386/x86-cpu.c   |   97 +++
 hw/i386/x86.c   | 1058 +--
 hw/intc/ioapic-stub.c   |   29 +
 hw/intc/s390_flic.c |6 +-
 hw/rtc/mc146818rtc.c|   12 +-
 hw/s390x/css.c  |   10 +-
 hw/s390x/s390-virtio-ccw.c  |   32 +-
 hw/usb/xen-usb.c|   14 +-
 hw/xen/xen-bus.c|4 +
 hw/xen/xen-hvm-common.c |2 -
 hw/xen/xen-legacy-backend.c |   16 -
 hw/xenpv/xen_machine_pv.c   |5 +-
 target/s390x/sigp.c |   17 +
 tests/qtest/arm-cpu-features.c  |4 +
 tests/qtest/drive_del-test.c|7 +-
 tests/qtest/migration-test.c|6 +
 tests/qtest/numa-test.c |4 +
 .gitlab-ci.d/buildtest.yml  |4 +-
 hw/i386/meson.build |7 +-
 hw/intc/meson.build |2 +-
 target/i386/Kconfig |1 +
 target/s390x/Kconfig|2 +
 35 files changed, 1289 insertions(+), 1166 deletions(-)
 create mode 100644 hw/i386/monitor.c
 create mode 100644 hw/i386/x86-common.c
 create mode 100644 hw/i386/x86-cpu.c
 create mode 100644 hw/intc/ioapic-stub.c

-- 
2.45.0

[PATCH 05/13] tests/qtest: s390x: fix operation in a build without any boards or devices

Do the bare minimum to ensure that at least a vanilla
--without-default-devices build works for all targets except i386,
x86_64 and ppc64.  In particular this fixes s390x-softmmu; i386 and
x86_64 have about a dozen failing tests that do not pass -M and therefore
require a default machine type; ppc64 has the same issue, though only
with numa-test.

If we can for now ignore the cases where boards and devices are picked
by hand, drive_del-test however can be fixed easily; almost all tests
check for the virtio-blk or virtio-scsi device that they use, and are
already skipped.  Only one didn't get the memo; plus another one does
not need a machine at all and can be run with -M none.

Signed-off-by: Paolo Bonzini 
---
 tests/qtest/drive_del-test.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tests/qtest/drive_del-test.c b/tests/qtest/drive_del-test.c
index 8a6f3ac963d..7b67a4bbee4 100644
--- a/tests/qtest/drive_del-test.c
+++ b/tests/qtest/drive_del-test.c
@@ -173,7 +173,7 @@ static void test_drive_without_dev(void)
 QTestState *qts;
 
 /* Start with an empty drive */
-qts = qtest_init("-drive if=none,id=drive0");
+qts = qtest_init("-drive if=none,id=drive0 -M none");
 
 /* Delete the drive */
 drive_del(qts);
@@ -192,6 +192,11 @@ static void test_after_failed_device_add(void)
 QDict *response;
 QTestState *qts;
 
+if (!has_device_builtin("virtio-blk")) {
+g_test_skip("Device virtio-blk is not available");
+return;
+}
+
 snprintf(driver, sizeof(driver), "virtio-blk-%s",
  qvirtio_get_dev_type());
 
-- 
2.45.0

[PATCH 11/13] hw/i386: move rtc-reset-reinjection command out of hw/rtc

The rtc-reset-reinjection QMP command is specific to x86, other boards do not
have the ACK tracking functionality that is needed for RTC interrupt
reinjection.  Therefore the QMP command is only included in x86, but
qmp_rtc_reset_reinjection() is implemented by hw/rtc/mc146818rtc.c
and requires tracking of all created RTC devices.  Move the implementation
to hw/i386, so that 1) it is available even if no RTC device exist
2) the only RTC that exists is easily found in x86ms->rtc.

Signed-off-by: Paolo Bonzini 
---
 include/hw/rtc/mc146818rtc.h |  2 +-
 hw/i386/monitor.c| 46 
 hw/rtc/mc146818rtc.c | 12 ++
 hw/i386/meson.build  |  1 +
 4 files changed, 50 insertions(+), 11 deletions(-)
 create mode 100644 hw/i386/monitor.c

diff --git a/include/hw/rtc/mc146818rtc.h b/include/hw/rtc/mc146818rtc.h
index 97cec0b3e84..64893be1515 100644
--- a/include/hw/rtc/mc146818rtc.h
+++ b/include/hw/rtc/mc146818rtc.h
@@ -55,6 +55,6 @@ MC146818RtcState *mc146818_rtc_init(ISABus *bus, int 
base_year,
 qemu_irq intercept_irq);
 void mc146818rtc_set_cmos_data(MC146818RtcState *s, int addr, int val);
 int mc146818rtc_get_cmos_data(MC146818RtcState *s, int addr);
-void qmp_rtc_reset_reinjection(Error **errp);
+void rtc_reset_reinjection(MC146818RtcState *rtc);
 
 #endif /* HW_RTC_MC146818RTC_H */
diff --git a/hw/i386/monitor.c b/hw/i386/monitor.c
new file mode 100644
index 000..1ebd3564bf2
--- /dev/null
+++ b/hw/i386/monitor.c
@@ -0,0 +1,46 @@
+/*
+ * QEMU monitor
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "monitor/monitor.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-misc-target.h"
+#include "hw/i386/x86.h"
+#include "hw/rtc/mc146818rtc.h"
+
+#include CONFIG_DEVICES
+
+void qmp_rtc_reset_reinjection(Error **errp)
+{
+X86MachineState *x86ms = X86_MACHINE(qdev_get_machine());
+
+#ifdef CONFIG_MC146818RTC
+if (x86ms->rtc) {
+rtc_reset_reinjection(MC146818_RTC(x86ms->rtc));
+}
+#else
+assert(!x86ms->rtc);
+#endif
+}
diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c
index 3379f92748b..8ccee9a385d 100644
--- a/hw/rtc/mc146818rtc.c
+++ b/hw/rtc/mc146818rtc.c
@@ -104,16 +104,9 @@ static void rtc_coalesced_timer_update(MC146818RtcState *s)
 }
 }
 
-static QLIST_HEAD(, MC146818RtcState) rtc_devices =
-QLIST_HEAD_INITIALIZER(rtc_devices);
-
-void qmp_rtc_reset_reinjection(Error **errp)
+void rtc_reset_reinjection(MC146818RtcState *rtc)
 {
-MC146818RtcState *s;
-
-QLIST_FOREACH(s, _devices, link) {
-s->irq_coalesced = 0;
-}
+rtc->irq_coalesced = 0;
 }
 
 static bool rtc_policy_slew_deliver_irq(MC146818RtcState *s)
@@ -941,7 +934,6 @@ static void rtc_realizefn(DeviceState *dev, Error **errp)
 object_property_add_tm(OBJECT(s), "date", rtc_get_date);
 
 qdev_init_gpio_out(dev, >irq, 1);
-QLIST_INSERT_HEAD(_devices, s, link);
 }
 
 MC146818RtcState *mc146818_rtc_init(ISABus *bus, int base_year,
diff --git a/hw/i386/meson.build b/hw/i386/meson.build
index 3437da0aad1..03aad10df7a 100644
--- a/hw/i386/meson.build
+++ b/hw/i386/meson.build
@@ -2,6 +2,7 @@ i386_ss = ss.source_set()
 i386_ss.add(files(
   'fw_cfg.c',
   'e820_memory_layout.c',
+  'monitor.c',
   'multiboot.c',
   'x86.c',
   'x86-cpu.c',
-- 
2.45.0

[PATCH 10/13] hw/i386: split x86.c in multiple parts

Keep the basic X86MachineState definition in x86.c.  Move out functions that
are only needed by other files: x86-common.c for the pc and microvm machines,
x86-cpu.c for those used by accelerator code.

Signed-off-by: Paolo Bonzini 
---
 include/hw/i386/x86.h |6 +-
 hw/i386/x86-common.c  | 1007 +++
 hw/i386/x86-cpu.c |   97 
 hw/i386/x86.c | 1052 +
 hw/i386/meson.build   |4 +-
 5 files changed, 1113 insertions(+), 1053 deletions(-)
 create mode 100644 hw/i386/x86-common.c
 create mode 100644 hw/i386/x86-cpu.c

diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index c2062db13f5..b006f16b8d3 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -21,6 +21,7 @@
 #include "exec/memory.h"
 
 #include "hw/boards.h"
+#include "hw/i386/topology.h"
 #include "hw/intc/ioapic.h"
 #include "hw/isa/isa.h"
 #include "qom/object.h"
@@ -109,12 +110,11 @@ struct X86MachineState {
 #define TYPE_X86_MACHINE   MACHINE_TYPE_NAME("x86")
 OBJECT_DECLARE_TYPE(X86MachineState, X86MachineClass, X86_MACHINE)
 
-uint32_t x86_cpu_apic_id_from_index(X86MachineState *pcms,
+void init_topo_info(X86CPUTopoInfo *topo_info, const X86MachineState *x86ms);
+uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms,
 unsigned int cpu_index);
 
-void x86_cpu_new(X86MachineState *pcms, int64_t apic_id, Error **errp);
 void x86_cpus_init(X86MachineState *pcms, int default_cpu_version);
-CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx);
 void x86_rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count);
 void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
   DeviceState *dev, Error **errp);
diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c
new file mode 100644
index 000..67b03c913a5
--- /dev/null
+++ b/hw/i386/x86-common.c
@@ -0,0 +1,1007 @@
+/*
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2019, 2024 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qemu/cutils.h"
+#include "qemu/units.h"
+#include "qemu/datadir.h"
+#include "qapi/error.h"
+#include "sysemu/numa.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/xen.h"
+#include "trace.h"
+
+#include "hw/i386/x86.h"
+#include "target/i386/cpu.h"
+#include "hw/rtc/mc146818rtc.h"
+#include "target/i386/sev.h"
+
+#include "hw/acpi/cpu_hotplug.h"
+#include "hw/irq.h"
+#include "hw/loader.h"
+#include "multiboot.h"
+#include "elf.h"
+#include "standard-headers/asm-x86/bootparam.h"
+#include CONFIG_DEVICES
+#include "kvm/kvm_i386.h"
+
+#ifdef CONFIG_XEN_EMU
+#include "hw/xen/xen.h"
+#include "hw/i386/kvm/xen_evtchn.h"
+#endif
+
+/* Physical Address of PVH entry point read from kernel ELF NOTE */
+static size_t pvh_start_addr;
+
+static void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, Error **errp)
+{
+Object *cpu = object_new(MACHINE(x86ms)->cpu_type);
+
+if (!object_property_set_uint(cpu, "apic-id", apic_id, errp)) {
+goto out;
+}
+qdev_realize(DEVICE(cpu), NULL, errp);
+
+out:
+object_unref(cpu);
+}
+
+void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version)
+{
+int i;
+const CPUArchIdList *possible_cpus;
+MachineState *ms = MACHINE(x86ms);
+MachineClass *mc = MACHINE_GET_CLASS(x86ms);
+
+x86_cpu_set_default_version(default_cpu_version);
+
+/*
+ * Calculates the limit to CPU APIC ID values
+ *
+ * Limit for the APIC ID value, so that all
+ * CPU APIC IDs are < x86ms->apic_id_limit.
+ *
+ * This is used for FW_CFG_MAX_CPUS. See comments on fw_cfg_arch_create().
+ */
+x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms,
+  ms->smp.max_cpus - 1) + 
1;
+
+/*
+ * Can we support APIC ID 255 or higher?  With

[PATCH 09/13] i386: pc: remove unnecessary MachineClass overrides

There is no need to override these fields of MachineClass because they are
already set to the right value in the superclass.

Signed-off-by: Paolo Bonzini 
---
 include/hw/i386/x86.h | 4 
 hw/i386/pc.c  | 3 ---
 hw/i386/x86.c | 6 +++---
 3 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index d7b7d3f3ce0..c2062db13f5 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -114,10 +114,6 @@ uint32_t x86_cpu_apic_id_from_index(X86MachineState *pcms,
 
 void x86_cpu_new(X86MachineState *pcms, int64_t apic_id, Error **errp);
 void x86_cpus_init(X86MachineState *pcms, int default_cpu_version);
-CpuInstanceProperties x86_cpu_index_to_props(MachineState *ms,
- unsigned cpu_index);
-int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx);
-const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms);
 CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx);
 void x86_rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count);
 void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 19f21953b4a..bfb46e9b548 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1826,9 +1826,6 @@ static void pc_machine_class_init(ObjectClass *oc, void 
*data)
 assert(!mc->get_hotplug_handler);
 mc->get_hotplug_handler = pc_get_hotplug_handler;
 mc->hotplug_allowed = pc_hotplug_allowed;
-mc->cpu_index_to_instance_props = x86_cpu_index_to_props;
-mc->get_default_cpu_node_id = x86_get_default_cpu_node_id;
-mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids;
 mc->auto_enable_numa_with_memhp = true;
 mc->auto_enable_numa_with_memdev = true;
 mc->has_hotpluggable_cpus = true;
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index c61f4ebfa6a..fcef652c1e3 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -443,7 +443,7 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
 numa_cpu_pre_plug(cpu_slot, dev, errp);
 }
 
-CpuInstanceProperties
+static CpuInstanceProperties
 x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
 {
 MachineClass *mc = MACHINE_GET_CLASS(ms);
@@ -453,7 +453,7 @@ x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
 return possible_cpus->cpus[cpu_index].props;
 }
 
-int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx)
+static int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx)
 {
X86CPUTopoIDs topo_ids;
X86MachineState *x86ms = X86_MACHINE(ms);
@@ -467,7 +467,7 @@ int64_t x86_get_default_cpu_node_id(const MachineState *ms, 
int idx)
return topo_ids.pkg_id % ms->numa_state->num_nodes;
 }
 
-const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms)
+static const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms)
 {
 X86MachineState *x86ms = X86_MACHINE(ms);
 unsigned int max_cpus = ms->smp.max_cpus;
-- 
2.45.0

[PATCH 04/13] s390x: select correct components for no-board build

Signed-off-by: Paolo Bonzini 
---
 .gitlab-ci.d/buildtest.yml | 4 ++--
 target/s390x/Kconfig   | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml
index 13afd0df1f0..f8502905203 100644
--- a/.gitlab-ci.d/buildtest.yml
+++ b/.gitlab-ci.d/buildtest.yml
@@ -650,7 +650,7 @@ build-tci:
 # Check our reduced build configurations
 # requires libfdt: aarch64, arm, i386, loongarch64, microblaze, microblazeel,
 #   mips64el, or1k, ppc, ppc64, riscv32, riscv64, rx, x86_64
-# does not build without boards: i386, s390x, x86_64
+# does not build without boards: i386, x86_64
 build-without-defaults:
   extends: .native_build_job_template
   needs:
@@ -666,7 +666,7 @@ build-without-defaults:
   --disable-strip
 TARGETS: alpha-softmmu avr-softmmu cris-softmmu hppa-softmmu m68k-softmmu
   mips-softmmu mips64-softmmu mipsel-softmmu
-  sh4-softmmu sh4eb-softmmu sparc-softmmu
+  s390x-softmmu sh4-softmmu sh4eb-softmmu sparc-softmmu
   sparc64-softmmu tricore-softmmu xtensa-softmmu xtensaeb-softmmu
   hexagon-linux-user i386-linux-user s390x-linux-user
 MAKE_CHECK_ARGS: check
diff --git a/target/s390x/Kconfig b/target/s390x/Kconfig
index 72da48136c6..d886be48b47 100644
--- a/target/s390x/Kconfig
+++ b/target/s390x/Kconfig
@@ -1,2 +1,4 @@
 config S390X
 bool
+select PCI
+select S390_FLIC
-- 
2.45.0

[PATCH] target/i386: add feature dependency for XSAVE

The XSAVEOPT, XSAVEC, XGETBV1, XSAVES features make no sense if you
cannot enable XSAVE in the first place.

Signed-off-by: Paolo Bonzini 
---
 target/i386/cpu.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index f2ea6899e39..6f5ff71c6ee 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1550,6 +1550,10 @@ static FeatureDep feature_dependencies[] = {
 .from = { FEAT_8000_0001_ECX,   CPUID_EXT3_SVM },
 .to = { FEAT_SVM,   ~0ull },
 },
+{
+.from = { FEAT_1_ECX,   CPUID_EXT_XSAVE },
+.to = { FEAT_XSAVE, ~0ull },
+},
 {
 .from = { FEAT_7_0_ECX, CPUID_7_0_ECX_WAITPKG },
 .to = { FEAT_VMX_SECONDARY_CTLS,
VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE },
-- 
2.45.0

[PATCH] target/i386: fix feature dependency for WAITPKG

The VMX feature bit depends on general availability of WAITPKG,
not the other way round.

Fixes: 33cc88261c3 ("target/i386: add support for 
VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE", 2023-08-28)
Cc: qemu-sta...@nongnu.org
Signed-off-by: Paolo Bonzini 
---
 target/i386/cpu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 1058b6803fd..f2ea6899e39 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1551,8 +1551,8 @@ static FeatureDep feature_dependencies[] = {
 .to = { FEAT_SVM,   ~0ull },
 },
 {
-.from = { FEAT_VMX_SECONDARY_CTLS,  
VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE },
-.to = { FEAT_7_0_ECX,   CPUID_7_0_ECX_WAITPKG },
+.from = { FEAT_7_0_ECX, CPUID_7_0_ECX_WAITPKG },
+.to = { FEAT_VMX_SECONDARY_CTLS,
VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE },
 },
 };
 
-- 
2.45.0

[PATCH] target/i386: move prefetch and multi-byte UD/NOP to new decoder

These are trivial to add, and moving them to the new decoder fixes some
corner cases: raising #UD instead of an instruction fetch page fault for
the undefined opcodes, and incorrectly rejecting 0F 18 prefetches with
register operands (which are treated as reserved NOPs).

Signed-off-by: Paolo Bonzini 
---
 target/i386/tcg/decode-new.h |  1 +
 target/i386/tcg/translate.c  | 30 --
 target/i386/tcg/decode-new.c.inc | 24 +---
 target/i386/tcg/emit.c.inc   |  5 +
 4 files changed, 27 insertions(+), 33 deletions(-)

diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index 2ea06b44787..51ef0e621b9 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -50,6 +50,7 @@ typedef enum X86OpType {
 X86_TYPE_EM, /* modrm byte selects an ALU memory operand */
 X86_TYPE_WM, /* modrm byte selects an XMM/YMM memory operand */
 X86_TYPE_I_unsigned, /* Immediate, zero-extended */
+X86_TYPE_nop, /* modrm operand decoded but not loaded into s->T{0,1} */
 X86_TYPE_2op, /* 2-operand RMW instruction */
 X86_TYPE_LoBits, /* encoded in bits 0-2 of the operand + REX.B */
 X86_TYPE_0, /* Hard-coded GPRs (RAX..RDI) */
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 3da4fdf64cc..de87775016b 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -4019,25 +4019,6 @@ static void disas_insn_old(DisasContext *s, CPUState 
*cpu, int b)
 set_cc_op(s, CC_OP_EFLAGS);
 }
 break;
-case 0x118:
-modrm = x86_ldub_code(env, s);
-mod = (modrm >> 6) & 3;
-op = (modrm >> 3) & 7;
-switch(op) {
-case 0: /* prefetchnta */
-case 1: /* prefetchnt0 */
-case 2: /* prefetchnt0 */
-case 3: /* prefetchnt0 */
-if (mod == 3)
-goto illegal_op;
-gen_nop_modrm(env, s, modrm);
-/* nothing more to do */
-break;
-default: /* nop (multi byte) */
-gen_nop_modrm(env, s, modrm);
-break;
-}
-break;
 case 0x11a:
 modrm = x86_ldub_code(env, s);
 if (s->flags & HF_MPX_EN_MASK) {
@@ -4229,10 +4210,6 @@ static void disas_insn_old(DisasContext *s, CPUState 
*cpu, int b)
 }
 gen_nop_modrm(env, s, modrm);
 break;
-case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
-modrm = x86_ldub_code(env, s);
-gen_nop_modrm(env, s, modrm);
-break;
 
 case 0x120: /* mov reg, crN */
 case 0x122: /* mov crN, reg */
@@ -4506,13 +4483,6 @@ static void disas_insn_old(DisasContext *s, CPUState 
*cpu, int b)
 }
 break;
 
-case 0x10d: /* 3DNow! prefetch(w) */
-modrm = x86_ldub_code(env, s);
-mod = (modrm >> 6) & 3;
-if (mod == 3)
-goto illegal_op;
-gen_nop_modrm(env, s, modrm);
-break;
 case 0x1aa: /* rsm */
 gen_svm_check_intercept(s, SVM_EXIT_RSM);
 if (!(s->flags & HF_SMM_MASK))
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 0e1811399f8..4baf7672158 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -55,6 +55,10 @@
  * mask could be applied (and the original sign-extended value would be
  * optimized away by TCG) in the emitter function.
  *
+ * Finally, a "nop" operand type is used for multi-byte NOPs.  It accepts
+ * any value of mod including 11b (unlike M) but it does not try to
+ * interpret the operand (like M).
+ *
  * Vector operands
  * ---
  *
@@ -1056,6 +1060,16 @@ static const X86OpEntry opcodes_0F[256] = {
 [0xa0] = X86_OP_ENTRYr(PUSH, FS, w),
 [0xa1] = X86_OP_ENTRYw(POP, FS, w),
 
+[0x0b] = X86_OP_ENTRY0(UD),   /* UD2 */
+[0x0d] = X86_OP_ENTRY1(NOP,  M,v),/* 3DNow! prefetch */
+
+[0x18] = X86_OP_ENTRY1(NOP,  nop,v),  /* prefetch/reserved NOP */
+[0x19] = X86_OP_ENTRY1(NOP,  nop,v),  /* reserved NOP */
+[0x1c] = X86_OP_ENTRY1(NOP,  nop,v),  /* reserved NOP */
+[0x1d] = X86_OP_ENTRY1(NOP,  nop,v),  /* reserved NOP */
+[0x1e] = X86_OP_ENTRY1(NOP,  nop,v),  /* reserved NOP */
+[0x1f] = X86_OP_ENTRY1(NOP,  nop,v),  /* reserved NOP */
+
 [0x28] = X86_OP_ENTRY3(MOVDQ,  V,x,  None,None, W,x, vex1 p_00_66), /* 
MOVAPS */
 [0x29] = X86_OP_ENTRY3(MOVDQ,  W,x,  None,None, V,x, vex1 p_00_66), /* 
MOVAPS */
 [0x2A] = X86_OP_GROUP0(0F2A),
@@ -1135,6 +1149,8 @@ static const X86OpEntry opcodes_0F[256] = {
 [0xb6] = X86_OP_ENTRY3(MOV,G,v, E,b, None, None, zextT0), /* MOVZX */
 [0xb7] = X86_OP_ENTRY3(MOV,G,v, E,w, None, None, zextT0), /* MOVZX */
 
+/* decoded as modrm, which is visible as a difference between page fault 
and #UD */
+[0xb9] = X86_OP_ENTRYr(UD, nop,v),/* UD1 */
 [0xbe] = X86_OP_ENTRY3(MOV,G,v, E,b, None, None,

[PATCH] target/i386: fix operand size for DATA16 REX.W POPCNT

According to the manual, 32-bit vs 64-bit is governed by REX.W
and REX ignores the 0x66 prefix.  This can be confirmed with this
program:

#include 
int main()
{
   int x = 0x1234;
   int y;
   asm("popcntl %1, %0" : "=r" (y) : "r" (x)); printf("%x\n", y);
   asm("mov $-1, %0; .byte 0x66; popcntl %1, %0" : "+r" (y) : "r" (x)); 
printf("%x\n", y);
   asm("mov $-1, %0; .byte 0x66; popcntq %q1, %q0" : "+r" (y) : "r" (x)); 
printf("%x\n", y);
}

which prints 5//5 on real hardware and 5//
on QEMU.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Paolo Bonzini 
---
 target/i386/tcg/translate.c | 17 +
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 7d9f6b5c55b..5366dc32dd3 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -411,16 +411,6 @@ static inline MemOp mo_stacksize(DisasContext *s)
 return CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
 }
 
-/* Select only size 64 else 32.  Used for SSE operand sizes.  */
-static inline MemOp mo_64_32(MemOp ot)
-{
-#ifdef TARGET_X86_64
-return ot == MO_64 ? MO_64 : MO_32;
-#else
-return MO_32;
-#endif
-}
-
 /* Select size 8 if lsb of B is clear, else OT.  Used for decoding
byte vs word opcodes.  */
 static inline MemOp mo_b_d(int b, MemOp ot)
@@ -4545,12 +4535,7 @@ static void disas_insn_old(DisasContext *s, CPUState 
*cpu, int b)
 modrm = x86_ldub_code(env, s);
 reg = ((modrm >> 3) & 7) | REX_R(s);
 
-if (s->prefix & PREFIX_DATA) {
-ot = MO_16;
-} else {
-ot = mo_64_32(dflag);
-}
-
+ot = dflag;
 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
 gen_extu(ot, s->T0);
 tcg_gen_mov_tl(cpu_cc_src, s->T0);
-- 
2.45.0

[PATCH] tests/tcg: cover lzcnt/tzcnt/popcnt