Re: [PATCH v3] hw/gpio/aspeed: Add reg_table_size to AspeedGPIOClass

2024-06-19 Thread Cédric Le Goater

On 6/19/24 8:36 PM, Zheyu Ma wrote:

ASan detected a global-buffer-overflow error in the aspeed_gpio_read()
function. This issue occurred when reading beyond the bounds of the
reg_table.

To enhance the safety and maintainability of the Aspeed GPIO code, this commit
introduces a reg_table_size member to the AspeedGPIOClass structure. This
change ensures that the size of the GPIO register table is explicitly tracked
and initialized, reducing the risk of errors if new register tables are
introduced in the future.

Reproducer:
cat << EOF | qemu-system-aarch64 -display none \
-machine accel=qtest, -m 512M -machine ast1030-evb -qtest stdio
readq 0x7e780272
EOF

ASAN log indicating the issue:
==2602930==ERROR: AddressSanitizer: global-buffer-overflow on address 
0x55a5da29e128 at pc 0x55a5d700dc62 bp 0x7fff096c4e90 sp 0x7fff096c4e88
READ of size 2 at 0x55a5da29e128 thread T0
 #0 0x55a5d700dc61 in aspeed_gpio_read hw/gpio/aspeed_gpio.c:564:14
 #1 0x55a5d933f3ab in memory_region_read_accessor system/memory.c:445:11
 #2 0x55a5d92fba40 in access_with_adjusted_size system/memory.c:573:18
 #3 0x55a5d92f842c in memory_region_dispatch_read1 system/memory.c:1426:16
 #4 0x55a5d92f7b68 in memory_region_dispatch_read system/memory.c:1459:9
 #5 0x55a5d9376ad1 in flatview_read_continue_step system/physmem.c:2836:18
 #6 0x55a5d9376399 in flatview_read_continue system/physmem.c:2877:19
 #7 0x55a5d93775b8 in flatview_read system/physmem.c:2907:12

Signed-off-by: Zheyu Ma 



Applied to aspeed-next.

Thanks,

C.




Re: [PULL 00/24] tcg patch queue

2024-06-19 Thread Richard Henderson

On 6/19/24 13:59, Richard Henderson wrote:

The following changes since commit 223696363bb117241ad9c2facbff0c474afa4104:

   Merge tag 'edgar/xilinx-queue-2024-06-17.for-upstream' 
ofhttps://gitlab.com/edgar.iglesias/qemu  into staging (2024-06-18 13:08:01 
-0700)

are available in the Git repository at:

   https://gitlab.com/rth7680/qemu.git  tags/pull-tcg-20240619

for you to fetch changes up to 521d7fb3ebdf88112ed13556a93e3037742b9eb8:

   tcg/loongarch64: Fix tcg_out_movi vs some pcrel pointers (2024-06-19 
13:50:22 -0700)


tcg/loongarch64: Support 64- and 256-bit vectors
tcg/loongarch64: Fix tcg_out_movi vs some pcrel pointers
util/bufferiszero: Split out host include files
util/bufferiszero: Add loongarch64 vector acceleration
accel/tcg: Fix typo causing tb->page_addr[1] to not be recorded
target/sparc: use signed denominator in sdiv helper
linux-user: Make TARGET_NR_setgroups affect only the current thread


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/9.1 as 
appropriate.


r~




Re: [RFC PATCH v4 5/5] target/riscv: Inline unit-stride ld/st and corresponding functions for performance

2024-06-19 Thread Richard Henderson

On 6/13/24 10:51, Max Chou wrote:

In the vector unit-stride load/store helper functions, the vext_ldst_us
& vext_ldst_whole functions account for most of the execution time.
Inlining these functions avoids the function call overhead and improves
the helper function performance.

Signed-off-by: Max Chou 
---
  target/riscv/vector_helper.c | 64 +++-
  1 file changed, 34 insertions(+), 30 deletions(-)



Reviewed-by: Richard Henderson 


r~



Re: [RFC PATCH v4 2/5] target/riscv: rvv: Provide a fast path using direct access to host ram for unmasked unit-stride load/store

2024-06-19 Thread Richard Henderson

On 6/13/24 10:51, Max Chou wrote:

+#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
+static void NAME##_tlb(CPURISCVState *env, abi_ptr addr,\
+   uint32_t byte_off, void *vd, uintptr_t retaddr)  \
+{   \
+uint8_t *reg = ((uint8_t *)vd + byte_off);  \
+ETYPE *cur = ((ETYPE *)reg);\
+*cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);   \
+}   \
+\
+static void NAME##_host(void *vd, uint32_t byte_off, void *host)\
+{   \
+ETYPE val = LDSUF##_p(host);\
+uint8_t *reg = (uint8_t *)(vd + byte_off);  \
+*(ETYPE *)(reg) = val;  \
+}


Why are you casting to and from uint8_t* ?

Surely this is cleaner as

ETYPE *cur = vd + byte_off;


r~



Re: [RFC PATCH v4 4/5] target/riscv: rvv: Provide group continuous ld/st flow for unit-stride ld/st instructions

2024-06-19 Thread Richard Henderson

On 6/13/24 10:51, Max Chou wrote:

The vector unmasked unit-stride and whole register load/store
instructions will load/store continuous memory. If the endianness of
the host and guest architectures is the same, then we can group the
element loads/stores to transfer more data at a time.

Signed-off-by: Max Chou 
---
  target/riscv/vector_helper.c | 160 +--
  1 file changed, 117 insertions(+), 43 deletions(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 793337a6f96..cba46ef16a5 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -457,6 +457,69 @@ GEN_VEXT_ST_ELEM(ste_h, uint16_t, H2, stw)
  GEN_VEXT_ST_ELEM(ste_w, uint32_t, H4, stl)
  GEN_VEXT_ST_ELEM(ste_d, uint64_t, H8, stq)
  
+static inline uint32_t

+vext_group_ldst_host(CPURISCVState *env, void *vd, uint32_t byte_end,
+ uint32_t byte_offset, void *host, uint32_t esz,
+ bool is_load)
+{
+uint32_t group_size;
+static vext_ldst_elem_fn_host * const fns[2][4] = {
+/* Store */
+{ ste_b_host, ste_h_host, ste_w_host, ste_d_host },
+/* Load */
+{ lde_b_host, lde_h_host, lde_w_host, lde_d_host }
+};
+vext_ldst_elem_fn_host *fn;
+
+if (byte_offset + 8 < byte_end) {
+group_size = MO_64;
+} else if (byte_offset + 4 < byte_end) {
+group_size = MO_32;
+} else if (byte_offset + 2 < byte_end) {
+group_size = MO_16;
+} else {
+group_size = MO_8;
+}
+
+fn = fns[is_load][group_size];
+fn(vd, byte_offset, host + byte_offset);


This is a really bad idea.  The table and indirect call means that none of these will be 
properly inlined.  Anyway...



+
+return 1 << group_size;
+}
+
+static inline void
+vext_continus_ldst_tlb(CPURISCVState *env, vext_ldst_elem_fn_tlb *ldst_tlb,
+   void *vd, uint32_t evl, target_ulong addr,
+   uint32_t reg_start, uintptr_t ra, uint32_t esz,
+   bool is_load)
+{
+for (; reg_start < evl; reg_start++, addr += esz) {
+ldst_tlb(env, adjust_addr(env, addr), reg_start * esz, vd, ra);
+}
+}
+
+static inline void
+vext_continus_ldst_host(CPURISCVState *env, vext_ldst_elem_fn_host *ldst_host,
+void *vd, uint32_t evl, uint32_t reg_start, void *host,
+uint32_t esz, bool is_load)
+{
+#if TARGET_BIG_ENDIAN != HOST_BIG_ENDIAN
+for (; reg_start < evl; reg_start++) {
+uint32_t byte_off = reg_start * esz;
+ldst_host(vd, byte_off, host + byte_off);
+}
+#else
+uint32_t group_byte;
+uint32_t byte_start = reg_start * esz;
+uint32_t byte_end = evl * esz;
+while (byte_start < byte_end) {
+group_byte = vext_group_ldst_host(env, vd, byte_end, byte_start, host,
+  esz, is_load);
+byte_start += group_byte;
+}


... this is much better handled with memcpy, given that you know endianness 
matches.


r~



Re: [RFC PATCH v4 2/5] target/riscv: rvv: Provide a fast path using direct access to host ram for unmasked unit-stride load/store

2024-06-19 Thread Richard Henderson

On 6/13/24 10:51, Max Chou wrote:

This commit references the sve_ldN_r/sve_stN_r helper functions in ARM
target to optimize the vector unmasked unit-stride load/store
instructions by following items:

* Get the loose bound of activate elements
* Probing pages/resolving host memory address/handling watchpoint at beginning
* Provide new interface to direct access host memory

The original element load/store interface is replaced by the new element
load/store functions with _tlb & _host postfix that means doing the
element load/store through the original softmmu flow and the direct
access host memory flow.

Signed-off-by: Max Chou 
---
  target/riscv/insn_trans/trans_rvv.c.inc |   3 +
  target/riscv/vector_helper.c| 637 +++-
  target/riscv/vector_internals.h |  48 ++
  3 files changed, 551 insertions(+), 137 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 3a3896ba06c..14e10568bd7 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -770,6 +770,7 @@ static bool ld_us_mask_op(DisasContext *s, arg_vlm_v *a, 
uint8_t eew)
  /* Mask destination register are always tail-agnostic */
  data = FIELD_DP32(data, VDATA, VTA, s->cfg_vta_all_1s);
  data = FIELD_DP32(data, VDATA, VMA, s->vma);
+data = FIELD_DP32(data, VDATA, VM, 1);
  return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
  }
  
@@ -787,6 +788,7 @@ static bool st_us_mask_op(DisasContext *s, arg_vsm_v *a, uint8_t eew)

  /* EMUL = 1, NFIELDS = 1 */
  data = FIELD_DP32(data, VDATA, LMUL, 0);
  data = FIELD_DP32(data, VDATA, NF, 1);
+data = FIELD_DP32(data, VDATA, VM, 1);
  return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
  }
  
@@ -1106,6 +1108,7 @@ static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf,

  TCGv_i32 desc;
  
  uint32_t data = FIELD_DP32(0, VDATA, NF, nf);

+data = FIELD_DP32(data, VDATA, VM, 1);
  dest = tcg_temp_new_ptr();
  desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
s->cfg_ptr->vlenb, data));


This is ok, and would warrant a separate patch.



+if (vm == 0) {
+for (i = vstart; i < evl; ++i) {
+if (vext_elem_mask(v0, i)) {
+reg_idx_last = i;
+if (reg_idx_first < 0) {
+reg_idx_first = i;
+}
+}
+}


This isn't great, and isn't used for now, since only unmasked unit-stride is handled so 
far.  I think this first patch should be simpler and *assume* VM is set.



+/*
+ * Resolve the guest virtual addresses to info->page[].
+ * Control the generation of page faults with @fault.  Return false if
+ * there is no work to do, which can only happen with @fault == FAULT_NO.
+ */
+static bool vext_cont_ldst_pages(CPURISCVState *env, RVVContLdSt *info,
+ target_ulong addr, bool is_load,
+ uint32_t desc, uint32_t esz, uintptr_t ra,
+ bool is_us_whole)
+{
+uint32_t vm = vext_vm(desc);
+uint32_t nf = vext_nf(desc);
+bool nofault = (vm == 1 ? false : true);


Why is nofault == "!vm"?

Also, it's silly to use ?: with true/false -- use the proper boolean expression in the 
first place.


That said... faults with RVV must interact with vstart.

I'm not sure what the best code organization is.

Perhaps a subroutine, passed the first and last elements for a single page.

  Update vstart, resolve the page, allowing the exception.
  If watchpoints, one call to cpu_check_watchpoint for the entire memory range.
  If ram, iterate through the rest of the page using host accesses; otherwise,
  iterate through the rest of the page using tlb accesses, making sure vstart
  is always up-to-date.

The main routine looks for the page_split, invokes the subroutine for the first (and 
likely only) page.  Special case any split-page element.  Invoke the subroutine for the 
second page.



r~



Re: [RFC PATCH v4 1/5] accel/tcg: Avoid unnecessary call overhead from qemu_plugin_vcpu_mem_cb

2024-06-19 Thread Richard Henderson

On 6/13/24 10:51, Max Chou wrote:

If there are not any QEMU plugin memory callback functions, checking
before calling the qemu_plugin_vcpu_mem_cb function can reduce the
function call overhead.

Signed-off-by: Max Chou
---
  accel/tcg/ldst_common.c.inc | 8 ++--
  1 file changed, 6 insertions(+), 2 deletions(-)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH v7 1/2] hw/misc/riscv_iopmp: Add RISC-V IOPMP device

2024-06-19 Thread Ethan Chen via
On Mon, Jun 17, 2024 at 02:09:34PM +0200, Stefan Weil wrote:
> [EXTERNAL MAIL]
> 
> Am 12.06.24 um 05:17 schrieb Ethan Chen via:
> > Support basic functions of IOPMP specification v0.9.1 rapid-k model.
> > The specification url:
> > https://github.com/riscv-non-isa/iopmp-spec/releases/tag/v0.9.1
> > 
> > IOPMP check memory access from device is valid or not. This implementation 
> > uses
> > IOMMU to change address space that device access. There are three possible
> > results of an access: valid, blocked, and stalled(stall is not supported in 
> > this
> >   patch).
> > 
> > If an access is valid, target address space is downstream_as.
> > If an access is blocked, it will go to blocked_io_as. The operation of
> > blocked_io_as could be a bus error, or it can respond a success with 
> > fabricated
> > data depending on IOPMP ERR_CFG register value.
> > 
> > Signed-off-by: Ethan Chen 
> > ---
> >   hw/misc/Kconfig   |3 +
> >   hw/misc/meson.build   |1 +
> >   hw/misc/riscv_iopmp.c | 1002 +
> >   hw/misc/trace-events  |4 +
> >   include/hw/misc/riscv_iopmp.h |  152 +
> >   5 files changed, 1162 insertions(+)
> >   create mode 100644 hw/misc/riscv_iopmp.c
> >   create mode 100644 include/hw/misc/riscv_iopmp.h
> 
> Should both new files have SPDX license identifiers?
> 
> Regards,
> Stefan W.

Thank you for the reminder, I will add them.

Thanks,
Ethan



Re: [PATCH v7 1/2] hw/misc/riscv_iopmp: Add RISC-V IOPMP device

2024-06-19 Thread Ethan Chen via
On Mon, Jun 17, 2024 at 07:28:33PM +0800, LIU Zhiwei wrote:
> 
> On 2024/6/12 11:17, Ethan Chen wrote:
> > Support basic functions of IOPMP specification v0.9.1 rapid-k model.
> > The specification url:
> > https://github.com/riscv-non-isa/iopmp-spec/releases/tag/v0.9.1
> > 
> > IOPMP check memory access from device is valid or not. This implementation 
> > uses
> > IOMMU to change address space that device access. There are three possible
> > results of an access: valid, blocked, and stalled(stall is not supported in 
> > this
> >   patch).
> > 
> > If an access is valid, target address space is downstream_as.
> > If an access is blocked, it will go to blocked_io_as. The operation of
> > blocked_io_as could be a bus error, or it can respond a success with 
> > fabricated
> > data depending on IOPMP ERR_CFG register value.
> > 
> > Signed-off-by: Ethan Chen 
> > ---
> >   hw/misc/Kconfig   |3 +
> >   hw/misc/meson.build   |1 +
> >   hw/misc/riscv_iopmp.c | 1002 +
> >   hw/misc/trace-events  |4 +
> >   include/hw/misc/riscv_iopmp.h |  152 +
> >   5 files changed, 1162 insertions(+)
> >   create mode 100644 hw/misc/riscv_iopmp.c
> >   create mode 100644 include/hw/misc/riscv_iopmp.h
> > 
> > diff --git a/hw/misc/Kconfig b/hw/misc/Kconfig
> > index 1e08785b83..427f0c702e 100644
> > --- a/hw/misc/Kconfig
> > +++ b/hw/misc/Kconfig
> > @@ -213,4 +213,7 @@ config IOSB
> >   config XLNX_VERSAL_TRNG
> >   bool
> > 
> > +config RISCV_IOPMP
> > +bool
> > +
> >   source macio/Kconfig
> > diff --git a/hw/misc/meson.build b/hw/misc/meson.build
> > index 86596a3888..f83cd108f8 100644
> > --- a/hw/misc/meson.build
> > +++ b/hw/misc/meson.build
> > @@ -34,6 +34,7 @@ system_ss.add(when: 'CONFIG_SIFIVE_E_PRCI', if_true: 
> > files('sifive_e_prci.c'))
> >   system_ss.add(when: 'CONFIG_SIFIVE_E_AON', if_true: 
> > files('sifive_e_aon.c'))
> >   system_ss.add(when: 'CONFIG_SIFIVE_U_OTP', if_true: 
> > files('sifive_u_otp.c'))
> >   system_ss.add(when: 'CONFIG_SIFIVE_U_PRCI', if_true: 
> > files('sifive_u_prci.c'))
> > +specific_ss.add(when: 'CONFIG_RISCV_IOPMP', if_true: 
> > files('riscv_iopmp.c'))
> > 
> >   subdir('macio')
> > 
> > diff --git a/hw/misc/riscv_iopmp.c b/hw/misc/riscv_iopmp.c
> > new file mode 100644
> > index 00..75b28dc559
> > --- /dev/null
> > +++ b/hw/misc/riscv_iopmp.c
> > @@ -0,0 +1,1002 @@
> > +/*
> > + * QEMU RISC-V IOPMP (Input Output Physical Memory Protection)
> > + *
> > + * Copyright (c) 2023 Andes Tech. Corp.
> > + *
> > + * This program is free software; you can redistribute it and/or modify it
> > + * under the terms and conditions of the GNU General Public License,
> > + * version 2 or later, as published by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope it will be useful, but WITHOUT
> > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> > + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
> > for
> > + * more details.
> > + *
> > + * You should have received a copy of the GNU General Public License along 
> > with
> > + * this program.  If not, see .
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "qemu/log.h"
> > +#include "qapi/error.h"
> > +#include "trace.h"
> > +#include "exec/exec-all.h"
> > +#include "exec/address-spaces.h"
> > +#include "hw/qdev-properties.h"
> > +#include "hw/sysbus.h"
> > +#include "hw/misc/riscv_iopmp.h"
> > +#include "memory.h"
> > +#include "hw/irq.h"
> > +#include "hw/registerfields.h"
> > +#include "trace.h"
> > +
> > +#define TYPE_IOPMP_IOMMU_MEMORY_REGION "iopmp-iommu-memory-region"
> > +
> > +REG32(VERSION, 0x00)
> > +FIELD(VERSION, VENDOR, 0, 24)
> > +FIELD(VERSION, SPECVER , 24, 8)
> > +REG32(IMP, 0x04)
> > +FIELD(IMP, IMPID, 0, 32)
> > +REG32(HWCFG0, 0x08)
> > +FIELD(HWCFG0, MODEL, 0, 4)
> > +FIELD(HWCFG0, TOR_EN, 4, 1)
> > +FIELD(HWCFG0, SPS_EN, 5, 1)
> > +FIELD(HWCFG0, USER_CFG_EN, 6, 1)
> > +FIELD(HWCFG0, PRIENT_PROG, 7, 1)
> > +FIELD(HWCFG0, RRID_TRANSL_EN, 8, 1)
> > +FIELD(HWCFG0, RRID_TRANSL_PROG, 9, 1)
> > +FIELD(HWCFG0, CHK_X, 10, 1)
> > +FIELD(HWCFG0, NO_X, 11, 1)
> > +FIELD(HWCFG0, NO_W, 12, 1)
> > +FIELD(HWCFG0, STALL_EN, 13, 1)
> > +FIELD(HWCFG0, PEIS, 14, 1)
> > +FIELD(HWCFG0, PEES, 15, 1)
> > +FIELD(HWCFG0, MFR_EN, 16, 1)
> > +FIELD(HWCFG0, MD_NUM, 24, 7)
> > +FIELD(HWCFG0, ENABLE, 31, 1)
> > +REG32(HWCFG1, 0x0C)
> > +FIELD(HWCFG1, RRID_NUM, 0, 16)
> > +FIELD(HWCFG1, ENTRY_NUM, 16, 16)
> > +REG32(HWCFG2, 0x10)
> > +FIELD(HWCFG2, PRIO_ENTRY, 0, 16)
> > +FIELD(HWCFG2, RRID_TRANSL, 16, 16)
> > +REG32(ENTRYOFFSET, 0x14)
> > +FIELD(ENTRYOFFSET, OFFSET, 0, 32)
> > +REG32(MDSTALL, 0x30)
> > +FIELD(MDSTALL, EXEMPT, 0, 1)
> > +FIELD(MDSTALL, MD, 1, 31)
> > +REG32(MDSTALLH, 0x34)
> > +FIELD(MDSTALLH, MD, 0, 32)
> > 

Re: [PATCH 2/3] exec: avoid using C++ keywords in function parameters

2024-06-19 Thread Richard Henderson

On 6/19/24 03:22, Philippe Mathieu-Daudé wrote:

On 19/6/24 00:45, Roman Kiryanov wrote:

to use the QEMU headers with a C++ compiler.

Google-Bug-Id: 331190993
Change-Id: Ic4e49b9c791616bb22c973922772b0494706092c
Signed-off-by: Roman Kiryanov 
---
  include/exec/memory.h | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 1be58f694c..d7591a60d9 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -945,7 +945,7 @@ struct MemoryListener {
   * the current transaction.
   */
  void (*log_start)(MemoryListener *listener, MemoryRegionSection *section,
-  int old, int new);
+  int old_val, int new_val);
  /**
   * @log_stop:
@@ -964,7 +964,7 @@ struct MemoryListener {
   * the current transaction.
   */
  void (*log_stop)(MemoryListener *listener, MemoryRegionSection *section,
- int old, int new);
+ int old_val, int new_val);


OK but please keep the implementations in sync with the prototype
argument names:

accel/hvf/hvf-accel-ops.c:264: MemoryRegionSection *section, int old, int new)
accel/hvf/hvf-accel-ops.c:274: MemoryRegionSection *section, int old, int new)
accel/kvm/kvm-all.c:549:  int old, int new)
accel/kvm/kvm-all.c:566:  int old, int new)
hw/i386/xen/xen-hvm.c:430:  int old, int new)
hw/i386/xen/xen-hvm.c:441: int old, int new)
hw/virtio/vhost.c:1070:    int old, int new)
hw/virtio/vhost.c:1077:   int old, int new)
include/exec/memory.h:948:  int old, int new);
include/exec/memory.h:967: int old, int new);

See also:
target/arm/tcg/translate-a64.c:2161:    int new = a->imm * 3;


We're not trying to purge "new" from the entire tree, only (some) header files.


r~



Re: [PATCH v3] hw/gpio/aspeed: Add reg_table_size to AspeedGPIOClass

2024-06-19 Thread Andrew Jeffery
On Wed, 2024-06-19 at 20:36 +0200, Zheyu Ma wrote:
> ASan detected a global-buffer-overflow error in the aspeed_gpio_read()
> function. This issue occurred when reading beyond the bounds of the
> reg_table.
> 
> To enhance the safety and maintainability of the Aspeed GPIO code, this commit
> introduces a reg_table_size member to the AspeedGPIOClass structure. This
> change ensures that the size of the GPIO register table is explicitly tracked
> and initialized, reducing the risk of errors if new register tables are
> introduced in the future.
> 
> Reproducer:
> cat << EOF | qemu-system-aarch64 -display none \
> -machine accel=qtest, -m 512M -machine ast1030-evb -qtest stdio
> readq 0x7e780272
> EOF
> 
> ASAN log indicating the issue:
> ==2602930==ERROR: AddressSanitizer: global-buffer-overflow on address 
> 0x55a5da29e128 at pc 0x55a5d700dc62 bp 0x7fff096c4e90 sp 0x7fff096c4e88
> READ of size 2 at 0x55a5da29e128 thread T0
> #0 0x55a5d700dc61 in aspeed_gpio_read hw/gpio/aspeed_gpio.c:564:14
> #1 0x55a5d933f3ab in memory_region_read_accessor system/memory.c:445:11
> #2 0x55a5d92fba40 in access_with_adjusted_size system/memory.c:573:18
> #3 0x55a5d92f842c in memory_region_dispatch_read1 system/memory.c:1426:16
> #4 0x55a5d92f7b68 in memory_region_dispatch_read system/memory.c:1459:9
> #5 0x55a5d9376ad1 in flatview_read_continue_step system/physmem.c:2836:18
> #6 0x55a5d9376399 in flatview_read_continue system/physmem.c:2877:19
> #7 0x55a5d93775b8 in flatview_read system/physmem.c:2907:12
> 
> Signed-off-by: Zheyu Ma 

Reviewed-by: Andrew Jeffery 

> ---
> Changes in v3:
> - Add the reproducer
> 
> Changes in v2:
> - Introduce the reg_table_size to AspeedGPIOClass
> ---
>  hw/gpio/aspeed_gpio.c | 17 +
>  include/hw/gpio/aspeed_gpio.h |  1 +
>  2 files changed, 18 insertions(+)
> 
> diff --git a/hw/gpio/aspeed_gpio.c b/hw/gpio/aspeed_gpio.c
> index c1781e2ba3..fd4912edae 100644
> --- a/hw/gpio/aspeed_gpio.c
> +++ b/hw/gpio/aspeed_gpio.c
> @@ -559,6 +559,12 @@ static uint64_t aspeed_gpio_read(void *opaque, hwaddr 
> offset, uint32_t size)
>  return debounce_value;
>  }
>  
> +if (idx >= agc->reg_table_size) {
> +qemu_log_mask(LOG_GUEST_ERROR, "%s: idx 0x%" PRIx64 " out of 
> bounds\n",
> +  __func__, idx);
> +return 0;
> +}
> +
>  reg = >reg_table[idx];
>  if (reg->set_idx >= agc->nr_gpio_sets) {
>  qemu_log_mask(LOG_GUEST_ERROR, "%s: no getter for offset 0x%"
> @@ -785,6 +791,12 @@ static void aspeed_gpio_write(void *opaque, hwaddr 
> offset, uint64_t data,
>  return;
>  }
>  
> +if (idx >= agc->reg_table_size) {
> +qemu_log_mask(LOG_GUEST_ERROR, "%s: idx 0x%" PRIx64 " out of 
> bounds\n",
> +  __func__, idx);
> +return;
> +}
> +
>  reg = >reg_table[idx];
>  if (reg->set_idx >= agc->nr_gpio_sets) {
>  qemu_log_mask(LOG_GUEST_ERROR, "%s: no setter for offset 0x%"
> @@ -1117,6 +1129,7 @@ static void aspeed_gpio_ast2400_class_init(ObjectClass 
> *klass, void *data)
>  agc->nr_gpio_pins = 216;
>  agc->nr_gpio_sets = 7;
>  agc->reg_table = aspeed_3_3v_gpios;
> +agc->reg_table_size = GPIO_3_3V_REG_ARRAY_SIZE;
>  }
>  
>  static void aspeed_gpio_2500_class_init(ObjectClass *klass, void *data)
> @@ -1127,6 +1140,7 @@ static void aspeed_gpio_2500_class_init(ObjectClass 
> *klass, void *data)
>  agc->nr_gpio_pins = 228;
>  agc->nr_gpio_sets = 8;
>  agc->reg_table = aspeed_3_3v_gpios;
> +agc->reg_table_size = GPIO_3_3V_REG_ARRAY_SIZE;
>  }
>  
>  static void aspeed_gpio_ast2600_3_3v_class_init(ObjectClass *klass, void 
> *data)
> @@ -1137,6 +1151,7 @@ static void 
> aspeed_gpio_ast2600_3_3v_class_init(ObjectClass *klass, void *data)
>  agc->nr_gpio_pins = 208;
>  agc->nr_gpio_sets = 7;
>  agc->reg_table = aspeed_3_3v_gpios;
> +agc->reg_table_size = GPIO_3_3V_REG_ARRAY_SIZE;
>  }
>  
>  static void aspeed_gpio_ast2600_1_8v_class_init(ObjectClass *klass, void 
> *data)
> @@ -1147,6 +1162,7 @@ static void 
> aspeed_gpio_ast2600_1_8v_class_init(ObjectClass *klass, void *data)
>  agc->nr_gpio_pins = 36;
>  agc->nr_gpio_sets = 2;
>  agc->reg_table = aspeed_1_8v_gpios;
> +agc->reg_table_size = GPIO_1_8V_REG_ARRAY_SIZE;
>  }
>  
>  static void aspeed_gpio_1030_class_init(ObjectClass *klass, void *data)
> @@ -1157,6 +1173,7 @@ static void aspeed_gpio_1030_class_init(ObjectClass 
> *klass, void *data)
>  agc->nr_gpio_pins = 151;
>  agc->nr_gpio_sets = 6;
>  agc->reg_table = aspeed_3_3v_gpios;
> +agc->reg_table_size = GPIO_3_3V_REG_ARRAY_SIZE;
>  }
>  
>  static const TypeInfo aspeed_gpio_info = {
> diff --git a/include/hw/gpio/aspeed_gpio.h b/include/hw/gpio/aspeed_gpio.h
> index 904eecf62c..e66036ac39 100644
> --- a/include/hw/gpio/aspeed_gpio.h
> +++ b/include/hw/gpio/aspeed_gpio.h
> @@ -75,6 +75,7 @@ struct AspeedGPIOClass {
>  uint32_t 

[PATCH v3 05/11] migration/postcopy: Add postcopy-recover-setup phase

2024-06-19 Thread Peter Xu
This patch adds a migration state on src called "postcopy-recover-setup".
The new state will describe the intermediate step starting from when the
src QEMU received a postcopy recovery request, until the migration channels
are properly established, but before the recovery process takes place.

The request came from Libvirt, which currently relies on the migration
state events to detect migration state changes.  That works for most of the
migration process, except postcopy recovery failures at the beginning.

Currently postcopy recovery only has two major states:

  - postcopy-paused: this is the state that both sides of QEMU will be in
for a long time as long as the migration channel was interrupted.

  - postcopy-recover: this is the state where both sides of QEMU handshake
with each other, preparing for a continuation of postcopy which used to
be interrupted.

The issue here is when the recovery port is invalid, the src QEMU will take
the URI/channels, noticing the ports are not valid, and it'll silently keep
in the postcopy-paused state, with no event sent to Libvirt.  In this case,
the only thing Libvirt can do is to poll the migration status with a proper
interval, however that's less optimal.

Considering that this is the only case where Libvirt won't get a
notification from QEMU on such events, let's add postcopy-recover-setup
state to mimic what we have with the "setup" state of a newly initialized
migration, describing the phase of connection establishment.

With that, postcopy recovery will have two paths to go now, and either path
will guarantee an event generated.  Now the events will look like this
during a recovery process on src QEMU:

  - Initially when the recovery is initiated on src, QEMU will go from
"postcopy-paused" -> "postcopy-recover-setup".  Old QEMUs don't have
this event.

  - Depending on whether the channel re-establishment is succeeded:

- In succeeded case, src QEMU will move from "postcopy-recover-setup"
  to "postcopy-recover".  Old QEMUs also have this event.

- In failure case, src QEMU will move from "postcopy-recover-setup" to
  "postcopy-paused" again.  Old QEMUs don't have this event.

This guarantees that Libvirt will always receive a notification for
recovery process properly.

One thing to mention is, such new status is only needed on src QEMU not
both.  On dest QEMU, the state machine doesn't change.  Hence the events
don't change either.  It's done like so because dest QEMU may not have an
explicit point of setup start.  E.g., it can happen that the dest QEMU
doesn't use the migrate-recover command to set a new URI/channel, but the
old URI/channels can be reused in recovery, in which case the old ports
simply work again after the network routes are fixed up.

Add a new helper postcopy_is_paused() detecting whether postcopy is still
paused, taking RECOVER_SETUP into account too.  When using it on both
src/dst, a slight change is done altogether to always wait for the
semaphore before checking the status, because for both sides a sem_post()
will be required for a recovery.

Cc: Jiri Denemark 
Cc: Prasad Pandit 
Reviewed-by: Fabiano Rosas 
Buglink: https://issues.redhat.com/browse/RHEL-38485
Signed-off-by: Peter Xu 
---
 qapi/migration.json  |  4 
 migration/postcopy-ram.h |  3 +++
 migration/migration.c| 40 ++--
 migration/postcopy-ram.c |  6 ++
 migration/savevm.c   |  4 ++--
 5 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/qapi/migration.json b/qapi/migration.json
index de6c8b0444..0f24206bce 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -142,6 +142,9 @@
 #
 # @postcopy-paused: during postcopy but paused.  (since 3.0)
 #
+# @postcopy-recover-setup: setup phase for a postcopy recovery process,
+# preparing for a recovery phase to start.  (since 9.1)
+#
 # @postcopy-recover: trying to recover from a paused postcopy.  (since
 # 3.0)
 #
@@ -166,6 +169,7 @@
 { 'enum': 'MigrationStatus',
   'data': [ 'none', 'setup', 'cancelling', 'cancelled',
 'active', 'postcopy-active', 'postcopy-paused',
+'postcopy-recover-setup',
 'postcopy-recover', 'completed', 'failed', 'colo',
 'pre-switchover', 'device', 'wait-unplug' ] }
 ##
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
index ecae941211..a6df1b2811 100644
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -13,6 +13,8 @@
 #ifndef QEMU_POSTCOPY_RAM_H
 #define QEMU_POSTCOPY_RAM_H
 
+#include "qapi/qapi-types-migration.h"
+
 /* Return true if the host supports everything we need to do postcopy-ram */
 bool postcopy_ram_supported_by_host(MigrationIncomingState *mis,
 Error **errp);
@@ -193,5 +195,6 @@ enum PostcopyChannels {
 void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
 void postcopy_preempt_setup(MigrationState *s);
 int 

[PATCH v3 08/11] tests/migration-tests: Always enable migration events

2024-06-19 Thread Peter Xu
Libvirt should always enable it, so it'll be nice if qtest also covers that
for all tests on both sides.  migrate_incoming_qmp() used to enable it only on
dst, now we enable them on both, as we'll start to sanity check events even
on the src QEMU.

We'll need to leave the one in migrate_incoming_qmp(), because
virtio-net-failover test uses that one only, and it relies on the events to
work.

Signed-off-by: Peter Xu 
---
 tests/qtest/migration-helpers.c | 1 +
 tests/qtest/migration-test.c| 7 +++
 2 files changed, 8 insertions(+)

diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
index 0ac49ceb54..2ca4425d71 100644
--- a/tests/qtest/migration-helpers.c
+++ b/tests/qtest/migration-helpers.c
@@ -258,6 +258,7 @@ void migrate_incoming_qmp(QTestState *to, const char *uri, 
const char *fmt, ...)
 g_assert(!qdict_haskey(args, "uri"));
 qdict_put_str(args, "uri", uri);
 
+/* This function relies on the event to work, make sure it's enabled */
 migrate_set_capability(to, "events", true);
 
 rsp = qtest_qmp(to, "{ 'execute': 'migrate-incoming', 'arguments': %p}",
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 640713bfd5..c015e801ac 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -851,6 +851,13 @@ static int test_migrate_start(QTestState **from, 
QTestState **to,
 unlink(shmem_path);
 }
 
+/*
+ * Always enable migration events.  Libvirt always uses it, let's try
+ * to mimic as closer as that.
+ */
+migrate_set_capability(*from, "events", true);
+migrate_set_capability(*to, "events", true);
+
 return 0;
 }
 
-- 
2.45.0




[PATCH v3 07/11] tests/migration-tests: Drop most WIN32 ifdefs for postcopy failure tests

2024-06-19 Thread Peter Xu
Most of them are not needed; we can stick with one ifdef inside
postcopy_recover_fail() so as to cover the SCM_RIGHTS tricks only.
The tests won't run on Windows anyway because has_uffd is always false.

Reviewed-by: Fabiano Rosas 
Signed-off-by: Peter Xu 
---
 tests/qtest/migration-test.c | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 85a21ff5e9..640713bfd5 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -1363,9 +1363,9 @@ static void wait_for_postcopy_status(QTestState *one, 
const char *status)
   "completed", NULL });
 }
 
-#ifndef _WIN32
 static void postcopy_recover_fail(QTestState *from, QTestState *to)
 {
+#ifndef _WIN32
 int ret, pair1[2], pair2[2];
 char c;
 
@@ -1427,8 +1427,8 @@ static void postcopy_recover_fail(QTestState *from, 
QTestState *to)
 close(pair1[1]);
 close(pair2[0]);
 close(pair2[1]);
+#endif
 }
-#endif /* _WIN32 */
 
 static void test_postcopy_recovery_common(MigrateCommon *args)
 {
@@ -1468,7 +1468,6 @@ static void test_postcopy_recovery_common(MigrateCommon 
*args)
 wait_for_postcopy_status(to, "postcopy-paused");
 wait_for_postcopy_status(from, "postcopy-paused");
 
-#ifndef _WIN32
 if (args->postcopy_recovery_test_fail) {
 /*
  * Test when a wrong socket specified for recover, and then the
@@ -1477,7 +1476,6 @@ static void test_postcopy_recovery_common(MigrateCommon 
*args)
 postcopy_recover_fail(from, to);
 /* continue with a good recovery */
 }
-#endif /* _WIN32 */
 
 /*
  * Create a new socket to emulate a new channel that is different
@@ -1506,7 +1504,6 @@ static void test_postcopy_recovery(void)
 test_postcopy_recovery_common();
 }
 
-#ifndef _WIN32
 static void test_postcopy_recovery_double_fail(void)
 {
 MigrateCommon args = {
@@ -1515,7 +1512,6 @@ static void test_postcopy_recovery_double_fail(void)
 
 test_postcopy_recovery_common();
 }
-#endif /* _WIN32 */
 
 #ifdef CONFIG_GNUTLS
 static void test_postcopy_recovery_tls_psk(void)
@@ -3693,10 +3689,8 @@ int main(int argc, char **argv)
test_postcopy_preempt);
 migration_test_add("/migration/postcopy/preempt/recovery/plain",
test_postcopy_preempt_recovery);
-#ifndef _WIN32
 migration_test_add("/migration/postcopy/recovery/double-failures",
test_postcopy_recovery_double_fail);
-#endif /* _WIN32 */
 if (is_x86) {
 migration_test_add("/migration/postcopy/suspend",
test_postcopy_suspend);
-- 
2.45.0




[PATCH v3 10/11] tests/migration-tests: Verify postcopy-recover-setup status

2024-06-19 Thread Peter Xu
Making sure the postcopy-recover-setup status is present in the postcopy
failure unit test.  Note that it only applies to src QEMU not dest.

This also introduces the tiny but helpful migration_event_wait() helper.

Signed-off-by: Peter Xu 
---
 tests/qtest/migration-test.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index c015e801ac..de81e28088 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -1412,6 +1412,12 @@ static void postcopy_recover_fail(QTestState *from, 
QTestState *to)
 migrate_recover(to, "fd:fd-mig");
 migrate_qmp(from, to, "fd:fd-mig", NULL, "{'resume': true}");
 
+/*
+ * Source QEMU has an extra RECOVER_SETUP phase, dest doesn't have it.
+ * Make sure it appears along the way.
+ */
+migration_event_wait(from, "postcopy-recover-setup");
+
 /*
  * Make sure both QEMU instances will go into RECOVER stage, then test
  * kicking them out using migrate-pause.
-- 
2.45.0




[PATCH v3 04/11] migration: Cleanup incoming migration setup state change

2024-06-19 Thread Peter Xu
Destination QEMU can setup incoming ports for two purposes: either a fresh
new incoming migration, in which QEMU will switch to SETUP for channel
establishment, or a paused postcopy migration, in which QEMU will stay in
POSTCOPY_PAUSED until kicking off the RECOVER phase.

Now the state machine works on the dest node for the latter only because
migrate_set_state() will implicitly become a no-op if the current state
check fails.  It wasn't clear at all.

Clean it up by providing a helper migration_incoming_state_setup() doing
proper checks over current status.  Postcopy-paused will be explicitly
checked now, and then we can bail out for unknown states.

Reviewed-by: Fabiano Rosas 
Signed-off-by: Peter Xu 
---
 migration/migration.c | 28 ++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 795b30f0d0..41a88fc50a 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -618,6 +618,29 @@ bool migrate_uri_parse(const char *uri, MigrationChannel 
**channel,
 return true;
 }
 
+static bool
+migration_incoming_state_setup(MigrationIncomingState *mis, Error **errp)
+{
+MigrationStatus current = mis->state;
+
+if (current == MIGRATION_STATUS_POSTCOPY_PAUSED) {
+/*
+ * Incoming postcopy migration will stay in PAUSED state even if
+ * reconnection happened.
+ */
+return true;
+}
+
+if (current != MIGRATION_STATUS_NONE) {
+error_setg(errp, "Illegal migration incoming state: %s",
+   MigrationStatus_str(current));
+return false;
+}
+
+migrate_set_state(>state, current, MIGRATION_STATUS_SETUP);
+return true;
+}
+
 static void qemu_start_incoming_migration(const char *uri, bool has_channels,
   MigrationChannelList *channels,
   Error **errp)
@@ -656,8 +679,9 @@ static void qemu_start_incoming_migration(const char *uri, 
bool has_channels,
 return;
 }
 
-migrate_set_state(>state, MIGRATION_STATUS_NONE,
-  MIGRATION_STATUS_SETUP);
+if (!migration_incoming_state_setup(mis, errp)) {
+return;
+}
 
 if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
 SocketAddress *saddr = >u.socket;
-- 
2.45.0




[PATCH v3 03/11] migration: Use MigrationStatus instead of int

2024-06-19 Thread Peter Xu
QEMU uses "int" in most cases even if it stores MigrationStatus.  I don't
know why, so let's try to do that right and see what blows up..

Reviewed-by: Fabiano Rosas 
Signed-off-by: Peter Xu 
---
 migration/migration.h |  9 +
 migration/migration.c | 24 +++-
 2 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/migration/migration.h b/migration/migration.h
index 6af01362d4..38aa1402d5 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -160,7 +160,7 @@ struct MigrationIncomingState {
 /* PostCopyFD's for external userfaultfds & handlers of shared memory */
 GArray   *postcopy_remote_fds;
 
-int state;
+MigrationStatus state;
 
 /*
  * The incoming migration coroutine, non-NULL during qemu_loadvm_state().
@@ -301,7 +301,7 @@ struct MigrationState {
 /* params from 'migrate-set-parameters' */
 MigrationParameters parameters;
 
-int state;
+MigrationStatus state;
 
 /* State related to return path */
 struct {
@@ -459,7 +459,8 @@ struct MigrationState {
 bool rdma_migration;
 };
 
-void migrate_set_state(int *state, int old_state, int new_state);
+void migrate_set_state(MigrationStatus *state, MigrationStatus old_state,
+   MigrationStatus new_state);
 
 void migration_fd_process_incoming(QEMUFile *f);
 void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp);
@@ -479,7 +480,7 @@ int migrate_init(MigrationState *s, Error **errp);
 bool migration_is_blocked(Error **errp);
 /* True if outgoing migration has entered postcopy phase */
 bool migration_in_postcopy(void);
-bool migration_postcopy_is_alive(int state);
+bool migration_postcopy_is_alive(MigrationStatus state);
 MigrationState *migrate_get_current(void);
 bool migration_has_failed(MigrationState *);
 bool migrate_mode_is_cpr(MigrationState *);
diff --git a/migration/migration.c b/migration/migration.c
index f9b69af62f..795b30f0d0 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -413,7 +413,7 @@ void migration_incoming_state_destroy(void)
 yank_unregister_instance(MIGRATION_YANK_INSTANCE);
 }
 
-static void migrate_generate_event(int new_state)
+static void migrate_generate_event(MigrationStatus new_state)
 {
 if (migrate_events()) {
 qapi_event_send_migration(new_state);
@@ -1296,8 +1296,6 @@ static void fill_destination_migration_info(MigrationInfo 
*info)
 }
 
 switch (mis->state) {
-case MIGRATION_STATUS_NONE:
-return;
 case MIGRATION_STATUS_SETUP:
 case MIGRATION_STATUS_CANCELLING:
 case MIGRATION_STATUS_CANCELLED:
@@ -1313,6 +1311,8 @@ static void fill_destination_migration_info(MigrationInfo 
*info)
 info->has_status = true;
 fill_destination_postcopy_migration_info(info);
 break;
+default:
+return;
 }
 info->status = mis->state;
 
@@ -1360,7 +1360,8 @@ void qmp_migrate_start_postcopy(Error **errp)
 
 /* shared migration helpers */
 
-void migrate_set_state(int *state, int old_state, int new_state)
+void migrate_set_state(MigrationStatus *state, MigrationStatus old_state,
+   MigrationStatus new_state)
 {
 assert(new_state < MIGRATION_STATUS__MAX);
 if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
@@ -1567,7 +1568,7 @@ bool migration_in_postcopy(void)
 }
 }
 
-bool migration_postcopy_is_alive(int state)
+bool migration_postcopy_is_alive(MigrationStatus state)
 {
 switch (state) {
 case MIGRATION_STATUS_POSTCOPY_ACTIVE:
@@ -1612,20 +1613,9 @@ bool migration_is_idle(void)
 case MIGRATION_STATUS_COMPLETED:
 case MIGRATION_STATUS_FAILED:
 return true;
-case MIGRATION_STATUS_SETUP:
-case MIGRATION_STATUS_CANCELLING:
-case MIGRATION_STATUS_ACTIVE:
-case MIGRATION_STATUS_POSTCOPY_ACTIVE:
-case MIGRATION_STATUS_COLO:
-case MIGRATION_STATUS_PRE_SWITCHOVER:
-case MIGRATION_STATUS_DEVICE:
-case MIGRATION_STATUS_WAIT_UNPLUG:
+default:
 return false;
-case MIGRATION_STATUS__MAX:
-g_assert_not_reached();
 }
-
-return false;
 }
 
 bool migration_is_active(void)
-- 
2.45.0




[PATCH v3 06/11] migration/docs: Update postcopy recover session for SETUP phase

2024-06-19 Thread Peter Xu
Firstly, the "Paused" state was added in the wrong place before. The state
machine section was describing PostcopyState, rather than MigrationStatus.
Drop the Paused state descriptions.

Then in the postcopy recover session, add more information on the state
machine for MigrationStatus in the lines.  Add the new RECOVER_SETUP phase.

Reviewed-by: Fabiano Rosas 
Signed-off-by: Peter Xu 
---
 docs/devel/migration/postcopy.rst | 31 ---
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/docs/devel/migration/postcopy.rst 
b/docs/devel/migration/postcopy.rst
index 6c51e96d79..a15594e11f 100644
--- a/docs/devel/migration/postcopy.rst
+++ b/docs/devel/migration/postcopy.rst
@@ -99,17 +99,6 @@ ADVISE->DISCARD->LISTEN->RUNNING->END
 (although it can't do the cleanup it would do as it
 finishes a normal migration).
 
- - Paused
-
-Postcopy can run into a paused state (normally on both sides when
-happens), where all threads will be temporarily halted mostly due to
-network errors.  When reaching paused state, migration will make sure
-the qemu binary on both sides maintain the data without corrupting
-the VM.  To continue the migration, the admin needs to fix the
-migration channel using the QMP command 'migrate-recover' on the
-destination node, then resume the migration using QMP command 'migrate'
-again on source node, with resume=true flag set.
-
  - End
 
 The listen thread can now quit, and perform the cleanup of migration
@@ -221,7 +210,8 @@ paused postcopy migration.
 
 The recovery phase normally contains a few steps:
 
-  - When network issue occurs, both QEMU will go into PAUSED state
+  - When network issue occurs, both QEMU will go into **POSTCOPY_PAUSED**
+migration state.
 
   - When the network is recovered (or a new network is provided), the admin
 can setup the new channel for migration using QMP command
@@ -229,9 +219,20 @@ The recovery phase normally contains a few steps:
 
   - On source host, the admin can continue the interrupted postcopy
 migration using QMP command 'migrate' with resume=true flag set.
-
-  - After the connection is re-established, QEMU will continue the postcopy
-migration on both sides.
+Source QEMU will go into **POSTCOPY_RECOVER_SETUP** state trying to
+re-establish the channels.
+
+  - When both sides of QEMU successfully reconnects using a new or fixed up
+channel, they will go into **POSTCOPY_RECOVER** state, some handshake
+procedure will be needed to properly synchronize the VM states between
+the two QEMUs to continue the postcopy migration.  For example, there
+can be pages sent right during the window when the network is
+interrupted, then the handshake will guarantee pages lost in-flight
+will be resent again.
+
+  - After a proper handshake synchronization, QEMU will continue the
+postcopy migration on both sides and go back to **POSTCOPY_ACTIVE**
+state.  Postcopy migration will continue.
 
 During a paused postcopy migration, the VM can logically still continue
 running, and it will not be impacted from any page access to pages that
-- 
2.45.0




[PATCH v3 01/11] migration/multifd: Avoid the final FLUSH in complete()

2024-06-19 Thread Peter Xu
We always do the flush when finishing one round of scan, and during
complete() phase we should scan one more round making sure no dirty page
existed.  In that case we shouldn't need one explicit FLUSH at the end of
complete(), as when reaching there all pages should have been flushed.

Reviewed-by: Fabiano Rosas 
Tested-by: Fabiano Rosas 
Signed-off-by: Peter Xu 
---
 migration/ram.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index ceea586b06..edec1a2d07 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3300,10 +3300,6 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
 }
 }
 
-if (migrate_multifd() && !migrate_multifd_flush_after_each_section() &&
-!migrate_mapped_ram()) {
-qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
-}
 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 return qemu_fflush(f);
 }
-- 
2.45.0




[PATCH v3 11/11] tests/migration-tests: Cover postcopy failure on reconnect

2024-06-19 Thread Peter Xu
Make sure there will be an event for postcopy recovery, regardless of
whether the reconnect will succeed, or when the failure happens.

The added new case is to fail early in postcopy recovery, in which case it
didn't even reach RECOVER stage on src (and in real life it'll be the same
to dest, but the test case is just slightly more involved due to the dual
socketpair setup).

To do that, rename the postcopy_recovery_test_fail to reflect either stage
to fail, instead of a boolean.

Reviewed-by: Fabiano Rosas 
Signed-off-by: Peter Xu 
---
 tests/qtest/migration-test.c | 95 +---
 1 file changed, 77 insertions(+), 18 deletions(-)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index de81e28088..fe33b86783 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -72,6 +72,17 @@ static QTestMigrationState dst_state;
 #define QEMU_ENV_SRC "QTEST_QEMU_BINARY_SRC"
 #define QEMU_ENV_DST "QTEST_QEMU_BINARY_DST"
 
+typedef enum PostcopyRecoveryFailStage {
+/*
+ * "no failure" must be 0 as it's the default.  OTOH, real failure
+ * cases must be >0 to make sure they trigger by a "if" test.
+ */
+POSTCOPY_FAIL_NONE = 0,
+POSTCOPY_FAIL_CHANNEL_ESTABLISH,
+POSTCOPY_FAIL_RECOVERY,
+POSTCOPY_FAIL_MAX
+} PostcopyRecoveryFailStage;
+
 #if defined(__linux__)
 #include 
 #include 
@@ -692,7 +703,7 @@ typedef struct {
 /* Postcopy specific fields */
 void *postcopy_data;
 bool postcopy_preempt;
-bool postcopy_recovery_test_fail;
+PostcopyRecoveryFailStage postcopy_recovery_fail_stage;
 } MigrateCommon;
 
 static int test_migrate_start(QTestState **from, QTestState **to,
@@ -1370,12 +1381,16 @@ static void wait_for_postcopy_status(QTestState *one, 
const char *status)
   "completed", NULL });
 }
 
-static void postcopy_recover_fail(QTestState *from, QTestState *to)
+static void postcopy_recover_fail(QTestState *from, QTestState *to,
+  PostcopyRecoveryFailStage stage)
 {
 #ifndef _WIN32
+bool fail_early = (stage == POSTCOPY_FAIL_CHANNEL_ESTABLISH);
 int ret, pair1[2], pair2[2];
 char c;
 
+g_assert(stage > POSTCOPY_FAIL_NONE && stage < POSTCOPY_FAIL_MAX);
+
 /* Create two unrelated socketpairs */
 ret = qemu_socketpair(PF_LOCAL, SOCK_STREAM, 0, pair1);
 g_assert_cmpint(ret, ==, 0);
@@ -1409,6 +1424,14 @@ static void postcopy_recover_fail(QTestState *from, 
QTestState *to)
 ret = send(pair2[1], , 1, 0);
 g_assert_cmpint(ret, ==, 1);
 
+if (stage == POSTCOPY_FAIL_CHANNEL_ESTABLISH) {
+/*
+ * This will make src QEMU to fail at an early stage when trying to
+ * resume later, where it shouldn't reach RECOVER stage at all.
+ */
+close(pair1[1]);
+}
+
 migrate_recover(to, "fd:fd-mig");
 migrate_qmp(from, to, "fd:fd-mig", NULL, "{'resume': true}");
 
@@ -1418,28 +1441,53 @@ static void postcopy_recover_fail(QTestState *from, 
QTestState *to)
  */
 migration_event_wait(from, "postcopy-recover-setup");
 
+if (fail_early) {
+/*
+ * When fails at reconnection, src QEMU will automatically goes
+ * back to PAUSED state.  Making sure there is an event in this
+ * case: Libvirt relies on this to detect early reconnection
+ * errors.
+ */
+migration_event_wait(from, "postcopy-paused");
+} else {
+/*
+ * We want to test "fail later" at RECOVER stage here.  Make sure
+ * both QEMU instances will go into RECOVER stage first, then test
+ * kicking them out using migrate-pause.
+ *
+ * Explicitly check the RECOVER event on src, that's what Libvirt
+ * relies on, rather than polling.
+ */
+migration_event_wait(from, "postcopy-recover");
+wait_for_postcopy_status(from, "postcopy-recover");
+
+/* Need an explicit kick on src QEMU in this case */
+migrate_pause(from);
+}
+
 /*
- * Make sure both QEMU instances will go into RECOVER stage, then test
- * kicking them out using migrate-pause.
+ * For all failure cases, we'll reach such states on both sides now.
+ * Check them.
  */
-wait_for_postcopy_status(from, "postcopy-recover");
+wait_for_postcopy_status(from, "postcopy-paused");
 wait_for_postcopy_status(to, "postcopy-recover");
 
 /*
- * This would be issued by the admin upon noticing the hang, we should
- * make sure we're able to kick this out.
+ * Kick dest QEMU out too. This is normally not needed in reality
+ * because when the channel is shutdown it should also happen on src.
+ * However here we used separate socket pairs so we need to do that
+ * explicitly.
  */
-migrate_pause(from);
-wait_for_postcopy_status(from, "postcopy-paused");
-
-/* Do the same test on dest */
 migrate_pause(to);
 

[PATCH v3 00/11] migration: New postcopy state, and some cleanups

2024-06-19 Thread Peter Xu
Based-on: <20240617185731.9725-1-faro...@suse.de>

v3:
- Added one comment in patch 8 explaining why migrate_incoming_qmp() needs
  to keep enabling "events" capability.
- Split patch 9 into two patches, which makes migration_event_wait() to be
  used also in migrate_incoming_qmp()
- Rename the tests in last patch, and a spell fix
- Rebased to "[PATCH v3 00/16] migration/mapped-ram: Add direct-io support"

v1: https://lore.kernel.org/r/20240612144228.1179240-1-pet...@redhat.com
v2: https://lore.kernel.org/r/20240617181534.1425179-1-pet...@redhat.com

The major goal of this patchset is patch 5, which introduced a new postcopy
state so that we will send an event in postcopy reconnect failures that
Libvirt would prefer to have.  There's more information for that issue in
the commit message alone.

Patch 1-2 are cleanups that are not directly relevant but I found/stored
that could be good to have.  I made it simple by putting them together in
one thread to make patch management easier, but I can send them separately
when necessary.

Patch 3 is also a cleanup, but will be needed for patch 4 as dependency.

Patch 4-5 is the core patches.

Patch 6 updates doc for the new state.

Patch 7-11 adds a new test for the new state.

CI: https://gitlab.com/peterx/qemu/-/pipelines/1339544694

Comments welcomed, thanks.

Peter Xu (11):
  migration/multifd: Avoid the final FLUSH in complete()
  migration: Rename thread debug names
  migration: Use MigrationStatus instead of int
  migration: Cleanup incoming migration setup state change
  migration/postcopy: Add postcopy-recover-setup phase
  migration/docs: Update postcopy recover session for SETUP phase
  tests/migration-tests: Drop most WIN32 ifdefs for postcopy failure
tests
  tests/migration-tests: Always enable migration events
  tests/migration-tests: migration_event_wait()
  tests/migration-tests: Verify postcopy-recover-setup status
  tests/migration-tests: Cover postcopy failure on reconnect

 docs/devel/migration/postcopy.rst |  31 
 qapi/migration.json   |   4 ++
 migration/migration.h |   9 +--
 migration/postcopy-ram.h  |   3 +
 tests/qtest/migration-helpers.h   |   2 +
 migration/colo.c  |   2 +-
 migration/migration.c |  98 +
 migration/multifd.c   |   6 +-
 migration/postcopy-ram.c  |  10 ++-
 migration/ram.c   |   4 --
 migration/savevm.c|   6 +-
 tests/qtest/migration-helpers.c   |  32 ++---
 tests/qtest/migration-test.c  | 116 +++---
 13 files changed, 229 insertions(+), 94 deletions(-)

-- 
2.45.0




[PATCH v3 09/11] tests/migration-tests: migration_event_wait()

2024-06-19 Thread Peter Xu
Introduce a small helper to wait for a migration event, generalized from
the incoming migration path.  Make the helper easier to use by allowing it
to keep waiting until the expected event is received.

Signed-off-by: Peter Xu 
---
 tests/qtest/migration-helpers.h |  2 ++
 tests/qtest/migration-helpers.c | 31 ++-
 2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h
index 50095fca4a..72dba369fb 100644
--- a/tests/qtest/migration-helpers.h
+++ b/tests/qtest/migration-helpers.h
@@ -63,4 +63,6 @@ static inline bool probe_o_direct_support(const char *tmpfs)
 }
 #endif
 void migration_test_add(const char *path, void (*fn)(void));
+void migration_event_wait(QTestState *s, const char *target);
+
 #endif /* MIGRATION_HELPERS_H */
diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
index 2ca4425d71..84f49db85e 100644
--- a/tests/qtest/migration-helpers.c
+++ b/tests/qtest/migration-helpers.c
@@ -249,7 +249,7 @@ void migrate_set_capability(QTestState *who, const char 
*capability,
 void migrate_incoming_qmp(QTestState *to, const char *uri, const char *fmt, 
...)
 {
 va_list ap;
-QDict *args, *rsp, *data;
+QDict *args, *rsp;
 
 va_start(ap, fmt);
 args = qdict_from_vjsonf_nofail(fmt, ap);
@@ -272,14 +272,7 @@ void migrate_incoming_qmp(QTestState *to, const char *uri, 
const char *fmt, ...)
 g_assert(qdict_haskey(rsp, "return"));
 qobject_unref(rsp);
 
-rsp = qtest_qmp_eventwait_ref(to, "MIGRATION");
-g_assert(qdict_haskey(rsp, "data"));
-
-data = qdict_get_qdict(rsp, "data");
-g_assert(qdict_haskey(data, "status"));
-g_assert_cmpstr(qdict_get_str(data, "status"), ==, "setup");
-
-qobject_unref(rsp);
+migration_event_wait(to, "setup");
 }
 
 /*
@@ -518,3 +511,23 @@ bool probe_o_direct_support(const char *tmpfs)
 return true;
 }
 #endif
+
+/*
+ * Wait for a "MIGRATION" event.  This is what Libvirt uses to track
+ * migration status changes.
+ */
+void migration_event_wait(QTestState *s, const char *target)
+{
+QDict *response, *data;
+const char *status;
+bool found;
+
+do {
+response = qtest_qmp_eventwait_ref(s, "MIGRATION");
+data = qdict_get_qdict(response, "data");
+g_assert(data);
+status = qdict_get_str(data, "status");
+found = (strcmp(status, target) == 0);
+qobject_unref(response);
+} while (!found);
+}
-- 
2.45.0




[PATCH v3 02/11] migration: Rename thread debug names

2024-06-19 Thread Peter Xu
The postcopy thread names on dest QEMU are slightly confusing, partly I'll
need to blame myself on 36f62f11e4 ("migration: Postcopy preemption
preparation on channel creation").  E.g., "fault-fast" reads like a fast
version of "fault-default", but it's actually the fast version of
"postcopy/listen".

Taking this chance, rename all the migration threads with proper rules.
Considering we only have 15 chars usable, prefix all threads with "mig/",
meanwhile identify src/dst threads properly this time.  So now most thread
names will look like "mig/DIR/xxx", where DIR will be "src"/"dst", except
the bg-snapshot thread which doesn't have a direction.

For multifd threads, making them "mig/{src|dst}/{send|recv}_%d".

We used to have "live_migration" thread for a very long time, now it's
called "mig/src/main".  We may hope to have "mig/dst/main" soon but not
yet.

Reviewed-by: Fabiano Rosas 
Reviewed-by: Zhijian Li (Fujitsu) 
Signed-off-by: Peter Xu 
---
 migration/colo.c | 2 +-
 migration/migration.c| 6 +++---
 migration/multifd.c  | 6 +++---
 migration/postcopy-ram.c | 4 ++--
 migration/savevm.c   | 2 +-
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/migration/colo.c b/migration/colo.c
index f96c2ee069..6449490221 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -935,7 +935,7 @@ void coroutine_fn colo_incoming_co(void)
 assert(bql_locked());
 assert(migration_incoming_colo_enabled());
 
-qemu_thread_create(, "COLO incoming", colo_process_incoming_thread,
+qemu_thread_create(, "mig/dst/colo", colo_process_incoming_thread,
mis, QEMU_THREAD_JOINABLE);
 
 mis->colo_incoming_co = qemu_coroutine_self();
diff --git a/migration/migration.c b/migration/migration.c
index e03c80b3aa..f9b69af62f 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2431,7 +2431,7 @@ static int open_return_path_on_source(MigrationState *ms)
 
 trace_open_return_path_on_source();
 
-qemu_thread_create(>rp_state.rp_thread, "return path",
+qemu_thread_create(>rp_state.rp_thread, "mig/src/rp-thr",
source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
 ms->rp_state.rp_thread_created = true;
 
@@ -3770,10 +3770,10 @@ void migrate_fd_connect(MigrationState *s, Error 
*error_in)
 }
 
 if (migrate_background_snapshot()) {
-qemu_thread_create(>thread, "bg_snapshot",
+qemu_thread_create(>thread, "mig/snapshot",
 bg_migration_thread, s, QEMU_THREAD_JOINABLE);
 } else {
-qemu_thread_create(>thread, "live_migration",
+qemu_thread_create(>thread, "mig/src/main",
 migration_thread, s, QEMU_THREAD_JOINABLE);
 }
 s->migration_thread_running = true;
diff --git a/migration/multifd.c b/migration/multifd.c
index d82885fdbb..0b4cbaddfe 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -1069,7 +1069,7 @@ static bool multifd_tls_channel_connect(MultiFDSendParams 
*p,
 args->p = p;
 
 p->tls_thread_created = true;
-qemu_thread_create(>tls_thread, "multifd-tls-handshake-worker",
+qemu_thread_create(>tls_thread, "mig/src/tls",
multifd_tls_handshake_thread, args,
QEMU_THREAD_JOINABLE);
 return true;
@@ -1190,7 +1190,7 @@ bool multifd_send_setup(void)
 p->packet->magic = cpu_to_be32(MULTIFD_MAGIC);
 p->packet->version = cpu_to_be32(MULTIFD_VERSION);
 }
-p->name = g_strdup_printf("multifdsend_%d", i);
+p->name = g_strdup_printf("mig/src/send_%d", i);
 p->page_size = qemu_target_page_size();
 p->page_count = page_count;
 p->write_flags = 0;
@@ -1604,7 +1604,7 @@ int multifd_recv_setup(Error **errp)
 + sizeof(uint64_t) * page_count;
 p->packet = g_malloc0(p->packet_len);
 }
-p->name = g_strdup_printf("multifdrecv_%d", i);
+p->name = g_strdup_printf("mig/dst/recv_%d", i);
 p->normal = g_new0(ram_addr_t, page_count);
 p->zero = g_new0(ram_addr_t, page_count);
 p->page_count = page_count;
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 3419779548..97701e6bb2 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -1238,7 +1238,7 @@ int postcopy_ram_incoming_setup(MigrationIncomingState 
*mis)
 return -1;
 }
 
-postcopy_thread_create(mis, >fault_thread, "fault-default",
+postcopy_thread_create(mis, >fault_thread, "mig/dst/fault",
postcopy_ram_fault_thread, QEMU_THREAD_JOINABLE);
 mis->have_fault_thread = true;
 
@@ -1258,7 +1258,7 @@ int postcopy_ram_incoming_setup(MigrationIncomingState 
*mis)
  * This thread needs to be created after the temp pages because
  * it'll fetch RAM_CHANNEL_POSTCOPY PostcopyTmpPage immediately.
  */
-postcopy_thread_create(mis, >postcopy_prio_thread, "fault-fast",
+

Re: [PATCH v4 2/5] ppc/pnv: Extend SPI model

2024-06-19 Thread Miles Glenn
Hi Chalapathi,

I can't say I have a great understanding of this IBM SPI controller,
but I did find some places for improvement, mostly dealing with the use
of "magic numbers" throughout the code.  Please see comments below.

Thanks,

Glenn

On Mon, 2024-06-17 at 11:54 -0500, Chalapathi V wrote:
> In this commit SPI shift engine and sequencer logic is implemented.
> Shift engine performs serialization and de-serialization according to
> the
> control by the sequencer and according to the setup defined in the
> configuration registers. Sequencer implements the main control logic
> and
> FSM to handle data transmit and data receive control of the shift
> engine.
> 
> Signed-off-by: Chalapathi V 
> ---
>  include/hw/ssi/pnv_spi.h |   27 +
>  hw/ssi/pnv_spi.c | 1039
> ++
>  hw/ssi/trace-events  |   15 +
>  3 files changed, 1081 insertions(+)
> 
> diff --git a/include/hw/ssi/pnv_spi.h b/include/hw/ssi/pnv_spi.h
> index 71c53d4a17..21fbfcb69c 100644
> --- a/include/hw/ssi/pnv_spi.h
> +++ b/include/hw/ssi/pnv_spi.h
> @@ -8,6 +8,14 @@
>   * This model Supports a connection to a single SPI responder.
>   * Introduced for P10 to provide access to SPI seeproms, TPM, flash
> device
>   * and an ADC controller.
> + *
> + * All SPI function control is mapped into the SPI register space to
> enable
> + * full control by firmware.
> + *
> + * SPI Controller has sequencer and shift engine. The SPI shift
> engine
> + * performs serialization and de-serialization according to the
> control by
> + * the sequencer and according to the setup defined in the
> configuration
> + * registers and the SPI sequencer implements the main control
> logic.
>   */
>  #include "hw/ssi/ssi.h"
>  #include "hw/sysbus.h"
> @@ -50,6 +58,25 @@ typedef struct PnvSpi {
>  MemoryRegionxscom_spic_regs;
>  /* SPI object number */
>  uint32_tspic_num;
> +uint8_t transfer_len;
> +uint8_t responder_select;
> +/* To verify if shift_n1 happens prior to shift_n2 */
> +boolshift_n1_done;
> +/* Loop counter for branch operation opcode Ex/Fx */
> +uint8_t loop_counter_1;
> +uint8_t loop_counter_2;
> +/* N1/N2_bits specifies the size of the N1/N2 segment of a frame
> in bits.*/
> +uint8_t N1_bits;
> +uint8_t N2_bits;
> +/* Number of bytes in a payload for the N1/N2 frame segment.*/
> +uint8_t N1_bytes;
> +uint8_t N2_bytes;
> +/* Number of N1/N2 bytes marked for transmit */
> +uint8_t N1_tx;
> +uint8_t N2_tx;
> +/* Number of N1/N2 bytes marked for receive */
> +uint8_t N1_rx;
> +uint8_t N2_rx;
>  
>  /* SPI registers */
>  uint64_tregs[PNV_SPI_REGS];
> diff --git a/hw/ssi/pnv_spi.c b/hw/ssi/pnv_spi.c
> index da9e3925dd..b8f4370525 100644
> --- a/hw/ssi/pnv_spi.c
> +++ b/hw/ssi/pnv_spi.c
> @@ -14,9 +14,1040 @@
>  #include "hw/ssi/pnv_spi_regs.h"
>  #include "hw/ssi/ssi.h"
>  #include 
> +#include 

I think the only reason you are including math.h is because you're
using the `ceil` function below.  And, since you are only using it to
operate on integers, it is not really necessary.  See comment below on
how to do the same thing with integer math.

>  #include "hw/irq.h"
>  #include "trace.h"
>  
> +/* PnvXferBuffer */
> +typedef struct PnvXferBuffer {
> +
> +uint32_tlen;
> +uint8_t*data;
> +
> +} PnvXferBuffer;
> +
> +/* pnv_spi_xfer_buffer_methods */
> +static PnvXferBuffer *pnv_spi_xfer_buffer_new(void)
> +{
> +PnvXferBuffer *payload = g_malloc0(sizeof(*payload));
> +
> +return payload;
> +}
> +
> +static void pnv_spi_xfer_buffer_free(PnvXferBuffer *payload)
> +{
> +free(payload->data);
> +free(payload);
> +}
> +
> +static uint8_t *pnv_spi_xfer_buffer_write_ptr(PnvXferBuffer
> *payload,
> +uint32_t offset, uint32_t length)
> +{
> +if (payload->len < (offset + length)) {
> +payload->len = offset + length;
> +payload->data = g_realloc(payload->data, payload->len);
> +}
> +return >data[offset];
> +}
> +
> +static bool does_rdr_match(PnvSpi *s)
> +{
> +/*
> + * According to spec, the mask bits that are 0 are compared and
> the
> + * bits that are 1 are ignored.
> + */
> +uint16_t rdr_match_mask = GETFIELD(SPI_MM_RDR_MATCH_MASK,
> +s->regs[SPI_MM_REG]);
> +uint16_t rdr_match_val = GETFIELD(SPI_MM_RDR_MATCH_VAL,
> +s->regs[SPI_MM_REG]);
> +
> +if ((~rdr_match_mask & rdr_match_val) == ((~rdr_match_mask) &
> +GETFIELD(PPC_BITMASK(48, 63), s-
> >regs[SPI_RCV_DATA_REG]))) {
> +return true;
> +}
> +return false;
> +}
> +
> +static uint8_t get_from_offset(PnvSpi *s, uint8_t offset)
> +{
> +uint8_t byte;
> +
> +/*
> + * Offset is an index between 0 and PNV_SPI_REG_SIZE - 1
> + * 

[PULL 19/24] util/bufferiszero: Split out host include files

2024-06-19 Thread Richard Henderson
Split out host/bufferiszero.h.inc for x86, aarch64 and generic
in order to avoid an overlong ifdef ladder.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 util/bufferiszero.c  | 191 +--
 host/include/aarch64/host/bufferiszero.c.inc |  76 
 host/include/generic/host/bufferiszero.c.inc |  10 +
 host/include/i386/host/bufferiszero.c.inc| 124 
 host/include/x86_64/host/bufferiszero.c.inc  |   1 +
 5 files changed, 212 insertions(+), 190 deletions(-)
 create mode 100644 host/include/aarch64/host/bufferiszero.c.inc
 create mode 100644 host/include/generic/host/bufferiszero.c.inc
 create mode 100644 host/include/i386/host/bufferiszero.c.inc
 create mode 100644 host/include/x86_64/host/bufferiszero.c.inc

diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index 11c080e02c..522146dab9 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -81,196 +81,7 @@ static bool buffer_is_zero_int_ge256(const void *buf, 
size_t len)
 return t == 0;
 }
 
-#if defined(CONFIG_AVX2_OPT) || defined(__SSE2__)
-#include 
-
-/* Helper for preventing the compiler from reassociating
-   chains of binary vector operations.  */
-#define SSE_REASSOC_BARRIER(vec0, vec1) asm("" : "+x"(vec0), "+x"(vec1))
-
-/* Note that these vectorized functions may assume len >= 256.  */
-
-static bool __attribute__((target("sse2")))
-buffer_zero_sse2(const void *buf, size_t len)
-{
-/* Unaligned loads at head/tail.  */
-__m128i v = *(__m128i_u *)(buf);
-__m128i w = *(__m128i_u *)(buf + len - 16);
-/* Align head/tail to 16-byte boundaries.  */
-const __m128i *p = QEMU_ALIGN_PTR_DOWN(buf + 16, 16);
-const __m128i *e = QEMU_ALIGN_PTR_DOWN(buf + len - 1, 16);
-__m128i zero = { 0 };
-
-/* Collect a partial block at tail end.  */
-v |= e[-1]; w |= e[-2];
-SSE_REASSOC_BARRIER(v, w);
-v |= e[-3]; w |= e[-4];
-SSE_REASSOC_BARRIER(v, w);
-v |= e[-5]; w |= e[-6];
-SSE_REASSOC_BARRIER(v, w);
-v |= e[-7]; v |= w;
-
-/*
- * Loop over complete 128-byte blocks.
- * With the head and tail removed, e - p >= 14, so the loop
- * must iterate at least once.
- */
-do {
-v = _mm_cmpeq_epi8(v, zero);
-if (unlikely(_mm_movemask_epi8(v) != 0xFFFF)) {
-return false;
-}
-v = p[0]; w = p[1];
-SSE_REASSOC_BARRIER(v, w);
-v |= p[2]; w |= p[3];
-SSE_REASSOC_BARRIER(v, w);
-v |= p[4]; w |= p[5];
-SSE_REASSOC_BARRIER(v, w);
-v |= p[6]; w |= p[7];
-SSE_REASSOC_BARRIER(v, w);
-v |= w;
-p += 8;
-} while (p < e - 7);
-
-return _mm_movemask_epi8(_mm_cmpeq_epi8(v, zero)) == 0xFFFF;
-}
-
-#ifdef CONFIG_AVX2_OPT
-static bool __attribute__((target("avx2")))
-buffer_zero_avx2(const void *buf, size_t len)
-{
-/* Unaligned loads at head/tail.  */
-__m256i v = *(__m256i_u *)(buf);
-__m256i w = *(__m256i_u *)(buf + len - 32);
-/* Align head/tail to 32-byte boundaries.  */
-const __m256i *p = QEMU_ALIGN_PTR_DOWN(buf + 32, 32);
-const __m256i *e = QEMU_ALIGN_PTR_DOWN(buf + len - 1, 32);
-__m256i zero = { 0 };
-
-/* Collect a partial block at tail end.  */
-v |= e[-1]; w |= e[-2];
-SSE_REASSOC_BARRIER(v, w);
-v |= e[-3]; w |= e[-4];
-SSE_REASSOC_BARRIER(v, w);
-v |= e[-5]; w |= e[-6];
-SSE_REASSOC_BARRIER(v, w);
-v |= e[-7]; v |= w;
-
-/* Loop over complete 256-byte blocks.  */
-for (; p < e - 7; p += 8) {
-/* PTEST is not profitable here.  */
-v = _mm256_cmpeq_epi8(v, zero);
-if (unlikely(_mm256_movemask_epi8(v) != 0xFFFFFFFF)) {
-return false;
-}
-v = p[0]; w = p[1];
-SSE_REASSOC_BARRIER(v, w);
-v |= p[2]; w |= p[3];
-SSE_REASSOC_BARRIER(v, w);
-v |= p[4]; w |= p[5];
-SSE_REASSOC_BARRIER(v, w);
-v |= p[6]; w |= p[7];
-SSE_REASSOC_BARRIER(v, w);
-v |= w;
-}
-
-return _mm256_movemask_epi8(_mm256_cmpeq_epi8(v, zero)) == 0xFFFFFFFF;
-}
-#endif /* CONFIG_AVX2_OPT */
-
-static biz_accel_fn const accel_table[] = {
-buffer_is_zero_int_ge256,
-buffer_zero_sse2,
-#ifdef CONFIG_AVX2_OPT
-buffer_zero_avx2,
-#endif
-};
-
-static unsigned best_accel(void)
-{
-#ifdef CONFIG_AVX2_OPT
-unsigned info = cpuinfo_init();
-
-if (info & CPUINFO_AVX2) {
-return 2;
-}
-#endif
-return 1;
-}
-
-#elif defined(__aarch64__) && defined(__ARM_NEON)
-#include 
-
-/*
- * Helper for preventing the compiler from reassociating
- * chains of binary vector operations.
- */
-#define REASSOC_BARRIER(vec0, vec1) asm("" : "+w"(vec0), "+w"(vec1))
-
-static bool buffer_is_zero_simd(const void *buf, size_t len)
-{
-uint32x4_t t0, t1, t2, t3;
-
-/* Align head/tail to 16-byte boundaries.  */
-const uint32x4_t *p = QEMU_ALIGN_PTR_DOWN(buf + 16, 16);
-const uint32x4_t *e = QEMU_ALIGN_PTR_DOWN(buf + len 

[PULL 22/24] linux-user: Make TARGET_NR_setgroups affect only the current thread

2024-06-19 Thread Richard Henderson
From: Ilya Leoshkevich 

Like TARGET_NR_setuid, TARGET_NR_setgroups should affect only the
calling thread, and not the entire process. Therefore, implement it
using a syscall, and not a libc call.

Cc: qemu-sta...@nongnu.org
Fixes: 19b84f3c35d7 ("added setgroups and getgroups syscalls")
Signed-off-by: Ilya Leoshkevich 
Reviewed-by: Philippe Mathieu-Daudé 
Message-Id: <20240614154710.1078766-1-...@linux.ibm.com>
Reviewed-by: Richard Henderson 
Signed-off-by: Richard Henderson 
---
 linux-user/syscall.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index b9b5a387b3..e2804312fc 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -7209,11 +7209,17 @@ static inline int tswapid(int id)
 #else
 #define __NR_sys_setresgid __NR_setresgid
 #endif
+#ifdef __NR_setgroups32
+#define __NR_sys_setgroups __NR_setgroups32
+#else
+#define __NR_sys_setgroups __NR_setgroups
+#endif
 
 _syscall1(int, sys_setuid, uid_t, uid)
 _syscall1(int, sys_setgid, gid_t, gid)
 _syscall3(int, sys_setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
 _syscall3(int, sys_setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
+_syscall2(int, sys_setgroups, int, size, gid_t *, grouplist)
 
 void syscall_init(void)
 {
@@ -11891,7 +11897,7 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int 
num, abi_long arg1,
 unlock_user(target_grouplist, arg2,
 gidsetsize * sizeof(target_id));
 }
-return get_errno(setgroups(gidsetsize, grouplist));
+return get_errno(sys_setgroups(gidsetsize, grouplist));
 }
 case TARGET_NR_fchown:
 return get_errno(fchown(arg1, low2highuid(arg2), low2highgid(arg3)));
@@ -12227,7 +12233,7 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int 
num, abi_long arg1,
 }
 unlock_user(target_grouplist, arg2, 0);
 }
-return get_errno(setgroups(gidsetsize, grouplist));
+return get_errno(sys_setgroups(gidsetsize, grouplist));
 }
 #endif
 #ifdef TARGET_NR_fchown32
-- 
2.34.1




[PULL 18/24] tcg/loongarch64: Enable v256 with LASX

2024-06-19 Thread Richard Henderson
Reviewed-by: Song Gao 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.h | 2 +-
 tcg/loongarch64/tcg-target.c.inc | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index 990bad1d51..58bd7d258e 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -173,7 +173,7 @@ typedef enum {
 
 #define TCG_TARGET_HAS_v64  (cpuinfo & CPUINFO_LSX)
 #define TCG_TARGET_HAS_v128 (cpuinfo & CPUINFO_LSX)
-#define TCG_TARGET_HAS_v256 0
+#define TCG_TARGET_HAS_v256 (cpuinfo & CPUINFO_LASX)
 
 #define TCG_TARGET_HAS_not_vec  1
 #define TCG_TARGET_HAS_neg_vec  1
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index dff966c395..1c4dc4decb 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -2487,6 +2487,9 @@ static void tcg_target_init(TCGContext *s)
 if (cpuinfo & CPUINFO_LSX) {
 tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
 tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
+if (cpuinfo & CPUINFO_LASX) {
+tcg_target_available_regs[TCG_TYPE_V256] = ALL_VECTOR_REGS;
+}
 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
-- 
2.34.1




[PULL 21/24] accel/tcg: Fix typo causing tb->page_addr[1] to not be recorded

2024-06-19 Thread Richard Henderson
From: Anton Johansson 

For TBs crossing page boundaries, the 2nd page will never be
recorded/removed, as the index of the 2nd page is computed from the
address of the 1st page. This is due to a typo, fix it.

Cc: qemu-sta...@nongnu.org
Fixes: deba78709a ("accel/tcg: Always lock pages before translation")
Signed-off-by: Anton Johansson 
Reviewed-by: Manos Pitsidianakis 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Alex Bennée 
Message-Id: <20240612133031.15298-1-a...@rev.ng>
Signed-off-by: Richard Henderson 
---
 accel/tcg/tb-maint.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index 19ae6793f3..cc0f5afd47 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -713,7 +713,7 @@ static void tb_record(TranslationBlock *tb)
 tb_page_addr_t paddr0 = tb_page_addr0(tb);
 tb_page_addr_t paddr1 = tb_page_addr1(tb);
 tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
-tb_page_addr_t pindex1 = paddr0 >> TARGET_PAGE_BITS;
+tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
 
 assert(paddr0 != -1);
 if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
@@ -745,7 +745,7 @@ static void tb_remove(TranslationBlock *tb)
 tb_page_addr_t paddr0 = tb_page_addr0(tb);
 tb_page_addr_t paddr1 = tb_page_addr1(tb);
 tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
-tb_page_addr_t pindex1 = paddr0 >> TARGET_PAGE_BITS;
+tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
 
 assert(paddr0 != -1);
 if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
-- 
2.34.1




[PULL 06/24] tcg/loongarch64: Simplify tcg_out_dup_vec

2024-06-19 Thread Richard Henderson
Reviewed-by: Song Gao 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 22 ++
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 980ea10211..b1d652355d 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1674,22 +1674,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
 TCGReg rd, TCGReg rs)
 {
-switch (vece) {
-case MO_8:
-tcg_out_opc_vreplgr2vr_b(s, rd, rs);
-break;
-case MO_16:
-tcg_out_opc_vreplgr2vr_h(s, rd, rs);
-break;
-case MO_32:
-tcg_out_opc_vreplgr2vr_w(s, rd, rs);
-break;
-case MO_64:
-tcg_out_opc_vreplgr2vr_d(s, rd, rs);
-break;
-default:
-g_assert_not_reached();
-}
+static const LoongArchInsn repl_insn[4] = {
+OPC_VREPLGR2VR_B, OPC_VREPLGR2VR_H, OPC_VREPLGR2VR_W, OPC_VREPLGR2VR_D
+};
+
+tcg_debug_assert(vece <= MO_64);
+tcg_out32(s, encode_vdj_insn(repl_insn[vece], rd, rs));
 return true;
 }
 
-- 
2.34.1




[PULL 20/24] util/bufferiszero: Add loongarch64 vector acceleration

2024-06-19 Thread Richard Henderson
Use inline assembly because no release compiler allows
per-function selection of the ISA.

Tested-by: Bibo Mao 
Signed-off-by: Richard Henderson 
---
 .../loongarch64/host/bufferiszero.c.inc   | 143 ++
 1 file changed, 143 insertions(+)
 create mode 100644 host/include/loongarch64/host/bufferiszero.c.inc

diff --git a/host/include/loongarch64/host/bufferiszero.c.inc 
b/host/include/loongarch64/host/bufferiszero.c.inc
new file mode 100644
index 00..69891eac80
--- /dev/null
+++ b/host/include/loongarch64/host/bufferiszero.c.inc
@@ -0,0 +1,143 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * buffer_is_zero acceleration, loongarch64 version.
+ */
+
+/*
+ * Builtins for LSX and LASX are introduced by gcc 14 and llvm 18,
+ * but as yet neither has support for attribute target, so neither
+ * is able to enable the optimization without globally enabling
+ * vector support.  Since we want runtime detection, use assembly.
+ */
+
+static bool buffer_is_zero_lsx(const void *buf, size_t len)
+{
+const void *p = QEMU_ALIGN_PTR_DOWN(buf + 16, 16);
+const void *e = QEMU_ALIGN_PTR_DOWN(buf + len - 1, 16) - (7 * 16);
+const void *l = buf + len;
+bool ret;
+
+asm("vld $vr0,%2,0\n\t" /* first: buf + 0 */
+"vld $vr1,%4,-16\n\t"   /* last: buf + len - 16 */
+"vld $vr2,%3,0\n\t" /* e[0] */
+"vld $vr3,%3,16\n\t"/* e[1] */
+"vld $vr4,%3,32\n\t"/* e[2] */
+"vld $vr5,%3,48\n\t"/* e[3] */
+"vld $vr6,%3,64\n\t"/* e[4] */
+"vld $vr7,%3,80\n\t"/* e[5] */
+"vld $vr8,%3,96\n\t"/* e[6] */
+"vor.v $vr0,$vr0,$vr1\n\t"
+"vor.v $vr2,$vr2,$vr3\n\t"
+"vor.v $vr4,$vr4,$vr5\n\t"
+"vor.v $vr6,$vr6,$vr7\n\t"
+"vor.v $vr0,$vr0,$vr2\n\t"
+"vor.v $vr4,$vr4,$vr6\n\t"
+"vor.v $vr0,$vr0,$vr4\n\t"
+"vor.v $vr0,$vr0,$vr8\n\t"
+"or %0,$r0,$r0\n"   /* prepare return false */
+"1:\n\t"
+"vsetnez.v $fcc0,$vr0\n\t"
+"bcnez $fcc0,2f\n\t"
+"vld $vr0,%1,0\n\t" /* p[0] */
+"vld $vr1,%1,16\n\t"/* p[1] */
+"vld $vr2,%1,32\n\t"/* p[2] */
+"vld $vr3,%1,48\n\t"/* p[3] */
+"vld $vr4,%1,64\n\t"/* p[4] */
+"vld $vr5,%1,80\n\t"/* p[5] */
+"vld $vr6,%1,96\n\t"/* p[6] */
+"vld $vr7,%1,112\n\t"   /* p[7] */
+"addi.d %1,%1,128\n\t"
+"vor.v $vr0,$vr0,$vr1\n\t"
+"vor.v $vr2,$vr2,$vr3\n\t"
+"vor.v $vr4,$vr4,$vr5\n\t"
+"vor.v $vr6,$vr6,$vr7\n\t"
+"vor.v $vr0,$vr0,$vr2\n\t"
+"vor.v $vr4,$vr4,$vr6\n\t"
+"vor.v $vr0,$vr0,$vr4\n\t"
+"bltu %1,%3,1b\n\t"
+"vsetnez.v $fcc0,$vr0\n\t"
+"bcnez $fcc0,2f\n\t"
+"ori %0,$r0,1\n"
+"2:"
+: "=&r"(ret), "+r"(p)
+: "r"(buf), "r"(e), "r"(l)
+: "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "fcc0");
+
+return ret;
+}
+
+static bool buffer_is_zero_lasx(const void *buf, size_t len)
+{
+const void *p = QEMU_ALIGN_PTR_DOWN(buf + 32, 32);
+const void *e = QEMU_ALIGN_PTR_DOWN(buf + len - 1, 32) - (7 * 32);
+const void *l = buf + len;
+bool ret;
+
+asm("xvld $xr0,%2,0\n\t" /* first: buf + 0 */
+"xvld $xr1,%4,-32\n\t"   /* last: buf + len - 32 */
+"xvld $xr2,%3,0\n\t" /* e[0] */
+"xvld $xr3,%3,32\n\t"/* e[1] */
+"xvld $xr4,%3,64\n\t"/* e[2] */
+"xvld $xr5,%3,96\n\t"/* e[3] */
+"xvld $xr6,%3,128\n\t"   /* e[4] */
+"xvld $xr7,%3,160\n\t"   /* e[5] */
+"xvld $xr8,%3,192\n\t"   /* e[6] */
+"xvor.v $xr0,$xr0,$xr1\n\t"
+"xvor.v $xr2,$xr2,$xr3\n\t"
+"xvor.v $xr4,$xr4,$xr5\n\t"
+"xvor.v $xr6,$xr6,$xr7\n\t"
+"xvor.v $xr0,$xr0,$xr2\n\t"
+"xvor.v $xr4,$xr4,$xr6\n\t"
+"xvor.v $xr0,$xr0,$xr4\n\t"
+"xvor.v $xr0,$xr0,$xr8\n\t"
+"or %0,$r0,$r0\n\t"  /* prepare return false */
+"bgeu %1,%3,2f\n"
+"1:\n\t"
+"xvsetnez.v $fcc0,$xr0\n\t"
+"bcnez $fcc0,3f\n\t"
+"xvld $xr0,%1,0\n\t" /* p[0] */
+"xvld $xr1,%1,32\n\t"/* p[1] */
+"xvld $xr2,%1,64\n\t"/* p[2] */
+"xvld $xr3,%1,96\n\t"/* p[3] */
+"xvld $xr4,%1,128\n\t"   /* p[4] */
+"xvld $xr5,%1,160\n\t"   /* p[5] */
+"xvld $xr6,%1,192\n\t"   /* p[6] */
+"xvld $xr7,%1,224\n\t"   /* p[7] */
+"addi.d %1,%1,256\n\t"
+"xvor.v $xr0,$xr0,$xr1\n\t"
+"xvor.v $xr2,$xr2,$xr3\n\t"
+"xvor.v $xr4,$xr4,$xr5\n\t"
+"xvor.v $xr6,$xr6,$xr7\n\t"
+"xvor.v $xr0,$xr0,$xr2\n\t"
+ 

[PULL 13/24] tcg/loongarch64: Split out vdvjvk in tcg_out_vec_op

2024-06-19 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 119 ---
 1 file changed, 63 insertions(+), 56 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 652aa261a3..8f5f38aa0a 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1900,49 +1900,55 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 tcg_out_ld(s, type, a0, a1, a2);
 break;
 case INDEX_op_and_vec:
-tcg_out_opc_vand_v(s, a0, a1, a2);
-break;
+insn = OPC_VAND_V;
+goto vdvjvk;
 case INDEX_op_andc_vec:
 /*
  * vandn vd, vj, vk: vd = vk & ~vj
  * andc_vec vd, vj, vk: vd = vj & ~vk
- * vk and vk are swapped
+ * vj and vk are swapped
  */
-tcg_out_opc_vandn_v(s, a0, a2, a1);
-break;
+a1 = a2;
+a2 = args[1];
+insn = OPC_VANDN_V;
+goto vdvjvk;
 case INDEX_op_or_vec:
-tcg_out_opc_vor_v(s, a0, a1, a2);
-break;
+insn = OPC_VOR_V;
+goto vdvjvk;
 case INDEX_op_orc_vec:
-tcg_out_opc_vorn_v(s, a0, a1, a2);
-break;
+insn = OPC_VORN_V;
+goto vdvjvk;
 case INDEX_op_xor_vec:
-tcg_out_opc_vxor_v(s, a0, a1, a2);
-break;
-case INDEX_op_nor_vec:
-tcg_out_opc_vnor_v(s, a0, a1, a2);
-break;
+insn = OPC_VXOR_V;
+goto vdvjvk;
 case INDEX_op_not_vec:
-tcg_out_opc_vnor_v(s, a0, a1, a1);
-break;
+a2 = a1;
+/* fall through */
+case INDEX_op_nor_vec:
+insn = OPC_VNOR_V;
+goto vdvjvk;
 case INDEX_op_cmp_vec:
 {
 TCGCond cond = args[3];
+
 if (const_args[2]) {
 /*
  * cmp_vec dest, src, value
  * Try vseqi/vslei/vslti
  */
 int64_t value = sextract64(a2, 0, 8 << vece);
-if ((cond == TCG_COND_EQ || cond == TCG_COND_LE || \
- cond == TCG_COND_LT) && (-0x10 <= value && value <= 
0x0f)) {
-tcg_out32(s, 
encode_vdvjsk5_insn(cmp_vec_imm_insn[cond][vece], \
- a0, a1, value));
+if ((cond == TCG_COND_EQ ||
+ cond == TCG_COND_LE ||
+ cond == TCG_COND_LT) &&
+(-0x10 <= value && value <= 0x0f)) {
+insn = cmp_vec_imm_insn[cond][vece];
+tcg_out32(s, encode_vdvjsk5_insn(insn, a0, a1, value));
 break;
-} else if ((cond == TCG_COND_LEU || cond == TCG_COND_LTU) &&
-(0x00 <= value && value <= 0x1f)) {
-tcg_out32(s, 
encode_vdvjuk5_insn(cmp_vec_imm_insn[cond][vece], \
- a0, a1, value));
+} else if ((cond == TCG_COND_LEU ||
+cond == TCG_COND_LTU) &&
+   (0x00 <= value && value <= 0x1f)) {
+insn = cmp_vec_imm_insn[cond][vece];
+tcg_out32(s, encode_vdvjuk5_insn(insn, a0, a1, value));
 break;
 }
 
@@ -1963,9 +1969,8 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 insn = cmp_vec_insn[cond][vece];
 tcg_debug_assert(insn != 0);
 }
-tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2));
 }
-break;
+goto vdvjvk;
 case INDEX_op_add_vec:
 tcg_out_addsub_vec(s, false, vece, a0, a1, a2, const_args[2], true);
 break;
@@ -1976,41 +1981,41 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], a0, a1));
 break;
 case INDEX_op_mul_vec:
-tcg_out32(s, encode_vdvjvk_insn(mul_vec_insn[vece], a0, a1, a2));
-break;
+insn = mul_vec_insn[vece];
+goto vdvjvk;
 case INDEX_op_smin_vec:
-tcg_out32(s, encode_vdvjvk_insn(smin_vec_insn[vece], a0, a1, a2));
-break;
+insn = smin_vec_insn[vece];
+goto vdvjvk;
 case INDEX_op_smax_vec:
-tcg_out32(s, encode_vdvjvk_insn(smax_vec_insn[vece], a0, a1, a2));
-break;
+insn = smax_vec_insn[vece];
+goto vdvjvk;
 case INDEX_op_umin_vec:
-tcg_out32(s, encode_vdvjvk_insn(umin_vec_insn[vece], a0, a1, a2));
-break;
+insn = umin_vec_insn[vece];
+goto vdvjvk;
 case INDEX_op_umax_vec:
-tcg_out32(s, encode_vdvjvk_insn(umax_vec_insn[vece], a0, a1, a2));
-break;
+insn = umax_vec_insn[vece];
+goto vdvjvk;
 case INDEX_op_ssadd_vec:
-tcg_out32(s, encode_vdvjvk_insn(ssadd_vec_insn[vece], a0, a1, a2));
-break;
+insn = ssadd_vec_insn[vece];
+goto 

[PULL 08/24] tcg/loongarch64: Support LASX in tcg_out_dupm_vec

2024-06-19 Thread Richard Henderson
Each element size has a different encoding, so code cannot
be shared in the same way as with tcg_out_dup_vec.

Reviewed-by: Song Gao 
Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 30 --
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index cc54bc4a53..1e721b8b20 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1690,8 +1690,10 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, 
unsigned vece,
 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
  TCGReg r, TCGReg base, intptr_t offset)
 {
-/* Handle imm overflow and division (vldrepl.d imm is divided by 8) */
-if (offset < -0x800 || offset > 0x7ff || \
+bool lasx = type == TCG_TYPE_V256;
+
+/* Handle imm overflow and division (vldrepl.d imm is divided by 8). */
+if (offset < -0x800 || offset > 0x7ff ||
 (offset & ((1 << vece) - 1)) != 0) {
 tcg_out_addi(s, TCG_TYPE_I64, TCG_REG_TMP0, base, offset);
 base = TCG_REG_TMP0;
@@ -1701,16 +1703,32 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType 
type, unsigned vece,
 
 switch (vece) {
 case MO_8:
-tcg_out_opc_vldrepl_b(s, r, base, offset);
+if (lasx) {
+tcg_out_opc_xvldrepl_b(s, r, base, offset);
+} else {
+tcg_out_opc_vldrepl_b(s, r, base, offset);
+}
 break;
 case MO_16:
-tcg_out_opc_vldrepl_h(s, r, base, offset);
+if (lasx) {
+tcg_out_opc_xvldrepl_h(s, r, base, offset);
+} else {
+tcg_out_opc_vldrepl_h(s, r, base, offset);
+}
 break;
 case MO_32:
-tcg_out_opc_vldrepl_w(s, r, base, offset);
+if (lasx) {
+tcg_out_opc_xvldrepl_w(s, r, base, offset);
+} else {
+tcg_out_opc_vldrepl_w(s, r, base, offset);
+}
 break;
 case MO_64:
-tcg_out_opc_vldrepl_d(s, r, base, offset);
+if (lasx) {
+tcg_out_opc_xvldrepl_d(s, r, base, offset);
+} else {
+tcg_out_opc_vldrepl_d(s, r, base, offset);
+}
 break;
 default:
 g_assert_not_reached();
-- 
2.34.1




[PULL 09/24] tcg/loongarch64: Use tcg_out_dup_vec in tcg_out_dupi_vec

2024-06-19 Thread Richard Henderson
Reviewed-by: Song Gao 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 18 +-
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 1e721b8b20..9a8f67cf3e 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1749,24 +1749,8 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType 
type, unsigned vece,
 
 /* TODO: vldi patterns when imm 12 is set */
 
-/* Fallback to vreplgr2vr */
 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, value);
-switch (vece) {
-case MO_8:
-tcg_out_opc_vreplgr2vr_b(s, rd, TCG_REG_TMP0);
-break;
-case MO_16:
-tcg_out_opc_vreplgr2vr_h(s, rd, TCG_REG_TMP0);
-break;
-case MO_32:
-tcg_out_opc_vreplgr2vr_w(s, rd, TCG_REG_TMP0);
-break;
-case MO_64:
-tcg_out_opc_vreplgr2vr_d(s, rd, TCG_REG_TMP0);
-break;
-default:
-g_assert_not_reached();
-}
+tcg_out_dup_vec(s, type, vece, rd, TCG_REG_TMP0);
 }
 
 static void tcg_out_addsub_vec(TCGContext *s, unsigned vece, const TCGArg a0,
-- 
2.34.1




[PULL 14/24] tcg/loongarch64: Support LASX in tcg_out_{mov,ld,st}

2024-06-19 Thread Richard Henderson
Reviewed-by: Song Gao 
Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 8f5f38aa0a..4ead3bedef 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -325,6 +325,9 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg 
ret, TCGReg arg)
 case TCG_TYPE_V128:
 tcg_out_opc_vori_b(s, ret, arg, 0);
 break;
+case TCG_TYPE_V256:
+tcg_out_opc_xvori_b(s, ret, arg, 0);
+break;
 default:
 g_assert_not_reached();
 }
@@ -854,6 +857,14 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg 
dest,
 tcg_out_opc_vldx(s, dest, base, TCG_REG_TMP0);
 }
 break;
+case TCG_TYPE_V256:
+if (-0x800 <= offset && offset <= 0x7ff) {
+tcg_out_opc_xvld(s, dest, base, offset);
+} else {
+tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+tcg_out_opc_xvldx(s, dest, base, TCG_REG_TMP0);
+}
+break;
 default:
 g_assert_not_reached();
 }
@@ -886,6 +897,14 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg 
src,
 tcg_out_opc_vstx(s, src, base, TCG_REG_TMP0);
 }
 break;
+case TCG_TYPE_V256:
+if (-0x800 <= offset && offset <= 0x7ff) {
+tcg_out_opc_xvst(s, src, base, offset);
+} else {
+tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+tcg_out_opc_xvstx(s, src, base, TCG_REG_TMP0);
+}
+break;
 default:
 g_assert_not_reached();
 }
-- 
2.34.1




[PULL 07/24] tcg/loongarch64: Support LASX in tcg_out_dup_vec

2024-06-19 Thread Richard Henderson
Reviewed-by: Song Gao 
Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index b1d652355d..cc54bc4a53 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1674,12 +1674,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
 TCGReg rd, TCGReg rs)
 {
-static const LoongArchInsn repl_insn[4] = {
-OPC_VREPLGR2VR_B, OPC_VREPLGR2VR_H, OPC_VREPLGR2VR_W, OPC_VREPLGR2VR_D
+static const LoongArchInsn repl_insn[2][4] = {
+{ OPC_VREPLGR2VR_B, OPC_VREPLGR2VR_H,
+  OPC_VREPLGR2VR_W, OPC_VREPLGR2VR_D },
+{ OPC_XVREPLGR2VR_B, OPC_XVREPLGR2VR_H,
+  OPC_XVREPLGR2VR_W, OPC_XVREPLGR2VR_D },
 };
+bool lasx = type == TCG_TYPE_V256;
 
 tcg_debug_assert(vece <= MO_64);
-tcg_out32(s, encode_vdj_insn(repl_insn[vece], rd, rs));
+tcg_out32(s, encode_vdj_insn(repl_insn[lasx][vece], rd, rs));
 return true;
 }
 
-- 
2.34.1




[PULL 23/24] target/sparc: use signed denominator in sdiv helper

2024-06-19 Thread Richard Henderson
From: Clément Chigot 

The result has to be done with the signed denominator (b32) instead of
the unsigned value passed in argument (b).

Cc: qemu-sta...@nongnu.org
Fixes: 1326010322d6 ("target/sparc: Remove CC_OP_DIV")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2319
Signed-off-by: Clément Chigot 
Reviewed-by: Richard Henderson 
Message-Id: <20240606144331.698361-1-chi...@adacore.com>
Signed-off-by: Richard Henderson 
---
 target/sparc/helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/sparc/helper.c b/target/sparc/helper.c
index 2247e243b5..7846ddd6f6 100644
--- a/target/sparc/helper.c
+++ b/target/sparc/helper.c
@@ -121,7 +121,7 @@ uint64_t helper_sdiv(CPUSPARCState *env, target_ulong a, 
target_ulong b)
 return (uint32_t)(b32 < 0 ? INT32_MAX : INT32_MIN) | (-1ull << 32);
 }
 
-a64 /= b;
+a64 /= b32;
 r = a64;
 if (unlikely(r != a64)) {
 return (uint32_t)(a64 < 0 ? INT32_MIN : INT32_MAX) | (-1ull << 32);
-- 
2.34.1




[PULL 05/24] util/loongarch64: Detect LASX vector support

2024-06-19 Thread Richard Henderson
Reviewed-by: Song Gao 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 host/include/loongarch64/host/cpuinfo.h | 1 +
 util/cpuinfo-loongarch.c| 1 +
 2 files changed, 2 insertions(+)

diff --git a/host/include/loongarch64/host/cpuinfo.h 
b/host/include/loongarch64/host/cpuinfo.h
index fab664a10b..d7bf27501d 100644
--- a/host/include/loongarch64/host/cpuinfo.h
+++ b/host/include/loongarch64/host/cpuinfo.h
@@ -8,6 +8,7 @@
 
 #define CPUINFO_ALWAYS  (1u << 0)  /* so cpuinfo is nonzero */
 #define CPUINFO_LSX (1u << 1)
+#define CPUINFO_LASX(1u << 2)
 
 /* Initialized with a constructor. */
 extern unsigned cpuinfo;
diff --git a/util/cpuinfo-loongarch.c b/util/cpuinfo-loongarch.c
index 08b6d7460c..bb1f7f698b 100644
--- a/util/cpuinfo-loongarch.c
+++ b/util/cpuinfo-loongarch.c
@@ -29,6 +29,7 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
 
 info = CPUINFO_ALWAYS;
 info |= (hwcap & HWCAP_LOONGARCH_LSX ? CPUINFO_LSX : 0);
+info |= (hwcap & HWCAP_LOONGARCH_LASX ? CPUINFO_LASX : 0);
 
 cpuinfo = info;
 return info;
-- 
2.34.1




[PULL 10/24] tcg/loongarch64: Support LASX in tcg_out_dupi_vec

2024-06-19 Thread Richard Henderson
Reviewed-by: Song Gao 
Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 9a8f67cf3e..c7d0c7839b 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1743,7 +1743,12 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType 
type, unsigned vece,
 int64_t value = sextract64(v64, 0, 8 << vece);
 if (-0x200 <= value && value <= 0x1FF) {
 uint32_t imm = (vece << 10) | ((uint32_t)v64 & 0x3FF);
-tcg_out_opc_vldi(s, rd, imm);
+
+if (type == TCG_TYPE_V256) {
+tcg_out_opc_xvldi(s, rd, imm);
+} else {
+tcg_out_opc_vldi(s, rd, imm);
+}
 return;
 }
 
-- 
2.34.1




[PULL 17/24] tcg/loongarch64: Support LASX in tcg_out_vec_op

2024-06-19 Thread Richard Henderson
Reviewed-by: Song Gao 
Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 223 +++
 1 file changed, 137 insertions(+), 86 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index ab1b67e028..dff966c395 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1833,76 +1833,125 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode 
opc,
const int const_args[TCG_MAX_OP_ARGS])
 {
 TCGType type = vecl + TCG_TYPE_V64;
+bool lasx = type == TCG_TYPE_V256;
 TCGArg a0, a1, a2, a3;
-
-static const LoongArchInsn cmp_vec_insn[16][4] = {
-[TCG_COND_EQ] = {OPC_VSEQ_B, OPC_VSEQ_H, OPC_VSEQ_W, OPC_VSEQ_D},
-[TCG_COND_LE] = {OPC_VSLE_B, OPC_VSLE_H, OPC_VSLE_W, OPC_VSLE_D},
-[TCG_COND_LEU] = {OPC_VSLE_BU, OPC_VSLE_HU, OPC_VSLE_WU, OPC_VSLE_DU},
-[TCG_COND_LT] = {OPC_VSLT_B, OPC_VSLT_H, OPC_VSLT_W, OPC_VSLT_D},
-[TCG_COND_LTU] = {OPC_VSLT_BU, OPC_VSLT_HU, OPC_VSLT_WU, OPC_VSLT_DU},
-};
-static const LoongArchInsn cmp_vec_imm_insn[16][4] = {
-[TCG_COND_EQ] = {OPC_VSEQI_B, OPC_VSEQI_H, OPC_VSEQI_W, OPC_VSEQI_D},
-[TCG_COND_LE] = {OPC_VSLEI_B, OPC_VSLEI_H, OPC_VSLEI_W, OPC_VSLEI_D},
-[TCG_COND_LEU] = {OPC_VSLEI_BU, OPC_VSLEI_HU, OPC_VSLEI_WU, 
OPC_VSLEI_DU},
-[TCG_COND_LT] = {OPC_VSLTI_B, OPC_VSLTI_H, OPC_VSLTI_W, OPC_VSLTI_D},
-[TCG_COND_LTU] = {OPC_VSLTI_BU, OPC_VSLTI_HU, OPC_VSLTI_WU, 
OPC_VSLTI_DU},
-};
 LoongArchInsn insn;
-static const LoongArchInsn neg_vec_insn[4] = {
-OPC_VNEG_B, OPC_VNEG_H, OPC_VNEG_W, OPC_VNEG_D
+
+static const LoongArchInsn cmp_vec_insn[16][2][4] = {
+[TCG_COND_EQ] = {
+{ OPC_VSEQ_B, OPC_VSEQ_H, OPC_VSEQ_W, OPC_VSEQ_D },
+{ OPC_XVSEQ_B, OPC_XVSEQ_H, OPC_XVSEQ_W, OPC_XVSEQ_D },
+},
+[TCG_COND_LE] = {
+{ OPC_VSLE_B, OPC_VSLE_H, OPC_VSLE_W, OPC_VSLE_D },
+{ OPC_XVSLE_B, OPC_XVSLE_H, OPC_XVSLE_W, OPC_XVSLE_D },
+},
+[TCG_COND_LEU] = {
+{ OPC_VSLE_BU, OPC_VSLE_HU, OPC_VSLE_WU, OPC_VSLE_DU },
+{ OPC_XVSLE_BU, OPC_XVSLE_HU, OPC_XVSLE_WU, OPC_XVSLE_DU },
+},
+[TCG_COND_LT] = {
+{ OPC_VSLT_B, OPC_VSLT_H, OPC_VSLT_W, OPC_VSLT_D },
+{ OPC_XVSLT_B, OPC_XVSLT_H, OPC_XVSLT_W, OPC_XVSLT_D },
+},
+[TCG_COND_LTU] = {
+{ OPC_VSLT_BU, OPC_VSLT_HU, OPC_VSLT_WU, OPC_VSLT_DU },
+{ OPC_XVSLT_BU, OPC_XVSLT_HU, OPC_XVSLT_WU, OPC_XVSLT_DU },
+}
 };
-static const LoongArchInsn mul_vec_insn[4] = {
-OPC_VMUL_B, OPC_VMUL_H, OPC_VMUL_W, OPC_VMUL_D
+static const LoongArchInsn cmp_vec_imm_insn[16][2][4] = {
+[TCG_COND_EQ] = {
+{ OPC_VSEQI_B, OPC_VSEQI_H, OPC_VSEQI_W, OPC_VSEQI_D },
+{ OPC_XVSEQI_B, OPC_XVSEQI_H, OPC_XVSEQI_W, OPC_XVSEQI_D },
+},
+[TCG_COND_LE] = {
+{ OPC_VSLEI_B, OPC_VSLEI_H, OPC_VSLEI_W, OPC_VSLEI_D },
+{ OPC_XVSLEI_B, OPC_XVSLEI_H, OPC_XVSLEI_W, OPC_XVSLEI_D },
+},
+[TCG_COND_LEU] = {
+{ OPC_VSLEI_BU, OPC_VSLEI_HU, OPC_VSLEI_WU, OPC_VSLEI_DU },
+{ OPC_XVSLEI_BU, OPC_XVSLEI_HU, OPC_XVSLEI_WU, OPC_XVSLEI_DU },
+},
+[TCG_COND_LT] = {
+{ OPC_VSLTI_B, OPC_VSLTI_H, OPC_VSLTI_W, OPC_VSLTI_D },
+{ OPC_XVSLTI_B, OPC_XVSLTI_H, OPC_XVSLTI_W, OPC_XVSLTI_D },
+},
+[TCG_COND_LTU] = {
+{ OPC_VSLTI_BU, OPC_VSLTI_HU, OPC_VSLTI_WU, OPC_VSLTI_DU },
+{ OPC_XVSLTI_BU, OPC_XVSLTI_HU, OPC_XVSLTI_WU, OPC_XVSLTI_DU },
+}
 };
-static const LoongArchInsn smin_vec_insn[4] = {
-OPC_VMIN_B, OPC_VMIN_H, OPC_VMIN_W, OPC_VMIN_D
+static const LoongArchInsn neg_vec_insn[2][4] = {
+{ OPC_VNEG_B, OPC_VNEG_H, OPC_VNEG_W, OPC_VNEG_D },
+{ OPC_XVNEG_B, OPC_XVNEG_H, OPC_XVNEG_W, OPC_XVNEG_D },
 };
-static const LoongArchInsn umin_vec_insn[4] = {
-OPC_VMIN_BU, OPC_VMIN_HU, OPC_VMIN_WU, OPC_VMIN_DU
+static const LoongArchInsn mul_vec_insn[2][4] = {
+{ OPC_VMUL_B, OPC_VMUL_H, OPC_VMUL_W, OPC_VMUL_D },
+{ OPC_XVMUL_B, OPC_XVMUL_H, OPC_XVMUL_W, OPC_XVMUL_D },
 };
-static const LoongArchInsn smax_vec_insn[4] = {
-OPC_VMAX_B, OPC_VMAX_H, OPC_VMAX_W, OPC_VMAX_D
+static const LoongArchInsn smin_vec_insn[2][4] = {
+{ OPC_VMIN_B, OPC_VMIN_H, OPC_VMIN_W, OPC_VMIN_D },
+{ OPC_XVMIN_B, OPC_XVMIN_H, OPC_XVMIN_W, OPC_XVMIN_D },
 };
-static const LoongArchInsn umax_vec_insn[4] = {
-OPC_VMAX_BU, OPC_VMAX_HU, OPC_VMAX_WU, OPC_VMAX_DU
+static const LoongArchInsn umin_vec_insn[2][4] = {
+{ OPC_VMIN_BU, OPC_VMIN_HU, OPC_VMIN_WU, OPC_VMIN_DU },
+{ OPC_XVMIN_BU, OPC_XVMIN_HU, OPC_XVMIN_WU, OPC_XVMIN_DU },
 };
-static const LoongArchInsn 

[PULL 00/24] tcg patch queue

2024-06-19 Thread Richard Henderson
The following changes since commit 223696363bb117241ad9c2facbff0c474afa4104:

  Merge tag 'edgar/xilinx-queue-2024-06-17.for-upstream' of 
https://gitlab.com/edgar.iglesias/qemu into staging (2024-06-18 13:08:01 -0700)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20240619

for you to fetch changes up to 521d7fb3ebdf88112ed13556a93e3037742b9eb8:

  tcg/loongarch64: Fix tcg_out_movi vs some pcrel pointers (2024-06-19 13:50:22 
-0700)


tcg/loongarch64: Support 64- and 256-bit vectors
tcg/loongarch64: Fix tcg_out_movi vs some pcrel pointers
util/bufferiszero: Split out host include files
util/bufferiszero: Add loongarch64 vector acceleration
accel/tcg: Fix typo causing tb->page_addr[1] to not be recorded
target/sparc: use signed denominator in sdiv helper
linux-user: Make TARGET_NR_setgroups affect only the current thread


Anton Johansson (1):
  accel/tcg: Fix typo causing tb->page_addr[1] to not be recorded

Clément Chigot (1):
  target/sparc: use signed denominator in sdiv helper

Ilya Leoshkevich (1):
  linux-user: Make TARGET_NR_setgroups affect only the current thread

Richard Henderson (21):
  tcg/loongarch64: Import LASX, FP insns
  tcg/loongarch64: Use fp load/store for I32 and I64 into vector regs
  tcg/loongarch64: Handle i32 and i64 moves between gr and fr
  tcg/loongarch64: Support TCG_TYPE_V64
  util/loongarch64: Detect LASX vector support
  tcg/loongarch64: Simplify tcg_out_dup_vec
  tcg/loongarch64: Support LASX in tcg_out_dup_vec
  tcg/loongarch64: Support LASX in tcg_out_dupm_vec
  tcg/loongarch64: Use tcg_out_dup_vec in tcg_out_dupi_vec
  tcg/loongarch64: Support LASX in tcg_out_dupi_vec
  tcg/loongarch64: Simplify tcg_out_addsub_vec
  tcg/loongarch64: Support LASX in tcg_out_addsub_vec
  tcg/loongarch64: Split out vdvjvk in tcg_out_vec_op
  tcg/loongarch64: Support LASX in tcg_out_{mov,ld,st}
  tcg/loongarch64: Remove temp_vec from tcg_out_vec_op
  tcg/loongarch64: Split out vdvjukN in tcg_out_vec_op
  tcg/loongarch64: Support LASX in tcg_out_vec_op
  tcg/loongarch64: Enable v256 with LASX
  util/bufferiszero: Split out host include files
  util/bufferiszero: Add loongarch64 vector acceleration
  tcg/loongarch64: Fix tcg_out_movi vs some pcrel pointers

 host/include/loongarch64/host/cpuinfo.h  |1 +
 tcg/loongarch64/tcg-target.h |4 +-
 accel/tcg/tb-maint.c |4 +-
 linux-user/syscall.c |   10 +-
 target/sparc/helper.c|2 +-
 util/bufferiszero.c  |  191 +-
 util/cpuinfo-loongarch.c |1 +
 host/include/aarch64/host/bufferiszero.c.inc |   76 +
 host/include/generic/host/bufferiszero.c.inc |   10 +
 host/include/i386/host/bufferiszero.c.inc|  124 +
 host/include/loongarch64/host/bufferiszero.c.inc |  143 +
 host/include/x86_64/host/bufferiszero.c.inc  |1 +
 tcg/loongarch64/tcg-insn-defs.c.inc  | 6181 --
 tcg/loongarch64/tcg-target.c.inc |  601 ++-
 14 files changed, 2838 insertions(+), 4511 deletions(-)
 create mode 100644 host/include/aarch64/host/bufferiszero.c.inc
 create mode 100644 host/include/generic/host/bufferiszero.c.inc
 create mode 100644 host/include/i386/host/bufferiszero.c.inc
 create mode 100644 host/include/loongarch64/host/bufferiszero.c.inc
 create mode 100644 host/include/x86_64/host/bufferiszero.c.inc



[PULL 11/24] tcg/loongarch64: Simplify tcg_out_addsub_vec

2024-06-19 Thread Richard Henderson
Reviewed-by: Song Gao 
Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 29 +++--
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index c7d0c7839b..47011488dd 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1774,33 +1774,34 @@ static void tcg_out_addsub_vec(TCGContext *s, unsigned 
vece, const TCGArg a0,
 static const LoongArchInsn sub_vec_imm_insn[4] = {
 OPC_VSUBI_BU, OPC_VSUBI_HU, OPC_VSUBI_WU, OPC_VSUBI_DU
 };
+LoongArchInsn insn;
 
 if (a2_is_const) {
 int64_t value = sextract64(a2, 0, 8 << vece);
+
 if (!is_add) {
 value = -value;
 }
-
-/* Try vaddi/vsubi */
-if (0 <= value && value <= 0x1f) {
-tcg_out32(s, encode_vdvjuk5_insn(add_vec_imm_insn[vece], a0, \
- a1, value));
-return;
-} else if (-0x1f <= value && value < 0) {
-tcg_out32(s, encode_vdvjuk5_insn(sub_vec_imm_insn[vece], a0, \
- a1, -value));
-return;
+if (value < 0) {
+insn = sub_vec_imm_insn[vece];
+value = -value;
+} else {
+insn = add_vec_imm_insn[vece];
 }
 
-/* constraint TCG_CT_CONST_VADD ensures unreachable */
-g_assert_not_reached();
+/* Constraint TCG_CT_CONST_VADD ensures validity. */
+tcg_debug_assert(0 <= value && value <= 0x1f);
+
+tcg_out32(s, encode_vdvjuk5_insn(insn, a0, a1, value));
+return;
 }
 
 if (is_add) {
-tcg_out32(s, encode_vdvjvk_insn(add_vec_insn[vece], a0, a1, a2));
+insn = add_vec_insn[vece];
 } else {
-tcg_out32(s, encode_vdvjvk_insn(sub_vec_insn[vece], a0, a1, a2));
+insn = sub_vec_insn[vece];
 }
+tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2));
 }
 
 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
-- 
2.34.1




[PULL 24/24] tcg/loongarch64: Fix tcg_out_movi vs some pcrel pointers

2024-06-19 Thread Richard Henderson
Simplify the logic for two-part, 32-bit pc-relative addresses.
Rather than assume all such fit in int32_t, do some arithmetic
and assert a result, do some arithmetic first and then check
to see if the pieces are in range.

Cc: qemu-sta...@nongnu.org
Fixes: dacc51720db ("tcg/loongarch64: Implement tcg_out_mov and tcg_out_movi")
Reviewed-by: Song Gao 
Reported-by: Song Gao 
Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 32 +++-
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 1c4dc4decb..5b7ed5c176 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -382,8 +382,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, 
TCGReg rd,
  * back to the slow path.
  */
 
-intptr_t pc_offset;
-tcg_target_long val_lo, val_hi, pc_hi, offset_hi;
+intptr_t src_rx, pc_offset;
 tcg_target_long hi12, hi32, hi52;
 
 /* Value fits in signed i32.  */
@@ -393,24 +392,23 @@ static void tcg_out_movi(TCGContext *s, TCGType type, 
TCGReg rd,
 }
 
 /* PC-relative cases.  */
-pc_offset = tcg_pcrel_diff(s, (void *)val);
-if (pc_offset == sextreg(pc_offset, 0, 22) && (pc_offset & 3) == 0) {
-/* Single pcaddu2i.  */
-tcg_out_opc_pcaddu2i(s, rd, pc_offset >> 2);
-return;
+src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
+if ((val & 3) == 0) {
+pc_offset = val - src_rx;
+if (pc_offset == sextreg(pc_offset, 0, 22)) {
+/* Single pcaddu2i.  */
+tcg_out_opc_pcaddu2i(s, rd, pc_offset >> 2);
+return;
+}
 }
 
-if (pc_offset == (int32_t)pc_offset) {
-/* Offset within 32 bits; load with pcalau12i + ori.  */
-val_lo = sextreg(val, 0, 12);
-val_hi = val >> 12;
-pc_hi = (val - pc_offset) >> 12;
-offset_hi = val_hi - pc_hi;
-
-tcg_debug_assert(offset_hi == sextreg(offset_hi, 0, 20));
-tcg_out_opc_pcalau12i(s, rd, offset_hi);
+pc_offset = (val >> 12) - (src_rx >> 12);
+if (pc_offset == sextreg(pc_offset, 0, 20)) {
+/* Load with pcalau12i + ori.  */
+tcg_target_long val_lo = val & 0xfff;
+tcg_out_opc_pcalau12i(s, rd, pc_offset);
 if (val_lo != 0) {
-tcg_out_opc_ori(s, rd, rd, val_lo & 0xfff);
+tcg_out_opc_ori(s, rd, rd, val_lo);
 }
 return;
 }
-- 
2.34.1




[PULL 16/24] tcg/loongarch64: Split out vdvjukN in tcg_out_vec_op

2024-06-19 Thread Richard Henderson
Fixes a bug in the immediate shifts, because the exact
encoding depends on the element size.

Reviewed-by: Song Gao 
Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 58 ++--
 1 file changed, 32 insertions(+), 26 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 1d9e0bf028..ab1b67e028 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1901,6 +1901,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 static const LoongArchInsn rotrv_vec_insn[4] = {
 OPC_VROTR_B, OPC_VROTR_H, OPC_VROTR_W, OPC_VROTR_D
 };
+static const LoongArchInsn rotri_vec_insn[4] = {
+OPC_VROTRI_B, OPC_VROTRI_H, OPC_VROTRI_W, OPC_VROTRI_D
+};
 
 a0 = args[0];
 a1 = args[1];
@@ -2034,15 +2037,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_sarv_vec:
 insn = sarv_vec_insn[vece];
 goto vdvjvk;
-case INDEX_op_shli_vec:
-tcg_out32(s, encode_vdvjuk3_insn(shli_vec_insn[vece], a0, a1, a2));
-break;
-case INDEX_op_shri_vec:
-tcg_out32(s, encode_vdvjuk3_insn(shri_vec_insn[vece], a0, a1, a2));
-break;
-case INDEX_op_sari_vec:
-tcg_out32(s, encode_vdvjuk3_insn(sari_vec_insn[vece], a0, a1, a2));
-break;
 case INDEX_op_rotlv_vec:
 /* rotlv_vec a1, a2 = rotrv_vec a1, -a2 */
 tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], TCG_VEC_TMP0, a2));
@@ -2051,26 +2045,20 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_rotrv_vec:
 insn = rotrv_vec_insn[vece];
 goto vdvjvk;
+case INDEX_op_shli_vec:
+insn = shli_vec_insn[vece];
+goto vdvjukN;
+case INDEX_op_shri_vec:
+insn = shri_vec_insn[vece];
+goto vdvjukN;
+case INDEX_op_sari_vec:
+insn = sari_vec_insn[vece];
+goto vdvjukN;
 case INDEX_op_rotli_vec:
 /* rotli_vec a1, a2 = rotri_vec a1, -a2 */
 a2 = extract32(-a2, 0, 3 + vece);
-switch (vece) {
-case MO_8:
-tcg_out_opc_vrotri_b(s, a0, a1, a2);
-break;
-case MO_16:
-tcg_out_opc_vrotri_h(s, a0, a1, a2);
-break;
-case MO_32:
-tcg_out_opc_vrotri_w(s, a0, a1, a2);
-break;
-case MO_64:
-tcg_out_opc_vrotri_d(s, a0, a1, a2);
-break;
-default:
-g_assert_not_reached();
-}
-break;
+insn = rotri_vec_insn[vece];
+goto vdvjukN;
 case INDEX_op_bitsel_vec:
 /* vbitsel vd, vj, vk, va = bitsel_vec vd, va, vk, vj */
 tcg_out_opc_vbitsel_v(s, a0, a3, a2, a1);
@@ -2083,6 +2071,24 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 vdvjvk:
 tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2));
 break;
+vdvjukN:
+switch (vece) {
+case MO_8:
+tcg_out32(s, encode_vdvjuk3_insn(insn, a0, a1, a2));
+break;
+case MO_16:
+tcg_out32(s, encode_vdvjuk4_insn(insn, a0, a1, a2));
+break;
+case MO_32:
+tcg_out32(s, encode_vdvjuk5_insn(insn, a0, a1, a2));
+break;
+case MO_64:
+tcg_out32(s, encode_vdvjuk6_insn(insn, a0, a1, a2));
+break;
+default:
+g_assert_not_reached();
+}
+break;
 }
 }
 
-- 
2.34.1




[PULL 03/24] tcg/loongarch64: Handle i32 and i64 moves between gr and fr

2024-06-19 Thread Richard Henderson
Reviewed-by: Song Gao 
Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 22 +-
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index b9078ac793..de5369536e 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -303,11 +303,23 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, 
TCGReg ret, TCGReg arg)
 switch (type) {
 case TCG_TYPE_I32:
 case TCG_TYPE_I64:
-/*
- * Conventional register-register move used in LoongArch is
- * `or dst, src, zero`.
- */
-tcg_out_opc_or(s, ret, arg, TCG_REG_ZERO);
+if (ret < TCG_REG_V0) {
+if (arg < TCG_REG_V0) {
+/*
+ * Conventional register-register move used in LoongArch is
+ * `or dst, src, zero`.
+ */
+tcg_out_opc_or(s, ret, arg, TCG_REG_ZERO);
+} else {
+tcg_out_opc_movfr2gr_d(s, ret, arg);
+}
+} else {
+if (arg < TCG_REG_V0) {
+tcg_out_opc_movgr2fr_d(s, ret, arg);
+} else {
+tcg_out_opc_fmov_d(s, ret, arg);
+}
+}
 break;
 case TCG_TYPE_V128:
 tcg_out_opc_vori_b(s, ret, arg, 0);
-- 
2.34.1




[PULL 15/24] tcg/loongarch64: Remove temp_vec from tcg_out_vec_op

2024-06-19 Thread Richard Henderson
Use TCG_VEC_TMP0 directly.

Reviewed-by: Song Gao 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 4ead3bedef..1d9e0bf028 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1834,7 +1834,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 {
 TCGType type = vecl + TCG_TYPE_V64;
 TCGArg a0, a1, a2, a3;
-TCGReg temp_vec = TCG_VEC_TMP0;
 
 static const LoongArchInsn cmp_vec_insn[16][4] = {
 [TCG_COND_EQ] = {OPC_VSEQ_B, OPC_VSEQ_H, OPC_VSEQ_W, OPC_VSEQ_D},
@@ -1976,8 +1975,8 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
  * dupi_vec temp, a2
  * cmp_vec a0, a1, temp, cond
  */
-tcg_out_dupi_vec(s, type, vece, temp_vec, a2);
-a2 = temp_vec;
+tcg_out_dupi_vec(s, type, vece, TCG_VEC_TMP0, a2);
+a2 = TCG_VEC_TMP0;
 }
 
 insn = cmp_vec_insn[cond][vece];
@@ -2046,8 +2045,8 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 break;
 case INDEX_op_rotlv_vec:
 /* rotlv_vec a1, a2 = rotrv_vec a1, -a2 */
-tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], temp_vec, a2));
-a2 = temp_vec;
+tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], TCG_VEC_TMP0, a2));
+a2 = TCG_VEC_TMP0;
 /* fall through */
 case INDEX_op_rotrv_vec:
 insn = rotrv_vec_insn[vece];
-- 
2.34.1




[PULL 04/24] tcg/loongarch64: Support TCG_TYPE_V64

2024-06-19 Thread Richard Henderson
We can implement this with fld_d, fst_d for load and store,
and then use the normal v128 operations in registers.
This will improve support for guests which use v64.

Reviewed-by: Song Gao 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.h | 2 +-
 tcg/loongarch64/tcg-target.c.inc | 8 ++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index 29e4860d20..990bad1d51 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -171,7 +171,7 @@ typedef enum {
 
 #define TCG_TARGET_HAS_tst  0
 
-#define TCG_TARGET_HAS_v64  0
+#define TCG_TARGET_HAS_v64  (cpuinfo & CPUINFO_LSX)
 #define TCG_TARGET_HAS_v128 (cpuinfo & CPUINFO_LSX)
 #define TCG_TARGET_HAS_v256 0
 
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index de5369536e..980ea10211 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -321,6 +321,7 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg 
ret, TCGReg arg)
 }
 }
 break;
+case TCG_TYPE_V64:
 case TCG_TYPE_V128:
 tcg_out_opc_vori_b(s, ret, arg, 0);
 break;
@@ -838,6 +839,7 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg 
dest,
 }
 break;
 case TCG_TYPE_I64:
+case TCG_TYPE_V64:
 if (dest < TCG_REG_V0) {
 tcg_out_ldst(s, OPC_LD_D, dest, base, offset);
 } else {
@@ -869,6 +871,7 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg 
src,
 }
 break;
 case TCG_TYPE_I64:
+case TCG_TYPE_V64:
 if (src < TCG_REG_V0) {
 tcg_out_ldst(s, OPC_ST_D, src, base, offset);
 } else {
@@ -1880,8 +1883,8 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 a2 = args[2];
 a3 = args[3];
 
-/* Currently only supports V128 */
-tcg_debug_assert(type == TCG_TYPE_V128);
+/* Currently only supports V64 & V128 */
+tcg_debug_assert(type == TCG_TYPE_V64 || type == TCG_TYPE_V128);
 
 switch (opc) {
 case INDEX_op_st_vec:
@@ -2394,6 +2397,7 @@ static void tcg_target_init(TCGContext *s)
 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);
 
 if (cpuinfo & CPUINFO_LSX) {
+tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
 tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
-- 
2.34.1




[PULL 02/24] tcg/loongarch64: Use fp load/store for I32 and I64 into vector regs

2024-06-19 Thread Richard Henderson
Reviewed-by: Song Gao 
Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 36 +---
 1 file changed, 10 insertions(+), 26 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 06ca1ab11c..b9078ac793 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -803,6 +803,12 @@ static void tcg_out_ldst(TCGContext *s, LoongArchInsn opc, 
TCGReg data,
 case OPC_ST_D:
 tcg_out32(s, encode_djsk12_insn(opc, data, addr, imm12));
 break;
+case OPC_FLD_S:
+case OPC_FLD_D:
+case OPC_FST_S:
+case OPC_FST_D:
+tcg_out32(s, encode_fdjsk12_insn(opc, data, addr, imm12));
+break;
 default:
 g_assert_not_reached();
 }
@@ -816,14 +822,14 @@ static void tcg_out_ld(TCGContext *s, TCGType type, 
TCGReg dest,
 if (dest < TCG_REG_V0) {
 tcg_out_ldst(s, OPC_LD_W, dest, base, offset);
 } else {
-tcg_out_dupm_vec(s, TCG_TYPE_I128, MO_32, dest, base, offset);
+tcg_out_ldst(s, OPC_FLD_S, dest, base, offset);
 }
 break;
 case TCG_TYPE_I64:
 if (dest < TCG_REG_V0) {
 tcg_out_ldst(s, OPC_LD_D, dest, base, offset);
 } else {
-tcg_out_dupm_vec(s, TCG_TYPE_I128, MO_64, dest, base, offset);
+tcg_out_ldst(s, OPC_FLD_D, dest, base, offset);
 }
 break;
 case TCG_TYPE_V128:
@@ -847,36 +853,14 @@ static void tcg_out_st(TCGContext *s, TCGType type, 
TCGReg src,
 if (src < TCG_REG_V0) {
 tcg_out_ldst(s, OPC_ST_W, src, base, offset);
 } else {
-/* TODO: Could use fst_s, fstx_s */
-if (offset < -0x100 || offset > 0xff || (offset & 3)) {
-if (-0x800 <= offset && offset <= 0x7ff) {
-tcg_out_opc_addi_d(s, TCG_REG_TMP0, base, offset);
-} else {
-tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
-tcg_out_opc_add_d(s, TCG_REG_TMP0, TCG_REG_TMP0, base);
-}
-base = TCG_REG_TMP0;
-offset = 0;
-}
-tcg_out_opc_vstelm_w(s, src, base, offset, 0);
+tcg_out_ldst(s, OPC_FST_S, src, base, offset);
 }
 break;
 case TCG_TYPE_I64:
 if (src < TCG_REG_V0) {
 tcg_out_ldst(s, OPC_ST_D, src, base, offset);
 } else {
-/* TODO: Could use fst_d, fstx_d */
-if (offset < -0x100 || offset > 0xff || (offset & 7)) {
-if (-0x800 <= offset && offset <= 0x7ff) {
-tcg_out_opc_addi_d(s, TCG_REG_TMP0, base, offset);
-} else {
-tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
-tcg_out_opc_add_d(s, TCG_REG_TMP0, TCG_REG_TMP0, base);
-}
-base = TCG_REG_TMP0;
-offset = 0;
-}
-tcg_out_opc_vstelm_d(s, src, base, offset, 0);
+tcg_out_ldst(s, OPC_FST_D, src, base, offset);
 }
 break;
 case TCG_TYPE_V128:
-- 
2.34.1




[PULL 12/24] tcg/loongarch64: Support LASX in tcg_out_addsub_vec

2024-06-19 Thread Richard Henderson
Reviewed-by: Song Gao 
Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 36 ++--
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 47011488dd..652aa261a3 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1758,21 +1758,25 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType 
type, unsigned vece,
 tcg_out_dup_vec(s, type, vece, rd, TCG_REG_TMP0);
 }
 
-static void tcg_out_addsub_vec(TCGContext *s, unsigned vece, const TCGArg a0,
-   const TCGArg a1, const TCGArg a2,
+static void tcg_out_addsub_vec(TCGContext *s, bool lasx, unsigned vece,
+   TCGArg a0, TCGArg a1, TCGArg a2,
bool a2_is_const, bool is_add)
 {
-static const LoongArchInsn add_vec_insn[4] = {
-OPC_VADD_B, OPC_VADD_H, OPC_VADD_W, OPC_VADD_D
+static const LoongArchInsn add_vec_insn[2][4] = {
+{ OPC_VADD_B, OPC_VADD_H, OPC_VADD_W, OPC_VADD_D },
+{ OPC_XVADD_B, OPC_XVADD_H, OPC_XVADD_W, OPC_XVADD_D },
 };
-static const LoongArchInsn add_vec_imm_insn[4] = {
-OPC_VADDI_BU, OPC_VADDI_HU, OPC_VADDI_WU, OPC_VADDI_DU
+static const LoongArchInsn add_vec_imm_insn[2][4] = {
+{ OPC_VADDI_BU, OPC_VADDI_HU, OPC_VADDI_WU, OPC_VADDI_DU },
+{ OPC_XVADDI_BU, OPC_XVADDI_HU, OPC_XVADDI_WU, OPC_XVADDI_DU },
 };
-static const LoongArchInsn sub_vec_insn[4] = {
-OPC_VSUB_B, OPC_VSUB_H, OPC_VSUB_W, OPC_VSUB_D
+static const LoongArchInsn sub_vec_insn[2][4] = {
+{ OPC_VSUB_B, OPC_VSUB_H, OPC_VSUB_W, OPC_VSUB_D },
+{ OPC_XVSUB_B, OPC_XVSUB_H, OPC_XVSUB_W, OPC_XVSUB_D },
 };
-static const LoongArchInsn sub_vec_imm_insn[4] = {
-OPC_VSUBI_BU, OPC_VSUBI_HU, OPC_VSUBI_WU, OPC_VSUBI_DU
+static const LoongArchInsn sub_vec_imm_insn[2][4] = {
+{ OPC_VSUBI_BU, OPC_VSUBI_HU, OPC_VSUBI_WU, OPC_VSUBI_DU },
+{ OPC_XVSUBI_BU, OPC_XVSUBI_HU, OPC_XVSUBI_WU, OPC_XVSUBI_DU },
 };
 LoongArchInsn insn;
 
@@ -1783,10 +1787,10 @@ static void tcg_out_addsub_vec(TCGContext *s, unsigned 
vece, const TCGArg a0,
 value = -value;
 }
 if (value < 0) {
-insn = sub_vec_imm_insn[vece];
+insn = sub_vec_imm_insn[lasx][vece];
 value = -value;
 } else {
-insn = add_vec_imm_insn[vece];
+insn = add_vec_imm_insn[lasx][vece];
 }
 
 /* Constraint TCG_CT_CONST_VADD ensures validity. */
@@ -1797,9 +1801,9 @@ static void tcg_out_addsub_vec(TCGContext *s, unsigned 
vece, const TCGArg a0,
 }
 
 if (is_add) {
-insn = add_vec_insn[vece];
+insn = add_vec_insn[lasx][vece];
 } else {
-insn = sub_vec_insn[vece];
+insn = sub_vec_insn[lasx][vece];
 }
 tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2));
 }
@@ -1963,10 +1967,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 }
 break;
 case INDEX_op_add_vec:
-tcg_out_addsub_vec(s, vece, a0, a1, a2, const_args[2], true);
+tcg_out_addsub_vec(s, false, vece, a0, a1, a2, const_args[2], true);
 break;
 case INDEX_op_sub_vec:
-tcg_out_addsub_vec(s, vece, a0, a1, a2, const_args[2], false);
+tcg_out_addsub_vec(s, false, vece, a0, a1, a2, const_args[2], false);
 break;
 case INDEX_op_neg_vec:
 tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], a0, a1));
-- 
2.34.1




Re: [PATCH v2 08/10] tests/migration-tests: Always enable migration events

2024-06-19 Thread Peter Xu
On Mon, Jun 17, 2024 at 05:23:24PM -0400, Peter Xu wrote:
> On Mon, Jun 17, 2024 at 04:51:32PM -0300, Fabiano Rosas wrote:
> > Peter Xu  writes:
> > 
> > > Libvirt should always enable it, so it'll be nice if qtest also covers
> > > that for
> > > all tests.  Though this patch only enables it, no extra tests are done on
> > > these events yet.
> > >
> > > Signed-off-by: Peter Xu 
> > > ---
> > >  tests/qtest/migration-test.c | 7 +++
> > >  1 file changed, 7 insertions(+)
> > >
> > > diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
> > > index 13b59d4c10..9ae8892e26 100644
> > > --- a/tests/qtest/migration-test.c
> > > +++ b/tests/qtest/migration-test.c
> > > @@ -841,6 +841,13 @@ static int test_migrate_start(QTestState **from, 
> > > QTestState **to,
> > >  unlink(shmem_path);
> > >  }
> > >  
> > > +/*
> > > + * Always enable migration events.  Libvirt always uses it, let's try
> > > + * to mimic that as closely as possible.
> > > + */
> > > +migrate_set_capability(*from, "events", true);
> > > +migrate_set_capability(*to, "events", true);
> > > +
> > 
> > What do we do with the one at migrate_incoming_qmp()?
> 
> Hmm missed that..  I'll drop that one in this same patch and rewrite the
> commit message.  New version attached:
> 
> ===8<===
> From 443fef4188d544362fc026b46784c15b82624642 Mon Sep 17 00:00:00 2001
> From: Peter Xu 
> Date: Mon, 17 Jun 2024 10:49:52 -0400
> Subject: [PATCH] tests/migration-tests: Always enable migration events
> 
> Libvirt should always enable it, so it'll be nice if qtest also covers that
> for all tests on both sides.  migrate_incoming_qmp() used to enable it only on
> dst, now we enable them on both, as we'll start to sanity check events even
> on the src QEMU.
> 
> Signed-off-by: Peter Xu 
> ---
>  tests/qtest/migration-helpers.c | 2 --
>  tests/qtest/migration-test.c| 7 +++
>  2 files changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
> index 0ac49ceb54..797b1e8c1c 100644
> --- a/tests/qtest/migration-helpers.c
> +++ b/tests/qtest/migration-helpers.c
> @@ -258,8 +258,6 @@ void migrate_incoming_qmp(QTestState *to, const char 
> *uri, const char *fmt, ...)
>  g_assert(!qdict_haskey(args, "uri"));
>  qdict_put_str(args, "uri", uri);
>  
> -migrate_set_capability(to, "events", true);
> -

Unfortunately this will break virtio-net-failover test... as it uses
migrate_incoming_qmp() without using test_migrate_start().

I'll leave it there for now, perhaps adding a comment.

>  rsp = qtest_qmp(to, "{ 'execute': 'migrate-incoming', 'arguments': %p}",
>  args);
>  
> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
> index 640713bfd5..c015e801ac 100644
> --- a/tests/qtest/migration-test.c
> +++ b/tests/qtest/migration-test.c
> @@ -851,6 +851,13 @@ static int test_migrate_start(QTestState **from, 
> QTestState **to,
>  unlink(shmem_path);
>  }
>  
> +/*
> + * Always enable migration events.  Libvirt always uses it, let's try
> + * to mimic that as closely as possible.
> + */
> +migrate_set_capability(*from, "events", true);
> +migrate_set_capability(*to, "events", true);
> +
>  return 0;
>  }
>  
> -- 
> 2.45.0
> 
> 
> -- 
> Peter Xu

-- 
Peter Xu




[RFC PATCH v3 3/5] rust: add PL011 device model

2024-06-19 Thread Manos Pitsidianakis
This commit adds a re-implementation of hw/char/pl011.c in Rust.

It uses generated Rust bindings (produced by `ninja
aarch64-softmmu-generated.rs`) to
register itself as a QOM type/class.

How to build:

1. Make sure rust, cargo and bindgen (cargo install bindgen-cli) are
   installed
2. Configure a QEMU build with:
   --enable-system --target-list=aarch64-softmmu --enable-with-rust
3. Launching a VM with qemu-system-aarch64 should use the Rust version
   of the pl011 device (unless it is not set up so in hw/arm/virt.c; the
   type of the UART device is hardcoded).

   To confirm, inspect `info qom-tree` in the monitor and look for an
   `x-pl011-rust` device.

Signed-off-by: Manos Pitsidianakis 
---
 MAINTAINERS|   7 +
 meson.build|   4 +
 rust/.cargo/config.toml|   2 +
 rust/meson.build   |   2 +
 rust/pl011/.gitignore  |   2 +
 rust/pl011/Cargo.lock  | 120 +++
 rust/pl011/Cargo.toml  |  66 
 rust/pl011/README.md   |  42 +++
 rust/pl011/build.rs|  44 +++
 rust/pl011/deny.toml   |  57 
 rust/pl011/meson.build |   7 +
 rust/pl011/rustfmt.toml|   1 +
 rust/pl011/src/definitions.rs  |  95 ++
 rust/pl011/src/device.rs   | 531 ++
 rust/pl011/src/device_class.rs |  95 ++
 rust/pl011/src/generated.rs|   5 +
 rust/pl011/src/lib.rs  | 581 +
 rust/pl011/src/memory_ops.rs   |  38 +++
 rust/rustfmt.toml  |   7 +
 19 files changed, 1706 insertions(+)
 create mode 100644 rust/.cargo/config.toml
 create mode 100644 rust/pl011/.gitignore
 create mode 100644 rust/pl011/Cargo.lock
 create mode 100644 rust/pl011/Cargo.toml
 create mode 100644 rust/pl011/README.md
 create mode 100644 rust/pl011/build.rs
 create mode 100644 rust/pl011/deny.toml
 create mode 100644 rust/pl011/meson.build
 create mode 120000 rust/pl011/rustfmt.toml
 create mode 100644 rust/pl011/src/definitions.rs
 create mode 100644 rust/pl011/src/device.rs
 create mode 100644 rust/pl011/src/device_class.rs
 create mode 100644 rust/pl011/src/generated.rs
 create mode 100644 rust/pl011/src/lib.rs
 create mode 100644 rust/pl011/src/memory_ops.rs
 create mode 100644 rust/rustfmt.toml

diff --git a/MAINTAINERS b/MAINTAINERS
index 0f36bb3e9a..1420d83402 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1181,6 +1181,11 @@ F: include/hw/*/microbit*.h
 F: tests/qtest/microbit-test.c
 F: docs/system/arm/nrf.rst
 
+ARM PL011 Rust device
+M: Manos Pitsidianakis 
+S: Maintained
+F: rust/pl011/
+
 AVR Machines
 -
 
@@ -4230,6 +4235,8 @@ F: scripts/cargo_wrapper.py
 F: rust/meson.build
 F: rust/wrapper.h
 F: rust/.gitignore
+F: rust/rustfmt.toml
+F: rust/.cargo/config.toml
 
 Miscellaneous
 -
diff --git a/meson.build b/meson.build
index 2b305e745a..ca40a39ad7 100644
--- a/meson.build
+++ b/meson.build
@@ -296,6 +296,10 @@ if get_option('with_rust').allowed()
 endif
 with_rust = cargo.found()
 
+if with_rust
+  subdir('rust')
+endif
+
 # default flags for all hosts
 # We use -fwrapv to tell the compiler that we require a C dialect where
 # left shift of signed integers is well defined and has the expected
diff --git a/rust/.cargo/config.toml b/rust/.cargo/config.toml
new file mode 100644
index 00..241210ffa7
--- /dev/null
+++ b/rust/.cargo/config.toml
@@ -0,0 +1,2 @@
+[build]
+rustflags = ["-Crelocation-model=pic", "-Ctarget-feature=+crt-static"]
diff --git a/rust/meson.build b/rust/meson.build
index 435abd3e1c..e21309d922 100644
--- a/rust/meson.build
+++ b/rust/meson.build
@@ -109,6 +109,8 @@ endif
 # bindgen dependency is declared.
 rust_hw_target_list = {}
 
+subdir('pl011')
+
 foreach rust_hw_target, rust_hws: rust_hw_target_list
   foreach rust_hw_dev: rust_hws
 output = meson.current_build_dir() / rust_target_triple / rs_build_type / 
rust_hw_dev['output']
diff --git a/rust/pl011/.gitignore b/rust/pl011/.gitignore
new file mode 100644
index 00..d8db38b44e
--- /dev/null
+++ b/rust/pl011/.gitignore
@@ -0,0 +1,2 @@
+# Ignore generated bindings file overrides.
+src/generated.rs.inc
diff --git a/rust/pl011/Cargo.lock b/rust/pl011/Cargo.lock
new file mode 100644
index 00..d0fa46f9f5
--- /dev/null
+++ b/rust/pl011/Cargo.lock
@@ -0,0 +1,120 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "arbitrary-int"
+version = "1.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c84fc003e338a6f69fbd4f7fe9f92b535ff13e9af8997f3b14b6ddff8b1df46d"
+
+[[package]]
+name = "bilge"
+version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc707ed8ebf81de5cd6c7f48f54b4c8621760926cdf35a57000747c512e67b57"
+dependencies = [
+ "arbitrary-int",
+ "bilge-impl",
+]
+
+[[package]]
+name = "bilge-impl"
+version = "0.2.0"
+source = 

[RFC PATCH v3 2/5] rust: add bindgen step as a meson dependency

2024-06-19 Thread Manos Pitsidianakis
Add mechanism to generate rust hw targets that depend on a custom
bindgen target for rust bindings to C.

This way bindings will be created before the rust crate is compiled.

The bindings will end up in BUILDDIR/{target}-generated.rs and have the same 
name
as a target:

ninja aarch64-softmmu-generated.rs

The way the bindings are generated is:

1. All required C headers are included in a single file, in our case
   rust/wrapper.h for convenience. Otherwise we'd have to provide a list
   of headers every time to the bindgen tool.

2. Meson creates a generated_rs target that runs bindgen making sure
   the architecture etc header dependencies are present.

3. The generated_rs target takes a list of files, type symbols,
   function symbols to block from being generated. This is not necessary
   for the bindings to work, but saves us time and space.

4. Meson creates rust hardware target dependencies from the rust_targets
   dictionary defined in rust/meson.build.

   Since we cannot declare a dependency on generated_rs before it is
   declared in meson.build, the rust crate targets must be defined after
   the generated_rs target for each target architecture is defined. This
   way meson sets up the dependency tree properly.

5. After compiling each rust crate with the cargo_wrapper.py script,
   its static library artifact is linked as a `whole-archive` with the
   final binary.

Signed-off-by: Manos Pitsidianakis 
---
 MAINTAINERS  |   3 +
 meson.build  |  56 +
 rust/.gitignore  |   3 +
 rust/meson.build | 129 +++
 rust/wrapper.h   |  39 
 scripts/cargo_wrapper.py |  10 +++
 6 files changed, 240 insertions(+)
 create mode 100644 rust/.gitignore
 create mode 100644 rust/meson.build
 create mode 100644 rust/wrapper.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 431010ddbf..0f36bb3e9a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4227,6 +4227,9 @@ Rust build system integration
 M: Manos Pitsidianakis 
 S: Maintained
 F: scripts/cargo_wrapper.py
+F: rust/meson.build
+F: rust/wrapper.h
+F: rust/.gitignore
 
 Miscellaneous
 -
diff --git a/meson.build b/meson.build
index 3533889852..2b305e745a 100644
--- a/meson.build
+++ b/meson.build
@@ -3876,6 +3876,62 @@ foreach target : target_dirs
 lib_deps += dep.partial_dependency(compile_args: true, includes: true)
   endforeach
 
+  if with_rust and target_type == 'system'
+   # FIXME:
+   # > WARNING: Project specifies a minimum meson_version '>=0.63.0' but
+   # > uses features which were added in newer versions:
+   # > * 0.64.0: {'fs.copyfile'}
+   # > * 1.0.0: {'dependencies arg in rust.bindgen', 'module rust as 
stable module'}
+  rust_bindgen = import('rust')
+
+  # We need one generated_rs target per target, so give them
+  # target-specific names.
+  copy = fs.copyfile('rust/wrapper.h',
+ target + '_wrapper.h')
+  generated_rs = rust_bindgen.bindgen(
+input: copy,
+dependencies: arch_deps + lib_deps,
+output: target + '-generated.rs',
+include_directories: include_directories('.', 'include'),
+args: [
+  '--ctypes-prefix', 'core::ffi',
+  '--formatter', 'rustfmt',
+  '--generate-block',
+  '--generate-cstr',
+  '--impl-debug',
+  '--merge-extern-blocks',
+  '--no-doc-comments',
+  '--no-include-path-detection',
+  '--use-core',
+  '--with-derive-default',
+  '--allowlist-file', meson.project_source_root() + '/include/.*',
+  '--allowlist-file', meson.project_source_root() + '/.*',
+  '--allowlist-file', meson.project_build_root() + '/.*'
+],
+  )
+
+  if target in rust_targets
+rust_hw = ss.source_set()
+foreach t: rust_targets[target]
+  rust_device_cargo = custom_target(t['name'],
+   output: t['output'],
+   depends: [generated_rs],
+   build_always_stale: true,
+   command: t['command'])
+  rust_dep = declare_dependency(link_args: [
+  '-Wl,--whole-archive',
+  t['output-path'],
+  '-Wl,--no-whole-archive'
+  ],
+  sources: [rust_device_cargo])
+  rust_hw.add(rust_dep)
+endforeach
+rust_hw_config = rust_hw.apply(config_target, strict: false)
+arch_srcs += rust_hw_config.sources()
+arch_deps += rust_hw_config.dependencies()
+  endif
+  endif
+
   lib = static_library('qemu-' + target,
  sources: arch_srcs + genh,
  dependencies: lib_deps,
diff --git 

[RFC PATCH v3 1/5] build-sys: Add rust feature option

2024-06-19 Thread Manos Pitsidianakis
Add options for Rust in meson_options.txt, meson.build, configure to
prepare for adding Rust code in the followup commits.

`rust` is a reserved meson name, so we have to use an alternative.
`with_rust` was chosen.

A cargo_wrapper.py script is added that is heavily based on the work of
Marc-André Lureau from 2021.

https://patchew.org/QEMU/20210907121943.3498701-1-marcandre.lur...@redhat.com/

Signed-off-by: Marc-André Lureau 
Signed-off-by: Manos Pitsidianakis 
---
 MAINTAINERS   |   5 +
 configure |  11 ++
 meson.build   |  11 ++
 meson_options.txt |   4 +
 scripts/cargo_wrapper.py  | 279 ++
 scripts/meson-buildoptions.sh |   6 +
 6 files changed, 316 insertions(+)
 create mode 100644 scripts/cargo_wrapper.py

diff --git a/MAINTAINERS b/MAINTAINERS
index 1b79767d61..431010ddbf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4223,6 +4223,11 @@ F: docs/sphinx/
 F: docs/_templates/
 F: docs/devel/docs.rst
 
+Rust build system integration
+M: Manos Pitsidianakis 
+S: Maintained
+F: scripts/cargo_wrapper.py
+
 Miscellaneous
 -
 Performance Tools and Tests
diff --git a/configure b/configure
index 38ee257701..6894d7c2d1 100755
--- a/configure
+++ b/configure
@@ -302,6 +302,9 @@ else
   objcc="${objcc-${cross_prefix}clang}"
 fi
 
+with_rust="auto"
+with_rust_target_triple=""
+
 ar="${AR-${cross_prefix}ar}"
 as="${AS-${cross_prefix}as}"
 ccas="${CCAS-$cc}"
@@ -760,6 +763,12 @@ for opt do
   ;;
   --gdb=*) gdb_bin="$optarg"
   ;;
+  --enable-with-rust) with_rust=enabled
+  ;;
+  --disable-with-rust) with_rust=disabled
+  ;;
+  --with-rust-target-triple=*) with_rust_target_triple="$optarg"
+  ;;
   # everything else has the same name in configure and meson
   --*) meson_option_parse "$opt" "$optarg"
   ;;
@@ -1796,6 +1805,8 @@ if test "$skip_meson" = no; then
   test -n "${LIB_FUZZING_ENGINE+xxx}" && meson_option_add 
"-Dfuzzing_engine=$LIB_FUZZING_ENGINE"
   test "$plugins" = yes && meson_option_add "-Dplugins=true"
   test "$tcg" != enabled && meson_option_add "-Dtcg=$tcg"
+  test "$with_rust" != enabled && meson_option_add "-Dwith_rust=$with_rust"
+  test "$with_rust_target_triple" != "" && meson_option_add 
"-Dwith_rust_target_triple=$with_rust_target_triple"
   run_meson() {
 NINJA=$ninja $meson setup "$@" "$PWD" "$source_path"
   }
diff --git a/meson.build b/meson.build
index a9de71d450..3533889852 100644
--- a/meson.build
+++ b/meson.build
@@ -290,6 +290,12 @@ foreach lang : all_languages
   endif
 endforeach
 
+cargo = not_found
+if get_option('with_rust').allowed()
+  cargo = find_program('cargo', required: get_option('with_rust'))
+endif
+with_rust = cargo.found()
+
 # default flags for all hosts
 # We use -fwrapv to tell the compiler that we require a C dialect where
 # left shift of signed integers is well defined and has the expected
@@ -2066,6 +2072,7 @@ endif
 
 config_host_data = configuration_data()
 
+config_host_data.set('CONFIG_WITH_RUST', with_rust)
 audio_drivers_selected = []
 if have_system
   audio_drivers_available = {
@@ -4190,6 +4197,10 @@ if 'objc' in all_languages
 else
   summary_info += {'Objective-C compiler': false}
 endif
+summary_info += {'Rust support':  with_rust}
+if with_rust and get_option('with_rust_target_triple') != ''
+  summary_info += {'Rust target': get_option('with_rust_target_triple')}
+endif
 option_cflags = (get_option('debug') ? ['-g'] : [])
 if get_option('optimization') != 'plain'
   option_cflags += ['-O' + get_option('optimization')]
diff --git a/meson_options.txt b/meson_options.txt
index 4c1583eb40..223491b731 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -366,3 +366,7 @@ option('qemu_ga_version', type: 'string', value: '',
 
 option('hexagon_idef_parser', type : 'boolean', value : true,
description: 'use idef-parser to automatically generate TCG code for 
the Hexagon frontend')
+option('with_rust', type: 'feature', value: 'auto',
+   description: 'Enable Rust support')
+option('with_rust_target_triple', type : 'string', value: '',
+   description: 'Rust target triple')
diff --git a/scripts/cargo_wrapper.py b/scripts/cargo_wrapper.py
new file mode 100644
index 00..927336f80e
--- /dev/null
+++ b/scripts/cargo_wrapper.py
@@ -0,0 +1,279 @@
+#!/usr/bin/env python3
+
+"""Wrap cargo builds for meson integration
+
+This program builds Rust library crates and makes sure:
+ - They receive the correct --cfg compile flags from the QEMU build that calls
+   it.
+ - They receive the generated Rust bindings path so that they can copy it
+   inside their output subdirectories.
+ - Cargo puts all its build artifacts in the appropriate meson build directory.
+ - The produced static libraries are copied to the path the caller (meson)
+   defines.
+
+Copyright (c) 2020 Red Hat, Inc.
+Copyright (c) 2024 Linaro Ltd.
+
+Authors:
+ Marc-André Lureau 
+ Manos Pitsidianakis 
+
+This program is free software; you can 

[RFC PATCH v3 4/5] DO NOT MERGE: add rustdoc build for gitlab pages

2024-06-19 Thread Manos Pitsidianakis
Deploy the generated rustdocs for my personal rust qemu fork on gitlab.

The URL is:

https://rust-for-qemu-epilys-aebb06ca9f9adfe6584811c14ae44156501d935ba4.gitlab.io/pl011/index.html

Signed-off-by: Manos Pitsidianakis 
---
 .gitlab-ci.d/buildtest.yml | 64 +++---
 1 file changed, 45 insertions(+), 19 deletions(-)

diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml
index 91c57efded..380c24897d 100644
--- a/.gitlab-ci.d/buildtest.yml
+++ b/.gitlab-ci.d/buildtest.yml
@@ -715,31 +715,57 @@ build-tools-and-docs-debian:
 # For contributor forks we want to publish from any repo so
 # that users can see the results of their commits, regardless
 # of what topic branch they're currently using
+# pages:
+#   extends: .base_job_template
+#   image: $CI_REGISTRY_IMAGE/qemu/debian:$QEMU_CI_CONTAINER_TAG
+#   stage: test
+#   needs:
+# - job: build-tools-and-docs-debian
+#   script:
+# - mkdir -p public
+# # HTML-ised source tree
+# - make gtags
+# # We unset variables to work around a bug in some htags versions
+# # which causes it to fail when the environment is large
+# - CI_COMMIT_MESSAGE= CI_COMMIT_TAG_MESSAGE= htags
+# -anT --tree-view=filetree -m qemu_init
+# -t "Welcome to the QEMU sourcecode"
+# - mv HTML public/src
+# # Project documentation
+# - make -C build install DESTDIR=$(pwd)/temp-install
+# - mv temp-install/usr/local/share/doc/qemu/* public/
+#   artifacts:
+# when: on_success
+# paths:
+#   - public
+#   variables:
+# QEMU_JOB_PUBLISH: 1
+# The Docker image that will be used to build your app
 pages:
-  extends: .base_job_template
-  image: $CI_REGISTRY_IMAGE/qemu/debian:$QEMU_CI_CONTAINER_TAG
-  stage: test
-  needs:
-- job: build-tools-and-docs-debian
+  image: rust:latest
   script:
-- mkdir -p public
-# HTML-ised source tree
-- make gtags
-# We unset variables to work around a bug in some htags versions
-# which causes it to fail when the environment is large
-- CI_COMMIT_MESSAGE= CI_COMMIT_TAG_MESSAGE= htags
--anT --tree-view=filetree -m qemu_init
--t "Welcome to the QEMU sourcecode"
-- mv HTML public/src
-# Project documentation
-- make -C build install DESTDIR=$(pwd)/temp-install
-- mv temp-install/usr/local/share/doc/qemu/* public/
+- rustup component add rustfmt
+- DEBIAN_FRONTEND=noninteractive apt-get update -y
+- DEBIAN_FRONTEND=noninteractive apt-get install -y python3-venv meson 
libgcrypt20-dev zlib1g-dev autoconf automake libtool bison flex git 
libglib2.0-dev libfdt-dev libpixman-1-dev ninja-build make libclang-14-dev
+- cargo install bindgen-cli
+- mkdir ./build/
+- cd ./build/
+- ../configure --enable-system --disable-kvm --target-list=aarch64-softmmu 
--enable-with-rust
+- ninja "aarch64-softmmu-generated.rs"
+- cp ./aarch64-softmmu-generated.rs ../rust/pl011/src/generated.rs.inc
+- cd ../rust/pl011/
+- cargo tree --depth 1 -e normal --prefix none | cut -d' ' -f1  | xargs
+  printf -- '-p %s\n'  | xargs cargo doc --no-deps 
--document-private-items --target x86_64-unknown-linux-gnu
+- cd ./../..
+- mv ./rust/pl011/target/x86_64-unknown-linux-gnu/doc ./public
   artifacts:
 when: on_success
 paths:
   - public
-  variables:
-QEMU_JOB_PUBLISH: 1
+  rules:
+# This ensures that only pushes to the default branch will trigger
+# a pages deploy
+- if: $CI_COMMIT_REF_NAME == $CI_DEFAULT_BRANCH
 
 coverity:
   image: $CI_REGISTRY_IMAGE/qemu/fedora:$QEMU_CI_CONTAINER_TAG
-- 
γαῖα πυρί μιχθήτω




[RFC PATCH v3 5/5] DO NOT MERGE: replace TYPE_PL011 with x-pl011-rust in arm virt machine

2024-06-19 Thread Manos Pitsidianakis
Convenience patch for testing the rust device.

Signed-off-by: Manos Pitsidianakis 
---
 hw/arm/virt.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 3c93c0c0a6..f33b58ae0d 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -912,7 +912,11 @@ static void create_uart(const VirtMachineState *vms, int 
uart,
 int irq = vms->irqmap[uart];
 const char compat[] = "arm,pl011\0arm,primecell";
 const char clocknames[] = "uartclk\0apb_pclk";
+#ifdef CONFIG_WITH_RUST
+DeviceState *dev = qdev_new("x-pl011-rust");
+#else
 DeviceState *dev = qdev_new(TYPE_PL011);
+#endif
 SysBusDevice *s = SYS_BUS_DEVICE(dev);
 MachineState *ms = MACHINE(vms);
 
-- 
γαῖα πυρί μιχθήτω




[RFC PATCH v3 0/5] Implement ARM PL011 in Rust

2024-06-19 Thread Manos Pitsidianakis
Changes from v2->v3:
- Addressed minor mistakes (thanks Stefan)
- Setup supported version checks for cargo, rustc and bindgen (thanks 
  everyone who pointed it out / suggested it)
- Fixed problem with bindgen failing if certain system headers where 
  needed by defining an allowlist for headers instead of a blocklist for 
  what we don't want (thanks Alex Bennée for reporting it)
- Cleaned up bindgen target/dependendy definition in meson.build by 
  removing unnecessary bits

Changes from v1->v2:
- Create bindgen target first, then add commit for device (thanks 
  Pierrick)
- Create a special named generated.rs for each target as compilation 
  would fail if more than one targets were defined. The generated.rs 
  target names would clash.
- Add more descriptive commit messages
- Update MAINTAINERS
- Cleanup patch order for better review, hopefully

v2 was:


v1 was:


Patches can be found online at 
https://gitlab.com/epilys/rust-for-qemu/-/tags

Tag/refs:
- rust-pl011-rfc-v3
- rust-pl011-rfc-v2
- rust-pl011-rfc-v1

Manos Pitsidianakis (5):
  build-sys: Add rust feature option
  rust: add bindgen step as a meson dependency
  rust: add PL011 device model
  DO NOT MERGE: add rustdoc build for gitlab pages
  DO NOT MERGE: replace TYPE_PL011 with x-pl011-rust in arm virt machine

 .gitlab-ci.d/buildtest.yml |  64 ++--
 MAINTAINERS|  15 +
 configure  |  11 +
 hw/arm/virt.c  |   4 +
 meson.build|  71 
 meson_options.txt  |   4 +
 rust/.cargo/config.toml|   2 +
 rust/.gitignore|   3 +
 rust/meson.build   | 131 
 rust/pl011/.gitignore  |   2 +
 rust/pl011/Cargo.lock  | 120 +++
 rust/pl011/Cargo.toml  |  66 
 rust/pl011/README.md   |  42 +++
 rust/pl011/build.rs|  44 +++
 rust/pl011/deny.toml   |  57 
 rust/pl011/meson.build |   7 +
 rust/pl011/rustfmt.toml|   1 +
 rust/pl011/src/definitions.rs  |  95 ++
 rust/pl011/src/device.rs   | 531 ++
 rust/pl011/src/device_class.rs |  95 ++
 rust/pl011/src/generated.rs|   5 +
 rust/pl011/src/lib.rs  | 581 +
 rust/pl011/src/memory_ops.rs   |  38 +++
 rust/rustfmt.toml  |   7 +
 rust/wrapper.h |  39 +++
 scripts/cargo_wrapper.py   | 289 
 scripts/meson-buildoptions.sh  |   6 +
 27 files changed, 2311 insertions(+), 19 deletions(-)
 create mode 100644 rust/.cargo/config.toml
 create mode 100644 rust/.gitignore
 create mode 100644 rust/meson.build
 create mode 100644 rust/pl011/.gitignore
 create mode 100644 rust/pl011/Cargo.lock
 create mode 100644 rust/pl011/Cargo.toml
 create mode 100644 rust/pl011/README.md
 create mode 100644 rust/pl011/build.rs
 create mode 100644 rust/pl011/deny.toml
 create mode 100644 rust/pl011/meson.build
 create mode 120000 rust/pl011/rustfmt.toml
 create mode 100644 rust/pl011/src/definitions.rs
 create mode 100644 rust/pl011/src/device.rs
 create mode 100644 rust/pl011/src/device_class.rs
 create mode 100644 rust/pl011/src/generated.rs
 create mode 100644 rust/pl011/src/lib.rs
 create mode 100644 rust/pl011/src/memory_ops.rs
 create mode 100644 rust/rustfmt.toml
 create mode 100644 rust/wrapper.h
 create mode 100644 scripts/cargo_wrapper.py


base-commit: 01782d6b294f95bcde334386f0aaac593cd28c0d
-- 
γαῖα πυρί μιχθήτω




Re: [PULL v3 00/74] Misc patches for 2024-06-19

2024-06-19 Thread Richard Henderson

On 6/19/24 03:54, Philippe Mathieu-Daudé wrote:

The following changes since commit 223696363bb117241ad9c2facbff0c474afa4104:

   Merge tag 'edgar/xilinx-queue-2024-06-17.for-upstream' 
ofhttps://gitlab.com/edgar.iglesias/qemu  into staging (2024-06-18 13:08:01 
-0700)

are available in the Git repository at:

   https://github.com/philmd/qemu.git  tags/misc-20240619

for you to fetch changes up to fc0870c180872d0f40e63507cc6bf8565ffd8d98:

   exec: Make the MemOp enum cast explicit (2024-06-19 12:52:21 +0200)

Spurious warning (3 times):

  WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?

Deliberately ignored in ui/ (also 3 times):

   WARNING: line over 80 characters


Misc patches queue

. Remove deprecated pc-i440fx-2.0 -> 2.3 machines (Phil)
. Always use little endian audio format in virtio-snd (Phil)
. Avoid using Monitor in INTERRUPT_STATS_PROVIDER::print_info (Phil)
. Introduce x-query-interrupt-controllers QMP command (Phil)
. Introduce pnv_chip_foreach_cpu() to remove one CPU_FOREACH use (Cédric)
. Constify few uses of IOMMUTLBEvent (Phil)
. Wire loongson_ipi device to loongson3_virt/TCG (Jiaxun)
. Fix inclusion of tracing headers on s390x/TCG (Phil)
. Add few shortcuts missing to readline (Manos)
. Update ui/display entries in MAINTAINERS (Gerd)
. Use qemu_add_mouse_change_notifier on Cocoa (Akihiko)
. Fix Standard VGA screen blanking and cleanups (Gerd)
. Fix USB/MTP reported "free space" value (Fabio)
. Cast size_memop() returned value (Roman)


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/9.1 as 
appropriate.


r~




[PATCH v4] hw/arm/virt-acpi-build: Fix id_count in build_iort_id_mapping

2024-06-19 Thread Nicolin Chen
It's observed that Linux kernel booting with the VM reports a "conflicting
mapping for input ID" FW_BUG.

The IORT doc defines "Number of IDs" to be "the number of IDs in the range
minus one", while virt-acpi-build.c simply stores the number of IDs in the
id_count without the "minus one". Meanwhile, some of the callers pass in a
0xffff following the spec. So, this is a mismatch between the function and
its callers.

Fix build_iort_id_mapping() by internally subtracting one from the pass-in
@id_count. Accordingly make sure that all existing callers pass in a value
without the "minus one", i.e. change all 0xffffs to 0x10000s.

Also, add a few lines of comments to highlight this change along with the
referencing document for this build_iort_id_mapping().

Fixes: 42e0f050e3a5 ("hw/arm/virt-acpi-build: Add IORT support to bypass 
SMMUv3")
Suggested-by: Michael S. Tsirkin 
Reviewed-by: Eric Auger 
Signed-off-by: Nicolin Chen 
---
Changelog
v4:
 * Rephrased the function documentation and used the latest IORT spec ver.
 * Added "Reviewed-by" from Eric
v3:
 https://lore.kernel.org/all/2024061820.922809-1-nicol...@nvidia.com/
 * Added "-1" internally in build_iort_id_mapping() instead
 * Added comments to highlight this and referencing doc
v2:
 https://lore.kernel.org/all/20240617223945.906996-1-nicol...@nvidia.com/
 * Moved "-1" to the same line of id_count calculation
 * Added "+1" to the next_range.input_base calculation
v1:
 https://lore.kernel.org/all/20240613234802.828265-1-nicol...@nvidia.com/

 hw/arm/virt-acpi-build.c | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index c3ccfef026..60a79b91ca 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -209,12 +209,19 @@ static void acpi_dsdt_add_tpm(Aml *scope, 
VirtMachineState *vms)
 #define ROOT_COMPLEX_ENTRY_SIZE 36
 #define IORT_NODE_OFFSET 48
 
+/*
+ * Append an ID mapping entry as described by "Table 4 ID mapping format" in
+ * "IO Remapping Table System Software on ARM Platforms", Chapter 3.
+ * Document number: ARM DEN 0049E.f, Apr 2024
+ *
+ * Note that @id_count gets internally subtracted by one, following the spec.
+ */
 static void build_iort_id_mapping(GArray *table_data, uint32_t input_base,
   uint32_t id_count, uint32_t out_ref)
 {
-/* Table 4 ID mapping format */
 build_append_int_noprefix(table_data, input_base, 4); /* Input base */
-build_append_int_noprefix(table_data, id_count, 4); /* Number of IDs */
+/* Number of IDs - The number of IDs in the range minus one */
+build_append_int_noprefix(table_data, id_count - 1, 4);
 build_append_int_noprefix(table_data, input_base, 4); /* Output base */
 build_append_int_noprefix(table_data, out_ref, 4); /* Output Reference */
 /* Flags */
@@ -306,8 +313,8 @@ build_iort(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 }
 
 /* Append the last RC -> ITS ID mapping */
-if (next_range.input_base < 0xffff) {
-next_range.id_count = 0xffff - next_range.input_base;
+if (next_range.input_base < 0x10000) {
+next_range.id_count = 0x10000 - next_range.input_base;
 g_array_append_val(its_idmaps, next_range);
 }
 
@@ -366,7 +373,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 build_append_int_noprefix(table_data, 0, 4);
 
 /* output IORT node is the ITS group node (the first node) */
-build_iort_id_mapping(table_data, 0, 0xffff, IORT_NODE_OFFSET);
+build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET);
 }
 
 /* Table 17 Root Complex Node */
@@ -419,7 +426,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 }
 } else {
 /* output IORT node is the ITS group node (the first node) */
-build_iort_id_mapping(table_data, 0, 0xffff, IORT_NODE_OFFSET);
+build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET);
 }
 
 acpi_table_end(linker, );
-- 
2.43.0




Re: [PATCH v3] hw/arm/virt-acpi-build: Fix id_count in build_iort_id_mapping

2024-06-19 Thread Nicolin Chen
On Wed, Jun 19, 2024 at 04:15:35PM +0200, Eric Auger wrote:
> > @@ -209,12 +209,20 @@ static void acpi_dsdt_add_tpm(Aml *scope, 
> > VirtMachineState *vms)
> >  #define ROOT_COMPLEX_ENTRY_SIZE 36
> >  #define IORT_NODE_OFFSET 48
> >
> > +/*
> > + * Input Output Remapping Table (IORT) -- Table 4 ID mapping format
> > + * Conforms to "IO Remapping Table System Software on ARM Platforms",
> > + * Document number: ARM DEN 0049E.b, Feb 2021
> I would rather explain what the function does, ie append an ID mapping
> entry as desribed in Tabble 4 ID Mapping format.
> 
> Also while at it you may use a more recent revision
> There is DEN0049E_IO_Remapping_Table_E.f.pdf available

Sure. Will do a v4 with something like:

+/*
+ * Append an ID mapping entry as described in "Table 4 ID mapping format"
+ * from "IO Remapping Table System Software on ARM Platforms", Chapter 3.
+ * Document number: ARM DEN 0049E.f, Apr 2024

> Reviewed-by: Eric Auger 

Thanks for the review!

Nicolin



Re: [PATCH v2] target/riscv: fix instructions count handling in icount mode

2024-06-19 Thread Atish Kumar Patra
On Tue, Jun 18, 2024 at 4:27 AM Clément Léger  wrote:
>
> When icount is enabled, rather than returning the virtual CPU time, we
> should return the instruction count itself. Add an instructions bool
> parameter to get_ticks() to correctly return icount_get_raw() when
> icount_enabled() == 1 and instruction count is queried. This will modify
> the existing behavior which was returning an instructions count close to
> the number of cycles (CPI ~= 1).
>
> Signed-off-by: Clément Léger 
>
> ---
>
> v2:
>  - Apply checkpatch and fixed missing braces
>

As the changes were minor, you can keep the RB tag. Anyways,

Reviewed-by: Atish Patra 

> ---
>  target/riscv/csr.c | 30 +-
>  1 file changed, 17 insertions(+), 13 deletions(-)
>
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index 58ef7079dc..b8915e32a2 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -762,14 +762,18 @@ static RISCVException write_vcsr(CPURISCVState *env, 
> int csrno,
>  }
>
>  /* User Timers and Counters */
> -static target_ulong get_ticks(bool shift)
> +static target_ulong get_ticks(bool shift, bool instructions)
>  {
>  int64_t val;
>  target_ulong result;
>
>  #if !defined(CONFIG_USER_ONLY)
>  if (icount_enabled()) {
> -val = icount_get();
> +if (instructions) {
> +val = icount_get_raw();
> +} else {
> +val = icount_get();
> +}
>  } else {
>  val = cpu_get_host_ticks();
>  }
> @@ -804,14 +808,14 @@ static RISCVException read_timeh(CPURISCVState *env, 
> int csrno,
>  static RISCVException read_hpmcounter(CPURISCVState *env, int csrno,
>target_ulong *val)
>  {
> -*val = get_ticks(false);
> +*val = get_ticks(false, (csrno == CSR_INSTRET));
>  return RISCV_EXCP_NONE;
>  }
>
>  static RISCVException read_hpmcounterh(CPURISCVState *env, int csrno,
> target_ulong *val)
>  {
> -*val = get_ticks(true);
> +*val = get_ticks(true, (csrno == CSR_INSTRETH));
>  return RISCV_EXCP_NONE;
>  }
>
> @@ -875,11 +879,11 @@ static RISCVException write_mhpmcounter(CPURISCVState 
> *env, int csrno,
>  int ctr_idx = csrno - CSR_MCYCLE;
>  PMUCTRState *counter = &env->pmu_ctrs[ctr_idx];
>  uint64_t mhpmctr_val = val;
> +bool instr = riscv_pmu_ctr_monitor_instructions(env, ctr_idx);
>
>  counter->mhpmcounter_val = val;
> -if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) ||
> -riscv_pmu_ctr_monitor_instructions(env, ctr_idx)) {
> -counter->mhpmcounter_prev = get_ticks(false);
> +if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || instr) {
> +counter->mhpmcounter_prev = get_ticks(false, instr);
>  if (ctr_idx > 2) {
>  if (riscv_cpu_mxl(env) == MXL_RV32) {
>  mhpmctr_val = mhpmctr_val |
> @@ -902,12 +906,12 @@ static RISCVException write_mhpmcounterh(CPURISCVState 
> *env, int csrno,
>  PMUCTRState *counter = &env->pmu_ctrs[ctr_idx];
>  uint64_t mhpmctr_val = counter->mhpmcounter_val;
>  uint64_t mhpmctrh_val = val;
> +bool instr = riscv_pmu_ctr_monitor_instructions(env, ctr_idx);
>
>  counter->mhpmcounterh_val = val;
>  mhpmctr_val = mhpmctr_val | (mhpmctrh_val << 32);
> -if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) ||
> -riscv_pmu_ctr_monitor_instructions(env, ctr_idx)) {
> -counter->mhpmcounterh_prev = get_ticks(true);
> +if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || instr) {
> +counter->mhpmcounterh_prev = get_ticks(true, instr);
>  if (ctr_idx > 2) {
>  riscv_pmu_setup_timer(env, mhpmctr_val, ctr_idx);
>  }
> @@ -926,6 +930,7 @@ static RISCVException riscv_pmu_read_ctr(CPURISCVState 
> *env, target_ulong *val,
>   counter->mhpmcounter_prev;
>  target_ulong ctr_val = upper_half ? counter->mhpmcounterh_val :
>  counter->mhpmcounter_val;
> +bool instr = riscv_pmu_ctr_monitor_instructions(env, ctr_idx);
>
>  if (get_field(env->mcountinhibit, BIT(ctr_idx))) {
>  /*
> @@ -946,9 +951,8 @@ static RISCVException riscv_pmu_read_ctr(CPURISCVState 
> *env, target_ulong *val,
>   * The kernel computes the perf delta by subtracting the current value 
> from
>   * the value it initialized previously (ctr_val).
>   */
> -if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) ||
> -riscv_pmu_ctr_monitor_instructions(env, ctr_idx)) {
> -*val = get_ticks(upper_half) - ctr_prev + ctr_val;
> +if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || instr) {
> +*val = get_ticks(upper_half, instr) - ctr_prev + ctr_val;
>  } else {
>  *val = ctr_val;
>  }
> --
> 2.45.2
>



[PATCH] linux-user: open_self_stat: Implement num_threads

2024-06-19 Thread Fabio D'Urso
The num_threads field reports the total number of threads in the
process. In QEMU, this is equal to the number of CPU instances.

Signed-off-by: Fabio D'Urso 
---
 linux-user/syscall.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index b9b5a387b3..a47b2eeb65 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -8171,6 +8171,16 @@ static int open_self_stat(CPUArchState *cpu_env, int fd)
 } else if (i == 3) {
 /* ppid */
 g_string_printf(buf, FMT_pid " ", getppid());
+} else if (i == 19) {
+/* num_threads */
+int cpus = 0;
+WITH_RCU_READ_LOCK_GUARD() {
+CPUState *cpu_iter;
+CPU_FOREACH(cpu_iter) {
+cpus++;
+}
+}
+g_string_printf(buf, "%d ", cpus);
 } else if (i == 21) {
 /* starttime */
 g_string_printf(buf, "%" PRIu64 " ", ts->start_boottime);
-- 
2.45.2.627.g7a2c4fd464-goog




Re: [PATCH] hw/gpio/aspeed: Add bounds checking for register table access

2024-06-19 Thread Zheyu Ma
Hi Philippe,

On Wed, Jun 19, 2024 at 6:29 PM Philippe Mathieu-Daudé 
wrote:

> On 19/6/24 08:49, Zheyu Ma wrote:
> > Hi Andrew,
> >
> > On Wed, Jun 19, 2024 at 1:58 AM Andrew Jeffery
> > mailto:and...@codeconstruct.com.au>>
> wrote:
> >
> > Hello Zheyu Ma,
> >
> > On Tue, 2024-06-18 at 15:09 +0200, Zheyu Ma wrote:
> >  > Added bounds checking in the aspeed_gpio_read() and
> > aspeed_gpio_write()
> >  > functions to ensure the index idx is within the valid range of the
> >  > reg_table array.
> >  >
> >  > The correct size of reg_table is determined dynamically based on
> > whether
> >  > it is aspeed_3_3v_gpios or aspeed_1_8v_gpios. If idx exceeds the
> >  > size of reg_table, an error is logged, and the function returns.
> >  >
> >  > AddressSanitizer log indicating the issue:
> >  >
> >  > ==2602930==ERROR: AddressSanitizer: global-buffer-overflow on
> > address 0x55a5da29e128 at pc 0x55a5d700dc62 bp 0x7fff096c4e90 sp
> > 0x7fff096c4e88
> >  > READ of size 2 at 0x55a5da29e128 thread T0
> >  > #0 0x55a5d700dc61 in aspeed_gpio_read
> > hw/gpio/aspeed_gpio.c:564:14
> >  > #1 0x55a5d933f3ab in memory_region_read_accessor
> > system/memory.c:445:11
> >  > #2 0x55a5d92fba40 in access_with_adjusted_size
> > system/memory.c:573:18
> >  > #3 0x55a5d92f842c in memory_region_dispatch_read1
> > system/memory.c:1426:16
> >  > #4 0x55a5d92f7b68 in memory_region_dispatch_read
> > system/memory.c:1459:9
> >  > #5 0x55a5d9376ad1 in flatview_read_continue_step
> > system/physmem.c:2836:18
> >  > #6 0x55a5d9376399 in flatview_read_continue
> > system/physmem.c:2877:19
> >  > #7 0x55a5d93775b8 in flatview_read system/physmem.c:2907:12
> >
> > I'm mildly interested in what you were doing to trigger this.
> Certainly
> > we could do with a guard in the model to prevent it, but I'm curious
> > all the same.
> >
> >
> > Actually, I'm doing the virtual device fuzzing test and trying to
> > discover bugs.
>
> Could you share the reproducer? (As you did in your other patches,
> it is very useful to reproduce).
>

Sure, I've sent a v3 patch.

Zheyu

>
> >
> >  >
> >  > Signed-off-by: Zheyu Ma  > >
> >  > ---
> >  >  hw/gpio/aspeed_gpio.c | 26 ++
> >  >  1 file changed, 26 insertions(+)
> >  >
> >  > diff --git a/hw/gpio/aspeed_gpio.c b/hw/gpio/aspeed_gpio.c
> >  > index c1781e2ba3..1441046f6c 100644
> >  > --- a/hw/gpio/aspeed_gpio.c
> >  > +++ b/hw/gpio/aspeed_gpio.c
> >  > @@ -550,6 +550,7 @@ static uint64_t aspeed_gpio_read(void
> > *opaque, hwaddr offset, uint32_t size)
> >  >  GPIOSets *set;
> >  >  uint32_t value = 0;
> >  >  uint64_t debounce_value;
> >  > +uint32_t reg_table_size;
> >  >
> >  >  idx = offset >> 2;
> >  >  if (idx >= GPIO_DEBOUNCE_TIME_1 && idx <=
> > GPIO_DEBOUNCE_TIME_3) {
> >  > @@ -559,6 +560,18 @@ static uint64_t aspeed_gpio_read(void
> > *opaque, hwaddr offset, uint32_t size)
> >  >  return debounce_value;
> >  >  }
> >  >
> >  > +if (agc->reg_table == aspeed_3_3v_gpios) {
> >  > +reg_table_size = GPIO_3_3V_REG_ARRAY_SIZE;
> >  > +} else {
> >  > +reg_table_size = GPIO_1_8V_REG_ARRAY_SIZE;
> >  > +}
> >
> > I think I'd prefer we add reg_table_size as a member of
> AspeedGPIOClass
> > and initialise it at the same time as we initialise reg_table. I feel
> > it would help maintain safety in the face of future changes (i.e. if
> > another reg table were introduced). With that approach the hunk above
> > can be dropped.
> >
> >  > +
> >  > +if (idx >= reg_table_size) {
> >
> > This condition would then become:
> >
> > ```
> > if (idx >= agc->reg_table_size) {
> > ```
> >
> > Thoughts?
> >
> >
> > I agree with you, adding a new member is a more maintainable way, I'll
> > send a v2 patch, thanks!
> >
> > Zheyu
>
>


[PATCH v3] hw/gpio/aspeed: Add reg_table_size to AspeedGPIOClass

2024-06-19 Thread Zheyu Ma
ASan detected a global-buffer-overflow error in the aspeed_gpio_read()
function. This issue occurred when reading beyond the bounds of the
reg_table.

To enhance the safety and maintainability of the Aspeed GPIO code, this commit
introduces a reg_table_size member to the AspeedGPIOClass structure. This
change ensures that the size of the GPIO register table is explicitly tracked
and initialized, reducing the risk of errors if new register tables are
introduced in the future.

Reproducer:
cat << EOF | qemu-system-aarch64 -display none \
-machine accel=qtest, -m 512M -machine ast1030-evb -qtest stdio
readq 0x7e780272
EOF

ASAN log indicating the issue:
==2602930==ERROR: AddressSanitizer: global-buffer-overflow on address 
0x55a5da29e128 at pc 0x55a5d700dc62 bp 0x7fff096c4e90 sp 0x7fff096c4e88
READ of size 2 at 0x55a5da29e128 thread T0
#0 0x55a5d700dc61 in aspeed_gpio_read hw/gpio/aspeed_gpio.c:564:14
#1 0x55a5d933f3ab in memory_region_read_accessor system/memory.c:445:11
#2 0x55a5d92fba40 in access_with_adjusted_size system/memory.c:573:18
#3 0x55a5d92f842c in memory_region_dispatch_read1 system/memory.c:1426:16
#4 0x55a5d92f7b68 in memory_region_dispatch_read system/memory.c:1459:9
#5 0x55a5d9376ad1 in flatview_read_continue_step system/physmem.c:2836:18
#6 0x55a5d9376399 in flatview_read_continue system/physmem.c:2877:19
#7 0x55a5d93775b8 in flatview_read system/physmem.c:2907:12

Signed-off-by: Zheyu Ma 
---
Changes in v3:
- Add the reproducer

Changes in v2:
- Introduce the reg_table_size to AspeedGPIOClass
---
 hw/gpio/aspeed_gpio.c | 17 +
 include/hw/gpio/aspeed_gpio.h |  1 +
 2 files changed, 18 insertions(+)

diff --git a/hw/gpio/aspeed_gpio.c b/hw/gpio/aspeed_gpio.c
index c1781e2ba3..fd4912edae 100644
--- a/hw/gpio/aspeed_gpio.c
+++ b/hw/gpio/aspeed_gpio.c
@@ -559,6 +559,12 @@ static uint64_t aspeed_gpio_read(void *opaque, hwaddr 
offset, uint32_t size)
 return debounce_value;
 }
 
+if (idx >= agc->reg_table_size) {
+qemu_log_mask(LOG_GUEST_ERROR, "%s: idx 0x%" PRIx64 " out of bounds\n",
+  __func__, idx);
+return 0;
+}
+
 reg = &agc->reg_table[idx];
 if (reg->set_idx >= agc->nr_gpio_sets) {
 qemu_log_mask(LOG_GUEST_ERROR, "%s: no getter for offset 0x%"
@@ -785,6 +791,12 @@ static void aspeed_gpio_write(void *opaque, hwaddr offset, 
uint64_t data,
 return;
 }
 
+if (idx >= agc->reg_table_size) {
+qemu_log_mask(LOG_GUEST_ERROR, "%s: idx 0x%" PRIx64 " out of bounds\n",
+  __func__, idx);
+return;
+}
+
 reg = &agc->reg_table[idx];
 if (reg->set_idx >= agc->nr_gpio_sets) {
 qemu_log_mask(LOG_GUEST_ERROR, "%s: no setter for offset 0x%"
@@ -1117,6 +1129,7 @@ static void aspeed_gpio_ast2400_class_init(ObjectClass 
*klass, void *data)
 agc->nr_gpio_pins = 216;
 agc->nr_gpio_sets = 7;
 agc->reg_table = aspeed_3_3v_gpios;
+agc->reg_table_size = GPIO_3_3V_REG_ARRAY_SIZE;
 }
 
 static void aspeed_gpio_2500_class_init(ObjectClass *klass, void *data)
@@ -1127,6 +1140,7 @@ static void aspeed_gpio_2500_class_init(ObjectClass 
*klass, void *data)
 agc->nr_gpio_pins = 228;
 agc->nr_gpio_sets = 8;
 agc->reg_table = aspeed_3_3v_gpios;
+agc->reg_table_size = GPIO_3_3V_REG_ARRAY_SIZE;
 }
 
 static void aspeed_gpio_ast2600_3_3v_class_init(ObjectClass *klass, void *data)
@@ -1137,6 +1151,7 @@ static void 
aspeed_gpio_ast2600_3_3v_class_init(ObjectClass *klass, void *data)
 agc->nr_gpio_pins = 208;
 agc->nr_gpio_sets = 7;
 agc->reg_table = aspeed_3_3v_gpios;
+agc->reg_table_size = GPIO_3_3V_REG_ARRAY_SIZE;
 }
 
 static void aspeed_gpio_ast2600_1_8v_class_init(ObjectClass *klass, void *data)
@@ -1147,6 +1162,7 @@ static void 
aspeed_gpio_ast2600_1_8v_class_init(ObjectClass *klass, void *data)
 agc->nr_gpio_pins = 36;
 agc->nr_gpio_sets = 2;
 agc->reg_table = aspeed_1_8v_gpios;
+agc->reg_table_size = GPIO_1_8V_REG_ARRAY_SIZE;
 }
 
 static void aspeed_gpio_1030_class_init(ObjectClass *klass, void *data)
@@ -1157,6 +1173,7 @@ static void aspeed_gpio_1030_class_init(ObjectClass 
*klass, void *data)
 agc->nr_gpio_pins = 151;
 agc->nr_gpio_sets = 6;
 agc->reg_table = aspeed_3_3v_gpios;
+agc->reg_table_size = GPIO_3_3V_REG_ARRAY_SIZE;
 }
 
 static const TypeInfo aspeed_gpio_info = {
diff --git a/include/hw/gpio/aspeed_gpio.h b/include/hw/gpio/aspeed_gpio.h
index 904eecf62c..e66036ac39 100644
--- a/include/hw/gpio/aspeed_gpio.h
+++ b/include/hw/gpio/aspeed_gpio.h
@@ -75,6 +75,7 @@ struct AspeedGPIOClass {
 uint32_t nr_gpio_pins;
 uint32_t nr_gpio_sets;
 const AspeedGPIOReg *reg_table;
+uint32_t reg_table_size;
 };
 
 struct AspeedGPIOState {
-- 
2.34.1




Re: [PATCH v2 11/12] tests/qtest/bios-tables-test.c: Enable basic testing for RISC-V

2024-06-19 Thread Sunil V L
On Wed, Jun 19, 2024 at 12:12:50PM +0200, Igor Mammedov wrote:
> On Fri, 24 May 2024 11:44:10 +0530
> Sunil V L  wrote:
> 
> > Add basic ACPI table test case for RISC-V.
> > 
> > Signed-off-by: Sunil V L 
> > ---
> >  tests/qtest/bios-tables-test.c | 27 +++
> >  1 file changed, 27 insertions(+)
> > 
> > diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c
> > index c73174ad00..880435a5fa 100644
> > --- a/tests/qtest/bios-tables-test.c
> > +++ b/tests/qtest/bios-tables-test.c
> > @@ -1935,6 +1935,29 @@ static void test_acpi_microvm_acpi_erst(void)
> >  }
> >  #endif /* CONFIG_POSIX */
> >  
> > +static void test_acpi_riscv64_virt_tcg(void)
> > +{
> > > +test_data data = {
> > +.machine = "virt",
> > +.arch = "riscv64",
> > +.tcg_only = true,
> > +.uefi_fl1 = "pc-bios/edk2-riscv-code.fd",
> > +.uefi_fl2 = "pc-bios/edk2-riscv-vars.fd",
> > +.ram_start = 0x8000ULL,
> > +.scan_len = 128ULL * 1024 * 1024,
> > +};
> > +
> > +/*
> > + * RHCT will have ISA string encoded. To reduce the effort
> > + * of updating expected AML file for any new default ISA extension,
> > + * use the profile rva22s64.
> > + */
> > +test_acpi_one("-cpu rva22s64 -device virtio-blk-device,drive=hd0 "
> > +  "-drive 
> > file=tests/data/uefi-boot-images/bios-tables-test.riscv64.iso.qcow2,id=hd0",
> 
> Can you reuse test_data->cd, instead of specifying disk here? 
> 
Actually, currently there is an issue with RISC-V virt machine due to
which -cdrom doesn't work properly. But I have a fix for that. With that
fix in place , I can use cd here.

I will send that fix tomorrow separately.

Thanks,
Sunil



Re: [PATCH v2 06/12] tests/data/acpi/virt: Move ACPI tables under aarch64

2024-06-19 Thread Sunil V L
On Wed, Jun 19, 2024 at 05:20:50AM -0400, Michael S. Tsirkin wrote:
> On Wed, Jun 19, 2024 at 11:17:43AM +0200, Igor Mammedov wrote:
> > On Mon, 27 May 2024 20:46:29 +0530
> > Sunil V L  wrote:
> > 
> > > On Mon, May 27, 2024 at 12:12:10PM +0200, Philippe Mathieu-Daudé wrote:
> > > > Hi Sunil,
> > > > 
> > > > On 24/5/24 08:14, Sunil V L wrote:  
> > > > > Since virt is a common machine name across architectures like ARM64 
> > > > > and
> > > > > RISC-V, move existing ARM64 ACPI tables under aarch64 folder so that
> > > > > RISC-V tables can be added under riscv64 folder in future.
> > > > > 
> > > > > Signed-off-by: Sunil V L 
> > > > > Reviewed-by: Alistair Francis 
> > > > > ---
> > > > >   tests/data/acpi/virt/{ => aarch64}/APIC | Bin  
> > > > 
> > > > The usual pattern is {target}/{machine}, so instead of:
> > > > 
> > > >   microvm/
> > > >   pc/
> > > >   q35/
> > > >   virt/aarch64/
> > > >   virt/riscv64/
> > > > 
> > > > (which is odd because q35 is the x86 'virt'), I'd rather see:
> > > > 
> > > >   x86/microvm/
> > > >   x86/pc/
> > > >   x86/q35/
> > > >   aarch64/virt/
> > > >   riscv64/virt/
> > > > 
> > > > Anyhow just my 2 cents, up to the ACPI maintainers :)
> > > >   
> > > Hi Phil,
> > > 
> > > Your suggestion does make sense to me. Let me wait for feedback from
> > > ARM/ACPI maintainers.
> > 
> > I'd prefer  {target}/{machine} hierarchy like Philippe suggests
> 
> Agreed.
> 
Thanks for the confirmation!. Let me send the updated version soon.

Moving pc/q35/microvm also under new x86 would need many changes in
bios-table-test.c. So, the question is, are you ok to combine x86
changes as well in this series or prefer to it later in separate series?

Thanks,
Sunil



Re: [PATCH v2] Consider discard option when writing zeros

2024-06-19 Thread Nir Soffer
On Wed, Jun 19, 2024 at 8:40 PM Nir Soffer  wrote:

> - Need to run all block tests
>

Stale note, make check pass


Re: [PATCH] configure: detect --cpu=mipsisa64r6

2024-06-19 Thread Thomas Huth

On 19/06/2024 15.34, Paolo Bonzini wrote:

On Wed, Jun 19, 2024 at 2:49 PM Thomas Huth  wrote:


On 19/06/2024 13.46, Paolo Bonzini wrote:

Treat it as a MIPS64 machine.

...

diff --git a/configure b/configure
index d0703ea279d..3669eec86e5 100755
--- a/configure
+++ b/configure
@@ -452,7 +452,7 @@ case "$cpu" in
   linux_arch=loongarch
   ;;

-  mips64*)
+  mips64*|mipsisa64*)


Maybe simply switch to mips*64*) ?


Not sure if it's a good idea, since we know the exact prefixes.


Fair point.

Reviewed-by: Thomas Huth 




Re: [PATCH v2] Consider discard option when writing zeros

2024-06-19 Thread Nir Soffer
Tested using:

$ cat test-unmap.sh
#!/bin/sh

qemu=${1:?Usage: $0 qemu-executable}
img=/tmp/test.raw

echo
echo "defaults - write zeroes"
fallocate -l 1m $img
echo -e 'qemu-io none0 "write -z 0 1m"\nquit' | $qemu -monitor stdio \
-drive if=none,file=$img,format=raw >/dev/null
du -sh $img

echo
echo "defaults - write zeroes unmap"
fallocate -l 1m $img
echo -e 'qemu-io none0 "write -zu 0 1m"\nquit' | $qemu -monitor stdio \
-drive if=none,file=$img,format=raw >/dev/null
du -sh $img

echo
echo "defaults - write actual zeros"
fallocate -l 1m $img
echo -e 'qemu-io none0 "write -P 0 0 1m"\nquit' | $qemu -monitor stdio \
-drive if=none,file=$img,format=raw >/dev/null
du -sh $img

echo
echo "discard=off - write zeroes unmap"
fallocate -l 1m $img
echo -e 'qemu-io none0 "write -zu 0 1m"\nquit' | $qemu -monitor stdio \
-drive if=none,file=$img,format=raw,discard=off >/dev/null
du -sh $img

echo
echo "detect-zeros=on - write actual zeros"
fallocate -l 1m $img
echo -e 'qemu-io none0 "write -P 0 0 1m"\nquit' | $qemu -monitor stdio \
-drive if=none,file=$img,format=raw,detect-zeroes=on >/dev/null
du -sh $img

echo
echo "detect-zeros=unmap,discard=unmap - write actual zeros"
fallocate -l 1m $img
echo -e 'qemu-io none0 "write -P 0 0 1m"\nquit' |  $qemu -monitor stdio \
-drive if=none,file=$img,format=raw,detect-zeroes=unmap,discard=unmap
>/dev/null
du -sh $img

echo
echo "discard=unmap - write zeroes"
fallocate -l 1m $img
echo -e 'qemu-io none0 "write -z 0 1m"\nquit' | $qemu -monitor stdio \
-drive if=none,file=$img,format=raw,discard=unmap >/dev/null
du -sh $img

echo
echo "discard=unmap - write zeroes unmap"
fallocate -l 1m $img
echo -e 'qemu-io none0 "write -zu 0 1m"\nquit' | $qemu -monitor stdio \
-drive if=none,file=$img,format=raw,discard=unmap >/dev/null
du -sh $img

rm $img


Before this change:

$ cat before.out

defaults - write zeroes
1.0M /tmp/test.raw

defaults - write zeroes unmap
0 /tmp/test.raw

defaults - write actual zeros
1.0M /tmp/test.raw

discard=off - write zeroes unmap
0 /tmp/test.raw

detect-zeros=on - write actual zeros
1.0M /tmp/test.raw

detect-zeros=unmap,discard=unmap - write actual zeros
0 /tmp/test.raw

discard=unmap - write zeroes
1.0M /tmp/test.raw

discard=unmap - write zeroes unmap
0 /tmp/test.raw
[nsoffer build (consider-discard-option)]$


After this change:

$ cat after.out

defaults - write zeroes
1.0M /tmp/test.raw

defaults - write zeroes unmap
1.0M /tmp/test.raw

defaults - write actual zeros
1.0M /tmp/test.raw

discard=off - write zeroes unmap
1.0M /tmp/test.raw

detect-zeros=on - write actual zeros
1.0M /tmp/test.raw

detect-zeros=unmap,discard=unmap - write actual zeros
0 /tmp/test.raw

discard=unmap - write zeroes
1.0M /tmp/test.raw

discard=unmap - write zeroes unmap
0 /tmp/test.raw


Differences:

$ diff -u before.out after.out
--- before.out 2024-06-19 20:24:09.234083713 +0300
+++ after.out 2024-06-19 20:24:20.526165573 +0300
@@ -3,13 +3,13 @@
 1.0M /tmp/test.raw

 defaults - write zeroes unmap
-0 /tmp/test.raw
+1.0M /tmp/test.raw

 defaults - write actual zeros
 1.0M /tmp/test.raw

 discard=off - write zeroes unmap
-0 /tmp/test.raw
+1.0M /tmp/test.raw

On Wed, Jun 19, 2024 at 8:40 PM Nir Soffer  wrote:

> When opening an image with discard=off, we punch hole in the image when
> writing zeroes, making the image sparse. This breaks users that want to
> ensure that writes cannot fail with ENOSPC by using fully allocated
> images.
>
> bdrv_co_pwrite_zeroes() correctly disables BDRV_REQ_MAY_UNMAP if we
> opened the child without discard=unmap or discard=on. But we don't go
> through this function when accessing the top node. Move the check down
> to bdrv_co_do_pwrite_zeroes() which seems to be used in all code paths.
>
> Issues:
> - We don't punch hole by default, so images are kept allocated. Before
>   this change we punched holes by default. I'm not sure this is a good
>   change in behavior.
> - Need to run all block tests
> - Not sure that we have tests covering unmapping, we may need new tests
> - We may need new tests to cover this change
>
> Signed-off-by: Nir Soffer 
> ---
>
> Changes since v1:
> - Replace the incorrect has_discard change with the right fix
>
> v1 was here:
> https://lists.nongnu.org/archive/html/qemu-block/2024-06/msg00198.html
>
>  block/io.c | 9 +
>  1 file changed, 5 insertions(+), 4 deletions(-)
>
> diff --git a/block/io.c b/block/io.c
> index 7217cf811b..301514c880 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -1860,10 +1860,15 @@ bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
> int64_t offset, int64_t bytes,
>  /* By definition there is no user buffer so this flag doesn't make
> sense */
>  if (flags & BDRV_REQ_REGISTERED_BUF) {
>  return -EINVAL;
>  }
>
> +/* If opened with discard=off we should never unmap. */
> +if (!(bs->open_flags & BDRV_O_UNMAP)) {
> +flags &= ~BDRV_REQ_MAY_UNMAP;
> +}
> +
>  /* Invalidate the cached block-status data range if this 

Re: [RFC PATCH v2 0/5] Implement ARM PL011 in Rust

2024-06-19 Thread Manos Pitsidianakis

On Wed, 19 Jun 2024 06:31, Richard Henderson  
wrote:

On 6/11/24 03:33, Manos Pitsidianakis wrote:

If `cargo` and `bindgen` is installed in your system, you should be able
to build qemu-system-aarch64 with configure flag --enable-rust and
launch an arm virt VM. One of the patches hardcodes the default UART of
the machine to the Rust one, so if something goes wrong you will see it
upon launching qemu-system-aarch64.


What version is required?

On my stock ubuntu 22.04 system, I get

/usr/bin/bindgen aarch64-softmmu_wrapper.h ...
error: Found argument '--formatter' which wasn't expected, or isn't valid in 
this context

USAGE:
bindgen [FLAGS] [OPTIONS]  -- ...

$ bindgen --version
bindgen 0.59.1


I added version checks in the (yet unpublished) next version:

(Which we will also need to match with distro ones whenever possible)

+# FIXME: Latest stable versions, refine to actual minimum ones.
+msrv = {
+  'rustc': '1.79.0',
+  'cargo': '1.79.0',
+  'bindgen': '0.69.4',
+}
+
+# rustup = find_program('rustup', required: false)
+foreach bin_dep: msrv.keys()
+  bin = find_program(bin_dep, required: true)
+  if bin.version() < msrv[bin_dep]
+# TODO verify behavior
+if bin == 'bindgen' and get_option('wrap_mode') != 'nodownload'
+  run_command(cargo, 'install', 'bindgen', capture: true, check: true)
+  bin = find_program(bin_dep, required: true)
+  if bin.version() >= msrv[bin_dep]
+continue
+  endif
+endif
+message()
+error(bin_dep + ' version ' + bin.version() + ' is unsupported: Please 
upgrade to at least ' + msrv[bin_dep])
+  endif
+endforeach



[PATCH v2] Consider discard option when writing zeros

2024-06-19 Thread Nir Soffer
When opening an image with discard=off, we punch hole in the image when
writing zeroes, making the image sparse. This breaks users that want to
ensure that writes cannot fail with ENOSPC by using fully allocated
images.

bdrv_co_pwrite_zeroes() correctly disables BDRV_REQ_MAY_UNMAP if we
opened the child without discard=unmap or discard=on. But we don't go
through this function when accessing the top node. Move the check down
to bdrv_co_do_pwrite_zeroes() which seems to be used in all code paths.

Issues:
- We don't punch hole by default, so images are kept allocated. Before
  this change we punched holes by default. I'm not sure this is a good
  change in behavior.
- Need to run all block tests
- Not sure that we have tests covering unmapping, we may need new tests
- We may need new tests to cover this change

Signed-off-by: Nir Soffer 
---

Changes since v1:
- Replace the incorrect has_discard change with the right fix

v1 was here:
https://lists.nongnu.org/archive/html/qemu-block/2024-06/msg00198.html

 block/io.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/block/io.c b/block/io.c
index 7217cf811b..301514c880 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1860,10 +1860,15 @@ bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t 
offset, int64_t bytes,
 /* By definition there is no user buffer so this flag doesn't make sense */
 if (flags & BDRV_REQ_REGISTERED_BUF) {
 return -EINVAL;
 }
 
+/* If opened with discard=off we should never unmap. */
+if (!(bs->open_flags & BDRV_O_UNMAP)) {
+flags &= ~BDRV_REQ_MAY_UNMAP;
+}
+
 /* Invalidate the cached block-status data range if this write overlaps */
 bdrv_bsc_invalidate_range(bs, offset, bytes);
 
 assert(alignment % bs->bl.request_alignment == 0);
 head = offset % alignment;
@@ -2313,14 +2318,10 @@ int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild 
*child, int64_t offset,
 {
 IO_CODE();
 trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);
 assert_bdrv_graph_readable();
 
-if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
-flags &= ~BDRV_REQ_MAY_UNMAP;
-}
-
 return bdrv_co_pwritev(child, offset, bytes, NULL,
BDRV_REQ_ZERO_WRITE | flags);
 }
 
 /*
-- 
2.45.1




Re: [PATCH 22/32] hw/sd: Add emmc_cmd_SEND_EXT_CSD() handler

2024-06-19 Thread Philippe Mathieu-Daudé

Hi,

On 3/7/23 15:24, Cédric Le Goater wrote:

The parameters mimic a real 4GB eMMC, but it can be set to various
sizes. Initially from Vincent Palatin 

Signed-off-by: Cédric Le Goater 
---
  hw/sd/sdmmc-internal.h |  97 
  include/hw/sd/sd.h |   1 +
  hw/sd/sd.c | 109 -
  3 files changed, 206 insertions(+), 1 deletion(-)


First pass review, this will take time...


+static void mmc_set_ext_csd(SDState *sd, uint64_t size)
+{
+uint32_t sectcount = size >> HWBLOCK_SHIFT;
+
+memset(sd->ext_csd, 0, sizeof(sd->ext_csd));
+
+sd->ext_csd[EXT_CSD_S_CMD_SET] = 0x1; /* supported command sets */
+sd->ext_csd[EXT_CSD_HPI_FEATURES] = 0x3; /* HPI features  */
+sd->ext_csd[EXT_CSD_BKOPS_SUPPORT] = 0x1; /* Background operations */
+sd->ext_csd[241] = 0xA; /* 1st initialization time after partitioning */
+sd->ext_csd[EXT_CSD_TRIM_MULT] = 0x1; /* Trim multiplier */
+sd->ext_csd[EXT_CSD_SEC_FEATURE_SUPPORT] = 0x15; /* Secure feature */


We do not support (and are not interested in) that. I'll use 0x0 for
"do not support".


+sd->ext_csd[EXT_CSD_SEC_ERASE_MULT] = 0x96; /* Secure erase support */


This value is obsolete, so I'd use 0x0 to avoid confusions.


+sd->ext_csd[EXT_CSD_SEC_TRIM_MULT] = 0x96; /* Secure TRIM multiplier */


Again, 0x0 for "not defined".


+sd->ext_csd[EXT_CSD_BOOT_INFO] = 0x7; /* Boot information */
+sd->ext_csd[EXT_CSD_BOOT_MULT] = 0x8; /* Boot partition size. 128KB unit */
+sd->ext_csd[EXT_CSD_ACC_SIZE] = 0x6; /* Access size */


16KB of super_page_size hmm. Simpler could be the underlying block
retrieved with bdrv_nb_sectors() or simply BDRV_SECTOR_SIZE (0x1).


+sd->ext_csd[EXT_CSD_HC_ERASE_GRP_SIZE] = 0x4; /* HC Erase unit size */


2MB of erase size hmmm why not.


+sd->ext_csd[EXT_CSD_ERASE_TIMEOUT_MULT] = 0x1; /* HC erase timeout */


We don't implement timeout, can we use 0?


+sd->ext_csd[EXT_CSD_REL_WR_SEC_C] = 0x1; /* Reliable write sector count */
+sd->ext_csd[EXT_CSD_HC_WP_GRP_SIZE] = 0x4; /* HC write protect group size 
*/
+sd->ext_csd[EXT_CSD_S_C_VCC] = 0x8; /* Sleep current VCC  */
+sd->ext_csd[EXT_CSD_S_C_VCCQ] = 0x7; /* Sleep current VCCQ */
+sd->ext_csd[EXT_CSD_S_A_TIMEOUT] = 0x11; /* Sleep/Awake timeout */
+sd->ext_csd[215] = (sectcount >> 24) & 0xff; /* Sector count */
+sd->ext_csd[214] = (sectcount >> 16) & 0xff; /* ... */
+sd->ext_csd[213] = (sectcount >> 8) & 0xff;  /* ... */
+sd->ext_csd[EXT_CSD_SEC_CNT] = (sectcount & 0xff);   /* ... */
+sd->ext_csd[210] = 0xa; /* Min write perf for 8bit@52Mhz */
+sd->ext_csd[209] = 0xa; /* Min read perf for 8bit@52Mhz  */
+sd->ext_csd[208] = 0xa; /* Min write perf for 4bit@52Mhz */
+sd->ext_csd[207] = 0xa; /* Min read perf for 4bit@52Mhz */
+sd->ext_csd[206] = 0xa; /* Min write perf for 4bit@26Mhz */
+sd->ext_csd[205] = 0xa; /* Min read perf for 4bit@26Mhz */


Class B at 3MB/s. I suppose announcing up to J at 21MB/s is safe (0x46).


+sd->ext_csd[EXT_CSD_PART_SWITCH_TIME] = 0x1;


SWITCH command isn't implemented so far. We could use 0x0 for "not
defined".


+sd->ext_csd[EXT_CSD_OUT_OF_INTERRUPT_TIME] = 0x1;


Similarly, 0x0 for "undefined" is legal.


+sd->ext_csd[EXT_CSD_CARD_TYPE] = 0x7;


You announce dual data rate. Could we just use High-Speed mode (0x3)
to ease modelling?


+sd->ext_csd[EXT_CSD_STRUCTURE] = 0x2;
+sd->ext_csd[EXT_CSD_REV] = 0x5;


This is Revision 1.5 (for MMC v4.41)... The first QEMU implementation
was based on Revision 1.3 (for MMC v4.3) and I'm seeing some features
from Revision 1.6 (for MMC v4.5)...

Do we want to implement all of them? Since we are adding from
scratch, I suggest we directly start with v4.5 (0x6).

Note, EXT_CSD_BUS_WIDTH is not set (0x0) meaning 1-bit data bus.
I'd set it to 0x2 (8-bit):

   sd->ext_csd[EXT_CSD_BUS_WIDTH] = EXT_CSD_BUS_WIDTH_8_MASK;


+sd->ext_csd[EXT_CSD_RPMB_MULT] = 0x1; /* RPMB size */
+sd->ext_csd[EXT_CSD_PARTITION_SUPPORT] = 0x3;
+sd->ext_csd[159] = 0x00; /* Max enhanced area size */
+sd->ext_csd[158] = 0x00; /* ... */
+sd->ext_csd[157] = 0xEC; /* ... */
+}





Re: [PATCH v14 00/14] Support blob memory and venus on qemu

2024-06-19 Thread Alex Bennée
Dmitry Osipenko  writes:

> Hello,
>
> This series enables Vulkan Venus context support on virtio-gpu.
>
> All virglrender and almost all Linux kernel prerequisite changes
> needed by Venus are already in upstream. For kernel there is a pending
> KVM patchset that fixes mapping of compound pages needed for DRM drivers
> using TTM [1], otherwise hostmem blob mapping will fail with a KVM error
> from Qemu.

So I've been experimenting with Aarch64 TCG with an Intel backend like
this:

./qemu-system-aarch64 \
   -M virt -cpu cortex-a76 \
   -device virtio-net-pci,netdev=unet \
   -netdev user,id=unet,hostfwd=tcp::-:22 \
   -m 8192 \
   -object memory-backend-memfd,id=mem,size=8G,share=on \
   -serial mon:stdio \
   -kernel 
~/lsrc/linux.git/builds/arm64.initramfs/arch/arm64/boot/Image \
   -append "console=ttyAMA0" \
   -device qemu-xhci -device usb-kbd -device usb-tablet \
   -device virtio-gpu-gl-pci,blob=true,venus=true,hostmem=4G \
   -display sdl,gl=on -d 
plugin,guest_errors,trace:virtio_gpu_cmd_res_create_blob,trace:virtio_gpu_cmd_res_back_\*,trace:virtio_gpu_cmd_res_xfer_toh_3d,trace:virtio_gpu_cmd_res_xfer_fromh_3d,trace:address_space_map
 

And I've noticed a couple of things. First trying to launch vkmark to
run a KMS mode test fails with:

  vkr_context_add_object: 5 -> 0x7f24b81d7198   

   
  address_space_map as:0x561b48ec48c0 addr 0x1008ac648:20 write:0 attrs:0x1 

   
  address_space_map as:0x561b48ec48c0 addr 0x109dc5be0:18 write:0 attrs:0x1 

   
  address_space_map as:0x561b48ec48c0 addr 0x1008ac668:18 write:1 attrs:0x1 

   
  vkr_context_add_object: 6 -> 0x7f24b81d7240   

   
  address_space_map as:0x561b48ec48c0 addr 0x1008ac648:20 write:0 attrs:0x1 

   
  address_space_map as:0x561b48ec48c0 addr 0x109dc5be0:18 write:0 attrs:0x1 

   
  address_space_map as:0x561b48ec48c0 addr 0x1008ac668:18 write:1 attrs:0x1 

   
  vkr_context_add_object: 7 -> 0x7f24b81d71e0   

   
  address_space_map as:0x561b48ec48c0 addr 0x1008ac648:48 write:0 attrs:0x1 

   
  address_space_map as:0x561b48ec48c0 addr 0x1008ac690:18 write:1 attrs:0x1 

   
  address_space_map as:0x561b48ec48c0 addr 0x1008ac570:20 write:0 attrs:0x1 

   
  address_space_map as:0x561b48ec48c0 addr 0x101d64300:40 write:0 attrs:0x1 

   
  address_space_map as:0x561b48ec48c0 addr 0x1008ac590:18 write:1 attrs:0x1 

   
  address_space_map as:0x561b48ec48c0 addr 0x1008ac720:20 write:0 attrs:0x1 

   
  address_space_map as:0x561b48ec48c0 addr 0x1008ac740:18 write:1 attrs:0x1 

   
  virtio_gpu_cmd_res_back_attach res 0x5, 4 entries 

   
  address_space_map as:0x561b48ec48c0 addr 0x109fd5000:2b000 write:0 attrs:0x1  

   
  address_space_map as:0x561b48ec48c0 addr 0x10220:10 write:0 attrs:0x1 

   
  address_space_map as:0x561b48ec48c0 addr 0x100e0:20 write:0 attrs:0x1 

   
  address_space_map as:0x561b48ec48c0 addr 0x10a00:bd000 write:0 

Re: [RFC PATCH v2 1/5] build-sys: Add rust feature option

2024-06-19 Thread Manos Pitsidianakis

On Wed, 19 Jun 2024 19:52, Richard Henderson  
wrote:

On 6/11/24 03:33, Manos Pitsidianakis wrote:

+++ b/scripts/cargo_wrapper.py
@@ -0,0 +1,211 @@
+#!/usr/bin/env python3
+# Copyright (c) 2020 Red Hat, Inc.
+# Copyright (c) 2023 Linaro Ltd.
+#
+# Authors:
+#  Manos Pitsidianakis
+#  Marc-André Lureau
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+
+import argparse
+import configparser
+import distutils.file_util
+import json
+import logging
+import os
+import os.path
+import re
+import subprocess
+import sys
+import pathlib
+import shutil
+import tomllib


Correct me if I'm wrong, but does this require python 3.11 for tomllib?
AFAIK, we're limited to assuming 3.9 from Debian 11 until Feb 2026, or
3.10 from Ubuntu 22.04 until Apr 2026.

I presume this package can be downloaded from pip, and therefore should be added to the 
python venv that we create in configure?



r~


That's absolutely correct. I will make it compatible with at least 3.9 
in the next version, thanks!


Manos



Re: [PATCH 13/13] qapi: convert "Example" sections to rST

2024-06-19 Thread John Snow
On Wed, Jun 19, 2024, 9:20 AM Markus Armbruster  wrote:

> John Snow  writes:
>
> > Eliminate the "Example" sections in QAPI doc blocks, converting them
> > into QMP example code blocks. This is generally done in this patch by
> > converting "Example:" or "Examples:" lines into ".. code-block:: QMP"
> > lines.
> >
> > The old "Example:" or "Examples:" syntax is now caught as an error; but
> > with the previous commit, "Example::" is still permitted as explicit rST
> > syntax. ('Example' is not special in this case; any sentence that ends
> > with "::" will start an indented code block in rST.)
> >
> > The ".. code-block:: QMP" form explicitly applies the QMP lexer and will
> > loosely validate an example as valid QMP/JSON. The "::" form does not
> > apply any lexer in particular and will not emit any errors.
> >
> > It is possible to choose the QMP lexer with the "::" form by using the
> > Sphinx directive ".. highlight:: QMP" in the document above where the
> > example appears; but this changes the lexer for *all* subsequent "::"
> > style code-blocks in the document thereafter.
> >
> > This patch does not change the default lexer for the legacy qapidoc
> > generator documents; future patches for the new qapidoc generator *may*
> > change this default.
> >
> > This patch has several benefits:
> >
> > 1. Example sections can now be written more arbitrarily, mixing
> >explanatory paragraphs and code blocks however desired.
> >
> > 2. "Example sections" can now use fully arbitrary rST.
>
> Do the double-quotes signify something I'm missing?
>

They aren't *sections* (QAPIDoc terminology) anymore, but was at a loss for
more precise phrasing.


> >
> > 3. All code blocks are now lexed and validated as QMP; increasing
> >usability of the docs and ensuring validity of example snippets.
> >
> >    (To some extent - This patch only guarantees it lexes correctly, not
> >that it's valid under the JSON or QMP grammars. It will catch most
> >small mistakes, however.)
> >
> > 4. Each code-block can be captioned independently without bypassing the
> >QMP lexer/validator.
> >
> >(i.e. code blocks are now for *code* only, so we don't have to
> >sacrifice annotations/captions for having lexicographically correct
> >examples.)
> >
> > For any sections with more than one example, examples are split up into
> > multiple code-block regions. If annotations are present, those
> > annotations are converted into code-block captions instead, e.g.
> >
> > ```
> > Examples:
> >
> >1. Lorem Ipsum
> >
> >-> { "foo": "bar" }
> > ```
> >
> > Is rewritten as:
> >
> > ```
> > .. code-block:: QMP
> >:caption: Example: Lorem Ipsum
> >
> >-> { "foo": "bar" }
> > ```
> >
> > This process was only semi-automated:
> >
> > 1. Replace "Examples?:" sections with sed:
> >
> > sed -i 's|# Example:|# .. code-block:: QMP|' *.json
> > sed -i 's|# Examples:|# .. code-block:: QMP|' *.json
> >
> > 2. Identify sections that no longer parse successfully by attempting the
> >doc build, convert annotations into captions manually.
> >(Tedious, oh well.)
> >
> > 3. Add captions where still needed:
> >
> > sed -zi 's|# .. code-block:: QMP\n#\n|# .. code-block:: QMP\n#
> :caption: Example\n#\n|g' *.json
> >
> > Not fully ideal, but hopefully not something that has to be done very
> > often. (Or ever again.)
> >
> > Signed-off-by: John Snow 
> > Acked-by: Stefan Hajnoczi  [for block*.json]
>
> [...]
>
> > diff --git a/qapi/pci.json b/qapi/pci.json
> > index f51159a2c4c..9192212661b 100644
> > --- a/qapi/pci.json
> > +++ b/qapi/pci.json
> > @@ -182,7 +182,8 @@
> >  #
> >  # Since: 0.14
> >  #
> > -# Example:
> > +# .. code-block:: QMP
> > +#:caption: Example
> >  #
> >  # -> { "execute": "query-pci" }
> >  # <- { "return": [
> > @@ -311,8 +312,7 @@
> >  #   ]
> >  #}
> >  #
> > -# Note: This example has been shortened as the real response is too
> > -# long.
> > +# This example has been shortened as the real response is too long.
>
> Squash into PATCH 09.
>
> >  #
> >  ##
> >  { 'command': 'query-pci', 'returns': ['PciInfo'] }
>
> Otherwise looks good to me except for the somewhat ugly rendering in
> HTML mentioned in the commit message.
>

ACK


> [...]
>
>


Re: [RFC PATCH v2 3/5] rust: add PL011 device model

2024-06-19 Thread Paolo Bonzini
Il mer 19 giu 2024, 18:54 Daniel P. Berrangé  ha
scritto:

> >build/
> >  rust/
> >.cargo/
> >  config.toml   # generated by configure or meson.build
> >Cargo.toml  # workspace generated by configure or meson.build
> >Cargo.lock  # can be either linked to srctree or generated
> >qemu/   # symlink to srctree/rust/qemu
> >aarch64-softmmu-hw/
> >  Cargo.toml# generated by meson.build (*)
> >  src/  # symlink to srctree/rust/hw/
> >i386-softmmu-hw/
> >  Cargo.toml# generated by meson.build
> >  src/  # symlink to srctree/rust/hw/
> >generated/  # files generated by rust/generated/meson.build
>
> If we're generating a build tree to invoke cargo on, can we then
> avoid creating a completely separate dir hierarchy in the source
> tree rooted at /rust, and just have rust source within our existing
> hierarchy.
>

Maybe... I hadn't even considered the possibility of having a single cargo
invocation (and thus a cargo workspace in the build tree) until Richard
pointed out the duplication in configuration files.

I suppose you could just link rust/aarch64-softmmu-hw to srctree/hw, and
have a srctree/hw/lib.rs file in there to prime the search for submodules.

I think the resulting hierarchy would feel a little foreign though. Without
seeing the code I can't judge but my impression is that, if we wanted to go
that way, I would also move all C code under src/. Perhaps we can consider
such a unification later, once we have more experience, but for now keep
Rust and C code separate?

Paolo



> eg
>
> aarch64-softmmu-hw/
>   Cargo.toml# generated by meson.build (*)
>   src/
> pl011/  # symlink to srctree/hw/p1011/
>
>
> With regards,
> Daniel
> --
> |: https://berrange.com  -o-
> https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-
> https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-
> https://www.instagram.com/dberrange :|
>
>


Re: [PATCH 03/13] docs/qapidoc: delint a tiny portion of the module

2024-06-19 Thread John Snow
On Wed, Jun 19, 2024, 2:28 AM Markus Armbruster  wrote:

> John Snow  writes:
>
> > In a forthcoming series that adds a new QMP documentation generator, it
> > will be helpful to have a linting baseline. However, there's no need to
> > shuffle around the deck chairs too much, because most of this code will
> > be removed once that new qapidoc generator (the "transmogrifier") is in
> > place.
> >
> > To ease my pain: just turn off the black auto-formatter for most, but
> > not all, of qapidoc.py. This will help ensure that *new* code follows a
> > coding standard without bothering too much with cleaning up the existing
> > code.
> >
> > Code that I intend to keep is still subject to the delinting beam.
> >
> > Signed-off-by: John Snow 
> > ---
> >  docs/sphinx/qapidoc.py | 66 +-
> >  1 file changed, 40 insertions(+), 26 deletions(-)
> >
> > diff --git a/docs/sphinx/qapidoc.py b/docs/sphinx/qapidoc.py
> > index f270b494f01..e675966defa 100644
> > --- a/docs/sphinx/qapidoc.py
> > +++ b/docs/sphinx/qapidoc.py
> > @@ -28,33 +28,42 @@
> >  import re
> >
> >  from docutils import nodes
> > +from docutils.parsers.rst import Directive, directives
> >  from docutils.statemachine import ViewList
> > -from docutils.parsers.rst import directives, Directive
> > -from sphinx.errors import ExtensionError
> > -from sphinx.util.nodes import nested_parse_with_titles
> > -import sphinx
> > -from qapi.gen import QAPISchemaVisitor
> >  from qapi.error import QAPIError, QAPISemError
> > +from qapi.gen import QAPISchemaVisitor
> >  from qapi.schema import QAPISchema
> >
> > +import sphinx
> > +from sphinx.errors import ExtensionError
> > +from sphinx.util.nodes import nested_parse_with_titles
> > +
> >
> >  # Sphinx up to 1.6 uses AutodocReporter; 1.7 and later
> >  # use switch_source_input. Check borrowed from kerneldoc.py.
> > -Use_SSI = sphinx.__version__[:3] >= '1.7'
> > -if Use_SSI:
> > +USE_SSI = sphinx.__version__[:3] >= "1.7"
> > +if USE_SSI:
> >  from sphinx.util.docutils import switch_source_input
> >  else:
> > -from sphinx.ext.autodoc import AutodocReporter
> > +from sphinx.ext.autodoc import (  # pylint:
> disable=no-name-in-module
> > +AutodocReporter,
> > +)
> >
> >
> > -__version__ = '1.0'
> > +__version__ = "1.0"
> >
> >
> > +# Disable black auto-formatter until re-enabled:
> > +# fmt: off
> > +
> >  # Function borrowed from pydash, which is under the MIT license
> >  def intersperse(iterable, separator):
> >  """Yield the members of *iterable* interspersed with *separator*."""
> >  iterable = iter(iterable)
> > -yield next(iterable)
> > +try:
> > +yield next(iterable)
> > +except StopIteration:
> > +return
>
> This gets rid of pylint's
>
> docs/sphinx/qapidoc.py:82:10: R1708: Do not raise StopIteration in
> generator, use return statement instead (stop-iteration-return)
>
> I considered the same change some time ago, and decided against it to
> avoid deviating from pydash.  StopIteration would be a programming error
> here.
>
> Please *delete* the function instead: commit fd62bff901b removed its
> last use.  I'd do it in a separate commit, but that's up to you.
>

Oh! I didn't realize it wasn't being used. That's certainly easier :)


> >  for item in iterable:
> >  yield separator
> >  yield item
> > @@ -451,6 +460,10 @@ def get_document_nodes(self):
> >  return self._top_node.children
> >
> >
> > +# Turn the black formatter on for the rest of the file.
> > +# fmt: on
> > +
> > +
> >  class QAPISchemaGenDepVisitor(QAPISchemaVisitor):
> >  """A QAPI schema visitor which adds Sphinx dependencies each module
> >
> > @@ -458,34 +471,34 @@ class QAPISchemaGenDepVisitor(QAPISchemaVisitor):
> >  that the generated documentation output depends on the input
> >  schema file associated with each module in the QAPI input.
> >  """
> > +
> >  def __init__(self, env, qapidir):
> >  self._env = env
> >  self._qapidir = qapidir
> >
> >  def visit_module(self, name):
> >  if name != "./builtin":
> > -qapifile = self._qapidir + '/' + name
> > +qapifile = self._qapidir + "/" + name
>
> The string literal quote changes are mildly annoying.  But since by your
> good work you're effectively appointing yourself maintainer of this
> file...  ;)
>

Mildly. However, I do think black is "close enough" on most style issues
that I have absolutely no regret or hesitation using it for all new python
development.

(I've been using it a lot in hobby code the last year and I find it to be
remarkably helpful for my own consistency in style issues, it's
indispensable for me.)

So in this series, I start using it because I essentially wind up rewriting
this entire file and wanted an autoformatter so I could shut my brain off
for stuff like this.

A "flag day" as you call it is likely coming soon to python/ where I'll
start enforcing black 

Re: [RFC PATCH v2 3/5] rust: add PL011 device model

2024-06-19 Thread Daniel P . Berrangé
On Wed, Jun 19, 2024 at 06:43:01PM +0200, Paolo Bonzini wrote:
> On 6/19/24 07:34, Richard Henderson wrote:
> > First silly question: how much of this is boiler plate that gets moved
> > the moment that the second rust subdirectory is added?
> 
> If my suggestion at 
> https://lore.kernel.org/qemu-devel/CABgObfaP7DRD8dbSKNmUzhZNyxeHWO0MztaW3_EFYt=vf6s...@mail.gmail.com/
> works, we'd have only two directories that have a Cargo.toml in it.  For
> example it could be rust/qemu/ (common code) and rust/hw/ (code that depends
> on Kconfig).
> 
> I think we can also have a rust/Cargo.toml file as in
> https://doc.rust-lang.org/cargo/reference/workspaces.html#virtual-workspace,
> and then the various configuration files under rust/ will be valid for all
> subpackages.
> 
> > > +[build]
> > > +rustflags = ["-Crelocation-model=pic", "-Ctarget-feature=+crt-static"]
> > 
> > It seems certain that this is not specific to pl011, and will be common
> > to other rust subdirectories.  Or, given the .cargo directory name, is
> > this generated by cargo and committed by mistake?
> 
> -Crelocation-model should be pie.  But also, I am not sure it works because I
> think it's always going to be overridden by cargo_wrapper.py? See
> https://doc.rust-lang.org/cargo/reference/config.html#buildrustflags.
> 
> (I'm not sure what +crt-static is for).
> 
> I think the generate_cfg_flags() mechanism of cargo_wrapper.py has to be
> rewritten from Python to Rust and moved to build.rs (using
> cargo::rustc-cfg).  By doing this, the cfg flags are added to whatever is in
> .cargo/config.toml, rather than replaced.
> 
> > > diff --git a/rust/pl011/.gitignore b/rust/pl011/.gitignore
> > > new file mode 100644
> > > index 00..28a02c847f
> > > --- /dev/null
> > > +++ b/rust/pl011/.gitignore
> > > @@ -0,0 +1,2 @@
> > > +target
> > > +src/generated.rs.inc
> > 
> > Is this a symptom of generating files into the source directory and not
> > build directory?
> 
> If I understand correctly, Manos considered two possible ways to invoke
> cargo on the Rust code:
> 
> - directly, in which case you need to copy the generated source file to
> rust/pl011/src/generated.rs.inc, because cargo does not know where the build
> tree
> 
> - do everything through meson, which does the right thing because
> cargo_wrapper.py knows about the build tree and passes the information.
> 
> To avoid this, the first workflow could be adjusted so that cargo can still
> be invoked directly, _but from the build tree_, not the source tree.  For
> example configure could generate a .../build/.cargo/config.toml with
> 
>[env]
>MESON_BUILD_ROOT = ".../build"
> 
> (extra advantage: -Crelocation-model=pie can be added based on
> --enable-pie/--disable-pie).  configure can also create symlinks in the
> build tree for the source tree's rust/, Cargo.toml and Cargo.lock.
> 
> This allows rust/pl011/src/generated.rs (which probably will become
> something like rust/common/src/generated.rs) to be:
> 
>include!(concat!(env!("MESON_BUILD_ROOT"), "/generated.rs.inc"));
> 
> when cargo is invoked from the build tree.
> 
> Putting everything together you'd have
> 
>build/
>  rust/
>.cargo/
>  config.toml   # generated by configure or meson.build
>Cargo.toml  # workspace generated by configure or meson.build
>Cargo.lock  # can be either linked to srctree or generated
>qemu/   # symlink to srctree/rust/qemu
>aarch64-softmmu-hw/
>  Cargo.toml# generated by meson.build (*)
>  src/  # symlink to srctree/rust/hw/
>i386-softmmu-hw/
>  Cargo.toml# generated by meson.build
>  src/  # symlink to srctree/rust/hw/
>generated/  # files generated by rust/generated/meson.build

If we're generating a build tree to invoke cargo on, can we then
avoid creating a completely separate dir hierarchy in the source
tree rooted at /rust, and just have rust source within our existing
hierarchy.

eg

aarch64-softmmu-hw/
  Cargo.toml# generated by meson.build (*)
  src/
pl011/  # symlink to srctree/hw/p1011/


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [RFC PATCH v2 1/5] build-sys: Add rust feature option

2024-06-19 Thread Richard Henderson

On 6/11/24 03:33, Manos Pitsidianakis wrote:

+++ b/scripts/cargo_wrapper.py
@@ -0,0 +1,211 @@
+#!/usr/bin/env python3
+# Copyright (c) 2020 Red Hat, Inc.
+# Copyright (c) 2023 Linaro Ltd.
+#
+# Authors:
+#  Manos Pitsidianakis
+#  Marc-André Lureau
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+
+import argparse
+import configparser
+import distutils.file_util
+import json
+import logging
+import os
+import os.path
+import re
+import subprocess
+import sys
+import pathlib
+import shutil
+import tomllib


Correct me if I'm wrong, but does this require python 3.11 for tomllib?
AFAIK, we're limited to assuming 3.9 from Debian 11 until Feb 2026, or
3.10 from Ubuntu 22.04 until Apr 2026.

I presume this package can be downloaded from pip, and therefore should be added to the 
python venv that we create in configure?



r~



Re: [RFC PATCH v2 3/5] rust: add PL011 device model

2024-06-19 Thread Paolo Bonzini

On 6/19/24 07:34, Richard Henderson wrote:
First silly question: how much of this is boiler plate that gets moved 
the moment that the second rust subdirectory is added?


If my suggestion at 
https://lore.kernel.org/qemu-devel/CABgObfaP7DRD8dbSKNmUzhZNyxeHWO0MztaW3_EFYt=vf6s...@mail.gmail.com/ 
works, we'd have only two directories that have a Cargo.toml in it.  For 
example it could be rust/qemu/ (common code) and rust/hw/ (code that 
depends on Kconfig).


I think we can also have a rust/Cargo.toml file as in 
https://doc.rust-lang.org/cargo/reference/workspaces.html#virtual-workspace, 
and then the various configuration files under rust/ will be valid for 
all subpackages.



+[build]
+rustflags = ["-Crelocation-model=pic", "-Ctarget-feature=+crt-static"]


It seems certain that this is not specific to pl011, and will be common to other rust 
subdirectories.  Or, given the .cargo directory name, is this generated by cargo and 
committed by mistake?


-Crelocation-model should be pie.  But also, I am not sure it works 
because I think it's always going to be overridden by cargo_wrapper.py? 
See https://doc.rust-lang.org/cargo/reference/config.html#buildrustflags.


(I'm not sure what +crt-static is for).

I think the generate_cfg_flags() mechanism of cargo_wrapper.py has to be 
rewritten from Python to Rust and moved to build.rs (using 
cargo::rustc-cfg).  By doing this, the cfg flags are added to whatever 
is in .cargo/config.toml, rather than replaced.



diff --git a/rust/pl011/.gitignore b/rust/pl011/.gitignore
new file mode 100644
index 00..28a02c847f
--- /dev/null
+++ b/rust/pl011/.gitignore
@@ -0,0 +1,2 @@
+target
+src/generated.rs.inc


Is this a symptom of generating files into the source directory and not 
build directory?


If I understand correctly, Manos considered two possible ways to invoke 
cargo on the Rust code:


- directly, in which case you need to copy the generated source file to 
rust/pl011/src/generated.rs.inc, because cargo does not know where the 
build tree is


- do everything through meson, which does the right thing because 
cargo_wrapper.py knows about the build tree and passes the information.


To avoid this, the first workflow could be adjusted so that cargo can 
still be invoked directly, _but from the build tree_, not the source 
tree.  For example configure could generate a 
.../build/.cargo/config.toml with


   [env]
   MESON_BUILD_ROOT = ".../build"

(extra advantage: -Crelocation-model=pie can be added based on 
--enable-pie/--disable-pie).  configure can also create symlinks in the 
build tree for the source tree's rust/, Cargo.toml and Cargo.lock.


This allows rust/pl011/src/generated.rs (which probably will become 
something like rust/common/src/generated.rs) to be:


   include!(concat!(env!("MESON_BUILD_ROOT"), "/generated.rs.inc"));

when cargo is invoked from the build tree.

Putting everything together you'd have

   build/
 rust/
   .cargo/
 config.toml   # generated by configure or meson.build
   Cargo.toml  # workspace generated by configure or meson.build
   Cargo.lock  # can be either linked to srctree or generated
   qemu/   # symlink to srctree/rust/qemu
   aarch64-softmmu-hw/
 Cargo.toml# generated by meson.build (*)
 src/  # symlink to srctree/rust/hw/
   i386-softmmu-hw/
 Cargo.toml# generated by meson.build
 src/  # symlink to srctree/rust/hw/
   generated/  # files generated by rust/generated/meson.build

(*) these have to be generated to change the package name, so 
configure_file() seems like a good match for it.


This is suspiciously similar to what tests/tcg/ looks like, except that 
tests/tcg/*/Makefile is just a symbolic link.  I tried creating a 
similar directory structure in a toy project, and it seemed to work...


Second silly question: does this really need to be committed to the 
repository? It *appears* to be specific to the host+os-version of the 
build.  It is certainly very specific about versions and checksums...


Generally the idea is that libraries should not commit it, while 
applications should commit it.  The idea is that the Cargo.lock file 
reproduces a working configuration, and dependencies are updated to 
known-good releases when CI passes.  I don't think I like this, but it 
is what it is.  I ascribe it to me being from the Jurassic.


But for now I would consider not committing Cargo.lock, because we don't 
have the infrastructure to do that automatic dependency update (assuming 
we want it).  But we could generate it at release time so that it is 
included in tarballs, and create the symlink from 
srctree/rust/Cargo.lock into build/rust/ only if the file is present in 
the source tree.



diff --git a/rust/pl011/Cargo.toml b/rust/pl011/Cargo.toml
[...]
+# bilge deps included here to include them with docs
+[dependencies]
+arbitrary-int = { version = "1.2.7" }
+bilge = { version = "0.2.0" }

Re: [PATCH v4 1/5] ppc/pnv: Add SPI model

2024-06-19 Thread Chalapathi V

Hello Cedric,

Thank You for reviewing this patch series.

Regards,

Chalapathi

On 18-06-2024 21:18, Cédric Le Goater wrote:

Hello Chalapathi,

On 6/17/24 6:54 PM, Chalapathi V wrote:
SPI controller device model supports a connection to a single SPI 
responder.
This provides access to SPI seeproms, TPM, flash device and an ADC 
controller.


All SPI function control is mapped into the SPI register space to 
enable full
control by firmware. In this commit SPI configuration component is 
modelled
which contains all SPI configuration and status registers as well as 
the hold

registers for data to be sent or having been received.

An existing QEMU SSI framework is used and SSI_BUS is created.

Signed-off-by: Chalapathi V 
---
  include/hw/ppc/pnv_xscom.h    |   5 +-
  include/hw/ssi/pnv_spi.h  |  59 ++
  include/hw/ssi/pnv_spi_regs.h | 114 +++
  hw/ssi/pnv_spi.c  | 202 ++
  hw/ppc/Kconfig    |   3 +
  hw/ssi/Kconfig    |   4 +
  hw/ssi/meson.build    |   1 +
  hw/ssi/trace-events   |   6 +
  8 files changed, 393 insertions(+), 1 deletion(-)
  create mode 100644 include/hw/ssi/pnv_spi.h
  create mode 100644 include/hw/ssi/pnv_spi_regs.h
  create mode 100644 hw/ssi/pnv_spi.c

diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h
index 6209e18492..0020dd172f 100644
--- a/include/hw/ppc/pnv_xscom.h
+++ b/include/hw/ppc/pnv_xscom.h
@@ -21,9 +21,9 @@
  #define PPC_PNV_XSCOM_H
    #include "exec/memory.h"
-#include "hw/ppc/pnv.h"
    typedef struct PnvXScomInterface PnvXScomInterface;
+typedef struct PnvChip PnvChip;
    #define TYPE_PNV_XSCOM_INTERFACE "pnv-xscom-interface"
  #define PNV_XSCOM_INTERFACE(obj) \


Could please provide the above change in its own patch ?

Sure.



@@ -194,6 +194,9 @@ struct PnvXScomInterfaceClass {
  #define PNV10_XSCOM_PEC_PCI_BASE   0x8010800 /* index goes upwards 
... */

  #define PNV10_XSCOM_PEC_PCI_SIZE   0x200
  +#define PNV10_XSCOM_PIB_SPIC_BASE 0xc
+#define PNV10_XSCOM_PIB_SPIC_SIZE 0x20
+
  void pnv_xscom_init(PnvChip *chip, uint64_t size, hwaddr addr);
  int pnv_dt_xscom(PnvChip *chip, void *fdt, int root_offset,
   uint64_t xscom_base, uint64_t xscom_size,
diff --git a/include/hw/ssi/pnv_spi.h b/include/hw/ssi/pnv_spi.h
new file mode 100644
index 00..71c53d4a17
--- /dev/null
+++ b/include/hw/ssi/pnv_spi.h
@@ -0,0 +1,59 @@
+/*
+ * QEMU PowerPC SPI model
+ *
+ * Copyright (c) 2024, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * This model supports a connection to a single SPI responder.
+ * Introduced for P10 to provide access to SPI seeproms, TPM, flash 
device

+ * and an ADC controller.
+ */
+#include "hw/ssi/ssi.h"
+#include "hw/sysbus.h"


The include should be protected by PPC_PNV_SPI_H also.


+#ifndef PPC_PNV_SPI_H
+#define PPC_PNV_SPI_H
+
+/* Useful macros */


/* Macros from target/ppc/cpu.h */

please explain why they are copied in the comment and move them in the .c
file for now. We don't want them to conflict with other definitions.


Sure. Will move these to pnv_spi_regs.h as they are used in 
pnv_spi_regs.h and pnv_spi.c


Thank You

Chalapathi




+#define PPC_BIT(bit) (0x8000ULL >> (bit))
+#define PPC_BIT8(bit)   (0x80 >> (bit))
+#define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | 
PPC_BIT(bs))
+#define PPC_BITMASK8(bs, be)    ((PPC_BIT8(bs) - PPC_BIT8(be)) | 
PPC_BIT8(bs))

+#define MASK_TO_LSH(m)  (__builtin_ffsll(m) - 1)
+#define GETFIELD(m, v)  (((v) & (m)) >> MASK_TO_LSH(m))
+#define SETFIELD(m, v, val) \
+    (((v) & ~(m)) | typeof(v))(val)) << MASK_TO_LSH(m)) & (m)))


same comment for _FDT() which comes from include/hw/ppc/fdt.h.

Sure.


The rest looks OK.

Thanks,

C.




+#define _FDT(exp)  \
+    do {   \
+    int _ret = (exp);  \
+    if (_ret < 0) {    \
+    error_report("error creating device tree: %s: %s", \
+    #exp, fdt_strerror(_ret)); \
+ exit(1);   \
+ }  \
+    } while (0)
+
+#define TYPE_PNV_SPI "pnv-spi"
+OBJECT_DECLARE_SIMPLE_TYPE(PnvSpi, PNV_SPI)
+
+#define PNV_SPI_REG_SIZE 8
+#define PNV_SPI_REGS 7
+
+#define TYPE_PNV_SPI_BUS "pnv-spi-bus"
+typedef struct PnvSpi {
+    SysBusDevice parent_obj;
+
+    SSIBus *ssi_bus;
+    qemu_irq *cs_line;
+    MemoryRegion    xscom_spic_regs;
+    /* SPI object number */
+    uint32_t    spic_num;
+
+    /* SPI registers */
+    uint64_t    regs[PNV_SPI_REGS];
+    uint8_t seq_op[PNV_SPI_REG_SIZE];
+    uint64_t    status;
+} PnvSpi;
+#endif /* PPC_PNV_SPI_H */
diff --git a/include/hw/ssi/pnv_spi_regs.h 

Re: [PATCH] hw/gpio/aspeed: Add bounds checking for register table access

2024-06-19 Thread Philippe Mathieu-Daudé

On 19/6/24 08:49, Zheyu Ma wrote:

Hi Andrew,

On Wed, Jun 19, 2024 at 1:58 AM Andrew Jeffery 
mailto:and...@codeconstruct.com.au>> wrote:


Hello Zheyu Ma,

On Tue, 2024-06-18 at 15:09 +0200, Zheyu Ma wrote:
 > Added bounds checking in the aspeed_gpio_read() and
aspeed_gpio_write()
 > functions to ensure the index idx is within the valid range of the
 > reg_table array.
 >
 > The correct size of reg_table is determined dynamically based on
whether
 > it is aspeed_3_3v_gpios or aspeed_1_8v_gpios. If idx exceeds the
 > size of reg_table, an error is logged, and the function returns.
 >
 > AddressSanitizer log indicating the issue:
 >
 > ==2602930==ERROR: AddressSanitizer: global-buffer-overflow on
address 0x55a5da29e128 at pc 0x55a5d700dc62 bp 0x7fff096c4e90 sp
0x7fff096c4e88
 > READ of size 2 at 0x55a5da29e128 thread T0
 >     #0 0x55a5d700dc61 in aspeed_gpio_read
hw/gpio/aspeed_gpio.c:564:14
 >     #1 0x55a5d933f3ab in memory_region_read_accessor
system/memory.c:445:11
 >     #2 0x55a5d92fba40 in access_with_adjusted_size
system/memory.c:573:18
 >     #3 0x55a5d92f842c in memory_region_dispatch_read1
system/memory.c:1426:16
 >     #4 0x55a5d92f7b68 in memory_region_dispatch_read
system/memory.c:1459:9
 >     #5 0x55a5d9376ad1 in flatview_read_continue_step
system/physmem.c:2836:18
 >     #6 0x55a5d9376399 in flatview_read_continue
system/physmem.c:2877:19
 >     #7 0x55a5d93775b8 in flatview_read system/physmem.c:2907:12

I'm mildly interested in what you were doing to trigger this. Certainly
we could do with a guard in the model to prevent it, but I'm curious
all the same.


Actually, I'm doing the virtual device fuzzing test and trying to 
discover bugs.


Could you share the reproducer? (As you did in your other patches,
it is very useful to reproduce).



 >
 > Signed-off-by: Zheyu Ma mailto:zheyum...@gmail.com>>
 > ---
 >  hw/gpio/aspeed_gpio.c | 26 ++
 >  1 file changed, 26 insertions(+)
 >
 > diff --git a/hw/gpio/aspeed_gpio.c b/hw/gpio/aspeed_gpio.c
 > index c1781e2ba3..1441046f6c 100644
 > --- a/hw/gpio/aspeed_gpio.c
 > +++ b/hw/gpio/aspeed_gpio.c
 > @@ -550,6 +550,7 @@ static uint64_t aspeed_gpio_read(void
*opaque, hwaddr offset, uint32_t size)
 >      GPIOSets *set;
 >      uint32_t value = 0;
 >      uint64_t debounce_value;
 > +    uint32_t reg_table_size;
 >
 >      idx = offset >> 2;
 >      if (idx >= GPIO_DEBOUNCE_TIME_1 && idx <=
GPIO_DEBOUNCE_TIME_3) {
 > @@ -559,6 +560,18 @@ static uint64_t aspeed_gpio_read(void
*opaque, hwaddr offset, uint32_t size)
 >          return debounce_value;
 >      }
 >
 > +    if (agc->reg_table == aspeed_3_3v_gpios) {
 > +        reg_table_size = GPIO_3_3V_REG_ARRAY_SIZE;
 > +    } else {
 > +        reg_table_size = GPIO_1_8V_REG_ARRAY_SIZE;
 > +    }

I think I'd prefer we add reg_table_size as a member of AspeedGPIOClass
and initialise it at the same time as we initialise reg_table. I feel
it would help maintain safety in the face of future changes (i.e. if
another reg table were introduced). With that approach the hunk above
can be dropped.

 > +
 > +    if (idx >= reg_table_size) {

This condition would then become:

```
if (idx >= agc->reg_table_size) {
```

Thoughts?


I agree with you, adding a new member is a more maintainable way, I'll 
send a v2 patch, thanks!


Zheyu





Re: [PATCH] configure: detect --cpu=mipsisa64r6

2024-06-19 Thread Philippe Mathieu-Daudé

On 19/6/24 13:46, Paolo Bonzini wrote:

Treat it as a MIPS64 machine.

Signed-off-by: Paolo Bonzini 
---
  configure | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)


Reviewed-by: Philippe Mathieu-Daudé 





Re: [PATCH v14 12/14] virtio-gpu: Handle resource blob commands

2024-06-19 Thread Dmitry Osipenko
16.06.2024 12:23, Akihiko Odaki пишет:
...
>>   #endif
>>   +#if VIRGL_VERSION_MAJOR >= 1
>> +typedef enum {
>> +    HOSTMEM_MR_MAPPED,
> 
> HOSTMEM_MR_MAPPED is no longer used.

Good catch

-- 
Best regards,
Dmitry




Re: [PATCH v14 12/14] virtio-gpu: Handle resource blob commands

2024-06-19 Thread Dmitry Osipenko
19.06.2024 18:27, Alex Bennée пишет:
> Dmitry Osipenko  writes:
> 
>> From: Antonio Caggiano 
>>
>> Support BLOB resources creation, mapping and unmapping by calling the
>> new stable virglrenderer 0.10 interface. Only enabled when available and
>> via the blob config. E.g. -device virtio-vga-gl,blob=true
>>
> 
>>  
>>  #if VIRGL_VERSION_MAJOR >= 1
>> +static void virgl_cmd_resource_create_blob(VirtIOGPU *g,
>> +   struct virtio_gpu_ctrl_command 
>> *cmd)
>> +{
>> +struct virgl_renderer_resource_create_blob_args virgl_args = { 0 };
>> +g_autofree struct virtio_gpu_virgl_resource *res;
> 
> Newer compilers rightly complain that g_free may be called on an
> uninitialised value (if we early return). Setting to NULL should be
> enough here.

Good catch! GCC 13 doesn't detect it

-- 
Best regards,
Dmitry




[PATCH v2 3/6] target/riscv: Add support for Control Transfer Records extension CSRs.

2024-06-19 Thread Rajnesh Kanwal
This commit adds support for handling of the [m|s|vs]ctrcontrol, sctrstatus
and sctrdepth CSRs.

Signed-off-by: Rajnesh Kanwal 
---
 target/riscv/cpu.h |   5 ++
 target/riscv/cpu_cfg.h |   2 +
 target/riscv/csr.c | 128 +
 3 files changed, 135 insertions(+)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index a185e2d494..3d4d5172b8 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -263,6 +263,11 @@ struct CPUArchState {
 target_ulong mcause;
 target_ulong mtval;  /* since: priv-1.10.0 */
 
+uint64_t mctrctl;
+uint32_t sctrdepth;
+uint32_t sctrstatus;
+uint64_t vsctrctl;
+
 /* Machine and Supervisor interrupt priorities */
 uint8_t miprio[64];
 uint8_t siprio[64];
diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
index d9354dc80a..d329a65811 100644
--- a/target/riscv/cpu_cfg.h
+++ b/target/riscv/cpu_cfg.h
@@ -123,6 +123,8 @@ struct RISCVCPUConfig {
 bool ext_zvfhmin;
 bool ext_smaia;
 bool ext_ssaia;
+bool ext_smctr;
+bool ext_ssctr;
 bool ext_sscofpmf;
 bool ext_smepmp;
 bool rvv_ta_all_1s;
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 2f92e4b717..0b5bf4d050 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -621,6 +621,48 @@ static RISCVException pointer_masking(CPURISCVState *env, 
int csrno)
 return RISCV_EXCP_ILLEGAL_INST;
 }
 
+/*
+ * M-mode:
+ * Without ext_smctr raise illegal inst excep.
+ * Otherwise everything is accessible to m-mode.
+ *
+ * S-mode:
+ * Without ext_ssctr or mstateen.ctr raise illegal inst excep.
+ * Otherwise everything other than mctrctl is accessible.
+ *
+ * VS-mode:
+ * Without ext_ssctr or mstateen.ctr raise illegal inst excep.
+ * Without hstateen.ctr raise virtual illegal inst excep.
+ * Otherwise allow sctrctl (vsctrctl), sctrstatus, 0x200-0x2ff entry range.
+ * Always raise illegal instruction exception for sctrdepth.
+ */
+static RISCVException ctr_mmode(CPURISCVState *env, int csrno)
+{
+/* Check if smctr-ext is present */
+if (riscv_cpu_cfg(env)->ext_smctr) {
+return RISCV_EXCP_NONE;
+}
+
+return RISCV_EXCP_ILLEGAL_INST;
+}
+
+static RISCVException ctr_smode(CPURISCVState *env, int csrno)
+{
+const RISCVCPUConfig *cfg = riscv_cpu_cfg(env);
+
+if (!cfg->ext_smctr && !cfg->ext_ssctr) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+
+RISCVException ret = smstateen_acc_ok(env, 0, SMSTATEEN0_CTR);
+if (ret == RISCV_EXCP_NONE && csrno == CSR_SCTRDEPTH &&
+env->virt_enabled) {
+return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
+}
+
+return ret;
+}
+
 static RISCVException aia_hmode(CPURISCVState *env, int csrno)
 {
 int ret;
@@ -3835,6 +3877,86 @@ static RISCVException write_satp(CPURISCVState *env, int 
csrno,
 return RISCV_EXCP_NONE;
 }
 
+static RISCVException rmw_sctrdepth(CPURISCVState *env, int csrno,
+target_ulong *ret_val,
+target_ulong new_val, target_ulong wr_mask)
+{
+uint64_t mask = wr_mask & SCTRDEPTH_MASK;
+
+if (ret_val) {
+*ret_val = env->sctrdepth;
+}
+
+env->sctrdepth = (env->sctrdepth & ~mask) | (new_val & mask);
+
+/* Correct depth. */
+if (wr_mask & SCTRDEPTH_MASK) {
+uint64_t depth = get_field(env->sctrdepth, SCTRDEPTH_MASK);
+
+if (depth > SCTRDEPTH_MAX) {
+depth = SCTRDEPTH_MAX;
+env->sctrdepth = set_field(env->sctrdepth, SCTRDEPTH_MASK, depth);
+}
+
+/* Update sctrstatus.WRPTR with a legal value */
+depth = 16 << depth;
+env->sctrstatus =
+env->sctrstatus & (~SCTRSTATUS_WRPTR_MASK | (depth - 1));
+}
+
+return RISCV_EXCP_NONE;
+}
+
+static RISCVException rmw_sctrstatus(CPURISCVState *env, int csrno,
+ target_ulong *ret_val,
+ target_ulong new_val, target_ulong 
wr_mask)
+{
+uint32_t depth = 16 << get_field(env->sctrdepth, SCTRDEPTH_MASK);
+uint32_t mask = wr_mask & SCTRSTATUS_MASK;
+
+if (ret_val) {
+*ret_val = env->sctrstatus;
+}
+
+env->sctrstatus = (env->sctrstatus & ~mask) | (new_val & mask);
+
+/* Update sctrstatus.WRPTR with a legal value */
+env->sctrstatus = env->sctrstatus & (~SCTRSTATUS_WRPTR_MASK | (depth - 1));
+
+return RISCV_EXCP_NONE;
+}
+
+static RISCVException rmw_xctrctl(CPURISCVState *env, int csrno,
+target_ulong *ret_val,
+target_ulong new_val, target_ulong wr_mask)
+{
+uint64_t csr_mask, mask = wr_mask;
+uint64_t *ctl_ptr = >mctrctl;
+
+if (csrno == CSR_MCTRCTL) {
+csr_mask = MCTRCTL_MASK;
+} else if (csrno == CSR_SCTRCTL && !env->virt_enabled) {
+csr_mask = SCTRCTL_MASK;
+} else {
+/*
+ * This is for csrno == CSR_SCTRCTL and env->virt_enabled == true
+ * or csrno == 

[PATCH v2 5/6] target/riscv: Add CTR sctrclr instruction.

2024-06-19 Thread Rajnesh Kanwal
CTR extension adds a new instruction sctrclr to quickly
clear the recorded entries buffer.

Signed-off-by: Rajnesh Kanwal 
---
 target/riscv/cpu.h|  1 +
 target/riscv/cpu_helper.c |  7 
 target/riscv/helper.h |  1 +
 target/riscv/insn32.decode|  1 +
 .../riscv/insn_trans/trans_privileged.c.inc   | 10 ++
 target/riscv/op_helper.c  | 33 +++
 6 files changed, 53 insertions(+)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index e32f5ab146..fdc18a782a 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -572,6 +572,7 @@ void riscv_cpu_set_mode(CPURISCVState *env, target_ulong 
newpriv, bool virt_en);
 void riscv_ctr_freeze(CPURISCVState *env, uint64_t freeze_mask, bool virt);
 void riscv_ctr_add_entry(CPURISCVState *env, target_long src, target_long dst,
  uint64_t type, target_ulong prev_priv, bool 
prev_virt);
+void riscv_ctr_clear(CPURISCVState *env);
 
 void riscv_translate_init(void);
 G_NORETURN void riscv_raise_exception(CPURISCVState *env,
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 1537602e1b..d98628cfe3 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -702,6 +702,13 @@ void riscv_ctr_freeze(CPURISCVState *env, uint64_t 
freeze_mask, bool virt)
 }
 }
 
+void riscv_ctr_clear(CPURISCVState *env)
+{
+memset(env->ctr_src, 0x0, sizeof(env->ctr_src));
+memset(env->ctr_dst, 0x0, sizeof(env->ctr_dst));
+memset(env->ctr_data, 0x0, sizeof(env->ctr_data));
+}
+
 static uint64_t riscv_ctr_priv_to_mask(target_ulong priv, bool virt)
 {
 switch (priv) {
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index b8fb7c8734..a3b2d87527 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -131,6 +131,7 @@ DEF_HELPER_6(csrrw_i128, tl, env, int, tl, tl, tl, tl)
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_2(sret, tl, env, tl)
 DEF_HELPER_2(mret, tl, env, tl)
+DEF_HELPER_1(ctr_clear, void, env)
 DEF_HELPER_1(wfi, void, env)
 DEF_HELPER_1(wrs_nto, void, env)
 DEF_HELPER_1(tlb_flush, void, env)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 9cb1a1b4ec..d3d38c7c68 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -107,6 +107,7 @@
 # *** Privileged Instructions ***
 ecall    0 000 0 1110011
 ebreak  0001 0 000 0 1110011
+sctrclr 00010100 0 000 0 1110011
 uret00000010 0 000 0 1110011
 sret000100000010 0 000 0 1110011
 mret001100000010 0 000 0 1110011
diff --git a/target/riscv/insn_trans/trans_privileged.c.inc 
b/target/riscv/insn_trans/trans_privileged.c.inc
index 339d659151..dd9da8651f 100644
--- a/target/riscv/insn_trans/trans_privileged.c.inc
+++ b/target/riscv/insn_trans/trans_privileged.c.inc
@@ -69,6 +69,16 @@ static bool trans_ebreak(DisasContext *ctx, arg_ebreak *a)
 return true;
 }
 
+static bool trans_sctrclr(DisasContext *ctx, arg_sctrclr *a)
+{
+#ifndef CONFIG_USER_ONLY
+gen_helper_ctr_clear(tcg_env);
+return true;
+#else
+return false;
+#endif
+}
+
 static bool trans_uret(DisasContext *ctx, arg_uret *a)
 {
 return false;
diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c
index 5a1e92c45e..15a770360e 100644
--- a/target/riscv/op_helper.c
+++ b/target/riscv/op_helper.c
@@ -475,6 +475,39 @@ void helper_ctr_branch(CPURISCVState *env, target_ulong 
src, target_ulong dest,
 }
 }
 
+void helper_ctr_clear(CPURISCVState *env)
+{
+if (!riscv_cpu_cfg(env)->ext_ssctr && !riscv_cpu_cfg(env)->ext_smctr) {
+riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+}
+
+/*
+ * It's safe to call smstateen_acc_ok() for umode access regardless of the
+ * state of bit 54 (CTR bit in case of m/hstateen) of sstateen. If the bit
+ * is zero, smstateen_acc_ok() will return the correct exception code and
+ * if it's one, smstateen_acc_ok() will return RISCV_EXCP_NONE. In that
+ * scenario the U-mode check below will handle that case.
+ */
+RISCVException ret = smstateen_acc_ok(env, 0, SMSTATEEN0_CTR);
+if (ret != RISCV_EXCP_NONE) {
+riscv_raise_exception(env, ret, GETPC());
+}
+
+if (env->priv == PRV_U) {
+/*
+ * One corner case is when sctrclr is executed from VU-mode and
+ * mstateen.CTR = 0, in which case we are supposed to raise
+ * RISCV_EXCP_ILLEGAL_INST. This case is already handled in
+ * smstateen_acc_ok().
+ */
+uint32_t excep = env->virt_enabled ? RISCV_EXCP_VIRT_INSTRUCTION_FAULT 
:
+RISCV_EXCP_ILLEGAL_INST;
+riscv_raise_exception(env, excep, GETPC());
+}
+
+riscv_ctr_clear(env);
+}
+
 void helper_wfi(CPURISCVState *env)
 {
 CPUState *cs = env_cpu(env);
-- 
2.34.1




[PATCH v2 6/6] target/riscv: Add support to access ctrsource, ctrtarget, ctrdata regs.

2024-06-19 Thread Rajnesh Kanwal
CTR entries are accessed using ctrsource, ctrtarget and ctrdata
registers using smcsrind/sscsrind extension. This commits extends
the csrind extension to support CTR registers.

ctrsource is accessible through xireg CSR, ctrtarget is accessible
through xireg1 and ctrdata is accessible through xireg2 CSR.

CTR supports maximum depth of 256 entries which are accessed using
xiselect range 0x200 to 0x2ff.

This commits also adds properties to enable CTR extension. CTR can be
enabled using smctr=true and ssctr=true now.

Signed-off-by: Rajnesh Kanwal 
---
 target/riscv/cpu.c |   4 +
 target/riscv/csr.c | 148 -
 target/riscv/tcg/tcg-cpu.c |   6 ++
 3 files changed, 157 insertions(+), 1 deletion(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 30bdfc22ae..a77b1d5caf 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -193,6 +193,8 @@ const RISCVIsaExtData isa_edata_arr[] = {
 ISA_EXT_DATA_ENTRY(sstvala, PRIV_VERSION_1_12_0, has_priv_1_12),
 ISA_EXT_DATA_ENTRY(sstvecd, PRIV_VERSION_1_12_0, has_priv_1_12),
 ISA_EXT_DATA_ENTRY(svade, PRIV_VERSION_1_11_0, ext_svade),
+ISA_EXT_DATA_ENTRY(smctr, PRIV_VERSION_1_12_0, ext_smctr),
+ISA_EXT_DATA_ENTRY(ssctr, PRIV_VERSION_1_12_0, ext_ssctr),
 ISA_EXT_DATA_ENTRY(svadu, PRIV_VERSION_1_12_0, ext_svadu),
 ISA_EXT_DATA_ENTRY(svinval, PRIV_VERSION_1_12_0, ext_svinval),
 ISA_EXT_DATA_ENTRY(svnapot, PRIV_VERSION_1_12_0, ext_svnapot),
@@ -1473,6 +1475,8 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
 MULTI_EXT_CFG_BOOL("sscsrind", ext_sscsrind, false),
 MULTI_EXT_CFG_BOOL("smcdeleg", ext_smcdeleg, false),
 MULTI_EXT_CFG_BOOL("ssccfg", ext_ssccfg, false),
+MULTI_EXT_CFG_BOOL("smctr", ext_smctr, false),
+MULTI_EXT_CFG_BOOL("ssctr", ext_ssctr, false),
 MULTI_EXT_CFG_BOOL("zifencei", ext_zifencei, true),
 MULTI_EXT_CFG_BOOL("zicsr", ext_zicsr, true),
 MULTI_EXT_CFG_BOOL("zihintntl", ext_zihintntl, true),
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 0b5bf4d050..3ed9f95a4f 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -2278,6 +2278,13 @@ static bool xiselect_cd_range(target_ulong isel)
 return (ISELECT_CD_FIRST <= isel && isel <= ISELECT_CD_LAST);
 }
 
+static bool xiselect_ctr_range(int csrno, target_ulong isel)
+{
+/* MIREG-MIREG6 for the range 0x200-0x2ff are not used by CTR. */
+return CTR_ENTRIES_FIRST <= isel && isel <= CTR_ENTRIES_LAST &&
+   csrno < CSR_MIREG;
+}
+
 static int rmw_iprio(target_ulong xlen,
  target_ulong iselect, uint8_t *iprio,
  target_ulong *val, target_ulong new_val,
@@ -2323,6 +2330,124 @@ static int rmw_iprio(target_ulong xlen,
 return 0;
 }
 
+static int rmw_ctrsource(CPURISCVState *env, int isel, target_ulong *val,
+  target_ulong new_val, target_ulong wr_mask)
+{
+/*
+ * CTR arrays are treated as circular buffers and TOS always points to next
+ * empty slot, keeping TOS - 1 always pointing to latest entry. Given entry
+ * 0 is always the latest one, traversal is a bit different here. See the
+ * below example.
+ *
+ * Depth = 16.
+ *
+ * idx[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [A] [B] [C] [D] [E] [F]
+ * TOS H
+ * entry   6   5   4   3   2   1   0   F   E   D   C   B   A   9   8   7
+ */
+const uint64_t entry = isel - CTR_ENTRIES_FIRST;
+const uint64_t depth = 16 << get_field(env->sctrdepth, SCTRDEPTH_MASK);
+uint64_t idx;
+
+/* Entry greater than depth-1 is read-only zero */
+if (entry >= depth) {
+if (val) {
+*val = 0;
+}
+return 0;
+}
+
+idx = get_field(env->sctrstatus, SCTRSTATUS_WRPTR_MASK);
+idx = (idx - entry - 1) & (depth - 1);
+
+if (val) {
+*val = env->ctr_src[idx];
+}
+
+env->ctr_src[idx] = (env->ctr_src[idx] & ~wr_mask) | (new_val & wr_mask);
+
+return 0;
+}
+
+static int rmw_ctrtarget(CPURISCVState *env, int isel, target_ulong *val,
+  target_ulong new_val, target_ulong wr_mask)
+{
+/*
+ * CTR arrays are treated as circular buffers and TOS always points to next
+ * empty slot, keeping TOS - 1 always pointing to latest entry. Given entry
+ * 0 is always the latest one, traversal is a bit different here. See the
+ * below example.
+ *
+ * Depth = 16.
+ *
+ * idx[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [A] [B] [C] [D] [E] [F]
+ * headH
+ * entry   6   5   4   3   2   1   0   F   E   D   C   B   A   9   8   7
+ */
+const uint64_t entry = isel - CTR_ENTRIES_FIRST;
+const uint64_t depth = 16 << get_field(env->sctrdepth, SCTRDEPTH_MASK);
+uint64_t idx;
+
+/* Entry greater than depth-1 is read-only zero */
+if (entry >= depth) {
+if (val) {
+*val = 0;
+}
+   

[PATCH v2 0/6] target/riscv: Add support for Control Transfer Records Ext.

2024-06-19 Thread Rajnesh Kanwal
This series enables Control Transfer Records extension support on riscv
platform. This extension is similar to Arch LBR in x86 and BRBE in ARM.
The Extension has been stable and the latest release can be found here [0]

CTR extension depends on couple of other extensions:

1. S[m|s]csrind : The indirect CSR extension [1] which defines additional
   ([M|S|VS]IREG2-[M|S|VS]IREG6) register to address size limitation of
   RISC-V CSR address space. CTR accesses the ctrsource, ctrtarget and ctrdata
   CSRs using sscsrind extension.

2. Smstateen: The mstateen bit[54] controls the access to the CTR ext to
   S-mode.

3. Sscofpmf: Counter overflow and privilege mode filtering. [2]

The series is based on Smcdeleg/Ssccfg counter delegation extension [3]
patches. CTR itself doesn't depend on counter delegation support. This
rebase is basically to include the Smcsrind patches.

Due to the dependency of these extensions, the following extensions must be
enabled to use the control transfer records feature.

"smstateen=true,sscofpmf=true,smcsrind=true,sscsrind=true,smctr=true,ssctr=true"

Here is the link to a quick guide [5] to setup and run a basic perf demo on
Linux to use CTR Ext.

The Qemu patches can be found here:
https://github.com/rajnesh-kanwal/qemu/tree/ctr_upstream_v2

The opensbi patch can be found here:
https://github.com/rajnesh-kanwal/opensbi/tree/ctr_upstream_v2

The Linux kernel patches can be found here:
https://github.com/rajnesh-kanwal/linux/tree/ctr_upstream_v2

[0]: https://github.com/riscv/riscv-control-transfer-records/release
[1]: https://github.com/riscv/riscv-indirect-csr-access
[2]: https://github.com/riscvarchive/riscv-count-overflow/tree/main
[3]: https://github.com/riscv/riscv-smcdeleg-ssccfg
[4]: https://lore.kernel.org/all/20240217000134.3634191-1-ati...@rivosinc.com/
[5]: 
https://github.com/rajnesh-kanwal/linux/wiki/Running-CTR-basic-demo-on-QEMU-RISC%E2%80%90V-Virt-machine

Changelog:

v2: Lots of improvements based on Jason Chien's feedback including:
  - Added CTR recording for cm.jalt, cm.jt, cm.popret, cm.popretz.
  - Fixed and added more CTR extension enable checks.
  - Fixed CTR CSR predicate functions.
  - Fixed external trap xTE bit checks.
  - One fix in freeze function for VS-mode.
  - Lots of minor code improvements.
  - Added checks in sctrclr instruction helper.

v1:
  - https://github.com/rajnesh-kanwal/qemu/tree/ctr_upstream


Rajnesh Kanwal (6):
  target/riscv: Remove obsolete sfence.vm instruction
  target/riscv: Add Control Transfer Records CSR definitions.
  target/riscv: Add support for Control Transfer Records extension CSRs.
  target/riscv: Add support to record CTR entries.
  target/riscv: Add CTR sctrclr instruction.
  target/riscv: Add support to access ctrsource, ctrtarget, ctrdata
regs.

 target/riscv/cpu.c|   4 +
 target/riscv/cpu.h|  14 +
 target/riscv/cpu_bits.h   | 154 ++
 target/riscv/cpu_cfg.h|   2 +
 target/riscv/cpu_helper.c | 265 +
 target/riscv/csr.c| 276 +-
 target/riscv/helper.h |   9 +-
 target/riscv/insn32.decode|   2 +-
 .../riscv/insn_trans/trans_privileged.c.inc   |  21 +-
 target/riscv/insn_trans/trans_rvi.c.inc   |  31 ++
 target/riscv/insn_trans/trans_rvzce.c.inc |  20 ++
 target/riscv/op_helper.c  | 159 +-
 target/riscv/tcg/tcg-cpu.c|   6 +
 target/riscv/translate.c  |  10 +
 14 files changed, 960 insertions(+), 13 deletions(-)

-- 
2.34.1




[PATCH v2 4/6] target/riscv: Add support to record CTR entries.

2024-06-19 Thread Rajnesh Kanwal
This commit adds logic to record CTR entries of different types
and adds required hooks in TCG and interrupt/Exception logic to
record events.

This commit also adds support to invoke freeze CTR logic for breakpoint
exceptions and counter overflow interrupts.

Signed-off-by: Rajnesh Kanwal 
---
 target/riscv/cpu.h|   8 +
 target/riscv/cpu_helper.c | 258 ++
 target/riscv/helper.h |   8 +-
 .../riscv/insn_trans/trans_privileged.c.inc   |   6 +-
 target/riscv/insn_trans/trans_rvi.c.inc   |  31 +++
 target/riscv/insn_trans/trans_rvzce.c.inc |  20 ++
 target/riscv/op_helper.c  | 126 -
 target/riscv/translate.c  |  10 +
 8 files changed, 461 insertions(+), 6 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 3d4d5172b8..e32f5ab146 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -268,6 +268,10 @@ struct CPUArchState {
 uint32_t sctrstatus;
 uint64_t vsctrctl;
 
+uint64_t ctr_src[16 << SCTRDEPTH_MAX];
+uint64_t ctr_dst[16 << SCTRDEPTH_MAX];
+uint64_t ctr_data[16 << SCTRDEPTH_MAX];
+
 /* Machine and Supervisor interrupt priorities */
 uint8_t miprio[64];
 uint8_t siprio[64];
@@ -565,6 +569,10 @@ RISCVException smstateen_acc_ok(CPURISCVState *env, int 
index, uint64_t bit);
 #endif
 void riscv_cpu_set_mode(CPURISCVState *env, target_ulong newpriv, bool 
virt_en);
 
+void riscv_ctr_freeze(CPURISCVState *env, uint64_t freeze_mask, bool virt);
+void riscv_ctr_add_entry(CPURISCVState *env, target_long src, target_long dst,
+ uint64_t type, target_ulong prev_priv, bool 
prev_virt);
+
 void riscv_translate_init(void);
 G_NORETURN void riscv_raise_exception(CPURISCVState *env,
   uint32_t exception, uintptr_t pc);
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index a441a03ef4..1537602e1b 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -691,6 +691,246 @@ void riscv_cpu_set_aia_ireg_rmw_fn(CPURISCVState *env, 
uint32_t priv,
 }
 }
 
+void riscv_ctr_freeze(CPURISCVState *env, uint64_t freeze_mask, bool virt)
+{
+uint64_t ctl = virt ? env->mctrctl : env->vsctrctl;
+
+assert((freeze_mask & (~(MCTRCTL_BPFRZ | MCTRCTL_LCOFIFRZ))) == 0);
+
+if (ctl & freeze_mask) {
+env->sctrstatus |= SCTRSTATUS_FROZEN;
+}
+}
+
+static uint64_t riscv_ctr_priv_to_mask(target_ulong priv, bool virt)
+{
+switch (priv) {
+case PRV_M:
+return MCTRCTL_M_ENABLE;
+case PRV_S:
+if (virt) {
+return VSCTRCTL_VS_ENABLE;
+}
+return MCTRCTL_S_ENABLE;
+case PRV_U:
+if (virt) {
+return VSCTRCTL_VU_ENABLE;
+}
+return MCTRCTL_U_ENABLE;
+}
+
+g_assert_not_reached();
+}
+
+static uint64_t riscv_ctr_get_control(CPURISCVState *env, target_long priv,
+  bool virt)
+{
+switch (priv) {
+case PRV_M:
+return env->mctrctl;
+case PRV_S:
+case PRV_U:
+if (virt) {
+return env->vsctrctl;
+}
+return env->mctrctl;
+}
+
+g_assert_not_reached();
+}
+
+/*
+ * This function assumes that src privilege and target privilege are not same
+ * and src privilege is less than target privilege. This includes the virtual
+ * state as well.
+ */
+static bool riscv_ctr_check_xte(CPURISCVState *env, target_long src_prv,
+bool src_virt)
+{
+target_long tgt_prv = env->priv;
+bool res = true;
+
+/*
+ * VS and U mode are same in terms of xTE bits required to record an
+ * external trap. See 6.1.2. External Traps, table 8 External Trap Enable
+ * Requirements. This changes VS to U to simplify the logic a bit.
+ */
+if (src_virt && src_prv == PRV_S) {
+src_prv = PRV_U;
+} else if (env->virt_enabled && tgt_prv == PRV_S) {
+tgt_prv = PRV_U;
+}
+
+/* VU mode is an outlier here. */
+if (src_virt && src_prv == PRV_U) {
+res &= !!(env->vsctrctl & VSCTRCTL_VSTE);
+}
+
+switch (src_prv) {
+case PRV_U:
+if (tgt_prv == PRV_U) {
+break;
+}
+res &= !!(env->mctrctl & SCTRCTL_STE);
+/* fall-through */
+case PRV_S:
+if (tgt_prv == PRV_S) {
+break;
+}
+res &= !!(env->mctrctl & MCTRCTL_MTE);
+/* fall-through */
+case PRV_M:
+break;
+}
+
+return res;
+}
+
+/*
+ * Special cases for traps and trap returns:
+ *
+ * 1- Traps, and trap returns, between enabled modes are recorded as normal.
+ * 2- Traps from an inhibited mode to an enabled mode, and trap returns from an
+ * enabled mode back to an inhibited mode, are partially recorded.  In such
+ * cases, the PC from the inhibited mode (source PC for traps, and target PC
+ * for trap returns) is 0.
+ *
+ * 3- 

[PATCH v2 2/6] target/riscv: Add Control Transfer Records CSR definitions.

2024-06-19 Thread Rajnesh Kanwal
The Control Transfer Records (CTR) extension provides a method to
record a limited branch history in register-accessible internal chip
storage.

This extension is similar to Arch LBR in x86 and BRBE in ARM.
The Extension has been stable and the latest release can be found here
https://github.com/riscv/riscv-control-transfer-records/release

Signed-off-by: Rajnesh Kanwal 
---
 target/riscv/cpu_bits.h | 154 
 1 file changed, 154 insertions(+)

diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index 86e15381c8..71ddccaf1a 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -242,6 +242,17 @@
 #define CSR_SIEH0x114
 #define CSR_SIPH0x154
 
+/* Machine-Level Control transfer records CSRs */
+#define CSR_MCTRCTL 0x34e
+
+/* Supervisor-Level Control transfer records CSRs */
+#define CSR_SCTRCTL 0x14e
+#define CSR_SCTRSTATUS  0x14f
+#define CSR_SCTRDEPTH   0x15f
+
+/* VS-Level Control transfer records CSRs */
+#define CSR_VSCTRCTL0x24e
+
 /* Hpervisor CSRs */
 #define CSR_HSTATUS 0x600
 #define CSR_HEDELEG 0x602
@@ -339,6 +350,7 @@
 #define SMSTATEEN0_CS   (1ULL << 0)
 #define SMSTATEEN0_FCSR (1ULL << 1)
 #define SMSTATEEN0_JVT  (1ULL << 2)
+#define SMSTATEEN0_CTR  (1ULL << 54)
 #define SMSTATEEN0_HSCONTXT (1ULL << 57)
 #define SMSTATEEN0_IMSIC(1ULL << 58)
 #define SMSTATEEN0_AIA  (1ULL << 59)
@@ -854,6 +866,148 @@ typedef enum RISCVException {
 #define UMTE_U_PM_INSN  U_PM_INSN
 #define UMTE_MASK (UMTE_U_PM_ENABLE | MMTE_U_PM_CURRENT | UMTE_U_PM_INSN)
 
+/* mctrctl CSR bits. */
+#define MCTRCTL_U_ENABLEBIT(0)
+#define MCTRCTL_S_ENABLEBIT(1)
+#define MCTRCTL_M_ENABLEBIT(2)
+#define MCTRCTL_RASEMU  BIT(7)
+#define MCTRCTL_STE BIT(8)
+#define MCTRCTL_MTE BIT(9)
+#define MCTRCTL_BPFRZ   BIT(11)
+#define MCTRCTL_LCOFIFRZBIT(12)
+#define MCTRCTL_EXCINH  BIT(33)
+#define MCTRCTL_INTRINH BIT(34)
+#define MCTRCTL_TRETINH BIT(35)
+#define MCTRCTL_NTBREN  BIT(36)
+#define MCTRCTL_TKBRINH BIT(37)
+#define MCTRCTL_INDCALL_INH BIT(40)
+#define MCTRCTL_DIRCALL_INH BIT(41)
+#define MCTRCTL_INDJUMP_INH BIT(42)
+#define MCTRCTL_DIRJUMP_INH BIT(43)
+#define MCTRCTL_CORSWAP_INH BIT(44)
+#define MCTRCTL_RET_INH BIT(45)
+#define MCTRCTL_INDOJUMP_INHBIT(46)
+#define MCTRCTL_DIROJUMP_INHBIT(47)
+
+#define MCTRCTL_INH_START   32U
+
+#define MCTRCTL_MASK (MCTRCTL_M_ENABLE | MCTRCTL_S_ENABLE |   \
+  MCTRCTL_U_ENABLE | MCTRCTL_RASEMU | \
+  MCTRCTL_MTE | MCTRCTL_STE | \
+  MCTRCTL_BPFRZ | MCTRCTL_LCOFIFRZ |  \
+  MCTRCTL_EXCINH | MCTRCTL_INTRINH |  \
+  MCTRCTL_TRETINH | MCTRCTL_NTBREN |  \
+  MCTRCTL_TKBRINH | MCTRCTL_INDCALL_INH | \
+  MCTRCTL_DIRCALL_INH | MCTRCTL_INDJUMP_INH | \
+  MCTRCTL_DIRJUMP_INH | MCTRCTL_CORSWAP_INH | \
+  MCTRCTL_RET_INH | MCTRCTL_INDOJUMP_INH |\
+  MCTRCTL_DIROJUMP_INH)
+
+/* sctrctl CSR bits. */
+#define SCTRCTL_U_ENABLE  MCTRCTL_U_ENABLE
+#define SCTRCTL_S_ENABLE  MCTRCTL_S_ENABLE
+#define SCTRCTL_RASEMUMCTRCTL_RASEMU
+#define SCTRCTL_STE   MCTRCTL_STE
+#define SCTRCTL_BPFRZ MCTRCTL_BPFRZ
+#define SCTRCTL_LCOFIFRZ  MCTRCTL_LCOFIFRZ
+#define SCTRCTL_EXCINHMCTRCTL_EXCINH
+#define SCTRCTL_INTRINH   MCTRCTL_INTRINH
+#define SCTRCTL_TRETINH   MCTRCTL_TRETINH
+#define SCTRCTL_NTBRENMCTRCTL_NTBREN
+#define SCTRCTL_TKBRINH   MCTRCTL_TKBRINH
+#define SCTRCTL_INDCALL_INH   MCTRCTL_INDCALL_INH
+#define SCTRCTL_DIRCALL_INH   MCTRCTL_DIRCALL_INH
+#define SCTRCTL_INDJUMP_INH   MCTRCTL_INDJUMP_INH
+#define SCTRCTL_DIRJUMP_INH   MCTRCTL_DIRJUMP_INH
+#define SCTRCTL_CORSWAP_INH   MCTRCTL_CORSWAP_INH
+#define SCTRCTL_RET_INH   MCTRCTL_RET_INH
+#define SCTRCTL_INDOJUMP_INH  MCTRCTL_INDOJUMP_INH
+#define SCTRCTL_DIROJUMP_INH  MCTRCTL_DIROJUMP_INH
+
+#define SCTRCTL_MASK (SCTRCTL_S_ENABLE | SCTRCTL_U_ENABLE |   \
+  SCTRCTL_RASEMU | SCTRCTL_STE |  \
+  SCTRCTL_BPFRZ | SCTRCTL_LCOFIFRZ |  \
+  SCTRCTL_EXCINH | SCTRCTL_INTRINH |  \
+  SCTRCTL_TRETINH | SCTRCTL_NTBREN |  \
+  SCTRCTL_TKBRINH | SCTRCTL_INDCALL_INH | \
+  SCTRCTL_DIRCALL_INH | SCTRCTL_INDJUMP_INH | \
+  SCTRCTL_DIRJUMP_INH | SCTRCTL_CORSWAP_INH | \
+  SCTRCTL_RET_INH | SCTRCTL_INDOJUMP_INH |\
+  SCTRCTL_DIROJUMP_INH)
+
+/* 

[PATCH v2 1/6] target/riscv: Remove obsolete sfence.vm instruction

2024-06-19 Thread Rajnesh Kanwal
Signed-off-by: Rajnesh Kanwal 
Reviewed-by: Alistair Francis 
---
 target/riscv/insn32.decode | 1 -
 target/riscv/insn_trans/trans_privileged.c.inc | 5 -
 2 files changed, 6 deletions(-)

diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index f22df04cfd..9cb1a1b4ec 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -112,7 +112,6 @@ sret000100000010 0 000 0 1110011
 mret001100000010 0 000 0 1110011
 wfi 000100000101 0 000 0 1110011
 sfence_vma  0001001. . 000 0 1110011 @sfence_vma
-sfence_vm   000100000100 . 000 0 1110011 @sfence_vm
 
 # *** RV32I Base Instruction Set ***
 lui     . 0110111 @u
diff --git a/target/riscv/insn_trans/trans_privileged.c.inc 
b/target/riscv/insn_trans/trans_privileged.c.inc
index bc5263a4e0..4eccdddeaa 100644
--- a/target/riscv/insn_trans/trans_privileged.c.inc
+++ b/target/riscv/insn_trans/trans_privileged.c.inc
@@ -127,8 +127,3 @@ static bool trans_sfence_vma(DisasContext *ctx, 
arg_sfence_vma *a)
 #endif
 return false;
 }
-
-static bool trans_sfence_vm(DisasContext *ctx, arg_sfence_vm *a)
-{
-return false;
-}
-- 
2.34.1




Re: [PATCH v14 12/14] virtio-gpu: Handle resource blob commands

2024-06-19 Thread Alex Bennée
Dmitry Osipenko  writes:

> From: Antonio Caggiano 
>
> Support BLOB resources creation, mapping and unmapping by calling the
> new stable virglrenderer 0.10 interface. Only enabled when available and
> via the blob config. E.g. -device virtio-vga-gl,blob=true
>

>  
>  #if VIRGL_VERSION_MAJOR >= 1
> +static void virgl_cmd_resource_create_blob(VirtIOGPU *g,
> +   struct virtio_gpu_ctrl_command 
> *cmd)
> +{
> +struct virgl_renderer_resource_create_blob_args virgl_args = { 0 };
> +g_autofree struct virtio_gpu_virgl_resource *res;

Newer compilers rightly complain that g_free may be called on an
uninitialised value (if we early return). Setting to NULL should be
enough here.



-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro



Re: [PATCH 9/9] contrib/plugins: add ips plugin example for cost modeling

2024-06-19 Thread Pierrick Bouvier

On 6/19/24 02:49, Alex Bennée wrote:

Pierrick Bouvier  writes:


On 6/18/24 02:53, Alex Bennée wrote:

Pierrick Bouvier  writes:


On 6/17/24 13:56, Dr. David Alan Gilbert wrote:

* Pierrick Bouvier (pierrick.bouv...@linaro.org) wrote:

On 6/14/24 15:00, Dr. David Alan Gilbert wrote:

* Pierrick Bouvier (pierrick.bouv...@linaro.org) wrote:

Hi Dave,

On 6/12/24 14:02, Dr. David Alan Gilbert wrote:

* Alex Bennée (alex.ben...@linaro.org) wrote:

From: Pierrick Bouvier 

This plugin uses the new time control interface to make decisions
about the state of time during the emulation. The algorithm is
currently very simple. The user specifies an ips rate which applies
per core. If the core runs ahead of its allocated execution time the
plugin sleeps for a bit to let real time catch up. Either way time is
updated for the emulation as a function of total executed instructions
with some adjustments for cores that idle.


A few random thoughts:
   a) Are there any definitions of what a plugin that controls time
  should do with a live migration?


It's not something that was considered as part of this work.


That's OK, the only thing is we need to stop anyone from hitting problems
when they don't realise it's not been addressed.
One way might be to add a migration blocker; see include/migration/blocker.h
then you might print something like 'Migration not available due to plugin '



So basically, we could make a call to migrate_add_blocker(), when someone
request time_control through plugin API?

IMHO, it's something that should be part of plugin API (if any plugin calls
qemu_plugin_request_time_control()), instead of the plugin code itself. This
way, any plugin getting time control automatically blocks any potential
migration.

Note my question asked for a 'any definitions of what a plugin ..' -
so
you could define it that way, another one is to think that in the future
you may allow it and the plugin somehow interacts with migration not to
change time at certain migration phases.



I would be in favor to forbid usage for now in this context. I'm not
sure why people would play with migration and plugins generally at
this time (there might be experiments or use cases I'm not aware of),
so a simple barrier preventing that seems ok.

This plugin is part of an experiment where we implement a qemu feature
(icount=auto in this case) by using plugins. If it turns into a
successful usage and this plugin becomes popular, we can always lift
the limitation later.

@Alex, would you like to add this now (icount=auto is still not
removed from qemu), or wait for integration, and add this as another
patch?

I think we follow the deprecation process so once integrated we post
a
deprecation notice in:
https://qemu.readthedocs.io/en/master/about/deprecated.html
and then remove it after a couple of releases.



Sorry, I was not clear. I meant do we add a blocker in case someone
tries to migrate a vm while this plugin is used?


Oh yes - I can add that in the core plugin code.



Thanks!


Re: [PATCH] hw/core: Rename CpuTopology to CPUTopology

2024-06-19 Thread Thomas Huth

On 19/06/2024 16.49, Zhao Liu wrote:

Hi maintainers,

Per my communication with Markus, it seems this renaming matches the
"local consistency" principle in (include/hw/boards.h). :-)

So do you think this change is acceptable?


I don't care too much, both ways of naming look acceptable to me...
... but in case somebody else wants to merge this, FWIW:

s390x parts
Acked-by: Thomas Huth 



On Mon, May 27, 2024 at 09:18:37PM +0800, Zhao Liu wrote:

Date: Mon, 27 May 2024 21:18:37 +0800
From: Zhao Liu 
Subject: [PATCH] hw/core: Rename CpuTopology to CPUTopology
X-Mailer: git-send-email 2.34.1

Use CPUTopology to honor the generic style of CPU capitalization
abbreviations.

Signed-off-by: Zhao Liu 
---
  * Split from the previous SMP cache RFC:

https://lore.kernel.org/qemu-devel/20240220092504.726064-2-zhao1@linux.intel.com/
---
  hw/s390x/cpu-topology.c |  6 +++---
  include/hw/boards.h |  8 
  include/hw/s390x/cpu-topology.h |  6 +++---
  tests/unit/test-smp-parse.c | 14 +++---
  4 files changed, 17 insertions(+), 17 deletions(-)








[PATCH] hw/intc/s390_flic: Fix interrupt controller migration on s390x with TCG

2024-06-19 Thread Thomas Huth
Migration of a s390x guest with TCG was long known to be very unstable,
so the tests in tests/qtest/migration-test.c are disabled if running
with TCG instead of KVM.

Nicholas Piggin did a great analysis of the problem:

"The flic pending state is not migrated, so if the machine is migrated
 while an interrupt is pending, it can be lost. This shows up in
 qtest migration test, an extint is pending (due to console writes?)
 and the CPU waits via s390_cpu_set_psw and expects the interrupt to
 wake it. However when the flic pending state is lost, s390_cpu_has_int
 returns false, so s390_cpu_exec_interrupt falls through to halting
 again."

Thus let's finally migrate the pending state, and to be on the safe
side, also the other state variables of the QEMUS390FLICState structure.

Signed-off-by: Thomas Huth 
---
 Once this has been merged, we can enable the migration-test again
 with Nicholas' patch here:
 https://lore.kernel.org/qemu-devel/20240525131241.378473-3-npig...@gmail.com/

 include/hw/s390x/s390_flic.h |  1 +
 hw/intc/s390_flic.c  | 75 ++--
 hw/s390x/s390-virtio-ccw.c   |  5 +++
 3 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/include/hw/s390x/s390_flic.h b/include/hw/s390x/s390_flic.h
index 382d9833f1..4d66c5e42e 100644
--- a/include/hw/s390x/s390_flic.h
+++ b/include/hw/s390x/s390_flic.h
@@ -116,6 +116,7 @@ struct QEMUS390FLICState {
 uint8_t simm;
 uint8_t nimm;
 QLIST_HEAD(, QEMUS390FlicIO) io[8];
+bool migrate_all_state;
 };
 
 uint32_t qemu_s390_flic_dequeue_service(QEMUS390FLICState *flic);
diff --git a/hw/intc/s390_flic.c b/hw/intc/s390_flic.c
index 6771645699..a91a4a47e8 100644
--- a/hw/intc/s390_flic.c
+++ b/hw/intc/s390_flic.c
@@ -361,15 +361,77 @@ bool ais_needed(void *opaque)
 return s->ais_supported;
 }
 
+static bool ais_needed_v(void *opaque, int version_id)
+{
+return ais_needed(opaque);
+}
+
+static bool qemu_s390_flic_full_state_needed(void *opaque)
+{
+QEMUS390FLICState *s = opaque;
+
+return s->migrate_all_state;
+}
+
+static bool qemu_s390_flic_state_needed(void *opaque)
+{
+return ais_needed(opaque) || qemu_s390_flic_full_state_needed(opaque);
+}
+
+static const VMStateDescription vmstate_qemu_s390_flic_io = {
+ .name = "qemu-s390-flic-io",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (const VMStateField[]) {
+ VMSTATE_UINT16(id, QEMUS390FlicIO),
+ VMSTATE_UINT16(nr, QEMUS390FlicIO),
+ VMSTATE_UINT32(parm, QEMUS390FlicIO),
+ VMSTATE_UINT32(word, QEMUS390FlicIO),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static const VMStateDescription vmstate_qemu_s390_flic_full = {
+.name = "qemu-s390-flic-full",
+.version_id = 1,
+.minimum_version_id = 1,
+.needed = qemu_s390_flic_full_state_needed,
+.fields = (const VMStateField[]) {
+VMSTATE_UINT32(pending, QEMUS390FLICState),
+VMSTATE_UINT32(service_param, QEMUS390FLICState),
+VMSTATE_QLIST_V(io[0], QEMUS390FLICState, 1,
+vmstate_qemu_s390_flic_io, QEMUS390FlicIO, next),
+VMSTATE_QLIST_V(io[1], QEMUS390FLICState, 1,
+vmstate_qemu_s390_flic_io, QEMUS390FlicIO, next),
+VMSTATE_QLIST_V(io[2], QEMUS390FLICState, 1,
+vmstate_qemu_s390_flic_io, QEMUS390FlicIO, next),
+VMSTATE_QLIST_V(io[3], QEMUS390FLICState, 1,
+vmstate_qemu_s390_flic_io, QEMUS390FlicIO, next),
+VMSTATE_QLIST_V(io[4], QEMUS390FLICState, 1,
+vmstate_qemu_s390_flic_io, QEMUS390FlicIO, next),
+VMSTATE_QLIST_V(io[5], QEMUS390FLICState, 1,
+vmstate_qemu_s390_flic_io, QEMUS390FlicIO, next),
+VMSTATE_QLIST_V(io[6], QEMUS390FLICState, 1,
+vmstate_qemu_s390_flic_io, QEMUS390FlicIO, next),
+VMSTATE_QLIST_V(io[7], QEMUS390FLICState, 1,
+vmstate_qemu_s390_flic_io, QEMUS390FlicIO, next),
+VMSTATE_END_OF_LIST()
+}
+};
+
 static const VMStateDescription qemu_s390_flic_vmstate = {
 .name = "qemu-s390-flic",
 .version_id = 1,
 .minimum_version_id = 1,
-.needed = ais_needed,
+.needed = qemu_s390_flic_state_needed,
 .fields = (const VMStateField[]) {
-VMSTATE_UINT8(simm, QEMUS390FLICState),
-VMSTATE_UINT8(nimm, QEMUS390FLICState),
+VMSTATE_UINT8_TEST(simm, QEMUS390FLICState, ais_needed_v),
+VMSTATE_UINT8_TEST(nimm, QEMUS390FLICState, ais_needed_v),
 VMSTATE_END_OF_LIST()
+},
+.subsections = (const VMStateDescription * const []) {
+_qemu_s390_flic_full,
+NULL
 }
 };
 
@@ -383,11 +445,18 @@ static void qemu_s390_flic_instance_init(Object *obj)
 }
 }
 
+static Property qemu_s390_flic_properties[] = {
+DEFINE_PROP_BOOL("migrate-all-state", QEMUS390FLICState,
+ migrate_all_state, true),
+DEFINE_PROP_END_OF_LIST(),
+};
+
 

Re: [PATCH] hw/core: Rename CpuTopology to CPUTopology

2024-06-19 Thread Zhao Liu
Hi maintainers,

Per my communication with Markus, it seems this renaming matches the
"local consistency" principle in (include/hw/boards.h). :-)

So do you think this change is acceptable?

Thanks,
Zhao

On Mon, May 27, 2024 at 09:18:37PM +0800, Zhao Liu wrote:
> Date: Mon, 27 May 2024 21:18:37 +0800
> From: Zhao Liu 
> Subject: [PATCH] hw/core: Rename CpuTopology to CPUTopology
> X-Mailer: git-send-email 2.34.1
> 
> Use CPUTopology to honor the generic style of CPU capitalization
> abbreviations.
> 
> Signed-off-by: Zhao Liu 
> ---
>  * Split from the previous SMP cache RFC:
>
> https://lore.kernel.org/qemu-devel/20240220092504.726064-2-zhao1@linux.intel.com/
> ---
>  hw/s390x/cpu-topology.c |  6 +++---
>  include/hw/boards.h |  8 
>  include/hw/s390x/cpu-topology.h |  6 +++---
>  tests/unit/test-smp-parse.c | 14 +++---
>  4 files changed, 17 insertions(+), 17 deletions(-)
> 
> diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
> index f16bdf65faa0..016f6c1c15ac 100644
> --- a/hw/s390x/cpu-topology.c
> +++ b/hw/s390x/cpu-topology.c
> @@ -86,7 +86,7 @@ bool s390_has_topology(void)
>   */
>  static void s390_topology_init(MachineState *ms)
>  {
> -CpuTopology *smp = >smp;
> +CPUTopology *smp = >smp;
>  
>  s390_topology.cores_per_socket = g_new0(uint8_t, smp->sockets *
>  smp->books * smp->drawers);
> @@ -181,7 +181,7 @@ void s390_topology_reset(void)
>   */
>  static bool s390_topology_cpu_default(S390CPU *cpu, Error **errp)
>  {
> -CpuTopology *smp = _machine->smp;
> +CPUTopology *smp = _machine->smp;
>  CPUS390XState *env = >env;
>  
>  /* All geometry topology attributes must be set or all unset */
> @@ -234,7 +234,7 @@ static bool s390_topology_check(uint16_t socket_id, 
> uint16_t book_id,
>  uint16_t drawer_id, uint16_t entitlement,
>  bool dedicated, Error **errp)
>  {
> -CpuTopology *smp = _machine->smp;
> +CPUTopology *smp = _machine->smp;
>  
>  if (socket_id >= smp->sockets) {
>  error_setg(errp, "Unavailable socket: %d", socket_id);
> diff --git a/include/hw/boards.h b/include/hw/boards.h
> index 2fa800f11ae4..c1737f2a5736 100644
> --- a/include/hw/boards.h
> +++ b/include/hw/boards.h
> @@ -334,7 +334,7 @@ typedef struct DeviceMemoryState {
>  } DeviceMemoryState;
>  
>  /**
> - * CpuTopology:
> + * CPUTopology:
>   * @cpus: the number of present logical processors on the machine
>   * @drawers: the number of drawers on the machine
>   * @books: the number of books in one drawer
> @@ -346,7 +346,7 @@ typedef struct DeviceMemoryState {
>   * @threads: the number of threads in one core
>   * @max_cpus: the maximum number of logical processors on the machine
>   */
> -typedef struct CpuTopology {
> +typedef struct CPUTopology {
>  unsigned int cpus;
>  unsigned int drawers;
>  unsigned int books;
> @@ -357,7 +357,7 @@ typedef struct CpuTopology {
>  unsigned int cores;
>  unsigned int threads;
>  unsigned int max_cpus;
> -} CpuTopology;
> +} CPUTopology;
>  
>  /**
>   * MachineState:
> @@ -409,7 +409,7 @@ struct MachineState {
>  const char *cpu_type;
>  AccelState *accelerator;
>  CPUArchIdList *possible_cpus;
> -CpuTopology smp;
> +CPUTopology smp;
>  struct NVDIMMState *nvdimms_state;
>  struct NumaState *numa_state;
>  };
> diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
> index c064f427e948..ff09c57a4428 100644
> --- a/include/hw/s390x/cpu-topology.h
> +++ b/include/hw/s390x/cpu-topology.h
> @@ -63,17 +63,17 @@ static inline void s390_topology_reset(void)
>  
>  extern S390Topology s390_topology;
>  
> -static inline int s390_std_socket(int n, CpuTopology *smp)
> +static inline int s390_std_socket(int n, CPUTopology *smp)
>  {
>  return (n / smp->cores) % smp->sockets;
>  }
>  
> -static inline int s390_std_book(int n, CpuTopology *smp)
> +static inline int s390_std_book(int n, CPUTopology *smp)
>  {
>  return (n / (smp->cores * smp->sockets)) % smp->books;
>  }
>  
> -static inline int s390_std_drawer(int n, CpuTopology *smp)
> +static inline int s390_std_drawer(int n, CPUTopology *smp)
>  {
>  return (n / (smp->cores * smp->sockets * smp->books)) % smp->drawers;
>  }
> diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c
> index 9fdba24fce56..f51138794ca1 100644
> --- a/tests/unit/test-smp-parse.c
> +++ b/tests/unit/test-smp-parse.c
> @@ -120,8 +120,8 @@
>   */
>  typedef struct SMPTestData {
>  SMPConfiguration config;
> -CpuTopology expect_prefer_sockets;
> -CpuTopology expect_prefer_cores;
> +CPUTopology expect_prefer_sockets;
> +CPUTopology expect_prefer_cores;
>  const char *expect_error;
>  } SMPTestData;
>  
> @@ -643,7 +643,7 @@ static char *smp_config_to_string(const SMPConfiguration 
> *config)
>  }
>  
>  /* Use the different 

[PATCH 1/3] target/i386/cpu: Use hex mask to check for valid cache CPUID leaf

2024-06-19 Thread Zhao Liu
A hexadecimal mask is more intuitive compared to a decimal one.

Therefore convert the mask of bits 00-04 to hexadecimal value.

Signed-off-by: Zhao Liu 
---
 target/i386/cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 365852cb99e1..c4d4048ec32a 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6452,7 +6452,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
  * QEMU has its own number of cores/logical cpus,
  * set 24..14, 31..26 bit to configured values
  */
-if (*eax & 31) {
+if (*eax & 0x1f) {
 int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
 
 *eax &= ~0xFC00;
-- 
2.34.1




[PATCH 2/3] target/i386/cpu: Check guest_thread_ids_per_pkg for host-cache-info case

2024-06-19 Thread Zhao Liu
The CPUID[4].EAX[bits 25:14] encodes the "maximum number of addressable
IDs for logical processors", whose value may differ from the
actual number of threads.

For example, there's a Guest with a topology like this: 3 threads per core
and 3 cores per package. Its maximum ids for package level is 15 (0xf),
but it has 9 threads per package.

Therefore, using "threads_per_pkg" to check sharing threads overflow (out
of package scope) is not sufficient.

Use Guest's maximum ids for package level information to compare with
Host's.

Note the original check is stricter, but it can be mathematically proven
that the original check does not contain redundant case (e.g.
guest_thread_ids_per_pkg >= host_thread_ids_per_cache > threads_per_pkg,
which is impossible for the current QEMU APIC ID encoding rule).

Therefore, the behavior of this feature is consistent before and after
the change.

Signed-off-by: Zhao Liu 
---
 target/i386/cpu.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index c4d4048ec32a..c20ff69b7b65 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6453,16 +6453,22 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
  * set 24..14, 31..26 bit to configured values
  */
 if (*eax & 0x1f) {
-int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
+int host_thread_ids_per_cache;
+int guest_thread_ids_per_pkg;
 
 *eax &= ~0xFC00;
 *eax |= max_core_ids_in_package(_info) << 26;
-if (host_vcpus_per_cache > threads_per_pkg) {
+
+host_thread_ids_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
+guest_thread_ids_per_pkg =
+max_thread_ids_for_cache(_info,
+ CPU_TOPO_LEVEL_PACKAGE);
+
+if (host_thread_ids_per_cache > guest_thread_ids_per_pkg) {
 *eax &= ~0x3FFC000;
 
 /* Share the cache at package level. */
-*eax |= max_thread_ids_for_cache(_info,
-CPU_TOPO_LEVEL_PACKAGE) << 14;
+*eax |= guest_thread_ids_per_pkg << 14;
 }
 }
 } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) {
-- 
2.34.1




[PATCH 3/3] target/i386/cpu: Add comment about adjusting the Guest cache topo for host-cache-info

2024-06-19 Thread Zhao Liu
The host-cache-info needs the check to ensure the valid maximum
addressable thread IDs.

We don't need to adjust the information in this one field for all cache
topology cases by default, even though Host's cache topology may not
correspond to Guest's CPU topology level.

For example, when a Guest (3 threads per core) runs on a Host with 1
thread per core, the L2 cache topo (L2 per core on Host) obtained by
Guest does not correspond to the Guest's core level. So for the case
where the topology of Guest and Host are very inconsistent, it is not
possible to do a perfect job, so we try to let the Guest have similar
cache topo info as Host, at least in the case of an even distribution
of vCPUs, which can benefit the Guest internal scheduling.

To this end, add a comment to explain why we need to care for this check
and why we don't need to adjust the topology for all cache cases.

Signed-off-by: Zhao Liu 
---
 target/i386/cpu.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index c20ff69b7b65..71300ac6d197 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6463,7 +6463,15 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 guest_thread_ids_per_pkg =
 max_thread_ids_for_cache(_info,
  CPU_TOPO_LEVEL_PACKAGE);
-
+/*
+ * We handle this case because it causes sharing threads to
+ * overflow out of the package scope. In other cases, there
+ * is no need to adjust the cache topology info for the Guest,
+ * as the Host's maximum addressable thread IDs are not out of
+ * bounds in the Guest's APIC ID scope, and are always valid,
+ * even though Host's cache topology may not correspond to
+ * Guest's CPU topology level.
+ */
 if (host_thread_ids_per_cache > guest_thread_ids_per_pkg) {
 *eax &= ~0x3FFC000;
 
-- 
2.34.1




[PATCH 0/3] target/i386/cpu: Misc Cleanup on host-cache-info

2024-06-19 Thread Zhao Liu
Hi,

This series is mainly to address Igor's comment about if one check in
host-cache-info could be removed [1], i.e., whether Guest's cache
topology should be self-consistent (able to correspond to Guest's CPU
topology level, as we currently do with the Guest cache topo).

I originally thought (in the mail thread with Igor) that host-cache-info
should allow Guest and Host to have the same topology level information,
e.g. if Host shares cache on core level, then via host-cache-info, Guest
should also share on core level.

But in practice, I gave up on this idea, because in the cache info
passthrough case, it should be possible for Guest to get the original
Host cache info (including the original threads sharing cache) without
further modifying the info to Guest.

Therefore, I simply added the comment in PATCH 3 to hopefully illustrate
the need for such a check.

Hope my explanation is clear enough so that my poor English doesn't
bother you!

[1]: 
https://lore.kernel.org/qemu-devel/20240527170317.14520...@imammedo.users.ipa.redhat.com/

Thanks and Best Regards,
Zhao
---
Zhao Liu (3):
  target/i386/cpu: Use hex mask to check for valid cache CPUID leaf
  target/i386/cpu: Check guest_thread_ids_per_pkg for host-cache-info
case
  target/i386/cpu: Add comment about adjusting the Guest cache topo for
host-cache-info

 target/i386/cpu.c | 24 +++-
 1 file changed, 19 insertions(+), 5 deletions(-)

-- 
2.34.1




Re: [PATCH v3] hw/arm/virt-acpi-build: Fix id_count in build_iort_id_mapping

2024-06-19 Thread Eric Auger
Hi Nicolin,

On 6/18/24 23:11, Nicolin Chen wrote:
> It's observed that Linux kernel booting with the VM reports a "conflicting
> mapping for input ID" FW_BUG.
> 
> The IORT doc defines "Number of IDs" to be "the number of IDs in the range
> minus one", while virt-acpi-build.c simply stores the number of IDs in the
> id_count without the "minus one". Meanwhile, some of the callers pass in a
> 0x following the spec. So, this is a mismatch between the function and
> its callers.
> 
> Fix build_iort_id_mapping() by internally subtracting one from the pass-in
> @id_count. Accordingly make sure that all existing callers pass in a value
> without the "minus one", i.e. change all 0xs to 0x1s.
> 
> Also, add a few lines of comments to highlight this change along with the
> referencing document for this build_iort_id_mapping().
> 
> Fixes: 42e0f050e3a5 ("hw/arm/virt-acpi-build: Add IORT support to bypass 
> SMMUv3")
> Suggested-by: Michael S. Tsirkin 
> Signed-off-by: Nicolin Chen 
> ---
> Changelog
> v3:
>  * Added "-1" internally in build_iort_id_mapping() instead
>  * Added comments to highlight this and referencing doc
> v2:
>  https://lore.kernel.org/all/20240617223945.906996-1-nicol...@nvidia.com/
>  * Moved "-1" to the same line of id_count calculation
>  * Added "+1" to the next_range.input_base calculation
> v1:
>  https://lore.kernel.org/all/20240613234802.828265-1-nicol...@nvidia.com/
> 
>  hw/arm/virt-acpi-build.c | 20 ++--
>  1 file changed, 14 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
> index c3ccfef026..ee6f56b410 100644
> --- a/hw/arm/virt-acpi-build.c
> +++ b/hw/arm/virt-acpi-build.c
> @@ -209,12 +209,20 @@ static void acpi_dsdt_add_tpm(Aml *scope, 
> VirtMachineState *vms)
>  #define ROOT_COMPLEX_ENTRY_SIZE 36
>  #define IORT_NODE_OFFSET 48
>  
> +/*
> + * Input Output Remapping Table (IORT) -- Table 4 ID mapping format
> + * Conforms to "IO Remapping Table System Software on ARM Platforms",
> + * Document number: ARM DEN 0049E.b, Feb 2021
I would rather explain what the function does, i.e. append an ID mapping
entry as described in Table 4 ID Mapping format.

Also while at it you may use a more recent revision
There is DEN0049E_IO_Remapping_Table_E.f.pdf available

Besides:

Reviewed-by: Eric Auger 

Thanks

Eric



> + *
> + * Note that @id_count will be internally subtracted by one, following
> + * the IORT spec.
> + */
>  static void build_iort_id_mapping(GArray *table_data, uint32_t input_base,
>uint32_t id_count, uint32_t out_ref)
>  {
> -/* Table 4 ID mapping format */
>  build_append_int_noprefix(table_data, input_base, 4); /* Input base */
> -build_append_int_noprefix(table_data, id_count, 4); /* Number of IDs */
> +/* Number of IDs - The number of IDs in the range minus one */
> +build_append_int_noprefix(table_data, id_count - 1, 4);
>  build_append_int_noprefix(table_data, input_base, 4); /* Output base */
>  build_append_int_noprefix(table_data, out_ref, 4); /* Output Reference */
>  /* Flags */
> @@ -306,8 +314,8 @@ build_iort(GArray *table_data, BIOSLinker *linker, 
> VirtMachineState *vms)
>  }
>  
>  /* Append the last RC -> ITS ID mapping */
> -if (next_range.input_base < 0x) {
> -next_range.id_count = 0x - next_range.input_base;
> +if (next_range.input_base < 0x1) {
> +next_range.id_count = 0x1 - next_range.input_base;
>  g_array_append_val(its_idmaps, next_range);
>  }
>  
> @@ -366,7 +374,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, 
> VirtMachineState *vms)
>  build_append_int_noprefix(table_data, 0, 4);
>  
>  /* output IORT node is the ITS group node (the first node) */
> -build_iort_id_mapping(table_data, 0, 0x, IORT_NODE_OFFSET);
> +build_iort_id_mapping(table_data, 0, 0x1, IORT_NODE_OFFSET);
>  }
>  
>  /* Table 17 Root Complex Node */
> @@ -419,7 +427,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, 
> VirtMachineState *vms)
>  }
>  } else {
>  /* output IORT node is the ITS group node (the first node) */
> -build_iort_id_mapping(table_data, 0, 0x, IORT_NODE_OFFSET);
> +build_iort_id_mapping(table_data, 0, 0x1, IORT_NODE_OFFSET);
>  }
>  
>  acpi_table_end(linker, );




  1   2   >