date:20240202

[PULL v2 35/58] tcg/loongarch64: Set vector registers call clobbered

2024-02-02 Thread Richard Henderson

Because there are more call clobbered registers than
call saved registers, we begin with all registers as
call clobbered and then reset those that are saved.

This was missed when we introduced the LSX support.

Cc: qemu-sta...@nongnu.org
Fixes: 16288ded944 ("tcg/loongarch64: Lower basic tcg vec ops to LSX")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2136
Signed-off-by: Richard Henderson 
Reviewed-by: Song Gao 
Message-Id: <20240201233414.500588-1-richard.hender...@linaro.org>
---
 tcg/loongarch64/tcg-target.c.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index bab0a173a3..dcf0205458 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -2327,7 +2327,7 @@ static void tcg_target_init(TCGContext *s)
 tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
 tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
 
-tcg_target_call_clobber_regs = ALL_GENERAL_REGS;
+tcg_target_call_clobber_regs = ALL_GENERAL_REGS | ALL_VECTOR_REGS;
 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0);
 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1);
 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2);
-- 
2.34.1

[PULL v2 12/58] target/loongarch: Rename MMU_IDX_*

2024-02-02 Thread Richard Henderson

The expected form is MMU_FOO_IDX, not MMU_IDX_FOO.
Rename to match generic code.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 target/loongarch/cpu.h | 8 
 target/loongarch/cpu.c | 2 +-
 target/loongarch/cpu_helper.c  | 4 ++--
 target/loongarch/tcg/translate.c   | 2 +-
 target/loongarch/tcg/insn_trans/trans_privileged.c.inc | 2 +-
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 5dfcfeb3a4..47fd110e81 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -404,15 +404,15 @@ struct LoongArchCPUClass {
  */
 #define MMU_PLV_KERNEL   0
 #define MMU_PLV_USER 3
-#define MMU_IDX_KERNEL   MMU_PLV_KERNEL
-#define MMU_IDX_USER MMU_PLV_USER
-#define MMU_IDX_DA   4
+#define MMU_KERNEL_IDX   MMU_PLV_KERNEL
+#define MMU_USER_IDX MMU_PLV_USER
+#define MMU_DA_IDX   4
 
 int loongarch_cpu_mmu_index(CPUState *cs, bool ifetch);
 static inline int cpu_mmu_index(CPULoongArchState *env, bool ifetch)
 {
 #ifdef CONFIG_USER_ONLY
-return MMU_IDX_USER;
+return MMU_USER_IDX;
 #else
 return loongarch_cpu_mmu_index(env_cpu(env), ifetch);
 #endif
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index e96159943a..49ced9888e 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -382,7 +382,7 @@ int loongarch_cpu_mmu_index(CPUState *cs, bool ifetch)
 if (FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PG)) {
 return FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PLV);
 }
-return MMU_IDX_DA;
+return MMU_DA_IDX;
 }
 
 static void loongarch_la464_initfn(Object *obj)
diff --git a/target/loongarch/cpu_helper.c b/target/loongarch/cpu_helper.c
index f68d63f466..b0658773b0 100644
--- a/target/loongarch/cpu_helper.c
+++ b/target/loongarch/cpu_helper.c
@@ -171,8 +171,8 @@ int get_physical_address(CPULoongArchState *env, hwaddr 
*physical,
  int *prot, target_ulong address,
  MMUAccessType access_type, int mmu_idx)
 {
-int user_mode = mmu_idx == MMU_IDX_USER;
-int kernel_mode = mmu_idx == MMU_IDX_KERNEL;
+int user_mode = mmu_idx == MMU_USER_IDX;
+int kernel_mode = mmu_idx == MMU_KERNEL_IDX;
 uint32_t plv, base_c, base_v;
 int64_t addr_high;
 uint8_t da = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, DA);
diff --git a/target/loongarch/tcg/translate.c b/target/loongarch/tcg/translate.c
index 235515c629..58674cb268 100644
--- a/target/loongarch/tcg/translate.c
+++ b/target/loongarch/tcg/translate.c
@@ -125,7 +125,7 @@ static void 
loongarch_tr_init_disas_context(DisasContextBase *dcbase,
 if (ctx->base.tb->flags & HW_FLAGS_CRMD_PG) {
 ctx->mem_idx = ctx->plv;
 } else {
-ctx->mem_idx = MMU_IDX_DA;
+ctx->mem_idx = MMU_DA_IDX;
 }
 
 /* Bound the number of insns to execute to those left on the page.  */
diff --git a/target/loongarch/tcg/insn_trans/trans_privileged.c.inc 
b/target/loongarch/tcg/insn_trans/trans_privileged.c.inc
index 01d457212b..7e4ec93edb 100644
--- a/target/loongarch/tcg/insn_trans/trans_privileged.c.inc
+++ b/target/loongarch/tcg/insn_trans/trans_privileged.c.inc
@@ -323,7 +323,7 @@ TRANS(iocsrwr_d, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_d)
 
 static void check_mmu_idx(DisasContext *ctx)
 {
-if (ctx->mem_idx != MMU_IDX_DA) {
+if (ctx->mem_idx != MMU_DA_IDX) {
 tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next + 4);
 ctx->base.is_jmp = DISAS_EXIT;
 }
-- 
2.34.1

[PULL v2 58/58] linux-user/aarch64: Add padding before __kernel_rt_sigreturn

2024-02-02 Thread Richard Henderson

Without this padding, an unwind through the signal handler
will pick up the unwind info for the preceding syscall.

This fixes gcc's 30_threads/thread/native_handle/cancel.cc.

Cc: qemu-sta...@nongnu.org
Fixes: ee95fae075c6 ("linux-user/aarch64: Add vdso")
Resolves: https://linaro.atlassian.net/browse/GNU-974
Signed-off-by: Richard Henderson 
Reviewed-by: Alex Bennée 
Message-Id: <20240202034427.504686-1-richard.hender...@linaro.org>
---
 linux-user/aarch64/vdso-be.so | Bin 3216 -> 3224 bytes
 linux-user/aarch64/vdso-le.so | Bin 3216 -> 3224 bytes
 linux-user/aarch64/vdso.S |   4 
 3 files changed, 4 insertions(+)

diff --git a/linux-user/aarch64/vdso-be.so b/linux-user/aarch64/vdso-be.so
index 
6084f3d1a701316004894fcdd739c4e1e0463b68..808206ade824b09d786f6cc34f7cddf80b63130e
 100755
GIT binary patch
delta 121
zcmbOrIYV-SKI4pu2Kk&{7{Gw#%fuBAMC1c?^>~k}v|avdxNjSSLfftVb3bgJ!|2S&
z_-6A1CJrVZc?IUH8G;R$7#SF@Om<{a*v!K!^~TWO|cva$K*Om;sOMw`hy
ZxXl@VO#Z-a#EKC)of1

delta 116
zcmbOsIYDxQKI4Rm2Kk{Gw#!^9O2L>8U?-5V_M@!kH(Sx4vJn|*ujLPgija~Pc&
z8DDIEz{J5c`3;N8W)W6tCdL<&4cM*OEF8_CVnO!c?IUH8G;R$7#SF@Om<{a*v!K!!o>JyvLd?^n`3BUW_royOm=q`Mw`hS
dxy>1WOn%92lgFM@hy!9z%j7~Xc>tTxDQW-!

delta 108
zcmbOsIYDxQ2IGW@n)#d`SQx

[PULL v2 00/58] tcg patch queue

2024-02-02 Thread Richard Henderson

v2: Rebase and resolve target/loongarch conflicts.
Include linux-user/aarch64 vdso fix.

r~

The following changes since commit 29b008927ef6e3fbb70e6607b25d3fcae26a5190:

  Merge tag 'pull-nic-config-2-20240202' of 
git://git.infradead.org/users/dwmw2/qemu into staging (2024-02-02 16:47:36 
+)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20240202-2

for you to fetch changes up to 6400be014f80e4c2c246eb8be709ea3a96428233:

  linux-user/aarch64: Add padding before __kernel_rt_sigreturn (2024-02-03 
16:46:10 +1000)


tests/tcg: Fix multiarch/gdbstub/prot-none.py
hw/core: Convert cpu_mmu_index to a CPUClass hook
tcg/loongarch64: Set vector registers call clobbered
target/sparc: floating-point cleanup
linux-user/aarch64: Add padding before __kernel_rt_sigreturn


Ilya Leoshkevich (1):
  tests/tcg: Fix the /proc/self/mem probing in the PROT_NONE gdbstub test

Richard Henderson (57):
  include/hw/core: Add mmu_index to CPUClass
  target/alpha: Split out alpha_env_mmu_index
  target/alpha: Populate CPUClass.mmu_index
  target/arm: Split out arm_env_mmu_index
  target/arm: Populate CPUClass.mmu_index
  target/avr: Populate CPUClass.mmu_index
  target/cris: Cache mem_index in DisasContext
  target/cris: Populate CPUClass.mmu_index
  target/hppa: Populate CPUClass.mmu_index
  target/i386: Populate CPUClass.mmu_index
  target/loongarch: Populate CPUClass.mmu_index
  target/loongarch: Rename MMU_IDX_*
  target/m68k: Populate CPUClass.mmu_index
  target/microblaze: Populate CPUClass.mmu_index
  target/mips: Pass ptw_mmu_idx down from mips_cpu_tlb_fill
  target/mips: Split out mips_env_mmu_index
  target/mips: Populate CPUClass.mmu_index
  target/nios2: Populate CPUClass.mmu_index
  target/openrisc: Populate CPUClass.mmu_index
  target/ppc: Split out ppc_env_mmu_index
  target/ppc: Populate CPUClass.mmu_index
  target/riscv: Rename riscv_cpu_mmu_index to riscv_env_mmu_index
  target/riscv: Replace cpu_mmu_index with riscv_env_mmu_index
  target/riscv: Populate CPUClass.mmu_index
  target/rx: Populate CPUClass.mmu_index
  target/s390x: Split out s390x_env_mmu_index
  target/s390x: Populate CPUClass.mmu_index
  target/sh4: Populate CPUClass.mmu_index
  target/sparc: Populate CPUClass.mmu_index
  target/tricore: Populate CPUClass.mmu_index
  target/xtensa: Populate CPUClass.mmu_index
  include/exec: Implement cpu_mmu_index generically
  include/exec: Change cpu_mmu_index argument to CPUState
  tcg/loongarch64: Set vector registers call clobbered
  target/sparc: Use tcg_gen_qemu_{ld, st}_i128 for ASI_M_BCOPY
  target/sparc: Use tcg_gen_qemu_{ld, st}_i128 for ASI_M_BFILL
  target/sparc: Remove gen_dest_fpr_F
  target/sparc: Introduce gen_{load,store}_fpr_Q
  target/sparc: Inline FNEG, FABS
  target/sparc: Use i128 for FSQRTq
  target/sparc: Use i128 for FADDq, FSUBq, FMULq, FDIVq
  target/sparc: Use i128 for FqTOs, FqTOi
  target/sparc: Use i128 for FqTOd, FqTOx
  target/sparc: Use i128 for FCMPq, FCMPEq
  target/sparc: Use i128 for FsTOq, FiTOq
  target/sparc: Use i128 for FdTOq, FxTOq
  target/sparc: Use i128 for Fdmulq
  target/sparc: Remove qt0, qt1 temporaries
  target/sparc: Introduce cpu_get_fsr, cpu_put_fsr
  target/sparc: Split ver from env->fsr
  target/sparc: Clear cexc and ftt in do_check_ieee_exceptions
  target/sparc: Merge check_ieee_exceptions with FPop helpers
  target/sparc: Split cexc and ftt from env->fsr
  target/sparc: Remove cpu_fsr
  target/sparc: Split fcc out of env->fsr
  target/sparc: Remove FSR_FTT_NMASK, FSR_FTT_CEXC_NMASK
  linux-user/aarch64: Add padding before __kernel_rt_sigreturn

 include/exec/cpu-all.h |   4 +
 include/exec/cpu-common.h  |  21 +
 include/hw/core/cpu.h  |   3 +
 target/alpha/cpu.h |   2 +-
 target/arm/cpu.h   |  13 -
 target/arm/internals.h |   5 +
 target/avr/cpu.h   |   7 -
 target/cris/cpu.h  |   4 -
 target/hexagon/cpu.h   |   9 -
 target/hppa/cpu.h  |  13 -
 target/i386/cpu.h  |   7 -
 target/loongarch/cpu.h |  18 +-
 target/m68k/cpu.h  |   4 -
 target/microblaze/cpu.h|  15 -
 target/mips/cpu.h  |   6 +-
 target/nios2/cpu.h |   6 -
 target/openrisc/cpu.h

Re: [PATCH 30/33] target/tricore: Populate CPUClass.mmu_index

2024-02-02 Thread Bastian Koppelmann

On Tue, Jan 30, 2024 at 09:30:40AM +1000, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  target/tricore/cpu.c | 6 ++
>  1 file changed, 6 insertions(+)

Reviewed-by: Bastian Koppelmann 

Cheers,
Bastian

Re: [PATCH 0/3] monitor: only run coroutine commands in qemu_aio_context

2024-02-02 Thread YangHang Liu

It's easily for me to encounter " ../block/qcow2.c:5263:
ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error
**): Assertion `false' failed" issue during 1Q vhost-user interface +
RT VM + post-copy migration

After applying this patch, the issue is still not reproduced even if I
repeat the same migration test for 60 times.

Tested-by: Yanghang Liu 


Best Regards,
YangHang Liu

On Mon, Jan 29, 2024 at 7:39 PM Peter Maydell  wrote:
>
> On Tue, 16 Jan 2024 at 19:01, Stefan Hajnoczi  wrote:
> >
> > Several bugs have been reported related to how QMP commands are rescheduled 
> > in
> > qemu_aio_context:
> > - https://gitlab.com/qemu-project/qemu/-/issues/1933
> > - https://issues.redhat.com/browse/RHEL-17369
> > - https://bugzilla.redhat.com/show_bug.cgi?id=2215192
> > - https://bugzilla.redhat.com/show_bug.cgi?id=2214985
> >
> > The first instance of the bug interacted with drain_call_rcu() temporarily
> > dropping the BQL and resulted in vCPU threads entering device emulation code
> > simultaneously (something that should never happen). I set out to make
> > drain_call_rcu() safe to use in this environment, but Paolo and Kevin 
> > discussed
> > the possibility of avoiding rescheduling the monitor_qmp_dispatcher_co()
> > coroutine for non-coroutine commands. This would prevent monitor commands 
> > from
> > running during vCPU thread aio_poll() entirely and addresses the root cause.
> >
> > This patch series implements this idea. qemu-iotests is sensitive to the 
> > exact
> > order in which QMP events and responses are emitted. Running QMP handlers in
> > the iohandler AioContext causes some QMP events to be ordered differently 
> > than
> > before. It is therefore necessary to adjust the reference output in many 
> > test
> > cases. The actual QMP code change is small and everything else is just to 
> > make
> > qemu-iotests happy.
>
> Hi; we have a suspicion that this change has resulted in a flaky-CI
> test: iotest-144 sometimes fails, apparently because a "return"
> result from QMP isn't always returned at the same place in relation
> to other QMP events. Could you have a look at it?
>
> https://gitlab.com/qemu-project/qemu/-/issues/2126
>
> thanks
> -- PMM
>

Re: [PATCH v3 0/5] gdbstub: Implement catching syscalls

2024-02-02 Thread Alex Bennée

Ilya Leoshkevich  writes:

> v2: https://lists.gnu.org/archive/html/qemu-devel/2024-01/msg02980.html
> v2 -> v3: Simplify the catchpoint state by making "don't catch" a
>   subset of "catch some".
>   Factor out several prep patches;
>   Don't use snprintf;
>   Add some comments (Alex).
>
> v1: https://lists.gnu.org/archive/html/qemu-devel/2024-01/msg02911.html
> v1 -> v2: Avoid touching the system gdbstub.
>   Advertise QCatchSyscalls+ only on Linux.
>
> Hi,
>
> I noticed that GDB's "catch syscall" does not work with qemu-user.
> This series adds the missing bits in [1/2] and a test in [2/2].
> I'm basing this on my other series, since it contains useful gdbstub
> test refactorings.

Queued to gdbstub/next, thanks.

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

[PATCH v1 09/15] libvhost-user: Don't search for duplicates when removing memory regions

2024-02-02 Thread David Hildenbrand

We cannot have duplicate memory regions, something would be deeply
flawed elsewhere. Let's just stop the search once we found an entry.

We'll add more sanity checks when adding memory regions later.

Signed-off-by: David Hildenbrand 
---
 subprojects/libvhost-user/libvhost-user.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/subprojects/libvhost-user/libvhost-user.c 
b/subprojects/libvhost-user/libvhost-user.c
index e1a1b9df88..22154b217f 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -896,8 +896,7 @@ vu_rem_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
 i--;
 
 found = true;
-
-/* Continue the search for eventual duplicates. */
+break;
 }
 }
 
-- 
2.43.0

[PATCH v1 12/15] libvhost-user: Use most of mmap_offset as fd_offset

2024-02-02 Thread David Hildenbrand

In the past, QEMU would create memory regions that could partially cover
hugetlb pages, making mmap() fail if we would use the mmap_offset as an
fd_offset. For that reason, we never used the mmap_offset as an offset into
the fd and instead always mapped the fd from the very start.

However, that can easily result in us mmap'ing a lot of unnecessary
parts of an fd, possibly repeatedly.

QEMU nowadays does not create memory regions that partially cover huge
pages -- it never really worked with postcopy. QEMU handles merging of
regions that partially cover huge pages (due to holes in boot memory) since
2018 in c1ece84e7c93 ("vhost: Huge page align and merge").

Let's be a bit careful and not unconditionally convert the
mmap_offset into an fd_offset. Instead, let's simply detect the hugetlb
size and pass as much as we can as fd_offset, making sure that we call
mmap() with a properly aligned offset.

With QEMU and a virtio-mem device that is fully plugged (50GiB using 50
memslots) the qemu-storage daemon process consumes in the VA space
1281GiB before this change and 58GiB after this change.

Example debug output:
   Vhost user message 
  Request: VHOST_USER_ADD_MEM_REG (37)
  Flags:   0x9
  Size:40
  Fds: 59
  Adding region 50
  guest_phys_addr: 0x000d8000
  memory_size: 0x4000
  userspace_addr   0x7f54ebffe000
  mmap_offset  0x000c
  fd_offset:   0x000c
  new mmap_offset: 0x
  mmap_addr:   0x7f7ecc00
  Successfully added new region
   Vhost user message 
  Request: VHOST_USER_ADD_MEM_REG (37)
  Flags:   0x9
  Size:40
  Fds: 59
  Adding region 51
  guest_phys_addr: 0x000dc000
  memory_size: 0x4000
  userspace_addr   0x7f552bffe000
  mmap_offset  0x000c4000
  fd_offset:   0x000c4000
  new mmap_offset: 0x
  mmap_addr:   0x7f7e8c00
  Successfully added new region

Signed-off-by: David Hildenbrand 
---
 subprojects/libvhost-user/libvhost-user.c | 50 ---
 1 file changed, 45 insertions(+), 5 deletions(-)

diff --git a/subprojects/libvhost-user/libvhost-user.c 
b/subprojects/libvhost-user/libvhost-user.c
index 75e47b7bb3..7d8293dc84 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -43,6 +43,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #ifdef __NR_userfaultfd
 #include 
@@ -281,12 +283,36 @@ vu_remove_all_mem_regs(VuDev *dev)
 dev->nregions = 0;
 }
 
+static size_t
+get_fd_pagesize(int fd)
+{
+static size_t pagesize;
+#if defined(__linux__)
+struct statfs fs;
+int ret;
+
+do {
+ret = fstatfs(fd, );
+} while (ret != 0 && errno == EINTR);
+
+if (!ret && fs.f_type == HUGETLBFS_MAGIC) {
+return fs.f_bsize;
+}
+#endif
+
+if (!pagesize) {
+pagesize = getpagesize();
+}
+return pagesize;
+}
+
 static void
 _vu_add_mem_reg(VuDev *dev, VhostUserMemoryRegion *msg_region, int fd)
 {
 const uint64_t start_gpa = msg_region->guest_phys_addr;
 const uint64_t end_gpa = start_gpa + msg_region->memory_size;
 int prot = PROT_READ | PROT_WRITE;
+uint64_t mmap_offset, fd_offset;
 VuDevRegion *r;
 void *mmap_addr;
 int low = 0;
@@ -335,11 +361,25 @@ _vu_add_mem_reg(VuDev *dev, VhostUserMemoryRegion 
*msg_region, int fd)
 idx = low;
 
 /*
- * We don't use offset argument of mmap() since the mapped address has
- * to be page aligned, and we use huge pages.
+ * Convert most of msg_region->mmap_offset to fd_offset. In almost all
+ * cases, this will leave us with mmap_offset == 0, mmap()'ing only
+ * what we really need. Only if a memory region would partially cover
+ * hugetlb pages, we'd get mmap_offset != 0, which usually doesn't happen
+ * anymore (i.e., modern QEMU).
+ *
+ * Note that mmap() with hugetlb would fail if the offset into the file
+ * is not aligned to the huge page size.
  */
-mmap_addr = mmap(0, msg_region->memory_size + msg_region->mmap_offset,
- prot, MAP_SHARED | MAP_NORESERVE, fd, 0);
+fd_offset = ALIGN_DOWN(msg_region->mmap_offset, get_fd_pagesize(fd));
+mmap_offset = msg_region->mmap_offset - fd_offset;
+
+DPRINT("fd_offset:   0x%016"PRIx64"\n",
+   fd_offset);
+DPRINT("adj mmap_offset: 0x%016"PRIx64"\n",
+   mmap_offset);
+
+mmap_addr = mmap(0, msg_region->memory_size + mmap_offset,
+ prot, MAP_SHARED | MAP_NORESERVE, fd, fd_offset);
 if (mmap_addr == MAP_FAILED) {
 vu_panic(dev, "region mmap error: %s", strerror(errno));
 return;
@@ -354,7 +394,7 @@ _vu_add_mem_reg(VuDev *dev, VhostUserMemoryRegion 
*msg_region, int fd)
 r->size = msg_region->memory_size;
 r->qva =

[PATCH v1 13/15] libvhost-user: Factor out vq usability check

2024-02-02 Thread David Hildenbrand

Let's factor it out to prepare for further changes.

Signed-off-by: David Hildenbrand 
---
 subprojects/libvhost-user/libvhost-user.c | 24 +++
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/subprojects/libvhost-user/libvhost-user.c 
b/subprojects/libvhost-user/libvhost-user.c
index 7d8293dc84..febeb2eb89 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -283,6 +283,12 @@ vu_remove_all_mem_regs(VuDev *dev)
 dev->nregions = 0;
 }
 
+static bool
+vu_is_vq_usable(VuDev *dev, VuVirtq *vq)
+{
+return likely(!dev->broken) && likely(vq->vring.avail);
+}
+
 static size_t
 get_fd_pagesize(int fd)
 {
@@ -2378,8 +2384,7 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, 
unsigned int *in_bytes,
 idx = vq->last_avail_idx;
 
 total_bufs = in_total = out_total = 0;
-if (unlikely(dev->broken) ||
-unlikely(!vq->vring.avail)) {
+if (!vu_is_vq_usable(dev, vq)) {
 goto done;
 }
 
@@ -2494,8 +2499,7 @@ vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned 
int in_bytes,
 bool
 vu_queue_empty(VuDev *dev, VuVirtq *vq)
 {
-if (unlikely(dev->broken) ||
-unlikely(!vq->vring.avail)) {
+if (!vu_is_vq_usable(dev, vq)) {
 return true;
 }
 
@@ -2534,8 +2538,7 @@ vring_notify(VuDev *dev, VuVirtq *vq)
 
 static void _vu_queue_notify(VuDev *dev, VuVirtq *vq, bool sync)
 {
-if (unlikely(dev->broken) ||
-unlikely(!vq->vring.avail)) {
+if (!vu_is_vq_usable(dev, vq)) {
 return;
 }
 
@@ -2860,8 +2863,7 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
 unsigned int head;
 VuVirtqElement *elem;
 
-if (unlikely(dev->broken) ||
-unlikely(!vq->vring.avail)) {
+if (!vu_is_vq_usable(dev, vq)) {
 return NULL;
 }
 
@@ -3018,8 +3020,7 @@ vu_queue_fill(VuDev *dev, VuVirtq *vq,
 {
 struct vring_used_elem uelem;
 
-if (unlikely(dev->broken) ||
-unlikely(!vq->vring.avail)) {
+if (!vu_is_vq_usable(dev, vq)) {
 return;
 }
 
@@ -3048,8 +3049,7 @@ vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int 
count)
 {
 uint16_t old, new;
 
-if (unlikely(dev->broken) ||
-unlikely(!vq->vring.avail)) {
+if (!vu_is_vq_usable(dev, vq)) {
 return;
 }
 
-- 
2.43.0

[PATCH v1 05/15] libvhost-user: Merge vu_set_mem_table_exec_postcopy() into vu_set_mem_table_exec()

2024-02-02 Thread David Hildenbrand

Let's reduce some code duplication and prepare for further changes.

Signed-off-by: David Hildenbrand 
---
 subprojects/libvhost-user/libvhost-user.c | 119 +++---
 1 file changed, 39 insertions(+), 80 deletions(-)

diff --git a/subprojects/libvhost-user/libvhost-user.c 
b/subprojects/libvhost-user/libvhost-user.c
index d5b3468e43..d9e2214ad2 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -937,95 +937,23 @@ vu_get_shared_object(VuDev *dev, VhostUserMsg *vmsg)
 }
 
 static bool
-vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg)
+vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
 {
-unsigned int i;
 VhostUserMemory m = vmsg->payload.memory, *memory = 
-dev->nregions = memory->nregions;
-
-DPRINT("Nregions: %u\n", memory->nregions);
-for (i = 0; i < dev->nregions; i++) {
-void *mmap_addr;
-VhostUserMemoryRegion *msg_region = >regions[i];
-VuDevRegion *dev_region = >regions[i];
-
-DPRINT("Region %d\n", i);
-DPRINT("guest_phys_addr: 0x%016"PRIx64"\n",
-   msg_region->guest_phys_addr);
-DPRINT("memory_size: 0x%016"PRIx64"\n",
-   msg_region->memory_size);
-DPRINT("userspace_addr   0x%016"PRIx64"\n",
-   msg_region->userspace_addr);
-DPRINT("mmap_offset  0x%016"PRIx64"\n",
-   msg_region->mmap_offset);
-
-dev_region->gpa = msg_region->guest_phys_addr;
-dev_region->size = msg_region->memory_size;
-dev_region->qva = msg_region->userspace_addr;
-dev_region->mmap_offset = msg_region->mmap_offset;
+int prot = PROT_READ | PROT_WRITE;
+unsigned int i;
 
-/* We don't use offset argument of mmap() since the
- * mapped address has to be page aligned, and we use huge
- * pages.
+if (dev->postcopy_listening) {
+/*
  * In postcopy we're using PROT_NONE here to catch anyone
  * accessing it before we userfault
  */
-mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
- PROT_NONE, MAP_SHARED | MAP_NORESERVE,
- vmsg->fds[i], 0);
-
-if (mmap_addr == MAP_FAILED) {
-vu_panic(dev, "region mmap error: %s", strerror(errno));
-} else {
-dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
-DPRINT("mmap_addr:   0x%016"PRIx64"\n",
-   dev_region->mmap_addr);
-}
-
-/* Return the address to QEMU so that it can translate the ufd
- * fault addresses back.
- */
-msg_region->userspace_addr = dev_region->mmap_addr +
- dev_region->mmap_offset;
-close(vmsg->fds[i]);
-}
-
-/* Send the message back to qemu with the addresses filled in */
-vmsg->fd_num = 0;
-if (!vu_send_reply(dev, dev->sock, vmsg)) {
-vu_panic(dev, "failed to respond to set-mem-table for postcopy");
-return false;
-}
-
-/* Wait for QEMU to confirm that it's registered the handler for the
- * faults.
- */
-if (!dev->read_msg(dev, dev->sock, vmsg) ||
-vmsg->size != sizeof(vmsg->payload.u64) ||
-vmsg->payload.u64 != 0) {
-vu_panic(dev, "failed to receive valid ack for postcopy 
set-mem-table");
-return false;
+prot = PROT_NONE;
 }
 
-/* OK, now we can go and register the memory and generate faults */
-(void)generate_faults(dev);
-
-return false;
-}
-
-static bool
-vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
-{
-unsigned int i;
-VhostUserMemory m = vmsg->payload.memory, *memory = 
-
 vu_remove_all_mem_regs(dev);
 dev->nregions = memory->nregions;
 
-if (dev->postcopy_listening) {
-return vu_set_mem_table_exec_postcopy(dev, vmsg);
-}
-
 DPRINT("Nregions: %u\n", memory->nregions);
 for (i = 0; i < dev->nregions; i++) {
 void *mmap_addr;
@@ -1051,8 +979,7 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
  * mapped address has to be page aligned, and we use huge
  * pages.  */
 mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NORESERVE,
- vmsg->fds[i], 0);
+ prot, MAP_SHARED | MAP_NORESERVE, vmsg->fds[i], 0);
 
 if (mmap_addr == MAP_FAILED) {
 vu_panic(dev, "region mmap error: %s", strerror(errno));
@@ -1062,9 +989,41 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
dev_region->mmap_addr);
 }
 
+if (dev->postcopy_listening) {
+/*
+ * Return the address to QEMU so that it can translate the ufd
+ * fault addresses back.
+ */
+msg_region->userspace_addr = dev_region->mmap_addr +
+

[PATCH v1 02/15] libvhost-user: Dynamically allocate memory for memory slots

2024-02-02 Thread David Hildenbrand

Let's prepare for increasing VHOST_USER_MAX_RAM_SLOTS by dynamically
allocating dev->regions. We don't have any ABI guarantees (not
dynamically linked), so we can simply change the layout of VuDev.

Let's zero out the memory, just as we used to do.

Signed-off-by: David Hildenbrand 
---
 subprojects/libvhost-user/libvhost-user.c | 11 +++
 subprojects/libvhost-user/libvhost-user.h |  2 +-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/subprojects/libvhost-user/libvhost-user.c 
b/subprojects/libvhost-user/libvhost-user.c
index 7e515ed15d..8a5a7a2295 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -2171,6 +2171,8 @@ vu_deinit(VuDev *dev)
 
 free(dev->vq);
 dev->vq = NULL;
+free(dev->regions);
+dev->regions = NULL;
 }
 
 bool
@@ -2205,9 +2207,18 @@ vu_init(VuDev *dev,
 dev->backend_fd = -1;
 dev->max_queues = max_queues;
 
+dev->regions = malloc(VHOST_USER_MAX_RAM_SLOTS * sizeof(dev->regions[0]));
+if (!dev->regions) {
+DPRINT("%s: failed to malloc mem regions\n", __func__);
+return false;
+}
+memset(dev->regions, 0, VHOST_USER_MAX_RAM_SLOTS * 
sizeof(dev->regions[0]));
+
 dev->vq = malloc(max_queues * sizeof(dev->vq[0]));
 if (!dev->vq) {
 DPRINT("%s: failed to malloc virtqueues\n", __func__);
+free(dev->regions);
+dev->regions = NULL;
 return false;
 }
 
diff --git a/subprojects/libvhost-user/libvhost-user.h 
b/subprojects/libvhost-user/libvhost-user.h
index c2352904f0..c882b4e3a2 100644
--- a/subprojects/libvhost-user/libvhost-user.h
+++ b/subprojects/libvhost-user/libvhost-user.h
@@ -398,7 +398,7 @@ typedef struct VuDevInflightInfo {
 struct VuDev {
 int sock;
 uint32_t nregions;
-VuDevRegion regions[VHOST_USER_MAX_RAM_SLOTS];
+VuDevRegion *regions;
 VuVirtq *vq;
 VuDevInflightInfo inflight_info;
 int log_call_fd;
-- 
2.43.0

[PATCH v1 08/15] libvhost-user: Don't zero out memory for memory regions

2024-02-02 Thread David Hildenbrand

dev->nregions always covers only valid entries. Stop zeroing out other
array elements that are unused.

Signed-off-by: David Hildenbrand 
---
 subprojects/libvhost-user/libvhost-user.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/subprojects/libvhost-user/libvhost-user.c 
b/subprojects/libvhost-user/libvhost-user.c
index f99c888b48..e1a1b9df88 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -888,13 +888,9 @@ vu_rem_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
 
 munmap((void *)(uintptr_t)r->mmap_addr, r->size + r->mmap_offset);
 
-/*
- * Shift all affected entries by 1 to close the hole at index i and
- * zero out the last entry.
- */
+/* Shift all affected entries by 1 to close the hole at index. */
 memmove(dev->regions + i, dev->regions + i + 1,
 sizeof(VuDevRegion) * (dev->nregions - i - 1));
-memset(dev->regions + dev->nregions - 1, 0, sizeof(VuDevRegion));
 DPRINT("Successfully removed a region\n");
 dev->nregions--;
 i--;
@@ -2119,7 +2115,6 @@ vu_init(VuDev *dev,
 DPRINT("%s: failed to malloc mem regions\n", __func__);
 return false;
 }
-memset(dev->regions, 0, VHOST_USER_MAX_RAM_SLOTS * 
sizeof(dev->regions[0]));
 
 dev->vq = malloc(max_queues * sizeof(dev->vq[0]));
 if (!dev->vq) {
-- 
2.43.0

[PATCH v1 11/15] libvhost-user: Speedup gpa_to_mem_region() and vu_gpa_to_va()

2024-02-02 Thread David Hildenbrand

Let's speed up GPA to memory region / virtual address lookup. Store the
memory regions ordered by guest physical addresses, and use binary
search for address translation, as well as when adding/removing memory
regions.

Most importantly, this will speed up GPA->VA address translation when we
have many memslots.

Signed-off-by: David Hildenbrand 
---
 subprojects/libvhost-user/libvhost-user.c | 49 +--
 1 file changed, 45 insertions(+), 4 deletions(-)

diff --git a/subprojects/libvhost-user/libvhost-user.c 
b/subprojects/libvhost-user/libvhost-user.c
index d036b54ed0..75e47b7bb3 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -199,19 +199,30 @@ vu_panic(VuDev *dev, const char *msg, ...)
 static VuDevRegion *
 vu_gpa_to_mem_region(VuDev *dev, uint64_t guest_addr)
 {
-unsigned int i;
+int low = 0;
+int high = dev->nregions - 1;
 
 /*
  * Memory regions cannot overlap in guest physical address space. Each
  * GPA belongs to exactly one memory region, so there can only be one
  * match.
+ *
+ * We store our memory regions ordered by GPA and can simply perform a
+ * binary search.
  */
-for (i = 0; i < dev->nregions; i++) {
-VuDevRegion *cur = >regions[i];
+while (low <= high) {
+unsigned int mid = low + (high - low) / 2;
+VuDevRegion *cur = >regions[mid];
 
 if (guest_addr >= cur->gpa && guest_addr < cur->gpa + cur->size) {
 return cur;
 }
+if (guest_addr >= cur->gpa + cur->size) {
+low = mid + 1;
+}
+if (guest_addr < cur->gpa) {
+high = mid - 1;
+}
 }
 return NULL;
 }
@@ -273,9 +284,14 @@ vu_remove_all_mem_regs(VuDev *dev)
 static void
 _vu_add_mem_reg(VuDev *dev, VhostUserMemoryRegion *msg_region, int fd)
 {
+const uint64_t start_gpa = msg_region->guest_phys_addr;
+const uint64_t end_gpa = start_gpa + msg_region->memory_size;
 int prot = PROT_READ | PROT_WRITE;
 VuDevRegion *r;
 void *mmap_addr;
+int low = 0;
+int high = dev->nregions - 1;
+unsigned int idx;
 
 DPRINT("Adding region %d\n", dev->nregions);
 DPRINT("guest_phys_addr: 0x%016"PRIx64"\n",
@@ -295,6 +311,29 @@ _vu_add_mem_reg(VuDev *dev, VhostUserMemoryRegion 
*msg_region, int fd)
 prot = PROT_NONE;
 }
 
+/*
+ * We will add memory regions into the array sorted by GPA. Perform a
+ * binary search to locate the insertion point: it will be at the low
+ * index.
+ */
+while (low <= high) {
+unsigned int mid = low + (high - low)  / 2;
+VuDevRegion *cur = >regions[mid];
+
+/* Overlap of GPA addresses. */
+if (start_gpa < cur->gpa + cur->size && cur->gpa < end_gpa) {
+vu_panic(dev, "regions with overlapping guest physical addresses");
+return;
+}
+if (start_gpa >= cur->gpa + cur->size) {
+low = mid + 1;
+}
+if (start_gpa < cur->gpa) {
+high = mid - 1;
+}
+}
+idx = low;
+
 /*
  * We don't use offset argument of mmap() since the mapped address has
  * to be page aligned, and we use huge pages.
@@ -308,7 +347,9 @@ _vu_add_mem_reg(VuDev *dev, VhostUserMemoryRegion 
*msg_region, int fd)
 DPRINT("mmap_addr:   0x%016"PRIx64"\n",
(uint64_t)(uintptr_t)mmap_addr);
 
-r = >regions[dev->nregions];
+/* Shift all affected entries by 1 to open a hole at idx. */
+r = >regions[idx];
+memmove(r + 1, r, sizeof(VuDevRegion) * (dev->nregions - idx));
 r->gpa = msg_region->guest_phys_addr;
 r->size = msg_region->memory_size;
 r->qva = msg_region->userspace_addr;
-- 
2.43.0

[PATCH v1 07/15] libvhost-user: No need to check for NULL when unmapping

2024-02-02 Thread David Hildenbrand

We never add a memory region if mmap() failed. Therefore, no need to check
for NULL.

Signed-off-by: David Hildenbrand 
---
 subprojects/libvhost-user/libvhost-user.c | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/subprojects/libvhost-user/libvhost-user.c 
b/subprojects/libvhost-user/libvhost-user.c
index a2baefe84b..f99c888b48 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -247,11 +247,8 @@ vu_remove_all_mem_regs(VuDev *dev)
 
 for (i = 0; i < dev->nregions; i++) {
 VuDevRegion *r = >regions[i];
-void *ma = (void *)(uintptr_t)r->mmap_addr;
 
-if (ma) {
-munmap(ma, r->size + r->mmap_offset);
-}
+munmap((void *)(uintptr_t)r->mmap_addr, r->size + r->mmap_offset);
 }
 dev->nregions = 0;
 }
@@ -888,11 +885,8 @@ vu_rem_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
 for (i = 0; i < dev->nregions; i++) {
 if (reg_equal(>regions[i], msg_region)) {
 VuDevRegion *r = >regions[i];
-void *ma = (void *) (uintptr_t) r->mmap_addr;
 
-if (ma) {
-munmap(ma, r->size + r->mmap_offset);
-}
+munmap((void *)(uintptr_t)r->mmap_addr, r->size + r->mmap_offset);
 
 /*
  * Shift all affected entries by 1 to close the hole at index i and
-- 
2.43.0

[PATCH v1 15/15] libvhost-user: Mark mmap'ed region memory as MADV_DONTDUMP

2024-02-02 Thread David Hildenbrand

We already use MADV_NORESERVE to deal with sparse memory regions. Let's
also set madvise(MADV_DONTDUMP), otherwise a crash of the process can
result in us allocating all memory in the mmap'ed region for dumping
purposes.

This change implies that the mmap'ed rings won't be included in a
coredump. If ever required for debugging purposes, we could mark only
the mapped rings MADV_DODUMP.

Ignore errors during madvise() for now.

Signed-off-by: David Hildenbrand 
---
 subprojects/libvhost-user/libvhost-user.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/subprojects/libvhost-user/libvhost-user.c 
b/subprojects/libvhost-user/libvhost-user.c
index 738e84ab63..26c289518c 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -458,6 +458,12 @@ _vu_add_mem_reg(VuDev *dev, VhostUserMemoryRegion 
*msg_region, int fd)
 DPRINT("mmap_addr:   0x%016"PRIx64"\n",
(uint64_t)(uintptr_t)mmap_addr);
 
+#if defined(__linux__)
+/* Don't include all guest memory in a coredump. */
+madvise(mmap_addr, msg_region->memory_size + mmap_offset,
+MADV_DONTDUMP);
+#endif
+
 /* Shift all affected entries by 1 to open a hole at idx. */
 r = >regions[idx];
 memmove(r + 1, r, sizeof(VuDevRegion) * (dev->nregions - idx));
-- 
2.43.0

[PATCH v1 01/15] libvhost-user: Fix msg_region->userspace_addr computation

2024-02-02 Thread David Hildenbrand

We barely had mmap_offset set in the past. With virtio-mem and
dynamic-memslots that will change.

In vu_add_mem_reg() and vu_set_mem_table_exec_postcopy(), we are
performing pointer arithmetics, which is wrong. Let's simply
use dev_region->mmap_addr instead of "void *mmap_addr".

Fixes: ec94c8e621de ("Support adding individual regions in libvhost-user")
Fixes: 9bb38019942c ("vhost+postcopy: Send address back to qemu")
Cc: Raphael Norwitz 
Signed-off-by: David Hildenbrand 
---
 subprojects/libvhost-user/libvhost-user.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/subprojects/libvhost-user/libvhost-user.c 
b/subprojects/libvhost-user/libvhost-user.c
index a3b158c671..7e515ed15d 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -800,8 +800,8 @@ vu_add_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
  * Return the address to QEMU so that it can translate the ufd
  * fault addresses back.
  */
-msg_region->userspace_addr = (uintptr_t)(mmap_addr +
- dev_region->mmap_offset);
+msg_region->userspace_addr = dev_region->mmap_addr +
+ dev_region->mmap_offset;
 
 /* Send the message back to qemu with the addresses filled in. */
 vmsg->fd_num = 0;
@@ -969,8 +969,8 @@ vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg 
*vmsg)
 /* Return the address to QEMU so that it can translate the ufd
  * fault addresses back.
  */
-msg_region->userspace_addr = (uintptr_t)(mmap_addr +
- dev_region->mmap_offset);
+msg_region->userspace_addr = dev_region->mmap_addr +
+ dev_region->mmap_offset;
 close(vmsg->fds[i]);
 }
 
-- 
2.43.0

[PATCH v1 00/15] libvhost-user: support more memslots and cleanup memslot handling code

2024-02-02 Thread David Hildenbrand

This series adds support for more memslots (509) to libvhost-user, to
make it fully compatible with virtio-mem that uses up to 256 memslots
accross all memory devices in "dynamic-memslot" mode (more details
in patch #3).

One simple fix upfront.

With that in place, this series optimizes and extens memory region
handling in libvhost-user:
* Heavily deduplicate and clean up the memory region handling code
* Speeds up GPA->VA translation with many memslots using binary search
* Optimize mmap_offset handling to use it as fd_offset for mmap()
* Avoid ring remapping when adding a single memory region
* Avoid dumping all guest memory, possibly allocating memory in sparse
  memory mappings when the process crashes

I'm being very careful to not break some weird corner case that modern
QEMU might no longer trigger, but older one could have triggered or some
other frontend might trigger.

The only thing where I am not careful is to forbid memory regions that
overlap in GPA space: it doesn't make any sense.

With this series, virtio-mem (with dynamic-memslots=on) +
qemu-storage-daemon works flawlessly and as expected in my tests.

Cc: Michael S. Tsirkin 
Cc: Jason Wang 
Cc: Stefan Hajnoczi 
Cc: Stefano Garzarella 
Cc: Germano Veit Michel 
Cc: Raphael Norwitz 

David Hildenbrand (15):
  libvhost-user: Fix msg_region->userspace_addr computation
  libvhost-user: Dynamically allocate memory for memory slots
  libvhost-user: Bump up VHOST_USER_MAX_RAM_SLOTS to 509
  libvhost-user: Factor out removing all mem regions
  libvhost-user: Merge vu_set_mem_table_exec_postcopy() into
vu_set_mem_table_exec()
  libvhost-user: Factor out adding a memory region
  libvhost-user: No need to check for NULL when unmapping
  libvhost-user: Don't zero out memory for memory regions
  libvhost-user: Don't search for duplicates when removing memory
regions
  libvhost-user: Factor out search for memory region by GPA and simplify
  libvhost-user: Speedup gpa_to_mem_region() and vu_gpa_to_va()
  libvhost-user: Use most of mmap_offset as fd_offset
  libvhost-user: Factor out vq usability check
  libvhost-user: Dynamically remap rings after (temporarily?) removing
memory regions
  libvhost-user: Mark mmap'ed region memory as MADV_DONTDUMP

 subprojects/libvhost-user/libvhost-user.c | 593 --
 subprojects/libvhost-user/libvhost-user.h |  10 +-
 2 files changed, 332 insertions(+), 271 deletions(-)

-- 
2.43.0

[PATCH v1 03/15] libvhost-user: Bump up VHOST_USER_MAX_RAM_SLOTS to 509

2024-02-02 Thread David Hildenbrand

Let's support up to 509 mem slots, just like vhost in the kernel usually
does and the rust vhost-user implementation recently [1] started doing.
This is required to properly support memory hotplug, either using
multiple DIMMs (ACPI supports up to 256) or using virtio-mem.

The 509 used to be the KVM limit, it supported 512, but 3 were
used for internal purposes. Currently, KVM supports more than 512, but
it usually doesn't make use of more than ~260 (i.e., 256 DIMMs + boot
memory), except when other memory devices like PCI devices with BARs are
used. So, 509 seems to work well for vhost in the kernel.

Details can be found in the QEMU change that made virtio-mem consume
up to 256 mem slots across all virtio-mem devices. [2]

509 mem slots implies 509 VMAs/mappings in the worst case (even though,
in practice with virtio-mem we won't be seeing more than ~260 in most
setups).

With max_map_count under Linux defaulting to 64k, 509 mem slots
still correspond to less than 1% of the maximum number of mappings.
There are plenty left for the application to consume.

[1] https://github.com/rust-vmm/vhost/pull/224
[2] https://lore.kernel.org/all/20230926185738.277351-1-da...@redhat.com/

Signed-off-by: David Hildenbrand 
---
 subprojects/libvhost-user/libvhost-user.h | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/subprojects/libvhost-user/libvhost-user.h 
b/subprojects/libvhost-user/libvhost-user.h
index c882b4e3a2..deb40e77b3 100644
--- a/subprojects/libvhost-user/libvhost-user.h
+++ b/subprojects/libvhost-user/libvhost-user.h
@@ -31,10 +31,12 @@
 #define VHOST_MEMORY_BASELINE_NREGIONS 8
 
 /*
- * Set a reasonable maximum number of ram slots, which will be supported by
- * any architecture.
+ * vhost in the kernel usually supports 509 mem slots. 509 used to be the
+ * KVM limit, it supported 512, but 3 were used for internal purposes. This
+ * limit is sufficient to support many DIMMs and virtio-mem in
+ * "dynamic-memslots" mode.
  */
-#define VHOST_USER_MAX_RAM_SLOTS 32
+#define VHOST_USER_MAX_RAM_SLOTS 509
 
 #define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
 
-- 
2.43.0

[PATCH v1 06/15] libvhost-user: Factor out adding a memory region

2024-02-02 Thread David Hildenbrand

Let's factor it out, reducing quite some code duplication and perparing
for further changes.

If we fail to mmap a region and panic, we now simply don't add that
(broken) region.

Note that we now increment dev->nregions as we are successfully
adding memory regions, and don't increment dev->nregions if anything went
wrong.

Signed-off-by: David Hildenbrand 
---
 subprojects/libvhost-user/libvhost-user.c | 168 --
 1 file changed, 60 insertions(+), 108 deletions(-)

diff --git a/subprojects/libvhost-user/libvhost-user.c 
b/subprojects/libvhost-user/libvhost-user.c
index d9e2214ad2..a2baefe84b 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -256,6 +256,61 @@ vu_remove_all_mem_regs(VuDev *dev)
 dev->nregions = 0;
 }
 
+static void
+_vu_add_mem_reg(VuDev *dev, VhostUserMemoryRegion *msg_region, int fd)
+{
+int prot = PROT_READ | PROT_WRITE;
+VuDevRegion *r;
+void *mmap_addr;
+
+DPRINT("Adding region %d\n", dev->nregions);
+DPRINT("guest_phys_addr: 0x%016"PRIx64"\n",
+   msg_region->guest_phys_addr);
+DPRINT("memory_size: 0x%016"PRIx64"\n",
+   msg_region->memory_size);
+DPRINT("userspace_addr   0x%016"PRIx64"\n",
+   msg_region->userspace_addr);
+DPRINT("mmap_offset  0x%016"PRIx64"\n",
+   msg_region->mmap_offset);
+
+if (dev->postcopy_listening) {
+/*
+ * In postcopy we're using PROT_NONE here to catch anyone
+ * accessing it before we userfault
+ */
+prot = PROT_NONE;
+}
+
+/*
+ * We don't use offset argument of mmap() since the mapped address has
+ * to be page aligned, and we use huge pages.
+ */
+mmap_addr = mmap(0, msg_region->memory_size + msg_region->mmap_offset,
+ prot, MAP_SHARED | MAP_NORESERVE, fd, 0);
+if (mmap_addr == MAP_FAILED) {
+vu_panic(dev, "region mmap error: %s", strerror(errno));
+return;
+}
+DPRINT("mmap_addr:   0x%016"PRIx64"\n",
+   (uint64_t)(uintptr_t)mmap_addr);
+
+r = >regions[dev->nregions];
+r->gpa = msg_region->guest_phys_addr;
+r->size = msg_region->memory_size;
+r->qva = msg_region->userspace_addr;
+r->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
+r->mmap_offset = msg_region->mmap_offset;
+dev->nregions++;
+
+if (dev->postcopy_listening) {
+/*
+ * Return the address to QEMU so that it can translate the ufd
+ * fault addresses back.
+ */
+msg_region->userspace_addr = r->mmap_addr + r->mmap_offset;
+}
+}
+
 static void
 vmsg_close_fds(VhostUserMsg *vmsg)
 {
@@ -727,10 +782,7 @@ generate_faults(VuDev *dev) {
 static bool
 vu_add_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
 int i;
-bool track_ramblocks = dev->postcopy_listening;
 VhostUserMemoryRegion m = vmsg->payload.memreg.region, *msg_region = 
-VuDevRegion *dev_region = >regions[dev->nregions];
-void *mmap_addr;
 
 if (vmsg->fd_num != 1) {
 vmsg_close_fds(vmsg);
@@ -760,69 +812,20 @@ vu_add_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
  * we know all the postcopy client bases have been received, and we
  * should start generating faults.
  */
-if (track_ramblocks &&
+if (dev->postcopy_listening &&
 vmsg->size == sizeof(vmsg->payload.u64) &&
 vmsg->payload.u64 == 0) {
 (void)generate_faults(dev);
 return false;
 }
 
-DPRINT("Adding region: %u\n", dev->nregions);
-DPRINT("guest_phys_addr: 0x%016"PRIx64"\n",
-   msg_region->guest_phys_addr);
-DPRINT("memory_size: 0x%016"PRIx64"\n",
-   msg_region->memory_size);
-DPRINT("userspace_addr   0x%016"PRIx64"\n",
-   msg_region->userspace_addr);
-DPRINT("mmap_offset  0x%016"PRIx64"\n",
-   msg_region->mmap_offset);
-
-dev_region->gpa = msg_region->guest_phys_addr;
-dev_region->size = msg_region->memory_size;
-dev_region->qva = msg_region->userspace_addr;
-dev_region->mmap_offset = msg_region->mmap_offset;
-
-/*
- * We don't use offset argument of mmap() since the
- * mapped address has to be page aligned, and we use huge
- * pages.
- */
-if (track_ramblocks) {
-/*
- * In postcopy we're using PROT_NONE here to catch anyone
- * accessing it before we userfault.
- */
-mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
- PROT_NONE, MAP_SHARED | MAP_NORESERVE,
- vmsg->fds[0], 0);
-} else {
-mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NORESERVE,
- vmsg->fds[0], 0);
-}
-
-if (mmap_addr == MAP_FAILED) {
-vu_panic(dev, "region mmap error: %s", strerror(errno));
-} else {
-

[PATCH v1 14/15] libvhost-user: Dynamically remap rings after (temporarily?) removing memory regions

2024-02-02 Thread David Hildenbrand

Currently, we try to remap all rings whenever we add a single new memory
region. That doesn't quite make sense, because we already map rings when
setting the ring address, and panic if that goes wrong. Likely, that
handling was simply copied from set_mem_table code, where we actually
have to remap all rings.

Remapping all rings might require us to walk quite a lot of memory
regions to perform the address translations. Ideally, we'd simply remove
that remapping.

However, let's be a bit careful. There might be some weird corner cases
where we might temporarily remove a single memory region (e.g., resize
it), that would have worked for now. Further, a ring might be located on
hotplugged memory, and as the VM reboots, we might unplug that memory, to
hotplug memory before resetting the ring addresses.

So let's unmap affected rings as we remove a memory region, and try
dynamically mapping the ring again when required.

Signed-off-by: David Hildenbrand 
---
 subprojects/libvhost-user/libvhost-user.c | 107 --
 1 file changed, 78 insertions(+), 29 deletions(-)

diff --git a/subprojects/libvhost-user/libvhost-user.c 
b/subprojects/libvhost-user/libvhost-user.c
index febeb2eb89..738e84ab63 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -283,10 +283,75 @@ vu_remove_all_mem_regs(VuDev *dev)
 dev->nregions = 0;
 }
 
+static bool
+map_ring(VuDev *dev, VuVirtq *vq)
+{
+vq->vring.desc = qva_to_va(dev, vq->vra.desc_user_addr);
+vq->vring.used = qva_to_va(dev, vq->vra.used_user_addr);
+vq->vring.avail = qva_to_va(dev, vq->vra.avail_user_addr);
+
+DPRINT("Setting virtq addresses:\n");
+DPRINT("vring_desc  at %p\n", vq->vring.desc);
+DPRINT("vring_used  at %p\n", vq->vring.used);
+DPRINT("vring_avail at %p\n", vq->vring.avail);
+
+return !(vq->vring.desc && vq->vring.used && vq->vring.avail);
+}
+
 static bool
 vu_is_vq_usable(VuDev *dev, VuVirtq *vq)
 {
-return likely(!dev->broken) && likely(vq->vring.avail);
+if (unlikely(dev->broken)) {
+return false;
+}
+
+if (likely(vq->vring.avail)) {
+return true;
+}
+
+/*
+ * In corner cases, we might temporarily remove a memory region that
+ * mapped a ring. When removing a memory region we make sure to
+ * unmap any rings that would be impacted. Let's try to remap if we
+ * already succeeded mapping this ring once.
+ */
+if (!vq->vra.desc_user_addr || !vq->vra.used_user_addr ||
+!vq->vra.avail_user_addr) {
+return false;
+}
+if (map_ring(dev, vq)) {
+vu_panic(dev, "remapping queue on access");
+return false;
+}
+return true;
+}
+
+static void
+unmap_rings(VuDev *dev, VuDevRegion *r)
+{
+int i;
+
+for (i = 0; i < dev->max_queues; i++) {
+VuVirtq *vq = >vq[i];
+const uintptr_t desc = (uintptr_t)vq->vring.desc;
+const uintptr_t used = (uintptr_t)vq->vring.used;
+const uintptr_t avail = (uintptr_t)vq->vring.avail;
+
+if (desc < r->mmap_addr || desc >= r->mmap_addr + r->size) {
+continue;
+}
+if (used < r->mmap_addr || used >= r->mmap_addr + r->size) {
+continue;
+}
+if (avail < r->mmap_addr || avail >= r->mmap_addr + r->size) {
+continue;
+}
+
+DPRINT("Unmapping rings of queue %d\n", i);
+vq->vring.desc = NULL;
+vq->vring.used = NULL;
+vq->vring.avail = NULL;
+}
 }
 
 static size_t
@@ -784,21 +849,6 @@ vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg)
 return false;
 }
 
-static bool
-map_ring(VuDev *dev, VuVirtq *vq)
-{
-vq->vring.desc = qva_to_va(dev, vq->vra.desc_user_addr);
-vq->vring.used = qva_to_va(dev, vq->vra.used_user_addr);
-vq->vring.avail = qva_to_va(dev, vq->vra.avail_user_addr);
-
-DPRINT("Setting virtq addresses:\n");
-DPRINT("vring_desc  at %p\n", vq->vring.desc);
-DPRINT("vring_used  at %p\n", vq->vring.used);
-DPRINT("vring_avail at %p\n", vq->vring.avail);
-
-return !(vq->vring.desc && vq->vring.used && vq->vring.avail);
-}
-
 static bool
 generate_faults(VuDev *dev) {
 unsigned int i;
@@ -882,7 +932,6 @@ generate_faults(VuDev *dev) {
 
 static bool
 vu_add_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
-int i;
 VhostUserMemoryRegion m = vmsg->payload.memreg.region, *msg_region = 
 
 if (vmsg->fd_num != 1) {
@@ -928,19 +977,9 @@ vu_add_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
 vmsg->fd_num = 0;
 DPRINT("Successfully added new region in postcopy\n");
 return true;
-} else {
-for (i = 0; i < dev->max_queues; i++) {
-if (dev->vq[i].vring.desc) {
-if (map_ring(dev, >vq[i])) {
-vu_panic(dev, "remapping queue %d for new memory region",
- i);
-}
-}
-}
-
-

[PATCH v1 10/15] libvhost-user: Factor out search for memory region by GPA and simplify

2024-02-02 Thread David Hildenbrand

Memory regions cannot overlap, and if we ever hit that case something
would be really flawed.

For example, when vhost code in QEMU decides to increase the size of memory
regions to cover full huge pages, it makes sure to never create overlaps,
and if there would be overlaps, it would bail out.

QEMU commits 48d7c9757749 ("vhost: Merge sections added to temporary
list"), c1ece84e7c93 ("vhost: Huge page align and merge") and
e7b94a84b6cb ("vhost: Allow adjoining regions") added and clarified that
handling and how overlaps are impossible.

Consequently, each GPA can belong to at most one memory region, and
everything else doesn't make sense. Let's factor out our search to prepare
for further changes.

Signed-off-by: David Hildenbrand 
---
 subprojects/libvhost-user/libvhost-user.c | 79 +--
 1 file changed, 45 insertions(+), 34 deletions(-)

diff --git a/subprojects/libvhost-user/libvhost-user.c 
b/subprojects/libvhost-user/libvhost-user.c
index 22154b217f..d036b54ed0 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -195,30 +195,47 @@ vu_panic(VuDev *dev, const char *msg, ...)
  */
 }
 
+/* Search for a memory region that covers this guest physical address. */
+static VuDevRegion *
+vu_gpa_to_mem_region(VuDev *dev, uint64_t guest_addr)
+{
+unsigned int i;
+
+/*
+ * Memory regions cannot overlap in guest physical address space. Each
+ * GPA belongs to exactly one memory region, so there can only be one
+ * match.
+ */
+for (i = 0; i < dev->nregions; i++) {
+VuDevRegion *cur = >regions[i];
+
+if (guest_addr >= cur->gpa && guest_addr < cur->gpa + cur->size) {
+return cur;
+}
+}
+return NULL;
+}
+
 /* Translate guest physical address to our virtual address.  */
 void *
 vu_gpa_to_va(VuDev *dev, uint64_t *plen, uint64_t guest_addr)
 {
-unsigned int i;
+VuDevRegion *r;
 
 if (*plen == 0) {
 return NULL;
 }
 
-/* Find matching memory region.  */
-for (i = 0; i < dev->nregions; i++) {
-VuDevRegion *r = >regions[i];
-
-if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
-if ((guest_addr + *plen) > (r->gpa + r->size)) {
-*plen = r->gpa + r->size - guest_addr;
-}
-return (void *)(uintptr_t)
-guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
-}
+r = vu_gpa_to_mem_region(dev, guest_addr);
+if (!r) {
+return NULL;
 }
 
-return NULL;
+if ((guest_addr + *plen) > (r->gpa + r->size)) {
+*plen = r->gpa + r->size - guest_addr;
+}
+return (void *)(uintptr_t)guest_addr - r->gpa + r->mmap_addr +
+   r->mmap_offset;
 }
 
 /* Translate qemu virtual address to our virtual address.  */
@@ -854,8 +871,8 @@ static inline bool reg_equal(VuDevRegion *vudev_reg,
 static bool
 vu_rem_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
 VhostUserMemoryRegion m = vmsg->payload.memreg.region, *msg_region = 
-unsigned int i;
-bool found = false;
+unsigned int idx;
+VuDevRegion *r;
 
 if (vmsg->fd_num > 1) {
 vmsg_close_fds(vmsg);
@@ -882,28 +899,22 @@ vu_rem_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
 DPRINT("mmap_offset  0x%016"PRIx64"\n",
msg_region->mmap_offset);
 
-for (i = 0; i < dev->nregions; i++) {
-if (reg_equal(>regions[i], msg_region)) {
-VuDevRegion *r = >regions[i];
-
-munmap((void *)(uintptr_t)r->mmap_addr, r->size + r->mmap_offset);
-
-/* Shift all affected entries by 1 to close the hole at index. */
-memmove(dev->regions + i, dev->regions + i + 1,
-sizeof(VuDevRegion) * (dev->nregions - i - 1));
-DPRINT("Successfully removed a region\n");
-dev->nregions--;
-i--;
-
-found = true;
-break;
-}
-}
-
-if (!found) {
+r = vu_gpa_to_mem_region(dev, msg_region->guest_phys_addr);
+if (!r || !reg_equal(r, msg_region)) {
+vmsg_close_fds(vmsg);
 vu_panic(dev, "Specified region not found\n");
+return false;
 }
 
+munmap((void *)(uintptr_t)r->mmap_addr, r->size + r->mmap_offset);
+
+idx = r - dev->regions;
+assert(idx < dev->nregions);
+/* Shift all affected entries by 1 to close the hole. */
+memmove(r, r + 1, sizeof(VuDevRegion) * (dev->nregions - idx - 1));
+DPRINT("Successfully removed a region\n");
+dev->nregions--;
+
 vmsg_close_fds(vmsg);
 
 return false;
-- 
2.43.0

[PATCH v1 04/15] libvhost-user: Factor out removing all mem regions

2024-02-02 Thread David Hildenbrand

Let's factor it out. Note that the check for MAP_FAILED was wrong as
we never set mmap_addr if mmap() failed. We'll remove the NULL check
separately.

Signed-off-by: David Hildenbrand 
---
 subprojects/libvhost-user/libvhost-user.c | 34 ---
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/subprojects/libvhost-user/libvhost-user.c 
b/subprojects/libvhost-user/libvhost-user.c
index 8a5a7a2295..d5b3468e43 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -240,6 +240,22 @@ qva_to_va(VuDev *dev, uint64_t qemu_addr)
 return NULL;
 }
 
+static void
+vu_remove_all_mem_regs(VuDev *dev)
+{
+unsigned int i;
+
+for (i = 0; i < dev->nregions; i++) {
+VuDevRegion *r = >regions[i];
+void *ma = (void *)(uintptr_t)r->mmap_addr;
+
+if (ma) {
+munmap(ma, r->size + r->mmap_offset);
+}
+}
+dev->nregions = 0;
+}
+
 static void
 vmsg_close_fds(VhostUserMsg *vmsg)
 {
@@ -1003,14 +1019,7 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
 unsigned int i;
 VhostUserMemory m = vmsg->payload.memory, *memory = 
 
-for (i = 0; i < dev->nregions; i++) {
-VuDevRegion *r = >regions[i];
-void *ma = (void *) (uintptr_t) r->mmap_addr;
-
-if (ma) {
-munmap(ma, r->size + r->mmap_offset);
-}
-}
+vu_remove_all_mem_regs(dev);
 dev->nregions = memory->nregions;
 
 if (dev->postcopy_listening) {
@@ -2112,14 +2121,7 @@ vu_deinit(VuDev *dev)
 {
 unsigned int i;
 
-for (i = 0; i < dev->nregions; i++) {
-VuDevRegion *r = >regions[i];
-void *m = (void *) (uintptr_t) r->mmap_addr;
-if (m != MAP_FAILED) {
-munmap(m, r->size + r->mmap_offset);
-}
-}
-dev->nregions = 0;
+vu_remove_all_mem_regs(dev);
 
 for (i = 0; i < dev->max_queues; i++) {
 VuVirtq *vq = >vq[i];
-- 
2.43.0

Re: [PATCH v2 23/23] migration/multifd: Optimize sender side to be lockless

2024-02-02 Thread Fabiano Rosas

pet...@redhat.com writes:

> From: Peter Xu 
>
> When reviewing my attempt to refactor send_prepare(), Fabiano suggested we
> try out with dropping the mutex in multifd code [1].
>
> I thought about that before but I never tried to change the code.  Now
> maybe it's time to give it a stab.  This only optimizes the sender side.
>
> The trick here is multifd has a clear provider/consumer model, that the
> migration main thread publishes requests (either pending_job/pending_sync),
> while the multifd sender threads are consumers.  Here we don't have a lot
> of comlicated data sharing, and the jobs can logically be submitted lockless.

complicated

>
> Arm the code with atomic weapons.  Two things worth mentioning:
>
>   - For multifd_send_pages(): we can use qatomic_load_acquire() when trying
>   to find a free channel, but that's expensive if we attach one ACQUIRE per
>   channel.  Instead, make it atomic_read() on the pending_job flag, but

s/make it/keep it/

The diff doesn't show the atomic_read already there so it's confusing.

>   merge the ACQUIRE into one single smp_mb_acquire() later.
>
>   - For pending_sync: it doesn't have any extra data required since now
>   p->flags are never touched, it should be safe to not use memory barrier.
>   That's different from pending_sync.

pending_job?

>
> Provide rich comments for all the lockless operations to state how they are
> paired.  With that, we can remove the mutex.
>
> [1] https://lore.kernel.org/r/87o7d1jlu5@suse.de
>
> Suggested-by: Fabiano Rosas 
> Signed-off-by: Peter Xu 
> ---
>  migration/multifd.h |  2 --
>  migration/multifd.c | 51 +++--
>  2 files changed, 26 insertions(+), 27 deletions(-)
>
> diff --git a/migration/multifd.h b/migration/multifd.h
> index 98876ff94a..78a2317263 100644
> --- a/migration/multifd.h
> +++ b/migration/multifd.h
> @@ -91,8 +91,6 @@ typedef struct {
>  /* syncs main thread and channels */
>  QemuSemaphore sem_sync;
>  
> -/* this mutex protects the following parameters */
> -QemuMutex mutex;
>  /* is this channel thread running */
>  bool running;
>  /* multifd flags for each packet */
> diff --git a/migration/multifd.c b/migration/multifd.c
> index b317d57d61..ef13e2e781 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -501,19 +501,19 @@ static bool multifd_send_pages(void)
>  }
>  }
>  
> -qemu_mutex_lock(>mutex);
> -assert(!p->pages->num);
> -assert(!p->pages->block);
>  /*
> - * Double check on pending_job==false with the lock.  In the future if
> - * we can have >1 requester thread, we can replace this with a "goto
> - * retry", but that is for later.
> + * Make sure we read p->pending_job before all the rest.  Pairs with
> + * qatomic_store_release() in multifd_send_thread().
>   */
> -assert(qatomic_read(>pending_job) == false);
> -qatomic_set(>pending_job, true);
> +smp_mb_acquire();
> +assert(!p->pages->num);
>  multifd_send_state->pages = p->pages;
>  p->pages = pages;
> -qemu_mutex_unlock(>mutex);
> +/*
> + * Making sure p->pages is setup before marking pending_job=true. Pairs
> + * with the qatomic_load_acquire() in multifd_send_thread().
> + */
> +qatomic_store_release(>pending_job, true);
>  qemu_sem_post(>sem);
>  
>  return true;
> @@ -648,7 +648,6 @@ static bool 
> multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp)
>  }
>  multifd_send_channel_destroy(p->c);
>  p->c = NULL;
> -qemu_mutex_destroy(>mutex);
>  qemu_sem_destroy(>sem);
>  qemu_sem_destroy(>sem_sync);
>  g_free(p->name);
> @@ -742,14 +741,12 @@ int multifd_send_sync_main(void)
>  
>  trace_multifd_send_sync_main_signal(p->id);
>  
> -qemu_mutex_lock(>mutex);
>  /*
>   * We should be the only user so far, so not possible to be set by
>   * others concurrently.
>   */
>  assert(qatomic_read(>pending_sync) == false);
>  qatomic_set(>pending_sync, true);
> -qemu_mutex_unlock(>mutex);
>  qemu_sem_post(>sem);
>  }
>  for (i = 0; i < migrate_multifd_channels(); i++) {
> @@ -796,9 +793,12 @@ static void *multifd_send_thread(void *opaque)
>  if (multifd_send_should_exit()) {
>  break;
>  }
> -qemu_mutex_lock(>mutex);
>  
> -if (qatomic_read(>pending_job)) {
> +/*
> + * Read pending_job flag before p->pages.  Pairs with the
> + * qatomic_store_release() in multifd_send_pages().
> + */
> +if (qatomic_load_acquire(>pending_job)) {
>  MultiFDPages_t *pages = p->pages;
>  
>  p->iovs_num = 0;
> @@ -806,14 +806,12 @@ static void *multifd_send_thread(void *opaque)
>  
>  ret = multifd_send_state->ops->send_prepare(p, _err);
>  if (ret != 0) {
> -qemu_mutex_unlock(>mutex);
>

Re: [PATCH v2 22/23] migration/multifd: Fix MultiFDSendParams.packet_num race

2024-02-02 Thread Fabiano Rosas

pet...@redhat.com writes:

> From: Peter Xu 
>
> As reported correctly by Fabiano [1], MultiFDSendParams.packet_num is buggy
> to be assigned and stored.  Consider two consequent operations of: (1)
> queue a job into multifd send thread X, then (2) queue another sync request
> to the same send thread X.  Then the MultiFDSendParams.packet_num will be
> assigned twice, and the first assignment can get lost already.
>
> To avoid that, we move the packet_num assignment from p->packet_num into
> where the thread will fill in the packet.  Use atomic operations to protect
> the field, making sure there's no race.
>
> Note that atomic fetch_add() may not be good for scaling purposes, however
> multifd should be fine as number of threads should normally not go beyond
> 16 threads.  Let's leave that concern for later but fix the issue first.
>
> There's also a trick on how to make it always work even on 32 bit hosts for
> uint64_t packet number.  Switching to uintptr_t as of now to simply the
> case.  It will cause packet number to overflow easier on 32 bit, but that
> shouldn't be a major concern for now as 32 bit systems is not the major
> audience for any performance concerns like what multifd wants to address.
>
> We also need to move multifd_send_state definition upper, so that
> multifd_send_fill_packet() can reference it.
>
> [1] https://lore.kernel.org/r/87o7d1jlu5@suse.de
>
> Reported-by: Fabiano Rosas 
> Signed-off-by: Peter Xu 

Elena had reported this in October already.

Reported-by: Elena Ufimtseva 
Reviewed-by: Fabiano Rosas 

> ---
>  migration/multifd.h |  2 --
>  migration/multifd.c | 56 +++--
>  2 files changed, 34 insertions(+), 24 deletions(-)
>
> diff --git a/migration/multifd.h b/migration/multifd.h
> index 9b40a53cb6..98876ff94a 100644
> --- a/migration/multifd.h
> +++ b/migration/multifd.h
> @@ -97,8 +97,6 @@ typedef struct {
>  bool running;
>  /* multifd flags for each packet */
>  uint32_t flags;
> -/* global number of generated multifd packets */
> -uint64_t packet_num;
>  /*
>   * The sender thread has work to do if either of below boolean is set.
>   *
> diff --git a/migration/multifd.c b/migration/multifd.c
> index 130f86a1fb..b317d57d61 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -45,6 +45,35 @@ typedef struct {
>  uint64_t unused2[4];/* Reserved for future use */
>  } __attribute__((packed)) MultiFDInit_t;
>  
> +struct {
> +MultiFDSendParams *params;
> +/* array of pages to sent */
> +MultiFDPages_t *pages;
> +/*
> + * Global number of generated multifd packets.
> + *
> + * Note that we used 'uintptr_t' because it'll naturally support atomic
> + * operations on both 32bit / 64 bits hosts.  It means on 32bit systems
> + * multifd will overflow the packet_num easier, but that should be
> + * fine.
> + *
> + * Another option is to use QEMU's Stat64 then it'll be 64 bits on all
> + * hosts, however so far it does not support atomic fetch_add() yet.
> + * Make it easy for now.
> + */
> +uintptr_t packet_num;
> +/* send channels ready */
> +QemuSemaphore channels_ready;
> +/*
> + * Have we already run terminate threads.  There is a race when it
> + * happens that we got one error while we are exiting.
> + * We will use atomic operations.  Only valid values are 0 and 1.
> + */
> +int exiting;
> +/* multifd ops */
> +MultiFDMethods *ops;
> +} *multifd_send_state;
> +
>  /* Multifd without compression */
>  
>  /**
> @@ -292,13 +321,16 @@ void multifd_send_fill_packet(MultiFDSendParams *p)
>  {
>  MultiFDPacket_t *packet = p->packet;
>  MultiFDPages_t *pages = p->pages;
> +uint64_t packet_num;
>  int i;
>  
>  packet->flags = cpu_to_be32(p->flags);
>  packet->pages_alloc = cpu_to_be32(p->pages->allocated);
>  packet->normal_pages = cpu_to_be32(pages->num);
>  packet->next_packet_size = cpu_to_be32(p->next_packet_size);
> -packet->packet_num = cpu_to_be64(p->packet_num);
> +
> +packet_num = qatomic_fetch_inc(_send_state->packet_num);
> +packet->packet_num = cpu_to_be64(packet_num);
>  
>  if (pages->block) {
>  strncpy(packet->ramblock, pages->block->idstr, 256);
> @@ -314,7 +346,7 @@ void multifd_send_fill_packet(MultiFDSendParams *p)
>  p->packets_sent++;
>  p->total_normal_pages += pages->num;
>  
> -trace_multifd_send(p->id, p->packet_num, pages->num, p->flags,
> +trace_multifd_send(p->id, packet_num, pages->num, p->flags,
> p->next_packet_size);
>  }
>  
> @@ -398,24 +430,6 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams 
> *p, Error **errp)
>  return 0;
>  }
>  
> -struct {
> -MultiFDSendParams *params;
> -/* array of pages to sent */
> -MultiFDPages_t *pages;
> -/* global number of generated multifd packets */
> -uint64_t packet_num;
> -/*

Re: [PATCH v2 21/23] migration/multifd: Stick with send/recv on function names

2024-02-02 Thread Fabiano Rosas

pet...@redhat.com writes:

> From: Peter Xu 
>
> Most of the multifd code uses send/recv to represent the two sides, but
> some rare cases use save/load.
>
> Since send/recv is the majority, replacing the save/load use cases to use
> send/recv globally.  Now we reach a consensus on the naming.
>
> Signed-off-by: Peter Xu 

Good!

Reviewed-by: Fabiano Rosas 

> ---
>  migration/multifd.h   | 10 +-
>  migration/migration.c | 12 ++--
>  migration/multifd.c   | 10 +-
>  3 files changed, 16 insertions(+), 16 deletions(-)
>
> diff --git a/migration/multifd.h b/migration/multifd.h
> index a320c53a6f..9b40a53cb6 100644
> --- a/migration/multifd.h
> +++ b/migration/multifd.h
> @@ -13,11 +13,11 @@
>  #ifndef QEMU_MIGRATION_MULTIFD_H
>  #define QEMU_MIGRATION_MULTIFD_H
>  
> -int multifd_save_setup(Error **errp);
> -void multifd_save_cleanup(void);
> -int multifd_load_setup(Error **errp);
> -void multifd_load_cleanup(void);
> -void multifd_load_shutdown(void);
> +int multifd_send_setup(Error **errp);
> +void multifd_send_shutdown(void);
> +int multifd_recv_setup(Error **errp);
> +void multifd_recv_cleanup(void);
> +void multifd_recv_shutdown(void);
>  bool multifd_recv_all_channels_created(void);
>  void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
>  void multifd_recv_sync_main(void);
> diff --git a/migration/migration.c b/migration/migration.c
> index d5f705ceef..ba99772e76 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -311,7 +311,7 @@ void migration_incoming_state_destroy(void)
>  {
>  struct MigrationIncomingState *mis = migration_incoming_get_current();
>  
> -multifd_load_cleanup();
> +multifd_recv_cleanup();
>  compress_threads_load_cleanup();
>  
>  if (mis->to_src_file) {
> @@ -662,7 +662,7 @@ static void process_incoming_migration_bh(void *opaque)
>  
>  trace_vmstate_downtime_checkpoint("dst-precopy-bh-announced");
>  
> -multifd_load_shutdown();
> +multifd_recv_shutdown();
>  
>  dirty_bitmap_mig_before_vm_start();
>  
> @@ -759,7 +759,7 @@ fail:
>MIGRATION_STATUS_FAILED);
>  qemu_fclose(mis->from_src_file);
>  
> -multifd_load_cleanup();
> +multifd_recv_cleanup();
>  compress_threads_load_cleanup();
>  
>  exit(EXIT_FAILURE);
> @@ -885,7 +885,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, 
> Error **errp)
>  default_channel = !mis->from_src_file;
>  }
>  
> -if (multifd_load_setup(errp) != 0) {
> +if (multifd_recv_setup(errp) != 0) {
>  return;
>  }
>  
> @@ -1331,7 +1331,7 @@ static void migrate_fd_cleanup(MigrationState *s)
>  }
>  bql_lock();
>  
> -multifd_save_cleanup();
> +multifd_send_shutdown();
>  qemu_mutex_lock(>qemu_file_lock);
>  tmp = s->to_dst_file;
>  s->to_dst_file = NULL;
> @@ -3623,7 +3623,7 @@ void migrate_fd_connect(MigrationState *s, Error 
> *error_in)
>  return;
>  }
>  
> -if (multifd_save_setup(_err) != 0) {
> +if (multifd_send_setup(_err) != 0) {
>  migrate_set_error(s, local_err);
>  error_report_err(local_err);
>  migrate_set_state(>state, MIGRATION_STATUS_SETUP,
> diff --git a/migration/multifd.c b/migration/multifd.c
> index e2dd2f6e04..130f86a1fb 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -663,7 +663,7 @@ static void multifd_send_cleanup_state(void)
>  multifd_send_state = NULL;
>  }
>  
> -void multifd_save_cleanup(void)
> +void multifd_send_shutdown(void)
>  {
>  int i;
>  
> @@ -965,7 +965,7 @@ static void multifd_new_send_channel_create(gpointer 
> opaque)
>  socket_send_channel_create(multifd_new_send_channel_async, opaque);
>  }
>  
> -int multifd_save_setup(Error **errp)
> +int multifd_send_setup(Error **errp)
>  {
>  int thread_count;
>  uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
> @@ -1063,7 +1063,7 @@ static void multifd_recv_terminate_threads(Error *err)
>  }
>  }
>  
> -void multifd_load_shutdown(void)
> +void multifd_recv_shutdown(void)
>  {
>  if (migrate_multifd()) {
>  multifd_recv_terminate_threads(NULL);
> @@ -1098,7 +1098,7 @@ static void multifd_recv_cleanup_state(void)
>  multifd_recv_state = NULL;
>  }
>  
> -void multifd_load_cleanup(void)
> +void multifd_recv_cleanup(void)
>  {
>  int i;
>  
> @@ -1213,7 +1213,7 @@ static void *multifd_recv_thread(void *opaque)
>  return NULL;
>  }
>  
> -int multifd_load_setup(Error **errp)
> +int multifd_recv_setup(Error **errp)
>  {
>  int thread_count;
>  uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();

Re: [PATCH v2 20/23] migration/multifd: Cleanup multifd_load_cleanup()

2024-02-02 Thread Fabiano Rosas

pet...@redhat.com writes:

> From: Peter Xu 
>
> Use similar logic to cleanup the recv side.
>
> Note that multifd_recv_terminate_threads() may need some similar rework
> like the sender side, but let's leave that for later.
>
> Signed-off-by: Peter Xu 

Reviewed-by: Fabiano Rosas

Re: [PATCH v2 19/23] migration/multifd: Cleanup multifd_save_cleanup()

2024-02-02 Thread Fabiano Rosas

pet...@redhat.com writes:

> From: Peter Xu 
>
> Shrink the function by moving relevant works into helpers: move the thread
> join()s into multifd_send_terminate_threads(), then create two more helpers
> to cover channel/state cleanups.
>
> Add a TODO entry for the thread terminate process because p->running is
> still buggy.  We need to fix it at some point but not yet covered.
>
> Suggested-by: Fabiano Rosas 
> Signed-off-by: Peter Xu 

Reviewed-by: Fabiano Rosas 

minor comment below

> ---
>  migration/multifd.c | 91 +
>  1 file changed, 59 insertions(+), 32 deletions(-)
>
> diff --git a/migration/multifd.c b/migration/multifd.c
> index 4ab8e6eff2..4cb0d2cc17 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -593,6 +593,11 @@ static void multifd_send_terminate_threads(void)
>   * always set it.
>   */
>  qatomic_set(_send_state->exiting, 1);
> +
> +/*
> + * Firstly, kick all threads out; no matter whether they are just idle,
> + * or blocked in an IO system call.
> + */
>  for (i = 0; i < migrate_multifd_channels(); i++) {
>  MultiFDSendParams *p = _send_state->params[i];
>  
> @@ -601,6 +606,21 @@ static void multifd_send_terminate_threads(void)
>  qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
>  }
>  }
> +
> +/*
> + * Finally recycle all the threads.
> + *
> + * TODO: p->running is still buggy, e.g. we can reach here without the
> + * corresponding multifd_new_send_channel_async() get invoked yet,
> + * then a new thread can even be created after this function returns.
> + */

Series on the list:

https://lore.kernel.org/r/20240202191128.1901-1-faro...@suse.de

> +for (i = 0; i < migrate_multifd_channels(); i++) {
> +MultiFDSendParams *p = _send_state->params[i];
> +
> +if (p->running) {
> +qemu_thread_join(>thread);
> +}
> +}
>  }
>  
>  static int multifd_send_channel_destroy(QIOChannel *send)
> @@ -608,6 +628,41 @@ static int multifd_send_channel_destroy(QIOChannel *send)
>  return socket_send_channel_destroy(send);
>  }
>  
> +static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp)
> +{
> +if (p->registered_yank) {
> +migration_ioc_unregister_yank(p->c);
> +}
> +multifd_send_channel_destroy(p->c);
> +p->c = NULL;
> +qemu_mutex_destroy(>mutex);
> +qemu_sem_destroy(>sem);
> +qemu_sem_destroy(>sem_sync);
> +g_free(p->name);
> +p->name = NULL;
> +multifd_pages_clear(p->pages);
> +p->pages = NULL;
> +p->packet_len = 0;
> +g_free(p->packet);
> +p->packet = NULL;
> +g_free(p->iov);
> +p->iov = NULL;
> +multifd_send_state->ops->send_cleanup(p, errp);
> +
> +return *errp == NULL;

I think technically this would require the ERRP_GUARD() macro?

> +}
> +
> +static void multifd_send_cleanup_state(void)
> +{
> +qemu_sem_destroy(_send_state->channels_ready);
> +g_free(multifd_send_state->params);
> +multifd_send_state->params = NULL;
> +multifd_pages_clear(multifd_send_state->pages);
> +multifd_send_state->pages = NULL;
> +g_free(multifd_send_state);
> +multifd_send_state = NULL;
> +}
> +
>  void multifd_save_cleanup(void)
>  {
>  int i;
> @@ -615,48 +670,20 @@ void multifd_save_cleanup(void)
>  if (!migrate_multifd()) {
>  return;
>  }
> +
>  multifd_send_terminate_threads();
> -for (i = 0; i < migrate_multifd_channels(); i++) {
> -MultiFDSendParams *p = _send_state->params[i];
>  
> -if (p->running) {
> -qemu_thread_join(>thread);
> -}
> -}
>  for (i = 0; i < migrate_multifd_channels(); i++) {
>  MultiFDSendParams *p = _send_state->params[i];
>  Error *local_err = NULL;
>  
> -if (p->registered_yank) {
> -migration_ioc_unregister_yank(p->c);
> -}
> -multifd_send_channel_destroy(p->c);
> -p->c = NULL;
> -qemu_mutex_destroy(>mutex);
> -qemu_sem_destroy(>sem);
> -qemu_sem_destroy(>sem_sync);
> -g_free(p->name);
> -p->name = NULL;
> -multifd_pages_clear(p->pages);
> -p->pages = NULL;
> -p->packet_len = 0;
> -g_free(p->packet);
> -p->packet = NULL;
> -g_free(p->iov);
> -p->iov = NULL;
> -multifd_send_state->ops->send_cleanup(p, _err);
> -if (local_err) {
> +if (!multifd_send_cleanup_channel(p, _err)) {
>  migrate_set_error(migrate_get_current(), local_err);
>  error_free(local_err);
>  }
>  }
> -qemu_sem_destroy(_send_state->channels_ready);
> -g_free(multifd_send_state->params);
> -multifd_send_state->params = NULL;
> -multifd_pages_clear(multifd_send_state->pages);
> -multifd_send_state->pages = NULL;
> -g_free(multifd_send_state);
> -multifd_send_state = NULL;

[PATCH v3 3/3] hw/i2c: smbus_slave: Reset state on reset

2024-02-02 Thread Joe Komlodi

If a reset comes while the SMBus device is not in its idle state, it's
possible for it to get confused on valid transactions post-reset.

Signed-off-by: Joe Komlodi 
---
 hw/i2c/smbus_slave.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/hw/i2c/smbus_slave.c b/hw/i2c/smbus_slave.c
index 9f9afc25a4..4615e8b097 100644
--- a/hw/i2c/smbus_slave.c
+++ b/hw/i2c/smbus_slave.c
@@ -201,10 +201,19 @@ static int smbus_i2c_send(I2CSlave *s, uint8_t data)
 return 0;
 }
 
+static void smbus_device_hold_reset(Object *obj)
+{
+SMBusDevice *dev = SMBUS_DEVICE(obj);
+dev->mode = SMBUS_IDLE;
+dev->data_len = 0;
+}
+
 static void smbus_device_class_init(ObjectClass *klass, void *data)
 {
 I2CSlaveClass *sc = I2C_SLAVE_CLASS(klass);
+ResettableClass *rc = RESETTABLE_CLASS(klass);
 
+rc->phases.hold = smbus_device_hold_reset;
 sc->event = smbus_i2c_event;
 sc->recv = smbus_i2c_recv;
 sc->send = smbus_i2c_send;
-- 
2.43.0.594.gd9cf4e227d-goog

[PATCH v3 0/3] hw/i2c: smbus: Reset fixes

2024-02-02 Thread Joe Komlodi

Changelog:

v2 -> v3
Patch 1
- Removed I3CBus class definition, since it was unneeded.
- whitespace fixes
- Changed enter_reset to hold_reset

Patch 2
- Moved pointer returned by object_get_canonical_path outside of printf
  so it can be freed

Patch 3
- Changed enter_reset to hold_reset

v1 -> v2
- Dropped 4th patch "hw/i2c: smbus: mux: Reset SMBusDevice state
on reset". After more testing and Corey's comment, I realized it
wasn't needed.

Original message:

Hi all,

This series adds some resets for SMBus and for the I2C core. Along with
it, we make SMBus slave error printing a little more helpful.

These reset issues were very infrequent, they would maybe occur in 1 out
of hundreds of resets in our testing, but the way they happen is pretty
straightforward.

Basically as long as a reset happens in the middle of a transaction, the
state of the old transaction would still partially be there after the
reset. Once a new transaction comes in, the partial stale state can
cause the new transaction to incorrectly fail.

Thanks,
Joe

Joe Komlodi (3):
  hw/i2c: core: Add reset
  hw/i2c/smbus_slave: Add object path on error prints
  hw/i2c: smbus_slave: Reset state on reset

 hw/i2c/core.c| 19 +++
 hw/i2c/smbus_slave.c | 17 +++--
 include/hw/i2c/i2c.h |  2 +-
 3 files changed, 35 insertions(+), 3 deletions(-)

-- 
2.43.0.594.gd9cf4e227d-goog

[PATCH v3 1/3] hw/i2c: core: Add reset

2024-02-02 Thread Joe Komlodi

It's possible for a reset to come in the middle of a transaction, which
causes the bus to be in an old state when a new transaction comes in.

Signed-off-by: Joe Komlodi 
---
 hw/i2c/core.c| 19 +++
 include/hw/i2c/i2c.h |  2 +-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/hw/i2c/core.c b/hw/i2c/core.c
index 4cf30b2c86..3128067bba 100644
--- a/hw/i2c/core.c
+++ b/hw/i2c/core.c
@@ -23,10 +23,29 @@ static Property i2c_props[] = {
 DEFINE_PROP_END_OF_LIST(),
 };
 
+static void i2c_bus_hold_reset(Object *obj)
+{
+I2CBus *bus = I2C_BUS(obj);
+I2CNode *node, *next;
+
+bus->broadcast = false;
+QLIST_FOREACH_SAFE(node, >current_devs, next, next) {
+QLIST_REMOVE(node, next);
+g_free(node);
+}
+}
+
+static void i2c_bus_class_init(ObjectClass *klass, void *data)
+{
+ResettableClass *rc = RESETTABLE_CLASS(klass);
+rc->phases.hold = i2c_bus_hold_reset;
+}
+
 static const TypeInfo i2c_bus_info = {
 .name = TYPE_I2C_BUS,
 .parent = TYPE_BUS,
 .instance_size = sizeof(I2CBus),
+.class_init = i2c_bus_class_init,
 };
 
 static int i2c_bus_pre_save(void *opaque)
diff --git a/include/hw/i2c/i2c.h b/include/hw/i2c/i2c.h
index 2a3abacd1b..49580e30e2 100644
--- a/include/hw/i2c/i2c.h
+++ b/include/hw/i2c/i2c.h
@@ -64,7 +64,7 @@ struct I2CSlave {
 };
 
 #define TYPE_I2C_BUS "i2c-bus"
-OBJECT_DECLARE_SIMPLE_TYPE(I2CBus, I2C_BUS)
+OBJECT_DECLARE_TYPE(I2CBus, I2CBusClass, I2C_BUS)
 
 typedef struct I2CNode I2CNode;
 
-- 
2.43.0.594.gd9cf4e227d-goog

[PATCH v3 2/3] hw/i2c/smbus_slave: Add object path on error prints

2024-02-02 Thread Joe Komlodi

The current logging doesn't tell us which specific smbus device is an
error state.

Signed-off-by: Joe Komlodi 
---
 hw/i2c/smbus_slave.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/hw/i2c/smbus_slave.c b/hw/i2c/smbus_slave.c
index 1300c9ec72..9f9afc25a4 100644
--- a/hw/i2c/smbus_slave.c
+++ b/hw/i2c/smbus_slave.c
@@ -25,11 +25,15 @@
 #define DPRINTF(fmt, ...) \
 do { printf("smbus(%02x): " fmt , dev->i2c.address, ## __VA_ARGS__); } while 
(0)
 #define BADF(fmt, ...) \
-do { fprintf(stderr, "smbus: error: " fmt , ## __VA_ARGS__); exit(1);} while 
(0)
+do { g_autofree char *qom_path = object_get_canonical_path(OBJECT(dev));  \
+fprintf(stderr, "%s: smbus: error: " fmt , qom_path, ## __VA_ARGS__); \
+exit(1); } while (0)
 #else
 #define DPRINTF(fmt, ...) do {} while(0)
 #define BADF(fmt, ...) \
-do { fprintf(stderr, "smbus: error: " fmt , ## __VA_ARGS__);} while (0)
+do { g_autofree char *qom_path = object_get_canonical_path(OBJECT(dev));  \
+fprintf(stderr, "%s: smbus: error: " fmt , qom_path, ## __VA_ARGS__); \
+ } while (0)
 #endif
 
 enum {
-- 
2.43.0.594.gd9cf4e227d-goog

Re: [PATCH v2 18/23] migration/multifd: Rewrite multifd_queue_page()

2024-02-02 Thread Fabiano Rosas

pet...@redhat.com writes:

> From: Peter Xu 
>
> The current multifd_queue_page() is not easy to read and follow.  It is not
> good with a few reasons:
>
>   - No helper at all to show what exactly does a condition mean; in short,
>   readability is low.
>
>   - Rely on pages->ramblock being cleared to detect an empty queue.  It's
>   slightly an overload of the ramblock pointer, per Fabiano [1], which I
>   also agree.
>
>   - Contains a self recursion, even if not necessary..
>
> Rewrite this function.  We add some comments to make it even clearer on
> what it does.
>
> [1] https://lore.kernel.org/r/87wmrpjzew@suse.de
>
> Signed-off-by: Peter Xu 

Reviewed-by: Fabiano Rosas 

Patch looks good, but I have a question below.

> ---
>  migration/multifd.c | 56 ++---
>  1 file changed, 37 insertions(+), 19 deletions(-)
>
> diff --git a/migration/multifd.c b/migration/multifd.c
> index 35d4e8ad1f..4ab8e6eff2 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -506,35 +506,53 @@ static bool multifd_send_pages(void)
>  return true;
>  }
>  
> +static inline bool multifd_queue_empty(MultiFDPages_t *pages)
> +{
> +return pages->num == 0;
> +}
> +
> +static inline bool multifd_queue_full(MultiFDPages_t *pages)
> +{
> +return pages->num == pages->allocated;
> +}
> +
> +static inline void multifd_enqueue(MultiFDPages_t *pages, ram_addr_t offset)
> +{
> +pages->offset[pages->num++] = offset;
> +}
> +
>  /* Returns true if enqueue successful, false otherwise */
>  bool multifd_queue_page(RAMBlock *block, ram_addr_t offset)
>  {
> -MultiFDPages_t *pages = multifd_send_state->pages;
> -bool changed = false;
> +MultiFDPages_t *pages;
> +
> +retry:
> +pages = multifd_send_state->pages;
>  
> -if (!pages->block) {
> +/* If the queue is empty, we can already enqueue now */
> +if (multifd_queue_empty(pages)) {
>  pages->block = block;
> +multifd_enqueue(pages, offset);
> +return true;
>  }
>  
> -if (pages->block == block) {
> -pages->offset[pages->num] = offset;
> -pages->num++;
> -
> -if (pages->num < pages->allocated) {
> -return true;
> +/*
> + * Not empty, meanwhile we need a flush.  It can because of either:
> + *
> + * (1) The page is not on the same ramblock of previous ones, or,
> + * (2) The queue is full.
> + *
> + * After flush, always retry.
> + */
> +if (pages->block != block || multifd_queue_full(pages)) {
> +if (!multifd_send_pages()) {
> +return false;
>  }
> -} else {
> -changed = true;
> -}
> -
> -if (!multifd_send_pages()) {
> -return false;
> -}
> -
> -if (changed) {
> -return multifd_queue_page(block, offset);
> +goto retry;
>  }
>  
> +/* Not empty, and we still have space, do it! */
> +multifd_enqueue(pages, offset);

Hm, here you're missing the flush of the last group of pages of the last
ramblock. Just like current code...

...which means we're relying on the multifd_send_pages() at
multifd_send_sync_main() to send the last few pages. So how can that
work when multifd_flush_after_each_section==false? Because it skips the
sync flag, but would also skip the last send. I'm confused.

Re: [PATCH v2 1/3] hw/i2c: core: Add reset

2024-02-02 Thread Joe Komlodi

Hi peter,

On Thu, Feb 1, 2024 at 7:24 AM Peter Maydell  wrote:
>
> On Fri, 26 Jan 2024 at 00:56, Joe Komlodi  wrote:
> >
> > It's possible for a reset to come in the middle of a transaction, which
> > causes the bus to be in an old state when a new transaction comes in.
> >
> > Signed-off-by: Joe Komlodi 
> > ---
> >  hw/i2c/core.c| 30 +-
> >  include/hw/i2c/i2c.h |  6 +-
> >  2 files changed, 30 insertions(+), 6 deletions(-)
> >
> > diff --git a/hw/i2c/core.c b/hw/i2c/core.c
> > index 4cf30b2c86..def4f134d0 100644
> > --- a/hw/i2c/core.c
> > +++ b/hw/i2c/core.c
> > @@ -23,11 +23,31 @@ static Property i2c_props[] = {
> >  DEFINE_PROP_END_OF_LIST(),
> >  };
> >
> > -static const TypeInfo i2c_bus_info = {
> > -.name = TYPE_I2C_BUS,
> > -.parent = TYPE_BUS,
> > -.instance_size = sizeof(I2CBus),
> > -};
> > +static void i2c_bus_enter_reset(Object *obj, ResetType type)
> > +{
> > +I2CBus *bus = I2C_BUS(obj);
> > +I2CNode *node, *next;
> > +
> > +bus->broadcast = false;
> > +QLIST_FOREACH_SAFE(node, >current_devs, next, next) {
> > +QLIST_REMOVE(node, next);
> > +g_free(node);
> > +}
>
> Doesn't it confuse the device that's partway through a
> transaction if we just forget about the transaction entirely
> without terminating it somehow? I'm not sure what real hardware
> does in this situation, though.
>

It could. Ideally all devices on the bus would have reset functions
implemented as well, so their state can reset.
With adding a bus-wide reset, what could end up happening is devices
without resets implemented end up in the wrong state compared to the
bus, while before they would stay in the same state as the bus.
However with the bus-wide reset, devices with resets now match their
state with the bus's state, while before there could be a mismatch.

Fixed the comments in this patch and in the other 2 patches in v3.

Thanks,
Joe

> > +}
> > +
> > +static void i2c_bus_class_init(ObjectClass *klass, void *data)
> > +{
> > +ResettableClass *rc = RESETTABLE_CLASS(klass);
> > +rc->phases.enter = i2c_bus_enter_reset;
> > +}
> > +
> > + static const TypeInfo i2c_bus_info = {
> > + .name = TYPE_I2C_BUS,
> > + .parent = TYPE_BUS,
> > + .instance_size = sizeof(I2CBus),
> > + .class_size = sizeof(I2CBusClass),
> > + .class_init = i2c_bus_class_init,
> > + };
>
> Looks like you have stray extra spaces in front of this
> type definition (which has then caused 'diff' to not notice
> that you're only adding fields to the existing struct).
>
> >
> >  static int i2c_bus_pre_save(void *opaque)
> >  {
> > diff --git a/include/hw/i2c/i2c.h b/include/hw/i2c/i2c.h
> > index 2a3abacd1b..420868a269 100644
> > --- a/include/hw/i2c/i2c.h
> > +++ b/include/hw/i2c/i2c.h
> > @@ -64,7 +64,7 @@ struct I2CSlave {
> >  };
> >
> >  #define TYPE_I2C_BUS "i2c-bus"
> > -OBJECT_DECLARE_SIMPLE_TYPE(I2CBus, I2C_BUS)
> > +OBJECT_DECLARE_TYPE(I2CBus, I2CBusClass, I2C_BUS)
> >
> >  typedef struct I2CNode I2CNode;
> >
> > @@ -83,6 +83,10 @@ struct I2CPendingMaster {
> >  typedef QLIST_HEAD(I2CNodeList, I2CNode) I2CNodeList;
> >  typedef QSIMPLEQ_HEAD(I2CPendingMasters, I2CPendingMaster) 
> > I2CPendingMasters;
> >
> > +struct I2CBusClass {
> > +DeviceClass parent_class;
> > +};
>
> This isn't correct -- a FooBusClass's parent_class field
> should be a BusClass. But since you don't define any new
> fields in it, you don't need to define the struct at all.
>
> Instead, your TypeInfo for the TYPE_I2C_BUS can add a
> .class_init member, and leave the .class_size unset
> (it will then inherit the class-size from the parent
> class, which will be sizeof(BusClass)).
>
> > +
> >  struct I2CBus {
> >  BusState qbus;
> >  I2CNodeList current_devs;
> > --
> > 2.43.0.429.g432eaa2c6b-goog
>
> thanks
> -- PMM

[PATCH] Hexagon (target/hexagon) Only pass env to generated helper when needed

2024-02-02 Thread Taylor Simpson

Currently, we pass env to every generated helper.  When the semantics of
the instruction only depend on the arguments, this is unnecessary and
adds extra overhead to the helper call.

The A2_nop and SA1_setin1 instructions end up with no arguments.  This
results in a "old-style function definition" error from the compiler, so
we write overrides for them.

With this change, the number of helpers with env argument is
idef-parser enabled:329 total, 23 with env
idef-parser disabled:   1543 total, 553 with env

Signed-off-by: Taylor Simpson 
---
 target/hexagon/gen_tcg.h |  3 +++
 target/hexagon/attribs_def.h.inc |  1 +
 target/hexagon/hex_common.py | 22 +-
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index 1c4391b415..e9ac2e3fe0 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -1369,3 +1369,6 @@
 gen_helper_raise_exception(tcg_env, excp); \
 } while (0)
 #endif
+
+#define fGEN_TCG_A2_nop(SHORTCODE) do { } while (0)
+#define fGEN_TCG_SA1_setin1(SHORTCODE) tcg_gen_movi_tl(RdV, -1)
diff --git a/target/hexagon/attribs_def.h.inc b/target/hexagon/attribs_def.h.inc
index 87942d46f4..8949ee09cf 100644
--- a/target/hexagon/attribs_def.h.inc
+++ b/target/hexagon/attribs_def.h.inc
@@ -117,6 +117,7 @@ DEF_ATTRIB(IMPLICIT_READS_P1, "Reads the P1 register", "", 
"")
 DEF_ATTRIB(IMPLICIT_READS_P2, "Reads the P2 register", "", "")
 DEF_ATTRIB(IMPLICIT_READS_P3, "Reads the P3 register", "", "")
 DEF_ATTRIB(IMPLICIT_WRITES_USR, "May write USR", "", "")
+DEF_ATTRIB(IMPLICIT_READS_SP, "Reads the SP register", "", "")
 DEF_ATTRIB(COMMUTES, "The operation is communitive", "", "")
 DEF_ATTRIB(DEALLOCRET, "dealloc_return", "", "")
 DEF_ATTRIB(DEALLOCFRAME, "deallocframe", "", "")
diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py
index 195620c7ec..12bc410bb2 100755
--- a/target/hexagon/hex_common.py
+++ b/target/hexagon/hex_common.py
@@ -101,6 +101,7 @@ def calculate_attribs():
 add_qemu_macro_attrib('fLSBNEW1', 'A_IMPLICIT_READS_P1')
 add_qemu_macro_attrib('fLSBNEW1NOT', 'A_IMPLICIT_READS_P1')
 add_qemu_macro_attrib('fREAD_P3', 'A_IMPLICIT_READS_P3')
+add_qemu_macro_attrib('fREAD_SP', 'A_IMPLICIT_READS_SP')
 
 # Recurse down macros, find attributes from sub-macros
 macroValues = list(macros.values())
@@ -197,6 +198,16 @@ def get_tagimms():
 return dict(zip(tags, list(map(compute_tag_immediates, tags
 
 
+def need_env(tag):
+return ("A_STORE" in attribdict[tag] or
+"A_LOAD" in attribdict[tag] or
+"A_CVI_GATHER" in attribdict[tag] or
+"A_CVI_SCATTER" in attribdict[tag] or
+"A_IMPLICIT_WRITES_USR" in attribdict[tag] or
+"A_IMPLICIT_READS_P0" in attribdict[tag] or
+"A_IMPLICIT_READS_SP" in attribdict[tag])
+
+
 def need_slot(tag):
 if (
 "A_CVI_SCATTER" not in attribdict[tag]
@@ -1060,11 +1071,12 @@ def helper_args(tag, regs, imms):
 args = []
 
 ## First argument is the CPU state
-args.append(HelperArg(
-"env",
-"tcg_env",
-"CPUHexagonState *env"
-))
+if need_env(tag):
+args.append(HelperArg(
+"env",
+"tcg_env",
+"CPUHexagonState *env"
+))
 
 ## For predicated instructions, we pass in the destination register
 if is_predicated(tag):
-- 
2.34.1

Re: [PATCH v2 17/23] migration/multifd: Change retval of multifd_send_pages()

2024-02-02 Thread Fabiano Rosas

pet...@redhat.com writes:

> From: Peter Xu 
>
> Using int is an overkill when there're only two options.  Change it to a
> boolean.
>
> Signed-off-by: Peter Xu 

Reviewed-by: Fabiano Rosas

Re: [PATCH v2 16/23] migration/multifd: Change retval of multifd_queue_page()

2024-02-02 Thread Fabiano Rosas

pet...@redhat.com writes:

> From: Peter Xu 
>
> Using int is an overkill when there're only two options.  Change it to a
> boolean.
>
> Signed-off-by: Peter Xu 

Reviewed-by: Fabiano Rosas

Re: [PATCH v2 15/23] migration/multifd: Split multifd_send_terminate_threads()

2024-02-02 Thread Fabiano Rosas

pet...@redhat.com writes:

> From: Peter Xu 
>
> Split multifd_send_terminate_threads() into two functions:
>
>   - multifd_send_set_error(): used when an error happened on the sender
> side, set error and quit state only
>
>   - multifd_send_terminate_threads(): used only by the main thread to kick
> all multifd send threads out of sleep, for the last recycling.
>
> Use multifd_send_set_error() in the three old call sites where only the
> error will be set.
>
> Use multifd_send_terminate_threads() in the last one where the main thread
> will kick the multifd threads at last in multifd_save_cleanup().
>
> Both helpers will need to set quitting=1.
>
> Suggested-by: Fabiano Rosas 
> Signed-off-by: Peter Xu 

Reviewed-by: Fabiano Rosas

Re: [PULL 02/10] hw/hppa/machine: Disable default devices with --nodefaults option

2024-02-02 Thread Helge Deller


On 2/2/24 19:04, Guenter Roeck wrote:

On Fri, Feb 02, 2024 at 10:54:20AM +0100, Helge Deller wrote:

Hi Guenter,

On 2/2/24 05:22, Guenter Roeck wrote:

On Sat, Jan 13, 2024 at 06:57:20AM +0100, del...@kernel.org wrote:

From: Helge Deller 

Recognize the qemu --nodefaults option, which will disable the
following default devices on hppa:
- lsi53c895a SCSI controller,
- artist graphics card,
- LASI 82596 NIC,
- tulip PCI NIC,
- second serial PCI card,
- USB OHCI controller.

Adding this option is very useful to allow manual testing and
debugging of the other possible devices on the command line.



With this patch in the tree, I get some interesting crashes in Seabios
if I provide a somewhat unusual command line option. For example,
something like

  -usb -device usb-ehci,id=ehci \
  -device usb-uas,bus=ehci.0,id=uas \
  -device scsi-hd,bus=uas.0,scsi-id=0,lun=0,drive=d0 \
  -drive file= ...

is accepted as command line option but results in

SeaBIOS PA-RISC 32-bit Firmware Version 15 (QEMU 8.2.1)
Duplex Console IO Dependent Code (IODC) revision 1
--
(c) Copyright 2017-2024 Helge Deller  and SeaBIOS developers.
--
Processor   SpeedState   Coprocessor State  Cache Size
-     -  -  --
0  250 MHzActive Functional0 KB
1  250 MHzIdle   Functional0 KB
2  250 MHzIdle   Functional0 KB
3  250 MHzIdle   Functional0 KB
Emulated machine: HP C3700 (64-bit PA2.0) with 32-bit PDC
Available memory: 1024 MB
Good memory required: 16 MB
Primary boot path:FWSCSI.0.0
Alternate boot path:  FWSCSI.0.0
Console path: SERIAL_2.9600.8.none
Keyboard path:SERIAL_2.9600.8.none
*ERROR* in SeaBIOS-hppa-v15:
prepare_boot_path:2898
SeaBIOS wants SYSTEM HALT.

This is without --nodefaults, and it used to work. Is that intentional ?


This should now be fixed in the upcoming SeaBIOS-hppa-v16 version ("devel" 
branch):
https://github.com/hdeller/seabios-hppa/tree/devel
Could you test?


I was able to build from the 'master' branch, but 'devel' gives me

hppa64-linux-ld: target elf32-hppa-linux not found


The devel branch now includes a 64-bit firmware too.
You need both, hppa (32-bit) and hppa64 (64-bit) gcc
and binutils packages installed.


Do you have a binary seabios image, by any chance ?


http://www.dellerweb.de/temp/hppa-firmware.img


If it doesn't work, please give me the full command line.



qemu-system-hppa -M C3700 -smp 4 \
-kernel vmlinux -no-reboot -snapshot \
-usb -device usb-ehci,id=ehci \
-device usb-uas,bus=ehci.0,id=uas \
-device scsi-hd,bus=uas.0,scsi-id=0,lun=0,drive=d0 \
-drive file=rootfs.ext2,if=none,format=raw,id=d0 \
-append "root=/dev/sda rootwait console=ttyS0,115200" \
-nographic -monitor null


That line boots for me now.


This is with qemu 8.2.1. Note that the number of CPUs doesn't make a
difference. It turns out this also crashes/aborts immediately with
"nodefaults".


Adding "--nodefaults -serial mon:stdio" to the line above works too.


If I do use the --nodefaults parameter, I was unable to figure out how
to configure the serial console. What command line parameter(s) do I need to
get it ?


You need to add:
-serial mon:stdio
This will create a serial port if it's not yet there.



And there was me trying all variants of "-device pci-serial-4x..." I could
think of ;-).


:-)

Helge

Re: [PATCH v2 13/23] migration/multifd: Move header prepare/fill into send_prepare()

2024-02-02 Thread Fabiano Rosas

pet...@redhat.com writes:

> From: Peter Xu 
>
> This patch redefines the interfacing of ->send_prepare().  It further
> simplifies multifd_send_thread() especially on zero copy.
>
> Now with the new interface, we require the hook to do all the work for
> preparing the IOVs to send.  After it's completed, the IOVs should be ready
> to be dumped into the specific multifd QIOChannel later.
>
> So now the API looks like:
>
>   p->pages --->  send_prepare() -> IOVs
>
> This also prepares for the case where the input can be extended to even not
> any p->pages.  But that's for later.
>
> This patch will achieve similar goal of what Fabiano used to propose here:
>
> https://lore.kernel.org/r/20240126221943.26628-1-faro...@suse.de
>
> However the send() interface may not be necessary.  I'm boldly attaching a
> "Co-developed-by" for Fabiano.
>
> Co-developed-by: Fabiano Rosas 
> Signed-off-by: Peter Xu 

Reviewed-by: Fabiano Rosas

Re: [PATCH v2 07/23] migration/multifd: Simplify locking in sender thread

2024-02-02 Thread Fabiano Rosas

pet...@redhat.com writes:

> From: Peter Xu 
>
> The sender thread will yield the p->mutex before IO starts, trying to not
> block the requester thread.  This may be unnecessary lock optimizations,
> because the requester can already read pending_job safely even without the
> lock, because the requester is currently the only one who can assign a
> task.
>
> Drop that lock complication on both sides:
>
>   (1) in the sender thread, always take the mutex until job done
>   (2) in the requester thread, check pending_job clear lockless
>
> Signed-off-by: Peter Xu 

Reviewed-by: Fabiano Rosas

Re: [PATCH v2 06/23] migration/multifd: Separate SYNC request with normal jobs

2024-02-02 Thread Fabiano Rosas

pet...@redhat.com writes:

> From: Peter Xu 
>
> Multifd provide a threaded model for processing jobs.  On sender side,
> there can be two kinds of job: (1) a list of pages to send, or (2) a sync
> request.
>
> The sync request is a very special kind of job.  It never contains a page
> array, but only a multifd packet telling the dest side to synchronize with
> sent pages.
>
> Before this patch, both requests use the pending_job field, no matter what
> the request is, it will boost pending_job, while multifd sender thread will
> decrement it after it finishes one job.
>
> However this should be racy, because SYNC is special in that it needs to
> set p->flags with MULTIFD_FLAG_SYNC, showing that this is a sync request.
> Consider a sequence of operations where:
>
>   - migration thread enqueue a job to send some pages, pending_job++ (0->1)
>
>   - [...before the selected multifd sender thread wakes up...]
>
>   - migration thread enqueue another job to sync, pending_job++ (1->2),
> setup p->flags=MULTIFD_FLAG_SYNC
>
>   - multifd sender thread wakes up, found pending_job==2
> - send the 1st packet with MULTIFD_FLAG_SYNC and list of pages
> - send the 2nd packet with flags==0 and no pages
>
> This is not expected, because MULTIFD_FLAG_SYNC should hopefully be done
> after all the pages are received.  Meanwhile, the 2nd packet will be
> completely useless, which contains zero information.
>
> I didn't verify above, but I think this issue is still benign in that at
> least on the recv side we always receive pages before handling
> MULTIFD_FLAG_SYNC.  However that's not always guaranteed and just tricky.
>
> One other reason I want to separate it is using p->flags to communicate
> between the two threads is also not clearly defined, it's very hard to read
> and understand why accessing p->flags is always safe; see the current impl
> of multifd_send_thread() where we tried to cache only p->flags.  It doesn't
> need to be that complicated.
>
> This patch introduces pending_sync, a separate flag just to show that the
> requester needs a sync.  Alongside, we remove the tricky caching of
> p->flags now because after this patch p->flags should only be used by
> multifd sender thread now, which will be crystal clear.  So it is always
> thread safe to access p->flags.
>
> With that, we can also safely convert the pending_job into a boolean,
> because we don't support >1 pending jobs anyway.
>
> Always use atomic ops to access both flags to make sure no cache effect.
> When at it, drop the initial setting of "pending_job = 0" because it's
> always allocated using g_new0().
>
> Signed-off-by: Peter Xu 

Reviewed-by: Fabiano Rosas

Re: [PATCH v2 03/23] migration/multifd: Drop MultiFDSendParams.quit, cleanup error paths

2024-02-02 Thread Fabiano Rosas

pet...@redhat.com writes:

> From: Peter Xu 
>
> Multifd send side has two fields to indicate error quits:
>
>   - MultiFDSendParams.quit
>   - _send_state->exiting
>
> Merge them into the global one.  The replacement is done by changing all
> p->quit checks into the global var check.  The global check doesn't need
> any lock.
>
> A few more things done on top of this altogether:
>
>   - multifd_send_terminate_threads()
>
> Moving the xchg() of _send_state->exiting upper, so as to cover
> the tracepoint, migrate_set_error() and migrate_set_state().
>
>   - multifd_send_sync_main()
>
> In the 2nd loop, add one more check over the global var to make sure we
> don't keep the looping if QEMU already decided to quit.
>
>   - multifd_tls_outgoing_handshake()
>
> Use multifd_send_terminate_threads() to set the error state.  That has
> a benefit of updating MigrationState.error to that error too, so we can
> persist that 1st error we hit in that specific channel.
>
>   - multifd_new_send_channel_async()
>
> Take similar approach like above, drop the migrate_set_error() because
> multifd_send_terminate_threads() already covers that.  Unwrap the helper
> multifd_new_send_channel_cleanup() along the way; not really needed.
>
> Signed-off-by: Peter Xu 

Reviewed-by: Fabiano Rosas

[PATCH 2/5] migration/multifd: Remove p->running

2024-02-02 Thread Fabiano Rosas

We currently only need p->running to avoid calling qemu_thread_join()
on a non existent thread if the thread has never been created.

However, there are at least two bugs in this logic:

1) On the sending side, p->running is set too early and
qemu_thread_create() can be skipped due to an error during TLS
handshake, leaving the flag set and leading to a crash when
multifd_save_cleanup() calls qemu_thread_join().

2) During exit, the multifd thread clears the flag while holding the
channel lock. The counterpart at multifd_save_cleanup() reads the flag
outside of the lock and might free the mutex while the multifd thread
still has it locked.

Fix the first issue by setting the flag right before creating the
thread. Rename it from p->running to p->thread_created to clarify its
usage.

Fix the second issue by not clearing the flag at the multifd thread
exit. We don't have any use for that.

Note that these bugs are straight-forward logic issues and not race
conditions. There is still a gap for races to affect this code due to
multifd_save_cleanup() being allowed to run concurrently with the
thread creation loop. This issue is solved in the next patch.

Fixes: 29647140157a ("migration/tls: add support for multifd tls-handshake")
Reported-by: Avihai Horon 
Reported-by: 
Signed-off-by: Fabiano Rosas 
---
 migration/multifd.c | 30 --
 migration/multifd.h |  7 ++-
 2 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index b557d046a9..402b7fd776 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -528,7 +528,7 @@ void multifd_save_cleanup(void)
 qemu_thread_join(>tls_thread);
 }
 
-if (p->running) {
+if (p->thread_created) {
 qemu_thread_join(>thread);
 }
 }
@@ -759,10 +759,6 @@ out:
 error_free(local_err);
 }
 
-qemu_mutex_lock(>mutex);
-p->running = false;
-qemu_mutex_unlock(>mutex);
-
 rcu_unregister_thread();
 migration_threads_remove(thread);
 trace_multifd_send_thread_end(p->id, p->num_packets, 
p->total_normal_pages);
@@ -859,6 +855,8 @@ static bool multifd_channel_connect(MultiFDSendParams *p,
 migration_ioc_register_yank(ioc);
 p->registered_yank = true;
 p->c = ioc;
+
+p->thread_created = true;
 qemu_thread_create(>thread, p->name, multifd_send_thread, p,
QEMU_THREAD_JOINABLE);
 return true;
@@ -890,7 +888,6 @@ static void multifd_new_send_channel_async(QIOTask *task, 
gpointer opaque)
 trace_multifd_new_send_channel_async(p->id);
 if (!qio_task_propagate_error(task, _err)) {
 qio_channel_set_delay(ioc, false);
-p->running = true;
 if (multifd_channel_connect(p, ioc, _err)) {
 return;
 }
@@ -1030,15 +1027,15 @@ void multifd_load_cleanup(void)
 for (i = 0; i < migrate_multifd_channels(); i++) {
 MultiFDRecvParams *p = _recv_state->params[i];
 
-if (p->running) {
-/*
- * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code,
- * however try to wakeup it without harm in cleanup phase.
- */
-qemu_sem_post(>sem_sync);
-}
+/*
+ * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code,
+ * however try to wakeup it without harm in cleanup phase.
+ */
+qemu_sem_post(>sem_sync);
 
-qemu_thread_join(>thread);
+if (p->thread_created) {
+qemu_thread_join(>thread);
+}
 }
 for (i = 0; i < migrate_multifd_channels(); i++) {
 MultiFDRecvParams *p = _recv_state->params[i];
@@ -1148,9 +1145,6 @@ static void *multifd_recv_thread(void *opaque)
 multifd_recv_terminate_threads(local_err);
 error_free(local_err);
 }
-qemu_mutex_lock(>mutex);
-p->running = false;
-qemu_mutex_unlock(>mutex);
 
 rcu_unregister_thread();
 trace_multifd_recv_thread_end(p->id, p->num_packets, 
p->total_normal_pages);
@@ -1258,7 +1252,7 @@ void multifd_recv_new_channel(QIOChannel *ioc, Error 
**errp)
 /* initial packet */
 p->num_packets = 1;
 
-p->running = true;
+p->thread_created = true;
 qemu_thread_create(>thread, p->name, multifd_recv_thread, p,
QEMU_THREAD_JOINABLE);
 qatomic_inc(_recv_state->count);
diff --git a/migration/multifd.h b/migration/multifd.h
index 5a69ef40e2..917833c309 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -73,6 +73,7 @@ typedef struct {
 char *name;
 /* channel thread id */
 QemuThread thread;
+bool thread_created;
 QemuThread tls_thread;
 bool tls_thread_created;
 /* communication channel */
@@ -95,8 +96,6 @@ typedef struct {
 
 /* this mutex protects the following parameters */
 QemuMutex mutex;
-/* is this channel thread running */
-bool running;
 /* should this thread finish */
 bool

[PATCH 4/5] migration/multifd: Move multifd_save_setup into migration thread

2024-02-02 Thread Fabiano Rosas

We currently have an unfavorable situation around multifd channels
creation and the migration thread execution.

We create the multifd channels with qio_channel_socket_connect_async
-> qio_task_run_in_thread, but only connect them at the
multifd_new_send_channel_async callback, called from
qio_task_complete, which is registered as a glib event.

So at multifd_save_setup() we create the channels, but they will only
be actually usable after the whole multifd_save_setup() calling stack
returns back to the main loop. Which means that the migration thread
is already up and running without any possibility for the multifd
channels to be ready on time.

We currently rely on the channels-ready semaphore blocking
multifd_send_sync_main() until channels start to come up and release
it. However there have been bugs recently found when a channel's
creation fails and multifd_save_cleanup() is allowed to run while
other channels are still being created.

Let's start to organize this situation by moving the
multifd_save_setup() call into the migration thread. That way we
unblock the main-loop to dispatch the completion callbacks and
actually have a chance of getting the multifd channels ready for when
the migration thread needs them.

The next patches will deal with the synchronization aspects.

Note that this takes multifd_save_setup() out of the BQL.

Signed-off-by: Fabiano Rosas 
---
 migration/migration.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 55abb175cc..c14d12497f 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -3315,6 +3315,10 @@ static void *migration_thread(void *opaque)
 object_ref(OBJECT(s));
 update_iteration_initial_status(s);
 
+if (!multifd_save_setup()) {
+goto out;
+}
+
 bql_lock();
 qemu_savevm_state_header(s->to_dst_file);
 bql_unlock();
@@ -3386,6 +3390,7 @@ static void *migration_thread(void *opaque)
 urgent = migration_rate_limit();
 }
 
+out:
 trace_migration_thread_after_loop();
 migration_iteration_finish(s);
 object_unref(OBJECT(s));
@@ -3623,11 +3628,6 @@ void migrate_fd_connect(MigrationState *s, Error 
*error_in)
 return;
 }
 
-if (!multifd_save_setup()) {
-migrate_fd_cleanup(s);
-return;
-}
-
 if (migrate_background_snapshot()) {
 qemu_thread_create(>thread, "bg_snapshot",
 bg_migration_thread, s, QEMU_THREAD_JOINABLE);
-- 
2.35.3

[PATCH 5/5] migration/multifd: Add a synchronization point for channel creation

2024-02-02 Thread Fabiano Rosas

It is possible that one of the multifd channels fails to be created at
multifd_new_send_channel_async() while the rest of the channel
creation tasks are still in flight.

This could lead to multifd_save_cleanup() executing the
qemu_thread_join() loop too early and not waiting for the threads
which haven't been created yet, leading to the freeing of resources
that the newly created threads will try to access and crash.

Add a synchronization point after which there will be no attempts at
thread creation and therefore calling multifd_save_cleanup() past that
point will ensure it properly waits for the threads.

A note about performance: Prior to this patch, if a channel took too
long to be established, other channels could finish connecting first
and already start taking load. Now we're bounded by the
slowest-connecting channel.

Signed-off-by: Fabiano Rosas 
---
 migration/multifd.c | 67 +
 1 file changed, 37 insertions(+), 30 deletions(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index 1851206352..888ac8b05d 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -360,6 +360,11 @@ struct {
 MultiFDPages_t *pages;
 /* global number of generated multifd packets */
 uint64_t packet_num;
+/*
+ * Synchronization point past which no more channels will be
+ * created.
+ */
+QemuSemaphore channels_created;
 /* send channels ready */
 QemuSemaphore channels_ready;
 /*
@@ -561,6 +566,7 @@ void multifd_save_cleanup(void)
 error_free(local_err);
 }
 }
+qemu_sem_destroy(_send_state->channels_created);
 qemu_sem_destroy(_send_state->channels_ready);
 g_free(multifd_send_state->params);
 multifd_send_state->params = NULL;
@@ -787,13 +793,6 @@ static void multifd_tls_outgoing_handshake(QIOTask *task,
 trace_multifd_tls_outgoing_handshake_error(ioc, error_get_pretty(err));
 
 migrate_set_error(migrate_get_current(), err);
-/*
- * Error happen, mark multifd_send_thread status as 'quit' although it
- * is not created, and then tell who pay attention to me.
- */
-p->quit = true;
-qemu_sem_post(_send_state->channels_ready);
-qemu_sem_post(>sem_sync);
 error_free(err);
 }
 
@@ -862,39 +861,37 @@ static bool multifd_channel_connect(MultiFDSendParams *p,
 return true;
 }
 
-static void multifd_new_send_channel_cleanup(MultiFDSendParams *p,
- QIOChannel *ioc, Error *err)
-{
- migrate_set_error(migrate_get_current(), err);
- /* Error happen, we need to tell who pay attention to me */
- qemu_sem_post(_send_state->channels_ready);
- qemu_sem_post(>sem_sync);
- /*
-  * Although multifd_send_thread is not created, but main migration
-  * thread need to judge whether it is running, so we need to mark
-  * its status.
-  */
- p->quit = true;
- object_unref(OBJECT(ioc));
- error_free(err);
-}
-
 static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
 {
 MultiFDSendParams *p = opaque;
 QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task));
 Error *local_err = NULL;
+bool ret;
 
 trace_multifd_new_send_channel_async(p->id);
-if (!qio_task_propagate_error(task, _err)) {
-qio_channel_set_delay(ioc, false);
-if (multifd_channel_connect(p, ioc, _err)) {
-return;
-}
+
+if (qio_task_propagate_error(task, _err)) {
+ret = false;
+goto out;
+}
+
+qio_channel_set_delay(ioc, false);
+ret = multifd_channel_connect(p, ioc, _err);
+
+out:
+/*
+ * Here we're not interested whether creation succeeded, only that
+ * it happened at all.
+ */
+qemu_sem_post(_send_state->channels_created);
+if (ret) {
+return;
 }
 
 trace_multifd_new_send_channel_async_error(p->id, local_err);
-multifd_new_send_channel_cleanup(p, ioc, local_err);
+migrate_set_error(migrate_get_current(), local_err);
+object_unref(OBJECT(ioc));
+error_free(local_err);
 }
 
 static void multifd_new_send_channel_create(gpointer opaque)
@@ -918,6 +915,7 @@ bool multifd_save_setup(void)
 multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
 multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
 multifd_send_state->pages = multifd_pages_init(page_count);
+qemu_sem_init(_send_state->channels_created, 0);
 qemu_sem_init(_send_state->channels_ready, 0);
 qatomic_set(_send_state->exiting, 0);
 multifd_send_state->ops = multifd_ops[migrate_multifd_compression()];
@@ -953,6 +951,15 @@ bool multifd_save_setup(void)
 multifd_new_send_channel_create(p);
 }
 
+/*
+ * Wait until channel creation has started for all channels. The
+ * creation can still fail, but no more channels will be created
+ * past this point.
+ */
+for (i = 0; i < thread_count; i++) {
+

[PATCH 0/5] migration/multifd: Fix channel creation vs. cleanup races

2024-02-02 Thread Fabiano Rosas

Hi,

This contains 2 patches from my previous series addressing the
p->running misuse and the TLS thread leak and 3 new patches to fix the
cleanup-while-creating-threads race.

For the p->running I'm keeping the idea from the other series to
remove p->running and use a more narrow p->thread_created flag. This
flag is used only inform whether the thread has been created so we can
join it.

For the cleanup race I have moved some code around and added a
semaphore to make multifd_save_setup() only return once all channel
creation tasks have started.

The idea is that after multifd_save_setup() returns, no new creations
are in flight and the p->thread_created flags will never change again,
so they're enough to cause the cleanup code to wait for the threads to
join.

CI run: https://gitlab.com/farosas/qemu/-/pipelines/1162798843

@Peter: I can rebase this on top of your series once we decide about
it.

Fabiano Rosas (5):
  migration/multifd: Join the TLS thread
  migration/multifd: Remove p->running
  migration/multifd: Move multifd_save_setup error handling in to the
function
  migration/multifd: Move multifd_save_setup into migration thread
  migration/multifd: Add a synchronization point for channel creation

 migration/migration.c |  14 ++---
 migration/multifd.c   | 129 --
 migration/multifd.h   |  11 ++--
 3 files changed, 83 insertions(+), 71 deletions(-)

-- 
2.35.3

[PATCH 3/5] migration/multifd: Move multifd_save_setup error handling in to the function

2024-02-02 Thread Fabiano Rosas

Hide the error handling inside multifd_save_setup to make it cleaner
for the next patch to move the function around.

Signed-off-by: Fabiano Rosas 
---
 migration/migration.c |  6 +-
 migration/multifd.c   | 24 +---
 migration/multifd.h   |  2 +-
 3 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index d5f705ceef..55abb175cc 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -3623,11 +3623,7 @@ void migrate_fd_connect(MigrationState *s, Error 
*error_in)
 return;
 }
 
-if (multifd_save_setup(_err) != 0) {
-migrate_set_error(s, local_err);
-error_report_err(local_err);
-migrate_set_state(>state, MIGRATION_STATUS_SETUP,
-  MIGRATION_STATUS_FAILED);
+if (!multifd_save_setup()) {
 migrate_fd_cleanup(s);
 return;
 }
diff --git a/migration/multifd.c b/migration/multifd.c
index 402b7fd776..1851206352 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -902,14 +902,16 @@ static void multifd_new_send_channel_create(gpointer 
opaque)
 socket_send_channel_create(multifd_new_send_channel_async, opaque);
 }
 
-int multifd_save_setup(Error **errp)
+bool multifd_save_setup(void)
 {
-int thread_count;
+MigrationState *s = migrate_get_current();
+Error *local_err = NULL;
+int thread_count, ret = 0;
 uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
 uint8_t i;
 
 if (!migrate_multifd()) {
-return 0;
+return true;
 }
 
 thread_count = migrate_multifd_channels();
@@ -953,14 +955,22 @@ int multifd_save_setup(Error **errp)
 
 for (i = 0; i < thread_count; i++) {
 MultiFDSendParams *p = _send_state->params[i];
-int ret;
 
-ret = multifd_send_state->ops->send_setup(p, errp);
+ret = multifd_send_state->ops->send_setup(p, _err);
 if (ret) {
-return ret;
+break;
 }
 }
-return 0;
+
+if (ret) {
+migrate_set_error(s, local_err);
+error_report_err(local_err);
+migrate_set_state(>state, MIGRATION_STATUS_SETUP,
+  MIGRATION_STATUS_FAILED);
+return false;
+}
+
+return true;
 }
 
 struct {
diff --git a/migration/multifd.h b/migration/multifd.h
index 917833c309..4acbd65e91 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -13,7 +13,7 @@
 #ifndef QEMU_MIGRATION_MULTIFD_H
 #define QEMU_MIGRATION_MULTIFD_H
 
-int multifd_save_setup(Error **errp);
+bool multifd_save_setup(void);
 void multifd_save_cleanup(void);
 int multifd_load_setup(Error **errp);
 void multifd_load_cleanup(void);
-- 
2.35.3

[PATCH 1/5] migration/multifd: Join the TLS thread

2024-02-02 Thread Fabiano Rosas

We're currently leaking the resources of the TLS thread by not joining
it and also overwriting the p->thread pointer altogether.

Signed-off-by: Fabiano Rosas 
---
 migration/multifd.c | 8 +++-
 migration/multifd.h | 2 ++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index 25cbc6dc6b..b557d046a9 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -524,6 +524,10 @@ void multifd_save_cleanup(void)
 for (i = 0; i < migrate_multifd_channels(); i++) {
 MultiFDSendParams *p = _send_state->params[i];
 
+if (p->tls_thread_created) {
+qemu_thread_join(>tls_thread);
+}
+
 if (p->running) {
 qemu_thread_join(>thread);
 }
@@ -827,7 +831,9 @@ static bool multifd_tls_channel_connect(MultiFDSendParams 
*p,
 trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname);
 qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing");
 p->c = QIO_CHANNEL(tioc);
-qemu_thread_create(>thread, "multifd-tls-handshake-worker",
+
+p->tls_thread_created = true;
+qemu_thread_create(>tls_thread, "multifd-tls-handshake-worker",
multifd_tls_handshake_thread, p,
QEMU_THREAD_JOINABLE);
 return true;
diff --git a/migration/multifd.h b/migration/multifd.h
index 35d11f103c..5a69ef40e2 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -73,6 +73,8 @@ typedef struct {
 char *name;
 /* channel thread id */
 QemuThread thread;
+QemuThread tls_thread;
+bool tls_thread_created;
 /* communication channel */
 QIOChannel *c;
 /* is the yank function registered */
-- 
2.35.3

Re: [PULL v2 00/47] nic-config-2 queue

2024-02-02 Thread Peter Maydell

On Fri, 2 Feb 2024 at 16:38, David Woodhouse  wrote:
>
> From: David Woodhouse 
>
> The following changes since commit c3709fde5955d13f6d4f86ab46ef3cc2288ca65e:
>
>   Merge tag 'pull-aspeed-20240201' of https://github.com/legoater/qemu into 
> staging (2024-02-01 14:42:11 +)
>
> are available in the Git repository at:
>
>   git://git.infradead.org/users/dwmw2/qemu.git tags/pull-nic-config-2-20240202
>
> for you to fetch changes up to e8c5c4525cbbd7207c085732cfd1e67d8f3d662a:
>
>   net: make nb_nics and nd_table[] static in net/net.c (2024-02-02 16:23:48 
> +)
>
> 
> Rework matching of network devices to -nic options (v2)
>


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/9.0
for any user-visible changes.

-- PMM

Re: [PULL 02/10] hw/hppa/machine: Disable default devices with --nodefaults option

2024-02-02 Thread Guenter Roeck

On Fri, Feb 02, 2024 at 10:54:20AM +0100, Helge Deller wrote:
> Hi Guenter,
> 
> On 2/2/24 05:22, Guenter Roeck wrote:
> > On Sat, Jan 13, 2024 at 06:57:20AM +0100, del...@kernel.org wrote:
> > > From: Helge Deller 
> > > 
> > > Recognize the qemu --nodefaults option, which will disable the
> > > following default devices on hppa:
> > > - lsi53c895a SCSI controller,
> > > - artist graphics card,
> > > - LASI 82596 NIC,
> > > - tulip PCI NIC,
> > > - second serial PCI card,
> > > - USB OHCI controller.
> > > 
> > > Adding this option is very useful to allow manual testing and
> > > debugging of the other possible devices on the command line.
> > > 
> > 
> > With this patch in the tree, I get some interesting crashes in Seabios
> > if I provide a somewhat unusual command line option. For example,
> > something like
> > 
> >  -usb -device usb-ehci,id=ehci \
> >  -device usb-uas,bus=ehci.0,id=uas \
> >  -device scsi-hd,bus=uas.0,scsi-id=0,lun=0,drive=d0 \
> >  -drive file= ...
> > 
> > is accepted as command line option but results in
> > 
> > SeaBIOS PA-RISC 32-bit Firmware Version 15 (QEMU 8.2.1)
> > Duplex Console IO Dependent Code (IODC) revision 1
> > --
> >(c) Copyright 2017-2024 Helge Deller  and SeaBIOS 
> > developers.
> > --
> >Processor   SpeedState   Coprocessor State  Cache 
> > Size
> >-     -  -  
> > --
> >0  250 MHzActive Functional0 KB
> >1  250 MHzIdle   Functional0 KB
> >2  250 MHzIdle   Functional0 KB
> >3  250 MHzIdle   Functional0 KB
> >Emulated machine: HP C3700 (64-bit PA2.0) with 32-bit PDC
> >Available memory: 1024 MB
> >Good memory required: 16 MB
> >Primary boot path:FWSCSI.0.0
> >Alternate boot path:  FWSCSI.0.0
> >Console path: SERIAL_2.9600.8.none
> >Keyboard path:SERIAL_2.9600.8.none
> > *ERROR* in SeaBIOS-hppa-v15:
> > prepare_boot_path:2898
> > SeaBIOS wants SYSTEM HALT.
> > 
> > This is without --nodefaults, and it used to work. Is that intentional ?
> 
> This should now be fixed in the upcoming SeaBIOS-hppa-v16 version ("devel" 
> branch):
> https://github.com/hdeller/seabios-hppa/tree/devel
> Could you test?

I was able to build from the 'master' branch, but 'devel' gives me

hppa64-linux-ld: target elf32-hppa-linux not found

Do you have a binary seabios image, by any chance ?

> If it doesn't work, please give me the full command line.
> 

qemu-system-hppa -M C3700 -smp 4 \
-kernel vmlinux -no-reboot -snapshot \
-usb -device usb-ehci,id=ehci \
-device usb-uas,bus=ehci.0,id=uas \
-device scsi-hd,bus=uas.0,scsi-id=0,lun=0,drive=d0 \
-drive file=rootfs.ext2,if=none,format=raw,id=d0 \
-append "root=/dev/sda rootwait console=ttyS0,115200" \
-nographic -monitor null

This is with qemu 8.2.1. Note that the number of CPUs doesn't make a
difference. It turns out this also crashes/aborts immediately with
"nodefaults".

> > If I do use the --nodefaults parameter, I was unable to figure out how
> > to configure the serial console. What command line parameter(s) do I need to
> > get it ?
> 
> You need to add:
> -serial mon:stdio
> This will create a serial port if it's not yet there.
> 

And there was me trying all variants of "-device pci-serial-4x..." I could
think of ;-).

Guenter

Re: [RFC v1 3/3] hw/arm/virt-acpi-build.c: Enable CPU cache topology

2024-02-02 Thread Jonathan Cameron via

On Thu, 1 Feb 2024 16:06:55 +
Peter Maydell  wrote:

> On Mon, 29 Jan 2024 at 11:08, Jonathan Cameron
>  wrote:
> >
> > On Mon, 29 Jan 2024 00:14:23 -0800
> > Sia Jee Heng  wrote:
> >  
> > > Introduced a 3-layer cache for the ARM virtual machine.
> > >
> > > Signed-off-by: Sia Jee Heng   
> >
> > There are a bunch of CPU registers that also need updating to reflect the
> > described cache.
> > https://lore.kernel.org/qemu-devel/20230808115713.2613-3-jonathan.came...@huawei.com/
> > It's called HACK for a reason ;)
> > But there is some discussion about this issue in the thread.
> >
> > The l1 etc also needs to reflect the CPU model.  This stuff needs to match.
> > Wrong information being passed to a VM is probably worse than no 
> > information.  
> 
> Yes. The ACPI table information, if we provide it, should be
> being generated from the CPU cache ID registers.

That + some additional information I think.

> 
> But I'm a bit confused about why the ACPI table has the cache
> topology in it -- can't the guest read the cache ID registers
> and figure it out for itself?

That's a philosophy question for the ARM architects :)

The registers focus on correctness (so what you need to flush etc,
where the point of coherence is and other fun)
not all the info needed for performance tuning. There is some stuff
on the cache type that I guess is more perf tuning related (sets etc)

They probably could have expanded the CPU registers to provide a lot
more information (which is what x86 effectively does). IIRC what is
there today for ARM is pretty much useless for anything placement
decision related (scheduling etc)  You can't tell what shares a cache
(or more fun cluster topology structures). 

It is hard in registers to define a nice flexible graph where
all sorts of fun topology of the system can be described.
PPTT provides that opportunity for a richer description.
Sure you could squirt out the equivalent table via registers,
but what's the point given we have firmware tables for this
sort of thing..

Jonathan

> 
> thanks
> -- PMM

Re: [PULL 00/47] nic-config.for-upstream queue

2024-02-02 Thread David Woodhouse

On Fri, 2024-02-02 at 16:15 +, Peter Maydell wrote:
> On Fri, 2 Feb 2024 at 16:01, David Woodhouse  wrote:
> > 
> > What is the next step? Post the full series as a v5, or perhaps just
> > the single fixed patch which is now at
> > https://git.infradead.org/?p=users/dwmw2/qemu.git;a=commitdiff;h=2c20b4ee96db
> > 
> > ... and then another pull request?
> 
> If that diff above is the only change, then:
>  * roll a new pullrequest with the fix squashed into the appropriate patch
>  * have the subject marker be "PULL v2"
>  * you can send just the cover-letter and the one patch that has changed,
>    you don't need to resend the entire series (though it's not a big
>    deal if you do send the whole set of mails again)

Done, thank you.

> > The docs are fairly clear that pull
> > requests can't have even minor changes that haven't been posted
> > separately... and I guess the above incremental doesn't count?
> 
> Which bit of the docs is that? It's not our actual practice,
> so we should really fix the wording. The principle is "don't
> stick code into pullreqs that hasn't been through the review
> process", but in practice especially for submaintainers who
> know the system it's not uncommon to say "I'm going to
> squash change X in and take this" or similar rather than
> forcing submitters to do another round of sending out patches.
> There should be *something* on the list to say this change was
> put in, but eg the exchange in this email thread is fine for that.

I think I was looking at
https://www.qemu.org/docs/master/devel/submitting-a-pull-request.html
where it says "In particular if you’ve corrected issues in one round of
code review, you need to send your fixed patch series as normal to the
list; you can’t put it in a pull request until it’s gone through.
(Extremely trivial fixes may be OK to just fix in passing, but if in
doubt err on the side of not.)"

I think the doc is actually fine and I was just reading it too
conservatively. I have been making a conscious effort to pay attention,
follow the process and fit in, rather than just turning up and doing
whatever comes naturally from decades of working on open source. (I do
not *promise* that will last.)

smime.p7s
Description: S/MIME cryptographic signature

Re: Crash with CXL + TCG on 8.2: Was Re: qemu cxl memory expander shows numa_node -1

2024-02-02 Thread Peter Maydell

On Fri, 2 Feb 2024 at 16:50, Gregory Price  wrote:
>
> On Fri, Feb 02, 2024 at 04:33:20PM +, Peter Maydell wrote:
> > Here we are trying to take an interrupt. This isn't related to the
> > other can_do_io stuff, it's happening because do_ld_mmio_beN assumes
> > it's called with the BQL not held, but in fact there are some
> > situations where we call into the memory subsystem and we do
> > already have the BQL.

> It's bugs all the way down as usual!
> https://xkcd.com/1416/
>
> I'll dig in a little next week to see if there's an easy fix. We can see
> the return address is already 0 going into mmu_translate, so it does
> look unrelated to the patch I threw out - but probably still has to do
> with things being on IO.

Yes, the low level memory accessors only need to take the BQL if the thing
being accessed is an MMIO device. Probably what is wanted is for those
functions to do "take the lock if we don't already have it", something
like hw/core/cpu-common.c:cpu_reset_interrupt() does.

-- PMM

Re: Crash with CXL + TCG on 8.2: Was Re: qemu cxl memory expander shows numa_node -1

2024-02-02 Thread Gregory Price

On Fri, Feb 02, 2024 at 04:33:20PM +, Peter Maydell wrote:
> On Fri, 2 Feb 2024 at 16:26, Jonathan Cameron
>  wrote:
> > #7  0x55ab1929 in bql_lock_impl (file=0x56049122 
> > "../../accel/tcg/cputlb.c", line=2033) at ../../system/cpus.c:524
> > #8  bql_lock_impl (file=file@entry=0x56049122 
> > "../../accel/tcg/cputlb.c", line=line@entry=2033) at ../../system/cpus.c:520
> > #9  0x55c9f7d6 in do_ld_mmio_beN (cpu=0x578e0cb0, 
> > full=0x7ffe88012950, ret_be=ret_be@entry=0, addr=19595792376, 
> > size=size@entry=8, mmu_idx=4, type=MMU_DATA_LOAD, ra=0) at 
> > ../../accel/tcg/cputlb.c:2033
> > #10 0x55ca0fbd in do_ld_8 (cpu=cpu@entry=0x578e0cb0, 
> > p=p@entry=0x74efd1d0, mmu_idx=, 
> > type=type@entry=MMU_DATA_LOAD, memop=, ra=ra@entry=0) at 
> > ../../accel/tcg/cputlb.c:2356
> > #11 0x55ca341f in do_ld8_mmu (cpu=cpu@entry=0x578e0cb0, 
> > addr=addr@entry=19595792376, oi=oi@entry=52, ra=0, ra@entry=52, 
> > access_type=access_type@entry=MMU_DATA_LOAD) at 
> > ../../accel/tcg/cputlb.c:2439
> > #12 0x55ca5f59 in cpu_ldq_mmu (ra=52, oi=52, addr=19595792376, 
> > env=0x578e3470) at ../../accel/tcg/ldst_common.c.inc:169
> > #13 cpu_ldq_le_mmuidx_ra (env=0x578e3470, addr=19595792376, 
> > mmu_idx=, ra=ra@entry=0) at 
> > ../../accel/tcg/ldst_common.c.inc:301
> > #14 0x55b4b5fc in ptw_ldq (ra=0, in=0x74efd320) at 
> > ../../target/i386/tcg/sysemu/excp_helper.c:98
> > #15 ptw_ldq (ra=0, in=0x74efd320) at 
> > ../../target/i386/tcg/sysemu/excp_helper.c:93
> > #16 mmu_translate (env=env@entry=0x578e3470, in=0x74efd3e0, 
> > out=0x74efd3b0, err=err@entry=0x74efd3c0, ra=ra@entry=0) at 
> > ../../target/i386/tcg/sysemu/excp_helper.c:174
> > #17 0x55b4c4b3 in get_physical_address (ra=0, err=0x74efd3c0, 
> > out=0x74efd3b0, mmu_idx=0, access_type=MMU_DATA_LOAD, 
> > addr=18446741874686299840, env=0x578e3470) at 
> > ../../target/i386/tcg/sysemu/excp_helper.c:580
> > #18 x86_cpu_tlb_fill (cs=0x578e0cb0, addr=18446741874686299840, 
> > size=, access_type=MMU_DATA_LOAD, mmu_idx=0, 
> > probe=, retaddr=0) at 
> > ../../target/i386/tcg/sysemu/excp_helper.c:606
> > #19 0x55ca0ee9 in tlb_fill (retaddr=0, mmu_idx=0, 
> > access_type=MMU_DATA_LOAD, size=, addr=18446741874686299840, 
> > cpu=0x74efd540) at ../../accel/tcg/cputlb.c:1315
> > #20 mmu_lookup1 (cpu=cpu@entry=0x578e0cb0, 
> > data=data@entry=0x74efd540, mmu_idx=0, 
> > access_type=access_type@entry=MMU_DATA_LOAD, ra=ra@entry=0) at 
> > ../../accel/tcg/cputlb.c:1713
> 
> Here we are trying to take an interrupt. This isn't related to the
> other can_do_io stuff, it's happening because do_ld_mmio_beN assumes
> it's called with the BQL not held, but in fact there are some
> situations where we call into the memory subsystem and we do
> already have the BQL.
> 
> -- PMM

It's bugs all the way down as usual!
https://xkcd.com/1416/

I'll dig in a little next week to see if there's an easy fix. We can see
the return address is already 0 going into mmu_translate, so it does
look unrelated to the patch I threw out - but probably still has to do
with things being on IO.

~Gregory

Re: [PULL 00/57] tcg patch queue

2024-02-02 Thread Peter Maydell

On Fri, 2 Feb 2024 at 05:52, Richard Henderson
 wrote:
>
> The following changes since commit 14639717bf379480e937716fcaf1e72b47fd4c5f:
>
>   Merge tag 'pull-trivial-patches' of https://gitlab.com/mjt0k/qemu into 
> staging (2024-01-31 19:53:45 +)
>
> are available in the Git repository at:
>
>   https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20240202
>
> for you to fetch changes up to 73e095fc71dfeb8f5f767d9ac71078e562d935b0:
>
>   target/sparc: Remove FSR_FTT_NMASK, FSR_FTT_CEXC_NMASK (2024-02-02 14:40:06 
> +1000)
>
> 
> tests/tcg: Fix multiarch/gdbstub/prot-none.py
> hw/core: Convert cpu_mmu_index to a CPUClass hook
> tcg/loongarch64: Set vector registers call clobbered
> target/sparc: floating-point cleanup
>

Hi; I'm afraid this seems to have collided with the loongarch
pullreq that went in yesterday. It had a merge conflict, which
I tried making a resolution to, but that failed to build.
(I think the problem is some code which your pullreq wants
to modify was moved from one file to another.)

Could you rebase and resend, please?

thanks
-- PMM

[PULL v2 38/47] hw/net/lasi_i82596: use qemu_create_nic_device()

2024-02-02 Thread David Woodhouse

From: David Woodhouse 

Create the device only if there is a corresponding NIC config for it.
Remove the explicit check on nd_table[0].used from hw/hppa/machine.c
which (since commit d8a3220005d7) tries to do the same thing.

The lasi_82596 support has been disabled since it was first introduced,
since enable_lasi_lan() has always been zero. This allows the user to
enable it by explicitly requesting a NIC model 'lasi_82596' or just
using the alias 'lasi'. Otherwise, it defaults to a PCI NIC as before.

Signed-off-by: David Woodhouse 
Reviewed-by: Thomas Huth 
---
 hw/hppa/machine.c   |  9 -
 hw/net/lasi_i82596.c| 12 +++-
 include/hw/net/lasi_82596.h |  4 ++--
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c
index a1045b48cc..eb78c46ff1 100644
--- a/hw/hppa/machine.c
+++ b/hw/hppa/machine.c
@@ -362,14 +362,13 @@ static void machine_HP_common_init_tail(MachineState 
*machine, PCIBus *pci_bus,
 }
 
 /* Network setup. */
-if (nd_table[0].used && enable_lasi_lan()) {
+if (lasi_dev) {
 lasi_82596_init(addr_space, translate(NULL, LASI_LAN_HPA),
-qdev_get_gpio_in(lasi_dev, LASI_IRQ_LAN_HPA));
+qdev_get_gpio_in(lasi_dev, LASI_IRQ_LAN_HPA),
+enable_lasi_lan());
 }
 
-if (!enable_lasi_lan()) {
-pci_init_nic_devices(pci_bus, mc->default_nic);
-}
+pci_init_nic_devices(pci_bus, mc->default_nic);
 
 /* BMC board: HP Powerbar SP2 Diva (with console only) */
 pci_dev = pci_new(-1, "pci-serial");
diff --git a/hw/net/lasi_i82596.c b/hw/net/lasi_i82596.c
index 09e830ba5f..fcf7fae941 100644
--- a/hw/net/lasi_i82596.c
+++ b/hw/net/lasi_i82596.c
@@ -118,19 +118,21 @@ static void lasi_82596_realize(DeviceState *dev, Error 
**errp)
 i82596_common_init(dev, s, _lasi_82596_info);
 }
 
-SysBusI82596State *lasi_82596_init(MemoryRegion *addr_space,
-  hwaddr hpa, qemu_irq lan_irq)
+SysBusI82596State *lasi_82596_init(MemoryRegion *addr_space, hwaddr hpa,
+   qemu_irq lan_irq, gboolean match_default)
 {
 DeviceState *dev;
 SysBusI82596State *s;
 static const MACAddr HP_MAC = {
 .a = { 0x08, 0x00, 0x09, 0xef, 0x34, 0xf6 } };
 
-qemu_check_nic_model(_table[0], TYPE_LASI_82596);
-dev = qdev_new(TYPE_LASI_82596);
+dev = qemu_create_nic_device(TYPE_LASI_82596, match_default, "lasi");
+if (!dev) {
+return NULL;
+}
+
 s = SYSBUS_I82596(dev);
 s->state.irq = lan_irq;
-qdev_set_nic_properties(dev, _table[0]);
 sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), _fatal);
 s->state.conf.macaddr = HP_MAC; /* set HP MAC prefix */
 
diff --git a/include/hw/net/lasi_82596.h b/include/hw/net/lasi_82596.h
index 3ef2f47ba2..439356ec19 100644
--- a/include/hw/net/lasi_82596.h
+++ b/include/hw/net/lasi_82596.h
@@ -25,7 +25,7 @@ struct SysBusI82596State {
 int val_index:1;
 };
 
-SysBusI82596State *lasi_82596_init(MemoryRegion *addr_space,
-hwaddr hpa, qemu_irq irq);
+SysBusI82596State *lasi_82596_init(MemoryRegion *addr_space, hwaddr hpa,
+   qemu_irq irq, gboolean match_default);
 
 #endif
-- 
2.43.0

[PULL v2 00/47] nic-config-2 queue

2024-02-02 Thread David Woodhouse

From: David Woodhouse 

The following changes since commit c3709fde5955d13f6d4f86ab46ef3cc2288ca65e:

  Merge tag 'pull-aspeed-20240201' of https://github.com/legoater/qemu into 
staging (2024-02-01 14:42:11 +)

are available in the Git repository at:

  git://git.infradead.org/users/dwmw2/qemu.git tags/pull-nic-config-2-20240202

for you to fetch changes up to e8c5c4525cbbd7207c085732cfd1e67d8f3d662a:

  net: make nb_nics and nd_table[] static in net/net.c (2024-02-02 16:23:48 
+)


Rework matching of network devices to -nic options (v2)


David Woodhouse (47):
  net: add qemu_{configure,create}_nic_device(), qemu_find_nic_info()
  net: report list of available models according to platform
  net: add qemu_create_nic_bus_devices()
  hw/pci: add pci_init_nic_devices(), pci_init_nic_in_slot()
  hw/i386/pc: use qemu_get_nic_info() and pci_init_nic_devices()
  hw/xen: use qemu_create_nic_bus_devices() to instantiate Xen NICs
  hw/alpha/dp264: use pci_init_nic_devices()
  hw/arm/sbsa-ref: use pci_init_nic_devices()
  hw/arm/virt: use pci_init_nic_devices()
  hw/hppa: use pci_init_nic_devices()
  hw/loongarch: use pci_init_nic_devices()
  hw/mips/fuloong2e: use pci_init_nic_devices()
  hw/mips/malta: use pci_init_nic_devices()
  hw/mips/loongson3_virt: use pci_init_nic_devices()
  hw/ppc/prep: use pci_init_nic_devices()
  hw/ppc/spapr: use qemu_get_nic_info() and pci_init_nic_devices()
  hw/ppc: use pci_init_nic_devices()
  hw/sh4/r2d: use pci_init_nic_devices()
  hw/sparc64/sun4u: use pci_init_nic_devices()
  hw/xtensa/virt: use pci_init_nic_devices()
  hw/arm/allwinner: use qemu_configure_nic_device()
  hw/arm/aspeed: use qemu_configure_nic_device()
  hw/arm/exynos4: use qemu_create_nic_device()
  hw/arm/fsl: use qemu_configure_nic_device()
  hw/net/smc91c111: use qemu_configure_nic_device()
  hw/net/lan9118: use qemu_configure_nic_device()
  hw/arm/highbank: use qemu_create_nic_device()
  hw/arm/npcm7xx: use qemu_configure_nic_device, allow emc0/emc1 as aliases
  hw/arm/stellaris: use qemu_find_nic_info()
  hw/arm: use qemu_configure_nic_device()
  hw/net/etraxfs-eth: use qemu_configure_nic_device()
  hw/m68k/mcf5208: use qemu_create_nic_device()
  hw/m68k/q800: use qemu_find_nic_info()
  hw/microblaze: use qemu_configure_nic_device()
  hw/mips/mipssim: use qemu_create_nic_device()
  hw/mips/jazz: use qemu_find_nic_info()
  hw/net/lasi_i82596: Re-enable build
  hw/net/lasi_i82596: use qemu_create_nic_device()
  hw/openrisc/openrisc_sim: use qemu_create_nic_device()
  hw/riscv: use qemu_configure_nic_device()
  hw/s390x/s390-virtio-ccw: use qemu_create_nic_device()
  hw/sparc/sun4m: use qemu_find_nic_info()
  hw/xtensa/xtfpga: use qemu_create_nic_device()
  net: remove qemu_check_nic_model()
  hw/pci: remove pci_nic_init_nofail()
  net: remove qemu_show_nic_models(), qemu_find_nic_model()
  net: make nb_nics and nd_table[] static in net/net.c

 hw/alpha/dp264.c |   4 +-
 hw/arm/allwinner-a10.c   |   6 +-
 hw/arm/allwinner-h3.c|   6 +-
 hw/arm/allwinner-r40.c   |  27 +---
 hw/arm/aspeed.c  |   9 +-
 hw/arm/exynos4_boards.c  |   6 +-
 hw/arm/fsl-imx25.c   |   2 +-
 hw/arm/fsl-imx6.c|   2 +-
 hw/arm/fsl-imx6ul.c  |   2 +-
 hw/arm/fsl-imx7.c|   2 +-
 hw/arm/gumstix.c |   6 +-
 hw/arm/highbank.c|  12 +-
 hw/arm/integratorcp.c|   5 +-
 hw/arm/kzm.c |   4 +-
 hw/arm/mainstone.c   |   3 +-
 hw/arm/mps2-tz.c |   8 +-
 hw/arm/mps2.c|   2 +-
 hw/arm/msf2-soc.c|   6 +-
 hw/arm/musicpal.c|   3 +-
 hw/arm/npcm7xx.c |  16 +-
 hw/arm/realview.c|  25 ++-
 hw/arm/sbsa-ref.c|   4 +-
 hw/arm/stellaris.c   |  30 +++-
 hw/arm/versatilepb.c |  15 +-
 hw/arm/vexpress.c|   4 +-
 hw/arm/virt.c|   4 +-
 hw/arm/xilinx_zynq.c |  11 +-
 hw/arm/xlnx-versal.c |   7 +-
 hw/arm/xlnx-zynqmp.c |   8 +-
 hw/cris/axis_dev88.c |   9 +-
 hw/hppa/machine.c|  12 +-
 hw/i386/pc.c |  38 +++--
 hw/i386/pc_piix.c|   2 +-
 hw/i386/pc_q35.c |   2 +-
 hw/loongarch/virt.c

Re: Crash with CXL + TCG on 8.2: Was Re: qemu cxl memory expander shows numa_node -1

2024-02-02 Thread Peter Maydell

On Fri, 2 Feb 2024 at 16:26, Jonathan Cameron
 wrote:
> New exciting trace...
> Thread 5 "qemu-system-x86" received signal SIGABRT, Aborted.
> [Switching to Thread 0x74efe6c0 (LWP 16503)]
> __pthread_kill_implementation (no_tid=0, signo=6, threadid=) 
> at ./nptl/pthread_kill.c:44
> Download failed: Invalid argument.  Continuing without source file 
> ./nptl/./nptl/pthread_kill.c.
> 44  ./nptl/pthread_kill.c: No such file or directory.
> (gdb) bt
> #0  __pthread_kill_implementation (no_tid=0, signo=6, threadid= out>) at ./nptl/pthread_kill.c:44
> #1  __pthread_kill_internal (signo=6, threadid=) at 
> ./nptl/pthread_kill.c:78
> #2  __GI___pthread_kill (threadid=, signo=signo@entry=6) at 
> ./nptl/pthread_kill.c:89
> #3  0x777c43b6 in __GI_raise (sig=sig@entry=6) at 
> ../sysdeps/posix/raise.c:26
> #4  0x777aa87c in __GI_abort () at ./stdlib/abort.c:79
> #5  0x77b2ed1e in  () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
> #6  0x77b9622e in g_assertion_message_expr () at 
> /lib/x86_64-linux-gnu/libglib-2.0.so.0
> #7  0x55ab1929 in bql_lock_impl (file=0x56049122 
> "../../accel/tcg/cputlb.c", line=2033) at ../../system/cpus.c:524
> #8  bql_lock_impl (file=file@entry=0x56049122 "../../accel/tcg/cputlb.c", 
> line=line@entry=2033) at ../../system/cpus.c:520
> #9  0x55c9f7d6 in do_ld_mmio_beN (cpu=0x578e0cb0, 
> full=0x7ffe88012950, ret_be=ret_be@entry=0, addr=19595792376, 
> size=size@entry=8, mmu_idx=4, type=MMU_DATA_LOAD, ra=0) at 
> ../../accel/tcg/cputlb.c:2033
> #10 0x55ca0fbd in do_ld_8 (cpu=cpu@entry=0x578e0cb0, 
> p=p@entry=0x74efd1d0, mmu_idx=, 
> type=type@entry=MMU_DATA_LOAD, memop=, ra=ra@entry=0) at 
> ../../accel/tcg/cputlb.c:2356
> #11 0x55ca341f in do_ld8_mmu (cpu=cpu@entry=0x578e0cb0, 
> addr=addr@entry=19595792376, oi=oi@entry=52, ra=0, ra@entry=52, 
> access_type=access_type@entry=MMU_DATA_LOAD) at ../../accel/tcg/cputlb.c:2439
> #12 0x55ca5f59 in cpu_ldq_mmu (ra=52, oi=52, addr=19595792376, 
> env=0x578e3470) at ../../accel/tcg/ldst_common.c.inc:169
> #13 cpu_ldq_le_mmuidx_ra (env=0x578e3470, addr=19595792376, 
> mmu_idx=, ra=ra@entry=0) at 
> ../../accel/tcg/ldst_common.c.inc:301
> #14 0x55b4b5fc in ptw_ldq (ra=0, in=0x74efd320) at 
> ../../target/i386/tcg/sysemu/excp_helper.c:98
> #15 ptw_ldq (ra=0, in=0x74efd320) at 
> ../../target/i386/tcg/sysemu/excp_helper.c:93
> #16 mmu_translate (env=env@entry=0x578e3470, in=0x74efd3e0, 
> out=0x74efd3b0, err=err@entry=0x74efd3c0, ra=ra@entry=0) at 
> ../../target/i386/tcg/sysemu/excp_helper.c:174
> #17 0x55b4c4b3 in get_physical_address (ra=0, err=0x74efd3c0, 
> out=0x74efd3b0, mmu_idx=0, access_type=MMU_DATA_LOAD, 
> addr=18446741874686299840, env=0x578e3470) at 
> ../../target/i386/tcg/sysemu/excp_helper.c:580
> #18 x86_cpu_tlb_fill (cs=0x578e0cb0, addr=18446741874686299840, 
> size=, access_type=MMU_DATA_LOAD, mmu_idx=0, probe= out>, retaddr=0) at ../../target/i386/tcg/sysemu/excp_helper.c:606
> #19 0x55ca0ee9 in tlb_fill (retaddr=0, mmu_idx=0, 
> access_type=MMU_DATA_LOAD, size=, addr=18446741874686299840, 
> cpu=0x74efd540) at ../../accel/tcg/cputlb.c:1315
> #20 mmu_lookup1 (cpu=cpu@entry=0x578e0cb0, 
> data=data@entry=0x74efd540, mmu_idx=0, 
> access_type=access_type@entry=MMU_DATA_LOAD, ra=ra@entry=0) at 
> ../../accel/tcg/cputlb.c:1713
> #21 0x55ca2c61 in mmu_lookup (cpu=cpu@entry=0x578e0cb0, 
> addr=addr@entry=18446741874686299840, oi=oi@entry=32, ra=ra@entry=0, 
> type=type@entry=MMU_DATA_LOAD, l=l@entry=0x74efd540) at 
> ../../accel/tcg/cputlb.c:1803
> #22 0x55ca3165 in do_ld4_mmu (cpu=cpu@entry=0x578e0cb0, 
> addr=addr@entry=18446741874686299840, oi=oi@entry=32, ra=ra@entry=0, 
> access_type=access_type@entry=MMU_DATA_LOAD) at ../../accel/tcg/cputlb.c:2416
> #23 0x55ca5ef9 in cpu_ldl_mmu (ra=0, oi=32, 
> addr=18446741874686299840, env=0x578e3470) at 
> ../../accel/tcg/ldst_common.c.inc:158
> #24 cpu_ldl_le_mmuidx_ra (env=env@entry=0x578e3470, 
> addr=addr@entry=18446741874686299840, mmu_idx=, ra=ra@entry=0) 
> at ../../accel/tcg/ldst_common.c.inc:294
> #25 0x55bb6cdd in do_interrupt64 (is_hw=1, 
> next_eip=18446744072399775809, error_code=0, is_int=0, intno=236, 
> env=0x578e3470) at ../../target/i386/tcg/seg_helper.c:889
> #26 do_interrupt_all (cpu=cpu@entry=0x578e0cb0, intno=236, 
> is_int=is_int@entry=0, error_code=error_code@entry=0, 
> next_eip=next_eip@entry=0, is_hw=is_hw@entry=1) at 
> ../../target/i386/tcg/seg_helper.c:1130
> #27 0x55bb87da in do_interrupt_x86_hardirq 
> (env=env@entry=0x578e3470, intno=, is_hw=is_hw@entry=1) at 
> ../../target/i386/tcg/seg_helper.c:1162
> #28 0x55b5039c in x86_cpu_exec_interrupt (cs=0x578e0cb0, 
> interrupt_request=) at 
> ../../target/i386/tcg/sysemu/seg_helper.c:197
> #29 0x55c94480 in cpu_handle_interrupt (last_tb=, 
>

Re: Crash with CXL + TCG on 8.2: Was Re: qemu cxl memory expander shows numa_node -1

2024-02-02 Thread Jonathan Cameron via

On Thu, 1 Feb 2024 13:56:09 -0500
Gregory Price  wrote:

> On Thu, Feb 01, 2024 at 06:04:26PM +, Peter Maydell wrote:
> > On Thu, 1 Feb 2024 at 17:25, Alex Bennée  wrote:  
> > >
> > > Jonathan Cameron  writes:  
> > > >> > #21 0x55ca3e5d in do_st8_mmu (cpu=0x578e0cb0, 
> > > >> > addr=23937, val=18386491784638059520, oi=6, ra=140736029817822) at 
> > > >> > ../../accel/tcg/cputlb.c:2853
> > > >> > #22 0x7fffa9107c63 in code_gen_buffer ()  
> > > >>
> > > >> No thats different - we are actually writing to the MMIO region here.
> > > >> But the fact we hit cpu_abort because we can't find the TB we are
> > > >> executing is a little problematic.
> > > >>
> > > >> Does ra properly point to the code buffer here?  
> > > >
> > > > Err.  How would I tell?  
> > >
> > > (gdb) p/x 140736029817822
> > > $1 = 0x7fffa9107bde
> > >
> > > seems off because code_gen_buffer starts at 0x7fffa9107c63  
> > 
> > The code_gen_buffer doesn't *start* at 0x7fffa9107c63 --
> > that is our return address into it, which is to say it's just
> > after the call insn to the do_st8_mmu helper. The 'ra' argument
> > to the helper function is going to be a number slightly lower
> > than that, because it points within the main lump of generated
> > code for the TB, whereas the helper call is done as part of
> > an out-of-line lump of common code at the end of the TB.
> > 
> > The 'ra' here is fine -- the problem is that we don't
> > pass it all the way down the callstack and instead end
> > up using 0 as a 'ra' within the ptw code.
> > 
> > -- PMM  
> 
> Is there any particular reason not to, as below?
> ~Gregory
> 
One patch blindly applied. 
New exciting trace...
Thread 5 "qemu-system-x86" received signal SIGABRT, Aborted.
[Switching to Thread 0x74efe6c0 (LWP 16503)]
__pthread_kill_implementation (no_tid=0, signo=6, threadid=) at 
./nptl/pthread_kill.c:44
Download failed: Invalid argument.  Continuing without source file 
./nptl/./nptl/pthread_kill.c.
44  ./nptl/pthread_kill.c: No such file or directory.
(gdb) bt
#0  __pthread_kill_implementation (no_tid=0, signo=6, threadid=) 
at ./nptl/pthread_kill.c:44
#1  __pthread_kill_internal (signo=6, threadid=) at 
./nptl/pthread_kill.c:78
#2  __GI___pthread_kill (threadid=, signo=signo@entry=6) at 
./nptl/pthread_kill.c:89
#3  0x777c43b6 in __GI_raise (sig=sig@entry=6) at 
../sysdeps/posix/raise.c:26
#4  0x777aa87c in __GI_abort () at ./stdlib/abort.c:79
#5  0x77b2ed1e in  () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
#6  0x77b9622e in g_assertion_message_expr () at 
/lib/x86_64-linux-gnu/libglib-2.0.so.0
#7  0x55ab1929 in bql_lock_impl (file=0x56049122 
"../../accel/tcg/cputlb.c", line=2033) at ../../system/cpus.c:524
#8  bql_lock_impl (file=file@entry=0x56049122 "../../accel/tcg/cputlb.c", 
line=line@entry=2033) at ../../system/cpus.c:520
#9  0x55c9f7d6 in do_ld_mmio_beN (cpu=0x578e0cb0, 
full=0x7ffe88012950, ret_be=ret_be@entry=0, addr=19595792376, 
size=size@entry=8, mmu_idx=4, type=MMU_DATA_LOAD, ra=0) at 
../../accel/tcg/cputlb.c:2033
#10 0x55ca0fbd in do_ld_8 (cpu=cpu@entry=0x578e0cb0, 
p=p@entry=0x74efd1d0, mmu_idx=, 
type=type@entry=MMU_DATA_LOAD, memop=, ra=ra@entry=0) at 
../../accel/tcg/cputlb.c:2356
#11 0x55ca341f in do_ld8_mmu (cpu=cpu@entry=0x578e0cb0, 
addr=addr@entry=19595792376, oi=oi@entry=52, ra=0, ra@entry=52, 
access_type=access_type@entry=MMU_DATA_LOAD) at ../../accel/tcg/cputlb.c:2439
#12 0x55ca5f59 in cpu_ldq_mmu (ra=52, oi=52, addr=19595792376, 
env=0x578e3470) at ../../accel/tcg/ldst_common.c.inc:169
#13 cpu_ldq_le_mmuidx_ra (env=0x578e3470, addr=19595792376, 
mmu_idx=, ra=ra@entry=0) at ../../accel/tcg/ldst_common.c.inc:301
#14 0x55b4b5fc in ptw_ldq (ra=0, in=0x74efd320) at 
../../target/i386/tcg/sysemu/excp_helper.c:98
#15 ptw_ldq (ra=0, in=0x74efd320) at 
../../target/i386/tcg/sysemu/excp_helper.c:93
#16 mmu_translate (env=env@entry=0x578e3470, in=0x74efd3e0, 
out=0x74efd3b0, err=err@entry=0x74efd3c0, ra=ra@entry=0) at 
../../target/i386/tcg/sysemu/excp_helper.c:174
#17 0x55b4c4b3 in get_physical_address (ra=0, err=0x74efd3c0, 
out=0x74efd3b0, mmu_idx=0, access_type=MMU_DATA_LOAD, 
addr=18446741874686299840, env=0x578e3470) at 
../../target/i386/tcg/sysemu/excp_helper.c:580
#18 x86_cpu_tlb_fill (cs=0x578e0cb0, addr=18446741874686299840, 
size=, access_type=MMU_DATA_LOAD, mmu_idx=0, probe=, retaddr=0) at ../../target/i386/tcg/sysemu/excp_helper.c:606
#19 0x55ca0ee9 in tlb_fill (retaddr=0, mmu_idx=0, 
access_type=MMU_DATA_LOAD, size=, addr=18446741874686299840, 
cpu=0x74efd540) at ../../accel/tcg/cputlb.c:1315
#20 mmu_lookup1 (cpu=cpu@entry=0x578e0cb0, data=data@entry=0x74efd540, 
mmu_idx=0, access_type=access_type@entry=MMU_DATA_LOAD, ra=ra@entry=0) at 
../../accel/tcg/cputlb.c:1713
#21 0x55ca2c61 in mmu_lookup (cpu=cpu@entry=0x578e0cb0,

[PATCH v4.1 38/47] hw/net/lasi_i82596: use qemu_create_nic_device()

2024-02-02 Thread David Woodhouse

From: David Woodhouse 


Create the device only if there is a corresponding NIC config for it.
Remove the explicit check on nd_table[0].used from hw/hppa/machine.c
which (since commit d8a3220005d7) tries to do the same thing.

The lasi_82596 support has been disabled since it was first introduced,
since enable_lasi_lan() has always been zero. This allows the user to
enable it by explicitly requesting a NIC model 'lasi_82596' or just
using the alias 'lasi'. Otherwise, it defaults to a PCI NIC as before.

Signed-off-by: David Woodhouse 
Reviewed-by: Thomas Huth 
---
v4.1: Only attempt to find GPIOs on the lasi_dev if it exists. This
  was theoretically broken already, but enable_lasi_dev() is
  hard-coded to zero, so adding the user override to enable it
  is what actually exposed the bug.

(Not reposting the whole series for that)

 hw/hppa/machine.c   |  9 -
 hw/net/lasi_i82596.c| 12 +++-
 include/hw/net/lasi_82596.h |  4 ++--
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c
index a1045b48cc..eb78c46ff1 100644
--- a/hw/hppa/machine.c
+++ b/hw/hppa/machine.c
@@ -362,14 +362,13 @@ static void machine_HP_common_init_tail(MachineState 
*machine, PCIBus *pci_bus,
 }
 
 /* Network setup. */
-if (nd_table[0].used && enable_lasi_lan()) {
+if (lasi_dev) {
 lasi_82596_init(addr_space, translate(NULL, LASI_LAN_HPA),
-qdev_get_gpio_in(lasi_dev, LASI_IRQ_LAN_HPA));
+qdev_get_gpio_in(lasi_dev, LASI_IRQ_LAN_HPA),
+enable_lasi_lan());
 }
 
-if (!enable_lasi_lan()) {
-pci_init_nic_devices(pci_bus, mc->default_nic);
-}
+pci_init_nic_devices(pci_bus, mc->default_nic);
 
 /* BMC board: HP Powerbar SP2 Diva (with console only) */
 pci_dev = pci_new(-1, "pci-serial");
diff --git a/hw/net/lasi_i82596.c b/hw/net/lasi_i82596.c
index 09e830ba5f..fcf7fae941 100644
--- a/hw/net/lasi_i82596.c
+++ b/hw/net/lasi_i82596.c
@@ -118,19 +118,21 @@ static void lasi_82596_realize(DeviceState *dev, Error 
**errp)
 i82596_common_init(dev, s, _lasi_82596_info);
 }
 
-SysBusI82596State *lasi_82596_init(MemoryRegion *addr_space,
-  hwaddr hpa, qemu_irq lan_irq)
+SysBusI82596State *lasi_82596_init(MemoryRegion *addr_space, hwaddr hpa,
+   qemu_irq lan_irq, gboolean match_default)
 {
 DeviceState *dev;
 SysBusI82596State *s;
 static const MACAddr HP_MAC = {
 .a = { 0x08, 0x00, 0x09, 0xef, 0x34, 0xf6 } };
 
-qemu_check_nic_model(_table[0], TYPE_LASI_82596);
-dev = qdev_new(TYPE_LASI_82596);
+dev = qemu_create_nic_device(TYPE_LASI_82596, match_default, "lasi");
+if (!dev) {
+return NULL;
+}
+
 s = SYSBUS_I82596(dev);
 s->state.irq = lan_irq;
-qdev_set_nic_properties(dev, _table[0]);
 sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), _fatal);
 s->state.conf.macaddr = HP_MAC; /* set HP MAC prefix */
 
diff --git a/include/hw/net/lasi_82596.h b/include/hw/net/lasi_82596.h
index 3ef2f47ba2..439356ec19 100644
--- a/include/hw/net/lasi_82596.h
+++ b/include/hw/net/lasi_82596.h
@@ -25,7 +25,7 @@ struct SysBusI82596State {
 int val_index:1;
 };
 
-SysBusI82596State *lasi_82596_init(MemoryRegion *addr_space,
-hwaddr hpa, qemu_irq irq);
+SysBusI82596State *lasi_82596_init(MemoryRegion *addr_space, hwaddr hpa,
+   qemu_irq irq, gboolean match_default);
 
 #endif
-- 
2.34.1




smime.p7s
Description: S/MIME cryptographic signature

Re: [PULL 00/47] nic-config.for-upstream queue

2024-02-02 Thread Peter Maydell

On Fri, 2 Feb 2024 at 16:01, David Woodhouse  wrote:
>
> On Fri, 2024-02-02 at 15:32 +, Peter Maydell wrote:
> >
> > $ ./build/all/qemu-system-hppa -M C3700
> > Segmentation fault (core dumped)
>
> Ah, got it. Some HPPA machine types don't have a lasi_dev.
>
>
> --- a/hw/hppa/machine.c
> +++ b/hw/hppa/machine.c
> @@ -362,9 +362,11 @@ static void machine_HP_common_init_tail(MachineState 
> *machine, PCIBus *pci_bus,
>  }
>
>  /* Network setup. */
> -lasi_82596_init(addr_space, translate(NULL, LASI_LAN_HPA),
> -qdev_get_gpio_in(lasi_dev, LASI_IRQ_LAN_HPA),
> -enable_lasi_lan());
> +if (lasi_dev) {
> +lasi_82596_init(addr_space, translate(NULL, LASI_LAN_HPA),
> +qdev_get_gpio_in(lasi_dev, LASI_IRQ_LAN_HPA),
> +enable_lasi_lan());
> +}
>
>  pci_init_nic_devices(pci_bus, mc->default_nic);
>
> New pipeline running (FWIW) at
> https://gitlab.com/dwmw2/qemu/-/pipelines/1162635873
>
> What is the next step? Post the full series as a v5, or perhaps just
> the single fixed patch which is now at
> https://git.infradead.org/?p=users/dwmw2/qemu.git;a=commitdiff;h=2c20b4ee96db
>
> ... and then another pull request?

If that diff above is the only change, then:
 * roll a new pullrequest with the fix squashed into the appropriate patch
 * have the subject marker be "PULL v2"
 * you can send just the cover-letter and the one patch that has changed,
   you don't need to resend the entire series (though it's not a big
   deal if you do send the whole set of mails again)

> The docs are fairly clear that pull
> requests can't have even minor changes that haven't been posted
> separately... and I guess the above incremental doesn't count?

Which bit of the docs is that? It's not our actual practice,
so we should really fix the wording. The principle is "don't
stick code into pullreqs that hasn't been through the review
process", but in practice especially for submaintainers who
know the system it's not uncommon to say "I'm going to
squash change X in and take this" or similar rather than
forcing submitters to do another round of sending out patches.
There should be *something* on the list to say this change was
put in, but eg the exchange in this email thread is fine for that.

thanks
-- PMM

Re: [PULL 00/47] nic-config.for-upstream queue

2024-02-02 Thread David Woodhouse

On Fri, 2024-02-02 at 15:32 +, Peter Maydell wrote:
> 
> $ ./build/all/qemu-system-hppa -M C3700
> Segmentation fault (core dumped)

Ah, got it. Some HPPA machine types don't have a lasi_dev.

--- a/hw/hppa/machine.c
+++ b/hw/hppa/machine.c
@@ -362,9 +362,11 @@ static void machine_HP_common_init_tail(MachineState 
*machine, PCIBus *pci_bus,
 }

 /* Network setup. */
-lasi_82596_init(addr_space, translate(NULL, LASI_LAN_HPA),
-qdev_get_gpio_in(lasi_dev, LASI_IRQ_LAN_HPA),
-enable_lasi_lan());
+if (lasi_dev) {
+lasi_82596_init(addr_space, translate(NULL, LASI_LAN_HPA),
+qdev_get_gpio_in(lasi_dev, LASI_IRQ_LAN_HPA),
+enable_lasi_lan());
+}

 pci_init_nic_devices(pci_bus, mc->default_nic);

New pipeline running (FWIW) at
https://gitlab.com/dwmw2/qemu/-/pipelines/1162635873

What is the next step? Post the full series as a v5, or perhaps just
the single fixed patch which is now at
https://git.infradead.org/?p=users/dwmw2/qemu.git;a=commitdiff;h=2c20b4ee96db

... and then another pull request? The docs are fairly clear that pull
requests can't have even minor changes that haven't been posted
separately... and I guess the above incremental doesn't count?

Sorry for the noise.

smime.p7s
Description: S/MIME cryptographic signature

KVM/QEMU Community Call 6th Feb Agenda Items

2024-02-02 Thread Alex Bennée

Hi,

The KVM/QEMU community call is at:

  https://meet.jit.si/kvmcallmeeting
  @
  6/2/2024 14:00 UTC

As I'll be away Philippe has volunteered to run the meeting.

So far we have one agenda item which is to discuss next steps from
Markus' post about dynamic modelling:

  Message-ID: <87o7d1i7ky@pond.sub.org>
  Date: Wed, 31 Jan 2024 21:14:21 +0100
  Subject: Dynamic & heterogeneous machines, initial configuration: problems

If there are any other agenda items you would like to discuss then
please respond to this email.

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

Re: [PULL 00/47] nic-config.for-upstream queue

2024-02-02 Thread Peter Maydell

On Fri, 2 Feb 2024 at 15:36, David Woodhouse  wrote:
>
> On Fri, 2024-02-02 at 15:32 +, Peter Maydell wrote:
> >
> > This fails "make check' because some of the qom-test and
> > test-hmp checks fail when the QEMU binary segfaults.
> >
> > https://gitlab.com/qemu-project/qemu/-/jobs/6084552256
> > https://gitlab.com/qemu-project/qemu/-/jobs/6084044180
>
> Thanks.  Any idea why that didn't show up in my own pipeline?
> https://gitlab.com/dwmw2/qemu/-/pipelines/1160949234

I think because the failing runners are the aarch64 and
s390 host ones, which we don't let run for anything
except real merge-pullreq test runs because they're
limited resource. I guess that perhaps we have at some point
said "we don't need to run all the guest architectures
on all jobs" and not noticed that this leaves the
coverage for the submaintainer only-uses-the-public-runners
CI testing with gaps.

CCing Alex and Thomas for possible suggestions.

thanks
-- PMM

[PULL 28/36] hw/xen: convert stderr prints to error/warn reports

2024-02-02 Thread Peter Maydell

From: Manos Pitsidianakis 

According to the QEMU Coding Style document:

> Do not use printf(), fprintf() or monitor_printf(). Instead, use
> error_report() or error_vreport() from error-report.h. This ensures the
> error is reported in the right place (current monitor or stderr), and in
> a uniform format.
> Use error_printf() & friends to print additional information.

This commit changes fprintfs that report warnings and errors to the
appropriate report functions.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Manos Pitsidianakis 
Reviewed-by: Alex Bennée 
Message-id: 
42a8953553cf68e8bacada966f93af4fbce45919.1706544115.git.manos.pitsidiana...@linaro.org
Signed-off-by: Peter Maydell 
---
 hw/xen/xen-hvm-common.c | 12 ++--
 hw/xen/xen-mapcache.c   |  5 ++---
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c
index 05a29c6f11a..baa1adb9f23 100644
--- a/hw/xen/xen-hvm-common.c
+++ b/hw/xen/xen-hvm-common.c
@@ -20,8 +20,8 @@ void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, 
MemoryRegion *mr,
 
 if (runstate_check(RUN_STATE_INMIGRATE)) {
 /* RAM already populated in Xen */
-fprintf(stderr, "%s: do not alloc "RAM_ADDR_FMT
-" bytes of ram at "RAM_ADDR_FMT" when runstate is INMIGRATE\n",
+warn_report("%s: do not alloc "RAM_ADDR_FMT
+" bytes of ram at "RAM_ADDR_FMT" when runstate is INMIGRATE",
 __func__, size, ram_addr);
 return;
 }
@@ -552,9 +552,9 @@ static void cpu_handle_ioreq(void *opaque)
 req->data = copy.data;
 
 if (req->state != STATE_IOREQ_INPROCESS) {
-fprintf(stderr, "Badness in I/O request ... not in service?!: "
+warn_report("Badness in I/O request ... not in service?!: "
 "%x, ptr: %x, port: %"PRIx64", "
-"data: %"PRIx64", count: %u, size: %u, type: %u\n",
+"data: %"PRIx64", count: %u, size: %u, type: %u",
 req->state, req->data_is_ptr, req->addr,
 req->data, req->count, req->size, req->type);
 destroy_hvm_domain(false);
@@ -758,9 +758,9 @@ void xen_shutdown_fatal_error(const char *fmt, ...)
 va_list ap;
 
 va_start(ap, fmt);
-vfprintf(stderr, fmt, ap);
+error_vreport(fmt, ap);
 va_end(ap);
-fprintf(stderr, "Will destroy the domain.\n");
+error_report("Will destroy the domain.");
 /* destroy the domain */
 qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR);
 }
diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index 336c2123767..4f956d048ee 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -347,9 +347,8 @@ tryagain:
 MapCacheRev *reventry = g_new0(MapCacheRev, 1);
 entry->lock++;
 if (entry->lock == 0) {
-fprintf(stderr,
-"mapcache entry lock overflow: "HWADDR_FMT_plx" -> %p\n",
-entry->paddr_index, entry->vaddr_base);
+error_report("mapcache entry lock overflow: "HWADDR_FMT_plx" -> 
%p",
+ entry->paddr_index, entry->vaddr_base);
 abort();
 }
 reventry->dma = dma;
-- 
2.34.1

[PULL 27/36] hw/xen/xen-hvm-common.c: convert DPRINTF to tracepoints

2024-02-02 Thread Peter Maydell

From: Manos Pitsidianakis 

Tracing DPRINTFs to stderr might not be desired. A developer that relies
on tracepoints should be able to opt-in to each tracepoint and rely on
QEMU's log redirection, instead of stderr by default.

This commit converts DPRINTFs in this file that are used for tracing
into tracepoints.

Signed-off-by: Manos Pitsidianakis 
Reviewed-by: Alex Bennée 
Message-id: 
b000ab73022dfeb7a7ab0ee8fd0f41fb208adaf0.1706544115.git.manos.pitsidiana...@linaro.org
Signed-off-by: Peter Maydell 
---
 hw/xen/xen-hvm-common.c | 35 ++-
 hw/xen/trace-events | 10 +-
 2 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c
index 47e6cb1db3a..05a29c6f11a 100644
--- a/hw/xen/xen-hvm-common.c
+++ b/hw/xen/xen-hvm-common.c
@@ -169,11 +169,12 @@ static ioreq_t 
*cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu)
 ioreq_t *req = xen_vcpu_ioreq(state->shared_page, vcpu);
 
 if (req->state != STATE_IOREQ_READY) {
-DPRINTF("I/O request not ready: "
-"%x, ptr: %x, port: %"PRIx64", "
-"data: %"PRIx64", count: %u, size: %u\n",
-req->state, req->data_is_ptr, req->addr,
-req->data, req->count, req->size);
+trace_cpu_get_ioreq_from_shared_memory_req_not_ready(req->state,
+ req->data_is_ptr,
+ req->addr,
+ req->data,
+ req->count,
+ req->size);
 return NULL;
 }
 
@@ -601,10 +602,9 @@ static void xen_main_loop_prepare(XenIOState *state)
 if (evtchn_fd != -1) {
 CPUState *cpu_state;
 
-DPRINTF("%s: Init cpu_by_vcpu_id\n", __func__);
 CPU_FOREACH(cpu_state) {
-DPRINTF("%s: cpu_by_vcpu_id[%d]=%p\n",
-__func__, cpu_state->cpu_index, cpu_state);
+trace_xen_main_loop_prepare_init_cpu(cpu_state->cpu_index,
+ cpu_state);
 state->cpu_by_vcpu_id[cpu_state->cpu_index] = cpu_state;
 }
 qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, state);
@@ -681,7 +681,7 @@ static int xen_map_ioreq_server(XenIOState *state)
 }
 
 if (state->shared_page == NULL) {
-DPRINTF("shared page at pfn %lx\n", ioreq_pfn);
+trace_xen_map_ioreq_server_shared_page(ioreq_pfn);
 
 state->shared_page = xenforeignmemory_map(xen_fmem, xen_domid,
   PROT_READ | PROT_WRITE,
@@ -693,7 +693,7 @@ static int xen_map_ioreq_server(XenIOState *state)
 }
 
 if (state->buffered_io_page == NULL) {
-DPRINTF("buffered io page at pfn %lx\n", bufioreq_pfn);
+trace_xen_map_ioreq_server_buffered_io_page(bufioreq_pfn);
 
 state->buffered_io_page = xenforeignmemory_map(xen_fmem, xen_domid,
PROT_READ | PROT_WRITE,
@@ -709,7 +709,7 @@ static int xen_map_ioreq_server(XenIOState *state)
 return -1;
 }
 
-DPRINTF("buffered io evtchn is %x\n", bufioreq_evtchn);
+trace_xen_map_ioreq_server_buffered_io_evtchn(bufioreq_evtchn);
 
 state->bufioreq_remote_port = bufioreq_evtchn;
 
@@ -737,16 +737,17 @@ void destroy_hvm_domain(bool reboot)
 
 xc_handle = xc_interface_open(0, 0, 0);
 if (xc_handle == NULL) {
-fprintf(stderr, "Cannot acquire xenctrl handle\n");
+trace_destroy_hvm_domain_cannot_acquire_handle();
 } else {
 sts = xc_domain_shutdown(xc_handle, xen_domid, reason);
 if (sts != 0) {
-fprintf(stderr, "xc_domain_shutdown failed to issue %s, "
-"sts %d, %s\n", reboot ? "reboot" : "poweroff",
-sts, strerror(errno));
+trace_destroy_hvm_domain_failed_action(
+reboot ? "reboot" : "poweroff", sts, strerror(errno)
+);
 } else {
-fprintf(stderr, "Issued domain %d %s\n", xen_domid,
-reboot ? "reboot" : "poweroff");
+trace_destroy_hvm_domain_action(
+xen_domid, reboot ? "reboot" : "poweroff"
+);
 }
 xc_interface_close(xc_handle);
 }
diff --git a/hw/xen/trace-events b/hw/xen/trace-events
index a65dc0e55fd..d1b27f6c11b 100644
--- a/hw/xen/trace-events
+++ b/hw/xen/trace-events
@@ -42,7 +42,7 @@ xs_node_vscanf(char *path, char *value) "%s %s"
 xs_node_watch(char *path) "%s"
 xs_node_unwatch(char *path) "%s"
 
-# xen-hvm.c
+# xen-hvm-common.c
 xen_ram_alloc(unsigned long ram_addr, unsigned long size) "requested: 0x%lx, 
size 0x%lx"
 xen_client_set_memory(uint64_t start_addr, unsigned long size, bool log_dirty) 
"0x%"PRIx64" size

[PULL 04/36] adb: Switch bus reset to 3-phase-reset

2024-02-02 Thread Peter Maydell

Switch the ADB bus from using BusClass::reset to the Resettable
interface.

This has no behavioural change, because the BusClass code to support
subclasses that use the legacy BusClass::reset will call that method
in the hold phase of 3-phase reset.

Signed-off-by: Peter Maydell 
Acked-by: Michael S. Tsirkin 
Acked-by: Cédric Le Goater 
Acked-by: Maciej S. Szmigiero 
Tested-by: Cédric Le Goater 
Reviewed-by: Mark Cave-Ayland 
Reviewed-by: Zhao Liu 
Message-id: 20240119163512.3810301-4-peter.mayd...@linaro.org
---
 hw/input/adb.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/hw/input/adb.c b/hw/input/adb.c
index 0f3c73d6d00..98f39b4281a 100644
--- a/hw/input/adb.c
+++ b/hw/input/adb.c
@@ -231,9 +231,9 @@ static const VMStateDescription vmstate_adb_bus = {
 }
 };
 
-static void adb_bus_reset(BusState *qbus)
+static void adb_bus_reset_hold(Object *obj)
 {
-ADBBusState *adb_bus = ADB_BUS(qbus);
+ADBBusState *adb_bus = ADB_BUS(obj);
 
 adb_bus->autopoll_enabled = false;
 adb_bus->autopoll_mask = 0x;
@@ -262,10 +262,11 @@ static void adb_bus_unrealize(BusState *qbus)
 static void adb_bus_class_init(ObjectClass *klass, void *data)
 {
 BusClass *k = BUS_CLASS(klass);
+ResettableClass *rc = RESETTABLE_CLASS(klass);
 
 k->realize = adb_bus_realize;
 k->unrealize = adb_bus_unrealize;
-k->reset = adb_bus_reset;
+rc->phases.hold = adb_bus_reset_hold;
 }
 
 static const TypeInfo adb_bus_type_info = {
-- 
2.34.1

[PULL 09/36] target/arm: Add ID_AA64ZFR0_EL1.B16B16 to the exposed-to-userspace set

2024-02-02 Thread Peter Maydell

In kernel commit 5d5b4e8c2d9ec ("arm64/sve: Report FEAT_SVE_B16B16 to
userspace") Linux added ID_AA64ZFR0_el1.B16B16 to the set of ID
register fields which it exposes to userspace.  Update our
exported_bits mask to include this.

(This doesn't yet change any behaviour for us, because we don't yet
have any CPUs that implement this feature, which is part of SVE2.)

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Message-id: 20240125134304.1470404-1-peter.mayd...@linaro.org
---
 target/arm/helper.c | 1 +
 tests/tcg/aarch64/sysregs.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index a0041aa0ec7..d51093a7c44 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -8897,6 +8897,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
R_ID_AA64ZFR0_AES_MASK |
R_ID_AA64ZFR0_BITPERM_MASK |
R_ID_AA64ZFR0_BFLOAT16_MASK |
+   R_ID_AA64ZFR0_B16B16_MASK |
R_ID_AA64ZFR0_SHA3_MASK |
R_ID_AA64ZFR0_SM4_MASK |
R_ID_AA64ZFR0_I8MM_MASK |
diff --git a/tests/tcg/aarch64/sysregs.c b/tests/tcg/aarch64/sysregs.c
index f7a055f1d5f..301e61d0dd4 100644
--- a/tests/tcg/aarch64/sysregs.c
+++ b/tests/tcg/aarch64/sysregs.c
@@ -137,7 +137,7 @@ int main(void)
 /* all hidden, DebugVer fixed to 0x6 (ARMv8 debug architecture) */
 get_cpu_reg_check_mask(id_aa64dfr0_el1,  _m(,,,0006));
 get_cpu_reg_check_zero(id_aa64dfr1_el1);
-get_cpu_reg_check_mask(SYS_ID_AA64ZFR0_EL1,  _m(0ff0,ff0f,00ff,00ff));
+get_cpu_reg_check_mask(SYS_ID_AA64ZFR0_EL1,  _m(0ff0,ff0f,0fff,00ff));
 get_cpu_reg_check_mask(SYS_ID_AA64SMFR0_EL1, _m(8ff1,fcff,,));
 
 get_cpu_reg_check_zero(id_aa64afr0_el1);
-- 
2.34.1

[PULL 03/36] vmbus: Switch bus reset to 3-phase-reset

2024-02-02 Thread Peter Maydell

Switch vmbus from using BusClass::reset to the Resettable interface.

This has no behavioural change, because the BusClass code to support
subclasses that use the legacy BusClass::reset will call that method
in the hold phase of 3-phase reset.

Signed-off-by: Peter Maydell 
Acked-by: Michael S. Tsirkin 
Acked-by: Cédric Le Goater 
Acked-by: Maciej S. Szmigiero 
Tested-by: Cédric Le Goater 
Reviewed-by: Zhao Liu 
Message-id: 20240119163512.3810301-3-peter.mayd...@linaro.org
---
 hw/hyperv/vmbus.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/hw/hyperv/vmbus.c b/hw/hyperv/vmbus.c
index c86d1895bae..380239af2c7 100644
--- a/hw/hyperv/vmbus.c
+++ b/hw/hyperv/vmbus.c
@@ -2453,9 +2453,9 @@ static void vmbus_unrealize(BusState *bus)
 qemu_mutex_destroy(>rx_queue_lock);
 }
 
-static void vmbus_reset(BusState *bus)
+static void vmbus_reset_hold(Object *obj)
 {
-vmbus_deinit(VMBUS(bus));
+vmbus_deinit(VMBUS(obj));
 }
 
 static char *vmbus_get_dev_path(DeviceState *dev)
@@ -2476,12 +2476,13 @@ static char *vmbus_get_fw_dev_path(DeviceState *dev)
 static void vmbus_class_init(ObjectClass *klass, void *data)
 {
 BusClass *k = BUS_CLASS(klass);
+ResettableClass *rc = RESETTABLE_CLASS(klass);
 
 k->get_dev_path = vmbus_get_dev_path;
 k->get_fw_dev_path = vmbus_get_fw_dev_path;
 k->realize = vmbus_realize;
 k->unrealize = vmbus_unrealize;
-k->reset = vmbus_reset;
+rc->phases.hold = vmbus_reset_hold;
 }
 
 static int vmbus_pre_load(void *opaque)
-- 
2.34.1

[PULL 14/36] hw/arm/exynos: Check for CPU types in machine_run_board_init()

2024-02-02 Thread Peter Maydell

From: Philippe Mathieu-Daudé 

Restrict MachineClass::valid_cpu_types[] to the single
valid CPU type.

Instead of ignoring invalid CPU type requested by the user:

  $ qemu-system-arm -M nuri -cpu cortex-a7 -S -monitor stdio
  QEMU 8.2.50 monitor - type 'help' for more information
  (qemu) info qom-tree
  /machine (nuri-machine)
/soc (exynos4210)
  /cpu[0] (cortex-a9-arm-cpu)
  ...

We now display an error:

  $ qemu-system-arm -M nuri -cpu cortex-a7
  qemu-system-arm: Invalid CPU model: cortex-a7
  The only valid type is: cortex-a9

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
Reviewed-by: Gavin Shan 
Message-id: 20240129151828.59544-3-phi...@linaro.org
Signed-off-by: Peter Maydell 
---
 hw/arm/exynos4_boards.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/hw/arm/exynos4_boards.c b/hw/arm/exynos4_boards.c
index b0e13eb4f00..01c7618a67c 100644
--- a/hw/arm/exynos4_boards.c
+++ b/hw/arm/exynos4_boards.c
@@ -34,6 +34,7 @@
 #include "hw/qdev-properties.h"
 #include "hw/boards.h"
 #include "hw/irq.h"
+#include "target/arm/cpu-qom.h"
 
 #define SMDK_LAN9118_BASE_ADDR  0x0500
 
@@ -150,12 +151,18 @@ static void smdkc210_init(MachineState *machine)
 arm_load_kernel(s->soc.cpu[0], machine, _board_binfo);
 }
 
+static const char * const valid_cpu_types[] = {
+ARM_CPU_TYPE_NAME("cortex-a9"),
+NULL
+};
+
 static void nuri_class_init(ObjectClass *oc, void *data)
 {
 MachineClass *mc = MACHINE_CLASS(oc);
 
 mc->desc = "Samsung NURI board (Exynos4210)";
 mc->init = nuri_init;
+mc->valid_cpu_types = valid_cpu_types;
 mc->max_cpus = EXYNOS4210_NCPUS;
 mc->min_cpus = EXYNOS4210_NCPUS;
 mc->default_cpus = EXYNOS4210_NCPUS;
@@ -174,6 +181,7 @@ static void smdkc210_class_init(ObjectClass *oc, void *data)
 
 mc->desc = "Samsung SMDKC210 board (Exynos4210)";
 mc->init = smdkc210_init;
+mc->valid_cpu_types = valid_cpu_types;
 mc->max_cpus = EXYNOS4210_NCPUS;
 mc->min_cpus = EXYNOS4210_NCPUS;
 mc->default_cpus = EXYNOS4210_NCPUS;
-- 
2.34.1

[PULL 15/36] hw/arm/highbank: Add missing QOM parent for CPU cores

2024-02-02 Thread Peter Maydell

From: Philippe Mathieu-Daudé 

QDev objects created with qdev_new() need to manually add
their parent relationship with object_property_add_child().

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
Reviewed-by: Gavin Shan 
Message-id: 20240129151828.59544-4-phi...@linaro.org
Signed-off-by: Peter Maydell 
---
 hw/arm/highbank.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c
index e6e27d69af5..b8d702c82cc 100644
--- a/hw/arm/highbank.c
+++ b/hw/arm/highbank.c
@@ -209,6 +209,7 @@ static void calxeda_init(MachineState *machine, enum 
cxmachines machine_id)
 cpuobj = object_new(machine->cpu_type);
 cpu = ARM_CPU(cpuobj);
 
+object_property_add_child(OBJECT(machine), "cpu[*]", cpuobj);
 object_property_set_int(cpuobj, "psci-conduit", QEMU_PSCI_CONDUIT_SMC,
 _abort);
 
-- 
2.34.1

[PULL 34/36] tests/qtest: Adding PCS Module test to GMAC Qtest

2024-02-02 Thread Peter Maydell

From: Nabih Estefan Diaz 

 - Add PCS Register check to npcm_gmac-test

Change-Id: I34821beb5e0b1e89e2be576ab58eabe41545af12
Signed-off-by: Nabih Estefan 
Reviewed-by: Tyrone Ting 
Message-id: 20240131002800.989285-7-nabiheste...@google.com
Reviewed-by: Peter Maydell 
Signed-off-by: Peter Maydell 
---
 tests/qtest/npcm_gmac-test.c | 132 +++
 1 file changed, 132 insertions(+)

diff --git a/tests/qtest/npcm_gmac-test.c b/tests/qtest/npcm_gmac-test.c
index 72c68874dfa..9e58b15ca1c 100644
--- a/tests/qtest/npcm_gmac-test.c
+++ b/tests/qtest/npcm_gmac-test.c
@@ -23,6 +23,10 @@
 /* Name of the GMAC Device */
 #define TYPE_NPCM_GMAC "npcm-gmac"
 
+/* Address of the PCS Module */
+#define PCS_BASE_ADDRESS 0xf078
+#define NPCM_PCS_IND_AC_BA 0x1fe
+
 typedef struct GMACModule {
 int irq;
 uint64_t base_addr;
@@ -114,6 +118,62 @@ typedef enum NPCMRegister {
 NPCM_GMAC_PTP_STNSUR = 0x714,
 NPCM_GMAC_PTP_TAR = 0x718,
 NPCM_GMAC_PTP_TTSR = 0x71c,
+
+/* PCS Registers */
+NPCM_PCS_SR_CTL_ID1 = 0x3c0008,
+NPCM_PCS_SR_CTL_ID2 = 0x3c000a,
+NPCM_PCS_SR_CTL_STS = 0x3c0010,
+
+NPCM_PCS_SR_MII_CTRL = 0x3e,
+NPCM_PCS_SR_MII_STS = 0x3e0002,
+NPCM_PCS_SR_MII_DEV_ID1 = 0x3e0004,
+NPCM_PCS_SR_MII_DEV_ID2 = 0x3e0006,
+NPCM_PCS_SR_MII_AN_ADV = 0x3e0008,
+NPCM_PCS_SR_MII_LP_BABL = 0x3e000a,
+NPCM_PCS_SR_MII_AN_EXPN = 0x3e000c,
+NPCM_PCS_SR_MII_EXT_STS = 0x3e001e,
+
+NPCM_PCS_SR_TIM_SYNC_ABL = 0x3e0e10,
+NPCM_PCS_SR_TIM_SYNC_TX_MAX_DLY_LWR = 0x3e0e12,
+NPCM_PCS_SR_TIM_SYNC_TX_MAX_DLY_UPR = 0x3e0e14,
+NPCM_PCS_SR_TIM_SYNC_TX_MIN_DLY_LWR = 0x3e0e16,
+NPCM_PCS_SR_TIM_SYNC_TX_MIN_DLY_UPR = 0x3e0e18,
+NPCM_PCS_SR_TIM_SYNC_RX_MAX_DLY_LWR = 0x3e0e1a,
+NPCM_PCS_SR_TIM_SYNC_RX_MAX_DLY_UPR = 0x3e0e1c,
+NPCM_PCS_SR_TIM_SYNC_RX_MIN_DLY_LWR = 0x3e0e1e,
+NPCM_PCS_SR_TIM_SYNC_RX_MIN_DLY_UPR = 0x3e0e20,
+
+NPCM_PCS_VR_MII_MMD_DIG_CTRL1 = 0x3f,
+NPCM_PCS_VR_MII_AN_CTRL = 0x3f0002,
+NPCM_PCS_VR_MII_AN_INTR_STS = 0x3f0004,
+NPCM_PCS_VR_MII_TC = 0x3f0006,
+NPCM_PCS_VR_MII_DBG_CTRL = 0x3f000a,
+NPCM_PCS_VR_MII_EEE_MCTRL0 = 0x3f000c,
+NPCM_PCS_VR_MII_EEE_TXTIMER = 0x3f0010,
+NPCM_PCS_VR_MII_EEE_RXTIMER = 0x3f0012,
+NPCM_PCS_VR_MII_LINK_TIMER_CTRL = 0x3f0014,
+NPCM_PCS_VR_MII_EEE_MCTRL1 = 0x3f0016,
+NPCM_PCS_VR_MII_DIG_STS = 0x3f0020,
+NPCM_PCS_VR_MII_ICG_ERRCNT1 = 0x3f0022,
+NPCM_PCS_VR_MII_MISC_STS = 0x3f0030,
+NPCM_PCS_VR_MII_RX_LSTS = 0x3f0040,
+NPCM_PCS_VR_MII_MP_TX_BSTCTRL0 = 0x3f0070,
+NPCM_PCS_VR_MII_MP_TX_LVLCTRL0 = 0x3f0074,
+NPCM_PCS_VR_MII_MP_TX_GENCTRL0 = 0x3f007a,
+NPCM_PCS_VR_MII_MP_TX_GENCTRL1 = 0x3f007c,
+NPCM_PCS_VR_MII_MP_TX_STS = 0x3f0090,
+NPCM_PCS_VR_MII_MP_RX_GENCTRL0 = 0x3f00b0,
+NPCM_PCS_VR_MII_MP_RX_GENCTRL1 = 0x3f00b2,
+NPCM_PCS_VR_MII_MP_RX_LOS_CTRL0 = 0x3f00ba,
+NPCM_PCS_VR_MII_MP_MPLL_CTRL0 = 0x3f00f0,
+NPCM_PCS_VR_MII_MP_MPLL_CTRL1 = 0x3f00f2,
+NPCM_PCS_VR_MII_MP_MPLL_STS = 0x3f0110,
+NPCM_PCS_VR_MII_MP_MISC_CTRL2 = 0x3f0126,
+NPCM_PCS_VR_MII_MP_LVL_CTRL = 0x3f0130,
+NPCM_PCS_VR_MII_MP_MISC_CTRL0 = 0x3f0132,
+NPCM_PCS_VR_MII_MP_MISC_CTRL1 = 0x3f0134,
+NPCM_PCS_VR_MII_DIG_CTRL2 = 0x3f01c2,
+NPCM_PCS_VR_MII_DIG_ERRCNT_SEL = 0x3f01c4,
 } NPCMRegister;
 
 static uint32_t gmac_read(QTestState *qts, const GMACModule *mod,
@@ -122,6 +182,15 @@ static uint32_t gmac_read(QTestState *qts, const 
GMACModule *mod,
 return qtest_readl(qts, mod->base_addr + regno);
 }
 
+static uint16_t pcs_read(QTestState *qts, const GMACModule *mod,
+  NPCMRegister regno)
+{
+uint32_t write_value = (regno & 0x3ffe00) >> 9;
+qtest_writel(qts, PCS_BASE_ADDRESS + NPCM_PCS_IND_AC_BA, write_value);
+uint32_t read_offset = regno & 0x1ff;
+return qtest_readl(qts, PCS_BASE_ADDRESS + read_offset);
+}
+
 /* Check that GMAC registers are reset to default value */
 static void test_init(gconstpointer test_data)
 {
@@ -134,6 +203,11 @@ static void test_init(gconstpointer test_data)
 g_assert_cmphex(gmac_read(qts, mod, (regno)), ==, (value)); \
 } while (0)
 
+#define CHECK_REG_PCS(regno, value) \
+do { \
+g_assert_cmphex(pcs_read(qts, mod, (regno)), ==, (value)); \
+} while (0)
+
 CHECK_REG32(NPCM_DMA_BUS_MODE, 0x00020100);
 CHECK_REG32(NPCM_DMA_XMT_POLL_DEMAND, 0);
 CHECK_REG32(NPCM_DMA_RCV_POLL_DEMAND, 0);
@@ -183,6 +257,64 @@ static void test_init(gconstpointer test_data)
 CHECK_REG32(NPCM_GMAC_PTP_TAR, 0);
 CHECK_REG32(NPCM_GMAC_PTP_TTSR, 0);
 
+/* TODO Add registers PCS */
+if (mod->base_addr == 0xf0802000) {
+CHECK_REG_PCS(NPCM_PCS_SR_CTL_ID1, 0x699e);
+CHECK_REG_PCS(NPCM_PCS_SR_CTL_ID2, 0);
+CHECK_REG_PCS(NPCM_PCS_SR_CTL_STS, 0x8000);
+
+CHECK_REG_PCS(NPCM_PCS_SR_MII_CTRL, 0x1140);
+CHECK_REG_PCS(NPCM_PCS_SR_MII_STS, 0x0109);
+

[PULL 24/36] hw/arm/z2: convert DPRINTF to trace events and guest errors

2024-02-02 Thread Peter Maydell

From: Manos Pitsidianakis 

Tracing DPRINTFs to stderr might not be desired. A developer that relies
on trace events should be able to opt-in to each trace event and rely on
QEMU's log redirection, instead of stderr by default.

This commit converts DPRINTFs in this file that are used for tracing
into trace events. DPRINTFs that report guest errors are logged with
LOG_GUEST_ERROR.

Signed-off-by: Manos Pitsidianakis 
Reviewed-by: Alex Bennée 
Message-id: 
799c5141c5751cf2341e1d095349612e046424a8.1706544115.git.manos.pitsidiana...@linaro.org
Signed-off-by: Peter Maydell 
---
 hw/arm/z2.c | 27 ---
 hw/arm/trace-events |  7 +++
 2 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/hw/arm/z2.c b/hw/arm/z2.c
index a67fba2cfd2..eb2ff8dbc84 100644
--- a/hw/arm/z2.c
+++ b/hw/arm/z2.c
@@ -27,13 +27,7 @@
 #include "exec/address-spaces.h"
 #include "qom/object.h"
 #include "qapi/error.h"
-
-#ifdef DEBUG_Z2
-#define DPRINTF(fmt, ...) \
-printf(fmt, ## __VA_ARGS__)
-#else
-#define DPRINTF(fmt, ...)
-#endif
+#include "trace.h"
 
 static const struct keymap map[0x100] = {
 [0 ... 0xff] = { -1, -1 },
@@ -119,6 +113,8 @@ static uint32_t zipit_lcd_transfer(SSIPeripheral *dev, 
uint32_t value)
 {
 ZipitLCD *z = ZIPIT_LCD(dev);
 uint16_t val;
+
+trace_z2_lcd_reg_update(z->cur_reg, z->buf[0], z->buf[1], z->buf[2], 
value);
 if (z->selected) {
 z->buf[z->pos] = value & 0xff;
 z->pos++;
@@ -126,22 +122,19 @@ static uint32_t zipit_lcd_transfer(SSIPeripheral *dev, 
uint32_t value)
 if (z->pos == 3) {
 switch (z->buf[0]) {
 case 0x74:
-DPRINTF("%s: reg: 0x%.2x\n", __func__, z->buf[2]);
 z->cur_reg = z->buf[2];
 break;
 case 0x76:
 val = z->buf[1] << 8 | z->buf[2];
-DPRINTF("%s: value: 0x%.4x\n", __func__, val);
 if (z->cur_reg == 0x22 && val == 0x) {
 z->enabled = 1;
-printf("%s: LCD enabled\n", __func__);
+trace_z2_lcd_enable_disable_result("enabled");
 } else if (z->cur_reg == 0x10 && val == 0x) {
 z->enabled = 0;
-printf("%s: LCD disabled\n", __func__);
+trace_z2_lcd_enable_disable_result("disabled");
 }
 break;
 default:
-DPRINTF("%s: unknown command!\n", __func__);
 break;
 }
 z->pos = 0;
@@ -211,14 +204,12 @@ static int aer915_send(I2CSlave *i2c, uint8_t data)
 
 s->buf[s->len] = data;
 if (s->len++ > 2) {
-DPRINTF("%s: message too long (%i bytes)\n",
-__func__, s->len);
+trace_z2_aer915_send_too_long(s->len);
 return 1;
 }
 
 if (s->len == 2) {
-DPRINTF("%s: reg %d value 0x%02x\n", __func__,
-s->buf[0], s->buf[1]);
+trace_z2_aer915_send(s->buf[0], s->buf[1]);
 }
 
 return 0;
@@ -228,14 +219,12 @@ static int aer915_event(I2CSlave *i2c, enum i2c_event 
event)
 {
 AER915State *s = AER915(i2c);
 
+trace_z2_aer915_event(s->len, event);
 switch (event) {
 case I2C_START_SEND:
 s->len = 0;
 break;
 case I2C_START_RECV:
-if (s->len != 1) {
-DPRINTF("%s: short message!?\n", __func__);
-}
 break;
 case I2C_FINISH:
 break;
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index 7c569432150..0ff41e6c780 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -58,3 +58,10 @@ smmuv3_inv_notifiers_iova(const char *name, uint16_t asid, 
uint16_t vmid, uint64
 # strongarm.c
 strongarm_uart_update_parameters(const char *label, int speed, char parity, 
int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d"
 strongarm_ssp_read_underrun(void) "SSP rx underrun"
+
+# z2.c
+z2_lcd_reg_update(uint8_t cur, uint8_t i_0, uint8_t i_1, uint8_t i_2, uint32_t 
value) "cur_reg = 0x%x, buf = [0x%x, 0x%x, 0x%x], value = 0x%x"
+z2_lcd_enable_disable_result(const char *result) "LCD %s"
+z2_aer915_send_too_long(int8_t msg) "message too long (%i bytes)"
+z2_aer915_send(uint8_t reg, uint8_t value) "reg %d value 0x%02x"
+z2_aer915_event(int8_t event, int8_t len) "i2c event =0x%x len=%d bytes"
-- 
2.34.1

[PULL 35/36] hw/ssi: Implement BCM2835 SPI Controller

2024-02-02 Thread Peter Maydell

From: Rayhan Faizel 

This patch adds the SPI controller for the BCM2835. Polling and interrupt modes
of transfer are supported. DMA and LoSSI modes are currently unimplemented.

Signed-off-by: Rayhan Faizel 
Message-id: 20240129221807.2983148-2-rayhan.fai...@gmail.com
Reviewed-by: Peter Maydell 
Signed-off-by: Peter Maydell 
---
 docs/system/arm/raspi.rst|   2 +-
 include/hw/ssi/bcm2835_spi.h |  81 ++
 hw/ssi/bcm2835_spi.c | 288 +++
 hw/ssi/Kconfig   |   4 +
 hw/ssi/meson.build   |   1 +
 5 files changed, 375 insertions(+), 1 deletion(-)
 create mode 100644 include/hw/ssi/bcm2835_spi.h
 create mode 100644 hw/ssi/bcm2835_spi.c

diff --git a/docs/system/arm/raspi.rst b/docs/system/arm/raspi.rst
index 922fe375a67..d0a6f08b2b9 100644
--- a/docs/system/arm/raspi.rst
+++ b/docs/system/arm/raspi.rst
@@ -33,11 +33,11 @@ Implemented devices
  * USB2 host controller (DWC2 and MPHI)
  * MailBox controller (MBOX)
  * VideoCore firmware (property)
+ * Peripheral SPI controller (SPI)
 
 
 Missing devices
 ---
 
- * Peripheral SPI controller (SPI)
  * Analog to Digital Converter (ADC)
  * Pulse Width Modulation (PWM)
diff --git a/include/hw/ssi/bcm2835_spi.h b/include/hw/ssi/bcm2835_spi.h
new file mode 100644
index 000..d3f8cec1119
--- /dev/null
+++ b/include/hw/ssi/bcm2835_spi.h
@@ -0,0 +1,81 @@
+/*
+ * BCM2835 SPI Master Controller
+ *
+ * Copyright (c) 2024 Rayhan Faizel 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "hw/sysbus.h"
+#include "hw/ssi/ssi.h"
+#include "qom/object.h"
+#include "qemu/fifo8.h"
+
+#define TYPE_BCM2835_SPI "bcm2835-spi"
+OBJECT_DECLARE_SIMPLE_TYPE(BCM2835SPIState, BCM2835_SPI)
+
+/*
+ * Though BCM2835 documentation says FIFOs have a capacity of 16,
+ * FIFOs are actually 16 words in size or effectively 64 bytes when operating
+ * in non DMA mode.
+ */
+#define FIFO_SIZE   64
+#define FIFO_SIZE_3_4   48
+
+#define RO_MASK 0x1f
+
+#define BCM2835_SPI_CS  0x00
+#define BCM2835_SPI_FIFO0x04
+#define BCM2835_SPI_CLK 0x08
+#define BCM2835_SPI_DLEN0x0c
+#define BCM2835_SPI_LTOH0x10
+#define BCM2835_SPI_DC  0x14
+
+#define BCM2835_SPI_CS_RXF  BIT(20)
+#define BCM2835_SPI_CS_RXR  BIT(19)
+#define BCM2835_SPI_CS_TXD  BIT(18)
+#define BCM2835_SPI_CS_RXD  BIT(17)
+#define BCM2835_SPI_CS_DONE BIT(16)
+#define BCM2835_SPI_CS_LEN  BIT(13)
+#define BCM2835_SPI_CS_REN  BIT(12)
+#define BCM2835_SPI_CS_INTR BIT(10)
+#define BCM2835_SPI_CS_INTD BIT(9)
+#define BCM2835_SPI_CS_DMAENBIT(8)
+#define BCM2835_SPI_CS_TA   BIT(7)
+#define BCM2835_SPI_CLEAR_RXBIT(5)
+#define BCM2835_SPI_CLEAR_TXBIT(4)
+
+struct BCM2835SPIState {
+/*  */
+SysBusDevice parent_obj;
+
+/*  */
+SSIBus *bus;
+MemoryRegion iomem;
+qemu_irq irq;
+
+uint32_t cs;
+uint32_t clk;
+uint32_t dlen;
+uint32_t ltoh;
+uint32_t dc;
+
+Fifo8 tx_fifo;
+Fifo8 rx_fifo;
+};
diff --git a/hw/ssi/bcm2835_spi.c b/hw/ssi/bcm2835_spi.c
new file mode 100644
index 000..6ecb42d4e3b
--- /dev/null
+++ b/hw/ssi/bcm2835_spi.c
@@ -0,0 +1,288 @@
+/*
+ * BCM2835 SPI Master Controller
+ *
+ * Copyright (c) 2024 Rayhan Faizel 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS

[PULL 36/36] hw/arm: Connect SPI Controller to BCM2835

2024-02-02 Thread Peter Maydell

From: Rayhan Faizel 

This patch will allow the SPI controller to be accessible from BCM2835 based
boards as SPI0. SPI driver is usually disabled by default and config.txt does
not work.

Instead, dtmerge can be used to apply spi=on on a bcm2835 dtb file.

Signed-off-by: Rayhan Faizel 
Message-id: 20240129221807.2983148-3-rayhan.fai...@gmail.com
[PMM: indent tweak]
Reviewed-by: Peter Maydell 
Signed-off-by: Peter Maydell 
---
 include/hw/arm/bcm2835_peripherals.h |  3 ++-
 hw/arm/bcm2835_peripherals.c | 17 -
 hw/arm/Kconfig   |  1 +
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/include/hw/arm/bcm2835_peripherals.h 
b/include/hw/arm/bcm2835_peripherals.h
index d724a2fc28a..0203bb79d8c 100644
--- a/include/hw/arm/bcm2835_peripherals.h
+++ b/include/hw/arm/bcm2835_peripherals.h
@@ -31,6 +31,7 @@
 #include "hw/gpio/bcm2835_gpio.h"
 #include "hw/timer/bcm2835_systmr.h"
 #include "hw/usb/hcd-dwc2.h"
+#include "hw/ssi/bcm2835_spi.h"
 #include "hw/misc/unimp.h"
 #include "qom/object.h"
 
@@ -66,7 +67,7 @@ struct BCM2835PeripheralState {
 BCM2835GpioState gpio;
 Bcm2835ThermalState thermal;
 UnimplementedDeviceState i2s;
-UnimplementedDeviceState spi[1];
+BCM2835SPIState spi[1];
 UnimplementedDeviceState i2c[3];
 UnimplementedDeviceState otp;
 UnimplementedDeviceState dbus;
diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c
index 0233038b957..d5573fd9545 100644
--- a/hw/arm/bcm2835_peripherals.c
+++ b/hw/arm/bcm2835_peripherals.c
@@ -144,6 +144,10 @@ static void bcm2835_peripherals_init(Object *obj)
 /* Power Management */
 object_initialize_child(obj, "powermgt", >powermgt,
 TYPE_BCM2835_POWERMGT);
+
+/* SPI */
+object_initialize_child(obj, "bcm2835-spi0", >spi[0],
+TYPE_BCM2835_SPI);
 }
 
 static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp)
@@ -402,11 +406,22 @@ static void bcm2835_peripherals_realize(DeviceState *dev, 
Error **errp)
 memory_region_add_subregion(>peri_mr, PM_OFFSET,
 sysbus_mmio_get_region(SYS_BUS_DEVICE(>powermgt), 0));
 
+/* SPI */
+if (!sysbus_realize(SYS_BUS_DEVICE(>spi[0]), errp)) {
+return;
+}
+
+memory_region_add_subregion(>peri_mr, SPI0_OFFSET,
+sysbus_mmio_get_region(SYS_BUS_DEVICE(>spi[0]), 0));
+sysbus_connect_irq(SYS_BUS_DEVICE(>spi[0]), 0,
+   qdev_get_gpio_in_named(DEVICE(>ic),
+  BCM2835_IC_GPU_IRQ,
+  INTERRUPT_SPI));
+
 create_unimp(s, >txp, "bcm2835-txp", TXP_OFFSET, 0x1000);
 create_unimp(s, >armtmr, "bcm2835-sp804", ARMCTRL_TIMER0_1_OFFSET, 
0x40);
 create_unimp(s, >i2s, "bcm2835-i2s", I2S_OFFSET, 0x100);
 create_unimp(s, >smi, "bcm2835-smi", SMI_OFFSET, 0x100);
-create_unimp(s, >spi[0], "bcm2835-spi0", SPI0_OFFSET, 0x20);
 create_unimp(s, >bscsl, "bcm2835-spis", BSC_SL_OFFSET, 0x100);
 create_unimp(s, >i2c[0], "bcm2835-i2c0", BSC0_OFFSET, 0x20);
 create_unimp(s, >i2c[1], "bcm2835-i2c1", BSC1_OFFSET, 0x20);
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index db08a00a45b..980b14d58dc 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -429,6 +429,7 @@ config RASPI
 select PL011 # UART
 select SDHCI
 select USB_DWC2
+select BCM2835_SPI
 
 config STM32F100_SOC
 bool
-- 
2.34.1

[PULL 21/36] hw/arm/zynq: Check for CPU types in machine_run_board_init()

2024-02-02 Thread Peter Maydell

From: Philippe Mathieu-Daudé 

Leverage the common code introduced in commit c9cf636d48 ("machine:
Add a valid_cpu_types property") to check for the single valid CPU
type. Remove the now unused MachineClass::default_cpu_type field.

Reviewed-by: Richard Henderson 
Reviewed-by: Gavin Shan 
Signed-off-by: Philippe Mathieu-Daudé 
Message-id: 20240129151828.59544-10-phi...@linaro.org
Signed-off-by: Peter Maydell 
---
 hw/arm/xilinx_zynq.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c
index 66d0de139f2..c57bbccb70f 100644
--- a/hw/arm/xilinx_zynq.c
+++ b/hw/arm/xilinx_zynq.c
@@ -355,13 +355,17 @@ static void zynq_init(MachineState *machine)
 
 static void zynq_machine_class_init(ObjectClass *oc, void *data)
 {
+static const char * const valid_cpu_types[] = {
+ARM_CPU_TYPE_NAME("cortex-a9"),
+NULL
+};
 MachineClass *mc = MACHINE_CLASS(oc);
 mc->desc = "Xilinx Zynq Platform Baseboard for Cortex-A9";
 mc->init = zynq_init;
 mc->max_cpus = 1;
 mc->no_sdcard = 1;
 mc->ignore_memory_transaction_failures = true;
-mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a9");
+mc->valid_cpu_types = valid_cpu_types;
 mc->default_ram_id = "zynq.ext_ram";
 }
 
-- 
2.34.1

[PULL 12/36] doc/sphinx/hxtool.py: add optional label argument to SRST directive

2024-02-02 Thread Peter Maydell

From: David Woodhouse 

We can't just embed labels directly into files like qemu-options.hx which
are included from multiple top-level rST files, because Sphinx sees the
labels as duplicate: https://github.com/sphinx-doc/sphinx/issues/9707

So add an optional argument to the SRST directive which causes a label
of the form '.. _DOCNAME-HXFILE-LABEL:' to be emitted, where 'DOCNAME'
is the name of the top level rST file, 'HXFILE' is the filename of the
.hx file, and 'LABEL' is the text provided within the 'SRST()' directive.
Using the DOCNAME of the top-level rST document means that it is unique
even when the .hx file is included from two different documents, as is
the case for qemu-options.hx

Now where the Xen PV documentation refers to the documentation for the
-initrd command line option, it can emit a link directly to it as
''.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
Reviewed-by: Peter Maydell 
Message-id: 20240130190348.682912-1-dw...@infradead.org
Signed-off-by: Peter Maydell 
---
 docs/devel/docs.rst  | 12 ++--
 docs/sphinx/hxtool.py| 16 
 docs/system/i386/xen.rst |  3 ++-
 qemu-options.hx  |  2 +-
 4 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/docs/devel/docs.rst b/docs/devel/docs.rst
index 7da067905b8..50ff0d67f84 100644
--- a/docs/devel/docs.rst
+++ b/docs/devel/docs.rst
@@ -30,6 +30,13 @@ nor the documentation output.
 
 ``SRST`` starts a reStructuredText section. Following lines
 are put into the documentation verbatim, and discarded from the C output.
+The alternative form ``SRST()`` is used to define a label which can be
+referenced from elsewhere in the rST documentation. The label will take
+the form , where ``DOCNAME`` is the name of the
+top level rST file, ``HXFILE`` is the filename of the .hx file without
+the ``.hx`` extension, and ``LABEL`` is the text provided within the
+``SRST()`` directive. For example,
+.
 
 ``ERST`` ends the documentation section started with ``SRST``,
 and switches back to a C code section.
@@ -53,8 +60,9 @@ text, but in ``hmp-commands.hx`` the C code sections are 
elements
 of an array of structs of type ``HMPCommand`` which define the
 name, behaviour and help text for each monitor command.
 
-In the file ``qemu-options.hx``, do not try to define a
+In the file ``qemu-options.hx``, do not try to explicitly define a
 reStructuredText label within a documentation section. This file
 is included into two separate Sphinx documents, and some
 versions of Sphinx will complain about the duplicate label
-that results.
+that results. Use the ``SRST()`` directive documented above, to
+emit an unambiguous label.
diff --git a/docs/sphinx/hxtool.py b/docs/sphinx/hxtool.py
index 9f6b9d87dcc..3729084a36c 100644
--- a/docs/sphinx/hxtool.py
+++ b/docs/sphinx/hxtool.py
@@ -78,6 +78,14 @@ def parse_archheading(file, lnum, line):
 serror(file, lnum, "Invalid ARCHHEADING line")
 return match.group(1)
 
+def parse_srst(file, lnum, line):
+"""Handle an SRST directive"""
+# The input should be either "SRST", or "SRST(label)".
+match = re.match(r'SRST(\((.*?)\))?', line)
+if match is None:
+serror(file, lnum, "Invalid SRST line")
+return match.group(2)
+
 class HxtoolDocDirective(Directive):
 """Extract rST fragments from the specified .hx file"""
 required_argument = 1
@@ -113,6 +121,14 @@ def run(self):
 serror(hxfile, lnum, 'expected ERST, found SRST')
 else:
 state = HxState.RST
+label = parse_srst(hxfile, lnum, line)
+if label:
+rstlist.append("", hxfile, lnum - 1)
+# Build label as _DOCNAME-HXNAME-LABEL
+hx = os.path.splitext(os.path.basename(hxfile))[0]
+refline = ".. _" + env.docname + "-" + hx + \
+"-" + label + ":"
+rstlist.append(refline, hxfile, lnum - 1)
 elif directive == 'ERST':
 if state == HxState.CTEXT:
 serror(hxfile, lnum, 'expected SRST, found ERST')
diff --git a/docs/system/i386/xen.rst b/docs/system/i386/xen.rst
index 81898768baa..46db5f34c1d 100644
--- a/docs/system/i386/xen.rst
+++ b/docs/system/i386/xen.rst
@@ -132,7 +132,8 @@ The example above provides the guest kernel command line 
after a separator
 (" ``--`` ") on the Xen command line, and does not provide the guest kernel
 with an actual initramfs, which would need to listed as a second multiboot
 module. For more complicated alternatives, see the command line
-documentation for the ``-initrd`` option.
+:ref:`documentation ` for the
+``-initrd`` option.
 
 Host OS requirements
 
diff --git a/qemu-options.hx b/qemu-options.hx
index 40e938c4877..5adbed11013 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@

[PULL 26/36] hw/xen/xen-mapcache.c: convert DPRINTF to tracepoints

2024-02-02 Thread Peter Maydell

From: Manos Pitsidianakis 

Tracing DPRINTFs to stderr might not be desired. A developer that relies
on tracepoints should be able to opt-in to each tracepoint and rely on
QEMU's log redirection, instead of stderr by default.

This commit converts DPRINTFs in this file that are used for tracing
into tracepoints.

Signed-off-by: Manos Pitsidianakis 
Reviewed-by: Alex Bennée 
Message-id: 
2fbe1fbc59078e384761c932e97cfa4276a53d75.1706544115.git.manos.pitsidiana...@linaro.org
Signed-off-by: Peter Maydell 
---
 hw/xen/xen-mapcache.c | 54 +++
 hw/xen/trace-events   | 11 +
 2 files changed, 35 insertions(+), 30 deletions(-)

diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c
index f7d974677d1..336c2123767 100644
--- a/hw/xen/xen-mapcache.c
+++ b/hw/xen/xen-mapcache.c
@@ -22,16 +22,6 @@
 #include "trace.h"
 
 
-//#define MAPCACHE_DEBUG
-
-#ifdef MAPCACHE_DEBUG
-#  define DPRINTF(fmt, ...) do { \
-fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \
-} while (0)
-#else
-#  define DPRINTF(fmt, ...) do { } while (0)
-#endif
-
 #if HOST_LONG_BITS == 32
 #  define MCACHE_BUCKET_SHIFT 16
 #  define MCACHE_MAX_SIZE (1UL<<31) /* 2GB Cap */
@@ -145,8 +135,7 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void 
*opaque)
 
 size = mapcache->nr_buckets * sizeof (MapCacheEntry);
 size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
-DPRINTF("%s, nr_buckets = %lx size %lu\n", __func__,
-mapcache->nr_buckets, size);
+trace_xen_map_cache_init(mapcache->nr_buckets, size);
 mapcache->entry = g_malloc0(size);
 }
 
@@ -286,7 +275,9 @@ tryagain:
 test_bits(address_offset >> XC_PAGE_SHIFT,
   test_bit_size >> XC_PAGE_SHIFT,
   mapcache->last_entry->valid_mapping)) {
-trace_xen_map_cache_return(mapcache->last_entry->vaddr_base + 
address_offset);
+trace_xen_map_cache_return(
+mapcache->last_entry->vaddr_base + address_offset
+);
 return mapcache->last_entry->vaddr_base + address_offset;
 }
 
@@ -368,7 +359,9 @@ tryagain:
 QTAILQ_INSERT_HEAD(>locked_entries, reventry, next);
 }
 
-trace_xen_map_cache_return(mapcache->last_entry->vaddr_base + 
address_offset);
+trace_xen_map_cache_return(
+mapcache->last_entry->vaddr_base + address_offset
+);
 return mapcache->last_entry->vaddr_base + address_offset;
 }
 
@@ -402,10 +395,10 @@ ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
 }
 }
 if (!found) {
-fprintf(stderr, "%s, could not find %p\n", __func__, ptr);
+trace_xen_ram_addr_from_mapcache_not_found(ptr);
 QTAILQ_FOREACH(reventry, >locked_entries, next) {
-DPRINTF("   "HWADDR_FMT_plx" -> %p is present\n", 
reventry->paddr_index,
-reventry->vaddr_req);
+trace_xen_ram_addr_from_mapcache_found(reventry->paddr_index,
+   reventry->vaddr_req);
 }
 abort();
 return 0;
@@ -416,7 +409,7 @@ ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
 entry = entry->next;
 }
 if (!entry) {
-DPRINTF("Trying to find address %p that is not in the mapcache!\n", 
ptr);
+trace_xen_ram_addr_from_mapcache_not_in_cache(ptr);
 raddr = 0;
 } else {
 raddr = (reventry->paddr_index << MCACHE_BUCKET_SHIFT) +
@@ -443,9 +436,12 @@ static void 
xen_invalidate_map_cache_entry_unlocked(uint8_t *buffer)
 }
 }
 if (!found) {
-DPRINTF("%s, could not find %p\n", __func__, buffer);
+trace_xen_invalidate_map_cache_entry_unlocked_not_found(buffer);
 QTAILQ_FOREACH(reventry, >locked_entries, next) {
-DPRINTF("   "HWADDR_FMT_plx" -> %p is present\n", 
reventry->paddr_index, reventry->vaddr_req);
+trace_xen_invalidate_map_cache_entry_unlocked_found(
+reventry->paddr_index,
+reventry->vaddr_req
+);
 }
 return;
 }
@@ -463,7 +459,7 @@ static void xen_invalidate_map_cache_entry_unlocked(uint8_t 
*buffer)
 entry = entry->next;
 }
 if (!entry) {
-DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", 
buffer);
+trace_xen_invalidate_map_cache_entry_unlocked_miss(buffer);
 return;
 }
 entry->lock--;
@@ -502,9 +498,8 @@ void xen_invalidate_map_cache(void)
 if (!reventry->dma) {
 continue;
 }
-fprintf(stderr, "Locked DMA mapping while invalidating mapcache!"
-" "HWADDR_FMT_plx" -> %p is present\n",
-reventry->paddr_index, reventry->vaddr_req);
+trace_xen_invalidate_map_cache(reventry->paddr_index,
+   reventry->vaddr_req);
 }
 
 for (i = 0; i < mapcache->nr_buckets; i++) {
@@ -562,24 +557,23 @@ static uint8_t *xen_replace_cache_entry_unlocked(hwaddr

[PULL 07/36] system/vl.c: Fix handling of '-serial none -serial something'

2024-02-02 Thread Peter Maydell

Currently if the user passes multiple -serial options on the command
line, we mostly treat those as applying to the different serial
devices in order, so that for example
 -serial stdio -serial file:filename
will connect the first serial port to stdio and the second to the
named file.

The exception to this is the '-serial none' serial device type.  This
means "don't allocate this serial device", but a bug means that
following -serial options are not correctly handled, so that
 -serial none -serial stdio
has the unexpected effect that stdio is connected to the first serial
port, not the second.

This is a very long-standing bug that dates back at least as far as
commit 998bbd74b9d81 from 2009.

Make the 'none' serial type move forward in the indexing of serial
devices like all the other serial types, so that any subsequent
-serial options are correctly handled.

Note that if your commandline mistakenly had a '-serial none' that
was being overridden by a following '-serial something' option, you
should delete the unnecessary '-serial none'.  This will give you the
same behaviour as before, on QEMU versions both with and without this
bug fix.

Cc: qemu-sta...@nongnu.org
Reported-by: Bohdan Kostiv 
Signed-off-by: Peter Maydell 
Reviewed-by: Daniel P. Berrangé 
Reviewed-by: Richard Henderson 
Message-id: 20240122163607.459769-2-peter.mayd...@linaro.org
Fixes: 998bbd74b9d81 ("default devices: core code & serial lines")
Signed-off-by: Peter Maydell 
---
 system/vl.c | 22 +-
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/system/vl.c b/system/vl.c
index 60fd1e56b6b..bb959cbc440 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -1439,18 +1439,22 @@ static void qemu_create_default_devices(void)
 static int serial_parse(const char *devname)
 {
 int index = num_serial_hds;
-char label[32];
 
-if (strcmp(devname, "none") == 0)
-return 0;
-snprintf(label, sizeof(label), "serial%d", index);
 serial_hds = g_renew(Chardev *, serial_hds, index + 1);
 
-serial_hds[index] = qemu_chr_new_mux_mon(label, devname, NULL);
-if (!serial_hds[index]) {
-error_report("could not connect serial device"
- " to character backend '%s'", devname);
-return -1;
+if (strcmp(devname, "none") == 0) {
+/* Don't allocate a serial device for this index */
+serial_hds[index] = NULL;
+} else {
+char label[32];
+snprintf(label, sizeof(label), "serial%d", index);
+
+serial_hds[index] = qemu_chr_new_mux_mon(label, devname, NULL);
+if (!serial_hds[index]) {
+error_report("could not connect serial device"
+ " to character backend '%s'", devname);
+return -1;
+}
 }
 num_serial_hds++;
 return 0;
-- 
2.34.1

[PULL 10/36] tests/qtest/xlnx-versal-trng-test.c: Drop use of variable length array

2024-02-02 Thread Peter Maydell

This test program is the last use of any variable length array in the
codebase.  If we can get rid of all uses of VLAs we can make the
compiler error on new additions.  This is a defensive measure against
security bugs where an on-stack dynamic allocation isn't correctly
size-checked (e.g.  CVE-2021-3527).

In this case the test code didn't even want a variable-sized
array, it was just accidentally using syntax that gave it one.
(The array size for C has to be an actual constant expression,
not just something that happens to be known to be constant...)

Remove the VLA usage.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Reviewed-by: Thomas Huth 
Reviewed-by: Zhao Liu 
Message-id: 20240125173211.1786196-2-peter.mayd...@linaro.org
---
 tests/qtest/xlnx-versal-trng-test.c | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/tests/qtest/xlnx-versal-trng-test.c 
b/tests/qtest/xlnx-versal-trng-test.c
index cef4e575bba..ba86f39d13c 100644
--- a/tests/qtest/xlnx-versal-trng-test.c
+++ b/tests/qtest/xlnx-versal-trng-test.c
@@ -298,10 +298,13 @@ static size_t trng_collect(uint32_t *rnd, size_t cnt)
 return i;
 }
 
+/* These tests all generate 512 bits of random data with the device */
+#define TEST_DATA_WORDS (512 / 32)
+
 static void trng_test_autogen(void)
 {
-const size_t cnt = 512 / 32;
-uint32_t rng[cnt], prng[cnt];
+const size_t cnt = TEST_DATA_WORDS;
+uint32_t rng[TEST_DATA_WORDS], prng[TEST_DATA_WORDS];
 size_t n;
 
 trng_reset();
@@ -343,8 +346,8 @@ static void trng_test_autogen(void)
 
 static void trng_test_oneshot(void)
 {
-const size_t cnt = 512 / 32;
-uint32_t rng[cnt];
+const size_t cnt = TEST_DATA_WORDS;
+uint32_t rng[TEST_DATA_WORDS];
 size_t n;
 
 trng_reset();
@@ -370,8 +373,8 @@ static void trng_test_oneshot(void)
 
 static void trng_test_per_str(void)
 {
-const size_t cnt = 512 / 32;
-uint32_t rng[cnt], prng[cnt];
+const size_t cnt = TEST_DATA_WORDS;
+uint32_t rng[TEST_DATA_WORDS], prng[TEST_DATA_WORDS];
 size_t n;
 
 trng_reset();
@@ -415,8 +418,8 @@ static void trng_test_forced_prng(void)
 const char *prop = "forced-prng";
 const uint64_t seed = 0xdeadbeefbad1bad0ULL;
 
-const size_t cnt = 512 / 32;
-uint32_t rng[cnt], prng[cnt];
+const size_t cnt = TEST_DATA_WORDS;
+uint32_t rng[TEST_DATA_WORDS], prng[TEST_DATA_WORDS];
 size_t n;
 
 trng_reset();
-- 
2.34.1

[PULL 01/36] target/arm: fix exception syndrome for AArch32 bkpt insn

2024-02-02 Thread Peter Maydell

From: Jan Klötzke 

Debug exceptions that target AArch32 Hyp mode are reported differently
than on AAarch64. Internally, Qemu uses the AArch64 syndromes. Therefore
such exceptions need to be either converted to a prefetch abort
(breakpoints, vector catch) or a data abort (watchpoints).

Cc: qemu-sta...@nongnu.org
Signed-off-by: Jan Klötzke 
Reviewed-by: Richard Henderson 
Message-id: 20240127202758.3326381-1-jan.kloet...@kernkonzept.com
Signed-off-by: Peter Maydell 
---
 target/arm/syndrome.h |  8 
 target/arm/helper.c   | 18 ++
 2 files changed, 26 insertions(+)

diff --git a/target/arm/syndrome.h b/target/arm/syndrome.h
index 1a49767479f..3244e0740dd 100644
--- a/target/arm/syndrome.h
+++ b/target/arm/syndrome.h
@@ -25,6 +25,8 @@
 #ifndef TARGET_ARM_SYNDROME_H
 #define TARGET_ARM_SYNDROME_H
 
+#include "qemu/bitops.h"
+
 /* Valid Syndrome Register EC field values */
 enum arm_exception_class {
 EC_UNCATEGORIZED  = 0x00,
@@ -80,6 +82,7 @@ typedef enum {
 SME_ET_InactiveZA,
 } SMEExceptionType;
 
+#define ARM_EL_EC_LENGTH 6
 #define ARM_EL_EC_SHIFT 26
 #define ARM_EL_IL_SHIFT 25
 #define ARM_EL_ISV_SHIFT 24
@@ -94,6 +97,11 @@ static inline uint32_t syn_get_ec(uint32_t syn)
 return syn >> ARM_EL_EC_SHIFT;
 }
 
+static inline uint32_t syn_set_ec(uint32_t syn, uint32_t ec)
+{
+return deposit32(syn, ARM_EL_EC_SHIFT, ARM_EL_EC_LENGTH, ec);
+}
+
 /*
  * Utility functions for constructing various kinds of syndrome value.
  * Note that in general we follow the AArch64 syndrome values; in a
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 945d8571a61..a0041aa0ec7 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -11015,6 +11015,24 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs)
 }
 
 if (env->exception.target_el == 2) {
+/* Debug exceptions are reported differently on AArch32 */
+switch (syn_get_ec(env->exception.syndrome)) {
+case EC_BREAKPOINT:
+case EC_BREAKPOINT_SAME_EL:
+case EC_AA32_BKPT:
+case EC_VECTORCATCH:
+env->exception.syndrome = syn_insn_abort(arm_current_el(env) == 2,
+ 0, 0, 0x22);
+break;
+case EC_WATCHPOINT:
+env->exception.syndrome = syn_set_ec(env->exception.syndrome,
+ EC_DATAABORT);
+break;
+case EC_WATCHPOINT_SAME_EL:
+env->exception.syndrome = syn_set_ec(env->exception.syndrome,
+ EC_DATAABORT_SAME_EL);
+break;
+}
 arm_cpu_do_interrupt_aarch32_hyp(cs);
 return;
 }
-- 
2.34.1

[PULL 22/36] pci-host: designware: Limit value range of iATU viewport register

2024-02-02 Thread Peter Maydell

From: Guenter Roeck 

The latest version of qemu (v8.2.0-869-g7a1dc45af5) crashes when booting
the mcimx7d-sabre emulation with Linux v5.11 and later.

qemu-system-arm: ../system/memory.c:2750: memory_region_set_alias_offset: 
Assertion `mr->alias' failed.

Problem is that the Designware PCIe emulation accepts the full value range
for the iATU Viewport Register. However, both hardware and emulation only
support four inbound and four outbound viewports.

The Linux kernel determines the number of supported viewports by writing
0xff into the viewport register and reading the value back. The expected
value when reading the register is the highest supported viewport index.
Match that code by masking the supported viewport value range when the
register is written. With this change, the Linux kernel reports

imx6q-pcie 3380.pcie: iATU: unroll F, 4 ob, 4 ib, align 0K, limit 4G

as expected and supported.

Fixes: d64e5eabc4c7 ("pci: Add support for Designware IP block")
Cc: Andrey Smirnov 
Cc: Nikita Ostrenkov 
Signed-off-by: Guenter Roeck 
Message-id: 20240129060055.2616989-1-li...@roeck-us.net
Reviewed-by: Peter Maydell 
Signed-off-by: Peter Maydell 
---
 hw/pci-host/designware.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/pci-host/designware.c b/hw/pci-host/designware.c
index dd9e389c07b..c25d50f1c6b 100644
--- a/hw/pci-host/designware.c
+++ b/hw/pci-host/designware.c
@@ -340,6 +340,8 @@ static void designware_pcie_root_config_write(PCIDevice *d, 
uint32_t address,
 break;
 
 case DESIGNWARE_PCIE_ATU_VIEWPORT:
+val &= DESIGNWARE_PCIE_ATU_REGION_INBOUND |
+(DESIGNWARE_PCIE_NUM_VIEWPORTS - 1);
 root->atu_viewport = val;
 break;
 
-- 
2.34.1

[PULL 16/36] hw/arm/highbank: Check for CPU types in machine_run_board_init()

2024-02-02 Thread Peter Maydell

From: Philippe Mathieu-Daudé 

Restrict MachineClass::valid_cpu_types[] to the single
valid CPU types.

Instead of ignoring invalid CPU type requested by the user:

  $ qemu-system-arm -M midway -cpu cortex-a7 -S -monitor stdio
  QEMU 8.2.50 monitor - type 'help' for more information
  (qemu) info qom-tree
  /machine (midway-machine)
/cpu[0] (cortex-a15-arm-cpu)
...

we now display an error:

  $ qemu-system-arm -M midway -cpu cortex-a7
  qemu-system-arm: Invalid CPU model: cortex-a7
  The only valid type is: cortex-a15

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
Reviewed-by: Gavin Shan 
Message-id: 20240129151828.59544-5-phi...@linaro.org
Signed-off-by: Peter Maydell 
---
 hw/arm/highbank.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c
index b8d702c82cc..03670506974 100644
--- a/hw/arm/highbank.c
+++ b/hw/arm/highbank.c
@@ -345,10 +345,15 @@ static void midway_init(MachineState *machine)
 
 static void highbank_class_init(ObjectClass *oc, void *data)
 {
+static const char * const valid_cpu_types[] = {
+ARM_CPU_TYPE_NAME("cortex-a9"),
+NULL
+};
 MachineClass *mc = MACHINE_CLASS(oc);
 
 mc->desc = "Calxeda Highbank (ECX-1000)";
 mc->init = highbank_init;
+mc->valid_cpu_types = valid_cpu_types;
 mc->block_default_type = IF_IDE;
 mc->units_per_default_bus = 1;
 mc->max_cpus = 4;
@@ -364,10 +369,15 @@ static const TypeInfo highbank_type = {
 
 static void midway_class_init(ObjectClass *oc, void *data)
 {
+static const char * const valid_cpu_types[] = {
+ARM_CPU_TYPE_NAME("cortex-a15"),
+NULL
+};
 MachineClass *mc = MACHINE_CLASS(oc);
 
 mc->desc = "Calxeda Midway (ECX-2000)";
 mc->init = midway_init;
+mc->valid_cpu_types = valid_cpu_types;
 mc->block_default_type = IF_IDE;
 mc->units_per_default_bus = 1;
 mc->max_cpus = 4;
-- 
2.34.1

[PULL 31/36] tests/qtest: Creating qtest for GMAC Module

2024-02-02 Thread Peter Maydell

From: Nabih Estefan Diaz 

 - Created qtest to check initialization of registers in GMAC Module.
 - Implemented test into Build File.

Change-Id: I8b2fe152d3987a7eec4cf6a1d25ba92e75a5391d
Signed-off-by: Nabih Estefan 
Reviewed-by: Tyrone Ting 
Message-id: 20240131002800.989285-4-nabiheste...@google.com
Reviewed-by: Peter Maydell 
Signed-off-by: Peter Maydell 
---
 tests/qtest/npcm_gmac-test.c | 212 +++
 tests/qtest/meson.build  |   1 +
 2 files changed, 213 insertions(+)
 create mode 100644 tests/qtest/npcm_gmac-test.c

diff --git a/tests/qtest/npcm_gmac-test.c b/tests/qtest/npcm_gmac-test.c
new file mode 100644
index 000..72c68874dfa
--- /dev/null
+++ b/tests/qtest/npcm_gmac-test.c
@@ -0,0 +1,212 @@
+/*
+ * QTests for Nuvoton NPCM7xx/8xx GMAC Modules.
+ *
+ * Copyright 2024 Google LLC
+ * Authors:
+ * Hao Wu 
+ * Nabih Estefan 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "qemu/osdep.h"
+#include "libqos/libqos.h"
+
+/* Name of the GMAC Device */
+#define TYPE_NPCM_GMAC "npcm-gmac"
+
+typedef struct GMACModule {
+int irq;
+uint64_t base_addr;
+} GMACModule;
+
+typedef struct TestData {
+const GMACModule *module;
+} TestData;
+
+/* Values extracted from hw/arm/npcm8xx.c */
+static const GMACModule gmac_module_list[] = {
+{
+.irq= 14,
+.base_addr  = 0xf0802000
+},
+{
+.irq= 15,
+.base_addr  = 0xf0804000
+},
+{
+.irq= 16,
+.base_addr  = 0xf0806000
+},
+{
+.irq= 17,
+.base_addr  = 0xf0808000
+}
+};
+
+/* Returns the index of the GMAC module. */
+static int gmac_module_index(const GMACModule *mod)
+{
+ptrdiff_t diff = mod - gmac_module_list;
+
+g_assert_true(diff >= 0 && diff < ARRAY_SIZE(gmac_module_list));
+
+return diff;
+}
+
+/* 32-bit register indices. Taken from npcm_gmac.c */
+typedef enum NPCMRegister {
+/* DMA Registers */
+NPCM_DMA_BUS_MODE = 0x1000,
+NPCM_DMA_XMT_POLL_DEMAND = 0x1004,
+NPCM_DMA_RCV_POLL_DEMAND = 0x1008,
+NPCM_DMA_RCV_BASE_ADDR = 0x100c,
+NPCM_DMA_TX_BASE_ADDR = 0x1010,
+NPCM_DMA_STATUS = 0x1014,
+NPCM_DMA_CONTROL = 0x1018,
+NPCM_DMA_INTR_ENA = 0x101c,
+NPCM_DMA_MISSED_FRAME_CTR = 0x1020,
+NPCM_DMA_HOST_TX_DESC = 0x1048,
+NPCM_DMA_HOST_RX_DESC = 0x104c,
+NPCM_DMA_CUR_TX_BUF_ADDR = 0x1050,
+NPCM_DMA_CUR_RX_BUF_ADDR = 0x1054,
+NPCM_DMA_HW_FEATURE = 0x1058,
+
+/* GMAC Registers */
+NPCM_GMAC_MAC_CONFIG = 0x0,
+NPCM_GMAC_FRAME_FILTER = 0x4,
+NPCM_GMAC_HASH_HIGH = 0x8,
+NPCM_GMAC_HASH_LOW = 0xc,
+NPCM_GMAC_MII_ADDR = 0x10,
+NPCM_GMAC_MII_DATA = 0x14,
+NPCM_GMAC_FLOW_CTRL = 0x18,
+NPCM_GMAC_VLAN_FLAG = 0x1c,
+NPCM_GMAC_VERSION = 0x20,
+NPCM_GMAC_WAKEUP_FILTER = 0x28,
+NPCM_GMAC_PMT = 0x2c,
+NPCM_GMAC_LPI_CTRL = 0x30,
+NPCM_GMAC_TIMER_CTRL = 0x34,
+NPCM_GMAC_INT_STATUS = 0x38,
+NPCM_GMAC_INT_MASK = 0x3c,
+NPCM_GMAC_MAC0_ADDR_HI = 0x40,
+NPCM_GMAC_MAC0_ADDR_LO = 0x44,
+NPCM_GMAC_MAC1_ADDR_HI = 0x48,
+NPCM_GMAC_MAC1_ADDR_LO = 0x4c,
+NPCM_GMAC_MAC2_ADDR_HI = 0x50,
+NPCM_GMAC_MAC2_ADDR_LO = 0x54,
+NPCM_GMAC_MAC3_ADDR_HI = 0x58,
+NPCM_GMAC_MAC3_ADDR_LO = 0x5c,
+NPCM_GMAC_RGMII_STATUS = 0xd8,
+NPCM_GMAC_WATCHDOG = 0xdc,
+NPCM_GMAC_PTP_TCR = 0x700,
+NPCM_GMAC_PTP_SSIR = 0x704,
+NPCM_GMAC_PTP_STSR = 0x708,
+NPCM_GMAC_PTP_STNSR = 0x70c,
+NPCM_GMAC_PTP_STSUR = 0x710,
+NPCM_GMAC_PTP_STNSUR = 0x714,
+NPCM_GMAC_PTP_TAR = 0x718,
+NPCM_GMAC_PTP_TTSR = 0x71c,
+} NPCMRegister;
+
+static uint32_t gmac_read(QTestState *qts, const GMACModule *mod,
+  NPCMRegister regno)
+{
+return qtest_readl(qts, mod->base_addr + regno);
+}
+
+/* Check that GMAC registers are reset to default value */
+static void test_init(gconstpointer test_data)
+{
+const TestData *td = test_data;
+const GMACModule *mod = td->module;
+QTestState *qts = qtest_init("-machine npcm845-evb");
+
+#define CHECK_REG32(regno, value) \
+do { \
+g_assert_cmphex(gmac_read(qts, mod, (regno)), ==, (value)); \
+} while (0)
+
+CHECK_REG32(NPCM_DMA_BUS_MODE, 0x00020100);
+CHECK_REG32(NPCM_DMA_XMT_POLL_DEMAND, 0);
+CHECK_REG32(NPCM_DMA_RCV_POLL_DEMAND, 0);
+CHECK_REG32(NPCM_DMA_RCV_BASE_ADDR, 0);
+CHECK_REG32(NPCM_DMA_TX_BASE_ADDR, 0);
+CHECK_REG32(NPCM_DMA_STATUS, 0);
+CHECK_REG32(NPCM_DMA_CONTROL, 0);
+

[PULL 00/36] target-arm queue

2024-02-02 Thread Peter Maydell

The following changes since commit c3709fde5955d13f6d4f86ab46ef3cc2288ca65e:

  Merge tag 'pull-aspeed-20240201' of https://github.com/legoater/qemu into 
staging (2024-02-01 14:42:11 +)

are available in the Git repository at:

  https://git.linaro.org/people/pmaydell/qemu-arm.git 
tags/pull-target-arm-20240202

for you to fetch changes up to f09c2b7ba9908714a3e2f1decd989462536cf731:

  hw/arm: Connect SPI Controller to BCM2835 (2024-02-02 13:51:59 +)


target/arm: fix exception syndrome for AArch32 bkpt insn
pci, vmbus, adb, s390x/css-bridge: Switch buses to 3-phase reset
system/vl.c: Fix handling of '-serial none -serial something'
target/arm: Add ID_AA64ZFR0_EL1.B16B16 to the exposed-to-userspace set
tests/qtest/xlnx-versal-trng-test.c: Drop use of variable length array
target/arm: Reinstate "vfp" property on AArch32 CPUs
doc/sphinx/hxtool.py: add optional label argument to SRST directive
hw/arm: Check for CPU types in machine_run_board_init() for various boards
pci-host: designware: Limit value range of iATU viewport register
hw/arm: Convert some DPRINTF macros to trace events and guest errors
hw/arm: NPCM7XX SoC: Add GMAC ethernet controller devices
hw/arm: Implement BCM2835 SPI Controller


David Woodhouse (1):
  doc/sphinx/hxtool.py: add optional label argument to SRST directive

Guenter Roeck (1):
  pci-host: designware: Limit value range of iATU viewport register

Hao Wu (2):
  hw/net: Add NPCMXXX GMAC device
  hw/arm: Add GMAC devices to NPCM7XX SoC

Jan Klötzke (1):
  target/arm: fix exception syndrome for AArch32 bkpt insn

Manos Pitsidianakis (6):
  hw/arm/strongarm.c: convert DPRINTF to trace events and guest errors
  hw/arm/z2: convert DPRINTF to trace events and guest errors
  hw/arm/xen_arm.c: convert DPRINTF to trace events and error/warn reports
  hw/xen/xen-mapcache.c: convert DPRINTF to tracepoints
  hw/xen/xen-hvm-common.c: convert DPRINTF to tracepoints
  hw/xen: convert stderr prints to error/warn reports

Nabih Estefan Diaz (4):
  tests/qtest: Creating qtest for GMAC Module
  hw/net: GMAC Rx Implementation
  hw/net: GMAC Tx Implementation
  tests/qtest: Adding PCS Module test to GMAC Qtest

Peter Maydell (10):
  pci: Switch bus reset to 3-phase-reset
  vmbus: Switch bus reset to 3-phase-reset
  adb: Switch bus reset to 3-phase-reset
  hw/s390x/css-bridge: switch virtual-css bus to 3-phase-reset
  hw/core: Remove transitional infrastructure from BusClass
  system/vl.c: Fix handling of '-serial none -serial something'
  qemu-options.hx: Improve -serial option documentation
  target/arm: Add ID_AA64ZFR0_EL1.B16B16 to the exposed-to-userspace set
  tests/qtest/xlnx-versal-trng-test.c: Drop use of variable length array
  target/arm: Reinstate "vfp" property on AArch32 CPUs

Philippe Mathieu-Daudé (9):
  hw/arm/exynos: Add missing QOM parent for CPU cores
  hw/arm/exynos: Check for CPU types in machine_run_board_init()
  hw/arm/highbank: Add missing QOM parent for CPU cores
  hw/arm/highbank: Check for CPU types in machine_run_board_init()
  hw/arm/msf2: Simplify setting MachineClass::valid_cpu_types[]
  hw/arm/musca: Simplify setting MachineClass::valid_cpu_types[]
  hw/arm/npcm7xx_boards: Simplify setting MachineClass::valid_cpu_types[]
  hw/arm/vexpress: Check for CPU types in machine_run_board_init()
  hw/arm/zynq: Check for CPU types in machine_run_board_init()

Rayhan Faizel (2):
  hw/ssi: Implement BCM2835 SPI Controller
  hw/arm: Connect SPI Controller to BCM2835

 docs/devel/docs.rst  |  12 +-
 docs/sphinx/hxtool.py|  16 +
 docs/system/arm/raspi.rst|   2 +-
 docs/system/i386/xen.rst |   3 +-
 include/hw/arm/bcm2835_peripherals.h |   3 +-
 include/hw/arm/msf2-soc.h|   3 -
 include/hw/arm/npcm7xx.h |   2 +
 include/hw/net/npcm_gmac.h   | 343 +
 include/hw/qdev-core.h   |   2 -
 include/hw/ssi/bcm2835_spi.h |  81 +++
 target/arm/syndrome.h|   8 +
 hw/arm/bcm2835_peripherals.c |  17 +-
 hw/arm/exynos4210.c  |   1 +
 hw/arm/exynos4_boards.c  |   8 +
 hw/arm/highbank.c|  11 +
 hw/arm/msf2-soc.c|   3 +-
 hw/arm/msf2-som.c|   4 -
 hw/arm/musca.c   |   1 -
 hw/arm/npcm7xx.c |  37 +-
 hw/arm/npcm7xx_boards.c  |   1 -
 hw/arm/strongarm.c   |  82 +--
 hw/arm/vexpress.c|  12 +-
 hw/arm/xen_arm.c |  23 +-
 hw/arm/xilinx_zynq.c |   6 +-
 hw/arm/z2.c  |  27 +-
 hw/core/bus.c|  67 --

[PULL 17/36] hw/arm/msf2: Simplify setting MachineClass::valid_cpu_types[]

2024-02-02 Thread Peter Maydell

From: Philippe Mathieu-Daudé 

The M2Sxxx SoC family can only be used with Cortex-M3.
Propagating the CPU type from the board level is pointless.

Hard-code the CPU type at the SoC level.
Remove the now ignored MachineClass::default_cpu_type field.

Use the common code introduced in commit c9cf636d48 ("machine: Add
a valid_cpu_types property") to check for valid CPU type at the
board level.

Reviewed-by: Richard Henderson 
Signed-off-by: Philippe Mathieu-Daudé 
Message-id: 20240129151828.59544-6-phi...@linaro.org
Signed-off-by: Peter Maydell 
---
 include/hw/arm/msf2-soc.h | 3 ---
 hw/arm/msf2-soc.c | 3 +--
 hw/arm/msf2-som.c | 4 
 3 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/include/hw/arm/msf2-soc.h b/include/hw/arm/msf2-soc.h
index ce417a6266a..9300664e8ea 100644
--- a/include/hw/arm/msf2-soc.h
+++ b/include/hw/arm/msf2-soc.h
@@ -47,13 +47,10 @@ OBJECT_DECLARE_SIMPLE_TYPE(MSF2State, MSF2_SOC)
 #define MSF2_NUM_TIMERS   2
 
 struct MSF2State {
-/*< private >*/
 SysBusDevice parent_obj;
-/*< public >*/
 
 ARMv7MState armv7m;
 
-char *cpu_type;
 char *part_name;
 uint64_t envm_size;
 uint64_t esram_size;
diff --git a/hw/arm/msf2-soc.c b/hw/arm/msf2-soc.c
index b5fe9f364d5..d6eb9ec9ac1 100644
--- a/hw/arm/msf2-soc.c
+++ b/hw/arm/msf2-soc.c
@@ -134,7 +134,7 @@ static void m2sxxx_soc_realize(DeviceState *dev_soc, Error 
**errp)
 
 armv7m = DEVICE(>armv7m);
 qdev_prop_set_uint32(armv7m, "num-irq", 81);
-qdev_prop_set_string(armv7m, "cpu-type", s->cpu_type);
+qdev_prop_set_string(armv7m, "cpu-type", ARM_CPU_TYPE_NAME("cortex-m3"));
 qdev_prop_set_bit(armv7m, "enable-bitband", true);
 qdev_connect_clock_in(armv7m, "cpuclk", s->m3clk);
 qdev_connect_clock_in(armv7m, "refclk", s->refclk);
@@ -231,7 +231,6 @@ static Property m2sxxx_soc_properties[] = {
  * part name specifies the type of SmartFusion2 device variant(this
  * property is for information purpose only.
  */
-DEFINE_PROP_STRING("cpu-type", MSF2State, cpu_type),
 DEFINE_PROP_STRING("part-name", MSF2State, part_name),
 DEFINE_PROP_UINT64("eNVM-size", MSF2State, envm_size, MSF2_ENVM_MAX_SIZE),
 DEFINE_PROP_UINT64("eSRAM-size", MSF2State, esram_size,
diff --git a/hw/arm/msf2-som.c b/hw/arm/msf2-som.c
index a269cf044b9..5c415abe852 100644
--- a/hw/arm/msf2-som.c
+++ b/hw/arm/msf2-som.c
@@ -47,7 +47,6 @@ static void emcraft_sf2_s2s010_init(MachineState *machine)
 DeviceState *dev;
 DeviceState *spi_flash;
 MSF2State *soc;
-MachineClass *mc = MACHINE_GET_CLASS(machine);
 DriveInfo *dinfo = drive_get(IF_MTD, 0, 0);
 qemu_irq cs_line;
 BusState *spi_bus;
@@ -62,8 +61,6 @@ static void emcraft_sf2_s2s010_init(MachineState *machine)
 dev = qdev_new(TYPE_MSF2_SOC);
 object_property_add_child(OBJECT(machine), "soc", OBJECT(dev));
 qdev_prop_set_string(dev, "part-name", "M2S010");
-qdev_prop_set_string(dev, "cpu-type", mc->default_cpu_type);
-
 qdev_prop_set_uint64(dev, "eNVM-size", M2S010_ENVM_SIZE);
 qdev_prop_set_uint64(dev, "eSRAM-size", M2S010_ESRAM_SIZE);
 
@@ -108,7 +105,6 @@ static void emcraft_sf2_machine_init(MachineClass *mc)
 
 mc->desc = "SmartFusion2 SOM kit from Emcraft (M2S010)";
 mc->init = emcraft_sf2_s2s010_init;
-mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-m3");
 mc->valid_cpu_types = valid_cpu_types;
 }
 
-- 
2.34.1

[PULL 02/36] pci: Switch bus reset to 3-phase-reset

2024-02-02 Thread Peter Maydell

Switch the PCI bus from using BusClass::reset to the Resettable
interface.

This has no behavioural change, because the BusClass code to support
subclasses that use the legacy BusClass::reset will call that method
in the hold phase of 3-phase reset.

Signed-off-by: Peter Maydell 
Acked-by: Michael S. Tsirkin 
Acked-by: Cédric Le Goater 
Tested-by: Cédric Le Goater 
Reviewed-by: Zhao Liu 
Message-id: 20240119163512.3810301-2-peter.mayd...@linaro.org
---
 hw/pci/pci.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 76080af580d..05c2e46bda5 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -64,7 +64,7 @@ bool pci_available = true;
 
 static char *pcibus_get_dev_path(DeviceState *dev);
 static char *pcibus_get_fw_dev_path(DeviceState *dev);
-static void pcibus_reset(BusState *qbus);
+static void pcibus_reset_hold(Object *obj);
 static bool pcie_has_upstream_port(PCIDevice *dev);
 
 static Property pci_props[] = {
@@ -202,13 +202,15 @@ static void pci_bus_class_init(ObjectClass *klass, void 
*data)
 {
 BusClass *k = BUS_CLASS(klass);
 PCIBusClass *pbc = PCI_BUS_CLASS(klass);
+ResettableClass *rc = RESETTABLE_CLASS(klass);
 
 k->print_dev = pcibus_dev_print;
 k->get_dev_path = pcibus_get_dev_path;
 k->get_fw_dev_path = pcibus_get_fw_dev_path;
 k->realize = pci_bus_realize;
 k->unrealize = pci_bus_unrealize;
-k->reset = pcibus_reset;
+
+rc->phases.hold = pcibus_reset_hold;
 
 pbc->bus_num = pcibus_num;
 pbc->numa_node = pcibus_numa_node;
@@ -424,9 +426,9 @@ void pci_device_reset(PCIDevice *dev)
  * Called via bus_cold_reset on RST# assert, after the devices
  * have been reset device_cold_reset-ed already.
  */
-static void pcibus_reset(BusState *qbus)
+static void pcibus_reset_hold(Object *obj)
 {
-PCIBus *bus = DO_UPCAST(PCIBus, qbus, qbus);
+PCIBus *bus = PCI_BUS(obj);
 int i;
 
 for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
-- 
2.34.1

[PULL 32/36] hw/net: GMAC Rx Implementation

2024-02-02 Thread Peter Maydell

From: Nabih Estefan Diaz 

- Implementation of Receive function for packets
- Implementation for reading and writing from and to descriptors in
  memory for Rx

When RX starts, we need to flush the queued packets so that they
can be received by the GMAC device. Without this it won't work
with TAP NIC device.

When RX descriptor list is full, it returns a DMA_STATUS for
software to handle it. But there's no way to indicate the software has
handled all RX descriptors and the whole pipeline stalls.

We do something similar to NPCM7XX EMC to handle this case.

1. Return packet size when RX descriptor is full, effectively dropping
these packets in such a case.
2. When software clears RX descriptor full bit, continue receiving
further packets by flushing QEMU packet queue.

Added relevant trace-events

Change-Id: I132aa254a94cda1a586aba2ea33bbfc74ecdb831
Signed-off-by: Hao Wu 
Signed-off-by: Nabih Estefan 
Reviewed-by: Tyrone Ting 
Message-id: 20240131002800.989285-5-nabiheste...@google.com
Reviewed-by: Peter Maydell 
Signed-off-by: Peter Maydell 
---
 hw/net/npcm_gmac.c  | 276 +++-
 hw/net/trace-events |   5 +
 2 files changed, 279 insertions(+), 2 deletions(-)

diff --git a/hw/net/npcm_gmac.c b/hw/net/npcm_gmac.c
index 7118b4c7c78..a3c626e1b83 100644
--- a/hw/net/npcm_gmac.c
+++ b/hw/net/npcm_gmac.c
@@ -27,6 +27,10 @@
 #include "hw/net/mii.h"
 #include "hw/net/npcm_gmac.h"
 #include "migration/vmstate.h"
+#include "net/checksum.h"
+#include "net/eth.h"
+#include "net/net.h"
+#include "qemu/cutils.h"
 #include "qemu/log.h"
 #include "qemu/units.h"
 #include "sysemu/dma.h"
@@ -149,6 +153,17 @@ static void gmac_phy_set_link(NPCMGMACState *gmac, bool 
active)
 
 static bool gmac_can_receive(NetClientState *nc)
 {
+NPCMGMACState *gmac = NPCM_GMAC(qemu_get_nic_opaque(nc));
+
+/* If GMAC receive is disabled. */
+if (!(gmac->regs[R_NPCM_GMAC_MAC_CONFIG] & NPCM_GMAC_MAC_CONFIG_RX_EN)) {
+return false;
+}
+
+/* If GMAC DMA RX is stopped. */
+if (!(gmac->regs[R_NPCM_DMA_CONTROL] & NPCM_DMA_CONTROL_START_STOP_RX)) {
+return false;
+}
 return true;
 }
 
@@ -192,10 +207,256 @@ static void gmac_update_irq(NPCMGMACState *gmac)
 qemu_set_irq(gmac->irq, level);
 }
 
+static int gmac_read_rx_desc(dma_addr_t addr, struct NPCMGMACRxDesc *desc)
+{
+if (dma_memory_read(_space_memory, addr, desc,
+sizeof(*desc), MEMTXATTRS_UNSPECIFIED)) {
+qemu_log_mask(LOG_GUEST_ERROR, "%s: Failed to read descriptor @ 0x%"
+  HWADDR_PRIx "\n", __func__, addr);
+return -1;
+}
+desc->rdes0 = le32_to_cpu(desc->rdes0);
+desc->rdes1 = le32_to_cpu(desc->rdes1);
+desc->rdes2 = le32_to_cpu(desc->rdes2);
+desc->rdes3 = le32_to_cpu(desc->rdes3);
+return 0;
+}
+
+static int gmac_write_rx_desc(dma_addr_t addr, struct NPCMGMACRxDesc *desc)
+{
+struct NPCMGMACRxDesc le_desc;
+le_desc.rdes0 = cpu_to_le32(desc->rdes0);
+le_desc.rdes1 = cpu_to_le32(desc->rdes1);
+le_desc.rdes2 = cpu_to_le32(desc->rdes2);
+le_desc.rdes3 = cpu_to_le32(desc->rdes3);
+if (dma_memory_write(_space_memory, addr, _desc,
+sizeof(le_desc), MEMTXATTRS_UNSPECIFIED)) {
+qemu_log_mask(LOG_GUEST_ERROR, "%s: Failed to write descriptor @ 0x%"
+  HWADDR_PRIx "\n", __func__, addr);
+return -1;
+}
+return 0;
+}
+
+static int gmac_rx_transfer_frame_to_buffer(uint32_t rx_buf_len,
+uint32_t *left_frame,
+uint32_t rx_buf_addr,
+bool *eof_transferred,
+const uint8_t **frame_ptr,
+uint16_t *transferred)
+{
+uint32_t to_transfer;
+/*
+ * Check that buffer is bigger than the frame being transfered
+ * If bigger then transfer only whats left of frame
+ * Else, fill frame with all the content possible
+ */
+if (rx_buf_len >= *left_frame) {
+to_transfer = *left_frame;
+*eof_transferred = true;
+} else {
+to_transfer = rx_buf_len;
+}
+
+/* write frame part to memory */
+if (dma_memory_write(_space_memory, (uint64_t) rx_buf_addr,
+ *frame_ptr, to_transfer, MEMTXATTRS_UNSPECIFIED)) {
+return -1;
+}
+
+/* update frame pointer and size of whats left of frame */
+*frame_ptr += to_transfer;
+*left_frame -= to_transfer;
+*transferred += to_transfer;
+
+return 0;
+}
+
+static void gmac_dma_set_state(NPCMGMACState *gmac, int shift, uint32_t state)
+{
+gmac->regs[R_NPCM_DMA_STATUS] = deposit32(gmac->regs[R_NPCM_DMA_STATUS],
+shift, 3, state);
+}
+
 static ssize_t gmac_receive(NetClientState *nc, const uint8_t *buf, size_t len)
 {
-/* Placeholder. Function will be filled in following patches */
-

[PULL 11/36] target/arm: Reinstate "vfp" property on AArch32 CPUs

2024-02-02 Thread Peter Maydell

In commit 4315f7c614743 we restructured the logic for creating the
VFP related properties to avoid testing the aa32_simd_r32 feature on
AArch64 CPUs.  However in the process we accidentally stopped
exposing the "vfp" QOM property on AArch32 TCG CPUs.

This mostly hasn't had any ill effects because not many people want
to disable VFP, but it wasn't intentional.  Reinstate the property.

Cc: qemu-sta...@nongnu.org
Fixes: 4315f7c614743 ("target/arm: Restructure has_vfp_d32 test")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2098
Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
Message-id: 20240126193432.2210558-1-peter.mayd...@linaro.org
---
 target/arm/cpu.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 6a96b245f2c..1ce26e56e32 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1627,6 +1627,10 @@ void arm_cpu_post_init(Object *obj)
 }
 } else if (cpu_isar_feature(aa32_vfp, cpu)) {
 cpu->has_vfp = true;
+if (tcg_enabled() || qtest_enabled()) {
+qdev_property_add_static(DEVICE(obj),
+ _cpu_has_vfp_property);
+}
 if (cpu_isar_feature(aa32_simd_r32, cpu)) {
 cpu->has_vfp_d32 = true;
 /*
-- 
2.34.1

[PULL 33/36] hw/net: GMAC Tx Implementation

2024-02-02 Thread Peter Maydell

From: Nabih Estefan Diaz 

- Implementation of Transmit function for packets
- Implementation for reading and writing from and to descriptors in
  memory for Tx

Added relevant trace-events

NOTE: This function implements the steps detailed in the datasheet for
transmitting messages from the GMAC.

Change-Id: Icf14f9fcc6cc7808a41acd872bca67c9832087e6
Signed-off-by: Nabih Estefan 
Reviewed-by: Tyrone Ting 
Message-id: 20240131002800.989285-6-nabiheste...@google.com
Reviewed-by: Peter Maydell 
Signed-off-by: Peter Maydell 
---
 hw/net/npcm_gmac.c  | 203 
 hw/net/trace-events |   2 +
 2 files changed, 205 insertions(+)

diff --git a/hw/net/npcm_gmac.c b/hw/net/npcm_gmac.c
index a3c626e1b83..1b71e2526e3 100644
--- a/hw/net/npcm_gmac.c
+++ b/hw/net/npcm_gmac.c
@@ -238,6 +238,37 @@ static int gmac_write_rx_desc(dma_addr_t addr, struct 
NPCMGMACRxDesc *desc)
 return 0;
 }
 
+static int gmac_read_tx_desc(dma_addr_t addr, struct NPCMGMACTxDesc *desc)
+{
+if (dma_memory_read(_space_memory, addr, desc,
+sizeof(*desc), MEMTXATTRS_UNSPECIFIED)) {
+qemu_log_mask(LOG_GUEST_ERROR, "%s: Failed to read descriptor @ 0x%"
+  HWADDR_PRIx "\n", __func__, addr);
+return -1;
+}
+desc->tdes0 = le32_to_cpu(desc->tdes0);
+desc->tdes1 = le32_to_cpu(desc->tdes1);
+desc->tdes2 = le32_to_cpu(desc->tdes2);
+desc->tdes3 = le32_to_cpu(desc->tdes3);
+return 0;
+}
+
+static int gmac_write_tx_desc(dma_addr_t addr, struct NPCMGMACTxDesc *desc)
+{
+struct NPCMGMACTxDesc le_desc;
+le_desc.tdes0 = cpu_to_le32(desc->tdes0);
+le_desc.tdes1 = cpu_to_le32(desc->tdes1);
+le_desc.tdes2 = cpu_to_le32(desc->tdes2);
+le_desc.tdes3 = cpu_to_le32(desc->tdes3);
+if (dma_memory_write(_space_memory, addr, _desc,
+sizeof(le_desc), MEMTXATTRS_UNSPECIFIED)) {
+qemu_log_mask(LOG_GUEST_ERROR, "%s: Failed to write descriptor @ 0x%"
+  HWADDR_PRIx "\n", __func__, addr);
+return -1;
+}
+return 0;
+}
+
 static int gmac_rx_transfer_frame_to_buffer(uint32_t rx_buf_len,
 uint32_t *left_frame,
 uint32_t rx_buf_addr,
@@ -459,6 +490,155 @@ static ssize_t gmac_receive(NetClientState *nc, const 
uint8_t *buf, size_t len)
 return len;
 }
 
+static int gmac_tx_get_csum(uint32_t tdes1)
+{
+uint32_t mask = TX_DESC_TDES1_CHKSM_INS_CTRL_MASK(tdes1);
+int csum = 0;
+
+if (likely(mask > 0)) {
+csum |= CSUM_IP;
+}
+if (likely(mask > 1)) {
+csum |= CSUM_TCP | CSUM_UDP;
+}
+
+return csum;
+}
+
+static void gmac_try_send_next_packet(NPCMGMACState *gmac)
+{
+/*
+ * Comments about steps refer to steps for
+ * transmitting in page 384 of datasheet
+ */
+uint16_t tx_buffer_size = 2048;
+g_autofree uint8_t *tx_send_buffer = g_malloc(tx_buffer_size);
+uint32_t desc_addr;
+struct NPCMGMACTxDesc tx_desc;
+uint32_t tx_buf_addr, tx_buf_len;
+uint16_t length = 0;
+uint8_t *buf = tx_send_buffer;
+uint32_t prev_buf_size = 0;
+int csum = 0;
+
+/* steps 1&2 */
+if (!gmac->regs[R_NPCM_DMA_HOST_TX_DESC]) {
+gmac->regs[R_NPCM_DMA_HOST_TX_DESC] =
+NPCM_DMA_HOST_TX_DESC_MASK(gmac->regs[R_NPCM_DMA_TX_BASE_ADDR]);
+}
+desc_addr = gmac->regs[R_NPCM_DMA_HOST_TX_DESC];
+
+while (true) {
+gmac_dma_set_state(gmac, NPCM_DMA_STATUS_TX_PROCESS_STATE_SHIFT,
+NPCM_DMA_STATUS_TX_RUNNING_FETCHING_STATE);
+if (gmac_read_tx_desc(desc_addr, _desc)) {
+qemu_log_mask(LOG_GUEST_ERROR,
+  "TX Descriptor @ 0x%x can't be read\n",
+  desc_addr);
+return;
+}
+/* step 3 */
+
+trace_npcm_gmac_packet_desc_read(DEVICE(gmac)->canonical_path,
+desc_addr);
+trace_npcm_gmac_debug_desc_data(DEVICE(gmac)->canonical_path, _desc,
+tx_desc.tdes0, tx_desc.tdes1, tx_desc.tdes2, tx_desc.tdes3);
+
+/* 1 = DMA Owned, 0 = Software Owned */
+if (!(tx_desc.tdes0 & TX_DESC_TDES0_OWN)) {
+qemu_log_mask(LOG_GUEST_ERROR,
+  "TX Descriptor @ 0x%x is owned by software\n",
+  desc_addr);
+gmac->regs[R_NPCM_DMA_STATUS] |= NPCM_DMA_STATUS_TU;
+gmac_dma_set_state(gmac, NPCM_DMA_STATUS_TX_PROCESS_STATE_SHIFT,
+NPCM_DMA_STATUS_TX_SUSPENDED_STATE);
+gmac_update_irq(gmac);
+return;
+}
+
+gmac_dma_set_state(gmac, NPCM_DMA_STATUS_TX_PROCESS_STATE_SHIFT,
+NPCM_DMA_STATUS_TX_RUNNING_READ_STATE);
+/* Give the descriptor back regardless of what happens. */
+tx_desc.tdes0 &= ~TX_DESC_TDES0_OWN;
+
+if (tx_desc.tdes1 & TX_DESC_TDES1_FIRST_SEG_MASK) {
+csum =

[PULL 25/36] hw/arm/xen_arm.c: convert DPRINTF to trace events and error/warn reports

2024-02-02 Thread Peter Maydell

From: Manos Pitsidianakis 

Tracing DPRINTFs to stderr might not be desired. A developer that relies
on trace events should be able to opt-in to each trace event and rely on
QEMU's log redirection, instead of stderr by default.

This commit converts DPRINTFs in this file that are used for tracing
into trace events. Errors or warnings are converted to error_report and
warn_report calls.

Signed-off-by: Manos Pitsidianakis 
Reviewed-by: Alex Bennée 
Message-id: 
fe5e3bd54231abe933f95a24e0e88208cd8cfd8f.1706544115.git.manos.pitsidiana...@linaro.org
Signed-off-by: Peter Maydell 
---
 hw/arm/xen_arm.c| 23 +++
 hw/arm/trace-events |  5 +
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/hw/arm/xen_arm.c b/hw/arm/xen_arm.c
index a5631529d09..32776d94dfe 100644
--- a/hw/arm/xen_arm.c
+++ b/hw/arm/xen_arm.c
@@ -34,6 +34,7 @@
 #include "hw/xen/xen-hvm-common.h"
 #include "sysemu/tpm.h"
 #include "hw/xen/arch_hvm.h"
+#include "trace.h"
 
 #define TYPE_XEN_ARM  MACHINE_TYPE_NAME("xenpvh")
 OBJECT_DECLARE_SIMPLE_TYPE(XenArmState, XEN_ARM)
@@ -91,8 +92,9 @@ static void xen_create_virtio_mmio_devices(XenArmState *xam)
 
 sysbus_create_simple("virtio-mmio", base, irq);
 
-DPRINTF("Created virtio-mmio device %d: irq %d base 0x%lx\n",
-i, GUEST_VIRTIO_MMIO_SPI_FIRST + i, base);
+trace_xen_create_virtio_mmio_devices(i,
+ GUEST_VIRTIO_MMIO_SPI_FIRST + i,
+ base);
 }
 }
 
@@ -101,6 +103,7 @@ static void xen_init_ram(MachineState *machine)
 MemoryRegion *sysmem = get_system_memory();
 ram_addr_t block_len, ram_size[GUEST_RAM_BANKS];
 
+trace_xen_init_ram(machine->ram_size);
 if (machine->ram_size <= GUEST_RAM0_SIZE) {
 ram_size[0] = machine->ram_size;
 ram_size[1] = 0;
@@ -117,15 +120,10 @@ static void xen_init_ram(MachineState *machine)
 memory_region_init_alias(_lo, NULL, "xen.ram.lo", _memory,
  GUEST_RAM0_BASE, ram_size[0]);
 memory_region_add_subregion(sysmem, GUEST_RAM0_BASE, _lo);
-DPRINTF("Initialized region xen.ram.lo: base 0x%llx size 0x%lx\n",
-GUEST_RAM0_BASE, ram_size[0]);
-
 if (ram_size[1] > 0) {
 memory_region_init_alias(_hi, NULL, "xen.ram.hi", _memory,
  GUEST_RAM1_BASE, ram_size[1]);
 memory_region_add_subregion(sysmem, GUEST_RAM1_BASE, _hi);
-DPRINTF("Initialized region xen.ram.hi: base 0x%llx size 0x%lx\n",
-GUEST_RAM1_BASE, ram_size[1]);
 }
 }
 
@@ -158,7 +156,7 @@ static void xen_enable_tpm(XenArmState *xam)
 
 TPMBackend *be = qemu_find_tpm_be("tpm0");
 if (be == NULL) {
-DPRINTF("Couldn't fine the backend for tpm0\n");
+error_report("Couldn't find tmp0 backend");
 return;
 }
 dev = qdev_new(TYPE_TPM_TIS_SYSBUS);
@@ -168,7 +166,7 @@ static void xen_enable_tpm(XenArmState *xam)
 sysbus_realize_and_unref(busdev, _fatal);
 sysbus_mmio_map(busdev, 0, xam->cfg.tpm_base_addr);
 
-DPRINTF("Connected tpmdev at address 0x%lx\n", xam->cfg.tpm_base_addr);
+trace_xen_enable_tpm(xam->cfg.tpm_base_addr);
 }
 #endif
 
@@ -179,8 +177,9 @@ static void xen_arm_init(MachineState *machine)
 xam->state =  g_new0(XenIOState, 1);
 
 if (machine->ram_size == 0) {
-DPRINTF("ram_size not specified. QEMU machine started without IOREQ"
-"(no emulated devices including Virtio)\n");
+warn_report("%s non-zero ram size not specified. QEMU machine started"
+" without IOREQ (no emulated devices including virtio)",
+MACHINE_CLASS(object_get_class(OBJECT(machine)))->desc);
 return;
 }
 
@@ -194,7 +193,7 @@ static void xen_arm_init(MachineState *machine)
 if (xam->cfg.tpm_base_addr) {
 xen_enable_tpm(xam);
 } else {
-DPRINTF("tpm-base-addr is not provided. TPM will not be enabled\n");
+warn_report("tpm-base-addr is not provided. TPM will not be enabled");
 }
 #endif
 }
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index 0ff41e6c780..fd0d92762e4 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -65,3 +65,8 @@ z2_lcd_enable_disable_result(const char *result) "LCD %s"
 z2_aer915_send_too_long(int8_t msg) "message too long (%i bytes)"
 z2_aer915_send(uint8_t reg, uint8_t value) "reg %d value 0x%02x"
 z2_aer915_event(int8_t event, int8_t len) "i2c event =0x%x len=%d bytes"
+
+# xen_arm.c
+xen_create_virtio_mmio_devices(int i, int irq, uint64_t base) "Created 
virtio-mmio device %d: irq %d base 0x%"PRIx64
+xen_init_ram(uint64_t machine_ram_size) "Initialized xen ram with size 
0x%"PRIx64
+xen_enable_tpm(uint64_t addr) "Connected tpmdev at address 0x%"PRIx64
-- 
2.34.1

[PULL 29/36] hw/net: Add NPCMXXX GMAC device

2024-02-02 Thread Peter Maydell

From: Hao Wu 

This patch implements the basic registers of GMAC device and sets
registers for networking functionalities.
Squashed IRQ Implementation patch into this one for compliation.
Tested:
The following message shows up with the change:
Broadcom BCM54612E stmmac-0:00: attached PHY driver [Broadcom BCM54612E] 
(mii_bus:phy_addr=stmmac-0:00, irq=POLL)
stmmaceth f0802000.eth eth0: Link is Up - 1Gbps/Full - flow control rx/tx

Change-Id: If71c6d486b95edcccba109ba454870714d7e0940
Signed-off-by: Hao Wu 
Signed-off-by: Nabih Estefan Diaz 
Reviewed-by: Tyrone Ting 
Message-id: 20240131002800.989285-2-nabiheste...@google.com
Reviewed-by: Peter Maydell 
Signed-off-by: Peter Maydell 
---
 include/hw/net/npcm_gmac.h | 343 +++
 hw/net/npcm_gmac.c | 467 +
 hw/net/meson.build |   2 +-
 hw/net/trace-events|  12 +
 4 files changed, 823 insertions(+), 1 deletion(-)
 create mode 100644 include/hw/net/npcm_gmac.h
 create mode 100644 hw/net/npcm_gmac.c

diff --git a/include/hw/net/npcm_gmac.h b/include/hw/net/npcm_gmac.h
new file mode 100644
index 000..f2d9f08ec1a
--- /dev/null
+++ b/include/hw/net/npcm_gmac.h
@@ -0,0 +1,343 @@
+/*
+ * Nuvoton NPCM7xx/8xx GMAC Module
+ *
+ * Copyright 2024 Google LLC
+ * Authors:
+ * Hao Wu 
+ * Nabih Estefan 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef NPCM_GMAC_H
+#define NPCM_GMAC_H
+
+#include "hw/irq.h"
+#include "hw/sysbus.h"
+#include "net/net.h"
+
+#define NPCM_GMAC_NR_REGS (0x1060 / sizeof(uint32_t))
+
+#define NPCM_GMAC_MAX_PHYS 32
+#define NPCM_GMAC_MAX_PHY_REGS 32
+
+struct NPCMGMACRxDesc {
+uint32_t rdes0;
+uint32_t rdes1;
+uint32_t rdes2;
+uint32_t rdes3;
+};
+
+/* NPCMGMACRxDesc.flags values */
+/* RDES2 and RDES3 are buffer addresses */
+/* Owner: 0 = software, 1 = dma */
+#define RX_DESC_RDES0_OWN BIT(31)
+/* Destination Address Filter Fail */
+#define RX_DESC_RDES0_DEST_ADDR_FILT_FAIL BIT(30)
+/* Frame length */
+#define RX_DESC_RDES0_FRAME_LEN_MASK(word) extract32(word, 16, 14)
+/* Frame length Shift*/
+#define RX_DESC_RDES0_FRAME_LEN_SHIFT 16
+/* Error Summary */
+#define RX_DESC_RDES0_ERR_SUMM_MASK BIT(15)
+/* Descriptor Error */
+#define RX_DESC_RDES0_DESC_ERR_MASK BIT(14)
+/* Source Address Filter Fail */
+#define RX_DESC_RDES0_SRC_ADDR_FILT_FAIL_MASK BIT(13)
+/* Length Error */
+#define RX_DESC_RDES0_LEN_ERR_MASK BIT(12)
+/* Overflow Error */
+#define RX_DESC_RDES0_OVRFLW_ERR_MASK BIT(11)
+/* VLAN Tag */
+#define RX_DESC_RDES0_VLAN_TAG_MASK BIT(10)
+/* First Descriptor */
+#define RX_DESC_RDES0_FIRST_DESC_MASK BIT(9)
+/* Last Descriptor */
+#define RX_DESC_RDES0_LAST_DESC_MASK BIT(8)
+/* IPC Checksum Error/Giant Frame */
+#define RX_DESC_RDES0_IPC_CHKSM_ERR_GNT_FRM_MASK BIT(7)
+/* Late Collision */
+#define RX_DESC_RDES0_LT_COLL_MASK BIT(6)
+/* Frame Type */
+#define RX_DESC_RDES0_FRM_TYPE_MASK BIT(5)
+/* Receive Watchdog Timeout */
+#define RX_DESC_RDES0_REC_WTCHDG_TMT_MASK BIT(4)
+/* Receive Error */
+#define RX_DESC_RDES0_RCV_ERR_MASK BIT(3)
+/* Dribble Bit Error */
+#define RX_DESC_RDES0_DRBL_BIT_ERR_MASK BIT(2)
+/* Cyclcic Redundancy Check Error */
+#define RX_DESC_RDES0_CRC_ERR_MASK BIT(1)
+/* Rx MAC Address/Payload Checksum Error */
+#define RC_DESC_RDES0_RCE_MASK BIT(0)
+
+/* Disable Interrupt on Completion */
+#define RX_DESC_RDES1_DIS_INTR_COMP_MASK BIT(31)
+/* Recieve end of ring */
+#define RX_DESC_RDES1_RC_END_RING_MASK BIT(25)
+/* Second Address Chained */
+#define RX_DESC_RDES1_SEC_ADDR_CHND_MASK BIT(24)
+/* Receive Buffer 2 Size */
+#define RX_DESC_RDES1_BFFR2_SZ_SHIFT 11
+#define RX_DESC_RDES1_BFFR2_SZ_MASK(word) extract32(word, \
+RX_DESC_RDES1_BFFR2_SZ_SHIFT, 11)
+/* Receive Buffer 1 Size */
+#define RX_DESC_RDES1_BFFR1_SZ_MASK(word) extract32(word, 0, 11)
+
+
+struct NPCMGMACTxDesc {
+uint32_t tdes0;
+uint32_t tdes1;
+uint32_t tdes2;
+uint32_t tdes3;
+};
+
+/* NPCMGMACTxDesc.flags values */
+/* TDES2 and TDES3 are buffer addresses */
+/* Owner: 0 = software, 1 = gmac */
+#define TX_DESC_TDES0_OWN BIT(31)
+/* Tx Time Stamp Status */
+#define TX_DESC_TDES0_TTSS_MASK BIT(17)
+/* IP Header Error */
+#define TX_DESC_TDES0_IP_HEAD_ERR_MASK BIT(16)
+/* Error Summary */
+#define TX_DESC_TDES0_ERR_SUMM_MASK BIT(15)
+/* Jabber Timeout */
+#define TX_DESC_TDES0_JBBR_TMT_MASK BIT(14)
+/* Frame Flushed */
+#define TX_DESC_TDES0_FRM_FLSHD_MASK BIT(13)
+/* Payload Checksum Error */
+#define TX_DESC_TDES0_PYLD_CHKSM_ERR_MASK BIT(12)
+/* Loss of Carrier */

[PULL 18/36] hw/arm/musca: Simplify setting MachineClass::valid_cpu_types[]

2024-02-02 Thread Peter Maydell

From: Philippe Mathieu-Daudé 

Musca boards use the embedded subsystems (SSE) tied to a specific
Cortex core. Our models only use the Cortex-M33.

Use the common code introduced in commit c9cf636d48 ("machine: Add
a valid_cpu_types property") to check for valid CPU type at the
board level.

Remove the now unused MachineClass::default_cpu_type field.

Reviewed-by: Richard Henderson 
Signed-off-by: Philippe Mathieu-Daudé 
Message-id: 20240129151828.59544-7-phi...@linaro.org
Signed-off-by: Peter Maydell 
---
 hw/arm/musca.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hw/arm/musca.c b/hw/arm/musca.c
index 770ec1a15ca..e2c9d49af58 100644
--- a/hw/arm/musca.c
+++ b/hw/arm/musca.c
@@ -605,7 +605,6 @@ static void musca_class_init(ObjectClass *oc, void *data)
 mc->default_cpus = 2;
 mc->min_cpus = mc->default_cpus;
 mc->max_cpus = mc->default_cpus;
-mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-m33");
 mc->valid_cpu_types = valid_cpu_types;
 mc->init = musca_init;
 }
-- 
2.34.1

[PULL 13/36] hw/arm/exynos: Add missing QOM parent for CPU cores

2024-02-02 Thread Peter Maydell

From: Philippe Mathieu-Daudé 

QDev objects created with qdev_new() need to manually add
their parent relationship with object_property_add_child().

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
Reviewed-by: Gavin Shan 
Message-id: 20240129151828.59544-2-phi...@linaro.org
Signed-off-by: Peter Maydell 
---
 hw/arm/exynos4210.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/arm/exynos4210.c b/hw/arm/exynos4210.c
index 6c428d8eeb4..57c77b140c2 100644
--- a/hw/arm/exynos4210.c
+++ b/hw/arm/exynos4210.c
@@ -556,6 +556,7 @@ static void exynos4210_realize(DeviceState *socdev, Error 
**errp)
 for (n = 0; n < EXYNOS4210_NCPUS; n++) {
 Object *cpuobj = object_new(ARM_CPU_TYPE_NAME("cortex-a9"));
 
+object_property_add_child(OBJECT(s), "cpu[*]", cpuobj);
 /* By default A9 CPUs have EL3 enabled.  This board does not currently
  * support EL3 so the CPU EL3 property is disabled before realization.
  */
-- 
2.34.1

[PULL 08/36] qemu-options.hx: Improve -serial option documentation

2024-02-02 Thread Peter Maydell

The -serial option documentation is a bit brief about '-serial none'
and '-serial null'. In particular it's not very clear about the
difference between them, and it doesn't mention that it's up to
the machine model whether '-serial none' means "don't create the
serial port" or "don't wire the serial port up to anything".

Expand on these points.

Signed-off-by: Peter Maydell 
Reviewed-by: Daniel P. Berrangé 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 20240122163607.459769-3-peter.mayd...@linaro.org
---
 qemu-options.hx | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/qemu-options.hx b/qemu-options.hx
index 484cc21c1fd..40e938c4877 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4129,7 +4129,8 @@ SRST
 This option can be used several times to simulate up to 4 serial
 ports.
 
-Use ``-serial none`` to disable all serial ports.
+You can use ``-serial none`` to suppress the creation of default
+serial devices.
 
 Available character devices are:
 
@@ -4151,10 +4152,17 @@ SRST
 [Linux only] Pseudo TTY (a new PTY is automatically allocated)
 
 ``none``
-No device is allocated.
+No device is allocated. Note that for machine types which
+emulate systems where a serial device is always present in
+real hardware, this may be equivalent to the ``null`` option,
+in that the serial device is still present but all output
+is discarded. For boards where the number of serial ports is
+truly variable, this suppresses the creation of the device.
 
 ``null``
-void device
+A guest will see the UART or serial device as present in the
+machine, but all output is discarded, and there is no input.
+Conceptually equivalent to redirecting the output to ``/dev/null``.
 
 ``chardev:id``
 Use a named character device defined with the ``-chardev``
-- 
2.34.1

[PULL 19/36] hw/arm/npcm7xx_boards: Simplify setting MachineClass::valid_cpu_types[]

2024-02-02 Thread Peter Maydell

From: Philippe Mathieu-Daudé 

The npcm7xx Soc is created with a Cortex-A9 core, see in
hw/arm/npcm7xx.c:

  static void npcm7xx_init(Object *obj)
  {
  NPCM7xxState *s = NPCM7XX(obj);

  for (int i = 0; i < NPCM7XX_MAX_NUM_CPUS; i++) {
  object_initialize_child(obj, "cpu[*]", >cpu[i],
  ARM_CPU_TYPE_NAME("cortex-a9"));
  }

The MachineClass::default_cpu_type field is ignored: delete it.

Use the common code introduced in commit c9cf636d48 ("machine: Add
a valid_cpu_types property") to check for valid CPU type at the
board level.

Reviewed-by: Richard Henderson 
Signed-off-by: Philippe Mathieu-Daudé 
Message-id: 20240129151828.59544-8-phi...@linaro.org
Signed-off-by: Peter Maydell 
---
 hw/arm/npcm7xx_boards.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c
index 2999b8b96d0..e229efb4472 100644
--- a/hw/arm/npcm7xx_boards.c
+++ b/hw/arm/npcm7xx_boards.c
@@ -465,7 +465,6 @@ static void npcm7xx_machine_class_init(ObjectClass *oc, 
void *data)
 mc->no_cdrom = 1;
 mc->no_parallel = 1;
 mc->default_ram_id = "ram";
-mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a9");
 mc->valid_cpu_types = valid_cpu_types;
 }
 
-- 
2.34.1

[PULL 06/36] hw/core: Remove transitional infrastructure from BusClass

2024-02-02 Thread Peter Maydell

BusClass currently has transitional infrastructure to support
subclasses which implement the legacy BusClass::reset method rather
than the Resettable interface.  We have now removed all the users of
BusClass::reset in the tree, so we can remove the transitional
infrastructure.

Signed-off-by: Peter Maydell 
Acked-by: Michael S. Tsirkin 
Acked-by: Cédric Le Goater 
Acked-by: Maciej S. Szmigiero 
Tested-by: Cédric Le Goater 
Reviewed-by: Zhao Liu 
Message-id: 20240119163512.3810301-6-peter.mayd...@linaro.org
---
 include/hw/qdev-core.h |  2 --
 hw/core/bus.c  | 67 --
 2 files changed, 69 deletions(-)

diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 66338f479fe..d47536eadb1 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -329,8 +329,6 @@ struct BusClass {
  */
 char *(*get_fw_dev_path)(DeviceState *dev);
 
-void (*reset)(BusState *bus);
-
 /*
  * Return whether the device can be added to @bus,
  * based on the address that was set (via device properties)
diff --git a/hw/core/bus.c b/hw/core/bus.c
index c7831b5293b..b9d89495cdf 100644
--- a/hw/core/bus.c
+++ b/hw/core/bus.c
@@ -232,57 +232,6 @@ static char *default_bus_get_fw_dev_path(DeviceState *dev)
 return g_strdup(object_get_typename(OBJECT(dev)));
 }
 
-/**
- * bus_phases_reset:
- * Transition reset method for buses to allow moving
- * smoothly from legacy reset method to multi-phases
- */
-static void bus_phases_reset(BusState *bus)
-{
-ResettableClass *rc = RESETTABLE_GET_CLASS(bus);
-
-if (rc->phases.enter) {
-rc->phases.enter(OBJECT(bus), RESET_TYPE_COLD);
-}
-if (rc->phases.hold) {
-rc->phases.hold(OBJECT(bus));
-}
-if (rc->phases.exit) {
-rc->phases.exit(OBJECT(bus));
-}
-}
-
-static void bus_transitional_reset(Object *obj)
-{
-BusClass *bc = BUS_GET_CLASS(obj);
-
-/*
- * This will call either @bus_phases_reset (for multi-phases transitioned
- * buses) or a bus's specific method for not-yet transitioned buses.
- * In both case, it does not reset children.
- */
-if (bc->reset) {
-bc->reset(BUS(obj));
-}
-}
-
-/**
- * bus_get_transitional_reset:
- * check if the bus's class is ready for multi-phase
- */
-static ResettableTrFunction bus_get_transitional_reset(Object *obj)
-{
-BusClass *dc = BUS_GET_CLASS(obj);
-if (dc->reset != bus_phases_reset) {
-/*
- * dc->reset has been overridden by a subclass,
- * the bus is not ready for multi phase yet.
- */
-return bus_transitional_reset;
-}
-return NULL;
-}
-
 static void bus_class_init(ObjectClass *class, void *data)
 {
 BusClass *bc = BUS_CLASS(class);
@@ -293,22 +242,6 @@ static void bus_class_init(ObjectClass *class, void *data)
 
 rc->get_state = bus_get_reset_state;
 rc->child_foreach = bus_reset_child_foreach;
-
-/*
- * @bus_phases_reset is put as the default reset method below, allowing
- * to do the multi-phase transition from base classes to leaf classes. It
- * allows a legacy-reset Bus class to extend a multi-phases-reset
- * Bus class for the following reason:
- * + If a base class B has been moved to multi-phase, then it does not
- *   override this default reset method and may have defined phase methods.
- * + A child class C (extending class B) which uses
- *   bus_class_set_parent_reset() (or similar means) to override the
- *   reset method will still work as expected. @bus_phases_reset function
- *   will be registered as the parent reset method and effectively call
- *   parent reset phases.
- */
-bc->reset = bus_phases_reset;
-rc->get_transitional_function = bus_get_transitional_reset;
 }
 
 static void qbus_finalize(Object *obj)
-- 
2.34.1

[PULL 05/36] hw/s390x/css-bridge: switch virtual-css bus to 3-phase-reset

2024-02-02 Thread Peter Maydell

Switch the s390x virtual-css bus from using BusClass::reset to the
Resettable interface.

This has no behavioural change, because the BusClass code to support
subclasses that use the legacy BusClass::reset will call that method
in the hold phase of 3-phase reset.

Signed-off-by: Peter Maydell 
Acked-by: Michael S. Tsirkin 
Acked-by: Cédric Le Goater 
Acked-by: Maciej S. Szmigiero 
Tested-by: Cédric Le Goater 
Reviewed-by: Halil Pasic 
Reviewed-by: Eric Farman 
Reviewed-by: Zhao Liu 
Message-id: 20240119163512.3810301-5-peter.mayd...@linaro.org
---
 hw/s390x/css-bridge.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/hw/s390x/css-bridge.c b/hw/s390x/css-bridge.c
index 15d26efc951..34639f21435 100644
--- a/hw/s390x/css-bridge.c
+++ b/hw/s390x/css-bridge.c
@@ -56,7 +56,7 @@ static void ccw_device_unplug(HotplugHandler *hotplug_dev,
 qdev_unrealize(dev);
 }
 
-static void virtual_css_bus_reset(BusState *qbus)
+static void virtual_css_bus_reset_hold(Object *obj)
 {
 /* This should actually be modelled via the generic css */
 css_reset();
@@ -81,8 +81,9 @@ static char *virtual_css_bus_get_dev_path(DeviceState *dev)
 static void virtual_css_bus_class_init(ObjectClass *klass, void *data)
 {
 BusClass *k = BUS_CLASS(klass);
+ResettableClass *rc = RESETTABLE_CLASS(klass);
 
-k->reset = virtual_css_bus_reset;
+rc->phases.hold = virtual_css_bus_reset_hold;
 k->get_dev_path = virtual_css_bus_get_dev_path;
 }
 
-- 
2.34.1

[PULL 23/36] hw/arm/strongarm.c: convert DPRINTF to trace events and guest errors

2024-02-02 Thread Peter Maydell

From: Manos Pitsidianakis 

Tracing DPRINTFs to stderr might not be desired. A developer that relies
on trace events should be able to opt-in to each trace event and rely on
QEMU's log redirection, instead of stderr by default.

This commit converts DPRINTFs in this file that are used for tracing
into trace events. DPRINTFs that report guest errors are logged with
LOG_GUEST_ERROR.#

Signed-off-by: Manos Pitsidianakis 
Reviewed-by: Alex Bennée 
Message-id: 
39db71dd87bf2007cf7812f3d91dde53887f1f2f.1706544115.git.manos.pitsidiana...@linaro.org
Signed-off-by: Peter Maydell 
---
 hw/arm/strongarm.c  | 82 -
 hw/arm/trace-events |  3 ++
 2 files changed, 55 insertions(+), 30 deletions(-)

diff --git a/hw/arm/strongarm.c b/hw/arm/strongarm.c
index 7fd99a0f144..823b4931b0a 100644
--- a/hw/arm/strongarm.c
+++ b/hw/arm/strongarm.c
@@ -46,8 +46,7 @@
 #include "qemu/log.h"
 #include "qom/object.h"
 #include "target/arm/cpu-qom.h"
-
-//#define DEBUG
+#include "trace.h"
 
 /*
  TODO
@@ -66,12 +65,6 @@
  - Enhance UART with modem signals
  */
 
-#ifdef DEBUG
-# define DPRINTF(format, ...) printf(format , ## __VA_ARGS__)
-#else
-# define DPRINTF(format, ...) do { } while (0)
-#endif
-
 static struct {
 hwaddr io_base;
 int irq;
@@ -151,8 +144,9 @@ static uint64_t strongarm_pic_mem_read(void *opaque, hwaddr 
offset,
 case ICPR:
 return s->pending;
 default:
-printf("%s: Bad register offset 0x" HWADDR_FMT_plx "\n",
-__func__, offset);
+qemu_log_mask(LOG_GUEST_ERROR,
+  "%s: Bad register offset 0x"HWADDR_FMT_plx"\n",
+  __func__, offset);
 return 0;
 }
 }
@@ -173,8 +167,9 @@ static void strongarm_pic_mem_write(void *opaque, hwaddr 
offset,
 s->int_idle = (value & 1) ? 0 : ~0;
 break;
 default:
-printf("%s: Bad register offset 0x" HWADDR_FMT_plx "\n",
-__func__, offset);
+qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: Bad register offset 0x"HWADDR_FMT_plx"\n",
+ __func__, offset);
 break;
 }
 strongarm_pic_update(s);
@@ -333,7 +328,9 @@ static uint64_t strongarm_rtc_read(void *opaque, hwaddr 
addr,
 ((qemu_clock_get_ms(rtc_clock) - s->last_hz) << 15) /
 (1000 * ((s->rttr & 0x) + 1));
 default:
-printf("%s: Bad register 0x" HWADDR_FMT_plx "\n", __func__, addr);
+qemu_log_mask(LOG_GUEST_ERROR,
+  "%s: Bad rtc register read 0x"HWADDR_FMT_plx"\n",
+  __func__, addr);
 return 0;
 }
 }
@@ -375,7 +372,9 @@ static void strongarm_rtc_write(void *opaque, hwaddr addr,
 break;
 
 default:
-printf("%s: Bad register 0x" HWADDR_FMT_plx "\n", __func__, addr);
+qemu_log_mask(LOG_GUEST_ERROR,
+  "%s: Bad rtc register write 0x"HWADDR_FMT_plx"\n",
+  __func__, addr);
 }
 }
 
@@ -556,12 +555,12 @@ static uint64_t strongarm_gpio_read(void *opaque, hwaddr 
offset,
 
 case GPSR:/* GPIO Pin-Output Set registers */
 qemu_log_mask(LOG_GUEST_ERROR,
-  "strongarm GPIO: read from write only register GPSR\n");
+  "%s: read from write only register GPSR\n", __func__);
 return 0;
 
 case GPCR:/* GPIO Pin-Output Clear registers */
 qemu_log_mask(LOG_GUEST_ERROR,
-  "strongarm GPIO: read from write only register GPCR\n");
+  "%s: read from write only register GPCR\n", __func__);
 return 0;
 
 case GRER:/* GPIO Rising-Edge Detect Enable registers */
@@ -581,7 +580,9 @@ static uint64_t strongarm_gpio_read(void *opaque, hwaddr 
offset,
 return s->status;
 
 default:
-printf("%s: Bad offset 0x" HWADDR_FMT_plx "\n", __func__, offset);
+qemu_log_mask(LOG_GUEST_ERROR,
+  "%s: Bad gpio read offset 0x"HWADDR_FMT_plx"\n",
+  __func__, offset);
 }
 
 return 0;
@@ -626,7 +627,9 @@ static void strongarm_gpio_write(void *opaque, hwaddr 
offset,
 break;
 
 default:
-printf("%s: Bad offset 0x" HWADDR_FMT_plx "\n", __func__, offset);
+qemu_log_mask(LOG_GUEST_ERROR,
+  "%s: Bad write offset 0x"HWADDR_FMT_plx"\n",
+  __func__, offset);
 }
 }
 
@@ -782,7 +785,9 @@ static uint64_t strongarm_ppc_read(void *opaque, hwaddr 
offset,
 return s->ppfr | ~0x7f001;
 
 default:
-printf("%s: Bad offset 0x" HWADDR_FMT_plx "\n", __func__, offset);
+qemu_log_mask(LOG_GUEST_ERROR,
+  "%s: Bad ppc read offset 0x"HWADDR_FMT_plx "\n",
+  __func__, offset);
 }
 
 return 0;
@@ -817,7 +822,9 @@ static void strongarm_ppc_write(void *opaque, hwaddr offset,
 break;
 
 default:
-

1 2 >

1 - 100 of 194 matches

Mail list logo