date:20221111

[PATCH for-8.0 3/3] target/i386: Inline cmpxchg16b

2022-11-11 Thread Richard Henderson

Use tcg_gen_atomic_cmpxchg_i128 for the atomic case,
and tcg_gen_qemu_ld/st_i128 otherwise.

Signed-off-by: Richard Henderson 
---
 target/i386/helper.h |  4 ---
 target/i386/tcg/mem_helper.c | 69 
 target/i386/tcg/translate.c  | 44 ---
 3 files changed, 39 insertions(+), 78 deletions(-)

diff --git a/target/i386/helper.h b/target/i386/helper.h
index 2df8049f91..e627a93107 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -66,10 +66,6 @@ DEF_HELPER_1(rsm, void, env)
 #endif /* !CONFIG_USER_ONLY */
 
 DEF_HELPER_2(into, void, env, int)
-#ifdef TARGET_X86_64
-DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl)
-DEF_HELPER_2(cmpxchg16b, void, env, tl)
-#endif
 DEF_HELPER_FLAGS_1(single_step, TCG_CALL_NO_WG, noreturn, env)
 DEF_HELPER_1(rechecking_single_step, void, env)
 DEF_HELPER_1(cpuid, void, env)
diff --git a/target/i386/tcg/mem_helper.c b/target/i386/tcg/mem_helper.c
index 814786bb87..3ef84e90d9 100644
--- a/target/i386/tcg/mem_helper.c
+++ b/target/i386/tcg/mem_helper.c
@@ -27,75 +27,6 @@
 #include "tcg/tcg.h"
 #include "helper-tcg.h"
 
-#ifdef TARGET_X86_64
-void helper_cmpxchg16b_unlocked(CPUX86State *env, target_ulong a0)
-{
-uintptr_t ra = GETPC();
-Int128 oldv, cmpv, newv;
-uint64_t o0, o1;
-int eflags;
-bool success;
-
-if ((a0 & 0xf) != 0) {
-raise_exception_ra(env, EXCP0D_GPF, GETPC());
-}
-eflags = cpu_cc_compute_all(env, CC_OP);
-
-cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]);
-newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]);
-
-o0 = cpu_ldq_data_ra(env, a0 + 0, ra);
-o1 = cpu_ldq_data_ra(env, a0 + 8, ra);
-
-oldv = int128_make128(o0, o1);
-success = int128_eq(oldv, cmpv);
-if (!success) {
-newv = oldv;
-}
-
-cpu_stq_data_ra(env, a0 + 0, int128_getlo(newv), ra);
-cpu_stq_data_ra(env, a0 + 8, int128_gethi(newv), ra);
-
-if (success) {
-eflags |= CC_Z;
-} else {
-env->regs[R_EAX] = int128_getlo(oldv);
-env->regs[R_EDX] = int128_gethi(oldv);
-eflags &= ~CC_Z;
-}
-CC_SRC = eflags;
-}
-
-void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
-{
-uintptr_t ra = GETPC();
-
-if ((a0 & 0xf) != 0) {
-raise_exception_ra(env, EXCP0D_GPF, ra);
-} else if (HAVE_CMPXCHG128) {
-int eflags = cpu_cc_compute_all(env, CC_OP);
-
-Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]);
-Int128 newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]);
-
-int mem_idx = cpu_mmu_index(env, false);
-MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
-Int128 oldv = cpu_atomic_cmpxchgo_le_mmu(env, a0, cmpv, newv, oi, ra);
-
-if (int128_eq(oldv, cmpv)) {
-eflags |= CC_Z;
-} else {
-env->regs[R_EAX] = int128_getlo(oldv);
-env->regs[R_EDX] = int128_gethi(oldv);
-eflags &= ~CC_Z;
-}
-CC_SRC = eflags;
-} else {
-cpu_loop_exit_atomic(env_cpu(env), ra);
-}
-}
-#endif
-
 void helper_boundw(CPUX86State *env, target_ulong a0, int v)
 {
 int low, high;
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index a134d63946..6dfcfaf31a 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3034,15 +3034,49 @@ static void gen_cmpxchg8b(DisasContext *s, CPUX86State 
*env, int modrm)
 #ifdef TARGET_X86_64
 static void gen_cmpxchg16b(DisasContext *s, CPUX86State *env, int modrm)
 {
+MemOp mop = MO_TE | MO_128 | MO_ALIGN;
+TCGv_i64 t0, t1;
+TCGv_i128 cmp, val;
+
 gen_lea_modrm(env, s, modrm);
 
-if ((s->prefix & PREFIX_LOCK) &&
-(tb_cflags(s->base.tb) & CF_PARALLEL)) {
-gen_helper_cmpxchg16b(cpu_env, s->A0);
+cmp = tcg_temp_new_i128();
+val = tcg_temp_new_i128();
+tcg_gen_concat_i64_i128(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
+tcg_gen_concat_i64_i128(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
+
+/* Only require atomic with LOCK; non-parallel handled in generator. */
+if (s->prefix & PREFIX_LOCK) {
+tcg_gen_atomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop);
 } else {
-gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
+tcg_gen_nonatomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, 
mop);
 }
-set_cc_op(s, CC_OP_EFLAGS);
+
+tcg_gen_extr_i128_i64(s->T0, s->T1, val);
+tcg_temp_free_i128(cmp);
+tcg_temp_free_i128(val);
+
+/* Determine success after the fact. */
+t0 = tcg_temp_new_i64();
+t1 = tcg_temp_new_i64();
+tcg_gen_xor_i64(t0, s->T0, cpu_regs[R_EAX]);
+tcg_gen_xor_i64(t1, s->T1, cpu_regs[R_EDX]);
+tcg_gen_or_i64(t0, t0, t1);
+tcg_temp_free_i64(t1);
+
+/* Update Z. */
+gen_compute_eflags(s);
+tcg_gen_setcondi_i64(TCG_COND_EQ, t0, t0, 0);
+tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, t0, ctz32(CC_Z), 1);
+

[PATCH for-8.0 1/3] target/i386: Split out gen_cmpxchg8b, gen_cmpxchg16b

2022-11-11 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 target/i386/tcg/translate.c | 48 -
 1 file changed, 31 insertions(+), 17 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 28a4e6dc1d..1175540a2c 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2974,6 +2974,34 @@ static void gen_sty_env_A0(DisasContext *s, int offset, 
bool align)
 #include "emit.c.inc"
 #include "decode-new.c.inc"
 
+static void gen_cmpxchg8b(DisasContext *s, CPUX86State *env, int modrm)
+{
+gen_lea_modrm(env, s, modrm);
+
+if ((s->prefix & PREFIX_LOCK) &&
+(tb_cflags(s->base.tb) & CF_PARALLEL)) {
+gen_helper_cmpxchg8b(cpu_env, s->A0);
+} else {
+gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
+}
+set_cc_op(s, CC_OP_EFLAGS);
+}
+
+#ifdef TARGET_X86_64
+static void gen_cmpxchg16b(DisasContext *s, CPUX86State *env, int modrm)
+{
+gen_lea_modrm(env, s, modrm);
+
+if ((s->prefix & PREFIX_LOCK) &&
+(tb_cflags(s->base.tb) & CF_PARALLEL)) {
+gen_helper_cmpxchg16b(cpu_env, s->A0);
+} else {
+gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
+}
+set_cc_op(s, CC_OP_EFLAGS);
+}
+#endif
+
 /* convert one instruction. s->base.is_jmp is set if the translation must
be stopped. Return the next pc value */
 static bool disas_insn(DisasContext *s, CPUState *cpu)
@@ -3814,28 +3842,14 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
 if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
 goto illegal_op;
 }
-gen_lea_modrm(env, s, modrm);
-if ((s->prefix & PREFIX_LOCK) &&
-(tb_cflags(s->base.tb) & CF_PARALLEL)) {
-gen_helper_cmpxchg16b(cpu_env, s->A0);
-} else {
-gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
-}
-set_cc_op(s, CC_OP_EFLAGS);
+gen_cmpxchg16b(s, env, modrm);
 break;
 }
-#endif
+#endif
 if (!(s->cpuid_features & CPUID_CX8)) {
 goto illegal_op;
 }
-gen_lea_modrm(env, s, modrm);
-if ((s->prefix & PREFIX_LOCK) &&
-(tb_cflags(s->base.tb) & CF_PARALLEL)) {
-gen_helper_cmpxchg8b(cpu_env, s->A0);
-} else {
-gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
-}
-set_cc_op(s, CC_OP_EFLAGS);
+gen_cmpxchg8b(s, env, modrm);
 break;
 
 case 7: /* RDSEED */
-- 
2.34.1

[PATCH for-8.0 2/3] target/i386: Inline cmpxchg8b

2022-11-11 Thread Richard Henderson

Use tcg_gen_atomic_cmpxchg_i64 for the atomic case,
and tcg_gen_nonatomic_cmpxchg_i64 otherwise.

Signed-off-by: Richard Henderson 
---
 target/i386/helper.h |  2 --
 target/i386/tcg/mem_helper.c | 57 
 target/i386/tcg/translate.c  | 54 ++
 3 files changed, 49 insertions(+), 64 deletions(-)

diff --git a/target/i386/helper.h b/target/i386/helper.h
index b7de5429ef..2df8049f91 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -66,8 +66,6 @@ DEF_HELPER_1(rsm, void, env)
 #endif /* !CONFIG_USER_ONLY */
 
 DEF_HELPER_2(into, void, env, int)
-DEF_HELPER_2(cmpxchg8b_unlocked, void, env, tl)
-DEF_HELPER_2(cmpxchg8b, void, env, tl)
 #ifdef TARGET_X86_64
 DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl)
 DEF_HELPER_2(cmpxchg16b, void, env, tl)
diff --git a/target/i386/tcg/mem_helper.c b/target/i386/tcg/mem_helper.c
index e3cdafd2d4..814786bb87 100644
--- a/target/i386/tcg/mem_helper.c
+++ b/target/i386/tcg/mem_helper.c
@@ -27,63 +27,6 @@
 #include "tcg/tcg.h"
 #include "helper-tcg.h"
 
-void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0)
-{
-uintptr_t ra = GETPC();
-uint64_t oldv, cmpv, newv;
-int eflags;
-
-eflags = cpu_cc_compute_all(env, CC_OP);
-
-cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]);
-newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]);
-
-oldv = cpu_ldq_data_ra(env, a0, ra);
-newv = (cmpv == oldv ? newv : oldv);
-/* always do the store */
-cpu_stq_data_ra(env, a0, newv, ra);
-
-if (oldv == cmpv) {
-eflags |= CC_Z;
-} else {
-env->regs[R_EAX] = (uint32_t)oldv;
-env->regs[R_EDX] = (uint32_t)(oldv >> 32);
-eflags &= ~CC_Z;
-}
-CC_SRC = eflags;
-}
-
-void helper_cmpxchg8b(CPUX86State *env, target_ulong a0)
-{
-#ifdef CONFIG_ATOMIC64
-uint64_t oldv, cmpv, newv;
-int eflags;
-
-eflags = cpu_cc_compute_all(env, CC_OP);
-
-cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]);
-newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]);
-
-{
-uintptr_t ra = GETPC();
-int mem_idx = cpu_mmu_index(env, false);
-MemOpIdx oi = make_memop_idx(MO_TEUQ, mem_idx);
-oldv = cpu_atomic_cmpxchgq_le_mmu(env, a0, cmpv, newv, oi, ra);
-}
-
-if (oldv == cmpv) {
-eflags |= CC_Z;
-} else {
-env->regs[R_EAX] = (uint32_t)oldv;
-env->regs[R_EDX] = (uint32_t)(oldv >> 32);
-eflags &= ~CC_Z;
-}
-CC_SRC = eflags;
-#else
-cpu_loop_exit_atomic(env_cpu(env), GETPC());
-#endif /* CONFIG_ATOMIC64 */
-}
-
 #ifdef TARGET_X86_64
 void helper_cmpxchg16b_unlocked(CPUX86State *env, target_ulong a0)
 {
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 1175540a2c..a134d63946 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2976,15 +2976,59 @@ static void gen_sty_env_A0(DisasContext *s, int offset, 
bool align)
 
 static void gen_cmpxchg8b(DisasContext *s, CPUX86State *env, int modrm)
 {
+TCGv_i64 cmp, val, old;
+TCGv Z;
+
 gen_lea_modrm(env, s, modrm);
 
-if ((s->prefix & PREFIX_LOCK) &&
-(tb_cflags(s->base.tb) & CF_PARALLEL)) {
-gen_helper_cmpxchg8b(cpu_env, s->A0);
+cmp = tcg_temp_new_i64();
+val = tcg_temp_new_i64();
+old = tcg_temp_new_i64();
+
+/* Construct the comparison values from the register pair. */
+tcg_gen_concat_tl_i64(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
+tcg_gen_concat_tl_i64(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
+
+/* Only require atomic with LOCK; non-parallel handled in generator. */
+if (s->prefix & PREFIX_LOCK) {
+tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, 
MO_TEUQ);
 } else {
-gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
+tcg_gen_nonatomic_cmpxchg_i64(old, s->A0, cmp, val,
+  s->mem_index, MO_TEUQ);
 }
-set_cc_op(s, CC_OP_EFLAGS);
+tcg_temp_free_i64(val);
+
+/* Set tmp0 to match the required value of Z. */
+tcg_gen_setcond_i64(TCG_COND_EQ, cmp, old, cmp);
+Z = tcg_temp_new();
+tcg_gen_trunc_i64_tl(Z, cmp);
+tcg_temp_free_i64(cmp);
+
+/*
+ * Extract the result values for the register pair.
+ * For 32-bit, we may do this unconditionally, because on success (Z=1),
+ * the old value matches the previous value in EDX:EAX.  For x86_64,
+ * the store must be conditional, because we must leave the source
+ * registers unchanged on success, and zero-extend the writeback
+ * on failure (Z=0).
+ */
+if (TARGET_LONG_BITS == 32) {
+tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], old);
+} else {
+TCGv zero = tcg_constant_tl(0);
+
+tcg_gen_extr_i64_tl(s->T0, s->T1, old);
+tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], Z, zero,
+   s->T0, cpu_regs[R_EAX]);
+

[PATCH for-8.0 0/3] target/i386: cmpxchg8b and cmpxchg16b cleanup

2022-11-11 Thread Richard Henderson

Use the new common functions and avoid rolling our own helpers.

r~

Based-on: 2022074101.2069454-1-richard.hender...@linaro.org
("tcg: Support for Int128 with helpers")

Richard Henderson (3):
  target/i386: Split out gen_cmpxchg8b, gen_cmpxchg16b
  target/i386: Inline cmpxchg8b
  target/i386: Inline cmpxchg16b

 target/i386/helper.h |   6 --
 target/i386/tcg/mem_helper.c | 126 ---
 target/i386/tcg/translate.c  | 126 ++-
 3 files changed, 109 insertions(+), 149 deletions(-)

-- 
2.34.1

Re: [PATCH-for-7.2 v2] libvduse: Avoid warning about dangerous use of strncpy()

2022-11-11 Thread Yongji Xie

On Fri, Nov 11, 2022 at 8:45 PM Philippe Mathieu-Daudé
 wrote:
>
> From: Philippe Mathieu-Daudé 
>
> GCC 8 added a -Wstringop-truncation warning:
>
>   The -Wstringop-truncation warning added in GCC 8.0 via r254630 for
>   bug 81117 is specifically intended to highlight likely unintended
>   uses of the strncpy function that truncate the terminating NUL
>   character from the source string.
>
> Here the next line indeed unconditionally zeroes the last byte, but
> 1/ the buffer has been calloc'd, so we don't need to add an extra
> byte, and 2/ we called vduse_name_is_invalid() which checked the
> string length, so we can simply call strcpy().
>
> This fixes when using gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0:
>
>   [42/666] Compiling C object subprojects/libvduse/libvduse.a.p/libvduse.c.o
>   FAILED: subprojects/libvduse/libvduse.a.p/libvduse.c.o
>   cc -m64 -mcx16 -Isubprojects/libvduse/libvduse.a.p -Isubprojects/libvduse 
> -I../../subprojects/libvduse [...] -o 
> subprojects/libvduse/libvduse.a.p/libvduse.c.o -c 
> ../../subprojects/libvduse/libvduse.c
>   In file included from /usr/include/string.h:495,
>from ../../subprojects/libvduse/libvduse.c:24:
>   In function ‘strncpy’,
>   inlined from ‘vduse_dev_create’ at 
> ../../subprojects/libvduse/libvduse.c:1312:5:
>   /usr/include/x86_64-linux-gnu/bits/string_fortified.h:106:10: error: 
> ‘__builtin_strncpy’ specified bound 256 equals destination size 
> [-Werror=stringop-truncation]
> 106 |   return __builtin___strncpy_chk (__dest, __src, __len, __bos 
> (__dest));
> |  
> ^~
>   cc1: all warnings being treated as errors
>   ninja: build stopped: cannot make progress due to previous errors.
>
> Fixes: d9cf16c0be ("libvduse: Replace strcpy() with strncpy()")
> Suggested-by: Markus Armbruster 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
> Supersedes: <20220919192306.52729-1-f4...@amsat.org>
> Cc: Xie Yongji 
> Cc: Kevin Wolf 
> ---
>  subprojects/libvduse/libvduse.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>

Reviewed-by: Xie Yongji

Re: [PATCH v8 5/5] docs: Add generic vhost-vdpa device documentation

2022-11-11 Thread longpeng2--- via





在 2022/11/8 16:42, Stefano Garzarella 写道:
On Tue, Nov 08, 2022 at 11:30:53AM +0800, Longpeng (Mike, Cloud 
Infrastructure Service Product Dept.) wrote:



在 2022/11/8 10:42, Jason Wang 写道:
On Tue, Nov 8, 2022 at 8:42 AM Longpeng(Mike)  
wrote:


From: Longpeng 

Signed-off-by: Longpeng 
---
 docs/system/devices/vhost-vdpa-device.rst | 43 +++
 1 file changed, 43 insertions(+)
 create mode 100644 docs/system/devices/vhost-vdpa-device.rst

diff --git a/docs/system/devices/vhost-vdpa-device.rst 
b/docs/system/devices/vhost-vdpa-device.rst

new file mode 100644
index 00..b758c4fce6
--- /dev/null
+++ b/docs/system/devices/vhost-vdpa-device.rst


If the doc is for a general vhost-vDPA device, we'd better have a 
better name?




How about general-vhost-vdpa-device.rst?



I would leave vhost-vdpa as the prefix, how about 
vhost-vdpa-generic-device.rst?



Okay, will do in next version, thanks.


Thanks,
Stefano

.

Re: [PATCH v8 4/5] vdpa-dev: mark the device as unmigratable

2022-11-11 Thread longpeng2--- via





在 2022/11/8 16:46, Stefano Garzarella 写道:

On Tue, Nov 08, 2022 at 08:41:56AM +0800, Longpeng(Mike) wrote:

From: Longpeng 

The generic vDPA device doesn't support migration currently, so
mark it as unmigratable temporarily.

Signed-off-by: Longpeng 
---
hw/virtio/vdpa-dev.c | 1 +
1 file changed, 1 insertion(+)


Is there a particular reason why we don't squash this change in the 
second patch of this series where we add the hw/virtio/vdpa-dev.c file?



No, just want to make it noticeable.


Anyway, this change LGTM:

Reviewed-by: Stefano Garzarella 


Thanks.



diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
index 2885d06cbe..62d83d3423 100644
--- a/hw/virtio/vdpa-dev.c
+++ b/hw/virtio/vdpa-dev.c
@@ -327,6 +327,7 @@ static Property vhost_vdpa_device_properties[] = {

static const VMStateDescription vmstate_vhost_vdpa_device = {
    .name = "vhost-vdpa-device",
+    .unmigratable = 1,
    .minimum_version_id = 1,
    .version_id = 1,
    .fields = (VMStateField[]) {
--
2.23.0



.

[PATCH] hw/virtio: added virtio-serial test cases

2022-11-11 Thread Daniel Hoffman

The previous test cases for virtio-serial only tested initialization of
the device. I've included four new test cases: rx for virtconsole, tx
for virtconsole, rx for virtserialport, tx for virtserialport. It
follows the general pattern of virtio-net (i.e. chardev file descriptor
backend with a socketpair connected via fork-exec).

Signed-off-by: Daniel Hoffman 
---
 tests/qtest/libqos/virtio-serial.c |  51 +
 tests/qtest/libqos/virtio-serial.h |   2 +
 tests/qtest/virtio-serial-test.c   | 177 -
 3 files changed, 228 insertions(+), 2 deletions(-)

diff --git a/tests/qtest/libqos/virtio-serial.c 
b/tests/qtest/libqos/virtio-serial.c
index 1d689c3e38..8723bffe1b 100644
--- a/tests/qtest/libqos/virtio-serial.c
+++ b/tests/qtest/libqos/virtio-serial.c
@@ -22,6 +22,10 @@
 #include "qgraph.h"
 #include "virtio-serial.h"
 
+#include "qemu/iov.h"
+
+static QGuestAllocator *alloc;
+
 static void *qvirtio_serial_get_driver(QVirtioSerial *v_serial,
const char *interface)
 {
@@ -43,6 +47,33 @@ static void *qvirtio_serial_device_get_driver(void *object,
 return qvirtio_serial_get_driver(_serial->serial, interface);
 }
 
+static void virtio_serial_setup(QVirtioSerial *interface)
+{
+QVirtioDevice *vdev = interface->vdev;
+qvirtio_set_features(vdev, (1ULL << 1) | (1ULL << 32));
+
+interface->n_queues = 6;
+interface->queues = g_new(QVirtQueue*, interface->n_queues);
+
+for (int i = 0; i < interface->n_queues; i++) {
+interface->queues[i] = qvirtqueue_setup(interface->vdev, alloc, i);
+}
+
+qvirtio_set_driver_ok(vdev);
+}
+
+static void qvirtio_serial_device_destructor(QOSGraphObject *obj)
+{
+}
+
+static void qvirtio_serial_device_start_hw(QOSGraphObject *obj)
+{
+QVirtioSerialDevice *v_serial = (QVirtioSerialDevice *)obj;
+QVirtioSerial *interface = _serial->serial;
+
+virtio_serial_setup(interface);
+}
+
 static void *virtio_serial_device_create(void *virtio_dev,
  QGuestAllocator *t_alloc,
  void *addr)
@@ -51,13 +82,30 @@ static void *virtio_serial_device_create(void *virtio_dev,
 QVirtioSerial *interface = _device->serial;
 
 interface->vdev = virtio_dev;
+alloc = t_alloc;
 
+virtio_device->obj.destructor = qvirtio_serial_device_destructor;
+virtio_device->obj.start_hw = qvirtio_serial_device_start_hw;
 virtio_device->obj.get_driver = qvirtio_serial_device_get_driver;
 
 return _device->obj;
 }
 
 /* virtio-serial-pci */
+static void qvirtio_serial_pci_destructor(QOSGraphObject *obj)
+{
+}
+
+static void qvirtio_serial_pci_start_hw(QOSGraphObject *obj)
+{
+QVirtioSerialPCI *v_serial = (QVirtioSerialPCI *) obj;
+QVirtioSerial *interface = _serial->serial;
+QOSGraphObject *pci_vobj = _serial->pci_vdev.obj;
+
+qvirtio_pci_start_hw(pci_vobj);
+virtio_serial_setup(interface);
+}
+
 static void *qvirtio_serial_pci_get_driver(void *object, const char *interface)
 {
 QVirtioSerialPCI *v_serial = object;
@@ -76,7 +124,10 @@ static void *virtio_serial_pci_create(void *pci_bus, 
QGuestAllocator *t_alloc,
 
 virtio_pci_init(_spci->pci_vdev, pci_bus, addr);
 interface->vdev = _spci->pci_vdev.vdev;
+alloc = t_alloc;
 
+obj->destructor = qvirtio_serial_pci_destructor;
+obj->start_hw = qvirtio_serial_pci_start_hw;
 obj->get_driver = qvirtio_serial_pci_get_driver;
 
 return obj;
diff --git a/tests/qtest/libqos/virtio-serial.h 
b/tests/qtest/libqos/virtio-serial.h
index 3db43b2bb8..ce6ae164cb 100644
--- a/tests/qtest/libqos/virtio-serial.h
+++ b/tests/qtest/libqos/virtio-serial.h
@@ -29,6 +29,8 @@ typedef struct QVirtioSerialDevice QVirtioSerialDevice;
 
 struct QVirtioSerial {
 QVirtioDevice *vdev;
+int n_queues;
+QVirtQueue **queues;
 };
 
 struct QVirtioSerialPCI {
diff --git a/tests/qtest/virtio-serial-test.c b/tests/qtest/virtio-serial-test.c
index 2541034822..190075d6f5 100644
--- a/tests/qtest/virtio-serial-test.c
+++ b/tests/qtest/virtio-serial-test.c
@@ -11,6 +11,36 @@
 #include "libqtest-single.h"
 #include "qemu/module.h"
 #include "libqos/virtio-serial.h"
+#include "standard-headers/linux/virtio_console.h"
+#include "qemu/iov.h"
+
+static void virtio_serial_test_cleanup(void *sockets)
+{
+int *sv = sockets;
+
+close(sv[0]);
+qos_invalidate_command_line();
+close(sv[1]);
+g_free(sv);
+}
+
+static void *virtio_serial_test_setup(GString *cmd_line, void *arg)
+{
+int ret;
+int *sv = g_new(int, 3);
+
+ret = socketpair(PF_UNIX, SOCK_STREAM, 0, sv);
+g_assert_cmpint(ret, !=, -1);
+
+g_string_append_printf(
+cmd_line,
+" -chardev socket,fd=%d,id=virtioserial0",
+sv[1]);
+
+sv[2] = arg ? 1 : 0;
+g_test_queue_destroy(virtio_serial_test_cleanup, sv);
+return sv;
+}
 
 /* Tests only initialization so far. TODO: Replace with functional tests */
 static void

[PATCH for-8.0 0/1] target/ppc: Use tcg_gen_atomic_cmpxchg_i128

2022-11-11 Thread Richard Henderson

Use the new common function and avoid rolling our own helper(s).

r~

Based-on: 2022074101.2069454-1-richard.hender...@linaro.org
("tcg: Support for Int128 with helpers")

Richard Henderson (1):
  target/ppc: Use tcg_gen_atomic_cmpxchg_i128 for STQCX

 target/ppc/helper.h |   2 -
 target/ppc/mem_helper.c |  44 -
 target/ppc/translate.c  | 102 ++--
 3 files changed, 47 insertions(+), 101 deletions(-)

-- 
2.34.1

[PATCH for-8.0 1/1] target/ppc: Use tcg_gen_atomic_cmpxchg_i128 for STQCX

2022-11-11 Thread Richard Henderson

Note that the previous direct reference to reserve_val,

-   tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
-? offsetof(CPUPPCState, reserve_val2)
-: offsetof(CPUPPCState, reserve_val)));

was incorrect because all references should have gone through
cpu_reserve_val.  Create a cpu_reserve_val2 tcg temp to fix this.

Signed-off-by: Richard Henderson 
---
 target/ppc/helper.h |   2 -
 target/ppc/mem_helper.c |  44 -
 target/ppc/translate.c  | 102 ++--
 3 files changed, 47 insertions(+), 101 deletions(-)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 8dd22a35e4..0beaca5c7a 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -818,6 +818,4 @@ DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG,
void, env, tl, i64, i64, i32)
 DEF_HELPER_FLAGS_5(stq_be_parallel, TCG_CALL_NO_WG,
void, env, tl, i64, i64, i32)
-DEF_HELPER_5(stqcx_le_parallel, i32, env, tl, i64, i64, i32)
-DEF_HELPER_5(stqcx_be_parallel, i32, env, tl, i64, i64, i32)
 #endif
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index d1163f316c..1578887a8f 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -413,50 +413,6 @@ void helper_stq_be_parallel(CPUPPCState *env, target_ulong 
addr,
 val = int128_make128(lo, hi);
 cpu_atomic_sto_be_mmu(env, addr, val, opidx, GETPC());
 }
-
-uint32_t helper_stqcx_le_parallel(CPUPPCState *env, target_ulong addr,
-  uint64_t new_lo, uint64_t new_hi,
-  uint32_t opidx)
-{
-bool success = false;
-
-/* We will have raised EXCP_ATOMIC from the translator.  */
-assert(HAVE_CMPXCHG128);
-
-if (likely(addr == env->reserve_addr)) {
-Int128 oldv, cmpv, newv;
-
-cmpv = int128_make128(env->reserve_val2, env->reserve_val);
-newv = int128_make128(new_lo, new_hi);
-oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv,
-  opidx, GETPC());
-success = int128_eq(oldv, cmpv);
-}
-env->reserve_addr = -1;
-return env->so + success * CRF_EQ_BIT;
-}
-
-uint32_t helper_stqcx_be_parallel(CPUPPCState *env, target_ulong addr,
-  uint64_t new_lo, uint64_t new_hi,
-  uint32_t opidx)
-{
-bool success = false;
-
-/* We will have raised EXCP_ATOMIC from the translator.  */
-assert(HAVE_CMPXCHG128);
-
-if (likely(addr == env->reserve_addr)) {
-Int128 oldv, cmpv, newv;
-
-cmpv = int128_make128(env->reserve_val2, env->reserve_val);
-newv = int128_make128(new_lo, new_hi);
-oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv,
-  opidx, GETPC());
-success = int128_eq(oldv, cmpv);
-}
-env->reserve_addr = -1;
-return env->so + success * CRF_EQ_BIT;
-}
 #endif
 
 /*/
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 19c1d17cb0..85f95a9045 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -72,6 +72,7 @@ static TCGv cpu_cfar;
 static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32;
 static TCGv cpu_reserve;
 static TCGv cpu_reserve_val;
+static TCGv cpu_reserve_val2;
 static TCGv cpu_fpscr;
 static TCGv_i32 cpu_access_type;
 
@@ -141,8 +142,11 @@ void ppc_translate_init(void)
  offsetof(CPUPPCState, reserve_addr),
  "reserve_addr");
 cpu_reserve_val = tcg_global_mem_new(cpu_env,
- offsetof(CPUPPCState, reserve_val),
- "reserve_val");
+ offsetof(CPUPPCState, reserve_val),
+ "reserve_val");
+cpu_reserve_val2 = tcg_global_mem_new(cpu_env,
+  offsetof(CPUPPCState, reserve_val2),
+  "reserve_val2");
 
 cpu_fpscr = tcg_global_mem_new(cpu_env,
offsetof(CPUPPCState, fpscr), "fpscr");
@@ -3979,78 +3983,66 @@ static void gen_lqarx(DisasContext *ctx)
 /* stqcx. */
 static void gen_stqcx_(DisasContext *ctx)
 {
+TCGLabel *lab_fail, *lab_over;
 int rs = rS(ctx->opcode);
-TCGv EA, hi, lo;
+TCGv EA, t0, t1;
+TCGv_i128 cmp, val;
 
 if (unlikely(rs & 1)) {
 gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
 return;
 }
 
+lab_fail = gen_new_label();
+lab_over = gen_new_label();
+
 gen_set_access_type(ctx, ACCESS_RES);
 EA = tcg_temp_new();
 gen_addr_reg_index(ctx, EA);
 
+tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, lab_fail);
+tcg_temp_free(EA);
+
+cmp = tcg_temp_new_i128();
+

Re: [PATCH v5 20/20] include/hw: add commentary to current_cpu export

2022-11-11 Thread Richard Henderson


On 11/12/22 04:25, Alex Bennée wrote:

Document the intended use of current_cpu and discourage its use in new
HW emulation code. Once we have fully converted the tree we should
probably move this extern to another header.

Signed-off-by: Alex Bennée
---
  include/hw/core/cpu.h | 14 ++
  1 file changed, 14 insertions(+)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH v5 19/20] hw/isa: derive CPUState from MemTxAttrs in apm_ioport_writeb

2022-11-11 Thread Richard Henderson


On 11/12/22 04:25, Alex Bennée wrote:

Some of the callbacks need a CPUState so extend the interface so we
can pass that down rather than relying on current_cpu hacks.

Signed-off-by: Alex Bennée
---
  include/hw/isa/apm.h |  2 +-
  hw/acpi/ich9.c   |  1 -
  hw/acpi/piix4.c  |  2 +-
  hw/isa/apm.c | 21 +
  hw/isa/lpc_ich9.c|  5 ++---
  5 files changed, 21 insertions(+), 10 deletions(-)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH v5 18/20] hw/i386: convert apic access to use MemTxAttrs

2022-11-11 Thread Richard Henderson


On 11/12/22 04:25, Alex Bennée wrote:

+switch (attrs.requester_type) {
+case MTRT_MACHINE: /* MEMTX_IOPIC */


Not checking the id?


+case MTRT_PCI: /* PCI signalled MSI */
+break;
+default:
+qemu_log_mask(LOG_GUEST_ERROR, "%s: rejecting write from %d",
+  __func__, attrs.requester_id);
+return MEMTX_ACCESS_ERROR;
+}


Logging the requester_id by itself isn't great -- you need the type to know what you're 
looking at.


Otherwise,
Reviewed-by: Richard Henderson 


r~

Re: [PATCH v5 17/20] hw/intc: properly model IOAPIC MSI messages

2022-11-11 Thread Richard Henderson


On 11/12/22 04:25, Alex Bennée wrote:

On the real HW the IOAPIC is wired directly to the APIC and doesn't
really generate memory accesses on the main bus of the system. To
model this we can use the MTRT_MACHINE requester type and set the id
as a magic number to represent the IOAPIC as the source.

Signed-off-by: Alex Bennée 
Cc: Paolo Bonzini 
Cc: Peter Xu 
---
  include/hw/i386/ioapic_internal.h |  2 ++
  hw/intc/ioapic.c  | 35 ---
  2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/include/hw/i386/ioapic_internal.h 
b/include/hw/i386/ioapic_internal.h
index 9880443cc7..a8c7a1418a 100644
--- a/include/hw/i386/ioapic_internal.h
+++ b/include/hw/i386/ioapic_internal.h
@@ -82,6 +82,8 @@
  
  #define IOAPIC_VER_ENTRIES_SHIFT16
  
+/* Magic number to identify IOAPIC memory transactions */

+#define MEMTX_IOAPIC0xA71C


Closing in on 1337 5p34k -- sure you didn't want a '4' there to start?  ;-)

Reviewed-by: Richard Henderson 

r~

Re: [PATCH v5 16/20] include: add MEMTXATTRS_MACHINE helper

2022-11-11 Thread Richard Henderson


On 11/12/22 04:25, Alex Bennée wrote:

We will need this shortly for machine specific transactions for the PC
IOAPIC.

Signed-off-by: Alex Bennée
---
  include/exec/memattrs.h | 8 
  1 file changed, 8 insertions(+)


Fold into patch 1?  Anyway,

Reviewed-by: Richard Henderson 


r~

Re: [PATCH v5 15/20] hw/i386: update vapic_write to use MemTxAttrs

2022-11-11 Thread Richard Henderson


On 11/12/22 04:25, Alex Bennée wrote:

This allows us to drop the current_cpu hack and properly model an
invalid access to the vapic.

Signed-off-by: Alex Bennée
---
  hw/i386/kvmvapic.c | 19 +++
  1 file changed, 11 insertions(+), 8 deletions(-)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH v5 14/20] hw/audio: explicitly set .requester_type for intel-hda

2022-11-11 Thread Richard Henderson


On 11/12/22 04:25, Alex Bennée wrote:

This is simulating a bus master writing data back into system memory.
Mark it as such.

Signed-off-by: Alex Bennée 
---
  hw/audio/intel-hda.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/audio/intel-hda.c b/hw/audio/intel-hda.c
index f38117057b..95c28b315c 100644
--- a/hw/audio/intel-hda.c
+++ b/hw/audio/intel-hda.c
@@ -345,7 +345,7 @@ static void intel_hda_corb_run(IntelHDAState *d)
  
  static void intel_hda_response(HDACodecDevice *dev, bool solicited, uint32_t response)

  {
-const MemTxAttrs attrs = { .memory = true };
+const MemTxAttrs attrs = { .requester_type = MTRT_PCI, .memory = true };


MEMTXATTRS_PCI?


r~

Re: [PATCH v5 13/20] target/i386: add explicit initialisation for MexTxAttrs

2022-11-11 Thread Richard Henderson


On 11/12/22 04:25, Alex Bennée wrote:

diff --git a/target/i386/hax/hax-all.c b/target/i386/hax/hax-all.c
index b185ee8de4..337090e16f 100644
--- a/target/i386/hax/hax-all.c
+++ b/target/i386/hax/hax-all.c
@@ -385,7 +385,7 @@ static int hax_handle_io(CPUArchState *env, uint32_t df, 
uint16_t port,
  {
  uint8_t *ptr;
  int i;
-MemTxAttrs attrs = { 0 };
+MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
  
  if (!df) {

  ptr = (uint8_t *) buffer;
diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c
index b75738ee9c..cb0720a6fa 100644
--- a/target/i386/nvmm/nvmm-all.c
+++ b/target/i386/nvmm/nvmm-all.c
@@ -502,7 +502,7 @@ nvmm_vcpu_post_run(CPUState *cpu, struct nvmm_vcpu_exit 
*exit)
  static void
  nvmm_io_callback(struct nvmm_io *io)
  {
-MemTxAttrs attrs = { 0 };
+MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
  int ret;
  
  ret = address_space_rw(_space_io, io->port, attrs, io->data,

diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
index e738d83e81..42846144dd 100644
--- a/target/i386/whpx/whpx-all.c
+++ b/target/i386/whpx/whpx-all.c
@@ -791,7 +791,7 @@ static HRESULT CALLBACK whpx_emu_ioport_callback(
  void *ctx,
  WHV_EMULATOR_IO_ACCESS_INFO *IoAccess)
  {
-MemTxAttrs attrs = { 0 };
+MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
  address_space_rw(_space_io, IoAccess->Port, attrs,
   >Data, IoAccess->AccessSize,
   IoAccess->Direction);


All three of these are hypervisor callouts to handle i/o for the guest, just 
like kvm.


r~

Re: [PATCH v5 11/20] target/sparc: initialise MemTxAttrs for CPU access

2022-11-11 Thread Richard Henderson


On 11/12/22 04:25, Alex Bennée wrote:

Both of the TLB fill functions and the cpu_sparc_get_phys_page deal
with CPU based access. Use the new MEMTXATTRS_CPU constructor to
ensure the correct CPU id is filled in should it ever be needed by any
devices later.

Signed-off-by: Alex Bennée
---
  target/sparc/mmu_helper.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH v5 10/20] target/microblaze: initialise MemTxAttrs for CPU access

2022-11-11 Thread Richard Henderson


On 11/12/22 04:25, Alex Bennée wrote:

Both of these functions deal with CPU based access (as is evidenced by
the secure check straight after). Use the new MEMTXATTRS_CPU
constructor to ensure the correct CPU id is filled in should it ever
be needed by any devices later.

Signed-off-by: Alex Bennée
---
  target/microblaze/helper.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH v5 09/20] hw/arm: remove current_cpu hack from pxa2xx access

2022-11-11 Thread Richard Henderson


On 11/12/22 04:25, Alex Bennée wrote:

We can derive the correct CPU from CPUARMState so lets not rely on
current_cpu.

Signed-off-by: Alex Bennée
---
  hw/arm/pxa2xx.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH v5 04/20] target/arm: ensure KVM traps set appropriate MemTxAttrs

2022-11-11 Thread Richard Henderson


On 11/12/22 04:25, Alex Bennée wrote:

Although most KVM users will use the in-kernel GIC emulation it is
perfectly possible not to. In this case we need to ensure the
MemTxAttrs are correctly populated so the GIC can divine the source
CPU of the operation.

Reviewed-by: Richard Henderson 
Signed-off-by: Alex Bennée 

---
v3
   - new for v3
v5
   - tags
   - use MEMTXATTRS_PCI
---


Reviewed-by: Richard Henderson 


r~

Re: [PATCH v5 02/20] target/arm: ensure TCG IO accesses set appropriate MemTxAttrs

2022-11-11 Thread Richard Henderson


On 11/12/22 04:25, Alex Bennée wrote:

@@ -2872,7 +2873,7 @@ hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, 
vaddr addr,
  .in_secure = arm_is_secure(env),
  .in_debug = true,
  };
-GetPhysAddrResult res = {};
+GetPhysAddrResult res = { .f.attrs = MEMTXATTRS_CPU(cs) };
  ARMMMUFaultInfo fi = {};
  bool ret;
  
diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c

index 0f4f4fc809..5960269421 100644
--- a/target/arm/tlb_helper.c
+++ b/target/arm/tlb_helper.c
@@ -208,7 +208,7 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
bool probe, uintptr_t retaddr)
  {
  ARMCPU *cpu = ARM_CPU(cs);
-GetPhysAddrResult res = {};
+GetPhysAddrResult res = { .f.attrs = MEMTXATTRS_CPU(cs) };


Not the right level for these.

Should be set in get_phys_addr_with_struct, alongside .secure right at the top of the 
function.



r~

Re: [PATCH v5 05/20] target/arm: ensure m-profile helpers set appropriate MemTxAttrs

2022-11-11 Thread Richard Henderson


On 11/12/22 04:25, Alex Bennée wrote:

There are a number of helpers for M-profile that deal with CPU
initiated access to the vector and stack areas. While it is unlikely
these coincided with memory mapped IO devices it is not inconceivable.
Embedded targets tend to attract all sorts of interesting code and for
completeness we should tag the transaction appropriately.

Signed-off-by: Alex Bennée 

---
v5
   - rebase fixes for refactoring
---
  target/arm/m_helper.c | 12 ++--
  1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/target/arm/m_helper.c b/target/arm/m_helper.c
index 355cd4d60a..2fb1ef95cd 100644
--- a/target/arm/m_helper.c
+++ b/target/arm/m_helper.c
@@ -184,7 +184,7 @@ static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, 
uint32_t value,
  CPUState *cs = CPU(cpu);
  CPUARMState *env = >env;
  MemTxResult txres;
-GetPhysAddrResult res = {};
+GetPhysAddrResult res = { .f.attrs = MEMTXATTRS_CPU(cs) };


This entire patch goes away with .attrs set properly in 
get_phys_addr_with_struct.


r~

Re: [PATCH v5 02/20] target/arm: ensure TCG IO accesses set appropriate MemTxAttrs

2022-11-11 Thread Richard Henderson


On 11/12/22 04:25, Alex Bennée wrote:

diff --git a/target/arm/ptw.c b/target/arm/ptw.c
index 3745ac9723..4b6683f90d 100644
--- a/target/arm/ptw.c
+++ b/target/arm/ptw.c
@@ -2634,6 +2634,7 @@ static bool get_phys_addr_twostage(CPUARMState *env, 
S1Translate *ptw,
  s1_lgpgsz = result->f.lg_page_size;
  cacheattrs1 = result->cacheattrs;
  memset(result, 0, sizeof(*result));
+result->f.attrs = MEMTXATTRS_CPU(env_cpu(env));


Ouch.  This means that f.secure has been reset too, which would break Secure EL1 running 
under Secure EL2.  I'll prepare a fix for 7.2...


Anyway,
Reviewed-by: Richard Henderson 


r~

  
  ret = get_phys_addr_lpae(env, ptw, ipa, access_type, is_el0, result, fi);

  fi->s2addr = ipa;
@@ -2872,7 +2873,7 @@ hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, 
vaddr addr,
  .in_secure = arm_is_secure(env),
  .in_debug = true,
  };
-GetPhysAddrResult res = {};
+GetPhysAddrResult res = { .f.attrs = MEMTXATTRS_CPU(cs) };
  ARMMMUFaultInfo fi = {};
  bool ret;
  
diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c

index 0f4f4fc809..5960269421 100644
--- a/target/arm/tlb_helper.c
+++ b/target/arm/tlb_helper.c
@@ -208,7 +208,7 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
bool probe, uintptr_t retaddr)
  {
  ARMCPU *cpu = ARM_CPU(cs);
-GetPhysAddrResult res = {};
+GetPhysAddrResult res = { .f.attrs = MEMTXATTRS_CPU(cs) };
  ARMMMUFaultInfo local_fi, *fi;
  int ret;

[PATCH v4] acpi/tests/avocado/bits: some misc fixes

2022-11-11 Thread Ani Sinha

Most of the changes are trivial. The bits test timeout has now been increased
to 200 seconds in order to accommodate slower systems and fewer unnecessary
failures. Removed of the reference to non-existent README file in docs. Some
minor corrections in the doc file.

CC: Thomas Huth 
CC: Michael S. Tsirkin 
CC: qemu-triv...@nongnu.org
Signed-off-by: Ani Sinha 
Reviewed-by: Thomas Huth 
---
 docs/devel/acpi-bits.rst   | 12 
 tests/avocado/acpi-bits.py |  8 ++--
 2 files changed, 10 insertions(+), 10 deletions(-)

changes from v1: address Thomas' suggestions.
changes from v2: more minor corrections in doc, tags added.
changes from v3: raised timeout to 200 secs overriding the default
avocado timeout of 120 secs.

diff --git a/docs/devel/acpi-bits.rst b/docs/devel/acpi-bits.rst
index c9564d871a..56e76338c3 100644
--- a/docs/devel/acpi-bits.rst
+++ b/docs/devel/acpi-bits.rst
@@ -16,11 +16,8 @@ end user. The other is that we have more control of what we 
wanted to test
 and how by directly using acpica interpreter on top of the bios on a running
 system. More details on the inspiration for developing biosbits and its real
 life uses can be found in [#a]_ and [#b]_.
-This directory contains tests written in python using avocado framework that
-exercises the QEMU bios components using biosbits and reports test failures.
 For QEMU, we maintain a fork of bios bits in gitlab along with all the
-dependent submodules:
-https://gitlab.com/qemu-project/biosbits-bits
+dependent submodules here: https://gitlab.com/qemu-project/biosbits-bits
 This fork contains numerous fixes, a newer acpica and changes specific to
 running this avocado QEMU tests using bits. The author of this document
 is the sole maintainer of the QEMU fork of bios bits repo.
@@ -38,10 +35,9 @@ Under ``tests/avocado/`` as the root we have:
│ ├── bits-config
│ │ └── bits-cfg.txt
│ ├── bits-tests
-   │ │ ├── smbios.py2
-   │ │ ├── testacpi.py2
-   │ │ └── testcpuid.py2
-   │ └── README
+   │   ├── smbios.py2
+   │   ├── testacpi.py2
+   │   └── testcpuid.py2
├── acpi-bits.py
 
 * ``tests/avocado``:
diff --git a/tests/avocado/acpi-bits.py b/tests/avocado/acpi-bits.py
index 8745a58a76..a67d30d583 100644
--- a/tests/avocado/acpi-bits.py
+++ b/tests/avocado/acpi-bits.py
@@ -134,6 +134,9 @@ class AcpiBitsTest(QemuBaseTest): #pylint: 
disable=too-many-instance-attributes
 :avocado: tags=acpi
 
 """
+# in slower systems the test can take as long as 3 minutes to complete.
+timeout = 210
+
 def __init__(self, *args, **kwargs):
 super().__init__(*args, **kwargs)
 self._vm = None
@@ -385,8 +388,9 @@ def test_acpi_smbios_bits(self):
 self._vm.launch()
 # biosbits has been configured to run all the specified test suites
 # in batch mode and then automatically initiate a vm shutdown.
-# sleep for maximum of one minute
-max_sleep_time = time.monotonic() + 60
+# sleep for maximum of 200 seconds in order to accommodate
+# even slower test setups.
+max_sleep_time = time.monotonic() + 200
 while self._vm.is_running() and time.monotonic() < max_sleep_time:
 time.sleep(1)
 
-- 
2.34.1

Re: biosbits test failing on origin/master

2022-11-11 Thread Ani Sinha

On Fri, Nov 11, 2022 at 4:39 PM Ani Sinha  wrote:
>
> On Fri, Nov 11, 2022 at 9:52 AM Ani Sinha  wrote:
> >
> > On Thu, Nov 10, 2022 at 11:37 PM John Snow  wrote:
> > >
> > > Hiya, on today's origin/master
> > > (2ccad61746ca7de5dd3e25146062264387e43bd4) I'm finding that "make
> > > check-avocado" is failing on the new biosbits test on my local
> > > development machine:
> > >
> > >  (001/193) tests/avocado/acpi-bits.py:AcpiBitsTest.test_acpi_smbios_bits:
> > > FAIL: True is not false : The VM seems to have failed to shutdown in
> > > time (83.65 s)
> > >
> > > Is this a known issue, or should I begin to investigate it?
> >
> > In my test environment it does pass.
> >
> > $ ./tests/venv/bin/avocado run -t acpi tests/avocado
> > Fetching asset from
> > tests/avocado/acpi-bits.py:AcpiBitsTest.test_acpi_smbios_bits
> > JOB ID : 35726df7d3c2e0f41847822620c78195ba45b9b9
> > JOB LOG: 
> > /home/anisinha/avocado/job-results/job-2022-11-11T09.42-35726df/job.log
> >  (1/1) tests/avocado/acpi-bits.py:AcpiBitsTest.test_acpi_smbios_bits:
> > PASS (57.57 s)
> > RESULTS: PASS 1 | ERROR 0 | FAIL 0 | SKIP 0 | WARN 0 | INTERRUPT 0
> > | CANCEL 0
> > JOB TIME   : 63.82 s
> >
> > However, I have seen that on certain slower test machines or when run
> > within a virtual machine, the test can take longer to complete and 60
> > secs may not always be enough.
>
> Here is an interesting data point. I re-ran on a Centos 8 VM running
> on Ubuntu bare metal. Just like the Ubuntu bare metal, it passed fine.
> Next, I tried to run it on a Centos 7.9.2009 VM running on the same
> Ubuntu bare metal. I was able to reproduce this consistently. So I did
> some digging, gdb into qemu and it looked like the VM was just slow
> but not stuck. So I timed the QEMU command line that was being used in
> the test using the same iso that was being generated by the test. Here
> it is:
>
> # time ./qemu-system-x86_64 -display none -vga none -chardev
> file,path=/var/tmp/debugcon-log.txt,id=debugcon -device
> isa-debugcon,iobase=0x403,chardev=debugcon -cdrom
> /var/tmp/acpi-bits-oogd8wp9.tmp/bits-2020.iso
>
> real 2m34.052s
> user 2m33.858s
> sys 0m0.467s
>
> On bare metal Ubuntu, I see this:
>
> $ time ./qemu-system-x86_64 -display none -vga none -chardev
> file,path=/var/tmp/debugcon-log.txt,id=debugcon -device
> isa-debugcon,iobase=0x403,chardev=debugcon -cdrom ~/temp/bits-2020.iso
>
> real 1m15.318s
> user 1m15.136s
> sys 0m0.345s
>
> With "-icount auto" added in the command line,  both bare metal and VM
> environments were slower. However, we need this command line for some
> latency tests.
>
> avocado framework has a 2 min timeout for any test.

Just realized that this value is coming from
https://gitlab.com/anisinha/qemu/-/blob/master/tests/avocado/avocado_qemu/__init__.py#L232
and can be overiden.
So I will override this to 200 seconds. If we still see timeouts, we
can raise this further by a bit more.

Thanks for bringing this to my attention.

This would
> definitely time out even without my internal timeout check. I verified
> that the test indeed passed by looking into the debug con logs pushed
> out by the test framework. If the system is just slow/overloaded I am
> not sure what we can do.
>
> In those cases raising the maximum
> > completion time to 90 secs helps. Perhaps you can try this and let me
> > know if it helps:
>
> Maybe I will make the timeout 90 secs and hope that this will be enough.
>
> >
> > diff --git a/tests/avocado/acpi-bits.py b/tests/avocado/acpi-bits.py
> > index 8745a58a76..b11fe39350 100644
> > --- a/tests/avocado/acpi-bits.py
> > +++ b/tests/avocado/acpi-bits.py
> > @@ -385,8 +385,9 @@ def test_acpi_smbios_bits(self):
> >  self._vm.launch()
> >  # biosbits has been configured to run all the specified test suites
> >  # in batch mode and then automatically initiate a vm shutdown.
> > -# sleep for maximum of one minute
> > -max_sleep_time = time.monotonic() + 60
> > +# sleep for a maximum of one and half minutes to accommodate
> > running this
> > +# even on slower machines.
> > +max_sleep_time = time.monotonic() + 90
> >  while self._vm.is_running() and time.monotonic() < max_sleep_time:
> >  time.sleep(1)

[PATCH for-8.0 1/2] target/arm: Use tcg_gen_atomic_cmpxchg_i128 for STXP

2022-11-11 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 target/arm/helper-a64.h|   6 ---
 target/arm/helper-a64.c| 104 -
 target/arm/translate-a64.c |  60 -
 3 files changed, 35 insertions(+), 135 deletions(-)

diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index 7b706571bb..94065d1917 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -50,12 +50,6 @@ DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, 
ptr)
 DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env)
 DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
 DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
-DEF_HELPER_FLAGS_4(paired_cmpxchg64_le, TCG_CALL_NO_WG, i64, env, i64, i64, 
i64)
-DEF_HELPER_FLAGS_4(paired_cmpxchg64_le_parallel, TCG_CALL_NO_WG,
-   i64, env, i64, i64, i64)
-DEF_HELPER_FLAGS_4(paired_cmpxchg64_be, TCG_CALL_NO_WG, i64, env, i64, i64, 
i64)
-DEF_HELPER_FLAGS_4(paired_cmpxchg64_be_parallel, TCG_CALL_NO_WG,
-   i64, env, i64, i64, i64)
 DEF_HELPER_5(casp_le_parallel, void, env, i32, i64, i64, i64)
 DEF_HELPER_5(casp_be_parallel, void, env, i32, i64, i64, i64)
 DEF_HELPER_FLAGS_3(advsimd_maxh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 77a8502b6b..7dbdb2c233 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -505,110 +505,6 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, 
uint32_t bytes)
 return crc32c(acc, buf, bytes) ^ 0x;
 }
 
-uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
- uint64_t new_lo, uint64_t new_hi)
-{
-Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
-Int128 newv = int128_make128(new_lo, new_hi);
-Int128 oldv;
-uintptr_t ra = GETPC();
-uint64_t o0, o1;
-bool success;
-int mem_idx = cpu_mmu_index(env, false);
-MemOpIdx oi0 = make_memop_idx(MO_LEUQ | MO_ALIGN_16, mem_idx);
-MemOpIdx oi1 = make_memop_idx(MO_LEUQ, mem_idx);
-
-o0 = cpu_ldq_le_mmu(env, addr + 0, oi0, ra);
-o1 = cpu_ldq_le_mmu(env, addr + 8, oi1, ra);
-oldv = int128_make128(o0, o1);
-
-success = int128_eq(oldv, cmpv);
-if (success) {
-cpu_stq_le_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
-cpu_stq_le_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
-}
-
-return !success;
-}
-
-uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
-  uint64_t new_lo, uint64_t new_hi)
-{
-Int128 oldv, cmpv, newv;
-uintptr_t ra = GETPC();
-bool success;
-int mem_idx;
-MemOpIdx oi;
-
-assert(HAVE_CMPXCHG128);
-
-mem_idx = cpu_mmu_index(env, false);
-oi = make_memop_idx(MO_LE | MO_128 | MO_ALIGN, mem_idx);
-
-cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
-newv = int128_make128(new_lo, new_hi);
-oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
-
-success = int128_eq(oldv, cmpv);
-return !success;
-}
-
-uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
- uint64_t new_lo, uint64_t new_hi)
-{
-/*
- * High and low need to be switched here because this is not actually a
- * 128bit store but two doublewords stored consecutively
- */
-Int128 cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
-Int128 newv = int128_make128(new_hi, new_lo);
-Int128 oldv;
-uintptr_t ra = GETPC();
-uint64_t o0, o1;
-bool success;
-int mem_idx = cpu_mmu_index(env, false);
-MemOpIdx oi0 = make_memop_idx(MO_BEUQ | MO_ALIGN_16, mem_idx);
-MemOpIdx oi1 = make_memop_idx(MO_BEUQ, mem_idx);
-
-o1 = cpu_ldq_be_mmu(env, addr + 0, oi0, ra);
-o0 = cpu_ldq_be_mmu(env, addr + 8, oi1, ra);
-oldv = int128_make128(o0, o1);
-
-success = int128_eq(oldv, cmpv);
-if (success) {
-cpu_stq_be_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
-cpu_stq_be_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
-}
-
-return !success;
-}
-
-uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
-  uint64_t new_lo, uint64_t new_hi)
-{
-Int128 oldv, cmpv, newv;
-uintptr_t ra = GETPC();
-bool success;
-int mem_idx;
-MemOpIdx oi;
-
-assert(HAVE_CMPXCHG128);
-
-mem_idx = cpu_mmu_index(env, false);
-oi = make_memop_idx(MO_BE | MO_128 | MO_ALIGN, mem_idx);
-
-/*
- * High and low need to be switched here because this is not actually a
- * 128bit store but two doublewords stored consecutively
- */
-cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
-newv = int128_make128(new_hi, new_lo);
-oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
-
-

[PATCH for-8.0 0/2] target/arm: Use tcg_gen_atomic_cmpxchg_i128

2022-11-11 Thread Richard Henderson

Use the new common function and avoid rolling our own helper(s).

r~

Based-on: 2022074101.2069454-1-richard.hender...@linaro.org
("tcg: Support for Int128 with helpers")


Richard Henderson (2):
  target/arm: Use tcg_gen_atomic_cmpxchg_i128 for STXP
  target/arm: Use tcg_gen_atomic_cmpxchg_i128 for CASP

 target/arm/helper-a64.h|   8 --
 target/arm/helper-a64.c| 147 -
 target/arm/translate-a64.c | 121 +-
 3 files changed, 53 insertions(+), 223 deletions(-)

-- 
2.34.1

[PATCH for-8.0 2/2] target/arm: Use tcg_gen_atomic_cmpxchg_i128 for CASP

2022-11-11 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 target/arm/helper-a64.h|  2 --
 target/arm/helper-a64.c| 43 ---
 target/arm/translate-a64.c | 61 +++---
 3 files changed, 18 insertions(+), 88 deletions(-)

diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index 94065d1917..ff56807247 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -50,8 +50,6 @@ DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
 DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env)
 DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
 DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
-DEF_HELPER_5(casp_le_parallel, void, env, i32, i64, i64, i64)
-DEF_HELPER_5(casp_be_parallel, void, env, i32, i64, i64, i64)
 DEF_HELPER_FLAGS_3(advsimd_maxh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
 DEF_HELPER_FLAGS_3(advsimd_minh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
 DEF_HELPER_FLAGS_3(advsimd_maxnumh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 7dbdb2c233..0972a4bdd0 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -505,49 +505,6 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, 
uint32_t bytes)
 return crc32c(acc, buf, bytes) ^ 0x;
 }
 
-/* Writes back the old data into Rs.  */
-void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
-  uint64_t new_lo, uint64_t new_hi)
-{
-Int128 oldv, cmpv, newv;
-uintptr_t ra = GETPC();
-int mem_idx;
-MemOpIdx oi;
-
-assert(HAVE_CMPXCHG128);
-
-mem_idx = cpu_mmu_index(env, false);
-oi = make_memop_idx(MO_LE | MO_128 | MO_ALIGN, mem_idx);
-
-cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]);
-newv = int128_make128(new_lo, new_hi);
-oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
-
-env->xregs[rs] = int128_getlo(oldv);
-env->xregs[rs + 1] = int128_gethi(oldv);
-}
-
-void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
-  uint64_t new_hi, uint64_t new_lo)
-{
-Int128 oldv, cmpv, newv;
-uintptr_t ra = GETPC();
-int mem_idx;
-MemOpIdx oi;
-
-assert(HAVE_CMPXCHG128);
-
-mem_idx = cpu_mmu_index(env, false);
-oi = make_memop_idx(MO_LE | MO_128 | MO_ALIGN, mem_idx);
-
-cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
-newv = int128_make128(new_lo, new_hi);
-oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
-
-env->xregs[rs + 1] = int128_getlo(oldv);
-env->xregs[rs] = int128_gethi(oldv);
-}
-
 /*
  * AdvSIMD half-precision
  */
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index dffd7ee737..067426baef 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -2688,53 +2688,28 @@ static void gen_compare_and_swap_pair(DisasContext *s, 
int rs, int rt,
 tcg_gen_extr32_i64(s2, s1, cmp);
 }
 tcg_temp_free_i64(cmp);
-} else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
-if (HAVE_CMPXCHG128) {
-TCGv_i32 tcg_rs = tcg_constant_i32(rs);
-if (s->be_data == MO_LE) {
-gen_helper_casp_le_parallel(cpu_env, tcg_rs,
-clean_addr, t1, t2);
-} else {
-gen_helper_casp_be_parallel(cpu_env, tcg_rs,
-clean_addr, t1, t2);
-}
-} else {
-gen_helper_exit_atomic(cpu_env);
-s->base.is_jmp = DISAS_NORETURN;
-}
 } else {
-TCGv_i64 d1 = tcg_temp_new_i64();
-TCGv_i64 d2 = tcg_temp_new_i64();
-TCGv_i64 a2 = tcg_temp_new_i64();
-TCGv_i64 c1 = tcg_temp_new_i64();
-TCGv_i64 c2 = tcg_temp_new_i64();
-TCGv_i64 zero = tcg_constant_i64(0);
+TCGv_i128 cmp = tcg_temp_new_i128();
+TCGv_i128 val = tcg_temp_new_i128();
 
-/* Load the two words, in memory order.  */
-tcg_gen_qemu_ld_i64(d1, clean_addr, memidx,
-MO_64 | MO_ALIGN_16 | s->be_data);
-tcg_gen_addi_i64(a2, clean_addr, 8);
-tcg_gen_qemu_ld_i64(d2, a2, memidx, MO_64 | s->be_data);
+if (s->be_data == MO_LE) {
+tcg_gen_concat_i64_i128(val, t1, t2);
+tcg_gen_concat_i64_i128(cmp, s1, s2);
+} else {
+tcg_gen_concat_i64_i128(val, t2, t1);
+tcg_gen_concat_i64_i128(cmp, s2, s1);
+}
 
-/* Compare the two words, also in memory order.  */
-tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
-tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
-tcg_gen_and_i64(c2, c2, c1);
+tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx,
+MO_128 | MO_ALIGN | s->be_data);
+

Re: [PATCH v5 01/20] hw: encode accessing CPU index in MemTxAttrs

2022-11-11 Thread Richard Henderson


On 11/12/22 04:25, Alex Bennée wrote:

+/*
+ * Bus masters which don't specify any attributes will get this which
+ * indicates none of the attributes can be used.
+ */
+#define MEMTXATTRS_UNSPECIFIED ((MemTxAttrs) \
+{ .requester_type = MTRT_UNSPECIFIED })
+
+/*
+ * Helper for setting a basic CPU sourced transaction, it expects a
+ * CPUState *
+ */
+#define MEMTXATTRS_CPU(cs) ((MemTxAttrs) \
+{.requester_type = MTRT_CPU, \
+ .requester_id = cs->cpu_index})
+
+/*
+ * Helper for setting a basic PCI sourced transaction, it expects a
+ * PCIDevice *
  */
+#define MEMTXATTRS_PCI(dev) ((MemTxAttrs) \
+ {.requester_type = MTRT_PCI,   \
+ .requester_id = pci_requester_id(dev)})


Any reason these second two shouldn't be inlines?
Anything with arguments gets better type checking that way, unless you need preprocessor 
magic, which is not the case here.



Otherwise,
Reviewed-by: Richard Henderson 


r~

[PULL 1/1] hw/loongarch: Fix loongarch fdt addr confict

2022-11-11 Thread Richard Henderson

From: Song Gao 

Fix LoongArch check-tcg error:
   TESThello on loongarch64
qemu-system-loongarch64: Some ROM regions are overlapping
These ROM regions might have been loaded by direct user request or by default.
They could be BIOS/firmware images, a guest kernel, initrd or some other file 
loaded into guest memory.
Check whether you intended to load all this guest code, and whether it has been 
built to load to the correct addresses.

The following two regions overlap (in the memory address space):
   hello ELF program header segment 0 (addresses 0x0020 - 
0x00242000)
   fdt (addresses 0x0020 - 0x0030)
make[1]: *** [Makefile:177: run-hello] Error 1

Fixes: 021836936ef ("hw/loongarch: Load FDT table into dram memory space")
Reported-by: Richard Henderson 
Signed-off-by: Song Gao 
Reviewed-by: Richard Henderson 
Message-Id: <20221109020449.978064-1-gaos...@loongson.cn>
Signed-off-by: Richard Henderson 
---
 hw/loongarch/virt.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index 5e4c2790bf..5136940b0b 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -793,13 +793,13 @@ static void loongarch_init(MachineState *machine)
 qemu_add_machine_init_done_notifier(>machine_done);
 fdt_add_pcie_node(lams);
 /*
- * Since lowmem region starts from 0, FDT base address is located
- * at 2 MiB to avoid NULL pointer access.
- *
+ * Since lowmem region starts from 0 and Linux kernel legacy start address
+ * at 2 MiB, FDT base address is located at 1 MiB to avoid NULL pointer
+ * access. FDT size limit with 1 MiB.
  * Put the FDT into the memory map as a ROM image: this will ensure
  * the FDT is copied again upon reset, even if addr points into RAM.
  */
-fdt_base = 2 * MiB;
+fdt_base = 1 * MiB;
 qemu_fdt_dumpdtb(machine->fdt, lams->fdt_size);
 rom_add_blob_fixed("fdt", machine->fdt, lams->fdt_size, fdt_base);
 }
-- 
2.34.1

[PULL 0/1] Fix loongarch make check-tcg failure

2022-11-11 Thread Richard Henderson

Since the fix has been sitting around on the list for a
few days, I thought I'd help get this merged before rc1.


r~


The following changes since commit 2ccad61746ca7de5dd3e25146062264387e43bd4:

  Merge tag 'pull-tcg-20221109' of https://gitlab.com/rth7680/qemu into staging 
(2022-11-09 13:26:45 -0500)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-la-20221112

for you to fetch changes up to 46b21de238c643ea098f2dcffe493abd135f7d89:

  hw/loongarch: Fix loongarch fdt addr confict (2022-11-12 11:05:52 +1000)


Fix loongarch make check-tcg failure.


Song Gao (1):
  hw/loongarch: Fix loongarch fdt addr confict

 hw/loongarch/virt.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

Re: [PATCH v5 11/20] target/sparc: initialise MemTxAttrs for CPU access

2022-11-11 Thread Mark Cave-Ayland


On 11/11/2022 18:25, Alex Bennée wrote:


Both of the TLB fill functions and the cpu_sparc_get_phys_page deal
with CPU based access. Use the new MEMTXATTRS_CPU constructor to
ensure the correct CPU id is filled in should it ever be needed by any
devices later.

Signed-off-by: Alex Bennée 
---
  target/sparc/mmu_helper.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/target/sparc/mmu_helper.c b/target/sparc/mmu_helper.c
index 919448a494..eeb52b5ee6 100644
--- a/target/sparc/mmu_helper.c
+++ b/target/sparc/mmu_helper.c
@@ -212,7 +212,7 @@ bool sparc_cpu_tlb_fill(CPUState *cs, vaddr address, int 
size,
  target_ulong vaddr;
  target_ulong page_size;
  int error_code = 0, prot, access_index;
-MemTxAttrs attrs = {};
+MemTxAttrs attrs = MEMTXATTRS_CPU(cs);
  
  /*

   * TODO: If we ever need tlb_vaddr_to_host for this target,
@@ -771,7 +771,7 @@ bool sparc_cpu_tlb_fill(CPUState *cs, vaddr address, int 
size,
  target_ulong vaddr;
  hwaddr paddr;
  target_ulong page_size;
-MemTxAttrs attrs = {};
+MemTxAttrs attrs = MEMTXATTRS_CPU(cs);
  int error_code = 0, prot, access_index;
  
  address &= TARGET_PAGE_MASK;

@@ -890,7 +890,7 @@ static int cpu_sparc_get_phys_page(CPUSPARCState *env, 
hwaddr *phys,
  {
  target_ulong page_size;
  int prot, access_index;
-MemTxAttrs attrs = {};
+MemTxAttrs attrs = MEMTXATTRS_CPU(env_cpu(env));
  
  return get_physical_address(env, phys, , _index, , addr,

  rw, mmu_idx, _size);


Reviewed-by: Mark Cave-Ayland 


ATB,

Mark.

Re: [PATCH for-8.0 0/2] hw/misc: Convert MOS6522 devices to 3-phase reset

2022-11-11 Thread Mark Cave-Ayland


On 10/11/2022 14:34, Peter Maydell wrote:


This patchset converts the TYPE_MOS6522 class and its subclasses to
use 3-phase reset. This is part of the work I'm doing to clean up some
of the reset-related code by getting rid of the
device_class_set_parent_reset() function, which is used by
legacy-reset subclasses which want to chain to their parent's reset
function. There aren't very many of these devices in total, and if we
convert them all to 3-phase reset they can use the 3-phase-reset
equivalent (resettable_class_set_parent_phases()).  Eventually this
will then let us simplify the transitional code for handling old-style
device reset.

This is 8.0 material. Tested with 'make check' and 'make check-avocado'
for ppc and m68k builds.

thanks
--PMM

Peter Maydell (2):
   hw/misc/mos6522: Convert TYPE_MOS6522 to 3-phase reset
   hw/misc: Convert TYPE_MOS6522 subclasses to 3-phase reset

  include/hw/misc/mos6522.h |  2 +-
  hw/misc/mac_via.c | 26 --
  hw/misc/macio/cuda.c  | 14 --
  hw/misc/macio/pmu.c   | 14 --
  hw/misc/mos6522.c |  7 ---
  5 files changed, 37 insertions(+), 26 deletions(-)


Again, not having played with the new reset code myself I'm not quite confident 
enough to give a Reviewed-by tag, but it appears to do the right thing and passes the 
avocado tests so:


Acked-by: Mark Cave-Ayland 


ATB,

Mark.

Re: [PATCH] hw/riscv: virt: Remove the redundant ipi-id property

2022-11-11 Thread Bin Meng

On Sat, Nov 12, 2022 at 8:42 AM Atish Kumar Patra  wrote:
>
>
>
> On Fri, Nov 11, 2022 at 4:39 PM Bin Meng  wrote:
>>
>> On Sat, Nov 12, 2022 at 4:14 AM Atish Patra  wrote:
>> >
>> > The imsic DT binding has changed and no longer require an ipi-id.
>>
>> requires
>>
>
> Sure. Will fix it.
>
>>
>> Could you please put a link here to the upstream imsic DT binding for 
>> reference?
>>
>
> It's not merged yet. Here is the latest version:
>
> https://lore.kernel.org/lkml/2022044207.1478350-5-apa...@ventanamicro.com/

Thanks. Putting this link in the commit message helps a lot.

>
>>
>> > The latest IMSIC driver dynamically allocates ipi id if slow-ipi
>> > is not defined.
>> >
>> > Get rid of the unused dt property which may lead to confusion.
>> >
>> > Signed-off-by: Atish Patra 
>> > ---
>> >  hw/riscv/virt.c | 2 --
>> >  include/hw/riscv/virt.h | 1 -
>> >  2 files changed, 3 deletions(-)
>> >

Regards,
Bin

Re: [PATCH] hw/riscv: virt: Remove the redundant ipi-id property

2022-11-11 Thread Atish Kumar Patra

On Fri, Nov 11, 2022 at 4:39 PM Bin Meng  wrote:

> On Sat, Nov 12, 2022 at 4:14 AM Atish Patra  wrote:
> >
> > The imsic DT binding has changed and no longer require an ipi-id.
>
> requires
>
>
Sure. Will fix it.


> Could you please put a link here to the upstream imsic DT binding for
> reference?
>
>
It's not merged yet. Here is the latest version:

https://lore.kernel.org/lkml/2022044207.1478350-5-apa...@ventanamicro.com/


> > The latest IMSIC driver dynamically allocates ipi id if slow-ipi
> > is not defined.
> >
> > Get rid of the unused dt property which may lead to confusion.
> >
> > Signed-off-by: Atish Patra 
> > ---
> >  hw/riscv/virt.c | 2 --
> >  include/hw/riscv/virt.h | 1 -
> >  2 files changed, 3 deletions(-)
> >
> > diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> > index a5bc7353b412..0bc0964e42a8 100644
> > --- a/hw/riscv/virt.c
> > +++ b/hw/riscv/virt.c
> > @@ -546,8 +546,6 @@ static void create_fdt_imsic(RISCVVirtState *s,
> const MemMapEntry *memmap,
> >  riscv_socket_count(mc) * sizeof(uint32_t) * 4);
> >  qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,num-ids",
> >  VIRT_IRQCHIP_NUM_MSIS);
> > -qemu_fdt_setprop_cells(mc->fdt, imsic_name, "riscv,ipi-id",
> > -VIRT_IRQCHIP_IPI_MSI);
> >  if (riscv_socket_count(mc) > 1) {
> >  qemu_fdt_setprop_cell(mc->fdt, imsic_name,
> "riscv,hart-index-bits",
> >  imsic_num_bits(imsic_max_hart_per_socket));
> > diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h
> > index be4ab8fe7f71..62513e075c47 100644
> > --- a/include/hw/riscv/virt.h
> > +++ b/include/hw/riscv/virt.h
> > @@ -93,7 +93,6 @@ enum {
> >
> >  #define VIRT_PLATFORM_BUS_NUM_IRQS 32
> >
> > -#define VIRT_IRQCHIP_IPI_MSI 1
> >  #define VIRT_IRQCHIP_NUM_MSIS 255
> >  #define VIRT_IRQCHIP_NUM_SOURCES VIRTIO_NDEV
> >  #define VIRT_IRQCHIP_NUM_PRIO_BITS 3
> > --
>
> Regards,
> Bin
>

Re: [PATCH] hw/riscv: virt: Remove the redundant ipi-id property

2022-11-11 Thread Bin Meng

On Sat, Nov 12, 2022 at 4:14 AM Atish Patra  wrote:
>
> The imsic DT binding has changed and no longer require an ipi-id.

requires

Could you please put a link here to the upstream imsic DT binding for reference?

> The latest IMSIC driver dynamically allocates ipi id if slow-ipi
> is not defined.
>
> Get rid of the unused dt property which may lead to confusion.
>
> Signed-off-by: Atish Patra 
> ---
>  hw/riscv/virt.c | 2 --
>  include/hw/riscv/virt.h | 1 -
>  2 files changed, 3 deletions(-)
>
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index a5bc7353b412..0bc0964e42a8 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -546,8 +546,6 @@ static void create_fdt_imsic(RISCVVirtState *s, const 
> MemMapEntry *memmap,
>  riscv_socket_count(mc) * sizeof(uint32_t) * 4);
>  qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,num-ids",
>  VIRT_IRQCHIP_NUM_MSIS);
> -qemu_fdt_setprop_cells(mc->fdt, imsic_name, "riscv,ipi-id",
> -VIRT_IRQCHIP_IPI_MSI);
>  if (riscv_socket_count(mc) > 1) {
>  qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,hart-index-bits",
>  imsic_num_bits(imsic_max_hart_per_socket));
> diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h
> index be4ab8fe7f71..62513e075c47 100644
> --- a/include/hw/riscv/virt.h
> +++ b/include/hw/riscv/virt.h
> @@ -93,7 +93,6 @@ enum {
>
>  #define VIRT_PLATFORM_BUS_NUM_IRQS 32
>
> -#define VIRT_IRQCHIP_IPI_MSI 1
>  #define VIRT_IRQCHIP_NUM_MSIS 255
>  #define VIRT_IRQCHIP_NUM_SOURCES VIRTIO_NDEV
>  #define VIRT_IRQCHIP_NUM_PRIO_BITS 3
> --

Regards,
Bin

Re: [PATCH] hw/misc/pfsoc: add fabric clocks to ioscb

2022-11-11 Thread Bin Meng

Hi Conor,

On Sat, Nov 12, 2022 at 8:31 AM Conor Dooley  wrote:
>
> On Thu, Nov 10, 2022 at 12:18:44AM +0100, Philippe Mathieu-Daudé wrote:
> > Hi Conor,
> >
> > On 9/11/22 20:08, Conor Dooley wrote:
> > > From: Conor Dooley 
> > >
> > > @@ -168,6 +170,10 @@ static void mchp_pfsoc_ioscb_realize(DeviceState 
> > > *dev, Error **errp)
> > > "mchp.pfsoc.ioscb.cfg", 
> > > IOSCB_SUBMOD_REG_SIZE);
> > >   memory_region_add_subregion(>container, IOSCB_CFG_BASE, >cfg);
> > > +memory_region_init_io(>ccc, OBJECT(s), _pfsoc_dummy_ops, s,
> > > +  "mchp.pfsoc.ioscb.ccc", IOSCB_CCC_REG_SIZE);
> > > +memory_region_add_subregion(>container, IOSCB_CCC_BASE, >ccc);
> >
> > Unrelated but using the TYPE_UNIMPLEMENTED_DEVICE would ease tracing all
> > these block accesses, as the block name would appear before the
> > address/size. See for example aspeed_mmio_map_unimplemented();
>
> Certainly looks like a nice idea, and I gave it a go but kept running
> into issues due to my lack of understanding of QEMU :) I'm going to add
> this to my todo pile - while I have a v2 of this lined up, I'd rather
> not hold up adding the regions that prevent booting Linux etc as I
> fumble around trying to understand the hierarchy of devices required to
> set up something similar to your aspeed example.
>

Do you plan to bring QEMU support to the latest MSS_LINUX configuration [1]

Currently QEMU is supporting the MSS_BAREMETAL configuration. Do you
think it makes sense to support both?

[1] https://github.com/polarfire-soc/icicle-kit-reference-design

Regards,
Bin

Re: [PATCH] hw/misc/pfsoc: add fabric clocks to ioscb

2022-11-11 Thread Conor Dooley

On Thu, Nov 10, 2022 at 12:18:44AM +0100, Philippe Mathieu-Daudé wrote:
> Hi Conor,
> 
> On 9/11/22 20:08, Conor Dooley wrote:
> > From: Conor Dooley 
> > 
> > @@ -168,6 +170,10 @@ static void mchp_pfsoc_ioscb_realize(DeviceState *dev, 
> > Error **errp)
> > "mchp.pfsoc.ioscb.cfg", IOSCB_SUBMOD_REG_SIZE);
> >   memory_region_add_subregion(>container, IOSCB_CFG_BASE, >cfg);
> > +memory_region_init_io(>ccc, OBJECT(s), _pfsoc_dummy_ops, s,
> > +  "mchp.pfsoc.ioscb.ccc", IOSCB_CCC_REG_SIZE);
> > +memory_region_add_subregion(>container, IOSCB_CCC_BASE, >ccc);
> 
> Unrelated but using the TYPE_UNIMPLEMENTED_DEVICE would ease tracing all
> these block accesses, as the block name would appear before the
> address/size. See for example aspeed_mmio_map_unimplemented();

Certainly looks like a nice idea, and I gave it a go but kept running
into issues due to my lack of understanding of QEMU :) I'm going to add
this to my todo pile - while I have a v2 of this lined up, I'd rather
not hold up adding the regions that prevent booting Linux etc as I
fumble around trying to understand the hierarchy of devices required to
set up something similar to your aspeed example.

Thanks,
Conor.

Re: [PATCH v3] qapi/qom: Memory backend property prealloc-threads doc fix

2022-11-11 Thread Gavin Shan


On 11/11/22 6:54 PM, Igor Mammedov wrote:

On Fri, 11 Nov 2022 17:34:04 +0800
Gavin Shan  wrote:

On 11/11/22 5:13 PM, Igor Mammedov wrote:

On Fri, 11 Nov 2022 07:47:16 +0100
Markus Armbruster  wrote:

Gavin Shan  writes:

On 11/11/22 11:05 AM, Zhenyu Zhang wrote:

Commit ffac16fab3 "hostmem: introduce "prealloc-threads" property"
(v5.0.0) changed the default number of threads from number of CPUs
to 1.  This was deemed a regression, and fixed in commit f8d426a685
"hostmem: default the amount of prealloc-threads to smp-cpus".
Except the documentation remained unchanged.  Update it now.
Signed-off-by: Zhenyu Zhang 
---
v3: Covers historical descriptions  (Markus)
v2: The property is changed to smp-cpus since 5.0   (Phild)
---
qapi/qom.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
  


With the following comments addressed:

Reviewed-by: Gavin Shan 

---

Please consider amending the commit log to something like below.

The default "prealloc-threads" value is set to 1 when the property is
added by commit ffac16fab33b ("hostmem: introduce "prealloc-threads"
property") in v5.0.0. The default value is conflicting with the sugar
property as the value provided by the sugar property is number of CPUs.


What is the sugar property?  Can you explain the conflict in a bit more
detail?


my guess is that Gavin means mem_prealloc compat glue in 
qemu_process_sugar_options()

property value should be set according to following order
   default -> compat -> explicit value
so I don't see any conflict here.

PS:
if it we up to me, default would have stayed 1,
and prealloc-threads fixup to vCPUs number would happen in vl.c
similar to what is done in qemu_process_sugar_options(),
keeping backend clean of external dependencies.
   


Yes, it's the sugar property I was talking about. I'm not sure if
we have a more popular name for this property: compat property or
sugar property.

When 'mem-prealloc=on' and 'prealloc-threads=xxx' aren't provided,
the value is 1 before commit f8d426a6852c is applied. It's not
inconsistent with 'mem-prealloc=on'. It's the conflict I was talking
about and it's fixed by commit f8d426a6852c


default was not supposed to be consistent with legacy mem-prealloc
and sugar property takes care of mem-prealloc=on case.

so commit message in its current form looks fine to me.



Ok, thanks for your confirm. I think Zhenyu needs to post v4, to fix
the 80 characters limitation issue. My reviewed-by is still valid.

  

The conflict has been fixed by commit f8d426a6852c ("hostmem: default
the amount of prealloc-threads to smp-cpus"). However, 'qapi/qom.json'
was missed to be updated accordingly in the commit.

Update 'qapi/qom.json' to reflect the change in commit f8d426a6852c.

Signed-off-by: Zhenyu Zhang 

When a specific commit is mentioned in the commit log, we usually have
fixed format like below.

commit ffac16fab33b ("hostmem: introduce "prealloc-threads" property")
commit f8d426a6852c ("hostmem: default the amount of prealloc-threads to 
smp-cpus")


This is certainly a common format, but the other one is also in use.
  

diff --git a/qapi/qom.json b/qapi/qom.json
index 30e76653ad..dfd89bc6d4 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -576,7 +576,7 @@
#
# @prealloc: if true, preallocate memory (default: false)
#
-# @prealloc-threads: number of CPU threads to use for prealloc (default: 1)
+# @prealloc-threads: number of CPU threads to use for prealloc (default: 
number of CPUs) (since 5.0)
#
# @prealloc-context: thread context to use for creation of preallocation 
threads
#(default: none) (since 7.2)
  


The line seems exceeding 80 characters. It'd better to limit each line in 75 
characters.
So you probably need:

  # @prealloc-threads: number of CPU threads to use for prealloc (default: 
number of CPUs)
  #(since 5.0)


Still exceeds :)

I suggested

# @prealloc-threads: number of CPU threads to use for prealloc
#(default: number of CPUs) (since 5.0)
  
   


Markus's suggestion works :)



Thanks,
Gavin

[PATCH] hw/riscv: virt: Remove the redundant ipi-id property

2022-11-11 Thread Atish Patra

The imsic DT binding has changed and no longer require an ipi-id.
The latest IMSIC driver dynamically allocates ipi id if slow-ipi
is not defined.

Get rid of the unused dt property which may lead to confusion.

Signed-off-by: Atish Patra 
---
 hw/riscv/virt.c | 2 --
 include/hw/riscv/virt.h | 1 -
 2 files changed, 3 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index a5bc7353b412..0bc0964e42a8 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -546,8 +546,6 @@ static void create_fdt_imsic(RISCVVirtState *s, const 
MemMapEntry *memmap,
 riscv_socket_count(mc) * sizeof(uint32_t) * 4);
 qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,num-ids",
 VIRT_IRQCHIP_NUM_MSIS);
-qemu_fdt_setprop_cells(mc->fdt, imsic_name, "riscv,ipi-id",
-VIRT_IRQCHIP_IPI_MSI);
 if (riscv_socket_count(mc) > 1) {
 qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,hart-index-bits",
 imsic_num_bits(imsic_max_hart_per_socket));
diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h
index be4ab8fe7f71..62513e075c47 100644
--- a/include/hw/riscv/virt.h
+++ b/include/hw/riscv/virt.h
@@ -93,7 +93,6 @@ enum {
 
 #define VIRT_PLATFORM_BUS_NUM_IRQS 32
 
-#define VIRT_IRQCHIP_IPI_MSI 1
 #define VIRT_IRQCHIP_NUM_MSIS 255
 #define VIRT_IRQCHIP_NUM_SOURCES VIRTIO_NDEV
 #define VIRT_IRQCHIP_NUM_PRIO_BITS 3
-- 
2.25.1

Re: [PATCH v5 10/20] target/microblaze: initialise MemTxAttrs for CPU access

2022-11-11 Thread Edgar E. Iglesias

On Fri, Nov 11, 2022 at 06:25:25PM +, Alex Bennée wrote:
> Both of these functions deal with CPU based access (as is evidenced by
> the secure check straight after). Use the new MEMTXATTRS_CPU
> constructor to ensure the correct CPU id is filled in should it ever
> be needed by any devices later.

Looks good to me!

Reviewed-by: Edgar E. Iglesias 

> 
> Signed-off-by: Alex Bennée 
> ---
>  target/microblaze/helper.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/target/microblaze/helper.c b/target/microblaze/helper.c
> index 98bdb82de8..655be3b320 100644
> --- a/target/microblaze/helper.c
> +++ b/target/microblaze/helper.c
> @@ -44,7 +44,7 @@ bool mb_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
>  MicroBlazeMMULookup lu;
>  unsigned int hit;
>  int prot;
> -MemTxAttrs attrs = {};
> +MemTxAttrs attrs = MEMTXATTRS_CPU(cs);
>  
>  attrs.secure = mb_cpu_access_is_secure(cpu, access_type);
>  
> @@ -235,7 +235,7 @@ hwaddr mb_cpu_get_phys_page_attrs_debug(CPUState *cs, 
> vaddr addr,
>  unsigned int hit;
>  
>  /* Caller doesn't initialize */
> -*attrs = (MemTxAttrs) {};
> +*attrs = MEMTXATTRS_CPU(cs);
>  attrs->secure = mb_cpu_access_is_secure(cpu, MMU_DATA_LOAD);
>  
>  if (mmu_idx != MMU_NOMMU_IDX) {
> -- 
> 2.34.1
>

Re: [PATCH v2 11/12] gitlab: integrate coverage report

2022-11-11 Thread Philippe Mathieu-Daudé


On 11/11/22 15:55, Alex Bennée wrote:

This should hopefully give is nice coverage information about what our
tests (or at least the subset we are running) have hit. Ideally we
would want a way to trigger coverage on tests likely to be affected by
the current commit.


IIUC per [*] this will not appear in the pipeline but in 
https://gitlab.com/qemu-project/qemu/-/graphs/master/charts under 'Code 
coverage statistics', right?


If so, can you document this in this description? Also maybe this can
be linked in some docs/devel/ci*rst file.

[*] 
https://docs.gitlab.com/ee/ci/pipelines/settings.html#view-code-coverage-history




Signed-off-by: Alex Bennée 
Acked-by: Stefan Hajnoczi 
---
  .gitlab-ci.d/buildtest.yml | 12 +++-
  1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml
index 7173749c52..d21b4a1fd4 100644
--- a/.gitlab-ci.d/buildtest.yml
+++ b/.gitlab-ci.d/buildtest.yml
@@ -494,7 +494,17 @@ check-gprof-gcov:
  IMAGE: ubuntu2004
  MAKE_CHECK_ARGS: check
after_script:
-- ${CI_PROJECT_DIR}/scripts/ci/coverage-summary.sh
+- cd build
+- gcovr --xml-pretty --exclude-unreachable-branches --print-summary
+-o coverage.xml --root ${CI_PROJECT_DIR} . *.p
+  coverage: /^\s*lines:\s*\d+.\d+\%/
+  artifacts:
+name: ${CI_JOB_NAME}-${CI_COMMIT_REF_NAME}-${CI_COMMIT_SHA}
+expire_in: 2 days


Hmm do we need the 'name' and 'expire_in' keys? That would be to keep 
the coverage.xml file?



+reports:
+  coverage_report:
+coverage_format: cobertura
+path: build/coverage.xml
  
  build-oss-fuzz:

extends: .native_build_job_template

Re: [PULL 00/11] Block layer patches

2022-11-11 Thread Stefan Hajnoczi

On Fri, 11 Nov 2022 at 10:29, Kevin Wolf  wrote:
>
> The following changes since commit 2ccad61746ca7de5dd3e25146062264387e43bd4:
>
>   Merge tag 'pull-tcg-20221109' of https://gitlab.com/rth7680/qemu into 
> staging (2022-11-09 13:26:45 -0500)
>
> are available in the Git repository at:
>
>   https://repo.or.cz/qemu/kevin.git tags/for-upstream
>
> for you to fetch changes up to b04af371af685c12970ea93027dc6d8bf86265aa:
>
>   tests/stream-under-throttle: New test (2022-11-11 13:02:43 +0100)
>
> 
> Block layer patches
>
> - Fix deadlock in graph modification with iothreads
> - mirror: Fix non-converging cases for active mirror
> - qapi: Fix BlockdevOptionsNvmeIoUring @path description
> - blkio: Set BlockDriver::has_variable_length to false
>
> 
> Alberto Faria (2):
>   qapi/block-core: Fix BlockdevOptionsNvmeIoUring @path description
>   block/blkio: Set BlockDriver::has_variable_length to false
>
> Hanna Reitz (9):
>   block/mirror: Do not wait for active writes
>   block/mirror: Drop mirror_wait_for_any_operation()
>   block/mirror: Fix NULL s->job in active writes
>   iotests/151: Test that active mirror progresses
>   iotests/151: Test active requests on mirror start
>   block: Make bdrv_child_get_parent_aio_context I/O
>   block-backend: Update ctx immediately after root
>   block: Start/end drain on correct AioContext
>   tests/stream-under-throttle: New test

Hi Hanna,
This test is broken, probably due to the minimum Python version:
https://gitlab.com/qemu-project/qemu/-/jobs/3311521303

Stefan

Re: [PATCH v3 2/2] vhost-net: Fix the virtio features negotiation flaw

2022-11-11 Thread Hyman


The previous reply email has an text format error, please ignore and


在 2022/11/11 3:00, Michael S. Tsirkin 写道:

On Sun, Oct 30, 2022 at 09:52:39PM +0800, huang...@chinatelecom.cn wrote:

From: Hyman Huang(黄勇) 

Save the acked_features once it be configured by guest
virtio driver so it can't miss any features.

Note that this patch also change the features saving logic
in chr_closed_bh, which originally backup features no matter
whether the features are 0 or not, but now do it only if
features aren't 0.


I'm not sure how is this change even related to what we
are trying to do (fix a bug). Explain here?


For this series, all we want to do is to making sure acked_features
in the NetVhostUserState is credible and uptodate in the scenario
that virtio features negotiation and openvswitch service restart
happens simultaneously.

To make sure that happens, we save the acked_features to
NetVhostUserState right after guest setting virtio-net features.

Assume that we do not save acked_features to NetVhostUserState just as
it is, the acked_features in NetVhostUserState has chance to be assigned
only when chr_closed_bh/vhost_user_stop happen. Note that openvswitch
service stop will cause chr_closed_bh happens and acked_features in
vhost_dev will be stored into NetVhostUserState, if the acked_features
in vhost_dev are out-of-date(may be updated in the next few seconds), so
does the acked_features in NetVhostUserState after doing the assignment,
this is the bug.

Let's refine the scenario and derive the bug:
qemu threaddpdk
|   |
   vhost_net_init() |
|   |
 assign acked_features in vhost_dev |
   with 0x4000  |
|   openvswitch.service stop
   chr_closed_bh|
|   |
 assign acked_features in   |
 NetVhostUserState with 0x4000  |
|   |
   virtio_net_set_features()|
|   |
 assign acked_features in vhost_dev |
   with 0x7060a782  |
|  openvswitch.service start
|   |
   vhost_user_start |
|   |
 assign acked_features in vhost_dev |
   with 0x4000  |
|   |

As the step shows, if we do not keep the acked_features in
NetVhostUserState up-to-date, the acked_features in vhost_dev may be
reloaded with the wrong value(eg, 0x4000) when vhost_user_start
happens.




As to reset acked_features to 0 if needed, Qemu always
keeping the backup acked_features up-to-date, and save the
acked_features after virtio_net_set_features in advance,
including reset acked_features to 0, so the behavior is
also covered.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
---
  hw/net/vhost_net.c  | 9 +
  hw/net/virtio-net.c | 5 +
  include/net/vhost_net.h | 2 ++
  net/vhost-user.c| 6 +-
  4 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index d28f8b9..2bffc27 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -141,6 +141,15 @@ uint64_t vhost_net_get_acked_features(VHostNetState *net)
  return net->dev.acked_features;
  }
  
+void vhost_net_save_acked_features(NetClientState *nc)

+{
+if (nc->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
+return;
+}
+
+vhost_user_save_acked_features(nc, false);
+}
+
  static int vhost_net_get_fd(NetClientState *backend)
  {
  switch (backend->info->type) {
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index e9f696b..5f8f788 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -924,6 +924,11 @@ static void virtio_net_set_features(VirtIODevice *vdev, 
uint64_t features)
  continue;
  }
  vhost_net_ack_features(get_vhost_net(nc->peer), features);
+/*
+ * keep acked_features in NetVhostUserState up-to-date so it
+ * can't miss any features configured by guest virtio driver.
+ */
+vhost_net_save_acked_features(nc->peer);
  }
  
  if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {

diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 387e913..3a5579b 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -46,6 +46,8 @@ int vhost_set_vring_enable(NetClientState *

Re: [PATCH v3 2/2] vhost-net: Fix the virtio features negotiation flaw

2022-11-11 Thread Hyman





在 2022/11/11 3:00, Michael S. Tsirkin 写道:

On Sun, Oct 30, 2022 at 09:52:39PM +0800, huang...@chinatelecom.cn wrote:

From: Hyman Huang(黄勇) 

Save the acked_features once it be configured by guest
virtio driver so it can't miss any features.

Note that this patch also change the features saving logic
in chr_closed_bh, which originally backup features no matter
whether the features are 0 or not, but now do it only if
features aren't 0.


I'm not sure how is this change even related to what we
are trying to do (fix a bug). Explain here?


For this series, all we want to do is to making sure acked_features
in the NetVhostUserState is credible and uptodate in the scenario that 
virtio features negotiation and openvswitch service restart happens 
simultaneously.


To make sure that happens, we save the acked_features to 
NetVhostUserState right after guest setting virtio-net features.


Assume that we do not save acked_features to NetVhostUserState just as 
it is, the acked_features in NetVhostUserState has chance to be assigned 
only when chr_closed_bh/vhost_user_stop happen. Note that openvswitch 
service stop will cause chr_closed_bh happens and acked_features in 
vhost_dev will be stored into NetVhostUserState, if the acked_features 
in vhost_dev are out-of-date(may be updated in the next few seconds), so 
does the acked_features in NetVhostUserState after doing the assignment, 
this is the bug.


Let's refine the scenario and derive the bug:
qemu threaddpdk
|   |
   vhost_net_init() |
|   |
 assign acked_features in vhost_dev |
   with 0x4000  |
|  openvswitch.service stop
   chr_closed_bh|
|   |
 assign acked_features in   |
 NetVhostUserState with 0x4000  |
|   |
   virtio_net_set_features()|
|   |
 assign acked_features in vhost_dev |
   with 0x7060a782  |
|  openvswitch.service start
|   |
   vhost_user_start |
|   |
 assign acked_features in vhost_dev |
   with 0x4000  |
|   |

As the step shows, if we do not keep the acked_features in 
NetVhostUserState up-to-date, the acked_features in vhost_dev may be 
reloaded with the wrong value(eg, 0x4000) when vhost_user_start happens.




As to reset acked_features to 0 if needed, Qemu always
keeping the backup acked_features up-to-date, and save the
acked_features after virtio_net_set_features in advance,
including reset acked_features to 0, so the behavior is
also covered.

Signed-off-by: Hyman Huang(黄勇) 
Signed-off-by: Guoyi Tu 
---
  hw/net/vhost_net.c  | 9 +
  hw/net/virtio-net.c | 5 +
  include/net/vhost_net.h | 2 ++
  net/vhost-user.c| 6 +-
  4 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index d28f8b9..2bffc27 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -141,6 +141,15 @@ uint64_t vhost_net_get_acked_features(VHostNetState *net)
  return net->dev.acked_features;
  }
  
+void vhost_net_save_acked_features(NetClientState *nc)

+{
+if (nc->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
+return;
+}
+
+vhost_user_save_acked_features(nc, false);
+}
+
  static int vhost_net_get_fd(NetClientState *backend)
  {
  switch (backend->info->type) {
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index e9f696b..5f8f788 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -924,6 +924,11 @@ static void virtio_net_set_features(VirtIODevice *vdev, 
uint64_t features)
  continue;
  }
  vhost_net_ack_features(get_vhost_net(nc->peer), features);
+/*
+ * keep acked_features in NetVhostUserState up-to-date so it
+ * can't miss any features configured by guest virtio driver.
+ */
+vhost_net_save_acked_features(nc->peer);
  }
  
  if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {

diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 387e913..3a5579b 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -46,6 +46,8 @@ int vhost_set_vring_enable(NetClientState * nc, int enable);
  
  uint64_t

Re: [PATCH] s390x: Fix spelling errors

2022-11-11 Thread Stefan Weil via


Am 11.11.22 um 19:28 schrieb Thomas Huth:


Fix typos (discovered with the 'codespell' utility).

Signed-off-by: Thomas Huth 
---
  hw/s390x/ipl.h  | 2 +-
  pc-bios/s390-ccw/cio.h  | 2 +-
  pc-bios/s390-ccw/iplb.h | 2 +-
  target/s390x/cpu_models.h   | 4 ++--
  hw/s390x/s390-pci-vfio.c| 2 +-
  hw/s390x/s390-virtio-ccw.c  | 6 +++---
  target/s390x/ioinst.c   | 2 +-
  target/s390x/tcg/excp_helper.c  | 2 +-
  target/s390x/tcg/fpu_helper.c   | 2 +-
  target/s390x/tcg/misc_helper.c  | 2 +-
  target/s390x/tcg/translate.c| 4 ++--
  target/s390x/tcg/translate_vx.c.inc | 6 +++---
  pc-bios/s390-ccw/start.S| 2 +-
  13 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h
index dfc6dfd89c..7fc86e7905 100644
--- a/hw/s390x/ipl.h
+++ b/hw/s390x/ipl.h
@@ -140,7 +140,7 @@ void s390_ipl_clear_reset_request(void);
   * have an offset of 4 + n * 8 bytes within the struct in order
   * to keep it double-word aligned.
   * The total size of the struct must never exceed 28 bytes.
- * This definition must be kept in sync with the defininition
+ * This definition must be kept in sync with the definition
   * in pc-bios/s390-ccw/iplb.h.
   */
  struct QemuIplParameters {
diff --git a/pc-bios/s390-ccw/cio.h b/pc-bios/s390-ccw/cio.h
index 1e5d4e92e1..88a88adfd2 100644
--- a/pc-bios/s390-ccw/cio.h
+++ b/pc-bios/s390-ccw/cio.h
@@ -20,7 +20,7 @@ struct pmcw {
  __u32 intparm;  /* interruption parameter */
  __u32 qf:1; /* qdio facility */
  __u32 w:1;
-__u32 isc:3;/* interruption sublass */
+__u32 isc:3;/* interruption subclass */
  __u32 res5:3;   /* reserved zeros */
  __u32 ena:1;/* enabled */
  __u32 lm:2; /* limit mode */
diff --git a/pc-bios/s390-ccw/iplb.h b/pc-bios/s390-ccw/iplb.h
index 772d5c57c9..cb6ac8a880 100644
--- a/pc-bios/s390-ccw/iplb.h
+++ b/pc-bios/s390-ccw/iplb.h
@@ -81,7 +81,7 @@ extern IplParameterBlock iplb 
__attribute__((__aligned__(PAGE_SIZE)));
  #define QIPL_FLAG_BM_OPTS_ZIPL  0x40
  
  /*

- * This definition must be kept in sync with the defininition
+ * This definition must be kept in sync with the definition
   * in hw/s390x/ipl.h
   */
  struct QemuIplParameters {
diff --git a/target/s390x/cpu_models.h b/target/s390x/cpu_models.h
index 74d1f87e4f..15c0f0dcfe 100644
--- a/target/s390x/cpu_models.h
+++ b/target/s390x/cpu_models.h
@@ -24,13 +24,13 @@ struct S390CPUDef {
  uint8_t gen;/* hw generation identification */
  uint16_t type;  /* cpu type identification */
  uint8_t ec_ga;  /* EC GA version (on which also the BC is based) 
*/
-uint8_t mha_pow;/* Maximum Host Adress Power, mha = 2^pow-1 */
+uint8_t mha_pow;/* Maximum Host Address Power, mha = 2^pow-1 */



This comment could use lower case words.



  uint32_t hmfai; /* hypervisor-managed facilities */
  /* base/min features, must never be changed between QEMU versions */
  S390FeatBitmap base_feat;
  /* used to init base_feat from generated data */
  S390FeatInit base_init;
-/* deafault features, QEMU version specific */
+/* default features, QEMU version specific */
  S390FeatBitmap default_feat;
  /* used to init default_feat from generated data */
  S390FeatInit default_init;
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
index 2aefa508a0..5f0adb0b4a 100644
--- a/hw/s390x/s390-pci-vfio.c
+++ b/hw/s390x/s390-pci-vfio.c
@@ -313,7 +313,7 @@ retry:
  /*
   * Get the host function handle from the vfio CLP capabilities chain.  Returns
   * true if a fh value was placed into the provided buffer.  Returns false
- * if a fh could not be obtained (ioctl failed or capabilitiy version does
+ * if a fh could not be obtained (ioctl failed or capability version does
   * not include the fh)
   */
  bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh)
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 7d80bc1837..2e64ffab45 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -354,7 +354,7 @@ static int s390_machine_protect(S390CcwMachineState *ms)
  }
  
  error_setg(_mig_blocker,

-   "protected VMs are currently not migrateable.");
+   "protected VMs are currently not migratable.");



This might again be a different British / American spelling.



  rc = migrate_add_blocker(pv_mig_blocker, _err);
  if (rc) {
  ram_block_discard_disable(false);
@@ -449,7 +449,7 @@ static void s390_machine_reset(MachineState *machine, 
ShutdownCause reason)
  break;
  case S390_RESET_MODIFIED_CLEAR:
  /*
- * Susbsystem reset needs to be done before we unshare memory
+ * Subsystem reset needs to be done before we unshare memory
   * and lose access to VIRTIO

[PATCH v5 13/20] target/i386: add explicit initialisation for MexTxAttrs

2022-11-11 Thread Alex Bennée

Where appropriate initialise with MEMTXATTRS_CPU otherwise use
MEMTXATTRS_UNSPECIFIED instead of the null initialiser.

Signed-off-by: Alex Bennée 
---
 target/i386/cpu.h   | 4 +++-
 target/i386/hax/hax-all.c   | 2 +-
 target/i386/nvmm/nvmm-all.c | 2 +-
 target/i386/sev.c   | 2 +-
 target/i386/whpx/whpx-all.c | 2 +-
 5 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index d4bc19577a..04ab96b076 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2246,7 +2246,9 @@ static inline uint32_t cpu_compute_eflags(CPUX86State 
*env)
 
 static inline MemTxAttrs cpu_get_mem_attrs(CPUX86State *env)
 {
-return ((MemTxAttrs) { .secure = (env->hflags & HF_SMM_MASK) != 0 });
+MemTxAttrs attrs = MEMTXATTRS_CPU(env_cpu(env));
+attrs.secure = (env->hflags & HF_SMM_MASK) != 0;
+return attrs;
 }
 
 static inline int32_t x86_get_a20_mask(CPUX86State *env)
diff --git a/target/i386/hax/hax-all.c b/target/i386/hax/hax-all.c
index b185ee8de4..337090e16f 100644
--- a/target/i386/hax/hax-all.c
+++ b/target/i386/hax/hax-all.c
@@ -385,7 +385,7 @@ static int hax_handle_io(CPUArchState *env, uint32_t df, 
uint16_t port,
 {
 uint8_t *ptr;
 int i;
-MemTxAttrs attrs = { 0 };
+MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
 
 if (!df) {
 ptr = (uint8_t *) buffer;
diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c
index b75738ee9c..cb0720a6fa 100644
--- a/target/i386/nvmm/nvmm-all.c
+++ b/target/i386/nvmm/nvmm-all.c
@@ -502,7 +502,7 @@ nvmm_vcpu_post_run(CPUState *cpu, struct nvmm_vcpu_exit 
*exit)
 static void
 nvmm_io_callback(struct nvmm_io *io)
 {
-MemTxAttrs attrs = { 0 };
+MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
 int ret;
 
 ret = address_space_rw(_space_io, io->port, attrs, io->data,
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 32f7dbac4e..292cbcdd92 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -1274,7 +1274,7 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext 
*ctx, Error **errp)
 uint8_t *hashp;
 size_t hash_len = HASH_SIZE;
 hwaddr mapped_len = sizeof(*padded_ht);
-MemTxAttrs attrs = { 0 };
+MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
 bool ret = true;
 
 /*
diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
index e738d83e81..42846144dd 100644
--- a/target/i386/whpx/whpx-all.c
+++ b/target/i386/whpx/whpx-all.c
@@ -791,7 +791,7 @@ static HRESULT CALLBACK whpx_emu_ioport_callback(
 void *ctx,
 WHV_EMULATOR_IO_ACCESS_INFO *IoAccess)
 {
-MemTxAttrs attrs = { 0 };
+MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
 address_space_rw(_space_io, IoAccess->Port, attrs,
  >Data, IoAccess->AccessSize,
  IoAccess->Direction);
-- 
2.34.1

[PATCH v5 17/20] hw/intc: properly model IOAPIC MSI messages

2022-11-11 Thread Alex Bennée

On the real HW the IOAPIC is wired directly to the APIC and doesn't
really generate memory accesses on the main bus of the system. To
model this we can use the MTRT_MACHINE requester type and set the id
as a magic number to represent the IOAPIC as the source.

Signed-off-by: Alex Bennée 
Cc: Paolo Bonzini 
Cc: Peter Xu 
---
 include/hw/i386/ioapic_internal.h |  2 ++
 hw/intc/ioapic.c  | 35 ---
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/include/hw/i386/ioapic_internal.h 
b/include/hw/i386/ioapic_internal.h
index 9880443cc7..a8c7a1418a 100644
--- a/include/hw/i386/ioapic_internal.h
+++ b/include/hw/i386/ioapic_internal.h
@@ -82,6 +82,8 @@
 
 #define IOAPIC_VER_ENTRIES_SHIFT16
 
+/* Magic number to identify IOAPIC memory transactions */
+#define MEMTX_IOAPIC0xA71C
 
 #define TYPE_IOAPIC_COMMON "ioapic-common"
 OBJECT_DECLARE_TYPE(IOAPICCommonState, IOAPICCommonClass, IOAPIC_COMMON)
diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c
index 264262959d..8a5418002b 100644
--- a/hw/intc/ioapic.c
+++ b/hw/intc/ioapic.c
@@ -21,6 +21,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/log.h"
 #include "qapi/error.h"
 #include "monitor/monitor.h"
 #include "hw/i386/apic.h"
@@ -88,9 +89,33 @@ static void ioapic_entry_parse(uint64_t entry, struct 
ioapic_entry_info *info)
 (info->delivery_mode << MSI_DATA_DELIVERY_MODE_SHIFT);
 }
 
-static void ioapic_service(IOAPICCommonState *s)
+/*
+ * No matter whether IR is enabled, we translate the IOAPIC message
+ * into a MSI one, and its address space will decide whether we need a
+ * translation.
+ *
+ * As the IOPIC is directly wired to the APIC writes to it are not the
+ * same as writes coming from the main bus of the machine. To model
+ * this we set its source as machine specific with the MEMTX_IOPIC
+ * id.
+ */
+static void send_ioapic_msi(struct ioapic_entry_info info)
 {
 AddressSpace *ioapic_as = X86_MACHINE(qdev_get_machine())->ioapic_as;
+MemTxAttrs attrs = MEMTXATTRS_MACHINE(MEMTX_IOAPIC);
+MemTxResult res;
+
+address_space_stl_le(ioapic_as, info.addr, info.data,
+ attrs, );
+if (res != MEMTX_OK) {
+qemu_log_mask(LOG_GUEST_ERROR,
+  "%s: couldn't write to %"PRIx32"\n", __func__, 
info.addr);
+}
+}
+
+
+static void ioapic_service(IOAPICCommonState *s)
+{
 struct ioapic_entry_info info;
 uint8_t i;
 uint32_t mask;
@@ -130,12 +155,8 @@ static void ioapic_service(IOAPICCommonState *s)
 continue;
 }
 #endif
-
-/* No matter whether IR is enabled, we translate
- * the IOAPIC message into a MSI one, and its
- * address space will decide whether we need a
- * translation. */
-stl_le_phys(ioapic_as, info.addr, info.data);
+/* If not handled by KVM we now send it ourselves */
+send_ioapic_msi(info);
 }
 }
 }
-- 
2.34.1

[PATCH v5 20/20] include/hw: add commentary to current_cpu export

2022-11-11 Thread Alex Bennée

Document the intended use of current_cpu and discourage its use in new
HW emulation code. Once we have fully converted the tree we should
probably move this extern to another header.

Signed-off-by: Alex Bennée 
---
 include/hw/core/cpu.h | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 8830546121..209b88e559 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -454,6 +454,20 @@ extern CPUTailQ cpus;
 #define CPU_FOREACH_SAFE(cpu, next_cpu) \
 QTAILQ_FOREACH_SAFE_RCU(cpu, , node, next_cpu)
 
+/**
+ * current_cpu - TLS pointing to the current executing CPU
+ *
+ * current_cpu is a thread local convenience variable containing that
+ * threads executing CPUState. It is intended to be used deep in
+ * accelerator related operations where passing down CPUState is too
+ * fiddly.
+ *
+ * Its use in HW emulation is heavily discouraged in new code as not
+ * all memory accesses will necessarily be from an executing CPU (e.g.
+ * from a debugger). HW emulation should be using MemTxAttrs to derive
+ * the exact source of a memory access. If the access is from a CPU it
+ * can be derived from qemu_get_cpu(cpu_index).
+ */
 extern __thread CPUState *current_cpu;
 
 /**
-- 
2.34.1

[PATCH v5 11/20] target/sparc: initialise MemTxAttrs for CPU access

2022-11-11 Thread Alex Bennée

Both of the TLB fill functions and the cpu_sparc_get_phys_page deal
with CPU based access. Use the new MEMTXATTRS_CPU constructor to
ensure the correct CPU id is filled in should it ever be needed by any
devices later.

Signed-off-by: Alex Bennée 
---
 target/sparc/mmu_helper.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/target/sparc/mmu_helper.c b/target/sparc/mmu_helper.c
index 919448a494..eeb52b5ee6 100644
--- a/target/sparc/mmu_helper.c
+++ b/target/sparc/mmu_helper.c
@@ -212,7 +212,7 @@ bool sparc_cpu_tlb_fill(CPUState *cs, vaddr address, int 
size,
 target_ulong vaddr;
 target_ulong page_size;
 int error_code = 0, prot, access_index;
-MemTxAttrs attrs = {};
+MemTxAttrs attrs = MEMTXATTRS_CPU(cs);
 
 /*
  * TODO: If we ever need tlb_vaddr_to_host for this target,
@@ -771,7 +771,7 @@ bool sparc_cpu_tlb_fill(CPUState *cs, vaddr address, int 
size,
 target_ulong vaddr;
 hwaddr paddr;
 target_ulong page_size;
-MemTxAttrs attrs = {};
+MemTxAttrs attrs = MEMTXATTRS_CPU(cs);
 int error_code = 0, prot, access_index;
 
 address &= TARGET_PAGE_MASK;
@@ -890,7 +890,7 @@ static int cpu_sparc_get_phys_page(CPUSPARCState *env, 
hwaddr *phys,
 {
 target_ulong page_size;
 int prot, access_index;
-MemTxAttrs attrs = {};
+MemTxAttrs attrs = MEMTXATTRS_CPU(env_cpu(env));
 
 return get_physical_address(env, phys, , _index, , addr,
 rw, mmu_idx, _size);
-- 
2.34.1

[PATCH v5 18/20] hw/i386: convert apic access to use MemTxAttrs

2022-11-11 Thread Alex Bennée

This allows us to correctly model invalid accesses to the interrupt
controller as well as avoiding the use of current_cpu hacks to find
the APIC structure. We have to ensure we check for MSI signals first
which shouldn't arrive from the CPU but are either triggered by PCI or
internal IOAPIC writes.

Signed-off-by: Alex Bennée 
Cc: Paolo Bonzini 
Cc: Peter Xu 

---
v1
  - don't validate requester_id for MTRT_MACHINE, just assume IOPIC
---
 include/hw/i386/apic.h |  2 +-
 hw/i386/x86.c  | 11 +++-
 hw/intc/apic.c | 62 --
 3 files changed, 46 insertions(+), 29 deletions(-)

diff --git a/include/hw/i386/apic.h b/include/hw/i386/apic.h
index da1d2fe155..064ea5ac1b 100644
--- a/include/hw/i386/apic.h
+++ b/include/hw/i386/apic.h
@@ -22,6 +22,6 @@ void apic_designate_bsp(DeviceState *d, bool bsp);
 int apic_get_highest_priority_irr(DeviceState *dev);
 
 /* pc.c */
-DeviceState *cpu_get_current_apic(void);
+DeviceState *cpu_get_current_apic(int cpu_index);
 
 #endif
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 78cc131926..66645a669c 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -585,14 +585,11 @@ int cpu_get_pic_interrupt(CPUX86State *env)
 return intno;
 }
 
-DeviceState *cpu_get_current_apic(void)
+DeviceState *cpu_get_current_apic(int cpu_index)
 {
-if (current_cpu) {
-X86CPU *cpu = X86_CPU(current_cpu);
-return cpu->apic_state;
-} else {
-return NULL;
-}
+CPUState *cs = qemu_get_cpu(cpu_index);
+X86CPU *cpu = X86_CPU(cs);
+return cpu->apic_state;
 }
 
 void gsi_handler(void *opaque, int n, int level)
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 3df11c34d6..0a9897e64f 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -18,9 +18,11 @@
  */
 #include "qemu/osdep.h"
 #include "qemu/thread.h"
+#include "qemu/log.h"
 #include "hw/i386/apic_internal.h"
 #include "hw/i386/apic.h"
 #include "hw/i386/ioapic.h"
+#include "hw/i386/ioapic_internal.h"
 #include "hw/intc/i8259.h"
 #include "hw/pci/msi.h"
 #include "qemu/host-utils.h"
@@ -634,21 +636,23 @@ static void apic_timer(void *opaque)
 apic_timer_update(s, s->next_time);
 }
 
-static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+static MemTxResult apic_mem_read(void *opaque, hwaddr addr, uint64_t *data,
+ unsigned int size, MemTxAttrs attrs)
 {
 DeviceState *dev;
 APICCommonState *s;
 uint32_t val;
 int index;
 
-if (size < 4) {
-return 0;
+if (attrs.requester_type != MTRT_CPU) {
+return MEMTX_ACCESS_ERROR;
 }
+dev = cpu_get_current_apic(attrs.requester_id);
 
-dev = cpu_get_current_apic();
-if (!dev) {
-return 0;
+if (size < 4) {
+return MEMTX_ERROR;
 }
+
 s = APIC(dev);
 
 index = (addr >> 4) & 0xff;
@@ -719,7 +723,8 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, 
unsigned size)
 break;
 }
 trace_apic_mem_readl(addr, val);
-return val;
+*data = val;
+return MEMTX_OK;
 }
 
 static void apic_send_msi(MSIMessage *msi)
@@ -735,32 +740,45 @@ static void apic_send_msi(MSIMessage *msi)
 apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
 }
 
-static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
-   unsigned size)
+static MemTxResult apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
+  unsigned int size, MemTxAttrs attrs)
 {
 DeviceState *dev;
 APICCommonState *s;
 int index = (addr >> 4) & 0xff;
 
 if (size < 4) {
-return;
+return MEMTX_ERROR;
 }
 
+/*
+ * MSI and MMIO APIC are at the same memory location, but actually
+ * not on the global bus: MSI is on PCI bus APIC is connected
+ * directly to the CPU.
+ *
+ * We can check the MemTxAttrs to check they are coming from where
+ * we expect. Even though the MSI registers are reserved in APIC
+ * MMIO and vice versa they shouldn't respond to CPU writes.
+ */
 if (addr > 0xfff || !index) {
-/* MSI and MMIO APIC are at the same memory location,
- * but actually not on the global bus: MSI is on PCI bus
- * APIC is connected directly to the CPU.
- * Mapping them on the global bus happens to work because
- * MSI registers are reserved in APIC MMIO and vice versa. */
+switch (attrs.requester_type) {
+case MTRT_MACHINE: /* MEMTX_IOPIC */
+case MTRT_PCI: /* PCI signalled MSI */
+break;
+default:
+qemu_log_mask(LOG_GUEST_ERROR, "%s: rejecting write from %d",
+  __func__, attrs.requester_id);
+return MEMTX_ACCESS_ERROR;
+}
 MSIMessage msi = { .address = addr, .data = val };
 apic_send_msi();
-return;
+return MEMTX_OK;
 }
 
-dev = cpu_get_current_apic();
-if (!dev) {
-

[PATCH v5 14/20] hw/audio: explicitly set .requester_type for intel-hda

2022-11-11 Thread Alex Bennée

This is simulating a bus master writing data back into system memory.
Mark it as such.

Signed-off-by: Alex Bennée 
---
 hw/audio/intel-hda.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/audio/intel-hda.c b/hw/audio/intel-hda.c
index f38117057b..95c28b315c 100644
--- a/hw/audio/intel-hda.c
+++ b/hw/audio/intel-hda.c
@@ -345,7 +345,7 @@ static void intel_hda_corb_run(IntelHDAState *d)
 
 static void intel_hda_response(HDACodecDevice *dev, bool solicited, uint32_t 
response)
 {
-const MemTxAttrs attrs = { .memory = true };
+const MemTxAttrs attrs = { .requester_type = MTRT_PCI, .memory = true };
 HDACodecBus *bus = HDA_BUS(dev->qdev.parent_bus);
 IntelHDAState *d = container_of(bus, IntelHDAState, codecs);
 hwaddr addr;
-- 
2.34.1

[PATCH v5 09/20] hw/arm: remove current_cpu hack from pxa2xx access

2022-11-11 Thread Alex Bennée

We can derive the correct CPU from CPUARMState so lets not rely on
current_cpu.

Signed-off-by: Alex Bennée 
---
 hw/arm/pxa2xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c
index 93dda83d7a..065392a8bc 100644
--- a/hw/arm/pxa2xx.c
+++ b/hw/arm/pxa2xx.c
@@ -319,7 +319,7 @@ static void pxa2xx_pwrmode_write(CPUARMState *env, const 
ARMCPRegInfo *ri,
 #endif
 
 /* Suspend */
-cpu_interrupt(current_cpu, CPU_INTERRUPT_HALT);
+cpu_interrupt(env_cpu(env), CPU_INTERRUPT_HALT);
 
 goto message;
 
-- 
2.34.1

[PATCH v5 19/20] hw/isa: derive CPUState from MemTxAttrs in apm_ioport_writeb

2022-11-11 Thread Alex Bennée

Some of the callbacks need a CPUState so extend the interface so we
can pass that down rather than relying on current_cpu hacks.

Signed-off-by: Alex Bennée 
---
 include/hw/isa/apm.h |  2 +-
 hw/acpi/ich9.c   |  1 -
 hw/acpi/piix4.c  |  2 +-
 hw/isa/apm.c | 21 +
 hw/isa/lpc_ich9.c|  5 ++---
 5 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/include/hw/isa/apm.h b/include/hw/isa/apm.h
index b6e070c00e..eb952e1c1c 100644
--- a/include/hw/isa/apm.h
+++ b/include/hw/isa/apm.h
@@ -6,7 +6,7 @@
 #define APM_CNT_IOPORT  0xb2
 #define ACPI_PORT_SMI_CMD APM_CNT_IOPORT
 
-typedef void (*apm_ctrl_changed_t)(uint32_t val, void *arg);
+typedef void (*apm_ctrl_changed_t)(CPUState *cs, uint32_t val, void *arg);
 
 typedef struct APMState {
 uint8_t apmc;
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index bd9bbade70..70ad1cd1ff 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -30,7 +30,6 @@
 #include "hw/pci/pci.h"
 #include "migration/vmstate.h"
 #include "qemu/timer.h"
-#include "hw/core/cpu.h"
 #include "sysemu/reset.h"
 #include "sysemu/runstate.h"
 #include "hw/acpi/acpi.h"
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 0a81f1ad93..43b78ef8f9 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -70,7 +70,7 @@ static void pm_tmr_timer(ACPIREGS *ar)
 acpi_update_sci(>ar, s->irq);
 }
 
-static void apm_ctrl_changed(uint32_t val, void *arg)
+static void apm_ctrl_changed(CPUState *cs, uint32_t val, void *arg)
 {
 PIIX4PMState *s = arg;
 PCIDevice *d = PCI_DEVICE(s);
diff --git a/hw/isa/apm.c b/hw/isa/apm.c
index dfe9020d30..95efbf2457 100644
--- a/hw/isa/apm.c
+++ b/hw/isa/apm.c
@@ -21,6 +21,8 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "hw/core/cpu.h"
 #include "hw/isa/apm.h"
 #include "hw/pci/pci.h"
 #include "migration/vmstate.h"
@@ -30,10 +32,19 @@
 /* fixed I/O location */
 #define APM_STS_IOPORT  0xb3
 
-static void apm_ioport_writeb(void *opaque, hwaddr addr, uint64_t val,
-  unsigned size)
+static MemTxResult apm_ioport_writeb(void *opaque, hwaddr addr, uint64_t val,
+ unsigned size, MemTxAttrs attrs)
 {
 APMState *apm = opaque;
+CPUState *cs;
+
+if (attrs.requester_type != MTRT_CPU) {
+qemu_log_mask(LOG_UNIMP | LOG_GUEST_ERROR,
+  "%s: saw non-CPU transaction", __func__);
+return MEMTX_ACCESS_ERROR;
+}
+cs = qemu_get_cpu(attrs.requester_id);
+
 addr &= 1;
 
 trace_apm_io_write(addr, val);
@@ -41,11 +52,13 @@ static void apm_ioport_writeb(void *opaque, hwaddr addr, 
uint64_t val,
 apm->apmc = val;
 
 if (apm->callback) {
-(apm->callback)(val, apm->arg);
+(apm->callback)(cs, val, apm->arg);
 }
 } else {
 apm->apms = val;
 }
+
+return MEMTX_OK;
 }
 
 static uint64_t apm_ioport_readb(void *opaque, hwaddr addr, unsigned size)
@@ -77,7 +90,7 @@ const VMStateDescription vmstate_apm = {
 
 static const MemoryRegionOps apm_ops = {
 .read = apm_ioport_readb,
-.write = apm_ioport_writeb,
+.write_with_attrs = apm_ioport_writeb,
 .impl = {
 .min_access_size = 1,
 .max_access_size = 1,
diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
index 0b0a83e080..2700a18a65 100644
--- a/hw/isa/lpc_ich9.c
+++ b/hw/isa/lpc_ich9.c
@@ -443,7 +443,7 @@ void ich9_lpc_pm_init(PCIDevice *lpc_pci, bool smm_enabled)
 
 /* APM */
 
-static void ich9_apm_ctrl_changed(uint32_t val, void *arg)
+static void ich9_apm_ctrl_changed(CPUState *cs, uint32_t val, void *arg)
 {
 ICH9LPCState *lpc = arg;
 
@@ -459,12 +459,11 @@ static void ich9_apm_ctrl_changed(uint32_t val, void *arg)
 if (lpc->pm.smi_en & ICH9_PMIO_SMI_EN_APMC_EN) {
 if (lpc->smi_negotiated_features &
 (UINT64_C(1) << ICH9_LPC_SMI_F_BROADCAST_BIT)) {
-CPUState *cs;
 CPU_FOREACH(cs) {
 cpu_interrupt(cs, CPU_INTERRUPT_SMI);
 }
 } else {
-cpu_interrupt(current_cpu, CPU_INTERRUPT_SMI);
+cpu_interrupt(cs, CPU_INTERRUPT_SMI);
 }
 }
 }
-- 
2.34.1

[PATCH v5 06/20] qtest: make read/write operation appear to be from CPU

2022-11-11 Thread Alex Bennée

The point of qtest is to simulate how running code might interact with
the system. However because it's not a real system we have places in
the code which especially handle check qtest_enabled() before
referencing current_cpu. Now we can encode these details in the
MemTxAttrs lets do that so we can start removing them.

Reviewed-by: Richard Henderson 
Acked-by: Thomas Huth 
Signed-off-by: Alex Bennée 

---
v2
  - use a common macro instead of specific MEMTXATTRS_QTEST
v3
  - macro moved to earlier patch
---
 softmmu/qtest.c | 26 +-
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/softmmu/qtest.c b/softmmu/qtest.c
index d3e0ab4eda..5e9ac234ce 100644
--- a/softmmu/qtest.c
+++ b/softmmu/qtest.c
@@ -520,22 +520,22 @@ static void qtest_process_command(CharBackend *chr, gchar 
**words)
 
 if (words[0][5] == 'b') {
 uint8_t data = value;
-address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED,
+address_space_write(first_cpu->as, addr, MEMTXATTRS_CPU(first_cpu),
 , 1);
 } else if (words[0][5] == 'w') {
 uint16_t data = value;
 tswap16s();
-address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED,
+address_space_write(first_cpu->as, addr, MEMTXATTRS_CPU(first_cpu),
 , 2);
 } else if (words[0][5] == 'l') {
 uint32_t data = value;
 tswap32s();
-address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED,
+address_space_write(first_cpu->as, addr, MEMTXATTRS_CPU(first_cpu),
 , 4);
 } else if (words[0][5] == 'q') {
 uint64_t data = value;
 tswap64s();
-address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED,
+address_space_write(first_cpu->as, addr, MEMTXATTRS_CPU(first_cpu),
 , 8);
 }
 qtest_send_prefix(chr);
@@ -554,21 +554,21 @@ static void qtest_process_command(CharBackend *chr, gchar 
**words)
 
 if (words[0][4] == 'b') {
 uint8_t data;
-address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED,
+address_space_read(first_cpu->as, addr, MEMTXATTRS_CPU(first_cpu),
, 1);
 value = data;
 } else if (words[0][4] == 'w') {
 uint16_t data;
-address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED,
+address_space_read(first_cpu->as, addr, MEMTXATTRS_CPU(first_cpu),
, 2);
 value = tswap16(data);
 } else if (words[0][4] == 'l') {
 uint32_t data;
-address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED,
+address_space_read(first_cpu->as, addr, MEMTXATTRS_CPU(first_cpu),
, 4);
 value = tswap32(data);
 } else if (words[0][4] == 'q') {
-address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED,
+address_space_read(first_cpu->as, addr, MEMTXATTRS_CPU(first_cpu),
, 8);
 tswap64s();
 }
@@ -589,7 +589,7 @@ static void qtest_process_command(CharBackend *chr, gchar 
**words)
 g_assert(len);
 
 data = g_malloc(len);
-address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, data,
+address_space_read(first_cpu->as, addr, MEMTXATTRS_CPU(first_cpu), 
data,
len);
 
 enc = g_malloc(2 * len + 1);
@@ -615,7 +615,7 @@ static void qtest_process_command(CharBackend *chr, gchar 
**words)
 g_assert(ret == 0);
 
 data = g_malloc(len);
-address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, data,
+address_space_read(first_cpu->as, addr, MEMTXATTRS_CPU(first_cpu), 
data,
len);
 b64_data = g_base64_encode(data, len);
 qtest_send_prefix(chr);
@@ -650,7 +650,7 @@ static void qtest_process_command(CharBackend *chr, gchar 
**words)
 data[i] = 0;
 }
 }
-address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, data,
+address_space_write(first_cpu->as, addr, MEMTXATTRS_CPU(first_cpu), 
data,
 len);
 g_free(data);
 
@@ -673,7 +673,7 @@ static void qtest_process_command(CharBackend *chr, gchar 
**words)
 if (len) {
 data = g_malloc(len);
 memset(data, pattern, len);
-address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED,
+address_space_write(first_cpu->as, addr, MEMTXATTRS_CPU(first_cpu),
 data, len);
 g_free(data);
 }
@@ -707,7 +707,7 @@ static void qtest_process_command(CharBackend *chr, gchar 
**words)

[PATCH] s390x: Fix spelling errors

2022-11-11 Thread Thomas Huth

Fix typos (discovered with the 'codespell' utility).

Signed-off-by: Thomas Huth 
---
 hw/s390x/ipl.h  | 2 +-
 pc-bios/s390-ccw/cio.h  | 2 +-
 pc-bios/s390-ccw/iplb.h | 2 +-
 target/s390x/cpu_models.h   | 4 ++--
 hw/s390x/s390-pci-vfio.c| 2 +-
 hw/s390x/s390-virtio-ccw.c  | 6 +++---
 target/s390x/ioinst.c   | 2 +-
 target/s390x/tcg/excp_helper.c  | 2 +-
 target/s390x/tcg/fpu_helper.c   | 2 +-
 target/s390x/tcg/misc_helper.c  | 2 +-
 target/s390x/tcg/translate.c| 4 ++--
 target/s390x/tcg/translate_vx.c.inc | 6 +++---
 pc-bios/s390-ccw/start.S| 2 +-
 13 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h
index dfc6dfd89c..7fc86e7905 100644
--- a/hw/s390x/ipl.h
+++ b/hw/s390x/ipl.h
@@ -140,7 +140,7 @@ void s390_ipl_clear_reset_request(void);
  * have an offset of 4 + n * 8 bytes within the struct in order
  * to keep it double-word aligned.
  * The total size of the struct must never exceed 28 bytes.
- * This definition must be kept in sync with the defininition
+ * This definition must be kept in sync with the definition
  * in pc-bios/s390-ccw/iplb.h.
  */
 struct QemuIplParameters {
diff --git a/pc-bios/s390-ccw/cio.h b/pc-bios/s390-ccw/cio.h
index 1e5d4e92e1..88a88adfd2 100644
--- a/pc-bios/s390-ccw/cio.h
+++ b/pc-bios/s390-ccw/cio.h
@@ -20,7 +20,7 @@ struct pmcw {
 __u32 intparm;  /* interruption parameter */
 __u32 qf:1; /* qdio facility */
 __u32 w:1;
-__u32 isc:3;/* interruption sublass */
+__u32 isc:3;/* interruption subclass */
 __u32 res5:3;   /* reserved zeros */
 __u32 ena:1;/* enabled */
 __u32 lm:2; /* limit mode */
diff --git a/pc-bios/s390-ccw/iplb.h b/pc-bios/s390-ccw/iplb.h
index 772d5c57c9..cb6ac8a880 100644
--- a/pc-bios/s390-ccw/iplb.h
+++ b/pc-bios/s390-ccw/iplb.h
@@ -81,7 +81,7 @@ extern IplParameterBlock iplb 
__attribute__((__aligned__(PAGE_SIZE)));
 #define QIPL_FLAG_BM_OPTS_ZIPL  0x40
 
 /*
- * This definition must be kept in sync with the defininition
+ * This definition must be kept in sync with the definition
  * in hw/s390x/ipl.h
  */
 struct QemuIplParameters {
diff --git a/target/s390x/cpu_models.h b/target/s390x/cpu_models.h
index 74d1f87e4f..15c0f0dcfe 100644
--- a/target/s390x/cpu_models.h
+++ b/target/s390x/cpu_models.h
@@ -24,13 +24,13 @@ struct S390CPUDef {
 uint8_t gen;/* hw generation identification */
 uint16_t type;  /* cpu type identification */
 uint8_t ec_ga;  /* EC GA version (on which also the BC is based) */
-uint8_t mha_pow;/* Maximum Host Adress Power, mha = 2^pow-1 */
+uint8_t mha_pow;/* Maximum Host Address Power, mha = 2^pow-1 */
 uint32_t hmfai; /* hypervisor-managed facilities */
 /* base/min features, must never be changed between QEMU versions */
 S390FeatBitmap base_feat;
 /* used to init base_feat from generated data */
 S390FeatInit base_init;
-/* deafault features, QEMU version specific */
+/* default features, QEMU version specific */
 S390FeatBitmap default_feat;
 /* used to init default_feat from generated data */
 S390FeatInit default_init;
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
index 2aefa508a0..5f0adb0b4a 100644
--- a/hw/s390x/s390-pci-vfio.c
+++ b/hw/s390x/s390-pci-vfio.c
@@ -313,7 +313,7 @@ retry:
 /*
  * Get the host function handle from the vfio CLP capabilities chain.  Returns
  * true if a fh value was placed into the provided buffer.  Returns false
- * if a fh could not be obtained (ioctl failed or capabilitiy version does
+ * if a fh could not be obtained (ioctl failed or capability version does
  * not include the fh)
  */
 bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh)
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 7d80bc1837..2e64ffab45 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -354,7 +354,7 @@ static int s390_machine_protect(S390CcwMachineState *ms)
 }
 
 error_setg(_mig_blocker,
-   "protected VMs are currently not migrateable.");
+   "protected VMs are currently not migratable.");
 rc = migrate_add_blocker(pv_mig_blocker, _err);
 if (rc) {
 ram_block_discard_disable(false);
@@ -449,7 +449,7 @@ static void s390_machine_reset(MachineState *machine, 
ShutdownCause reason)
 break;
 case S390_RESET_MODIFIED_CLEAR:
 /*
- * Susbsystem reset needs to be done before we unshare memory
+ * Subsystem reset needs to be done before we unshare memory
  * and lose access to VIRTIO structures in guest memory.
  */
 subsystem_reset();
@@ -462,7 +462,7 @@ static void s390_machine_reset(MachineState *machine, 
ShutdownCause reason)
 break;
 case S390_RESET_LOAD_NORMAL:
 /*

[PATCH v5 07/20] hw/intc/gic: use MxTxAttrs to divine accessing CPU

2022-11-11 Thread Alex Bennée

Now that MxTxAttrs encodes a CPU we should use that to figure it out.
This solves edge cases like accessing via gdbstub or qtest. As we
should only be processing accesses from CPU cores we can push the CPU
extraction logic out to the main access functions. If the access does
not come from a CPU we log it and fail the transaction with
MEMTX_ACCESS_ERROR.

Reviewed-by: Richard Henderson 
Signed-off-by: Alex Bennée 
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/124

---
v2
  - update for new field
  - bool asserts
v3
  - fail non-CPU transactions
v5
  - split gic_valid_cpu from gic_get_current_cpu and use this
  - fix dud return false from gic_valid_cpu()
---
 hw/intc/arm_gic.c | 159 +-
 1 file changed, 102 insertions(+), 57 deletions(-)

diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c
index 65b1ef7151..62f36b247f 100644
--- a/hw/intc/arm_gic.c
+++ b/hw/intc/arm_gic.c
@@ -56,17 +56,38 @@ static const uint8_t gic_id_gicv2[] = {
 0x04, 0x00, 0x00, 0x00, 0x90, 0xb4, 0x2b, 0x00, 0x0d, 0xf0, 0x05, 0xb1
 };
 
-static inline int gic_get_current_cpu(GICState *s)
+
+/*
+ * The GIC should only be accessed by the CPU so if it is not we
+ * should fail the transaction (it would either be a bug in how we've
+ * wired stuff up, a limitation of the translator or the guest doing
+ * something weird like programming a DMA master to write to the MMIO
+ * region).
+ *
+ * Note the cpu_index is global and we currently don't have any models
+ * with multiple SoC's with different CPUs. However if we did we would
+ * need to transform the cpu_index into the socket core.
+ */
+
+static bool gic_valid_cpu(MemTxAttrs attrs)
 {
-if (!qtest_enabled() && s->num_cpu > 1) {
-return current_cpu->cpu_index;
+if (attrs.requester_type != MTRT_CPU) {
+qemu_log_mask(LOG_UNIMP | LOG_GUEST_ERROR,
+  "%s: saw non-CPU transaction", __func__);
+return false;
 }
-return 0;
+return true;
+}
+
+static inline int gic_get_current_cpu(GICState *s, MemTxAttrs attrs)
+{
+g_assert(attrs.requester_id < s->num_cpu);
+return attrs.requester_id;
 }
 
-static inline int gic_get_current_vcpu(GICState *s)
+static inline int gic_get_current_vcpu(GICState *s, MemTxAttrs attrs)
 {
-return gic_get_current_cpu(s) + GIC_NCPU;
+return gic_get_current_cpu(s, attrs) + GIC_NCPU;
 }
 
 /* Return true if this GIC config has interrupt groups, which is
@@ -945,17 +966,14 @@ static void gic_complete_irq(GICState *s, int cpu, int 
irq, MemTxAttrs attrs)
  * Although this is named a byte read we don't always return bytes and
  * rely on the calling function oring bits together.
  */
-static uint32_t gic_dist_readb(void *opaque, hwaddr offset, MemTxAttrs attrs)
+static uint32_t gic_dist_readb(GICState *s, int cpu, hwaddr offset, MemTxAttrs 
attrs)
 {
-GICState *s = (GICState *)opaque;
 uint32_t res;
 int irq;
 int i;
-int cpu;
 int cm;
 int mask;
 
-cpu = gic_get_current_cpu(s);
 cm = 1 << cpu;
 if (offset < 0x100) {
 if (offset < 0xc) {
@@ -1168,19 +1186,27 @@ bad_reg:
 static MemTxResult gic_dist_read(void *opaque, hwaddr offset, uint64_t *data,
  unsigned size, MemTxAttrs attrs)
 {
+GICState *s = (GICState *)opaque;
+int cpu;
+
+if (!gic_valid_cpu(attrs)) {
+return MEMTX_ACCESS_ERROR;
+}
+cpu = gic_get_current_cpu(s, attrs);
+
 switch (size) {
 case 1:
-*data = gic_dist_readb(opaque, offset, attrs);
+*data = gic_dist_readb(s, cpu, offset, attrs);
 break;
 case 2:
-*data = gic_dist_readb(opaque, offset, attrs);
-*data |= gic_dist_readb(opaque, offset + 1, attrs) << 8;
+*data = gic_dist_readb(s, cpu, offset, attrs);
+*data |= gic_dist_readb(s, cpu, offset + 1, attrs) << 8;
 break;
 case 4:
-*data = gic_dist_readb(opaque, offset, attrs);
-*data |= gic_dist_readb(opaque, offset + 1, attrs) << 8;
-*data |= gic_dist_readb(opaque, offset + 2, attrs) << 16;
-*data |= gic_dist_readb(opaque, offset + 3, attrs) << 24;
+*data = gic_dist_readb(s, cpu, offset, attrs);
+*data |= gic_dist_readb(s, cpu, offset + 1, attrs) << 8;
+*data |= gic_dist_readb(s, cpu, offset + 2, attrs) << 16;
+*data |= gic_dist_readb(s, cpu, offset + 3, attrs) << 24;
 break;
 default:
 return MEMTX_ERROR;
@@ -1190,15 +1216,12 @@ static MemTxResult gic_dist_read(void *opaque, hwaddr 
offset, uint64_t *data,
 return MEMTX_OK;
 }
 
-static void gic_dist_writeb(void *opaque, hwaddr offset,
+static void gic_dist_writeb(GICState *s, int cpu, hwaddr offset,
 uint32_t value, MemTxAttrs attrs)
 {
-GICState *s = (GICState *)opaque;
 int irq;
 int i;
-int cpu;
 
-cpu = gic_get_current_cpu(s);
 if (offset < 0x100) {
 if (offset == 0) {
 if

[PATCH v5 12/20] target/riscv: initialise MemTxAttrs for CPU access

2022-11-11 Thread Alex Bennée

get_physical_address works in the CPU context. Use the new
MEMTXATTRS_CPU constructor to ensure the correct CPU id is filled in
should it ever be needed by any devices later.

Currently the tlb_fill function isn't using the set with attributes
function so IO accesses from the softmmu slow-path will not be tagged
as coming from the CPU.

Signed-off-by: Alex Bennée 
---
 target/riscv/cpu_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 278d163803..e661f9e68a 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -761,7 +761,7 @@ static int get_physical_address(CPURISCVState *env, hwaddr 
*physical,
  * correct, but the value visible to the exception handler
  * (riscv_cpu_do_interrupt) is correct */
 MemTxResult res;
-MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
+MemTxAttrs attrs = MEMTXATTRS_CPU(env_cpu(env));
 int mode = mmu_idx & TB_FLAGS_PRIV_MMU_MASK;
 bool use_background = false;
 hwaddr ppn;
-- 
2.34.1

[PATCH v5 10/20] target/microblaze: initialise MemTxAttrs for CPU access

2022-11-11 Thread Alex Bennée

Both of these functions deal with CPU based access (as is evidenced by
the secure check straight after). Use the new MEMTXATTRS_CPU
constructor to ensure the correct CPU id is filled in should it ever
be needed by any devices later.

Signed-off-by: Alex Bennée 
---
 target/microblaze/helper.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/microblaze/helper.c b/target/microblaze/helper.c
index 98bdb82de8..655be3b320 100644
--- a/target/microblaze/helper.c
+++ b/target/microblaze/helper.c
@@ -44,7 +44,7 @@ bool mb_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
 MicroBlazeMMULookup lu;
 unsigned int hit;
 int prot;
-MemTxAttrs attrs = {};
+MemTxAttrs attrs = MEMTXATTRS_CPU(cs);
 
 attrs.secure = mb_cpu_access_is_secure(cpu, access_type);
 
@@ -235,7 +235,7 @@ hwaddr mb_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr 
addr,
 unsigned int hit;
 
 /* Caller doesn't initialize */
-*attrs = (MemTxAttrs) {};
+*attrs = MEMTXATTRS_CPU(cs);
 attrs->secure = mb_cpu_access_is_secure(cpu, MMU_DATA_LOAD);
 
 if (mmu_idx != MMU_NOMMU_IDX) {
-- 
2.34.1

[PATCH v5 16/20] include: add MEMTXATTRS_MACHINE helper

2022-11-11 Thread Alex Bennée

We will need this shortly for machine specific transactions for the PC
IOAPIC.

Signed-off-by: Alex Bennée 
---
 include/exec/memattrs.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/include/exec/memattrs.h b/include/exec/memattrs.h
index 8359fc448b..b92f11aaa4 100644
--- a/include/exec/memattrs.h
+++ b/include/exec/memattrs.h
@@ -104,6 +104,14 @@ typedef struct MemTxAttrs {
  {.requester_type = MTRT_PCI,   \
  .requester_id = pci_requester_id(dev)})
 
+/*
+ * Helper for setting a machine specific sourced transaction. The
+ * details of how to decode the requester_id are machine specific.
+ */
+#define MEMTXATTRS_MACHINE(id) ((MemTxAttrs) \
+{.requester_type = MTRT_MACHINE, \
+ .requester_id = id })
+
 /* New-style MMIO accessors can indicate that the transaction failed.
  * A zero (MEMTX_OK) response means success; anything else is a failure
  * of some kind. The memory subsystem will bitwise-OR together results
-- 
2.34.1

[PATCH v5 02/20] target/arm: ensure TCG IO accesses set appropriate MemTxAttrs

2022-11-11 Thread Alex Bennée

Both arm_cpu_tlb_fill (for normal IO) and
arm_cpu_get_phys_page_attrs_debug (for debug access) come through
get_phys_addr which is setting the other memory attributes for the
transaction. As these are all by definition CPU accesses we can also
set the requested_type/index as appropriate.

We also have to handle where the attributes are totally reset if we
call into get_phys_addr_twostage.

Signed-off-by: Alex Bennée 

---
v3
  - reword commit summary
v5
  - fix for new *result ABI
  - use MEMTXATTRS_CPU to fill in the initial values
  - also reset attrs in get_phys_addr_twostage
---
 target/arm/ptw.c| 3 ++-
 target/arm/tlb_helper.c | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/target/arm/ptw.c b/target/arm/ptw.c
index 3745ac9723..4b6683f90d 100644
--- a/target/arm/ptw.c
+++ b/target/arm/ptw.c
@@ -2634,6 +2634,7 @@ static bool get_phys_addr_twostage(CPUARMState *env, 
S1Translate *ptw,
 s1_lgpgsz = result->f.lg_page_size;
 cacheattrs1 = result->cacheattrs;
 memset(result, 0, sizeof(*result));
+result->f.attrs = MEMTXATTRS_CPU(env_cpu(env));
 
 ret = get_phys_addr_lpae(env, ptw, ipa, access_type, is_el0, result, fi);
 fi->s2addr = ipa;
@@ -2872,7 +2873,7 @@ hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, 
vaddr addr,
 .in_secure = arm_is_secure(env),
 .in_debug = true,
 };
-GetPhysAddrResult res = {};
+GetPhysAddrResult res = { .f.attrs = MEMTXATTRS_CPU(cs) };
 ARMMMUFaultInfo fi = {};
 bool ret;
 
diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c
index 0f4f4fc809..5960269421 100644
--- a/target/arm/tlb_helper.c
+++ b/target/arm/tlb_helper.c
@@ -208,7 +208,7 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
   bool probe, uintptr_t retaddr)
 {
 ARMCPU *cpu = ARM_CPU(cs);
-GetPhysAddrResult res = {};
+GetPhysAddrResult res = { .f.attrs = MEMTXATTRS_CPU(cs) };
 ARMMMUFaultInfo local_fi, *fi;
 int ret;
 
-- 
2.34.1

[PATCH v5 15/20] hw/i386: update vapic_write to use MemTxAttrs

2022-11-11 Thread Alex Bennée

This allows us to drop the current_cpu hack and properly model an
invalid access to the vapic.

Signed-off-by: Alex Bennée 
---
 hw/i386/kvmvapic.c | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c
index 43f8a8f679..a76ed07199 100644
--- a/hw/i386/kvmvapic.c
+++ b/hw/i386/kvmvapic.c
@@ -635,20 +635,21 @@ static int vapic_prepare(VAPICROMState *s)
 return 0;
 }
 
-static void vapic_write(void *opaque, hwaddr addr, uint64_t data,
-unsigned int size)
+static MemTxResult vapic_write(void *opaque, hwaddr addr, uint64_t data,
+   unsigned int size, MemTxAttrs attrs)
 {
 VAPICROMState *s = opaque;
+CPUState *cs;
 X86CPU *cpu;
 CPUX86State *env;
 hwaddr rom_paddr;
 
-if (!current_cpu) {
-return;
+if (attrs.requester_type != MTRT_CPU) {
+return MEMTX_ACCESS_ERROR;
 }
-
-cpu_synchronize_state(current_cpu);
-cpu = X86_CPU(current_cpu);
+cs = qemu_get_cpu(attrs.requester_id);
+cpu_synchronize_state(cs);
+cpu = X86_CPU(cs);
 env = >env;
 
 /*
@@ -708,6 +709,8 @@ static void vapic_write(void *opaque, hwaddr addr, uint64_t 
data,
 }
 break;
 }
+
+return MEMTX_OK;
 }
 
 static uint64_t vapic_read(void *opaque, hwaddr addr, unsigned size)
@@ -716,7 +719,7 @@ static uint64_t vapic_read(void *opaque, hwaddr addr, 
unsigned size)
 }
 
 static const MemoryRegionOps vapic_ops = {
-.write = vapic_write,
+.write_with_attrs = vapic_write,
 .read = vapic_read,
 .endianness = DEVICE_NATIVE_ENDIAN,
 };
-- 
2.34.1

[PATCH v5 05/20] target/arm: ensure m-profile helpers set appropriate MemTxAttrs

2022-11-11 Thread Alex Bennée

There are a number of helpers for M-profile that deal with CPU
initiated access to the vector and stack areas. While it is unlikely
these coincided with memory mapped IO devices it is not inconceivable.
Embedded targets tend to attract all sorts of interesting code and for
completeness we should tag the transaction appropriately.

Signed-off-by: Alex Bennée 

---
v5
  - rebase fixes for refactoring
---
 target/arm/m_helper.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/target/arm/m_helper.c b/target/arm/m_helper.c
index 355cd4d60a..2fb1ef95cd 100644
--- a/target/arm/m_helper.c
+++ b/target/arm/m_helper.c
@@ -184,7 +184,7 @@ static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, 
uint32_t value,
 CPUState *cs = CPU(cpu);
 CPUARMState *env = >env;
 MemTxResult txres;
-GetPhysAddrResult res = {};
+GetPhysAddrResult res = { .f.attrs = MEMTXATTRS_CPU(cs) };
 ARMMMUFaultInfo fi = {};
 bool secure = mmu_idx & ARM_MMU_IDX_M_S;
 int exc;
@@ -272,7 +272,7 @@ static bool v7m_stack_read(ARMCPU *cpu, uint32_t *dest, 
uint32_t addr,
 CPUState *cs = CPU(cpu);
 CPUARMState *env = >env;
 MemTxResult txres;
-GetPhysAddrResult res = {};
+GetPhysAddrResult res = { .f.attrs = MEMTXATTRS_CPU(cs) };
 ARMMMUFaultInfo fi = {};
 bool secure = mmu_idx & ARM_MMU_IDX_M_S;
 int exc;
@@ -665,7 +665,7 @@ static bool arm_v7m_load_vector(ARMCPU *cpu, int exc, bool 
targets_secure,
 MemTxResult result;
 uint32_t addr = env->v7m.vecbase[targets_secure] + exc * 4;
 uint32_t vector_entry;
-MemTxAttrs attrs = {};
+MemTxAttrs attrs = MEMTXATTRS_CPU(cs);
 ARMMMUIdx mmu_idx;
 bool exc_secure;
 
@@ -1999,7 +1999,7 @@ static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx 
mmu_idx, bool secure,
 CPUState *cs = CPU(cpu);
 CPUARMState *env = >env;
 V8M_SAttributes sattrs = {};
-GetPhysAddrResult res = {};
+GetPhysAddrResult res = { .f.attrs = MEMTXATTRS_CPU(cs) };
 ARMMMUFaultInfo fi = {};
 MemTxResult txres;
 
@@ -2047,7 +2047,7 @@ static bool v7m_read_sg_stack_word(ARMCPU *cpu, ARMMMUIdx 
mmu_idx,
 CPUState *cs = CPU(cpu);
 CPUARMState *env = >env;
 MemTxResult txres;
-GetPhysAddrResult res = {};
+GetPhysAddrResult res = { .f.attrs = MEMTXATTRS_CPU(cs) };
 ARMMMUFaultInfo fi = {};
 uint32_t value;
 
@@ -2805,7 +2805,7 @@ uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, 
uint32_t op)
  * inspecting the other MPU state.
  */
 if (arm_current_el(env) != 0 || alt) {
-GetPhysAddrResult res = {};
+GetPhysAddrResult res = { .f.attrs = MEMTXATTRS_CPU(env_cpu(env)) };
 ARMMMUFaultInfo fi = {};
 
 /* We can ignore the return value as prot is always set */
-- 
2.34.1

[PATCH v5 01/20] hw: encode accessing CPU index in MemTxAttrs

2022-11-11 Thread Alex Bennée

We currently have hacks across the hw/ to reference current_cpu to
work out what the current accessing CPU is. This breaks in some cases
including using gdbstub to access HW state. As we have MemTxAttrs to
describe details about the access lets extend it so CPU accesses can
be explicitly marked.

To achieve this we create a new requester_type which indicates to
consumers how requester_id it to be consumed. We absorb the existing
unspecified:1 bitfield into this type and also document a potential
machine specific encoding which will be useful to (currently)
out-of-tree extensions.

Places that checked to see if things where unspecified now instead
check the source if what they expected.

There are a number of places we need to fix up including:

  CPU helpers directly calling address_space_*() fns
  models in hw/ fishing the data out of current_cpu
  hypervisors offloading device emulation to QEMU

I'll start addressing some of these in following patches.

Signed-off-by: Alex Bennée 

---
v2
  - use separate field cpu_index
  - bool for requester_is_cpu
v3
  - switch to enum MemTxRequesterType
  - move helper #define to patch
  - revert to overloading requester_id
  - mention hypervisors in commit message
  - drop cputlb tweaks, they will move to target specific code
v4
  - merge unspecified:1 into MTRT_UNSPECIFIED
  - document a MTRT_MACHINE for more complex encoding
  - ensure existing users of requester_id set MTRT_PCI
  - ensure existing consumers of requester_id check type is MTRT_PCI
  - have MEMTXATTRS_CPU take CPUState * directly
v5
  - re-order so MTRT_UNSPECIFIED is zero
  - fix up comments referring to the difference between empty and unspecified:1
  - kernel-doc annotations for typedefs
  - don't impose source type tz-msc during transformation
  - re-order bitfields so requester_type/id at top
  - add helper for MEMTXATTRS_PCI
---
 include/exec/memattrs.h | 68 -
 hw/i386/amd_iommu.c |  6 ++--
 hw/i386/intel_iommu.c   |  2 +-
 hw/misc/tz-mpc.c|  2 +-
 hw/misc/tz-msc.c|  6 ++--
 hw/pci/pci.c|  4 +--
 6 files changed, 60 insertions(+), 28 deletions(-)

diff --git a/include/exec/memattrs.h b/include/exec/memattrs.h
index 9fb98bc1ef..8359fc448b 100644
--- a/include/exec/memattrs.h
+++ b/include/exec/memattrs.h
@@ -14,7 +14,32 @@
 #ifndef MEMATTRS_H
 #define MEMATTRS_H
 
-/* Every memory transaction has associated with it a set of
+/**
+ * typedef MemTxRequesterType - source of memory transaction
+ *
+ * Every memory transaction comes from a specific place which defines
+ * how requester_id should be handled if at all.
+ *
+ * UNSPECIFIED: the default for otherwise undefined MemTxAttrs
+ * CPU: requester_id is the global cpu_index
+ *  This needs further processing if you need to work out which
+ *  socket or complex it comes from
+ * PCI: indicates the requester_id is a PCI id
+ * MACHINE: indicates a machine specific encoding
+ *  This will require further processing to decode into its
+ *  constituent parts.
+ */
+typedef enum MemTxRequesterType {
+MTRT_UNSPECIFIED = 0,
+MTRT_CPU,
+MTRT_PCI,
+MTRT_MACHINE
+} MemTxRequesterType;
+
+/**
+ * typedef MemTxAttrs - attributes of a memory transaction
+ *
+ * Every memory transaction has associated with it a set of
  * attributes. Some of these are generic (such as the ID of
  * the bus master); some are specific to a particular kind of
  * bus (such as the ARM Secure/NonSecure bit). We define them
@@ -23,13 +48,12 @@
  * different semantics.
  */
 typedef struct MemTxAttrs {
-/* Bus masters which don't specify any attributes will get this
- * (via the MEMTXATTRS_UNSPECIFIED constant), so that we can
- * distinguish "all attributes deliberately clear" from
- * "didn't specify" if necessary.
- */
-unsigned int unspecified:1;
-/* ARM/AMBA: TrustZone Secure access
+/* Requester type (e.g. CPU or PCI MSI) */
+MemTxRequesterType requester_type:2;
+/* Requester ID */
+unsigned int requester_id:16;
+/*
+ * ARM/AMBA: TrustZone Secure access
  * x86: System Management Mode access
  */
 unsigned int secure:1;
@@ -43,8 +67,6 @@ typedef struct MemTxAttrs {
  * (see MEMTX_ACCESS_ERROR).
  */
 unsigned int memory:1;
-/* Requester ID (for MSI for example) */
-unsigned int requester_id:16;
 /* Invert endianness for this page */
 unsigned int byte_swap:1;
 /*
@@ -59,12 +81,28 @@ typedef struct MemTxAttrs {
 unsigned int target_tlb_bit2 : 1;
 } MemTxAttrs;
 
-/* Bus masters which don't specify any attributes will get this,
- * which has all attribute bits clear except the topmost one
- * (so that we can distinguish "all attributes deliberately clear"
- * from "didn't specify" if necessary).
+/*
+ * Bus masters which don't specify any attributes will get this which
+ * indicates none of the attributes can be used.
+ */
+#define MEMTXATTRS_UNSPECIFIED ((MemTxAttrs) \

[PATCH v5 04/20] target/arm: ensure KVM traps set appropriate MemTxAttrs

2022-11-11 Thread Alex Bennée

Although most KVM users will use the in-kernel GIC emulation it is
perfectly possible not to. In this case we need to ensure the
MemTxAttrs are correctly populated so the GIC can divine the source
CPU of the operation.

Reviewed-by: Richard Henderson 
Signed-off-by: Alex Bennée 

---
v3
  - new for v3
v5
  - tags
  - use MEMTXATTRS_PCI
---
 target/arm/kvm.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index f022c644d2..bb4cdbfbd5 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -803,13 +803,14 @@ MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run 
*run)
 {
 ARMCPU *cpu;
 uint32_t switched_level;
+MemTxAttrs attrs = MEMTXATTRS_CPU(cs);
 
 if (kvm_irqchip_in_kernel()) {
 /*
  * We only need to sync timer states with user-space interrupt
  * controllers, so return early and save cycles if we don't.
  */
-return MEMTXATTRS_UNSPECIFIED;
+return attrs;
 }
 
 cpu = ARM_CPU(cs);
@@ -850,7 +851,7 @@ MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run 
*run)
 qemu_mutex_unlock_iothread();
 }
 
-return MEMTXATTRS_UNSPECIFIED;
+return attrs;
 }
 
 void kvm_arm_vm_state_change(void *opaque, bool running, RunState state)
@@ -1005,6 +1006,7 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry 
*route,
 hwaddr xlat, len, doorbell_gpa;
 MemoryRegionSection mrs;
 MemoryRegion *mr;
+MemTxAttrs attrs = MEMTXATTRS_PCI(dev);
 
 if (as == _space_memory) {
 return 0;
@@ -1014,8 +1016,7 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry 
*route,
 
 RCU_READ_LOCK_GUARD();
 
-mr = address_space_translate(as, address, , , true,
- MEMTXATTRS_UNSPECIFIED);
+mr = address_space_translate(as, address, , , true, attrs);
 
 if (!mr) {
 return 1;
-- 
2.34.1

[PATCH for 8.0 v5 00/20] use MemTxAttrs to avoid current_cpu in hw/

2022-11-11 Thread Alex Bennée

Hi,

This series attempts to improve the modelling of non-CPU writes to
peripherals by expanding the MemTxAttrs to carry more details about
the requester. There are only 3 requester types, the CPU, the PCI bus
and the MACHINE. The last is intended for machine specific buses and
leaves the details of how to decode that information to machine
specific code.

I've extended this beyond just being an Arm only experiment and into
some other machine types. Perhaps the most complicated bit was
tweaking the modelling of the IOAPIC/APIC which gave me the first use
of MTRT_MACHINE (although we don't fully validate the source we do now
correctly drop CPU accesses to the APIC MSI region).

The longer term goal will be to eliminate all the legacy mem
read/write functions and use MemTxAttrs everywhere.

The final patch deprecates the use of current_cpu in hw/ for new code
as a comment. What do people think?

Based-on: 2022145529.4020801-1-alex.ben...@linaro.org

Alex Bennée (20):
  hw: encode accessing CPU index in MemTxAttrs
  target/arm: ensure TCG IO accesses set appropriate MemTxAttrs
  target/arm: ensure HVF traps set appropriate MemTxAttrs
  target/arm: ensure KVM traps set appropriate MemTxAttrs
  target/arm: ensure m-profile helpers set appropriate MemTxAttrs
  qtest: make read/write operation appear to be from CPU
  hw/intc/gic: use MxTxAttrs to divine accessing CPU
  hw/timer: convert mptimer access to attrs to derive cpu index
  hw/arm: remove current_cpu hack from pxa2xx access
  target/microblaze: initialise MemTxAttrs for CPU access
  target/sparc: initialise MemTxAttrs for CPU access
  target/riscv: initialise MemTxAttrs for CPU access
  target/i386: add explicit initialisation for MexTxAttrs
  hw/audio: explicitly set .requester_type for intel-hda
  hw/i386: update vapic_write to use MemTxAttrs
  include: add MEMTXATTRS_MACHINE helper
  hw/intc: properly model IOAPIC MSI messages
  hw/i386: convert apic access to use MemTxAttrs
  hw/isa: derive CPUState from MemTxAttrs in apm_ioport_writeb
  include/hw: add commentary to current_cpu export

 include/exec/memattrs.h   |  76 +++---
 include/hw/core/cpu.h |  14 +++
 include/hw/i386/apic.h|   2 +-
 include/hw/i386/ioapic_internal.h |   2 +
 include/hw/isa/apm.h  |   2 +-
 target/i386/cpu.h |   4 +-
 hw/acpi/ich9.c|   1 -
 hw/acpi/piix4.c   |   2 +-
 hw/arm/pxa2xx.c   |   2 +-
 hw/audio/intel-hda.c  |   2 +-
 hw/i386/amd_iommu.c   |   6 +-
 hw/i386/intel_iommu.c |   2 +-
 hw/i386/kvmvapic.c|  19 ++--
 hw/i386/x86.c |  11 +--
 hw/intc/apic.c|  62 
 hw/intc/arm_gic.c | 159 +++---
 hw/intc/ioapic.c  |  35 +--
 hw/isa/apm.c  |  21 +++-
 hw/isa/lpc_ich9.c |   5 +-
 hw/misc/tz-mpc.c  |   2 +-
 hw/misc/tz-msc.c  |   6 +-
 hw/pci/pci.c  |   4 +-
 hw/timer/arm_mptimer.c|  49 ++---
 softmmu/qtest.c   |  26 ++---
 target/arm/hvf/hvf.c  |   4 +-
 target/arm/kvm.c  |   9 +-
 target/arm/m_helper.c |  12 +--
 target/arm/ptw.c  |   3 +-
 target/arm/tlb_helper.c   |   2 +-
 target/i386/hax/hax-all.c |   2 +-
 target/i386/nvmm/nvmm-all.c   |   2 +-
 target/i386/sev.c |   2 +-
 target/i386/whpx/whpx-all.c   |   2 +-
 target/microblaze/helper.c|   4 +-
 target/riscv/cpu_helper.c |   2 +-
 target/sparc/mmu_helper.c |   6 +-
 36 files changed, 370 insertions(+), 194 deletions(-)

-- 
2.34.1

[PATCH v5 03/20] target/arm: ensure HVF traps set appropriate MemTxAttrs

2022-11-11 Thread Alex Bennée

As most HVF devices are done purely in software we need to make sure
we properly encode the source CPU in MemTxAttrs. This will allow the
device emulations to use those attributes rather than relying on
current_cpu (although current_cpu will still be correct in this case).

Signed-off-by: Alex Bennée 
Reviewed-by: Richard Henderson 
Reviewed-by: Mads Ynddal 
Acked-by: Alexander Graf 

---
v2
  - update MEMTXATTRS macro
v5
  - more tags
---
 target/arm/hvf/hvf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 060aa0ccf4..d81fbbb2df 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -1233,11 +1233,11 @@ int hvf_vcpu_exec(CPUState *cpu)
 val = hvf_get_reg(cpu, srt);
 address_space_write(_space_memory,
 hvf_exit->exception.physical_address,
-MEMTXATTRS_UNSPECIFIED, , len);
+MEMTXATTRS_CPU(cpu), , len);
 } else {
 address_space_read(_space_memory,
hvf_exit->exception.physical_address,
-   MEMTXATTRS_UNSPECIFIED, , len);
+   MEMTXATTRS_CPU(cpu), , len);
 hvf_set_reg(cpu, srt, val);
 }
 
-- 
2.34.1

[PATCH v5 08/20] hw/timer: convert mptimer access to attrs to derive cpu index

2022-11-11 Thread Alex Bennée

This removes the hacks to deal with empty current_cpu.

Reviewed-by: Richard Henderson 
Signed-off-by: Alex Bennée 

---
v2
  - update for new fields
  - bool asserts
v3
  - properly fail memory transactions from non-CPU sources
---
 hw/timer/arm_mptimer.c | 49 +-
 1 file changed, 34 insertions(+), 15 deletions(-)

diff --git a/hw/timer/arm_mptimer.c b/hw/timer/arm_mptimer.c
index cdfca3000b..4618779ade 100644
--- a/hw/timer/arm_mptimer.c
+++ b/hw/timer/arm_mptimer.c
@@ -28,6 +28,7 @@
 #include "migration/vmstate.h"
 #include "qapi/error.h"
 #include "qemu/module.h"
+#include "qemu/log.h"
 #include "hw/core/cpu.h"
 
 #define PTIMER_POLICY   \
@@ -41,15 +42,23 @@
  * which is used in both the ARM11MPCore and Cortex-A9MP.
  */
 
-static inline int get_current_cpu(ARMMPTimerState *s)
+static bool is_from_cpu(MemTxAttrs attrs)
 {
-int cpu_id = current_cpu ? current_cpu->cpu_index : 0;
+if (attrs.requester_type != MTRT_CPU) {
+qemu_log_mask(LOG_UNIMP | LOG_GUEST_ERROR,
+  "%s: saw non-CPU transaction", __func__);
+return false;
+}
+return true;
+}
 
+static int get_current_cpu(ARMMPTimerState *s, MemTxAttrs attrs)
+{
+int cpu_id = attrs.requester_id;
 if (cpu_id >= s->num_cpu) {
 hw_error("arm_mptimer: num-cpu %d but this cpu is %d!\n",
  s->num_cpu, cpu_id);
 }
-
 return cpu_id;
 }
 
@@ -178,25 +187,35 @@ static void timerblock_write(void *opaque, hwaddr addr,
 /* Wrapper functions to implement the "read timer/watchdog for
  * the current CPU" memory regions.
  */
-static uint64_t arm_thistimer_read(void *opaque, hwaddr addr,
-   unsigned size)
+static MemTxResult arm_thistimer_read(void *opaque, hwaddr addr, uint64_t 
*data,
+  unsigned size, MemTxAttrs attrs)
 {
-ARMMPTimerState *s = (ARMMPTimerState *)opaque;
-int id = get_current_cpu(s);
-return timerblock_read(>timerblock[id], addr, size);
+if (is_from_cpu(attrs)) {
+ARMMPTimerState *s = (ARMMPTimerState *)opaque;
+int id = get_current_cpu(s, attrs);
+*data = timerblock_read(>timerblock[id], addr, size);
+return MEMTX_OK;
+} else {
+return MEMTX_ACCESS_ERROR;
+}
 }
 
-static void arm_thistimer_write(void *opaque, hwaddr addr,
-uint64_t value, unsigned size)
+static MemTxResult arm_thistimer_write(void *opaque, hwaddr addr,
+uint64_t value, unsigned size, MemTxAttrs 
attrs)
 {
-ARMMPTimerState *s = (ARMMPTimerState *)opaque;
-int id = get_current_cpu(s);
-timerblock_write(>timerblock[id], addr, value, size);
+if (is_from_cpu(attrs)) {
+ARMMPTimerState *s = (ARMMPTimerState *)opaque;
+int id = get_current_cpu(s, attrs);
+timerblock_write(>timerblock[id], addr, value, size);
+return MEMTX_OK;
+} else {
+return MEMTX_ACCESS_ERROR;
+}
 }
 
 static const MemoryRegionOps arm_thistimer_ops = {
-.read = arm_thistimer_read,
-.write = arm_thistimer_write,
+.read_with_attrs = arm_thistimer_read,
+.write_with_attrs = arm_thistimer_write,
 .valid = {
 .min_access_size = 4,
 .max_access_size = 4,
-- 
2.34.1

Re: [PATCH v2 04/12] tests/docker: allow user to override check target

2022-11-11 Thread Philippe Mathieu-Daudé


On 11/11/22 15:55, Alex Bennée wrote:

This is useful when trying to bisect a particular failing test behind
a docker run. For example:

   make docker-test-clang@fedora \
 TARGET_LIST=arm-softmmu \
 TEST_COMMAND="meson test qtest-arm/qos-test" \
 J=9 V=1

Signed-off-by: Alex Bennée 

---
v1
  - fix s/target /target./
  - CHECK_TARGET -> TEST_COMMAND
---
  tests/docker/Makefile.include | 2 ++
  tests/docker/common.rc| 6 +++---
  2 files changed, 5 insertions(+), 3 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH 06/13] block: Drain invidual nodes during reopen

2022-11-11 Thread Kevin Wolf

Am 09.11.2022 um 17:00 hat Vladimir Sementsov-Ogievskiy geschrieben:
> In subject: individual
> 
> On 11/8/22 15:37, Kevin Wolf wrote:
> > bdrv_reopen() and friends use subtree drains as a lazy way of covering
> > all the nodes they touch. Turns out that this lazy way is a lot more
> > complicated than just draining the nodes individually, even not
> > accounting for the additional complexity in the drain mechanism itself.
> > 
> > Simplify the code by switching to draining the individual nodes that are
> > already managed in the BlockReopenQueue anyway.
> > 
> > Signed-off-by: Kevin Wolf 
> > ---
> >   block.c | 11 ---
> >   block/replication.c |  6 --
> >   blockdev.c  | 13 -
> >   3 files changed, 4 insertions(+), 26 deletions(-)
> > 
> 
> [..]
> 
> >   bdrv_reopen_queue_free(queue);
> > -for (p = drained; p; p = p->next) {
> > -BlockDriverState *bs = p->data;
> > -AioContext *ctx = bdrv_get_aio_context(bs);
> > -
> > -aio_context_acquire(ctx);
> 
> In bdrv_reopen_queue_free() we don't have this acquire()/release()
> pair around bdrv_drained_end(). We don't need it anymore?

Good catch, I think we do.

Reopen is a bit messy with AioContext locks. I think the rule is
supposed to be that bdrv_reopen_queue() requires that the lock for
bs->aio_context is held, and bdrv_reopen_multiple() requires that no
AioContext lock is held, right?

Because the former is not actually true: qmp_blockdev_reopen() and the
'replication' block driver do indeed take the lock, but bdrv_reopen()
drops it for both functions!

So I think we also need an additional fix for bdrv_reopen() to drop the
lock only after calling bdrv_reopen_queue(). It may not have made a
difference before, but now that we call bdrv_drained_begin() in it, it
seems important.

Kevin

Re: [PATCH-for-7.2 v2] libvduse: Avoid warning about dangerous use of strncpy()

2022-11-11 Thread Stefan Hajnoczi

Reviewed-by: Stefan Hajnoczi

Re: [PATCH v2 01/12] Run docker probe only if docker or podman are available

2022-11-11 Thread Thomas Huth


On 11/11/2022 15.55, Alex Bennée wrote:

From: Stefan Weil 

The docker probe uses "sudo -n" which can cause an e-mail with a security 
warning
each time when configure is run. Therefore run docker probe only if either 
docker
or podman are available.

That avoids the problematic "sudo -n" on build environments which have neither
docker nor podman installed.

Fixes: c4575b59155e2e00 ("configure: store container engine in config-host.mak")
Signed-off-by: Stefan Weil 
Message-Id: <20221030083510.310584-1...@weilnetz.de>
Signed-off-by: Alex Bennée 
---
  configure | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index 66928692b0..26c7bc5154 100755
--- a/configure
+++ b/configure
@@ -1780,7 +1780,7 @@ fi
  # functions to probe cross compilers
  
  container="no"

-if test $use_containers = "yes"; then
+if test $use_containers = "yes" && (has "docker" || has "podman"); then
  case $($python "$source_path"/tests/docker/docker.py probe) in
  *docker) container=docker ;;
  podman) container=podman ;;


Maybe the probe should better be done in the docker.py script instead? ... 
but doing it here likely does not hurt either, so:


Reviewed-by: Thomas Huth

Re: [PATCH 04/13] block: Remove drained_end_counter

2022-11-11 Thread Kevin Wolf

Am 09.11.2022 um 15:44 hat Vladimir Sementsov-Ogievskiy geschrieben:
> On 11/8/22 15:37, Kevin Wolf wrote:
> > drained_end_counter is unused now, nobody changes its value any more. It
> > can be removed.
> > 
> > In cases where we had two almost identical functions that only differed
> > in whether the caller passes drained_end_counter, or whether they would
> > poll for a local drained_end_counter to reach 0, these become a single
> > function.
> > 
> > Signed-off-by: Kevin Wolf 
> 
> Reviewed-by: Vladimir Sementsov-Ogievskiy 
> 
> [..]
> 
> >   /* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */
> 
> Not about this patch, but what is recursive in bdrv_drain_invoke() ?

Nothing today, but it used to be the case. Looks like I forgot to remove
the comment in commit 7d40d9ef five years ago.

> > -static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
> > -  int *drained_end_counter)
> > +static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
> >   {
> >   if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) ||
> >   (!begin && !bs->drv->bdrv_drain_end)) {
> 
> [..]
> 
> >   /**
> >* This function does not poll, nor must any of its recursively called
> > - * functions.  The *drained_end_counter pointee will be incremented
> > - * once
> 
> Seems that is wrong already after previous commit.. Not critical

I think it's technically correct: We don't schedule background
operations any more, so any statement about them is true.  :-)

You're right, it could be updated in the previous commit, but maybe it's
easier to read the patches when you need to verify my claim only in this
patch. As you said, it doesn't matter much anyway, at the end of the
series it's gone.

Kevin

> > for every background operation scheduled, and decremented once
> > - * the operation settles.  Therefore, the pointer must remain valid
> > - * until the pointee reaches 0.  That implies that whoever sets up the
> > - * pointee has to poll until it is 0.
> > - *
> > - * We use atomic operations to access *drained_end_counter, because
> > - * (1) when called from bdrv_set_aio_context_ignore(), the subgraph of
> > - * @bs may contain nodes in different AioContexts,
> > - * (2) bdrv_drain_all_end() uses the same counter for all nodes,
> > - * regardless of which AioContext they are in.
> > + * functions.
> >*/
> >   static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
> > -BdrvChild *parent, bool ignore_bds_parents,
> > -int *drained_end_counter)
> > +BdrvChild *parent, bool ignore_bds_parents)
> >   {
> >   BdrvChild *child;
> >   int old_quiesce_counter;
> > -assert(drained_end_counter != NULL);
> > -
> 
> [..]
> 
> -- 
> Best regards,
> Vladimir
>

Re: [PATCH v2] util/qemu-config: Fix "query-command-line-options" to provide the right values

2022-11-11 Thread Thomas Huth


On 11/11/2022 15.53, Markus Armbruster wrote:

Thomas Huth  writes:


The "query-command-line-options" command uses a hand-crafted list
of options that should be returned for the "machine" parameter.
This is pretty much out of sync with reality, for example settings
like "kvm_shadow_mem" or "accel" are not parameters for the machine
anymore. Also, there is no distinction between the targets here, so
e.g. the s390x-specific values like "loadparm" in this list also
show up with the other targets like x86_64.

Let's fix this now by geting rid of the hand-crafted list and by
querying the properties of the machine classes instead to assemble
the list.


Do we know what uses this command, and how these users are
inconvenienced by the flaw you're fixing?

I'm asking because the command is pretty much out of sync with reality
by (mis-)design.


libvirt apparently queries this data (see the various 
tests/qemucapabilitiesdata/*.replies files in their repository), but since 
it's so much out-of-sync with reality, it's not of a big use there yet.


See for example here:

https://lists.gnu.org/archive/html/qemu-devel/2021-12/msg00581.html

If we finally fix this problem with "query-command-line-options" in QEMU, it 
should be much easier to deprecate -no-hpet in QEMU, too.


 Thomas

Re: [PATCH-for-7.2 v2] libvduse: Avoid warning about dangerous use of strncpy()

2022-11-11 Thread Bin Meng

> From: Philippe Mathieu-Daudé 
> 
> GCC 8 added a -Wstringop-truncation warning:
> 
>   The -Wstringop-truncation warning added in GCC 8.0 via r254630 for
>   bug 81117 is specifically intended to highlight likely unintended
>   uses of the strncpy function that truncate the terminating NUL
>   character from the source string.
> 
> Here the next line indeed unconditionally zeroes the last byte, but
> 1/ the buffer has been calloc'd, so we don't need to add an extra
> byte, and 2/ we called vduse_name_is_invalid() which checked the
> string length, so we can simply call strcpy().
> 
> This fixes when using gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0:
> 
>   [42/666] Compiling C object subprojects/libvduse/libvduse.a.p/libvduse.c.o
>   FAILED: subprojects/libvduse/libvduse.a.p/libvduse.c.o
>   cc -m64 -mcx16 -Isubprojects/libvduse/libvduse.a.p -Isubprojects/libvduse 
> -I../../subprojects/libvduse [...] -o 
> subprojects/libvduse/libvduse.a.p/libvduse.c.o -c 
> ../../subprojects/libvduse/libvduse.c
>   In file included from /usr/include/string.h:495,
>from ../../subprojects/libvduse/libvduse.c:24:
>   In function ‘strncpy’,
>   inlined from ‘vduse_dev_create’ at 
> ../../subprojects/libvduse/libvduse.c:1312:5:
>   /usr/include/x86_64-linux-gnu/bits/string_fortified.h:106:10: error: 
> ‘__builtin_strncpy’ specified bound 256 equals destination size 
> [-Werror=stringop-truncation]
> 106 |   return __builtin___strncpy_chk (__dest, __src, __len, __bos 
> (__dest));
> |  
> ^~
>   cc1: all warnings being treated as errors
>   ninja: build stopped: cannot make progress due to previous errors.
> 
> Fixes: d9cf16c0be ("libvduse: Replace strcpy() with strncpy()")
> Suggested-by: Markus Armbruster 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
> Supersedes: <20220919192306.52729-1-f4...@amsat.org>
> Cc: Xie Yongji 
> Cc: Kevin Wolf 
> ---
>  subprojects/libvduse/libvduse.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)

Tested-by: Bin Meng

Re: should ioapic_service really be modelling cpu writes?

2022-11-11 Thread Paolo Bonzini

Il ven 11 nov 2022, 15:03 Alex Bennée  ha scritto:

>
> Paolo Bonzini  writes:
>
> > On 11/11/22 13:26, Alex Bennée wrote:
> >>   if (addr > 0xfff || !index) {
> >>   switch (attrs.requester_type) {
> >>   }
> >>   MSIMessage msi = { .address = addr, .data = val };
> >>   apic_send_msi();
> >>   return MEMTX_OK;
> >>   }
> >
> >
> >> which at least gets things booting properly. Does this seem like a
> >> better modelling of the APIC behaviour?
> >
> > Yes and you don't even need the "if", just do MTRT_CPU vs everything
> > else.
>
> Can the CPU trigger MSIs by writing to this area of memory?

No, it's a different bus. If it can in QEMU that's a bug.

I went for
> the explicit switch for clarity but are you saying:
>
> if (attrs.requester_type != MTRT_CPU) {
> MSIMessage msi = { .address = addr, .data = val };
> apic_send_msi();
> return MEMTX_OK;
> } else {
> return MEMTX_ACESSS_ERROR;
> }
>
> for the MSI range?
>

Yes that would work. It can be tightened even further by removing the "if
(addr ...)" completely and only checking the requester type (which in turn
I would do with a function like "return APIC based on txattrs requester
type and id, or return NULL if requester not MTRT_CPU"), but no need to
hurry.

Thanks,

Paolo

>
> >
> > Paolo
>
>
> --
> Alex Bennée
>
>

[PATCH v3] acpi/tests/avocado/bits: some misc fixes

2022-11-11 Thread Ani Sinha

Most of the changes are trivial. The bits test timeout has now been increased
to 110 seconds in order to accommodate slower systems and fewer unnecessary
failures. Removed of the reference to non-existent README file in docs. Some
minor corrections in the doc file.

CC: Thomas Huth 
CC: Michael S. Tsirkin 
CC: qemu-triv...@nongnu.org
Signed-off-by: Ani Sinha 
Reviewed-by: Thomas Huth 
---
 docs/devel/acpi-bits.rst   | 12 
 tests/avocado/acpi-bits.py |  5 +++--
 2 files changed, 7 insertions(+), 10 deletions(-)

changes from v1: address Thomas' suggestions.
changes from v2: more minor corrections in doc, tags added.

diff --git a/docs/devel/acpi-bits.rst b/docs/devel/acpi-bits.rst
index c9564d871a..56e76338c3 100644
--- a/docs/devel/acpi-bits.rst
+++ b/docs/devel/acpi-bits.rst
@@ -16,11 +16,8 @@ end user. The other is that we have more control of what we 
wanted to test
 and how by directly using acpica interpreter on top of the bios on a running
 system. More details on the inspiration for developing biosbits and its real
 life uses can be found in [#a]_ and [#b]_.
-This directory contains tests written in python using avocado framework that
-exercises the QEMU bios components using biosbits and reports test failures.
 For QEMU, we maintain a fork of bios bits in gitlab along with all the
-dependent submodules:
-https://gitlab.com/qemu-project/biosbits-bits
+dependent submodules here: https://gitlab.com/qemu-project/biosbits-bits
 This fork contains numerous fixes, a newer acpica and changes specific to
 running this avocado QEMU tests using bits. The author of this document
 is the sole maintainer of the QEMU fork of bios bits repo.
@@ -38,10 +35,9 @@ Under ``tests/avocado/`` as the root we have:
│ ├── bits-config
│ │ └── bits-cfg.txt
│ ├── bits-tests
-   │ │ ├── smbios.py2
-   │ │ ├── testacpi.py2
-   │ │ └── testcpuid.py2
-   │ └── README
+   │   ├── smbios.py2
+   │   ├── testacpi.py2
+   │   └── testcpuid.py2
├── acpi-bits.py
 
 * ``tests/avocado``:
diff --git a/tests/avocado/acpi-bits.py b/tests/avocado/acpi-bits.py
index 8745a58a76..2edc36fc26 100644
--- a/tests/avocado/acpi-bits.py
+++ b/tests/avocado/acpi-bits.py
@@ -385,8 +385,9 @@ def test_acpi_smbios_bits(self):
 self._vm.launch()
 # biosbits has been configured to run all the specified test suites
 # in batch mode and then automatically initiate a vm shutdown.
-# sleep for maximum of one minute
-max_sleep_time = time.monotonic() + 60
+# sleep for maximum of a minute and 50 seconds in order to accommodate
+# even slower test setups.
+max_sleep_time = time.monotonic() + 110
 while self._vm.is_running() and time.monotonic() < max_sleep_time:
 time.sleep(1)
 
-- 
2.34.1

[PATCH v3 1/3] accel: introduce accelerator blocker API

2022-11-11 Thread Emanuele Giuseppe Esposito

This API allows the accelerators to prevent vcpus from issuing
new ioctls while execting a critical section marked with the
accel_ioctl_inhibit_begin/end functions.

Note that all functions submitting ioctls must mark where the
ioctl is being called with accel_{cpu_}ioctl_begin/end().

This API requires the caller to always hold the BQL.
API documentation is in sysemu/accel-blocker.h

Internally, it uses a QemuLockCnt together with a per-CPU QemuLockCnt
(to minimize cache line bouncing) to keep avoid that new ioctls
run when the critical section starts, and a QemuEvent to wait
that all running ioctls finish.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 accel/accel-blocker.c  | 154 +
 accel/meson.build  |   2 +-
 hw/core/cpu-common.c   |   2 +
 include/hw/core/cpu.h  |   3 +
 include/sysemu/accel-blocker.h |  56 
 5 files changed, 216 insertions(+), 1 deletion(-)
 create mode 100644 accel/accel-blocker.c
 create mode 100644 include/sysemu/accel-blocker.h

diff --git a/accel/accel-blocker.c b/accel/accel-blocker.c
new file mode 100644
index 00..1e7f423462
--- /dev/null
+++ b/accel/accel-blocker.c
@@ -0,0 +1,154 @@
+/*
+ * Lock to inhibit accelerator ioctls
+ *
+ * Copyright (c) 2022 Red Hat Inc.
+ *
+ * Author: Emanuele Giuseppe Esposito   
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+#include "qemu/main-loop.h"
+#include "hw/core/cpu.h"
+#include "sysemu/accel-blocker.h"
+
+static QemuLockCnt accel_in_ioctl_lock;
+static QemuEvent accel_in_ioctl_event;
+
+void accel_blocker_init(void)
+{
+qemu_lockcnt_init(_in_ioctl_lock);
+qemu_event_init(_in_ioctl_event, false);
+}
+
+void accel_ioctl_begin(void)
+{
+if (likely(qemu_mutex_iothread_locked())) {
+return;
+}
+
+/* block if lock is taken in kvm_ioctl_inhibit_begin() */
+qemu_lockcnt_inc(_in_ioctl_lock);
+}
+
+void accel_ioctl_end(void)
+{
+if (likely(qemu_mutex_iothread_locked())) {
+return;
+}
+
+qemu_lockcnt_dec(_in_ioctl_lock);
+/* change event to SET. If event was BUSY, wake up all waiters */
+qemu_event_set(_in_ioctl_event);
+}
+
+void accel_cpu_ioctl_begin(CPUState *cpu)
+{
+if (unlikely(qemu_mutex_iothread_locked())) {
+return;
+}
+
+/* block if lock is taken in kvm_ioctl_inhibit_begin() */
+qemu_lockcnt_inc(>in_ioctl_lock);
+}
+
+void accel_cpu_ioctl_end(CPUState *cpu)
+{
+if (unlikely(qemu_mutex_iothread_locked())) {
+return;
+}
+
+qemu_lockcnt_dec(>in_ioctl_lock);
+/* change event to SET. If event was BUSY, wake up all waiters */
+qemu_event_set(_in_ioctl_event);
+}
+
+static bool accel_has_to_wait(void)
+{
+CPUState *cpu;
+bool needs_to_wait = false;
+
+CPU_FOREACH(cpu) {
+if (qemu_lockcnt_count(>in_ioctl_lock)) {
+/* exit the ioctl, if vcpu is running it */
+qemu_cpu_kick(cpu);
+needs_to_wait = true;
+}
+}
+
+return needs_to_wait || qemu_lockcnt_count(_in_ioctl_lock);
+}
+
+void accel_ioctl_inhibit_begin(void)
+{
+CPUState *cpu;
+
+/*
+ * We allow to inhibit only when holding the BQL, so we can identify
+ * when an inhibitor wants to issue an ioctl easily.
+ */
+g_assert(qemu_mutex_iothread_locked());
+
+/* Block further invocations of the ioctls outside the BQL.  */
+CPU_FOREACH(cpu) {
+qemu_lockcnt_lock(>in_ioctl_lock);
+}
+qemu_lockcnt_lock(_in_ioctl_lock);
+
+/* Keep waiting until there are running ioctls */
+while (true) {
+
+/* Reset event to FREE. */
+qemu_event_reset(_in_ioctl_event);
+
+if (accel_has_to_wait()) {
+/*
+ * If event is still FREE, and there are ioctls still in progress,
+ * wait.
+ *
+ *  If an ioctl finishes before

[PATCH v3 2/3] KVM: keep track of running ioctls

2022-11-11 Thread Emanuele Giuseppe Esposito

Using the new accel-blocker API, mark where ioctls are being called
in KVM. Next, we will implement the critical section that will take
care of performing memslots modifications atomically, therefore
preventing any new ioctl from running and allowing the running ones
to finish.

Signed-off-by: David Hildenbrand 
Signed-off-by: Emanuele Giuseppe Esposito 
---
 accel/kvm/kvm-all.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index f99b0becd8..ff660fd469 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2310,6 +2310,7 @@ static int kvm_init(MachineState *ms)
 assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size());
 
 s->sigmask_len = 8;
+accel_blocker_init();
 
 #ifdef KVM_CAP_SET_GUEST_DEBUG
 QTAILQ_INIT(>kvm_sw_breakpoints);
@@ -3014,7 +3015,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...)
 va_end(ap);
 
 trace_kvm_vm_ioctl(type, arg);
+accel_ioctl_begin();
 ret = ioctl(s->vmfd, type, arg);
+accel_ioctl_end();
 if (ret == -1) {
 ret = -errno;
 }
@@ -3032,7 +3035,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type, ...)
 va_end(ap);
 
 trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg);
+accel_cpu_ioctl_begin(cpu);
 ret = ioctl(cpu->kvm_fd, type, arg);
+accel_cpu_ioctl_end(cpu);
 if (ret == -1) {
 ret = -errno;
 }
@@ -3050,7 +3055,9 @@ int kvm_device_ioctl(int fd, int type, ...)
 va_end(ap);
 
 trace_kvm_device_ioctl(fd, type, arg);
+accel_ioctl_begin();
 ret = ioctl(fd, type, arg);
+accel_ioctl_end();
 if (ret == -1) {
 ret = -errno;
 }
-- 
2.31.1

[PATCH v3 0/3] KVM: allow listener to stop all vcpus before

2022-11-11 Thread Emanuele Giuseppe Esposito

QEMU needs to perform memslots operations like merging and splitting,
and each operation requires more than a single ioctl.
Therefore if a vcpu is concurrently reading the same memslots,
it could end up reading something that was temporarly deleted.
For example, merging two memslots into one would imply:
DELETE(m1)
DELETE(m2)
CREATE(m1+m2)

And a vcpu could attempt to read m2 right after it is deleted, but
before the new one is created.

This approach is 100% QEMU-based. No KVM API modification is involved,
but implies that QEMU must make sure no new ioctl is running and all
vcpus are stopped.

The logic and code are basically taken from David Hildenbrand
proposal given a while ago while reviewing a previous attempt where
I suggested to solve the above problem directly in KVM by extending
its API.

This is the original code:
https://github.com/davidhildenbrand/qemu/commit/86b1bf546a8d00908e33f7362b0b61e2be8dbb7a

I just split the patch in three smaller patches, and used a
QemuLockCnt instead of counter + mutex.

RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1979276

Emanuele
---
v3:
* minor fixes on comments and docs
* improved accel_ioctl_inhibit_begin
* drop QSIMPLEQ_REMOVE from kvm_commit

v2:
* use QemuEvent instead of spinning in ioctl_inhibit_begin
* move the blocker API in a separate file and header, so that other accel can
  use it if they want.

David Hildenbrand (1):
  kvm: Atomic memslot updates

Emanuele Giuseppe Esposito (2):
  accel: introduce accelerator blocker API
  KVM: keep track of running ioctls

 accel/accel-blocker.c  | 154 +
 accel/kvm/kvm-all.c| 108 ---
 accel/meson.build  |   2 +-
 hw/core/cpu-common.c   |   2 +
 include/hw/core/cpu.h  |   3 +
 include/sysemu/accel-blocker.h |  56 
 include/sysemu/kvm_int.h   |   8 ++
 7 files changed, 321 insertions(+), 12 deletions(-)
 create mode 100644 accel/accel-blocker.c
 create mode 100644 include/sysemu/accel-blocker.h

-- 
2.31.1

[PATCH v3 3/3] kvm: Atomic memslot updates

2022-11-11 Thread Emanuele Giuseppe Esposito

From: David Hildenbrand 

If we update an existing memslot (e.g., resize, split), we temporarily
remove the memslot to re-add it immediately afterwards. These updates
are not atomic, especially not for KVM VCPU threads, such that we can
get spurious faults.

Let's inhibit most KVM ioctls while performing relevant updates, such
that we can perform the update just as if it would happen atomically
without additional kernel support.

We capture the add/del changes and apply them in the notifier commit
stage instead. There, we can check for overlaps and perform the ioctl
inhibiting only if really required (-> overlap).

To keep things simple we don't perform additional checks that wouldn't
actually result in an overlap -- such as !RAM memory regions in some
cases (see kvm_set_phys_mem()).

To minimize cache-line bouncing, use a separate indicator
(in_ioctl_lock) per CPU.  Also, make sure to hold the kvm_slots_lock
while performing both actions (removing+re-adding).

We have to wait until all IOCTLs were exited and block new ones from
getting executed.

This approach cannot result in a deadlock as long as the inhibitor does
not hold any locks that might hinder an IOCTL from getting finished and
exited - something fairly unusual. The inhibitor will always hold the BQL.

AFAIKs, one possible candidate would be userfaultfd. If a page cannot be
placed (e.g., during postcopy), because we're waiting for a lock, or if the
userfaultfd thread cannot process a fault, because it is waiting for a
lock, there could be a deadlock. However, the BQL is not applicable here,
because any other guest memory access while holding the BQL would already
result in a deadlock.

Nothing else in the kernel should block forever and wait for userspace
intervention.

Note: pause_all_vcpus()/resume_all_vcpus() or
start_exclusive()/end_exclusive() cannot be used, as they either drop
the BQL or require to be called without the BQL - something inhibitors
cannot handle. We need a low-level locking mechanism that is
deadlock-free even when not releasing the BQL.

Signed-off-by: David Hildenbrand 
Signed-off-by: Emanuele Giuseppe Esposito 
Tested-by: Emanuele Giuseppe Esposito 
---
 accel/kvm/kvm-all.c  | 101 ++-
 include/sysemu/kvm_int.h |   8 
 2 files changed, 98 insertions(+), 11 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index ff660fd469..39ed30ab59 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -31,6 +31,7 @@
 #include "sysemu/kvm_int.h"
 #include "sysemu/runstate.h"
 #include "sysemu/cpus.h"
+#include "sysemu/accel-blocker.h"
 #include "qemu/bswap.h"
 #include "exec/memory.h"
 #include "exec/ram_addr.h"
@@ -46,6 +47,7 @@
 #include "sysemu/hw_accel.h"
 #include "kvm-cpus.h"
 #include "sysemu/dirtylimit.h"
+#include "qemu/range.h"
 
 #include "hw/boards.h"
 #include "monitor/stats.h"
@@ -1292,6 +1294,7 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size)
 kvm_max_slot_size = max_slot_size;
 }
 
+/* Called with KVMMemoryListener.slots_lock held */
 static void kvm_set_phys_mem(KVMMemoryListener *kml,
  MemoryRegionSection *section, bool add)
 {
@@ -1326,14 +1329,12 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
 ram = memory_region_get_ram_ptr(mr) + mr_offset;
 ram_start_offset = memory_region_get_ram_addr(mr) + mr_offset;
 
-kvm_slots_lock();
-
 if (!add) {
 do {
 slot_size = MIN(kvm_max_slot_size, size);
 mem = kvm_lookup_matching_slot(kml, start_addr, slot_size);
 if (!mem) {
-goto out;
+return;
 }
 if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
 /*
@@ -1371,7 +1372,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
 start_addr += slot_size;
 size -= slot_size;
 } while (size);
-goto out;
+return;
 }
 
 /* register the new slot */
@@ -1396,9 +1397,6 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
 ram += slot_size;
 size -= slot_size;
 } while (size);
-
-out:
-kvm_slots_unlock();
 }
 
 static void *kvm_dirty_ring_reaper_thread(void *data)
@@ -1455,18 +1453,95 @@ static void kvm_region_add(MemoryListener *listener,
MemoryRegionSection *section)
 {
 KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, 
listener);
+KVMMemoryUpdate *update;
+
+update = g_new0(KVMMemoryUpdate, 1);
+update->section = *section;
 
-memory_region_ref(section->mr);
-kvm_set_phys_mem(kml, section, true);
+QSIMPLEQ_INSERT_TAIL(>transaction_add, update, next);
 }
 
 static void kvm_region_del(MemoryListener *listener,
MemoryRegionSection *section)
 {
 KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, 
listener);
+KVMMemoryUpdate *update;
+
+update = g_new0(KVMMemoryUpdate, 1);
+update->section =

[PULL 09/11] block-backend: Update ctx immediately after root

2022-11-11 Thread Kevin Wolf

From: Hanna Reitz 

blk_get_aio_context() asserts that blk->ctx is always equal to the root
BDS's context (if there is a root BDS).  Therefore,
blk_do_set_aio_context() must update blk->ctx immediately after the root
BDS's context has changed.

Without this patch, the next patch would break iotest 238, because
bdrv_drained_begin() (called by blk_do_set_aio_context()) may then
invoke bdrv_child_get_parent_aio_context() on the root child, i.e.
blk_get_aio_context().  However, by this point, blk->ctx would not have
been updated and thus differ from the root node's context.  This patch
fixes that.

Reviewed-by: Kevin Wolf 
Signed-off-by: Hanna Reitz 
Message-Id: <20221107151321.211175-3-hre...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 block/block-backend.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index ed2f4b67a2..b48c91f4e1 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -2158,6 +2158,11 @@ static int blk_do_set_aio_context(BlockBackend *blk, 
AioContext *new_context,
 return ret;
 }
 }
+/*
+ * Make blk->ctx consistent with the root node before we invoke any
+ * other operations like drain that might inquire blk->ctx
+ */
+blk->ctx = new_context;
 if (tgm->throttle_state) {
 bdrv_drained_begin(bs);
 throttle_group_detach_aio_context(tgm);
@@ -2166,9 +2171,10 @@ static int blk_do_set_aio_context(BlockBackend *blk, 
AioContext *new_context,
 }
 
 bdrv_unref(bs);
+} else {
+blk->ctx = new_context;
 }
 
-blk->ctx = new_context;
 return 0;
 }
 
-- 
2.38.1

[PULL 08/11] block: Make bdrv_child_get_parent_aio_context I/O

2022-11-11 Thread Kevin Wolf

From: Hanna Reitz 

We want to use bdrv_child_get_parent_aio_context() from
bdrv_parent_drained_{begin,end}_single(), both of which are "I/O or GS"
functions.

Prior to 3ed4f708fe1, all the implementations were I/O code anyway.
3ed4f708fe1 has put block jobs' AioContext field under the job mutex, so
to make child_job_get_parent_aio_context() work in an I/O context, we
need to take that lock there.

Furthermore, blk_root_get_parent_aio_context() is not marked as
anything, but is safe to run in an I/O context, so mark it that way now.
(blk_get_aio_context() is an I/O code function.)

With that done, all implementations explicitly are I/O code, so we can
mark bdrv_child_get_parent_aio_context() as I/O code, too, so callers
know it is safe to run from both GS and I/O contexts.

Signed-off-by: Hanna Reitz 
Message-Id: <20221107151321.211175-2-hre...@redhat.com>
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 include/block/block-global-state.h | 1 -
 include/block/block-io.h   | 2 ++
 include/block/block_int-common.h   | 4 ++--
 block.c| 2 +-
 block/block-backend.c  | 1 +
 blockjob.c | 3 ++-
 6 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/include/block/block-global-state.h 
b/include/block/block-global-state.h
index bb42ed9559..c7bd4a2088 100644
--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
@@ -220,7 +220,6 @@ void coroutine_fn bdrv_co_lock(BlockDriverState *bs);
  */
 void coroutine_fn bdrv_co_unlock(BlockDriverState *bs);
 
-AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c);
 bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx,
GHashTable *visited, Transaction *tran,
Error **errp);
diff --git a/include/block/block-io.h b/include/block/block-io.h
index 770ddeb7c8..b099d7db45 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -171,6 +171,8 @@ void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent 
event);
  */
 AioContext *bdrv_get_aio_context(BlockDriverState *bs);
 
+AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c);
+
 /**
  * Move the current coroutine to the AioContext of @bs and return the old
  * AioContext of the coroutine. Increase bs->in_flight so that draining @bs
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 5a2cc077a0..31ae91e56e 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -911,8 +911,6 @@ struct BdrvChildClass {
GHashTable *visited, Transaction *tran,
Error **errp);
 
-AioContext *(*get_parent_aio_context)(BdrvChild *child);
-
 /*
  * I/O API functions. These functions are thread-safe.
  *
@@ -929,6 +927,8 @@ struct BdrvChildClass {
  */
 const char *(*get_name)(BdrvChild *child);
 
+AioContext *(*get_parent_aio_context)(BdrvChild *child);
+
 /*
  * If this pair of functions is implemented, the parent doesn't issue new
  * requests after returning from .drained_begin() until .drained_end() is
diff --git a/block.c b/block.c
index c5e20c0bea..a18f052374 100644
--- a/block.c
+++ b/block.c
@@ -1543,7 +1543,7 @@ const BdrvChildClass child_of_bds = {
 
 AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c)
 {
-GLOBAL_STATE_CODE();
+IO_CODE();
 return c->klass->get_parent_aio_context(c);
 }
 
diff --git a/block/block-backend.c b/block/block-backend.c
index c0c7d56c8d..ed2f4b67a2 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -311,6 +311,7 @@ static void blk_root_detach(BdrvChild *child)
 static AioContext *blk_root_get_parent_aio_context(BdrvChild *c)
 {
 BlockBackend *blk = c->opaque;
+IO_CODE();
 
 return blk_get_aio_context(blk);
 }
diff --git a/blockjob.c b/blockjob.c
index 2d86014fa5..f51d4e18f3 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -173,7 +173,8 @@ static bool child_job_change_aio_ctx(BdrvChild *c, 
AioContext *ctx,
 static AioContext *child_job_get_parent_aio_context(BdrvChild *c)
 {
 BlockJob *job = c->opaque;
-GLOBAL_STATE_CODE();
+IO_CODE();
+JOB_LOCK_GUARD();
 
 return job->job.aio_context;
 }
-- 
2.38.1

[PULL 02/11] block/mirror: Drop mirror_wait_for_any_operation()

2022-11-11 Thread Kevin Wolf

From: Hanna Reitz 

mirror_wait_for_free_in_flight_slot() is the only remaining user of
mirror_wait_for_any_operation(), so inline the latter into the former.

Signed-off-by: Hanna Reitz 
Message-Id: <20221109165452.67927-3-hre...@redhat.com>
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block/mirror.c | 21 -
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index e5467b0053..5b6f42392c 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -305,19 +305,21 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t 
*offset,
 }
 
 static inline void coroutine_fn
-mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
+mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
 {
 MirrorOp *op;
 
 QTAILQ_FOREACH(op, >ops_in_flight, next) {
-/* Do not wait on pseudo ops, because it may in turn wait on
+/*
+ * Do not wait on pseudo ops, because it may in turn wait on
  * some other operation to start, which may in fact be the
  * caller of this function.  Since there is only one pseudo op
  * at any given time, we will always find some real operation
- * to wait on. */
-if (!op->is_pseudo_op && op->is_in_flight &&
-op->is_active_write == active)
-{
+ * to wait on.
+ * Also, do not wait on active operations, because they do not
+ * use up in-flight slots.
+ */
+if (!op->is_pseudo_op && op->is_in_flight && !op->is_active_write) {
 qemu_co_queue_wait(>waiting_requests, NULL);
 return;
 }
@@ -325,13 +327,6 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool 
active)
 abort();
 }
 
-static inline void coroutine_fn
-mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
-{
-/* Only non-active operations use up in-flight slots */
-mirror_wait_for_any_operation(s, false);
-}
-
 /* Perform a mirror copy operation.
  *
  * *op->bytes_handled is set to the number of bytes copied after and
-- 
2.38.1

[PULL 03/11] block/mirror: Fix NULL s->job in active writes

2022-11-11 Thread Kevin Wolf

From: Hanna Reitz 

There is a small gap in mirror_start_job() before putting the mirror
filter node into the block graph (bdrv_append() call) and the actual job
being created.  Before the job is created, MirrorBDSOpaque.job is NULL.

It is possible that requests come in when bdrv_drained_end() is called,
and those requests would see MirrorBDSOpaque.job == NULL.  Have our
filter node handle that case gracefully.

Signed-off-by: Hanna Reitz 
Message-Id: <20221109165452.67927-4-hre...@redhat.com>
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block/mirror.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 5b6f42392c..251adc5ae0 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1438,11 +1438,13 @@ static int coroutine_fn 
bdrv_mirror_top_do_write(BlockDriverState *bs,
 MirrorOp *op = NULL;
 MirrorBDSOpaque *s = bs->opaque;
 int ret = 0;
-bool copy_to_target;
+bool copy_to_target = false;
 
-copy_to_target = s->job->ret >= 0 &&
- !job_is_cancelled(>job->common.job) &&
- s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
+if (s->job) {
+copy_to_target = s->job->ret >= 0 &&
+ !job_is_cancelled(>job->common.job) &&
+ s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
+}
 
 if (copy_to_target) {
 op = active_write_prepare(s->job, offset, bytes);
@@ -1487,11 +1489,13 @@ static int coroutine_fn 
bdrv_mirror_top_pwritev(BlockDriverState *bs,
 QEMUIOVector bounce_qiov;
 void *bounce_buf;
 int ret = 0;
-bool copy_to_target;
+bool copy_to_target = false;
 
-copy_to_target = s->job->ret >= 0 &&
- !job_is_cancelled(>job->common.job) &&
- s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
+if (s->job) {
+copy_to_target = s->job->ret >= 0 &&
+ !job_is_cancelled(>job->common.job) &&
+ s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
+}
 
 if (copy_to_target) {
 /* The guest might concurrently modify the data to write; but
-- 
2.38.1

[PULL 01/11] block/mirror: Do not wait for active writes

2022-11-11 Thread Kevin Wolf

From: Hanna Reitz 

Waiting for all active writes to settle before daring to create a
background copying operation means that we will never do background
operations while the guest does anything (in write-blocking mode), and
therefore cannot converge.  Yes, we also will not diverge, but actually
converging would be even nicer.

It is unclear why we did decide to wait for all active writes to settle
before creating a background operation, but it just does not seem
necessary.  Active writes will put themselves into the in_flight bitmap
and thus properly block actually conflicting background requests.

It is important for active requests to wait on overlapping background
requests, which we do in active_write_prepare().  However, so far it was
not documented why it is important.  Add such documentation now, and
also to the other call of mirror_wait_on_conflicts(), so that it becomes
more clear why and when requests need to actively wait for other
requests to settle.

Another thing to note is that of course we need to ensure that there are
no active requests when the job completes, but that is done by virtue of
the BDS being drained anyway, so there cannot be any active requests at
that point.

With this change, we will need to explicitly keep track of how many
bytes are in flight in active requests so that
job_progress_set_remaining() in mirror_run() can set the correct number
of remaining bytes.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2123297
Signed-off-by: Hanna Reitz 
Message-Id: <20221109165452.67927-2-hre...@redhat.com>
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block/mirror.c | 37 ++---
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 1a75a47cc3..e5467b0053 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -82,6 +82,7 @@ typedef struct MirrorBlockJob {
 int max_iov;
 bool initial_zeroing_ongoing;
 int in_active_write_counter;
+int64_t active_write_bytes_in_flight;
 bool prepared;
 bool in_drain;
 } MirrorBlockJob;
@@ -494,6 +495,13 @@ static uint64_t coroutine_fn 
mirror_iteration(MirrorBlockJob *s)
 }
 bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
 
+/*
+ * Wait for concurrent requests to @offset.  The next loop will limit the
+ * copied area based on in_flight_bitmap so we only copy an area that does
+ * not overlap with concurrent in-flight requests.  Still, we would like to
+ * copy something, so wait until there are at least no more requests to the
+ * very beginning of the area.
+ */
 mirror_wait_on_conflicts(NULL, s, offset, 1);
 
 job_pause_point(>common.job);
@@ -988,12 +996,6 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
 int64_t cnt, delta;
 bool should_complete;
 
-/* Do not start passive operations while there are active
- * writes in progress */
-while (s->in_active_write_counter) {
-mirror_wait_for_any_operation(s, true);
-}
-
 if (s->ret < 0) {
 ret = s->ret;
 goto immediate_exit;
@@ -1010,7 +1012,9 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
 /* cnt is the number of dirty bytes remaining and s->bytes_in_flight is
  * the number of bytes currently being processed; together those are
  * the current remaining operation length */
-job_progress_set_remaining(>common.job, s->bytes_in_flight + cnt);
+job_progress_set_remaining(>common.job,
+   s->bytes_in_flight + cnt +
+   s->active_write_bytes_in_flight);
 
 /* Note that even when no rate limit is applied we need to yield
  * periodically with no pending I/O so that bdrv_drain_all() returns.
@@ -1071,6 +1075,10 @@ static int coroutine_fn mirror_run(Job *job, Error 
**errp)
 
 s->in_drain = true;
 bdrv_drained_begin(bs);
+
+/* Must be zero because we are drained */
+assert(s->in_active_write_counter == 0);
+
 cnt = bdrv_get_dirty_count(s->dirty_bitmap);
 if (cnt > 0 || mirror_flush(s) < 0) {
 bdrv_drained_end(bs);
@@ -1306,6 +1314,7 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod 
method,
 }
 
 job_progress_increase_remaining(>common.job, bytes);
+job->active_write_bytes_in_flight += bytes;
 
 switch (method) {
 case MIRROR_METHOD_COPY:
@@ -1327,6 +1336,7 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod 
method,
 abort();
 }
 
+job->active_write_bytes_in_flight -= bytes;
 if (ret >= 0) {
 job_progress_update(>common.job, bytes);
 } else {
@@ -1375,6 +1385,19 @@ static MirrorOp *coroutine_fn 
active_write_prepare(MirrorBlockJob *s,
 
 s->in_active_write_counter++;
 
+/*
+ * Wait for concurrent requests affecting the area.  If there are already
+ *

[PULL 10/11] block: Start/end drain on correct AioContext

2022-11-11 Thread Kevin Wolf

From: Hanna Reitz 

bdrv_parent_drained_{begin,end}_single() are supposed to operate on the
parent, not on the child, so they should not attempt to get the context
to poll from the child but the parent instead.  BDRV_POLL_WHILE(c->bs)
does get the context from the child, so we should replace it with
AIO_WAIT_WHILE() on the parent's context instead.

This problem becomes apparent when bdrv_replace_child_noperm() invokes
bdrv_parent_drained_end_single() after removing a child from a subgraph
that is in an I/O thread.  By the time bdrv_parent_drained_end_single()
is called, child->bs is NULL, and so BDRV_POLL_WHILE(c->bs, ...) will
poll the main loop instead of the I/O thread; but anything that
bdrv_parent_drained_end_single_no_poll() may have scheduled is going to
want to run in the I/O thread, but because we poll the main loop, the
I/O thread is never unpaused, and nothing is run, resulting in a
deadlock.

Closes: https://gitlab.com/qemu-project/qemu/-/issues/1215
Reviewed-by: Kevin Wolf 
Signed-off-by: Hanna Reitz 
Message-Id: <20221107151321.211175-4-hre...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 block/io.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/block/io.c b/block/io.c
index 34b30e304e..b9424024f9 100644
--- a/block/io.c
+++ b/block/io.c
@@ -71,9 +71,10 @@ static void bdrv_parent_drained_end_single_no_poll(BdrvChild 
*c,
 void bdrv_parent_drained_end_single(BdrvChild *c)
 {
 int drained_end_counter = 0;
+AioContext *ctx = bdrv_child_get_parent_aio_context(c);
 IO_OR_GS_CODE();
 bdrv_parent_drained_end_single_no_poll(c, _end_counter);
-BDRV_POLL_WHILE(c->bs, qatomic_read(_end_counter) > 0);
+AIO_WAIT_WHILE(ctx, qatomic_read(_end_counter) > 0);
 }
 
 static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
@@ -116,13 +117,14 @@ static bool bdrv_parent_drained_poll(BlockDriverState 
*bs, BdrvChild *ignore,
 
 void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
 {
+AioContext *ctx = bdrv_child_get_parent_aio_context(c);
 IO_OR_GS_CODE();
 c->parent_quiesce_counter++;
 if (c->klass->drained_begin) {
 c->klass->drained_begin(c);
 }
 if (poll) {
-BDRV_POLL_WHILE(c->bs, bdrv_parent_drained_poll_single(c));
+AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c));
 }
 }
 
-- 
2.38.1

[PULL 05/11] iotests/151: Test active requests on mirror start

2022-11-11 Thread Kevin Wolf

From: Hanna Reitz 

Have write requests happen to the source node right when we start a
mirror job.  The mirror filter node may encounter MirrorBDSOpaque.job
being NULL, but this should not cause a segfault.

Signed-off-by: Hanna Reitz 
Message-Id: <20221109165452.67927-6-hre...@redhat.com>
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/151 | 53 +++---
 tests/qemu-iotests/151.out |  4 +--
 2 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/tests/qemu-iotests/151 b/tests/qemu-iotests/151
index 0a052e5050..b4d1bc2553 100755
--- a/tests/qemu-iotests/151
+++ b/tests/qemu-iotests/151
@@ -22,7 +22,8 @@
 import math
 import os
 import subprocess
-from typing import List
+import time
+from typing import List, Optional
 import iotests
 from iotests import qemu_img
 
@@ -195,12 +196,15 @@ class TestActiveMirror(iotests.QMPTestCase):
 self.potential_writes_in_flight = False
 
 
-class TestThrottledWithNbdExport(iotests.QMPTestCase):
+class TestThrottledWithNbdExportBase(iotests.QMPTestCase):
 image_len = 128 * 1024 * 1024  # MB
-iops = 16
+iops: Optional[int] = None
 background_processes: List['subprocess.Popen[str]'] = []
 
 def setUp(self):
+# Must be set by subclasses
+self.assertIsNotNone(self.iops)
+
 qemu_img('create', '-f', iotests.imgfmt, source_img, '128M')
 qemu_img('create', '-f', iotests.imgfmt, target_img, '128M')
 
@@ -284,6 +288,10 @@ class TestThrottledWithNbdExport(iotests.QMPTestCase):
 os.remove(source_img)
 os.remove(target_img)
 
+
+class TestLowThrottledWithNbdExport(TestThrottledWithNbdExportBase):
+iops = 16
+
 def testUnderLoad(self):
 '''
 Throttle the source node, then issue a whole bunch of external requests
@@ -370,6 +378,45 @@ class TestThrottledWithNbdExport(iotests.QMPTestCase):
 self.assertGreater(start_remaining - end_remaining, 0)
 
 
+class TestHighThrottledWithNbdExport(TestThrottledWithNbdExportBase):
+iops = 1024
+
+def testActiveOnCreation(self):
+'''
+Issue requests on the mirror source node right as the mirror is
+instated.  It's possible that requests occur before the actual job is
+created, but after the node has been put into the graph.  Write
+requests across the node must in that case be forwarded to the source
+node without attempting to mirror them (there is no job object yet, so
+attempting to access it would cause a segfault).
+We do this with a lightly throttled node (i.e. quite high IOPS limit).
+Using throttling seems to increase reproductivity, but if the limit is
+too low, all requests allowed per second will be submitted before
+mirror_start_job() gets to the problematic point.
+'''
+
+# Let qemu-img bench create write requests (enough for two seconds on
+# the virtual clock)
+bench_args = ['bench', '-w', '-d', '1024', '-f', 'nbd',
+  '-c', str(self.iops * 2), self.nbd_url]
+p = iotests.qemu_tool_popen(iotests.qemu_img_args + bench_args)
+self.background_processes += [p]
+
+# Give qemu-img bench time to start up and issue requests
+time.sleep(1.0)
+# Flush the request queue, so new requests can come in right as we
+# start blockdev-mirror
+self.vm.qtest(f'clock_step {1 * 1000 * 1000 * 1000}')
+
+result = self.vm.qmp('blockdev-mirror',
+ job_id='mirror',
+ device='source-node',
+ target='target-node',
+ sync='full',
+ copy_mode='write-blocking')
+self.assert_qmp(result, 'return', {})
+
+
 if __name__ == '__main__':
 iotests.main(supported_fmts=['qcow2', 'raw'],
  supported_protocols=['file'])
diff --git a/tests/qemu-iotests/151.out b/tests/qemu-iotests/151.out
index 914e3737bd..3f8a935a08 100644
--- a/tests/qemu-iotests/151.out
+++ b/tests/qemu-iotests/151.out
@@ -1,5 +1,5 @@
-.
+..
 --
-Ran 5 tests
+Ran 6 tests
 
 OK
-- 
2.38.1

[PULL 04/11] iotests/151: Test that active mirror progresses

2022-11-11 Thread Kevin Wolf

From: Hanna Reitz 

Before this series, a mirror job in write-blocking mode would pause
issuing background requests while active requests are in flight.  Thus,
if the source is constantly in use by active requests, no actual
progress can be made.

This series should have fixed that, making the mirror job issue
background requests even while active requests are in flight.

Have a new test case in 151 verify this.

Signed-off-by: Hanna Reitz 
Message-Id: <20221109165452.67927-5-hre...@redhat.com>
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/151 | 180 -
 tests/qemu-iotests/151.out |   4 +-
 2 files changed, 181 insertions(+), 3 deletions(-)

diff --git a/tests/qemu-iotests/151 b/tests/qemu-iotests/151
index 93d14193d0..0a052e5050 100755
--- a/tests/qemu-iotests/151
+++ b/tests/qemu-iotests/151
@@ -19,7 +19,10 @@
 # along with this program.  If not, see .
 #
 
+import math
 import os
+import subprocess
+from typing import List
 import iotests
 from iotests import qemu_img
 
@@ -50,7 +53,7 @@ class TestActiveMirror(iotests.QMPTestCase):
 self.vm = iotests.VM()
 self.vm.add_drive_raw(self.vm.qmp_to_opts(blk_source))
 self.vm.add_blockdev(self.vm.qmp_to_opts(blk_target))
-self.vm.add_device('virtio-blk,drive=source')
+self.vm.add_device('virtio-blk,id=vblk,drive=source')
 self.vm.launch()
 
 def tearDown(self):
@@ -192,6 +195,181 @@ class TestActiveMirror(iotests.QMPTestCase):
 self.potential_writes_in_flight = False
 
 
+class TestThrottledWithNbdExport(iotests.QMPTestCase):
+image_len = 128 * 1024 * 1024  # MB
+iops = 16
+background_processes: List['subprocess.Popen[str]'] = []
+
+def setUp(self):
+qemu_img('create', '-f', iotests.imgfmt, source_img, '128M')
+qemu_img('create', '-f', iotests.imgfmt, target_img, '128M')
+
+self.vm = iotests.VM()
+self.vm.launch()
+
+result = self.vm.qmp('object-add', **{
+'qom-type': 'throttle-group',
+'id': 'thrgr',
+'limits': {
+'iops-total': self.iops,
+'iops-total-max': self.iops
+}
+})
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('blockdev-add', **{
+'node-name': 'source-node',
+'driver': 'throttle',
+'throttle-group': 'thrgr',
+'file': {
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': source_img
+}
+}
+})
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('blockdev-add', **{
+'node-name': 'target-node',
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': target_img
+}
+})
+self.assert_qmp(result, 'return', {})
+
+self.nbd_sock = iotests.file_path('nbd.sock',
+  base_dir=iotests.sock_dir)
+self.nbd_url = f'nbd+unix:///source-node?socket={self.nbd_sock}'
+
+result = self.vm.qmp('nbd-server-start', addr={
+'type': 'unix',
+'data': {
+'path': self.nbd_sock
+}
+})
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('block-export-add', id='exp0', type='nbd',
+ node_name='source-node', writable=True)
+self.assert_qmp(result, 'return', {})
+
+def tearDown(self):
+# Wait for background requests to settle
+try:
+while True:
+p = self.background_processes.pop()
+while True:
+try:
+p.wait(timeout=0.0)
+break
+except subprocess.TimeoutExpired:
+self.vm.qtest(f'clock_step {1 * 1000 * 1000 * 1000}')
+except IndexError:
+pass
+
+# Cancel ongoing block jobs
+for job in self.vm.qmp('query-jobs')['return']:
+self.vm.qmp('block-job-cancel', device=job['id'], force=True)
+
+while True:
+self.vm.qtest(f'clock_step {1 * 1000 * 1000 * 1000}')
+if len(self.vm.qmp('query-jobs')['return']) == 0:
+break
+
+self.vm.shutdown()
+os.remove(source_img)
+os.remove(target_img)
+
+def testUnderLoad(self):
+'''
+Throttle the source node, then issue a whole bunch of external requests
+while the mirror job (in write-blocking mode) is running.  We want to
+see background requests being issued even while the source is under
+full load by active writes, so that progress can be made towards READY.
+'''
+
+# Fill the first half of the

[PULL 00/11] Block layer patches

2022-11-11 Thread Kevin Wolf

The following changes since commit 2ccad61746ca7de5dd3e25146062264387e43bd4:

  Merge tag 'pull-tcg-20221109' of https://gitlab.com/rth7680/qemu into staging 
(2022-11-09 13:26:45 -0500)

are available in the Git repository at:

  https://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to b04af371af685c12970ea93027dc6d8bf86265aa:

  tests/stream-under-throttle: New test (2022-11-11 13:02:43 +0100)


Block layer patches

- Fix deadlock in graph modification with iothreads
- mirror: Fix non-converging cases for active mirror
- qapi: Fix BlockdevOptionsNvmeIoUring @path description
- blkio: Set BlockDriver::has_variable_length to false


Alberto Faria (2):
  qapi/block-core: Fix BlockdevOptionsNvmeIoUring @path description
  block/blkio: Set BlockDriver::has_variable_length to false

Hanna Reitz (9):
  block/mirror: Do not wait for active writes
  block/mirror: Drop mirror_wait_for_any_operation()
  block/mirror: Fix NULL s->job in active writes
  iotests/151: Test that active mirror progresses
  iotests/151: Test active requests on mirror start
  block: Make bdrv_child_get_parent_aio_context I/O
  block-backend: Update ctx immediately after root
  block: Start/end drain on correct AioContext
  tests/stream-under-throttle: New test

 qapi/block-core.json   |   2 +-
 include/block/block-global-state.h |   1 -
 include/block/block-io.h   |   2 +
 include/block/block_int-common.h   |   4 +-
 block.c|   2 +-
 block/blkio.c  |   1 -
 block/block-backend.c  |   9 +-
 block/io.c |   6 +-
 block/mirror.c |  78 ---
 blockjob.c |   3 +-
 tests/qemu-iotests/151 | 227 -
 tests/qemu-iotests/151.out |   4 +-
 tests/qemu-iotests/tests/stream-under-throttle | 121 +++
 tests/qemu-iotests/tests/stream-under-throttle.out |   5 +
 14 files changed, 424 insertions(+), 41 deletions(-)
 create mode 100755 tests/qemu-iotests/tests/stream-under-throttle
 create mode 100644 tests/qemu-iotests/tests/stream-under-throttle.out

[PULL 06/11] qapi/block-core: Fix BlockdevOptionsNvmeIoUring @path description

2022-11-11 Thread Kevin Wolf

From: Alberto Faria 

The nvme-io_uring BlockDriver's path option must point at the character
device of an NVMe namespace, not at an image file.

Fixes: fd66dbd424f5 ("blkio: add libblkio block driver")
Suggested-by: Stefano Garzarella 
Signed-off-by: Alberto Faria 
Message-Id: <20221108142347.1322674-1-afa...@redhat.com>
Reviewed-by: Stefano Garzarella 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 qapi/block-core.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 6d904004f8..95ac4fa634 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3704,7 +3704,7 @@
 #
 # Driver specific block device options for the nvme-io_uring backend.
 #
-# @path: path to the image file
+# @path: path to the NVMe namespace's character device (e.g. /dev/ng0n1).
 #
 # Since: 7.2
 ##
-- 
2.38.1

[PULL 11/11] tests/stream-under-throttle: New test

2022-11-11 Thread Kevin Wolf

From: Hanna Reitz 

Test streaming a base image into the top image underneath two throttle
nodes.  This was reported to make qemu 7.1 hang
(https://gitlab.com/qemu-project/qemu/-/issues/1215), so this serves as
a regression test.

Signed-off-by: Hanna Reitz 
Message-Id: <20221110160921.33158-1-hre...@redhat.com>
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 .../qemu-iotests/tests/stream-under-throttle  | 121 ++
 .../tests/stream-under-throttle.out   |   5 +
 2 files changed, 126 insertions(+)
 create mode 100755 tests/qemu-iotests/tests/stream-under-throttle
 create mode 100644 tests/qemu-iotests/tests/stream-under-throttle.out

diff --git a/tests/qemu-iotests/tests/stream-under-throttle 
b/tests/qemu-iotests/tests/stream-under-throttle
new file mode 100755
index 00..37788b147d
--- /dev/null
+++ b/tests/qemu-iotests/tests/stream-under-throttle
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+# group: rw
+#
+# Test streaming with throttle nodes on top
+#
+# Copyright (C) 2022 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+import asyncio
+import os
+from typing import List
+import iotests
+from iotests import qemu_img_create, qemu_io
+
+
+image_size = 256 * 1024 * 1024
+base_img = os.path.join(iotests.test_dir, 'base.img')
+top_img = os.path.join(iotests.test_dir, 'top.img')
+
+
+class TcgVM(iotests.VM):
+'''
+Variant of iotests.VM that uses -accel tcg.  Simply using
+iotests.VM.add_args('-accel', 'tcg') is not sufficient, because that will
+put -accel qtest before -accel tcg, and -accel arguments are prioritized in
+the order they appear.
+'''
+@property
+def _base_args(self) -> List[str]:
+# Put -accel tcg first so it takes precedence
+return ['-accel', 'tcg'] + super()._base_args
+
+
+class TestStreamWithThrottle(iotests.QMPTestCase):
+def setUp(self) -> None:
+'''
+Create a simple backing chain between two images, write something to
+the base image.  Attach them to the VM underneath two throttle nodes,
+one of which has actually no limits set, but the other does.  Then put
+a virtio-blk device on top.
+This test configuration has been taken from
+https://gitlab.com/qemu-project/qemu/-/issues/1215
+'''
+qemu_img_create('-f', iotests.imgfmt, base_img, str(image_size))
+qemu_img_create('-f', iotests.imgfmt, '-b', base_img, '-F',
+iotests.imgfmt, top_img, str(image_size))
+
+# Write something to stream
+qemu_io(base_img, '-c', f'write 0 {image_size}')
+
+blockdev = {
+'driver': 'throttle',
+'node-name': 'throttled-node',
+'throttle-group': 'thrgr-limited',
+'file': {
+'driver': 'throttle',
+'throttle-group': 'thrgr-unlimited',
+'file': {
+'driver': iotests.imgfmt,
+'node-name': 'unthrottled-node',
+'file': {
+'driver': 'file',
+'filename': top_img
+}
+}
+}
+}
+
+# Issue 1215 is not reproducible in qtest mode, which is why we need to
+# create an -accel tcg VM
+self.vm = TcgVM()
+self.vm.add_object('iothread,id=iothr0')
+self.vm.add_object('throttle-group,id=thrgr-unlimited')
+self.vm.add_object('throttle-group,id=thrgr-limited,'
+   'x-iops-total=1,x-bps-total=104857600')
+self.vm.add_blockdev(self.vm.qmp_to_opts(blockdev))
+self.vm.add_device('virtio-blk,iothread=iothr0,drive=throttled-node')
+self.vm.launch()
+
+def tearDown(self) -> None:
+self.vm.shutdown()
+os.remove(top_img)
+os.remove(base_img)
+
+def test_stream(self) -> None:
+'''
+Do a simple stream beneath the two throttle nodes.  Should complete
+with no problems.
+'''
+result = self.vm.qmp('block-stream',
+ job_id='stream',
+ device='unthrottled-node')
+self.assert_qmp(result, 'return', {})
+
+# Should succeed and not time out
+try:
+self.vm.run_job('stream')
+except asyncio.exceptions.TimeoutError:

[PULL 07/11] block/blkio: Set BlockDriver::has_variable_length to false

2022-11-11 Thread Kevin Wolf

From: Alberto Faria 

Setting it to true can cause the device size to be queried from libblkio
in otherwise fast paths, degrading performance. Set it to false and
require users to refresh the device size explicitly instead.

Fixes: 4c8f4fda0504 ("block/blkio: Tolerate device size changes")
Suggested-by: Kevin Wolf 
Signed-off-by: Alberto Faria 
Message-Id: <20221108144433.1334074-1-afa...@redhat.com>
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block/blkio.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/block/blkio.c b/block/blkio.c
index 620fab28a7..5eae3adfaf 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -993,7 +993,6 @@ static void blkio_refresh_limits(BlockDriverState *bs, 
Error **errp)
 { \
 .format_name = name, \
 .protocol_name   = name, \
-.has_variable_length = true, \
 .instance_size   = sizeof(BDRVBlkioState), \
 .bdrv_file_open  = blkio_file_open, \
 .bdrv_close  = blkio_close, \
-- 
2.38.1

[PATCH] MAINTAINERS: add mst to list of biosbits maintainers

2022-11-11 Thread Ani Sinha

Adding Michael's name to the list of bios bits maintainers so that all changes
and fixes into biosbits framework can go through his tree and he is notified.

Suggested-by: Michael S. Tsirkin 
Signed-off-by: Ani Sinha 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index caba73ec41..c50b15f94d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1865,6 +1865,7 @@ F: hw/acpi/viot.h
 
 ACPI/AVOCADO/BIOSBITS
 M: Ani Sinha 
+M: Michael S. Tsirkin 
 S: Supported
 F: tests/avocado/acpi-bits/*
 F: tests/avocado/acpi-bits.py
-- 
2.34.1

[PATCH v2 02/12] tests/avocado: improve behaviour waiting for login prompts

2022-11-11 Thread Alex Bennée

This attempts to deal with the problem of login prompts not being
guaranteed to be terminated with a newline. The solution to this is to
peek at the incoming data looking to see if we see an up-coming match
before we fall back to the old readline() logic. The reason to mostly
rely on readline is because I am occasionally seeing the peek stalling
despite data being there.

This seems kinda hacky and gross so I'm open to alternative approaches
and cleaner python code.

Signed-off-by: Alex Bennée 
Reviewed-by: Philippe Mathieu-Daudé 

---
v2
  - remove superfluous /r/n
---
 tests/avocado/avocado_qemu/__init__.py | 89 +-
 1 file changed, 88 insertions(+), 1 deletion(-)

diff --git a/tests/avocado/avocado_qemu/__init__.py 
b/tests/avocado/avocado_qemu/__init__.py
index 910f3ba1ea..20cba57161 100644
--- a/tests/avocado/avocado_qemu/__init__.py
+++ b/tests/avocado/avocado_qemu/__init__.py
@@ -131,6 +131,58 @@ def pick_default_qemu_bin(bin_prefix='qemu-system-', 
arch=None):
 return path
 return None
 
+def _peek_ahead(console, min_match, success_message):
+"""
+peek ahead in the console stream keeping an eye out for the
+success message.
+
+Returns some message to process or None, indicating the normal
+readline should occur.
+"""
+console_logger = logging.getLogger('console')
+peek_len = 0
+retries = 0
+
+while True:
+try:
+old_peek_len = peek_len
+peek_ahead = console.peek(min_match).decode()
+peek_len = len(peek_ahead)
+
+# if we get stuck too long lets just fallback to readline
+if peek_len <= old_peek_len:
+retries += 1
+if retries > 10:
+return None
+
+# if we see a newline in the peek we can let safely bail
+# and let the normal readline() deal with it
+if peek_ahead.endswith(('\n', '\r')):
+return None
+
+# if we haven't seen enough for the whole message but the
+# first part matches lets just loop again
+if len(peek_ahead) < min_match and \
+   success_message[:peek_len] in peek_ahead:
+continue
+
+# if we see the whole success_message we are done, consume
+# it and pass back so we can exit to the user
+if success_message in peek_ahead:
+return console.read(peek_len).decode()
+
+# of course if we've seen enough then this line probably
+# doesn't contain what we are looking for, fallback
+if peek_len > min_match:
+return None
+
+except UnicodeDecodeError:
+console_logger.log("error in decode of peek")
+return None
+
+# we should never get here
+return None
+
 
 def _console_interaction(test, success_message, failure_message,
  send_string, keep_sending=False, vm=None):
@@ -139,17 +191,52 @@ def _console_interaction(test, success_message, 
failure_message,
 vm = test.vm
 console = vm.console_socket.makefile(mode='rb', encoding='utf-8')
 console_logger = logging.getLogger('console')
+
+# Establish the minimum number of bytes we would need to
+# potentially match against success_message
+if success_message is not None:
+min_match = len(success_message)
+else:
+min_match = 0
+
+console_logger.debug("looking for %d:(%s), sending %s (always=%s)",
+ min_match, success_message, send_string, keep_sending)
+
 while True:
+msg = None
+
+# First send our string, optionally repeating the send next
+# cycle.
 if send_string:
 vm.console_socket.sendall(send_string.encode())
 if not keep_sending:
 send_string = None # send only once
+
+# If the console has no data to read we briefly
+# sleep before continuing.
+if not console.readable():
+time.sleep(0.1)
+continue
+
 try:
-msg = console.readline().decode().strip()
+
+# First we shall peek ahead for a potential match to cover waiting
+# for lines without any newlines.
+if min_match > 0:
+msg = _peek_ahead(console, min_match, success_message)
+
+# otherwise we block here for a full line
+if not msg:
+msg = console.readline().decode().strip()
+
 except UnicodeDecodeError:
+console_logger.debug("skipped unicode error")
 msg = None
+
+# if nothing came out we continue and try again
 if not msg:
 continue
+
 console_logger.debug(msg)
 if success_message is None or success_message in msg:
 break
-- 
2.34.1

[PATCH v2 04/12] tests/docker: allow user to override check target

2022-11-11 Thread Alex Bennée

This is useful when trying to bisect a particular failing test behind
a docker run. For example:

  make docker-test-clang@fedora \
TARGET_LIST=arm-softmmu \
TEST_COMMAND="meson test qtest-arm/qos-test" \
J=9 V=1

Signed-off-by: Alex Bennée 

---
v1
 - fix s/target /target./
 - CHECK_TARGET -> TEST_COMMAND
---
 tests/docker/Makefile.include | 2 ++
 tests/docker/common.rc| 6 +++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include
index c87f14477a..fc7a3b7e71 100644
--- a/tests/docker/Makefile.include
+++ b/tests/docker/Makefile.include
@@ -184,6 +184,7 @@ docker:
@echo 'TARGET_LIST=a,b,cOverride target list in builds.'
@echo 'EXTRA_CONFIGURE_OPTS="..."'
@echo ' Extra configure options.'
+   @echo 'TEST_COMMAND="..."   Override the default `make check` 
target.'
@echo 'IMAGES="a b c ..":   Restrict available images to subset.'
@echo 'TESTS="x y z .." Restrict available tests to subset.'
@echo 'J=[0..9]*Overrides the -jN parameter for make 
commands'
@@ -230,6 +231,7 @@ docker-run: docker-qemu-src
$(if $(NETWORK),$(if $(subst 
$(NETWORK),,1),--net=$(NETWORK)),--net=none) \
-e TARGET_LIST=$(subst 
$(SPACE),$(COMMA),$(TARGET_LIST))\
-e EXTRA_CONFIGURE_OPTS="$(EXTRA_CONFIGURE_OPTS)" \
+   -e TEST_COMMAND="$(TEST_COMMAND)"   \
-e V=$V -e J=$J -e DEBUG=$(DEBUG)   \
-e SHOW_ENV=$(SHOW_ENV) \
$(if $(NOUSER),,\
diff --git a/tests/docker/common.rc b/tests/docker/common.rc
index e6f8cee0d6..9a33df2832 100755
--- a/tests/docker/common.rc
+++ b/tests/docker/common.rc
@@ -63,12 +63,12 @@ check_qemu()
 {
 # default to make check unless the caller specifies
 if [ $# = 0 ]; then
-INVOCATION="check"
+INVOCATION="${TEST_COMMAND:-make $MAKEFLAGS check}"
 else
-INVOCATION="$@"
+INVOCATION="make $MAKEFLAGS $@"
 fi
 
-make $MAKEFLAGS $INVOCATION
+$INVOCATION
 }
 
 test_fail()
-- 
2.34.1

[PATCH v2 10/12] tests/avocado: Raise timeout for boot_linux.py:BootLinuxPPC64.test_pseries_tcg

2022-11-11 Thread Alex Bennée

From: Peter Maydell 

On my machine, a debug build of QEMU takes about 260 seconds to
complete this test, so with the current timeout value of 180 seconds
it always times out.  Double the timeout value to 360 so the test
definitely has enough time to complete.

Signed-off-by: Peter Maydell 
Signed-off-by: Alex Bennée 
Message-Id: <20221110142901.3832318-1-peter.mayd...@linaro.org>
---
 tests/avocado/boot_linux.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/avocado/boot_linux.py b/tests/avocado/boot_linux.py
index 571d33882a..2be4be395d 100644
--- a/tests/avocado/boot_linux.py
+++ b/tests/avocado/boot_linux.py
@@ -116,7 +116,7 @@ class BootLinuxPPC64(LinuxTest):
 :avocado: tags=arch:ppc64
 """
 
-timeout = 180
+timeout = 360
 
 def test_pseries_tcg(self):
 """
-- 
2.34.1

[PATCH v2 06/12] docs/devel: make language a little less code centric

2022-11-11 Thread Alex Bennée

We welcome all sorts of patches.

Signed-off-by: Alex Bennée 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Paolo Bonzini 
Message-Id: <20221012121152.1179051-3-alex.ben...@linaro.org>
---
 docs/devel/submitting-a-patch.rst | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/devel/submitting-a-patch.rst 
b/docs/devel/submitting-a-patch.rst
index fec33ce148..9c7c4331f3 100644
--- a/docs/devel/submitting-a-patch.rst
+++ b/docs/devel/submitting-a-patch.rst
@@ -3,11 +3,11 @@
 Submitting a Patch
 ==
 
-QEMU welcomes contributions of code (either fixing bugs or adding new
-functionality). However, we get a lot of patches, and so we have some
-guidelines about submitting patches. If you follow these, you'll help
-make our task of code review easier and your patch is likely to be
-committed faster.
+QEMU welcomes contributions to fix bugs, add functionality or improve
+the documentation. However, we get a lot of patches, and so we have
+some guidelines about submitting them. If you follow these, you'll
+help make our task of code review easier and your patch is likely to
+be committed faster.
 
 This page seems very long, so if you are only trying to post a quick
 one-shot fix, the bare minimum we ask is that:
-- 
2.34.1

1 2 3 >

1 - 100 of 239 matches

Mail list logo