date:20240605

Re: [PATCH v4 5/5] backends/hostmem: Report error when memory size is unaligned

2024-06-05 Thread Mario Casquero

This patch has been successfully tested. After allocating some 2M
hugepages on the host and booting a VM whose file-backed memory size
is unaligned, QEMU prints the following message:
qemu-system-x86_64: backend 'memory-backend-file' memory size must be
multiple of 2 MiB

Tested-by: Mario Casquero 

On Wed, Jun 5, 2024 at 12:45 PM Michal Privoznik  wrote:
>
> If memory-backend-{file,ram} has a size that's not aligned to
> underlying page size it is not only wasteful, but also may lead
> to hard to debug behaviour. For instance, in the case of
> memory-backend-file with hugepages, madvise() and mbind() fail.
> Rightfully so, a page is the smallest unit they can work with. And
> even though an error is reported, the root cause is not very
> clear:
>
>   qemu-system-x86_64: Couldn't set property 'dump' on 'memory-backend-file': 
> Invalid argument
>
> After this commit:
>
>   qemu-system-x86_64: backend 'memory-backend-file' memory size must be 
> multiple of 2 MiB
>
> Signed-off-by: Michal Privoznik 
> Reviewed-by: Philippe Mathieu-Daudé 
> Tested-by: Mario Casquero 
> ---
>  backends/hostmem.c | 10 ++
>  1 file changed, 10 insertions(+)
>
> diff --git a/backends/hostmem.c b/backends/hostmem.c
> index 012a8c190f..4d6c69fe4d 100644
> --- a/backends/hostmem.c
> +++ b/backends/hostmem.c
> @@ -20,6 +20,7 @@
>  #include "qom/object_interfaces.h"
>  #include "qemu/mmap-alloc.h"
>  #include "qemu/madvise.h"
> +#include "qemu/cutils.h"
>  #include "hw/qdev-core.h"
>
>  #ifdef CONFIG_NUMA
> @@ -337,6 +338,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, 
> Error **errp)
>  HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
>  void *ptr;
>  uint64_t sz;
> +size_t pagesize;
>  bool async = !phase_check(PHASE_LATE_BACKENDS_CREATED);
>
>  if (!bc->alloc) {
> @@ -348,6 +350,14 @@ host_memory_backend_memory_complete(UserCreatable *uc, 
> Error **errp)
>
>  ptr = memory_region_get_ram_ptr(&backend->mr);
>  sz = memory_region_size(&backend->mr);
> +pagesize = qemu_ram_pagesize(backend->mr.ram_block);
> +
> +if (!QEMU_IS_ALIGNED(sz, pagesize)) {
> +g_autofree char *pagesize_str = size_to_str(pagesize);
> +error_setg(errp, "backend '%s' memory size must be multiple of %s",
> +   object_get_typename(OBJECT(uc)), pagesize_str);
> +return;
> +}
>
>  if (backend->merge &&
>  qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE)) {
> --
> 2.44.1
>
>

[PATCH] util/bufferiszero: Split out host include files

2024-06-05 Thread Richard Henderson

Split out host/bufferiszero.h.inc for x86, aarch64 and generic
in order to avoid an overlong ifdef ladder.

Signed-off-by: Richard Henderson 
---
 host/include/aarch64/host/bufferiszero.h.inc |  76 
 host/include/generic/host/bufferiszero.h.inc |  10 +
 host/include/i386/host/bufferiszero.h.inc| 124 
 host/include/x86_64/host/bufferiszero.h.inc  |   1 +
 util/bufferiszero.c  | 191 +--
 5 files changed, 212 insertions(+), 190 deletions(-)
 create mode 100644 host/include/aarch64/host/bufferiszero.h.inc
 create mode 100644 host/include/generic/host/bufferiszero.h.inc
 create mode 100644 host/include/i386/host/bufferiszero.h.inc
 create mode 100644 host/include/x86_64/host/bufferiszero.h.inc

diff --git a/host/include/aarch64/host/bufferiszero.h.inc 
b/host/include/aarch64/host/bufferiszero.h.inc
new file mode 100644
index 00..0f0e478831
--- /dev/null
+++ b/host/include/aarch64/host/bufferiszero.h.inc
@@ -0,0 +1,76 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * buffer_is_zero acceleration, aarch64 version.
+ */
+
+#ifdef __ARM_NEON
+#include <arm_neon.h>
+
+/*
+ * Helper for preventing the compiler from reassociating
+ * chains of binary vector operations.
+ */
+#define REASSOC_BARRIER(vec0, vec1) asm("" : "+w"(vec0), "+w"(vec1))
+
+static bool buffer_is_zero_simd(const void *buf, size_t len)
+{
+uint32x4_t t0, t1, t2, t3;
+
+/* Align head/tail to 16-byte boundaries.  */
+const uint32x4_t *p = QEMU_ALIGN_PTR_DOWN(buf + 16, 16);
+const uint32x4_t *e = QEMU_ALIGN_PTR_DOWN(buf + len - 1, 16);
+
+/* Unaligned loads at head/tail.  */
+t0 = vld1q_u32(buf) | vld1q_u32(buf + len - 16);
+
+/* Collect a partial block at tail end.  */
+t1 = e[-7] | e[-6];
+t2 = e[-5] | e[-4];
+t3 = e[-3] | e[-2];
+t0 |= e[-1];
+REASSOC_BARRIER(t0, t1);
+REASSOC_BARRIER(t2, t3);
+t0 |= t1;
+t2 |= t3;
+REASSOC_BARRIER(t0, t2);
+t0 |= t2;
+
+/*
+ * Loop over complete 128-byte blocks.
+ * With the head and tail removed, e - p >= 14, so the loop
+ * must iterate at least once.
+ */
+do {
+/*
+ * Reduce via UMAXV.  Whatever the actual result,
+ * it will only be zero if all input bytes are zero.
+ */
+if (unlikely(vmaxvq_u32(t0) != 0)) {
+return false;
+}
+
+t0 = p[0] | p[1];
+t1 = p[2] | p[3];
+t2 = p[4] | p[5];
+t3 = p[6] | p[7];
+REASSOC_BARRIER(t0, t1);
+REASSOC_BARRIER(t2, t3);
+t0 |= t1;
+t2 |= t3;
+REASSOC_BARRIER(t0, t2);
+t0 |= t2;
+p += 8;
+} while (p < e - 7);
+
+return vmaxvq_u32(t0) == 0;
+}
+
+static biz_accel_fn const accel_table[] = {
+buffer_is_zero_int_ge256,
+buffer_is_zero_simd,
+};
+
+#define best_accel() 1
+#else
+# include "host/include/generic/host/bufferiszero.h.inc"
+#endif
diff --git a/host/include/generic/host/bufferiszero.h.inc 
b/host/include/generic/host/bufferiszero.h.inc
new file mode 100644
index 00..ea0875c24a
--- /dev/null
+++ b/host/include/generic/host/bufferiszero.h.inc
@@ -0,0 +1,10 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * buffer_is_zero acceleration, generic version.
+ */
+
+static biz_accel_fn const accel_table[1] = {
+buffer_is_zero_int_ge256
+};
+
+#define best_accel() 0
diff --git a/host/include/i386/host/bufferiszero.h.inc 
b/host/include/i386/host/bufferiszero.h.inc
new file mode 100644
index 00..ac9bcd07ee
--- /dev/null
+++ b/host/include/i386/host/bufferiszero.h.inc
@@ -0,0 +1,124 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * buffer_is_zero acceleration, x86 version.
+ */
+
+#if defined(CONFIG_AVX2_OPT) || defined(__SSE2__)
+#include <immintrin.h>
+
+/* Helper for preventing the compiler from reassociating
+   chains of binary vector operations.  */
+#define SSE_REASSOC_BARRIER(vec0, vec1) asm("" : "+x"(vec0), "+x"(vec1))
+
+/* Note that these vectorized functions may assume len >= 256.  */
+
+static bool __attribute__((target("sse2")))
+buffer_zero_sse2(const void *buf, size_t len)
+{
+/* Unaligned loads at head/tail.  */
+__m128i v = *(__m128i_u *)(buf);
+__m128i w = *(__m128i_u *)(buf + len - 16);
+/* Align head/tail to 16-byte boundaries.  */
+const __m128i *p = QEMU_ALIGN_PTR_DOWN(buf + 16, 16);
+const __m128i *e = QEMU_ALIGN_PTR_DOWN(buf + len - 1, 16);
+__m128i zero = { 0 };
+
+/* Collect a partial block at tail end.  */
+v |= e[-1]; w |= e[-2];
+SSE_REASSOC_BARRIER(v, w);
+v |= e[-3]; w |= e[-4];
+SSE_REASSOC_BARRIER(v, w);
+v |= e[-5]; w |= e[-6];
+SSE_REASSOC_BARRIER(v, w);
+v |= e[-7]; v |= w;
+
+/*
+ * Loop over complete 128-byte blocks.
+ * With the head and tail removed, e - p >= 14, so the loop
+ * must iterate at least once.
+ */
+do {
+v = _mm_cmpeq_epi8(v, zero);
+if (unlikely(_mm_movemask_epi8(v) != 0xFFFF)) {
+

Re: Unexpected error in rme_configure_one() at ../target/arm/kvm-rme.c:159

2024-06-05 Thread Gavin Shan




On 6/6/24 01:56, Jean-Philippe Brucker wrote:

On Wed, Jun 05, 2024 at 11:28:47AM +1000, Gavin Shan wrote:

WriteSections64(): 
/home/gavin/sandbox/CCA/edk2-guest/Build/ArmVirtQemu-AARCH64/DEBUG_GCC5/AARCH64/ArmPlatformPkg/PrePeiCore/PrePeiCoreUniCore/DEBUG/ArmPlatformPrePeiCore.dll
 AARCH64 small code model requires identical ELF and PE/COFF section offsets 
modulo 4 KB.


Ah I've seen this once but it disappeared as I tried to investigate and
I've since changed the implementation, so I don't have many notes about
it.

Maybe you could try to bisect from "ArmVirtPkg: ArmCcaIoMmu: Provide an
implementation for SetAttribute", but it may give false positives if the
error depends on some random linker placement. Could be
"ArmVirtPkg/ArmPlatformLibQemu: Setup early UART mapping in a Realm" which
adds a 4k page to the data section for the early RSI config call, though
that has explicit 4kB alignment.

In my notes I also wrote that changing "-z common-page-size=0x20" to 4k in
the link flags may have made the error disappear, but I doubt it's the
right fix.

I'll try GCC 11 to see if I can reproduce.



Ok. I ran git-bisect and the first problematic commit is 1153ae939c
("ArmVirtPkg/ArmPlatformLibQemu: Add a third-level page table for the UART 
idmap")


Ah thanks, I'm able to reproduce the problem now, it was my local config
that masked it.



I'm not familiar with edk2. The error is raised by 
BaseTools/Source/C/GenFw/Elf64Convert.c::WriteSections64()
where the relocatable address isn't properly aligned to 4KB. So I modified the
code as below, but I have to run two consecutive builds. On the first build
attempt, I still hit the same error.


This seems to be because GenFw generates a file even on error, so it
doesn't retry the second time.

This commit moves the page tables from .rodata to .data. When linking
IdMap.obj into ArmPlatformPrePeiCore.dll, the alignment of the .text
section changes from 0x1000 to 0x800. This change comes from the linker
script putting .rodata into .text. I don't know why the included .rodata
alignment affects the .text alignment, but I don't think it matters here.

In GenFw, ScanSections64() calculates mCoffAlignment as the max
.text/.data/.hii section alignment. Since with this commit, .data
alignment (0x1000) becomes larger than .text (0x800), it picks 0x1000 as
the output text offset, and then WriteSections64() complains that this
offset isn't equal to the input .text alignment modulo 0x1000.

The linker script says:

   /*
* The alignment of the .data section should be less than or equal to the
* alignment of the .text section. This ensures that the relative offset
* between these sections is the same in the ELF and the PE/COFF versions of
* this binary.
*/

but that's not what we're getting. I don't have a fix yet, other than
forcing the .text and .data alignment to 4k.



Jean, thanks for your explanation. Right, the issue is caused by mismatched
alignments for ELF and PE/COFF sections. I once dumped the variables at the
failing point, showing the mismatched alignments (0x800 vs 0x1000). Apart from
that, the virtual address of the 'text' section is aligned to 0x800 instead of
0x1000 when ArmPlatformPrePeiCore.dll is dumped by 'readelf'.

SecHdr->sh_addr:0x800  <<< Mismatched alignment 
between ELF and PE/COFF
SecOffset:  0x1000
SymShdr->sh_addr:   0x800
mCoffSectionsOffset[Sym->st_shndx]: 0x1000
GenFw: ERROR 3000: Invalid
  WriteSections64(): 
/home/gavin/sandbox/CCA/edk2-guest/Build/ArmVirtQemu-AARCH64/DEBUG_GCC5/AARCH64/ArmPlatformPkg/PrePeiCore/PrePeiCoreUniCore/DEBUG/ArmPlatformPrePeiCore.dll
 AARCH64 small code model requires identical ELF and PE/COFF section offsets 
modulo 4 KB.

# readelf -S 
Build/ArmVirtQemu-AARCH64/DEBUG_GCC5/AARCH64/ArmPlatformPkg/PrePeiCore/PrePeiCoreUniCore/DEBUG/ArmPlatformPrePeiCore.dll
Section Headers:
  [Nr] Name  Type Address   Offset
   Size  EntSize  Flags  Link  Info  Align
  [ 0]   NULL   
        0 0 0
  [ 1] .text PROGBITS 0800  0800   <<< 
Aligned to 0x800
   51b8    AX   0 0 2048

With the following changes, I'm able to build the firmware successfully. I don't
see how COMMONPAGESIZE is sorted out because I don't find its definition in the
source code.

diff --git a/BaseTools/Scripts/GccBase.lds b/BaseTools/Scripts/GccBase.lds
index 9f27e83bb0..5463df47a9 100644
--- a/BaseTools/Scripts/GccBase.lds
+++ b/BaseTools/Scripts/GccBase.lds
@@ -20,7 +20,8 @@ SECTIONS {
*/
   . = PECOFF_HEADER_SIZE;
 
-  .text : ALIGN(CONSTANT(COMMONPAGESIZE)) {

+  /* .text : ALIGN(CONSTANT(COMMONPAGESIZE)) { */^M
+  .text : ALIGN(4096) {^M

# 
# readelf -S

[PATCH v3 3/3] tests/qtest/x86: check for availability of older cpu models before running tests

2024-06-05 Thread Ani Sinha

It is better to check if some older cpu models like 486, athlon, pentium,
penryn, phenom, core2duo etc are available before running their corresponding
tests. Some downstream distributions may no longer support these older cpu
models.

Signature of add_feature_test() has been modified to return void as
FeatureTestArgs* was not used by the caller.

One minor correction. Replaced 'phenom' with '486' in the test
'x86/cpuid/auto-level/phenom/arat' matching the cpu used.

CC: th...@redhat.com
CC: imamm...@redhat.com
Signed-off-by: Ani Sinha 
---
 tests/qtest/test-x86-cpuid-compat.c | 170 ++--
 1 file changed, 108 insertions(+), 62 deletions(-)

changelog:
v2: reworked as per suggestion from danpb.
v3: reworked add_feature_test() the same way as add_cpuid_test()

diff --git a/tests/qtest/test-x86-cpuid-compat.c 
b/tests/qtest/test-x86-cpuid-compat.c
index 6a39454fce..d5307f39e7 100644
--- a/tests/qtest/test-x86-cpuid-compat.c
+++ b/tests/qtest/test-x86-cpuid-compat.c
@@ -67,10 +67,29 @@ static void test_cpuid_prop(const void *data)
 g_free(path);
 }
 
-static void add_cpuid_test(const char *name, const char *cmdline,
+static void add_cpuid_test(const char *name, const char *cpu,
+   const char *cpufeat, const char *machine,
const char *property, int64_t expected_value)
 {
 CpuidTestArgs *args = g_new0(CpuidTestArgs, 1);
+char *cmdline;
+char *save;
+
+if (!qtest_has_cpu(cpu)) {
+return;
+}
+cmdline = g_strdup_printf("-cpu %s", cpu);
+
+if (cpufeat) {
+save = cmdline;
+cmdline = g_strdup_printf("%s,%s", cmdline, cpufeat);
+g_free(save);
+}
+if (machine) {
+save = cmdline;
+cmdline = g_strdup_printf("-machine %s %s", machine, cmdline);
+g_free(save);
+}
 args->cmdline = cmdline;
 args->property = property;
 args->expected_value = expected_value;
@@ -149,12 +168,24 @@ static void test_feature_flag(const void *data)
  * either "feature-words" or "filtered-features", when running QEMU
  * using cmdline
  */
-static FeatureTestArgs *add_feature_test(const char *name, const char *cmdline,
- uint32_t eax, uint32_t ecx,
- const char *reg, int bitnr,
- bool expected_value)
+static void add_feature_test(const char *name, const char *cpu,
+ const char *cpufeat, uint32_t eax,
+ uint32_t ecx, const char *reg,
+ int bitnr, bool expected_value)
 {
 FeatureTestArgs *args = g_new0(FeatureTestArgs, 1);
+char *cmdline;
+
+if (!qtest_has_cpu(cpu)) {
+return;
+}
+
+if (cpufeat) {
+cmdline = g_strdup_printf("-cpu %s,%s", cpu, cpufeat);
+} else {
+cmdline = g_strdup_printf("-cpu %s", cpu);
+}
+
 args->cmdline = cmdline;
 args->in_eax = eax;
 args->in_ecx = ecx;
@@ -162,13 +193,17 @@ static FeatureTestArgs *add_feature_test(const char 
*name, const char *cmdline,
 args->bitnr = bitnr;
 args->expected_value = expected_value;
 qtest_add_data_func(name, args, test_feature_flag);
-return args;
+return;
 }
 
 static void test_plus_minus_subprocess(void)
 {
 char *path;
 
+if (!qtest_has_cpu("pentium")) {
+return;
+}
+
 /* Rules:
  * 1)"-foo" overrides "+foo"
  * 2) "[+-]foo" overrides "foo=..."
@@ -198,6 +233,10 @@ static void test_plus_minus_subprocess(void)
 
 static void test_plus_minus(void)
 {
+if (!qtest_has_cpu("pentium")) {
+return;
+}
+
 g_test_trap_subprocess("/x86/cpuid/parsing-plus-minus/subprocess", 0, 0);
 g_test_trap_assert_passed();
 g_test_trap_assert_stderr("*Ambiguous CPU model string. "
@@ -217,99 +256,105 @@ int main(int argc, char **argv)
 
 /* Original level values for CPU models: */
 add_cpuid_test("x86/cpuid/phenom/level",
-   "-cpu phenom", "level", 5);
+   "phenom", NULL, NULL, "level", 5);
 add_cpuid_test("x86/cpuid/Conroe/level",
-   "-cpu Conroe", "level", 10);
+   "Conroe", NULL, NULL, "level", 10);
 add_cpuid_test("x86/cpuid/SandyBridge/level",
-   "-cpu SandyBridge", "level", 0xd);
+   "SandyBridge", NULL, NULL, "level", 0xd);
 add_cpuid_test("x86/cpuid/486/xlevel",
-   "-cpu 486", "xlevel", 0);
+   "486", NULL, NULL, "xlevel", 0);
 add_cpuid_test("x86/cpuid/core2duo/xlevel",
-   "-cpu core2duo", "xlevel", 0x8008);
+   "core2duo", NULL, NULL, "xlevel", 0x8008);
 add_cpuid_test("x86/cpuid/phenom/xlevel",
-   "-cpu phenom", "xlevel", 0x801A);
+   "phenom", NULL, NULL, "xlevel", 0x801A);
 add_cpuid_test("x86/cpuid/athlon/xlevel",
-   "-cpu

[PATCH v3 0/3] x86 cpu test refactoring

2024-06-05 Thread Ani Sinha

Add a new library api to check for the support of a specific cpu type.
Used the new api to check support for some older x86 cpu models before
running the tests.

CC: th...@redhat.com
CC: imamm...@redhat.com
CC: qemu-devel@nongnu.org
CC: pbonz...@redhat.com
CC: lviv...@redhat.com
CC: m...@redhat.com

Ani Sinha (3):
  qtest/x86/numa-test: do not use the obsolete 'pentium' cpu
  tests/qtest/libqtest: add qtest_has_cpu() api
  tests/qtest/x86: check for availability of older cpu models before
running tests

 tests/qtest/libqtest.c  |  84 +++
 tests/qtest/libqtest.h  |   8 ++
 tests/qtest/numa-test.c |   3 +-
 tests/qtest/test-x86-cpuid-compat.c | 159 +---
 4 files changed, 192 insertions(+), 62 deletions(-)

-- 
2.42.0

[PATCH v3 1/3] qtest/x86/numa-test: do not use the obsolete 'pentium' cpu

2024-06-05 Thread Ani Sinha

'pentium' cpu is old and obsolete and should be avoided for running tests if
it's not strictly needed. Use 'max' cpu instead for the generic, non-cpu-specific
numa test.

CC: th...@redhat.com
Reviewed-by: Thomas Huth 
Reviewed-by: Igor Mammedov 
Signed-off-by: Ani Sinha 
---
 tests/qtest/numa-test.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

changelog:
v2: added reviewed tag (thuth)
v3: added reviewed tag (imammedo)

diff --git a/tests/qtest/numa-test.c b/tests/qtest/numa-test.c
index 7aa262dbb9..f01f19592d 100644
--- a/tests/qtest/numa-test.c
+++ b/tests/qtest/numa-test.c
@@ -125,7 +125,8 @@ static void pc_numa_cpu(const void *data)
 QTestState *qts;
 g_autofree char *cli = NULL;
 
-cli = make_cli(data, "-cpu pentium -machine 
smp.cpus=8,smp.sockets=2,smp.cores=2,smp.threads=2 "
+cli = make_cli(data,
+"-cpu max -machine smp.cpus=8,smp.sockets=2,smp.cores=2,smp.threads=2 "
 "-numa node,nodeid=0,memdev=ram -numa node,nodeid=1 "
 "-numa cpu,node-id=1,socket-id=0 "
 "-numa cpu,node-id=0,socket-id=1,core-id=0 "
-- 
2.42.0

[PATCH 2/3] tests/qtest/libqtest: add qtest_has_cpu() api

2024-06-05 Thread Ani Sinha

Added a new test api qtest_has_cpu() in order to check availability of some
cpu models in the current QEMU binary. The specific architecture of the QEMU
binary is selected using the QTEST_QEMU_BINARY environment variable. This api
is useful for running tests against some older cpu models after checking that
QEMU actually supports these models.

CC: th...@redhat.com
Signed-off-by: Ani Sinha 
---
 tests/qtest/libqtest.c | 84 ++
 tests/qtest/libqtest.h |  8 
 2 files changed, 92 insertions(+)

diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c
index d8f80d335e..135a607728 100644
--- a/tests/qtest/libqtest.c
+++ b/tests/qtest/libqtest.c
@@ -37,6 +37,7 @@
 #include "qapi/qmp/qjson.h"
 #include "qapi/qmp/qlist.h"
 #include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qbool.h"
 
 #define MAX_IRQ 256
 
@@ -1471,6 +1472,12 @@ struct MachInfo {
 char *alias;
 };
 
+struct CpuInfo {
+char *name;
+char *alias_of;
+bool deprecated;
+};
+
 static void qtest_free_machine_list(struct MachInfo *machines)
 {
 if (machines) {
@@ -1550,6 +1557,83 @@ static struct MachInfo *qtest_get_machines(const char 
*var)
 return machines;
 }
 
+static struct CpuInfo *qtest_get_cpus(void)
+{
+static struct CpuInfo *cpus;
+QDict *response, *minfo;
+QList *list;
+const QListEntry *p;
+QObject *qobj;
+QString *qstr;
+QBool *qbool;
+QTestState *qts;
+int idx;
+
+if (cpus) {
+return cpus;
+}
+
+silence_spawn_log = !g_test_verbose();
+
+qts = qtest_init_with_env(NULL, "-machine none");
+response = qtest_qmp(qts, "{ 'execute': 'query-cpu-definitions' }");
+g_assert(response);
+list = qdict_get_qlist(response, "return");
+g_assert(list);
+
+cpus = g_new(struct CpuInfo, qlist_size(list) + 1);
+
+for (p = qlist_first(list), idx = 0; p; p = qlist_next(p), idx++) {
+minfo = qobject_to(QDict, qlist_entry_obj(p));
+g_assert(minfo);
+
+qobj = qdict_get(minfo, "name");
+g_assert(qobj);
+qstr = qobject_to(QString, qobj);
+g_assert(qstr);
+cpus[idx].name = g_strdup(qstring_get_str(qstr));
+
+qobj = qdict_get(minfo, "alias_of");
+if (qobj) { /* old machines do not report aliases */
+qstr = qobject_to(QString, qobj);
+g_assert(qstr);
+cpus[idx].alias_of = g_strdup(qstring_get_str(qstr));
+} else {
+cpus[idx].alias_of = NULL;
+}
+
+qobj = qdict_get(minfo, "deprecated");
+qbool = qobject_to(QBool, qobj);
+g_assert(qbool);
+cpus[idx].deprecated = qbool_get_bool(qbool);
+}
+
+qtest_quit(qts);
+qobject_unref(response);
+
+silence_spawn_log = false;
+
+memset(&cpus[idx], 0, sizeof(struct CpuInfo)); /* Terminating entry */
+return cpus;
+}
+
+bool qtest_has_cpu(const char *cpu)
+{
+struct CpuInfo *cpus;
+int i;
+
+cpus = qtest_get_cpus();
+
+for (i = 0; cpus[i].name != NULL; i++) {
+if (g_str_equal(cpu, cpus[i].name) ||
+(cpus[i].alias_of && g_str_equal(cpu, cpus[i].alias_of))) {
+return true;
+}
+}
+
+return false;
+}
+
 void qtest_cb_for_every_machine(void (*cb)(const char *machine),
 bool skip_old_versioned)
 {
diff --git a/tests/qtest/libqtest.h b/tests/qtest/libqtest.h
index 6e3d3525bf..c94b64f960 100644
--- a/tests/qtest/libqtest.h
+++ b/tests/qtest/libqtest.h
@@ -949,6 +949,14 @@ bool qtest_has_machine(const char *machine);
  */
 bool qtest_has_machine_with_env(const char *var, const char *machine);
 
+/**
+ * qtest_has_cpu:
+ * @cpu: The cpu to look for
+ *
+ * Returns: true if the cpu is available in the target binary.
+ */
+bool qtest_has_cpu(const char *cpu);
+
 /**
  * qtest_has_device:
  * @device: The device to look for
-- 
2.42.0

RE: [PATCH v6 18/19] intel_iommu: Implement [set|unset]_iommu_device() callbacks

2024-06-05 Thread Duan, Zhenzhong



>-Original Message-
>From: Eric Auger 
>Subject: Re: [PATCH v6 18/19] intel_iommu: Implement
>[set|unset]_iommu_device() callbacks
>
>
>
>On 6/4/24 07:46, Duan, Zhenzhong wrote:
>>
>>> -Original Message-
>>> From: Eric Auger 
>>> Subject: Re: [PATCH v6 18/19] intel_iommu: Implement
>>> [set|unset]_iommu_device() callbacks
>>>
>>>
>>>
>>> On 6/3/24 08:10, Zhenzhong Duan wrote:
 From: Yi Liu 

 Implement [set|unset]_iommu_device() callbacks in Intel vIOMMU.
 In set call, a new structure VTDHostIOMMUDevice which holds
 a reference to HostIOMMUDevice is stored in hash table
 indexed by PCI BDF.

 Signed-off-by: Yi Liu 
 Signed-off-by: Yi Sun 
 Signed-off-by: Zhenzhong Duan 
 ---
  hw/i386/intel_iommu_internal.h |  9 
  include/hw/i386/intel_iommu.h  |  2 +
  hw/i386/intel_iommu.c  | 76
>>> ++
  3 files changed, 87 insertions(+)

 diff --git a/hw/i386/intel_iommu_internal.h
>>> b/hw/i386/intel_iommu_internal.h
 index f8cf99bddf..b800d62ca0 100644
 --- a/hw/i386/intel_iommu_internal.h
 +++ b/hw/i386/intel_iommu_internal.h
 @@ -28,6 +28,7 @@
  #ifndef HW_I386_INTEL_IOMMU_INTERNAL_H
  #define HW_I386_INTEL_IOMMU_INTERNAL_H
  #include "hw/i386/intel_iommu.h"
 +#include "sysemu/host_iommu_device.h"

  /*
   * Intel IOMMU register specification
 @@ -537,4 +538,12 @@ typedef struct VTDRootEntry VTDRootEntry;
  #define VTD_SL_IGN_COM  0xbff0ULL
  #define VTD_SL_TM   (1ULL << 62)

 +
 +typedef struct VTDHostIOMMUDevice {
 +IntelIOMMUState *iommu_state;
 +PCIBus *bus;
 +uint8_t devfn;
 +HostIOMMUDevice *dev;
 +QLIST_ENTRY(VTDHostIOMMUDevice) next;
 +} VTDHostIOMMUDevice;
  #endif
 diff --git a/include/hw/i386/intel_iommu.h
>>> b/include/hw/i386/intel_iommu.h
 index 7d694b0813..2bbde41e45 100644
 --- a/include/hw/i386/intel_iommu.h
 +++ b/include/hw/i386/intel_iommu.h
 @@ -293,6 +293,8 @@ struct IntelIOMMUState {
  /* list of registered notifiers */
  QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;

 +GHashTable *vtd_host_iommu_dev; /*
>VTDHostIOMMUDevice
>>> */
 +
  /* interrupt remapping */
  bool intr_enabled;  /* Whether guest enabled IR */
  dma_addr_t intr_root;   /* Interrupt remapping table pointer 
 */
 diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
 index 519063c8f8..747c988bc4 100644
 --- a/hw/i386/intel_iommu.c
 +++ b/hw/i386/intel_iommu.c
 @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer
>v1,
>>> gconstpointer v2)
 (key1->pasid == key2->pasid);
  }

 +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2)
 +{
 +const struct vtd_as_key *key1 = v1;
 +const struct vtd_as_key *key2 = v2;
 +
 +return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
 +}
  /*
   * Note that we use pointer to PCIBus as the key, so hashing/shifting
   * based on the pointer value is intended. Note that we deal with
 @@ -3812,6 +3819,70 @@ VTDAddressSpace
>>> *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
  return vtd_dev_as;
  }

 +static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int
>>> devfn,
 + HostIOMMUDevice *hiod, Error **errp)
 +{
 +IntelIOMMUState *s = opaque;
 +VTDHostIOMMUDevice *vtd_hdev;
 +struct vtd_as_key key = {
 +.bus = bus,
 +.devfn = devfn,
 +};
 +struct vtd_as_key *new_key;
 +
 +assert(hiod);
 +
 +vtd_iommu_lock(s);
 +
 +vtd_hdev = g_hash_table_lookup(s->vtd_host_iommu_dev, &key);
 +
 +if (vtd_hdev) {
 +error_setg(errp, "IOMMUFD device already exist");
 +vtd_iommu_unlock(s);
 +return false;
 +}
 +
 +vtd_hdev = g_malloc0(sizeof(VTDHostIOMMUDevice));
 +vtd_hdev->bus = bus;
 +vtd_hdev->devfn = (uint8_t)devfn;
 +vtd_hdev->iommu_state = s;
 +vtd_hdev->dev = hiod;
>>> I am still not totally clear about why we couldn't reuse VTDAddressSpace
>>> instance for this bus/devid. Is it a matter of aliased versus non
>>> aliased bus/devfn, or a matter of pasid diff. An AddressSpace could back
>>> an assigned device in which case a HostIOMMUDevice could be added to
>>> this latter. I think this should be explained in the commit msg
>> Yes, as you said, it's a matter of aliased vs non aliased BDF.
>>
>> VTDAddressSpace is per aliased BDF while VTDHostIOMMUDevice is per
>non aliased BDF.
>> There can be multiple assigned devices under same virtual iommu group
>and share same
>> VTDAddressSpace, but they have their

[PULL 0/6] loongarch-to-apply queue

2024-06-05 Thread Song Gao

The following changes since commit db2feb2df8d19592c9859efb3f682404e0052957:

  Merge tag 'pull-misc-20240605' of https://gitlab.com/rth7680/qemu into 
staging (2024-06-05 14:17:01 -0700)

are available in the Git repository at:

  https://gitlab.com/gaosong/qemu.git tags/pull-loongarch-20240606

for you to fetch changes up to 78f932ea1f7b3b9b0ac628dc2a91281318fe51fa:

  target/loongarch: fix a wrong print in cpu dump (2024-06-06 11:58:06 +0800)


pull-loongarch-20240606


Bibo Mao (2):
  tests/libqos: Add loongarch virt machine node
  tests/qtest: Add numa test for loongarch system

Song Gao (3):
  hw/intc/loongarch_extioi: Add extioi virt extension definition
  hw/loongarch/virt: Use MemTxAttrs interface for misc ops
  hw/loongarch/virt: Enable extioi virt extension

lanyanzhi (1):
  target/loongarch: fix a wrong print in cpu dump

 hw/intc/loongarch_extioi.c  |  88 -
 hw/loongarch/virt.c | 184 +++-
 include/hw/intc/loongarch_extioi.h  |  21 
 include/hw/loongarch/virt.h |   1 +
 target/loongarch/cpu.c  |   2 +-
 target/loongarch/cpu.h  |   1 +
 tests/qtest/libqos/loongarch-virt-machine.c | 114 +
 tests/qtest/libqos/meson.build  |   1 +
 tests/qtest/meson.build |   2 +-
 tests/qtest/numa-test.c |  53 
 10 files changed, 428 insertions(+), 39 deletions(-)
 create mode 100644 tests/qtest/libqos/loongarch-virt-machine.c

[PULL 4/6] hw/loongarch/virt: Use MemTxAttrs interface for misc ops

2024-06-05 Thread Song Gao

Use MemTxAttrs interface read_with_attrs/write_with_attrs
for virt_iocsr_misc_ops.

Signed-off-by: Song Gao 
Reviewed-by: Bibo Mao 
Message-Id: <20240528083855.1912757-3-gaos...@loongson.cn>
---
 hw/loongarch/virt.c | 36 
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index 2b5ae45939..0cef33a904 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -900,37 +900,49 @@ static void virt_firmware_init(LoongArchVirtMachineState 
*lvms)
 }
 
 
-static void virt_iocsr_misc_write(void *opaque, hwaddr addr,
-  uint64_t val, unsigned size)
+static MemTxResult virt_iocsr_misc_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned size,
+ MemTxAttrs attrs)
 {
+return MEMTX_OK;
 }
 
-static uint64_t virt_iocsr_misc_read(void *opaque, hwaddr addr, unsigned size)
+static MemTxResult virt_iocsr_misc_read(void *opaque, hwaddr addr,
+uint64_t *data,
+unsigned size, MemTxAttrs attrs)
 {
-uint64_t ret;
+uint64_t ret = 0;
 
 switch (addr) {
 case VERSION_REG:
-return 0x11ULL;
+ret = 0x11ULL;
+break;
 case FEATURE_REG:
 ret = BIT(IOCSRF_MSI) | BIT(IOCSRF_EXTIOI) | BIT(IOCSRF_CSRIPI);
 if (kvm_enabled()) {
 ret |= BIT(IOCSRF_VM);
 }
-return ret;
+break;
 case VENDOR_REG:
-return 0x6e6f73676e6f6f4cULL; /* "Loongson" */
+ret = 0x6e6f73676e6f6f4cULL; /* "Loongson" */
+break;
 case CPUNAME_REG:
-return 0x303030354133ULL; /* "3A5000" */
+ret = 0x303030354133ULL; /* "3A5000" */
+break;
 case MISC_FUNC_REG:
-return BIT_ULL(IOCSRM_EXTIOI_EN);
+ret = BIT_ULL(IOCSRM_EXTIOI_EN);
+break;
+default:
+g_assert_not_reached();
 }
-return 0ULL;
+
+*data = ret;
+return MEMTX_OK;
 }
 
 static const MemoryRegionOps virt_iocsr_misc_ops = {
-.read  = virt_iocsr_misc_read,
-.write = virt_iocsr_misc_write,
+.read_with_attrs  = virt_iocsr_misc_read,
+.write_with_attrs = virt_iocsr_misc_write,
 .endianness = DEVICE_LITTLE_ENDIAN,
 .valid = {
 .min_access_size = 4,
-- 
2.34.1

[PULL 2/6] tests/qtest: Add numa test for loongarch system

2024-06-05 Thread Song Gao

From: Bibo Mao 

Add a numa test case for the loongarch system; it passes when run
with the command "make check-qtest".

Signed-off-by: Bibo Mao 
Acked-by: Thomas Huth 
Tested-by: Song Gao 
Message-Id: <20240528082155.938586-1-maob...@loongson.cn>
Signed-off-by: Song Gao 
---
 tests/qtest/meson.build |  2 +-
 tests/qtest/numa-test.c | 53 +
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index b98fae6a6d..12792948ff 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -140,7 +140,7 @@ qtests_hppa = ['boot-serial-test'] + \
   (config_all_devices.has_key('CONFIG_VGA') ? ['display-vga-test'] : [])
 
 qtests_loongarch64 = qtests_filter + \
-  ['boot-serial-test']
+  ['boot-serial-test', 'numa-test']
 
 qtests_m68k = ['boot-serial-test'] + \
   qtests_filter
diff --git a/tests/qtest/numa-test.c b/tests/qtest/numa-test.c
index 7aa262dbb9..5518f6596b 100644
--- a/tests/qtest/numa-test.c
+++ b/tests/qtest/numa-test.c
@@ -265,6 +265,54 @@ static void aarch64_numa_cpu(const void *data)
 qtest_quit(qts);
 }
 
+static void loongarch64_numa_cpu(const void *data)
+{
+QDict *resp;
+QList *cpus;
+QObject *e;
+QTestState *qts;
+g_autofree char *cli = NULL;
+
+cli = make_cli(data, "-machine "
+"smp.cpus=2,smp.sockets=2,smp.cores=1,smp.threads=1 "
+"-numa node,nodeid=0,memdev=ram -numa node,nodeid=1 "
+"-numa cpu,node-id=0,socket-id=1,core-id=0,thread-id=0 "
+"-numa cpu,node-id=1,socket-id=0,core-id=0,thread-id=0");
+qts = qtest_init(cli);
+cpus = get_cpus(qts, &resp);
+g_assert(cpus);
+
+while ((e = qlist_pop(cpus))) {
+QDict *cpu, *props;
+int64_t socket, core, thread, node;
+
+cpu = qobject_to(QDict, e);
+g_assert(qdict_haskey(cpu, "props"));
+props = qdict_get_qdict(cpu, "props");
+
+g_assert(qdict_haskey(props, "node-id"));
+node = qdict_get_int(props, "node-id");
+g_assert(qdict_haskey(props, "socket-id"));
+socket = qdict_get_int(props, "socket-id");
+g_assert(qdict_haskey(props, "core-id"));
+core = qdict_get_int(props, "core-id");
+g_assert(qdict_haskey(props, "thread-id"));
+thread = qdict_get_int(props, "thread-id");
+
+if (socket == 0 && core == 0 && thread == 0) {
+g_assert_cmpint(node, ==, 1);
+} else if (socket == 1 && core == 0 && thread == 0) {
+g_assert_cmpint(node, ==, 0);
+} else {
+g_assert(false);
+}
+qobject_unref(e);
+}
+
+qobject_unref(resp);
+qtest_quit(qts);
+}
+
 static void pc_dynamic_cpu_cfg(const void *data)
 {
 QObject *e;
@@ -593,6 +641,11 @@ int main(int argc, char **argv)
 aarch64_numa_cpu);
 }
 
+if (!strcmp(arch, "loongarch64")) {
+qtest_add_data_func("/numa/loongarch64/cpu/explicit", args,
+loongarch64_numa_cpu);
+}
+
 out:
 return g_test_run();
 }
-- 
2.34.1

[PULL 5/6] hw/loongarch/virt: Enable extioi virt extension

2024-06-05 Thread Song Gao

This patch adds a new board attribute 'v-eiointc'.
A value of true enables the virt extended I/O interrupt controller.
VMs working in kvm mode have 'v-eiointc' enabled by default.

Signed-off-by: Song Gao 
Reviewed-by: Bibo Mao 
Message-Id: <20240528083855.1912757-4-gaos...@loongson.cn>
---
 hw/loongarch/virt.c | 88 +++--
 include/hw/loongarch/virt.h |  1 +
 target/loongarch/cpu.h  |  1 +
 3 files changed, 87 insertions(+), 3 deletions(-)

diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index 0cef33a904..66cef201ab 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -11,6 +11,7 @@
 #include "hw/boards.h"
 #include "hw/char/serial.h"
 #include "sysemu/kvm.h"
+#include "sysemu/tcg.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/qtest.h"
 #include "sysemu/runstate.h"
@@ -48,6 +49,31 @@
 #include "hw/virtio/virtio-iommu.h"
 #include "qemu/error-report.h"
 
+static bool virt_is_veiointc_enabled(LoongArchVirtMachineState *lvms)
+{
+if (lvms->veiointc == ON_OFF_AUTO_OFF) {
+return false;
+}
+return true;
+}
+
+static void virt_get_veiointc(Object *obj, Visitor *v, const char *name,
+  void *opaque, Error **errp)
+{
+LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(obj);
+OnOffAuto veiointc = lvms->veiointc;
+
+visit_type_OnOffAuto(v, name, , errp);
+}
+
+static void virt_set_veiointc(Object *obj, Visitor *v, const char *name,
+  void *opaque, Error **errp)
+{
+LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(obj);
+
+visit_type_OnOffAuto(v, name, >veiointc, errp);
+}
+
 static PFlashCFI01 *virt_flash_create1(LoongArchVirtMachineState *lvms,
const char *name,
const char *alias_prop_name)
@@ -790,9 +816,16 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms)
 /* Create EXTIOI device */
 extioi = qdev_new(TYPE_LOONGARCH_EXTIOI);
 qdev_prop_set_uint32(extioi, "num-cpu", ms->smp.cpus);
+if (virt_is_veiointc_enabled(lvms)) {
+qdev_prop_set_bit(extioi, "has-virtualization-extension", true);
+}
 sysbus_realize_and_unref(SYS_BUS_DEVICE(extioi), _fatal);
 memory_region_add_subregion(>system_iocsr, APIC_BASE,
-   sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 0));
+sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 0));
+if (virt_is_veiointc_enabled(lvms)) {
+memory_region_add_subregion(>system_iocsr, EXTIOI_VIRT_BASE,
+sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 1));
+}
 
 /*
  * connect ext irq to the cpu irq
@@ -899,11 +932,37 @@ static void virt_firmware_init(LoongArchVirtMachineState 
*lvms)
 }
 }
 
-
 static MemTxResult virt_iocsr_misc_write(void *opaque, hwaddr addr,
  uint64_t val, unsigned size,
  MemTxAttrs attrs)
 {
+LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(opaque);
+uint64_t features;
+
+switch (addr) {
+case MISC_FUNC_REG:
+if (!virt_is_veiointc_enabled(lvms)) {
+return MEMTX_OK;
+}
+
+features = address_space_ldl(>as_iocsr,
+ EXTIOI_VIRT_BASE + EXTIOI_VIRT_CONFIG,
+ attrs, NULL);
+if (val & BIT_ULL(IOCSRM_EXTIOI_EN)) {
+features |= BIT(EXTIOI_ENABLE);
+}
+if (val & BIT_ULL(IOCSRM_EXTIOI_INT_ENCODE)) {
+features |= BIT(EXTIOI_ENABLE_INT_ENCODE);
+}
+
+address_space_stl(>as_iocsr,
+  EXTIOI_VIRT_BASE + EXTIOI_VIRT_CONFIG,
+  features, attrs, NULL);
+break;
+default:
+g_assert_not_reached();
+}
+
 return MEMTX_OK;
 }
 
@@ -911,7 +970,9 @@ static MemTxResult virt_iocsr_misc_read(void *opaque, 
hwaddr addr,
 uint64_t *data,
 unsigned size, MemTxAttrs attrs)
 {
+LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(opaque);
 uint64_t ret = 0;
+int features;
 
 switch (addr) {
 case VERSION_REG:
@@ -930,7 +991,20 @@ static MemTxResult virt_iocsr_misc_read(void *opaque, 
hwaddr addr,
 ret = 0x303030354133ULL; /* "3A5000" */
 break;
 case MISC_FUNC_REG:
-ret = BIT_ULL(IOCSRM_EXTIOI_EN);
+if (!virt_is_veiointc_enabled(lvms)) {
+ret |= BIT_ULL(IOCSRM_EXTIOI_EN);
+break;
+}
+
+features = address_space_ldl(>as_iocsr,
+ EXTIOI_VIRT_BASE + EXTIOI_VIRT_CONFIG,
+ attrs, NULL);
+if (features & BIT(EXTIOI_ENABLE)) {
+ret |= BIT_ULL(IOCSRM_EXTIOI_EN);
+}
+if (features & BIT(EXTIOI_ENABLE_INT_ENCODE)) {
+

[PULL 1/6] tests/libqos: Add loongarch virt machine node

2024-06-05 Thread Song Gao

From: Bibo Mao 

Add loongarch virt machine to the graph. It is a modified copy of
the existing riscv virtmachine in riscv-virt-machine.c

It contains a generic-pcihost controller, and an extra function
loongarch_config_qpci_bus() to configure GPEX pci host controller
information, such as ecam and pio_base addresses.

Also, a hotplug handler check for the TYPE_VIRTIO_IOMMU_PCI device is
added to the loongarch virt machine, since the virtio-iommu-pci device
requires it.

Signed-off-by: Bibo Mao 
Acked-by: Thomas Huth 
Message-Id: <20240528082053.938564-1-maob...@loongson.cn>
Signed-off-by: Song Gao 
---
 hw/loongarch/virt.c |   2 +
 tests/qtest/libqos/loongarch-virt-machine.c | 114 
 tests/qtest/libqos/meson.build  |   1 +
 3 files changed, 117 insertions(+)
 create mode 100644 tests/qtest/libqos/loongarch-virt-machine.c

diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index 3e6e93edf3..2d7f718570 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -45,6 +45,7 @@
 #include "sysemu/tpm.h"
 #include "sysemu/block-backend.h"
 #include "hw/block/flash.h"
+#include "hw/virtio/virtio-iommu.h"
 #include "qemu/error-report.h"
 
 static PFlashCFI01 *virt_flash_create1(LoongArchVirtMachineState *lvms,
@@ -1213,6 +1214,7 @@ static HotplugHandler 
*virt_get_hotplug_handler(MachineState *machine,
 MachineClass *mc = MACHINE_GET_CLASS(machine);
 
 if (device_is_dynamic_sysbus(mc, dev) ||
+object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) ||
 memhp_type_supported(dev)) {
 return HOTPLUG_HANDLER(machine);
 }
diff --git a/tests/qtest/libqos/loongarch-virt-machine.c 
b/tests/qtest/libqos/loongarch-virt-machine.c
new file mode 100644
index 00..c12089c015
--- /dev/null
+++ b/tests/qtest/libqos/loongarch-virt-machine.c
@@ -0,0 +1,114 @@
+/*
+ * libqos driver framework
+ *
+ * Copyright (c) 2018 Emanuele Giuseppe Esposito 
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "qemu/osdep.h"
+#include "../libqtest.h"
+#include "qemu/module.h"
+#include "libqos-malloc.h"
+#include "qgraph.h"
+#include "virtio-mmio.h"
+#include "generic-pcihost.h"
+#include "hw/pci/pci_regs.h"
+
+#define LOONGARCH_PAGE_SIZE   0x1000
+#define LOONGARCH_VIRT_RAM_ADDR   0x10
+#define LOONGARCH_VIRT_RAM_SIZE   0xFF0
+
+#define LOONGARCH_VIRT_PIO_BASE   0x1800
+#define LOONGARCH_VIRT_PCIE_PIO_OFFSET0x4000
+#define LOONGARCH_VIRT_PCIE_PIO_LIMIT 0x1
+#define LOONGARCH_VIRT_PCIE_ECAM_BASE 0x2000
+#define LOONGARCH_VIRT_PCIE_MMIO32_BASE   0x4000
+#define LOONGARCH_VIRT_PCIE_MMIO32_LIMIT  0x8000
+
+typedef struct QVirtMachine QVirtMachine;
+
+struct QVirtMachine {
+QOSGraphObject obj;
+QGuestAllocator alloc;
+QVirtioMMIODevice virtio_mmio;
+QGenericPCIHost bridge;
+};
+
+static void virt_destructor(QOSGraphObject *obj)
+{
+QVirtMachine *machine = (QVirtMachine *) obj;
+alloc_destroy(>alloc);
+}
+
+static void *virt_get_driver(void *object, const char *interface)
+{
+QVirtMachine *machine = object;
+if (!g_strcmp0(interface, "memory")) {
+return >alloc;
+}
+
+fprintf(stderr, "%s not present in loongarch/virtio\n", interface);
+g_assert_not_reached();
+}
+
+static QOSGraphObject *virt_get_device(void *obj, const char *device)
+{
+QVirtMachine *machine = obj;
+if (!g_strcmp0(device, "generic-pcihost")) {
+return >bridge.obj;
+} else if (!g_strcmp0(device, "virtio-mmio")) {
+return >virtio_mmio.obj;
+}
+
+fprintf(stderr, "%s not present in loongarch/virt\n", device);
+g_assert_not_reached();
+}
+
+static void loongarch_config_qpci_bus(QGenericPCIBus *qpci)
+{
+qpci->gpex_pio_base = LOONGARCH_VIRT_PIO_BASE;
+qpci->bus.pio_alloc_ptr = LOONGARCH_VIRT_PCIE_PIO_OFFSET;
+qpci->bus.pio_limit = LOONGARCH_VIRT_PCIE_PIO_LIMIT;
+qpci->bus.mmio_alloc_ptr = LOONGARCH_VIRT_PCIE_MMIO32_BASE;
+qpci->bus.mmio_limit = LOONGARCH_VIRT_PCIE_MMIO32_LIMIT;
+qpci->ecam_alloc_ptr = LOONGARCH_VIRT_PCIE_ECAM_BASE;
+}
+
+static void *qos_create_machine_loongarch_virt(QTestState *qts)
+{
+QVirtMachine *machine = g_new0(QVirtMachine, 1);
+
+alloc_init(>alloc, 0,
+   LOONGARCH_VIRT_RAM_ADDR,
+   LOONGARCH_VIRT_RAM_ADDR + LOONGARCH_VIRT_RAM_SIZE,
+   LOONGARCH_PAGE_SIZE);
+
+

[PULL 3/6] hw/intc/loongarch_extioi: Add extioi virt extension definition

2024-06-05 Thread Song Gao

On LoongArch, the hardware extended IRQ model can route IRQs to only
four vcpus. This patch adds the virt extension definition so that
IRQs can be routed to 256 vcpus.

1.Extended IRQ model:
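                                |
+-----------+     +-------------|--------+     +-----------+
| IPI/Timer | --> | CPUINTC(0-3)|(4-255) | <-- | IPI/Timer |
+-----------+     +-------------|--------+     +-----------+
                        ^       |
                        |
                   +---------+
                   | EIOINTC |
                   +---------+
                    ^       ^
                    |       |
             +---------+ +---------+
             | PCH-PIC | | PCH-MSI |
             +---------+ +---------+
               ^     ^           ^
               |     |           |
       +--------+ +---------+ +---------+
       | UARTs  | | Devices | | Devices |
       +--------+ +---------+ +---------+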

2.Virt extended IRQ model:

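  +-----+    +---------------+     +-------+
  | IPI |--> | CPUINTC(0-255)| <-- | Timer |
  +-----+    +---------------+     +-------+
                     ^
                     |
               +-----------+
               | V-EIOINTC |
               +-----------+
                ^         ^
                |         |
          +---------+ +---------+
          | PCH-PIC | | PCH-MSI |
          +---------+ +---------+
            ^     ^           ^
            |     |           |
    +--------+ +---------+ +---------+
    | UARTs  | | Devices | | Devices |
    +--------+ +---------+ +---------+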

Signed-off-by: Song Gao 
Reviewed-by: Bibo Mao 
Message-Id: <20240528083855.1912757-2-gaos...@loongson.cn>
---
 hw/intc/loongarch_extioi.c | 88 --
 hw/loongarch/virt.c| 60 +---
 include/hw/intc/loongarch_extioi.h | 21 +++
 3 files changed, 146 insertions(+), 23 deletions(-)

diff --git a/hw/intc/loongarch_extioi.c b/hw/intc/loongarch_extioi.c
index 0b358548eb..1e8e0114dc 100644
--- a/hw/intc/loongarch_extioi.c
+++ b/hw/intc/loongarch_extioi.c
@@ -143,10 +143,13 @@ static inline void 
extioi_update_sw_coremap(LoongArchExtIOI *s, int irq,
 
 for (i = 0; i < 4; i++) {
 cpu = val & 0xff;
-cpu = ctz32(cpu);
-cpu = (cpu >= 4) ? 0 : cpu;
 val = val >> 8;
 
+if (!(s->status & BIT(EXTIOI_ENABLE_CPU_ENCODE))) {
+cpu = ctz32(cpu);
+cpu = (cpu >= 4) ? 0 : cpu;
+}
+
 if (s->sw_coremap[irq + i] == cpu) {
 continue;
 }
@@ -265,6 +268,61 @@ static const MemoryRegionOps extioi_ops = {
 .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
+static MemTxResult extioi_virt_readw(void *opaque, hwaddr addr, uint64_t *data,
+ unsigned size, MemTxAttrs attrs)
+{
+LoongArchExtIOI *s = LOONGARCH_EXTIOI(opaque);
+
+switch (addr) {
+case EXTIOI_VIRT_FEATURES:
+*data = s->features;
+break;
+case EXTIOI_VIRT_CONFIG:
+*data = s->status;
+break;
+default:
+g_assert_not_reached();
+}
+
+return MEMTX_OK;
+}
+
+static MemTxResult extioi_virt_writew(void *opaque, hwaddr addr,
+  uint64_t val, unsigned size,
+  MemTxAttrs attrs)
+{
+LoongArchExtIOI *s = LOONGARCH_EXTIOI(opaque);
+
+switch (addr) {
+case EXTIOI_VIRT_FEATURES:
+return MEMTX_ACCESS_ERROR;
+
+case EXTIOI_VIRT_CONFIG:
+/*
+ * extioi features can only be set at disabled status
+ */
+if ((s->status & BIT(EXTIOI_ENABLE)) && val) {
+return MEMTX_ACCESS_ERROR;
+}
+
+s->status = val & s->features;
+break;
+default:
+g_assert_not_reached();
+}
+return MEMTX_OK;
+}
+
+static const MemoryRegionOps extioi_virt_ops = {
+.read_with_attrs = extioi_virt_readw,
+.write_with_attrs = extioi_virt_writew,
+.impl.min_access_size = 4,
+.impl.max_access_size = 4,
+.valid.min_access_size = 4,
+.valid.max_access_size = 8,
+.endianness = DEVICE_LITTLE_ENDIAN,
+};
+
 static void loongarch_extioi_realize(DeviceState *dev, Error **errp)
 {
 LoongArchExtIOI *s = LOONGARCH_EXTIOI(dev);
@@ -284,6 +342,16 @@ static void loongarch_extioi_realize(DeviceState *dev, 
Error **errp)
 memory_region_init_io(>extioi_system_mem, OBJECT(s), _ops,
   s, "extioi_system_mem", 0x900);
 sysbus_init_mmio(sbd, >extioi_system_mem);
+
+if (s->features & BIT(EXTIOI_HAS_VIRT_EXTENSION)) {
+memory_region_init_io(>virt_extend, OBJECT(s), _virt_ops,
+  s, "extioi_virt", EXTIOI_VIRT_SIZE);
+sysbus_init_mmio(sbd, >virt_extend);
+s->features |= EXTIOI_VIRT_HAS_FEATURES;
+} else {
+s->status |= BIT(EXTIOI_ENABLE);
+

[PULL 6/6] target/loongarch: fix a wrong print in cpu dump

2024-06-05 Thread Song Gao

From: lanyanzhi 

description:
loongarch_cpu_dump_state() wants to dump all LoongArch CPU
state registers, but there is a small typo when printing
"PRCFG2": the value of CSR_PRCFG3 is printed in its place.

Cc: qemu-sta...@nongnu.org
Signed-off-by: lanyanzhi 
Reviewed-by: Richard Henderson 
Reviewed-by: Song Gao 
Message-Id: <20240604073831.90-1-lanyanzhi...@ict.ac.cn>
Signed-off-by: Song Gao 
---
 target/loongarch/cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index b5c1ec94af..270f711f11 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -707,7 +707,7 @@ void loongarch_cpu_dump_state(CPUState *cs, FILE *f, int 
flags)
 qemu_fprintf(f, "EENTRY=%016" PRIx64 "\n", env->CSR_EENTRY);
 qemu_fprintf(f, "PRCFG1=%016" PRIx64 ", PRCFG2=%016" PRIx64 ","
  " PRCFG3=%016" PRIx64 "\n",
- env->CSR_PRCFG1, env->CSR_PRCFG3, env->CSR_PRCFG3);
+ env->CSR_PRCFG1, env->CSR_PRCFG2, env->CSR_PRCFG3);
 qemu_fprintf(f, "TLBRENTRY=%016" PRIx64 "\n", env->CSR_TLBRENTRY);
 qemu_fprintf(f, "TLBRBADV=%016" PRIx64 "\n", env->CSR_TLBRBADV);
 qemu_fprintf(f, "TLBRERA=%016" PRIx64 "\n", env->CSR_TLBRERA);
-- 
2.34.1

Re: [PATCH 2/2] util/bufferiszero: Add simd acceleration for loongarch64

2024-06-05 Thread maobibo





On 2024/6/6 上午11:42, Richard Henderson wrote:

On 6/5/24 20:36, maobibo wrote:

static biz_accel_fn const accel_table[] = {
 buffer_is_zero_int_ge256,
#ifdef __loongarch_sx
 buffer_is_zero_lsx,
#endif
#ifdef __loongarch_asx
 buffer_is_zero_lasx,
#endif
};

static unsigned best_accel(void)
{
#ifdef __loongarch_asx
 /* lasx may be index 1 or 2, but always last */
 return ARRAY_SIZE(accel_table) - 1;
#else
 /* lsx is always index 1 */
 return 1;
#endif
}
size of accel_table is decided at compile-time, will it be better if 
runtime checking is added also?  something like this:


  unsigned info = cpuinfo_init();

  #ifdef __loongarch_asx
  if (info & CPUINFO_LASX) {
   /* lasx may be index 1 or 2, but always last */
   return ARRAY_SIZE(accel_table) - 1;
  }
  #endif


No, because the ifdef checks that the *compiler* is prepared to use 
LASX/LSX instructions itself without further checks.  There's no point 
in qemu checking further.
As I understand it, the compiler options are currently the same for all
files; there is no separate compiler option for a single file or a
single function.


So if the compiler is prepared to use LASX/LSX instructions itself, the host
hardware must support LASX/LSX instructions, otherwise there will be a problem.


My main concern is that there is a hardware machine which supports LSX but
has neither LASX nor KVM.  A QEMU binary may fail to run on such a machine
if it is compiled with the LASX option.


Regards
Bibo Mao



r~

Re: [PATCH] target/i386: SEV: do not assume machine->cgs is SEV

2024-06-05 Thread Richard Henderson


On 6/5/24 20:45, Zhao Liu wrote:

@@ -1710,7 +1710,9 @@ void sev_es_set_reset_vector(CPUState *cpu)
  {
  X86CPU *x86;
  CPUX86State *env;
-SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
+ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
+SevCommonState *sev_common = SEV_COMMON(
+object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON));


SEV_COMMON(object_dynamic_cast()) looks like a double cast; we can simply
convert the pointer type directly:

(SevCommonState *) object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON)


You don't need the explicit cast either, since C auto-converts from void*.

  sev_common = object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON);


r~

Re: [PATCH 2/2] util/bufferiszero: Add simd acceleration for loongarch64

2024-06-05 Thread Richard Henderson


On 6/5/24 20:36, maobibo wrote:

static biz_accel_fn const accel_table[] = {
 buffer_is_zero_int_ge256,
#ifdef __loongarch_sx
 buffer_is_zero_lsx,
#endif
#ifdef __loongarch_asx
 buffer_is_zero_lasx,
#endif
};

static unsigned best_accel(void)
{
#ifdef __loongarch_asx
 /* lasx may be index 1 or 2, but always last */
 return ARRAY_SIZE(accel_table) - 1;
#else
 /* lsx is always index 1 */
 return 1;
#endif
}
size of accel_table is decided at compile-time, will it be better if runtime checking is 
added also?  something like this:


  unsigned info = cpuinfo_init();

  #ifdef __loongarch_asx
  if (info & CPUINFO_LASX) {
   /* lasx may be index 1 or 2, but always last */
   return ARRAY_SIZE(accel_table) - 1;
  }
  #endif


No, because the ifdef checks that the *compiler* is prepared to use LASX/LSX instructions 
itself without further checks.  There's no point in qemu checking further.



r~

Re: [PATCH 2/2] util/bufferiszero: Add simd acceleration for loongarch64

2024-06-05 Thread maobibo





On 2024/6/6 上午11:27, Richard Henderson wrote:

On 6/5/24 20:18, Richard Henderson wrote:

On 6/5/24 19:30, maobibo wrote:



On 2024/6/6 上午7:51, Richard Henderson wrote:

On 6/5/24 02:32, Bibo Mao wrote:

Different gcc versions support different features, so the macros
CONFIG_LSX_OPT and CONFIG_LASX_OPT are added here to detect whether gcc
supports the built-in lsx/lasx macros.

Function buffer_zero_lsx() is added for 128bit simd fpu optimization,
and function buffer_zero_lasx() is for 256bit simd fpu optimization.

Loongarch gcc built-in lsx/lasx macro can be used only when compiler
option -mlsx/-mlasx is added, and there is no separate compiler option
for function only. So it is only in effect when qemu is compiled with
parameter --extra-cflags="-mlasx"

Signed-off-by: Bibo Mao 
---
  meson.build |  11 +
  util/bufferiszero.c | 103 


  2 files changed, 114 insertions(+)

diff --git a/meson.build b/meson.build
index 6386607144..29bc362d7a 100644
--- a/meson.build
+++ b/meson.build
@@ -2855,6 +2855,17 @@ 
config_host_data.set('CONFIG_ARM_AES_BUILTIN', cc.compiles('''

  void foo(uint8x16_t *p) { *p = vaesmcq_u8(*p); }
    '''))
+# For Loongarch64, detect if LSX/LASX are available.
+ config_host_data.set('CONFIG_LSX_OPT', cc.compiles('''
+    #include "lsxintrin.h"
+    int foo(__m128i v) { return __lsx_bz_v(v); }
+  '''))
+
+config_host_data.set('CONFIG_LASX_OPT', cc.compiles('''
+    #include "lasxintrin.h"
+    int foo(__m256i v) { return __lasx_xbz_v(v); }
+  '''))


Both of these are introduced by gcc 14 and llvm 18, so I'm not 
certain of the utility of separate tests.  We might simplify this with


   config_host_data.set('CONFIG_LSX_LASX_INTRIN_H',
 cc.has_header('lsxintrin.h') && cc.has_header('lasxintrin.h'))


As you say, these headers require vector instructions to be enabled 
at compile-time rather than detecting them at runtime.  This is a 
point where the compilers could be improved to support 
__attribute__((target("xyz"))) and the builtins with that.  The i386 
port does this, for instance.


In the meantime, it means that you don't need a runtime test.  
Similar to aarch64 and the use of __ARM_NEON as a compile-time test 
for simd support.  Perhaps


#elif defined(CONFIG_LSX_LASX_INTRIN_H) && \
   (defined(__loongarch_sx) || defined(__loongarch_asx))
# ifdef __loongarch_sx
   ...
# endif
# ifdef __loongarch_asx
   ...
# endif

Sure, will do in this way.
There is also a runtime check based on hwcap, such as this:

unsigned info = cpuinfo_init();
   if (info & CPUINFO_LASX)


static biz_accel_fn const accel_table[] = {
 buffer_is_zero_int_ge256,
#ifdef __loongarch_sx
 buffer_is_zero_lsx,
#endif
#ifdef __loongarch_asx
 buffer_is_zero_lasx,
#endif
};

static unsigned best_accel(void)
{
#ifdef __loongarch_asx
 /* lasx may be index 1 or 2, but always last */
 return ARRAY_SIZE(accel_table) - 1;
#else
 /* lsx is always index 1 */
 return 1;
#endif
}
size of accel_table is decided at compile-time, will it be better if 
runtime checking is added also?  something like this:


 unsigned info = cpuinfo_init();

 #ifdef __loongarch_asx
 if (info & CPUINFO_LASX) {
  /* lasx may be index 1 or 2, but always last */
  return ARRAY_SIZE(accel_table) - 1;
 }
 #endif



It occurs to me that by accumulating host specific sections to this 
file, we should split it like the atomics.  Put each portion in 
host/include/*/host/bufferiszero.h.inc.

sure, will do.



I'll send a patch set handling the existing two hosts.


r~

Re: [PATCH] target/i386: SEV: do not assume machine->cgs is SEV

2024-06-05 Thread Zhao Liu

On Thu, Jun 06, 2024 at 12:44:09AM +0200, Paolo Bonzini wrote:
> Date: Thu,  6 Jun 2024 00:44:09 +0200
> From: Paolo Bonzini 
> Subject: [PATCH] target/i386: SEV: do not assume machine->cgs is SEV
> X-Mailer: git-send-email 2.45.1
> 
> There can be other confidential computing classes that are not derived
> from sev-common.  Avoid aborting when encountering them.
> 
> Signed-off-by: Paolo Bonzini 
> ---
>  target/i386/sev.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/target/i386/sev.c b/target/i386/sev.c
> index 004c667ac14..97e15f8b7a9 100644
> --- a/target/i386/sev.c
> +++ b/target/i386/sev.c
> @@ -1710,7 +1710,9 @@ void sev_es_set_reset_vector(CPUState *cpu)
>  {
>  X86CPU *x86;
>  CPUX86State *env;
> -SevCommonState *sev_common = 
> SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
> +ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
> +SevCommonState *sev_common = SEV_COMMON(
> +object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON));

SEV_COMMON(object_dynamic_cast()) looks like a double cast; we can simply
convert the pointer type directly:

(SevCommonState *) object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON)

>  /* Only update if we have valid reset information */
>  if (!sev_common || !sev_common->reset_data_valid) {
> -- 
> 2.45.1
> 
>

[PATCH v2 5/9] target/i386: Split out gdb-internal.h

2024-06-05 Thread Richard Henderson

Reviewed-by: Alex Bennée 
Reviewed-by: Pierrick Bouvier 
Signed-off-by: Richard Henderson 
---
 target/i386/gdb-internal.h | 65 ++
 target/i386/gdbstub.c  |  1 +
 2 files changed, 66 insertions(+)
 create mode 100644 target/i386/gdb-internal.h

diff --git a/target/i386/gdb-internal.h b/target/i386/gdb-internal.h
new file mode 100644
index 00..7cf4c1a656
--- /dev/null
+++ b/target/i386/gdb-internal.h
@@ -0,0 +1,65 @@
+/*
+ * x86 gdb server stub
+ *
+ * Copyright (c) 2003-2005 Fabrice Bellard
+ * Copyright (c) 2013 SUSE LINUX Products GmbH
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef I386_GDB_INTERNAL_H
+#define I386_GDB_INTERNAL_H
+
+/*
+ * Keep these in sync with assignment to
+ * gdb_num_core_regs in target/i386/cpu.c
+ * and with the machine description
+ */
+
+/*
+ * SEG: 6 segments, plus fs_base, gs_base, kernel_gs_base
+ */
+
+/*
+ * general regs ->  8 or 16
+ */
+#define IDX_NB_IP   1
+#define IDX_NB_FLAGS1
+#define IDX_NB_SEG  (6 + 3)
+#define IDX_NB_CTL  6
+#define IDX_NB_FP   16
+/*
+ * fpu regs --> 8 or 16
+ */
+#define IDX_NB_MXCSR1
+/*
+ *  total > 8+1+1+9+6+16+8+1=50 or 16+1+1+9+6+16+16+1=66
+ */
+
+#define IDX_IP_REG  CPU_NB_REGS
+#define IDX_FLAGS_REG   (IDX_IP_REG + IDX_NB_IP)
+#define IDX_SEG_REGS(IDX_FLAGS_REG + IDX_NB_FLAGS)
+#define IDX_CTL_REGS(IDX_SEG_REGS + IDX_NB_SEG)
+#define IDX_FP_REGS (IDX_CTL_REGS + IDX_NB_CTL)
+#define IDX_XMM_REGS(IDX_FP_REGS + IDX_NB_FP)
+#define IDX_MXCSR_REG   (IDX_XMM_REGS + CPU_NB_REGS)
+
+#define IDX_CTL_CR0_REG (IDX_CTL_REGS + 0)
+#define IDX_CTL_CR2_REG (IDX_CTL_REGS + 1)
+#define IDX_CTL_CR3_REG (IDX_CTL_REGS + 2)
+#define IDX_CTL_CR4_REG (IDX_CTL_REGS + 3)
+#define IDX_CTL_CR8_REG (IDX_CTL_REGS + 4)
+#define IDX_CTL_EFER_REG(IDX_CTL_REGS + 5)
+
+#endif
diff --git a/target/i386/gdbstub.c b/target/i386/gdbstub.c
index 4acf485879..96b4382a5d 100644
--- a/target/i386/gdbstub.c
+++ b/target/i386/gdbstub.c
@@ -20,6 +20,7 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "gdbstub/helpers.h"
+#include "gdb-internal.h"
 
 #ifdef TARGET_X86_64
 static const int gpr_map[16] = {
-- 
2.34.1

[PATCH v2 6/9] target/i386: Introduce cpu_compute_eflags_ccop

2024-06-05 Thread Richard Henderson

This is a generalization of cpu_compute_eflags, with a dynamic
value of cc_op, and is thus tcg specific.

Reviewed-by: Pierrick Bouvier 
Signed-off-by: Richard Henderson 
---
 target/i386/cpu.h   |  2 ++
 target/i386/tcg/cc_helper.c | 10 ++
 2 files changed, 12 insertions(+)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index c64ef0c1a2..48ad6f495b 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2431,6 +2431,8 @@ void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int 
bank,
 
 uint32_t cpu_cc_compute_all(CPUX86State *env1);
 
+uint32_t cpu_compute_eflags_ccop(CPUX86State *env, CCOp op);
+
 static inline uint32_t cpu_compute_eflags(CPUX86State *env)
 {
 uint32_t eflags = env->eflags;
diff --git a/target/i386/tcg/cc_helper.c b/target/i386/tcg/cc_helper.c
index f76e9cb8cf..8203682ca8 100644
--- a/target/i386/tcg/cc_helper.c
+++ b/target/i386/tcg/cc_helper.c
@@ -225,6 +225,16 @@ uint32_t cpu_cc_compute_all(CPUX86State *env)
 return helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, CC_OP);
 }
 
+uint32_t cpu_compute_eflags_ccop(CPUX86State *env, CCOp op)
+{
+uint32_t eflags;
+
+eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, op);
+eflags |= env->df & DF_MASK;
+eflags |= env->eflags & ~(VM_MASK | RF_MASK);
+return eflags;
+}
+
 target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
  target_ulong src2, int op)
 {
-- 
2.34.1

[PATCH v2 2/9] accel/tcg: Set CPUState.plugin_ra before all plugin callbacks

2024-06-05 Thread Richard Henderson

Store a host code address to use with the tcg unwinder when called
from a plugin.  Generate one such store per guest insn that uses
a plugin callback.

Reviewed-by: Pierrick Bouvier 
Signed-off-by: Richard Henderson 
---
 include/hw/core/cpu.h  |  4 +---
 accel/tcg/plugin-gen.c | 49 +-
 2 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index a2c8536943..19b7fcc9f3 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -354,9 +354,7 @@ typedef union IcountDecr {
 typedef struct CPUNegativeOffsetState {
 CPUTLB tlb;
 #ifdef CONFIG_PLUGIN
-/*
- * The callback pointer are accessed via TCG (see gen_empty_mem_helper).
- */
+uintptr_t plugin_ra;
 GArray *plugin_mem_cbs;
 #endif
 IcountDecr icount_decr;
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index cc1634e7a6..650e3810e6 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -37,6 +37,12 @@ enum plugin_gen_from {
 PLUGIN_GEN_AFTER_TB,
 };
 
+enum plugin_gen_ra {
+GEN_RA_DONE,
+GEN_RA_FROM_TB,
+GEN_RA_FROM_INSN,
+};
+
 /* called before finishing a TB with exit_tb, goto_tb or goto_ptr */
 void plugin_gen_disable_mem_helpers(void)
 {
@@ -213,11 +219,37 @@ static void gen_mem_cb(struct qemu_plugin_regular_cb *cb,
 tcg_temp_free_i32(cpu_index);
 }
 
-static void inject_cb(struct qemu_plugin_dyn_cb *cb)
+static void inject_ra(enum plugin_gen_ra *gen_ra)
+{
+TCGv_ptr ra;
 
+switch (*gen_ra) {
+case GEN_RA_DONE:
+return;
+case GEN_RA_FROM_TB:
+ra = tcg_constant_ptr(NULL);
+break;
+case GEN_RA_FROM_INSN:
+ra = tcg_temp_ebb_new_ptr();
+tcg_gen_plugin_pc(ra);
+break;
+default:
+g_assert_not_reached();
+}
+
+tcg_gen_st_ptr(ra, tcg_env,
+   offsetof(CPUState, neg.plugin_ra) -
+   offsetof(ArchCPU, env));
+tcg_temp_free_ptr(ra);
+*gen_ra = GEN_RA_DONE;
+}
+
+static void inject_cb(struct qemu_plugin_dyn_cb *cb,
+  enum plugin_gen_ra *gen_ra)
 {
 switch (cb->type) {
 case PLUGIN_CB_REGULAR:
+inject_ra(gen_ra);
 gen_udata_cb(>regular);
 break;
 case PLUGIN_CB_COND:
@@ -235,19 +267,21 @@ static void inject_cb(struct qemu_plugin_dyn_cb *cb)
 }
 
 static void inject_mem_cb(struct qemu_plugin_dyn_cb *cb,
+  enum plugin_gen_ra *gen_ra,
   enum qemu_plugin_mem_rw rw,
   qemu_plugin_meminfo_t meminfo, TCGv_i64 addr)
 {
 switch (cb->type) {
 case PLUGIN_CB_MEM_REGULAR:
 if (rw && cb->regular.rw) {
+inject_ra(gen_ra);
 gen_mem_cb(>regular, meminfo, addr);
 }
 break;
 case PLUGIN_CB_INLINE_ADD_U64:
 case PLUGIN_CB_INLINE_STORE_U64:
 if (rw && cb->inline_insn.rw) {
-inject_cb(cb);
+inject_cb(cb, gen_ra);
 }
 break;
 default:
@@ -260,6 +294,7 @@ static void plugin_gen_inject(struct qemu_plugin_tb 
*plugin_tb)
 {
 TCGOp *op, *next;
 int insn_idx = -1;
+enum plugin_gen_ra gen_ra;
 
 if (unlikely(qemu_loglevel_mask(LOG_TB_OP_PLUGIN)
  && qemu_log_in_addr_range(tcg_ctx->plugin_db->pc_first))) {
@@ -279,10 +314,12 @@ static void plugin_gen_inject(struct qemu_plugin_tb 
*plugin_tb)
  */
 memset(tcg_ctx->free_temps, 0, sizeof(tcg_ctx->free_temps));
 
+gen_ra = GEN_RA_FROM_TB;
 QTAILQ_FOREACH_SAFE(op, _ctx->ops, link, next) {
 switch (op->opc) {
 case INDEX_op_insn_start:
 insn_idx++;
+gen_ra = GEN_RA_FROM_INSN;
 break;
 
 case INDEX_op_plugin_cb:
@@ -318,7 +355,8 @@ static void plugin_gen_inject(struct qemu_plugin_tb 
*plugin_tb)
 cbs = plugin_tb->cbs;
 for (i = 0, n = (cbs ? cbs->len : 0); i < n; i++) {
 inject_cb(
-_array_index(cbs, struct qemu_plugin_dyn_cb, i));
+_array_index(cbs, struct qemu_plugin_dyn_cb, i),
+_ra);
 }
 break;
 
@@ -330,7 +368,8 @@ static void plugin_gen_inject(struct qemu_plugin_tb 
*plugin_tb)
 cbs = insn->insn_cbs;
 for (i = 0, n = (cbs ? cbs->len : 0); i < n; i++) {
 inject_cb(
-_array_index(cbs, struct qemu_plugin_dyn_cb, i));
+_array_index(cbs, struct qemu_plugin_dyn_cb, i),
+_ra);
 }
 break;
 
@@ -362,7 +401,7 @@ static void plugin_gen_inject(struct qemu_plugin_tb 
*plugin_tb)
 cbs = insn->mem_cbs;
 for (i = 0, n = (cbs ? cbs->len : 0); i < n; i++) {
 inject_mem_cb(_array_index(cbs, struct qemu_plugin_dyn_cb, 
i),
-  rw, meminfo, addr);

[PATCH v2 7/9] target/i386: Implement TCGCPUOps for plugin register reads

2024-06-05 Thread Richard Henderson

Reviewed-by: Pierrick Bouvier 
Signed-off-by: Richard Henderson 
---
 target/i386/tcg/tcg-cpu.c | 72 ++-
 1 file changed, 56 insertions(+), 16 deletions(-)

diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
index cca19cd40e..2370053df2 100644
--- a/target/i386/tcg/tcg-cpu.c
+++ b/target/i386/tcg/tcg-cpu.c
@@ -22,9 +22,11 @@
 #include "helper-tcg.h"
 #include "qemu/accel.h"
 #include "hw/core/accel-cpu.h"
-
+#include "gdbstub/helpers.h"
+#include "gdb-internal.h"
 #include "tcg-cpu.h"
 
+
 /* Frob eflags into and out of the CPU temporary format.  */
 
 static void x86_cpu_exec_enter(CPUState *cs)
@@ -61,38 +63,74 @@ static void x86_cpu_synchronize_from_tb(CPUState *cs,
 }
 }
 
-static void x86_restore_state_to_opc(CPUState *cs,
- const TranslationBlock *tb,
- const uint64_t *data)
+static uint64_t eip_from_unwind(CPUX86State *env, const TranslationBlock *tb,
+uint64_t data0)
 {
-X86CPU *cpu = X86_CPU(cs);
-CPUX86State *env = &cpu->env;
-int cc_op = data[1];
 uint64_t new_pc;
 
 if (tb_cflags(tb) & CF_PCREL) {
 /*
- * data[0] in PC-relative TBs is also a linear address, i.e. an 
address with
- * the CS base added, because it is not guaranteed that EIP bits 12 
and higher
- * stay the same across the translation block.  Add the CS base back 
before
- * replacing the low bits, and subtract it below just like for 
!CF_PCREL.
+ * data[0] in PC-relative TBs is also a linear address,
+ * i.e. an address with the CS base added, because it is
+ * not guaranteed that EIP bits 12 and higher stay the
+ * same across the translation block.  Add the CS base
+ * back before replacing the low bits, and subtract it
+ * below just like for !CF_PCREL.
  */
 uint64_t pc = env->eip + tb->cs_base;
-new_pc = (pc & TARGET_PAGE_MASK) | data[0];
+new_pc = (pc & TARGET_PAGE_MASK) | data0;
 } else {
-new_pc = data[0];
+new_pc = data0;
 }
 if (tb->flags & HF_CS64_MASK) {
-env->eip = new_pc;
-} else {
-env->eip = (uint32_t)(new_pc - tb->cs_base);
+return new_pc;
 }
+return (uint32_t)(new_pc - tb->cs_base);
+}
 
+static void x86_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
+{
+CPUX86State *env = cpu_env(cs);
+CCOp cc_op;
+
+env->eip = eip_from_unwind(env, tb, data[0]);
+
+cc_op = data[1];
 if (cc_op != CC_OP_DYNAMIC) {
 env->cc_op = cc_op;
 }
 }
 
+static bool x86_plugin_need_unwind_for_reg(CPUState *cs, int reg)
+{
+return reg == IDX_IP_REG || reg == IDX_FLAGS_REG;
+}
+
+static int x86_plugin_unwind_read_reg(CPUState *cs, GByteArray *buf, int reg,
+  const TranslationBlock *tb,
+  const uint64_t *data)
+{
+CPUX86State *env = cpu_env(cs);
+CCOp cc_op;
+
+switch (reg) {
+case IDX_IP_REG:
+return gdb_get_regl(buf, eip_from_unwind(env, tb, data[0]));
+
+case IDX_FLAGS_REG:
+cc_op = data[1];
+if (cc_op == CC_OP_DYNAMIC) {
+cc_op = env->cc_op;
+}
+return gdb_get_reg32(buf, cpu_compute_eflags_ccop(env, cc_op));
+
+default:
+g_assert_not_reached();
+}
+}
+
 #ifndef CONFIG_USER_ONLY
 static bool x86_debug_check_breakpoint(CPUState *cs)
 {
@@ -110,6 +148,8 @@ static const TCGCPUOps x86_tcg_ops = {
 .initialize = tcg_x86_init,
 .synchronize_from_tb = x86_cpu_synchronize_from_tb,
 .restore_state_to_opc = x86_restore_state_to_opc,
+.plugin_need_unwind_for_reg = x86_plugin_need_unwind_for_reg,
+.plugin_unwind_read_reg = x86_plugin_unwind_read_reg,
 .cpu_exec_enter = x86_cpu_exec_enter,
 .cpu_exec_exit = x86_cpu_exec_exit,
 #ifdef CONFIG_USER_ONLY
-- 
2.34.1

[PATCH v2 0/9] plugins: Use unwind info for special gdb registers

2024-06-05 Thread Richard Henderson



This is an attempt to fix
https://gitlab.com/qemu-project/qemu/-/issues/2208
("PC is not updated for each instruction in TCG plugins")

I have only updated target/{i386,arm} so far, but basically all
targets need updating for the new callbacks.  Extra points to
anyone who sees how to avoid the extra code duplication.  :-)


r~


Richard Henderson (9):
  tcg: Introduce INDEX_op_plugin_pc
  accel/tcg: Set CPUState.plugin_ra before all plugin callbacks
  accel/tcg: Return the TranslationBlock from cpu_unwind_state_data
  plugins: Introduce TCGCPUOps callbacks for mid-tb register reads
  target/i386: Split out gdb-internal.h
  target/i386: Introduce cpu_compute_eflags_ccop
  target/i386: Implement TCGCPUOps for plugin register reads
  target/arm: Add aarch64_tcg_ops
  target/arm: Implement TCGCPUOps for plugin register reads

 include/exec/cpu-common.h |  9 +++--
 include/hw/core/cpu.h |  4 +-
 include/hw/core/tcg-cpu-ops.h | 14 +++
 include/tcg/tcg-op-common.h   |  1 +
 include/tcg/tcg-opc.h |  1 +
 target/arm/internals.h|  8 +++-
 target/i386/cpu.h |  2 +
 target/i386/gdb-internal.h| 65 +++
 accel/tcg/plugin-gen.c| 49 +---
 accel/tcg/translate-all.c |  9 +++--
 plugins/api.c | 36 +-
 target/arm/cpu.c  | 40 ++-
 target/arm/cpu64.c| 55 ++
 target/arm/tcg/cpu-v7m.c  |  2 +
 target/i386/gdbstub.c |  1 +
 target/i386/helper.c  |  6 ++-
 target/i386/tcg/cc_helper.c   | 10 +
 target/i386/tcg/tcg-cpu.c | 72 +++
 tcg/tcg-op.c  |  5 +++
 tcg/tcg.c | 10 +
 20 files changed, 360 insertions(+), 39 deletions(-)
 create mode 100644 target/i386/gdb-internal.h

-- 
2.34.1

[PATCH v2 3/9] accel/tcg: Return the TranslationBlock from cpu_unwind_state_data

2024-06-05 Thread Richard Henderson

Adjust the i386 get_memio_eip function to use tb->cflags instead
of tcg_cflags_has, which is technically more correct.

Reviewed-by: Pierrick Bouvier 
Signed-off-by: Richard Henderson 
---
 include/exec/cpu-common.h | 9 +
 accel/tcg/translate-all.c | 9 +
 target/i386/helper.c  | 6 --
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 815342d043..c1887462e6 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -189,12 +189,13 @@ uint32_t curr_cflags(CPUState *cpu);
  * @host_pc: the host pc within the translation
  * @data: output data
  *
- * Attempt to load the the unwind state for a host pc occurring in
- * translated code.  If @host_pc is not in translated code, the
- * function returns false; otherwise @data is loaded.
+ * Attempt to load the unwind state for a host pc occurring in translated
+ * code.  If @host_pc is not in translated code, the function returns NULL;
+ * otherwise @data is loaded and the TranslationBlock is returned.
  * This is the same unwind info as given to restore_state_to_opc.
  */
-bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data);
+const TranslationBlock *cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc,
+  uint64_t *data);
 
 /**
  * cpu_restore_state:
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index fdf6d8ac19..45a1cf57bc 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -243,15 +243,16 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
 return false;
 }
 
-bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
+const TranslationBlock *
+cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
 {
 if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
 TranslationBlock *tb = tcg_tb_lookup(host_pc);
-if (tb) {
-return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
+if (tb && cpu_unwind_data_from_tb(tb, host_pc, data) >= 0) {
+return tb;
 }
 }
-return false;
+return NULL;
 }
 
 void page_init(void)
diff --git a/target/i386/helper.c b/target/i386/helper.c
index f9d1381f90..565e01a3a9 100644
--- a/target/i386/helper.c
+++ b/target/i386/helper.c
@@ -521,13 +521,15 @@ static inline target_ulong get_memio_eip(CPUX86State *env)
 #ifdef CONFIG_TCG
 uint64_t data[TARGET_INSN_START_WORDS];
 CPUState *cs = env_cpu(env);
+const TranslationBlock *tb;
 
-if (!cpu_unwind_state_data(cs, cs->mem_io_pc, data)) {
+tb = cpu_unwind_state_data(cs, cs->mem_io_pc, data);
+if (!tb) {
 return env->eip;
 }
 
 /* Per x86_restore_state_to_opc. */
-if (tcg_cflags_has(cs, CF_PCREL)) {
+if (tb->cflags & CF_PCREL) {
 return (env->eip & TARGET_PAGE_MASK) | data[0];
 } else {
 return data[0] - env->segs[R_CS].base;
-- 
2.34.1

[PATCH v2 8/9] target/arm: Add aarch64_tcg_ops

2024-06-05 Thread Richard Henderson

For the moment, this is an exact copy of arm_tcg_ops.
Export arm_cpu_exec_interrupt for the cross-file reference.

Signed-off-by: Richard Henderson 
---
 target/arm/internals.h |  1 +
 target/arm/cpu.c   |  2 +-
 target/arm/cpu64.c | 30 ++
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/target/arm/internals.h b/target/arm/internals.h
index 11b5da2562..dc53d86249 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -364,6 +364,7 @@ void arm_restore_state_to_opc(CPUState *cs,
 
 #ifdef CONFIG_TCG
 void arm_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb);
+bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request);
 #endif /* CONFIG_TCG */
 
 typedef enum ARMFPRounding {
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 35fa281f1b..3cd4711064 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -824,7 +824,7 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned 
int excp_idx,
 return unmasked || pstate_unmasked;
 }
 
-static bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
+bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
 {
 CPUClass *cc = CPU_GET_CLASS(cs);
 CPUARMState *env = cpu_env(cs);
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 262a1d6c0b..7ba80099af 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -31,6 +31,9 @@
 #include "hvf_arm.h"
 #include "qapi/visitor.h"
 #include "hw/qdev-properties.h"
+#ifdef CONFIG_TCG
+#include "hw/core/tcg-cpu-ops.h"
+#endif
 #include "internals.h"
 #include "cpu-features.h"
 #include "cpregs.h"
@@ -793,6 +796,29 @@ static const gchar *aarch64_gdb_arch_name(CPUState *cs)
 return "aarch64";
 }
 
+#ifdef CONFIG_TCG
+static const TCGCPUOps aarch64_tcg_ops = {
+.initialize = arm_translate_init,
+.synchronize_from_tb = arm_cpu_synchronize_from_tb,
+.debug_excp_handler = arm_debug_excp_handler,
+.restore_state_to_opc = arm_restore_state_to_opc,
+
+#ifdef CONFIG_USER_ONLY
+.record_sigsegv = arm_cpu_record_sigsegv,
+.record_sigbus = arm_cpu_record_sigbus,
+#else
+.tlb_fill = arm_cpu_tlb_fill,
+.cpu_exec_interrupt = arm_cpu_exec_interrupt,
+.do_interrupt = arm_cpu_do_interrupt,
+.do_transaction_failed = arm_cpu_do_transaction_failed,
+.do_unaligned_access = arm_cpu_do_unaligned_access,
+.adjust_watchpoint_address = arm_adjust_watchpoint_address,
+.debug_check_watchpoint = arm_debug_check_watchpoint,
+.debug_check_breakpoint = arm_debug_check_breakpoint,
+#endif /* !CONFIG_USER_ONLY */
+};
+#endif /* CONFIG_TCG */
+
 static void aarch64_cpu_class_init(ObjectClass *oc, void *data)
 {
 CPUClass *cc = CPU_CLASS(oc);
@@ -802,6 +828,10 @@ static void aarch64_cpu_class_init(ObjectClass *oc, void 
*data)
 cc->gdb_core_xml_file = "aarch64-core.xml";
 cc->gdb_arch_name = aarch64_gdb_arch_name;
 
+#ifdef CONFIG_TCG
+cc->tcg_ops = &aarch64_tcg_ops;
+#endif
+
 object_class_property_add_bool(oc, "aarch64", aarch64_cpu_get_aarch64,
aarch64_cpu_set_aarch64);
 object_class_property_set_description(oc, "aarch64",
-- 
2.34.1

[PATCH v2 9/9] target/arm: Implement TCGCPUOps for plugin register reads

2024-06-05 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 target/arm/internals.h   |  7 +--
 target/arm/cpu.c | 38 ++
 target/arm/cpu64.c   | 25 +
 target/arm/tcg/cpu-v7m.c |  2 ++
 4 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/target/arm/internals.h b/target/arm/internals.h
index dc53d86249..fe28937515 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -358,11 +358,14 @@ void init_cpreg_list(ARMCPU *cpu);
 void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu);
 void arm_translate_init(void);
 
+#ifdef CONFIG_TCG
 void arm_restore_state_to_opc(CPUState *cs,
   const TranslationBlock *tb,
   const uint64_t *data);
-
-#ifdef CONFIG_TCG
+bool arm_plugin_need_unwind_for_reg(CPUState *cs, int reg);
+int arm_plugin_unwind_read_reg(CPUState *cs, GByteArray *buf, int reg,
+   const TranslationBlock *tb,
+   const uint64_t *data);
 void arm_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb);
 bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request);
 #endif /* CONFIG_TCG */
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 3cd4711064..e8ac3da351 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -29,6 +29,7 @@
 #include "cpu.h"
 #ifdef CONFIG_TCG
 #include "hw/core/tcg-cpu-ops.h"
+#include "gdbstub/helpers.h"
 #endif /* CONFIG_TCG */
 #include "internals.h"
 #include "cpu-features.h"
@@ -120,6 +121,41 @@ void arm_restore_state_to_opc(CPUState *cs,
 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
 }
 }
+
+bool arm_plugin_need_unwind_for_reg(CPUState *cs, int reg)
+{
+return reg == 15 || reg == 25; /* pc (r15) or cpsr */
+}
+
+int arm_plugin_unwind_read_reg(CPUState *cs, GByteArray *buf, int reg,
+   const TranslationBlock *tb,
+   const uint64_t *data)
+{
+CPUARMState *env = cpu_env(cs);
+uint32_t val, condexec;
+
+switch (reg) {
+case 15: /* PC */
+val = data[0];
+if (tb_cflags(tb) & CF_PCREL) {
+val |= env->regs[15] & TARGET_PAGE_MASK;
+}
+break;
+case 25: /* CPSR, or XPSR for M-profile */
+if (arm_feature(env, ARM_FEATURE_M)) {
+val = xpsr_read(env);
+} else {
+val = cpsr_read(env);
+}
+condexec = data[1] & 0xff;
+val = (val & ~(3 << 25)) | ((condexec & 3) << 25);
+val = (val & ~(0xfc << 8)) | ((condexec & 0xfc) << 8);
+break;
+default:
+g_assert_not_reached();
+}
+return gdb_get_reg32(buf, val);
+}
 #endif /* CONFIG_TCG */
 
 /*
@@ -2657,6 +2693,8 @@ static const TCGCPUOps arm_tcg_ops = {
 .synchronize_from_tb = arm_cpu_synchronize_from_tb,
 .debug_excp_handler = arm_debug_excp_handler,
 .restore_state_to_opc = arm_restore_state_to_opc,
+.plugin_need_unwind_for_reg = arm_plugin_need_unwind_for_reg,
+.plugin_unwind_read_reg = arm_plugin_unwind_read_reg,
 
 #ifdef CONFIG_USER_ONLY
 .record_sigsegv = arm_cpu_record_sigsegv,
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 7ba80099af..1595be5d8f 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -33,6 +33,8 @@
 #include "hw/qdev-properties.h"
 #ifdef CONFIG_TCG
 #include "hw/core/tcg-cpu-ops.h"
+#include "exec/translation-block.h"
+#include "gdbstub/helpers.h"
 #endif
 #include "internals.h"
 #include "cpu-features.h"
@@ -797,11 +799,34 @@ static const gchar *aarch64_gdb_arch_name(CPUState *cs)
 }
 
 #ifdef CONFIG_TCG
+static bool aarch64_plugin_need_unwind_for_reg(CPUState *cs, int reg)
+{
+return reg == 32; /* pc */
+}
+
+static int aarch64_plugin_unwind_read_reg(CPUState *cs, GByteArray *buf,
+  int reg, const TranslationBlock *tb,
+  const uint64_t *data)
+{
+CPUARMState *env = cpu_env(cs);
+uint64_t val;
+
+assert(reg == 32);
+
+val = data[0];
+if (tb_cflags(tb) & CF_PCREL) {
+val |= env->pc & TARGET_PAGE_MASK;
+}
+return gdb_get_reg64(buf, val);
+}
+
 static const TCGCPUOps aarch64_tcg_ops = {
 .initialize = arm_translate_init,
 .synchronize_from_tb = arm_cpu_synchronize_from_tb,
 .debug_excp_handler = arm_debug_excp_handler,
 .restore_state_to_opc = arm_restore_state_to_opc,
+.plugin_need_unwind_for_reg = aarch64_plugin_need_unwind_for_reg,
+.plugin_unwind_read_reg = aarch64_plugin_unwind_read_reg,
 
 #ifdef CONFIG_USER_ONLY
 .record_sigsegv = arm_cpu_record_sigsegv,
diff --git a/target/arm/tcg/cpu-v7m.c b/target/arm/tcg/cpu-v7m.c
index c059c681e9..47e44f70c7 100644
--- a/target/arm/tcg/cpu-v7m.c
+++ b/target/arm/tcg/cpu-v7m.c
@@ -237,6 +237,8 @@ static const TCGCPUOps arm_v7m_tcg_ops = {
 .synchronize_from_tb = arm_cpu_synchronize_from_tb,
 .debug_excp_handler =

[PATCH v2 4/9] plugins: Introduce TCGCPUOps callbacks for mid-tb register reads

2024-06-05 Thread Richard Henderson

Certain target registers are not updated continuously within
the translation block.  For normal exception handling we use
unwind info to re-generate the correct value when required.
Leverage that same info for reading those registers for plugins.

All targets will need updating for these new callbacks.

Reviewed-by: Pierrick Bouvier 
Signed-off-by: Richard Henderson 
---
 include/hw/core/tcg-cpu-ops.h | 14 ++
 plugins/api.c | 36 +--
 2 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h
index 099de3375e..b34f999e78 100644
--- a/include/hw/core/tcg-cpu-ops.h
+++ b/include/hw/core/tcg-cpu-ops.h
@@ -53,6 +53,20 @@ struct TCGCPUOps {
 /** @debug_excp_handler: Callback for handling debug exceptions */
 void (*debug_excp_handler)(CPUState *cpu);
 
+/**
+ * @plugin_need_unwind_for_reg:
+ * True if unwind info needed for reading reg.
+ */
+bool (*plugin_need_unwind_for_reg)(CPUState *cpu, int reg);
+/**
+ * @plugin_unwind_read_reg:
+ * Like CPUClass.gdb_read_register, but for registers that require
+ * regeneration using unwind info, like in @restore_state_to_opc.
+ */
+int (*plugin_unwind_read_reg)(CPUState *cpu, GByteArray *buf, int reg,
+  const TranslationBlock *tb,
+  const uint64_t *data);
+
 #ifdef CONFIG_USER_ONLY
 /**
  * @fake_user_interrupt: Callback for 'fake exception' handling.
diff --git a/plugins/api.c b/plugins/api.c
index 5a0a7f8c71..53127ed9ee 100644
--- a/plugins/api.c
+++ b/plugins/api.c
@@ -40,10 +40,12 @@
 #include "qemu/plugin.h"
 #include "qemu/log.h"
 #include "tcg/tcg.h"
+#include "tcg/insn-start-words.h"
 #include "exec/exec-all.h"
 #include "exec/gdbstub.h"
 #include "exec/translator.h"
 #include "disas/disas.h"
+#include "hw/core/tcg-cpu-ops.h"
 #include "plugin.h"
 #ifndef CONFIG_USER_ONLY
 #include "exec/ram_addr.h"
@@ -526,9 +528,39 @@ GArray *qemu_plugin_get_registers(void)
 
 int qemu_plugin_read_register(struct qemu_plugin_register *reg, GByteArray 
*buf)
 {
-g_assert(current_cpu);
+CPUState *cs;
+uintptr_t ra;
+int regno;
 
-return gdb_read_register(current_cpu, buf, GPOINTER_TO_INT(reg));
+assert(current_cpu);
+cs = current_cpu;
+ra = cs->neg.plugin_ra;
+regno = GPOINTER_TO_INT(reg);
+
+/*
+ * When plugin_ra is 0, we have no unwind info.  This will be true for
+ * TB callbacks that happen before any insns of the TB have started.
+ */
+if (ra) {
+const TCGCPUOps *tcg_ops = cs->cc->tcg_ops;
+
+/*
+ * For plugins in the middle of the TB, we may need to locate
+ * and use unwind data to reconstruct a register value.
+ * Usually this is required for the PC, but there may be others.
+ */
+if (tcg_ops->plugin_need_unwind_for_reg &&
+tcg_ops->plugin_need_unwind_for_reg(cs, regno)) {
+uint64_t data[TARGET_INSN_START_WORDS];
+const TranslationBlock *tb;
+
+tb = cpu_unwind_state_data(cs, ra, data);
+assert(tb);
+return tcg_ops->plugin_unwind_read_reg(cs, buf, regno, tb, data);
+}
+}
+
+return gdb_read_register(cs, buf, regno);
 }
 
 struct qemu_plugin_scoreboard *qemu_plugin_scoreboard_new(size_t element_size)
-- 
2.34.1

[PATCH v2 1/9] tcg: Introduce INDEX_op_plugin_pc

2024-06-05 Thread Richard Henderson

Add an opcode to find a code address within the current insn,
for later use with unwinding.  Generate the code generically
using tcg_reg_alloc_do_movi.

Reviewed-by: Pierrick Bouvier 
Signed-off-by: Richard Henderson 
---
 include/tcg/tcg-op-common.h |  1 +
 include/tcg/tcg-opc.h   |  1 +
 tcg/tcg-op.c|  5 +
 tcg/tcg.c   | 10 ++
 4 files changed, 17 insertions(+)

diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h
index 009e2778c5..a32c88a182 100644
--- a/include/tcg/tcg-op-common.h
+++ b/include/tcg/tcg-op-common.h
@@ -76,6 +76,7 @@ void tcg_gen_lookup_and_goto_ptr(void);
 
 void tcg_gen_plugin_cb(unsigned from);
 void tcg_gen_plugin_mem_cb(TCGv_i64 addr, unsigned meminfo);
+void tcg_gen_plugin_pc(TCGv_ptr);
 
 /* 32 bit ops */
 
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
index 546eb49c11..087d1b82da 100644
--- a/include/tcg/tcg-opc.h
+++ b/include/tcg/tcg-opc.h
@@ -199,6 +199,7 @@ DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
 
 DEF(plugin_cb, 0, 0, 1, TCG_OPF_NOT_PRESENT)
 DEF(plugin_mem_cb, 0, 1, 1, TCG_OPF_NOT_PRESENT)
+DEF(plugin_pc, 1, 0, 0, TCG_OPF_NOT_PRESENT)
 
 /* Replicate ld/st ops for 32 and 64-bit guest addresses. */
 DEF(qemu_ld_a32_i32, 1, 1, 1,
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index eff3728622..b8ca78cbe4 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -322,6 +322,11 @@ void tcg_gen_plugin_mem_cb(TCGv_i64 addr, unsigned meminfo)
 tcg_gen_op2(INDEX_op_plugin_mem_cb, tcgv_i64_arg(addr), meminfo);
 }
 
+void tcg_gen_plugin_pc(TCGv_ptr arg)
+{
+tcg_gen_op1(INDEX_op_plugin_pc, tcgv_ptr_arg(arg));
+}
+
 /* 32 bit ops */
 
 void tcg_gen_discard_i32(TCGv_i32 arg)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 34e3056380..b7c28d92a6 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -4689,6 +4689,13 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp 
*op)
 }
 }
 
+static void tcg_reg_alloc_plugin_pc(TCGContext *s, const TCGOp *op)
+{
+tcg_reg_alloc_do_movi(s, arg_temp(op->args[0]),
+  (uintptr_t)tcg_splitwx_to_rx(s->code_ptr),
+  op->life, output_pref(op, 0));
+}
+
 /*
  * Specialized code generation for INDEX_op_dup_vec.
  */
@@ -6196,6 +6203,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, 
uint64_t pc_start)
 case INDEX_op_mov_vec:
 tcg_reg_alloc_mov(s, op);
 break;
+case INDEX_op_plugin_pc:
+tcg_reg_alloc_plugin_pc(s, op);
+break;
 case INDEX_op_dup_vec:
 tcg_reg_alloc_dup(s, op);
 break;
-- 
2.34.1

Re: [PATCH 2/2] util/bufferiszero: Add simd acceleration for loongarch64

2024-06-05 Thread Richard Henderson


On 6/5/24 20:18, Richard Henderson wrote:

On 6/5/24 19:30, maobibo wrote:



On 2024/6/6 上午7:51, Richard Henderson wrote:

On 6/5/24 02:32, Bibo Mao wrote:

Different gcc versions have different features; macros CONFIG_LSX_OPT
and CONFIG_LASX_OPT are added here to detect whether gcc supports
the built-in lsx/lasx macros.

Function buffer_zero_lsx() is added for 128bit simd fpu optimization,
and function buffer_zero_lasx() is for 256bit simd fpu optimization.

Loongarch gcc built-in lsx/lasx macro can be used only when compiler
option -mlsx/-mlasx is added, and there is no separate compiler option
for function only. So it is only in effect when qemu is compiled with
parameter --extra-cflags="-mlasx"

Signed-off-by: Bibo Mao 
---
  meson.build |  11 +
  util/bufferiszero.c | 103 
  2 files changed, 114 insertions(+)

diff --git a/meson.build b/meson.build
index 6386607144..29bc362d7a 100644
--- a/meson.build
+++ b/meson.build
@@ -2855,6 +2855,17 @@ config_host_data.set('CONFIG_ARM_AES_BUILTIN', 
cc.compiles('''
  void foo(uint8x16_t *p) { *p = vaesmcq_u8(*p); }
    '''))
+# For Loongarch64, detect if LSX/LASX are available.
+ config_host_data.set('CONFIG_LSX_OPT', cc.compiles('''
+    #include "lsxintrin.h"
+    int foo(__m128i v) { return __lsx_bz_v(v); }
+  '''))
+
+config_host_data.set('CONFIG_LASX_OPT', cc.compiles('''
+    #include "lasxintrin.h"
+    int foo(__m256i v) { return __lasx_xbz_v(v); }
+  '''))


Both of these are introduced by gcc 14 and llvm 18, so I'm not certain of the utility 
of separate tests.  We might simplify this with


   config_host_data.set('CONFIG_LSX_LASX_INTRIN_H',
 cc.has_header('lsxintrin.h') && cc.has_header('lasxintrin.h'))


As you say, these headers require vector instructions to be enabled at compile-time 
rather than detecting them at runtime.  This is a point where the compilers could be 
improved to support __attribute__((target("xyz"))) and the builtins with that.  The 
i386 port does this, for instance.


In the meantime, it means that you don't need a runtime test.  Similar to aarch64 and 
the use of __ARM_NEON as a compile-time test for simd support.  Perhaps


#elif defined(CONFIG_LSX_LASX_INTRIN_H) && \
   (defined(__loongarch_sx) || defined(__loongarch_asx))
# ifdef __loongarch_sx
   ...
# endif
# ifdef __loongarch_asx
   ...
# endif

Sure, will do in this way.
And also there is a runtime check coming from hwcap, such as this:

unsigned info = cpuinfo_init();
   if (info & CPUINFO_LASX)


static biz_accel_fn const accel_table[] = {
     buffer_is_zero_int_ge256,
#ifdef __loongarch_sx
     buffer_is_zero_lsx,
#endif
#ifdef __loongarch_asx
     buffer_is_zero_lasx,
#endif
};

static unsigned best_accel(void)
{
#ifdef __loongarch_asx
     /* lasx may be index 1 or 2, but always last */
     return ARRAY_SIZE(accel_table) - 1;
#else
     /* lsx is always index 1 */
     return 1;
#endif
}


It occurs to me that by accumulating host specific sections to this file, we should split 
it like the atomics.  Put each portion in host/include/*/host/bufferiszero.h.inc.


I'll send a patch set handling the existing two hosts.


r~

Re: [PATCH 2/2] util/bufferiszero: Add simd acceleration for loongarch64

2024-06-05 Thread Richard Henderson


On 6/5/24 19:30, maobibo wrote:



On 2024/6/6 上午7:51, Richard Henderson wrote:

On 6/5/24 02:32, Bibo Mao wrote:

Different gcc versions have different features; macros CONFIG_LSX_OPT
and CONFIG_LASX_OPT are added here to detect whether gcc supports
the built-in lsx/lasx macros.

Function buffer_zero_lsx() is added for 128bit simd fpu optimization,
and function buffer_zero_lasx() is for 256bit simd fpu optimization.

Loongarch gcc built-in lsx/lasx macro can be used only when compiler
option -mlsx/-mlasx is added, and there is no separate compiler option
for function only. So it is only in effect when qemu is compiled with
parameter --extra-cflags="-mlasx"

Signed-off-by: Bibo Mao 
---
  meson.build |  11 +
  util/bufferiszero.c | 103 
  2 files changed, 114 insertions(+)

diff --git a/meson.build b/meson.build
index 6386607144..29bc362d7a 100644
--- a/meson.build
+++ b/meson.build
@@ -2855,6 +2855,17 @@ config_host_data.set('CONFIG_ARM_AES_BUILTIN', 
cc.compiles('''
  void foo(uint8x16_t *p) { *p = vaesmcq_u8(*p); }
    '''))
+# For Loongarch64, detect if LSX/LASX are available.
+ config_host_data.set('CONFIG_LSX_OPT', cc.compiles('''
+    #include "lsxintrin.h"
+    int foo(__m128i v) { return __lsx_bz_v(v); }
+  '''))
+
+config_host_data.set('CONFIG_LASX_OPT', cc.compiles('''
+    #include "lasxintrin.h"
+    int foo(__m256i v) { return __lasx_xbz_v(v); }
+  '''))


Both of these are introduced by gcc 14 and llvm 18, so I'm not certain of the utility of 
separate tests.  We might simplify this with


   config_host_data.set('CONFIG_LSX_LASX_INTRIN_H',
 cc.has_header('lsxintrin.h') && cc.has_header('lasxintrin.h'))


As you say, these headers require vector instructions to be enabled at compile-time 
rather than detecting them at runtime.  This is a point where the compilers could be 
improved to support __attribute__((target("xyz"))) and the builtins with that.  The i386 
port does this, for instance.


In the meantime, it means that you don't need a runtime test.  Similar to aarch64 and 
the use of __ARM_NEON as a compile-time test for simd support.  Perhaps


#elif defined(CONFIG_LSX_LASX_INTRIN_H) && \
   (defined(__loongarch_sx) || defined(__loongarch_asx))
# ifdef __loongarch_sx
   ...
# endif
# ifdef __loongarch_asx
   ...
# endif

Sure, will do in this way.
And also there is a runtime check coming from hwcap, such as this:

unsigned info = cpuinfo_init();
   if (info & CPUINFO_LASX)


static biz_accel_fn const accel_table[] = {
buffer_is_zero_int_ge256,
#ifdef __loongarch_sx
buffer_is_zero_lsx,
#endif
#ifdef __loongarch_asx
buffer_is_zero_lasx,
#endif
};

static unsigned best_accel(void)
{
#ifdef __loongarch_asx
/* lasx may be index 1 or 2, but always last */
return ARRAY_SIZE(accel_table) - 1;
#else
/* lsx is always index 1 */
return 1;
#endif
}


r~

Re: [PATCH] Hexagon: fix HVX store new

2024-06-05 Thread Brian Cain




On 5/20/2024 10:53 AM, Matheus Tavares Bernardino wrote:

At 09a7e7db0f (Hexagon (target/hexagon) Remove uses of
op_regs_generated.h.inc, 2024-03-06), we've changed the logic of
check_new_value() to use the new pre-calculated
packet->insn[...].dest_idx instead of calculating the index on the fly
using opcode_reginfo[...]. The dest_idx index is calculated roughly like
the following:

 for reg in iset[tag]["syntax"]:
 if reg.is_written():
 dest_idx = regno
 break

Thus, we take the first register that is writable. Before that,
however, we also used to follow an alphabetical order on the register
type: 'd', 'e', 'x', and 'y'. No longer following that makes us select
the wrong register index and the HVX store new instruction does not
update the memory as expected.

Signed-off-by: Matheus Tavares Bernardino 
---


Queued -- at https://github.com/quic/qemu/tree/hex.next



  tests/tcg/hexagon/hvx_misc.c  | 23 +++
  target/hexagon/gen_trans_funcs.py |  9 ++---
  2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c
index 1fe14b5158..90c3733da0 100644
--- a/tests/tcg/hexagon/hvx_misc.c
+++ b/tests/tcg/hexagon/hvx_misc.c
@@ -474,6 +474,27 @@ static void test_vcombine(void)
  check_output_w(__LINE__, BUFSIZE);
  }
  
+void test_store_new()

+{
+asm volatile(
+"r0 = #0x12345678\n"
+"v0 = vsplat(r0)\n"
+"r0 = #0xff00ff00\n"
+"v1 = vsplat(r0)\n"
+"{\n"
+"   vdeal(v1,v0,r0)\n"
+"   vmem(%0) = v0.new\n"
+"}\n"
+:
+: "r"(&output[0])
+: "r0", "v0", "v1", "memory"
+);
+for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) {
+expect[0].w[i] = 0x12345678;
+}
+check_output_w(__LINE__, 1);
+}
+
  int main()
  {
  init_buffers();
@@ -515,6 +536,8 @@ int main()
  
  test_vcombine();
  
+test_store_new();

+
  puts(err ? "FAIL" : "PASS");
  return err ? 1 : 0;
  }
diff --git a/target/hexagon/gen_trans_funcs.py 
b/target/hexagon/gen_trans_funcs.py
index 9f86b4edbd..30f0c73e0c 100755
--- a/target/hexagon/gen_trans_funcs.py
+++ b/target/hexagon/gen_trans_funcs.py
@@ -89,6 +89,7 @@ def gen_trans_funcs(f):
  
  new_read_idx = -1

  dest_idx = -1
+dest_idx_reg_id = None
  has_pred_dest = "false"
  for regno, (reg_type, reg_id, *_) in enumerate(regs):
  reg = hex_common.get_register(tag, reg_type, reg_id)
@@ -97,9 +98,11 @@ def gen_trans_funcs(f):
  """))
  if reg.is_read() and reg.is_new():
  new_read_idx = regno
-# dest_idx should be the first destination, so check for -1
-if reg.is_written() and dest_idx == -1:
-dest_idx = regno
+if reg.is_written():
+# dest_idx should be the first destination alphabetically
+if dest_idx_reg_id is None or reg_id < dest_idx_reg_id:
+dest_idx = regno
+dest_idx_reg_id = reg_id
  if reg_type == "P" and reg.is_written() and not reg.is_read():
  has_pred_dest = "true"

Re: [PATCH v4 1/4] target/hexagon: idef-parser remove unused defines

2024-06-05 Thread Brian Cain




On 5/23/2024 7:58 AM, Anton Johansson via wrote:

Before switching to GArray/g_string_printf we used fixed size arrays for
output buffers and instructions arguments among other things.

Macros defining the sizes of these buffers were left behind, remove
them.

Signed-off-by: Anton Johansson 
Reviewed-by: Taylor Simpson 
Reviewed-by: Brian Cain 
---


Queued -- at https://github.com/quic/qemu/tree/hex.next



  target/hexagon/idef-parser/idef-parser.h | 10 --
  1 file changed, 10 deletions(-)

diff --git a/target/hexagon/idef-parser/idef-parser.h 
b/target/hexagon/idef-parser/idef-parser.h
index 3faa1deecd..8594cbe3a2 100644
--- a/target/hexagon/idef-parser/idef-parser.h
+++ b/target/hexagon/idef-parser/idef-parser.h
@@ -23,16 +23,6 @@
  #include 
  #include 
  
-#define TCGV_NAME_SIZE 7

-#define MAX_WRITTEN_REGS 32
-#define OFFSET_STR_LEN 32
-#define ALLOC_LIST_LEN 32
-#define ALLOC_NAME_SIZE 32
-#define INIT_LIST_LEN 32
-#define OUT_BUF_LEN (1024 * 1024)
-#define SIGNATURE_BUF_LEN (128 * 1024)
-#define HEADER_BUF_LEN (128 * 1024)
-
  /* Variadic macros to wrap the buffer printing functions */
  #define EMIT(c, ...)  
 \
  do {  
 \

Re: [PATCH] stubs/meson: Fix qemuutil build when --disable-system

2024-06-05 Thread Zhao Liu

On Wed, Jun 05, 2024 at 11:25:49PM +0800, Zhao Liu wrote:
> Date: Wed, 5 Jun 2024 23:25:49 +0800
> From: Zhao Liu 
> Subject: [PATCH] stubs/meson: Fix qemuutil build when --disable-system
> X-Mailer: git-send-email 2.34.1
> 
> Compiling without system, user, tools or guest-agent fails with the
> following error message:
> 
> ./configure --disable-system --disable-user --disable-tools \
> --disable-guest-agent
> 
> error message:
> 
> /usr/bin/ld: libqemuutil.a.p/util_error-report.c.o: in function 
> `error_printf':
> /media/liuzhao/data/qemu-cook/build/../util/error-report.c:38: undefined 
> reference to `error_vprintf'
> /usr/bin/ld: libqemuutil.a.p/util_error-report.c.o: in function `vreport':
> /media/liuzhao/data/qemu-cook/build/../util/error-report.c:215: undefined 
> reference to `error_vprintf'
> collect2: error: ld returned 1 exit status
> 
> This is because tests/bench and tests/unit both need qemuutil, which
> requires error_vprintf stub when system is disabled.
> 
> Add error_vprintf stub into stub_ss for all cases other than disabling
> system.
> 
> Fixes: 3a15604900c4 ("stubs: include stubs only if needed")
> Reported-by: Daniel P. Berrangé 
> Signed-off-by: Zhao Liu 
> ---
>  stubs/meson.build | 10 +++---
>  1 file changed, 3 insertions(+), 7 deletions(-)
> 
> diff --git a/stubs/meson.build b/stubs/meson.build
> index 3b9d42023cb2..a99522ab6bbf 100644
> --- a/stubs/meson.build
> +++ b/stubs/meson.build
> @@ -45,17 +45,10 @@ if have_block or have_ga
>stub_ss.add(files('qmp-quit.c'))
>  endif
>  
> -if have_ga
> -  stub_ss.add(files('error-printf.c'))
> -endif
> -
>  if have_block or have_user
>stub_ss.add(files('qtest.c'))
>stub_ss.add(files('vm-stop.c'))
>stub_ss.add(files('vmstate.c'))
> -
> -  # more symbols provided by the monitor
> -  stub_ss.add(files('error-printf.c'))
>  endif
>  
>  if have_user
> @@ -76,6 +69,9 @@ if have_system
>stub_ss.add(files('target-monitor-defs.c'))
>stub_ss.add(files('win32-kbd-hook.c'))
>stub_ss.add(files('xen-hw-stub.c'))
> +else
> +  # more symbols provided by the monitor
> +  stub_ss.add(files('error-printf.c'))
>  endif

Oops, it's not a correct fix. error-printf.c should still be added
unconditionally.

>  
>  if have_system or have_user
> -- 
> 2.34.1
>

Re: [PATCH 2/2] util/bufferiszero: Add simd acceleration for loongarch64

2024-06-05 Thread maobibo





On 2024/6/6 上午7:51, Richard Henderson wrote:

On 6/5/24 02:32, Bibo Mao wrote:

Different gcc versions have different features, macro CONFIG_LSX_OPT
and CONFIG_LASX_OPT is added here to detect whether gcc supports
built-in lsx/lasx macro.

Function buffer_zero_lsx() is added for 128bit simd fpu optimization,
and function buffer_zero_lasx() is for 256bit simd fpu optimization.

Loongarch gcc built-in lsx/lasx macro can be used only when compiler
option -mlsx/-mlasx is added, and there is no separate compiler option
for function only. So it is only in effect when qemu is compiled with
parameter --extra-cflags="-mlasx"

Signed-off-by: Bibo Mao 
---
  meson.build |  11 +
  util/bufferiszero.c | 103 
  2 files changed, 114 insertions(+)

diff --git a/meson.build b/meson.build
index 6386607144..29bc362d7a 100644
--- a/meson.build
+++ b/meson.build
@@ -2855,6 +2855,17 @@ config_host_data.set('CONFIG_ARM_AES_BUILTIN', 
cc.compiles('''

  void foo(uint8x16_t *p) { *p = vaesmcq_u8(*p); }
    '''))
+# For Loongarch64, detect if LSX/LASX are available.
+ config_host_data.set('CONFIG_LSX_OPT', cc.compiles('''
+    #include "lsxintrin.h"
+    int foo(__m128i v) { return __lsx_bz_v(v); }
+  '''))
+
+config_host_data.set('CONFIG_LASX_OPT', cc.compiles('''
+    #include "lasxintrin.h"
+    int foo(__m256i v) { return __lasx_xbz_v(v); }
+  '''))


Both of these are introduced by gcc 14 and llvm 18, so I'm not certain 
of the utility of separate tests.  We might simplify this with


   config_host_data.set('CONFIG_LSX_LASX_INTRIN_H',
     cc.has_header('lsxintrin.h') && cc.has_header('lasxintrin.h'))


As you say, these headers require vector instructions to be enabled at 
compile-time rather than detecting them at runtime.  This is a point 
where the compilers could be improved to support 
__attribute__((target("xyz"))) and the builtins with that.  The i386 
port does this, for instance.


In the meantime, it means that you don't need a runtime test.  Similar 
to aarch64 and the use of __ARM_NEON as a compile-time test for simd 
support.  Perhaps


#elif defined(CONFIG_LSX_LASX_INTRIN_H) && \
   (defined(__loongarch_sx) || defined(__loongarch_asx))
# ifdef __loongarch_sx
   ...
# endif
# ifdef __loongarch_asx
   ...
# endif

Sure, will do in this way.
And there is also a runtime check coming from hwcap, such as this:

unsigned info = cpuinfo_init();
  if (info & CPUINFO_LASX)




The actual code is perfectly fine, of course, since it follows the 
pattern from the others.  How much improvement do you see from 
bufferiszero-bench?

Yes, it is much easier to follow the others; this is nothing new.

Here is the benchmark result: no obvious improvement with a 1K
buffer size; 200% improvement with LASX and 100% improvement with LSX
with a 16K page size.

# /root/src/qemu/b/tests/bench/bufferiszero-bench --tap -k
# Start of cutils tests
# Start of bufferiszero tests
# buffer_is_zero #0:  1KB    13460 MB/sec
# buffer_is_zero #0:  4KB    36857 MB/sec
# buffer_is_zero #0: 16KB    69884 MB/sec
# buffer_is_zero #0: 64KB    80863 MB/sec
#
# buffer_is_zero #1:  1KB    11180 MB/sec
# buffer_is_zero #1:  4KB    27972 MB/sec
# buffer_is_zero #1: 16KB    42951 MB/sec
# buffer_is_zero #1: 64KB    43293 MB/sec
#
# buffer_is_zero #2:  1KB    10026 MB/sec
# buffer_is_zero #2:  4KB    18373 MB/sec
# buffer_is_zero #2: 16KB    23933 MB/sec
# buffer_is_zero #2: 64KB    25180 MB/sec

Regards
Bibo Mao




r~

Re: [PATCH 1/2] util: Add lasx cpuinfo for loongarch64

2024-06-05 Thread maobibo





On 2024/6/5 下午7:53, Philippe Mathieu-Daudé wrote:

On 5/6/24 11:32, Bibo Mao wrote:

Lasx is 256bit vector FPU capability, lsx is 128bit vector VFP. lsx
is added already, lasx is added here.

Signed-off-by: Bibo Mao 
---
  host/include/loongarch64/host/cpuinfo.h | 1 +
  util/cpuinfo-loongarch.c    | 1 +
  2 files changed, 2 insertions(+)

diff --git a/host/include/loongarch64/host/cpuinfo.h 
b/host/include/loongarch64/host/cpuinfo.h

index fab664a10b..d7bf27501d 100644
--- a/host/include/loongarch64/host/cpuinfo.h
+++ b/host/include/loongarch64/host/cpuinfo.h
@@ -8,6 +8,7 @@
  #define CPUINFO_ALWAYS  (1u << 0)  /* so cpuinfo is nonzero */
  #define CPUINFO_LSX (1u << 1)
+#define CPUINFO_LASX    (1u << 2)
  /* Initialized with a constructor. */
  extern unsigned cpuinfo;
diff --git a/util/cpuinfo-loongarch.c b/util/cpuinfo-loongarch.c
index 08b6d7460c..bb1f7f698b 100644
--- a/util/cpuinfo-loongarch.c
+++ b/util/cpuinfo-loongarch.c
@@ -29,6 +29,7 @@ unsigned __attribute__((constructor)) 
cpuinfo_init(void)

  info = CPUINFO_ALWAYS;
  info |= (hwcap & HWCAP_LOONGARCH_LSX ? CPUINFO_LSX : 0);
+    info |= (hwcap & HWCAP_LOONGARCH_LASX ? CPUINFO_LASX : 0);
  cpuinfo = info;
  return info;


This is 
https://lore.kernel.org/qemu-devel/20240527211912.14060-6-richard.hender...@linaro.org/ 

oops, I did not notice this.

And I will drop patch 1 and refresh the patch based on this weblink.

Regards
Bibo Mao

Re: [PATCH v4 0/3] target/riscv/kvm: QEMU support for KVM Guest Debug on RISC-V

2024-06-05 Thread Alistair Francis

On Thu, Jun 6, 2024 at 11:50 AM Chao Du  wrote:
>
> This series implements QEMU KVM Guest Debug on RISC-V, with which we
> could debug RISC-V KVM guest from the host side, using software
> breakpoints.
>
> This series is based on riscv-to-apply.next branch and is also
> available at:
> https://github.com/Du-Chao/alistair23-qemu/tree/riscv-to-apply.next.0606
>
> The corresponding KVM side patches have been merged already:
> https://lore.kernel.org/kvm/20240402062628.5425-1-duc...@eswincomputing.com/
>
> A TODO list which will be added later:
> 1. HW breakpoints support
> 2. A 'corner case' in which the debug exception is not inserted by the
> debugger, need to be re-injected to the guest.
>
> v3,v4:
> - rebased.
>
> v2->v2 resend:
> - add the type conversion in patch #1 to avoid warnings
>
> v1->v2:
> - squash patch #2 into #1
> - check the instruction length from the tail two bits, instead of passing the
>   length information by parameters.
>
> RFC->v1:
> - Rebased on riscv-to-apply.next
> - use configs/ definition to conditionalize debug support
>
> v2 link:
> https://lore.kernel.org/qemu-riscv/20240528080759.26439-1-duc...@eswincomputing.com/
> v1 link:
> https://lore.kernel.org/qemu-riscv/20240527021916.12953-1-duc...@eswincomputing.com/
> RFC link:
> https://lore.kernel.org/qemu-riscv/20231221094923.7349-1-duc...@eswincomputing.com/
>
> Chao Du (3):
>   target/riscv/kvm: add software breakpoints support
>   target/riscv/kvm: handle the exit with debug reason
>   target/riscv/kvm: define TARGET_KVM_HAVE_GUEST_DEBUG

Thanks!

Applied to riscv-to-apply.next

Alistair

>
>  configs/targets/riscv64-softmmu.mak |  1 +
>  target/riscv/kvm/kvm-cpu.c  | 89 +
>  2 files changed, 90 insertions(+)
>
> --
> 2.17.1
>

[PATCH v4 0/3] target/riscv/kvm: QEMU support for KVM Guest Debug on RISC-V

2024-06-05 Thread Chao Du

This series implements QEMU KVM Guest Debug on RISC-V, with which we
could debug RISC-V KVM guest from the host side, using software
breakpoints.

This series is based on riscv-to-apply.next branch and is also
available at:
https://github.com/Du-Chao/alistair23-qemu/tree/riscv-to-apply.next.0606

The corresponding KVM side patches have been merged already:
https://lore.kernel.org/kvm/20240402062628.5425-1-duc...@eswincomputing.com/

A TODO list which will be added later:
1. HW breakpoints support
2. A 'corner case' in which the debug exception is not inserted by the
debugger, need to be re-injected to the guest.

v3,v4:
- rebased.

v2->v2 resend:
- add the type conversion in patch #1 to avoid warnings

v1->v2:
- squash patch #2 into #1
- check the instruction length from the tail two bits, instead of passing the
  length information by parameters.

RFC->v1:
- Rebased on riscv-to-apply.next
- use configs/ definition to conditionalize debug support

v2 link:
https://lore.kernel.org/qemu-riscv/20240528080759.26439-1-duc...@eswincomputing.com/
v1 link:
https://lore.kernel.org/qemu-riscv/20240527021916.12953-1-duc...@eswincomputing.com/
RFC link:
https://lore.kernel.org/qemu-riscv/20231221094923.7349-1-duc...@eswincomputing.com/

Chao Du (3):
  target/riscv/kvm: add software breakpoints support
  target/riscv/kvm: handle the exit with debug reason
  target/riscv/kvm: define TARGET_KVM_HAVE_GUEST_DEBUG

 configs/targets/riscv64-softmmu.mak |  1 +
 target/riscv/kvm/kvm-cpu.c  | 89 +
 2 files changed, 90 insertions(+)

--
2.17.1

[PATCH v4 1/3] target/riscv/kvm: add software breakpoints support

2024-06-05 Thread Chao Du

This patch implements insert/remove software breakpoint process.

For RISC-V, GDB treats single-step similarly to breakpoint: add a
breakpoint at the next step address, then continue. So this also
works for single-step debugging.

Implement kvm_arch_update_guest_debug(): Set the control flag
when there are active breakpoints. This will help KVM to know
the status in the userspace.

Add some stubs which are necessary for building, and will be
implemented later.

Signed-off-by: Chao Du 
Reviewed-by: Andrew Jones 
Acked-by: Alistair Francis 
---
 target/riscv/kvm/kvm-cpu.c | 69 ++
 1 file changed, 69 insertions(+)

diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
index 235e2cdaca..748fe5980f 100644
--- a/target/riscv/kvm/kvm-cpu.c
+++ b/target/riscv/kvm/kvm-cpu.c
@@ -1969,3 +1969,72 @@ static const TypeInfo riscv_kvm_cpu_type_infos[] = {
 };
 
 DEFINE_TYPES(riscv_kvm_cpu_type_infos)
+
+static const uint32_t ebreak_insn = 0x00100073;
+static const uint16_t c_ebreak_insn = 0x9002;
+
+int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
+{
+if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 2, 0)) {
+return -EINVAL;
+}
+
+if ((bp->saved_insn & 0x3) == 0x3) {
+if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0)
+|| cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&ebreak_insn, 4, 1)) 
{
+return -EINVAL;
+}
+} else {
+if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&c_ebreak_insn, 2, 1)) {
+return -EINVAL;
+}
+}
+
+return 0;
+}
+
+int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
+{
+uint32_t ebreak;
+uint16_t c_ebreak;
+
+if ((bp->saved_insn & 0x3) == 0x3) {
+if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&ebreak, 4, 0) ||
+ebreak != ebreak_insn ||
+cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) 
{
+return -EINVAL;
+}
+} else {
+if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&c_ebreak, 2, 0) ||
+c_ebreak != c_ebreak_insn ||
+cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 2, 1)) 
{
+return -EINVAL;
+}
+}
+
+return 0;
+}
+
+int kvm_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type)
+{
+/* TODO; To be implemented later. */
+return -EINVAL;
+}
+
+int kvm_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type)
+{
+/* TODO; To be implemented later. */
+return -EINVAL;
+}
+
+void kvm_arch_remove_all_hw_breakpoints(void)
+{
+/* TODO; To be implemented later. */
+}
+
+void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
+{
+if (kvm_sw_breakpoints_active(cs)) {
+dbg->control |= KVM_GUESTDBG_ENABLE;
+}
+}
-- 
2.17.1

[PATCH v4 2/3] target/riscv/kvm: handle the exit with debug reason

2024-06-05 Thread Chao Du

If the breakpoint belongs to the userspace then set the ret value.

Signed-off-by: Chao Du 
Reviewed-by: Daniel Henrique Barboza 
Reviewed-by: Andrew Jones 
Acked-by: Alistair Francis 
---
 target/riscv/kvm/kvm-cpu.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
index 748fe5980f..1047961fed 100644
--- a/target/riscv/kvm/kvm-cpu.c
+++ b/target/riscv/kvm/kvm-cpu.c
@@ -1555,6 +1555,21 @@ static int kvm_riscv_handle_csr(CPUState *cs, struct 
kvm_run *run)
 return ret;
 }
 
+static bool kvm_riscv_handle_debug(CPUState *cs)
+{
+RISCVCPU *cpu = RISCV_CPU(cs);
+CPURISCVState *env = &cpu->env;
+
+/* Ensure PC is synchronised */
+kvm_cpu_synchronize_state(cs);
+
+if (kvm_find_sw_breakpoint(cs, env->pc)) {
+return true;
+}
+
+return false;
+}
+
 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
 {
 int ret = 0;
@@ -1565,6 +1580,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run 
*run)
 case KVM_EXIT_RISCV_CSR:
 ret = kvm_riscv_handle_csr(cs, run);
 break;
+case KVM_EXIT_DEBUG:
+if (kvm_riscv_handle_debug(cs)) {
+ret = EXCP_DEBUG;
+}
+break;
 default:
 qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n",
   __func__, run->exit_reason);
-- 
2.17.1

[PATCH v4 3/3] target/riscv/kvm: define TARGET_KVM_HAVE_GUEST_DEBUG

2024-06-05 Thread Chao Du

To enable the KVM GUEST DEBUG for RISC-V at QEMU side.

Signed-off-by: Chao Du 
Reviewed-by: Daniel Henrique Barboza 
Reviewed-by: Andrew Jones 
Acked-by: Alistair Francis 
---
 configs/targets/riscv64-softmmu.mak | 1 +
 1 file changed, 1 insertion(+)

diff --git a/configs/targets/riscv64-softmmu.mak 
b/configs/targets/riscv64-softmmu.mak
index f688ffa7bc..917980e63e 100644
--- a/configs/targets/riscv64-softmmu.mak
+++ b/configs/targets/riscv64-softmmu.mak
@@ -1,6 +1,7 @@
 TARGET_ARCH=riscv64
 TARGET_BASE_ARCH=riscv
 TARGET_SUPPORTS_MTTCG=y
+TARGET_KVM_HAVE_GUEST_DEBUG=y
 TARGET_XML_FILES= gdb-xml/riscv-64bit-cpu.xml gdb-xml/riscv-32bit-fpu.xml 
gdb-xml/riscv-64bit-fpu.xml gdb-xml/riscv-64bit-virtual.xml
 # needed by boot.c
 TARGET_NEED_FDT=y
-- 
2.17.1

Re: [PATCH] MAINTAINERS: Add reviewers for ASPEED BMCs

2024-06-05 Thread Andrew Jeffery

On Wed, 2024-06-05 at 14:03 +0800, Jamin Lin via wrote:
> Add ASPEED members "Steven Lee", "Troy Lee" and "Jamin Lin"
> to be reviewers of ASPEED BMCs.
> 
> Signed-off-by: Jamin Lin 
> Signed-off-by: Troy Lee 
> Signed-off-by: Steven Lee 

I'm not very active wrt qemu these days but am still interested in
Aspeed-related patches. For what it's worth:

Reviewed-by: Andrew Jeffery 

> ---
>  MAINTAINERS | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 951556224a..0f63bcdc7d 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1158,6 +1158,9 @@ F: docs/system/arm/emcraft-sf2.rst
>  ASPEED BMCs
>  M: Cédric Le Goater 
>  M: Peter Maydell 
> +R: Steven Lee 
> +R: Troy Lee 
> +R: Jamin Lin 
>  R: Andrew Jeffery 
>  R: Joel Stanley 
>  L: qemu-...@nongnu.org

Re: [PULL 00/20] Net patches

2024-06-05 Thread Jason Wang

On Wed, Jun 5, 2024 at 6:14 PM Michael Tokarev  wrote:
>
> 04.06.2024 10:37, Jason Wang wrote:
> > Akihiko Odaki (18):
> >tap: Remove tap_probe_vnet_hdr_len()
> >tap: Remove qemu_using_vnet_hdr()
> >net: Move virtio-net header length assertion
> >net: Remove receive_raw()
> >tap: Call tap_receive_iov() from tap_receive()
> >tap: Shrink zeroed virtio-net header
> >virtio-net: Do not propagate ebpf-rss-fds errors
> >virtio-net: Add only one queue pair when realizing
> >virtio-net: Copy header only when necessary
> >virtio-net: Shrink header byte swapping buffer
> >virtio-net: Disable RSS on reset
> >virtio-net: Unify the logic to update NIC state for RSS
> >virtio-net: Always set populate_hash
> >virtio-net: Do not write hashes to peer buffer
> >ebpf: Fix RSS error handling
> >ebpf: Return 0 when configuration fails
> >ebpf: Refactor tun_rss_steering_prog()
> >ebpf: Add a separate target for skeleton
> >
> > Alexey Dobriyan (1):
> >virtio-net: drop too short packets early
> >
> > Andrew Melnychenko (1):
> >ebpf: Added traces back. Changed source set for eBPF to 'system'.
>
> Is there anything in there for qemu-stable?
> (NOT picking up without explicit mention of stable)

One candidate might be "virtio-net: drop too short packets early".

Thanks

>
> Thanks,
>
> /mjt
> --
> GPG Key transition (from rsa2048 to rsa4096) since 2024-04-24.
> New key: rsa4096/61AD3D98ECDF2C8E  9D8B E14E 3F2A 9DD7 9199  28F1 61AD 3D98 
> ECDF 2C8E
> Old key: rsa2048/457CE0A0804465C5  6EE1 95D1 886E 8FFB 810D  4324 457C E0A0 
> 8044 65C5
> Transition statement: http://www.corpit.ru/mjt/gpg-transition-2024.txt
>

Re: [PATCH] cxl: Get rid of unused cfmw_list

2024-06-05 Thread Zhijian Li (Fujitsu)



On 05/06/2024 20:02, Jonathan Cameron wrote:
> On Fri, 31 May 2024 14:13:17 +0800
> Li Zhijian  wrote:
> 
>> There is no user for this member. All '-M cxl-fmw.N' options have
>> been parsed and saved to CXLState.fixed_windows.
>>
>> Signed-off-by: Li Zhijian 
> 
> Hi Li,
> 
> Applied to my tree with slight change to patch title
> to hw/cxl: Get rid of unused cfmw_list
> 
> I aim to send a group of more minor changes like this for upstream
> in the next week or so.

Many thanks

> 
> Btw, to make it easy to spot QEMU patches in patchwork so that
> the kernel maintainers can ignore them - when posting to 
> linux-...@vger.kernel.org
> [PATCH qemu]
> marking for patches is helpful.

Thanks for your reminder.

Thanks
Zhijian

> 
> Thanks,
> 
> Jonathan
> 
>> ---
>>   hw/cxl/cxl-host.c| 1 -
>>   include/hw/cxl/cxl.h | 1 -
>>   2 files changed, 2 deletions(-)
>>
>> diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c
>> index c5f5fcfd64d0..926d3d3da705 100644
>> --- a/hw/cxl/cxl-host.c
>> +++ b/hw/cxl/cxl-host.c
>> @@ -335,7 +335,6 @@ static void machine_set_cfmw(Object *obj, Visitor *v, 
>> const char *name,
>>   for (it = cfmw_list; it; it = it->next) {
>>   cxl_fixed_memory_window_config(state, it->value, errp);
>>   }
>> -state->cfmw_list = cfmw_list;
>>   }
>>   
>>   void cxl_machine_init(Object *obj, CXLState *state)
>> diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h
>> index 75e47b686441..e3ecbef03872 100644
>> --- a/include/hw/cxl/cxl.h
>> +++ b/include/hw/cxl/cxl.h
>> @@ -43,7 +43,6 @@ typedef struct CXLState {
>>   MemoryRegion host_mr;
>>   unsigned int next_mr_idx;
>>   GList *fixed_windows;
>> -CXLFixedMemoryWindowOptionsList *cfmw_list;
>>   } CXLState;
>>   
>>   struct CXLHost {
>

Re: [PATCH v3 4/6] target/riscv: Add 'P1P13' bit in SMSTATEEN0

2024-06-05 Thread Alistair Francis

On Tue, Jun 4, 2024 at 4:24 PM Fea.Wang  wrote:
>
> Based on privilege 1.13 spec, there should be a bit56 for 'P1P13' in
> mstateen0 that controls access to the hedeleg.
>
> Signed-off-by: Fea.Wang 
> Reviewed-by: Frank Chang 
> Reviewed-by: Weiwei Li 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu_bits.h | 1 +
>  target/riscv/csr.c  | 8 
>  2 files changed, 9 insertions(+)
>
> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
> index 74318a925c..28bd3fb0b4 100644
> --- a/target/riscv/cpu_bits.h
> +++ b/target/riscv/cpu_bits.h
> @@ -315,6 +315,7 @@
>  #define SMSTATEEN0_CS   (1ULL << 0)
>  #define SMSTATEEN0_FCSR (1ULL << 1)
>  #define SMSTATEEN0_JVT  (1ULL << 2)
> +#define SMSTATEEN0_P1P13(1ULL << 56)
>  #define SMSTATEEN0_HSCONTXT (1ULL << 57)
>  #define SMSTATEEN0_IMSIC(1ULL << 58)
>  #define SMSTATEEN0_AIA  (1ULL << 59)
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index 58ef7079dc..3dcfb343fe 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -2245,6 +2245,10 @@ static RISCVException write_mstateen0(CPURISCVState 
> *env, int csrno,
>  wr_mask |= SMSTATEEN0_FCSR;
>  }
>
> +if (env->priv_ver >= PRIV_VERSION_1_13_0) {
> +wr_mask |= SMSTATEEN0_P1P13;
> +}
> +
>  return write_mstateen(env, csrno, wr_mask, new_val);
>  }
>
> @@ -2280,6 +2284,10 @@ static RISCVException write_mstateen0h(CPURISCVState 
> *env, int csrno,
>  {
>  uint64_t wr_mask = SMSTATEEN_STATEEN | SMSTATEEN0_HSENVCFG;
>
> +if (env->priv_ver >= PRIV_VERSION_1_13_0) {
> +wr_mask |= SMSTATEEN0_P1P13;
> +}
> +
>  return write_mstateenh(env, csrno, wr_mask, new_val);
>  }
>
> --
> 2.34.1
>
>

Re: [PATCH v3 3/6] target/riscv: Support the version for ss1p13

2024-06-05 Thread Alistair Francis

On Tue, Jun 4, 2024 at 4:23 PM Fea.Wang  wrote:
>
> Add RISC-V privilege 1.13 support.
>
> Signed-off-by: Fea.Wang 
> Signed-off-by: Fea.Wang 
> Reviewed-by: Frank Chang 
> Reviewed-by: Weiwei Li 
> Reviewed-by: LIU Zhiwei 

This should be the last patch in the series. The idea is that we add
support and then let users enable it.

Alistair

> ---
>  target/riscv/cpu.c | 6 +-
>  target/riscv/tcg/tcg-cpu.c | 4 
>  2 files changed, 9 insertions(+), 1 deletion(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index e9e69b9863..02c1e12a03 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -1775,7 +1775,9 @@ static int priv_spec_from_str(const char *priv_spec_str)
>  {
>  int priv_version = -1;
>
> -if (!g_strcmp0(priv_spec_str, PRIV_VER_1_12_0_STR)) {
> +if (!g_strcmp0(priv_spec_str, PRIV_VER_1_13_0_STR)) {
> +priv_version = PRIV_VERSION_1_13_0;
> +} else if (!g_strcmp0(priv_spec_str, PRIV_VER_1_12_0_STR)) {
>  priv_version = PRIV_VERSION_1_12_0;
>  } else if (!g_strcmp0(priv_spec_str, PRIV_VER_1_11_0_STR)) {
>  priv_version = PRIV_VERSION_1_11_0;
> @@ -1795,6 +1797,8 @@ const char *priv_spec_to_str(int priv_version)
>  return PRIV_VER_1_11_0_STR;
>  case PRIV_VERSION_1_12_0:
>  return PRIV_VER_1_12_0_STR;
> +case PRIV_VERSION_1_13_0:
> +return PRIV_VER_1_13_0_STR;
>  default:
>  return NULL;
>  }
> diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
> index 60fe0fd060..595d3b5b8f 100644
> --- a/target/riscv/tcg/tcg-cpu.c
> +++ b/target/riscv/tcg/tcg-cpu.c
> @@ -318,6 +318,10 @@ static void riscv_cpu_update_named_features(RISCVCPU 
> *cpu)
>  cpu->cfg.has_priv_1_12 = true;
>  }
>
> +if (cpu->env.priv_ver >= PRIV_VERSION_1_13_0) {
> +cpu->cfg.has_priv_1_13 = true;
> +}
> +
>  /* zic64b is 1.12 or later */
>  cpu->cfg.ext_zic64b = cpu->cfg.cbom_blocksize == 64 &&
>cpu->cfg.cbop_blocksize == 64 &&
> --
> 2.34.1
>
>

Re: [PULL v3 00/41] virtio: features,fixes

2024-06-05 Thread Richard Henderson


On 6/5/24 16:34, Michael S. Tsirkin wrote:

Dropped acpi patches that had endian-ness issues.

The following changes since commit 60b54b67c63d8f076152e0f7dccf39854dfc6a77:

   Merge tag 'pull-lu-20240526' of https://gitlab.com/rth7680/qemu into staging 
(2024-05-26 17:51:00 -0700)

are available in the Git repository at:

   https://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream

for you to fetch changes up to d23bc95d390a1800198c92a0177240d9e1a1eb66:

   hw/cxl: Fix read from bogus memory (2024-06-05 19:33:01 -0400)


virtio: features,fixes

A bunch of improvements:
- vhost dirty log is now only scanned once, not once per device
- virtio and vhost now support VIRTIO_F_NOTIFICATION_DATA
- cxl gained DCD emulation support
- pvpanic gained shutdown support
- beginning of patchset for Generic Port Affinity Structure
- new tests
- bugfixes

Signed-off-by: Michael S. Tsirkin 


Sorry to have to require a v4, but

merging...
Auto-merging hw/misc/pvpanic-isa.c
CONFLICT (content): Merge conflict in hw/misc/pvpanic-isa.c
Auto-merging hw/misc/pvpanic-pci.c
CONFLICT (content): Merge conflict in hw/misc/pvpanic-pci.c
Auto-merging hw/misc/pvpanic.c
CONFLICT (content): Merge conflict in hw/misc/pvpanic.c

Looks like Paolo's pull induced the conflict.


r~

Re: [PATCH v3 2/6] target/riscv: Define macros and variables for ss1p13

2024-06-05 Thread Alistair Francis

On Tue, Jun 4, 2024 at 4:23 PM Fea.Wang  wrote:
>
> Add macros and variables for RISC-V privilege 1.13 support.
>
> Signed-off-by: Fea.Wang 
> Reviewed-by: Frank Chang 
> Reviewed-by: Weiwei Li 
> Reviewed-by: LIU Zhiwei 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu.h | 4 +++-
>  target/riscv/cpu_cfg.h | 1 +
>  2 files changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 94600b91fa..4d73486ea2 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -96,12 +96,14 @@ extern RISCVCPUProfile *riscv_profiles[];
>  #define PRIV_VER_1_10_0_STR "v1.10.0"
>  #define PRIV_VER_1_11_0_STR "v1.11.0"
>  #define PRIV_VER_1_12_0_STR "v1.12.0"
> +#define PRIV_VER_1_13_0_STR "v1.13.0"
>  enum {
>  PRIV_VERSION_1_10_0 = 0,
>  PRIV_VERSION_1_11_0,
>  PRIV_VERSION_1_12_0,
> +PRIV_VERSION_1_13_0,
>
> -PRIV_VERSION_LATEST = PRIV_VERSION_1_12_0,
> +PRIV_VERSION_LATEST = PRIV_VERSION_1_13_0,
>  };
>
>  #define VEXT_VERSION_1_00_0 0x0001
> diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
> index e1e4f32698..fb7eebde52 100644
> --- a/target/riscv/cpu_cfg.h
> +++ b/target/riscv/cpu_cfg.h
> @@ -136,6 +136,7 @@ struct RISCVCPUConfig {
>   * TCG always implement/can't be user disabled,
>   * based on spec version.
>   */
> +bool has_priv_1_13;
>  bool has_priv_1_12;
>  bool has_priv_1_11;
>
> --
> 2.34.1
>
>

Re: [PULL 00/16] sprintf fixes

2024-06-05 Thread Richard Henderson


On 6/5/24 14:15, Richard Henderson wrote:

The following changes since commit f1572ab94738bd5787b7badcd4bd93a3657f0680:

   Merge tag 'for-upstream' ofhttps://gitlab.com/bonzini/qemu  into staging 
(2024-06-05 07:45:23 -0700)

are available in the Git repository at:

   https://gitlab.com/rth7680/qemu.git  tags/pull-misc-20240605

for you to fetch changes up to b89fb575fd467ed5dfde4608d51c47c2aa427f30:

   disas/riscv: Use GString in format_inst (2024-06-05 12:29:54 -0700)


util/hexdump: Use a GString for qemu_hexdump_line.
system/qtest: Replace sprintf by qemu_hexdump_line
hw/scsi/scsi-disk: Use qemu_hexdump_line to avoid sprintf
hw/ide/atapi: Use qemu_hexdump_line to avoid sprintf
hw/dma/pl330: Use qemu_hexdump_line to avoid sprintf
disas/microblaze: Reorg to avoid intermediate sprintf
disas/riscv: Use GString in format_inst


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/9.1 as 
appropriate.


r~

Re: [PATCH v3 0/3] target/riscv/kvm: QEMU support for KVM Guest Debug on RISC-V

2024-06-05 Thread Alistair Francis

On Wed, Jun 5, 2024 at 1:00 PM Chao Du  wrote:
>
> This series implements QEMU KVM Guest Debug on RISC-V, with which we
> could debug RISC-V KVM guest from the host side, using software
> breakpoints.
>
> This series is based on riscv-to-apply.next branch and is also
> available at:
> https://github.com/Du-Chao/alistair23-qemu/tree/riscv-to-apply.next.0605
>
> The corresponding KVM side patches have been merged already:
> https://lore.kernel.org/kvm/20240402062628.5425-1-duc...@eswincomputing.com/
>
> A TODO list which will be added later:
> 1. HW breakpoints support
> 2. A 'corner case' in which the debug exception is not inserted by the
> debugger, need to be re-injected to the guest.
>
> v2 resend->v3:
> - rebased.

I think you rebased on the wrong tree. Do you mind rebasing on
https://github.com/alistair23/qemu/tree/riscv-to-apply.next

Alistair

>
> v2->v2 resend:
> - add the type conversion in patch #1 to avoid warnings
>
> v1->v2:
> - squash patch #2 into #1
> - check the instruction length from the tail two bits, instead of passing the
>   length information by parameters.
>
> RFC->v1:
> - Rebased on riscv-to-apply.next
> - use configs/ definition to conditionalize debug support
>
> v2 link:
> https://lore.kernel.org/qemu-riscv/20240528080759.26439-1-duc...@eswincomputing.com/
> v1 link:
> https://lore.kernel.org/qemu-riscv/20240527021916.12953-1-duc...@eswincomputing.com/
> RFC link:
> https://lore.kernel.org/qemu-riscv/20231221094923.7349-1-duc...@eswincomputing.com/
>
> Chao Du (3):
>   target/riscv/kvm: add software breakpoints support
>   target/riscv/kvm: handle the exit with debug reason
>   target/riscv/kvm: define TARGET_KVM_HAVE_GUEST_DEBUG
>
>  configs/targets/riscv64-softmmu.mak |  1 +
>  target/riscv/kvm/kvm-cpu.c  | 90 +
>  2 files changed, 91 insertions(+)
>
> --
> 2.17.1
>

Re: [PATCH 2/2] util/bufferiszero: Add simd acceleration for loongarch64

2024-06-05 Thread Richard Henderson


On 6/5/24 02:32, Bibo Mao wrote:

Different gcc versions have different features, macro CONFIG_LSX_OPT
and CONFIG_LASX_OPT is added here to detect whether gcc supports
built-in lsx/lasx macro.

Function buffer_zero_lsx() is added for 128bit simd fpu optimization,
and function buffer_zero_lasx() is for 256bit simd fpu optimization.

Loongarch gcc built-in lsx/lasx macro can be used only when compiler
option -mlsx/-mlasx is added, and there is no separate compiler option
for function only. So it is only in effect when qemu is compiled with
parameter --extra-cflags="-mlasx"

Signed-off-by: Bibo Mao 
---
  meson.build |  11 +
  util/bufferiszero.c | 103 
  2 files changed, 114 insertions(+)

diff --git a/meson.build b/meson.build
index 6386607144..29bc362d7a 100644
--- a/meson.build
+++ b/meson.build
@@ -2855,6 +2855,17 @@ config_host_data.set('CONFIG_ARM_AES_BUILTIN', 
cc.compiles('''
  void foo(uint8x16_t *p) { *p = vaesmcq_u8(*p); }
'''))
  
+# For Loongarch64, detect if LSX/LASX are available.

+ config_host_data.set('CONFIG_LSX_OPT', cc.compiles('''
+#include "lsxintrin.h"
+int foo(__m128i v) { return __lsx_bz_v(v); }
+  '''))
+
+config_host_data.set('CONFIG_LASX_OPT', cc.compiles('''
+#include "lasxintrin.h"
+int foo(__m256i v) { return __lasx_xbz_v(v); }
+  '''))


Both of these are introduced by gcc 14 and llvm 18, so I'm not certain of the utility of 
separate tests.  We might simplify this with


  config_host_data.set('CONFIG_LSX_LASX_INTRIN_H',
cc.has_header('lsxintrin.h') && cc.has_header('lasxintrin.h'))


As you say, these headers require vector instructions to be enabled at compile-time rather 
than detecting them at runtime.  This is a point where the compilers could be improved to 
support __attribute__((target("xyz"))) and the builtins with that.  The i386 port does 
this, for instance.


In the meantime, it means that you don't need a runtime test.  Similar to aarch64 and the 
use of __ARM_NEON as a compile-time test for simd support.  Perhaps


#elif defined(CONFIG_LSX_LASX_INTRIN_H) && \
  (defined(__loongarch_sx) || defined(__loongarch_asx))
# ifdef __loongarch_sx
  ...
# endif
# ifdef __loongarch_asx
  ...
# endif


The actual code is perfectly fine, of course, since it follows the pattern from the 
others.  How much improvement do you see from bufferiszero-bench?



r~

[PULL v3 38/41] tests/qtest/pvpanic: add tests for pvshutdown event

2024-06-05 Thread Michael S. Tsirkin

From: Thomas Weißschuh 

Validate that a shutdown via the pvpanic device emits the correct
QMP events.

Signed-off-by: Thomas Weißschuh 
Reviewed-by: Thomas Huth 
Message-Id: <20240527-pvpanic-shutdown-v8-7-5a28ec025...@t-8ch.de>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 tests/qtest/pvpanic-pci-test.c | 39 ++
 tests/qtest/pvpanic-test.c | 29 +
 2 files changed, 68 insertions(+)

diff --git a/tests/qtest/pvpanic-pci-test.c b/tests/qtest/pvpanic-pci-test.c
index b372caf41d..dc021c2fdf 100644
--- a/tests/qtest/pvpanic-pci-test.c
+++ b/tests/qtest/pvpanic-pci-test.c
@@ -85,11 +85,50 @@ static void test_panic(void)
 qtest_quit(qts);
 }
 
+static void test_pvshutdown(void)
+{
+uint8_t val;
+QDict *response, *data;
+QTestState *qts;
+QPCIBus *pcibus;
+QPCIDevice *dev;
+QPCIBar bar;
+
+qts = qtest_init("-device pvpanic-pci,addr=04.0");
+pcibus = qpci_new_pc(qts, NULL);
+dev = qpci_device_find(pcibus, QPCI_DEVFN(0x4, 0x0));
+qpci_device_enable(dev);
+bar = qpci_iomap(dev, 0, NULL);
+
+qpci_memread(dev, bar, 0, , sizeof(val));
+g_assert_cmpuint(val, ==, PVPANIC_EVENTS);
+
+val = PVPANIC_SHUTDOWN;
+qpci_memwrite(dev, bar, 0, , sizeof(val));
+
+response = qtest_qmp_eventwait_ref(qts, "GUEST_PVSHUTDOWN");
+qobject_unref(response);
+
+response = qtest_qmp_eventwait_ref(qts, "SHUTDOWN");
+g_assert(qdict_haskey(response, "data"));
+data = qdict_get_qdict(response, "data");
+g_assert(qdict_haskey(data, "guest"));
+g_assert(qdict_get_bool(data, "guest"));
+g_assert(qdict_haskey(data, "reason"));
+g_assert_cmpstr(qdict_get_str(data, "reason"), ==, "guest-shutdown");
+qobject_unref(response);
+
+g_free(dev);
+qpci_free_pc(pcibus);
+qtest_quit(qts);
+}
+
 int main(int argc, char **argv)
 {
 g_test_init(, , NULL);
 qtest_add_func("/pvpanic-pci/panic", test_panic);
 qtest_add_func("/pvpanic-pci/panic-nopause", test_panic_nopause);
+qtest_add_func("/pvpanic-pci/pvshutdown", test_pvshutdown);
 
 return g_test_run();
 }
diff --git a/tests/qtest/pvpanic-test.c b/tests/qtest/pvpanic-test.c
index ccc603472f..d49d2ba931 100644
--- a/tests/qtest/pvpanic-test.c
+++ b/tests/qtest/pvpanic-test.c
@@ -58,11 +58,40 @@ static void test_panic(void)
 qtest_quit(qts);
 }
 
+static void test_pvshutdown(void)
+{
+uint8_t val;
+QDict *response, *data;
+QTestState *qts;
+
+qts = qtest_init("-device pvpanic");
+
+val = qtest_inb(qts, 0x505);
+g_assert_cmpuint(val, ==, PVPANIC_EVENTS);
+
+qtest_outb(qts, 0x505, PVPANIC_SHUTDOWN);
+
+response = qtest_qmp_eventwait_ref(qts, "GUEST_PVSHUTDOWN");
+qobject_unref(response);
+
+response = qtest_qmp_eventwait_ref(qts, "SHUTDOWN");
+g_assert(qdict_haskey(response, "data"));
+data = qdict_get_qdict(response, "data");
+g_assert(qdict_haskey(data, "guest"));
+g_assert(qdict_get_bool(data, "guest"));
+g_assert(qdict_haskey(data, "reason"));
+g_assert_cmpstr(qdict_get_str(data, "reason"), ==, "guest-shutdown");
+qobject_unref(response);
+
+qtest_quit(qts);
+}
+
 int main(int argc, char **argv)
 {
 g_test_init(, , NULL);
 qtest_add_func("/pvpanic/panic", test_panic);
 qtest_add_func("/pvpanic/panic-nopause", test_panic_nopause);
+qtest_add_func("/pvpanic/pvshutdown", test_pvshutdown);
 
 return g_test_run();
 }
-- 
MST

[PULL v3 40/41] virtio-pci: Fix the failure process in kvm_virtio_pci_vector_use_one()

2024-06-05 Thread Michael S. Tsirkin

From: Cindy Lu 

The function kvm_virtio_pci_vector_use_one() only uses the irqfd/vector
for itself. Therefore, the failure handling under the undo label is
incorrect.
To fix this, we can just remove this label.

Fixes: f9a09ca3ea ("vhost: add support for configure interrupt")
Cc: qemu-sta...@nongnu.org
Signed-off-by: Cindy Lu 
Message-Id: <20240528084840.194538-1-l...@redhat.com>
Reviewed-by: Peter Maydell 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/virtio-pci.c | 18 ++
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 7d62e92365..5941f1a94d 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -898,7 +898,7 @@ static int kvm_virtio_pci_vector_use_one(VirtIOPCIProxy 
*proxy, int queue_no)
 }
 ret = kvm_virtio_pci_vq_vector_use(proxy, vector);
 if (ret < 0) {
-goto undo;
+return ret;
 }
 /*
  * If guest supports masking, set up irqfd now.
@@ -908,25 +908,11 @@ static int kvm_virtio_pci_vector_use_one(VirtIOPCIProxy 
*proxy, int queue_no)
 ret = kvm_virtio_pci_irqfd_use(proxy, n, vector);
 if (ret < 0) {
 kvm_virtio_pci_vq_vector_release(proxy, vector);
-goto undo;
+return ret;
 }
 }
 
 return 0;
-undo:
-
-vector = virtio_queue_vector(vdev, queue_no);
-if (vector >= msix_nr_vectors_allocated(dev)) {
-return ret;
-}
-if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
-ret = virtio_pci_get_notifier(proxy, queue_no, , );
-if (ret < 0) {
-return ret;
-}
-kvm_virtio_pci_irqfd_release(proxy, n, vector);
-}
-return ret;
 }
 static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs)
 {
-- 
MST

Re: [PATCH qemu ] hw/acpi: Fix big endian host creation of Generic Port Affinity Structures

2024-06-05 Thread Michael S. Tsirkin

On Wed, Jun 05, 2024 at 07:04:55PM +0100, Jonathan Cameron wrote:
> Treating the HID as an integer caused it to get bit reversed
> on big endian hosts running little endian guests.  Treat it
> as a character array instead.
> 
> Fixes hw/acpi: Generic Port Affinity Structure Support
> Tested-by: Richard Henderson 
> Signed-off-by: Jonathan Cameron 
> 
> ---
> Richard ran the version posted in the thread on an s390 instance.
> Thanks for the help!
> 
> Difference from version in thread:
> - Instantiate i in the for loop.
> 
> Sending out now so Michael can decide whether to fold this in, or
> drop the GP series for now from his pull request (in which case
> I'll do an updated version with this and Markus' docs feedback
> folded in.)


Dropped for now.


> ---
>  include/hw/acpi/acpi_generic_initiator.h | 2 +-
>  hw/acpi/acpi_generic_initiator.c | 4 +++-
>  2 files changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/include/hw/acpi/acpi_generic_initiator.h 
> b/include/hw/acpi/acpi_generic_initiator.h
> index 1a899af30f..5baefda33a 100644
> --- a/include/hw/acpi/acpi_generic_initiator.h
> +++ b/include/hw/acpi/acpi_generic_initiator.h
> @@ -61,7 +61,7 @@ typedef struct PCIDeviceHandle {
>  uint16_t bdf;
>  };
>  struct {
> -uint64_t hid;
> +char hid[8];
>  uint32_t uid;
>  };
>  };

I think there is another issue:

+memcpy(&dev_handle.hid, hid, sizeof(dev_handle.hid));

This is not nice, since there is no check that 8 bytes will hold all of
+const char *hid = "ACPI0016";
and that the copy won't access the buffer out of range.




> diff --git a/hw/acpi/acpi_generic_initiator.c 
> b/hw/acpi/acpi_generic_initiator.c
> index 78b80dcf08..f064753b67 100644
> --- a/hw/acpi/acpi_generic_initiator.c
> +++ b/hw/acpi/acpi_generic_initiator.c
> @@ -151,7 +151,9 @@ build_srat_generic_node_affinity(GArray *table_data, int 
> node,
>  build_append_int_noprefix(table_data, 0, 12);
>  } else {
>  /* Device Handle - ACPI */
> -build_append_int_noprefix(table_data, handle->hid, 8);
> +for (int i = 0; i < sizeof(handle->hid); i++) {
> +build_append_int_noprefix(table_data, handle->hid[i], 1);
> +}
>  build_append_int_noprefix(table_data, handle->uid, 4);
>  build_append_int_noprefix(table_data, 0, 4);
>  }
> -- 
> 2.39.2

[PULL v3 30/41] hw/acpi/GI: Fix trivial parameter alignment issue.

2024-06-05 Thread Michael S. Tsirkin

From: Jonathan Cameron 

Before making additional modification, tidy up this misleading indentation.

Reviewed-by: Ankit Agrawal 
Signed-off-by: Jonathan Cameron 
Message-Id: <20240524100507.32106-2-jonathan.came...@huawei.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/acpi/acpi_generic_initiator.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/acpi/acpi_generic_initiator.c b/hw/acpi/acpi_generic_initiator.c
index 17b9a052f5..18a939b0e5 100644
--- a/hw/acpi/acpi_generic_initiator.c
+++ b/hw/acpi/acpi_generic_initiator.c
@@ -132,7 +132,7 @@ static int build_all_acpi_generic_initiators(Object *obj, 
void *opaque)
 
 dev_handle.segment = 0;
 dev_handle.bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
-   pci_dev->devfn);
+   pci_dev->devfn);
 
 build_srat_generic_pci_initiator_affinity(table_data,
   gi->node, _handle);
-- 
MST

[PULL v3 35/41] tests/qtest/pvpanic: use centralized definition of supported events

2024-06-05 Thread Michael S. Tsirkin

From: Thomas Weißschuh 

Avoid the necessity to update all tests when new events are added
to the device.

Acked-by: Thomas Huth 
Reviewed-by: Cornelia Huck 
Signed-off-by: Thomas Weißschuh 
Message-Id: <20240527-pvpanic-shutdown-v8-4-5a28ec025...@t-8ch.de>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 tests/qtest/pvpanic-pci-test.c | 5 +++--
 tests/qtest/pvpanic-test.c | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/qtest/pvpanic-pci-test.c b/tests/qtest/pvpanic-pci-test.c
index 2c05b376ba..b372caf41d 100644
--- a/tests/qtest/pvpanic-pci-test.c
+++ b/tests/qtest/pvpanic-pci-test.c
@@ -16,6 +16,7 @@
 #include "qapi/qmp/qdict.h"
 #include "libqos/pci.h"
 #include "libqos/pci-pc.h"
+#include "hw/misc/pvpanic.h"
 #include "hw/pci/pci_regs.h"
 
 static void test_panic_nopause(void)
@@ -34,7 +35,7 @@ static void test_panic_nopause(void)
 bar = qpci_iomap(dev, 0, NULL);
 
 qpci_memread(dev, bar, 0, , sizeof(val));
-g_assert_cmpuint(val, ==, 3);
+g_assert_cmpuint(val, ==, PVPANIC_EVENTS);
 
 val = 1;
 qpci_memwrite(dev, bar, 0, , sizeof(val));
@@ -67,7 +68,7 @@ static void test_panic(void)
 bar = qpci_iomap(dev, 0, NULL);
 
 qpci_memread(dev, bar, 0, , sizeof(val));
-g_assert_cmpuint(val, ==, 3);
+g_assert_cmpuint(val, ==, PVPANIC_EVENTS);
 
 val = 1;
 qpci_memwrite(dev, bar, 0, , sizeof(val));
diff --git a/tests/qtest/pvpanic-test.c b/tests/qtest/pvpanic-test.c
index 78f1cf8186..ccc603472f 100644
--- a/tests/qtest/pvpanic-test.c
+++ b/tests/qtest/pvpanic-test.c
@@ -10,6 +10,7 @@
 #include "qemu/osdep.h"
 #include "libqtest.h"
 #include "qapi/qmp/qdict.h"
+#include "hw/misc/pvpanic.h"
 
 static void test_panic_nopause(void)
 {
@@ -20,7 +21,7 @@ static void test_panic_nopause(void)
 qts = qtest_init("-device pvpanic -action panic=none");
 
 val = qtest_inb(qts, 0x505);
-g_assert_cmpuint(val, ==, 3);
+g_assert_cmpuint(val, ==, PVPANIC_EVENTS);
 
 qtest_outb(qts, 0x505, 0x1);
 
@@ -43,7 +44,7 @@ static void test_panic(void)
 qts = qtest_init("-device pvpanic -action panic=pause");
 
 val = qtest_inb(qts, 0x505);
-g_assert_cmpuint(val, ==, 3);
+g_assert_cmpuint(val, ==, PVPANIC_EVENTS);
 
 qtest_outb(qts, 0x505, 0x1);
 
-- 
MST

[PULL v3 21/41] hw/mem/cxl_type3: Add support to create DC regions to type3 memory devices

2024-06-05 Thread Michael S. Tsirkin

From: Fan Ni 

With the change, when setting up memory for type3 memory device, we can
create DC regions.
A property 'num-dc-regions' is added to ct3_props to allow users to pass the
number of DC regions to create. To make it easier, other region parameters
like region base, length, and block size are hard coded. If needed,
these parameters can be added easily.

With the change, we can create DC regions with proper kernel side
support like below:

region=$(cat /sys/bus/cxl/devices/decoder0.0/create_dc_region)
echo $region > /sys/bus/cxl/devices/decoder0.0/create_dc_region
echo 256 > /sys/bus/cxl/devices/$region/interleave_granularity
echo 1 > /sys/bus/cxl/devices/$region/interleave_ways

echo "dc0" >/sys/bus/cxl/devices/decoder2.0/mode
echo 0x4000 >/sys/bus/cxl/devices/decoder2.0/dpa_size

echo 0x4000 > /sys/bus/cxl/devices/$region/size
echo  "decoder2.0" > /sys/bus/cxl/devices/$region/target0
echo 1 > /sys/bus/cxl/devices/$region/commit
echo $region > /sys/bus/cxl/drivers/cxl_region/bind

Reviewed-by: Gregory Price 
Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
Message-Id: <20240523174651.1089554-7-nifan@gmail.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Li Zhijian 
---
 hw/mem/cxl_type3.c | 53 ++
 1 file changed, 53 insertions(+)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 7194c8f902..06c6f9bb78 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -30,6 +30,7 @@
 #include "hw/pci/msix.h"
 
 #define DWORD_BYTE 4
+#define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
 
 /* Default CDAT entries for a memory region */
 enum {
@@ -567,6 +568,50 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, 
uint64_t value,
 }
 }
 
+/*
+ * TODO: dc region configuration will be updated once host backend and address
+ * space support is added for DCD.
+ */
+static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp)
+{
+int i;
+uint64_t region_base = 0;
+uint64_t region_len =  2 * GiB;
+uint64_t decode_len = 2 * GiB;
+uint64_t blk_size = 2 * MiB;
+CXLDCRegion *region;
+MemoryRegion *mr;
+
+if (ct3d->hostvmem) {
+mr = host_memory_backend_get_memory(ct3d->hostvmem);
+region_base += memory_region_size(mr);
+}
+if (ct3d->hostpmem) {
+mr = host_memory_backend_get_memory(ct3d->hostpmem);
+region_base += memory_region_size(mr);
+}
+if (region_base % CXL_CAPACITY_MULTIPLIER != 0) {
+error_setg(errp, "DC region base not aligned to 0x%lx",
+   CXL_CAPACITY_MULTIPLIER);
+return false;
+}
+
+for (i = 0, region = >dc.regions[0];
+ i < ct3d->dc.num_regions;
+ i++, region++, region_base += region_len) {
+*region = (CXLDCRegion) {
+.base = region_base,
+.decode_len = decode_len,
+.len = region_len,
+.block_size = blk_size,
+/* dsmad_handle set when creating CDAT table entries */
+.flags = 0,
+};
+}
+
+return true;
+}
+
 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
 {
 DeviceState *ds = DEVICE(ct3d);
@@ -635,6 +680,13 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error 
**errp)
 g_free(p_name);
 }
 
+if (ct3d->dc.num_regions > 0) {
+if (!cxl_create_dc_regions(ct3d, errp)) {
+error_append_hint(errp, "setup DC regions failed");
+return false;
+}
+}
+
 return true;
 }
 
@@ -930,6 +982,7 @@ static Property ct3_props[] = {
  HostMemoryBackend *),
 DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
 DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
+DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
MST

[PULL v3 14/41] Revert "vhost-user: fix lost reconnect"

2024-06-05 Thread Michael S. Tsirkin

From: Li Feng 

This reverts commit f02a4b8e6431598612466f76aac64ab492849abf.

Since the current patch cannot completely fix the lost reconnect
problem, there is a scenario that is not considered:
- When the virtio-blk driver is removed from the guest os,
  s->connected has no chance to be set to false, resulting in
  subsequent reconnection not being executed.

The next patch will completely fix this issue with a better approach.

Signed-off-by: Li Feng 
Message-Id: <20240516025753.130171-2-fen...@smartx.com>
Reviewed-by: Raphael Norwitz 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/virtio/vhost-user.h |  3 +--
 hw/block/vhost-user-blk.c  |  2 +-
 hw/scsi/vhost-user-scsi.c  |  3 +--
 hw/virtio/vhost-user-base.c|  2 +-
 hw/virtio/vhost-user.c | 10 ++
 5 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
index d7c09ffd34..324cd8663a 100644
--- a/include/hw/virtio/vhost-user.h
+++ b/include/hw/virtio/vhost-user.h
@@ -108,7 +108,6 @@ typedef void (*vu_async_close_fn)(DeviceState *cb);
 
 void vhost_user_async_close(DeviceState *d,
 CharBackend *chardev, struct vhost_dev *vhost,
-vu_async_close_fn cb,
-IOEventHandler *event_cb);
+vu_async_close_fn cb);
 
 #endif
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index bc2677dbef..15cc24d017 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -385,7 +385,7 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent 
event)
 case CHR_EVENT_CLOSED:
 /* defer close until later to avoid circular close */
 vhost_user_async_close(dev, >chardev, >dev,
-   vhost_user_blk_disconnect, 
vhost_user_blk_event);
+   vhost_user_blk_disconnect);
 break;
 case CHR_EVENT_BREAK:
 case CHR_EVENT_MUX_IN:
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index 0b050805a8..421cd654f8 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -215,8 +215,7 @@ static void vhost_user_scsi_event(void *opaque, 
QEMUChrEvent event)
 case CHR_EVENT_CLOSED:
 /* defer close until later to avoid circular close */
 vhost_user_async_close(dev, >conf.chardev, >dev,
-   vhost_user_scsi_disconnect,
-   vhost_user_scsi_event);
+   vhost_user_scsi_disconnect);
 break;
 case CHR_EVENT_BREAK:
 case CHR_EVENT_MUX_IN:
diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c
index a83167191e..4b54255682 100644
--- a/hw/virtio/vhost-user-base.c
+++ b/hw/virtio/vhost-user-base.c
@@ -254,7 +254,7 @@ static void vub_event(void *opaque, QEMUChrEvent event)
 case CHR_EVENT_CLOSED:
 /* defer close until later to avoid circular close */
 vhost_user_async_close(dev, >chardev, >vhost_dev,
-   vub_disconnect, vub_event);
+   vub_disconnect);
 break;
 case CHR_EVENT_BREAK:
 case CHR_EVENT_MUX_IN:
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index cdf9af4a4b..c929097e87 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -2776,7 +2776,6 @@ typedef struct {
 DeviceState *dev;
 CharBackend *cd;
 struct vhost_dev *vhost;
-IOEventHandler *event_cb;
 } VhostAsyncCallback;
 
 static void vhost_user_async_close_bh(void *opaque)
@@ -2791,10 +2790,7 @@ static void vhost_user_async_close_bh(void *opaque)
  */
 if (vhost->vdev) {
 data->cb(data->dev);
-} else if (data->event_cb) {
-qemu_chr_fe_set_handlers(data->cd, NULL, NULL, data->event_cb,
- NULL, data->dev, NULL, true);
-   }
+}
 
 g_free(data);
 }
@@ -2806,8 +2802,7 @@ static void vhost_user_async_close_bh(void *opaque)
  */
 void vhost_user_async_close(DeviceState *d,
 CharBackend *chardev, struct vhost_dev *vhost,
-vu_async_close_fn cb,
-IOEventHandler *event_cb)
+vu_async_close_fn cb)
 {
 if (!runstate_check(RUN_STATE_SHUTDOWN)) {
 /*
@@ -2823,7 +2818,6 @@ void vhost_user_async_close(DeviceState *d,
 data->dev = d;
 data->cd = chardev;
 data->vhost = vhost;
-data->event_cb = event_cb;
 
 /* Disable any further notifications on the chardev */
 qemu_chr_fe_set_handlers(chardev,
-- 
MST

[PULL v3 24/41] hw/mem/cxl_type3: Add DC extent list representative and get DC extent list mailbox support

2024-06-05 Thread Michael S. Tsirkin

From: Fan Ni 

Add a dynamic capacity extent list representation to the definition of
CXLType3Dev and implement the Get Dynamic Capacity Extent List mailbox
command per CXL r3.1 section 8.2.9.9.9.2.

Tested-by: Svetly Todorov 
Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
Message-Id: <20240523174651.1089554-10-nifan@gmail.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/cxl/cxl_device.h | 22 +++
 hw/cxl/cxl-mailbox-utils.c  | 73 -
 hw/mem/cxl_type3.c  |  1 +
 3 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index c2c3df0d2a..6aec6ac983 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -424,6 +424,25 @@ typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
 
 #define DCD_MAX_NUM_REGION 8
 
+typedef struct CXLDCExtentRaw {
+uint64_t start_dpa;
+uint64_t len;
+uint8_t tag[0x10];
+uint16_t shared_seq;
+uint8_t rsvd[0x6];
+} QEMU_PACKED CXLDCExtentRaw;
+
+typedef struct CXLDCExtent {
+uint64_t start_dpa;
+uint64_t len;
+uint8_t tag[0x10];
+uint16_t shared_seq;
+uint8_t rsvd[0x6];
+
+QTAILQ_ENTRY(CXLDCExtent) node;
+} CXLDCExtent;
+typedef QTAILQ_HEAD(, CXLDCExtent) CXLDCExtentList;
+
 typedef struct CXLDCRegion {
 uint64_t base;   /* aligned to 256*MiB */
 uint64_t decode_len; /* aligned to 256*MiB */
@@ -474,6 +493,9 @@ struct CXLType3Dev {
  * memory region size.
  */
 uint64_t total_capacity; /* 256M aligned */
+CXLDCExtentList extents;
+uint32_t total_extent_count;
+uint32_t ext_list_gen_seq;
 
 uint8_t num_regions; /* 0-8 regions */
 CXLDCRegion regions[DCD_MAX_NUM_REGION];
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 6ad227f112..7872d2f3e6 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -84,6 +84,7 @@ enum {
 #define CLEAR_POISON   0x2
 DCD_CONFIG  = 0x48,
 #define GET_DC_CONFIG  0x0
+#define GET_DYN_CAP_EXT_LIST   0x1
 PHYSICAL_SWITCH = 0x51,
 #define IDENTIFY_SWITCH_DEVICE  0x0
 #define GET_PHYSICAL_PORT_STATE 0x1
@@ -1322,7 +1323,8 @@ static CXLRetCode cmd_dcd_get_dyn_cap_config(const struct 
cxl_cmd *cmd,
  * to use.
  */
 stl_le_p(_out->num_extents_supported, CXL_NUM_EXTENTS_SUPPORTED);
-stl_le_p(_out->num_extents_available, CXL_NUM_EXTENTS_SUPPORTED);
+stl_le_p(_out->num_extents_available, CXL_NUM_EXTENTS_SUPPORTED -
+ ct3d->dc.total_extent_count);
 stl_le_p(_out->num_tags_supported, CXL_NUM_TAGS_SUPPORTED);
 stl_le_p(_out->num_tags_available, CXL_NUM_TAGS_SUPPORTED);
 
@@ -1330,6 +1332,72 @@ static CXLRetCode cmd_dcd_get_dyn_cap_config(const 
struct cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * CXL r3.1 section 8.2.9.9.9.2:
+ * Get Dynamic Capacity Extent List (Opcode 4801h)
+ */
+static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const struct cxl_cmd *cmd,
+   uint8_t *payload_in,
+   size_t len_in,
+   uint8_t *payload_out,
+   size_t *len_out,
+   CXLCCI *cci)
+{
+CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+struct {
+uint32_t extent_cnt;
+uint32_t start_extent_id;
+} QEMU_PACKED *in = (void *)payload_in;
+struct {
+uint32_t count;
+uint32_t total_extents;
+uint32_t generation_num;
+uint8_t rsvd[4];
+CXLDCExtentRaw records[];
+} QEMU_PACKED *out = (void *)payload_out;
+uint32_t start_extent_id = in->start_extent_id;
+CXLDCExtentList *extent_list = >dc.extents;
+uint16_t record_count = 0, i = 0, record_done = 0;
+uint16_t out_pl_len, size;
+CXLDCExtent *ent;
+
+if (start_extent_id > ct3d->dc.total_extent_count) {
+return CXL_MBOX_INVALID_INPUT;
+}
+
+record_count = MIN(in->extent_cnt,
+   ct3d->dc.total_extent_count - start_extent_id);
+size = CXL_MAILBOX_MAX_PAYLOAD_SIZE - sizeof(*out);
+record_count = MIN(record_count, size / sizeof(out->records[0]));
+out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+
+stl_le_p(>count, record_count);
+stl_le_p(>total_extents, ct3d->dc.total_extent_count);
+stl_le_p(>generation_num, ct3d->dc.ext_list_gen_seq);
+
+if (record_count > 0) {
+CXLDCExtentRaw *out_rec = >records[record_done];
+
+QTAILQ_FOREACH(ent, extent_list, node) {
+if (i++ < start_extent_id) {
+continue;
+}
+stq_le_p(_rec->start_dpa, ent->start_dpa);
+stq_le_p(_rec->len, ent->len);
+memcpy(_rec->tag, ent->tag, 0x10);
+

[PULL v3 37/41] pvpanic: Emit GUEST_PVSHUTDOWN QMP event on pvpanic shutdown signal

2024-06-05 Thread Michael S. Tsirkin

From: Alejandro Jimenez 

Emit a QMP event on receiving a PVPANIC_SHUTDOWN event. Even though a typical
SHUTDOWN event will be sent, it will be indistinguishable from a shutdown
originating from other cases (e.g. KVM exit due to KVM_SYSTEM_EVENT_SHUTDOWN)
that also issue the guest-shutdown cause.
A management layer application can detect the new GUEST_PVSHUTDOWN event to
determine if the guest is using the pvpanic interface to request shutdowns.

Signed-off-by: Alejandro Jimenez 
Message-Id: <20240527-pvpanic-shutdown-v8-6-5a28ec025...@t-8ch.de>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 qapi/run-state.json | 14 ++
 system/runstate.c   |  1 +
 2 files changed, 15 insertions(+)

diff --git a/qapi/run-state.json b/qapi/run-state.json
index f8773f23b2..5ac0fec852 100644
--- a/qapi/run-state.json
+++ b/qapi/run-state.json
@@ -462,6 +462,20 @@
 { 'event': 'GUEST_CRASHLOADED',
   'data': { 'action': 'GuestPanicAction', '*info': 'GuestPanicInformation' } }
 
+##
+# @GUEST_PVSHUTDOWN:
+#
+# Emitted when guest submits a shutdown request via pvpanic interface
+#
+# Since: 9.1
+#
+# Example:
+#
+# <- { "event": "GUEST_PVSHUTDOWN",
+#  "timestamp": { "seconds": 1648245259, "microseconds": 893771 } }
+##
+{ 'event': 'GUEST_PVSHUTDOWN' }
+
 ##
 # @GuestPanicAction:
 #
diff --git a/system/runstate.c b/system/runstate.c
index 83055f3278..c149b1ab4b 100644
--- a/system/runstate.c
+++ b/system/runstate.c
@@ -587,6 +587,7 @@ void qemu_system_guest_crashloaded(GuestPanicInformation 
*info)
 
 void qemu_system_guest_pvshutdown(void)
 {
+qapi_event_send_guest_pvshutdown();
 qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
 }
 
-- 
MST

[PULL v3 20/41] include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for type3 memory devices

2024-06-05 Thread Michael S. Tsirkin

From: Fan Ni 

Rename mem_size as static_mem_size for type3 memdev to cover static RAM and
pmem capacity, preparing for the introduction of dynamic capacity to support
dynamic capacity devices.

Reviewed-by: Gregory Price 
Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
Message-Id: <20240523174651.1089554-6-nifan@gmail.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/cxl/cxl_device.h | 2 +-
 hw/cxl/cxl-mailbox-utils.c  | 4 ++--
 hw/mem/cxl_type3.c  | 8 
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index e839370266..f7f56b44e3 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -234,7 +234,7 @@ typedef struct cxl_device_state {
 } timestamp;
 
 /* memory region size, HDM */
-uint64_t mem_size;
+uint64_t static_mem_size;
 uint64_t pmem_size;
 uint64_t vmem_size;
 
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index bede28e3c8..b592473587 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -803,7 +803,7 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0);
 
 stq_le_p(>total_capacity,
- cxl_dstate->mem_size / CXL_CAPACITY_MULTIPLIER);
+ cxl_dstate->static_mem_size / CXL_CAPACITY_MULTIPLIER);
 stq_le_p(>persistent_capacity,
  cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER);
 stq_le_p(>volatile_capacity,
@@ -1179,7 +1179,7 @@ static CXLRetCode cmd_media_clear_poison(const struct 
cxl_cmd *cmd,
 struct clear_poison_pl *in = (void *)payload_in;
 
 dpa = ldq_le_p(>dpa);
-if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->mem_size) {
+if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
 return CXL_MBOX_INVALID_PA;
 }
 
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 3e42490b6c..7194c8f902 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -608,7 +608,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error 
**errp)
 }
 address_space_init(>hostvmem_as, vmr, v_name);
 ct3d->cxl_dstate.vmem_size = memory_region_size(vmr);
-ct3d->cxl_dstate.mem_size += memory_region_size(vmr);
+ct3d->cxl_dstate.static_mem_size += memory_region_size(vmr);
 g_free(v_name);
 }
 
@@ -631,7 +631,7 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error 
**errp)
 }
 address_space_init(>hostpmem_as, pmr, p_name);
 ct3d->cxl_dstate.pmem_size = memory_region_size(pmr);
-ct3d->cxl_dstate.mem_size += memory_region_size(pmr);
+ct3d->cxl_dstate.static_mem_size += memory_region_size(pmr);
 g_free(p_name);
 }
 
@@ -837,7 +837,7 @@ static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d,
 return -EINVAL;
 }
 
-if (*dpa_offset > ct3d->cxl_dstate.mem_size) {
+if (*dpa_offset > ct3d->cxl_dstate.static_mem_size) {
 return -EINVAL;
 }
 
@@ -1010,7 +1010,7 @@ static bool set_cacheline(CXLType3Dev *ct3d, uint64_t 
dpa_offset, uint8_t *data)
 return false;
 }
 
-if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.mem_size) {
+if (dpa_offset + CXL_CACHE_LINE_SIZE > ct3d->cxl_dstate.static_mem_size) {
 return false;
 }
 
-- 
MST

[PULL v3 12/41] virtio-pci: only reset pm state during resetting

2024-06-05 Thread Michael S. Tsirkin

From: Jiqian Chen 

Fix a bug introduced by 27ce0f3afc9dd ("fix Power Management Control Register
for PCI Express virtio devices").
After that change, QEMU may erroneously clear the power status of the device,
or may erroneously clear non-writable registers, such as NO_SOFT_RESET, etc.

Only the state field of PM_CTRL is writable, and it needs to be reset only
when the VIRTIO_PCI_FLAG_INIT_PM flag is set.

Fixes: 27ce0f3afc9dd ("fix Power Management Control Register for PCI Express 
virtio devices")
Signed-off-by: Jiqian Chen 
Message-Id: <20240515073526.17297-2-jiqian.c...@amd.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/virtio-pci.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index cffc7efcae..7d62e92365 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -2306,10 +2306,16 @@ static void virtio_pci_bus_reset_hold(Object *obj, 
ResetType type)
 virtio_pci_reset(qdev);
 
 if (pci_is_express(dev)) {
+VirtIOPCIProxy *proxy = VIRTIO_PCI(dev);
+
 pcie_cap_deverr_reset(dev);
 pcie_cap_lnkctl_reset(dev);
 
-pci_set_word(dev->config + dev->exp.pm_cap + PCI_PM_CTRL, 0);
+if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) {
+pci_word_test_and_clear_mask(
+dev->config + dev->exp.pm_cap + PCI_PM_CTRL,
+PCI_PM_CTRL_STATE_MASK);
+}
 }
 }
 
-- 
MST

[PULL v3 29/41] hw/mem/cxl_type3: Allow to release extent superset in QMP interface

2024-06-05 Thread Michael S. Tsirkin

From: Fan Ni 

Before the change, the QMP interface used to add/release DC extents
only allows releasing an extent whose DPA range is contained by a single
accepted extent in the device.

With the change, we relax the constraint.  As long as the DPA range of
the extent is covered by accepted extents, we allow the release.

Tested-by: Svetly Todorov 
Reviewed-by: Gregory Price 
Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
Message-Id: <20240523174651.1089554-15-nifan@gmail.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/mem/cxl_type3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 0d18259ec0..5d4a1276be 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1947,7 +1947,7 @@ static void 
qmp_cxl_process_dynamic_capacity_prescriptive(const char *path,
"cannot release extent with pending DPA range");
 return;
 }
-if (!cxl_extents_contains_dpa_range(>dc.extents, dpa, len)) {
+if (!ct3_test_region_block_backed(dcd, dpa, len)) {
 error_setg(errp,
"cannot release extent with non-existing DPA 
range");
 return;
-- 
MST

[PULL v3 39/41] Revert "docs/specs/pvpanic: mark shutdown event as not implemented"

2024-06-05 Thread Michael S. Tsirkin

From: Thomas Weißschuh 

The missing functionality has been implemented now.

This reverts commit e739d1935c461d0668057e9dbba9d06f728d29ec.

Signed-off-by: Thomas Weißschuh 
Message-Id: <20240527-pvpanic-shutdown-v8-8-5a28ec025...@t-8ch.de>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 docs/specs/pvpanic.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/specs/pvpanic.rst b/docs/specs/pvpanic.rst
index b0f27860ec..61a80480ed 100644
--- a/docs/specs/pvpanic.rst
+++ b/docs/specs/pvpanic.rst
@@ -29,7 +29,7 @@ bit 1
   a guest panic has happened and will be handled by the guest;
   the host should record it or report it, but should not affect
   the execution of the guest.
-bit 2 (to be implemented)
+bit 2
   a regular guest shutdown has happened and should be processed by the host
 
 PCI Interface
-- 
MST

[PULL v3 33/41] linux-headers: update to 6.10-rc1

2024-06-05 Thread Michael S. Tsirkin

From: Thomas Weißschuh 

Signed-off-by: Thomas Weißschuh 
Message-Id: <20240527-pvpanic-shutdown-v8-2-5a28ec025...@t-8ch.de>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/standard-headers/linux/ethtool.h|  55 
 include/standard-headers/linux/pci_regs.h   |   6 +
 include/standard-headers/linux/pvpanic.h|   7 +-
 include/standard-headers/linux/virtio_bt.h  |   1 -
 include/standard-headers/linux/virtio_mem.h |   2 +
 include/standard-headers/linux/virtio_net.h | 143 
 linux-headers/asm-generic/unistd.h  |   5 +-
 linux-headers/asm-loongarch/kvm.h   |   4 +
 linux-headers/asm-mips/unistd_n32.h |   1 +
 linux-headers/asm-mips/unistd_n64.h |   1 +
 linux-headers/asm-mips/unistd_o32.h |   1 +
 linux-headers/asm-powerpc/unistd_32.h   |   1 +
 linux-headers/asm-powerpc/unistd_64.h   |   1 +
 linux-headers/asm-riscv/kvm.h   |   1 +
 linux-headers/asm-s390/unistd_32.h  |   1 +
 linux-headers/asm-s390/unistd_64.h  |   1 +
 linux-headers/asm-x86/kvm.h |   4 +-
 linux-headers/asm-x86/unistd_32.h   |   1 +
 linux-headers/asm-x86/unistd_64.h   |   1 +
 linux-headers/asm-x86/unistd_x32.h  |   2 +
 linux-headers/linux/kvm.h   |   4 +-
 linux-headers/linux/stddef.h|   8 ++
 linux-headers/linux/vhost.h |  15 +-
 23 files changed, 252 insertions(+), 14 deletions(-)

diff --git a/include/standard-headers/linux/ethtool.h 
b/include/standard-headers/linux/ethtool.h
index 01503784d2..b0b4b68410 100644
--- a/include/standard-headers/linux/ethtool.h
+++ b/include/standard-headers/linux/ethtool.h
@@ -752,6 +752,61 @@ enum ethtool_module_power_mode {
ETHTOOL_MODULE_POWER_MODE_HIGH,
 };
 
+/**
+ * enum ethtool_pse_types - Types of PSE controller.
+ * @ETHTOOL_PSE_UNKNOWN: Type of PSE controller is unknown
+ * @ETHTOOL_PSE_PODL: PSE controller which support PoDL
+ * @ETHTOOL_PSE_C33: PSE controller which support Clause 33 (PoE)
+ */
+enum ethtool_pse_types {
+   ETHTOOL_PSE_UNKNOWN =   1 << 0,
+   ETHTOOL_PSE_PODL =  1 << 1,
+   ETHTOOL_PSE_C33 =   1 << 2,
+};
+
+/**
+ * enum ethtool_c33_pse_admin_state - operational state of the PoDL PSE
+ * functions. IEEE 802.3-2022 30.9.1.1.2 aPSEAdminState
+ * @ETHTOOL_C33_PSE_ADMIN_STATE_UNKNOWN: state of PSE functions is unknown
+ * @ETHTOOL_C33_PSE_ADMIN_STATE_DISABLED: PSE functions are disabled
+ * @ETHTOOL_C33_PSE_ADMIN_STATE_ENABLED: PSE functions are enabled
+ */
+enum ethtool_c33_pse_admin_state {
+   ETHTOOL_C33_PSE_ADMIN_STATE_UNKNOWN = 1,
+   ETHTOOL_C33_PSE_ADMIN_STATE_DISABLED,
+   ETHTOOL_C33_PSE_ADMIN_STATE_ENABLED,
+};
+
+/**
+ * enum ethtool_c33_pse_pw_d_status - power detection status of the PSE.
+ * IEEE 802.3-2022 30.9.1.1.3 aPoDLPSEPowerDetectionStatus:
+ * @ETHTOOL_C33_PSE_PW_D_STATUS_UNKNOWN: PSE status is unknown
+ * @ETHTOOL_C33_PSE_PW_D_STATUS_DISABLED: The enumeration "disabled"
+ * indicates that the PSE State diagram is in the state DISABLED.
+ * @ETHTOOL_C33_PSE_PW_D_STATUS_SEARCHING: The enumeration "searching"
+ * indicates the PSE State diagram is in a state other than those
+ * listed.
+ * @ETHTOOL_C33_PSE_PW_D_STATUS_DELIVERING: The enumeration
+ * "deliveringPower" indicates that the PSE State diagram is in the
+ * state POWER_ON.
+ * @ETHTOOL_C33_PSE_PW_D_STATUS_TEST: The enumeration "test" indicates that
+ * the PSE State diagram is in the state TEST_MODE.
+ * @ETHTOOL_C33_PSE_PW_D_STATUS_FAULT: The enumeration "fault" indicates that
+ * the PSE State diagram is in the state TEST_ERROR.
+ * @ETHTOOL_C33_PSE_PW_D_STATUS_OTHERFAULT: The enumeration "otherFault"
+ * indicates that the PSE State diagram is in the state IDLE due to
+ * the variable error_condition = true.
+ */
+enum ethtool_c33_pse_pw_d_status {
+   ETHTOOL_C33_PSE_PW_D_STATUS_UNKNOWN = 1,
+   ETHTOOL_C33_PSE_PW_D_STATUS_DISABLED,
+   ETHTOOL_C33_PSE_PW_D_STATUS_SEARCHING,
+   ETHTOOL_C33_PSE_PW_D_STATUS_DELIVERING,
+   ETHTOOL_C33_PSE_PW_D_STATUS_TEST,
+   ETHTOOL_C33_PSE_PW_D_STATUS_FAULT,
+   ETHTOOL_C33_PSE_PW_D_STATUS_OTHERFAULT,
+};
+
 /**
  * enum ethtool_podl_pse_admin_state - operational state of the PoDL PSE
  * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState
diff --git a/include/standard-headers/linux/pci_regs.h 
b/include/standard-headers/linux/pci_regs.h
index a39193213f..94c00996e6 100644
--- a/include/standard-headers/linux/pci_regs.h
+++ b/include/standard-headers/linux/pci_regs.h
@@ -1144,8 +1144,14 @@
 #define PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH 0x0003ffff
 
 #define PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX   0x000000ff
+#define PCI_DOE_DATA_OBJECT_DISC_REQ_3_VER 0x0000ff00
 #define PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID 0x0000ffff
 #define PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL

[PULL v3 41/41] hw/cxl: Fix read from bogus memory

2024-06-05 Thread Michael S. Tsirkin

From: Ira Weiny 

Peter and coverity report:

We've passed '&data' to address_space_write(), which means "read
from the address on the stack where the function argument 'data'
lives", so instead of writing 64 bytes of data to the guest,
we'll write 64 bytes which start with a host pointer value and
then continue with whatever happens to be on the host stack
after that.

Indeed the intention was to write 64 bytes of data at the address given.

Fix the parameter to address_space_write().

Reported-by: Peter Maydell 
Link: 
https://lore.kernel.org/all/cafeaca-u4sytgwtksb__y+_+0o2-wwarntm3x8wnhvl1wfh...@mail.gmail.com/
Fixes: 6bda41a69bdc ("hw/cxl: Add clear poison mailbox command support.")
Cc: Jonathan Cameron 
Signed-off-by: Ira Weiny 
Message-Id: <20240531-fix-poison-set-cacheline-v1-1-e3bc7e8f1...@intel.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/mem/cxl_type3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 5d4a1276be..3274e5dcbb 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1292,7 +1292,7 @@ static bool set_cacheline(CXLType3Dev *ct3d, uint64_t 
dpa_offset, uint8_t *data)
 dpa_offset -= (vmr_size + pmr_size);
 }
 
-address_space_write(as, dpa_offset, MEMTXATTRS_UNSPECIFIED, &data,
+address_space_write(as, dpa_offset, MEMTXATTRS_UNSPECIFIED, data,
 CXL_CACHE_LINE_SIZE);
 return true;
 }
-- 
MST

[PULL v3 08/41] vhost/vhost-user: Add VIRTIO_F_NOTIFICATION_DATA to vhost feature bits

2024-06-05 Thread Michael S. Tsirkin

From: Jonah Palmer 

Add support for the VIRTIO_F_NOTIFICATION_DATA feature across a variety
of vhost devices.

The inclusion of VIRTIO_F_NOTIFICATION_DATA in the feature bits arrays
for these devices ensures that the backend is capable of offering and
providing support for this feature, and that it can be disabled if the
backend does not support it.

Tested-by: Lei Yang 
Reviewed-by: Eugenio Pérez 
Signed-off-by: Jonah Palmer 
Message-Id: <20240315165557.26942-6-jonah.pal...@oracle.com>
Acked-by: Srujana Challa 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/block/vhost-user-blk.c| 1 +
 hw/net/vhost_net.c   | 2 ++
 hw/scsi/vhost-scsi.c | 1 +
 hw/scsi/vhost-user-scsi.c| 1 +
 hw/virtio/vhost-user-fs.c| 2 +-
 hw/virtio/vhost-user-vsock.c | 1 +
 net/vhost-vdpa.c | 1 +
 7 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 9e6bbc6950..bc2677dbef 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -51,6 +51,7 @@ static const int user_feature_bits[] = {
 VIRTIO_F_RING_PACKED,
 VIRTIO_F_IOMMU_PLATFORM,
 VIRTIO_F_RING_RESET,
+VIRTIO_F_NOTIFICATION_DATA,
 VHOST_INVALID_FEATURE_BIT
 };
 
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index fd1a93701a..18898afe81 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -48,6 +48,7 @@ static const int kernel_feature_bits[] = {
 VIRTIO_F_IOMMU_PLATFORM,
 VIRTIO_F_RING_PACKED,
 VIRTIO_F_RING_RESET,
+VIRTIO_F_NOTIFICATION_DATA,
 VIRTIO_NET_F_HASH_REPORT,
 VHOST_INVALID_FEATURE_BIT
 };
@@ -55,6 +56,7 @@ static const int kernel_feature_bits[] = {
 /* Features supported by others. */
 static const int user_feature_bits[] = {
 VIRTIO_F_NOTIFY_ON_EMPTY,
+VIRTIO_F_NOTIFICATION_DATA,
 VIRTIO_RING_F_INDIRECT_DESC,
 VIRTIO_RING_F_EVENT_IDX,
 
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index ae26bc19a4..3d5fe0994d 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -38,6 +38,7 @@ static const int kernel_feature_bits[] = {
 VIRTIO_RING_F_EVENT_IDX,
 VIRTIO_SCSI_F_HOTPLUG,
 VIRTIO_F_RING_RESET,
+VIRTIO_F_NOTIFICATION_DATA,
 VHOST_INVALID_FEATURE_BIT
 };
 
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index a63b1f4948..0b050805a8 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -36,6 +36,7 @@ static const int user_feature_bits[] = {
 VIRTIO_RING_F_EVENT_IDX,
 VIRTIO_SCSI_F_HOTPLUG,
 VIRTIO_F_RING_RESET,
+VIRTIO_F_NOTIFICATION_DATA,
 VHOST_INVALID_FEATURE_BIT
 };
 
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
index cca2cd41be..ae48cc1c96 100644
--- a/hw/virtio/vhost-user-fs.c
+++ b/hw/virtio/vhost-user-fs.c
@@ -33,7 +33,7 @@ static const int user_feature_bits[] = {
 VIRTIO_F_RING_PACKED,
 VIRTIO_F_IOMMU_PLATFORM,
 VIRTIO_F_RING_RESET,
-
+VIRTIO_F_NOTIFICATION_DATA,
 VHOST_INVALID_FEATURE_BIT
 };
 
diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c
index 9431b9792c..802b44a07d 100644
--- a/hw/virtio/vhost-user-vsock.c
+++ b/hw/virtio/vhost-user-vsock.c
@@ -21,6 +21,7 @@ static const int user_feature_bits[] = {
 VIRTIO_RING_F_INDIRECT_DESC,
 VIRTIO_RING_F_EVENT_IDX,
 VIRTIO_F_NOTIFY_ON_EMPTY,
+VIRTIO_F_NOTIFICATION_DATA,
 VHOST_INVALID_FEATURE_BIT
 };
 
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index eda714d1a4..daa38428c5 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -62,6 +62,7 @@ const int vdpa_feature_bits[] = {
 VIRTIO_F_RING_PACKED,
 VIRTIO_F_RING_RESET,
 VIRTIO_F_VERSION_1,
+VIRTIO_F_NOTIFICATION_DATA,
 VIRTIO_NET_F_CSUM,
 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
 VIRTIO_NET_F_CTRL_MAC_ADDR,
-- 
MST

[PULL v3 23/41] hw/mem/cxl_type3: Add host backend and address space handling for DC regions

2024-06-05 Thread Michael S. Tsirkin

From: Fan Ni 

Add (file/memory backed) host backend for DCD. All the dynamic capacity
regions will share a single, large enough host backend. Set up address
space for DC regions to support read/write operations to dynamic capacity
for DCD.

With the change, the following support is added:
1. Add a new property to type3 device "volatile-dc-memdev" to point to host
   memory backend for dynamic capacity. Currently, all DC regions share one
   host backend;
2. Add namespace for dynamic capacity for read/write support;
3. Create cdat entries for each dynamic capacity region.

Reviewed-by: Gregory Price 
Signed-off-by: Fan Ni 
Message-Id: <20240523174651.1089554-9-nifan@gmail.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/cxl/cxl_device.h |   8 ++
 hw/cxl/cxl-mailbox-utils.c  |  16 +++-
 hw/mem/cxl_type3.c  | 177 +---
 3 files changed, 164 insertions(+), 37 deletions(-)

diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index f7f56b44e3..c2c3df0d2a 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -467,6 +467,14 @@ struct CXLType3Dev {
 uint64_t poison_list_overflow_ts;
 
 struct dynamic_capacity {
+HostMemoryBackend *host_dc;
+AddressSpace host_dc_as;
+/*
+ * total_capacity is equivalent to the dynamic capability
+ * memory region size.
+ */
+uint64_t total_capacity; /* 256M aligned */
+
 uint8_t num_regions; /* 0-8 regions */
 CXLDCRegion regions[DCD_MAX_NUM_REGION];
 } dc;
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index b592473587..6ad227f112 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -622,7 +622,8 @@ static CXLRetCode cmd_firmware_update_get_info(const struct 
cxl_cmd *cmd,
size_t *len_out,
CXLCCI *cci)
 {
-CXLDeviceState *cxl_dstate = &CXL_TYPE3(cci->d)->cxl_dstate;
+CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate;
 struct {
 uint8_t slots_supported;
 uint8_t slot_info;
@@ -636,7 +637,8 @@ static CXLRetCode cmd_firmware_update_get_info(const struct 
cxl_cmd *cmd,
 QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50);
 
 if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) ||
-(cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER)) {
+(cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) ||
+(ct3d->dc.total_capacity < CXL_CAPACITY_MULTIPLIER)) {
 return CXL_MBOX_INTERNAL_ERROR;
 }
 
@@ -793,7 +795,8 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate;
 
 if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
-(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
+(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
+(!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
 return CXL_MBOX_INTERNAL_ERROR;
 }
 
@@ -835,9 +838,11 @@ static CXLRetCode cmd_ccls_get_partition_info(const struct 
cxl_cmd *cmd,
 uint64_t next_pmem;
 } QEMU_PACKED *part_info = (void *)payload_out;
 QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20);
+CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
 
 if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) ||
-(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) {
+(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER)) ||
+(!QEMU_IS_ALIGNED(ct3d->dc.total_capacity, CXL_CAPACITY_MULTIPLIER))) {
 return CXL_MBOX_INTERNAL_ERROR;
 }
 
@@ -1179,7 +1184,8 @@ static CXLRetCode cmd_media_clear_poison(const struct 
cxl_cmd *cmd,
 struct clear_poison_pl *in = (void *)payload_in;
 
 dpa = ldq_le_p(&in->dpa);
-if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size) {
+if (dpa + CXL_CACHE_LINE_SIZE > cxl_dstate->static_mem_size +
+ct3d->dc.total_capacity) {
 return CXL_MBOX_INVALID_PA;
 }
 
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 51be50ce87..658570aa1a 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -45,7 +45,8 @@ enum {
 
 static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
   int dsmad_handle, uint64_t size,
-  bool is_pmem, uint64_t dpa_base)
+  bool is_pmem, bool is_dynamic,
+  uint64_t dpa_base)
 {
 CDATDsmas *dsmas;
 CDATDslbis *dslbis0;
@@ -61,7 +62,8 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader 
**cdat_table,
 .length = sizeof(*dsmas),
 },

[PULL v3 19/41] hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative and mailbox command support

2024-06-05 Thread Michael S. Tsirkin

From: Fan Ni 

Per cxl spec r3.1, add dynamic capacity (DC) region representative based on
Table 8-165 and extend the cxl type3 device definition to include DC region
information. Also, based on info in 8.2.9.9.9.1, add 'Get Dynamic Capacity
Configuration' mailbox support.

Note: we store region decode length as byte-wise length on the device, which
should be divided by 256 * MiB before being returned to the host
for "Get Dynamic Capacity Configuration" mailbox command per
specification.

Reviewed-by: Gregory Price 
Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
Message-Id: <20240523174651.1089554-5-nifan@gmail.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/cxl/cxl_device.h | 16 +++
 hw/cxl/cxl-mailbox-utils.c  | 96 +
 2 files changed, 112 insertions(+)

diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index a5f8e25020..e839370266 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -422,6 +422,17 @@ typedef struct CXLPoison {
 typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
 #define CXL_POISON_LIST_LIMIT 256
 
+#define DCD_MAX_NUM_REGION 8
+
+typedef struct CXLDCRegion {
+uint64_t base;   /* aligned to 256*MiB */
+uint64_t decode_len; /* aligned to 256*MiB */
+uint64_t len;
+uint64_t block_size;
+uint32_t dsmadhandle;
+uint8_t flags;
+} CXLDCRegion;
+
 struct CXLType3Dev {
 /* Private */
 PCIDevice parent_obj;
@@ -454,6 +465,11 @@ struct CXLType3Dev {
 unsigned int poison_list_cnt;
 bool poison_list_overflowed;
 uint64_t poison_list_overflow_ts;
+
+struct dynamic_capacity {
+uint8_t num_regions; /* 0-8 regions */
+CXLDCRegion regions[DCD_MAX_NUM_REGION];
+} dc;
 };
 
 #define TYPE_CXL_TYPE3 "cxl-type3"
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 626acc1d0d..bede28e3c8 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -22,6 +22,8 @@
 
 #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
 #define CXL_DC_EVENT_LOG_SIZE 8
+#define CXL_NUM_EXTENTS_SUPPORTED 512
+#define CXL_NUM_TAGS_SUPPORTED 0
 
 /*
  * How to add a new command, example. The command set FOO, with cmd BAR.
@@ -80,6 +82,8 @@ enum {
 #define GET_POISON_LIST0x0
 #define INJECT_POISON  0x1
 #define CLEAR_POISON   0x2
+DCD_CONFIG  = 0x48,
+#define GET_DC_CONFIG  0x0
 PHYSICAL_SWITCH = 0x51,
 #define IDENTIFY_SWITCH_DEVICE  0x0
 #define GET_PHYSICAL_PORT_STATE 0x1
@@ -1238,6 +1242,88 @@ static CXLRetCode cmd_media_clear_poison(const struct 
cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * CXL r3.1 section 8.2.9.9.9.1: Get Dynamic Capacity Configuration
+ * (Opcode: 4800h)
+ */
+static CXLRetCode cmd_dcd_get_dyn_cap_config(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+struct {
+uint8_t region_cnt;
+uint8_t start_rid;
+} QEMU_PACKED *in = (void *)payload_in;
+struct {
+uint8_t num_regions;
+uint8_t regions_returned;
+uint8_t rsvd1[6];
+struct {
+uint64_t base;
+uint64_t decode_len;
+uint64_t region_len;
+uint64_t block_size;
+uint32_t dsmadhandle;
+uint8_t flags;
+uint8_t rsvd2[3];
+} QEMU_PACKED records[];
+} QEMU_PACKED *out = (void *)payload_out;
+struct {
+uint32_t num_extents_supported;
+uint32_t num_extents_available;
+uint32_t num_tags_supported;
+uint32_t num_tags_available;
+} QEMU_PACKED *extra_out;
+uint16_t record_count;
+uint16_t i;
+uint16_t out_pl_len;
+uint8_t start_rid;
+
+start_rid = in->start_rid;
+if (start_rid >= ct3d->dc.num_regions) {
+return CXL_MBOX_INVALID_INPUT;
+}
+
+record_count = MIN(ct3d->dc.num_regions - in->start_rid, in->region_cnt);
+
+out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+extra_out = (void *)(payload_out + out_pl_len);
+out_pl_len += sizeof(*extra_out);
+assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
+
+out->num_regions = ct3d->dc.num_regions;
+out->regions_returned = record_count;
+for (i = 0; i < record_count; i++) {
+stq_le_p(&out->records[i].base,
+ ct3d->dc.regions[start_rid + i].base);
+stq_le_p(&out->records[i].decode_len,
+ ct3d->dc.regions[start_rid + i].decode_len /
+ CXL_CAPACITY_MULTIPLIER);
+stq_le_p(&out->records[i].region_len,
+

[PULL v3 25/41] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release dynamic capacity response

2024-06-05 Thread Michael S. Tsirkin

From: Fan Ni 

Per CXL spec 3.1, two mailbox commands are implemented:
Add Dynamic Capacity Response (Opcode 4802h) 8.2.9.9.9.3, and
Release Dynamic Capacity (Opcode 4803h) 8.2.9.9.9.4.

For the process of the above two commands, we use two-pass approach.
Pass 1: Check whether the input payload is valid or not; if not, skip
Pass 2 and return mailbox process error.
Pass 2: Do the real work--add or release extents, respectively.

Tested-by: Svetly Todorov 
Reviewed-by: Gregory Price 
Signed-off-by: Fan Ni 
Message-Id: <20240523174651.1089554-11-nifan@gmail.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/cxl/cxl_device.h |   4 +
 hw/cxl/cxl-mailbox-utils.c  | 394 
 hw/mem/cxl_type3.c  |  11 +
 3 files changed, 409 insertions(+)

diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 6aec6ac983..df3511e91b 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -551,4 +551,8 @@ void cxl_event_irq_assert(CXLType3Dev *ct3d);
 
 void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d);
 
+CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len);
+
+void cxl_remove_extent_from_extent_list(CXLDCExtentList *list,
+CXLDCExtent *extent);
 #endif
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 7872d2f3e6..e322407fb3 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -19,6 +19,7 @@
 #include "qemu/units.h"
 #include "qemu/uuid.h"
 #include "sysemu/hostmem.h"
+#include "qemu/range.h"
 
 #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
 #define CXL_DC_EVENT_LOG_SIZE 8
@@ -85,6 +86,8 @@ enum {
 DCD_CONFIG  = 0x48,
 #define GET_DC_CONFIG  0x0
 #define GET_DYN_CAP_EXT_LIST   0x1
+#define ADD_DYN_CAP_RSP0x2
+#define RELEASE_DYN_CAP0x3
 PHYSICAL_SWITCH = 0x51,
 #define IDENTIFY_SWITCH_DEVICE  0x0
 #define GET_PHYSICAL_PORT_STATE 0x1
@@ -1398,6 +1401,391 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const 
struct cxl_cmd *cmd,
 return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * Check whether any bit between addr[nr, nr+size) is set,
+ * return true if any bit is set, otherwise return false
+ */
+static bool test_any_bits_set(const unsigned long *addr, unsigned long nr,
+  unsigned long size)
+{
+unsigned long res = find_next_bit(addr, size + nr, nr);
+
+return res < nr + size;
+}
+
+CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len)
+{
+int i;
+CXLDCRegion *region = &ct3d->dc.regions[0];
+
+if (dpa < region->base ||
+dpa >= region->base + ct3d->dc.total_capacity) {
+return NULL;
+}
+
+/*
+ * CXL r3.1 section 9.13.3: Dynamic Capacity Device (DCD)
+ *
+ * Regions are used in increasing-DPA order, with Region 0 being used for
+ * the lowest DPA of Dynamic Capacity and Region 7 for the highest DPA.
+ * So check from the last region to find where the dpa belongs. Extents that
+ * cross multiple regions are not allowed.
+ */
+for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
+region = &ct3d->dc.regions[i];
+if (dpa >= region->base) {
+if (dpa + len > region->base + region->len) {
+return NULL;
+}
+return region;
+}
+}
+
+return NULL;
+}
+
+static void cxl_insert_extent_to_extent_list(CXLDCExtentList *list,
+ uint64_t dpa,
+ uint64_t len,
+ uint8_t *tag,
+ uint16_t shared_seq)
+{
+CXLDCExtent *extent;
+
+extent = g_new0(CXLDCExtent, 1);
+extent->start_dpa = dpa;
+extent->len = len;
+if (tag) {
+memcpy(extent->tag, tag, 0x10);
+}
+extent->shared_seq = shared_seq;
+
+QTAILQ_INSERT_TAIL(list, extent, node);
+}
+
+void cxl_remove_extent_from_extent_list(CXLDCExtentList *list,
+CXLDCExtent *extent)
+{
+QTAILQ_REMOVE(list, extent, node);
+g_free(extent);
+}
+
+/*
+ * CXL r3.1 Table 8-168: Add Dynamic Capacity Response Input Payload
+ * CXL r3.1 Table 8-170: Release Dynamic Capacity Input Payload
+ */
+typedef struct CXLUpdateDCExtentListInPl {
+uint32_t num_entries_updated;
+uint8_t flags;
+uint8_t rsvd[3];
+/* CXL r3.1 Table 8-169: Updated Extent */
+struct {
+uint64_t start_dpa;
+uint64_t len;
+uint8_t rsvd[8];
+} QEMU_PACKED updated_entries[];
+} QEMU_PACKED CXLUpdateDCExtentListInPl;
+
+/*
+ * For the extents in the extent list to operate, check whether they are valid
+ * 1. The extent should be in the range of a valid DC region;
+ * 2. The extent should not cross multiple regions;
+ * 3. The start DPA

[PULL v3 13/41] vhost-user-gpu: fix import of DMABUF

2024-06-05 Thread Michael S. Tsirkin

From: Marc-André Lureau 

When using vhost-user-gpu with GL, qemu -display gtk doesn't show output
and prints: qemu: eglCreateImageKHR failed

Since commit 9ac06df8b ("virtio-gpu-udmabuf: correct naming of
QemuDmaBuf size properties"), egl_dmabuf_import_texture() uses
backing_{width,height} for the texture dimension.

Fixes: 9ac06df8b ("virtio-gpu-udmabuf: correct naming of QemuDmaBuf size 
properties")
Signed-off-by: Marc-André Lureau 
Message-Id: <20240515105237.1074116-1-marcandre.lur...@redhat.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/display/vhost-user-gpu.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c
index e4b398d26c..63c64ddde6 100644
--- a/hw/display/vhost-user-gpu.c
+++ b/hw/display/vhost-user-gpu.c
@@ -281,8 +281,9 @@ vhost_user_gpu_handle_display(VhostUserGPU *g, 
VhostUserGpuMsg *msg)
 modifier = m2->modifier;
 }
 
-dmabuf = qemu_dmabuf_new(m->fd_width, m->fd_height,
- m->fd_stride, 0, 0, 0, 0,
+dmabuf = qemu_dmabuf_new(m->width, m->height,
+ m->fd_stride, 0, 0,
+ m->fd_width, m->fd_height,
  m->fd_drm_fourcc, modifier,
  fd, false, m->fd_flags &
  VIRTIO_GPU_RESOURCE_FLAG_Y_0_TOP);
-- 
MST

[PULL v3 36/41] hw/misc/pvpanic: add support for normal shutdowns

2024-06-05 Thread Michael S. Tsirkin

From: Thomas Weißschuh 

Shutdown requests are normally hardware dependent.
By extending pvpanic to also handle shutdown requests, guests can
submit such requests with an easily implementable and cross-platform
mechanism.

Acked-by: Cornelia Huck 
Signed-off-by: Thomas Weißschuh 
Message-Id: <20240527-pvpanic-shutdown-v8-5-5a28ec025...@t-8ch.de>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/misc/pvpanic.h | 4 +++-
 include/sysemu/runstate.h | 1 +
 hw/misc/pvpanic.c | 5 +
 system/runstate.c | 5 +
 4 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/include/hw/misc/pvpanic.h b/include/hw/misc/pvpanic.h
index 947468b81b..04986537af 100644
--- a/include/hw/misc/pvpanic.h
+++ b/include/hw/misc/pvpanic.h
@@ -20,7 +20,9 @@
 
 #include "standard-headers/linux/pvpanic.h"
 
-#define PVPANIC_EVENTS (PVPANIC_PANICKED | PVPANIC_CRASH_LOADED)
+#define PVPANIC_EVENTS (PVPANIC_PANICKED | \
+PVPANIC_CRASH_LOADED | \
+PVPANIC_SHUTDOWN)
 
 #define TYPE_PVPANIC_ISA_DEVICE "pvpanic"
 #define TYPE_PVPANIC_PCI_DEVICE "pvpanic-pci"
diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
index 0117d243c4..e210a37abf 100644
--- a/include/sysemu/runstate.h
+++ b/include/sysemu/runstate.h
@@ -104,6 +104,7 @@ void qemu_system_killed(int signal, pid_t pid);
 void qemu_system_reset(ShutdownCause reason);
 void qemu_system_guest_panicked(GuestPanicInformation *info);
 void qemu_system_guest_crashloaded(GuestPanicInformation *info);
+void qemu_system_guest_pvshutdown(void);
 bool qemu_system_dump_in_progress(void);
 
 #endif
diff --git a/hw/misc/pvpanic.c b/hw/misc/pvpanic.c
index a4982cc592..0e9505451a 100644
--- a/hw/misc/pvpanic.c
+++ b/hw/misc/pvpanic.c
@@ -40,6 +40,11 @@ static void handle_event(int event)
 qemu_system_guest_crashloaded(NULL);
 return;
 }
+
+if (event & PVPANIC_SHUTDOWN) {
+qemu_system_guest_pvshutdown();
+return;
+}
 }
 
 /* return supported events on read */
diff --git a/system/runstate.c b/system/runstate.c
index cb4905a40f..83055f3278 100644
--- a/system/runstate.c
+++ b/system/runstate.c
@@ -585,6 +585,11 @@ void qemu_system_guest_crashloaded(GuestPanicInformation 
*info)
 qapi_free_GuestPanicInformation(info);
 }
 
+void qemu_system_guest_pvshutdown(void)
+{
+qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+}
+
 void qemu_system_reset_request(ShutdownCause reason)
 {
 if (reboot_action == REBOOT_ACTION_SHUTDOWN &&
-- 
MST

[PULL v3 06/41] virtio-mmio: Handle extra notification data

2024-06-05 Thread Michael S. Tsirkin

From: Jonah Palmer 

Add support to virtio-mmio devices for handling the extra data sent from
the driver to the device when the VIRTIO_F_NOTIFICATION_DATA transport
feature has been negotiated.

The extra data that's passed to the virtio-mmio device when this feature
is enabled varies depending on the device's virtqueue layout.

The data passed to the virtio-mmio device is in the same format as the
data passed to virtio-pci devices.

Signed-off-by: Jonah Palmer 
Message-Id: <20240315165557.26942-4-jonah.pal...@oracle.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/virtio-mmio.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index 22f9fbcf5a..320428ac0d 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -248,6 +248,7 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, 
uint64_t value,
 {
 VirtIOMMIOProxy *proxy = (VirtIOMMIOProxy *)opaque;
 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+uint16_t vq_idx;
 
 trace_virtio_mmio_write_offset(offset, value);
 
@@ -407,8 +408,14 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, 
uint64_t value,
 }
 break;
 case VIRTIO_MMIO_QUEUE_NOTIFY:
-if (value < VIRTIO_QUEUE_MAX) {
-virtio_queue_notify(vdev, value);
+vq_idx = value;
+if (vq_idx < VIRTIO_QUEUE_MAX && virtio_queue_get_num(vdev, vq_idx)) {
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) {
+VirtQueue *vq = virtio_get_queue(vdev, vq_idx);
+
+virtio_queue_set_shadow_avail_idx(vq, (value >> 16) & 0xFFFF);
+}
+virtio_queue_notify(vdev, vq_idx);
 }
 break;
 case VIRTIO_MMIO_INTERRUPT_ACK:
-- 
MST

[PULL v3 15/41] vhost-user: fix lost reconnect again

2024-06-05 Thread Michael S. Tsirkin

From: Li Feng 

When the vhost-user is reconnecting to the backend, and if the vhost-user fails
at the get_features in vhost_dev_init(), then the reconnect will fail
and it will not be retriggered forever.

The reason is:
When the vhost-user fail at get_features, the vhost_dev_cleanup will be called
immediately.

vhost_dev_cleanup calls 'memset(hdev, 0, sizeof(struct vhost_dev))'.

The reconnect path is:
vhost_user_blk_event
   vhost_user_async_close(.. vhost_user_blk_disconnect ..)
 qemu_chr_fe_set_handlers <- clear the notifier callback
   schedule vhost_user_async_close_bh

The vhost->vdev is null, so the vhost_user_blk_disconnect will not be
called, then the event fd callback will not be reinstalled.

We need to ensure that even if vhost_dev_init initialization fails, the event
handler still needs to be reinstalled when s->connected is false.

All vhost-user devices have this issue, including vhost-user-blk/scsi.

Fixes: 71e076a07d ("hw/virtio: generalise CHR_EVENT_CLOSED handling")

Signed-off-by: Li Feng 
Message-Id: <20240516025753.130171-3-fen...@smartx.com>
Reviewed-by: Raphael Norwitz 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/block/vhost-user-blk.c   |  3 ++-
 hw/scsi/vhost-user-scsi.c   |  3 ++-
 hw/virtio/vhost-user-base.c |  3 ++-
 hw/virtio/vhost-user.c  | 10 +-
 4 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 15cc24d017..fdbc30b9ce 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -354,7 +354,7 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
 VHostUserBlk *s = VHOST_USER_BLK(vdev);
 
 if (!s->connected) {
-return;
+goto done;
 }
 s->connected = false;
 
@@ -362,6 +362,7 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
 
 vhost_dev_cleanup(&s->dev);
 
+done:
 /* Re-instate the event handler for new connections */
 qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event,
  NULL, dev, NULL, true);
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index 421cd654f8..cc91ade525 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -182,7 +182,7 @@ static void vhost_user_scsi_disconnect(DeviceState *dev)
 VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
 
 if (!s->connected) {
-return;
+goto done;
 }
 s->connected = false;
 
@@ -190,6 +190,7 @@ static void vhost_user_scsi_disconnect(DeviceState *dev)
 
 vhost_dev_cleanup(&vsc->dev);
 
+done:
 /* Re-instate the event handler for new connections */
 qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL,
  vhost_user_scsi_event, NULL, dev, NULL, true);
diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c
index 4b54255682..11e72b1e3b 100644
--- a/hw/virtio/vhost-user-base.c
+++ b/hw/virtio/vhost-user-base.c
@@ -225,13 +225,14 @@ static void vub_disconnect(DeviceState *dev)
 VHostUserBase *vub = VHOST_USER_BASE(vdev);
 
 if (!vub->connected) {
-return;
+goto done;
 }
 vub->connected = false;
 
 vub_stop(vdev);
 vhost_dev_cleanup(&vub->vhost_dev);
 
+done:
 /* Re-instate the event handler for new connections */
 qemu_chr_fe_set_handlers(&vub->chardev,
  NULL, NULL, vub_event,
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index c929097e87..c407ea8939 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -2781,16 +2781,8 @@ typedef struct {
 static void vhost_user_async_close_bh(void *opaque)
 {
 VhostAsyncCallback *data = opaque;
-struct vhost_dev *vhost = data->vhost;
 
-/*
- * If the vhost_dev has been cleared in the meantime there is
- * nothing left to do as some other path has completed the
- * cleanup.
- */
-if (vhost->vdev) {
-data->cb(data->dev);
-}
+data->cb(data->dev);
 
 g_free(data);
 }
-- 
MST

[PULL v3 32/41] scripts/update-linux-headers: Copy setup_data.h to correct directory

2024-06-05 Thread Michael S. Tsirkin

From: Thomas Weißschuh 

Add the missing "include/" path component, so the file ends up in the
correct place like the other headers.

Fixes: 66210a1a30f2 ("scripts/update-linux-headers: Add setup_data.h to import 
list")
Signed-off-by: Thomas Weißschuh 
Message-Id: <20240527-pvpanic-shutdown-v8-1-5a28ec025...@t-8ch.de>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Thomas Huth 
---
 scripts/update-linux-headers.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index 8963c39189..a148793bd5 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -158,7 +158,7 @@ for arch in $ARCHLIST; do
 cp_portable "$hdrdir/bootparam.h" \
 "$output/include/standard-headers/asm-$arch"
 cp_portable "$hdrdir/include/asm/setup_data.h" \
-"$output/standard-headers/asm-x86"
+"$output/include/standard-headers/asm-x86"
 fi
 if [ $arch = riscv ]; then
 cp "$hdrdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/"
-- 
MST

[PULL v3 31/41] hw/acpi: Insert an acpi-generic-node base under acpi-generic-initiator

2024-06-05 Thread Michael S. Tsirkin

From: Jonathan Cameron 

This will simplify reuse when adding acpi-generic-port.
Note that some error_printf() messages will now print acpi-generic-node
whereas others will move to type specific cases in next patch so
are left alone for now.

Signed-off-by: Jonathan Cameron 
Message-Id: <20240524100507.32106-3-jonathan.came...@huawei.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/acpi/acpi_generic_initiator.h | 15 -
 hw/acpi/acpi_generic_initiator.c | 80 +++-
 2 files changed, 63 insertions(+), 32 deletions(-)

diff --git a/include/hw/acpi/acpi_generic_initiator.h 
b/include/hw/acpi/acpi_generic_initiator.h
index a304bad73e..dd4be19c8f 100644
--- a/include/hw/acpi/acpi_generic_initiator.h
+++ b/include/hw/acpi/acpi_generic_initiator.h
@@ -8,15 +8,26 @@
 
 #include "qom/object_interfaces.h"
 
-#define TYPE_ACPI_GENERIC_INITIATOR "acpi-generic-initiator"
+/*
+ * Abstract type to be used as base for
+ * - acpi-generic-initiator
+ * - acpi-generic-port
+ */
+#define TYPE_ACPI_GENERIC_NODE "acpi-generic-node"
 
-typedef struct AcpiGenericInitiator {
+typedef struct AcpiGenericNode {
 /* private */
 Object parent;
 
 /* public */
 char *pci_dev;
 uint16_t node;
+} AcpiGenericNode;
+
+#define TYPE_ACPI_GENERIC_INITIATOR "acpi-generic-initiator"
+
+typedef struct AcpiGenericInitiator {
+AcpiGenericNode parent;
 } AcpiGenericInitiator;
 
 /*
diff --git a/hw/acpi/acpi_generic_initiator.c b/hw/acpi/acpi_generic_initiator.c
index 18a939b0e5..c054e0e27d 100644
--- a/hw/acpi/acpi_generic_initiator.c
+++ b/hw/acpi/acpi_generic_initiator.c
@@ -10,45 +10,61 @@
 #include "hw/pci/pci_device.h"
 #include "qemu/error-report.h"
 
-typedef struct AcpiGenericInitiatorClass {
+typedef struct AcpiGenericNodeClass {
 ObjectClass parent_class;
+} AcpiGenericNodeClass;
+
+typedef struct AcpiGenericInitiatorClass {
+ AcpiGenericNodeClass parent_class;
 } AcpiGenericInitiatorClass;
 
+OBJECT_DEFINE_ABSTRACT_TYPE(AcpiGenericNode, acpi_generic_node,
+ACPI_GENERIC_NODE, OBJECT)
+
+OBJECT_DECLARE_SIMPLE_TYPE(AcpiGenericNode, ACPI_GENERIC_NODE)
+
 OBJECT_DEFINE_TYPE_WITH_INTERFACES(AcpiGenericInitiator, 
acpi_generic_initiator,
-   ACPI_GENERIC_INITIATOR, OBJECT,
+   ACPI_GENERIC_INITIATOR, ACPI_GENERIC_NODE,
{ TYPE_USER_CREATABLE },
{ NULL })
 
 OBJECT_DECLARE_SIMPLE_TYPE(AcpiGenericInitiator, ACPI_GENERIC_INITIATOR)
 
+static void acpi_generic_node_init(Object *obj)
+{
+AcpiGenericNode *gn = ACPI_GENERIC_NODE(obj);
+
+gn->node = MAX_NODES;
+gn->pci_dev = NULL;
+}
+
 static void acpi_generic_initiator_init(Object *obj)
 {
-AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
+}
 
-gi->node = MAX_NODES;
-gi->pci_dev = NULL;
+static void acpi_generic_node_finalize(Object *obj)
+{
+AcpiGenericNode *gn = ACPI_GENERIC_NODE(obj);
+
+g_free(gn->pci_dev);
 }
 
 static void acpi_generic_initiator_finalize(Object *obj)
 {
-AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
-
-g_free(gi->pci_dev);
 }
 
-static void acpi_generic_initiator_set_pci_device(Object *obj, const char *val,
-  Error **errp)
+static void acpi_generic_node_set_pci_device(Object *obj, const char *val,
+ Error **errp)
 {
-AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
+AcpiGenericNode *gn = ACPI_GENERIC_NODE(obj);
 
-gi->pci_dev = g_strdup(val);
+gn->pci_dev = g_strdup(val);
 }
-
-static void acpi_generic_initiator_set_node(Object *obj, Visitor *v,
-const char *name, void *opaque,
-Error **errp)
+static void acpi_generic_node_set_node(Object *obj, Visitor *v,
+   const char *name, void *opaque,
+   Error **errp)
 {
-AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
+AcpiGenericNode *gn = ACPI_GENERIC_NODE(obj);
 MachineState *ms = MACHINE(qdev_get_machine());
 uint32_t value;
 
@@ -58,20 +74,24 @@ static void acpi_generic_initiator_set_node(Object *obj, 
Visitor *v,
 
 if (value >= MAX_NODES) {
 error_printf("%s: Invalid NUMA node specified\n",
- TYPE_ACPI_GENERIC_INITIATOR);
+ TYPE_ACPI_GENERIC_NODE);
 exit(1);
 }
 
-gi->node = value;
-ms->numa_state->nodes[gi->node].has_gi = true;
+gn->node = value;
+ms->numa_state->nodes[gn->node].has_gi = true;
+}
+
+static void acpi_generic_node_class_init(ObjectClass *oc, void *data)
+{
+object_class_property_add_str(oc, "pci-dev", NULL,
+acpi_generic_node_set_pci_device);
+object_class_property_add(oc, "node", "int", NULL,
+acpi_generic_node_set_node, NULL, NULL);
 }
 
 static void

[PULL v3 02/41] vhost: Perform memory section dirty scans once per iteration

2024-06-05 Thread Michael S. Tsirkin

From: Si-Wei Liu 

On setups with one or more virtio-net devices with vhost on,
the cost of each dirty tracking iteration increases with the
number of queues that are set up, e.g. on idle guest migration the
following is observed with virtio-net with vhost=on:

48 queues -> 78.11%  [.] vhost_dev_sync_region.isra.13
8 queues -> 40.50%   [.] vhost_dev_sync_region.isra.13
1 queue -> 6.89% [.] vhost_dev_sync_region.isra.13
2 devices, 1 queue -> 18.60%  [.] vhost_dev_sync_region.isra.14

With high memory rates the symptom is a lack of convergence as soon
as there is a vhost device with a sufficiently high number of queues,
or a sufficient number of vhost devices.

On every migration iteration (every 100msecs) it will redundantly
query the *shared log* once for every queue configured with vhost
that exists in the guest. For the virtqueue data, this is necessary,
but not for the memory sections which are the same. So essentially
we end up scanning the dirty log too often.

To fix that, select a vhost device responsible for scanning the
log with regards to memory sections dirty tracking. It is selected
when we enable the logger (during migration) and cleared when we
disable the logger. If the vhost logger device goes away for some
reason, the logger will be re-selected from the rest of vhost
devices.

After making mem-section logger a singleton instance, constant cost
of 7%-9% (like the 1 queue report) will be seen, no matter how many
queues or how many vhost devices are configured:

48 queues -> 8.71%[.] vhost_dev_sync_region.isra.13
2 devices, 8 queues -> 7.97%   [.] vhost_dev_sync_region.isra.14

Co-developed-by: Joao Martins 
Signed-off-by: Joao Martins 
Signed-off-by: Si-Wei Liu 
Message-Id: <1710448055-11709-2-git-send-email-si-wei@oracle.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Acked-by: Jason Wang 
---
 include/hw/virtio/vhost.h |  1 +
 hw/virtio/vhost.c | 67 +++
 2 files changed, 62 insertions(+), 6 deletions(-)

diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 02477788df..d75faf46e9 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -129,6 +129,7 @@ struct vhost_dev {
 void *opaque;
 struct vhost_log *log;
 QLIST_ENTRY(vhost_dev) entry;
+QLIST_ENTRY(vhost_dev) logdev_entry;
 QLIST_HEAD(, vhost_iommu) iommu_list;
 IOMMUNotifier n;
 const VhostDevConfigOps *config_ops;
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index a1e8b79e1a..06fc71746e 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -45,6 +45,7 @@
 
 static struct vhost_log *vhost_log[VHOST_BACKEND_TYPE_MAX];
 static struct vhost_log *vhost_log_shm[VHOST_BACKEND_TYPE_MAX];
+static QLIST_HEAD(, vhost_dev) vhost_log_devs[VHOST_BACKEND_TYPE_MAX];
 
 /* Memslots used by backends that support private memslots (without an fd). */
 static unsigned int used_memslots;
@@ -149,6 +150,47 @@ bool vhost_dev_has_iommu(struct vhost_dev *dev)
 }
 }
 
+static inline bool vhost_dev_should_log(struct vhost_dev *dev)
+{
+assert(dev->vhost_ops);
+assert(dev->vhost_ops->backend_type > VHOST_BACKEND_TYPE_NONE);
+assert(dev->vhost_ops->backend_type < VHOST_BACKEND_TYPE_MAX);
+
+return dev == QLIST_FIRST(&vhost_log_devs[dev->vhost_ops->backend_type]);
+}
+
+static inline void vhost_dev_elect_mem_logger(struct vhost_dev *hdev, bool add)
+{
+VhostBackendType backend_type;
+
+assert(hdev->vhost_ops);
+
+backend_type = hdev->vhost_ops->backend_type;
+assert(backend_type > VHOST_BACKEND_TYPE_NONE);
+assert(backend_type < VHOST_BACKEND_TYPE_MAX);
+
+if (add && !QLIST_IS_INSERTED(hdev, logdev_entry)) {
+if (QLIST_EMPTY(&vhost_log_devs[backend_type])) {
+QLIST_INSERT_HEAD(&vhost_log_devs[backend_type],
+  hdev, logdev_entry);
+} else {
+/*
+ * The first vhost_device in the list is selected as the shared
+ * logger to scan memory sections. Put new entry next to the head
+ * to avoid inadvertent change to the underlying logger device.
+ * This is done in order to get better cache locality and to avoid
+ * performance churn on the hot path for log scanning. Even when
+ * new devices come and go quickly, it wouldn't end up changing
+ * the active leading logger device at all.
+ */
+QLIST_INSERT_AFTER(QLIST_FIRST(&vhost_log_devs[backend_type]),
+   hdev, logdev_entry);
+}
+} else if (!add && QLIST_IS_INSERTED(hdev, logdev_entry)) {
+QLIST_REMOVE(hdev, logdev_entry);
+}
+}
+
 static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
MemoryRegionSection *section,
hwaddr first,
@@ -166,12 +208,14 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
 start_addr = MAX(first, start_addr);

[PULL v3 34/41] hw/misc/pvpanic: centralize definition of supported events

2024-06-05 Thread Michael S. Tsirkin

From: Thomas Weißschuh 

The different components of pvpanic duplicate the list of supported
events. Move it to the shared header file to minimize changes when new
events are added.

Reviewed-by: Thomas Huth 
Reviewed-by: Cornelia Huck 
Signed-off-by: Thomas Weißschuh 
Message-Id: <20240527-pvpanic-shutdown-v8-3-5a28ec025...@t-8ch.de>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/misc/pvpanic.h | 4 
 hw/misc/pvpanic-isa.c | 3 +--
 hw/misc/pvpanic-pci.c | 3 +--
 hw/misc/pvpanic.c | 3 +--
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/include/hw/misc/pvpanic.h b/include/hw/misc/pvpanic.h
index fab94165d0..947468b81b 100644
--- a/include/hw/misc/pvpanic.h
+++ b/include/hw/misc/pvpanic.h
@@ -18,6 +18,10 @@
 #include "exec/memory.h"
 #include "qom/object.h"
 
+#include "standard-headers/linux/pvpanic.h"
+
+#define PVPANIC_EVENTS (PVPANIC_PANICKED | PVPANIC_CRASH_LOADED)
+
 #define TYPE_PVPANIC_ISA_DEVICE "pvpanic"
 #define TYPE_PVPANIC_PCI_DEVICE "pvpanic-pci"
 
diff --git a/hw/misc/pvpanic-isa.c b/hw/misc/pvpanic-isa.c
index ccec50f61b..9a923b7869 100644
--- a/hw/misc/pvpanic-isa.c
+++ b/hw/misc/pvpanic-isa.c
@@ -21,7 +21,6 @@
 #include "hw/misc/pvpanic.h"
 #include "qom/object.h"
 #include "hw/isa/isa.h"
-#include "standard-headers/linux/pvpanic.h"
 #include "hw/acpi/acpi_aml_interface.h"
 
 OBJECT_DECLARE_SIMPLE_TYPE(PVPanicISAState, PVPANIC_ISA_DEVICE)
@@ -102,7 +101,7 @@ static void build_pvpanic_isa_aml(AcpiDevAmlIf *adev, Aml 
*scope)
 static Property pvpanic_isa_properties[] = {
 DEFINE_PROP_UINT16(PVPANIC_IOPORT_PROP, PVPanicISAState, ioport, 0x505),
 DEFINE_PROP_UINT8("events", PVPanicISAState, pvpanic.events,
-  PVPANIC_PANICKED | PVPANIC_CRASH_LOADED),
+  PVPANIC_EVENTS),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/misc/pvpanic-pci.c b/hw/misc/pvpanic-pci.c
index 83be95d0d2..603c5c7600 100644
--- a/hw/misc/pvpanic-pci.c
+++ b/hw/misc/pvpanic-pci.c
@@ -21,7 +21,6 @@
 #include "hw/misc/pvpanic.h"
 #include "qom/object.h"
 #include "hw/pci/pci_device.h"
-#include "standard-headers/linux/pvpanic.h"
 
 OBJECT_DECLARE_SIMPLE_TYPE(PVPanicPCIState, PVPANIC_PCI_DEVICE)
 
@@ -55,7 +54,7 @@ static void pvpanic_pci_realizefn(PCIDevice *dev, Error 
**errp)
 
 static Property pvpanic_pci_properties[] = {
 DEFINE_PROP_UINT8("events", PVPanicPCIState, pvpanic.events,
-  PVPANIC_PANICKED | PVPANIC_CRASH_LOADED),
+  PVPANIC_EVENTS),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/misc/pvpanic.c b/hw/misc/pvpanic.c
index 1540e9091a..a4982cc592 100644
--- a/hw/misc/pvpanic.c
+++ b/hw/misc/pvpanic.c
@@ -21,13 +21,12 @@
 #include "hw/qdev-properties.h"
 #include "hw/misc/pvpanic.h"
 #include "qom/object.h"
-#include "standard-headers/linux/pvpanic.h"
 
 static void handle_event(int event)
 {
 static bool logged;
 
-if (event & ~(PVPANIC_PANICKED | PVPANIC_CRASH_LOADED) && !logged) {
+if (event & ~PVPANIC_EVENTS && !logged) {
 qemu_log_mask(LOG_GUEST_ERROR, "pvpanic: unknown event %#x.\n", event);
 logged = true;
 }
-- 
MST

[PULL v3 16/41] hw/cxl/mailbox: change CCI cmd set structure to be a member, not a reference

2024-06-05 Thread Michael S. Tsirkin

From: Gregory Price 

This allows devices to have fully customized CCIs, along with complex
devices where wrapper devices can override or add additional CCI
commands without having to replicate full command structures or
pollute a base device with every command that might ever be used.

Signed-off-by: Gregory Price 
Signed-off-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
Message-Id: <20240523174651.1089554-2-nifan@gmail.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/cxl/cxl_device.h |  2 +-
 hw/cxl/cxl-mailbox-utils.c  | 19 +++
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 279b276bda..ccc4611875 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -164,7 +164,7 @@ typedef struct CXLEventLog {
 } CXLEventLog;
 
 typedef struct CXLCCI {
-const struct cxl_cmd (*cxl_cmd_set)[256];
+struct cxl_cmd cxl_cmd_set[256][256];
 struct cel_log {
 uint16_t opcode;
 uint16_t effect;
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index e5eb97cb91..2c9f50f0f9 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -1447,10 +1447,21 @@ void cxl_init_cci(CXLCCI *cci, size_t payload_max)
  bg_timercb, cci);
 }
 
+static void cxl_copy_cci_commands(CXLCCI *cci, const struct cxl_cmd 
(*cxl_cmds)[256])
+{
+for (int set = 0; set < 256; set++) {
+for (int cmd = 0; cmd < 256; cmd++) {
+if (cxl_cmds[set][cmd].handler) {
+cci->cxl_cmd_set[set][cmd] = cxl_cmds[set][cmd];
+}
+}
+}
+}
+
 void cxl_initialize_mailbox_swcci(CXLCCI *cci, DeviceState *intf,
   DeviceState *d, size_t payload_max)
 {
-cci->cxl_cmd_set = cxl_cmd_set_sw;
+cxl_copy_cci_commands(cci, cxl_cmd_set_sw);
 cci->d = d;
 cci->intf = intf;
 cxl_init_cci(cci, payload_max);
@@ -1458,7 +1469,7 @@ void cxl_initialize_mailbox_swcci(CXLCCI *cci, 
DeviceState *intf,
 
 void cxl_initialize_mailbox_t3(CXLCCI *cci, DeviceState *d, size_t payload_max)
 {
-cci->cxl_cmd_set = cxl_cmd_set;
+cxl_copy_cci_commands(cci, cxl_cmd_set);
 cci->d = d;
 
 /* No separation for PCI MB as protocol handled in PCI device */
@@ -1476,7 +1487,7 @@ static const struct cxl_cmd cxl_cmd_set_t3_ld[256][256] = 
{
 void cxl_initialize_t3_ld_cci(CXLCCI *cci, DeviceState *d, DeviceState *intf,
size_t payload_max)
 {
-cci->cxl_cmd_set = cxl_cmd_set_t3_ld;
+cxl_copy_cci_commands(cci, cxl_cmd_set_t3_ld);
 cci->d = d;
 cci->intf = intf;
 cxl_init_cci(cci, payload_max);
@@ -1496,7 +1507,7 @@ void cxl_initialize_t3_fm_owned_ld_mctpcci(CXLCCI *cci, 
DeviceState *d,
DeviceState *intf,
size_t payload_max)
 {
-cci->cxl_cmd_set = cxl_cmd_set_t3_fm_owned_ld_mctp;
+cxl_copy_cci_commands(cci, cxl_cmd_set_t3_fm_owned_ld_mctp);
 cci->d = d;
 cci->intf = intf;
 cxl_init_cci(cci, payload_max);
-- 
MST

[PULL v3 11/41] hw/virtio: Fix obtain the buffer id from the last descriptor

2024-06-05 Thread Michael S. Tsirkin

From: Wafer 

The virtio-1.3 specification
 writes:
2.8.6 Next Flag: Descriptor Chaining
  Buffer ID is included in the last descriptor in the list.

If the feature (_F_INDIRECT_DESC) has been negotiated, only one
descriptor is installed in the virtqueue.
Therefore the buffer id should be obtained from the first descriptor.

In descriptor chaining scenarios, the buffer id should be obtained
from the last descriptor.

Fixes: 86044b24e8 ("virtio: basic packed virtqueue support")

Signed-off-by: Wafer 
Reviewed-by: Jason Wang 
Reviewed-by: Eugenio Pérez 
Acked-by: Jason Wang 
Message-Id: <20240510072753.26158-2-wa...@jaguarmicro.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/virtio.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 28cd406e16..3678ec2f88 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1745,6 +1745,11 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t 
sz)
  &indirect_desc_cache);
 } while (rc == VIRTQUEUE_READ_DESC_MORE);
 
+if (desc_cache != &indirect_desc_cache) {
+/* Buffer ID is included in the last descriptor in the list. */
+id = desc.id;
+}
+
 /* Now copy what we have collected and mapped */
 elem = virtqueue_alloc_element(sz, out_num, in_num);
 for (i = 0; i < out_num; i++) {
-- 
MST

[PULL v3 28/41] hw/cxl/cxl-mailbox-utils: Add superset extent release mailbox support

2024-06-05 Thread Michael S. Tsirkin

From: Fan Ni 

With the change, we extend the extent release mailbox command processing
to allow more flexible release. As long as the DPA range of the extent to
release is covered by accepted extent(s) in the device, the release can be
performed.

Tested-by: Svetly Todorov 
Reviewed-by: Gregory Price 
Signed-off-by: Fan Ni 
Message-Id: <20240523174651.1089554-14-nifan@gmail.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/cxl/cxl-mailbox-utils.c | 21 -
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index c4852112fe..74eeb6fde7 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -1704,6 +1704,13 @@ static CXLRetCode 
cxl_dc_extent_release_dry_run(CXLType3Dev *ct3d,
 dpa = in->updated_entries[i].start_dpa;
 len = in->updated_entries[i].len;
 
+/* Check if the DPA range is not fully backed with valid extents */
+if (!ct3_test_region_block_backed(ct3d, dpa, len)) {
+ret = CXL_MBOX_INVALID_PA;
+goto free_and_exit;
+}
+
+/* After this point, extent overflow is the only error can happen */
 while (len > 0) {
 QTAILQ_FOREACH(ent, updated_list, node) {
 range_init_nofail(&range, ent->start_dpa, ent->len);
@@ -1718,14 +1725,7 @@ static CXLRetCode 
cxl_dc_extent_release_dry_run(CXLType3Dev *ct3d,
 if (range_contains(&range, dpa + len - 1)) {
 len2 = ent_start_dpa + ent_len - dpa - len;
 } else {
-/*
- * TODO: we reject the attempt to remove an extent
- * that overlaps with multiple extents in the device
- * for now. We will allow it once superset release
- * support is added.
- */
-ret = CXL_MBOX_INVALID_PA;
-goto free_and_exit;
+dpa = ent_start_dpa + ent_len;
 }
 len_done = ent_len - len1 - len2;
 
@@ -1752,14 +1752,9 @@ static CXLRetCode 
cxl_dc_extent_release_dry_run(CXLType3Dev *ct3d,
 }
 
 len -= len_done;
-/* len == 0 here until superset release is added */
 break;
 }
 }
-if (len) {
-ret = CXL_MBOX_INVALID_PA;
-goto free_and_exit;
-}
 }
 }
 free_and_exit:
-- 
MST

[PULL v3 04/41] virtio/virtio-pci: Handle extra notification data

2024-06-05 Thread Michael S. Tsirkin

From: Jonah Palmer 

Add support to virtio-pci devices for handling the extra data sent
from the driver to the device when the VIRTIO_F_NOTIFICATION_DATA
transport feature has been negotiated.

The extra data that's passed to the virtio-pci device when this
feature is enabled varies depending on the device's virtqueue
layout.

In a split virtqueue layout, this data includes:
 - upper 16 bits: shadow_avail_idx
 - lower 16 bits: virtqueue index

In a packed virtqueue layout, this data includes:
 - upper 16 bits: 1-bit wrap counter & 15-bit shadow_avail_idx
 - lower 16 bits: virtqueue index

Signed-off-by: Jonah Palmer 
Message-Id: <20240315165557.26942-2-jonah.pal...@oracle.com>
Reviewed-by: Eugenio Pérez 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/virtio/virtio.h |  2 ++
 hw/virtio/virtio-pci.c | 12 +---
 hw/virtio/virtio.c | 18 ++
 3 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 7d5ffdc145..1451926a13 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -307,6 +307,8 @@ int virtio_queue_ready(VirtQueue *vq);
 
 int virtio_queue_empty(VirtQueue *vq);
 
+void virtio_queue_set_shadow_avail_idx(VirtQueue *vq, uint16_t idx);
+
 /* Host binding interface.  */
 
 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr);
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index b1d02f4b3d..cffc7efcae 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -384,7 +384,7 @@ static void virtio_ioport_write(void *opaque, uint32_t 
addr, uint32_t val)
 {
 VirtIOPCIProxy *proxy = opaque;
 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
-uint16_t vector;
+uint16_t vector, vq_idx;
 hwaddr pa;
 
 switch (addr) {
@@ -408,8 +408,14 @@ static void virtio_ioport_write(void *opaque, uint32_t 
addr, uint32_t val)
 vdev->queue_sel = val;
 break;
 case VIRTIO_PCI_QUEUE_NOTIFY:
-if (val < VIRTIO_QUEUE_MAX) {
-virtio_queue_notify(vdev, val);
+vq_idx = val;
+if (vq_idx < VIRTIO_QUEUE_MAX && virtio_queue_get_num(vdev, vq_idx)) {
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) {
+VirtQueue *vq = virtio_get_queue(vdev, vq_idx);
+
+virtio_queue_set_shadow_avail_idx(vq, val >> 16);
+}
+virtio_queue_notify(vdev, vq_idx);
 }
 break;
 case VIRTIO_PCI_STATUS:
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 893a072c9d..f7c99e3a96 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2264,6 +2264,24 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, 
int align)
 }
 }
 
+void virtio_queue_set_shadow_avail_idx(VirtQueue *vq, uint16_t 
shadow_avail_idx)
+{
+if (!vq->vring.desc) {
+return;
+}
+
+/*
+ * 16-bit data for packed VQs include 1-bit wrap counter and
+ * 15-bit shadow_avail_idx.
+ */
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+vq->shadow_avail_wrap_counter = (shadow_avail_idx >> 15) & 0x1;
+vq->shadow_avail_idx = shadow_avail_idx & 0x7FFF;
+} else {
+vq->shadow_avail_idx = shadow_avail_idx;
+}
+}
+
 static void virtio_queue_notify_vq(VirtQueue *vq)
 {
 if (vq->vring.desc && vq->handle_output) {
-- 
MST

[PULL v3 26/41] hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents

2024-06-05 Thread Michael S. Tsirkin

From: Fan Ni 

To simulate FM functionalities for initiating Dynamic Capacity Add
(Opcode 5604h) and Dynamic Capacity Release (Opcode 5605h) as in CXL spec
r3.1 7.6.7.6.5 and 7.6.7.6.6, we implemented two QMP interfaces to issue
add/release dynamic capacity extents requests.

With this change, an extent can be released only when its DPA range
is contained in a single accepted extent in the device. That is to say,
extent superset release is not supported yet.

1. Add dynamic capacity extents:

For example, the command to add two continuous extents (each 128MiB long)
to region 0 (starting at DPA offset 0) looks like below:

{ "execute": "qmp_capabilities" }

{ "execute": "cxl-add-dynamic-capacity",
  "arguments": {
  "path": "/machine/peripheral/cxl-dcd0",
  "host-id": 0,
  "selection-policy": "prescriptive",
  "region": 0,
  "extents": [
  {
  "offset": 0,
  "len": 134217728
  },
  {
  "offset": 134217728,
  "len": 134217728
  }
  ]
  }
}

2. Release dynamic capacity extents:

For example, the command to release an extent of size 128MiB from region 0
(DPA offset 128MiB) looks like below:

{ "execute": "cxl-release-dynamic-capacity",
  "arguments": {
  "path": "/machine/peripheral/cxl-dcd0",
  "host-id": 0,
  "removal-policy":"prescriptive",
  "region": 0,
  "extents": [
  {
  "offset": 134217728,
  "len": 134217728
  }
  ]
  }
}

Tested-by: Svetly Todorov 
Reviewed-by: Gregory Price 
Signed-off-by: Fan Ni 
Message-Id: <20240523174651.1089554-12-nifan@gmail.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 qapi/cxl.json   | 143 +
 include/hw/cxl/cxl_device.h |  22 +++
 include/hw/cxl/cxl_events.h |  18 +++
 hw/cxl/cxl-mailbox-utils.c  |  62 ++--
 hw/mem/cxl_type3.c  | 306 +++-
 hw/mem/cxl_type3_stubs.c|  25 +++
 6 files changed, 563 insertions(+), 13 deletions(-)

diff --git a/qapi/cxl.json b/qapi/cxl.json
index 4281726dec..57d9f82014 100644
--- a/qapi/cxl.json
+++ b/qapi/cxl.json
@@ -361,3 +361,146 @@
 ##
 {'command': 'cxl-inject-correctable-error',
  'data': {'path': 'str', 'type': 'CxlCorErrorType'}}
+
+##
+# @CXLDynamicCapacityExtent:
+#
+# A single dynamic capacity extent
+#
+# @offset: The offset (in bytes) to the start of the region
+# where the extent belongs to.
+#
+# @len: The length of the extent in bytes.
+#
+# Since: 9.1
+##
+{ 'struct': 'CXLDynamicCapacityExtent',
+  'data': {
+  'offset':'uint64',
+  'len': 'uint64'
+  }
+}
+
+##
+# @CXLExtSelPolicy:
+#
+# The policy to use for selecting which extents comprise the added
+# capacity, as defined in cxl spec r3.1 Table 7-70.
+#
+# @free: 0h = Free
+#
+# @contiguous: 1h = Continuous
+#
+# @prescriptive: 2h = Prescriptive
+#
+# @enable-shared-access: 3h = Enable Shared Access
+#
+# Since: 9.1
+##
+{ 'enum': 'CXLExtSelPolicy',
+  'data': ['free',
+   'contiguous',
+   'prescriptive',
+   'enable-shared-access']
+}
+
+##
+# @cxl-add-dynamic-capacity:
+#
+# Command to initiate to add dynamic capacity extents to a host.  It
+# simulates operations defined in cxl spec r3.1 7.6.7.6.5.
+#
+# @path: CXL DCD canonical QOM path.
+#
+# @host-id: The "Host ID" field as defined in cxl spec r3.1
+# Table 7-70.
+#
+# @selection-policy: The "Selection Policy" bits as defined in
+# cxl spec r3.1 Table 7-70.  It specifies the policy to use for
+# selecting which extents comprise the added capacity.
+#
+# @region: The "Region Number" field as defined in cxl spec r3.1
+# Table 7-70.  The dynamic capacity region where the capacity
+# is being added.  Valid range is from 0-7.
+#
+# @tag: The "Tag" field as defined in cxl spec r3.1 Table 7-70.
+#
+# @extents: The "Extent List" field as defined in cxl spec r3.1
+# Table 7-70.
+#
+# Since : 9.1
+##
+{ 'command': 'cxl-add-dynamic-capacity',
+  'data': { 'path': 'str',
+'host-id': 'uint16',
+'selection-policy': 'CXLExtSelPolicy',
+'region': 'uint8',
+'*tag': 'str',
+'extents': [ 'CXLDynamicCapacityExtent' ]
+   }
+}
+
+##
+# @CXLExtRemovalPolicy:
+#
+# The policy to use for selecting which extents comprise the released
+# capacity, defined in the "Flags" field in cxl spec r3.1 Table 7-71.
+#
+# @tag-based: value = 0h.  Extents are selected by the device based
+# on tag, with no requirement for contiguous extents.
+#
+# @prescriptive: value = 1h.  Extent list of capacity to release is
+# included in the request payload.
+#
+# Since: 9.1
+##
+{ 'enum': 'CXLExtRemovalPolicy',
+  'data': ['tag-based',
+   'prescriptive']
+}
+
+##
+# @cxl-release-dynamic-capacity:
+#
+# Command to initiate to release dynamic capacity extents from a
+# host.  It simulates operations defined in cxl spec r3.1 7.6.7.6.6.
+#
+# @path: CXL DCD canonical QOM path.
+#
+# @host-id: The

[PULL v3 27/41] hw/mem/cxl_type3: Add DPA range validation for accesses to DC regions

2024-06-05 Thread Michael S. Tsirkin

From: Fan Ni 

All DPA ranges in the DC regions are invalid to access until an extent
covering the range has been successfully accepted by the host. A bitmap
is added to each region to record whether a DC block in the region has
been backed by a DC extent. Each bit in the bitmap represents a DC block.
When a DC extent is accepted, all the bits representing the blocks in the
extent are set, which will be cleared when the extent is released.

Tested-by: Svetly Todorov 
Reviewed-by: Gregory Price 
Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
Message-Id: <20240523174651.1089554-13-nifan@gmail.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/cxl/cxl_device.h |  7 
 hw/cxl/cxl-mailbox-utils.c  |  3 ++
 hw/mem/cxl_type3.c  | 76 +
 3 files changed, 86 insertions(+)

diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index c69ff6b5de..0a4fcb2800 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -456,6 +456,7 @@ typedef struct CXLDCRegion {
 uint64_t block_size;
 uint32_t dsmadhandle;
 uint8_t flags;
+unsigned long *blk_bitmap;
 } CXLDCRegion;
 
 struct CXLType3Dev {
@@ -577,4 +578,10 @@ CXLDCExtentGroup 
*cxl_insert_extent_to_extent_group(CXLDCExtentGroup *group,
 void cxl_extent_group_list_insert_tail(CXLDCExtentGroupList *list,
CXLDCExtentGroup *group);
 void cxl_extent_group_list_delete_front(CXLDCExtentGroupList *list);
+void ct3_set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+ uint64_t len);
+void ct3_clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+   uint64_t len);
+bool ct3_test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+  uint64_t len);
 #endif
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 64387f34ce..c4852112fe 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -1655,6 +1655,7 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct 
cxl_cmd *cmd,
 
 cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
 ct3d->dc.total_extent_count += 1;
+ct3_set_region_block_backed(ct3d, dpa, len);
 }
 /* Remove the first extent group in the pending list */
 cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending);
@@ -1813,10 +1814,12 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct 
cxl_cmd *cmd,
  * list and update the extent count;
  */
 QTAILQ_FOREACH_SAFE(ent, &ct3d->dc.extents, node, ent_next) {
+ct3_clear_region_block_backed(ct3d, ent->start_dpa, ent->len);
 cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
 }
 copy_extent_list(&ct3d->dc.extents, &updated_list);
 QTAILQ_FOREACH_SAFE(ent, &updated_list, node, ent_next) {
+ct3_set_region_block_backed(ct3d, ent->start_dpa, ent->len);
 cxl_remove_extent_from_extent_list(&updated_list, ent);
 }
 ct3d->dc.total_extent_count = updated_list_size;
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index f53bcca6d3..0d18259ec0 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -672,6 +672,7 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error 
**errp)
 .flags = 0,
 };
 ct3d->dc.total_capacity += region->len;
+region->blk_bitmap = bitmap_new(region->len / region->block_size);
 }
 QTAILQ_INIT(&ct3d->dc.extents);
 QTAILQ_INIT(&ct3d->dc.extents_pending);
@@ -683,6 +684,8 @@ static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
 {
 CXLDCExtent *ent, *ent_next;
 CXLDCExtentGroup *group, *group_next;
+int i;
+CXLDCRegion *region;
 
 QTAILQ_FOREACH_SAFE(ent, &ct3d->dc.extents, node, ent_next) {
 cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
@@ -695,6 +698,11 @@ static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
 }
 g_free(group);
 }
+
+for (i = 0; i < ct3d->dc.num_regions; i++) {
+region = &ct3d->dc.regions[i];
+g_free(region->blk_bitmap);
+}
 }
 
 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
@@ -926,6 +934,70 @@ static void ct3_exit(PCIDevice *pci_dev)
 }
 }
 
+/*
+ * Mark the DPA range [dpa, dpa + len - 1] to be backed and accessible. This
+ * happens when a DC extent is added and accepted by the host.
+ */
+void ct3_set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
+ uint64_t len)
+{
+CXLDCRegion *region;
+
+region = cxl_find_dc_region(ct3d, dpa, len);
+if (!region) {
+return;
+}
+
+bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
+   len / region->block_size);
+}
+
+/*
+ * Check whether the DPA range [dpa, dpa + len - 1] is backed with DC extents.
+ * Used when validating read/write to dc regions
+ */
+bool ct3_test_region_block_backed(CXLType3Dev *ct3d,

[PULL v3 22/41] hw/mem/cxl-type3: Refactor ct3_build_cdat_entries_for_mr to take mr size instead of mr as argument

2024-06-05 Thread Michael S. Tsirkin

From: Fan Ni 

The function ct3_build_cdat_entries_for_mr only uses size of the passed
memory region argument, refactor the function definition to make the passed
arguments more specific.

Reviewed-by: Gregory Price 
Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
Message-Id: <20240523174651.1089554-8-nifan@gmail.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/mem/cxl_type3.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 06c6f9bb78..51be50ce87 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -44,7 +44,7 @@ enum {
 };
 
 static void ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
-  int dsmad_handle, MemoryRegion *mr,
+  int dsmad_handle, uint64_t size,
   bool is_pmem, uint64_t dpa_base)
 {
 CDATDsmas *dsmas;
@@ -63,7 +63,7 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader 
**cdat_table,
 .DSMADhandle = dsmad_handle,
 .flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0,
 .DPA_base = dpa_base,
-.DPA_length = memory_region_size(mr),
+.DPA_length = size,
 };
 
 /* For now, no memory side cache, plausiblish numbers */
@@ -132,7 +132,7 @@ static void ct3_build_cdat_entries_for_mr(CDATSubHeader 
**cdat_table,
  */
 .EFI_memory_type_attr = is_pmem ? 2 : 1,
 .DPA_offset = 0,
-.DPA_length = memory_region_size(mr),
+.DPA_length = size,
 };
 
 /* Header always at start of structure */
@@ -149,6 +149,7 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table, void *priv)
 g_autofree CDATSubHeader **table = NULL;
 CXLType3Dev *ct3d = priv;
 MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL;
+uint64_t vmr_size = 0, pmr_size = 0;
 int dsmad_handle = 0;
 int cur_ent = 0;
 int len = 0;
@@ -163,6 +164,7 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table, void *priv)
 return -EINVAL;
 }
 len += CT3_CDAT_NUM_ENTRIES;
+vmr_size = memory_region_size(volatile_mr);
 }
 
 if (ct3d->hostpmem) {
@@ -171,21 +173,22 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table, void *priv)
 return -EINVAL;
 }
 len += CT3_CDAT_NUM_ENTRIES;
+pmr_size = memory_region_size(nonvolatile_mr);
 }
 
 table = g_malloc0(len * sizeof(*table));
 
 /* Now fill them in */
 if (volatile_mr) {
-ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr,
+ct3_build_cdat_entries_for_mr(table, dsmad_handle++, vmr_size,
   false, 0);
 cur_ent = CT3_CDAT_NUM_ENTRIES;
 }
 
 if (nonvolatile_mr) {
-uint64_t base = volatile_mr ? memory_region_size(volatile_mr) : 0;
+uint64_t base = vmr_size;
 ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++,
-  nonvolatile_mr, true, base);
+  pmr_size, true, base);
 cur_ent += CT3_CDAT_NUM_ENTRIES;
 }
 assert(len == cur_ent);
-- 
MST

[PULL v3 03/41] vhost-vdpa: check vhost_vdpa_set_vring_ready() return value

2024-06-05 Thread Michael S. Tsirkin

From: Stefano Garzarella 

vhost_vdpa_set_vring_ready() could already fail, but if Linux's
patch [1] is merged, it will be more likely to fail if
userspace does not activate virtqueues before DRIVER_OK when
VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is not negotiated.

So better check its return value anyway.

[1] 
https://lore.kernel.org/virtualization/20240206145154.118044-1-sgarz...@redhat.com/T/#u

Acked-by: Eugenio Pérez 
Acked-by: Jason Wang 
Signed-off-by: Stefano Garzarella 
Message-Id: <20240322092315.31885-1-sgarz...@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 net/vhost-vdpa.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 85e73dd6a7..eda714d1a4 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -399,7 +399,10 @@ static int vhost_vdpa_net_data_load(NetClientState *nc)
 }
 
 for (int i = 0; i < v->dev->nvqs; ++i) {
-vhost_vdpa_set_vring_ready(v, i + v->dev->vq_index);
+int ret = vhost_vdpa_set_vring_ready(v, i + v->dev->vq_index);
+if (ret < 0) {
+return ret;
+}
 }
 return 0;
 }
@@ -1238,7 +1241,10 @@ static int vhost_vdpa_net_cvq_load(NetClientState *nc)
 
 assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
 
-vhost_vdpa_set_vring_ready(v, v->dev->vq_index);
+r = vhost_vdpa_set_vring_ready(v, v->dev->vq_index);
+if (unlikely(r < 0)) {
+return r;
+}
 
 if (v->shadow_vqs_enabled) {
 n = VIRTIO_NET(v->dev->vdev);
@@ -1277,7 +1283,10 @@ static int vhost_vdpa_net_cvq_load(NetClientState *nc)
 }
 
 for (int i = 0; i < v->dev->vq_index; ++i) {
-vhost_vdpa_set_vring_ready(v, i);
+r = vhost_vdpa_set_vring_ready(v, i);
+if (unlikely(r < 0)) {
+return r;
+}
 }
 
 return 0;
-- 
MST

[PULL v3 07/41] virtio-ccw: Handle extra notification data

2024-06-05 Thread Michael S. Tsirkin

From: Jonah Palmer 

Add support to virtio-ccw devices for handling the extra data sent from
the driver to the device when the VIRTIO_F_NOTIFICATION_DATA transport
feature has been negotiated.

The extra data that's passed to the virtio-ccw device when this feature
is enabled varies depending on the device's virtqueue layout.

That data passed to the virtio-ccw device is in the same format as the
data passed to virtio-pci devices.

Signed-off-by: Jonah Palmer 
Message-Id: <20240315165557.26942-5-jonah.pal...@oracle.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/s390x/s390-virtio-ccw.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 3d0bc3e7f2..956ef7b98d 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -125,9 +125,11 @@ static void subsystem_reset(void)
 static int virtio_ccw_hcall_notify(const uint64_t *args)
 {
 uint64_t subch_id = args[0];
-uint64_t queue = args[1];
+uint64_t data = args[1];
 SubchDev *sch;
+VirtIODevice *vdev;
 int cssid, ssid, schid, m;
+uint16_t vq_idx = data;
 
 if (ioinst_disassemble_sch_ident(subch_id, &m, &cssid, &ssid, &schid)) {
 return -EINVAL;
@@ -136,12 +138,19 @@ static int virtio_ccw_hcall_notify(const uint64_t *args)
 if (!sch || !css_subch_visible(sch)) {
 return -EINVAL;
 }
-if (queue >= VIRTIO_QUEUE_MAX) {
+
+vdev = virtio_ccw_get_vdev(sch);
+if (vq_idx >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, vq_idx)) {
 return -EINVAL;
 }
-virtio_queue_notify(virtio_ccw_get_vdev(sch), queue);
-return 0;
 
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) {
+virtio_queue_set_shadow_avail_idx(virtio_get_queue(vdev, vq_idx),
+  (data >> 16) & 0xFFFF);
+}
+
+virtio_queue_notify(vdev, vq_idx);
+return 0;
 }
 
 static int virtio_ccw_hcall_early_printk(const uint64_t *args)
-- 
MST

[PULL v3 17/41] hw/cxl/mailbox: interface to add CCI commands to an existing CCI

2024-06-05 Thread Michael S. Tsirkin

From: Gregory Price 

This enables wrapper devices to customize the base device's CCI
(for example, with custom commands outside the specification)
without the need to change the base device.

This also enables the base device to dispatch those commands without
requiring additional driver support.

Heavily edited by Jonathan Cameron to increase code reuse

Signed-off-by: Gregory Price 
Signed-off-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
Message-Id: <20240523174651.1089554-3-nifan@gmail.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/cxl/cxl_device.h |  2 ++
 hw/cxl/cxl-mailbox-utils.c  | 19 +++++++++++++++++--
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index ccc4611875..a5f8e25020 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -301,6 +301,8 @@ void cxl_initialize_mailbox_t3(CXLCCI *cci, DeviceState *d, 
size_t payload_max);
 void cxl_initialize_mailbox_swcci(CXLCCI *cci, DeviceState *intf,
   DeviceState *d, size_t payload_max);
 void cxl_init_cci(CXLCCI *cci, size_t payload_max);
+void cxl_add_cci_commands(CXLCCI *cci, const struct cxl_cmd 
(*cxl_cmd_set)[256],
+  size_t payload_max);
 int cxl_process_cci_message(CXLCCI *cci, uint8_t set, uint8_t cmd,
 size_t len_in, uint8_t *pl_in,
 size_t *len_out, uint8_t *pl_out,
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 2c9f50f0f9..2a64c58e2f 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -1424,9 +1424,9 @@ static void bg_timercb(void *opaque)
 }
 }
 
-void cxl_init_cci(CXLCCI *cci, size_t payload_max)
+static void cxl_rebuild_cel(CXLCCI *cci)
 {
-cci->payload_max = payload_max;
+cci->cel_size = 0; /* Reset for a fresh build */
 for (int set = 0; set < 256; set++) {
 for (int cmd = 0; cmd < 256; cmd++) {
 if (cci->cxl_cmd_set[set][cmd].handler) {
@@ -1440,6 +1440,13 @@ void cxl_init_cci(CXLCCI *cci, size_t payload_max)
 }
 }
 }
+}
+
+void cxl_init_cci(CXLCCI *cci, size_t payload_max)
+{
+cci->payload_max = payload_max;
+cxl_rebuild_cel(cci);
+
 cci->bg.complete_pct = 0;
 cci->bg.starttime = 0;
 cci->bg.runtime = 0;
@@ -1458,6 +1465,14 @@ static void cxl_copy_cci_commands(CXLCCI *cci, const 
struct cxl_cmd (*cxl_cmds)[
 }
 }
 
+void cxl_add_cci_commands(CXLCCI *cci, const struct cxl_cmd 
(*cxl_cmd_set)[256],
+ size_t payload_max)
+{
+cci->payload_max = MAX(payload_max, cci->payload_max);
+cxl_copy_cci_commands(cci, cxl_cmd_set);
+cxl_rebuild_cel(cci);
+}
+
 void cxl_initialize_mailbox_swcci(CXLCCI *cci, DeviceState *intf,
   DeviceState *d, size_t payload_max)
 {
-- 
MST

[PULL v3 18/41] hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output payload of identify memory device command

2024-06-05 Thread Michael S. Tsirkin

From: Fan Ni 

Based on CXL spec r3.1 Table 8-127 (Identify Memory Device Output
Payload), dynamic capacity event log size should be part of
output of the Identify command.
Add dc_event_log_size to the output payload for the host to get the info.

Reviewed-by: Gregory Price 
Reviewed-by: Jonathan Cameron 
Signed-off-by: Fan Ni 
Message-Id: <20240523174651.1089554-4-nifan@gmail.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/cxl/cxl-mailbox-utils.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 2a64c58e2f..626acc1d0d 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -21,6 +21,7 @@
 #include "sysemu/hostmem.h"
 
 #define CXL_CAPACITY_MULTIPLIER   (256 * MiB)
+#define CXL_DC_EVENT_LOG_SIZE 8
 
 /*
  * How to add a new command, example. The command set FOO, with cmd BAR.
@@ -780,8 +781,9 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 uint16_t inject_poison_limit;
 uint8_t poison_caps;
 uint8_t qos_telemetry_caps;
+uint16_t dc_event_log_size;
 } QEMU_PACKED *id;
-QEMU_BUILD_BUG_ON(sizeof(*id) != 0x43);
+QEMU_BUILD_BUG_ON(sizeof(*id) != 0x45);
 CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
 CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
 CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate;
@@ -807,6 +809,7 @@ static CXLRetCode cmd_identify_memory_device(const struct 
cxl_cmd *cmd,
 st24_le_p(id->poison_list_max_mer, 256);
 /* No limit - so limited by main poison record limit */
 stw_le_p(&id->inject_poison_limit, 0);
+stw_le_p(&id->dc_event_log_size, CXL_DC_EVENT_LOG_SIZE);
 
 *len_out = sizeof(*id);
 return CXL_MBOX_SUCCESS;
-- 
MST

[PULL v3 09/41] Fix vhost user assertion when sending more than one fd

2024-06-05 Thread Michael S. Tsirkin

From: Christian Pötzsch 

If the client sends more than one region this assert triggers. The
reason is that two fd's are 8 bytes and VHOST_MEMORY_BASELINE_NREGIONS
is exactly 8.

The assert is wrong because it should not test for the size of the fd
array, but for the numbers of regions.

Signed-off-by: Christian Pötzsch 
Message-Id: <20240426083313.3081272-1-christian.poetz...@kernkonzept.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 subprojects/libvhost-user/libvhost-user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subprojects/libvhost-user/libvhost-user.c 
b/subprojects/libvhost-user/libvhost-user.c
index a879149fef..8adb277d54 100644
--- a/subprojects/libvhost-user/libvhost-user.c
+++ b/subprojects/libvhost-user/libvhost-user.c
@@ -568,7 +568,7 @@ vu_message_read_default(VuDev *dev, int conn_fd, 
VhostUserMsg *vmsg)
 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
 fd_size = cmsg->cmsg_len - CMSG_LEN(0);
 vmsg->fd_num = fd_size / sizeof(int);
-assert(fd_size < VHOST_MEMORY_BASELINE_NREGIONS);
+assert(vmsg->fd_num <= VHOST_MEMORY_BASELINE_NREGIONS);
 memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
 break;
 }
-- 
MST

[PULL v3 01/41] vhost: dirty log should be per backend type

2024-06-05 Thread Michael S. Tsirkin

From: Si-Wei Liu 

There could be a mix of both vhost-user and vhost-kernel clients
in the same QEMU process, where separate vhost loggers for the
specific vhost type have to be used. Make the vhost logger per
backend type, and have them properly reference counted.

Suggested-by: Michael S. Tsirkin 
Signed-off-by: Si-Wei Liu 
Message-Id: <1710448055-11709-1-git-send-email-si-wei@oracle.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/vhost.c | 45 +++++++++++++++++++++++++++++++++------------
 1 file changed, 33 insertions(+), 12 deletions(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 4acd77e890..a1e8b79e1a 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -43,8 +43,8 @@
 do { } while (0)
 #endif
 
-static struct vhost_log *vhost_log;
-static struct vhost_log *vhost_log_shm;
+static struct vhost_log *vhost_log[VHOST_BACKEND_TYPE_MAX];
+static struct vhost_log *vhost_log_shm[VHOST_BACKEND_TYPE_MAX];
 
 /* Memslots used by backends that support private memslots (without an fd). */
 static unsigned int used_memslots;
@@ -287,6 +287,10 @@ static int vhost_set_backend_type(struct vhost_dev *dev,
 r = -1;
 }
 
+if (r == 0) {
+assert(dev->vhost_ops->backend_type == backend_type);
+}
+
 return r;
 }
 
@@ -319,16 +323,22 @@ static struct vhost_log *vhost_log_alloc(uint64_t size, 
bool share)
 return log;
 }
 
-static struct vhost_log *vhost_log_get(uint64_t size, bool share)
+static struct vhost_log *vhost_log_get(VhostBackendType backend_type,
+   uint64_t size, bool share)
 {
-struct vhost_log *log = share ? vhost_log_shm : vhost_log;
+struct vhost_log *log;
+
+assert(backend_type > VHOST_BACKEND_TYPE_NONE);
+assert(backend_type < VHOST_BACKEND_TYPE_MAX);
+
+log = share ? vhost_log_shm[backend_type] : vhost_log[backend_type];
 
 if (!log || log->size != size) {
 log = vhost_log_alloc(size, share);
 if (share) {
-vhost_log_shm = log;
+vhost_log_shm[backend_type] = log;
 } else {
-vhost_log = log;
+vhost_log[backend_type] = log;
 }
 } else {
 ++log->refcnt;
@@ -340,11 +350,20 @@ static struct vhost_log *vhost_log_get(uint64_t size, 
bool share)
 static void vhost_log_put(struct vhost_dev *dev, bool sync)
 {
 struct vhost_log *log = dev->log;
+VhostBackendType backend_type;
 
 if (!log) {
 return;
 }
 
+assert(dev->vhost_ops);
+backend_type = dev->vhost_ops->backend_type;
+
+if (backend_type == VHOST_BACKEND_TYPE_NONE ||
+backend_type >= VHOST_BACKEND_TYPE_MAX) {
+return;
+}
+
 --log->refcnt;
 if (log->refcnt == 0) {
 /* Sync only the range covered by the old log */
@@ -352,13 +371,13 @@ static void vhost_log_put(struct vhost_dev *dev, bool 
sync)
 vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1);
 }
 
-if (vhost_log == log) {
+if (vhost_log[backend_type] == log) {
 g_free(log->log);
-vhost_log = NULL;
-} else if (vhost_log_shm == log) {
+vhost_log[backend_type] = NULL;
+} else if (vhost_log_shm[backend_type] == log) {
 qemu_memfd_free(log->log, log->size * sizeof(*(log->log)),
 log->fd);
-vhost_log_shm = NULL;
+vhost_log_shm[backend_type] = NULL;
 }
 
 g_free(log);
@@ -376,7 +395,8 @@ static bool vhost_dev_log_is_shared(struct vhost_dev *dev)
 
 static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
 {
-struct vhost_log *log = vhost_log_get(size, vhost_dev_log_is_shared(dev));
+struct vhost_log *log = vhost_log_get(dev->vhost_ops->backend_type,
+  size, vhost_dev_log_is_shared(dev));
 uint64_t log_base = (uintptr_t)log->log;
 int r;
 
@@ -2044,7 +2064,8 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice 
*vdev, bool vrings)
 uint64_t log_base;
 
 hdev->log_size = vhost_get_log_size(hdev);
-hdev->log = vhost_log_get(hdev->log_size,
+hdev->log = vhost_log_get(hdev->vhost_ops->backend_type,
+  hdev->log_size,
   vhost_dev_log_is_shared(hdev));
 log_base = (uintptr_t)hdev->log->log;
 r = hdev->vhost_ops->vhost_set_log_base(hdev,
-- 
MST

[PULL v3 05/41] virtio: Prevent creation of device using notification-data with ioeventfd

2024-06-05 Thread Michael S. Tsirkin

From: Jonah Palmer 

Prevent the realization of a virtio device that attempts to use the
VIRTIO_F_NOTIFICATION_DATA transport feature without disabling
ioeventfd.

Due to ioeventfd not being able to carry the extra data associated with
this feature, having both enabled is a functional mismatch and therefore
Qemu should not continue the device's realization process.

Although the device does not yet know if the feature will be
successfully negotiated, many devices using this feature won't actually
work without this extra data and would fail FEATURES_OK anyway.

If ioeventfd is able to work with the extra notification data in the
future, this compatibility check can be removed.

Signed-off-by: Jonah Palmer 
Message-Id: <20240315165557.26942-3-jonah.pal...@oracle.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/virtio.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index f7c99e3a96..28cd406e16 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2980,6 +2980,20 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val)
 return ret;
 }
 
+static void virtio_device_check_notification_compatibility(VirtIODevice *vdev,
+   Error **errp)
+{
+VirtioBusState *bus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
+VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus);
+DeviceState *proxy = DEVICE(BUS(bus)->parent);
+
+if (virtio_host_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA) &&
+k->ioeventfd_enabled(proxy)) {
+error_setg(errp,
+   "notification_data=on without ioeventfd=off is not 
supported");
+}
+}
+
 size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
   uint64_t host_features)
 {
@@ -3740,6 +3754,14 @@ static void virtio_device_realize(DeviceState *dev, 
Error **errp)
 }
 }
 
+/* Devices should not use both ioeventfd and notification data feature */
+virtio_device_check_notification_compatibility(vdev, &err);
+if (err != NULL) {
+error_propagate(errp, err);
+vdc->unrealize(dev);
+return;
+}
+
 virtio_bus_device_plugged(vdev, &err);
 if (err != NULL) {
 error_propagate(errp, err);
-- 
MST

[PULL v3 00/41] virtio: features,fixes

2024-06-05 Thread Michael S. Tsirkin

Dropped acpi patches that had endian-ness issues.

The following changes since commit 60b54b67c63d8f076152e0f7dccf39854dfc6a77:

  Merge tag 'pull-lu-20240526' of https://gitlab.com/rth7680/qemu into staging 
(2024-05-26 17:51:00 -0700)

are available in the Git repository at:

  https://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream

for you to fetch changes up to d23bc95d390a1800198c92a0177240d9e1a1eb66:

  hw/cxl: Fix read from bogus memory (2024-06-05 19:33:01 -0400)


virtio: features,fixes

A bunch of improvements:
- vhost dirty log is now only scanned once, not once per device
- virtio and vhost now support VIRTIO_F_NOTIFICATION_DATA
- cxl gained DCD emulation support
- pvpanic gained shutdown support
- beginning of patchset for Generic Port Affinity Structure
- new tests
- bugfixes

Signed-off-by: Michael S. Tsirkin 


Alejandro Jimenez (1):
  pvpanic: Emit GUEST_PVSHUTDOWN QMP event on pvpanic shutdown signal

Christian Pötzsch (1):
  Fix vhost user assertion when sending more than one fd

Cindy Lu (1):
  virtio-pci: Fix the failure process in kvm_virtio_pci_vector_use_one()

Fan Ni (12):
  hw/cxl/cxl-mailbox-utils: Add dc_event_log_size field to output payload 
of identify memory device command
  hw/cxl/cxl-mailbox-utils: Add dynamic capacity region representative and 
mailbox command support
  include/hw/cxl/cxl_device: Rename mem_size as static_mem_size for type3 
memory devices
  hw/mem/cxl_type3: Add support to create DC regions to type3 memory devices
  hw/mem/cxl-type3: Refactor ct3_build_cdat_entries_for_mr to take mr size 
instead of mr as argument
  hw/mem/cxl_type3: Add host backend and address space handling for DC 
regions
  hw/mem/cxl_type3: Add DC extent list representative and get DC extent 
list mailbox support
  hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release 
dynamic capacity response
  hw/cxl/events: Add qmp interfaces to add/release dynamic capacity extents
  hw/mem/cxl_type3: Add DPA range validation for accesses to DC regions
  hw/cxl/cxl-mailbox-utils: Add superset extent release mailbox support
  hw/mem/cxl_type3: Allow to release extent superset in QMP interface

Gregory Price (2):
  hw/cxl/mailbox: change CCI cmd set structure to be a member, not a 
reference
  hw/cxl/mailbox: interface to add CCI commands to an existing CCI

Halil Pasic (1):
  vhost-vsock: add VIRTIO_F_RING_PACKED to feature_bits

Ira Weiny (1):
  hw/cxl: Fix read from bogus memory

Jiqian Chen (1):
  virtio-pci: only reset pm state during resetting

Jonah Palmer (5):
  virtio/virtio-pci: Handle extra notification data
  virtio: Prevent creation of device using notification-data with ioeventfd
  virtio-mmio: Handle extra notification data
  virtio-ccw: Handle extra notification data
  vhost/vhost-user: Add VIRTIO_F_NOTIFICATION_DATA to vhost feature bits

Jonathan Cameron (2):
  hw/acpi/GI: Fix trivial parameter alignment issue.
  hw/acpi: Insert an acpi-generic-node base under acpi-generic-initiator

Li Feng (2):
  Revert "vhost-user: fix lost reconnect"
  vhost-user: fix lost reconnect again

Marc-André Lureau (1):
  vhost-user-gpu: fix import of DMABUF

Si-Wei Liu (2):
  vhost: dirty log should be per backend type
  vhost: Perform memory section dirty scans once per iteration

Stefano Garzarella (1):
  vhost-vdpa: check vhost_vdpa_set_vring_ready() return value

Thomas Weißschuh (7):
  scripts/update-linux-headers: Copy setup_data.h to correct directory
  linux-headers: update to 6.10-rc1
  hw/misc/pvpanic: centralize definition of supported events
  tests/qtest/pvpanic: use centralized definition of supported events
  hw/misc/pvpanic: add support for normal shutdowns
  tests/qtest/pvpanic: add tests for pvshutdown event
  Revert "docs/specs/pvpanic: mark shutdown event as not implemented"

Wafer (1):
  hw/virtio: Fix obtain the buffer id from the last descriptor

 qapi/cxl.json   | 143 ++
 qapi/run-state.json |  14 +
 include/hw/acpi/acpi_generic_initiator.h|  15 +-
 include/hw/cxl/cxl_device.h |  85 +++-
 include/hw/cxl/cxl_events.h |  18 +
 include/hw/misc/pvpanic.h   |   6 +
 include/hw/virtio/vhost-user.h  |   3 +-
 include/hw/virtio/vhost.h   |   1 +
 include/hw/virtio/virtio.h  |   2 +
 include/standard-headers/linux/ethtool.h|  55 +++
 include/standard-headers/linux/pci_regs.h   |   6 +
 include/standard-headers/linux/pvpanic.h|   7 +-
 include/standard-headers/linux/virtio_bt.h  |   1 -
 include/standard-headers/linux/virtio_mem.h |   2 +
 include/standard-headers/linux/virtio_net.h | 143 ++

[PULL v3 10/41] vhost-vsock: add VIRTIO_F_RING_PACKED to feature_bits

2024-06-05 Thread Michael S. Tsirkin

From: Halil Pasic 

Not having VIRTIO_F_RING_PACKED in feature_bits[] is a problem when the
vhost-vsock device does not offer the feature bit VIRTIO_F_RING_PACKED
but the in-QEMU device is configured to try to use the packed layout
(the virtio property "packed" is on).

As of today, the Linux kernel vhost-vsock device does not support the
packed queue layout (as vhost does not support packed), and does not
offer VIRTIO_F_RING_PACKED. Thus when for example a vhost-vsock-ccw is
used with packed=on, VIRTIO_F_RING_PACKED ends up being negotiated,
despite the fact that the device does not actually support it, and
one gets to keep the pieces.

Fixes: 74b3e46630 ("virtio: add property to enable packed virtqueue")
Reported-by: Marc Hartmayer 
Signed-off-by: Halil Pasic 
Message-Id: <20240429113334.2454197-1-pa...@linux.ibm.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/vhost-vsock-common.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c
index 12ea87d7a7..fd88df2560 100644
--- a/hw/virtio/vhost-vsock-common.c
+++ b/hw/virtio/vhost-vsock-common.c
@@ -22,6 +22,7 @@
 const int feature_bits[] = {
 VIRTIO_VSOCK_F_SEQPACKET,
 VIRTIO_F_RING_RESET,
+VIRTIO_F_RING_PACKED,
 VHOST_INVALID_FEATURE_BIT
 };
 
-- 
MST

Re: linux-user emulation hangs during fork

2024-06-05 Thread Richard Henderson


On 6/5/24 02:14, Andreas Schwab wrote:

$ qemu-x86_64 --version
qemu-x86_64 version 9.0.50 (v9.0.0-1211-gd16cab541a)
Copyright (c) 2003-2024 Fabrice Bellard and the QEMU Project developers
$ cat fork.rb
begin
   r, w = IO.pipe
   if pid1 = fork
 w.close
 r.read 1
 Process.kill "USR1", pid1
 Process.wait2 pid1
   else
 print "child\n"
 r.close
 if pid2 = fork
   trap("USR1") { print "child: kill\n"; Process.kill "USR2", pid2 }
   w.close
   print "child: wait\n"
   Process.wait2 pid2
 else
   print "grandchild\n"
   w.close
   sleep 0.2
 end
   end
end
$ ruby fork.rb
child
child: wait
grandchild
child: kill
$ qemu-x86_64 /usr/bin/ruby fork.rb
child
child: wait
^Z
[1]+  Stopped qemu-x86_64 /usr/bin/ruby fork.rb
$ grep SigB $(for p in $(pidof qemu-x86_64); do echo /proc/$p/status; done | 
sort)
/proc/3221/status:SigBlk:   
/proc/3224/status:SigBlk:   
/proc/3228/status:SigBlk:   fff27ffbfa9f



Works for me:

rth@stoup:~/zz$ ~/qemu/bld/qemu-x86_64 `which ruby` fork.rb
child
grandchild
child: wait
child: kill
rth@stoup:~/zz$ ~/qemu/bld/qemu-x86_64 `which ruby` fork.rb
child
grandchild
child: wait
child: kill
rth@stoup:~/zz$ ~/qemu/bld/qemu-x86_64 `which ruby` fork.rb
child
grandchild
child: wait
child: kill
rth@stoup:~/zz$ ~/qemu/bld/qemu-x86_64 `which ruby` fork.rb
child
grandchild
child: wait
child: kill


r~

[PATCH] target/i386: SEV: do not assume machine->cgs is SEV

2024-06-05 Thread Paolo Bonzini

There can be other confidential computing classes that are not derived
from sev-common.  Avoid aborting when encountering them.

Signed-off-by: Paolo Bonzini 
---
 target/i386/sev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/target/i386/sev.c b/target/i386/sev.c
index 004c667ac14..97e15f8b7a9 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -1710,7 +1710,9 @@ void sev_es_set_reset_vector(CPUState *cpu)
 {
 X86CPU *x86;
 CPUX86State *env;
-SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
+ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
+SevCommonState *sev_common = SEV_COMMON(
+object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON));
 
 /* Only update if we have valid reset information */
 if (!sev_common || !sev_common->reset_data_valid) {
-- 
2.45.1

Re: [PATCH] accel/tcg/plugin: Fix inject_mem_cb rw masking

2024-06-05 Thread Pierrick Bouvier


Thanks for catching this Richard.

I made the same mistake in plugins/core.c as well; could you fix it there 
too as part of this patch?


For completeness: rw is an enum where
R is 0b01, W is 0b10, and RW is 0b11, so it works as expected with &.

Thanks,
Pierrick

Reviewed-by: Pierrick Bouvier 

On 6/5/24 15:25, Richard Henderson wrote:

These are not booleans, but masks.

Fixes: f86fd4d8721 ("plugins: distinct types for callbacks")
Signed-off-by: Richard Henderson 
---
  accel/tcg/plugin-gen.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index cc1634e7a6..b6bae32b99 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -240,13 +240,13 @@ static void inject_mem_cb(struct qemu_plugin_dyn_cb *cb,
  {
  switch (cb->type) {
  case PLUGIN_CB_MEM_REGULAR:
-if (rw && cb->regular.rw) {
+if (rw & cb->regular.rw) {
  gen_mem_cb(&cb->regular, meminfo, addr);
  }
  break;
  case PLUGIN_CB_INLINE_ADD_U64:
  case PLUGIN_CB_INLINE_STORE_U64:
-if (rw && cb->inline_insn.rw) {
+if (rw & cb->inline_insn.rw) {
  inject_cb(cb);
  }
  break;

Re: [PATCH v7 7/7] tests/migration-test: add qpl compression test

2024-06-05 Thread Fabiano Rosas

Yuan Liu  writes:

> add qpl to compression method test for multifd migration
>
> the qpl compression supports software path and hardware
> path(IAA device), and the hardware path is used first by
> default. If the hardware path is unavailable, it will
> automatically fallback to the software path for testing.
>
> Signed-off-by: Yuan Liu 
> Reviewed-by: Nanhai Zou 
> Reviewed-by: Peter Xu 

Reviewed-by: Fabiano Rosas

Re: [PATCH v7 6/7] migration/multifd: implement qpl compression and decompression

2024-06-05 Thread Fabiano Rosas

Yuan Liu  writes:

> QPL compression and decompression will use IAA hardware first.
> If IAA hardware is not available, it will automatically fall
> back to QPL software path, if the software job also fails,
> the uncompressed page is sent directly.
>
> Signed-off-by: Yuan Liu 
> Reviewed-by: Nanhai Zou 
> ---
>  migration/multifd-qpl.c | 412 +++-
>  1 file changed, 408 insertions(+), 4 deletions(-)
>
> diff --git a/migration/multifd-qpl.c b/migration/multifd-qpl.c
> index 6791a204d5..18b3384bd5 100644
> --- a/migration/multifd-qpl.c
> +++ b/migration/multifd-qpl.c
> @@ -13,9 +13,14 @@
>  #include "qemu/osdep.h"
>  #include "qemu/module.h"
>  #include "qapi/error.h"
> +#include "qapi/qapi-types-migration.h"
> +#include "exec/ramblock.h"
>  #include "multifd.h"
>  #include "qpl/qpl.h"
>  
> +/* Maximum number of retries to resubmit a job if IAA work queues are full */
> +#define MAX_SUBMIT_RETRY_NUM (3)
> +
>  typedef struct {
>  /* the QPL hardware path job */
>  qpl_job *job;
> @@ -260,6 +265,219 @@ static void multifd_qpl_send_cleanup(MultiFDSendParams 
> *p, Error **errp)
>  p->iov = NULL;
>  }
>  
> +/**
> + * multifd_qpl_prepare_job: prepare the job
> + *
> + * Set the QPL job parameters and properties.
> + *
> + * @job: pointer to the qpl_job structure
> + * @is_compression: indicates compression and decompression
> + * @input: pointer to the input data buffer
> + * @input_len: the length of the input data
> + * @output: pointer to the output data buffer
> + * @output_len: the length of the output data
> + */
> +static void multifd_qpl_prepare_job(qpl_job *job, bool is_compression,
> +uint8_t *input, uint32_t input_len,
> +uint8_t *output, uint32_t output_len)
> +{
> +job->op = is_compression ? qpl_op_compress : qpl_op_decompress;
> +job->next_in_ptr = input;
> +job->next_out_ptr = output;
> +job->available_in = input_len;
> +job->available_out = output_len;
> +job->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST | QPL_FLAG_OMIT_VERIFY;
> +/* only supports compression level 1 */
> +job->level = 1;
> +}
> +
> +/**
> + * multifd_qpl_prepare_job: prepare the compression job

The function name in this doc comment is wrong; it should be multifd_qpl_prepare_comp_job.

> + *
> + * Set the compression job parameters and properties.
> + *
> + * @job: pointer to the qpl_job structure
> + * @input: pointer to the input data buffer
> + * @input_len: the length of the input data
> + * @output: pointer to the output data buffer
> + * @output_len: the length of the output data
> + */
> +static void multifd_qpl_prepare_comp_job(qpl_job *job, uint8_t *input,
> + uint32_t input_len, uint8_t *output,
> + uint32_t output_len)
> +{
> +multifd_qpl_prepare_job(job, true, input, input_len, output, output_len);
> +}
> +
> +/**
> + * multifd_qpl_prepare_job: prepare the decompression job

The function name in this doc comment is wrong here as well; it should be multifd_qpl_prepare_decomp_job.

> + *
> + * Set the decompression job parameters and properties.
> + *
> + * @job: pointer to the qpl_job structure
> + * @input: pointer to the input data buffer
> + * @input_len: the length of the input data
> + * @output: pointer to the output data buffer
> + * @output_len: the length of the output data
> + */
> +static void multifd_qpl_prepare_decomp_job(qpl_job *job, uint8_t *input,
> +   uint32_t input_len, uint8_t *output,
> +   uint32_t output_len)
> +{
> +multifd_qpl_prepare_job(job, false, input, input_len, output, output_len);
> +}
> +
> +/**
> + * multifd_qpl_fill_iov: fill in the IOV
> + *
> + * Fill in the QPL packet IOV
> + *
> + * @p: Params for the channel being used
> + * @data: pointer to the IOV data
> + * @len: The length of the IOV data
> + */
> +static void multifd_qpl_fill_iov(MultiFDSendParams *p, uint8_t *data,
> + uint32_t len)
> +{
> +p->iov[p->iovs_num].iov_base = data;
> +p->iov[p->iovs_num].iov_len = len;
> +p->iovs_num++;
> +p->next_packet_size += len;
> +}
> +
> +/**
> + * multifd_qpl_fill_packet: fill the compressed page into the QPL packet
> + *
> + * Fill the compressed page length and IOV into the QPL packet
> + *
> + * @idx: The index of the compressed length array
> + * @p: Params for the channel being used
> + * @data: pointer to the compressed page buffer
> + * @len: The length of the compressed page
> + */
> +static void multifd_qpl_fill_packet(uint32_t idx, MultiFDSendParams *p,
> +uint8_t *data, uint32_t len)
> +{
> +QplData *qpl = p->compress_data;
> +
> +qpl->zlen[idx] = cpu_to_be32(len);
> +multifd_qpl_fill_iov(p, data, len);
> +}
> +
> +/**
> + * multifd_qpl_submit_job: submit a job to the hardware
> + *
> + * Submit a QPL hardware job to the IAA device
> + *
> + * Returns true if the job is submitted successfully, otherwise false.
> + *
> +

1 2 3 4 >

1 - 100 of 369 matches

Mail list logo