Re: [RFC v5 05/10] tcg: Add tcg opcodes and helpers for native library calls

2023-08-25 Thread Richard Henderson

On 8/25/23 03:45, Yeqi Fu wrote:

This commit implements tcg opcodes and helpers for native library
calls. A table is used to store the parameter types and return value
types for each native library function. In terms of types, only three
types are of real concern: the two base sizes int and intptr_t, and
if the value is a pointer, tcg_gen_g2h and tcg_gen_h2g are used for
address conversion.

Signed-off-by: Yeqi Fu 
---
  accel/tcg/tcg-runtime.h  |  22 
  include/native/native-defs.h |  42 
  include/tcg/tcg-op-common.h  |  11 ++
  include/tcg/tcg.h|   9 ++
  tcg/tcg-op.c | 193 ++-
  5 files changed, 276 insertions(+), 1 deletion(-)
  create mode 100644 include/native/native-defs.h

diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index 39e68007f9..bda78b4489 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -37,6 +37,28 @@ DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, 
env)
   */
  #define helper_memset memset
  DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, ptr)
+
+#define helper_memcpy memcpy
+DEF_HELPER_FLAGS_3(memcpy, TCG_CALL_NO_RWG, ptr, ptr, ptr, ptr)
+
+#define helper_strncpy strncpy
+DEF_HELPER_FLAGS_3(strncpy, TCG_CALL_NO_RWG, ptr, ptr, ptr, ptr)
+
+#define helper_memcmp memcmp
+DEF_HELPER_FLAGS_3(memcmp, TCG_CALL_NO_RWG, int, ptr, ptr, ptr)
+
+#define helper_strncmp strncmp
+DEF_HELPER_FLAGS_3(strncmp, TCG_CALL_NO_RWG, int, ptr, ptr, ptr)
+
+#define helper_strcpy strcpy
+DEF_HELPER_FLAGS_2(strcpy, TCG_CALL_NO_RWG, ptr, ptr, ptr)
+
+#define helper_strcat strcat
+DEF_HELPER_FLAGS_2(strcat, TCG_CALL_NO_RWG, ptr, ptr, ptr)
+
+#define helper_strcmp strcmp
+DEF_HELPER_FLAGS_2(strcmp, TCG_CALL_NO_RWG, int, ptr, ptr)


You cannot just call these directly.  This will fail immediately whenever the guest does 
something silly like


memcpy(NULL, "foo", 4);

This must raise SIGSEGV to the guest.

If we leave the bulk transform to tcg, the absolute minimum is

void * HELPER(memcpy)(void *dst, void *src, target_ulong len)
{
set_helper_retaddr(GETPC());
void *r = memcpy(dst, src, len);
clear_helper_retaddr();
return r;
}

There is no way to do this thread-local storage update from TCG.

But if we need to have a helper at all, we might as well do more and *not* leave the 
transform to tcg.  Something akin to


target_ulong HELPER(memcpy)(target_ulong dst, target_ulong src, target_ulong 
len)
{
uintptr_t ra = GETPC();
CPUState *cpu = thread_cpu;
void *h_dst, *h_src;

if (!h2g_valid(src)) {
   cpu_loop_exit_sigsegv(cpu, src, MMU_DATA_LOAD, 1, ra);
}
if (!h2g_valid(dst)) {
   cpu_loop_exit_sigsegv(cpu, dst, MMU_DATA_STORE, 1, ra);
}

set_helper_retaddr(ra);
memcpy(g2h(cpu, dst), g2h(cpu, src), len);
clear_helper_retaddr();

/* memcpy always returns its first argument */
return dst;
}



--- /dev/null
+++ b/include/native/native-defs.h
@@ -0,0 +1,42 @@
+/*
+ * Argument encoding. We only really care about 3 types. The two base
+ * sizes (int and intptr_t) and if the value is a pointer (in which
+ * case we need to adjust it g2h before passing to the native
+ * function).
+ */
+#define TYPE_NO_ARG 0x0
+#define TYPE_INT_ARG 0x1
+#define TYPE_IPTR_ARG 0x2
+#define TYPE_PTR_ARG 0x3
+
+#define ENCODE_TYPE(ret_value, arg1, arg2, arg3) \
+((ret_value) | (arg1 << 4) | (arg2 << 8) | (arg3 << 12))


Supposing we do the transform in tcg, this duplicates include/exec/helper-head.h, and 
dh_typemask().



+static const FuncHelper func_helper_table[] = {
+{ .func = "memset",
+  .helper = (helper_func)gen_helper_memset,
+  .type = TYPE_AAIP },
+{ .func = "memcpy",
+  .helper = (helper_func)gen_helper_memcpy,
+  .type = TYPE_AAAP },
+{ .func = "strncpy",
+  .helper = (helper_func)gen_helper_strncpy,
+  .type = TYPE_AAAP },
+{ .func = "memcmp",
+  .helper = (helper_func)gen_helper_memcmp,
+  .type = TYPE_IAAP },
+{ .func = "strncmp",
+  .helper = (helper_func)gen_helper_strncmp,
+  .type = TYPE_IAAP },
+{ .func = "strcpy",
+  .helper = (helper_func)gen_helper_strcpy,
+  .type = TYPE_AAA },
+{ .func = "strcat",
+  .helper = (helper_func)gen_helper_strcat,
+  .type = TYPE_AAA },
+{ .func = "strcmp",
+  .helper = (helper_func)gen_helper_strcmp,
+  .type = TYPE_IAA },
+};
+/* p: iptr ; i: i32 ; a: ptr(address) */
+void gen_native_call_i32(const char *func_name, TCGv_i32 ret, TCGv_i32 arg1,
+ TCGv_i32 arg2, TCGv_i32 arg3)
+{
+TCGv_ptr arg1_ptr = tcg_temp_new_ptr();
+TCGv_ptr arg2_ptr = tcg_temp_new_ptr();
+TCGv_ptr arg3_ptr = tcg_temp_new_ptr();
+TCGv_ptr ret_ptr = tcg_temp_new_ptr();
+unsigned int i;
+for (i = 0; i < sizeof(func_helper_table) / sizeof(FuncHelper); i++) {
+if (strcmp(func_name, func_helper_table[i].func) == 0) {
+break;
+}
+}
+   

Re: [RFC v5 04/10] linux-user: Implement native-bypass option support

2023-08-25 Thread Richard Henderson

On 8/25/23 03:20, Yeqi Fu wrote:

+#if defined(CONFIG_NATIVE_CALL)
+/* Set the library for native bypass  */
+if (native_lib_path) {
+if (g_file_test(native_lib_path, G_FILE_TEST_IS_REGULAR)) {
+GString *lib = g_string_new(native_lib_path);
+lib = g_string_prepend(lib, "LD_PRELOAD=");
+if (envlist_appendenv(envlist, g_string_free(lib, false), ":")) {
+fprintf(stderr,
+"failed to append the native library to environment.\n");
+exit(EXIT_FAILURE);
+}
+} else {
+fprintf(stderr, "native library %s does not exist.\n",
+native_lib_path);
+exit(EXIT_FAILURE);
+}
+}
+#endif


Here you append to the existing LD_PRELOAD.


+/*
+ * An error may occur when executing execv, stating that the
+ * shared library from LD_PRELOAD cannot be preloaded on a
+ * different arch. So, we find LD_PRELOAD and remove it from
+ * envp before executing the execv.
+ */
+if (native_bypass_enabled()) {
+i = 0;
+while (envp[i] != NULL) {
+if (strncmp(envp[i], "LD_PRELOAD=", 11) == 0) {
+for (int j = i; envp[j] != NULL; j++) {
+envp[j] = envp[j + 1];
+}
+} else {
+i++;
+}
+}
+}


Here you simply remove LD_PRELOAD entirely.
At most you should only remove libnative.so.

I'm not at all sure that you should be modifying the target environment at all.  It's ok 
for simple testing, but it is definitely error prone.  There are a couple of different 
solutions:


(1) Dynamically modify /etc/ld.so.preload, similar to how we handle various 
/proc files.

(2) Merge libnative.so with vdso.so (and select one of two images depending on bypass 
enabled).



r~



Re: [RFC v5 02/10] build: Implement libnative library and the build machinery for libnative

2023-08-25 Thread Richard Henderson

On 8/25/23 03:20, Yeqi Fu wrote:

This commit implements a shared library, where native functions are
rewritten as special instructions. At runtime, user programs load
the shared library, and special instructions are executed when
native functions are called.

Signed-off-by: Yeqi Fu 

...

diff --git a/common-user/native/libnative.S b/common-user/native/libnative.S
new file mode 100644
index 00..3692eaa3cf
--- /dev/null
+++ b/common-user/native/libnative.S
@@ -0,0 +1,69 @@
+#if defined(i386) || defined(x86_64)
+/*
+ * An unused instruction is utilized to mark a native call.
+ */
+#define __SPECIAL_INSTR .byte 0x0f, 0xff;
+#define __RET_INSTR ret;
+#endif
+
+#if defined(arm) || defined(aarch64)
+/*
+ * HLT is an invalid instruction for userspace programs,
+ * and is used to mark a native call.
+ */
+#define __SPECIAL_INSTR hlt 0xffff;
+#if defined(aarch64)
+#define __RET_INSTR ret;
+#else
+#define __RET_INSTR bx lr;
+#endif
+#endif
+
+
+#if defined(mips) || defined(mips64)
+/*
+ * The syscall instruction contains 20 unused bits, which are typically
+ * set to 0. These bits can be used to store non-zero data,
+ * distinguishing them from a regular syscall instruction.
+ */
+#define __SPECIAL_INSTR syscall 0xffff;
+#define __RET_INSTR jr $ra;
+#endif
+
+/* Symbols of native functions */
+.section .data
+sym_memset:  .asciz "memset"
+sym_memcpy:  .asciz "memcpy"
+sym_strncpy:  .asciz "strncpy"
+sym_memcmp:  .asciz "memcmp"
+sym_strncmp:  .asciz "strncmp"
+sym_strcpy:  .asciz "strcpy"
+sym_strcat:  .asciz "strcat"
+sym_strcmp:  .asciz "strcmp"
+
+.macro define_function name
+\name:
+#if defined(x86_64) || defined(aarch64)
+__SPECIAL_INSTR
+.quad sym_\name
+__RET_INSTR
+#elif defined(mips64)
+.align 4
+__SPECIAL_INSTR
+.quad sym_\name
+__RET_INSTR
+#elif defined(i386) || defined(mips) || defined(arm)
+__SPECIAL_INSTR
+.long sym_\name
+__RET_INSTR
+#endif
+.endm
+
+define_function memcpy
+define_function strncpy
+define_function memset
+define_function memcmp
+define_function strncmp
+define_function strcpy
+define_function strcat
+define_function strcmp


This cannot possibly work, since none of the symbols are marked .globl, and are therefore 
not exported from your libnative.so.


Furthermore, you placed your strings in .data, but then failed to change back to .text, so 
none of the instructions are in an executable load segment.


I conclude that your testing succeeded only because no library calls were 
replaced.
This is not sufficient testing.

In review of previous versions, I have mentioned that the x86 UD0 instruction has more 
bytes than simply 0x0f 0xff -- at minimum 3 -- and moreover can be used in the assembler 
to produce pc-relative values.


We can clean up the assembly as follows.


r~


-


.macro special_instr sym
#if defined(__i386__)
ud0 \sym-1f, %eax; 1:
#elif defined(__x86_64__)
ud0 \sym(%rip), %eax
#elif defined(__arm__) || defined(__aarch64__)
hlt 0xffff
1:  .word   \sym - 1b
#elif defined(__mips__)
syscall 0xffff
1:  .word   \sym - 1b
#else
# error
#endif
.endm

.macro ret_instr
#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
ret
#elif defined(__arm__)
bx  lr
#elif defined(__mips__)
jr  $ra
#else
# error
#endif
.endm

/* Symbols of native functions */

.macro define_function name
.text
\name:
special_instr 9f
ret_instr
.globl \name
.type \name, %function
.size \name, . - \name

.section .rodata
9:  .asciz  "\name"
.endm

define_function memcmp
define_function memcpy
define_function memset
define_function strcat
define_function strcmp
define_function strcpy
define_function strncmp
define_function strncpy



[QEMU][PATCH v3 2/2] xen_arm: Initialize RAM and add hi/low memory regions

2023-08-25 Thread Vikram Garhwal
From: Oleksandr Tyshchenko 

In order to use virtio backends we need to initialize RAM for the
xen-mapcache (which is responsible for mapping guest memory using foreign
mapping) to work. Calculate and add hi/low memory regions based on
machine->ram_size.

Use the constants defined in public header arch-arm.h to be aligned with the xen
toolstack.

While using this machine, the toolstack should then pass real ram_size using
"-m" arg. If "-m" is not given, create a QEMU machine without IOREQ and other
emulated devices like TPM and VIRTIO. This is done to keep this QEMU machine
usable for /etc/init.d/xencommons.

Signed-off-by: Oleksandr Tyshchenko 
Signed-off-by: Vikram Garhwal 
---
 hw/arm/xen_arm.c | 53 
 1 file changed, 53 insertions(+)

diff --git a/hw/arm/xen_arm.c b/hw/arm/xen_arm.c
index d1e9f7b488..aa8b6171ad 100644
--- a/hw/arm/xen_arm.c
+++ b/hw/arm/xen_arm.c
@@ -60,6 +60,8 @@ struct XenArmState {
 } cfg;
 };
 
+static MemoryRegion ram_lo, ram_hi;
+
 /*
  * VIRTIO_MMIO_DEV_SIZE is imported from tools/libs/light/libxl_arm.c under Xen
  * repository.
@@ -80,6 +82,14 @@ static int xendevicemodel_set_irq_level(
 }
 #endif
 
+#if defined(__i386__) || defined(__x86_64__)
+#define GUEST_RAM_BANKS   2
+#define GUEST_RAM0_BASE   0x40000000ULL /* 3GB of low RAM @ 1GB */
+#define GUEST_RAM0_SIZE   0xc0000000ULL
+#define GUEST_RAM1_BASE   0x0200000000ULL /* 1016GB of RAM @ 8GB */
+#define GUEST_RAM1_SIZE   0xfe00000000ULL
+#endif
+
 #if CONFIG_XEN_CTRL_INTERFACE_VERSION <= 41700
 #define GUEST_VIRTIO_MMIO_BASE   xen_mk_ullong(0x02000000)
 #define GUEST_VIRTIO_MMIO_SIZE   xen_mk_ullong(0x00100000)
@@ -108,6 +118,39 @@ static void xen_create_virtio_mmio_devices(XenArmState 
*xam)
 }
 }
 
+static void xen_init_ram(MachineState *machine)
+{
+MemoryRegion *sysmem = get_system_memory();
+ram_addr_t block_len, ram_size[GUEST_RAM_BANKS];
+
+if (machine->ram_size <= GUEST_RAM0_SIZE) {
+ram_size[0] = machine->ram_size;
+ram_size[1] = 0;
+block_len = GUEST_RAM0_BASE + ram_size[0];
+} else {
+ram_size[0] = GUEST_RAM0_SIZE;
+ram_size[1] = machine->ram_size - GUEST_RAM0_SIZE;
+block_len = GUEST_RAM1_BASE + ram_size[1];
+}
+
+    memory_region_init_ram(&xen_memory, NULL, "xen.ram", block_len,
+                           &error_fatal);
+
+    memory_region_init_alias(&ram_lo, NULL, "xen.ram.lo", &xen_memory,
+                             GUEST_RAM0_BASE, ram_size[0]);
+    memory_region_add_subregion(sysmem, GUEST_RAM0_BASE, &ram_lo);
+DPRINTF("Initialized region xen.ram.lo: base 0x%llx size 0x%lx\n",
+GUEST_RAM0_BASE, ram_size[0]);
+
+    if (ram_size[1] > 0) {
+        memory_region_init_alias(&ram_hi, NULL, "xen.ram.hi", &xen_memory,
+                                 GUEST_RAM1_BASE, ram_size[1]);
+        memory_region_add_subregion(sysmem, GUEST_RAM1_BASE, &ram_hi);
+        DPRINTF("Initialized region xen.ram.hi: base 0x%llx size 0x%lx\n",
+                GUEST_RAM1_BASE, ram_size[1]);
+    }
+}
+
 void arch_handle_ioreq(XenIOState *state, ioreq_t *req)
 {
 hw_error("Invalid ioreq type 0x%x\n", req->type);
@@ -157,6 +200,14 @@ static void xen_arm_init(MachineState *machine)
 
 xam->state =  g_new0(XenIOState, 1);
 
+if (machine->ram_size == 0) {
+DPRINTF("ram_size not specified. QEMU machine started without IOREQ"
+"(no emulated devices including Virtio)\n");
+return;
+}
+
+xen_init_ram(machine);
+
 xen_register_ioreq(xam->state, machine->smp.cpus, &xen_memory_listener);
 
 xen_create_virtio_mmio_devices(xam);
@@ -204,6 +255,8 @@ static void xen_arm_machine_class_init(ObjectClass *oc, 
void *data)
 mc->init = xen_arm_init;
 mc->max_cpus = 1;
 mc->default_machine_opts = "accel=xen";
+/* Set explicitly here to make sure that real ram_size is passed */
+mc->default_ram_size = 0;
 
 #ifdef CONFIG_TPM
 object_class_property_add(oc, "tpm-base-addr", "uint64_t",
-- 
2.17.1




[QEMU][PATCH v3 1/2] xen_arm: Create virtio-mmio devices during initialization

2023-08-25 Thread Vikram Garhwal
From: Oleksandr Tyshchenko 

In order to use virtio backends we need to allocate virtio-mmio
parameters (irq and base) and register corresponding buses.

Use the constants defined in public header arch-arm.h to be
aligned with the toolstack. So the number of current supported
virtio-mmio devices is 10.

For the interrupts triggering use already existing on Arm
device-model hypercall.

The toolstack should then insert the same amount of device nodes
into guest device-tree.

Signed-off-by: Oleksandr Tyshchenko 
Signed-off-by: Vikram Garhwal 
---
 hw/arm/xen_arm.c | 51 
 1 file changed, 51 insertions(+)

diff --git a/hw/arm/xen_arm.c b/hw/arm/xen_arm.c
index 1d3e6d481a..d1e9f7b488 100644
--- a/hw/arm/xen_arm.c
+++ b/hw/arm/xen_arm.c
@@ -26,6 +26,7 @@
 #include "qapi/qapi-commands-migration.h"
 #include "qapi/visitor.h"
 #include "hw/boards.h"
+#include "hw/irq.h"
 #include "hw/sysbus.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/tpm_backend.h"
@@ -59,6 +60,54 @@ struct XenArmState {
 } cfg;
 };
 
+/*
+ * VIRTIO_MMIO_DEV_SIZE is imported from tools/libs/light/libxl_arm.c under Xen
+ * repository.
+ *
+ * Origin: git://xenbits.xen.org/xen.git 2128143c114c
+ */
+#define VIRTIO_MMIO_DEV_SIZE   0x200
+
+#define NR_VIRTIO_MMIO_DEVICES   \
+   (GUEST_VIRTIO_MMIO_SPI_LAST - GUEST_VIRTIO_MMIO_SPI_FIRST)
+
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION <= 41500
+static int xendevicemodel_set_irq_level(
+xendevicemodel_handle *dmod, domid_t domid, uint32_t irq,
+unsigned int level)
+{
+return 0;
+}
+#endif
+
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION <= 41700
+#define GUEST_VIRTIO_MMIO_BASE   xen_mk_ullong(0x02000000)
+#define GUEST_VIRTIO_MMIO_SIZE   xen_mk_ullong(0x00100000)
+#define GUEST_VIRTIO_MMIO_SPI_FIRST   33
+#define GUEST_VIRTIO_MMIO_SPI_LAST    43
+#endif
+
+static void xen_set_irq(void *opaque, int irq, int level)
+{
+xendevicemodel_set_irq_level(xen_dmod, xen_domid, irq, level);
+}
+
+static void xen_create_virtio_mmio_devices(XenArmState *xam)
+{
+int i;
+
+for (i = 0; i < NR_VIRTIO_MMIO_DEVICES; i++) {
+hwaddr base = GUEST_VIRTIO_MMIO_BASE + i * VIRTIO_MMIO_DEV_SIZE;
+qemu_irq irq = qemu_allocate_irq(xen_set_irq, NULL,
+ GUEST_VIRTIO_MMIO_SPI_FIRST + i);
+
+sysbus_create_simple("virtio-mmio", base, irq);
+
+DPRINTF("Created virtio-mmio device %d: irq %d base 0x%lx\n",
+i, GUEST_VIRTIO_MMIO_SPI_FIRST + i, base);
+}
+}
+
 void arch_handle_ioreq(XenIOState *state, ioreq_t *req)
 {
 hw_error("Invalid ioreq type 0x%x\n", req->type);
@@ -110,6 +159,8 @@ static void xen_arm_init(MachineState *machine)
 
 xen_register_ioreq(xam->state, machine->smp.cpus, &xen_memory_listener);
 
+xen_create_virtio_mmio_devices(xam);
+
 #ifdef CONFIG_TPM
 if (xam->cfg.tpm_base_addr) {
 xen_enable_tpm(xam);
-- 
2.17.1




[QEMU][PATCH v3 0/2] Add Virtio support to Xenpvh machine for arm

2023-08-25 Thread Vikram Garhwal
Hi,
We added virtio-mmio support to the xenpvh machine. Now, it can support up to
10 virtio-mmio devices.

Changelog:
v2->v3:
Define GUEST_VIRTIO_*, GUEST_RAM* and xendevicemodel_set_irq() manually
for old xen version. This was done to avoid build failures in gitlab-ci
v1->v2:
Add reference for VIRTIO_MMIO_DEV_SIZE.
Update ram_size=0 print statement.

Oleksandr Tyshchenko (2):
  xen_arm: Create virtio-mmio devices during initialization
  xen_arm: Initialize RAM and add hi/low memory regions

 hw/arm/xen_arm.c | 104 +++
 1 file changed, 104 insertions(+)

-- 
2.17.1




Re: [PATCH 1/3] hw/mips/jazz: Remove the big_endian variable

2023-08-25 Thread Richard Henderson

On 8/25/23 10:51, Thomas Huth wrote:

There is an easier way to get a value that can be used to decide
whether the target is big endian or not: Simply use the
target_words_bigendian() function instead.

Signed-off-by: Thomas Huth
---
  hw/mips/jazz.c | 10 ++
  1 file changed, 2 insertions(+), 8 deletions(-)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH v2 2/4] tests/migration-test: Add a test for null parameter setups

2023-08-25 Thread Peter Xu
On Fri, Aug 25, 2023 at 07:33:23PM +0200, Thomas Huth wrote:
> On 25/08/2023 19.15, Peter Xu wrote:
> > Add a test for StrOrNull parameters (tls-*).
> > 
> > Reviewed-by: Fabiano Rosas 
> > Signed-off-by: Peter Xu 
> > ---
> >   tests/qtest/migration-test.c | 21 +
> >   1 file changed, 21 insertions(+)
> > 
> > diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
> > index 62d3f37021..64efee8b04 100644
> > --- a/tests/qtest/migration-test.c
> > +++ b/tests/qtest/migration-test.c
> > @@ -1471,6 +1471,26 @@ static void test_postcopy_preempt_all(void)
> >   #endif
> > +/*
> > + * We have a few parameters that allows null as input, test them to make
> > + * sure they won't crash (where some used to).
> > + */
> > +static void test_null_parameters(void)
> > +{
> > +const char *allow_null_params[] = {
> > +"tls-authz", "tls-hostname", "tls-creds"};
> 
> I'd place the ending bracket on a new line.
> 
> > +QTestState *vm = qtest_init("");
> > +int i;
> > +
> > +for (i = 0; i < sizeof(allow_null_params) / sizeof(const char *); i++) 
> > {
> 
> Could you use ARRAY_SIZE() instead of calculating it on your own?

Sure (on both), thanks.

-- 
Peter Xu




[PATCH v1 3/7] hw/fsi: Introduce IBM's cfam,fsi-slave

2023-08-25 Thread Ninad Palsule
This is a part of patchset where IBM's Flexible Service Interface is
introduced.

The Common FRU Access Macro (CFAM), an address space containing
various "engines" that drive accesses on busses internal and external
to the POWER chip. Examples include the SBEFIFO and I2C masters. The
engines hang off of an internal Local Bus (LBUS) which is described
by the CFAM configuration block.

The FSI slave: The slave is the terminal point of the FSI bus for
FSI symbols addressed to it. Slaves can be cascaded off of one
another. The slave's configuration registers appear in address space
of the CFAM to which it is attached.

Signed-off-by: Andrew Jeffery 
Signed-off-by: Cédric Le Goater 
Signed-off-by: Ninad Palsule 
---
 hw/fsi/Kconfig |   9 ++
 hw/fsi/cfam.c  | 235 +
 hw/fsi/fsi-slave.c | 109 +
 hw/fsi/meson.build |   2 +
 include/hw/fsi/cfam.h  |  61 ++
 include/hw/fsi/fsi-slave.h |  29 +
 6 files changed, 445 insertions(+)
 create mode 100644 hw/fsi/cfam.c
 create mode 100644 hw/fsi/fsi-slave.c
 create mode 100644 include/hw/fsi/cfam.h
 create mode 100644 include/hw/fsi/fsi-slave.h

diff --git a/hw/fsi/Kconfig b/hw/fsi/Kconfig
index 2a9c49f2c9..087980be22 100644
--- a/hw/fsi/Kconfig
+++ b/hw/fsi/Kconfig
@@ -1,3 +1,12 @@
+config CFAM
+bool
+select FSI
+select SCRATCHPAD
+select LBUS
+
+config FSI
+bool
+
 config SCRATCHPAD
 bool
 select LBUS
diff --git a/hw/fsi/cfam.c b/hw/fsi/cfam.c
new file mode 100644
index 00..19256050bd
--- /dev/null
+++ b/hw/fsi/cfam.c
@@ -0,0 +1,235 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright (C) 2023 IBM Corp.
+ *
+ * IBM Common FRU Access Macro
+ */
+
+#include "qemu/osdep.h"
+
+#include "qapi/error.h"
+#include "qemu/log.h"
+
+#include "hw/fsi/bits.h"
+#include "hw/fsi/cfam.h"
+#include "hw/fsi/engine-scratchpad.h"
+
+#include "hw/qdev-properties.h"
+
+#define TO_REG(x)  ((x) >> 2)
+
+#define CFAM_ENGINE_CONFIG  TO_REG(0x04)
+
+#define CFAM_CONFIG_CHIP_IDTO_REG(0x00)
+#define CFAM_CONFIG_CHIP_ID_P9 0xc0022d15
+#define   CFAM_CONFIG_CHIP_ID_BREAK 0xc0de0000
+
+static uint64_t cfam_config_read(void *opaque, hwaddr addr, unsigned size)
+{
+CFAMConfig *config;
+CFAMState *cfam;
+LBusNode *node;
+int i;
+
+config = CFAM_CONFIG(opaque);
+cfam = container_of(config, CFAMState, config);
+
+qemu_log_mask(LOG_UNIMP, "%s: read @0x%" HWADDR_PRIx " size=%d\n",
+  __func__, addr, size);
+
+assert(size == 4);
+assert(!(addr & 3));
+
+switch (addr) {
+case 0x00:
+return CFAM_CONFIG_CHIP_ID_P9;
+case 0x04:
+return ENGINE_CONFIG_NEXT
+| 0x0001/* slots */
+| 0x1000/* version */
+| ENGINE_CONFIG_TYPE_PEEK   /* type */
+| 0x000c;   /* crc */
+case 0x08:
+return ENGINE_CONFIG_NEXT
+| 0x0001/* slots */
+| 0x5000/* version */
+| ENGINE_CONFIG_TYPE_FSI/* type */
+| 0x000a;   /* crc */
+break;
+default:
+/* FIXME: Improve this */
+i = 0xc;
+        QLIST_FOREACH(node, &cfam->lbus.devices, next) {
+if (i == addr) {
+return LBUS_DEVICE_GET_CLASS(node->ldev)->config;
+}
+i += size;
+}
+
+if (i == addr) {
+return 0;
+}
+
+        return 0xc0de0000;
+}
+}
+
+static void cfam_config_write(void *opaque, hwaddr addr, uint64_t data,
+ unsigned size)
+{
+CFAMConfig *s = CFAM_CONFIG(opaque);
+
+qemu_log_mask(LOG_UNIMP, "%s: write @0x%" HWADDR_PRIx " size=%d "
+  "value=%"PRIx64"\n", __func__, addr, size, data);
+
+assert(size == 4);
+assert(!(addr & 3));
+
+switch (TO_REG(addr)) {
+case CFAM_CONFIG_CHIP_ID:
+case CFAM_CONFIG_CHIP_ID + 4:
+if (data == CFAM_CONFIG_CHIP_ID_BREAK) {
+bus_cold_reset(qdev_get_parent_bus(DEVICE(s)));
+}
+break;
+default:
+qemu_log_mask(LOG_GUEST_ERROR, "%s: Not implemented: 0x%"
+  HWADDR_PRIx" for %u\n",
+  __func__, addr, size);
+}
+}
+
+static const struct MemoryRegionOps cfam_config_ops = {
+.read = cfam_config_read,
+.write = cfam_config_write,
+.endianness = DEVICE_BIG_ENDIAN,
+};
+
+static void cfam_config_realize(DeviceState *dev, Error **errp)
+{
+CFAMConfig *s = CFAM_CONFIG(dev);
+
+    memory_region_init_io(&s->iomem, OBJECT(s), &cfam_config_ops, s,
+                          TYPE_CFAM_CONFIG, 0x400);
+}
+
+static void cfam_config_reset(DeviceState *dev)
+{
+/* Config is read-only */
+}
+
+static void cfam_config_class_init(ObjectClass 

[PATCH v1 0/7] Introduce model for IBM's FSP

2023-08-25 Thread Ninad Palsule
Hello,

Please review the patch-set.

This is a first step towards introducing model for IBM's Flexible
Service Interface. The full functionality will be implemented over the
time.

Ninad Palsule (7):
  hw/fsi: Introduce IBM's Local bus
  hw/fsi: Introduce IBM's scratchpad
  hw/fsi: Introduce IBM's cfam,fsi-slave
  hw/fsi: Introduce IBM's FSI
  hw/fsi: IBM's On-chip Peripheral Bus
  hw/fsi: Aspeed APB2OPB interface
  hw/arm: Hook up FSI module in AST2600

 hw/Kconfig |   1 +
 hw/arm/Kconfig |   1 +
 hw/arm/aspeed_ast2600.c|  15 ++
 hw/fsi/Kconfig |  23 ++
 hw/fsi/aspeed-apb2opb.c| 346 +
 hw/fsi/cfam.c  | 236 
 hw/fsi/engine-scratchpad.c | 100 +
 hw/fsi/fsi-master.c| 202 +
 hw/fsi/fsi-slave.c | 109 +
 hw/fsi/fsi.c   |  54 +
 hw/fsi/lbus.c  |  94 
 hw/fsi/meson.build |   6 +
 hw/fsi/opb.c   | 194 
 hw/fsi/trace-events|   2 +
 hw/fsi/trace.h |   1 +
 hw/meson.build |   1 +
 include/hw/arm/aspeed_soc.h|   4 +
 include/hw/fsi/aspeed-apb2opb.h|  32 +++
 include/hw/fsi/bits.h  |  15 ++
 include/hw/fsi/cfam.h  |  59 +
 include/hw/fsi/engine-scratchpad.h |  32 +++
 include/hw/fsi/fsi-master.h|  30 +++
 include/hw/fsi/fsi-slave.h |  29 +++
 include/hw/fsi/fsi.h   |  35 +++
 include/hw/fsi/lbus.h  |  57 +
 include/hw/fsi/opb.h   |  45 
 meson.build|   1 +
 27 files changed, 1724 insertions(+)
 create mode 100644 hw/fsi/Kconfig
 create mode 100644 hw/fsi/aspeed-apb2opb.c
 create mode 100644 hw/fsi/cfam.c
 create mode 100644 hw/fsi/engine-scratchpad.c
 create mode 100644 hw/fsi/fsi-master.c
 create mode 100644 hw/fsi/fsi-slave.c
 create mode 100644 hw/fsi/fsi.c
 create mode 100644 hw/fsi/lbus.c
 create mode 100644 hw/fsi/meson.build
 create mode 100644 hw/fsi/opb.c
 create mode 100644 hw/fsi/trace-events
 create mode 100644 hw/fsi/trace.h
 create mode 100644 include/hw/fsi/aspeed-apb2opb.h
 create mode 100644 include/hw/fsi/bits.h
 create mode 100644 include/hw/fsi/cfam.h
 create mode 100644 include/hw/fsi/engine-scratchpad.h
 create mode 100644 include/hw/fsi/fsi-master.h
 create mode 100644 include/hw/fsi/fsi-slave.h
 create mode 100644 include/hw/fsi/fsi.h
 create mode 100644 include/hw/fsi/lbus.h
 create mode 100644 include/hw/fsi/opb.h

-- 
2.39.2




[PATCH v1 2/7] hw/fsi: Introduce IBM's scratchpad

2023-08-25 Thread Ninad Palsule
This is a part of patchset where IBM's Flexible Service Interface is
introduced.

The LBUS device is embedded inside the scratchpad. The scratchpad
provides non-functional registers. There is a 1:1 relation between
the scratchpad and LBUS devices. Each LBUS device has 1K of memory mapped in
the LBUS.

Signed-off-by: Andrew Jeffery 
Signed-off-by: Cédric Le Goater 
Signed-off-by: Ninad Palsule 
---
 hw/fsi/Kconfig |   4 ++
 hw/fsi/engine-scratchpad.c | 100 +
 hw/fsi/meson.build |   1 +
 include/hw/fsi/engine-scratchpad.h |  32 +
 4 files changed, 137 insertions(+)
 create mode 100644 hw/fsi/engine-scratchpad.c
 create mode 100644 include/hw/fsi/engine-scratchpad.h

diff --git a/hw/fsi/Kconfig b/hw/fsi/Kconfig
index 687449e14e..2a9c49f2c9 100644
--- a/hw/fsi/Kconfig
+++ b/hw/fsi/Kconfig
@@ -1,2 +1,6 @@
+config SCRATCHPAD
+bool
+select LBUS
+
 config LBUS
 bool
diff --git a/hw/fsi/engine-scratchpad.c b/hw/fsi/engine-scratchpad.c
new file mode 100644
index 00..15a8f8cc66
--- /dev/null
+++ b/hw/fsi/engine-scratchpad.c
@@ -0,0 +1,100 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright (C) 2023 IBM Corp.
+ *
+ * IBM scratchpad engine
+ */
+
+#include "qemu/osdep.h"
+
+#include "qapi/error.h"
+#include "qemu/log.h"
+
+#include "hw/fsi/engine-scratchpad.h"
+
+static uint64_t scratchpad_read(void *opaque, hwaddr addr, unsigned size)
+{
+ScratchPad *s = SCRATCHPAD(opaque);
+
+qemu_log_mask(LOG_UNIMP, "%s: read @0x%" HWADDR_PRIx " size=%d\n",
+  __func__, addr, size);
+
+if (addr) {
+qemu_log_mask(LOG_GUEST_ERROR,
+  "%s: Out of bounds read: 0x%"HWADDR_PRIx" for %u\n",
+  __func__, addr, size);
+return 0;
+}
+
+return s->reg;
+}
+
+static void scratchpad_write(void *opaque, hwaddr addr, uint64_t data,
+ unsigned size)
+{
+ScratchPad *s = SCRATCHPAD(opaque);
+
+qemu_log_mask(LOG_UNIMP, "%s: write @0x%" HWADDR_PRIx " size=%d "
+  "value=%"PRIx64"\n", __func__, addr, size, data);
+
+if (addr) {
+qemu_log_mask(LOG_GUEST_ERROR,
+  "%s: Out of bounds write: 0x%"HWADDR_PRIx" for %u\n",
+  __func__, addr, size);
+return;
+}
+
+s->reg = data;
+}
+
+static const struct MemoryRegionOps scratchpad_ops = {
+.read = scratchpad_read,
+.write = scratchpad_write,
+.endianness = DEVICE_BIG_ENDIAN,
+};
+
+static void scratchpad_realize(DeviceState *dev, Error **errp)
+{
+LBusDevice *ldev = LBUS_DEVICE(dev);
+
+    memory_region_init_io(&ldev->iomem, OBJECT(ldev), &scratchpad_ops,
+                          ldev, TYPE_SCRATCHPAD, 0x400);
+}
+
+static void scratchpad_reset(DeviceState *dev)
+{
+ScratchPad *s = SCRATCHPAD(dev);
+
+s->reg = 0;
+}
+
+static void scratchpad_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+LBusDeviceClass *ldc = LBUS_DEVICE_CLASS(klass);
+
+dc->realize = scratchpad_realize;
+dc->reset = scratchpad_reset;
+
+ldc->config =
+  ENGINE_CONFIG_NEXT/* valid */
+| 0x0001/* slots */
+| 0x1000/* version */
+| ENGINE_CONFIG_TYPE_SCRATCHPAD /* type */
+| 0x0007;   /* crc */
+}
+
+static const TypeInfo scratchpad_info = {
+.name = TYPE_SCRATCHPAD,
+.parent = TYPE_LBUS_DEVICE,
+.instance_size = sizeof(ScratchPad),
+.class_init = scratchpad_class_init,
+.class_size = sizeof(LBusDeviceClass),
+};
+
+static void scratchpad_register_types(void)
+{
+    type_register_static(&scratchpad_info);
+}
+
+type_init(scratchpad_register_types);
diff --git a/hw/fsi/meson.build b/hw/fsi/meson.build
index e1007d5fea..f90e09ddab 100644
--- a/hw/fsi/meson.build
+++ b/hw/fsi/meson.build
@@ -1 +1,2 @@
 system_ss.add(when: 'CONFIG_LBUS', if_true: files('lbus.c'))
+system_ss.add(when: 'CONFIG_SCRATCHPAD', if_true: files('engine-scratchpad.c'))
diff --git a/include/hw/fsi/engine-scratchpad.h 
b/include/hw/fsi/engine-scratchpad.h
new file mode 100644
index 00..c2c8aa0b7e
--- /dev/null
+++ b/include/hw/fsi/engine-scratchpad.h
@@ -0,0 +1,32 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright (C) 2023 IBM Corp.
+ *
+ * IBM scratchpad engine
+ */
+#ifndef FSI_ENGINE_SCRATCHPAD_H
+#define FSI_ENGINE_SCRATCHPAD_H
+
+#include "hw/fsi/lbus.h"
+#include "hw/fsi/bits.h"
+
+#define ENGINE_CONFIG_NEXT  BE_BIT(0)
+#define ENGINE_CONFIG_VPD   BE_BIT(1)
+#define ENGINE_CONFIG_SLOTS BE_GENMASK(8, 15)
+#define ENGINE_CONFIG_VERSION   BE_GENMASK(16, 19)
+#define ENGINE_CONFIG_TYPE  BE_GENMASK(20, 27)
+#define   ENGINE_CONFIG_TYPE_PEEK   (0x02 << 4)
+#define   ENGINE_CONFIG_TYPE_FSI(0x03 << 4)
+#define   ENGINE_CONFIG_TYPE_SCRATCHPAD (0x06 << 4)
+#define 

[PATCH v1 4/7] hw/fsi: Introduce IBM's FSI

2023-08-25 Thread Ninad Palsule
This is a part of patchset where IBM's Flexible Service Interface is
introduced.

This commit models the FSI bus. The CFAM hangs off the FSI bus. The bus
is modelled in such a way that it is embedded inside the FSI master, which is
a bus controller.

The FSI master: A controller in the platform service processor (e.g.
BMC) driving CFAM engine accesses into the POWER chip. At the
hardware level FSI is a bit-based protocol supporting synchronous and
DMA-driven accesses of engines in a CFAM.

Signed-off-by: Andrew Jeffery 
Signed-off-by: Cédric Le Goater 
Signed-off-by: Ninad Palsule 
---
 hw/fsi/cfam.c   |   1 +
 hw/fsi/fsi-master.c | 203 
 hw/fsi/fsi.c|  54 ++
 hw/fsi/meson.build  |   2 +-
 include/hw/fsi/cfam.h   |   2 -
 include/hw/fsi/fsi-master.h |  30 ++
 include/hw/fsi/fsi.h|  35 +++
 7 files changed, 324 insertions(+), 3 deletions(-)
 create mode 100644 hw/fsi/fsi-master.c
 create mode 100644 hw/fsi/fsi.c
 create mode 100644 include/hw/fsi/fsi-master.h
 create mode 100644 include/hw/fsi/fsi.h

diff --git a/hw/fsi/cfam.c b/hw/fsi/cfam.c
index 19256050bd..12ce31cac4 100644
--- a/hw/fsi/cfam.c
+++ b/hw/fsi/cfam.c
@@ -12,6 +12,7 @@
 
 #include "hw/fsi/bits.h"
 #include "hw/fsi/cfam.h"
+#include "hw/fsi/fsi.h"
 #include "hw/fsi/engine-scratchpad.h"
 
 #include "hw/qdev-properties.h"
diff --git a/hw/fsi/fsi-master.c b/hw/fsi/fsi-master.c
new file mode 100644
index 00..fe1693539a
--- /dev/null
+++ b/hw/fsi/fsi-master.c
@@ -0,0 +1,203 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright (C) 2023 IBM Corp.
+ *
+ * IBM Flexible Service Interface master
+ */
+
+#include "qemu/osdep.h"
+
+#include "qapi/error.h"
+
+#include "qemu/log.h"
+
+#include "hw/fsi/bits.h"
+#include "hw/fsi/fsi-master.h"
+
+#define TYPE_OP_BUS "opb"
+
+#define TO_REG(x)   ((x) >> 2)
+
+#define FSI_MMODE   TO_REG(0x000)
+#define   FSI_MMODE_IPOLL_DMA_EN            BE_BIT(0)
+#define   FSI_MMODE_HW_ERROR_RECOVERY_EN    BE_BIT(1)
+#define   FSI_MMODE_RELATIVE_ADDRESS_EN     BE_BIT(2)
+#define   FSI_MMODE_PARITY_CHECK_EN         BE_BIT(3)
+#define   FSI_MMODE_CLOCK_DIVIDER_0         BE_GENMASK(4, 13)
+#define   FSI_MMODE_CLOCK_DIVIDER_1         BE_GENMASK(14, 23)
+#define   FSI_MMODE_DEBUG_EN                BE_BIT(24)
+
+#define FSI_MDELAY  TO_REG(0x004)
+#define   FSI_MDELAY_ECHO_0 BE_GENMASK(0, 3)
+#define   FSI_MDELAY_SEND_0 BE_GENMASK(4, 7)
+#define   FSI_MDELAY_ECHO_1 BE_GENMASK(8, 11)
+#define   FSI_MDELAY_SEND_1 BE_GENMASK(12, 15)
+
+#define FSI_MENP0   TO_REG(0x010)
+#define FSI_MENP32  TO_REG(0x014)
+#define FSI_MSENP0  TO_REG(0x018)
+#define FSI_MLEVP0  TO_REG(0x018)
+#define FSI_MSENP32 TO_REG(0x01c)
+#define FSI_MLEVP32 TO_REG(0x01c)
+#define FSI_MCENP0  TO_REG(0x020)
+#define FSI_MREFP0  TO_REG(0x020)
+#define FSI_MCENP32 TO_REG(0x024)
+#define FSI_MREFP32 TO_REG(0x024)
+
+#define FSI_MAEB                TO_REG(0x070)
+#define   FSI_MAEB_ANY_CPU_ERRORBE_BIT(0)
+#define   FSI_MAEB_ANY_DMA_ERRORBE_GENMASK(1, 16)
+#define   FSI_MAEB_ANY_PARITY_ERROR BE_BIT(17)
+
+#define FSI_MVER                TO_REG(0x074)
+#define   FSI_MVER_VERSION  BE_GENMASK(0, 7)
+#define   FSI_MVER_BRIDGES  BE_GENMASK(8, 15)
+#define   FSI_MVER_PORTSBE_GENMASK(16, 23)
+
+#define FSI_MRESP0  TO_REG(0x0d0)
+#define   FSI_MRESP0_RESET_PORT_GENERAL BE_BIT(0)
+#define   FSI_MRESP0_RESET_PORT_ERROR   BE_BIT(1)
+#define   FSI_MRESP0_RESET_ALL_BRIDGES_GENERAL  BE_BIT(2)
+#define   FSI_MRESP0_RESET_ALL_PORTS_GENERALBE_BIT(3)
+#define   FSI_MRESP0_RESET_MASTER   BE_BIT(4)
+#define   FSI_MRESP0_RESET_PARITY_ERROR_LATCH   BE_BIT(5)
+
+#define FSI_MRESB0  TO_REG(0x1d0)
+#define   FSI_MRESB0_RESET_GENERAL  BE_BIT(0)
+#define   FSI_MRESB0_RESET_ERRORBE_BIT(1)
+#define   FSI_MRESB0_SET_DMA_SUSPENDBE_BIT(5)
+#define   FSI_MRESB0_CLEAR_DMA_SUSPEND  BE_BIT(6)
+#define   FSI_MRESB0_SET_DELAY_MEASURE  BE_BIT(7)
+
+#define FSI_MECTRL  TO_REG(0x2e0)
+#define   FSI_MECTRL_TEST_PULSE BE_GENMASK(0, 7)
+#define   FSI_MECTRL_INHIBIT_PARITY_ERROR   BE_GENMASK(8, 15)
+#define   FSI_MECTRL_ENABLE_OPB_ERR_ACK BE_BIT(16)
+#define   FSI_MECTRL_AUTO_TERMINATE 

[PATCH v1 7/7] hw/arm: Hook up FSI module in AST2600

2023-08-25 Thread Ninad Palsule
This patchset introduces IBM's Flexible Service Interface(FSI).

Time for some fun with inter-processor buses. FSI allows a service
processor access to the internal buses of a host POWER processor to
perform configuration or debugging.

FSI has long existed in POWER processes and so comes with some baggage,
including how it has been integrated into the ASPEED SoC.

Working backwards from the POWER processor, the fundamental pieces of
interest for the implementation are:

1. The Common FRU Access Macro (CFAM), an address space containing
   various "engines" that drive accesses on buses internal and external
   to the POWER chip. Examples include the SBEFIFO and I2C masters. The
   engines hang off of an internal Local Bus (LBUS) which is described
   by the CFAM configuration block.

2. The FSI slave: The slave is the terminal point of the FSI bus for
   FSI symbols addressed to it. Slaves can be cascaded off of one
   another. The slave's configuration registers appear in address space
   of the CFAM to which it is attached.

3. The FSI master: A controller in the platform service processor (e.g.
   BMC) driving CFAM engine accesses into the POWER chip. At the
   hardware level FSI is a bit-based protocol supporting synchronous and
   DMA-driven accesses of engines in a CFAM.

4. The On-Chip Peripheral Bus (OPB): A low-speed bus typically found in
   POWER processors. This now makes an appearance in the ASPEED SoC due
   to tight integration of the FSI master IP with the OPB, mainly the
   existence of an MMIO-mapping of the CFAM address straight onto a
   sub-region of the OPB address space.

5. An APB-to-OPB bridge enabling access to the OPB from the ARM core in
   the AST2600. Hardware limitations prevent the OPB from being directly
   mapped into APB, so all accesses are indirect through the bridge.

The implementation appears as following in the qemu device tree:

(qemu) info qtree
bus: main-system-bus
  type System
  ...
  dev: aspeed.apb2opb, id ""
gpio-out "sysbus-irq" 1
mmio 1e79b000/1000
bus: opb.1
  type opb
  dev: fsi.master, id ""
bus: fsi.bus.1
  type fsi.bus
  dev: cfam.config, id ""
  dev: cfam, id ""
bus: lbus.1
  type lbus
  dev: scratchpad, id ""
address = 0 (0x0)
bus: opb.0
  type opb
  dev: fsi.master, id ""
bus: fsi.bus.0
  type fsi.bus
  dev: cfam.config, id ""
  dev: cfam, id ""
bus: lbus.0
  type lbus
  dev: scratchpad, id ""
address = 0 (0x0)

The LBUS is modelled to maintain the qdev bus hierarchy and to take
advantage of the object model to automatically generate the CFAM
configuration block. The configuration block presents engines in the
order they are attached to the CFAM's LBUS. Engine implementations
should subclass the LBusDevice and set the 'config' member of
LBusDeviceClass to match the engine's type.

CFAM designs offer a lot of flexibility, for instance it is possible for
a CFAM to be simultaneously driven from multiple FSI links. The modeling
is not so complete; it's assumed that each CFAM is attached to a single
FSI slave (as a consequence the CFAM subclasses the FSI slave).

As for FSI, its symbols and wire-protocol are not modelled at all. This
is not necessary to get FSI off the ground thanks to the mapping of the
CFAM address space onto the OPB address space - the models follow this
directly and map the CFAM memory region into the OPB's memory region.
Future work includes supporting more advanced accesses that drive the
FSI master directly rather than indirectly via the CFAM mapping, which
will require implementing the FSI state machine and methods for each of
the FSI symbols on the slave. Further down the track we can also look at
supporting the bitbanged SoftFSI drivers in Linux by extending the FSI
slave model to resolve sequences of GPIO IRQs into FSI symbols, and
calling the associated symbol method on the slave to map the access onto
the CFAM.

Testing:
Tested by reading cfam config address 0 on rainier machine. We can
ignore the error line as it is not related.
root@p10bmc:~# pdbg -a getcfam 0x0
Unable to open dtb file '/var/lib/phosphor-software-manager/pnor/rw/DEVTREE'
p0: 0x0 = 0xc0022d15

Signed-off-by: Andrew Jeffery 
Signed-off-by: Cédric Le Goater 
Signed-off-by: Ninad Palsule 
---
 hw/arm/aspeed_ast2600.c | 15 +++
 include/hw/arm/aspeed_soc.h |  4 
 2 files changed, 19 insertions(+)

diff --git a/hw/arm/aspeed_ast2600.c b/hw/arm/aspeed_ast2600.c
index a8b3a8065a..e239487c16 100644
--- a/hw/arm/aspeed_ast2600.c
+++ b/hw/arm/aspeed_ast2600.c
@@ -75,6 +75,8 @@ static const hwaddr aspeed_soc_ast2600_memmap[] = {
 [ASPEED_DEV_UART12]= 0x1E790600,
 [ASPEED_DEV_UART13]= 0x1E790700,
 

[PATCH v1 5/7] hw/fsi: IBM's On-chip Peripheral Bus

2023-08-25 Thread Ninad Palsule
This is a part of patchset where IBM's Flexible Service Interface is
introduced.

The On-Chip Peripheral Bus (OPB): A low-speed bus typically found in
POWER processors. This now makes an appearance in the ASPEED SoC due
to tight integration of the FSI master IP with the OPB, mainly the
existence of an MMIO-mapping of the CFAM address straight onto a
sub-region of the OPB address space.

Signed-off-by: Andrew Jeffery 
Signed-off-by: Cédric Le Goater 
Signed-off-by: Ninad Palsule 
---
 hw/fsi/Kconfig   |   4 +
 hw/fsi/fsi-master.c  |   3 +-
 hw/fsi/meson.build   |   1 +
 hw/fsi/opb.c | 194 +++
 include/hw/fsi/opb.h |  45 ++
 5 files changed, 245 insertions(+), 2 deletions(-)
 create mode 100644 hw/fsi/opb.c
 create mode 100644 include/hw/fsi/opb.h

diff --git a/hw/fsi/Kconfig b/hw/fsi/Kconfig
index 087980be22..560ce536db 100644
--- a/hw/fsi/Kconfig
+++ b/hw/fsi/Kconfig
@@ -1,3 +1,7 @@
+config OPB
+bool
+select CFAM
+
 config CFAM
 bool
 select FSI
diff --git a/hw/fsi/fsi-master.c b/hw/fsi/fsi-master.c
index fe1693539a..ba00e2bb7d 100644
--- a/hw/fsi/fsi-master.c
+++ b/hw/fsi/fsi-master.c
@@ -13,8 +13,7 @@
 
 #include "hw/fsi/bits.h"
 #include "hw/fsi/fsi-master.h"
-
-#define TYPE_OP_BUS "opb"
+#include "hw/fsi/opb.h"
 
 #define TO_REG(x)   ((x) >> 2)
 
diff --git a/hw/fsi/meson.build b/hw/fsi/meson.build
index ca80d11cb9..cab645f4ea 100644
--- a/hw/fsi/meson.build
+++ b/hw/fsi/meson.build
@@ -2,3 +2,4 @@ system_ss.add(when: 'CONFIG_LBUS', if_true: files('lbus.c'))
 system_ss.add(when: 'CONFIG_SCRATCHPAD', if_true: files('engine-scratchpad.c'))
 system_ss.add(when: 'CONFIG_CFAM', if_true: files('cfam.c'))
 system_ss.add(when: 'CONFIG_FSI', if_true: 
files('fsi.c','fsi-master.c','fsi-slave.c'))
+system_ss.add(when: 'CONFIG_OPB', if_true: files('opb.c'))
diff --git a/hw/fsi/opb.c b/hw/fsi/opb.c
new file mode 100644
index 00..ac7693c001
--- /dev/null
+++ b/hw/fsi/opb.c
@@ -0,0 +1,194 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright (C) 2023 IBM Corp.
+ *
+ * IBM On-chip Peripheral Bus
+ */
+
+#include "qemu/osdep.h"
+
+#include "qapi/error.h"
+#include "qemu/log.h"
+
+#include "hw/fsi/opb.h"
+
+static MemTxResult opb_read(OPBus *opb, hwaddr addr, void *data, size_t len)
+{
+return address_space_read(&opb->as, addr, MEMTXATTRS_UNSPECIFIED, data,
+  len);
+}
+
+uint8_t opb_read8(OPBus *opb, hwaddr addr)
+{
+MemTxResult tx;
+uint8_t data;
+
+tx = opb_read(opb, addr, &data, sizeof(data));
+/* FIXME: improve error handling */
+assert(!tx);
+
+return data;
+}
+
+uint16_t opb_read16(OPBus *opb, hwaddr addr)
+{
+MemTxResult tx;
+uint16_t data;
+
+tx = opb_read(opb, addr, &data, sizeof(data));
+/* FIXME: improve error handling */
+assert(!tx);
+
+return data;
+}
+
+uint32_t opb_read32(OPBus *opb, hwaddr addr)
+{
+MemTxResult tx;
+uint32_t data;
+
+tx = opb_read(opb, addr, &data, sizeof(data));
+/* FIXME: improve error handling */
+assert(!tx);
+
+return data;
+}
+
+static MemTxResult opb_write(OPBus *opb, hwaddr addr, void *data, size_t len)
+{
+return address_space_write(&opb->as, addr, MEMTXATTRS_UNSPECIFIED, data,
+   len);
+}
+
+void opb_write8(OPBus *opb, hwaddr addr, uint8_t data)
+{
+MemTxResult tx;
+
+tx = opb_write(opb, addr, &data, sizeof(data));
+/* FIXME: improve error handling */
+assert(!tx);
+}
+
+void opb_write16(OPBus *opb, hwaddr addr, uint16_t data)
+{
+MemTxResult tx;
+
+tx = opb_write(opb, addr, &data, sizeof(data));
+/* FIXME: improve error handling */
+assert(!tx);
+}
+
+void opb_write32(OPBus *opb, hwaddr addr, uint32_t data)
+{
+MemTxResult tx;
+
+tx = opb_write(opb, addr, &data, sizeof(data));
+/* FIXME: improve error handling */
+assert(!tx);
+}
+
+void opb_fsi_master_address(OPBus *opb, hwaddr addr)
+{
+memory_region_transaction_begin();
+memory_region_set_address(&opb->fsi.iomem, addr);
+memory_region_transaction_commit();
+}
+
+void opb_opb2fsi_address(OPBus *opb, hwaddr addr)
+{
+memory_region_transaction_begin();
+memory_region_set_address(&opb->fsi.opb2fsi, addr);
+memory_region_transaction_commit();
+}
+
+static uint64_t opb_unimplemented_read(void *opaque, hwaddr addr, unsigned 
size)
+{
+qemu_log_mask(LOG_UNIMP, "%s: read @0x%" HWADDR_PRIx " size=%d\n",
+  __func__, addr, size);
+
+return 0;
+}
+
+static void opb_unimplemented_write(void *opaque, hwaddr addr, uint64_t data,
+ unsigned size)
+{
+qemu_log_mask(LOG_UNIMP, "%s: write @0x%" HWADDR_PRIx " size=%d "
+  "value=%"PRIx64"\n", __func__, addr, size, data);
+}
+
+static const struct MemoryRegionOps opb_unimplemented_ops = {
+.read = opb_unimplemented_read,
+.write = opb_unimplemented_write,
+.endianness = DEVICE_BIG_ENDIAN,
+};
+
+static void 

[PATCH v1 1/7] hw/fsi: Introduce IBM's Local bus

2023-08-25 Thread Ninad Palsule
This is a part of patchset where IBM's Flexible Service Interface is
introduced.

The LBUS is modelled to maintain the qdev bus hierarchy and to take
advantage of the object model to automatically generate the CFAM
configuration block. The configuration block presents engines in the
order they are attached to the CFAM's LBUS. Engine implementations
should subclass the LBusDevice and set the 'config' member of
LBusDeviceClass to match the engine's type.

Signed-off-by: Andrew Jeffery 
Signed-off-by: Cédric Le Goater 
Signed-off-by: Ninad Palsule 
---
 hw/Kconfig|  1 +
 hw/fsi/Kconfig|  2 +
 hw/fsi/lbus.c | 94 +++
 hw/fsi/meson.build|  1 +
 hw/meson.build|  1 +
 include/hw/fsi/bits.h | 15 +++
 include/hw/fsi/lbus.h | 57 ++
 7 files changed, 171 insertions(+)
 create mode 100644 hw/fsi/Kconfig
 create mode 100644 hw/fsi/lbus.c
 create mode 100644 hw/fsi/meson.build
 create mode 100644 include/hw/fsi/bits.h
 create mode 100644 include/hw/fsi/lbus.h

diff --git a/hw/Kconfig b/hw/Kconfig
index ba62ff6417..2ccb73add5 100644
--- a/hw/Kconfig
+++ b/hw/Kconfig
@@ -9,6 +9,7 @@ source core/Kconfig
 source cxl/Kconfig
 source display/Kconfig
 source dma/Kconfig
+source fsi/Kconfig
 source gpio/Kconfig
 source hyperv/Kconfig
 source i2c/Kconfig
diff --git a/hw/fsi/Kconfig b/hw/fsi/Kconfig
new file mode 100644
index 00..687449e14e
--- /dev/null
+++ b/hw/fsi/Kconfig
@@ -0,0 +1,2 @@
+config LBUS
+bool
diff --git a/hw/fsi/lbus.c b/hw/fsi/lbus.c
new file mode 100644
index 00..afb26ef7ea
--- /dev/null
+++ b/hw/fsi/lbus.c
@@ -0,0 +1,94 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright (C) 2023 IBM Corp.
+ *
+ * IBM Local bus where FSI slaves are connected
+ */
+
+#include "qemu/osdep.h"
+
+#include "qapi/error.h"
+#include "qemu/log.h"
+
+#include "hw/fsi/lbus.h"
+
+#include "hw/qdev-properties.h"
+
+static void lbus_realize(BusState *bus, Error **errp)
+{
+LBusNode *node;
+LBus *lbus = LBUS(bus);
+
+memory_region_init(&lbus->mr, OBJECT(lbus), TYPE_LBUS,
+   (2 * 1024 * 1024) - 0x400);
+
+QLIST_FOREACH(node, &lbus->devices, next) {
+memory_region_add_subregion(&lbus->mr, node->ldev->address,
+&node->ldev->iomem);
+}
+}
+
+static void lbus_init(Object *o)
+{
+}
+
+static void lbus_class_init(ObjectClass *klass, void *data)
+{
+BusClass *k = BUS_CLASS(klass);
+k->realize = lbus_realize;
+}
+
+static const TypeInfo lbus_info = {
+.name = TYPE_LBUS,
+.parent = TYPE_BUS,
+.instance_init = lbus_init,
+.instance_size = sizeof(LBus),
+.class_init = lbus_class_init,
+};
+
+static Property lbus_device_props[] = {
+DEFINE_PROP_UINT32("address", LBusDevice, address, 0),
+DEFINE_PROP_END_OF_LIST(),
+};
+
+DeviceState *lbus_create_device(LBus *bus, const char *type, uint32_t addr)
+{
+DeviceState *dev;
+LBusNode *node;
+
+dev = qdev_new(type);
+qdev_prop_set_uint8(dev, "address", addr);
+qdev_realize_and_unref(dev, &bus->bus, &error_fatal);
+
+/* Move to post_load */
+node = g_malloc(sizeof(struct LBusNode));
+node->ldev = LBUS_DEVICE(dev);
+QLIST_INSERT_HEAD(&bus->devices, node, next);
+
+return dev;
+}
+
+static void lbus_device_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+
+dc->bus_type = TYPE_LBUS;
+device_class_set_props(dc, lbus_device_props);
+}
+
+static const TypeInfo lbus_device_type_info = {
+.name = TYPE_LBUS_DEVICE,
+.parent = TYPE_DEVICE,
+.instance_size = sizeof(LBusDevice),
+.abstract = true,
+.class_init = lbus_device_class_init,
+.class_size = sizeof(LBusDeviceClass),
+};
+
+static void lbus_register_types(void)
+{
+type_register_static(&lbus_info);
+type_register_static(&lbus_device_type_info);
+}
+
+type_init(lbus_register_types);
diff --git a/hw/fsi/meson.build b/hw/fsi/meson.build
new file mode 100644
index 00..e1007d5fea
--- /dev/null
+++ b/hw/fsi/meson.build
@@ -0,0 +1 @@
+system_ss.add(when: 'CONFIG_LBUS', if_true: files('lbus.c'))
diff --git a/hw/meson.build b/hw/meson.build
index c7ac7d3d75..6c71ee9cfa 100644
--- a/hw/meson.build
+++ b/hw/meson.build
@@ -43,6 +43,7 @@ subdir('virtio')
 subdir('watchdog')
 subdir('xen')
 subdir('xenpv')
+subdir('fsi')
 
 subdir('alpha')
 subdir('arm')
diff --git a/include/hw/fsi/bits.h b/include/hw/fsi/bits.h
new file mode 100644
index 00..338ae483cf
--- /dev/null
+++ b/include/hw/fsi/bits.h
@@ -0,0 +1,15 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright (C) 2023 IBM Corp.
+ *
+ * Bit operation macros
+ */
+#ifndef FSI_BITS_H
+#define FSI_BITS_H
+
+#define BE_BIT(x)  BIT(31 - (x))
+#define GENMASK(t, b) \
+(((1ULL << ((t) + 1)) - 1) & ~((1ULL << (b)) - 1))
+#define BE_GENMASK(t, b)   GENMASK(BE_BIT(t), BE_BIT(b))
+
+#endif /* FSI_BITS_H */
diff --git 

[PATCH v1 6/7] hw/fsi: Aspeed APB2OPB interface

2023-08-25 Thread Ninad Palsule
This is a part of patchset where IBM's Flexible Service Interface is
introduced.

An APB-to-OPB bridge enabling access to the OPB from the ARM core in
the AST2600. Hardware limitations prevent the OPB from being directly
mapped into APB, so all accesses are indirect through the bridge.

Signed-off-by: Andrew Jeffery 
Signed-off-by: Cédric Le Goater 
Signed-off-by: Ninad Palsule 
---
 hw/arm/Kconfig  |   1 +
 hw/fsi/Kconfig  |   4 +
 hw/fsi/aspeed-apb2opb.c | 346 
 hw/fsi/meson.build  |   1 +
 hw/fsi/trace-events |   2 +
 hw/fsi/trace.h  |   1 +
 include/hw/fsi/aspeed-apb2opb.h |  32 +++
 meson.build |   1 +
 8 files changed, 388 insertions(+)
 create mode 100644 hw/fsi/aspeed-apb2opb.c
 create mode 100644 hw/fsi/trace-events
 create mode 100644 hw/fsi/trace.h
 create mode 100644 include/hw/fsi/aspeed-apb2opb.h

diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 7e68348440..a6994cd9d7 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -589,6 +589,7 @@ config FSL_IMX7
 select PCI_EXPRESS_DESIGNWARE
 select SDHCI
 select UNIMP
+select APB2OPB_ASPEED
 
 config ARM_SMMUV3
 bool
diff --git a/hw/fsi/Kconfig b/hw/fsi/Kconfig
index 560ce536db..fbb021658d 100644
--- a/hw/fsi/Kconfig
+++ b/hw/fsi/Kconfig
@@ -1,3 +1,7 @@
+config APB2OPB_ASPEED
+bool
+select OPB
+
 config OPB
 bool
 select CFAM
diff --git a/hw/fsi/aspeed-apb2opb.c b/hw/fsi/aspeed-apb2opb.c
new file mode 100644
index 00..bbc63f2eb3
--- /dev/null
+++ b/hw/fsi/aspeed-apb2opb.c
@@ -0,0 +1,346 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright (C) 2023 IBM Corp.
+ *
+ * ASPEED APB-OPB FSI interface
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qom/object.h"
+#include "qapi/error.h"
+#include "trace.h"
+
+#include "hw/fsi/aspeed-apb2opb.h"
+#include "hw/qdev-core.h"
+
+#define TO_REG(x) (x >> 2)
+#define GENMASK(t, b) (((1ULL << ((t) + 1)) - 1) & ~((1ULL << (b)) - 1))
+
+#define APB2OPB_VERSION        TO_REG(0x00)
+#define   APB2OPB_VERSION_VER  GENMASK(7, 0)
+
+#define APB2OPB_TRIGGER        TO_REG(0x04)
+#define   APB2OPB_TRIGGER_EN   BIT(0)
+
+#define APB2OPB_CONTROL        TO_REG(0x08)
+#define   APB2OPB_CONTROL_OFF  GENMASK(31, 13)
+
+#define APB2OPB_OPB2FSI        TO_REG(0x0c)
+#define   APB2OPB_OPB2FSI_OFF  GENMASK(31, 22)
+
+#define APB2OPB_OPB0_SEL   TO_REG(0x10)
+#define APB2OPB_OPB1_SEL   TO_REG(0x28)
+#define   APB2OPB_OPB_SEL_EN   BIT(0)
+
+#define APB2OPB_OPB0_MODE  TO_REG(0x14)
+#define APB2OPB_OPB1_MODE  TO_REG(0x2c)
+#define   APB2OPB_OPB_MODE_RD  BIT(0)
+
+#define APB2OPB_OPB0_XFER  TO_REG(0x18)
+#define APB2OPB_OPB1_XFER  TO_REG(0x30)
+#define   APB2OPB_OPB_XFER_FULLBIT(1)
+#define   APB2OPB_OPB_XFER_HALFBIT(0)
+
+#define APB2OPB_OPB0_ADDR  TO_REG(0x1c)
+#define APB2OPB_OPB0_WRITE_DATA  TO_REG(0x20)
+
+#define APB2OPB_OPB1_DMA_EN    TO_REG(0x24)
+#define APB2OPB_OPB1_DMA_EN_3  BIT(3)
+#define APB2OPB_OPB1_DMA_EN_2  BIT(2)
+#define APB2OPB_OPB1_DMA_EN_1  BIT(1)
+#define APB2OPB_OPB1_DMA_EN_0  BIT(0)
+
+#define APB2OPB_OPB1_ADDR  TO_REG(0x34)
+#define APB2OPB_OPB1_WRITE_DATA  TO_REG(0x38)
+
+#define APB2OPB_OPB_CLK        TO_REG(0x3c)
+#define   APB2OPB_OPB_CLK_SYNC BIT(0)
+
+#define APB2OPB_IRQ_CLEAR  TO_REG(0x40)
+#define   APB2OPB_IRQ_CLEAR_EN BIT(0)
+
+#define APB2OPB_IRQ_MASK   TO_REG(0x44)
+#define   APB2OPB_IRQ_MASK_OPB1_TX_ACK BIT(17)
+#define   APB2OPB_IRQ_MASK_OPB0_TX_ACK BIT(16)
+#define   APB2OPB_IRQ_MASK_CH3_TCONT   BIT(15)
+#define   APB2OPB_IRQ_MASK_CH2_TCONT   BIT(14)
+#define   APB2OPB_IRQ_MASK_CH1_TCONT   BIT(13)
+#define   APB2OPB_IRQ_MASK_CH0_TCONT   BIT(12)
+#define   APB2OPB_IRQ_MASK_CH3_FIFO_EMPTY  BIT(11)
+#define   APB2OPB_IRQ_MASK_CH2_FIFO_EMPTY  BIT(10)
+#define   APB2OPB_IRQ_MASK_CH1_FIFO_EMPTY  BIT(9)
+#define   APB2OPB_IRQ_MASK_CH0_FIFO_EMPTY  BIT(8)
+#define   APB2OPB_IRQ_MASK_CH3_FIFO_FULL   BIT(7)
+#define   APB2OPB_IRQ_MASK_CH2_FIFO_FULL   BIT(6)
+#define   APB2OPB_IRQ_MASK_CH1_FIFO_FULL   BIT(5)
+#define   APB2OPB_IRQ_MASK_CH0_FIFO_FULL   BIT(4)
+#define   APB2OPB_IRQ_MASK_CH3_DMA_EOT BIT(3)
+#define   APB2OPB_IRQ_MASK_CH2_DMA_EOT BIT(2)
+#define   APB2OPB_IRQ_MASK_CH1_DMA_EOT BIT(1)
+#define   APB2OPB_IRQ_MASK_CH0_DMA_EOT BIT(0)
+
+#define APB2OPB_IRQ_STS        TO_REG(0x48)
+#define   APB2OPB_IRQ_STS_MASTER_ERROR BIT(28)
+#define   APB2OPB_IRQ_STS_PORT_ERROR   BIT(27)
+#define   APB2OPB_IRQ_STS_HOTPLUG  BIT(26)

[PATCH vOther2 1/1] qemu-nbd: Restore "qemu-nbd -v --fork" output

2023-08-25 Thread Denis V. Lunev
Closing stderr earlier is good for daemonized qemu-nbd under ssh
earlier, but breaks the case where -v is being used to track what is
happening in the server, as in iotest 233.

When we know we are verbose, we should preserve original stderr and
restore it once the setup stage is done. This commit restores the
original behavior with -v option. In this case original output
inside the test is kept intact.

Reported-by: Kevin Wolf 
Signed-off-by: Denis V. Lunev 
CC: Eric Blake 
CC: Vladimir Sementsov-Ogievskiy 
CC: Hanna Reitz 
CC: Mike Maslenkin 
Fixes: 5c56dd27a2 ("qemu-nbd: fix regression with qemu-nbd --fork run over ssh")
---
Changes from v1:
* fixed compilation with undefined HAVE_NBD_DEVICE, thanks to Mike Maslenkin

 qemu-nbd.c | 39 ---
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/qemu-nbd.c b/qemu-nbd.c
index aaccaa3318..19a4147d24 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -253,6 +253,13 @@ static int qemu_nbd_client_list(SocketAddress *saddr, 
QCryptoTLSCreds *tls,
 }
 
 
+struct NbdClientOpts {
+char *device;
+bool fork_process;
+bool verbose;
+int stderr;
+};
+
 #if HAVE_NBD_DEVICE
 static void *show_parts(void *arg)
 {
@@ -271,12 +278,6 @@ static void *show_parts(void *arg)
 return NULL;
 }
 
-struct NbdClientOpts {
-char *device;
-bool fork_process;
-bool verbose;
-};
-
 static void *nbd_client_thread(void *arg)
 {
 struct NbdClientOpts *opts = arg;
@@ -323,11 +324,14 @@ static void *nbd_client_thread(void *arg)
 opts->device, srcpath);
 } else {
 /* Close stderr so that the qemu-nbd process exits.  */
-if (dup2(STDOUT_FILENO, STDERR_FILENO) < 0) {
+if (dup2(opts->stderr, STDERR_FILENO) < 0) {
 error_report("Could not set stderr to /dev/null: %s",
  strerror(errno));
 exit(EXIT_FAILURE);
 }
+if (opts->stderr != STDOUT_FILENO) {
+close(opts->stderr);
+}
 }
 
 if (nbd_client(fd) < 0) {
@@ -589,9 +593,9 @@ int main(int argc, char **argv)
 const char *pid_file_name = NULL;
 const char *selinux_label = NULL;
 BlockExportOptions *export_opts;
-#if HAVE_NBD_DEVICE
-struct NbdClientOpts opts;
-#endif
+struct NbdClientOpts opts = {
+.stderr = STDOUT_FILENO,
+};
 
 #ifdef CONFIG_POSIX
 os_setup_early_signal_handling();
@@ -944,6 +948,15 @@ int main(int argc, char **argv)
 
 close(stderr_fd[0]);
 
+/* Remember parent's stderr if we will be restoring it. */
+if (verbose /* fork_process is set */) {
+opts.stderr = dup(STDERR_FILENO);
+if (opts.stderr < 0) {
+error_report("Could not dup stderr: %s", strerror(errno));
+exit(EXIT_FAILURE);
+}
+}
+
 ret = qemu_daemon(1, 0);
 saved_errno = errno;/* dup2 will overwrite error below */
 
@@ -1152,6 +1165,7 @@ int main(int argc, char **argv)
 .device = device,
 .fork_process = fork_process,
 .verbose = verbose,
+.stderr = STDOUT_FILENO,
 };
 
 ret = pthread_create(&client_thread, NULL, nbd_client_thread, &opts);
@@ -1180,11 +1194,14 @@ int main(int argc, char **argv)
 }
 
 if (fork_process) {
-if (dup2(STDOUT_FILENO, STDERR_FILENO) < 0) {
+if (dup2(opts.stderr, STDERR_FILENO) < 0) {
 error_report("Could not set stderr to /dev/null: %s",
  strerror(errno));
 exit(EXIT_FAILURE);
 }
+if (opts.stderr != STDOUT_FILENO) {
+close(opts.stderr);
+}
 }
 
 state = RUNNING;
-- 
2.34.1




Re: [PATCH] qemu-nbd: Restore "qemu-nbd -v --fork" output

2023-08-25 Thread Denis V. Lunev

On 8/24/23 22:03, Eric Blake wrote:

Closing stderr earlier is good for daemonized qemu-nbd under ssh
earlier, but breaks the case where -v is being used to track what is
happening in the server, as in iotest 233.

When we know we are verbose, we do NOT want qemu_daemon to close
stderr.  For management purposes, we still need to temporarily
override the daemon child's stderr with the pipe to the parent until
after the pid file is created; but since qemu_daemon would normally
set stdout to /dev/null had we not been verbose, we can use stdout as
a place to stash our original stderr.  Thus, whether normal or vebose,
when the management handoff is complete, copying stdout back to stderr
does the right thing for the rest of the life of the daemon child.

Note that while the error messages expected by iotest 233 are now
restored, the change in file descriptors means they now show up
earlier in the testsuite output.

Reported-by: Kevin Wolf 
CC: Denis V. Lunev 
CC: qemu-sta...@nongnu.org
Fixes: 5c56dd27a2 ("qemu-nbd: fix regression with qemu-nbd --fork run over ssh")
Signed-off-by: Eric Blake 
---
  qemu-nbd.c | 21 -
  tests/qemu-iotests/233.out | 20 ++--
  2 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/qemu-nbd.c b/qemu-nbd.c
index aaccaa33184..a105094fb17 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -944,9 +944,24 @@ int main(int argc, char **argv)

  close(stderr_fd[0]);

-ret = qemu_daemon(1, 0);
+ret = qemu_daemon(1, verbose);
  saved_errno = errno;/* dup2 will overwrite error below */

+if (verbose) {
+/* We want stdin at /dev/null when qemu_daemon didn't do it */
+stdin = freopen("/dev/null", "r", stdin);
+if (stdin == NULL) {
+error_report("Failed to redirect stdin: %s",
+ strerror(errno));
+exit(EXIT_FAILURE);
+}
+/* To keep the parent's stderr alive, copy it to stdout */
+if (dup2(STDOUT_FILENO, STDERR_FILENO) < 0) {
+error_report("Failed to redirect stdout: %s",
+ strerror(errno));
+exit(EXIT_FAILURE);
+}
+}
  /* Temporarily redirect stderr to the parent's pipe...  */
  if (dup2(stderr_fd[1], STDERR_FILENO) < 0) {
  char str[256];
@@ -1180,6 +1195,10 @@ int main(int argc, char **argv)
  }

  if (fork_process) {
+/*
+ * See above. If verbose is false, stdout is /dev/null (thanks
+ * to qemu_daemon); otherwise, stdout is the parent's stderr.
+ */
  if (dup2(STDOUT_FILENO, STDERR_FILENO) < 0) {
  error_report("Could not set stderr to /dev/null: %s",
   strerror(errno));
diff --git a/tests/qemu-iotests/233.out b/tests/qemu-iotests/233.out
index 237c82767ea..b09a197020a 100644
--- a/tests/qemu-iotests/233.out
+++ b/tests/qemu-iotests/233.out
@@ -41,8 +41,10 @@ exports available: 1
min block: 1

  == check TLS fail over TCP with mismatched hostname ==
+qemu-nbd: option negotiation failed: Failed to read opts magic: Cannot read 
from TLS channel: Software caused connection abort
  qemu-img: Could not open 
'driver=nbd,host=localhost,port=PORT,tls-creds=tls0': Certificate does not 
match the hostname localhost
  qemu-nbd: Certificate does not match the hostname localhost
+qemu-nbd: option negotiation failed: Failed to read opts magic: Cannot read 
from TLS channel: Software caused connection abort

  == check TLS works over TCP with mismatched hostname and override ==
  image: nbd://localhost:PORT
@@ -55,7 +57,9 @@ exports available: 1
min block: 1

  == check TLS with different CA fails ==
+qemu-nbd: option negotiation failed: Verify failed: No certificate was found.
  qemu-img: Could not open 
'driver=nbd,host=127.0.0.1,port=PORT,tls-creds=tls0': The certificate hasn't 
got a known issuer
+qemu-nbd: option negotiation failed: Verify failed: No certificate was found.
  qemu-nbd: The certificate hasn't got a known issuer

  == perform I/O over TLS ==
@@ -67,11 +71,15 @@ read 1048576/1048576 bytes at offset 1048576
  1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)

  == check TLS with authorization ==
+qemu-nbd: option negotiation failed: TLS x509 authz check for C=South 
Pacific,L=R'lyeh,O=Cthulhu Dark Lord Enterprises client1,CN=localhost is denied
  qemu-img: Could not open 
'driver=nbd,host=127.0.0.1,port=PORT,tls-creds=tls0': Failed to read option 
reply: Cannot read from TLS channel: Software caused connection abort
+qemu-nbd: option negotiation failed: TLS x509 authz check for C=South 
Pacific,L=R'lyeh,O=Cthulhu Dark Lord Enterprises client3,CN=localhost is denied
  qemu-img: Could not open 
'driver=nbd,host=127.0.0.1,port=PORT,tls-creds=tls0': Failed to read option 

Re: [PATCH v11 0/9] rutabaga_gfx + gfxstream

2023-08-25 Thread Alyssa Ross
Alyssa Ross  writes:

> Gurchetan Singh  writes:
>
>> On Fri, Aug 25, 2023 at 12:11 AM Alyssa Ross  wrote:
>>
>>> Gurchetan Singh  writes:
>>>
>>> > On Wed, Aug 23, 2023 at 4:07 AM Alyssa Ross  wrote:
>>> >
>>> >> Gurchetan Singh  writes:
>>> >>
>>> >> > - Official "release commits" issued for rutabaga_gfx_ffi,
>>> >> >   gfxstream, aemu-base.  For example, see crrev.com/c/4778941
>>> >> >
>>> >> > - The release commits can make packaging easier, though once
>>> >> >   again all known users will likely just build from sources
>>> >> >   anyways
>>> >>
>>> >> It's a small thing, but could there be actual tags, rather than just
>>> >> blessed commits?  It'd just make them easier to find, and save a bit of
>>> >> time in review for packages.
>>> >>
>>> >
>>> > I added:
>>> >
>>> >
>>> https://crosvm.dev/book/appendix/rutabaga_gfx.html#latest-releases-for-potential-packaging
>>> >
>>> > Tags are possible, but I want to clarify the use case before packaging.
>>> > Where are you thinking of packaging it for (Debian??)? Are you mostly
>>> > interested in Wayland passthrough (my guess) or gfxstream too?  Depending
>>> > your use case, we may be able to minimize the work involved.
>>>
>>> Packaging for Nixpkgs (where I already maintain what to my knowledge is
>>> the only crosvm distro package).  I'm personally mostly interested in
>>> Wayland passthroug, but I wouldn't be surprised if others are interested
>>> in gfxstream.  The packaging work is already done, I've just been
>>> holding off actually pushing the packages waiting for the stable
>>> releases.
>>>
>>> The reason that tags would be useful is that it allows a reviewer of the
>>> package to see at a glance that the package is built from a stable
>>> release.  If it's just built from a commit hash, they have to go and
>>> verify that it's a stable release, which is mildly annoying and
>>> unconventional.
>>>
>>
>> Understood.  Request to have gfxstream and AEMU v0.1.2 release tags made.
>>
>> For rutabaga_gfx_ffi, is the crates.io upload sufficient?
>>
>> https://crates.io/crates/rutabaga_gfx_ffi
>>
>> Debian, for example, treats crates.io as the source of truth and builds
>> tooling around that.  I wonder if Nixpkgs as similar tooling around
>> crates.io.
>
> We do, and I'll use the crates.io release for the package — good
> suggestion, but it's still useful to also have a tag in a git repo.  It
> makes it easier if I need to do a bisect, for example.  As a distro
> developer, I'm frequently jumping across codebases I am not very
> familiar with to try to track down regressions, etc., and it's much
> easier when I don't have to learn some special quirk of the package like
> not having git tags.

Aha, trying to switch my package over to it has revealed that there is
actually a reason not to use the crates.io release.  It doesn't include
a Cargo.lock, which would mean we'd have to obtain one from elsewhere.
Either from the crosvm git repo, at which point we might just get all
the sources from there, or by vendoring a Cargo.lock into our own git
tree for packages, which we try to avoid because when you have a lot of
them, they become quite a large proportion of the overall size of the
repo.

(This probably differs from Debian, etc., because in Nixpkgs, we don't
package each crate dependency separately.  We only have packages for
applications (or occasionally, C ABI libraries written in Rust), and
each of those gets to bring in whatever crate dependencies it wants as
part of its build.  This means we use the upstream Cargo.lock, and
accept that different Rust packages will use lots of different versions
of dependencies, which I don't believe is the case with other distros
that take a more purist approach to Rust packaging.)


signature.asc
Description: PGP signature


Re: [PATCH v11 0/9] rutabaga_gfx + gfxstream

2023-08-25 Thread Alyssa Ross
Gurchetan Singh  writes:

> On Fri, Aug 25, 2023 at 12:11 AM Alyssa Ross  wrote:
>
>> Gurchetan Singh  writes:
>>
>> > On Wed, Aug 23, 2023 at 4:07 AM Alyssa Ross  wrote:
>> >
>> >> Gurchetan Singh  writes:
>> >>
>> >> > - Official "release commits" issued for rutabaga_gfx_ffi,
>> >> >   gfxstream, aemu-base.  For example, see crrev.com/c/4778941
>> >> >
>> >> > - The release commits can make packaging easier, though once
>> >> >   again all known users will likely just build from sources
>> >> >   anyways
>> >>
>> >> It's a small thing, but could there be actual tags, rather than just
>> >> blessed commits?  It'd just make them easier to find, and save a bit of
>> >> time in review for packages.
>> >>
>> >
>> > I added:
>> >
>> >
>> https://crosvm.dev/book/appendix/rutabaga_gfx.html#latest-releases-for-potential-packaging
>> >
>> > Tags are possible, but I want to clarify the use case before packaging.
>> > Where are you thinking of packaging it for (Debian??)? Are you mostly
>> > interested in Wayland passthrough (my guess) or gfxstream too?  Depending
>> > your use case, we may be able to minimize the work involved.
>>
>> Packaging for Nixpkgs (where I already maintain what to my knowledge is
>> the only crosvm distro package).  I'm personally mostly interested in
>> Wayland passthrough, but I wouldn't be surprised if others are interested
>> in gfxstream.  The packaging work is already done, I've just been
>> holding off actually pushing the packages waiting for the stable
>> releases.
>>
>> The reason that tags would be useful is that it allows a reviewer of the
>> package to see at a glance that the package is built from a stable
>> release.  If it's just built from a commit hash, they have to go and
>> verify that it's a stable release, which is mildly annoying and
>> unconventional.
>>
>
> Understood.  Request to have gfxstream and AEMU v0.1.2 release tags made.
>
> For rutabaga_gfx_ffi, is the crates.io upload sufficient?
>
> https://crates.io/crates/rutabaga_gfx_ffi
>
> Debian, for example, treats crates.io as the source of truth and builds
> tooling around that.  I wonder if Nixpkgs has similar tooling around
> crates.io.

We do, and I'll use the crates.io release for the package — good
suggestion, but it's still useful to also have a tag in a git repo.  It
makes it easier if I need to do a bisect, for example.  As a distro
developer, I'm frequently jumping across codebases I am not very
familiar with to try to track down regressions, etc., and it's much
easier when I don't have to learn some special quirk of the package like
not having git tags.


signature.asc
Description: PGP signature


[PATCH vOther 1/1] qemu-nbd: Restore "qemu-nbd -v --fork" output

2023-08-25 Thread Denis V. Lunev
Closing stderr earlier is good for daemonized qemu-nbd under ssh
earlier, but breaks the case where -v is being used to track what is
happening in the server, as in iotest 233.

When we know we are verbose, we should preserve original stderr and
restore it once the setup stage is done. This commit restores the
original behavior with -v option. In this case original output
inside the test is kept intact.

Reported-by: Kevin Wolf 
Signed-off-by: Denis V. Lunev 
CC: Eric Blake 
CC: Vladimir Sementsov-Ogievskiy 
CC: Hanna Reitz 
Fixes: 5c56dd27a2 ("qemu-nbd: fix regression with qemu-nbd --fork run over ssh")
---
After lengthy thoughts there is a different proposal to fix the
introduced regression. Under this approach we could keep original
test output. This looks important to me.

Eric, do you have any opinion?

Thank you in advance,
Den

 qemu-nbd.c | 24 +---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/qemu-nbd.c b/qemu-nbd.c
index aaccaa3318..8322bd5b5b 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -275,6 +275,7 @@ struct NbdClientOpts {
 char *device;
 bool fork_process;
 bool verbose;
+int stderr;
 };
 
 static void *nbd_client_thread(void *arg)
@@ -323,11 +324,14 @@ static void *nbd_client_thread(void *arg)
 opts->device, srcpath);
 } else {
 /* Close stderr so that the qemu-nbd process exits.  */
-if (dup2(STDOUT_FILENO, STDERR_FILENO) < 0) {
+if (dup2(opts->stderr, STDERR_FILENO) < 0) {
 error_report("Could not set stderr to /dev/null: %s",
  strerror(errno));
 exit(EXIT_FAILURE);
 }
+if (opts->stderr != STDOUT_FILENO) {
+close(opts->stderr);
+}
 }
 
 if (nbd_client(fd) < 0) {
@@ -590,7 +594,9 @@ int main(int argc, char **argv)
 const char *selinux_label = NULL;
 BlockExportOptions *export_opts;
 #if HAVE_NBD_DEVICE
-struct NbdClientOpts opts;
+struct NbdClientOpts opts = {
+.stderr = STDOUT_FILENO,
+};
 #endif
 
 #ifdef CONFIG_POSIX
@@ -944,6 +950,15 @@ int main(int argc, char **argv)
 
 close(stderr_fd[0]);
 
+/* Remember parent's stderr if we will be restoring it. */
+if (verbose /* fork_process is set */) {
+opts.stderr = dup(STDERR_FILENO);
+if (opts.stderr < 0) {
+error_report("Could not dup stderr: %s", strerror(errno));
+exit(EXIT_FAILURE);
+}
+}
+
 ret = qemu_daemon(1, 0);
 saved_errno = errno;/* dup2 will overwrite error below */
 
@@ -1180,11 +1195,14 @@ int main(int argc, char **argv)
 }
 
 if (fork_process) {
-if (dup2(STDOUT_FILENO, STDERR_FILENO) < 0) {
+if (dup2(opts.stderr, STDERR_FILENO) < 0) {
 error_report("Could not set stderr to /dev/null: %s",
  strerror(errno));
 exit(EXIT_FAILURE);
 }
+if (opts.stderr != STDOUT_FILENO) {
+close(opts.stderr);
+}
 }
 
 state = RUNNING;
-- 
2.34.1




[PATCH vOther 1/1] qemu-nbd: Restore "qemu-nbd -v --fork" output

2023-08-25 Thread Denis V. Lunev
Closing stderr earlier is good for daemonized qemu-nbd under ssh
earlier, but breaks the case where -v is being used to track what is
happening in the server, as in iotest 233.

When we know we are verbose, we should preserve original stderr and
restore it once the setup stage is done. This commit restores the
original behavior with -v option. In this case original output
inside the test is kept intact.

Reported-by: Kevin Wolf 
Signed-off-by: Denis V. Lunev 
CC: Eric Blake 
CC: Vladimir Sementsov-Ogievskiy 
CC: Hanna Reitz 
Fixes: 5c56dd27a2 ("qemu-nbd: fix regression with qemu-nbd --fork run over ssh")
---
After lengthy thoughts there is a different proposal to fix the
introduced regression. Under this approach we could keep original
test output. This looks important to me.

Eric, do you have any opinion?

Thank you in advance,
Den

 qemu-nbd.c | 24 +---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/qemu-nbd.c b/qemu-nbd.c
index aaccaa3318..8322bd5b5b 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -275,6 +275,7 @@ struct NbdClientOpts {
 char *device;
 bool fork_process;
 bool verbose;
+int stderr;
 };
 
 static void *nbd_client_thread(void *arg)
@@ -323,11 +324,14 @@ static void *nbd_client_thread(void *arg)
 opts->device, srcpath);
 } else {
 /* Close stderr so that the qemu-nbd process exits.  */
-if (dup2(STDOUT_FILENO, STDERR_FILENO) < 0) {
+if (dup2(opts->stderr, STDERR_FILENO) < 0) {
 error_report("Could not set stderr to /dev/null: %s",
  strerror(errno));
 exit(EXIT_FAILURE);
 }
+if (opts->stderr != STDOUT_FILENO) {
+close(opts->stderr);
+}
 }
 
 if (nbd_client(fd) < 0) {
@@ -590,7 +594,9 @@ int main(int argc, char **argv)
 const char *selinux_label = NULL;
 BlockExportOptions *export_opts;
 #if HAVE_NBD_DEVICE
-struct NbdClientOpts opts;
+struct NbdClientOpts opts = {
+.stderr = STDOUT_FILENO,
+};
 #endif
 
 #ifdef CONFIG_POSIX
@@ -944,6 +950,15 @@ int main(int argc, char **argv)
 
 close(stderr_fd[0]);
 
+/* Remember parent's stderr if we will be restoring it. */
+if (verbose /* fork_process is set */) {
+opts.stderr = dup(STDERR_FILENO);
+if (opts.stderr < 0) {
+error_report("Could not dup stderr: %s", strerror(errno));
+exit(EXIT_FAILURE);
+}
+}
+
 ret = qemu_daemon(1, 0);
 saved_errno = errno;/* dup2 will overwrite error below */
 
@@ -1180,11 +1195,14 @@ int main(int argc, char **argv)
 }
 
 if (fork_process) {
-if (dup2(STDOUT_FILENO, STDERR_FILENO) < 0) {
+if (dup2(opts.stderr, STDERR_FILENO) < 0) {
 error_report("Could not set stderr to /dev/null: %s",
  strerror(errno));
 exit(EXIT_FAILURE);
 }
+if (opts.stderr != STDOUT_FILENO) {
+close(opts.stderr);
+}
 }
 
 state = RUNNING;
-- 
2.34.1




Re: [PATCH v12 0/9] rutabaga_gfx + gfxstream

2023-08-25 Thread Gurchetan Singh
On Thu, Aug 24, 2023 at 9:53 PM Akihiko Odaki 
wrote:

> On 2023/08/25 8:40, Gurchetan Singh wrote:
> > From: Gurchetan Singh 
> >
> > Prior versions:
> >
> > Changes since v11:
> > - Incorporated review feedback
> >
> > How to build both rutabaga and gfxstream guest/host libs:
> >
> > https://crosvm.dev/book/appendix/rutabaga_gfx.html
> >
> > Branch containing this patch series (now on QEMU Gitlab):
> >
> > https://gitlab.com/gurchetansingh/qemu/-/commits/qemu-gfxstream-v12
> >
> > Antonio Caggiano (2):
> >virtio-gpu: CONTEXT_INIT feature
> >virtio-gpu: blob prep
> >
> > Dr. David Alan Gilbert (1):
> >virtio: Add shared memory capability
> >
> > Gerd Hoffmann (1):
> >virtio-gpu: hostmem
> >
> > Gurchetan Singh (5):
> >gfxstream + rutabaga prep: added need defintions, fields, and options
> >gfxstream + rutabaga: add initial support for gfxstream
> >gfxstream + rutabaga: meson support
> >gfxstream + rutabaga: enable rutabaga
> >docs/system: add basic virtio-gpu documentation
> >
> >   docs/system/device-emulation.rst |1 +
> >   docs/system/devices/virtio-gpu.rst   |  112 +++
> >   hw/display/meson.build   |   22 +
> >   hw/display/virtio-gpu-base.c |6 +-
> >   hw/display/virtio-gpu-pci-rutabaga.c |   47 ++
> >   hw/display/virtio-gpu-pci.c  |   14 +
> >   hw/display/virtio-gpu-rutabaga.c | 1119 ++
> >   hw/display/virtio-gpu.c  |   16 +-
> >   hw/display/virtio-vga-rutabaga.c |   50 ++
> >   hw/display/virtio-vga.c  |   33 +-
> >   hw/virtio/virtio-pci.c   |   18 +
> >   include/hw/virtio/virtio-gpu-bswap.h |   15 +
> >   include/hw/virtio/virtio-gpu.h   |   41 +
> >   include/hw/virtio/virtio-pci.h   |4 +
> >   meson.build  |7 +
> >   meson_options.txt|2 +
> >   scripts/meson-buildoptions.sh|3 +
> >   softmmu/qdev-monitor.c   |3 +
> >   softmmu/vl.c |1 +
> >   19 files changed, 1495 insertions(+), 19 deletions(-)
> >   create mode 100644 docs/system/devices/virtio-gpu.rst
> >   create mode 100644 hw/display/virtio-gpu-pci-rutabaga.c
> >   create mode 100644 hw/display/virtio-gpu-rutabaga.c
> >   create mode 100644 hw/display/virtio-vga-rutabaga.c
> >
>
> Thanks for keeping working on this. For the entire series:
> Reviewed-by: Akihiko Odaki 
> Tested-by: Akihiko Odaki 
>

Awesome, thanks.  I'll wait a few days for possible additional comments,
otherwise I'll send out v13 with additional r-b tags.


Re: [PATCH v12 8/9] gfxstream + rutabaga: enable rutabaga

2023-08-25 Thread Gurchetan Singh
On Fri, Aug 25, 2023 at 6:55 AM Antonio Caggiano 
wrote:

> Hi Gurchetan,
>
> Thank you for this series and for including some of my patches :)
>
> On 25/08/2023 01:40, Gurchetan Singh wrote:
> > This change enables rutabaga to receive virtio-gpu-3d hypercalls
> > when it is active.
> >
> > Signed-off-by: Gurchetan Singh 
> > Tested-by: Alyssa Ross 
> > Tested-by: Emmanouil Pitsidianakis 
> > Reviewed-by: Emmanouil Pitsidianakis 
> > ---
> > v3: Whitespace fix (Akihiko)
> > v9: reorder virtio_gpu_have_udmabuf() after checking if rutabaga
> >  is enabled to avoid spurious warnings (Akihiko)
> >
> >   hw/display/virtio-gpu-base.c | 3 ++-
> >   hw/display/virtio-gpu.c  | 5 +++--
> >   softmmu/qdev-monitor.c   | 3 +++
> >   softmmu/vl.c | 1 +
> >   4 files changed, 9 insertions(+), 3 deletions(-)
> >
> > diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c
> > index 4f2b0ba1f3..50c5373b65 100644
> > --- a/hw/display/virtio-gpu-base.c
> > +++ b/hw/display/virtio-gpu-base.c
> > @@ -223,7 +223,8 @@ virtio_gpu_base_get_features(VirtIODevice *vdev,
> uint64_t features,
> >   {
> >   VirtIOGPUBase *g = VIRTIO_GPU_BASE(vdev);
> >
> > -if (virtio_gpu_virgl_enabled(g->conf)) {
> > +if (virtio_gpu_virgl_enabled(g->conf) ||
> > +virtio_gpu_rutabaga_enabled(g->conf)) {
> >   features |= (1 << VIRTIO_GPU_F_VIRGL);
> >   }
> >   if (virtio_gpu_edid_enabled(g->conf)) {
> > diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
> > index 3e658f1fef..fe094addef 100644
> > --- a/hw/display/virtio-gpu.c
> > +++ b/hw/display/virtio-gpu.c
> > @@ -1361,8 +1361,9 @@ void virtio_gpu_device_realize(DeviceState *qdev,
> Error **errp)
> >   VirtIOGPU *g = VIRTIO_GPU(qdev);
> >
> >   if (virtio_gpu_blob_enabled(g->parent_obj.conf)) {
> > -if (!virtio_gpu_have_udmabuf()) {
> > -error_setg(errp, "cannot enable blob resources without
> udmabuf");
> > +if (!virtio_gpu_rutabaga_enabled(g->parent_obj.conf) &&
> > +!virtio_gpu_have_udmabuf()) {
> > +error_setg(errp, "need rutabaga or udmabuf for blob
> resources");
>
> Does that mean udmabuf is not required at all when using rutabaga?
> How does rutabaga handle blob resources?
>

It's not required, since it's a Linux-only thing.  Some use cases do prefer
to use guest memory and we do have provisional support for that (see
`handle` field of `rutabaga_resource_create_blob`).  Though more testing is
required on the target platform, and likely virtio-gpu spec changes for a
full-functional solution.



>
> >   return;
> >   }
> >
> > diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c
> > index 74f4e41338..1b8005ae55 100644
> > --- a/softmmu/qdev-monitor.c
> > +++ b/softmmu/qdev-monitor.c
> > @@ -86,6 +86,9 @@ static const QDevAlias qdev_alias_table[] = {
> >   { "virtio-gpu-pci", "virtio-gpu", QEMU_ARCH_VIRTIO_PCI },
> >   { "virtio-gpu-gl-device", "virtio-gpu-gl", QEMU_ARCH_VIRTIO_MMIO },
> >   { "virtio-gpu-gl-pci", "virtio-gpu-gl", QEMU_ARCH_VIRTIO_PCI },
> > +{ "virtio-gpu-rutabaga-device", "virtio-gpu-rutabaga",
> > +  QEMU_ARCH_VIRTIO_MMIO },
> > +{ "virtio-gpu-rutabaga-pci", "virtio-gpu-rutabaga",
> QEMU_ARCH_VIRTIO_PCI },
> >   { "virtio-input-host-device", "virtio-input-host",
> QEMU_ARCH_VIRTIO_MMIO },
> >   { "virtio-input-host-ccw", "virtio-input-host",
> QEMU_ARCH_VIRTIO_CCW },
> >   { "virtio-input-host-pci", "virtio-input-host",
> QEMU_ARCH_VIRTIO_PCI },
> > diff --git a/softmmu/vl.c b/softmmu/vl.c
> > index b0b96f67fa..2f98eefdf3 100644
> > --- a/softmmu/vl.c
> > +++ b/softmmu/vl.c
> > @@ -216,6 +216,7 @@ static struct {
 { .driver = "ati-vga",  .flag = &default_vga   },
 { .driver = "vhost-user-vga",   .flag = &default_vga   },
 { .driver = "virtio-vga-gl",.flag = &default_vga   },
+{ .driver = "virtio-vga-rutabaga",  .flag = &default_vga   },
> >   };
> >
> >   static QemuOptsList qemu_rtc_opts = {
>
> Patches 5 to 9:
> Reviewed-by: Antonio Caggiano 
>
> Cheers,
> Antonio
>


Re: [PATCH v11 0/9] rutabaga_gfx + gfxstream

2023-08-25 Thread Gurchetan Singh
On Fri, Aug 25, 2023 at 12:11 AM Alyssa Ross  wrote:

> Gurchetan Singh  writes:
>
> > On Wed, Aug 23, 2023 at 4:07 AM Alyssa Ross  wrote:
> >
> >> Gurchetan Singh  writes:
> >>
> >> > - Official "release commits" issued for rutabaga_gfx_ffi,
> >> >   gfxstream, aemu-base.  For example, see crrev.com/c/4778941
> >> >
> >> > - The release commits can make packaging easier, though once
> >> >   again all known users will likely just build from sources
> >> >   anyways
> >>
> >> It's a small thing, but could there be actual tags, rather than just
> >> blessed commits?  It'd just make them easier to find, and save a bit of
> >> time in review for packages.
> >>
> >
> > I added:
> >
> >
> https://crosvm.dev/book/appendix/rutabaga_gfx.html#latest-releases-for-potential-packaging
> >
> > Tags are possible, but I want to clarify the use case before packaging.
> > Where are you thinking of packaging it for (Debian??)? Are you mostly
> > interested in Wayland passthrough (my guess) or gfxstream too?  Depending
> > your use case, we may be able to minimize the work involved.
>
> Packaging for Nixpkgs (where I already maintain what to my knowledge is
> the only crosvm distro package).  I'm personally mostly interested in
> Wayland passthrough, but I wouldn't be surprised if others are interested
> in gfxstream.  The packaging work is already done, I've just been
> holding off actually pushing the packages waiting for the stable
> releases.
>
> The reason that tags would be useful is that it allows a reviewer of the
> package to see at a glance that the package is built from a stable
> release.  If it's just built from a commit hash, they have to go and
> verify that it's a stable release, which is mildly annoying and
> unconventional.
>

Understood.  Request to have gfxstream and AEMU v0.1.2 release tags made.

For rutabaga_gfx_ffi, is the crates.io upload sufficient?

https://crates.io/crates/rutabaga_gfx_ffi

Debian, for example, treats crates.io as the source of truth and builds
> tooling around that.  I wonder if Nixpkgs has similar tooling around
crates.io.


Re: [PATCH V3 00/10] fix migration of suspended runstate

2023-08-25 Thread Steven Sistare
On 8/25/2023 11:07 AM, Peter Xu wrote:
> On Fri, Aug 25, 2023 at 09:28:28AM -0400, Steven Sistare wrote:
>> On 8/24/2023 5:09 PM, Steven Sistare wrote:
>>> On 8/17/2023 2:23 PM, Peter Xu wrote:
 On Mon, Aug 14, 2023 at 11:54:26AM -0700, Steve Sistare wrote:
> Migration of a guest in the suspended runstate is broken.  The incoming
> migration code automatically tries to wake the guest, which is wrong;
> the guest should end migration in the same runstate it started.  Further,
> for a restored snapshot, the automatic wakeup fails.  The runstate is
> RUNNING, but the guest is not.  See the commit messages for the details.

 Hi Steve,

 I drafted two small patches to show what I meant, on top of this series.
 Before applying these two, one needs to revert patch 1 in this series.

 After applied, it should also pass all three new suspend tests.  We can
 continue the discussion here based on the patches.
>>>
>>> Your 2 patches look good.  I suggest we keep patch 1, and I squash patch 2
>>> into the other patches.
> 
> Yes.  Feel free to reorganize / modify /.. the changes in whatever way you
> prefer in the final patchset.
> 
>>>
>>> There is one more fix needed: on the sending side, if the state is 
>>> suspended,
>>> then ticks must be disabled so the tick globals are updated before they are
>>> written to vmstate.  Otherwise, tick starts at 0 in the receiver when
>>> cpu_enable_ticks is called.
>>>
>>> ---
>>> diff --git a/migration/migration.c b/migration/migration.c
>> [...]
>>> ---
>>
>> This diff is just a rough draft.  I need to resume ticks if the migration
>> fails or is cancelled, and I am trying to push the logic into vm_stop,
>> vm_stop_force_state, and vm_start, and/or vm_prepare_start.
> 
> Yes this sounds better than hard code things into migration codes, thanks.
> 
> Maybe at least all the migration related code paths should always use
> vm_stop_force_state() (e.g. save_snapshot)?
> 
> At the meantime, AFAIU we should allow runstate_is_running() to return true
> even for suspended, matching current usages of vm_start() / vm_stop().  But
> again that can have risk of breaking existing users.
> 
> I bet you may have a better grasp of what it should look like to solve the
> current "migrate suspended VM" problem at the minimum but hopefully still
> in a clean way, so I assume I'll just wait and see.

I found a better way.
Rather than disabling ticks, I added a pre_save handler to capture and save
the correct timer state even if the timer is running, using the logic from
cpr_disable_ticks. No changes needed in the migration code:


diff --git a/softmmu/cpu-timers.c b/softmmu/cpu-timers.c
index 117408c..d5af317 100644
--- a/softmmu/cpu-timers.c
+++ b/softmmu/cpu-timers.c
@@ -157,6 +157,36 @@ static bool icount_shift_state_needed(void *opaque)
 return icount_enabled() == 2;
 }

+static int cpu_pre_save_ticks(void *opaque)
+{
+TimersState *t = &timers_state;
+TimersState *snap = opaque;
+
+seqlock_write_lock(&t->vm_clock_seqlock, &t->vm_clock_lock);
+
+if (t->cpu_ticks_enabled) {
+snap->cpu_ticks_offset = t->cpu_ticks_offset + cpu_get_host_ticks();
+snap->cpu_clock_offset = cpu_get_clock_locked();
+} else {
+snap->cpu_ticks_offset = t->cpu_ticks_offset;
+snap->cpu_clock_offset = t->cpu_clock_offset;
+}
+seqlock_write_unlock(&t->vm_clock_seqlock, &t->vm_clock_lock);
+return 0;
+}
+
+static int cpu_post_load_ticks(void *opaque, int version_id)
+{
+TimersState *t = &timers_state;
+TimersState *snap = opaque;
+
+seqlock_write_lock(&t->vm_clock_seqlock, &t->vm_clock_lock);
+t->cpu_ticks_offset = snap->cpu_ticks_offset;
+t->cpu_clock_offset = snap->cpu_clock_offset;
+seqlock_write_unlock(&t->vm_clock_seqlock, &t->vm_clock_lock);
+return 0;
+}
+
 /*
  * Subsection for warp timer migration is optional, because may not be created
  */
@@ -221,6 +251,8 @@ static const VMStateDescription vmstate_timers = {
 .name = "timer",
 .version_id = 2,
 .minimum_version_id = 1,
+.pre_save = cpu_pre_save_ticks,
+.post_load = cpu_post_load_ticks,
 .fields = (VMStateField[]) {
 VMSTATE_INT64(cpu_ticks_offset, TimersState),
 VMSTATE_UNUSED(8),
@@ -269,9 +301,11 @@ TimersState timers_state;
 /* initialize timers state and the cpu throttle for convenience */
 void cpu_timers_init(void)
 {
+static TimersState timers_snapshot;
+
 seqlock_init(&timers_state.vm_clock_seqlock);
 qemu_spin_init(&timers_state.vm_clock_lock);
-vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
+vmstate_register(NULL, 0, &vmstate_timers, &timers_snapshot);

 cpu_throttle_init();
 }


- Steve



[PATCH 3/3] hw/mips/jazz: Simplify the NIC setup code

2023-08-25 Thread Thomas Huth
The for-loop does not make much sense here - it is always left after
the first iteration, so we can also check for nb_nics == 1 instead
which is way easier to understand.

Also, the checks for nd->model are superfluous since the code in
mips_jazz_init_net() calls qemu_check_nic_model() that already
takes care of this (i.e. initializing nd->model if it has not been
set yet, and checking whether it is the "help" option or the
supported NIC model).

Signed-off-by: Thomas Huth 
---
 hw/mips/jazz.c | 21 +
 1 file changed, 5 insertions(+), 16 deletions(-)

diff --git a/hw/mips/jazz.c b/hw/mips/jazz.c
index a95a1bd743..86cd1d2fb2 100644
--- a/hw/mips/jazz.c
+++ b/hw/mips/jazz.c
@@ -172,7 +172,6 @@ static void mips_jazz_init(MachineState *machine,
 MemoryRegion *rtc = g_new(MemoryRegion, 1);
 MemoryRegion *dma_dummy = g_new(MemoryRegion, 1);
 MemoryRegion *dp8393x_prom = g_new(MemoryRegion, 1);
-NICInfo *nd;
 DeviceState *dev, *rc4030;
 MMIOKBDState *i8042;
 SysBusDevice *sysbus;
@@ -315,21 +314,11 @@ static void mips_jazz_init(MachineState *machine,
 }
 
 /* Network controller */
-for (n = 0; n < nb_nics; n++) {
-nd = &nd_table[n];
-if (!nd->model) {
-nd->model = g_strdup("dp83932");
-}
-if (strcmp(nd->model, "dp83932") == 0) {
-mips_jazz_init_net(nd, rc4030_dma_mr, rc4030, dp8393x_prom);
-break;
-} else if (is_help_option(nd->model)) {
-error_report("Supported NICs: dp83932");
-exit(1);
-} else {
-error_report("Unsupported NIC: %s", nd->model);
-exit(1);
-}
+if (nb_nics == 1) {
+mips_jazz_init_net(&nd_table[0], rc4030_dma_mr, rc4030, dp8393x_prom);
+} else if (nb_nics > 1) {
+error_report("This machine only supports one NIC");
+exit(1);
 }
 
 /* SCSI adapter */
-- 
2.39.3




[PATCH 2/3] hw/mips/jazz: Move the NIC init code into a separate function

2023-08-25 Thread Thomas Huth
The mips_jazz_init() function is already quite big, so moving
away some code here can help to make it more understandable.
Additionally, by moving this code into a separate function, the
next patch (that will refactor the for-loop around the NIC init
code) will be much shorter and easier to understand.

Signed-off-by: Thomas Huth 
---
 hw/mips/jazz.c | 62 --
 1 file changed, 35 insertions(+), 27 deletions(-)

diff --git a/hw/mips/jazz.c b/hw/mips/jazz.c
index 358bb6f74f..a95a1bd743 100644
--- a/hw/mips/jazz.c
+++ b/hw/mips/jazz.c
@@ -114,6 +114,40 @@ static const MemoryRegionOps dma_dummy_ops = {
 .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
+static void mips_jazz_init_net(NICInfo *nd, IOMMUMemoryRegion *rc4030_dma_mr,
+   DeviceState *rc4030, MemoryRegion *dp8393x_prom)
+{
+DeviceState *dev;
+SysBusDevice *sysbus;
+int checksum, i;
+uint8_t *prom;
+
+qemu_check_nic_model(nd, "dp83932");
+
+dev = qdev_new("dp8393x");
+qdev_set_nic_properties(dev, nd);
+qdev_prop_set_uint8(dev, "it_shift", 2);
+qdev_prop_set_bit(dev, "big_endian", target_words_bigendian());
+object_property_set_link(OBJECT(dev), "dma_mr",
+ OBJECT(rc4030_dma_mr), &error_abort);
+sysbus = SYS_BUS_DEVICE(dev);
+sysbus_realize_and_unref(sysbus, &error_fatal);
+sysbus_mmio_map(sysbus, 0, 0x80001000);
+sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(rc4030, 4));
+
+/* Add MAC address with valid checksum to PROM */
+prom = memory_region_get_ram_ptr(dp8393x_prom);
+checksum = 0;
+for (i = 0; i < 6; i++) {
+prom[i] = nd->macaddr.a[i];
+checksum += prom[i];
+if (checksum > 0xff) {
+checksum = (checksum + 1) & 0xff;
+}
+}
+prom[7] = 0xff - checksum;
+}
+
 #define MAGNUM_BIOS_SIZE_MAX 0x7e000
 #define MAGNUM_BIOS_SIZE   
\
 (BIOS_SIZE < MAGNUM_BIOS_SIZE_MAX ? BIOS_SIZE : MAGNUM_BIOS_SIZE_MAX)
@@ -287,33 +321,7 @@ static void mips_jazz_init(MachineState *machine,
 nd->model = g_strdup("dp83932");
 }
 if (strcmp(nd->model, "dp83932") == 0) {
-int checksum, i;
-uint8_t *prom;
-
-qemu_check_nic_model(nd, "dp83932");
-
-dev = qdev_new("dp8393x");
-qdev_set_nic_properties(dev, nd);
-qdev_prop_set_uint8(dev, "it_shift", 2);
-qdev_prop_set_bit(dev, "big_endian", target_words_bigendian());
-object_property_set_link(OBJECT(dev), "dma_mr",
- OBJECT(rc4030_dma_mr), &error_abort);
-sysbus = SYS_BUS_DEVICE(dev);
-sysbus_realize_and_unref(sysbus, &error_fatal);
-sysbus_mmio_map(sysbus, 0, 0x80001000);
-sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(rc4030, 4));
-
-/* Add MAC address with valid checksum to PROM */
-prom = memory_region_get_ram_ptr(dp8393x_prom);
-checksum = 0;
-for (i = 0; i < 6; i++) {
-prom[i] = nd->macaddr.a[i];
-checksum += prom[i];
-if (checksum > 0xff) {
-checksum = (checksum + 1) & 0xff;
-}
-}
-prom[7] = 0xff - checksum;
+mips_jazz_init_net(nd, rc4030_dma_mr, rc4030, dp8393x_prom);
 break;
 } else if (is_help_option(nd->model)) {
 error_report("Supported NICs: dp83932");
-- 
2.39.3




[PATCH 0/3] hw/mips/jazz: Rework the NIC init code

2023-08-25 Thread Thomas Huth
The NIC init code of the jazz machines is rather cumbersome, with
a for-loop around it that is always left after the first iteration.
This patch series reworks this a little bit to make the code more
readable and shorter.

Thomas Huth (3):
  hw/mips/jazz: Remove the big_endian variable
  hw/mips/jazz: Move the NIC init code into a separate function
  hw/mips/jazz: Simplify the NIC setup code

 hw/mips/jazz.c | 89 +++---
 1 file changed, 40 insertions(+), 49 deletions(-)

-- 
2.39.3




[PATCH 1/3] hw/mips/jazz: Remove the big_endian variable

2023-08-25 Thread Thomas Huth
There is an easier way to get a value that can be used to decide
whether the target is big endian or not: Simply use the
target_words_bigendian() function instead.

Signed-off-by: Thomas Huth 
---
 hw/mips/jazz.c | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/hw/mips/jazz.c b/hw/mips/jazz.c
index ca4426a92c..358bb6f74f 100644
--- a/hw/mips/jazz.c
+++ b/hw/mips/jazz.c
@@ -125,7 +125,7 @@ static void mips_jazz_init(MachineState *machine,
 {
 MemoryRegion *address_space = get_system_memory();
 char *filename;
-int bios_size, n, big_endian;
+int bios_size, n;
 Clock *cpuclk;
 MIPSCPU *cpu;
 MIPSCPUClass *mcc;
@@ -157,12 +157,6 @@ static void mips_jazz_init(MachineState *machine,
 [JAZZ_PICA61] = {, 4},
 };
 
-#if TARGET_BIG_ENDIAN
-big_endian = 1;
-#else
-big_endian = 0;
-#endif
-
 if (machine->ram_size > 256 * MiB) {
 error_report("RAM size more than 256Mb is not supported");
 exit(EXIT_FAILURE);
@@ -301,7 +295,7 @@ static void mips_jazz_init(MachineState *machine,
 dev = qdev_new("dp8393x");
 qdev_set_nic_properties(dev, nd);
 qdev_prop_set_uint8(dev, "it_shift", 2);
-qdev_prop_set_bit(dev, "big_endian", big_endian > 0);
+qdev_prop_set_bit(dev, "big_endian", target_words_bigendian());
 object_property_set_link(OBJECT(dev), "dma_mr",
 OBJECT(rc4030_dma_mr), &error_abort);
 sysbus = SYS_BUS_DEVICE(dev);
-- 
2.39.3




Re: [PATCH v2 2/4] tests/migration-test: Add a test for null parameter setups

2023-08-25 Thread Thomas Huth

On 25/08/2023 19.15, Peter Xu wrote:

Add a test for StrOrNull parameters (tls-*).

Reviewed-by: Fabiano Rosas 
Signed-off-by: Peter Xu 
---
  tests/qtest/migration-test.c | 21 +
  1 file changed, 21 insertions(+)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 62d3f37021..64efee8b04 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -1471,6 +1471,26 @@ static void test_postcopy_preempt_all(void)
  
  #endif
  
+/*

+ * We have a few parameters that allows null as input, test them to make
+ * sure they won't crash (where some used to).
+ */
+static void test_null_parameters(void)
+{
+const char *allow_null_params[] = {
+"tls-authz", "tls-hostname", "tls-creds"};


I'd place the ending bracket on a new line.


+QTestState *vm = qtest_init("");
+int i;
+
+for (i = 0; i < sizeof(allow_null_params) / sizeof(const char *); i++) {


Could you use ARRAY_SIZE() instead of calculating it on your own?


+qtest_qmp_assert_success(vm, "{ 'execute': 'migrate-set-parameters',"
+ "'arguments': { %s: null } }",
+ allow_null_params[i]);
+}
+
+qtest_quit(vm);
+}
+
  static void test_baddest(void)
  {
  MigrateStart args = {
@@ -2827,6 +2847,7 @@ int main(int argc, char **argv)
  }
  }
  
+qtest_add_func("/migration/null_parameters", test_null_parameters);

  qtest_add_func("/migration/bad_dest", test_baddest);
  qtest_add_func("/migration/precopy/unix/plain", test_precopy_unix_plain);
  qtest_add_func("/migration/precopy/unix/xbzrle", 
test_precopy_unix_xbzrle);


 Thomas




[PATCH v2 0/4] qapi/migration: Dedup migration parameter objects and fix tls-authz crash

2023-08-25 Thread Peter Xu
v2:
- Collected R-bs
- Patch 3: convert to use StrOrNull rather than str for the tls_fields
  (it contains a lot of changes, I'll skip listing details, but please
   refer to the commit message)

Patch 1 fixes the tls-authz crashing when someone specifies "null"
parameter for tls-authz.

Patch 2 added a test case for all three tls-auth parameters specifying
"null" to make sure nothing will crash ever with 'null' passed into it.

Patch 3-4 are the proposed patches to deduplicate the three migration
parameter objects in qapi/migration.json.  Note that in this version (patch
3) we used 'str' to replace 'StrOrNull' for tls-* parameters to make then
deduplicate-able.

Please review, thanks.

Peter Xu (4):
  migration/qmp: Fix crash on setting tls-authz with null
  tests/migration-test: Add a test for null parameter setups
  migration/qapi: Replace @MigrateSetParameters with
@MigrationParameters
  migration/qapi: Drop @MigrationParameter enum

 qapi/migration.json| 370 +
 include/hw/qdev-properties.h   |   3 +
 migration/options.h|  50 +
 hw/core/qdev-properties.c  |  40 
 migration/migration-hmp-cmds.c |  23 +-
 migration/options.c| 266 ++--
 migration/tls.c|   3 +-
 tests/qtest/migration-test.c   |  21 ++
 8 files changed, 246 insertions(+), 530 deletions(-)

-- 
2.41.0




[PATCH v2 4/4] migration/qapi: Drop @MigrationParameter enum

2023-08-25 Thread Peter Xu
Drop the enum in qapi because it is never used in QMP APIs.  Instead making
it an internal definition for QEMU so that we can decouple it from QAPI,
and also we can deduplicate the QAPI documentations.

Signed-off-by: Peter Xu 
---
 qapi/migration.json| 179 -
 migration/options.h|  47 +
 migration/migration-hmp-cmds.c |   3 +-
 migration/options.c|  51 ++
 4 files changed, 100 insertions(+), 180 deletions(-)

diff --git a/qapi/migration.json b/qapi/migration.json
index 45d69787ae..eeb1878c4f 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -672,185 +672,6 @@
   'bitmaps': [ 'BitmapMigrationBitmapAlias' ]
   } }
 
-##
-# @MigrationParameter:
-#
-# Migration parameters enumeration
-#
-# @announce-initial: Initial delay (in milliseconds) before sending
-# the first announce (Since 4.0)
-#
-# @announce-max: Maximum delay (in milliseconds) between packets in
-# the announcement (Since 4.0)
-#
-# @announce-rounds: Number of self-announce packets sent after
-# migration (Since 4.0)
-#
-# @announce-step: Increase in delay (in milliseconds) between
-# subsequent packets in the announcement (Since 4.0)
-#
-# @compress-level: Set the compression level to be used in live
-# migration, the compression level is an integer between 0 and 9,
-# where 0 means no compression, 1 means the best compression
-# speed, and 9 means best compression ratio which will consume
-# more CPU.
-#
-# @compress-threads: Set compression thread count to be used in live
-# migration, the compression thread count is an integer between 1
-# and 255.
-#
-# @compress-wait-thread: Controls behavior when all compression
-# threads are currently busy.  If true (default), wait for a free
-# compression thread to become available; otherwise, send the page
-# uncompressed.  (Since 3.1)
-#
-# @decompress-threads: Set decompression thread count to be used in
-# live migration, the decompression thread count is an integer
-# between 1 and 255. Usually, decompression is at least 4 times as
-# fast as compression, so set the decompress-threads to the number
-# about 1/4 of compress-threads is adequate.
-#
-# @throttle-trigger-threshold: The ratio of bytes_dirty_period and
-# bytes_xfer_period to trigger throttling.  It is expressed as
-# percentage.  The default value is 50. (Since 5.0)
-#
-# @cpu-throttle-initial: Initial percentage of time guest cpus are
-# throttled when migration auto-converge is activated.  The
-# default value is 20. (Since 2.7)
-#
-# @cpu-throttle-increment: throttle percentage increase each time
-# auto-converge detects that migration is not making progress.
-# The default value is 10. (Since 2.7)
-#
-# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage At
-# the tail stage of throttling, the Guest is very sensitive to CPU
-# percentage while the @cpu-throttle -increment is excessive
-# usually at tail stage.  If this parameter is true, we will
-# compute the ideal CPU percentage used by the Guest, which may
-# exactly make the dirty rate match the dirty rate threshold.
-# Then we will choose a smaller throttle increment between the one
-# specified by @cpu-throttle-increment and the one generated by
-# ideal CPU percentage.  Therefore, it is compatible to
-# traditional throttling, meanwhile the throttle increment won't
-# be excessive at tail stage.  The default value is false.  (Since
-# 5.1)
-#
-# @tls-creds: ID of the 'tls-creds' object that provides credentials
-# for establishing a TLS connection over the migration data
-# channel.  On the outgoing side of the migration, the credentials
-# must be for a 'client' endpoint, while for the incoming side the
-# credentials must be for a 'server' endpoint.  Setting this will
-# enable TLS for all migrations.  The default is unset, resulting
-# in unsecured migration at the QEMU level.  (Since 2.7)
-#
-# @tls-hostname: hostname of the target host for the migration.  This
-# is required when using x509 based TLS credentials and the
-# migration URI does not already include a hostname.  For example
-# if using fd: or exec: based migration, the hostname must be
-# provided so that the server's x509 certificate identity can be
-# validated.  (Since 2.7)
-#
-# @tls-authz: ID of the 'authz' object subclass that provides access
-# control checking of the TLS x509 certificate distinguished name.
-# This object is only resolved at time of use, so can be deleted
-# and recreated on the fly while the migration server is active.
-# If missing, it will default to denying access (Since 4.0)
-#
-# @max-bandwidth: to set maximum speed for migration.  maximum speed
-# in bytes per second.  (Since 2.8)
-#
-# @downtime-limit: set maximum tolerated downtime for migration.
-# maximum 

[PATCH v2 2/4] tests/migration-test: Add a test for null parameter setups

2023-08-25 Thread Peter Xu
Add a test for StrOrNull parameters (tls-*).

Reviewed-by: Fabiano Rosas 
Signed-off-by: Peter Xu 
---
 tests/qtest/migration-test.c | 21 +
 1 file changed, 21 insertions(+)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 62d3f37021..64efee8b04 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -1471,6 +1471,26 @@ static void test_postcopy_preempt_all(void)
 
 #endif
 
+/*
+ * We have a few parameters that allows null as input, test them to make
+ * sure they won't crash (where some used to).
+ */
+static void test_null_parameters(void)
+{
+const char *allow_null_params[] = {
+"tls-authz", "tls-hostname", "tls-creds"};
+QTestState *vm = qtest_init("");
+int i;
+
+for (i = 0; i < sizeof(allow_null_params) / sizeof(const char *); i++) {
+qtest_qmp_assert_success(vm, "{ 'execute': 'migrate-set-parameters',"
+ "'arguments': { %s: null } }",
+ allow_null_params[i]);
+}
+
+qtest_quit(vm);
+}
+
 static void test_baddest(void)
 {
 MigrateStart args = {
@@ -2827,6 +2847,7 @@ int main(int argc, char **argv)
 }
 }
 
+qtest_add_func("/migration/null_parameters", test_null_parameters);
 qtest_add_func("/migration/bad_dest", test_baddest);
 qtest_add_func("/migration/precopy/unix/plain", test_precopy_unix_plain);
 qtest_add_func("/migration/precopy/unix/xbzrle", test_precopy_unix_xbzrle);
-- 
2.41.0




[PATCH v2 3/4] migration/qapi: Replace @MigrateSetParameters with @MigrationParameters

2023-08-25 Thread Peter Xu
Quoting from Markus in his replies:

  migrate-set-parameters sets migration parameters, and
  query-migrate-parameters gets them.  Unsurprisingly, the former's
  argument type MigrateSetParameters is quite close to the latter's
  return type MigrationParameters.  The differences are subtle:

  1. Since migrate-set-parameters supports setting selected parameters,
 its arguments must all be optional (so you can omit the ones you
 don't want to change).  query-migrate-parameters results are also
 all optional, but almost all of them are in fact always present.

  2. For parameters @tls_creds, @tls_hostname, @tls_authz,
 migrate-set-parameters interprets special value "" as "reset to
 default".  Works, because "" is semantically invalid.  Not a
 general solution, because a semantically invalid value need not
 exist.  Markus added a general solution in commit 01fa559826
 ("migration: Use JSON null instead of "" to reset parameter to
 default").  This involved changing the type from 'str' to
 'StrOrNull'.

  3. When parameter @block-bitmap-mapping has not been set,
 query-migrate-parameters does not return it (absent optional
 member).  Clean (but undocumented).  When parameters @tls_creds,
 @tls_hostname, @tls_authz have not been set, it returns the
 semantically invalid value "".  Not so clean (and just as
 undocumented).

Here to deduplicate the two objects: keep @MigrationParameters as the name
of object to use in both places, drop @MigrateSetParameters, at the
meantime switch types of @tls* fields from "str" to "StrOrNull" types.

I found that the TLS code wasn't so much relying on tls_* fields being
non-NULL at all.  Actually on the other way round: if we set tls_authz to
an empty string (NOTE: currently, migrate_init() missed initializing
tls_authz; also touched it up in this patch), we can already fail one of
the migration-test (tls/x509/default-host), as qauthz_is_allowed_by_id()
will assume tls_authz is set even if tls_authz is an empty string.

It means we're actually relying on tls_* fields being NULL even if it's the
empty string.

Let's just make it a rule to return NULL for empty string on these fields
internally.  For that, when converting a StrOrNull into a char* (where we
introduced a helper here in this patch) we'll also make the empty string to
be NULL, to make it always work.  And it doesn't show any issue either when
applying that logic to both tls_creds and tls_hostname.

With above, we can safely change both migration_tls_client_create() and
migrate_tls() to not check the empty string too finally.. not needed
anymore.

Also, we can drop the hackish conversions in qmp_migrate_set_parameters()
where we want to make sure it's a QSTRING; it's not needed now.

This greatly deduplicates the code not only in qapi/migration.json, but
also in the generic migration code.

Markus helped greatly with this patch.  Besides a better commit
message (where I just "stole" from the reply), debugged and resolved a
double free, but also provided the StrOrNull property implementation to be
used in MigrationState object when switching tls_* fields to StrOrNull.

Co-developed-by: Markus Armbruster 
Signed-off-by: Peter Xu 
---
 qapi/migration.json| 191 +---
 include/hw/qdev-properties.h   |   3 +
 migration/options.h|   3 +
 hw/core/qdev-properties.c  |  40 ++
 migration/migration-hmp-cmds.c |  20 +--
 migration/options.c| 220 ++---
 migration/tls.c|   3 +-
 7 files changed, 125 insertions(+), 355 deletions(-)

diff --git a/qapi/migration.json b/qapi/migration.json
index 8843e74b59..45d69787ae 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -851,189 +851,6 @@
{ 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] },
'vcpu-dirty-limit'] }
 
-##
-# @MigrateSetParameters:
-#
-# @announce-initial: Initial delay (in milliseconds) before sending
-# the first announce (Since 4.0)
-#
-# @announce-max: Maximum delay (in milliseconds) between packets in
-# the announcement (Since 4.0)
-#
-# @announce-rounds: Number of self-announce packets sent after
-# migration (Since 4.0)
-#
-# @announce-step: Increase in delay (in milliseconds) between
-# subsequent packets in the announcement (Since 4.0)
-#
-# @compress-level: compression level
-#
-# @compress-threads: compression thread count
-#
-# @compress-wait-thread: Controls behavior when all compression
-# threads are currently busy.  If true (default), wait for a free
-# compression thread to become available; otherwise, send the page
-# uncompressed.  (Since 3.1)
-#
-# @decompress-threads: decompression thread count
-#
-# @throttle-trigger-threshold: The ratio of bytes_dirty_period and
-# bytes_xfer_period to trigger throttling.  It is expressed as
-# percentage.  The default value is 50. (Since 5.0)
-#
-# 

[PATCH v2 1/4] migration/qmp: Fix crash on setting tls-authz with null

2023-08-25 Thread Peter Xu
QEMU will crash if anyone tries to set tls-authz (which is a type
StrOrNull) with 'null' value.  Fix it in the easy way by converting it to
qstring just like the other two tls parameters.

Cc: qemu-sta...@nongnu.org # v4.0+
Fixes: d2f1d29b95 ("migration: add support for a "tls-authz" migration 
parameter")
Reviewed-by: Daniel P. Berrangé 
Reviewed-by: Fabiano Rosas 
Signed-off-by: Peter Xu 
---
 migration/options.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/migration/options.c b/migration/options.c
index 1d1e1321b0..6bbfd4853d 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -1408,20 +1408,25 @@ void qmp_migrate_set_parameters(MigrateSetParameters 
*params, Error **errp)
 {
 MigrationParameters tmp;
 
-/* TODO Rewrite "" to null instead */
+/* TODO Rewrite "" to null instead for all three tls_* parameters */
 if (params->tls_creds
 && params->tls_creds->type == QTYPE_QNULL) {
 qobject_unref(params->tls_creds->u.n);
 params->tls_creds->type = QTYPE_QSTRING;
 params->tls_creds->u.s = strdup("");
 }
-/* TODO Rewrite "" to null instead */
 if (params->tls_hostname
 && params->tls_hostname->type == QTYPE_QNULL) {
 qobject_unref(params->tls_hostname->u.n);
 params->tls_hostname->type = QTYPE_QSTRING;
 params->tls_hostname->u.s = strdup("");
 }
+if (params->tls_authz
+&& params->tls_authz->type == QTYPE_QNULL) {
+qobject_unref(params->tls_authz->u.n);
+params->tls_authz->type = QTYPE_QSTRING;
+params->tls_authz->u.s = strdup("");
+}
 
 migrate_params_test_apply(params, );
 
-- 
2.41.0




Re: [PATCH for-8.2 3/4] migration/qapi: Replace @MigrateSetParameters with @MigrationParameters

2023-08-25 Thread Peter Xu
On Mon, Aug 14, 2023 at 06:19:46PM -0400, Peter Xu wrote:
> Here to deduplicate the two objects, logically it'll be safe only if we use
> "StrOrNull" to replace "str" type, not vice versa.  However we may face
> difficulty using StrOrNull as part of MigrationState.parameters [1] when
> replacing existing @MigrationParameters to use StrOrNull.  With the fact
> that nobody seems to be using "null" for tls-* fields (see the long
> standing qemu crash bug on tls-authz when "null" was passed in), let's use
> "str" to represent both objects.
> 
> This greatly deduplicates the code not only in qapi/migration.json, but
> also in the generic migration code on handling transitions between
> StrOrNull <-> str types.
> 
> [1] https://lore.kernel.org/all/ZNKfoqM0V6pcvrz%2F@x1n/

Markus helped me to work out this problem.  I'll send a new version soon to
switch to StrOrNull for all tls* fields.

Thanks,

-- 
Peter Xu




[PATCH] virtio: use shadow_avail_idx while checking number of heads

2023-08-25 Thread Ilya Maximets
We do not need the most up to date number of heads, we only want to
know if there is at least one.

Use shadow variable as long as it is not equal to the last available
index checked.  This avoids expensive qatomic dereference of the
RCU-protected memory region cache as well as the memory access itself
and the subsequent memory barrier.

The change improves performance of the af-xdp network backend by 2-3%.

Signed-off-by: Ilya Maximets 
---
 hw/virtio/virtio.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 309038fd46..04bf7cc977 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -999,7 +999,15 @@ void virtqueue_push(VirtQueue *vq, const VirtQueueElement 
*elem,
 /* Called within rcu_read_lock().  */
 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
 {
-uint16_t num_heads = vring_avail_idx(vq) - idx;
+uint16_t num_heads;
+
+if (vq->shadow_avail_idx != idx) {
+num_heads = vq->shadow_avail_idx - idx;
+
+return num_heads;
+}
+
+num_heads = vring_avail_idx(vq) - idx;
 
 /* Check it isn't doing very strange things with descriptor numbers. */
 if (num_heads > vq->vring.num) {
-- 
2.40.1




[PATCH] virtio: remove unnecessary thread fence while reading next descriptor

2023-08-25 Thread Ilya Maximets
It was supposed to be a compiler barrier and it was a compiler barrier
initially called 'wmb' (??) when virtio core support was introduced.
Later all the instances of 'wmb' were switched to smp_wmb to fix memory
ordering issues on non-x86 platforms.  However, this one doesn't need
to be an actual barrier.  It's enough for it to stay a compiler barrier
as its only purpose is to ensure that the value is not read twice.

There is no counterpart read barrier in the drivers, AFAICT.  And even
if we needed an actual barrier, it shouldn't have been a write barrier.

Signed-off-by: Ilya Maximets 
---
 hw/virtio/virtio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 309038fd46..6eb8586858 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1051,7 +1051,7 @@ static int virtqueue_split_read_next_desc(VirtIODevice 
*vdev, VRingDesc *desc,
 /* Check they're not leading us off end of descriptors. */
 *next = desc->next;
 /* Make sure compiler knows to grab that: we don't want it changing! */
-smp_wmb();
+barrier();
 
 if (*next >= max) {
 virtio_error(vdev, "Desc next is %u", *next);
-- 
2.40.1




Re: [Qemu PATCH v2 9/9] hw/mem/cxl_type3: Add dpa range validation for accesses to dc regions

2023-08-25 Thread Fan Ni
On Fri, Aug 25, 2023 at 12:42:56PM +0100, Jonathan Cameron wrote:
> On Thu, 24 Aug 2023 13:49:00 -0700
> Fan Ni  wrote:
>
> > On Mon, Aug 07, 2023 at 09:53:42AM +0100, Jonathan Cameron wrote:
> > > On Tue, 25 Jul 2023 18:39:56 +
> > > Fan Ni  wrote:
> > >
> > > > From: Fan Ni 
> > > >
> > > > Not all dpa range in the dc regions is valid to access until an extent
> > > > covering the range has been added. Add a bitmap for each region to
> > > > record whether a dc block in the region has been backed by dc extent.
> > > > For the bitmap, a bit in the bitmap represents a dc block. When a dc
> > > > extent is added, all the bits of the blocks in the extent will be set,
> > > > which will be cleared when the extent is released.
> > > >
> > > > Signed-off-by: Fan Ni 
> > > Hi Fan,
> > >
> > > A few of the bits of feedback apply broadly across the series.  Given I'm
> > > rebasing this anyway to give myself something to test I'll tidy things up
> > > (feel free to disagree with and revert any changes !)
> > > and push a tree out in next day or two.  I'll message when I've done so.
> > >
> > > Jonathan
> >
> > Hi Jonathan,
> > I tried DCD with your branch "cxl-2023-08-07", and noticed the
> > following,
> > 1. You made some changes to the bitmap functionality, now it is only
> > used to validate extents when adding/releasing dc extents. My original
> > thought of adding the bitmap is to 1) validating extents for extent
> > add/release as you do; 2) Add validating when doing read/write to the dc
> > regions since some address region may not have valid extent added yet.
> > Do you think 2) is not necessary?
>
> Change wasn't intentional. I probably just messed up the rebase!

Just double checked the code. The logic is still there, but in another
patch in the series, so no issue and ignore my previous question.
Sorry for the confusion.

>
> >
> > 2. Your change introduced a bug in the code.
> > https://gitlab.com/jic23/qemu/-/blob/cxl-2023-08-07/hw/cxl/cxl-mailbox-utils.c?ref_type=heads#L1394
> > ct3d->dc.num_regions should be ct3d->dc.num_regions-1.
> Thanks.  Given I might forget about about it, if you want to incorporate that 
> in
> your next version that would be great. I might remember to fix it in the 
> meantime!
>
> Jonathan
>

My code does not have this. It seems you added the lastregion variable
to record the last region, while I use the following logic to iterate
the regions and record last region automatically while collecting
min_block_size.

+for (i = 1; i < dev->dc.num_regions; i++) {
+region = >dc.regions[i];
+if (min_block_size > region->block_size) {
+min_block_size = region->block_size;
+}
+}
+
+blk_bitmap = bitmap_new((region->len + region->base
+- dev->dc.regions[0].base) / min_block_size);


Fan

> >
> > Thanks,
> > Fan
> >
> > >
> > > > ---
> > > >  hw/mem/cxl_type3.c  | 155 
> > > >  include/hw/cxl/cxl_device.h |   1 +
> > > >  2 files changed, 156 insertions(+)
> > > >
> > > > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > > > index 41a828598a..51943a36fc 100644
> > > > --- a/hw/mem/cxl_type3.c
> > > > +++ b/hw/mem/cxl_type3.c
> > > > @@ -787,13 +787,37 @@ static int cxl_create_dc_regions(CXLType3Dev 
> > > > *ct3d)
> > > >  /* dsmad_handle is set when creating cdat table entries */
> > > >  region->flags = 0;
> > > >
> > > > +region->blk_bitmap = bitmap_new(region->len / 
> > > > region->block_size);
> > >
> > > In common with many allocators in qemu if this fails it calls abort()
> > > internally so no need to handle potential errors.
> > >
> > > > +if (!region->blk_bitmap) {
> > > > +break;
> > > > +}
> > > > +
> > > >  region_base += region->len;
> > > >  }
> > > > +
> > > > +if (i < ct3d->dc.num_regions) {
> > > > +while (--i >= 0) {
> > > > +g_free(ct3d->dc.regions[i].blk_bitmap);
> > > > +}
> > > > +return -1;
> > > > +}
> > > > +
> > > >  QTAILQ_INIT(>dc.extents);
> > > >
> > > >  return 0;
> > > >  }
> > > >
> > > > +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> > > > +{
> > > > +int i;
> > > > +struct CXLDCD_Region *region;
> > > > +
> > > > +for (i = 0; i < ct3d->dc.num_regions; i++) {
> > > > +region = >dc.regions[i];
> > > > +g_free(region->blk_bitmap);
> > > > +}
> > > > +}
> > > > +
> > > >  static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
> > > >  {
> > > >  DeviceState *ds = DEVICE(ct3d);
> > > > @@ -1021,6 +1045,7 @@ err_free_special_ops:
> > > >  g_free(regs->special_ops);
> > > >  err_address_space_free:
> > > >  if (ct3d->dc.host_dc) {
> > > > +cxl_destroy_dc_regions(ct3d);
> > > >  address_space_destroy(>dc.host_dc_as);
> > > >  }
> > > >  if (ct3d->hostpmem) {
> > > > @@ -1043,6 +1068,7 @@ static void ct3_exit(PCIDevice *pci_dev)
> > > >  

Re: [PATCH 1/2] docs tests: Fix use of migrate_set_parameter

2023-08-25 Thread Thomas Huth

On 25/08/2023 17.59, Markus Armbruster wrote:

docs/multi-thread-compression.txt uses parameter names with
underscores instead of dashes.  Wrong since day one.

docs/rdma.txt, tests/qemu-iotests/181, and tests/qtest/test-hmp.c are
wrong the same way since commit cbde7be900d2 (v6.0.0).  Hard to see,
as test-hmp doesn't check whether the commands work, and iotest 181
appears to be unaffected.

Fixes: 263170e679df (docs: Add a doc about multiple thread compression)
Fixes: cbde7be900d2 (migrate: remove QMP/HMP commands for speed, downtime and 
cache size)
Signed-off-by: Markus Armbruster 
---



Reviewed-by: Thomas Huth 




Re: [PATCH 3/3] target/i386: Fix duplicated feature name in FEAT_KVM

2023-08-25 Thread Tim Wiederhake
On Thu, 2023-08-24 at 17:12 +0200, Philippe Mathieu-Daudé wrote:
> On 24/8/23 15:57, Tim Wiederhake wrote:
> > The mistake became apparent as there were two features with the
> > same name
> > in this cpuid leaf. The names are now in line with the
> > documentation from
> > https://kernel.org/doc/html/latest/virt/kvm/x86/cpuid.html
> > 
> 
> Fixes: 642258c6c7 ("kvm: add kvmclock to its second bit")
> ?
> 
Right, added that locally. Thanks!

> > Signed-off-by: Tim Wiederhake 
> > ---
> >   target/i386/cpu.c | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > index 0b74d80371..ceb291f8a8 100644
> > --- a/target/i386/cpu.c
> > +++ b/target/i386/cpu.c
> > @@ -852,7 +852,7 @@ FeatureWordInfo
> > feature_word_info[FEATURE_WORDS] = {
> >   [FEAT_KVM] = {
> >   .type = CPUID_FEATURE_WORD,
> >   .feat_names = {
> > -    "kvmclock", "kvm-nopiodelay", "kvm-mmu", "kvmclock",
> > +    "kvmclock", "kvm-nopiodelay", "kvm-mmu", "kvmclock2",
> >   "kvm-asyncpf", "kvm-steal-time", "kvm-pv-eoi", "kvm-
> > pv-unhalt",
> >   NULL, "kvm-pv-tlb-flush", NULL, "kvm-pv-ipi",
> >   "kvm-poll-control", "kvm-pv-sched-yield", "kvm-
> > asyncpf-int", "kvm-msi-ext-dest-id",
> 




Re: [PATCH 2/2] tests/qtest/test-hmp: Fix migrate_set_parameter xbzrle-cache-size test

2023-08-25 Thread Thomas Huth

On 25/08/2023 17.59, Markus Armbruster wrote:

The command always fails with "Error: Parameter 'xbzrle_cache_size'
expects a power of two no less than the target page size".  The test
passes anyway.  Change the argument from 1 to 64k to make the test a
bit more useful.

Signed-off-by: Markus Armbruster 
---
  tests/qtest/test-hmp.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qtest/test-hmp.c b/tests/qtest/test-hmp.c
index c0d2d70689..fc9125f8bb 100644
--- a/tests/qtest/test-hmp.c
+++ b/tests/qtest/test-hmp.c
@@ -45,7 +45,7 @@ static const char *hmp_cmds[] = {
  "log all",
  "log none",
  "memsave 0 4096 \"/dev/null\"",
-"migrate_set_parameter xbzrle-cache-size 1",
+"migrate_set_parameter xbzrle-cache-size 64k",
  "migrate_set_parameter downtime-limit 1",
  "migrate_set_parameter max-bandwidth 1",
  "netdev_add user,id=net1",


Reviewed-by: Thomas Huth 




[PATCH 2/2] tests/qtest/test-hmp: Fix migrate_set_parameter xbzrle-cache-size test

2023-08-25 Thread Markus Armbruster
The command always fails with "Error: Parameter 'xbzrle_cache_size'
expects a power of two no less than the target page size".  The test
passes anyway.  Change the argument from 1 to 64k to make the test a
bit more useful.

Signed-off-by: Markus Armbruster 
---
 tests/qtest/test-hmp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qtest/test-hmp.c b/tests/qtest/test-hmp.c
index c0d2d70689..fc9125f8bb 100644
--- a/tests/qtest/test-hmp.c
+++ b/tests/qtest/test-hmp.c
@@ -45,7 +45,7 @@ static const char *hmp_cmds[] = {
 "log all",
 "log none",
 "memsave 0 4096 \"/dev/null\"",
-"migrate_set_parameter xbzrle-cache-size 1",
+"migrate_set_parameter xbzrle-cache-size 64k",
 "migrate_set_parameter downtime-limit 1",
 "migrate_set_parameter max-bandwidth 1",
 "netdev_add user,id=net1",
-- 
2.41.0




[PATCH 0/2] docs tests: Fix use of migrate_set_parameter

2023-08-25 Thread Markus Armbruster
I spotted a bad use of migrate_set_parameter in test-hmp.c, and looked
for more.

I also looked for more failing HMP commands in test-hmp.c.  I found
some, but they fail only on some machines, which feels okay.  They
are:

* device_add usb-mouse,id=mouse1
  device_del mouse1

  Fail when the device is not compiled in, and when the machine
  doesn't provide USB.

* memsave 0 4096 "/dev/null"

  Fails when the machine doesn't have memory there.

* screendump /dev/null

  Fails when there is no console.

* dump-guest-memory /dev/null 0 4096

  Fails for targets that don't support dumping, and when the machine
  doesn't have memory there.

* nmi

  Fails when the machine doesn't provide NMIs.

Markus Armbruster (2):
  docs tests: Fix use of migrate_set_parameter
  tests/qtest/test-hmp: Fix migrate_set_parameter xbzrle-cache-size test

 docs/multi-thread-compression.txt | 12 ++--
 docs/rdma.txt |  2 +-
 tests/qtest/test-hmp.c|  6 +++---
 tests/qemu-iotests/181|  2 +-
 4 files changed, 11 insertions(+), 11 deletions(-)

-- 
2.41.0




[PATCH 1/2] docs tests: Fix use of migrate_set_parameter

2023-08-25 Thread Markus Armbruster
docs/multi-thread-compression.txt uses parameter names with
underscores instead of dashes.  Wrong since day one.

docs/rdma.txt, tests/qemu-iotests/181, and tests/qtest/test-hmp.c are
wrong the same way since commit cbde7be900d2 (v6.0.0).  Hard to see,
as test-hmp doesn't check whether the commands work, and iotest 181
appears to be unaffected.

Fixes: 263170e679df (docs: Add a doc about multiple thread compression)
Fixes: cbde7be900d2 (migrate: remove QMP/HMP commands for speed, downtime and 
cache size)
Signed-off-by: Markus Armbruster 
---
 docs/multi-thread-compression.txt | 12 ++--
 docs/rdma.txt |  2 +-
 tests/qtest/test-hmp.c|  6 +++---
 tests/qemu-iotests/181|  2 +-
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/docs/multi-thread-compression.txt 
b/docs/multi-thread-compression.txt
index bb88c6bdf1..95b1556f67 100644
--- a/docs/multi-thread-compression.txt
+++ b/docs/multi-thread-compression.txt
@@ -117,13 +117,13 @@ to support the multiple thread compression migration:
 {qemu} migrate_set_capability compress on
 
 3. Set the compression thread count on source:
-{qemu} migrate_set_parameter compress_threads 12
+{qemu} migrate_set_parameter compress-threads 12
 
 4. Set the compression level on the source:
-{qemu} migrate_set_parameter compress_level 1
+{qemu} migrate_set_parameter compress-level 1
 
 5. Set the decompression thread count on destination:
-{qemu} migrate_set_parameter decompress_threads 3
+{qemu} migrate_set_parameter decompress-threads 3
 
 6. Start outgoing migration:
 {qemu} migrate -d tcp:destination.host:
@@ -133,9 +133,9 @@ to support the multiple thread compression migration:
 
 The following are the default settings:
 compress: off
-compress_threads: 8
-decompress_threads: 2
-compress_level: 1 (which means best speed)
+compress-threads: 8
+decompress-threads: 2
+compress-level: 1 (which means best speed)
 
 So, only the first two steps are required to use the multiple
 thread compression in migration. You can do more if the default
diff --git a/docs/rdma.txt b/docs/rdma.txt
index 2b4cdea1d8..bd8dd799a9 100644
--- a/docs/rdma.txt
+++ b/docs/rdma.txt
@@ -89,7 +89,7 @@ RUNNING:
 First, set the migration speed to match your hardware's capabilities:
 
 QEMU Monitor Command:
-$ migrate_set_parameter max_bandwidth 40g # or whatever is the MAX of your 
RDMA device
+$ migrate_set_parameter max-bandwidth 40g # or whatever is the MAX of your 
RDMA device
 
 Next, on the destination machine, add the following to the QEMU command line:
 
diff --git a/tests/qtest/test-hmp.c b/tests/qtest/test-hmp.c
index 6704be239b..c0d2d70689 100644
--- a/tests/qtest/test-hmp.c
+++ b/tests/qtest/test-hmp.c
@@ -45,9 +45,9 @@ static const char *hmp_cmds[] = {
 "log all",
 "log none",
 "memsave 0 4096 \"/dev/null\"",
-"migrate_set_parameter xbzrle_cache_size 1",
-"migrate_set_parameter downtime_limit 1",
-"migrate_set_parameter max_bandwidth 1",
+"migrate_set_parameter xbzrle-cache-size 1",
+"migrate_set_parameter downtime-limit 1",
+"migrate_set_parameter max-bandwidth 1",
 "netdev_add user,id=net1",
 "set_link net1 off",
 "set_link net1 on",
diff --git a/tests/qemu-iotests/181 b/tests/qemu-iotests/181
index cb96d09ae5..dc90a10757 100755
--- a/tests/qemu-iotests/181
+++ b/tests/qemu-iotests/181
@@ -109,7 +109,7 @@ if [ ${QEMU_STATUS[$dest]} -lt 0 ]; then
 _notrun 'Postcopy is not supported'
 fi
 
-_send_qemu_cmd $src 'migrate_set_parameter max_bandwidth 4k' "(qemu)"
+_send_qemu_cmd $src 'migrate_set_parameter max-bandwidth 4k' "(qemu)"
 _send_qemu_cmd $src 'migrate_set_capability postcopy-ram on' "(qemu)"
 _send_qemu_cmd $src "migrate -d unix:${MIG_SOCKET}" "(qemu)"
 _send_qemu_cmd $src 'migrate_start_postcopy' "(qemu)"
-- 
2.41.0




Re: [RFC PATCH 3/3] softmmu/vl: Add qemu_cpu_opts QemuOptsList

2023-08-25 Thread Andrew Jones
On Fri, Aug 25, 2023 at 08:16:51PM +0800, LIU Zhiwei wrote:
> This makes the cpu work in a similar way to the -device option.
> 
> For device option,
> """
> ./qemu-system-riscv64 -device e1000,help
> e1000 options:
>   acpi-index=-  (default: 0)
>   addr=   - Slot and optional function number, example: 06.0 
> or 06 (default: -1)
>   autonegotiation= - on/off (default: true)
>   bootindex=
>   extra_mac_registers= - on/off (default: true)
>   failover_pair_id=
> """
> 
> After this patch, the cpu can output its configurations,
> """
> ./qemu-system-riscv64 -cpu rv64,help
> Enable extension:
>   
> rv64imafdch_zicbom_zicboz_zicsr_zifencei_zihintpause_zawrs_zfa_zba_zbb_zbc_zbs_sstc_svadu
> """

I recommend we make it more similar to -device and list the properties
(not just extensions). Besides a listing being easier to read than the
isa string format, listing properties would also output, e.g.

 cbom_blocksize=-  (default: 64)

which would also be helpful.

Thanks,
drew

> 
> Signed-off-by: LIU Zhiwei 
> ---
>  cpu.c |  2 +-
>  include/hw/core/cpu.h | 11 +++
>  softmmu/vl.c  | 35 +++
>  3 files changed, 47 insertions(+), 1 deletion(-)
> 
> diff --git a/cpu.c b/cpu.c
> index 03a313cd72..712bd02684 100644
> --- a/cpu.c
> +++ b/cpu.c
> @@ -257,7 +257,7 @@ void cpu_exec_initfn(CPUState *cpu)
>  #endif
>  }
>  
> -static const char *cpu_type_by_name(const char *cpu_model)
> +const char *cpu_type_by_name(const char *cpu_model)
>  {
>  ObjectClass *oc;
>  const char *cpu_type;
> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> index fdcbe87352..49d41afdfa 100644
> --- a/include/hw/core/cpu.h
> +++ b/include/hw/core/cpu.h
> @@ -657,6 +657,17 @@ CPUState *cpu_create(const char *typename);
>   */
>  const char *parse_cpu_option(const char *cpu_option);
>  
> +/**
> + * cpu_type_by_name:
> + * @cpu_model: The -cpu command line model name.
> + *
> + * Looks up type name by the -cpu command line model name
> + *
> + * Returns: type name of CPU or prints error and terminates process
> + *  if an error occurred.
> + */
> +const char *cpu_type_by_name(const char *cpu_model);
> +
>  /**
>   * cpu_has_work:
>   * @cpu: The vCPU to check.
> diff --git a/softmmu/vl.c b/softmmu/vl.c
> index b0b96f67fa..bc30f3954d 100644
> --- a/softmmu/vl.c
> +++ b/softmmu/vl.c
> @@ -218,6 +218,15 @@ static struct {
>  { .driver = "virtio-vga-gl",.flag = _vga   },
>  };
>  
> +static QemuOptsList qemu_cpu_opts = {
> +.name = "cpu",
> +.implied_opt_name = "cpu_model",
> +.head = QTAILQ_HEAD_INITIALIZER(qemu_cpu_opts.head),
> +.desc = {
> +{ /* end of list */ }
> +},
> +};
> +
>  static QemuOptsList qemu_rtc_opts = {
>  .name = "rtc",
>  .head = QTAILQ_HEAD_INITIALIZER(qemu_rtc_opts.head),
> @@ -1140,6 +1149,21 @@ static int parse_fw_cfg(void *opaque, QemuOpts *opts, 
> Error **errp)
>  return 0;
>  }
>  
> +static int cpu_help_func(void *opaque, QemuOpts *opts, Error **errp)
> +{
> +const char *cpu_model, *cpu_type;
> +cpu_model = qemu_opt_get(opts, "cpu_model");
> +if (!cpu_model) {
> +return 1;
> +}
> +if (!qemu_opt_has_help_opt(opts)) {
> +return 0;
> +}
> +cpu_type = cpu_type_by_name(cpu_model);
> +list_cpu_props((CPUState *)object_new(cpu_type));
> +return 1;
> +}
> +
>  static int device_help_func(void *opaque, QemuOpts *opts, Error **errp)
>  {
>  return qdev_device_help(opts);
> @@ -2467,6 +2491,11 @@ static void qemu_process_help_options(void)
>  exit(0);
>  }
>  
> +if (qemu_opts_foreach(qemu_find_opts("cpu"),
> +  cpu_help_func, NULL, NULL)) {
> +exit(0);
> +}
> +
>  if (qemu_opts_foreach(qemu_find_opts("device"),
>device_help_func, NULL, NULL)) {
>  exit(0);
> @@ -2680,6 +2709,7 @@ void qemu_init(int argc, char **argv)
>  qemu_add_drive_opts(_runtime_opts);
>  qemu_add_opts(_chardev_opts);
>  qemu_add_opts(_device_opts);
> +qemu_add_opts(_cpu_opts);
>  qemu_add_opts(_netdev_opts);
>  qemu_add_opts(_nic_opts);
>  qemu_add_opts(_net_opts);
> @@ -2756,6 +2786,11 @@ void qemu_init(int argc, char **argv)
>  case QEMU_OPTION_cpu:
>  /* hw initialization will check this */
>  cpu_option = optarg;
> +opts = qemu_opts_parse_noisily(qemu_find_opts("cpu"),
> +   optarg, true);
> +if (!opts) {
> +exit(1);
> +}
>  break;
>  case QEMU_OPTION_hda:
>  case QEMU_OPTION_hdb:
> -- 
> 2.17.1
> 



Re: [PATCH 6/6] iotests: add test 314 for "qemu-img rebase" with compression

2023-08-25 Thread Hanna Czenczek

On 01.06.23 21:28, Andrey Drobyshev via wrote:

The test cases considered so far:

1. Check that compression mode isn't compatible with "-f raw" (raw
format doesn't support compression).
2. Check that rebasing an image onto no backing file preserves the data
and writes the copied clusters actually compressed.
3. Same as 2, but with a raw backing file (i.e. the clusters copied from the
backing are originally uncompressed -- we check they end up compressed
after being merged).
4. Remove a single delta from a backing chain, perform the same checks
as in 2.
5. Check that even when backing and overlay are initially uncompressed,
copied clusters end up compressed when rebase with compression is
performed.

Signed-off-by: Andrey Drobyshev 
---
  tests/qemu-iotests/314 | 165 +
  tests/qemu-iotests/314.out |  75 +
  2 files changed, 240 insertions(+)
  create mode 100755 tests/qemu-iotests/314
  create mode 100644 tests/qemu-iotests/314.out


Reviewed-by: Hanna Czenczek 




Re: [PATCH 5/6] qemu-img: add compression option to rebase subcommand

2023-08-25 Thread Hanna Czenczek

On 01.06.23 21:28, Andrey Drobyshev via wrote:

If we rebase an image whose backing file has compressed clusters, we
might end up wasting disk space since the copied clusters are now
uncompressed.  In order to have better control over this, let's add
"--compress" option to the "qemu-img rebase" command.

Note that this option affects only the clusters which are actually being
copied from the original backing file.  The clusters which were
uncompressed in the target image will remain so.

Signed-off-by: Andrey Drobyshev 
---
  docs/tools/qemu-img.rst |  6 --
  qemu-img-cmds.hx|  4 ++--
  qemu-img.c  | 19 +--
  3 files changed, 23 insertions(+), 6 deletions(-)


Interesting.  I was about to protest because we only really support 
writing compressed clusters to new and empty images, so the qcow2 driver 
does not allow overwriting existing clusters with compressed data.  But 
by design we skip all clusters that are anything but unallocated in the 
top image (i.e. the one we are going to write to), so this should indeed 
work out well.


Reviewed-by: Hanna Czenczek 


diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index 15aeddc6d8..973a912dec 100644
--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
@@ -663,7 +663,7 @@ Command description:
  
List, apply, create or delete snapshots in image *FILENAME*.
  
-.. option:: rebase [--object OBJECTDEF] [--image-opts] [-U] [-q] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-p] [-u] -b BACKING_FILE [-F BACKING_FMT] FILENAME

+.. option:: rebase [--object OBJECTDEF] [--image-opts] [-U] [-q] [-f FMT] [-t 
CACHE] [-T SRC_CACHE] [-p] [-u] [-c] -b BACKING_FILE [-F BACKING_FMT] FILENAME
  
Changes the backing file of an image. Only the formats ``qcow2`` and

``qed`` support changing the backing file.
@@ -690,7 +690,9 @@ Command description:
  
  In order to achieve this, any clusters that differ between

  *BACKING_FILE* and the old backing file of *FILENAME* are merged
-into *FILENAME* before actually changing the backing file.
+into *FILENAME* before actually changing the backing file. With ``-c``


“With the ``-c`` option specified, [...]”


+option specified, the clusters which are being merged (but not the
+entire *FILENAME* image) are written in the compressed mode.


“[...] are compressed when written.”


  Note that the safe mode is an expensive operation, comparable to
  converting an image. It only works if the old backing file still





Re: [PATCH V3 00/10] fix migration of suspended runstate

2023-08-25 Thread Peter Xu
On Fri, Aug 25, 2023 at 09:28:28AM -0400, Steven Sistare wrote:
> On 8/24/2023 5:09 PM, Steven Sistare wrote:
> > On 8/17/2023 2:23 PM, Peter Xu wrote:
> >> On Mon, Aug 14, 2023 at 11:54:26AM -0700, Steve Sistare wrote:
> >>> Migration of a guest in the suspended runstate is broken.  The incoming
> >>> migration code automatically tries to wake the guest, which is wrong;
> >>> the guest should end migration in the same runstate it started.  Further,
> >>> for a restored snapshot, the automatic wakeup fails.  The runstate is
> >>> RUNNING, but the guest is not.  See the commit messages for the details.
> >>
> >> Hi Steve,
> >>
> >> I drafted two small patches to show what I meant, on top of this series.
> >> Before applying these two, one needs to revert patch 1 in this series.
> >>
> >> After applied, it should also pass all three new suspend tests.  We can
> >> continue the discussion here based on the patches.
> > 
> > Your 2 patches look good.  I suggest we keep patch 1, and I squash patch 2
> > into the other patches.

Yes.  Feel free to reorganize / modify /.. the changes in whatever way you
prefer in the final patchset.

> > 
> > There is one more fix needed: on the sending side, if the state is 
> > suspended,
> > then ticks must be disabled so the tick globals are updated before they are
> > written to vmstate.  Otherwise, tick starts at 0 in the receiver when
> > cpu_enable_ticks is called.
> > 
> > ---
> > diff --git a/migration/migration.c b/migration/migration.c
> [...]
> > ---
> 
> This diff is just a rough draft.  I need to resume ticks if the migration
> fails or is cancelled, and I am trying to push the logic into vm_stop,
> vm_stop_force_state, and vm_start, and/or vm_prepare_start.

Yes this sounds better than hard code things into migration codes, thanks.

Maybe at least all the migration related code paths should always use
vm_stop_force_state() (e.g. save_snapshot)?

At the meantime, AFAIU we should allow runstate_is_running() to return true
even for suspended, matching current usages of vm_start() / vm_stop().  But
again that can have risk of breaking existing users.

I bet you may have a better grasp of what it should look like to solve the
current "migrate suspended VM" problem at the minimum but hopefully still
in a clean way, so I assume I'll just wait and see.

Thanks,

-- 
Peter Xu




Re: [PATCH 4/6] qemu-img: rebase: avoid unnecessary COW operations

2023-08-25 Thread Hanna Czenczek

On 01.06.23 21:28, Andrey Drobyshev via wrote:

When rebasing an image from one backing file to another, we need to
compare data from old and new backings.  If the diff between that data
happens to be unaligned to the target cluster size, we might end up
doing partial writes, which would lead to copy-on-write and additional IO.

Consider the following simple case (virtual_size == cluster_size == 64K):

base <-- inc1 <-- inc2

qemu-io -c "write -P 0xaa 0 32K" base.qcow2
qemu-io -c "write -P 0xcc 32K 32K" base.qcow2
qemu-io -c "write -P 0xbb 0 32K" inc1.qcow2
qemu-io -c "write -P 0xcc 32K 32K" inc1.qcow2
qemu-img rebase -f qcow2 -b base.qcow2 -F qcow2 inc2.qcow2

While doing rebase, we'll write a half of the cluster to inc2, and block
layer will have to read the 2nd half of the same cluster from the base image
inc1 while doing this write operation, although the whole cluster is already
read earlier to perform data comparison.

In order to avoid these unnecessary IO cycles, let's make sure every
write request is aligned to the overlay cluster size.

Signed-off-by: Andrey Drobyshev 
---
  qemu-img.c | 72 +++---
  1 file changed, 52 insertions(+), 20 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index 60f4c06487..9a469cd609 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3513,6 +3513,7 @@ static int img_rebase(int argc, char **argv)
  uint8_t *buf_new = NULL;
  BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
  BlockDriverState *unfiltered_bs;
+BlockDriverInfo bdi = {0};
  char *filename;
  const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
  int c, flags, src_flags, ret;
@@ -3646,6 +3647,15 @@ static int img_rebase(int argc, char **argv)
  }
  }
  
+/* We need overlay cluster size to make sure write requests are aligned */

+ret = bdrv_get_info(unfiltered_bs, );
+if (ret < 0) {
+error_report("could not get block driver info");
+goto out;
+} else if (bdi.cluster_size == 0) {
+bdi.cluster_size = 1;
+}
+
  /* For safe rebasing we need to compare old and new backing file */
  if (!unsafe) {
  QDict *options = NULL;
@@ -3744,6 +3754,7 @@ static int img_rebase(int argc, char **argv)
  int64_t new_backing_size = 0;
  uint64_t offset;
  int64_t n;
+int64_t n_old = 0, n_new = 0;
  float local_progress = 0;
  
  buf_old = blk_blockalign(blk_old_backing, IO_BUF_SIZE);

@@ -3784,7 +3795,7 @@ static int img_rebase(int argc, char **argv)
  }
  
  for (offset = 0; offset < size; offset += n) {

-bool buf_old_is_zero = false;
+bool old_backing_eof = false;
  
  /* How many bytes can we handle with the next read? */

  n = MIN(IO_BUF_SIZE, size - offset);
@@ -3829,33 +3840,38 @@ static int img_rebase(int argc, char **argv)
  }
  }
  
+/* At this point n must be aligned to the target cluster size. */

+if (offset + n < size) {
+assert(n % bdi.cluster_size == 0);


This is not correct.  First, bdrv_is_allocated_above() operates not on 
the top image, but on images in the backing chain, which may have 
different cluster sizes and so may lead to `n`s that are not aligned to 
the top image’s cluster size:


$ ./qemu-img create -f qcow2 base.qcow2 64M
$ ./qemu-img create -f qcow2 -b base.qcow2 -F qcow2 mid.qcow2 64M
$ ./qemu-img create -f qcow2 -o cluster_size=2M -b mid.qcow2 -F qcow2 
top.qcow2 64M

$ ./qemu-io -c 'write 64k 64k' mid.qcow2
$ ./qemu-img rebase -b base.qcow2 top.qcow2
qemu-img: ../qemu-img.c:3845: img_rebase: Assertion `n % 
bdi.cluster_size == 0' failed.
[1]    636690 IOT instruction (core dumped)  ./qemu-img rebase -b 
base.qcow2 top.qcow2


Second, and this is a more theoretical thing, it would also be broken 
for images with cluster sizes greater than IO_BUF_SIZE.  Now, 
IO_BUF_SIZE is 2 MB, which happens to be precisely the maximum cluster 
size we support for qcow2, and for vmdk we always create images with 64 
kB clusters (I believe), but the vmdk code seems happy to open 
pre-existing images with cluster sizes up to 512 MB. Still, even for 
qcow2, we could easily increase the limit from 2 MB at any point, and 
there is no explicit correlation why IO_BUF_SIZE happens to be exactly 
what the current maximum cluster size for qcow2 is.  One way to get 
around this would be to use MAX(IO_BUF_SIZE, bdi.cluster_size) for the 
buffer size, which would give such an explicit correlation.



+}
+
+/*
+ * Much like with the target image, we'll try to read as much
+ * of the old and new backings as we can.
+ */
+n_old = MIN(n, MAX(0, old_backing_size - (int64_t) offset));
+if (blk_new_backing) {
+n_new = MIN(n, MAX(0, new_backing_size - (int64_t) offset));
+}
+
   

Re: [PATCH 2/6] qemu-iotests: 024: add rebasing test case for overlay_size > backing_size

2023-08-25 Thread Hanna Czenczek

On 01.06.23 21:28, Andrey Drobyshev via wrote:

Before previous commit, rebase was getting infinitely stuck in case of
rebasing within the same backing chain and when overlay_size > backing_size.
Let's add this case to the rebasing test 024 to make sure it doesn't
break again.

Signed-off-by: Andrey Drobyshev 
---
  tests/qemu-iotests/024 | 57 ++
  tests/qemu-iotests/024.out | 30 
  2 files changed, 87 insertions(+)


Reviewed-by: Hanna Czenczek 




Re: [PATCH 3/6] qemu-img: rebase: use backing files' BlockBackend for buffer alignment

2023-08-25 Thread Hanna Czenczek

On 01.06.23 21:28, Andrey Drobyshev via wrote:

Since commit bb1c05973cf ("qemu-img: Use qemu_blockalign"), buffers for
the data read from the old and new backing files are aligned using
BlockDriverState (or BlockBackend later on) referring to the target image.
However, this isn't quite right, because target image is only being
written to and has nothing to do with those buffers.  Let's fix that.


I don’t understand.  The write to the target image does use one of those 
buffers (buf_old, specifically).


This change is correct for buf_new/blk_new_backing, but for buf_old, in 
theory, we need a buffer that fulfills both the alignment requirements 
of blk and blk_old_backing.  (Not that this patch really makes the 
situation worse for buf_old.)


Hanna




Re: [PATCH 1/6] qemu-img: rebase: stop when reaching EOF of old backing file

2023-08-25 Thread Hanna Czenczek

On 01.06.23 21:28, Andrey Drobyshev via wrote:

In case when we're rebasing within one backing chain, and when target image
is larger than old backing file, bdrv_is_allocated_above() ends up setting
*pnum = 0.  As a result, target offset isn't getting incremented, and we
get stuck in an infinite for loop.  Let's detect this case and proceed
further down the loop body, as the offsets beyond the old backing size need
to be explicitly zeroed.

Signed-off-by: Andrey Drobyshev 
---
  qemu-img.c | 13 -
  1 file changed, 12 insertions(+), 1 deletion(-)


Reviewed-by: Hanna Czenczek 




Re: [RFC PATCH 0/3] Add API for list cpu extensions

2023-08-25 Thread Daniel Henrique Barboza

Hi Zhiwei! I have two observations:

- this API doesn't play well with KVM as is. In a KVM environment, asking for 
the
enabled extensions of the 'host' CPU returns:

$ ./mnt/qemu/bin/qemu-system-riscv64 -cpu host,help
Enable extension:

rv64imafdch_zicbom_zicboz_zicsr_zifencei_zihintntl_zihintpause_zawrs_zfa_zba_zbb_zbc_zbs_sstc_svadu

This is the same set of extensions enabled in the 'rv64' CPU for TCG. This is
happening because they're sharing the same code that creates properties.

If I apply these patches on top of the "split TCG/KVM accelerators from cpu.c" I
sent earlier, this happens:

$ ./mnt/qemu/bin/qemu-system-riscv64 -cpu host,help
Enable extension:
rv64

For TCG only CPUs (vendor CPUs) the API works even on a KVM host, regardless of
applying on top of riscv-to-apply.next or those accel patches:

$ ./mnt/qemu/bin/qemu-system-riscv64 -cpu veyron-v1,help
Enable extension:

rv64ch_zicbom_zicboz_zicsr_zifencei_zba_zbb_zbc_zbs_smaia_smstateen_ssaia_sscofpmf_sstc_svinval_svnapot_svpbmt_xventanacondops

It seems to me that 'cpu help' doesn't engage the KVM driver accel_init() 
function.
If we decide to go ahead with this API we'll need to figure out whether
accel-specific
initialization is possible. If not, we should declare that this API works only
for TCG.


- I think the presence of the 'cpu help' API limits the command line parsing 
altogether,
making cheeky things like this possible:


(disabling extensions in the cmd line and asking the extensions)
$ ./build/qemu-system-riscv64 -cpu veyron-v1,icbom=false,icboz=false,help
Enable extension:

rv64ch_zicbom_zicboz_zicsr_zifencei_zba_zbb_zbc_zbs_smaia_smstateen_ssaia_sscofpmf_sstc_svinval_svnapot_svpbmt_xventanacondops


(silly option ignored)
$ ./build/qemu-system-riscv64 -cpu veyron-v1,lalala=true,help
Enable extension:

rv64ch_zicbom_zicboz_zicsr_zifencei_zba_zbb_zbc_zbs_smaia_smstateen_ssaia_sscofpmf_sstc_svinval_svnapot_svpbmt_xventanacondops


This is not a gamebreaker but something to keep in mind when using this API. 
Thanks,


Daniel



On 8/25/23 09:16, LIU Zhiwei wrote:

Sometimes we want to know what one cpu option really means.
For example, in RISC-V, we usually specify a cpu in this way:
-cpu rv64,v=on

If we don't look into the source code, we can't get the ISA extensions
of this -cpu command line.

In this patch set, we add one list_cpu_props API for common cores. It
will output the enabled ISA extensions.

In the near future, I will also list all possible user configurable
options and all possible extensions for this cpu.

In order to reuse the options parse code, I also add a QemuOptsList
for cpu.


After this patch, we can output the extensions for cpu,
"""
  ./qemu-system-riscv64 -cpu rv64,help
 Enable extension:
 
rv64imafdch_zicbom_zicboz_zicsr_zifencei_zihintpause_zawrs_zfa_zba_zbb_zbc_zbs_sstc_svadu
"""

Notice currently this patch is only working for RISC-V system mode.

Thanks Andrew Jones for your suggestion!

Todo:
1) Output all possible user configurable options and all extensions.
2) Add support for RISC-V linux-user mode
3) Add support for other archs


LIU Zhiwei (3):
   cpu: Add new API cpu_type_by_name
   target/riscv: Add API list_cpu_props
   softmmu/vl: Add qemu_cpu_opts QemuOptsList

  cpu.c | 39 +++
  include/exec/cpu-common.h |  1 +
  include/hw/core/cpu.h | 11 +++
  softmmu/vl.c  | 35 +++
  target/riscv/cpu.c| 10 ++
  target/riscv/cpu.h|  2 ++
  6 files changed, 86 insertions(+), 12 deletions(-)





Re: [PATCH v2 1/3] block: add BDRV_BLOCK_COMPRESSED flag for bdrv_block_status()

2023-08-25 Thread Hanna Czenczek

On 06.07.23 18:30, Andrey Drobyshev wrote:

Functions qcow2_get_host_offset(), get_cluster_offset(),
vmdk_co_block_status() explicitly report compressed cluster types when data
is compressed.  However, this information is never passed further.  Let's
make use of it by adding new BDRV_BLOCK_COMPRESSED flag for
bdrv_block_status(), so that caller may know that the data range is
compressed.  In particular, we're going to use this flag to tweak
"qemu-img map" output.

This new flag is only being utilized by qcow, qcow2 and vmdk formats, as only
those support compression.

Signed-off-by: Andrey Drobyshev 
---
  block/qcow.c | 5 -
  block/qcow2.c| 3 +++
  block/vmdk.c | 2 ++
  include/block/block-common.h | 3 +++
  4 files changed, 12 insertions(+), 1 deletion(-)


Reviewed-by: Hanna Czenczek 




Re: [PATCH v2 2/3] qemu-img: map: report compressed data blocks

2023-08-25 Thread Hanna Czenczek

On 06.07.23 18:30, Andrey Drobyshev wrote:

Right now "qemu-img map" reports compressed blocks as containing data
but having no host offset.  This is not very informative.  Instead,
let's add another boolean field named "compressed" in case JSON output
mode is specified.  This is achieved by utilizing new allocation status
flag BDRV_BLOCK_COMPRESSED for bdrv_block_status().

Signed-off-by: Andrey Drobyshev 
---
  qapi/block-core.json |  7 +--
  qemu-img.c   | 16 +---
  2 files changed, 18 insertions(+), 5 deletions(-)


Patch 3 must be merged into this patch.  Every test must pass on every 
commit so we don’t break bisecting.



diff --git a/qapi/block-core.json b/qapi/block-core.json
index 5dd5f7e4b0..b263d2cd30 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -409,6 +409,9 @@
  #
  # @zero: whether the virtual blocks read as zeroes
  #
+# @compressed: true indicates that data is stored compressed.  Optional,
+# only valid for the formats which support compression


This is missing information for when this field was introduced (i.e. a 
“(since 8.2)”).


I also wonder why this field is optional.  We have compression 
information even for formats that don’t support compression, 
specifically, nothing is compressed.  I would just make this field 
mandatory and print it always.  (A technical reason to do so is that 
this patch uses block_driver_can_compress() to figure out whether there 
is compression support; but that function only tells whether the driver 
can write compressed data.  Even if it cannot do that, the format may 
still support compression, and the driver may be able to read compressed 
data, just not write it.)


Hanna


+#
  # @depth: number of layers (0 = top image, 1 = top image's backing
  # file, ..., n - 1 = bottom image (where n is the number of images
  # in the chain)) before reaching one for which the range is
@@ -426,8 +429,8 @@





Re: [PATCH v12 8/9] gfxstream + rutabaga: enable rutabaga

2023-08-25 Thread Antonio Caggiano

Hi Gurchetan,

Thank you for this series and for including some of my patches :)

On 25/08/2023 01:40, Gurchetan Singh wrote:

This change enables rutabaga to receive virtio-gpu-3d hypercalls
when it is active.

Signed-off-by: Gurchetan Singh 
Tested-by: Alyssa Ross 
Tested-by: Emmanouil Pitsidianakis 
Reviewed-by: Emmanouil Pitsidianakis 
---
v3: Whitespace fix (Akihiko)
v9: reorder virtio_gpu_have_udmabuf() after checking if rutabaga
 is enabled to avoid spurious warnings (Akihiko)

  hw/display/virtio-gpu-base.c | 3 ++-
  hw/display/virtio-gpu.c  | 5 +++--
  softmmu/qdev-monitor.c   | 3 +++
  softmmu/vl.c | 1 +
  4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c
index 4f2b0ba1f3..50c5373b65 100644
--- a/hw/display/virtio-gpu-base.c
+++ b/hw/display/virtio-gpu-base.c
@@ -223,7 +223,8 @@ virtio_gpu_base_get_features(VirtIODevice *vdev, uint64_t 
features,
  {
  VirtIOGPUBase *g = VIRTIO_GPU_BASE(vdev);
  
-if (virtio_gpu_virgl_enabled(g->conf)) {

+if (virtio_gpu_virgl_enabled(g->conf) ||
+virtio_gpu_rutabaga_enabled(g->conf)) {
  features |= (1 << VIRTIO_GPU_F_VIRGL);
  }
  if (virtio_gpu_edid_enabled(g->conf)) {
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 3e658f1fef..fe094addef 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -1361,8 +1361,9 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error 
**errp)
  VirtIOGPU *g = VIRTIO_GPU(qdev);
  
  if (virtio_gpu_blob_enabled(g->parent_obj.conf)) {

-if (!virtio_gpu_have_udmabuf()) {
-error_setg(errp, "cannot enable blob resources without udmabuf");
+if (!virtio_gpu_rutabaga_enabled(g->parent_obj.conf) &&
+!virtio_gpu_have_udmabuf()) {
+error_setg(errp, "need rutabaga or udmabuf for blob resources");


Does that mean udmabuf is not required at all when using rutabaga?
How does rutabaga handle blob resources?


  return;
  }
  
diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c

index 74f4e41338..1b8005ae55 100644
--- a/softmmu/qdev-monitor.c
+++ b/softmmu/qdev-monitor.c
@@ -86,6 +86,9 @@ static const QDevAlias qdev_alias_table[] = {
  { "virtio-gpu-pci", "virtio-gpu", QEMU_ARCH_VIRTIO_PCI },
  { "virtio-gpu-gl-device", "virtio-gpu-gl", QEMU_ARCH_VIRTIO_MMIO },
  { "virtio-gpu-gl-pci", "virtio-gpu-gl", QEMU_ARCH_VIRTIO_PCI },
+{ "virtio-gpu-rutabaga-device", "virtio-gpu-rutabaga",
+  QEMU_ARCH_VIRTIO_MMIO },
+{ "virtio-gpu-rutabaga-pci", "virtio-gpu-rutabaga", QEMU_ARCH_VIRTIO_PCI },
  { "virtio-input-host-device", "virtio-input-host", QEMU_ARCH_VIRTIO_MMIO 
},
  { "virtio-input-host-ccw", "virtio-input-host", QEMU_ARCH_VIRTIO_CCW },
  { "virtio-input-host-pci", "virtio-input-host", QEMU_ARCH_VIRTIO_PCI },
diff --git a/softmmu/vl.c b/softmmu/vl.c
index b0b96f67fa..2f98eefdf3 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -216,6 +216,7 @@ static struct {
  { .driver = "ati-vga",  .flag = _vga   },
  { .driver = "vhost-user-vga",   .flag = _vga   },
  { .driver = "virtio-vga-gl",.flag = _vga   },
+{ .driver = "virtio-vga-rutabaga",  .flag = _vga   },
  };
  
  static QemuOptsList qemu_rtc_opts = {


Patches 5 to 9:
Reviewed-by: Antonio Caggiano 

Cheers,
Antonio



Re: [RFC PATCH 2/3] target/riscv: Add API list_cpu_props

2023-08-25 Thread Daniel Henrique Barboza




On 8/25/23 09:16, LIU Zhiwei wrote:

This API used for output current configuration for one specified CPU.
Currently only RISC-V frontend implements this API.

Signed-off-by: LIU Zhiwei 
---
  cpu.c |  8 
  include/exec/cpu-common.h |  1 +
  target/riscv/cpu.c| 10 ++
  target/riscv/cpu.h|  2 ++
  4 files changed, 21 insertions(+)

diff --git a/cpu.c b/cpu.c
index e1a9239d0f..03a313cd72 100644
--- a/cpu.c
+++ b/cpu.c
@@ -299,6 +299,14 @@ void list_cpus(void)
  #endif
  }
  
+void list_cpu_props(CPUState *cs)

+{
+/* XXX: implement xxx_cpu_list_props for targets that still miss it */
+#if defined(cpu_list_props)
+cpu_list_props(cs);
+#endif
+}
+
  #if defined(CONFIG_USER_ONLY)
  void tb_invalidate_phys_addr(hwaddr addr)
  {
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 87dc9a752c..b3160d9218 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -166,5 +166,6 @@ int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
  
  /* vl.c */

  void list_cpus(void);
+void list_cpu_props(CPUState *);
  
  #endif /* CPU_COMMON_H */

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 6b93b04453..3ea18de06f 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -2226,6 +2226,16 @@ void riscv_cpu_list(void)
  g_slist_free(list);
  }
  
+void riscv_cpu_list_props(CPUState *cs)

+{
+char *enabled_isa;
+
+enabled_isa = riscv_isa_string(RISCV_CPU(cs));
+qemu_printf("Enable extension:\n");


I suggest "Enabled extensions". LGTM otherwise.

Daniel


+qemu_printf("\t%s\n", enabled_isa);
+/* TODO: output all user configurable options and all possible extensions 
*/
+}
+
  #define DEFINE_CPU(type_name, initfn)  \
  {  \
  .name = type_name, \
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 6ea22e0eea..af1d47605b 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -443,9 +443,11 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int 
size,
  bool probe, uintptr_t retaddr);
  char *riscv_isa_string(RISCVCPU *cpu);
  void riscv_cpu_list(void);
+void riscv_cpu_list_props(CPUState *cs);
  void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp);
  
  #define cpu_list riscv_cpu_list

+#define cpu_list_props riscv_cpu_list_props
  #define cpu_mmu_index riscv_cpu_mmu_index
  
  #ifndef CONFIG_USER_ONLY




Re: [PATCH V3 00/10] fix migration of suspended runstate

2023-08-25 Thread Steven Sistare
On 8/24/2023 5:09 PM, Steven Sistare wrote:
> On 8/17/2023 2:23 PM, Peter Xu wrote:
>> On Mon, Aug 14, 2023 at 11:54:26AM -0700, Steve Sistare wrote:
>>> Migration of a guest in the suspended runstate is broken.  The incoming
>>> migration code automatically tries to wake the guest, which is wrong;
>>> the guest should end migration in the same runstate it started.  Further,
>>> for a restored snapshot, the automatic wakeup fails.  The runstate is
>>> RUNNING, but the guest is not.  See the commit messages for the details.
>>
>> Hi Steve,
>>
>> I drafted two small patches to show what I meant, on top of this series.
>> Before applying these two, one needs to revert patch 1 in this series.
>>
>> After applied, it should also pass all three new suspend tests.  We can
>> continue the discussion here based on the patches.
> 
> Your 2 patches look good.  I suggest we keep patch 1, and I squash patch 2
> into the other patches.
> 
> There is one more fix needed: on the sending side, if the state is suspended,
> then ticks must be disabled so the tick globals are updated before they are
> written to vmstate.  Otherwise, tick starts at 0 in the receiver when
> cpu_enable_ticks is called.
> 
> ---
> diff --git a/migration/migration.c b/migration/migration.c
[...]
> ---

This diff is just a rough draft.  I need to resume ticks if the migration
fails or is cancelled, and I am trying to push the logic into vm_stop,
vm_stop_force_state, and vm_start, and/or vm_prepare_start.

- Steve



Re: [PATCH v5 0/9] Misc fixes for throttle

2023-08-25 Thread Hanna Czenczek

On 28.07.23 04:19, zhenwei pi wrote:

[...]


Zhenwei Pi (9):
   throttle: introduce enum ThrottleDirection
   test-throttle: use enum ThrottleDirection
   throttle: support read-only and write-only
   test-throttle: test read only and write only
   cryptodev: use NULL throttle timer cb for read direction
   throttle: use enum ThrottleDirection instead of bool is_write
   throttle: use THROTTLE_MAX/ARRAY_SIZE for hard code
   fsdev: Use ThrottleDirection instead of bool is_write
   block/throttle-groups: Use ThrottleDirection instead of bool is_write


Thanks, applied to my block branch:

https://gitlab.com/hreitz/qemu/-/commits/block

Hanna




[PATCH v2 03/16] softmmu/physmem: Fixup qemu_ram_block_from_host() documentation

2023-08-25 Thread David Hildenbrand
Let's fixup the documentation (e.g., removing traces of the ram_addr
parameter that no longer exists) and move it to the header file while at
it.

Suggested-by: Igor Mammedov 
Acked-by: Igor Mammedov 
Reviewed-by: Peter Xu 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: David Hildenbrand 
---
 include/exec/cpu-common.h | 15 +++
 softmmu/physmem.c | 17 -
 2 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 87dc9a752c..cc491a4bf4 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -76,6 +76,21 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
 ram_addr_t qemu_ram_addr_from_host(void *ptr);
 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);
 RAMBlock *qemu_ram_block_by_name(const char *name);
+
+/*
+ * Translates a host ptr back to a RAMBlock and an offset in that RAMBlock.
+ *
+ * @ptr: The host pointer to translate.
+ * @round_offset: Whether to round the result offset down to a target page
+ * @offset: Will be set to the offset within the returned RAMBlock.
+ *
+ * Returns: RAMBlock (or NULL if not found)
+ *
+ * By the time this function returns, the returned pointer is not protected
+ * by RCU anymore.  If the caller is not within an RCU critical section and
+ * does not hold the iothread lock, it must have other means of protecting the
+ * pointer, such as a reference to the memory region that owns the RAMBlock.
+ */
 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
ram_addr_t *offset);
 ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host);
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 3df73542e1..57469b3c97 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -2181,23 +2181,6 @@ ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void 
*host)
 return res;
 }
 
-/*
- * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
- * in that RAMBlock.
- *
- * ptr: Host pointer to look up
- * round_offset: If true round the result offset down to a page boundary
- * *ram_addr: set to result ram_addr
- * *offset: set to result offset within the RAMBlock
- *
- * Returns: RAMBlock (or NULL if not found)
- *
- * By the time this function returns, the returned pointer is not protected
- * by RCU anymore.  If the caller is not within an RCU critical section and
- * does not hold the iothread lock, it must have other means of protecting the
- * pointer, such as a reference to the region that includes the incoming
- * ram_addr_t.
- */
 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
ram_addr_t *offset)
 {
-- 
2.41.0




[PATCH v2 16/16] virtio-mem: Mark memslot alias memory regions unmergeable

2023-08-25 Thread David Hildenbrand
Let's mark the memslot alias memory regions as unmergeable, such that
flatview and vhost won't merge adjacent memory region aliases and we can
atomically map/unmap individual aliases without affecting adjacent
alias memory regions.

This handles vhost and vfio in multiple-memslot mode correctly (which do
not support atomic memslot updates) and avoids the temporary removal of
large memslots, which can be an expensive operation. For example, vfio
might have to unpin + repin a lot of memory, which is undesired.

Signed-off-by: David Hildenbrand 
---
 hw/virtio/virtio-mem.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index 724fcb189a..50770b577a 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -959,6 +959,12 @@ static void virtio_mem_prepare_memslots(VirtIOMEM *vmem)
 memory_region_init_alias(>memslots[idx], OBJECT(vmem), name,
  >memdev->mr, memslot_offset,
  memslot_size);
+/*
+ * We want to be able to atomically and efficiently activate/deactivate
+ * individual memslots without affecting adjacent memslots in memory
+ * notifiers.
+ */
+memory_region_set_unmergeable(>memslots[idx], true);
 }
 }
 
-- 
2.41.0




[PATCH v2 12/16] memory-device, vhost: Support automatic decision on the number of memslots

2023-08-25 Thread David Hildenbrand
We want to support memory devices that can automatically decide how many
memslots they will use. In the worst case, they have to use a single
memslot.

The target use cases are virtio-mem and the hyper-v balloon.

Let's calculate a reasonable limit such a memory device may use, and
instruct the device to make a decision based on that limit. Use a simple
heuristic that considers:
* A soft limit of 256 memslots across all memory devices, so as not to
  consume too many memslots -- which could harm performance.
* Actually still free and unreserved memslots
* The percentage of the remaining device memory region that memory device
  will occupy.

Further, while we properly check before plugging a memory device whether
there are still free memslots, we have other memslot consumers (such as
boot memory, PCI BARs) that don't perform any checks and might dynamically
consume memslots without any prior reservation. So we might succeed in
plugging a memory device, but once we dynamically map a PCI BAR we would
be in trouble. Doing accounting / reservation / checks for all such
users is problematic (e.g., sometimes we might temporarily split boot
memory into two memslots, triggered by the BIOS).

We use the historic magic memslot number of 509 as orientation to when
supporting 256 memory devices -> memslots (leaving 253 for boot memory and
other devices) has been proven to work reliable. We'll fallback to
suggesting a single memslot if we don't have at least 509 total memslots.

Plugging vhost devices with less than 509 memslots available while we
have memory devices plugged that consume multiple memslots due to
automatic decisions can be problematic. Most configurations might just fail
due to "limit < used + reserved", however, it can also happen that these
memory devices would suddenly consume memslots that would actually be
required by other memslot consumers (boot, PCI BARs) later. Note that this
has always been sketchy with vhost devices that support only a small number
of memslots; but we don't want to make it any worse. So let's keep it simple
and simply reject plugging such vhost devices in such a configuration.

Eventually, all vhost devices that want to be fully compatible with such
memory devices should support a decent number of memslots (>= 509).

Signed-off-by: David Hildenbrand 
---
 hw/mem/memory-device.c | 93 --
 hw/virtio/vhost.c  | 14 -
 include/hw/boards.h|  4 ++
 include/hw/mem/memory-device.h | 32 
 stubs/memory_device.c  |  5 ++
 5 files changed, 144 insertions(+), 4 deletions(-)

diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c
index 4f1f841517..005e9b3a93 100644
--- a/hw/mem/memory-device.c
+++ b/hw/mem/memory-device.c
@@ -85,13 +85,90 @@ unsigned int memory_devices_get_reserved_memslots(void)
 return get_reserved_memslots(current_machine);
 }
 
+bool memory_devices_memslot_auto_decision_active(void)
+{
+if (!current_machine->device_memory) {
+return false;
+}
+
+return current_machine->device_memory->memslot_auto_decision_active;
+}
+
+static unsigned int memory_device_memslot_decision_limit(MachineState *ms,
+ MemoryRegion *mr)
+{
+const unsigned int max = MIN(vhost_get_max_memslots(),
+ kvm_get_max_memslots());
+const unsigned int free = MIN(vhost_get_free_memslots(),
+  kvm_get_free_memslots());
+const unsigned int reserved = get_reserved_memslots(ms);
+const uint64_t size = memory_region_size(mr);
+uint64_t available_space;
+unsigned int memslots;
+
+/*
+ * If we only have less overall memslots than what we consider reasonable,
+ * just keep it to a minimum.
+ */
+if (max < MEMORY_DEVICES_SAFE_MAX_MEMSLOTS) {
+return 1;
+}
+
+/*
+ * Consider our soft-limit across all memory devices. We don't really
+ * expect to exceed this limit in reasonable configurations.
+ */
+if (MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT <=
+ms->device_memory->required_memslots) {
+return 1;
+}
+memslots = MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT -
+   ms->device_memory->required_memslots;
+
+/*
+ * Consider the actually still free memslots. This is only relevant if
+ * other memslot consumers would consume *significantly* more memslots than
+ * what we prepared for (> 253). Unlikely, but let's just handle it
+ * cleanly.
+ */
+memslots = MIN(memslots, free - reserved);
+if (memslots < 1 || unlikely(free < reserved)) {
+return 1;
+}
+
+/* We cannot have any other memory devices? So give all to this device. */
+if (size == ms->maxram_size - ms->ram_size) {
+return memslots;
+}
+
+/*
+ * Simple heuristic: equally distribute the memslots over the space
+ * still available for memory devices.
+ */
+

[PATCH v2 10/16] kvm: Add stub for kvm_get_max_memslots()

2023-08-25 Thread David Hildenbrand
We'll need the stub soon from memory device context.

While at it, use "unsigned int" as return value and place the
declaration next to kvm_get_free_memslots().

Signed-off-by: David Hildenbrand 
---
 accel/kvm/kvm-all.c| 2 +-
 accel/stubs/kvm-stub.c | 5 +
 include/sysemu/kvm.h   | 2 +-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 0fcea923a1..fea287ec7a 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -174,7 +174,7 @@ void kvm_resample_fd_notify(int gsi)
 }
 }
 
-int kvm_get_max_memslots(void)
+unsigned int kvm_get_max_memslots(void)
 {
 KVMState *s = KVM_STATE(current_accel());
 
diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c
index f39997d86e..ca6a1d9698 100644
--- a/accel/stubs/kvm-stub.c
+++ b/accel/stubs/kvm-stub.c
@@ -109,6 +109,11 @@ int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, 
EventNotifier *n,
 return -ENOSYS;
 }
 
+unsigned int kvm_get_max_memslots(void)
+{
+return UINT_MAX;
+}
+
 unsigned int kvm_get_free_memslots(void)
 {
 return UINT_MAX;
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 321427a543..cb9a7f131c 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -216,6 +216,7 @@ typedef struct KVMRouteChange {
 
 /* external API */
 
+unsigned int kvm_get_max_memslots(void);
 unsigned int kvm_get_free_memslots(void);
 bool kvm_has_sync_mmu(void);
 int kvm_has_vcpu_events(void);
@@ -564,7 +565,6 @@ int kvm_set_one_reg(CPUState *cs, uint64_t id, void 
*source);
  */
 int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target);
 struct ppc_radix_page_info *kvm_get_radix_page_info(void);
-int kvm_get_max_memslots(void);
 
 /* Notify resamplefd for EOI of specific interrupts. */
 void kvm_resample_fd_notify(int gsi);
-- 
2.41.0




[PATCH v2 11/16] vhost: Add vhost_get_max_memslots()

2023-08-25 Thread David Hildenbrand
Let's add vhost_get_max_memslots(), to perform a similar task as
kvm_get_max_memslots().

Signed-off-by: David Hildenbrand 
---
 hw/virtio/vhost-stub.c|  5 +
 hw/virtio/vhost.c | 11 +++
 include/hw/virtio/vhost.h |  1 +
 3 files changed, 17 insertions(+)

diff --git a/hw/virtio/vhost-stub.c b/hw/virtio/vhost-stub.c
index d53dd9d288..52d42adab2 100644
--- a/hw/virtio/vhost-stub.c
+++ b/hw/virtio/vhost-stub.c
@@ -2,6 +2,11 @@
 #include "hw/virtio/vhost.h"
 #include "hw/virtio/vhost-user.h"
 
+unsigned int vhost_get_max_memslots(void)
+{
+return UINT_MAX;
+}
+
 unsigned int vhost_get_free_memslots(void)
 {
 return UINT_MAX;
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index f7e1ac12a8..ee193b07c7 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -55,6 +55,17 @@ static unsigned int used_shared_memslots;
 static QLIST_HEAD(, vhost_dev) vhost_devices =
 QLIST_HEAD_INITIALIZER(vhost_devices);
 
+unsigned int vhost_get_max_memslots(void)
+{
+unsigned int max = UINT_MAX;
+struct vhost_dev *hdev;
+
+QLIST_FOREACH(hdev, _devices, entry) {
+max = MIN(max, hdev->vhost_ops->vhost_backend_memslots_limit(hdev));
+}
+return max;
+}
+
 unsigned int vhost_get_free_memslots(void)
 {
 unsigned int free = UINT_MAX;
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 603bf834be..c7e5467693 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -315,6 +315,7 @@ uint64_t vhost_get_features(struct vhost_dev *hdev, const 
int *feature_bits,
  */
 void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
 uint64_t features);
+unsigned int vhost_get_max_memslots(void);
 unsigned int vhost_get_free_memslots(void);
 
 int vhost_net_set_backend(struct vhost_dev *hdev,
-- 
2.41.0




[PATCH v2 09/16] memory-device, vhost: Support memory devices that dynamically consume memslots

2023-08-25 Thread David Hildenbrand
We want to support memory devices that have a dynamically managed memory
region container as device memory region. This device memory region maps
multiple RAM memory subregions (e.g., aliases to the same RAM memory
region), whereby these subregions can be (un)mapped on demand.

Each RAM subregion will consume a memslot in KVM and vhost, resulting in
such a new device consuming memslots dynamically, and initially usually
0. We already track the number of used vs. required memslots for all
memslots. From that, we can derive the number of reserved memslots that
must not be used otherwise.

The target use case is virtio-mem and the hyper-v balloon, which will
dynamically map aliases to RAM memory region into their device memory
region container.

Properly document what's supported and what's not and extend the vhost
memslot check accordingly.

Signed-off-by: David Hildenbrand 
---
 hw/mem/memory-device.c | 28 ++--
 hw/virtio/vhost.c  | 18 ++
 include/hw/mem/memory-device.h |  7 +++
 stubs/memory_device.c  |  5 +
 4 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c
index ee77f9d290..4f1f841517 100644
--- a/hw/mem/memory-device.c
+++ b/hw/mem/memory-device.c
@@ -62,19 +62,43 @@ static unsigned int 
memory_device_get_memslots(MemoryDeviceState *md)
 return 1;
 }
 
+/*
+ * Memslots that are reserved by memory devices (required but still reported
+ * as free from KVM / vhost).
+ */
+static unsigned int get_reserved_memslots(MachineState *ms)
+{
+if (ms->device_memory->used_memslots >
+ms->device_memory->required_memslots) {
+/* This is unexpected, and we warned already in the memory notifier. */
+return 0;
+}
+return ms->device_memory->required_memslots -
+   ms->device_memory->used_memslots;
+}
+
+unsigned int memory_devices_get_reserved_memslots(void)
+{
+if (!current_machine->device_memory) {
+return 0;
+}
+return get_reserved_memslots(current_machine);
+}
+
 static void memory_device_check_addable(MachineState *ms, MemoryDeviceState 
*md,
 MemoryRegion *mr, Error **errp)
 {
 const uint64_t used_region_size = ms->device_memory->used_region_size;
 const uint64_t size = memory_region_size(mr);
 const unsigned int required_memslots = memory_device_get_memslots(md);
+const unsigned int reserved_memslots = get_reserved_memslots(ms);
 
 /* we will need memory slots for kvm and vhost */
-if (kvm_get_free_memslots() < required_memslots) {
+if (kvm_get_free_memslots() < required_memslots + reserved_memslots) {
 error_setg(errp, "hypervisor has not enough free memory slots left");
 return;
 }
-if (vhost_get_free_memslots() < required_memslots) {
+if (vhost_get_free_memslots() < required_memslots + reserved_memslots) {
 error_setg(errp, "a used vhost backend has not enough free memory 
slots left");
 return;
 }
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 8e84dca246..f7e1ac12a8 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -23,6 +23,7 @@
 #include "qemu/log.h"
 #include "standard-headers/linux/vhost_types.h"
 #include "hw/virtio/virtio-bus.h"
+#include "hw/mem/memory-device.h"
 #include "migration/blocker.h"
 #include "migration/qemu-file-types.h"
 #include "sysemu/dma.h"
@@ -1423,7 +1424,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
VhostBackendType backend_type, uint32_t busyloop_timeout,
Error **errp)
 {
-unsigned int used;
+unsigned int used, reserved, limit;
 uint64_t features;
 int i, r, n_initialized_vqs = 0;
 
@@ -1529,9 +1530,18 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
 } else {
 used = used_memslots;
 }
-if (used > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
-error_setg(errp, "vhost backend memory slots limit is less"
-   " than current number of present memory slots");
+/*
+ * We assume that all reserved memslots actually require a real memslot
+ * in our vhost backend. This might not be true, for example, if the
+ * memslot would be ROM. If ever relevant, we can optimize for that --
+ * but we'll need additional information about the reservations.
+ */
+reserved = memory_devices_get_reserved_memslots();
+limit = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
+if (used + reserved > limit) {
+error_setg(errp, "vhost backend memory slots limit (%d) is less"
+   " than current number of used (%d) and reserved (%d)"
+   " memory slots for memory devices.", limit, used, reserved);
 r = -EINVAL;
 goto fail_busyloop;
 }
diff --git a/include/hw/mem/memory-device.h b/include/hw/mem/memory-device.h
index b51a579fb9..c7b624da6a 100644
--- 

[PATCH v2 06/16] memory-device: Support memory devices with multiple memslots

2023-08-25 Thread David Hildenbrand
We want to support memory devices that have a memory region container as
device memory region that maps multiple RAM memory regions. Let's start
by supporting memory devices that statically map multiple RAM memory
regions and, thereby, consume multiple memslots.

We already have one device that uses a container as device memory region:
NVDIMMs. However, an NVDIMM always ends up consuming exactly one memslot.

Let's add support for that by asking the memory device via a new
callback how many memslots it requires.

Signed-off-by: David Hildenbrand 
---
 hw/mem/memory-device.c | 27 +++
 include/hw/mem/memory-device.h | 18 ++
 2 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c
index 22c12a4345..4613a15e1f 100644
--- a/hw/mem/memory-device.c
+++ b/hw/mem/memory-device.c
@@ -52,19 +52,30 @@ static int memory_device_build_list(Object *obj, void 
*opaque)
 return 0;
 }
 
-static void memory_device_check_addable(MachineState *ms, MemoryRegion *mr,
-Error **errp)
+static unsigned int memory_device_get_memslots(MemoryDeviceState *md)
+{
+const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+
+if (mdc->get_memslots) {
+return mdc->get_memslots(md);
+}
+return 1;
+}
+
+static void memory_device_check_addable(MachineState *ms, MemoryDeviceState 
*md,
+MemoryRegion *mr, Error **errp)
 {
 const uint64_t used_region_size = ms->device_memory->used_region_size;
 const uint64_t size = memory_region_size(mr);
+const unsigned int required_memslots = memory_device_get_memslots(md);
 
-/* we will need a new memory slot for kvm and vhost */
-if (!kvm_get_free_memslots()) {
-error_setg(errp, "hypervisor has no free memory slots left");
+/* we will need memory slots for kvm and vhost */
+if (kvm_get_free_memslots() < required_memslots) {
+error_setg(errp, "hypervisor has not enough free memory slots left");
 return;
 }
-if (!vhost_get_free_memslots()) {
-error_setg(errp, "a used vhost backend has no free memory slots left");
+if (vhost_get_free_memslots() < required_memslots) {
+error_setg(errp, "a used vhost backend has not enough free memory 
slots left");
 return;
 }
 
@@ -233,7 +244,7 @@ void memory_device_pre_plug(MemoryDeviceState *md, 
MachineState *ms,
 goto out;
 }
 
-memory_device_check_addable(ms, mr, _err);
+memory_device_check_addable(ms, md, mr, _err);
 if (local_err) {
 goto out;
 }
diff --git a/include/hw/mem/memory-device.h b/include/hw/mem/memory-device.h
index 48d2611fc5..b51a579fb9 100644
--- a/include/hw/mem/memory-device.h
+++ b/include/hw/mem/memory-device.h
@@ -41,6 +41,11 @@ typedef struct MemoryDeviceState MemoryDeviceState;
  * successive memory regions are used, a covering memory region has to
  * be provided. Scattered memory regions are not supported for single
  * devices.
+ *
+ * The device memory region returned via @get_memory_region may either be a
+ * single RAM memory region or a memory region container with subregions
+ * that are RAM memory regions or aliases to RAM memory regions. Other
+ * memory regions or subregions are not supported.
  */
 struct MemoryDeviceClass {
 /* private */
@@ -88,6 +93,19 @@ struct MemoryDeviceClass {
  */
 MemoryRegion *(*get_memory_region)(MemoryDeviceState *md, Error **errp);
 
+/*
+ * Optional for memory devices that require only a single memslot,
+ * required for all other memory devices: Return the number of memslots
+ * (distinct RAM memory regions in the device memory region) that are
+ * required by the device.
+ *
+ * If this function is not implemented, the assumption is "1".
+ *
+ * Called when (un)plugging the memory device, to check if the requirements
+ * can be satisfied, and to do proper accounting.
+ */
+unsigned int (*get_memslots)(MemoryDeviceState *md);
+
 /*
  * Optional: Return the desired minimum alignment of the device in guest
  * physical address space. The final alignment is computed based on this
-- 
2.41.0




[PATCH v2 14/16] virtio-mem: Expose device memory via multiple memslots if enabled

2023-08-25 Thread David Hildenbrand
Having large virtio-mem devices that only expose little memory to a VM
is currently a problem: we map the whole sparse memory region into the
guest using a single memslot, resulting in one gigantic memslot in KVM.
KVM allocates metadata for the whole memslot, which can result in quite
some memory waste.

Assuming we have a 1 TiB virtio-mem device and only expose little (e.g.,
1 GiB) memory, we would create a single 1 TiB memslot and KVM has to
allocate metadata for that 1 TiB memslot: on x86, this implies allocating
a significant amount of memory for metadata:

(1) RMAP: 8 bytes per 4 KiB, 8 bytes per 2 MiB, 8 bytes per 1 GiB
-> For 1 TiB: 2147483648 + 4194304 + 8192 = ~ 2 GiB (0.2 %)

With the TDP MMU (cat /sys/module/kvm/parameters/tdp_mmu) this gets
allocated lazily when required for nested VMs
(2) gfn_track: 2 bytes per 4 KiB
-> For 1 TiB: 536870912 = ~512 MiB (0.05 %)
(3) lpage_info: 4 bytes per 2 MiB, 4 bytes per 1 GiB
-> For 1 TiB: 2097152 + 4096 = ~2 MiB (0.0002 %)
(4) 2x dirty bitmaps for tracking: 2x 1 bit per 4 KiB page
-> For 1 TiB: 536870912 = 64 MiB (0.006 %)

So we primarily care about (1) and (2). The bad thing is, that the
memory consumption *doubles* once SMM is enabled, because we create the
memslot once for !SMM and once for SMM.

Having a 1 TiB memslot without the TDP MMU consumes around:
* With SMM: 5 GiB
* Without SMM: 2.5 GiB
Having a 1 TiB memslot with the TDP MMU consumes around:
* With SMM: 1 GiB
* Without SMM: 512 MiB

... and that's really something we want to optimize, to be able to just
start a VM with small boot memory (e.g., 4 GiB) and a virtio-mem device
that can grow very large (e.g., 1 TiB).

Consequently, using multiple memslots and only mapping the memslots we
really need can significantly reduce memory waste and speed up
memslot-related operations. Let's expose the sparse RAM memory region using
multiple memslots, mapping only the memslots we currently need into our
device memory region container.

* With VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, we only map the memslots that
  actually have memory plugged, and dynamically (un)map when
  (un)plugging memory blocks.

* Without VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, we always map the memslots
  covered by the usable region, and dynamically (un)map when resizing the
  usable region.

We'll auto-detect the number of memslots to use based on the memslot limit
provided by the core. We'll use at most 1 memslot per gigabyte. Note that
our global limit of memslots across all memory devices is currently set to
256: even with multiple large virtio-mem devices, we'd still have a sane
limit on the number of memslots used.

The default is a single memslot for now ("multiple-memslots=off"). The
optimization must be enabled manually using "multiple-memslots=on", because
some vhost setups (e.g., hotplug of vhost-user devices) might be
problematic until we support more memslots especially in vhost-user
backends.

Note that "multiple-memslots=on" is just a hint that multiple memslots
*may* be used for internal optimizations, not that multiple memslots
*must* be used. The actual number of memslots that are used is an
internal detail: for example, once memslot metadata is no longer an
issue, we could simply stop optimizing for that. Migration source and
destination can differ on the setting of "multiple-memslots".

Signed-off-by: David Hildenbrand 
---
 hw/virtio/virtio-mem-pci.c |  21 +++
 hw/virtio/virtio-mem.c | 266 -
 include/hw/virtio/virtio-mem.h |  23 ++-
 3 files changed, 306 insertions(+), 4 deletions(-)

diff --git a/hw/virtio/virtio-mem-pci.c b/hw/virtio/virtio-mem-pci.c
index c4597e029e..1b4e9a3284 100644
--- a/hw/virtio/virtio-mem-pci.c
+++ b/hw/virtio/virtio-mem-pci.c
@@ -48,6 +48,25 @@ static MemoryRegion 
*virtio_mem_pci_get_memory_region(MemoryDeviceState *md,
 return vmc->get_memory_region(vmem, errp);
 }
 
+static void virtio_mem_pci_decide_memslots(MemoryDeviceState *md,
+   unsigned int limit)
+{
+VirtIOMEMPCI *pci_mem = VIRTIO_MEM_PCI(md);
+VirtIOMEM *vmem = VIRTIO_MEM(_mem->vdev);
+VirtIOMEMClass *vmc = VIRTIO_MEM_GET_CLASS(vmem);
+
+vmc->decide_memslots(vmem, limit);
+}
+
+static unsigned int virtio_mem_pci_get_memslots(MemoryDeviceState *md)
+{
+VirtIOMEMPCI *pci_mem = VIRTIO_MEM_PCI(md);
+VirtIOMEM *vmem = VIRTIO_MEM(_mem->vdev);
+VirtIOMEMClass *vmc = VIRTIO_MEM_GET_CLASS(vmem);
+
+return vmc->get_memslots(vmem);
+}
+
 static uint64_t virtio_mem_pci_get_plugged_size(const MemoryDeviceState *md,
 Error **errp)
 {
@@ -150,6 +169,8 @@ static void virtio_mem_pci_class_init(ObjectClass *klass, 
void *data)
 mdc->set_addr = virtio_mem_pci_set_addr;
 mdc->get_plugged_size = virtio_mem_pci_get_plugged_size;
 mdc->get_memory_region = virtio_mem_pci_get_memory_region;
+mdc->decide_memslots = virtio_mem_pci_decide_memslots;

[PATCH v2 05/16] vhost: Return number of free memslots

2023-08-25 Thread David Hildenbrand
Let's return the number of free slots instead of only checking if there
is a free slot. Required to support memory devices that consume multiple
memslots.

This is a preparation for memory devices that consume multiple memslots.

Signed-off-by: David Hildenbrand 
---
 hw/mem/memory-device.c| 2 +-
 hw/virtio/vhost-stub.c| 4 ++--
 hw/virtio/vhost.c | 4 ++--
 include/hw/virtio/vhost.h | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c
index 7c24685796..22c12a4345 100644
--- a/hw/mem/memory-device.c
+++ b/hw/mem/memory-device.c
@@ -63,7 +63,7 @@ static void memory_device_check_addable(MachineState *ms, 
MemoryRegion *mr,
 error_setg(errp, "hypervisor has no free memory slots left");
 return;
 }
-if (!vhost_has_free_slot()) {
+if (!vhost_get_free_memslots()) {
 error_setg(errp, "a used vhost backend has no free memory slots left");
 return;
 }
diff --git a/hw/virtio/vhost-stub.c b/hw/virtio/vhost-stub.c
index aa858ef3fb..d53dd9d288 100644
--- a/hw/virtio/vhost-stub.c
+++ b/hw/virtio/vhost-stub.c
@@ -2,9 +2,9 @@
 #include "hw/virtio/vhost.h"
 #include "hw/virtio/vhost-user.h"
 
-bool vhost_has_free_slot(void)
+unsigned int vhost_get_free_memslots(void)
 {
-return true;
+return UINT_MAX;
 }
 
 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index c16ad14535..8e84dca246 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -54,7 +54,7 @@ static unsigned int used_shared_memslots;
 static QLIST_HEAD(, vhost_dev) vhost_devices =
 QLIST_HEAD_INITIALIZER(vhost_devices);
 
-bool vhost_has_free_slot(void)
+unsigned int vhost_get_free_memslots(void)
 {
 unsigned int free = UINT_MAX;
 struct vhost_dev *hdev;
@@ -71,7 +71,7 @@ bool vhost_has_free_slot(void)
 }
 free = MIN(free, cur_free);
 }
-return free > 0;
+return free;
 }
 
 static void vhost_dev_sync_region(struct vhost_dev *dev,
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 6a173cb9fa..603bf834be 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -315,7 +315,7 @@ uint64_t vhost_get_features(struct vhost_dev *hdev, const 
int *feature_bits,
  */
 void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
 uint64_t features);
-bool vhost_has_free_slot(void);
+unsigned int vhost_get_free_memslots(void);
 
 int vhost_net_set_backend(struct vhost_dev *hdev,
   struct vhost_vring_file *file);
-- 
2.41.0




[PATCH v2 04/16] kvm: Return number of free memslots

2023-08-25 Thread David Hildenbrand
Let's return the number of free slots instead of only checking if there
is a free slot. While at it, check all address spaces, which will also
consider SMM under x86 correctly.

Make the stub return UINT_MAX, such that we can call the function
unconditionally.

This is a preparation for memory devices that consume multiple memslots.

Signed-off-by: David Hildenbrand 
---
 accel/kvm/kvm-all.c  | 33 -
 accel/stubs/kvm-stub.c   |  4 ++--
 hw/mem/memory-device.c   |  2 +-
 include/sysemu/kvm.h |  2 +-
 include/sysemu/kvm_int.h |  1 +
 5 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index d07f1ecbd3..0fcea923a1 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -181,6 +181,24 @@ int kvm_get_max_memslots(void)
 return s->nr_slots;
 }
 
+unsigned int kvm_get_free_memslots(void)
+{
+unsigned int used_slots = 0;
+KVMState *s = kvm_state;
+int i;
+
+kvm_slots_lock();
+for (i = 0; i < s->nr_as; i++) {
+if (!s->as[i].ml) {
+continue;
+}
+used_slots = MAX(used_slots, s->as[i].ml->nr_used_slots);
+}
+kvm_slots_unlock();
+
+return s->nr_slots - used_slots;
+}
+
 /* Called with KVMMemoryListener.slots_lock held */
 static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml)
 {
@@ -196,19 +214,6 @@ static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml)
 return NULL;
 }
 
-bool kvm_has_free_slot(MachineState *ms)
-{
-KVMState *s = KVM_STATE(ms->accelerator);
-bool result;
-KVMMemoryListener *kml = >memory_listener;
-
-kvm_slots_lock();
-result = !!kvm_get_free_slot(kml);
-kvm_slots_unlock();
-
-return result;
-}
-
 /* Called with KVMMemoryListener.slots_lock held */
 static KVMSlot *kvm_alloc_slot(KVMMemoryListener *kml)
 {
@@ -1387,6 +1392,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
 }
 start_addr += slot_size;
 size -= slot_size;
+kml->nr_used_slots--;
 } while (size);
 return;
 }
@@ -1412,6 +1418,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
 ram_start_offset += slot_size;
 ram += slot_size;
 size -= slot_size;
+kml->nr_used_slots++;
 } while (size);
 }
 
diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c
index 235dc661bc..f39997d86e 100644
--- a/accel/stubs/kvm-stub.c
+++ b/accel/stubs/kvm-stub.c
@@ -109,9 +109,9 @@ int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, 
EventNotifier *n,
 return -ENOSYS;
 }
 
-bool kvm_has_free_slot(MachineState *ms)
+unsigned int kvm_get_free_memslots(void)
 {
-return false;
+return UINT_MAX;
 }
 
 void kvm_init_cpu_signals(CPUState *cpu)
diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c
index 667d56bd29..7c24685796 100644
--- a/hw/mem/memory-device.c
+++ b/hw/mem/memory-device.c
@@ -59,7 +59,7 @@ static void memory_device_check_addable(MachineState *ms, 
MemoryRegion *mr,
 const uint64_t size = memory_region_size(mr);
 
 /* we will need a new memory slot for kvm and vhost */
-if (kvm_enabled() && !kvm_has_free_slot(ms)) {
+if (!kvm_get_free_memslots()) {
 error_setg(errp, "hypervisor has no free memory slots left");
 return;
 }
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index ccaf55caf7..321427a543 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -216,7 +216,7 @@ typedef struct KVMRouteChange {
 
 /* external API */
 
-bool kvm_has_free_slot(MachineState *ms);
+unsigned int kvm_get_free_memslots(void);
 bool kvm_has_sync_mmu(void);
 int kvm_has_vcpu_events(void);
 int kvm_has_robust_singlestep(void);
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
index 511b42bde5..8b09e78b12 100644
--- a/include/sysemu/kvm_int.h
+++ b/include/sysemu/kvm_int.h
@@ -40,6 +40,7 @@ typedef struct KVMMemoryUpdate {
 typedef struct KVMMemoryListener {
 MemoryListener listener;
 KVMSlot *slots;
+int nr_used_slots;
 int as_id;
 QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add;
 QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del;
-- 
2.41.0




[PATCH v2 07/16] stubs: Rename qmp_memory_device.c to memory_device.c

2023-08-25 Thread David Hildenbrand
We want to place non-qmp stubs in there, so let's rename it. While at
it, put it into the MAINTAINERS file under "Memory devices".

Signed-off-by: David Hildenbrand 
---
 MAINTAINERS| 1 +
 stubs/{qmp_memory_device.c => memory_device.c} | 0
 stubs/meson.build  | 2 +-
 3 files changed, 2 insertions(+), 1 deletion(-)
 rename stubs/{qmp_memory_device.c => memory_device.c} (100%)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6111b6b4d9..aee6d36966 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2860,6 +2860,7 @@ F: hw/mem/pc-dimm.c
 F: include/hw/mem/memory-device.h
 F: include/hw/mem/nvdimm.h
 F: include/hw/mem/pc-dimm.h
+F: stubs/memory_device.c
 F: docs/nvdimm.txt
 
 SPICE
diff --git a/stubs/qmp_memory_device.c b/stubs/memory_device.c
similarity index 100%
rename from stubs/qmp_memory_device.c
rename to stubs/memory_device.c
diff --git a/stubs/meson.build b/stubs/meson.build
index ef6e39a64d..cde44972bf 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -32,7 +32,7 @@ stub_ss.add(files('monitor.c'))
 stub_ss.add(files('monitor-core.c'))
 stub_ss.add(files('physmem.c'))
 stub_ss.add(files('qemu-timer-notify-cb.c'))
-stub_ss.add(files('qmp_memory_device.c'))
+stub_ss.add(files('memory_device.c'))
 stub_ss.add(files('qmp-command-available.c'))
 stub_ss.add(files('qmp-quit.c'))
 stub_ss.add(files('qtest.c'))
-- 
2.41.0




[PATCH v2 02/16] vhost: Remove vhost_backend_can_merge() callback

2023-08-25 Thread David Hildenbrand
Checking whether the memory regions are equal is sufficient: if they are
equal, then most certainly the contained fd is equal.

The whole vhost-user memslot handling is suboptimal and overly
complicated. We shouldn't have to look up a RAM memory region we got
notified about in vhost_user_get_mr_data() using a host pointer. But that
requires a bigger rework -- especially an alternative vhost_set_mem_table()
backend call that simply consumes MemoryRegionSections.

For now, let's just drop vhost_backend_can_merge().

Acked-by: Stefan Hajnoczi 
Reviewed-by: Igor Mammedov 
Acked-by: Igor Mammedov 
Reviewed-by: Peter Xu 
Signed-off-by: David Hildenbrand 
---
 hw/virtio/vhost-user.c| 14 --
 hw/virtio/vhost-vdpa.c|  1 -
 hw/virtio/vhost.c |  6 +-
 include/hw/virtio/vhost-backend.h |  4 
 4 files changed, 1 insertion(+), 24 deletions(-)

diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 1e7553352a..e6de930872 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -2205,19 +2205,6 @@ static int vhost_user_migration_done(struct vhost_dev 
*dev, char* mac_addr)
 return -ENOTSUP;
 }
 
-static bool vhost_user_can_merge(struct vhost_dev *dev,
- uint64_t start1, uint64_t size1,
- uint64_t start2, uint64_t size2)
-{
-ram_addr_t offset;
-int mfd, rfd;
-
-(void)vhost_user_get_mr_data(start1, , );
-(void)vhost_user_get_mr_data(start2, , );
-
-return mfd == rfd;
-}
-
 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
 {
 VhostUserMsg msg;
@@ -2764,7 +2751,6 @@ const VhostOps user_ops = {
 .vhost_set_vring_enable = vhost_user_set_vring_enable,
 .vhost_requires_shm_log = vhost_user_requires_shm_log,
 .vhost_migration_done = vhost_user_migration_done,
-.vhost_backend_can_merge = vhost_user_can_merge,
 .vhost_net_set_mtu = vhost_user_net_set_mtu,
 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 42f2a4bae9..8f07bee041 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -1508,7 +1508,6 @@ const VhostOps vdpa_ops = {
 .vhost_set_config = vhost_vdpa_set_config,
 .vhost_requires_shm_log = NULL,
 .vhost_migration_done = NULL,
-.vhost_backend_can_merge = NULL,
 .vhost_net_set_mtu = NULL,
 .vhost_set_iotlb_callback = NULL,
 .vhost_send_device_iotlb_msg = NULL,
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index c1e6148833..c16ad14535 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -728,11 +728,7 @@ static void vhost_region_add_section(struct vhost_dev *dev,
 size_t offset = mrs_gpa - prev_gpa_start;
 
 if (prev_host_start + offset == mrs_host &&
-section->mr == prev_sec->mr &&
-(!dev->vhost_ops->vhost_backend_can_merge ||
- dev->vhost_ops->vhost_backend_can_merge(dev,
-mrs_host, mrs_size,
-prev_host_start, prev_size))) {
+section->mr == prev_sec->mr) {
 uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size);
 need_add = false;
 prev_sec->offset_within_address_space =
diff --git a/include/hw/virtio/vhost-backend.h 
b/include/hw/virtio/vhost-backend.h
index df2821ddae..12d578824b 100644
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@@ -86,9 +86,6 @@ typedef int (*vhost_set_vring_enable_op)(struct vhost_dev 
*dev,
 typedef bool (*vhost_requires_shm_log_op)(struct vhost_dev *dev);
 typedef int (*vhost_migration_done_op)(struct vhost_dev *dev,
char *mac_addr);
-typedef bool (*vhost_backend_can_merge_op)(struct vhost_dev *dev,
-   uint64_t start1, uint64_t size1,
-   uint64_t start2, uint64_t size2);
 typedef int (*vhost_vsock_set_guest_cid_op)(struct vhost_dev *dev,
 uint64_t guest_cid);
 typedef int (*vhost_vsock_set_running_op)(struct vhost_dev *dev, int start);
@@ -163,7 +160,6 @@ typedef struct VhostOps {
 vhost_set_vring_enable_op vhost_set_vring_enable;
 vhost_requires_shm_log_op vhost_requires_shm_log;
 vhost_migration_done_op vhost_migration_done;
-vhost_backend_can_merge_op vhost_backend_can_merge;
 vhost_vsock_set_guest_cid_op vhost_vsock_set_guest_cid;
 vhost_vsock_set_running_op vhost_vsock_set_running;
 vhost_set_iotlb_callback_op vhost_set_iotlb_callback;
-- 
2.41.0




[PATCH v2 13/16] memory: Clarify mapping requirements for RamDiscardManager

2023-08-25 Thread David Hildenbrand
We really only care about the RAM memory region not being mapped into
an address space yet as long as we're still setting up the
RamDiscardManager. Once mapped into an address space, memory notifiers
would get notified about such a region and any attempts to modify the
RamDiscardManager would be wrong.

While "mapped into an address space" is easy to check for RAM regions that
are mapped directly (following the ->container links), it's harder to
check when such regions are mapped indirectly via aliases. For now, we can
only detect that a region is mapped through an alias (->mapped_via_alias),
but we don't have a handle on these aliases to follow all their ->container
links to test if they are eventually mapped into an address space.

So relax the assertion in memory_region_set_ram_discard_manager(),
remove the check in memory_region_get_ram_discard_manager() and clarify
the doc.

Signed-off-by: David Hildenbrand 
---
 include/exec/memory.h | 5 +++--
 softmmu/memory.c  | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 68284428f8..5feb704585 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -593,8 +593,9 @@ typedef void (*ReplayRamDiscard)(MemoryRegionSection 
*section, void *opaque);
  * populated (consuming memory), to be used/accessed by the VM.
  *
  * A #RamDiscardManager can only be set for a RAM #MemoryRegion while the
- * #MemoryRegion isn't mapped yet; it cannot change while the #MemoryRegion is
- * mapped.
+ * #MemoryRegion isn't mapped into an address space yet (either directly
+ * or via an alias); it cannot change while the #MemoryRegion is
+ * mapped into an address space.
  *
  * The #RamDiscardManager is intended to be used by technologies that are
  * incompatible with discarding of RAM (e.g., VFIO, which may pin all
diff --git a/softmmu/memory.c b/softmmu/memory.c
index 7d9494ce70..c1e8aa133f 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -2081,7 +2081,7 @@ int memory_region_iommu_num_indexes(IOMMUMemoryRegion 
*iommu_mr)
 
 RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr)
 {
-if (!memory_region_is_mapped(mr) || !memory_region_is_ram(mr)) {
+if (!memory_region_is_ram(mr)) {
 return NULL;
 }
 return mr->rdm;
@@ -2090,7 +2090,7 @@ RamDiscardManager 
*memory_region_get_ram_discard_manager(MemoryRegion *mr)
 void memory_region_set_ram_discard_manager(MemoryRegion *mr,
RamDiscardManager *rdm)
 {
-g_assert(memory_region_is_ram(mr) && !memory_region_is_mapped(mr));
+g_assert(memory_region_is_ram(mr));
 g_assert(!rdm || !mr->rdm);
 mr->rdm = rdm;
 }
-- 
2.41.0




[PATCH v2 00/16] virtio-mem: Expose device memory through multiple memslots

2023-08-25 Thread David Hildenbrand
Quoting from patch #14:

Having large virtio-mem devices that only expose little memory to a VM
is currently a problem: we map the whole sparse memory region into the
guest using a single memslot, resulting in one gigantic memslot in KVM.
KVM allocates metadata for the whole memslot, which can result in quite
some memory waste.

Assuming we have a 1 TiB virtio-mem device and only expose little (e.g.,
1 GiB) memory, we would create a single 1 TiB memslot and KVM has to
allocate metadata for that 1 TiB memslot: on x86, this implies allocating
a significant amount of memory for metadata:

(1) RMAP: 8 bytes per 4 KiB, 8 bytes per 2 MiB, 8 bytes per 1 GiB
-> For 1 TiB: 2147483648 + 4194304 + 8192 = ~ 2 GiB (0.2 %)

With the TDP MMU (cat /sys/module/kvm/parameters/tdp_mmu) this gets
allocated lazily when required for nested VMs
(2) gfn_track: 2 bytes per 4 KiB
-> For 1 TiB: 536870912 = ~512 MiB (0.05 %)
(3) lpage_info: 4 bytes per 2 MiB, 4 bytes per 1 GiB
-> For 1 TiB: 2097152 + 4096 = ~2 MiB (0.0002 %)
(4) 2x dirty bitmaps for tracking: 2x 1 bit per 4 KiB page
-> For 1 TiB: 536870912 = 64 MiB (0.006 %)

So we primarily care about (1) and (2). The bad thing is, that the
memory consumption *doubles* once SMM is enabled, because we create the
memslot once for !SMM and once for SMM.

Having a 1 TiB memslot without the TDP MMU consumes around:
* With SMM: 5 GiB
* Without SMM: 2.5 GiB
Having a 1 TiB memslot with the TDP MMU consumes around:
* With SMM: 1 GiB
* Without SMM: 512 MiB

... and that's really something we want to optimize, to be able to just
start a VM with small boot memory (e.g., 4 GiB) and a virtio-mem device
that can grow very large (e.g., 1 TiB).

Consequently, using multiple memslots and only mapping the memslots we
really need can significantly reduce memory waste and speed up
memslot-related operations. Let's expose the sparse RAM memory region using
multiple memslots, mapping only the memslots we currently need into our
device memory region container.

The hyper-v balloon driver has similar demands [1].

For virtio-mem, this has to be turned manually on ("multiple-memslots=on"),
due to the interaction with vhost (below).

If we have less than 509 memslots available, we always default to a single
memslot. Otherwise, we automatically decide how many memslots to use
based on a simple heuristic (see patch #12), and try not to use more than
256 memslots across all memory devices: our historical DIMM limit.

As soon as any memory devices automatically decided on using more than
one memslot, vhost devices that support less than 509 memslots (e.g.,
currently most vhost-user devices like with virtiofsd) can no longer be
plugged as a precaution.

Quoting from patch #12:

Plugging vhost devices with less than 509 memslots available while we
have memory devices plugged that consume multiple memslots due to
automatic decisions can be problematic. Most configurations might just fail
due to "limit < used + reserved", however, it can also happen that these
memory devices would suddenly consume memslots that would actually be
required by other memslot consumers (boot, PCI BARs) later. Note that this
has always been sketchy with vhost devices that support only a small number
of memslots; but we don't want to make it any worse. So let's keep it simple
and simply reject plugging such vhost devices in such a configuration.

Eventually, all vhost devices that want to be fully compatible with such
memory devices should support a decent number of memslots (>= 509).


The recommendation is to plug such vhost devices before the virtio-mem
decides, or to not set "multiple-memslots=on". As soon as these devices
support a reasonable number of memslots (>= 509), this will start working
automatically.

I run some tests on x86_64, now also including vfio tests. Seems to work
as expected, even when multiple memslots are used.


Patch #1 -- #3 are from [2] that were not picked up yet.

Patch #4 -- #12 add handling of multiple memslots to memory devices

Patch #13 -- #14 add "multiple-memslots=on" support to virtio-mem

Patch #15 -- #16 make sure that virtio-mem memslots can be enabled/disabled
atomically


v1 -> v2:
* Include patches from [1]
* A lot of code simplification and reorganization, too many to spell out
* *don't* add a general soft-limit on memslots, to avoid warning in sane
  setups
* Simplify handling of vhost devices with a small number of memslots:
  simply fail plugging them
* "virtio-mem: Expose device memory via multiple memslots if enabled"
 -> Fix one "is this the last memslot" check
* Much more testing


[1] https://lkml.kernel.org/r/cover.1689786474.git.maciej.szmigi...@oracle.com
[2] https://lkml.kernel.org/r/20230523185915.540373-1-da...@redhat.com

Cc: Paolo Bonzini 
Cc: Igor Mammedov 
Cc: Xiao 

[PATCH v2 08/16] memory-device: Track required and actually used memslots in DeviceMemoryState

2023-08-25 Thread David Hildenbrand
Let's track how many memslots are required by plugged memory devices and
how many are currently actually getting used by plugged memory
devices.

"required - used" is the number of reserved memslots. For now, the number
of used and required memslots is always equal, and there are no
reservations. This is a preparation for memory devices that want to
dynamically consume memslots after initially specifying how many they
require -- where we'll end up with reserved memslots.

To track the number of used memslots, create a new address space for
our device memory and register a memory listener (add/remove) for that
address space.

Signed-off-by: David Hildenbrand 
---
 hw/mem/memory-device.c | 54 ++
 include/hw/boards.h| 10 +++-
 2 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c
index 4613a15e1f..ee77f9d290 100644
--- a/hw/mem/memory-device.c
+++ b/hw/mem/memory-device.c
@@ -286,6 +286,7 @@ void memory_device_plug(MemoryDeviceState *md, MachineState 
*ms)
 g_assert(ms->device_memory);
 
 ms->device_memory->used_region_size += memory_region_size(mr);
+ms->device_memory->required_memslots += memory_device_get_memslots(md);
 memory_region_add_subregion(>device_memory->mr,
 addr - ms->device_memory->base, mr);
 trace_memory_device_plug(DEVICE(md)->id ? DEVICE(md)->id : "", addr);
@@ -305,6 +306,7 @@ void memory_device_unplug(MemoryDeviceState *md, 
MachineState *ms)
 
 memory_region_del_subregion(>device_memory->mr, mr);
 ms->device_memory->used_region_size -= memory_region_size(mr);
+ms->device_memory->required_memslots -= memory_device_get_memslots(md);
 trace_memory_device_unplug(DEVICE(md)->id ? DEVICE(md)->id : "",
mdc->get_addr(md));
 }
@@ -324,6 +326,50 @@ uint64_t memory_device_get_region_size(const 
MemoryDeviceState *md,
 return memory_region_size(mr);
 }
 
+static void memory_devices_region_mod(MemoryListener *listener,
+  MemoryRegionSection *mrs, bool add)
+{
+DeviceMemoryState *dms = container_of(listener, DeviceMemoryState,
+  listener);
+
+if (!memory_region_is_ram(mrs->mr)) {
+warn_report("Unexpected memory region mapped into device memory 
region.");
+return;
+}
+
+/*
+ * The expectation is that each distinct RAM memory region section in
+ * our region for memory devices consumes exactly one memslot in KVM
+ * and in vhost. For vhost, this is true, except:
+ * * ROM memory regions don't consume a memslot. These get used very
+ *   rarely for memory devices (R/O NVDIMMs).
+ * * Memslots without a fd (memory-backend-ram) don't necessarily
+ *   consume a memslot. Such setups are quite rare and possibly bogus:
+ *   the memory would be inaccessible by such vhost devices.
+ *
+ * So for vhost, in corner cases we might over-estimate the number of
+ * memslots that are currently used or that might still be reserved
+ * (required - used).
+ */
+dms->used_memslots += add ? 1 : -1;
+
+if (dms->used_memslots > dms->required_memslots) {
+warn_report("Memory devices use more memory slots than indicated as 
required.");
+}
+}
+
+static void memory_devices_region_add(MemoryListener *listener,
+  MemoryRegionSection *mrs)
+{
+return memory_devices_region_mod(listener, mrs, true);
+}
+
+static void memory_devices_region_del(MemoryListener *listener,
+  MemoryRegionSection *mrs)
+{
+return memory_devices_region_mod(listener, mrs, false);
+}
+
 void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size)
 {
 g_assert(size);
@@ -333,8 +379,16 @@ void machine_memory_devices_init(MachineState *ms, hwaddr 
base, uint64_t size)
 
 memory_region_init(>device_memory->mr, OBJECT(ms), "device-memory",
size);
+address_space_init(>device_memory->as, >device_memory->mr,
+   "device-memory");
 memory_region_add_subregion(get_system_memory(), ms->device_memory->base,
 >device_memory->mr);
+
+/* Track the number of memslots used by memory devices. */
+ms->device_memory->listener.region_add = memory_devices_region_add;
+ms->device_memory->listener.region_del = memory_devices_region_del;
+memory_listener_register(>device_memory->listener,
+ >device_memory->as);
 }
 
 static const TypeInfo memory_device_info = {
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 3b541ffd24..e344ded607 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -296,15 +296,23 @@ struct MachineClass {
  * DeviceMemoryState:
  * @base: address in guest physical address space where the memory
  * address space for memory 

[PATCH v2 15/16] memory, vhost: Allow for marking memory device memory regions unmergeable

2023-08-25 Thread David Hildenbrand
Let's allow for marking memory regions unmergeable, to teach
flatview code and vhost to not merge adjacent aliases to the same memory
region into a larger memory section; instead, we want separate aliases to
stay separate such that we can atomically map/unmap aliases without
affecting other aliases.

This is desired for virtio-mem mapping device memory located on a RAM
memory region via multiple aliases into a memory region container,
resulting in separate memslots that can get (un)mapped atomically.

As an example with virtio-mem, the layout would look something like this:
  [...]
  00024000-0020bfff (prio 0, i/o): device-memory
00024000-00043fff (prio 0, i/o): virtio-mem
  00024000-00027fff (prio 0, ram): alias memslot-0 @mem2 
-3fff
  00028000-0002bfff (prio 0, ram): alias memslot-1 @mem2 
4000-7fff
  0002c000-0002 (prio 0, ram): alias memslot-2 @mem2 
8000-bfff
  [...]

Without unmergable memory regions, all three memslots would get merged into
a single memory section. For example, when mapping another alias (e.g.,
virtio-mem-memslot-3) or when unmapping any of the mapped aliases,
memory listeners will first get notified about the removal of the big
memory section to then get notified about re-adding of the new
(differently merged) memory section(s).

In an ideal world, memory listeners would be able to deal with that
atomically, like KVM nowadays does. However, (a) supporting this for other
memory listeners (vhost-user, vfio) is fairly hard: temporary removal
can result in all kinds of issues on concurrent access to guest memory;
and (b) this handling is undesired, because temporarily removing+readding
can consume quite some time on bigger memslots and is not efficient
(e.g., vfio unpinning and repinning pages ...).

Let's allow for marking a memory region unmergeable, such that we
can atomically (un)map aliases to the same memory region, similar to
(un)mapping individual DIMMs.

Similarly, teach vhost code to not redo what flatview core stopped doing:
don't merge such sections. Merging in vhost code is really only relevant
for handling random holes in boot memory where, without this merging,
the vhost-user backend wouldn't be able to mmap() some boot memory
backed on hugetlb.

We'll use this for virtio-mem next.

Signed-off-by: David Hildenbrand 
---
 hw/virtio/vhost.c |  4 ++--
 include/exec/memory.h | 22 ++
 softmmu/memory.c  | 31 +--
 3 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 24013b39d6..503a160c96 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -707,7 +707,7 @@ static void vhost_region_add_section(struct vhost_dev *dev,
mrs_size, mrs_host);
 }
 
-if (dev->n_tmp_sections) {
+if (dev->n_tmp_sections && !section->unmergeable) {
 /* Since we already have at least one section, lets see if
  * this extends it; since we're scanning in order, we only
  * have to look at the last one, and the FlatView that calls
@@ -740,7 +740,7 @@ static void vhost_region_add_section(struct vhost_dev *dev,
 size_t offset = mrs_gpa - prev_gpa_start;
 
 if (prev_host_start + offset == mrs_host &&
-section->mr == prev_sec->mr) {
+section->mr == prev_sec->mr && !prev_sec->unmergeable) {
 uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size);
 need_add = false;
 prev_sec->offset_within_address_space =
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 5feb704585..916d565533 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -95,6 +95,7 @@ struct ReservedRegion {
  * relative to the region's address space
  * @readonly: writes to this section are ignored
  * @nonvolatile: this section is non-volatile
+ * @unmergeable: this section should not get merged with adjacent sections
  */
 struct MemoryRegionSection {
 Int128 size;
@@ -104,6 +105,7 @@ struct MemoryRegionSection {
 hwaddr offset_within_address_space;
 bool readonly;
 bool nonvolatile;
+bool unmergeable;
 };
 
 typedef struct IOMMUTLBEntry IOMMUTLBEntry;
@@ -767,6 +769,7 @@ struct MemoryRegion {
 bool nonvolatile;
 bool rom_device;
 bool flush_coalesced_mmio;
+bool unmergeable;
 uint8_t dirty_log_mask;
 bool is_iommu;
 RAMBlock *ram_block;
@@ -2344,6 +2347,25 @@ void memory_region_set_size(MemoryRegion *mr, uint64_t 
size);
 void memory_region_set_alias_offset(MemoryRegion *mr,
 hwaddr offset);
 
+/*
+ * memory_region_set_unmergeable: Set a memory region unmergeable
+ *
+ * Mark a memory region unmergeable, resulting in the memory region (or
+ * everything 

[PATCH v2 01/16] vhost: Rework memslot filtering and fix "used_memslot" tracking

2023-08-25 Thread David Hildenbrand
Having multiple vhost devices, some filtering out fd-less memslots and
some not, can mess up the "used_memslot" accounting. Consequently our
"free memslot" checks become unreliable and we might run out of free
memslots at runtime later.

An example sequence which can trigger a potential issue that involves
different vhost backends (vhost-kernel and vhost-user) and hotplugged
memory devices can be found at [1].

Let's make the filtering mechanism less generic and distinguish between
backends that support private memslots (without a fd) and ones that only
support shared memslots (with a fd). Track the used_memslots for both
cases separately and use the corresponding value when required.

Note: Most probably we should filter out MAP_PRIVATE fd-based RAM regions
(for example, via memory-backend-memfd,...,shared=off or as default with
 memory-backend-file) as well. When not using MAP_SHARED, it might not work
as expected. Add a TODO for now.

[1] https://lkml.kernel.org/r/fad9136f-08d3-3fd9-71a1-502069c00...@redhat.com

Fixes: 988a27754bbb ("vhost: allow backends to filter memory sections")
Cc: Tiwei Bie 
Acked-by: Igor Mammedov 
Reviewed-by: Peter Xu 
Signed-off-by: David Hildenbrand 
---
 hw/virtio/vhost-user.c|  7 ++--
 hw/virtio/vhost.c | 56 ++-
 include/hw/virtio/vhost-backend.h |  5 ++-
 3 files changed, 52 insertions(+), 16 deletions(-)

diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 8dcf049d42..1e7553352a 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -2500,10 +2500,9 @@ vhost_user_crypto_close_session(struct vhost_dev *dev, 
uint64_t session_id)
 return 0;
 }
 
-static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
-  MemoryRegionSection *section)
+static bool vhost_user_no_private_memslots(struct vhost_dev *dev)
 {
-return memory_region_get_fd(section->mr) >= 0;
+return true;
 }
 
 static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
@@ -2746,6 +2745,7 @@ const VhostOps user_ops = {
 .vhost_backend_init = vhost_user_backend_init,
 .vhost_backend_cleanup = vhost_user_backend_cleanup,
 .vhost_backend_memslots_limit = vhost_user_memslots_limit,
+.vhost_backend_no_private_memslots = vhost_user_no_private_memslots,
 .vhost_set_log_base = vhost_user_set_log_base,
 .vhost_set_mem_table = vhost_user_set_mem_table,
 .vhost_set_vring_addr = vhost_user_set_vring_addr,
@@ -2772,7 +2772,6 @@ const VhostOps user_ops = {
 .vhost_set_config = vhost_user_set_config,
 .vhost_crypto_create_session = vhost_user_crypto_create_session,
 .vhost_crypto_close_session = vhost_user_crypto_close_session,
-.vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
 .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
 .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
 .vhost_dev_start = vhost_user_dev_start,
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index e2f6ffb446..c1e6148833 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -45,20 +45,33 @@
 static struct vhost_log *vhost_log;
 static struct vhost_log *vhost_log_shm;
 
+/* Memslots used by backends that support private memslots (without an fd). */
 static unsigned int used_memslots;
+
+/* Memslots used by backends that only support shared memslots (with an fd). */
+static unsigned int used_shared_memslots;
+
 static QLIST_HEAD(, vhost_dev) vhost_devices =
 QLIST_HEAD_INITIALIZER(vhost_devices);
 
 bool vhost_has_free_slot(void)
 {
-unsigned int slots_limit = ~0U;
+unsigned int free = UINT_MAX;
 struct vhost_dev *hdev;
 
 QLIST_FOREACH(hdev, _devices, entry) {
 unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
-slots_limit = MIN(slots_limit, r);
+unsigned int cur_free;
+
+if (hdev->vhost_ops->vhost_backend_no_private_memslots &&
+hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) {
+cur_free = r - used_shared_memslots;
+} else {
+cur_free = r - used_memslots;
+}
+free = MIN(free, cur_free);
 }
-return slots_limit > used_memslots;
+return free > 0;
 }
 
 static void vhost_dev_sync_region(struct vhost_dev *dev,
@@ -474,8 +487,7 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev,
  * vhost_section: identify sections needed for vhost access
  *
  * We only care about RAM sections here (where virtqueue and guest
- * internals accessed by virtio might live). If we find one we still
- * allow the backend to potentially filter it out of our list.
+ * internals accessed by virtio might live).
  */
 static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)
 {
@@ -502,8 +514,16 @@ static bool vhost_section(struct vhost_dev *dev, 
MemoryRegionSection *section)
 return false;
 }
 
-  

[PATCH 17/20] target/riscv/cpu.c: export isa_edata_arr[]

2023-08-25 Thread Daniel Henrique Barboza
This array will be read by the TCG accel class, allowing it to handle
priv spec verifications on its own. The array will remain here in cpu.c
because it's also used by the riscv,isa string function.

To export it we'll make it constant and finish it with an empty element
since ARRAY_SIZE() won't work outside of cpu.c. Get rid of its
ARRAY_SIZE() usage now to alleviate the changes for the next patch.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c | 47 +-
 target/riscv/cpu.h |  7 +++
 2 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 3c9db46837..ac5ad4727c 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -41,15 +41,6 @@ static const char riscv_single_letter_exts[] = "IEMAFDQCPVH";
 const uint32_t misa_bits[] = {RVI, RVE, RVM, RVA, RVF, RVD, RVV,
   RVC, RVS, RVU, RVH, RVJ, RVG};
 
-struct isa_ext_data {
-const char *name;
-int min_version;
-int ext_enable_offset;
-};
-
-#define ISA_EXT_DATA_ENTRY(_name, _min_ver, _prop) \
-{#_name, _min_ver, CPU_CFG_OFFSET(_prop)}
-
 /*
  * From vector_helper.c
  * Note that vector data is stored in host-endian 64-bit chunks,
@@ -61,6 +52,9 @@ struct isa_ext_data {
 #define BYTE(x)   (x)
 #endif
 
+#define ISA_EXT_DATA_ENTRY(_name, _min_ver, _prop) \
+{#_name, _min_ver, CPU_CFG_OFFSET(_prop)}
+
 /*
  * Here are the ordering rules of extension naming defined by RISC-V
  * specification :
@@ -81,7 +75,7 @@ struct isa_ext_data {
  * Single letter extensions are checked in riscv_cpu_validate_misa_priv()
  * instead.
  */
-static const struct isa_ext_data isa_edata_arr[] = {
+const RISCVIsaExtData isa_edata_arr[] = {
 ISA_EXT_DATA_ENTRY(zicbom, PRIV_VERSION_1_12_0, ext_icbom),
 ISA_EXT_DATA_ENTRY(zicboz, PRIV_VERSION_1_12_0, ext_icboz),
 ISA_EXT_DATA_ENTRY(zicond, PRIV_VERSION_1_12_0, ext_zicond),
@@ -160,6 +154,8 @@ static const struct isa_ext_data isa_edata_arr[] = {
 ISA_EXT_DATA_ENTRY(xtheadmempair, PRIV_VERSION_1_11_0, ext_xtheadmempair),
 ISA_EXT_DATA_ENTRY(xtheadsync, PRIV_VERSION_1_11_0, ext_xtheadsync),
 ISA_EXT_DATA_ENTRY(xventanacondops, PRIV_VERSION_1_12_0, 
ext_XVentanaCondOps),
+
+DEFINE_PROP_END_OF_LIST(),
 };
 
 bool isa_ext_is_enabled(RISCVCPU *cpu, uint32_t ext_offset)
@@ -178,14 +174,14 @@ void isa_ext_update_enabled(RISCVCPU *cpu, uint32_t 
ext_offset, bool en)
 
 int cpu_cfg_ext_get_min_version(uint32_t ext_offset)
 {
-int i;
+const RISCVIsaExtData *edata;
 
-for (i = 0; i < ARRAY_SIZE(isa_edata_arr); i++) {
-if (isa_edata_arr[i].ext_enable_offset != ext_offset) {
+for (edata = isa_edata_arr; edata && edata->name; edata++) {
+if (edata->ext_enable_offset != ext_offset) {
 continue;
 }
 
-return isa_edata_arr[i].min_version;
+return edata->min_version;
 }
 
 /* Default to oldest priv spec if no match found */
@@ -933,22 +929,21 @@ static void riscv_cpu_disas_set_info(CPUState *s, 
disassemble_info *info)
 void riscv_cpu_disable_priv_spec_isa_exts(RISCVCPU *cpu)
 {
 CPURISCVState *env = >env;
-int i;
+const RISCVIsaExtData *edata;
 
 /* Force disable extensions if priv spec version does not match */
-for (i = 0; i < ARRAY_SIZE(isa_edata_arr); i++) {
-if (isa_ext_is_enabled(cpu, isa_edata_arr[i].ext_enable_offset) &&
-(env->priv_ver < isa_edata_arr[i].min_version)) {
-isa_ext_update_enabled(cpu, isa_edata_arr[i].ext_enable_offset,
-   false);
+for (edata = isa_edata_arr; edata && edata->name; edata++) {
+if (isa_ext_is_enabled(cpu, edata->ext_enable_offset) &&
+(env->priv_ver < edata->min_version)) {
+isa_ext_update_enabled(cpu, edata->ext_enable_offset, false);
 #ifndef CONFIG_USER_ONLY
 warn_report("disabling %s extension for hart 0x" TARGET_FMT_lx
 " because privilege spec version does not match",
-isa_edata_arr[i].name, env->mhartid);
+edata->name, env->mhartid);
 #else
 warn_report("disabling %s extension because "
 "privilege spec version does not match",
-isa_edata_arr[i].name);
+edata->name);
 #endif
 }
 }
@@ -1614,13 +1609,13 @@ static void riscv_cpu_class_init(ObjectClass *c, void 
*data)
 static void riscv_isa_string_ext(RISCVCPU *cpu, char **isa_str,
  int max_str_len)
 {
+const RISCVIsaExtData *edata;
 char *old = *isa_str;
 char *new = *isa_str;
-int i;
 
-for (i = 0; i < ARRAY_SIZE(isa_edata_arr); i++) {
-if (isa_ext_is_enabled(cpu, isa_edata_arr[i].ext_enable_offset)) {
-new = g_strconcat(old, "_", isa_edata_arr[i].name, NULL);
+for (edata = isa_edata_arr; edata && edata->name; edata++) {
+   

[PATCH 08/20] target/riscv: move 'host' CPU declaration to kvm.c

2023-08-25 Thread Daniel Henrique Barboza
This CPU only exists if we're compiling with KVM so move it to the kvm
specific file. While we're at it, change its class_init() to enable the
user_extensions_flag class property, sparing us from having to execute
riscv_cpu_add_user_properties() by hand and letting the post_init() hook
do the work.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c | 23 ---
 target/riscv/kvm.c | 29 +
 2 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index f67b782675..dbf81796d2 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -655,19 +655,6 @@ static void rv32_imafcu_nommu_cpu_init(Object *obj)
 }
 #endif
 
-#if defined(CONFIG_KVM)
-static void riscv_host_cpu_init(Object *obj)
-{
-CPURISCVState *env = &RISCV_CPU(obj)->env;
-#if defined(TARGET_RISCV32)
-set_misa(env, MXL_RV32, 0);
-#elif defined(TARGET_RISCV64)
-set_misa(env, MXL_RV64, 0);
-#endif
-riscv_cpu_add_user_properties(obj);
-}
-#endif /* CONFIG_KVM */
-
 static ObjectClass *riscv_cpu_class_by_name(const char *cpu_model)
 {
 ObjectClass *oc;
@@ -2000,13 +1987,6 @@ static void riscv_vendor_cpu_class_init(ObjectClass *c, 
void *data)
 rcc->user_extension_properties = false;
 }
 
-#define DEFINE_CPU(type_name, initfn)  \
-{  \
-.name = type_name, \
-.parent = TYPE_RISCV_CPU,  \
-.instance_init = initfn\
-}
-
 #define DEFINE_DYNAMIC_CPU(type_name, initfn) \
 { \
 .name = type_name,\
@@ -2047,9 +2027,6 @@ static const TypeInfo riscv_cpu_type_infos[] = {
 },
 DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_ANY,  riscv_any_cpu_init),
 DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_MAX,  riscv_max_cpu_init),
-#if defined(CONFIG_KVM)
-DEFINE_CPU(TYPE_RISCV_CPU_HOST, riscv_host_cpu_init),
-#endif
 #if defined(TARGET_RISCV32)
 DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_BASE32,   rv32_base_cpu_init),
 DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_IBEX, rv32_ibex_cpu_init),
diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index 7c6dec05e3..59004caa8e 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -1217,3 +1217,32 @@ void kvm_riscv_aia_create(MachineState *machine, 
uint64_t group_shift,
 
 kvm_msi_via_irqfd_allowed = kvm_irqfds_enabled();
 }
+
+static void riscv_host_cpu_init(Object *obj)
+{
+CPURISCVState *env = &RISCV_CPU(obj)->env;
+
+#if defined(TARGET_RISCV32)
+env->misa_mxl_max = env->misa_mxl = MXL_RV32;
+#elif defined(TARGET_RISCV64)
+env->misa_mxl_max = env->misa_mxl = MXL_RV64;
+#endif
+}
+
+static void riscv_kvm_cpu_class_init(ObjectClass *c, void *data)
+{
+RISCVCPUClass *rcc = RISCV_CPU_CLASS(c);
+
+rcc->user_extension_properties = true;
+}
+
+static const TypeInfo riscv_kvm_cpu_type_infos[] = {
+{
+.name = TYPE_RISCV_CPU_HOST,
+.parent = TYPE_RISCV_CPU,
+.instance_init = riscv_host_cpu_init,
+.class_init = riscv_kvm_cpu_class_init,
+}
+};
+
+DEFINE_TYPES(riscv_kvm_cpu_type_infos)
-- 
2.41.0




[PATCH 05/20] target/riscv/cpu.c: add 'user_extension_properties' class prop

2023-08-25 Thread Daniel Henrique Barboza
We want to use a post_init hook to call the cpu_instance_init callback
from each accelerator, moving repetitive code from the cpu_init()
functions to be handled by the accelerator class. But first we need to
ensure that we don't change behavior - vendor CPUs shouldn't expose user
properties, generic CPUs should expose.

Create a new 'user_extension_properties' class property. It'll be
initialized during the class init of each CPU type, where only generic
(dynamic) CPUs will enable it. This new property will be used shortly.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu-qom.h |  3 +++
 target/riscv/cpu.c | 46 +-
 2 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h
index f3fbe37a2c..7c76dc0dcc 100644
--- a/target/riscv/cpu-qom.h
+++ b/target/riscv/cpu-qom.h
@@ -24,6 +24,7 @@
 
 #define TYPE_RISCV_CPU "riscv-cpu"
 #define TYPE_RISCV_DYNAMIC_CPU "riscv-dynamic-cpu"
+#define TYPE_RISCV_VENDOR_CPU "riscv-vendor-cpu"
 
 #define RISCV_CPU_TYPE_SUFFIX "-" TYPE_RISCV_CPU
 #define RISCV_CPU_TYPE_NAME(name) (name RISCV_CPU_TYPE_SUFFIX)
@@ -68,5 +69,7 @@ struct RISCVCPUClass {
 /*< public >*/
 DeviceRealize parent_realize;
 ResettablePhases parent_phases;
+
+bool user_extension_properties;
 };
 #endif /* RISCV_CPU_QOM_H */
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 839b83e52a..e2e8724dc2 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1975,6 +1975,20 @@ void riscv_cpu_list(void)
 g_slist_free(list);
 }
 
+static void riscv_dynamic_cpu_class_init(ObjectClass *c, void *data)
+{
+RISCVCPUClass *rcc = RISCV_CPU_CLASS(c);
+
+rcc->user_extension_properties = true;
+}
+
+static void riscv_vendor_cpu_class_init(ObjectClass *c, void *data)
+{
+RISCVCPUClass *rcc = RISCV_CPU_CLASS(c);
+
+rcc->user_extension_properties = false;
+}
+
 #define DEFINE_CPU(type_name, initfn)  \
 {  \
 .name = type_name, \
@@ -1989,6 +2003,13 @@ void riscv_cpu_list(void)
 .instance_init = initfn   \
 }
 
+#define DEFINE_VENDOR_CPU(type_name, initfn) \
+{\
+.name = type_name,   \
+.parent = TYPE_RISCV_VENDOR_CPU, \
+.instance_init = initfn  \
+}
+
 static const TypeInfo riscv_cpu_type_infos[] = {
 {
 .name = TYPE_RISCV_CPU,
@@ -2003,6 +2024,13 @@ static const TypeInfo riscv_cpu_type_infos[] = {
 {
 .name = TYPE_RISCV_DYNAMIC_CPU,
 .parent = TYPE_RISCV_CPU,
+.class_init = riscv_dynamic_cpu_class_init,
+.abstract = true,
+},
+{
+.name = TYPE_RISCV_VENDOR_CPU,
+.parent = TYPE_RISCV_CPU,
+.class_init = riscv_vendor_cpu_class_init,
 .abstract = true,
 },
 DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_ANY,  riscv_any_cpu_init),
@@ -2012,17 +2040,17 @@ static const TypeInfo riscv_cpu_type_infos[] = {
 #endif
 #if defined(TARGET_RISCV32)
 DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_BASE32,   rv32_base_cpu_init),
-DEFINE_CPU(TYPE_RISCV_CPU_IBEX, rv32_ibex_cpu_init),
-DEFINE_CPU(TYPE_RISCV_CPU_SIFIVE_E31,   rv32_sifive_e_cpu_init),
-DEFINE_CPU(TYPE_RISCV_CPU_SIFIVE_E34,   rv32_imafcu_nommu_cpu_init),
-DEFINE_CPU(TYPE_RISCV_CPU_SIFIVE_U34,   rv32_sifive_u_cpu_init),
+DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_IBEX, rv32_ibex_cpu_init),
+DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_E31,   rv32_sifive_e_cpu_init),
+DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_E34,   
rv32_imafcu_nommu_cpu_init),
+DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_U34,   rv32_sifive_u_cpu_init),
 #elif defined(TARGET_RISCV64)
 DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_BASE64,   rv64_base_cpu_init),
-DEFINE_CPU(TYPE_RISCV_CPU_SIFIVE_E51,   rv64_sifive_e_cpu_init),
-DEFINE_CPU(TYPE_RISCV_CPU_SIFIVE_U54,   rv64_sifive_u_cpu_init),
-DEFINE_CPU(TYPE_RISCV_CPU_SHAKTI_C, rv64_sifive_u_cpu_init),
-DEFINE_CPU(TYPE_RISCV_CPU_THEAD_C906,   rv64_thead_c906_cpu_init),
-DEFINE_CPU(TYPE_RISCV_CPU_VEYRON_V1,rv64_veyron_v1_cpu_init),
+DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_E51,   rv64_sifive_e_cpu_init),
+DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_U54,   rv64_sifive_u_cpu_init),
+DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SHAKTI_C, rv64_sifive_u_cpu_init),
+DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_THEAD_C906,   
rv64_thead_c906_cpu_init),
+DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_VEYRON_V1,
rv64_veyron_v1_cpu_init),
 DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_BASE128,  rv128_base_cpu_init),
 #endif
 };
-- 
2.41.0




[PATCH 15/20] target/riscv/tcg: introduce tcg_cpu_instance_init()

2023-08-25 Thread Daniel Henrique Barboza
tcg_cpu_instance_init() will be the 'cpu_instance_init' impl for the TCG
accelerator. It'll be called from within riscv_cpu_post_init(), via
accel_cpu_instance_init(), similar to what happens with KVM. In fact, to
preserve behavior, the implementation will be similar to what
riscv_cpu_post_init() already does.

In this patch we'll move riscv_cpu_add_user_properties() and
riscv_init_max_cpu_extensions() and all their dependencies to tcg-cpu.c.
All multi-extension properties code was moved. The 'multi_ext_user_opts'
hash table was also moved to tcg-cpu.c since it's a TCG only structure,
meaning that we won't have to worry about initializing a TCG hash table
when running a KVM CPU anymore.

riscv_cpu_add_user_properties() will remain in cpu.c for now due to how
much code it requires to be moved at the same time. We'll do that in the
next patch.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c | 141 +
 target/riscv/cpu.h |   2 +-
 target/riscv/tcg/tcg-cpu.c | 138 
 3 files changed, 140 insertions(+), 141 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index f9aea6a80a..89b09a7e89 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -162,9 +162,6 @@ static const struct isa_ext_data isa_edata_arr[] = {
 ISA_EXT_DATA_ENTRY(xventanacondops, PRIV_VERSION_1_12_0, 
ext_XVentanaCondOps),
 };
 
-/* Hash that stores user set extensions */
-static GHashTable *multi_ext_user_opts;
-
 bool isa_ext_is_enabled(RISCVCPU *cpu, uint32_t ext_offset)
 {
 bool *ext_enabled = (void *)>cfg + ext_offset;
@@ -195,12 +192,6 @@ int cpu_cfg_ext_get_min_version(uint32_t ext_offset)
 return PRIV_VERSION_1_10_0;
 }
 
-bool cpu_cfg_ext_is_user_set(uint32_t ext_offset)
-{
-return g_hash_table_contains(multi_ext_user_opts,
- GUINT_TO_POINTER(ext_offset));
-}
-
 const char * const riscv_int_regnames[] = {
 "x0/zero", "x1/ra",  "x2/sp",  "x3/gp",  "x4/tp",  "x5/t0",   "x6/t1",
 "x7/t2",   "x8/s0",  "x9/s1",  "x10/a0", "x11/a1", "x12/a2",  "x13/a3",
@@ -281,9 +272,6 @@ static const char * const riscv_intr_names[] = {
 "reserved"
 };
 
-static void riscv_cpu_add_user_properties(Object *obj);
-static void riscv_init_max_cpu_extensions(Object *obj);
-
 const char *riscv_cpu_get_trap_name(target_ulong cause, bool async)
 {
 if (async) {
@@ -295,7 +283,7 @@ const char *riscv_cpu_get_trap_name(target_ulong cause, 
bool async)
 }
 }
 
-static void set_misa(CPURISCVState *env, RISCVMXL mxl, uint32_t ext)
+void set_misa(CPURISCVState *env, RISCVMXL mxl, uint32_t ext)
 {
 env->misa_mxl_max = env->misa_mxl = mxl;
 env->misa_ext_mask = env->misa_ext = ext;
@@ -1198,18 +1186,7 @@ static void riscv_cpu_set_irq(void *opaque, int irq, int 
level)
 
 static void riscv_cpu_post_init(Object *obj)
 {
-RISCVCPU *cpu = RISCV_CPU(obj);
-RISCVCPUClass *rcc = RISCV_CPU_GET_CLASS(cpu);
-
 accel_cpu_instance_init(CPU(obj));
-
-if (rcc->user_extension_properties) {
-riscv_cpu_add_user_properties(obj);
-}
-
-if (cpu->cfg.max_features) {
-riscv_init_max_cpu_extensions(obj);
-}
 }
 
 static void riscv_cpu_init(Object *obj)
@@ -1222,8 +1199,6 @@ static void riscv_cpu_init(Object *obj)
 qdev_init_gpio_in(DEVICE(cpu), riscv_cpu_set_irq,
   IRQ_LOCAL_MAX + IRQ_LOCAL_GUEST_MAX);
 #endif /* CONFIG_USER_ONLY */
-
-multi_ext_user_opts = g_hash_table_new(NULL, g_direct_equal);
 }
 
 typedef struct RISCVCPUMisaExtConfig {
@@ -1503,120 +1478,6 @@ Property riscv_cpu_options[] = {
 DEFINE_PROP_END_OF_LIST(),
 };
 
-static void cpu_set_multi_ext_cfg(Object *obj, Visitor *v, const char *name,
-  void *opaque, Error **errp)
-{
-const RISCVCPUMultiExtConfig *multi_ext_cfg = opaque;
-bool value;
-
-if (!visit_type_bool(v, name, , errp)) {
-return;
-}
-
-isa_ext_update_enabled(RISCV_CPU(obj), multi_ext_cfg->offset, value);
-
-g_hash_table_insert(multi_ext_user_opts,
-GUINT_TO_POINTER(multi_ext_cfg->offset),
-(gpointer)value);
-}
-
-static void cpu_get_multi_ext_cfg(Object *obj, Visitor *v, const char *name,
-  void *opaque, Error **errp)
-{
-const RISCVCPUMultiExtConfig *multi_ext_cfg = opaque;
-bool value = isa_ext_is_enabled(RISCV_CPU(obj), multi_ext_cfg->offset);
-
-visit_type_bool(v, name, , errp);
-}
-
-static void cpu_add_multi_ext_prop(Object *cpu_obj,
-   const RISCVCPUMultiExtConfig *multi_cfg)
-{
-object_property_add(cpu_obj, multi_cfg->name, "bool",
-cpu_get_multi_ext_cfg,
-cpu_set_multi_ext_cfg,
-NULL, (void *)multi_cfg);
-
-/*
- * Set def val directly instead of using
- * object_property_set_bool() to save the set()
- * callback 

[PATCH 07/20] target/riscv/cpu.c: add .instance_post_init()

2023-08-25 Thread Daniel Henrique Barboza
All generic CPUs call riscv_cpu_add_user_properties(). The 'max' CPU
calls riscv_init_max_cpu_extensions(). Both can be moved to a common
instance_post_init() callback, implemented in riscv_cpu_post_init(),
called by all CPUs. The call order then becomes:

riscv_cpu_init() -> cpu_init() of each CPU -> .instance_post_init()

A CPU class that wants to add user flags will let us know via the
'user_extension_properties' property. Likewise, 'cfg.max_features' will
determine if any given CPU, regardless of being the 'max' CPU or not,
wants to enable the maximum amount of extensions.

In the near future riscv_cpu_post_init() will call the init() function
of the current accelerator, providing a hook for KVM and TCG accel
classes to change the init() process of the CPU.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c | 20 +++-
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index c35d58c64b..f67b782675 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -430,8 +430,6 @@ static void riscv_max_cpu_init(Object *obj)
 mlx = MXL_RV32;
 #endif
 set_misa(env, mlx, 0);
-riscv_cpu_add_user_properties(obj);
-riscv_init_max_cpu_extensions(obj);
 env->priv_ver = PRIV_VERSION_LATEST;
 #ifndef CONFIG_USER_ONLY
 set_satp_mode_max_supported(RISCV_CPU(obj), mlx == MXL_RV32 ?
@@ -445,7 +443,6 @@ static void rv64_base_cpu_init(Object *obj)
 CPURISCVState *env = _CPU(obj)->env;
 /* We set this in the realise function */
 set_misa(env, MXL_RV64, 0);
-riscv_cpu_add_user_properties(obj);
 /* Set latest version of privileged specification */
 env->priv_ver = PRIV_VERSION_LATEST;
 #ifndef CONFIG_USER_ONLY
@@ -569,7 +566,6 @@ static void rv128_base_cpu_init(Object *obj)
 CPURISCVState *env = _CPU(obj)->env;
 /* We set this in the realise function */
 set_misa(env, MXL_RV128, 0);
-riscv_cpu_add_user_properties(obj);
 /* Set latest version of privileged specification */
 env->priv_ver = PRIV_VERSION_LATEST;
 #ifndef CONFIG_USER_ONLY
@@ -582,7 +578,6 @@ static void rv32_base_cpu_init(Object *obj)
 CPURISCVState *env = _CPU(obj)->env;
 /* We set this in the realise function */
 set_misa(env, MXL_RV32, 0);
-riscv_cpu_add_user_properties(obj);
 /* Set latest version of privileged specification */
 env->priv_ver = PRIV_VERSION_LATEST;
 #ifndef CONFIG_USER_ONLY
@@ -1212,6 +1207,20 @@ static void riscv_cpu_set_irq(void *opaque, int irq, int 
level)
 }
 #endif /* CONFIG_USER_ONLY */
 
+static void riscv_cpu_post_init(Object *obj)
+{
+RISCVCPU *cpu = RISCV_CPU(obj);
+RISCVCPUClass *rcc = RISCV_CPU_GET_CLASS(cpu);
+
+if (rcc->user_extension_properties) {
+riscv_cpu_add_user_properties(obj);
+}
+
+if (cpu->cfg.max_features) {
+riscv_init_max_cpu_extensions(obj);
+}
+}
+
 static void riscv_cpu_init(Object *obj)
 {
 RISCVCPU *cpu = RISCV_CPU(obj);
@@ -2019,6 +2028,7 @@ static const TypeInfo riscv_cpu_type_infos[] = {
 .instance_size = sizeof(RISCVCPU),
 .instance_align = __alignof__(RISCVCPU),
 .instance_init = riscv_cpu_init,
+.instance_post_init = riscv_cpu_post_init,
 .abstract = true,
 .class_size = sizeof(RISCVCPUClass),
 .class_init = riscv_cpu_class_init,
-- 
2.41.0




[PATCH 10/20] target/riscv: move riscv_cpu_add_kvm_properties() to kvm.c

2023-08-25 Thread Daniel Henrique Barboza
We'll introduce the KVM accelerator class with a 'cpu_instance_init'
implementation that is going to be invoked during the common
riscv_cpu_post_init() (via accel_cpu_instance_init()). This
instance_init will execute KVM exclusive code that TCG doesn't care
about, such as adding KVM specific properties, initing registers using a
KVM scratch CPU and so on.

The core of the aforementioned cpu_instance_init impl is the current
riscv_cpu_add_kvm_properties() that is being used by the common code via
riscv_cpu_add_user_properties() in cpu.c. Move it to kvm.c, together
will all the relevant artifacts, exporting and renaming it to
kvm_riscv_cpu_add_kvm_properties() so cpu.c can keep using it for now.

To make this work we'll need to export riscv_cpu_extensions,
riscv_cpu_vendor_exts and riscv_cpu_experimental_exts from cpu.c as
well. The TCG accelerator will also need to access those in the near
future so this export will benefit us in the long run.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c   | 87 +++-
 target/riscv/cpu.h   | 14 +++
 target/riscv/kvm.c   | 66 +-
 target/riscv/kvm_riscv.h |  2 +-
 4 files changed, 86 insertions(+), 83 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 4eda853f1d..58b0ef2af8 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1344,7 +1344,7 @@ static RISCVCPUMisaExtConfig misa_ext_cfgs[] = {
 MISA_CFG(RVG, false),
 };
 
-static void riscv_cpu_add_misa_properties(Object *cpu_obj)
+void riscv_cpu_add_misa_properties(Object *cpu_obj)
 {
 int i;
 
@@ -1371,17 +1371,11 @@ static void riscv_cpu_add_misa_properties(Object 
*cpu_obj)
 }
 }
 
-typedef struct RISCVCPUMultiExtConfig {
-const char *name;
-uint32_t offset;
-bool enabled;
-} RISCVCPUMultiExtConfig;
-
 #define MULTI_EXT_CFG_BOOL(_name, _prop, _defval) \
 {.name = _name, .offset = CPU_CFG_OFFSET(_prop), \
  .enabled = _defval}
 
-static const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
+const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
 /* Defaults for standard extensions */
 MULTI_EXT_CFG_BOOL("sscofpmf", ext_sscofpmf, false),
 MULTI_EXT_CFG_BOOL("Zifencei", ext_ifencei, true),
@@ -1441,7 +1435,7 @@ static const RISCVCPUMultiExtConfig 
riscv_cpu_extensions[] = {
 DEFINE_PROP_END_OF_LIST(),
 };
 
-static const RISCVCPUMultiExtConfig riscv_cpu_vendor_exts[] = {
+const RISCVCPUMultiExtConfig riscv_cpu_vendor_exts[] = {
 MULTI_EXT_CFG_BOOL("xtheadba", ext_xtheadba, false),
 MULTI_EXT_CFG_BOOL("xtheadbb", ext_xtheadbb, false),
 MULTI_EXT_CFG_BOOL("xtheadbs", ext_xtheadbs, false),
@@ -1459,7 +1453,7 @@ static const RISCVCPUMultiExtConfig 
riscv_cpu_vendor_exts[] = {
 };
 
 /* These are experimental so mark with 'x-' */
-static const RISCVCPUMultiExtConfig riscv_cpu_experimental_exts[] = {
+const RISCVCPUMultiExtConfig riscv_cpu_experimental_exts[] = {
 MULTI_EXT_CFG_BOOL("x-zicond", ext_zicond, false),
 
 /* ePMP 0.9.3 */
@@ -1487,7 +1481,7 @@ static const RISCVCPUMultiExtConfig 
riscv_cpu_experimental_exts[] = {
 DEFINE_PROP_END_OF_LIST(),
 };
 
-static Property riscv_cpu_options[] = {
+Property riscv_cpu_options[] = {
 DEFINE_PROP_UINT8("pmu-num", RISCVCPU, cfg.pmu_num, 16),
 
 DEFINE_PROP_BOOL("mmu", RISCVCPU, cfg.mmu, true),
@@ -1548,25 +1542,6 @@ static void cpu_add_multi_ext_prop(Object *cpu_obj,
multi_cfg->enabled);
 }
 
-#ifndef CONFIG_USER_ONLY
-static void cpu_set_cfg_unavailable(Object *obj, Visitor *v,
-const char *name,
-void *opaque, Error **errp)
-{
-const char *propname = opaque;
-bool value;
-
-if (!visit_type_bool(v, name, , errp)) {
-return;
-}
-
-if (value) {
-error_setg(errp, "extension %s is not available with KVM",
-   propname);
-}
-}
-#endif
-
 static void riscv_cpu_add_multiext_prop_array(Object *obj,
 const RISCVCPUMultiExtConfig *array)
 {
@@ -1577,56 +1552,6 @@ static void riscv_cpu_add_multiext_prop_array(Object 
*obj,
 }
 }
 
-#ifndef CONFIG_USER_ONLY
-static void riscv_cpu_add_kvm_unavail_prop(Object *obj, const char *prop_name)
-{
-/* Check if KVM created the property already */
-if (object_property_find(obj, prop_name)) {
-return;
-}
-
-/*
- * Set the default to disabled for every extension
- * unknown to KVM and error out if the user attempts
- * to enable any of them.
- */
-object_property_add(obj, prop_name, "bool",
-NULL, cpu_set_cfg_unavailable,
-NULL, (void *)prop_name);
-}
-
-static void riscv_cpu_add_kvm_unavail_prop_array(Object *obj,
-const RISCVCPUMultiExtConfig *array)
-{
-const RISCVCPUMultiExtConfig *prop;
-
-for (prop = array; prop 

[PATCH 20/20] target/riscv: add 'kvm_supported' class property

2023-08-25 Thread Daniel Henrique Barboza
This follows the same idea of 'tcg_support' property added in the
previous patch. Note that we're now implementing the 'cpu_realizefn' for
the KVMAccel class since this verification is done in realize() time.

Supporting vendor CPUs with KVM is not possible. We rely on the
extension support of the KVM module running in the host, making it
impossible to guarantee that a vendor CPU will have all the required
extensions available. The only way to guarantee that a vendor CPU is KVM
compatible is running KVM in a host that has the same vendor CPU, and
for this case we already have the 'host' CPU type.

We're better off declaring that all vendor CPUs are not KVM capable.
After this patch, running KVM accel with a vendor CPU will produce an
error like the following:

$ ./qemu-system-riscv64 -M virt,accel=kvm -cpu veyron-v1
qemu-system-riscv64: 'veyron-v1' CPU is not compatible with KVM acceleration

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu-qom.h |  1 +
 target/riscv/cpu.c |  1 +
 target/riscv/kvm/kvm-cpu.c | 24 
 3 files changed, 26 insertions(+)

diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h
index e86b76f9fe..32d9bb07b4 100644
--- a/target/riscv/cpu-qom.h
+++ b/target/riscv/cpu-qom.h
@@ -72,5 +72,6 @@ struct RISCVCPUClass {
 
 bool user_extension_properties;
 bool tcg_supported;
+bool kvm_supported;
 };
 #endif /* RISCV_CPU_QOM_H */
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index f749ea2a2e..73302bb72a 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1646,6 +1646,7 @@ static void riscv_dynamic_cpu_class_init(ObjectClass *c, 
void *data)
 
 rcc->user_extension_properties = true;
 rcc->tcg_supported = true;
+rcc->kvm_supported = true;
 }
 
 static void riscv_vendor_cpu_class_init(ObjectClass *c, void *data)
diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
index 501384924b..85f3b8c80e 100644
--- a/target/riscv/kvm/kvm-cpu.c
+++ b/target/riscv/kvm/kvm-cpu.c
@@ -1289,6 +1289,7 @@ static void riscv_kvm_cpu_class_init(ObjectClass *c, void 
*data)
 RISCVCPUClass *rcc = RISCV_CPU_CLASS(c);
 
 rcc->user_extension_properties = true;
+rcc->kvm_supported = true;
 }
 
 static const TypeInfo riscv_kvm_cpu_type_infos[] = {
@@ -1302,6 +1303,28 @@ static const TypeInfo riscv_kvm_cpu_type_infos[] = {
 
 DEFINE_TYPES(riscv_kvm_cpu_type_infos)
 
+/*
+ * We'll get here via the following path:
+ *
+ * riscv_cpu_realize()
+ *   -> cpu_exec_realizefn()
+ *  -> kvm_cpu_realizefn() (via accel_cpu_realizefn())
+ */
+static bool kvm_cpu_realizefn(CPUState *cs, Error **errp)
+{
+RISCVCPU *cpu = RISCV_CPU(cs);
+RISCVCPUClass *rcc = RISCV_CPU_GET_CLASS(cpu);
+
+if (!rcc->kvm_supported) {
+g_autofree char *name = riscv_cpu_get_name(rcc);
+error_setg(errp, "'%s' CPU is not compatible with KVM acceleration",
+   name);
+return false;
+}
+
+return true;
+}
+
 static void kvm_cpu_instance_init(CPUState *cs)
 {
 Object *obj = OBJECT(RISCV_CPU(cs));
@@ -1328,6 +1351,7 @@ static void kvm_cpu_accel_class_init(ObjectClass *oc, 
void *data)
 AccelCPUClass *acc = ACCEL_CPU_CLASS(oc);
 
 acc->cpu_instance_init = kvm_cpu_instance_init;
+acc->cpu_realizefn = kvm_cpu_realizefn;
 }
 
 static const TypeInfo kvm_cpu_accel_type_info = {
-- 
2.41.0




[PATCH 18/20] target/riscv/cpu: move priv spec functions to tcg-cpu.c

2023-08-25 Thread Daniel Henrique Barboza
Priv spec validation is TCG specific. Move it to the TCG accel class.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c | 39 --
 target/riscv/cpu.h |  2 --
 target/riscv/tcg/tcg-cpu.c | 39 ++
 3 files changed, 39 insertions(+), 41 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index ac5ad4727c..6817f94c2c 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -172,22 +172,6 @@ void isa_ext_update_enabled(RISCVCPU *cpu, uint32_t 
ext_offset, bool en)
 *ext_enabled = en;
 }
 
-int cpu_cfg_ext_get_min_version(uint32_t ext_offset)
-{
-const RISCVIsaExtData *edata;
-
-for (edata = isa_edata_arr; edata && edata->name; edata++) {
-if (edata->ext_enable_offset != ext_offset) {
-continue;
-}
-
-return edata->min_version;
-}
-
-/* Default to oldest priv spec if no match found */
-return PRIV_VERSION_1_10_0;
-}
-
 const char * const riscv_int_regnames[] = {
 "x0/zero", "x1/ra",  "x2/sp",  "x3/gp",  "x4/tp",  "x5/t0",   "x6/t1",
 "x7/t2",   "x8/s0",  "x9/s1",  "x10/a0", "x11/a1", "x12/a2",  "x13/a3",
@@ -926,29 +910,6 @@ static void riscv_cpu_disas_set_info(CPUState *s, 
disassemble_info *info)
 }
 }
 
-void riscv_cpu_disable_priv_spec_isa_exts(RISCVCPU *cpu)
-{
-CPURISCVState *env = &cpu->env;
-const RISCVIsaExtData *edata;
-
-/* Force disable extensions if priv spec version does not match */
-for (edata = isa_edata_arr; edata && edata->name; edata++) {
-if (isa_ext_is_enabled(cpu, edata->ext_enable_offset) &&
-(env->priv_ver < edata->min_version)) {
-isa_ext_update_enabled(cpu, edata->ext_enable_offset, false);
-#ifndef CONFIG_USER_ONLY
-warn_report("disabling %s extension for hart 0x" TARGET_FMT_lx
-" because privilege spec version does not match",
-edata->name, env->mhartid);
-#else
-warn_report("disabling %s extension because "
-"privilege spec version does not match",
-edata->name);
-#endif
-}
-}
-}
-
 #ifndef CONFIG_USER_ONLY
 static void riscv_cpu_satp_mode_finalize(RISCVCPU *cpu, Error **errp)
 {
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index d9a17df46a..4254f04684 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -711,9 +711,7 @@ enum riscv_pmu_event_idx {
 /* used by tcg/tcg-cpu.c*/
 void isa_ext_update_enabled(RISCVCPU *cpu, uint32_t ext_offset, bool en);
 bool isa_ext_is_enabled(RISCVCPU *cpu, uint32_t ext_offset);
-int cpu_cfg_ext_get_min_version(uint32_t ext_offset);
 void set_misa(CPURISCVState *env, RISCVMXL mxl, uint32_t ext);
-void riscv_cpu_disable_priv_spec_isa_exts(RISCVCPU *cpu);
 
 typedef struct RISCVCPUMultiExtConfig {
 const char *name;
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
index 8e3f55d3a6..6c91978920 100644
--- a/target/riscv/tcg/tcg-cpu.c
+++ b/target/riscv/tcg/tcg-cpu.c
@@ -97,6 +97,22 @@ const struct TCGCPUOps riscv_tcg_ops = {
 #endif /* !CONFIG_USER_ONLY */
 };
 
+static int cpu_cfg_ext_get_min_version(uint32_t ext_offset)
+{
+const RISCVIsaExtData *edata;
+
+for (edata = isa_edata_arr; edata && edata->name; edata++) {
+if (edata->ext_enable_offset != ext_offset) {
+continue;
+}
+
+return edata->min_version;
+}
+
+/* Default to oldest priv spec if no match found */
+return PRIV_VERSION_1_10_0;
+}
+
 static void cpu_cfg_ext_auto_update(RISCVCPU *cpu, uint32_t ext_offset,
 bool value)
 {
@@ -220,6 +236,29 @@ static void riscv_cpu_validate_v(CPURISCVState *env, 
RISCVCPUConfig *cfg,
 }
 }
 
+static void riscv_cpu_disable_priv_spec_isa_exts(RISCVCPU *cpu)
+{
+CPURISCVState *env = &cpu->env;
+const RISCVIsaExtData *edata;
+
+/* Force disable extensions if priv spec version does not match */
+for (edata = isa_edata_arr; edata && edata->name; edata++) {
+if (isa_ext_is_enabled(cpu, edata->ext_enable_offset) &&
+(env->priv_ver < edata->min_version)) {
+isa_ext_update_enabled(cpu, edata->ext_enable_offset, false);
+#ifndef CONFIG_USER_ONLY
+warn_report("disabling %s extension for hart 0x" TARGET_FMT_lx
+" because privilege spec version does not match",
+edata->name, env->mhartid);
+#else
+warn_report("disabling %s extension because "
+"privilege spec version does not match",
+edata->name);
+#endif
+}
+}
+}
+
 /*
  * Check consistency between chosen extensions while setting
  * cpu->cfg accordingly.
-- 
2.41.0




[PATCH 03/20] target/riscv: move riscv_cpu_validate_set_extensions() to tcg-cpu.c

2023-08-25 Thread Daniel Henrique Barboza
This function is the core of the RISC-V validations for TCG CPUs, and it
has a lot going on.

Functions in cpu.c were made public to allow them to be used by the KVM
accelerator class later on. 'cpu_cfg_ext_get_min_version()' is notably
hard to move it to another file due to its dependency with isa_edata_arr[]
array, thus make it public and use it as is for now.

riscv_cpu_validate_set_extensions() is kept public because it's used by
csr.c in write_misa().

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c | 361 +
 target/riscv/cpu.h |   8 +-
 target/riscv/csr.c |   1 +
 target/riscv/tcg/tcg-cpu.c | 352 
 target/riscv/tcg/tcg-cpu.h |  28 +++
 5 files changed, 393 insertions(+), 357 deletions(-)
 create mode 100644 target/riscv/tcg/tcg-cpu.h

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 36c5c6e579..12cea62ee7 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -163,22 +163,21 @@ static const struct isa_ext_data isa_edata_arr[] = {
 /* Hash that stores user set extensions */
 static GHashTable *multi_ext_user_opts;
 
-static bool isa_ext_is_enabled(RISCVCPU *cpu, uint32_t ext_offset)
+bool isa_ext_is_enabled(RISCVCPU *cpu, uint32_t ext_offset)
 {
 bool *ext_enabled = (void *)>cfg + ext_offset;
 
 return *ext_enabled;
 }
 
-static void isa_ext_update_enabled(RISCVCPU *cpu, uint32_t ext_offset,
-   bool en)
+void isa_ext_update_enabled(RISCVCPU *cpu, uint32_t ext_offset, bool en)
 {
 bool *ext_enabled = (void *)>cfg + ext_offset;
 
 *ext_enabled = en;
 }
 
-static int cpu_cfg_ext_get_min_version(uint32_t ext_offset)
+int cpu_cfg_ext_get_min_version(uint32_t ext_offset)
 {
 int i;
 
@@ -194,38 +193,12 @@ static int cpu_cfg_ext_get_min_version(uint32_t 
ext_offset)
 return PRIV_VERSION_1_10_0;
 }
 
-static bool cpu_cfg_ext_is_user_set(uint32_t ext_offset)
+bool cpu_cfg_ext_is_user_set(uint32_t ext_offset)
 {
 return g_hash_table_contains(multi_ext_user_opts,
  GUINT_TO_POINTER(ext_offset));
 }
 
-static void cpu_cfg_ext_auto_update(RISCVCPU *cpu, uint32_t ext_offset,
-bool value)
-{
-CPURISCVState *env = &cpu->env;
-bool prev_val = isa_ext_is_enabled(cpu, ext_offset);
-int min_version;
-
-if (prev_val == value) {
-return;
-}
-
-if (cpu_cfg_ext_is_user_set(ext_offset)) {
-return;
-}
-
-if (value && env->priv_ver != PRIV_VERSION_LATEST) {
-/* Do not enable it if priv_ver is older than min_version */
-min_version = cpu_cfg_ext_get_min_version(ext_offset);
-if (env->priv_ver < min_version) {
-return;
-}
-}
-
-isa_ext_update_enabled(cpu, ext_offset, value);
-}
-
 const char * const riscv_int_regnames[] = {
 "x0/zero", "x1/ra",  "x2/sp",  "x3/gp",  "x4/tp",  "x5/t0",   "x6/t1",
 "x7/t2",   "x8/s0",  "x9/s1",  "x10/a0", "x11/a1", "x12/a2",  "x13/a3",
@@ -1024,46 +997,7 @@ static void riscv_cpu_disas_set_info(CPUState *s, 
disassemble_info *info)
 }
 }
 
-static void riscv_cpu_validate_v(CPURISCVState *env, RISCVCPUConfig *cfg,
- Error **errp)
-{
-if (!is_power_of_2(cfg->vlen)) {
-error_setg(errp, "Vector extension VLEN must be power of 2");
-return;
-}
-if (cfg->vlen > RV_VLEN_MAX || cfg->vlen < 128) {
-error_setg(errp,
-   "Vector extension implementation only supports VLEN "
-   "in the range [128, %d]", RV_VLEN_MAX);
-return;
-}
-if (!is_power_of_2(cfg->elen)) {
-error_setg(errp, "Vector extension ELEN must be power of 2");
-return;
-}
-if (cfg->elen > 64 || cfg->elen < 8) {
-error_setg(errp,
-   "Vector extension implementation only supports ELEN "
-   "in the range [8, 64]");
-return;
-}
-if (cfg->vext_spec) {
-if (!g_strcmp0(cfg->vext_spec, "v1.0")) {
-env->vext_ver = VEXT_VERSION_1_00_0;
-} else {
-error_setg(errp, "Unsupported vector spec version '%s'",
-   cfg->vext_spec);
-return;
-}
-} else if (env->vext_ver == 0) {
-qemu_log("vector version is not specified, "
- "use the default value v1.0\n");
-
-env->vext_ver = VEXT_VERSION_1_00_0;
-}
-}
-
-static void riscv_cpu_disable_priv_spec_isa_exts(RISCVCPU *cpu)
+void riscv_cpu_disable_priv_spec_isa_exts(RISCVCPU *cpu)
 {
  CPURISCVState *env = &cpu->env;
 int i;
@@ -1087,291 +1021,6 @@ static void 
riscv_cpu_disable_priv_spec_isa_exts(RISCVCPU *cpu)
 }
 }
 
-/*
- * Check consistency between chosen extensions while setting
- * cpu->cfg accordingly.
- */
-void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
-{
-CPURISCVState *env = >env;
-Error *local_err = NULL;
-

Re: [sdl-qemu] [PATCH] fix leaks found wtih fuzzing

2023-08-25 Thread Alexey Khoroshilov
On 25.08.2023 12:29, Dmitry Frolov wrote:
> It is true that there is no problem during runtime
> at first sight, because the memory is lost just
> before qemu exits. Nevertheless, this change is necessary,
> because AddressSanitizer is not able to recognize this
> situation and produces a crash report (which is
> a false positive in fact). Lots of false-positive warnings
> are devaluing the problems found with fuzzing, and thus the
> whole methodology of dynamic analysis.
> This patch eliminates such false-positive reports,
> and makes every problem found with fuzzing more valuable.

It would be good to separate the answer to the previous mail from the commit message.

> 
> Fixes: 060ab76356 ("gtk: don't exit early in case gtk init fails")
> 
> Signed-off-by: Dmitry Frolov 
> ---
> v2: Moved declarations in the beginning.
> 
>  ui/gtk.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/ui/gtk.c b/ui/gtk.c
> index 8ba41c8f13..23a78787df 100644
> --- a/ui/gtk.c
> +++ b/ui/gtk.c
> @@ -2360,7 +2360,7 @@ static void gtk_display_init(DisplayState *ds, 
> DisplayOptions *opts)
>  {
>  VirtualConsole *vc;
>  
> -GtkDisplayState *s = g_malloc0(sizeof(*s));
> +GtkDisplayState *s;
>  GdkDisplay *window_display;
>  GtkIconTheme *theme;
>  char *dir;
> @@ -2372,6 +2372,7 @@ static void gtk_display_init(DisplayState *ds, 
> DisplayOptions *opts)
>  assert(opts->type == DISPLAY_TYPE_GTK);>  s->opts = opts;
's' is already used here.

>  
> +*s = g_malloc0(sizeof(*s));
s = g_malloc0(sizeof(*s));

>  theme = gtk_icon_theme_get_default();
>  dir = get_relocated_path(CONFIG_QEMU_ICONDIR);
>  gtk_icon_theme_prepend_search_path(theme, dir);


Otherwise, I believe the change makes sense.

--
Alexey Khoroshilov
Linux Verification Center, ISPRAS




[PATCH 14/20] target/riscv/kvm: do not use riscv_cpu_add_misa_properties()

2023-08-25 Thread Daniel Henrique Barboza
riscv_cpu_add_misa_properties() is being used to fill the missing KVM
MISA properties but it is a TCG helper that was adapted to do so. We'll
move it to tcg-cpu.c in the next patches, meaning that KVM needs to fill
the remaining MISA properties on its own.

Do not use riscv_cpu_add_misa_properties(). Let's create a new array
with all available MISA bits we support that can be read by KVM. Then,
inside kvm_riscv_add_cpu_user_properties(), we'll create all KVM MISA
properties as usual and then use this array to add any missing MISA
properties with the riscv_cpu_add_kvm_unavail_prop() helper.

Note that we're creating misa_bits[], and not using the existing
'riscv_single_letter_exts[]', because the latter is tuned for riscv,isa
related functions and it doesn't have all MISA bits we support. Commit
0e2c377023 ("target/riscv: misa to ISA string conversion fix") has the
full context.

While we're at it, move both satp and the multi-letter extension
properties to kvm_riscv_add_cpu_user_properties() as well.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c |  2 ++
 target/riscv/cpu.h |  3 ++-
 target/riscv/kvm/kvm-cpu.c | 17 +++--
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index bf6c8519b1..f9aea6a80a 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -38,6 +38,8 @@
 
 /* RISC-V CPU definitions */
 static const char riscv_single_letter_exts[] = "IEMAFDQCPVH";
+const uint32_t misa_bits[] = {RVI, RVE, RVM, RVA, RVF, RVD, RVV,
+  RVC, RVS, RVU, RVH, RVJ, RVG};
 
 struct isa_ext_data {
 const char *name;
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 950c2301f2..9ec3b98bd2 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -43,7 +43,7 @@
 #define RV(x) ((target_ulong)1 << (x - 'A'))
 
 /*
- * Consider updating misa_ext_info_arr[] and misa_ext_cfgs[]
+ * Update misa_bits[], misa_ext_info_arr[] and misa_ext_cfgs[]
  * when adding new MISA bits here.
  */
 #define RVI RV('I')
@@ -60,6 +60,7 @@
 #define RVJ RV('J')
 #define RVG RV('G')
 
+extern const uint32_t misa_bits[13];
 const char *riscv_get_misa_ext_name(uint32_t bit);
 const char *riscv_get_misa_ext_description(uint32_t bit);
 
diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
index 85e8b0a927..501384924b 100644
--- a/target/riscv/kvm/kvm-cpu.c
+++ b/target/riscv/kvm/kvm-cpu.c
@@ -387,6 +387,8 @@ static void kvm_riscv_add_cpu_user_properties(Object 
*cpu_obj)
 {
 int i;
 
+riscv_add_satp_mode_properties(cpu_obj);
+
 for (i = 0; i < ARRAY_SIZE(kvm_misa_ext_cfgs); i++) {
 KVMCPUConfig *misa_cfg = _misa_ext_cfgs[i];
 int bit = misa_cfg->offset;
@@ -402,6 +404,11 @@ static void kvm_riscv_add_cpu_user_properties(Object 
*cpu_obj)
 misa_cfg->description);
 }
 
+for (i = 0; i < ARRAY_SIZE(misa_bits); i++) {
+const char *ext_name = riscv_get_misa_ext_name(misa_bits[i]);
+riscv_cpu_add_kvm_unavail_prop(cpu_obj, ext_name);
+}
+
 for (i = 0; i < ARRAY_SIZE(kvm_multi_ext_cfgs); i++) {
 KVMCPUConfig *multi_cfg = _multi_ext_cfgs[i];
 
@@ -418,6 +425,10 @@ static void kvm_riscv_add_cpu_user_properties(Object 
*cpu_obj)
 object_property_add(cpu_obj, "cboz_blocksize", "uint16",
 NULL, kvm_cpu_set_cbomz_blksize,
 NULL, _cboz_blocksize);
+
+riscv_cpu_add_kvm_unavail_prop_array(cpu_obj, riscv_cpu_extensions);
+riscv_cpu_add_kvm_unavail_prop_array(cpu_obj, riscv_cpu_vendor_exts);
+riscv_cpu_add_kvm_unavail_prop_array(cpu_obj, riscv_cpu_experimental_exts);
 }
 
 static int kvm_riscv_get_regs_core(CPUState *cs)
@@ -1301,12 +1312,6 @@ static void kvm_cpu_instance_init(CPUState *cs)
 
 if (rcc->user_extension_properties) {
 kvm_riscv_add_cpu_user_properties(obj);
-riscv_add_satp_mode_properties(obj);
-riscv_cpu_add_misa_properties(obj);
-
-riscv_cpu_add_kvm_unavail_prop_array(obj, riscv_cpu_extensions);
-riscv_cpu_add_kvm_unavail_prop_array(obj, riscv_cpu_vendor_exts);
-riscv_cpu_add_kvm_unavail_prop_array(obj, riscv_cpu_experimental_exts);
 }
 
 for (Property *prop = riscv_cpu_options; prop && prop->name; prop++) {
-- 
2.41.0




[PATCH 16/20] target/riscv/tcg: move riscv_cpu_add_misa_properties() to tcg-cpu.c

2023-08-25 Thread Daniel Henrique Barboza
All code related to MISA TCG properties is also moved.

At this point, all TCG properties handling is done in tcg-cpu.c, all KVM
properties handling is done in kvm-cpu.c.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c | 89 --
 target/riscv/cpu.h |  1 -
 target/riscv/tcg/tcg-cpu.c | 84 +++
 3 files changed, 84 insertions(+), 90 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 89b09a7e89..3c9db46837 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1201,49 +1201,6 @@ static void riscv_cpu_init(Object *obj)
 #endif /* CONFIG_USER_ONLY */
 }
 
-typedef struct RISCVCPUMisaExtConfig {
-const char *name;
-const char *description;
-target_ulong misa_bit;
-bool enabled;
-} RISCVCPUMisaExtConfig;
-
-static void cpu_set_misa_ext_cfg(Object *obj, Visitor *v, const char *name,
- void *opaque, Error **errp)
-{
-const RISCVCPUMisaExtConfig *misa_ext_cfg = opaque;
-target_ulong misa_bit = misa_ext_cfg->misa_bit;
-RISCVCPU *cpu = RISCV_CPU(obj);
-CPURISCVState *env = >env;
-bool value;
-
-if (!visit_type_bool(v, name, , errp)) {
-return;
-}
-
-if (value) {
-env->misa_ext |= misa_bit;
-env->misa_ext_mask |= misa_bit;
-} else {
-env->misa_ext &= ~misa_bit;
-env->misa_ext_mask &= ~misa_bit;
-}
-}
-
-static void cpu_get_misa_ext_cfg(Object *obj, Visitor *v, const char *name,
- void *opaque, Error **errp)
-{
-const RISCVCPUMisaExtConfig *misa_ext_cfg = opaque;
-target_ulong misa_bit = misa_ext_cfg->misa_bit;
-RISCVCPU *cpu = RISCV_CPU(obj);
-CPURISCVState *env = >env;
-bool value;
-
-value = env->misa_ext & misa_bit;
-
-visit_type_bool(v, name, , errp);
-}
-
 typedef struct misa_ext_info {
 const char *name;
 const char *description;
@@ -1304,52 +1261,6 @@ const char *riscv_get_misa_ext_description(uint32_t bit)
 return val;
 }
 
-#define MISA_CFG(_bit, _enabled) \
-{.misa_bit = _bit, .enabled = _enabled}
-
-static RISCVCPUMisaExtConfig misa_ext_cfgs[] = {
-MISA_CFG(RVA, true),
-MISA_CFG(RVC, true),
-MISA_CFG(RVD, true),
-MISA_CFG(RVF, true),
-MISA_CFG(RVI, true),
-MISA_CFG(RVE, false),
-MISA_CFG(RVM, true),
-MISA_CFG(RVS, true),
-MISA_CFG(RVU, true),
-MISA_CFG(RVH, true),
-MISA_CFG(RVJ, false),
-MISA_CFG(RVV, false),
-MISA_CFG(RVG, false),
-};
-
-void riscv_cpu_add_misa_properties(Object *cpu_obj)
-{
-int i;
-
-for (i = 0; i < ARRAY_SIZE(misa_ext_cfgs); i++) {
-RISCVCPUMisaExtConfig *misa_cfg = _ext_cfgs[i];
-int bit = misa_cfg->misa_bit;
-
-misa_cfg->name = riscv_get_misa_ext_name(bit);
-misa_cfg->description = riscv_get_misa_ext_description(bit);
-
-/* Check if KVM already created the property */
-if (object_property_find(cpu_obj, misa_cfg->name)) {
-continue;
-}
-
-object_property_add(cpu_obj, misa_cfg->name, "bool",
-cpu_get_misa_ext_cfg,
-cpu_set_misa_ext_cfg,
-NULL, (void *)misa_cfg);
-object_property_set_description(cpu_obj, misa_cfg->name,
-misa_cfg->description);
-object_property_set_bool(cpu_obj, misa_cfg->name,
- misa_cfg->enabled, NULL);
-}
-}
-
 #define MULTI_EXT_CFG_BOOL(_name, _prop, _defval) \
 {.name = _name, .offset = CPU_CFG_OFFSET(_prop), \
  .enabled = _defval}
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 74fbb33e09..4269523e24 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -726,7 +726,6 @@ extern const RISCVCPUMultiExtConfig riscv_cpu_vendor_exts[];
 extern const RISCVCPUMultiExtConfig riscv_cpu_experimental_exts[];
 extern Property riscv_cpu_options[];
 
-void riscv_cpu_add_misa_properties(Object *cpu_obj);
 void riscv_add_satp_mode_properties(Object *obj);
 
 /* CSR function table */
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
index 68ce3cbcb9..8e3f55d3a6 100644
--- a/target/riscv/tcg/tcg-cpu.c
+++ b/target/riscv/tcg/tcg-cpu.c
@@ -574,6 +574,90 @@ static bool tcg_cpu_realizefn(CPUState *cs, Error **errp)
 return true;
 }
 
+typedef struct RISCVCPUMisaExtConfig {
+const char *name;
+const char *description;
+target_ulong misa_bit;
+bool enabled;
+} RISCVCPUMisaExtConfig;
+
+static void cpu_set_misa_ext_cfg(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+const RISCVCPUMisaExtConfig *misa_ext_cfg = opaque;
+target_ulong misa_bit = misa_ext_cfg->misa_bit;
+RISCVCPU *cpu = RISCV_CPU(obj);
+CPURISCVState *env = >env;
+bool value;
+
+if (!visit_type_bool(v, name, , errp)) {
+return;
+}
+
+if 

[PATCH 01/20] target/riscv: introduce TCG AccelCPUClass

2023-08-25 Thread Daniel Henrique Barboza
target/riscv/cpu.c needs to handle all possible accelerators (TCG and
KVM at this moment) during both init() and realize() time. This forces
us to resort to a lot of "if tcg" and "if kvm" throughout the code,
which isn't wrong, but can get cluttered over time. Splitting
acceleration specific code from cpu.c to its own file will help to
declutter the existing code and it will also make it easier to support
KVM/TCG only builds in the future.

We'll start by adding a new subdir called 'tcg' and a new file called
'tcg-cpu.c'. This file will be used to introduce a new accelerator class
for TCG acceleration in RISC-V, allowing us to center all TCG exclusive
code in its file instead of using 'cpu.c' for everything. This design is
inspired by the work Claudio Fontana did on x86 a few years ago in commit
f5cc5a5c1 ("i386: split cpu accelerators from cpu.c, using
AccelCPUClass").

To avoid moving too much code at once we'll start by adding the new file
and TCG AccelCPUClass declaration. The 'class_init' from the accel class
will init 'tcg_ops', relieving the common riscv_cpu_class_init() from
doing it.

'riscv_tcg_ops' is being exported from 'cpu.c' for now to avoid having
to deal with moving code and files around right now. We'll focus on
decoupling the realize() logic first.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c   |  5 +---
 target/riscv/cpu.h   |  4 +++
 target/riscv/meson.build |  2 ++
 target/riscv/tcg/meson.build |  2 ++
 target/riscv/tcg/tcg-cpu.c   | 57 
 5 files changed, 66 insertions(+), 4 deletions(-)
 create mode 100644 target/riscv/tcg/meson.build
 create mode 100644 target/riscv/tcg/tcg-cpu.c

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 21ebdbf084..38dcbc4dd2 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -2275,9 +2275,7 @@ static const struct SysemuCPUOps riscv_sysemu_ops = {
 };
 #endif
 
-#include "hw/core/tcg-cpu-ops.h"
-
-static const struct TCGCPUOps riscv_tcg_ops = {
+const struct TCGCPUOps riscv_tcg_ops = {
 .initialize = riscv_translate_init,
 .synchronize_from_tb = riscv_cpu_synchronize_from_tb,
 .restore_state_to_opc = riscv_restore_state_to_opc,
@@ -2436,7 +2434,6 @@ static void riscv_cpu_class_init(ObjectClass *c, void 
*data)
 #endif
 cc->gdb_arch_name = riscv_gdb_arch_name;
 cc->gdb_get_dynamic_xml = riscv_gdb_get_dynamic_xml;
-cc->tcg_ops = _tcg_ops;
 
 object_class_property_add(c, "mvendorid", "uint32", cpu_get_mvendorid,
   cpu_set_mvendorid, NULL, NULL);
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 577abcd724..b84b62f84e 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -707,6 +707,10 @@ enum riscv_pmu_event_idx {
 RISCV_PMU_EVENT_CACHE_ITLB_PREFETCH_MISS = 0x10021,
 };
 
+/* Export tcg_ops until we move everything to tcg/tcg-cpu.c */
+#include "hw/core/tcg-cpu-ops.h"
+extern const struct TCGCPUOps riscv_tcg_ops;
+
 /* CSR function table */
 extern riscv_csr_operations csr_ops[CSR_TABLE_SIZE];
 
diff --git a/target/riscv/meson.build b/target/riscv/meson.build
index 660078bda1..f0486183fa 100644
--- a/target/riscv/meson.build
+++ b/target/riscv/meson.build
@@ -38,5 +38,7 @@ riscv_system_ss.add(files(
   'riscv-qmp-cmds.c',
 ))
 
+subdir('tcg')
+
 target_arch += {'riscv': riscv_ss}
 target_softmmu_arch += {'riscv': riscv_system_ss}
diff --git a/target/riscv/tcg/meson.build b/target/riscv/tcg/meson.build
new file mode 100644
index 00..061df3d74a
--- /dev/null
+++ b/target/riscv/tcg/meson.build
@@ -0,0 +1,2 @@
+riscv_ss.add(when: 'CONFIG_TCG', if_true: files(
+  'tcg-cpu.c'))
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
new file mode 100644
index 00..1ad27a26aa
--- /dev/null
+++ b/target/riscv/tcg/tcg-cpu.c
@@ -0,0 +1,57 @@
+/*
+ * riscv TCG cpu class initialization
+ *
+ * Copyright (c) 2023 Ventana Micro Systems Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "qemu/accel.h"
+#include "hw/core/accel-cpu.h"
+
+static void tcg_cpu_init_ops(AccelCPUClass *accel_cpu, CPUClass *cc)
+{
+/*
+ * All cpus use the same set of operations.
+ * riscv_tcg_ops is being imported from cpu.c for now.
+ */
+cc->tcg_ops = _tcg_ops;
+}
+
+static void 

[PATCH 11/20] target/riscv: introduce KVM AccelCPUClass

2023-08-25 Thread Daniel Henrique Barboza
Add a KVM accelerator class like we did with TCG. The difference is
that, at least for now, we won't be using a realize() implementation for
this accelerator.

We'll start by assigning kvm_riscv_cpu_add_kvm_properties(), renamed to
kvm_cpu_instance_init(), as a 'cpu_instance_init' implementation. Change
riscv_cpu_post_init() to invoke accel_cpu_instance_init(), which will go
through the 'cpu_instance_init' impl of the current acceleration (if
available) and execute it. The end result is that the KVM initial setup,
i.e. starting registers and adding its specific properties, will be done
via this hook.

riscv_cpu_add_user_properties() is still being called via the common
post_init() function, thus we still need the "if kvm then return" logic
inside it for now. We'll deal with it when TCG accel class get its own
'cpu_instance_init' implementation.

riscv_add_satp_mode_properties() is now being exported from cpu.c since
it's a common helper between KVM and TCG.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c   |  8 ++---
 target/riscv/cpu.h   |  1 +
 target/riscv/kvm.c   | 64 +++-
 target/riscv/kvm_riscv.h |  1 -
 4 files changed, 49 insertions(+), 25 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 58b0ef2af8..04c6bfaeef 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1112,7 +1112,7 @@ static void cpu_riscv_set_satp(Object *obj, Visitor *v, 
const char *name,
 satp_map->init |= 1 << satp;
 }
 
-static void riscv_add_satp_mode_properties(Object *obj)
+void riscv_add_satp_mode_properties(Object *obj)
 {
 RISCVCPU *cpu = RISCV_CPU(obj);
 
@@ -1199,6 +1199,8 @@ static void riscv_cpu_post_init(Object *obj)
 RISCVCPU *cpu = RISCV_CPU(obj);
 RISCVCPUClass *rcc = RISCV_CPU_GET_CLASS(cpu);
 
+accel_cpu_instance_init(CPU(obj));
+
 if (rcc->user_extension_properties) {
 riscv_cpu_add_user_properties(obj);
 }
@@ -1561,12 +1563,10 @@ static void riscv_cpu_add_multiext_prop_array(Object 
*obj,
 static void riscv_cpu_add_user_properties(Object *obj)
 {
 #ifndef CONFIG_USER_ONLY
-riscv_add_satp_mode_properties(obj);
-
 if (kvm_enabled()) {
-kvm_riscv_cpu_add_kvm_properties(obj);
 return;
 }
+riscv_add_satp_mode_properties(obj);
 #endif
 
 riscv_cpu_add_misa_properties(obj);
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index b9c4bea3f7..950c2301f2 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -726,6 +726,7 @@ extern const RISCVCPUMultiExtConfig 
riscv_cpu_experimental_exts[];
 extern Property riscv_cpu_options[];
 
 void riscv_cpu_add_misa_properties(Object *cpu_obj);
+void riscv_add_satp_mode_properties(Object *obj);
 
 /* CSR function table */
 extern riscv_csr_operations csr_ops[CSR_TABLE_SIZE];
diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index 7e67121456..3c4fa43cee 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -31,6 +31,7 @@
 #include "sysemu/kvm_int.h"
 #include "cpu.h"
 #include "trace.h"
+#include "hw/core/accel-cpu.h"
 #include "hw/pci/pci.h"
 #include "exec/memattrs.h"
 #include "exec/address-spaces.h"
@@ -1262,26 +1263,6 @@ void kvm_riscv_aia_create(MachineState *machine, 
uint64_t group_shift,
 kvm_msi_via_irqfd_allowed = kvm_irqfds_enabled();
 }
 
-void kvm_riscv_cpu_add_kvm_properties(Object *obj)
-{
-DeviceState *dev = DEVICE(obj);
-
-riscv_init_user_properties(obj);
-riscv_cpu_add_misa_properties(obj);
-
-riscv_cpu_add_kvm_unavail_prop_array(obj, riscv_cpu_extensions);
-riscv_cpu_add_kvm_unavail_prop_array(obj, riscv_cpu_vendor_exts);
-riscv_cpu_add_kvm_unavail_prop_array(obj, riscv_cpu_experimental_exts);
-
-for (Property *prop = riscv_cpu_options; prop && prop->name; prop++) {
-/* Check if KVM created the property already */
-if (object_property_find(obj, prop->name)) {
-continue;
-}
-qdev_property_add_static(dev, prop);
-}
-}
-
 static void riscv_host_cpu_init(Object *obj)
 {
 CPURISCVState *env = _CPU(obj)->env;
@@ -1310,3 +1291,46 @@ static const TypeInfo riscv_kvm_cpu_type_infos[] = {
 };
 
 DEFINE_TYPES(riscv_kvm_cpu_type_infos)
+
+static void kvm_cpu_instance_init(CPUState *cs)
+{
+Object *obj = OBJECT(RISCV_CPU(cs));
+DeviceState *dev = DEVICE(obj);
+
+riscv_init_user_properties(obj);
+
+riscv_add_satp_mode_properties(obj);
+riscv_cpu_add_misa_properties(obj);
+
+riscv_cpu_add_kvm_unavail_prop_array(obj, riscv_cpu_extensions);
+riscv_cpu_add_kvm_unavail_prop_array(obj, riscv_cpu_vendor_exts);
+riscv_cpu_add_kvm_unavail_prop_array(obj, riscv_cpu_experimental_exts);
+
+for (Property *prop = riscv_cpu_options; prop && prop->name; prop++) {
+/* Check if we have a specific KVM handler for the option */
+if (object_property_find(obj, prop->name)) {
+continue;
+}
+qdev_property_add_static(dev, prop);
+}
+}
+
+static void 

[PATCH 19/20] target/riscv: add 'tcg_supported' class property

2023-08-25 Thread Daniel Henrique Barboza
This property indicates whether a CPU supports TCG acceleration. All CPUs but
the 'host' CPU support it.

The error in tcg_cpu_realizefn() can now be made generic in case more
non-TCG CPUs are added in the future.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu-qom.h |  1 +
 target/riscv/cpu.c | 10 ++
 target/riscv/cpu.h |  1 +
 target/riscv/tcg/tcg-cpu.c |  7 +--
 4 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h
index 7c76dc0dcc..e86b76f9fe 100644
--- a/target/riscv/cpu-qom.h
+++ b/target/riscv/cpu-qom.h
@@ -71,5 +71,6 @@ struct RISCVCPUClass {
 ResettablePhases parent_phases;
 
 bool user_extension_properties;
+bool tcg_supported;
 };
 #endif /* RISCV_CPU_QOM_H */
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 6817f94c2c..f749ea2a2e 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -625,6 +625,14 @@ static void rv32_imafcu_nommu_cpu_init(Object *obj)
 }
 #endif
 
+char *riscv_cpu_get_name(RISCVCPUClass *rcc)
+{
+const char *typename = object_class_get_name(OBJECT_CLASS(rcc));
+
+return g_strndup(typename,
+ strlen(typename) - strlen("-" TYPE_RISCV_CPU));
+}
+
 static ObjectClass *riscv_cpu_class_by_name(const char *cpu_model)
 {
 ObjectClass *oc;
@@ -1637,6 +1645,7 @@ static void riscv_dynamic_cpu_class_init(ObjectClass *c, 
void *data)
 RISCVCPUClass *rcc = RISCV_CPU_CLASS(c);
 
 rcc->user_extension_properties = true;
+rcc->tcg_supported = true;
 }
 
 static void riscv_vendor_cpu_class_init(ObjectClass *c, void *data)
@@ -1644,6 +1653,7 @@ static void riscv_vendor_cpu_class_init(ObjectClass *c, 
void *data)
 RISCVCPUClass *rcc = RISCV_CPU_CLASS(c);
 
 rcc->user_extension_properties = false;
+rcc->tcg_supported = true;
 }
 
 #define DEFINE_DYNAMIC_CPU(type_name, initfn) \
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 4254f04684..1e6ecf52ee 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -732,6 +732,7 @@ typedef struct isa_ext_data {
 extern const RISCVIsaExtData isa_edata_arr[];
 
 void riscv_add_satp_mode_properties(Object *obj);
+char *riscv_cpu_get_name(RISCVCPUClass *rcc);
 
 /* CSR function table */
 extern riscv_csr_operations csr_ops[CSR_TABLE_SIZE];
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
index 6c91978920..a13796c597 100644
--- a/target/riscv/tcg/tcg-cpu.c
+++ b/target/riscv/tcg/tcg-cpu.c
@@ -554,11 +554,14 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, 
Error **errp)
 static bool tcg_cpu_realizefn(CPUState *cs, Error **errp)
 {
 RISCVCPU *cpu = RISCV_CPU(cs);
+RISCVCPUClass *rcc = RISCV_CPU_GET_CLASS(cpu);
 CPURISCVState *env = >env;
 Error *local_err = NULL;
 
-if (object_dynamic_cast(OBJECT(cpu), TYPE_RISCV_CPU_HOST)) {
-error_setg(errp, "'host' CPU is not compatible with TCG acceleration");
+if (!rcc->tcg_supported) {
+g_autofree char *name = riscv_cpu_get_name(rcc);
+error_setg(errp, "'%s' CPU is not compatible with TCG acceleration",
+   name);
 return false;
 }
 
-- 
2.41.0




[PATCH 12/20] target/riscv: move KVM only files to kvm subdir

2023-08-25 Thread Daniel Henrique Barboza
Move the files to a 'kvm' dir to promote more code separation between
accelerators and to make our lives easier when supporting build options
such as --disable-tcg.

Rename kvm.c to kvm-cpu.c to keep it in line with its TCG counterpart.

Signed-off-by: Daniel Henrique Barboza 
---
 hw/riscv/virt.c   | 2 +-
 target/riscv/cpu.c| 2 +-
 target/riscv/{kvm.c => kvm/kvm-cpu.c} | 0
 target/riscv/{ => kvm}/kvm-stub.c | 0
 target/riscv/{ => kvm}/kvm_riscv.h| 0
 target/riscv/kvm/meson.build  | 2 ++
 target/riscv/meson.build  | 2 +-
 7 files changed, 5 insertions(+), 3 deletions(-)
 rename target/riscv/{kvm.c => kvm/kvm-cpu.c} (100%)
 rename target/riscv/{ => kvm}/kvm-stub.c (100%)
 rename target/riscv/{ => kvm}/kvm_riscv.h (100%)
 create mode 100644 target/riscv/kvm/meson.build

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 388e52a294..77c384ddc3 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -35,7 +35,7 @@
 #include "hw/riscv/virt.h"
 #include "hw/riscv/boot.h"
 #include "hw/riscv/numa.h"
-#include "kvm_riscv.h"
+#include "kvm/kvm_riscv.h"
 #include "hw/intc/riscv_aclint.h"
 #include "hw/intc/riscv_aplic.h"
 #include "hw/intc/riscv_imsic.h"
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 04c6bfaeef..bf6c8519b1 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -33,7 +33,7 @@
 #include "fpu/softfloat-helpers.h"
 #include "sysemu/kvm.h"
 #include "sysemu/tcg.h"
-#include "kvm_riscv.h"
+#include "kvm/kvm_riscv.h"
 #include "tcg/tcg.h"
 
 /* RISC-V CPU definitions */
diff --git a/target/riscv/kvm.c b/target/riscv/kvm/kvm-cpu.c
similarity index 100%
rename from target/riscv/kvm.c
rename to target/riscv/kvm/kvm-cpu.c
diff --git a/target/riscv/kvm-stub.c b/target/riscv/kvm/kvm-stub.c
similarity index 100%
rename from target/riscv/kvm-stub.c
rename to target/riscv/kvm/kvm-stub.c
diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm/kvm_riscv.h
similarity index 100%
rename from target/riscv/kvm_riscv.h
rename to target/riscv/kvm/kvm_riscv.h
diff --git a/target/riscv/kvm/meson.build b/target/riscv/kvm/meson.build
new file mode 100644
index 00..1cd6783894
--- /dev/null
+++ b/target/riscv/kvm/meson.build
@@ -0,0 +1,2 @@
+riscv_ss.add(when: 'CONFIG_KVM', if_true: files('kvm-cpu.c'),
+ if_false: files('kvm-stub.c'))
diff --git a/target/riscv/meson.build b/target/riscv/meson.build
index f0486183fa..c53962215f 100644
--- a/target/riscv/meson.build
+++ b/target/riscv/meson.build
@@ -24,7 +24,6 @@ riscv_ss.add(files(
   'zce_helper.c',
   'vcrypto_helper.c'
 ))
-riscv_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c'), if_false: 
files('kvm-stub.c'))
 
 riscv_system_ss = ss.source_set()
 riscv_system_ss.add(files(
@@ -39,6 +38,7 @@ riscv_system_ss.add(files(
 ))
 
 subdir('tcg')
+subdir('kvm')
 
 target_arch += {'riscv': riscv_ss}
 target_softmmu_arch += {'riscv': riscv_system_ss}
-- 
2.41.0




[PATCH 04/20] target/riscv: move riscv_tcg_ops to tcg-cpu.c

2023-08-25 Thread Daniel Henrique Barboza
Move the remaining of riscv_tcg_ops now that we have a working realize()
implementation.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c | 58 -
 target/riscv/cpu.h |  4 ---
 target/riscv/tcg/tcg-cpu.c | 59 ++
 3 files changed, 59 insertions(+), 62 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 12cea62ee7..839b83e52a 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -839,24 +839,6 @@ static vaddr riscv_cpu_get_pc(CPUState *cs)
 return env->pc;
 }
 
-static void riscv_cpu_synchronize_from_tb(CPUState *cs,
-  const TranslationBlock *tb)
-{
-if (!(tb_cflags(tb) & CF_PCREL)) {
-RISCVCPU *cpu = RISCV_CPU(cs);
-CPURISCVState *env = >env;
-RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
-
-tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
-
-if (xl == MXL_RV32) {
-env->pc = (int32_t) tb->pc;
-} else {
-env->pc = tb->pc;
-}
-}
-}
-
 static bool riscv_cpu_has_work(CPUState *cs)
 {
 #ifndef CONFIG_USER_ONLY
@@ -872,29 +854,6 @@ static bool riscv_cpu_has_work(CPUState *cs)
 #endif
 }
 
-static void riscv_restore_state_to_opc(CPUState *cs,
-   const TranslationBlock *tb,
-   const uint64_t *data)
-{
-RISCVCPU *cpu = RISCV_CPU(cs);
-CPURISCVState *env = >env;
-RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
-target_ulong pc;
-
-if (tb_cflags(tb) & CF_PCREL) {
-pc = (env->pc & TARGET_PAGE_MASK) | data[0];
-} else {
-pc = data[0];
-}
-
-if (xl == MXL_RV32) {
-env->pc = (int32_t)pc;
-} else {
-env->pc = pc;
-}
-env->bins = data[1];
-}
-
 static void riscv_cpu_reset_hold(Object *obj)
 {
 #ifndef CONFIG_USER_ONLY
@@ -1796,23 +1755,6 @@ static const struct SysemuCPUOps riscv_sysemu_ops = {
 };
 #endif
 
-const struct TCGCPUOps riscv_tcg_ops = {
-.initialize = riscv_translate_init,
-.synchronize_from_tb = riscv_cpu_synchronize_from_tb,
-.restore_state_to_opc = riscv_restore_state_to_opc,
-
-#ifndef CONFIG_USER_ONLY
-.tlb_fill = riscv_cpu_tlb_fill,
-.cpu_exec_interrupt = riscv_cpu_exec_interrupt,
-.do_interrupt = riscv_cpu_do_interrupt,
-.do_transaction_failed = riscv_cpu_do_transaction_failed,
-.do_unaligned_access = riscv_cpu_do_unaligned_access,
-.debug_excp_handler = riscv_cpu_debug_excp_handler,
-.debug_check_breakpoint = riscv_cpu_debug_check_breakpoint,
-.debug_check_watchpoint = riscv_cpu_debug_check_watchpoint,
-#endif /* !CONFIG_USER_ONLY */
-};
-
 static bool riscv_cpu_is_dynamic(Object *cpu_obj)
 {
 return object_dynamic_cast(cpu_obj, TYPE_RISCV_DYNAMIC_CPU) != NULL;
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 721bd0b119..2ac00a0304 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -706,10 +706,6 @@ enum riscv_pmu_event_idx {
 RISCV_PMU_EVENT_CACHE_ITLB_PREFETCH_MISS = 0x10021,
 };
 
-/* Export tcg_ops until we move everything to tcg/tcg-cpu.c */
-#include "hw/core/tcg-cpu-ops.h"
-extern const struct TCGCPUOps riscv_tcg_ops;
-
 /* used by tcg/tcg-cpu.c*/
 void isa_ext_update_enabled(RISCVCPU *cpu, uint32_t ext_offset, bool en);
 bool cpu_cfg_ext_is_user_set(uint32_t ext_offset);
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
index fb17097bb1..2024c98793 100644
--- a/target/riscv/tcg/tcg-cpu.c
+++ b/target/riscv/tcg/tcg-cpu.c
@@ -26,7 +26,66 @@
 #include "qemu/accel.h"
 #include "qemu/error-report.h"
 #include "hw/core/accel-cpu.h"
+#include "hw/core/tcg-cpu-ops.h"
+#include "tcg/tcg.h"
 
+static void riscv_cpu_synchronize_from_tb(CPUState *cs,
+  const TranslationBlock *tb)
+{
+if (!(tb_cflags(tb) & CF_PCREL)) {
+RISCVCPU *cpu = RISCV_CPU(cs);
+CPURISCVState *env = >env;
+RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
+
+tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL));
+
+if (xl == MXL_RV32) {
+env->pc = (int32_t) tb->pc;
+} else {
+env->pc = tb->pc;
+}
+}
+}
+
+static void riscv_restore_state_to_opc(CPUState *cs,
+   const TranslationBlock *tb,
+   const uint64_t *data)
+{
+RISCVCPU *cpu = RISCV_CPU(cs);
+CPURISCVState *env = >env;
+RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
+target_ulong pc;
+
+if (tb_cflags(tb) & CF_PCREL) {
+pc = (env->pc & TARGET_PAGE_MASK) | data[0];
+} else {
+pc = data[0];
+}
+
+if (xl == MXL_RV32) {
+env->pc = (int32_t)pc;
+} else {
+env->pc = pc;
+}
+env->bins = data[1];
+}
+
+const struct TCGCPUOps riscv_tcg_ops = {
+.initialize = riscv_translate_init,
+.synchronize_from_tb = 

[PATCH 13/20] target/riscv/kvm: refactor kvm_riscv_init_user_properties()

2023-08-25 Thread Daniel Henrique Barboza
The function is doing way more than just init user properties. We would
also like to use the 'user_extension_properties' class property, as the
TCG driver is already using, to decide whether KVM should expose user
properties or not.

Rename kvm_riscv_init_user_properties() to riscv_init_kvm_registers()
and leave only the essential, non-optional KVM init functions there. All
functions that deal with property handling are now gated via
rcc->user_extension_properties.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/kvm/kvm-cpu.c | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
index 3c4fa43cee..85e8b0a927 100644
--- a/target/riscv/kvm/kvm-cpu.c
+++ b/target/riscv/kvm/kvm-cpu.c
@@ -792,7 +792,7 @@ static void kvm_riscv_init_multiext_cfg(RISCVCPU *cpu, 
KVMScratchCPU *kvmcpu)
 }
 }
 
-static void riscv_init_user_properties(Object *cpu_obj)
+static void riscv_init_kvm_registers(Object *cpu_obj)
 {
 RISCVCPU *cpu = RISCV_CPU(cpu_obj);
 KVMScratchCPU kvmcpu;
@@ -801,7 +801,6 @@ static void riscv_init_user_properties(Object *cpu_obj)
 return;
 }
 
-kvm_riscv_add_cpu_user_properties(cpu_obj);
 kvm_riscv_init_machine_ids(cpu, );
 kvm_riscv_init_misa_ext_mask(cpu, );
 kvm_riscv_init_multiext_cfg(cpu, );
@@ -1295,16 +1294,20 @@ DEFINE_TYPES(riscv_kvm_cpu_type_infos)
 static void kvm_cpu_instance_init(CPUState *cs)
 {
 Object *obj = OBJECT(RISCV_CPU(cs));
+RISCVCPUClass *rcc = RISCV_CPU_GET_CLASS(obj);
 DeviceState *dev = DEVICE(obj);
 
-riscv_init_user_properties(obj);
+riscv_init_kvm_registers(obj);
 
-riscv_add_satp_mode_properties(obj);
-riscv_cpu_add_misa_properties(obj);
+if (rcc->user_extension_properties) {
+kvm_riscv_add_cpu_user_properties(obj);
+riscv_add_satp_mode_properties(obj);
+riscv_cpu_add_misa_properties(obj);
 
-riscv_cpu_add_kvm_unavail_prop_array(obj, riscv_cpu_extensions);
-riscv_cpu_add_kvm_unavail_prop_array(obj, riscv_cpu_vendor_exts);
-riscv_cpu_add_kvm_unavail_prop_array(obj, riscv_cpu_experimental_exts);
+riscv_cpu_add_kvm_unavail_prop_array(obj, riscv_cpu_extensions);
+riscv_cpu_add_kvm_unavail_prop_array(obj, riscv_cpu_vendor_exts);
+riscv_cpu_add_kvm_unavail_prop_array(obj, riscv_cpu_experimental_exts);
+}
 
 for (Property *prop = riscv_cpu_options; prop && prop->name; prop++) {
 /* Check if we have a specific KVM handler for the option */
-- 
2.41.0




[PATCH 02/20] target/riscv: move riscv_cpu_realize_tcg() to TCG::cpu_realizefn()

2023-08-25 Thread Daniel Henrique Barboza
riscv_cpu_realize_tcg() was added to allow TCG cpus to have a different
realize() path during the common riscv_cpu_realize(), making it a good
choice to start moving TCG exclusive code to tcg-cpu.c.

Rename it to tcg_cpu_realizefn() and assign it as an implementation of
accel::cpu_realizefn(). tcg_cpu_realizefn() will then be called during
riscv_cpu_realize() via cpu_exec_realizefn(). We'll use a similar
approach with KVM in the near future.

riscv_cpu_validate_set_extensions() is too big and with too many
dependencies to be moved in this same patch. We'll do that next.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c | 128 ---
 target/riscv/tcg/tcg-cpu.c | 132 +
 2 files changed, 132 insertions(+), 128 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 38dcbc4dd2..36c5c6e579 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -23,9 +23,7 @@
 #include "qemu/log.h"
 #include "cpu.h"
 #include "cpu_vendorid.h"
-#include "pmu.h"
 #include "internals.h"
-#include "time_helper.h"
 #include "exec/exec-all.h"
 #include "qapi/error.h"
 #include "qapi/visitor.h"
@@ -1065,29 +1063,6 @@ static void riscv_cpu_validate_v(CPURISCVState *env, 
RISCVCPUConfig *cfg,
 }
 }
 
-static void riscv_cpu_validate_priv_spec(RISCVCPU *cpu, Error **errp)
-{
-CPURISCVState *env = >env;
-int priv_version = -1;
-
-if (cpu->cfg.priv_spec) {
-if (!g_strcmp0(cpu->cfg.priv_spec, "v1.12.0")) {
-priv_version = PRIV_VERSION_1_12_0;
-} else if (!g_strcmp0(cpu->cfg.priv_spec, "v1.11.0")) {
-priv_version = PRIV_VERSION_1_11_0;
-} else if (!g_strcmp0(cpu->cfg.priv_spec, "v1.10.0")) {
-priv_version = PRIV_VERSION_1_10_0;
-} else {
-error_setg(errp,
-   "Unsupported privilege spec version '%s'",
-   cpu->cfg.priv_spec);
-return;
-}
-
-env->priv_ver = priv_version;
-}
-}
-
 static void riscv_cpu_disable_priv_spec_isa_exts(RISCVCPU *cpu)
 {
 CPURISCVState *env = >env;
@@ -1112,33 +1087,6 @@ static void 
riscv_cpu_disable_priv_spec_isa_exts(RISCVCPU *cpu)
 }
 }
 
-static void riscv_cpu_validate_misa_mxl(RISCVCPU *cpu, Error **errp)
-{
-RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(cpu);
-CPUClass *cc = CPU_CLASS(mcc);
-CPURISCVState *env = >env;
-
-/* Validate that MISA_MXL is set properly. */
-switch (env->misa_mxl_max) {
-#ifdef TARGET_RISCV64
-case MXL_RV64:
-case MXL_RV128:
-cc->gdb_core_xml_file = "riscv-64bit-cpu.xml";
-break;
-#endif
-case MXL_RV32:
-cc->gdb_core_xml_file = "riscv-32bit-cpu.xml";
-break;
-default:
-g_assert_not_reached();
-}
-
-if (env->misa_mxl_max != env->misa_mxl) {
-error_setg(errp, "misa_mxl_max must be equal to misa_mxl");
-return;
-}
-}
-
 /*
  * Check consistency between chosen extensions while setting
  * cpu->cfg accordingly.
@@ -1512,74 +1460,6 @@ static void riscv_cpu_finalize_features(RISCVCPU *cpu, 
Error **errp)
 #endif
 }
 
-static void riscv_cpu_validate_misa_priv(CPURISCVState *env, Error **errp)
-{
-if (riscv_has_ext(env, RVH) && env->priv_ver < PRIV_VERSION_1_12_0) {
-error_setg(errp, "H extension requires priv spec 1.12.0");
-return;
-}
-}
-
-static void riscv_cpu_realize_tcg(DeviceState *dev, Error **errp)
-{
-RISCVCPU *cpu = RISCV_CPU(dev);
-CPURISCVState *env = >env;
-Error *local_err = NULL;
-
-if (object_dynamic_cast(OBJECT(dev), TYPE_RISCV_CPU_HOST)) {
-error_setg(errp, "'host' CPU is not compatible with TCG acceleration");
-return;
-}
-
-riscv_cpu_validate_misa_mxl(cpu, _err);
-if (local_err != NULL) {
-error_propagate(errp, local_err);
-return;
-}
-
-riscv_cpu_validate_priv_spec(cpu, _err);
-if (local_err != NULL) {
-error_propagate(errp, local_err);
-return;
-}
-
-riscv_cpu_validate_misa_priv(env, _err);
-if (local_err != NULL) {
-error_propagate(errp, local_err);
-return;
-}
-
-if (cpu->cfg.epmp && !cpu->cfg.pmp) {
-/*
- * Enhanced PMP should only be available
- * on harts with PMP support
- */
-error_setg(errp, "Invalid configuration: EPMP requires PMP support");
-return;
-}
-
-riscv_cpu_validate_set_extensions(cpu, _err);
-if (local_err != NULL) {
-error_propagate(errp, local_err);
-return;
-}
-
-#ifndef CONFIG_USER_ONLY
-CPU(dev)->tcg_cflags |= CF_PCREL;
-
-if (cpu->cfg.ext_sstc) {
-riscv_timer_init(cpu);
-}
-
-if (cpu->cfg.pmu_num) {
-if (!riscv_pmu_init(cpu, cpu->cfg.pmu_num) && cpu->cfg.ext_sscofpmf) {
-cpu->pmu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
-  riscv_pmu_timer_cb, cpu);
-}
- 

[PATCH 06/20] target/riscv: add 'max_features' CPU flag

2023-08-25 Thread Daniel Henrique Barboza
The 'max' CPU type is being configured during init() time by enabling
all relevant extensions.

Instead of checking for 'max' CPU to enable all extensions, add a new
CPU cfg flag 'max_features' that can be used by any CPU during its
cpu_init() function. We'll check for it during post_init() time to
decide whether we should enable the maximum amount of features in the
current CPU instance.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c | 2 ++
 target/riscv/cpu_cfg.h | 1 +
 2 files changed, 3 insertions(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index e2e8724dc2..c35d58c64b 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -424,6 +424,8 @@ static void riscv_max_cpu_init(Object *obj)
 CPURISCVState *env = >env;
 RISCVMXL mlx = MXL_RV64;
 
+cpu->cfg.max_features = true;
+
 #ifdef TARGET_RISCV32
 mlx = MXL_RV32;
 #endif
diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
index 0e6a0f245c..df723e697b 100644
--- a/target/riscv/cpu_cfg.h
+++ b/target/riscv/cpu_cfg.h
@@ -137,6 +137,7 @@ struct RISCVCPUConfig {
 bool epmp;
 bool debug;
 bool misa_w;
+bool max_features;
 
 bool short_isa_string;
 
-- 
2.41.0




[PATCH 09/20] target/riscv/cpu.c: mark extensions arrays as 'const'

2023-08-25 Thread Daniel Henrique Barboza
We'll need to export these arrays to the accelerator classes in the next
patches. Mark them as 'const' now to minimize changes in the future.

Note that 'riscv_cpu_options' will also be exported, but can't be marked
as 'const', because the properties are changed via
qdev_property_add_static().

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c | 22 +-
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index dbf81796d2..4eda853f1d 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1381,7 +1381,7 @@ typedef struct RISCVCPUMultiExtConfig {
 {.name = _name, .offset = CPU_CFG_OFFSET(_prop), \
  .enabled = _defval}
 
-static RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
+static const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
 /* Defaults for standard extensions */
 MULTI_EXT_CFG_BOOL("sscofpmf", ext_sscofpmf, false),
 MULTI_EXT_CFG_BOOL("Zifencei", ext_ifencei, true),
@@ -1441,7 +1441,7 @@ static RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
 DEFINE_PROP_END_OF_LIST(),
 };
 
-static RISCVCPUMultiExtConfig riscv_cpu_vendor_exts[] = {
+static const RISCVCPUMultiExtConfig riscv_cpu_vendor_exts[] = {
 MULTI_EXT_CFG_BOOL("xtheadba", ext_xtheadba, false),
 MULTI_EXT_CFG_BOOL("xtheadbb", ext_xtheadbb, false),
 MULTI_EXT_CFG_BOOL("xtheadbs", ext_xtheadbs, false),
@@ -1459,7 +1459,7 @@ static RISCVCPUMultiExtConfig riscv_cpu_vendor_exts[] = {
 };
 
 /* These are experimental so mark with 'x-' */
-static RISCVCPUMultiExtConfig riscv_cpu_experimental_exts[] = {
+static const RISCVCPUMultiExtConfig riscv_cpu_experimental_exts[] = {
 MULTI_EXT_CFG_BOOL("x-zicond", ext_zicond, false),
 
 /* ePMP 0.9.3 */
@@ -1532,7 +1532,7 @@ static void cpu_get_multi_ext_cfg(Object *obj, Visitor 
*v, const char *name,
 }
 
 static void cpu_add_multi_ext_prop(Object *cpu_obj,
-   RISCVCPUMultiExtConfig *multi_cfg)
+   const RISCVCPUMultiExtConfig *multi_cfg)
 {
 object_property_add(cpu_obj, multi_cfg->name, "bool",
 cpu_get_multi_ext_cfg,
@@ -1568,9 +1568,11 @@ static void cpu_set_cfg_unavailable(Object *obj, Visitor 
*v,
 #endif
 
 static void riscv_cpu_add_multiext_prop_array(Object *obj,
-  RISCVCPUMultiExtConfig *array)
+const RISCVCPUMultiExtConfig *array)
 {
-for (RISCVCPUMultiExtConfig *prop = array; prop && prop->name; prop++) {
+const RISCVCPUMultiExtConfig *prop;
+
+for (prop = array; prop && prop->name; prop++) {
 cpu_add_multi_ext_prop(obj, prop);
 }
 }
@@ -1594,9 +1596,11 @@ static void riscv_cpu_add_kvm_unavail_prop(Object *obj, 
const char *prop_name)
 }
 
 static void riscv_cpu_add_kvm_unavail_prop_array(Object *obj,
- RISCVCPUMultiExtConfig *array)
+const RISCVCPUMultiExtConfig *array)
 {
-for (RISCVCPUMultiExtConfig *prop = array; prop && prop->name; prop++) {
+const RISCVCPUMultiExtConfig *prop;
+
+for (prop = array; prop && prop->name; prop++) {
 riscv_cpu_add_kvm_unavail_prop(obj, prop->name);
 }
 }
@@ -1659,7 +1663,7 @@ static void riscv_init_max_cpu_extensions(Object *obj)
 {
 RISCVCPU *cpu = RISCV_CPU(obj);
 CPURISCVState *env = >env;
-RISCVCPUMultiExtConfig *prop;
+const RISCVCPUMultiExtConfig *prop;
 
 /* Enable RVG, RVJ and RVV that are disabled by default */
 set_misa(env, env->misa_mxl, env->misa_ext | RVG | RVJ | RVV);
-- 
2.41.0




[PATCH 00/20] riscv: split TCG/KVM accelerators from cpu.c

2023-08-25 Thread Daniel Henrique Barboza
Based-on: 20230824221440.484675-1-dbarb...@ventanamicro.com
("[PATCH RESEND v8 00/20] riscv: 'max' CPU, detect user choice in TCG")

Hi,

The idea of this work was hinted at during a review [1] where Phil
mentioned that we should handle TCG specific constraints in
AccelCPUClass::cpu_realizefn(). While working on that I came across
the work done in x86 by Claudio Fontana in commit f5cc5a5c1 ("i386:
split cpu accelerators from cpu.c, using AccelCPUClass"). The design
implemented here is heavily inspired by Claudio's work.

An AccelCPUClass is an abstraction used by all QEMU accelerators that
are already streamlined in the init/realize process, doesn't matter if
we use it or not. Using accel classes allows us to split accel-specific
code from cpu.c into their own files, making it easier to support
accel-specific builds in the future. It also gives us a template to
follow when adding new accelerators in the future.

The final goal, not entirely reached with this series, is to have cpu.c
hosting only common code for all accelerators, in particular the code
related to extensions support. We should declare extensions in cpu.c
then go to each accelerator class and do what you want with it. We're
not there yet due to how we rely on isa_edata_arr[] for both priv-spec
checks (a tcg only thing) and provide the riscv,isa string (all
accelerators). Trying to untangle priv-spec and isa_str is a fight for
another day.

You'll also notice that I didn't move all TCG related files to the 'tcg'
subdir. The reason is that Phil already did that here [2]:

"[PATCH 00/16] target/riscv: Allow building without TCG (KVM-only so far)"

and I deliberately avoided colliding with what he did. Phil's series focuses
on splitting TCG includes and ifdefs into TCG-specific files, while this
series focuses on decoupling accel-specific logic inside cpu.c.

The only behavior change implemented is in patch 20 where we block
vendor CPUs from using KVM. Most of the time I'm just juggling code
around to avoid breaking what we already have while trying to keep
patches review-sane.

No other behavior changes were intended with this series.

[1] 
https://lore.kernel.org/qemu-riscv/3b93823c-3d12-0d67-b814-54a3922d0...@linaro.org/
[2] https://lore.kernel.org/qemu-riscv/20230711121453.59138-1-phi...@linaro.org/


Daniel Henrique Barboza (20):
  target/riscv: introduce TCG AccelCPUClass
  target/riscv: move riscv_cpu_realize_tcg() to TCG::cpu_realizefn()
  target/riscv: move riscv_cpu_validate_set_extensions() to tcg-cpu.c
  target/riscv: move riscv_tcg_ops to tcg-cpu.c
  target/riscv/cpu.c: add 'user_extension_properties' class prop
  target/riscv: add 'max_features' CPU flag
  target/riscv/cpu.c: add .instance_post_init()
  target/riscv: move 'host' CPU declaration to kvm.c
  target/riscv/cpu.c: mark extensions arrays as 'const'
  target/riscv: move riscv_cpu_add_kvm_properties() to kvm.c
  target/riscv: introduce KVM AccelCPUClass
  target/riscv: move KVM only files to kvm subdir
  target/riscv/kvm: refactor kvm_riscv_init_user_properties()
  target/riscv/kvm: do not use riscv_cpu_add_misa_properties()
  target/riscv/tcg: introduce tcg_cpu_instance_init()
  target/riscv/tcg: move riscv_cpu_add_misa_properties() to tcg-cpu.c
  target/riscv/cpu.c: export isa_edata_arr[]
  target/riscv/cpu: move priv spec functions to tcg-cpu.c
  target/riscv: add 'tcg_supported' class property
  target/riscv: add 'kvm_supported' class property

 hw/riscv/virt.c   |   2 +-
 target/riscv/cpu-qom.h|   5 +
 target/riscv/cpu.c| 999 ++
 target/riscv/cpu.h|  31 +-
 target/riscv/cpu_cfg.h|   1 +
 target/riscv/csr.c|   1 +
 target/riscv/{kvm.c => kvm/kvm-cpu.c} | 153 +++-
 target/riscv/{ => kvm}/kvm-stub.c |   0
 target/riscv/{ => kvm}/kvm_riscv.h|   1 -
 target/riscv/kvm/meson.build  |   2 +
 target/riscv/meson.build  |   4 +-
 target/riscv/tcg/meson.build  |   2 +
 target/riscv/tcg/tcg-cpu.c| 864 ++
 target/riscv/tcg/tcg-cpu.h|  28 +
 14 files changed, 1160 insertions(+), 933 deletions(-)
 rename target/riscv/{kvm.c => kvm/kvm-cpu.c} (89%)
 rename target/riscv/{ => kvm}/kvm-stub.c (100%)
 rename target/riscv/{ => kvm}/kvm_riscv.h (95%)
 create mode 100644 target/riscv/kvm/meson.build
 create mode 100644 target/riscv/tcg/meson.build
 create mode 100644 target/riscv/tcg/tcg-cpu.c
 create mode 100644 target/riscv/tcg/tcg-cpu.h

-- 
2.41.0




  1   2   >