Re: [PATCH v6 5/5] hw/pci: ensure PCIE devices are plugged into only slot 0 of PCIE port
On 2023/06/29 13:07, Ani Sinha wrote: PCI Express ports only have one slot, so PCI Express devices can only be plugged into slot 0 on a PCIE port. Enforce it. The change has been tested to not break ARI by instantiating seven vfs on an emulated igb device (the maximum number of vfs the linux igb driver supports). The vfs are seen to have non-zero device/slot numbers in the conventional PCI BDF representation. CC: jus...@redhat.com CC: imamm...@redhat.com CC: akihiko.od...@daynix.com Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929 Signed-off-by: Ani Sinha Reviewed-by: Julia Suvorova --- hw/pci/pci.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index e2eb4c3b4a..0320ac2bb3 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -65,6 +65,7 @@ bool pci_available = true; static char *pcibus_get_dev_path(DeviceState *dev); static char *pcibus_get_fw_dev_path(DeviceState *dev); static void pcibus_reset(BusState *qbus); +static bool pcie_has_upstream_port(PCIDevice *dev); static Property pci_props[] = { DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1), @@ -1190,6 +1191,20 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, name); return NULL; +} /* + * With SRIOV and ARI, vfs can have non-zero slot in the conventional + * PCI interpretation as all five bits reserved for slot addresses are + * also used for function bits for the various vfs. Ignore that case. + * It is too early here to check for ARI capabilities in the PCI config + * space. Hence, we check for a vf device instead. + */ Why don't just perform this check after the capabilities are set? Regards, Akihiko Odaki +else if (!pci_is_vf(pci_dev) && + pcie_has_upstream_port(pci_dev) && + PCI_SLOT(devfn)) { +error_setg(errp, "PCI: slot %d is not valid for %s," + " parent device only allows plugging into slot 0.", + PCI_SLOT(devfn), name); +return NULL; } pci_dev->devfn = devfn;
Re: [PATCH 2/6] tests/qtest: migration: Expose migrate_set_capability
On 28/06/2023 18.55, Fabiano Rosas wrote: The following patch will make use of this function from within migrate-helpers.c, so move it there. Signed-off-by: Fabiano Rosas Reviewed-by: Juan Quintela --- tests/qtest/migration-helpers.c | 11 +++ tests/qtest/migration-helpers.h | 3 +++ tests/qtest/migration-test.c| 11 --- 3 files changed, 14 insertions(+), 11 deletions(-) Reviewed-by: Thomas Huth
Re: [PATCH v3 6/6] target/ppc: Remove pointless checks of CONFIG_USER_ONLY in 'kvm_ppc.h'
On Tue, 27 Jun 2023 13:51:24 +0200 Philippe Mathieu-Daudé wrote: > Signed-off-by: Philippe Mathieu-Daudé > --- Reviewed-by: Greg Kurz > target/ppc/kvm_ppc.h | 5 - > 1 file changed, 5 deletions(-) > > diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h > index 901e188c9a..6a4dd9c560 100644 > --- a/target/ppc/kvm_ppc.h > +++ b/target/ppc/kvm_ppc.h > @@ -42,7 +42,6 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); > target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, > bool radix, bool gtse, > uint64_t proc_tbl); > -#ifndef CONFIG_USER_ONLY > bool kvmppc_spapr_use_multitce(void); > int kvmppc_spapr_enable_inkernel_multitce(void); > void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, > @@ -52,7 +51,6 @@ int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t > window_size); > int kvmppc_reset_htab(int shift_hint); > uint64_t kvmppc_vrma_limit(unsigned int hash_shift); > bool kvmppc_has_cap_spapr_vfio(void); > -#endif /* !CONFIG_USER_ONLY */ > bool kvmppc_has_cap_epr(void); > int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function); > int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp); > @@ -262,7 +260,6 @@ static inline void kvmppc_set_reg_tb_offset(PowerPCCPU > *cpu, int64_t tb_offset) > { > } > > -#ifndef CONFIG_USER_ONLY > static inline bool kvmppc_spapr_use_multitce(void) > { > return false; > @@ -322,8 +319,6 @@ static inline void kvmppc_write_hpte(hwaddr ptex, > uint64_t pte0, uint64_t pte1) > abort(); > } > > -#endif /* !CONFIG_USER_ONLY */ > - > static inline bool kvmppc_has_cap_epr(void) > { > return false; -- Greg
Re: [PATCH v3 4/6] target/ppc: Define TYPE_HOST_POWERPC_CPU in cpu-qom.h
On Tue, 27 Jun 2023 13:51:22 +0200 Philippe Mathieu-Daudé wrote: > TYPE_HOST_POWERPC_CPU is used in various places of cpu_init.c, > in order to restrict "kvm_ppc.h" to sysemu, move this QOM-related > definition to cpu-qom.h. > > Signed-off-by: Philippe Mathieu-Daudé > --- Reviewed-by: Greg Kurz > target/ppc/cpu-qom.h | 2 ++ > target/ppc/kvm_ppc.h | 2 -- > 2 files changed, 2 insertions(+), 2 deletions(-) > > diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h > index c2bff349cc..4e4061068e 100644 > --- a/target/ppc/cpu-qom.h > +++ b/target/ppc/cpu-qom.h > @@ -36,6 +36,8 @@ OBJECT_DECLARE_CPU_TYPE(PowerPCCPU, PowerPCCPUClass, > POWERPC_CPU) > #define CPU_RESOLVING_TYPE TYPE_POWERPC_CPU > #define cpu_list ppc_cpu_list > > +#define TYPE_HOST_POWERPC_CPU POWERPC_CPU_TYPE_NAME("host") > + > ObjectClass *ppc_cpu_class_by_name(const char *name); > > typedef struct CPUArchState CPUPPCState; > diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h > index 49954a300b..901e188c9a 100644 > --- a/target/ppc/kvm_ppc.h > +++ b/target/ppc/kvm_ppc.h > @@ -13,8 +13,6 @@ > #include "exec/hwaddr.h" > #include "cpu.h" > > -#define TYPE_HOST_POWERPC_CPU POWERPC_CPU_TYPE_NAME("host") > - > #ifdef CONFIG_KVM > > uint32_t kvmppc_get_tbfreq(void); -- Greg
Re: [PATCH v3 5/6] target/ppc: Restrict 'kvm_ppc.h' to sysemu in cpu_init.c
On Tue, 27 Jun 2023 13:51:23 +0200 Philippe Mathieu-Daudé wrote: > User emulation shouldn't need any of the KVM prototypes > declared in "kvm_ppc.h". > > Signed-off-by: Philippe Mathieu-Daudé > --- Reviewed-by: Greg Kurz > target/ppc/cpu_init.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c > index aeff71d063..f2afb539eb 100644 > --- a/target/ppc/cpu_init.c > +++ b/target/ppc/cpu_init.c > @@ -21,7 +21,6 @@ > #include "qemu/osdep.h" > #include "disas/dis-asm.h" > #include "gdbstub/helpers.h" > -#include "kvm_ppc.h" > #include "sysemu/cpus.h" > #include "sysemu/hw_accel.h" > #include "sysemu/tcg.h" > @@ -49,6 +48,7 @@ > #ifndef CONFIG_USER_ONLY > #include "hw/boards.h" > #include "hw/intc/intc.h" > +#include "kvm_ppc.h" > #endif > > /* #define PPC_DEBUG_SPR */ -- Greg
Re: [PATCH] MAINTAINERS: Promote Cédric to VFIO co-maintainer
On 6/28/23 19:29, Alex Williamson wrote: Cédric has stepped up involvement in vfio, reviewing and managing patches, as well as pull requests. This work deserves gratitude and punishment with a promotion to co-maintainer ;) :) Signed-off-by: Alex Williamson Acked-by: Cédric Le Goater --- Cédric, I'd also support if you wanted to add a tree entry here. OK. I had some habits on github. For VFIO, I might change for gitlab where the CI runs. It can come later. Thanks, C. MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e07746ac7d45..37e48c72b16a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2051,7 +2051,7 @@ F: hw/usb/dev-serial.c VFIO M: Alex Williamson -R: Cédric Le Goater +M: Cédric Le Goater S: Supported F: hw/vfio/* F: include/hw/vfio/
Re: [RFC PATCH 0/3] ppc/pnv: SMT support for powernv
On 6/29/23 04:16, Nicholas Piggin wrote: These patches implement enough to boot a SMT powernv machine to Linux and boot a SMP KVM guest inside that. There are a few more SPRs that need to be done, and per-LPAR SPRs are mostly not annotated yet so it can't run in 1LPAR mode. But it is enough to run skiboot/Linux with SMT so I'll just post the minimal patches for RFC because the concept isn't really different to add more SPRs and things. This is good material for 8.1. Please send a v1 and Cc: all pnv reviewers. Thanks, C.
Re: [PULL 00/23] Block layer patches
On 6/28/23 16:15, Kevin Wolf wrote: The following changes since commit 52ed34cbddde1cb89b2ac263e758e349a77f21e1: Merge tag 'pull-request-2023-06-26' ofhttps://gitlab.com/thuth/qemu into staging (2023-06-26 10:38:41 +0200) are available in the Git repository at: https://repo.or.cz/qemu/kevin.git tags/for-upstream for you to fetch changes up to 17362398ee1a7f04e8006a46333145d8b707fd35: block: use bdrv_co_debug_event in coroutine context (2023-06-28 09:46:34 +0200) Block layer patches - Re-enable the graph lock - More fixes to coroutine_fn marking Applied, thanks. Please update https://wiki.qemu.org/ChangeLog/8.1 as appropriate. r~
Re: [PATCH qemu] aspeed add montblanc bmc reference from fuji
On 6/29/23 04:57, Sittisak Sinprem wrote: Hi Cédric, I had fixed the function name to support in current branch, but facing about below error while starting ./build/qemu-system-arm -machine montblanc-bmc -drive file=~/flash-montblanc,format=raw,if=mtd -nographic -netdev tap,id=netdev0,script=no,downscript=no,ifname=tap0 -net nic,netdev=netdev0,model=ftgmac100 qemu-system-arm: device requires 134217728 bytes, block backend provides 27726336 bytes File ~/flash-montblanc is too big for the mx66l1g45g flash device and 128MB is expected. Use truncate maybe. Thanks, C.
Re: [RFC PATCH 3/3] ppc/pnv: SMT support for powernv
On 6/29/23 04:16, Nicholas Piggin wrote: Set the TIR default value with the SMT thread index, and place some standard limits on SMT configurations. Now powernv is able to boot skiboot and Linux with a SMT topology, including booting a KVM guest. There are several other per-core SPRs, but they are not so important to run OPAL/Linux. Some important per-LPAR ones to convert before powernv could run in 1LPAR mode. Broadcast msgsnd is not yet implemented either, but skiboot/Linux does not use that. KVM uses an implementation-specific detail of POWER9/10 TLBs where TLBIEL invalidates translations of all threads on a core, but that is not required here because KVM does not cache translations across PID or LPID switch. Most of these I have or aren't too hard to implement, but I start with a small bare bones for comments. Not-yet-Signed-off-by: Nicholas Piggin I am glad this becoming possible. You can model the missing parts later on. Reviewed-by: Cédric Le Goater Thanks, C. --- hw/ppc/pnv.c | 12 hw/ppc/pnv_core.c | 13 + 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index fc083173f3..f599ccad1d 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -887,6 +887,18 @@ static void pnv_init(MachineState *machine) pnv->num_chips = machine->smp.max_cpus / (machine->smp.cores * machine->smp.threads); + +if (machine->smp.threads > 8) { +error_report("Cannot support more than 8 threads/core " + "on a powernv machine"); +exit(1); +} +if (!is_power_of_2(machine->smp.threads)) { +error_report("Cannot support %d threads/core on a powernv" + "machine because it must be a power of 2", + machine->smp.threads); +exit(1); +} /* * TODO: should we decide on how many chips we can create based * on #cores and Venice vs. Murano vs. Naples chip type etc..., diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 0bc3ad41c8..acd83caee8 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -167,12 +167,13 @@ static const MemoryRegionOps pnv_core_power9_xscom_ops = { .endianness = DEVICE_BIG_ENDIAN, }; -static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp) +static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp, + int thread_index) { CPUPPCState *env = &cpu->env; int core_pir; -int thread_index = 0; /* TODO: TCG supports only one thread */ ppc_spr_t *pir = &env->spr_cb[SPR_PIR]; +ppc_spr_t *tir = &env->spr_cb[SPR_TIR]; Error *local_err = NULL; PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip); @@ -188,11 +189,7 @@ static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp) core_pir = object_property_get_uint(OBJECT(pc), "pir", &error_abort); -/* - * The PIR of a thread is the core PIR + the thread index. We will - * need to find a way to get the thread index when TCG supports - * more than 1. We could use the object name ? - */ +tir->default_value = thread_index; pir->default_value = core_pir + thread_index; /* Set time-base frequency to 512 MHz */ @@ -241,7 +238,7 @@ static void pnv_core_realize(DeviceState *dev, Error **errp) } for (j = 0; j < cc->nr_threads; j++) { -pnv_core_cpu_realize(pc, pc->threads[j], &local_err); +pnv_core_cpu_realize(pc, pc->threads[j], &local_err, j); if (local_err) { goto err; }
Re: [PATCH] hw/ppc: Simplify clock update arithmetic
On 6/25/23 14:20, Nicholas Piggin wrote: The clock update logic reads the clock twice to compute the new clock value, with a value derived from the later time subtracted from a value derived from the earlier time. This can lead to an underflow in subtractions in bits that are intended to cancel exactly. This might not cause any real problem, but it is more complicated than necessary to reason about. Simplify this by reading the clock once. Signed-off-by: Nicholas Piggin This patch has ben superseded by https://patchwork.ozlabs.org/project/qemu-ppc/patch/20230629020713.327745-1-npig...@gmail.com/ It is nice to add a v2 prefix, even if you change the change the subject. Thanks, C. --- hw/ppc/ppc.c | 33 + 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index f4fe1767d6..5d0a09eb5e 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -536,23 +536,24 @@ static inline void cpu_ppc_store_tb(ppc_tb_t *tb_env, uint64_t vmclk, void cpu_ppc_store_tbl (CPUPPCState *env, uint32_t value) { ppc_tb_t *tb_env = env->tb_env; +int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); uint64_t tb; -tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); +tb = cpu_ppc_get_tb(tb_env, clock, tb_env->tb_offset); tb &= 0xULL; -cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - &tb_env->tb_offset, tb | (uint64_t)value); +cpu_ppc_store_tb(tb_env, clock, &tb_env->tb_offset, tb | (uint64_t)value); } static inline void _cpu_ppc_store_tbu(CPUPPCState *env, uint32_t value) { ppc_tb_t *tb_env = env->tb_env; +int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); uint64_t tb; -tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); +tb = cpu_ppc_get_tb(tb_env, clock, tb_env->tb_offset); tb &= 0xULL; -cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - &tb_env->tb_offset, ((uint64_t)value << 32) | tb); +cpu_ppc_store_tb(tb_env, clock, &tb_env->tb_offset, + ((uint64_t)value << 32) | tb); } void cpu_ppc_store_tbu (CPUPPCState *env, uint32_t value) @@ -585,23 +586,24 @@ uint32_t cpu_ppc_load_atbu (CPUPPCState *env) void cpu_ppc_store_atbl (CPUPPCState *env, uint32_t value) { ppc_tb_t *tb_env = env->tb_env; +int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); uint64_t tb; -tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); +tb = cpu_ppc_get_tb(tb_env, clock, tb_env->atb_offset); tb &= 0xULL; -cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - &tb_env->atb_offset, tb | (uint64_t)value); +cpu_ppc_store_tb(tb_env, clock, &tb_env->atb_offset, tb | (uint64_t)value); } void cpu_ppc_store_atbu (CPUPPCState *env, uint32_t value) { ppc_tb_t *tb_env = env->tb_env; +int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); uint64_t tb; -tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); +tb = cpu_ppc_get_tb(tb_env, clock, tb_env->atb_offset); tb &= 0xULL; -cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - &tb_env->atb_offset, ((uint64_t)value << 32) | tb); +cpu_ppc_store_tb(tb_env, clock, &tb_env->atb_offset, + ((uint64_t)value << 32) | tb); } uint64_t cpu_ppc_load_vtb(CPUPPCState *env) @@ -623,14 +625,13 @@ void cpu_ppc_store_vtb(CPUPPCState *env, uint64_t value) void cpu_ppc_store_tbu40(CPUPPCState *env, uint64_t value) { ppc_tb_t *tb_env = env->tb_env; +int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); uint64_t tb; -tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), -tb_env->tb_offset); +tb = cpu_ppc_get_tb(tb_env, clock, tb_env->tb_offset); tb &= 0xFFUL; tb |= (value & ~0xFFUL); -cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - &tb_env->tb_offset, tb); +cpu_ppc_store_tb(tb_env, clock, &tb_env->tb_offset, tb); } static void cpu_ppc_tb_stop (CPUPPCState *env)
Re: [PATCH] net: add initial support for AF_XDP network backend
On Wed, Jun 28, 2023 at 4:25 PM Stefan Hajnoczi wrote: > > On Wed, 28 Jun 2023 at 10:19, Jason Wang wrote: > > > > On Wed, Jun 28, 2023 at 4:15 PM Stefan Hajnoczi wrote: > > > > > > On Wed, 28 Jun 2023 at 09:59, Jason Wang wrote: > > > > > > > > On Wed, Jun 28, 2023 at 3:46 PM Stefan Hajnoczi > > > > wrote: > > > > > > > > > > On Wed, 28 Jun 2023 at 05:28, Jason Wang wrote: > > > > > > > > > > > > On Wed, Jun 28, 2023 at 6:45 AM Ilya Maximets > > > > > > wrote: > > > > > > > > > > > > > > On 6/27/23 04:54, Jason Wang wrote: > > > > > > > > On Mon, Jun 26, 2023 at 9:17 PM Ilya Maximets > > > > > > > > wrote: > > > > > > > >> > > > > > > > >> On 6/26/23 08:32, Jason Wang wrote: > > > > > > > >>> On Sun, Jun 25, 2023 at 3:06 PM Jason Wang > > > > > > > >>> wrote: > > > > > > > > > > > > > > On Fri, Jun 23, 2023 at 5:58 AM Ilya Maximets > > > > > > > wrote: > > > > > > > >> It is noticeably more performant than a tap with vhost=on in > > > > > > > >> terms of PPS. > > > > > > > >> So, that might be one case. Taking into account that just rcu > > > > > > > >> lock and > > > > > > > >> unlock in virtio-net code takes more time than a packet copy, > > > > > > > >> some batching > > > > > > > >> on QEMU side should improve performance significantly. And it > > > > > > > >> shouldn't be > > > > > > > >> too hard to implement. > > > > > > > >> > > > > > > > >> Performance over virtual interfaces may potentially be > > > > > > > >> improved by creating > > > > > > > >> a kernel thread for async Tx. Similarly to what io_uring > > > > > > > >> allows. Currently > > > > > > > >> Tx on non-zero-copy interfaces is synchronous, and that > > > > > > > >> doesn't allow to > > > > > > > >> scale well. > > > > > > > > > > > > > > > > Interestingly, actually, there are a lot of "duplication" > > > > > > > > between > > > > > > > > io_uring and AF_XDP: > > > > > > > > > > > > > > > > 1) both have similar memory model (user register) > > > > > > > > 2) both use ring for communication > > > > > > > > > > > > > > > > I wonder if we can let io_uring talks directly to AF_XDP. > > > > > > > > > > > > > > Well, if we submit poll() in QEMU main loop via io_uring, then we > > > > > > > can > > > > > > > avoid cost of the synchronous Tx for non-zero-copy modes, i.e. for > > > > > > > virtual interfaces. io_uring thread in the kernel will be able to > > > > > > > perform transmission for us. > > > > > > > > > > > > It would be nice if we can use iothread/vhost other than the main > > > > > > loop > > > > > > even if io_uring can use kthreads. We can avoid the memory > > > > > > translation > > > > > > cost. > > > > > > > > > > The QEMU event loop (AioContext) has io_uring code > > > > > (utils/fdmon-io_uring.c) but it's disabled at the moment. I'm working > > > > > on patches to re-enable it and will probably send them in July. The > > > > > patches also add an API to submit arbitrary io_uring operations so > > > > > that you can do stuff besides file descriptor monitoring. Both the > > > > > main loop and IOThreads will be able to use io_uring on Linux hosts. > > > > > > > > Just to make sure I understand. If we still need a copy from guest to > > > > io_uring buffer, we still need to go via memory API for GPA which > > > > seems expensive. > > > > > > > > Vhost seems to be a shortcut for this. > > > > > > I'm not sure how exactly you're thinking of using io_uring. > > > > > > Simply using io_uring for the event loop (file descriptor monitoring) > > > doesn't involve an extra buffer, but the packet payload still needs to > > > reside in AF_XDP umem, so there is a copy between guest memory and > > > umem. > > > > So there would be a translation from GPA to HVA (unless io_uring > > support 2 stages) which needs to go via qemu memory core. And this > > part seems to be very expensive according to my test in the past. > > Yes, but in the current approach where AF_XDP is implemented as a QEMU > netdev, there is already QEMU device emulation (e.g. virtio-net) > happening. So the GPA to HVA translation will happen anyway in device > emulation. Just to make sure we're on the same page. I meant, AF_XDP can do more than e.g 10Mpps. So if we still use the QEMU netdev, it would be very hard to achieve that if we stick to using the Qemu memory core translations which need to take care about too much extra stuff. That's why I suggest using vhost in io threads which only cares about ram so the translation could be very fast. > > Are you thinking about AF_XDP passthrough where the guest directly > interacts with AF_XDP? This could be another way to solve, since it won't use Qemu's memory core to do the translation. > > > > If umem encompasses guest memory, > > > > It requires you to pin the whole guest memory and a GPA to HVA > > translation is still required. > > Ilya mentioned that umem uses relative offsets instead of absolute > memory addresses. In the AF_XDP passthrough case this means no address >
Re: [PATCH] hw/ppc: Fix clock update drift
On 6/29/23 04:07, Nicholas Piggin wrote: The clock update logic reads the clock twice to compute the new clock value, with a value derived from the later time subtracted from a value derived from the earlier time. The delta causes time to be lost. This can ultimately result in time becoming unsynchronized between CPUs and that can cause OS lockups, timeouts, watchdogs, etc. This can be seen running a KVM guest (that causes lots of TB updates) on a powernv SMP machine. Fix this by reading the clock once. Cc: qemu-sta...@nongnu.org Signed-off-by: Nicholas Piggin Fixes: dbdd25065e90 ("Implement time-base start/stop helpers. Implement PowerPC 6xx time-base enable input pin.") Reviewed-by: Cédric Le Goater Thanks, C. --- I also made a test case that can trigger this with kvm-unit-tests, but it's been taking me a while to get that upstreamed. Thanks, Nick hw/ppc/ppc.c | 33 + 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 82e4408c5c..6233f43c01 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -535,23 +535,24 @@ static inline void cpu_ppc_store_tb(ppc_tb_t *tb_env, uint64_t vmclk, void cpu_ppc_store_tbl (CPUPPCState *env, uint32_t value) { ppc_tb_t *tb_env = env->tb_env; +int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); uint64_t tb; -tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); +tb = cpu_ppc_get_tb(tb_env, clock, tb_env->tb_offset); tb &= 0xULL; -cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - &tb_env->tb_offset, tb | (uint64_t)value); +cpu_ppc_store_tb(tb_env, clock, &tb_env->tb_offset, tb | (uint64_t)value); } static inline void _cpu_ppc_store_tbu(CPUPPCState *env, uint32_t value) { ppc_tb_t *tb_env = env->tb_env; +int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); uint64_t tb; -tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); +tb = cpu_ppc_get_tb(tb_env, clock, tb_env->tb_offset); tb &= 0xULL; -cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - &tb_env->tb_offset, ((uint64_t)value << 32) | tb); +cpu_ppc_store_tb(tb_env, clock, &tb_env->tb_offset, + ((uint64_t)value << 32) | tb); } void cpu_ppc_store_tbu (CPUPPCState *env, uint32_t value) @@ -584,23 +585,24 @@ uint32_t cpu_ppc_load_atbu (CPUPPCState *env) void cpu_ppc_store_atbl (CPUPPCState *env, uint32_t value) { ppc_tb_t *tb_env = env->tb_env; +int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); uint64_t tb; -tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); +tb = cpu_ppc_get_tb(tb_env, clock, tb_env->atb_offset); tb &= 0xULL; -cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - &tb_env->atb_offset, tb | (uint64_t)value); +cpu_ppc_store_tb(tb_env, clock, &tb_env->atb_offset, tb | (uint64_t)value); } void cpu_ppc_store_atbu (CPUPPCState *env, uint32_t value) { ppc_tb_t *tb_env = env->tb_env; +int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); uint64_t tb; -tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); +tb = cpu_ppc_get_tb(tb_env, clock, tb_env->atb_offset); tb &= 0xULL; -cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - &tb_env->atb_offset, ((uint64_t)value << 32) | tb); +cpu_ppc_store_tb(tb_env, clock, &tb_env->atb_offset, + ((uint64_t)value << 32) | tb); } uint64_t cpu_ppc_load_vtb(CPUPPCState *env) @@ -622,14 +624,13 @@ void cpu_ppc_store_vtb(CPUPPCState *env, uint64_t value) void cpu_ppc_store_tbu40(CPUPPCState *env, uint64_t value) { ppc_tb_t *tb_env = env->tb_env; +int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); uint64_t tb; -tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), -tb_env->tb_offset); +tb = cpu_ppc_get_tb(tb_env, clock, tb_env->tb_offset); tb &= 0xFFUL; tb |= (value & ~0xFFUL); -cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - &tb_env->tb_offset, tb); +cpu_ppc_store_tb(tb_env, clock, &tb_env->tb_offset, tb); } static void cpu_ppc_tb_stop (CPUPPCState *env)
Re: [PATCH v3 6/6] target/ppc: Remove pointless checks of CONFIG_USER_ONLY in 'kvm_ppc.h'
On 6/27/23 13:51, Philippe Mathieu-Daudé wrote: Signed-off-by: Philippe Mathieu-Daudé I guess the MemoryRegion code has sufficiently changed since commit 98efaf75282a ("ppc: Fix up usermode only builds") ? Reviewed-by: Cédric Le Goater Thanks, C. --- target/ppc/kvm_ppc.h | 5 - 1 file changed, 5 deletions(-) diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h index 901e188c9a..6a4dd9c560 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h @@ -42,7 +42,6 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, bool radix, bool gtse, uint64_t proc_tbl); -#ifndef CONFIG_USER_ONLY bool kvmppc_spapr_use_multitce(void); int kvmppc_spapr_enable_inkernel_multitce(void); void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, @@ -52,7 +51,6 @@ int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size); int kvmppc_reset_htab(int shift_hint); uint64_t kvmppc_vrma_limit(unsigned int hash_shift); bool kvmppc_has_cap_spapr_vfio(void); -#endif /* !CONFIG_USER_ONLY */ bool kvmppc_has_cap_epr(void); int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function); int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp); @@ -262,7 +260,6 @@ static inline void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset) { } -#ifndef CONFIG_USER_ONLY static inline bool kvmppc_spapr_use_multitce(void) { return false; @@ -322,8 +319,6 @@ static inline void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1) abort(); } -#endif /* !CONFIG_USER_ONLY */ - static inline bool kvmppc_has_cap_epr(void) { return false;
Re: [PATCH v3 5/6] target/ppc: Restrict 'kvm_ppc.h' to sysemu in cpu_init.c
On 6/27/23 13:51, Philippe Mathieu-Daudé wrote: User emulation shouldn't need any of the KVM prototypes declared in "kvm_ppc.h". Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Cédric Le Goater Thanks, C. --- target/ppc/cpu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index aeff71d063..f2afb539eb 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -21,7 +21,6 @@ #include "qemu/osdep.h" #include "disas/dis-asm.h" #include "gdbstub/helpers.h" -#include "kvm_ppc.h" #include "sysemu/cpus.h" #include "sysemu/hw_accel.h" #include "sysemu/tcg.h" @@ -49,6 +48,7 @@ #ifndef CONFIG_USER_ONLY #include "hw/boards.h" #include "hw/intc/intc.h" +#include "kvm_ppc.h" #endif /* #define PPC_DEBUG_SPR */
Re: [PATCH v3 4/6] target/ppc: Define TYPE_HOST_POWERPC_CPU in cpu-qom.h
On 6/27/23 13:51, Philippe Mathieu-Daudé wrote: TYPE_HOST_POWERPC_CPU is used in various places of cpu_init.c, in order to restrict "kvm_ppc.h" to sysemu, move this QOM-related definition to cpu-qom.h. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Cédric Le Goater Thanks, C. --- target/ppc/cpu-qom.h | 2 ++ target/ppc/kvm_ppc.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h index c2bff349cc..4e4061068e 100644 --- a/target/ppc/cpu-qom.h +++ b/target/ppc/cpu-qom.h @@ -36,6 +36,8 @@ OBJECT_DECLARE_CPU_TYPE(PowerPCCPU, PowerPCCPUClass, POWERPC_CPU) #define CPU_RESOLVING_TYPE TYPE_POWERPC_CPU #define cpu_list ppc_cpu_list +#define TYPE_HOST_POWERPC_CPU POWERPC_CPU_TYPE_NAME("host") + ObjectClass *ppc_cpu_class_by_name(const char *name); typedef struct CPUArchState CPUPPCState; diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h index 49954a300b..901e188c9a 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h @@ -13,8 +13,6 @@ #include "exec/hwaddr.h" #include "cpu.h" -#define TYPE_HOST_POWERPC_CPU POWERPC_CPU_TYPE_NAME("host") - #ifdef CONFIG_KVM uint32_t kvmppc_get_tbfreq(void);
Re: [PATCH v3 3/6] target/ppc: Move CPU QOM definitions to cpu-qom.h
On 6/27/23 13:51, Philippe Mathieu-Daudé wrote: Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Cédric Le Goater Thanks, C. --- target/ppc/cpu-qom.h | 5 + target/ppc/cpu.h | 6 -- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h index 9666f54f65..c2bff349cc 100644 --- a/target/ppc/cpu-qom.h +++ b/target/ppc/cpu-qom.h @@ -31,6 +31,11 @@ OBJECT_DECLARE_CPU_TYPE(PowerPCCPU, PowerPCCPUClass, POWERPC_CPU) +#define POWERPC_CPU_TYPE_SUFFIX "-" TYPE_POWERPC_CPU +#define POWERPC_CPU_TYPE_NAME(model) model POWERPC_CPU_TYPE_SUFFIX +#define CPU_RESOLVING_TYPE TYPE_POWERPC_CPU +#define cpu_list ppc_cpu_list + ObjectClass *ppc_cpu_class_by_name(const char *name); typedef struct CPUArchState CPUPPCState; diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index af12c93ebc..e91e1774e5 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1468,12 +1468,6 @@ static inline uint64_t ppc_dump_gpr(CPUPPCState *env, int gprn) int ppc_dcr_read(ppc_dcr_t *dcr_env, int dcrn, uint32_t *valp); int ppc_dcr_write(ppc_dcr_t *dcr_env, int dcrn, uint32_t val); -#define POWERPC_CPU_TYPE_SUFFIX "-" TYPE_POWERPC_CPU -#define POWERPC_CPU_TYPE_NAME(model) model POWERPC_CPU_TYPE_SUFFIX -#define CPU_RESOLVING_TYPE TYPE_POWERPC_CPU - -#define cpu_list ppc_cpu_list - /* MMU modes definitions */ #define MMU_USER_IDX 0 static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch)
Re: [PATCH v3 1/6] target/ppc: Have 'kvm_ppc.h' include 'sysemu/kvm.h'
On 6/27/23 13:51, Philippe Mathieu-Daudé wrote: "kvm_ppc.h" declares: int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run); 'struct kvm_run' is declared in "sysemu/kvm.h", include it. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Cédric Le Goater Thanks, C. --- target/ppc/kvm_ppc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h index 611debc3ce..2e395416f0 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h @@ -9,6 +9,7 @@ #ifndef KVM_PPC_H #define KVM_PPC_H +#include "sysemu/kvm.h" #include "exec/hwaddr.h" #include "cpu.h"
Re: [PATCH v3 2/6] target/ppc: Reorder #ifdef'ry in kvm_ppc.h
On 6/27/23 13:51, Philippe Mathieu-Daudé wrote: Keep a single if/else/endif block checking CONFIG_KVM. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Cédric Le Goater Thanks, C. --- target/ppc/kvm_ppc.h | 62 1 file changed, 28 insertions(+), 34 deletions(-) diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h index 2e395416f0..49954a300b 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h @@ -93,7 +93,34 @@ void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset); int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run); -#else +#define kvmppc_eieio() \ +do { \ +if (kvm_enabled()) { \ +asm volatile("eieio" : : : "memory"); \ +} \ +} while (0) + +/* Store data cache blocks back to memory */ +static inline void kvmppc_dcbst_range(PowerPCCPU *cpu, uint8_t *addr, int len) +{ +uint8_t *p; + +for (p = addr; p < addr + len; p += cpu->env.dcache_line_size) { +asm volatile("dcbst 0,%0" : : "r"(p) : "memory"); +} +} + +/* Invalidate instruction cache blocks */ +static inline void kvmppc_icbi_range(PowerPCCPU *cpu, uint8_t *addr, int len) +{ +uint8_t *p; + +for (p = addr; p < addr + len; p += cpu->env.icache_line_size) { +asm volatile("icbi 0,%0" : : "r"(p)); +} +} + +#else /* !CONFIG_KVM */ static inline uint32_t kvmppc_get_tbfreq(void) { @@ -440,10 +467,6 @@ static inline bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu) return false; } -#endif - -#ifndef CONFIG_KVM - #define kvmppc_eieio() do { } while (0) static inline void kvmppc_dcbst_range(PowerPCCPU *cpu, uint8_t *addr, int len) @@ -454,35 +477,6 @@ static inline void kvmppc_icbi_range(PowerPCCPU *cpu, uint8_t *addr, int len) { } -#else /* CONFIG_KVM */ - -#define kvmppc_eieio() \ -do { \ -if (kvm_enabled()) { \ -asm volatile("eieio" : : : "memory"); \ -} \ -} while (0) - -/* Store data cache blocks back to memory */ -static inline void kvmppc_dcbst_range(PowerPCCPU *cpu, uint8_t *addr, int len) -{ -uint8_t *p; - -for (p = addr; p < addr + len; p += cpu->env.dcache_line_size) { -asm volatile("dcbst 0,%0" : : "r"(p) : "memory"); -} -} - -/* Invalidate instruction cache blocks */ -static inline void kvmppc_icbi_range(PowerPCCPU *cpu, uint8_t *addr, int len) -{ -uint8_t *p; - -for (p = addr; p < addr + len; p += cpu->env.icache_line_size) { -asm volatile("icbi 0,%0" : : "r"(p)); -} -} - #endif /* CONFIG_KVM */ #endif /* KVM_PPC_H */
Re: [PATCH 0/2] target/ppc: Easy parts of the POWER chiptod series
On 6/25/23 14:03, Nicholas Piggin wrote: Cedric kindly reviewed these already so I think they should be good to go now. This is just a rebase and slight rewording the changelog. Still haven't completed the main chiptod device yet. Thanks, Nick Nicholas Piggin (2): target/ppc: Tidy POWER book4 SPR registration target/ppc: Add TFMR SPR implementation with read and write helpers target/ppc/cpu_init.c| 82 target/ppc/helper.h | 2 + target/ppc/spr_common.h | 2 + target/ppc/timebase_helper.c | 13 ++ target/ppc/translate.c | 10 + 5 files changed, 82 insertions(+), 27 deletions(-) Daniel, When you start building the next PPC PR, I think you can also take this patch : [4/4] target/ppc: Implement core timebase state machine and TFMR https://patchwork.ozlabs.org/project/qemu-ppc/patch/20230603233612.125879-5-npig...@gmail.com/ It belongs to the same series. Thanks, C.
[PATCH v6 4/5] tests/qtest/hd-geo-test: fix incorrect pcie-root-port usage and simplify test
The test attaches a SCSI controller to a non-zero slot and a pcie-to-pci bridge on slot 0 on the same pcie-root-port. Since a downstream device can be attached to a pcie-root-port only on slot 0, the above test configuration is not allowed. Additionally using pcie.0 as id for pcie-to-pci bridge is incorrect as that id is reserved only for the root bus. In the test scenario, there is no need to attach a pcie-root-port to the root complex. A SCSI controller can be attached to a pcie-to-pci bridge which can then be directly attached to the root bus (pcie.0). Fix the test and simplify it. CC: m...@redhat.com CC: imamm...@redhat.com CC: Michael Labiuk Signed-off-by: Ani Sinha --- tests/qtest/hd-geo-test.c | 18 -- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/tests/qtest/hd-geo-test.c b/tests/qtest/hd-geo-test.c index 5aa258a2b3..d08bffad91 100644 --- a/tests/qtest/hd-geo-test.c +++ b/tests/qtest/hd-geo-test.c @@ -784,14 +784,12 @@ static void test_override_scsi(void) test_override(args, "pc", expected); } -static void setup_pci_bridge(TestArgs *args, const char *id, const char *rootid) +static void setup_pci_bridge(TestArgs *args, const char *id) { -char *root, *br; -root = g_strdup_printf("-device pcie-root-port,id=%s", rootid); -br = g_strdup_printf("-device pcie-pci-bridge,bus=%s,id=%s", rootid, id); +char *br; +br = g_strdup_printf("-device pcie-pci-bridge,bus=pcie.0,id=%s", id); -args->argc = append_arg(args->argc, args->argv, ARGV_SIZE, root); args->argc = append_arg(args->argc, args->argv, ARGV_SIZE, br); } @@ -811,8 +809,8 @@ static void test_override_scsi_q35(void) add_drive_with_mbr(args, empty_mbr, 1); add_drive_with_mbr(args, empty_mbr, 1); add_drive_with_mbr(args, empty_mbr, 1); -setup_pci_bridge(args, "pcie.0", "br"); -add_scsi_controller(args, "lsi53c895a", "br", 3); +setup_pci_bridge(args, "pcie-pci-br"); +add_scsi_controller(args, "lsi53c895a", "pcie-pci-br", 3); add_scsi_disk(args, 0, 0, 0, 0, 0, 1, 120, 30); add_scsi_disk(args, 1, 0, 0, 1, 0, 9000, 120, 30); add_scsi_disk(args, 2, 0, 0, 2, 0, 1, 0, 0); @@ -868,9 +866,9 @@ static void test_override_virtio_blk_q35(void) }; add_drive_with_mbr(args, empty_mbr, 1); add_drive_with_mbr(args, empty_mbr, 1); -setup_pci_bridge(args, "pcie.0", "br"); -add_virtio_disk(args, 0, "br", 3, 1, 120, 30); -add_virtio_disk(args, 1, "br", 4, 9000, 120, 30); +setup_pci_bridge(args, "pcie-pci-br"); +add_virtio_disk(args, 0, "pcie-pci-br", 3, 1, 120, 30); +add_virtio_disk(args, 1, "pcie-pci-br", 4, 9000, 120, 30); test_override(args, "q35", expected); } -- 2.39.1
[PATCH v6 5/5] hw/pci: ensure PCIE devices are plugged into only slot 0 of PCIE port
PCI Express ports only have one slot, so PCI Express devices can only be plugged into slot 0 on a PCIE port. Enforce it. The change has been tested to not break ARI by instantiating seven vfs on an emulated igb device (the maximum number of vfs the linux igb driver supports). The vfs are seen to have non-zero device/slot numbers in the conventional PCI BDF representation. CC: jus...@redhat.com CC: imamm...@redhat.com CC: akihiko.od...@daynix.com Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929 Signed-off-by: Ani Sinha Reviewed-by: Julia Suvorova --- hw/pci/pci.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index e2eb4c3b4a..0320ac2bb3 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -65,6 +65,7 @@ bool pci_available = true; static char *pcibus_get_dev_path(DeviceState *dev); static char *pcibus_get_fw_dev_path(DeviceState *dev); static void pcibus_reset(BusState *qbus); +static bool pcie_has_upstream_port(PCIDevice *dev); static Property pci_props[] = { DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1), @@ -1190,6 +1191,20 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, name); return NULL; +} /* + * With SRIOV and ARI, vfs can have non-zero slot in the conventional + * PCI interpretation as all five bits reserved for slot addresses are + * also used for function bits for the various vfs. Ignore that case. + * It is too early here to check for ARI capabilities in the PCI config + * space. Hence, we check for a vf device instead. + */ +else if (!pci_is_vf(pci_dev) && + pcie_has_upstream_port(pci_dev) && + PCI_SLOT(devfn)) { +error_setg(errp, "PCI: slot %d is not valid for %s," + " parent device only allows plugging into slot 0.", + PCI_SLOT(devfn), name); +return NULL; } pci_dev->devfn = devfn; -- 2.39.1
[PATCH v6 0/5] test and QEMU fixes to ensure proper PCIE device usage
Patches 1-4: Fix tests so that devices do not use non-zero slots on the pcie root ports. PCIE ports only have one slot, so PCIE devices can only be plugged into slot 0 on a PCIE port. Patch 5: Enforce only one slot on PCIE port. The test fixes must be applied before the QEMU change that checks for use of a single slot in PCIE port. CC: m...@redhat.com CC: imamm...@redhat.com CC: jus...@redhat.com CC: th...@redhat.com CC: lviv...@redhat.com CC: michael.lab...@virtuozzo.com Changelog: === v6: make patch 5 ARI compliant. fix commit message (s/pcie-root-port/pcie-to-pci/) in patch 4. Rebase patchset to latest master. v5: no code changes - correct a mistake in the commit log message. v4: reword commit log for patch 4. v3: tags added. reword the error description in patch 5. Reword commit log in patch 4. v2: add hd-geo-test fix as well as the actual QEMU code fix to the patchset. The patches are added in the right order. Ani Sinha (5): tests/acpi: allow changes in DSDT.noacpihp table blob tests/acpi/bios-tables-test: use the correct slot on the pcie-root-port tests/acpi/bios-tables-test: update acpi blob q35/DSDT.noacpihp tests/qtest/hd-geo-test: fix incorrect pcie-root-port usage and simplify test hw/pci: ensure PCIE devices are plugged into only slot 0 of PCIE port hw/pci/pci.c | 15 +++ tests/data/acpi/q35/DSDT.noacpihp | Bin 8248 -> 8241 bytes tests/qtest/bios-tables-test.c| 4 ++-- tests/qtest/hd-geo-test.c | 18 -- 4 files changed, 25 insertions(+), 12 deletions(-) -- 2.39.1
[PATCH v6 3/5] tests/acpi/bios-tables-test: update acpi blob q35/DSDT.noacpihp
Some fixes were committed in bios-tables-test in the previous commit. Update the acpi blob and clear bios-tables-test-allowed-diff.h so that the test continues to pass with the changes in the bios-tables-test. Following is the asl diff between the old and the newly updated blob: @@ -1,30 +1,30 @@ /* * Intel ACPI Component Architecture * AML/ASL+ Disassembler version 20210604 (64-bit version) * Copyright (c) 2000 - 2021 Intel Corporation * * Disassembling to symbolic ASL+ operators * - * Disassembly of tests/data/acpi/q35/DSDT.noacpihp, Wed Jun 21 18:26:52 2023 + * Disassembly of /tmp/aml-O8SU61, Wed Jun 21 18:26:52 2023 * * Original Table Header: * Signature"DSDT" - * Length 0x2038 (8248) + * Length 0x2031 (8241) * Revision 0x01 32-bit table (V1), no 64-bit math support - * Checksum 0x4A + * Checksum 0x89 * OEM ID "BOCHS " * OEM Table ID "BXPC" * OEM Revision 0x0001 (1) * Compiler ID "BXPC" * Compiler Version 0x0001 (1) */ DefinitionBlock ("", "DSDT", 1, "BOCHS ", "BXPC", 0x0001) { Scope (\) { OperationRegion (DBG, SystemIO, 0x0402, One) Field (DBG, ByteAcc, NoLock, Preserve) { DBGB, 8 } @@ -3148,48 +3148,48 @@ { Name (_ADR, Zero) // _ADR: Address Method (_DSM, 4, Serialized) // _DSM: Device-Specific Method { Local0 = Package (0x01) { 0x01F5 } Return (EDSM (Arg0, Arg1, Arg2, Arg3, Local0)) } } } Device (S40) { Name (_ADR, 0x0008) // _ADR: Address -Device (S41) +Device (S01) { -Name (_ADR, 0x00080001) // _ADR: Address +Name (_ADR, One) // _ADR: Address Method (_DSM, 4, Serialized) // _DSM: Device-Specific Method { Local0 = Package (0x01) { 0x0259 } Return (EDSM (Arg0, Arg1, Arg2, Arg3, Local0)) } } -Device (S48) +Device (S02) { -Name (_ADR, 0x0009) // _ADR: Address +Name (_ADR, 0x02) // _ADR: Address Device (S00) { Name (_ADR, Zero) // _ADR: Address } } } Device (SF8) { Name (_ADR, 0x001F) // _ADR: Address OperationRegion (PIRQ, PCI_Config, 0x60, 0x0C) Scope (\_SB) { Field (PCI0.SF8.PIRQ, ByteAcc, NoLock, Preserve) { PRQA, 8, Signed-off-by: Ani Sinha Acked-by: Igor Mammedov --- tests/data/acpi/q35/DSDT.noacpihp | Bin 8248 -> 8241 bytes tests/qtest/bios-tables-test-allowed-diff.h | 1 - 2 files changed, 1 deletion(-) diff --git a/tests/data/acpi/q35/DSDT.noacpihp b/tests/data/acpi/q35/DSDT.noacpihp index 6ab1f0e52543fcb7f84a7fd1327fe5aa42010565..8cab2f8eb9ae94e0165f3f17857ec7d080fb0e13 100644 GIT binary patch delta 109 zcmdntu+f3bCDi)r&-xoSoL DyqFtK delta 94 zcmdn!u)~4NCD
[PATCH v6 2/5] tests/acpi/bios-tables-test: use the correct slot on the pcie-root-port
PCIE ports only have one slot, slot 0. Hence, non-zero slots are not available for PCIE devices on PCIE root ports. Fix test_acpi_q35_tcg_no_acpi_hotplug() so that the test does not use them. Signed-off-by: Ani Sinha Reviewed-by: Igor Mammedov --- tests/qtest/bios-tables-test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index ed1c69cf01..47ba20b957 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -1020,9 +1020,9 @@ static void test_acpi_q35_tcg_no_acpi_hotplug(void) " -device pci-testdev,bus=nohprp,acpi-index=501" " -device pcie-root-port,id=nohprpint,port=0x0,chassis=3,hotplug=off," "multifunction=on,addr=8.0" -" -device pci-testdev,bus=nohprpint,acpi-index=601,addr=8.1" +" -device pci-testdev,bus=nohprpint,acpi-index=601,addr=0.1" " -device pcie-root-port,id=hprp2,port=0x0,chassis=4,bus=nohprpint," - "addr=9.0" + "addr=0.2" " -device pci-testdev,bus=hprp2,acpi-index=602" , &data); free_test_data(&data); -- 2.39.1
[PATCH v6 1/5] tests/acpi: allow changes in DSDT.noacpihp table blob
We are going to fix bio-tables-test in the next patch and hence need to make sure the acpi tests continue to pass. Signed-off-by: Ani Sinha Acked-by: Igor Mammedov --- tests/qtest/bios-tables-test-allowed-diff.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index dfb8523c8b..31df9c6187 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1 +1,2 @@ /* List of comma-separated changed AML files to ignore */ +"tests/data/acpi/q35/DSDT.noacpihp", -- 2.39.1
Re: [PATCH qemu] aspeed add montblanc bmc reference from fuji
Hi Cédric, I had fixed the function name to support in current branch, but facing about below error while starting ./build/qemu-system-arm -machine montblanc-bmc -drive > file=~/flash-montblanc,format=raw,if=mtd -nographic -netdev > tap,id=netdev0,script=no,downscript=no,ifname=tap0 -net > nic,netdev=netdev0,model=ftgmac100 > qemu-system-arm: device requires 134217728 bytes, block backend provides > 27726336 bytes On Wed, Jun 28, 2023 at 11:34 PM Cédric Le Goater wrote: > On 6/28/23 12:07, Sittisak Sinprem wrote: > > Got it Cedric, I just know for it, > > > > I am fixing, and will re-send the patch as V2. > > Could you please use the patch below and send in your series ? > > Thanks, > > C. > > > From cfbc865ffe8a4dffe4ac764eb10416aa906a7170 Mon Sep 17 00:00:00 2001 > From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= > Date: Wed, 28 Jun 2023 18:32:20 +0200 > Subject: [PATCH] aspeed: Introduce ASPEED_RAM_SIZE helper for 32-bit hosts > limitation > MIME-Version: 1.0 > Content-Type: text/plain; charset=UTF-8 > Content-Transfer-Encoding: 8bit > > On 32-bit hosts, RAM has a 2047 MB limit. Use a macro to define the > default ram size of machines (AST2600 SoC) that can have 2 GB. > > Signed-off-by: Cédric Le Goater > --- > hw/arm/aspeed.c | 21 + > 1 file changed, 9 insertions(+), 12 deletions(-) > > diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c > index eefd2e275015..0ae252232597 100644 > --- a/hw/arm/aspeed.c > +++ b/hw/arm/aspeed.c > @@ -49,6 +49,13 @@ struct AspeedMachineState { > uint32_t hw_strap1; > }; > > +/* On 32-bit hosts, lower RAM to 1G because of the 2047 MB limit */ > +#if HOST_LONG_BITS == 32 > +#define ASPEED_RAM_SIZE(sz) MIN((sz), 1 * GiB) > +#else > +#define ASPEED_RAM_SIZE(sz) (sz) > +#endif > + > /* Palmetto hardware value: 0x120CE416 */ > #define PALMETTO_BMC_HW_STRAP1 (\ > SCU_AST2400_HW_STRAP_DRAM_SIZE(DRAM_SIZE_256MB) | \ > @@ -1504,12 +1511,7 @@ static void > aspeed_machine_rainier_class_init(ObjectClass *oc, void *data) > aspeed_machine_ast2600_class_init(oc, data); > }; > > -/* On 32-bit hosts, lower RAM to 1G because of the 2047 MB limit */ > -#if HOST_LONG_BITS == 32 > -#define FUJI_BMC_RAM_SIZE (1 * GiB) > -#else > -#define FUJI_BMC_RAM_SIZE (2 * GiB) > -#endif > +#define FUJI_BMC_RAM_SIZE ASPEED_RAM_SIZE(2 * GiB) > > static void aspeed_machine_fuji_class_init(ObjectClass *oc, void *data) > { > @@ -1533,12 +1535,7 @@ static void > aspeed_machine_fuji_class_init(ObjectClass *oc, void *data) > aspeed_machine_ast2600_class_init(oc, data); > }; > > -/* On 32-bit hosts, lower RAM to 1G because of the 2047 MB limit */ > -#if HOST_LONG_BITS == 32 > -#define BLETCHLEY_BMC_RAM_SIZE (1 * GiB) > -#else > -#define BLETCHLEY_BMC_RAM_SIZE (2 * GiB) > -#endif > +#define BLETCHLEY_BMC_RAM_SIZE ASPEED_RAM_SIZE(2 * GiB) > > static void aspeed_machine_bletchley_class_init(ObjectClass *oc, void > *data) > { > -- > 2.41.0 > > >
Re: ARI and igb emulation
> On 28-Jun-2023, at 6:19 PM, Ani Sinha wrote: > > > >> On 28-Jun-2023, at 6:15 PM, Akihiko Odaki wrote: >> >> Adding CC. >> >> On 2023/06/28 21:24, Akihiko Odaki wrote: >>> On 2023/06/27 23:32, Ani Sinha wrote: Hi : I am proposing a patch in QEMU [1] which may or may not break ARI but I wanted to give my best shot in making sure I am not breaking anything with ARI enabled. I see that your igb emulation code enables ARI with its SRIOV emulation. I ran the qtest and avocado tests that are mentioned in [2] and they both pass. Is there anything else/any tweaks that I should be doing to make sure I am not breaking ARI with igb? Thanks for information, Ani 1. https://lists.gnu.org/archive/html/qemu-devel/2023-06/msg05478.html 2. https://www.qemu.org/docs/master/system/devices/igb.html >>> This indeed resulted in the following error message with igb: >>> qemu-system-aarch64: PCI: slot 16 is not valid for igbvf, parent device >>> only allows plugging into slot 0. >>> To reproduce the issue, add the following to QEMU command line: >>> -device pcie-root-port,id=p -device igb,bus=p >>> And enable 7 virtual functions on the guest. For Linux, see: >>> https://docs.kernel.org/PCI/pci-iov-howto.html >>> You may only enforce the slot number restriction only for devices without >>> ARI. >>> The slot number is assumed as 0 when ARI is enabled if I understand >>> correctly. Yes, in linux there is something like this: device = pci_ari_enabled(dev->bus) ? 0 : PCI_SLOT(dev->devfn); We will have to make similar changes in QEMU to fix all PCI_SLOT references and adjust code accordingly.
[RFC PATCH 3/3] ppc/pnv: SMT support for powernv
Set the TIR default value with the SMT thread index, and place some standard limits on SMT configurations. Now powernv is able to boot skiboot and Linux with a SMT topology, including booting a KVM guest. There are several other per-core SPRs, but they are not so important to run OPAL/Linux. Some important per-LPAR ones to convert before powernv could run in 1LPAR mode. Broadcast msgsnd is not yet implemented either, but skiboot/Linux does not use that. KVM uses an implementation-specific detail of POWER9/10 TLBs where TLBIEL invalidates translations of all threads on a core, but that is not required here because KVM does not cache translations across PID or LPID switch. Most of these I have or aren't too hard to implement, but I start with a small bare bones for comments. Not-yet-Signed-off-by: Nicholas Piggin --- hw/ppc/pnv.c | 12 hw/ppc/pnv_core.c | 13 + 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index fc083173f3..f599ccad1d 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -887,6 +887,18 @@ static void pnv_init(MachineState *machine) pnv->num_chips = machine->smp.max_cpus / (machine->smp.cores * machine->smp.threads); + +if (machine->smp.threads > 8) { +error_report("Cannot support more than 8 threads/core " + "on a powernv machine"); +exit(1); +} +if (!is_power_of_2(machine->smp.threads)) { +error_report("Cannot support %d threads/core on a powernv" + "machine because it must be a power of 2", + machine->smp.threads); +exit(1); +} /* * TODO: should we decide on how many chips we can create based * on #cores and Venice vs. Murano vs. Naples chip type etc..., diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 0bc3ad41c8..acd83caee8 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -167,12 +167,13 @@ static const MemoryRegionOps pnv_core_power9_xscom_ops = { .endianness = DEVICE_BIG_ENDIAN, }; -static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp) +static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp, + int thread_index) { CPUPPCState *env = &cpu->env; int core_pir; -int thread_index = 0; /* TODO: TCG supports only one thread */ ppc_spr_t *pir = &env->spr_cb[SPR_PIR]; +ppc_spr_t *tir = &env->spr_cb[SPR_TIR]; Error *local_err = NULL; PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip); @@ -188,11 +189,7 @@ static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp) core_pir = object_property_get_uint(OBJECT(pc), "pir", &error_abort); -/* - * The PIR of a thread is the core PIR + the thread index. We will - * need to find a way to get the thread index when TCG supports - * more than 1. We could use the object name ? - */ +tir->default_value = thread_index; pir->default_value = core_pir + thread_index; /* Set time-base frequency to 512 MHz */ @@ -241,7 +238,7 @@ static void pnv_core_realize(DeviceState *dev, Error **errp) } for (j = 0; j < cc->nr_threads; j++) { -pnv_core_cpu_realize(pc, pc->threads[j], &local_err); +pnv_core_cpu_realize(pc, pc->threads[j], &local_err, j); if (local_err) { goto err; } -- 2.40.1
[RFC PATCH 2/3] target/ppc: SMT support for the HID SPR
HID is a per-core shared register, skiboot sets this (e.g., setting HILE) on one thread and that must affect all threads of the core. Signed-off-by: Nicholas Piggin --- target/ppc/cpu_init.c| 2 +- target/ppc/helper.h | 1 + target/ppc/misc_helper.c | 21 + target/ppc/spr_common.h | 1 + target/ppc/translate.c | 16 5 files changed, 40 insertions(+), 1 deletion(-) diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index dc3a65a575..a1b01dc479 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -5373,7 +5373,7 @@ static void register_book3s_ids_sprs(CPUPPCState *env) spr_register_hv(env, SPR_HID0, "HID0", SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_generic, + &spr_read_generic, &spr_core_write_generic, 0x); spr_register_hv(env, SPR_TSCR, "TSCR", SPR_NOACCESS, SPR_NOACCESS, diff --git a/target/ppc/helper.h b/target/ppc/helper.h index fda40b8a60..719752b38a 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -704,6 +704,7 @@ DEF_HELPER_3(store_dcr, void, env, tl, tl) DEF_HELPER_2(load_dump_spr, void, env, i32) DEF_HELPER_2(store_dump_spr, void, env, i32) +DEF_HELPER_3(spr_core_write_generic, void, env, i32, tl) DEF_HELPER_3(spr_write_CTRL, void, env, i32, tl) DEF_HELPER_4(fscr_facility_check, void, env, i32, i32, i32) diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c index 1f1af21f33..0da335472e 100644 --- a/target/ppc/misc_helper.c +++ b/target/ppc/misc_helper.c @@ -43,6 +43,27 @@ void helper_store_dump_spr(CPUPPCState *env, uint32_t sprn) env->spr[sprn]); } +void helper_spr_core_write_generic(CPUPPCState *env, uint32_t sprn, + target_ulong val) +{ +CPUState *cs = env_cpu(env); +CPUState *ccs; +uint32_t nr_threads = cs->nr_threads; +uint32_t core_id = env->spr[SPR_PIR] & ~(nr_threads - 1); + +assert(core_id == env->spr[SPR_PIR] - env->spr[SPR_TIR]); + +if (nr_threads == 1) { +env->spr[sprn] = val; +return; +} + +THREAD_SIBLING_FOREACH(cs, ccs) { +CPUPPCState *cenv = &POWERPC_CPU(ccs)->env; +cenv->spr[sprn] = val; +} +} + void helper_spr_write_CTRL(CPUPPCState *env, uint32_t sprn, target_ulong val) { diff --git a/target/ppc/spr_common.h b/target/ppc/spr_common.h index 4c0f2bed77..aae66ee966 100644 --- a/target/ppc/spr_common.h +++ b/target/ppc/spr_common.h @@ -82,6 +82,7 @@ void spr_noaccess(DisasContext *ctx, int gprn, int sprn); void spr_read_generic(DisasContext *ctx, int gprn, int sprn); void spr_write_generic(DisasContext *ctx, int sprn, int gprn); void spr_write_generic32(DisasContext *ctx, int sprn, int gprn); +void spr_core_write_generic(DisasContext *ctx, int sprn, int gprn); void spr_write_MMCR0(DisasContext *ctx, int sprn, int gprn); void spr_write_MMCR1(DisasContext *ctx, int sprn, int gprn); void spr_write_PMC(DisasContext *ctx, int sprn, int gprn); diff --git a/target/ppc/translate.c b/target/ppc/translate.c index ef186396b4..f7a5ecc0c1 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -448,6 +448,22 @@ void spr_write_generic32(DisasContext *ctx, int sprn, int gprn) #endif } +void spr_core_write_generic(DisasContext *ctx, int sprn, int gprn) +{ +if (!(ctx->flags & POWERPC_FLAG_SMT)) { +spr_write_generic(ctx, sprn, gprn); +return; +} + +if (!gen_serialize(ctx)) { +return; +} + +gen_helper_spr_core_write_generic(cpu_env, tcg_constant_i32(sprn), + cpu_gpr[gprn]); +spr_store_dump_spr(sprn); +} + static void spr_write_CTRL_ST(DisasContext *ctx, int sprn, int gprn) { /* This does not implement >1 thread */ -- 2.40.1
[RFC PATCH 1/3] target/ppc: Add LPAR-per-core vs per-thread mode flag
The Power ISA has the concept of sub-processors: Hardware is allowed to sub-divide a multi-threaded processor into "sub-processors" that appear to privileged programs as multi-threaded processors with fewer threads. POWER9 and POWER10 have two modes, either every thread is a sub-processor or all threads appear as one multi-threaded processor. In the user manuals these are known as "LPAR-per-thread" and "LPAR per core" (or "1LPAR"), respectively. The practical difference is in LPAR-per-thread mode, non-hypervisor SPRs are not shared between threads and msgsndp can not be used to message siblings. In 1LPAR mode some SPRs are shared and msgsndp is usable. LPPT allows multiple partitions to run concurrently on the same core, and is a requirement for KVM to run on POWER9/10. Traditionally, SMT in PAPR environments including PowerVM and the pseries machine with KVM acceleration beahves as in 1LPAR mode. In OPAL systems, LPAR-per-thread is used. When adding SMT to the powernv machine, it is preferable to emulate OPAL LPAR-per-thread, so to account for this difference a flag is added and SPRs may become either per-thread, per-core shared, or per-LPAR shared. Per-LPAR registers become either per-thread or per-core shared depending on the mode. Signed-off-by: Nicholas Piggin --- hw/ppc/spapr_cpu_core.c | 2 ++ target/ppc/cpu.h| 3 +++ target/ppc/cpu_init.c | 12 target/ppc/translate.c | 16 +--- 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index a4e3c2fadd..b482d9754a 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -270,6 +270,8 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, env->spr_cb[SPR_PIR].default_value = cs->cpu_index; env->spr_cb[SPR_TIR].default_value = thread_index; +cpu_ppc_set_1lpar(cpu); + /* Set time-base frequency to 512 MHz. vhyp must be set first. */ cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 94497aa115..beddc5db5b 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -674,6 +674,8 @@ enum { POWERPC_FLAG_SCV = 0x0020, /* Has >1 thread per core*/ POWERPC_FLAG_SMT = 0x0040, +/* Using "LPAR per core" mode (as opposed to per-thread)*/ +POWERPC_FLAG_1LPAR= 0x0080, }; /* @@ -1435,6 +1437,7 @@ void store_booke_tsr(CPUPPCState *env, target_ulong val); void ppc_tlb_invalidate_all(CPUPPCState *env); void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr); void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp); +void cpu_ppc_set_1lpar(PowerPCCPU *cpu); int ppcmas_tlb_check(CPUPPCState *env, ppcmas_tlb_t *tlb, hwaddr *raddrp, target_ulong address, uint32_t pid); int ppcemb_tlb_search(CPUPPCState *env, target_ulong address, uint32_t pid); diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index aeff71d063..dc3a65a575 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -6601,6 +6601,18 @@ void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp) env->msr_mask &= ~MSR_HVB; } +void cpu_ppc_set_1lpar(PowerPCCPU *cpu) +{ +CPUPPCState *env = &cpu->env; + +/* + * pseries SMT means "LPAR per core" mode, e.g., msgsndp is usable + * between threads. + */ +if (env->flags & POWERPC_FLAG_SMT) { +env->flags |= POWERPC_FLAG_1LPAR; +} +} #endif /* !defined(CONFIG_USER_ONLY) */ #endif /* defined(TARGET_PPC64) */ diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 372ee600b2..ef186396b4 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -256,6 +256,16 @@ static inline bool gen_serialize_core(DisasContext *ctx) } #endif +static inline bool gen_serialize_core_lpar(DisasContext *ctx) +{ +/* 1LPAR implies SMT */ +if (ctx->flags & POWERPC_FLAG_1LPAR) { +return gen_serialize(ctx); +} + +return true; +} + /* SPR load/store helpers */ static inline void gen_load_spr(TCGv t, int reg) { @@ -451,7 +461,7 @@ static void spr_write_CTRL_ST(DisasContext *ctx, int sprn, int gprn) void spr_write_CTRL(DisasContext *ctx, int sprn, int gprn) { -if (!(ctx->flags & POWERPC_FLAG_SMT)) { +if (!(ctx->flags & POWERPC_FLAG_1LPAR)) { spr_write_CTRL_ST(ctx, sprn, gprn); goto out; } @@ -815,7 +825,7 @@ void spr_write_pcr(DisasContext *ctx, int sprn, int gprn) /* DPDES */ void spr_read_dpdes(DisasContext *ctx, int gprn, int sprn) { -if (!gen_serialize_core(ctx)) { +if (!gen_serialize_core_lpar(ctx)) { return; } @@ -824,7 +834,7 @@ void spr_read_dpdes(DisasContext *ctx, int gprn, int sprn) void spr_write_dpdes(DisasContext *ctx, int sprn, int gprn) { -if (!gen_serialize_core(ctx)) { +if (!gen_serialize_core_lpar(ctx)) { re
[RFC PATCH 0/3] ppc/pnv: SMT support for powernv
These patches implement enough to boot a SMT powernv machine to Linux and boot a SMP KVM guest inside that. There are a few more SPRs that need to be done, and per-LPAR SPRs are mostly not annotated yet so it can't run in 1LPAR mode. But it is enough to run skiboot/Linux with SMT so I'll just post the minimal patches for RFC because the concept isn't really different to add more SPRs and things. Thanks, Nick Nicholas Piggin (3): target/ppc: Add LPAR-per-core vs per-thread mode flag target/ppc: SMT support for the HID SPR ppc/pnv: SMT support for powernv hw/ppc/pnv.c | 12 hw/ppc/pnv_core.c| 13 + hw/ppc/spapr_cpu_core.c | 2 ++ target/ppc/cpu.h | 3 +++ target/ppc/cpu_init.c| 14 +- target/ppc/helper.h | 1 + target/ppc/misc_helper.c | 21 + target/ppc/spr_common.h | 1 + target/ppc/translate.c | 32 +--- 9 files changed, 87 insertions(+), 12 deletions(-) -- 2.40.1
[PATCH] hw/ppc: Fix clock update drift
The clock update logic reads the clock twice to compute the new clock value, with a value derived from the later time subtracted from a value derived from the earlier time. The delta causes time to be lost. This can ultimately result in time becoming unsynchronized between CPUs and that can cause OS lockups, timeouts, watchdogs, etc. This can be seen running a KVM guest (that causes lots of TB updates) on a powernv SMP machine. Fix this by reading the clock once. Cc: qemu-sta...@nongnu.org Signed-off-by: Nicholas Piggin --- I also made a test case that can trigger this with kvm-unit-tests, but it's been taking me a while to get that upstreamed. Thanks, Nick hw/ppc/ppc.c | 33 + 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 82e4408c5c..6233f43c01 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -535,23 +535,24 @@ static inline void cpu_ppc_store_tb(ppc_tb_t *tb_env, uint64_t vmclk, void cpu_ppc_store_tbl (CPUPPCState *env, uint32_t value) { ppc_tb_t *tb_env = env->tb_env; +int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); uint64_t tb; -tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); +tb = cpu_ppc_get_tb(tb_env, clock, tb_env->tb_offset); tb &= 0xULL; -cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - &tb_env->tb_offset, tb | (uint64_t)value); +cpu_ppc_store_tb(tb_env, clock, &tb_env->tb_offset, tb | (uint64_t)value); } static inline void _cpu_ppc_store_tbu(CPUPPCState *env, uint32_t value) { ppc_tb_t *tb_env = env->tb_env; +int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); uint64_t tb; -tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); +tb = cpu_ppc_get_tb(tb_env, clock, tb_env->tb_offset); tb &= 0xULL; -cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - &tb_env->tb_offset, ((uint64_t)value << 32) | tb); +cpu_ppc_store_tb(tb_env, clock, &tb_env->tb_offset, + ((uint64_t)value << 32) | tb); } void cpu_ppc_store_tbu (CPUPPCState *env, uint32_t value) @@ -584,23 +585,24 @@ uint32_t cpu_ppc_load_atbu (CPUPPCState *env) void cpu_ppc_store_atbl (CPUPPCState *env, uint32_t value) { ppc_tb_t *tb_env = env->tb_env; +int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); uint64_t tb; -tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); +tb = cpu_ppc_get_tb(tb_env, clock, tb_env->atb_offset); tb &= 0xULL; -cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - &tb_env->atb_offset, tb | (uint64_t)value); +cpu_ppc_store_tb(tb_env, clock, &tb_env->atb_offset, tb | (uint64_t)value); } void cpu_ppc_store_atbu (CPUPPCState *env, uint32_t value) { ppc_tb_t *tb_env = env->tb_env; +int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); uint64_t tb; -tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); +tb = cpu_ppc_get_tb(tb_env, clock, tb_env->atb_offset); tb &= 0xULL; -cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - &tb_env->atb_offset, ((uint64_t)value << 32) | tb); +cpu_ppc_store_tb(tb_env, clock, &tb_env->atb_offset, + ((uint64_t)value << 32) | tb); } uint64_t cpu_ppc_load_vtb(CPUPPCState *env) @@ -622,14 +624,13 @@ void cpu_ppc_store_vtb(CPUPPCState *env, uint64_t value) void cpu_ppc_store_tbu40(CPUPPCState *env, uint64_t value) { ppc_tb_t *tb_env = env->tb_env; +int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); uint64_t tb; -tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), -tb_env->tb_offset); +tb = cpu_ppc_get_tb(tb_env, clock, tb_env->tb_offset); tb &= 0xFFUL; tb |= (value & ~0xFFUL); -cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - &tb_env->tb_offset, tb); +cpu_ppc_store_tb(tb_env, clock, &tb_env->tb_offset, tb); } static void cpu_ppc_tb_stop (CPUPPCState *env) -- 2.40.1
RE: [PATCH v2 2/2] vfio/migration: Make VFIO migration non-experimental
>-Original Message- >From: Cédric Le Goater >Sent: Thursday, June 29, 2023 12:04 AM >To: Martins, Joao ; Avihai Horon > >Cc: Alex Williamson ; Juan Quintela >; Peter Xu ; Leonardo Bras >; Duan, Zhenzhong ; >Yishai Hadas ; Jason Gunthorpe ; >Maor Gottlieb ; Kirti Wankhede >; Tarun Gupta ; qemu- >de...@nongnu.org >Subject: Re: [PATCH v2 2/2] vfio/migration: Make VFIO migration non- >experimental > >On 6/28/23 16:51, Joao Martins wrote: >> On 28/06/2023 13:54, Cédric Le Goater wrote: >>> On 6/28/23 09:31, Avihai Horon wrote: The major parts of VFIO migration are supported today in QEMU. This includes basic VFIO migration, device dirty page tracking and precopy support. Thus, at this point in time, it seems appropriate to make VFIO migration non-experimental: remove the x prefix from enable_migration property, change it to ON_OFF_AUTO and let the default value be AUTO. In addition, make the following adjustments: 1. When enable_migration is ON and migration is not supported, fail VFIO device realization. 2. When enable_migration is AUTO (i.e., not explicitly enabled), require device dirty tracking support. This is because device dirty tracking is currently the only method to do dirty page tracking, which is essential for migrating in a reasonable downtime. Setting enable_migration to ON will not require device dirty tracking. 3. Make migration error and blocker messages more elaborate. 4. Remove error prints in vfio_migration_query_flags(). 5. Rename trace_vfio_migration_probe() to trace_vfio_migration_realize(). Signed-off-by: Avihai Horon >>> >>> >>> We should rework the return value of most of the routines called by >>> vfio_migration_realize() and simply use a bool. I think Zhenzhong is >>> working it. >>> >>> Zhenzhong, >>> >>> When you resend v4 of the "VFIO migration related refactor and bug fix" >>> series, please rebase on this patch since it should be merged. >>> >> >> This, and his switchover-ack series from Avihai that preceeds it. >> >> Perhaps it might be easier to point to your tree:branch where you are >> queueing all the patches? >> > >Sure. > >I track QEMU patches for various subsystems under : > > https://github.com/legoater/qemu > > >VFIO candidates are under : > > https://github.com/legoater/qemu/tree/vfio-8.1 > >This is a wip tree, patches come and go. It contains the VFIO patches of the >day/week, good for testing new ideas and checking CI. > > >The vfio-next branch contains what I am 90% sure to send upstream : > > https://github.com/legoater/qemu/tree/vfio-next Great, I'll rebase on this branch, thanks for sharing. Regards Zhenzhong
Re: [PATCH 5/7] migration: Display error in query-migrate irrelevant of status
Peter Xu writes: > Display it as long as being set, irrelevant of FAILED status. E.g., it may > also be applicable to PAUSED stage of postcopy, to provide hint on what has > gone wrong. This might have made the documentation slightly inaccurate: # @error-desc: the human readable error description string, when # @status is 'failed'. Clients should not attempt to parse the # error strings. (Since 2.7) But it's not wrong, so: Reviewed-by: Fabiano Rosas
Re: [PATCH 4/7] migration: Deliver return path file error to migrate state too
Peter Xu writes: > We've already did this for most of the return path thread errors, but not > yet for the IO errors happened on the return path qemufile. Do that too. > > Remember to reset "err" always, because the ownership is not us anymore, > otherwise we're prone to use-after-free later after recovered. > > Signed-off-by: Peter Xu Reviewed-by: Fabiano Rosas
Re: [PATCH 3/7] migration: Refactor error handling in source return path
Peter Xu writes: > rp_state.error was a boolean used to show error happened in return path > thread. That's not only duplicating error reporting (migrate_set_error), > but also not good enough in that we only do error_report() and set it to > true, we never can keep a history of the exact error and show it in > query-migrate. > > To make this better, a few things done: > > - Use error_setg() rather than error_report() across the whole lifecycle > of return path thread, keeping the error in an Error*. > > - Use migrate_set_error() to apply that captured error to the global > migration object when error occured in this thread. > > - With above, no need to have mark_source_rp_bad(), remove it, alongside > with rp_state.error itself. > > Signed-off-by: Peter Xu Reviewed-by: Fabiano Rosas
Re: [PATCH] target/arm: gdbstub: Guard M-profile code with CONFIG_TCG
Fabiano Rosas writes: > Philippe Mathieu-Daudé writes: > >> On 28/6/23 18:48, Fabiano Rosas wrote: >>> This code is only relevant when TCG is present in the build. Building >>> with --disable-tcg --enable-xen on an x86 host we get: >>> >>> $ ../configure --target-list=x86_64-softmmu,aarch64-softmmu --disable-tcg >>> --enable-xen >>> $ make -j$(nproc) >>> ... >>> libqemu-aarch64-softmmu.fa.p/target_arm_gdbstub.c.o: in function >>> `m_sysreg_ptr': >>> ../target/arm/gdbstub.c:358: undefined reference to `arm_v7m_get_sp_ptr' >>> ../target/arm/gdbstub.c:361: undefined reference to `arm_v7m_get_sp_ptr' >>> >>> libqemu-aarch64-softmmu.fa.p/target_arm_gdbstub.c.o: in function >>> `arm_gdb_get_m_systemreg': >>> ../target/arm/gdbstub.c:405: undefined reference to `arm_v7m_mrs_control' >> >> I'm a bit confused, isn't this covered by the cross-arm64-xen-only >> job? > > It should be. Perhaps the CI is using different optimization flags. I'll > try to figure it out. Yep. The CI has -O2 while I am using --enable-debug.
Re: [PATCH v3] multifd: Add colo support
copy hailiang. On Thu, Jun 22, 2023 at 01:18:46PM +0200, Lukas Straub wrote: > Like in the normal ram_load() path, put the received pages into the > colo cache and mark the pages in the bitmap so that they will be > flushed to the guest later. > > Signed-off-by: Juan Quintela > Signed-off-by: Lukas Straub > --- > migration/meson.build| 1 + > migration/multifd-colo.c | 53 > migration/multifd-colo.h | 24 ++ > migration/multifd.c | 5 > 4 files changed, 83 insertions(+) > create mode 100644 migration/multifd-colo.c > create mode 100644 migration/multifd-colo.h > > diff --git a/migration/meson.build b/migration/meson.build > index 1ae28523a1..063e0e0a8c 100644 > --- a/migration/meson.build > +++ b/migration/meson.build > @@ -21,6 +21,7 @@ system_ss.add(files( >'migration.c', >'multifd.c', >'multifd-zlib.c', > + 'multifd-colo.c', >'ram-compress.c', >'options.c', >'postcopy-ram.c', > diff --git a/migration/multifd-colo.c b/migration/multifd-colo.c > new file mode 100644 > index 00..4872dc6d01 > --- /dev/null > +++ b/migration/multifd-colo.c > @@ -0,0 +1,53 @@ > +/* > + * multifd colo implementation > + * > + * Copyright (c) Lukas Straub > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory. > + */ > + > +#include "qemu/osdep.h" > +#include "exec/target_page.h" > +#include "exec/ramblock.h" > +#include "qemu/error-report.h" > +#include "qapi/error.h" > +#include "ram.h" > +#include "multifd.h" > +#include "options.h" > +#include "io/channel-socket.h" > +#include "migration/colo.h" > +#include "multifd-colo.h" > + > +void multifd_colo_prepare_recv_pages(MultiFDRecvParams *p) > +{ > +if (!migrate_colo()) > +return; > + > +assert(p->block->colo_cache); > + > +/* > + * While we're still in precopy state (not yet in colo state), we copy > + * received pages to both guest and cache. No need to set dirty bits, > + * since guest and cache memory are in sync. > + */ > +if (migration_incoming_in_colo_state()) { > +colo_record_bitmap(p->block, p->normal, p->normal_num); > +} > +p->host = p->block->colo_cache; > +} > + > +void multifd_colo_process_recv_pages(MultiFDRecvParams *p) > +{ > +if (!migrate_colo()) > +return; > + > +if (!migration_incoming_in_colo_state()) { > +for (int i = 0; i < p->normal_num; i++) { > +void *guest = p->block->host + p->normal[i]; > +void *cache = p->host + p->normal[i]; > +memcpy(guest, cache, p->page_size); > +} > +} > +p->host = p->block->host; > +} > diff --git a/migration/multifd-colo.h b/migration/multifd-colo.h > new file mode 100644 > index 00..58920a0583 > --- /dev/null > +++ b/migration/multifd-colo.h > @@ -0,0 +1,24 @@ > +/* > + * multifd colo header > + * > + * Copyright (c) Lukas Straub > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory. > + */ > + > +#ifndef QEMU_MIGRATION_MULTIFD_COLO_H > +#define QEMU_MIGRATION_MULTIFD_COLO_H > + > +#ifdef CONFIG_REPLICATION > + > +void multifd_colo_prepare_recv_pages(MultiFDRecvParams *p); > +void multifd_colo_process_recv_pages(MultiFDRecvParams *p); > + > +#else > + > +static inline void multifd_colo_prepare_recv_pages(MultiFDRecvParams *p) {} > +static inline void multifd_colo_process_recv_pages(MultiFDRecvParams *p) {} > + > +#endif > +#endif > diff --git a/migration/multifd.c b/migration/multifd.c > index 3387d8277f..6b031c1fd2 100644 > --- a/migration/multifd.c > +++ b/migration/multifd.c > @@ -25,6 +25,7 @@ > #include "qemu-file.h" > #include "trace.h" > #include "multifd.h" > +#include "multifd-colo.h" > #include "threadinfo.h" > #include "options.h" > #include "qemu/yank.h" > @@ -1134,10 +1135,14 @@ static void *multifd_recv_thread(void *opaque) > qemu_mutex_unlock(&p->mutex); > > if (p->normal_num) { > +multifd_colo_prepare_recv_pages(p); > + > ret = multifd_recv_state->ops->recv_pages(p, &local_err); > if (ret != 0) { > break; > } > + > +multifd_colo_process_recv_pages(p); > } > > if (flags & MULTIFD_FLAG_SYNC) { > -- > 2.39.2 -- Peter Xu
Re: [PATCH] target/arm: gdbstub: Guard M-profile code with CONFIG_TCG
Philippe Mathieu-Daudé writes: > On 28/6/23 18:48, Fabiano Rosas wrote: >> This code is only relevant when TCG is present in the build. Building >> with --disable-tcg --enable-xen on an x86 host we get: >> >> $ ../configure --target-list=x86_64-softmmu,aarch64-softmmu --disable-tcg >> --enable-xen >> $ make -j$(nproc) >> ... >> libqemu-aarch64-softmmu.fa.p/target_arm_gdbstub.c.o: in function >> `m_sysreg_ptr': >> ../target/arm/gdbstub.c:358: undefined reference to `arm_v7m_get_sp_ptr' >> ../target/arm/gdbstub.c:361: undefined reference to `arm_v7m_get_sp_ptr' >> >> libqemu-aarch64-softmmu.fa.p/target_arm_gdbstub.c.o: in function >> `arm_gdb_get_m_systemreg': >> ../target/arm/gdbstub.c:405: undefined reference to `arm_v7m_mrs_control' > > I'm a bit confused, isn't this covered by the cross-arm64-xen-only > job? It should be. Perhaps the CI is using different optimization flags. I'll try to figure it out.
[PATCH 7/7] migration: Provide explicit error message for file shutdowns
Provide an explicit reason for qemu_file_shutdown()s, which can be displayed in query-migrate when used. This will make e.g. migrate-pause to display explicit error descriptions, from: "error-desc": "Channel error: Input/output error" To: "error-desc": "Channel is explicitly shutdown by the user" in query-migrate. Signed-off-by: Peter Xu --- migration/qemu-file.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/migration/qemu-file.c b/migration/qemu-file.c index 419b4092e7..ff605027de 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -87,7 +87,10 @@ int qemu_file_shutdown(QEMUFile *f) * --> guest crash! */ if (!f->last_error) { -qemu_file_set_error(f, -EIO); +Error *err = NULL; + +error_setg(&err, "Channel is explicitly shutdown by the user"); +qemu_file_set_error_obj(f, -EIO, err); } if (!qio_channel_has_feature(f->ioc, -- 2.41.0
[PATCH 0/7] migration: Better error handling in return path thread
This is a small series that reworks error handling of postcopy return path threads. We used to contain a bunch of error_report(), converting them into error_setg() properly and deliver any of those errors to migration generic error reports (via migrate_set_error()). Then these errors can also be observed in query-migrate after postcopy is paused. Dropped the return-path specific error reporting: mark_source_rp_bad(), because it's a duplication if we can always use migrate_set_error(). Please have a look, thanks. Peter Xu (7): migration: Let migrate_set_error() take ownership migration: Introduce migrate_has_error() migration: Refactor error handling in source return path migration: Deliver return path file error to migrate state too migration: Display error in query-migrate irrelevant of status qemufile: Always return a verbose error migration: Provide explicit error message for file shutdowns migration/migration.h| 8 +- migration/ram.h | 5 +- migration/channel.c | 1 - migration/migration.c| 168 +++ migration/multifd.c | 10 +-- migration/postcopy-ram.c | 1 - migration/qemu-file.c| 20 - migration/ram.c | 42 +- migration/trace-events | 2 +- 9 files changed, 147 insertions(+), 110 deletions(-) -- 2.41.0
[PATCH 3/7] migration: Refactor error handling in source return path
rp_state.error was a boolean used to show error happened in return path thread. That's not only duplicating error reporting (migrate_set_error), but also not good enough in that we only do error_report() and set it to true, we never can keep a history of the exact error and show it in query-migrate. To make this better, a few things done: - Use error_setg() rather than error_report() across the whole lifecycle of return path thread, keeping the error in an Error*. - Use migrate_set_error() to apply that captured error to the global migration object when error occured in this thread. - With above, no need to have mark_source_rp_bad(), remove it, alongside with rp_state.error itself. Signed-off-by: Peter Xu --- migration/migration.h | 1 - migration/ram.h| 5 +- migration/migration.c | 118 - migration/ram.c| 41 +++--- migration/trace-events | 2 +- 5 files changed, 82 insertions(+), 85 deletions(-) diff --git a/migration/migration.h b/migration/migration.h index e48317ca97..d9c4b753b5 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -290,7 +290,6 @@ struct MigrationState { /* Protected by qemu_file_lock */ QEMUFile *from_dst_file; QemuThreadrp_thread; -bool error; /* * We can also check non-zero of rp_thread, but there's no "official" * way to do this, so this bool makes it slightly more elegant. diff --git a/migration/ram.h b/migration/ram.h index ea1f3c25b5..02a4af2db6 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -52,7 +52,8 @@ uint64_t ram_bytes_total(void); void mig_throttle_counter_reset(void); uint64_t ram_pagesize_summary(void); -int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len); +int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len, + Error **errp); void ram_postcopy_migrated_memory_release(MigrationState *ms); /* For outgoing discard bitmap */ void ram_postcopy_send_discard_bitmap(MigrationState *ms); @@ -72,7 +73,7 @@ void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr); void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, size_t nr); int64_t ramblock_recv_bitmap_send(QEMUFile *file, const char *block_name); -int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb); +int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb, Error **errp); bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start); void postcopy_preempt_shutdown_file(MigrationState *s); void *postcopy_preempt_thread(void *opaque); diff --git a/migration/migration.c b/migration/migration.c index 39674a3981..f8c41c4d98 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1409,7 +1409,6 @@ void migrate_init(MigrationState *s) s->to_dst_file = NULL; s->state = MIGRATION_STATUS_NONE; s->rp_state.from_dst_file = NULL; -s->rp_state.error = false; s->mbps = 0.0; s->pages_per_second = 0.0; s->downtime = 0; @@ -1724,16 +1723,6 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) qemu_sem_post(&s->pause_sem); } -/* migration thread support */ -/* - * Something bad happened to the RP stream, mark an error - * The caller shall print or trace something to indicate why - */ -static void mark_source_rp_bad(MigrationState *s) -{ -s->rp_state.error = true; -} - static struct rp_cmd_args { ssize_t len; /* -1 = variable */ const char *name; @@ -1754,7 +1743,7 @@ static struct rp_cmd_args { * and we don't need to send pages that have already been sent. */ static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, - ram_addr_t start, size_t len) +ram_addr_t start, size_t len, Error **errp) { long our_host_ps = qemu_real_host_page_size(); @@ -1766,15 +1755,12 @@ static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, */ if (!QEMU_IS_ALIGNED(start, our_host_ps) || !QEMU_IS_ALIGNED(len, our_host_ps)) { -error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT - " len: %zd", __func__, start, len); -mark_source_rp_bad(ms); +error_setg(errp, "MIG_RP_MSG_REQ_PAGES: Misaligned page request, start:" + RAM_ADDR_FMT " len: %zd", start, len); return; } -if (ram_save_queue_pages(rbname, start, len)) { -mark_source_rp_bad(ms); -} +ram_save_queue_pages(rbname, start, len, errp); } /* Return true to retry, false to quit */ @@ -1789,26 +1775,28 @@ static bool postcopy_pause_return_path_thread(MigrationState *s) return true; } -static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name) +static int migrate_handle_rp_recv_bitmap(Migra
[PATCH 6/7] qemufile: Always return a verbose error
There're a lot of cases where we only have an errno set in last_error but without a detailed error description. When this happens, try to generate an error contains the errno as a descriptive error. This will be helpful in cases where one relies on the Error*. E.g., migration state only caches Error* in MigrationState.error. With this, we'll display correct error messages in e.g. query-migrate when the error was only set by qemu_file_set_error(). Signed-off-by: Peter Xu --- migration/qemu-file.c | 15 --- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/migration/qemu-file.c b/migration/qemu-file.c index acc282654a..419b4092e7 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -156,15 +156,24 @@ void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks) * * Return negative error value if there has been an error on previous * operations, return 0 if no error happened. - * Optional, it returns Error* in errp, but it may be NULL even if return value - * is not 0. * + * If errp is specified, a verbose error message will be copied over. */ int qemu_file_get_error_obj(QEMUFile *f, Error **errp) { +if (!f->last_error) { +return 0; +} + +/* There is an error */ if (errp) { -*errp = f->last_error_obj ? error_copy(f->last_error_obj) : NULL; +if (f->last_error_obj) { +*errp = error_copy(f->last_error_obj); +} else { +error_setg_errno(errp, -f->last_error, "Channel error"); +} } + return f->last_error; } -- 2.41.0
[PATCH 5/7] migration: Display error in query-migrate irrelevant of status
Display it as long as being set, irrelevant of FAILED status. E.g., it may also be applicable to PAUSED stage of postcopy, to provide hint on what has gone wrong. The error_mutex seems to be overlooked when referencing the error, add it to be very safe. Signed-off-by: Peter Xu --- migration/migration.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 234dd3601d..7455353918 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1033,9 +1033,6 @@ static void fill_source_migration_info(MigrationInfo *info) break; case MIGRATION_STATUS_FAILED: info->has_status = true; -if (s->error) { -info->error_desc = g_strdup(error_get_pretty(s->error)); -} break; case MIGRATION_STATUS_CANCELLED: info->has_status = true; @@ -1045,6 +1042,11 @@ static void fill_source_migration_info(MigrationInfo *info) break; } info->status = state; + +QEMU_LOCK_GUARD(&s->error_mutex); +if (s->error) { +info->error_desc = g_strdup(error_get_pretty(s->error)); +} } static void fill_destination_migration_info(MigrationInfo *info) -- 2.41.0
[PATCH 1/7] migration: Let migrate_set_error() take ownership
migrate_set_error() used one error_copy() so it always copy an error. However that's not the major use case - the major use case is one would like to pass the error to migrate_set_error() without further touching the error. It can be proved if we see most of the callers are freeing the error explicitly right afterwards. There're a few outliers (only if when the caller) where we can use error_copy() explicitly there. Signed-off-by: Peter Xu --- migration/migration.h| 6 +++--- migration/channel.c | 1 - migration/migration.c| 20 ++-- migration/multifd.c | 10 -- migration/postcopy-ram.c | 1 - migration/ram.c | 1 - 6 files changed, 21 insertions(+), 18 deletions(-) diff --git a/migration/migration.h b/migration/migration.h index 721b1c9473..32f87e3834 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -468,8 +468,8 @@ bool migration_has_all_channels(void); uint64_t migrate_max_downtime(void); -void migrate_set_error(MigrationState *s, const Error *error); -void migrate_fd_error(MigrationState *s, const Error *error); +void migrate_set_error(MigrationState *s, Error *error); +void migrate_fd_error(MigrationState *s, Error *error); void migrate_fd_connect(MigrationState *s, Error *error_in); @@ -513,7 +513,7 @@ int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); void migration_make_urgent_request(void); void migration_consume_urgent_request(void); bool migration_rate_limit(void); -void migration_cancel(const Error *error); +void migration_cancel(Error *error); void populate_vfio_info(MigrationInfo *info); void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page); diff --git a/migration/channel.c b/migration/channel.c index ca3319a309..48b3f6abd6 100644 --- a/migration/channel.c +++ b/migration/channel.c @@ -90,7 +90,6 @@ void migration_channel_connect(MigrationState *s, } } migrate_fd_connect(s, error); -error_free(error); } diff --git a/migration/migration.c b/migration/migration.c index 203d40d4c9..13dccc4c12 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -160,7 +160,7 @@ void migration_object_init(void) dirty_bitmap_mig_init(); } -void migration_cancel(const Error *error) +void migration_cancel(Error *error) { if (error) { migrate_set_error(current_migration, error); @@ -1197,11 +1197,20 @@ static void migrate_fd_cleanup_bh(void *opaque) object_unref(OBJECT(s)); } -void migrate_set_error(MigrationState *s, const Error *error) +/* + * Set error for current migration state. The `error' ownership will be + * moved from the caller to MigrationState, so the caller doesn't need to + * free the error. + * + * If the caller still needs to reference the `error' passed in, one should + * use error_copy() explicitly. + */ +void migrate_set_error(MigrationState *s, Error *error) { QEMU_LOCK_GUARD(&s->error_mutex); if (!s->error) { -s->error = error_copy(error); +/* Record the first error triggered */ +s->error = error; } } @@ -1214,7 +1223,7 @@ static void migrate_error_free(MigrationState *s) } } -void migrate_fd_error(MigrationState *s, const Error *error) +void migrate_fd_error(MigrationState *s, Error *error) { trace_migrate_fd_error(error_get_pretty(error)); assert(s->to_dst_file == NULL); @@ -1678,7 +1687,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, if (!(has_resume && resume)) { yank_unregister_instance(MIGRATION_YANK_INSTANCE); } -migrate_fd_error(s, local_err); +migrate_fd_error(s, error_copy(local_err)); error_propagate(errp, local_err); return; } @@ -2595,7 +2604,6 @@ static MigThrError migration_detect_error(MigrationState *s) if (local_error) { migrate_set_error(s, local_error); -error_free(local_error); } if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret) { diff --git a/migration/multifd.c b/migration/multifd.c index 3387d8277f..62bc2dbf49 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -551,7 +551,6 @@ void multifd_save_cleanup(void) multifd_send_state->ops->send_cleanup(p, &local_err); if (local_err) { migrate_set_error(migrate_get_current(), local_err); -error_free(local_err); } } qemu_sem_destroy(&multifd_send_state->channels_ready); @@ -750,7 +749,6 @@ out: if (local_err) { trace_multifd_send_error(p->id); multifd_send_terminate_threads(local_err); -error_free(local_err); } /* @@ -883,7 +881,6 @@ static void multifd_new_send_channel_cleanup(MultiFDSendParams *p, */ p->quit = true; object_unref(OBJECT(ioc)); - error_free(err); } static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) @@ -1148,7 +1145,6 @@ static void *multifd_recv_thread(void *opaque) if (l
[PATCH 2/7] migration: Introduce migrate_has_error()
Introduce a helper to detect whether MigrationState.error is set for whatever reason. It is intended to not taking the error_mutex here because neither do we reference the pointer, nor do we modify the pointer. State why it's safe to do so. This is preparation work for any thread (e.g. source return path thread) to setup errors in an unified way to MigrationState, rather than relying on its own way to set errors (mark_source_rp_bad()). Signed-off-by: Peter Xu --- migration/migration.h | 1 + migration/migration.c | 15 +++ 2 files changed, 16 insertions(+) diff --git a/migration/migration.h b/migration/migration.h index 32f87e3834..e48317ca97 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -469,6 +469,7 @@ bool migration_has_all_channels(void); uint64_t migrate_max_downtime(void); void migrate_set_error(MigrationState *s, Error *error); +bool migrate_has_error(MigrationState *s); void migrate_fd_error(MigrationState *s, Error *error); void migrate_fd_connect(MigrationState *s, Error *error_in); diff --git a/migration/migration.c b/migration/migration.c index 13dccc4c12..39674a3981 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1214,6 +1214,21 @@ void migrate_set_error(MigrationState *s, Error *error) } } +/* + * Whether the migration state has error set? + * + * Note this function explicitly didn't use error_mutex, because it only + * reads the error pointer for a boolean status. + * + * As long as the Error* is set, it shouldn't be freed before migration + * cleanup, so any thread can use this helper to safely detect whether + * there's anything wrong happened already. + */ +bool migrate_has_error(MigrationState *s) +{ +return qatomic_read(&s->error); +} + static void migrate_error_free(MigrationState *s) { QEMU_LOCK_GUARD(&s->error_mutex); -- 2.41.0
[PATCH 4/7] migration: Deliver return path file error to migrate state too
We've already did this for most of the return path thread errors, but not yet for the IO errors happened on the return path qemufile. Do that too. Remember to reset "err" always, because the ownership is not us anymore, otherwise we're prone to use-after-free later after recovered. Signed-off-by: Peter Xu --- migration/migration.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/migration/migration.c b/migration/migration.c index f8c41c4d98..234dd3601d 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1992,6 +1992,13 @@ out: res = qemu_file_get_error(rp); if (res) { +/* We have forwarded any error in "err" already, reuse "error" */ +assert(err == NULL); +/* Try to deliver this file error to migration state */ +qemu_file_get_error_obj(rp, &err); +migrate_set_error(ms, err); +err = NULL; + if (res && migration_in_postcopy()) { /* * Maybe there is something we can do: it looks like a -- 2.41.0
[PATCH v6 20/20] target/riscv/kvm.c: read/write (cbom|cboz)_blocksize in KVM
If we don't set a proper cbom_blocksize|cboz_blocksize in the FDT the Linux Kernel will fail to detect the availability of the CBOM/CBOZ extensions, regardless of the contents of the 'riscv,isa' DT prop. The FDT is being written using the cpu->cfg.cbom|z_blocksize attributes, so let's expose them as user properties like it is already done with TCG. This will also require us to determine proper blocksize values during init() time since the FDT is already created during realize(). We'll take a ride in kvm_riscv_init_multiext_cfg() to do it. Note that we don't need to fetch both cbom and cboz blocksizes every time: check for their parent extensions (icbom and icboz) and only read the blocksizes if needed. In contrast with cbom/z_blocksize properties from TCG, the user is not able to set any value that is different from the 'host' value when running KVM. KVM can be particularly harsh dealing with it: a ENOTSUPP can be thrown for the mere attempt of executing kvm_set_one_reg() for these 2 regs. Hopefully, we don't need to call kvm_set_one_reg() for these regs. We'll check if the user input matches the host value in kvm_cpu_set_cbomz_blksize(), the set() accessor for both blocksize properties. We'll fail fast since it's already known to not be supported. Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones --- target/riscv/kvm.c | 70 ++ 1 file changed, 70 insertions(+) diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c index d1bf518893..e535884830 100644 --- a/target/riscv/kvm.c +++ b/target/riscv/kvm.c @@ -276,6 +276,42 @@ static void kvm_cpu_set_multi_ext_cfg(Object *obj, Visitor *v, kvm_cpu_cfg_set(cpu, multi_ext_cfg, value); } +static KVMCPUConfig kvm_cbom_blocksize = { +.name = "cbom_blocksize", +.offset = CPUCFG(cbom_blocksize), +.kvm_reg_id = KVM_REG_RISCV_CONFIG_REG(zicbom_block_size) +}; + +static KVMCPUConfig kvm_cboz_blocksize = { +.name = "cboz_blocksize", +.offset = CPUCFG(cboz_blocksize), +.kvm_reg_id = KVM_REG_RISCV_CONFIG_REG(zicboz_block_size) +}; + +static void kvm_cpu_set_cbomz_blksize(Object *obj, Visitor *v, + const char *name, + void *opaque, Error **errp) +{ +KVMCPUConfig *cbomz_cfg = opaque; +RISCVCPU *cpu = RISCV_CPU(obj); +uint16_t value, *host_val; + +if (!visit_type_uint16(v, name, &value, errp)) { +return; +} + +host_val = kvmconfig_get_cfg_addr(cpu, cbomz_cfg); + +if (value != *host_val) { +error_report("Unable to set %s to a different value than " + "the host (%u)", + cbomz_cfg->name, *host_val); +exit(EXIT_FAILURE); +} + +cbomz_cfg->user_set = true; +} + static void kvm_riscv_update_cpu_cfg_isa_ext(RISCVCPU *cpu, CPUState *cs) { CPURISCVState *env = &cpu->env; @@ -329,6 +365,14 @@ static void kvm_riscv_add_cpu_user_properties(Object *cpu_obj) kvm_cpu_set_multi_ext_cfg, NULL, multi_cfg); } + +object_property_add(cpu_obj, "cbom_blocksize", "uint16", +NULL, kvm_cpu_set_cbomz_blksize, +NULL, &kvm_cbom_blocksize); + +object_property_add(cpu_obj, "cboz_blocksize", "uint16", +NULL, kvm_cpu_set_cbomz_blksize, +NULL, &kvm_cboz_blocksize); } static int kvm_riscv_get_regs_core(CPUState *cs) @@ -644,6 +688,24 @@ static void kvm_riscv_init_misa_ext_mask(RISCVCPU *cpu, env->misa_ext = env->misa_ext_mask; } +static void kvm_riscv_read_cbomz_blksize(RISCVCPU *cpu, KVMScratchCPU *kvmcpu, + KVMCPUConfig *cbomz_cfg) +{ +CPURISCVState *env = &cpu->env; +struct kvm_one_reg reg; +int ret; + +reg.id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CONFIG, + cbomz_cfg->kvm_reg_id); +reg.addr = (uint64_t)kvmconfig_get_cfg_addr(cpu, cbomz_cfg); +ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, ®); +if (ret != 0) { +error_report("Unable to read KVM reg %s, error %d", + cbomz_cfg->name, ret); +exit(EXIT_FAILURE); +} +} + static void kvm_riscv_init_multiext_cfg(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) { CPURISCVState *env = &cpu->env; @@ -675,6 +737,14 @@ static void kvm_riscv_init_multiext_cfg(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) kvm_cpu_cfg_set(cpu, multi_ext_cfg, val); } + +if (cpu->cfg.ext_icbom) { +kvm_riscv_read_cbomz_blksize(cpu, kvmcpu, &kvm_cbom_blocksize); +} + +if (cpu->cfg.ext_icboz) { +kvm_riscv_read_cbomz_blksize(cpu, kvmcpu, &kvm_cboz_blocksize); +} } void kvm_riscv_init_user_properties(Object *cpu_obj) -- 2.41.0
[PATCH v6 09/20] linux-headers: Update to v6.4-rc1
Update to commit ac9a78681b92 ("Linux 6.4-rc1"). Signed-off-by: Daniel Henrique Barboza Acked-by: Alistair Francis --- include/standard-headers/linux/const.h| 2 +- include/standard-headers/linux/virtio_blk.h | 18 +++ .../standard-headers/linux/virtio_config.h| 6 +++ include/standard-headers/linux/virtio_net.h | 1 + linux-headers/asm-arm64/kvm.h | 33 linux-headers/asm-riscv/kvm.h | 53 ++- linux-headers/asm-riscv/unistd.h | 9 linux-headers/asm-s390/unistd_32.h| 1 + linux-headers/asm-s390/unistd_64.h| 1 + linux-headers/asm-x86/kvm.h | 3 ++ linux-headers/linux/const.h | 2 +- linux-headers/linux/kvm.h | 12 +++-- linux-headers/linux/psp-sev.h | 7 +++ linux-headers/linux/userfaultfd.h | 17 +- 14 files changed, 149 insertions(+), 16 deletions(-) diff --git a/include/standard-headers/linux/const.h b/include/standard-headers/linux/const.h index 5e48987251..1eb84b5087 100644 --- a/include/standard-headers/linux/const.h +++ b/include/standard-headers/linux/const.h @@ -28,7 +28,7 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) -#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) +#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) #define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) diff --git a/include/standard-headers/linux/virtio_blk.h b/include/standard-headers/linux/virtio_blk.h index 7155b1a470..d7be3cf5e4 100644 --- a/include/standard-headers/linux/virtio_blk.h +++ b/include/standard-headers/linux/virtio_blk.h @@ -138,11 +138,11 @@ struct virtio_blk_config { /* Zoned block device characteristics (if VIRTIO_BLK_F_ZONED) */ struct virtio_blk_zoned_characteristics { - uint32_t zone_sectors; - uint32_t max_open_zones; - uint32_t max_active_zones; - uint32_t max_append_sectors; - uint32_t write_granularity; + __virtio32 zone_sectors; + __virtio32 max_open_zones; + __virtio32 max_active_zones; + __virtio32 max_append_sectors; + __virtio32 write_granularity; uint8_t model; uint8_t unused2[3]; } zoned; @@ -239,11 +239,11 @@ struct virtio_blk_outhdr { */ struct virtio_blk_zone_descriptor { /* Zone capacity */ - uint64_t z_cap; + __virtio64 z_cap; /* The starting sector of the zone */ - uint64_t z_start; + __virtio64 z_start; /* Zone write pointer position in sectors */ - uint64_t z_wp; + __virtio64 z_wp; /* Zone type */ uint8_t z_type; /* Zone state */ @@ -252,7 +252,7 @@ struct virtio_blk_zone_descriptor { }; struct virtio_blk_zone_report { - uint64_t nr_zones; + __virtio64 nr_zones; uint8_t reserved[56]; struct virtio_blk_zone_descriptor zones[]; }; diff --git a/include/standard-headers/linux/virtio_config.h b/include/standard-headers/linux/virtio_config.h index 965ee6ae23..8a7d0dc8b0 100644 --- a/include/standard-headers/linux/virtio_config.h +++ b/include/standard-headers/linux/virtio_config.h @@ -97,6 +97,12 @@ */ #define VIRTIO_F_SR_IOV37 +/* + * This feature indicates that the driver passes extra data (besides + * identifying the virtqueue) in its device notifications. + */ +#define VIRTIO_F_NOTIFICATION_DATA 38 + /* * This feature indicates that the driver can reset a queue individually. */ diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h index c0e797067a..2325485f2c 100644 --- a/include/standard-headers/linux/virtio_net.h +++ b/include/standard-headers/linux/virtio_net.h @@ -61,6 +61,7 @@ #define VIRTIO_NET_F_GUEST_USO655 /* Guest can handle USOv6 in. */ #define VIRTIO_NET_F_HOST_USO 56 /* Host can handle USO in. */ #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ +#define VIRTIO_NET_F_GUEST_HDRLEN 59 /* Guest provides the exact hdr_len value. */ #define VIRTIO_NET_F_RSS 60/* Supports RSS RX steering */ #define VIRTIO_NET_F_RSC_EXT 61/* extended coalescing info */ #define VIRTIO_NET_F_STANDBY 62/* Act as standby for another device diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index d7e7bb885e..38e5957526 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -198,6 +198,15 @@ struct kvm_arm_copy_mte_tags { __u64 reserved[2]; }; +/* + * Counter/Timer offset structure. Describe the virtual/physical offset. + * To be used with KVM_ARM_SET_COUNTER_OFFSET. + */
[PATCH v6 16/20] target/riscv/cpu.c: remove priv_ver check from riscv_isa_string_ext()
riscv_isa_string_ext() is being used by riscv_isa_string(), which is then used by boards to retrieve the 'riscv,isa' string to be written in the FDT. All this happens after riscv_cpu_realize(), meaning that we're already past riscv_cpu_validate_set_extensions() and, more important, riscv_cpu_disable_priv_spec_isa_exts(). This means that all extensions that needed to be disabled due to priv_spec mismatch are already disabled. Checking this again during riscv_isa_string_ext() is unneeded. Remove it. As a bonus, riscv_isa_string_ext() can now be used with the 'host' KVM-only CPU type since it doesn't have a env->priv_ver assigned and it would fail this check for no good reason. Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones --- target/riscv/cpu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index b4a6fd8bab..79c8ffe6b7 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -2015,8 +2015,7 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char **isa_str, int i; for (i = 0; i < ARRAY_SIZE(isa_edata_arr); i++) { -if (cpu->env.priv_ver >= isa_edata_arr[i].min_version && -isa_ext_is_enabled(cpu, &isa_edata_arr[i])) { +if (isa_ext_is_enabled(cpu, &isa_edata_arr[i])) { new = g_strconcat(old, "_", isa_edata_arr[i].name, NULL); g_free(old); old = new; -- 2.41.0
[PATCH v6 19/20] target/riscv/kvm.c: add kvmconfig_get_cfg_addr() helper
There are 2 places in which we need to get a pointer to a certain property of the cpu->cfg struct based on property offset. Next patch will add a couple more. Create a helper to avoid repeating this code over and over. Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones --- target/riscv/kvm.c | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c index aa4edc8a56..d1bf518893 100644 --- a/target/riscv/kvm.c +++ b/target/riscv/kvm.c @@ -215,11 +215,15 @@ static KVMCPUConfig kvm_multi_ext_cfgs[] = { KVM_EXT_CFG("svpbmt", ext_svpbmt, KVM_RISCV_ISA_EXT_SVPBMT), }; +static void *kvmconfig_get_cfg_addr(RISCVCPU *cpu, KVMCPUConfig *kvmcfg) +{ +return (void *)&cpu->cfg + kvmcfg->offset; +} + static void kvm_cpu_cfg_set(RISCVCPU *cpu, KVMCPUConfig *multi_ext, uint32_t val) { -int cpu_cfg_offset = multi_ext->offset; -bool *ext_enabled = (void *)&cpu->cfg + cpu_cfg_offset; +bool *ext_enabled = kvmconfig_get_cfg_addr(cpu, multi_ext); *ext_enabled = val; } @@ -227,8 +231,7 @@ static void kvm_cpu_cfg_set(RISCVCPU *cpu, KVMCPUConfig *multi_ext, static uint32_t kvm_cpu_cfg_get(RISCVCPU *cpu, KVMCPUConfig *multi_ext) { -int cpu_cfg_offset = multi_ext->offset; -bool *ext_enabled = (void *)&cpu->cfg + cpu_cfg_offset; +bool *ext_enabled = kvmconfig_get_cfg_addr(cpu, multi_ext); return *ext_enabled; } -- 2.41.0
[PATCH v6 02/20] hw/riscv/virt.c: skip 'mmu-type' FDT if satp mode not set
The absence of a satp mode in riscv_host_cpu_init() is causing the following error: $ sudo ./qemu/build/qemu-system-riscv64 -machine virt,accel=kvm \ -m 2G -smp 1 -nographic -snapshot \ -kernel ./guest_imgs/Image \ -initrd ./guest_imgs/rootfs_kvm_riscv64.img \ -append "earlycon=sbi root=/dev/ram rw" \ -cpu host ** ERROR:../target/riscv/cpu.c:320:satp_mode_str: code should not be reached Bail out! ERROR:../target/riscv/cpu.c:320:satp_mode_str: code should not be reached Aborted The error is triggered from create_fdt_socket_cpus() in hw/riscv/virt.c. It's trying to get satp_mode_str for a NULL cpu->cfg.satp_mode.map. For this KVM cpu we would need to inherit the satp supported modes from the RISC-V host. At this moment this is not possible because the KVM driver does not support it. And even when it does we can't just let this broken for every other older kernel. Since mmu-type is not a required node, according to [1], skip the 'mmu-type' FDT node if there's no satp_mode set. We'll revisit this logic when we can get satp information from KVM. [1] https://github.com/devicetree-org/dt-schema/blob/main/dtschema/schemas/cpu.yaml Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Reviewed-by: Alistair Francis Reviewed-by: Philippe Mathieu-Daudé --- hw/riscv/virt.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index 95708d890e..f025a0fcaf 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -243,13 +243,13 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket, s->soc[socket].hartid_base + cpu); qemu_fdt_add_subnode(ms->fdt, cpu_name); -satp_mode_max = satp_mode_max_from_map( -s->soc[socket].harts[cpu].cfg.satp_mode.map); -sv_name = g_strdup_printf("riscv,%s", - satp_mode_str(satp_mode_max, is_32_bit)); -qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", sv_name); -g_free(sv_name); - +if (cpu_ptr->cfg.satp_mode.supported != 0) { +satp_mode_max = satp_mode_max_from_map(cpu_ptr->cfg.satp_mode.map); +sv_name = g_strdup_printf("riscv,%s", + satp_mode_str(satp_mode_max, is_32_bit)); +qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", sv_name); +g_free(sv_name); +} name = riscv_isa_string(cpu_ptr); qemu_fdt_setprop_string(ms->fdt, cpu_name, "riscv,isa", name); -- 2.41.0
[PATCH v6 14/20] target/riscv/kvm.c: add multi-letter extension KVM properties
Let's add KVM user properties for the multi-letter extensions that KVM currently supports: zicbom, zicboz, zihintpause, zbb, ssaia, sstc, svinval and svpbmt. As with MISA extensions, we're using the KVMCPUConfig type to hold information about the state of each extension. However, multi-letter extensions have more cases to cover than MISA extensions, so we're adding an extra 'supported' flag as well. This flag will reflect if a given extension is supported by KVM, i.e. KVM knows how to handle it. This is determined during KVM extension discovery in kvm_riscv_init_multiext_cfg(), where we test for EINVAL errors. Any other error different from EINVAL will cause an abort. The use of the 'user_set' is similar to what we already do with MISA extensions: the flag set only if the user is changing the extension state. The 'supported' flag will be used later on to make an exception for users that are disabling multi-letter extensions that are unknown to KVM. Signed-off-by: Daniel Henrique Barboza --- target/riscv/cpu.c | 8 +++ target/riscv/kvm.c | 119 + 2 files changed, 127 insertions(+) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index f4b1868466..5428402cfa 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -1751,6 +1751,14 @@ static void riscv_cpu_add_user_properties(Object *obj) riscv_cpu_add_misa_properties(obj); for (prop = riscv_cpu_extensions; prop && prop->name; prop++) { +#ifndef CONFIG_USER_ONLY +if (kvm_enabled()) { +/* Check if KVM created the property already */ +if (object_property_find(obj, prop->name)) { +continue; +} +} +#endif qdev_property_add_static(dev, prop); } diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c index ba97b0cbed..68f6538b04 100644 --- a/target/riscv/kvm.c +++ b/target/riscv/kvm.c @@ -113,6 +113,7 @@ typedef struct KVMCPUConfig { target_ulong offset; int kvm_reg_id; bool user_set; +bool supported; } KVMCPUConfig; #define KVM_MISA_CFG(_bit, _reg_id) \ @@ -197,6 +198,81 @@ static void kvm_riscv_update_cpu_misa_ext(RISCVCPU *cpu, CPUState *cs) } } +#define CPUCFG(_prop) offsetof(struct RISCVCPUConfig, _prop) + +#define KVM_EXT_CFG(_name, _prop, _reg_id) \ +{.name = _name, .offset = CPUCFG(_prop), \ + .kvm_reg_id = _reg_id} + +static KVMCPUConfig kvm_multi_ext_cfgs[] = { +KVM_EXT_CFG("zicbom", ext_icbom, KVM_RISCV_ISA_EXT_ZICBOM), +KVM_EXT_CFG("zicboz", ext_icboz, KVM_RISCV_ISA_EXT_ZICBOZ), +KVM_EXT_CFG("zihintpause", ext_zihintpause, KVM_RISCV_ISA_EXT_ZIHINTPAUSE), +KVM_EXT_CFG("zbb", ext_zbb, KVM_RISCV_ISA_EXT_ZBB), +KVM_EXT_CFG("ssaia", ext_ssaia, KVM_RISCV_ISA_EXT_SSAIA), +KVM_EXT_CFG("sstc", ext_sstc, KVM_RISCV_ISA_EXT_SSTC), +KVM_EXT_CFG("svinval", ext_svinval, KVM_RISCV_ISA_EXT_SVINVAL), +KVM_EXT_CFG("svpbmt", ext_svpbmt, KVM_RISCV_ISA_EXT_SVPBMT), +}; + +static void kvm_cpu_cfg_set(RISCVCPU *cpu, KVMCPUConfig *multi_ext, +uint32_t val) +{ +int cpu_cfg_offset = multi_ext->offset; +bool *ext_enabled = (void *)&cpu->cfg + cpu_cfg_offset; + +*ext_enabled = val; +} + +static uint32_t kvm_cpu_cfg_get(RISCVCPU *cpu, +KVMCPUConfig *multi_ext) +{ +int cpu_cfg_offset = multi_ext->offset; +bool *ext_enabled = (void *)&cpu->cfg + cpu_cfg_offset; + +return *ext_enabled; +} + +static void kvm_cpu_set_multi_ext_cfg(Object *obj, Visitor *v, + const char *name, + void *opaque, Error **errp) +{ +KVMCPUConfig *multi_ext_cfg = opaque; +RISCVCPU *cpu = RISCV_CPU(obj); +bool value, host_val; + +if (!visit_type_bool(v, name, &value, errp)) { +return; +} + +host_val = kvm_cpu_cfg_get(cpu, multi_ext_cfg); + +/* + * Ignore if the user is setting the same value + * as the host. + */ +if (value == host_val) { +return; +} + +if (!multi_ext_cfg->supported) { +/* + * Error out if the user is trying to enable an + * extension that KVM doesn't support. Ignore + * option otherwise. + */ +if (value) { +error_setg(errp, "KVM does not support disabling extension %s", + multi_ext_cfg->name); +} + +return; +} + +multi_ext_cfg->user_set = true; +kvm_cpu_cfg_set(cpu, multi_ext_cfg, value); +} + static void kvm_riscv_add_cpu_user_properties(Object *cpu_obj) { int i; @@ -215,6 +291,15 @@ static void kvm_riscv_add_cpu_user_properties(Object *cpu_obj) object_property_set_description(cpu_obj, misa_cfg->name, misa_cfg->description); } + +for (i = 0; i < ARRAY_SIZE(kvm_multi_ext_cfgs); i++) { +KVMCPUConfig *multi_cfg = &kvm_multi_ext_cfgs[i]; + +object_property_add(cpu_obj
[PATCH v6 18/20] target/riscv: update multi-letter extension KVM properties
We're now ready to update the multi-letter extensions status for KVM. kvm_riscv_update_cpu_cfg_isa_ext() is called called during vcpu creation time to verify which user options changes host defaults (via the 'user_set' flag) and tries to write them back to KVM. Failure to commit a change to KVM is only ignored in case KVM doesn't know about the extension (-EINVAL error code) and the user wanted to disable the given extension. Otherwise we're going to abort the boot process. Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones --- target/riscv/kvm.c | 27 +++ 1 file changed, 27 insertions(+) diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c index 68f6538b04..aa4edc8a56 100644 --- a/target/riscv/kvm.c +++ b/target/riscv/kvm.c @@ -273,6 +273,32 @@ static void kvm_cpu_set_multi_ext_cfg(Object *obj, Visitor *v, kvm_cpu_cfg_set(cpu, multi_ext_cfg, value); } +static void kvm_riscv_update_cpu_cfg_isa_ext(RISCVCPU *cpu, CPUState *cs) +{ +CPURISCVState *env = &cpu->env; +uint64_t id, reg; +int i, ret; + +for (i = 0; i < ARRAY_SIZE(kvm_multi_ext_cfgs); i++) { +KVMCPUConfig *multi_ext_cfg = &kvm_multi_ext_cfgs[i]; + +if (!multi_ext_cfg->user_set) { +continue; +} + +id = kvm_riscv_reg_id(env, KVM_REG_RISCV_ISA_EXT, + multi_ext_cfg->kvm_reg_id); +reg = kvm_cpu_cfg_get(cpu, multi_ext_cfg); +ret = kvm_set_one_reg(cs, id, ®); +if (ret != 0) { +error_report("Unable to %s extension %s in KVM, error %d", + reg ? "enable" : "disable", + multi_ext_cfg->name, ret); +exit(EXIT_FAILURE); +} +} +} + static void kvm_riscv_add_cpu_user_properties(Object *cpu_obj) { int i; @@ -792,6 +818,7 @@ int kvm_arch_init_vcpu(CPUState *cs) } kvm_riscv_update_cpu_misa_ext(cpu, cs); +kvm_riscv_update_cpu_cfg_isa_ext(cpu, cs); return ret; } -- 2.41.0
[PATCH v6 17/20] target/riscv/cpu.c: create KVM mock properties
KVM-specific properties are being created inside target/riscv/kvm.c. But at this moment we're gathering all the remaining properties from TCG and adding them as is when running KVM. This creates a situation where non-KVM properties are setting flags to 'true' due to its default settings (e.g. Zawrs). Users can also freely enable them via command line. This doesn't impact runtime per se because KVM doesn't care about these flags, but code such as riscv_isa_string_ext() take those flags into account. The result is that, for a KVM guest, setting non-KVM properties will make them appear in the riscv,isa DT. We want to keep the same API for both TCG and KVM and at the same time, when running KVM, forbid non-KVM extensions to be enabled internally. We accomplish both by changing riscv_cpu_add_user_properties() to add a mock boolean property for every non-KVM extension in riscv_cpu_extensions[]. Then, when running KVM, users are still free to set extensions at will, but we'll error out if a non-KVM extension is enabled. Setting such extension to 'false' will be ignored. Signed-off-by: Daniel Henrique Barboza --- target/riscv/cpu.c | 36 1 file changed, 36 insertions(+) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 79c8ffe6b7..6d7a0bc4ae 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -1731,6 +1731,26 @@ static Property riscv_cpu_extensions[] = { DEFINE_PROP_END_OF_LIST(), }; + +#ifndef CONFIG_USER_ONLY +static void cpu_set_cfg_unavailable(Object *obj, Visitor *v, +const char *name, +void *opaque, Error **errp) +{ +const char *propname = opaque; +bool value; + +if (!visit_type_bool(v, name, &value, errp)) { +return; +} + +if (value) { +error_setg(errp, "extension %s is not available with KVM", + propname); +} +} +#endif + /* * Add CPU properties with user-facing flags. * @@ -1759,6 +1779,22 @@ static void riscv_cpu_add_user_properties(Object *obj) if (object_property_find(obj, prop->name)) { continue; } + +/* + * Set the default to disabled for every extension + * unknown to KVM and error out if the user attempts + * to enable any of them. + * + * We're giving a pass for non-bool properties since they're + * not related to the availability of extensions and can be + * safely ignored as is. + */ +if (prop->info == &qdev_prop_bool) { +object_property_add(obj, prop->name, "bool", +NULL, cpu_set_cfg_unavailable, +NULL, (void *)prop->name); +continue; +} } #endif qdev_property_add_static(dev, prop); -- 2.41.0
[PATCH v6 03/20] target/riscv/cpu.c: restrict 'mvendorid' value
We're going to change the handling of mvendorid/marchid/mimpid by the KVM driver. Since these are always present in all CPUs let's put the same validation for everyone. It doesn't make sense to allow 'mvendorid' to be different than it is already set in named (vendor) CPUs. Generic (dynamic) CPUs can have any 'mvendorid' they want. Change 'mvendorid' to be a class property created via 'object_class_property_add', instead of using the DEFINE_PROP_UINT32() macro. This allow us to define a custom setter for it that will verify, for named CPUs, if mvendorid is different than it is already set by the CPU. This is the error thrown for the 'veyron-v1' CPU if 'mvendorid' is set to an invalid value: $ qemu-system-riscv64 -M virt -nographic -cpu veyron-v1,mvendorid=2 qemu-system-riscv64: can't apply global veyron-v1-riscv-cpu.mvendorid=2: Unable to change veyron-v1-riscv-cpu mvendorid (0x61f) Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Reviewed-by: Alistair Francis --- target/riscv/cpu.c | 38 +- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index bbb201a2b3..652988db25 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -1730,7 +1730,6 @@ static void riscv_cpu_add_user_properties(Object *obj) static Property riscv_cpu_properties[] = { DEFINE_PROP_BOOL("debug", RISCVCPU, cfg.debug, true), -DEFINE_PROP_UINT32("mvendorid", RISCVCPU, cfg.mvendorid, 0), DEFINE_PROP_UINT64("marchid", RISCVCPU, cfg.marchid, RISCV_CPU_MARCHID), DEFINE_PROP_UINT64("mimpid", RISCVCPU, cfg.mimpid, RISCV_CPU_MIMPID), @@ -1817,6 +1816,40 @@ static const struct TCGCPUOps riscv_tcg_ops = { #endif /* !CONFIG_USER_ONLY */ }; +static bool riscv_cpu_is_dynamic(Object *cpu_obj) +{ +return object_dynamic_cast(cpu_obj, TYPE_RISCV_DYNAMIC_CPU) != NULL; +} + +static void cpu_set_mvendorid(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ +bool dynamic_cpu = riscv_cpu_is_dynamic(obj); +RISCVCPU *cpu = RISCV_CPU(obj); +uint32_t prev_val = cpu->cfg.mvendorid; +uint32_t value; + +if (!visit_type_uint32(v, name, &value, errp)) { +return; +} + +if (!dynamic_cpu && prev_val != value) { +error_setg(errp, "Unable to change %s mvendorid (0x%x)", + object_get_typename(obj), prev_val); +return; +} + +cpu->cfg.mvendorid = value; +} + +static void cpu_get_mvendorid(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ +bool value = RISCV_CPU(obj)->cfg.mvendorid; + +visit_type_bool(v, name, &value, errp); +} + static void riscv_cpu_class_init(ObjectClass *c, void *data) { RISCVCPUClass *mcc = RISCV_CPU_CLASS(c); @@ -1848,6 +1881,9 @@ static void riscv_cpu_class_init(ObjectClass *c, void *data) cc->gdb_get_dynamic_xml = riscv_gdb_get_dynamic_xml; cc->tcg_ops = &riscv_tcg_ops; +object_class_property_add(c, "mvendorid", "uint32", cpu_get_mvendorid, + cpu_set_mvendorid, NULL, NULL); + device_class_set_props(dc, riscv_cpu_properties); } -- 2.41.0
[PATCH v6 07/20] target/riscv: read marchid/mimpid in kvm_riscv_init_machine_ids()
Allow 'marchid' and 'mimpid' to also be initialized in kvm_riscv_init_machine_ids(). After this change, the handling of mvendorid/marchid/mimpid for the 'host' CPU type will be equal to what we already have for TCG named CPUs, i.e. the user is not able to set these values to a different val than the one that is already preset. Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Acked-by: Alistair Francis --- target/riscv/kvm.c | 16 1 file changed, 16 insertions(+) diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c index 37f0f70794..cd2974c663 100644 --- a/target/riscv/kvm.c +++ b/target/riscv/kvm.c @@ -378,6 +378,22 @@ static void kvm_riscv_init_machine_ids(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) if (ret != 0) { error_report("Unable to retrieve mvendorid from host, error %d", ret); } + +reg.id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CONFIG, + KVM_REG_RISCV_CONFIG_REG(marchid)); +reg.addr = (uint64_t)&cpu->cfg.marchid; +ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, ®); +if (ret != 0) { +error_report("Unable to retrieve marchid from host, error %d", ret); +} + +reg.id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CONFIG, + KVM_REG_RISCV_CONFIG_REG(mimpid)); +reg.addr = (uint64_t)&cpu->cfg.mimpid; +ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, ®); +if (ret != 0) { +error_report("Unable to retrieve mimpid from host, error %d", ret); +} } void kvm_riscv_init_user_properties(Object *cpu_obj) -- 2.41.0
[PATCH v6 13/20] target/riscv/kvm.c: update KVM MISA bits
Our design philosophy with KVM properties can be resumed in two main decisions based on KVM interface availability and what the user wants to do: - if the user disables an extension that the host KVM module doesn't know about (i.e. it doesn't implement the kvm_get_one_reg() interface), keep booting the CPU. This will avoid users having to deal with issues with older KVM versions while disabling features they don't care; - for any other case we're going to error out immediately. If the user wants to enable a feature that KVM doesn't know about this a problem that is worth aborting - the user must know that the feature wasn't enabled in the hart. Likewise, if KVM knows about the extension, the user wants to enable/disable it, and we fail to do it so, that's also a problem we can't shrug it off. In the case of MISA bits we won't even try enabling bits that aren't already available in the host. The ioctl() is so likely to fail that it's not worth trying. This check is already done in the previous patch, in kvm_cpu_set_misa_ext_cfg(), thus we don't need to worry about it now. In kvm_riscv_update_cpu_misa_ext() we'll go through every potential user option and do as follows: - if the user didn't set the property or set to the same value of the host, do nothing; - Disable the given extension in KVM. Error out if anything goes wrong. Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones --- target/riscv/kvm.c | 40 1 file changed, 40 insertions(+) diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c index 0fb63cced3..ba97b0cbed 100644 --- a/target/riscv/kvm.c +++ b/target/riscv/kvm.c @@ -162,6 +162,41 @@ static void kvm_cpu_set_misa_ext_cfg(Object *obj, Visitor *v, "enabled in the host", misa_ext_cfg->name); } +static void kvm_riscv_update_cpu_misa_ext(RISCVCPU *cpu, CPUState *cs) +{ +CPURISCVState *env = &cpu->env; +uint64_t id, reg; +int i, ret; + +for (i = 0; i < ARRAY_SIZE(kvm_misa_ext_cfgs); i++) { +KVMCPUConfig *misa_cfg = &kvm_misa_ext_cfgs[i]; +target_ulong misa_bit = misa_cfg->offset; + +if (!misa_cfg->user_set) { +continue; +} + +/* If we're here we're going to disable the MISA bit */ +reg = 0; +id = kvm_riscv_reg_id(env, KVM_REG_RISCV_ISA_EXT, + misa_cfg->kvm_reg_id); +ret = kvm_set_one_reg(cs, id, ®); +if (ret != 0) { +/* + * We're not checking for -EINVAL because if the bit is about + * to be disabled, it means that it was already enabled by + * KVM. We determined that by fetching the 'isa' register + * during init() time. Any error at this point is worth + * aborting. + */ +error_report("Unable to set KVM reg %s, error %d", + misa_cfg->name, ret); +exit(EXIT_FAILURE); +} +env->misa_ext &= ~misa_bit; +} +} + static void kvm_riscv_add_cpu_user_properties(Object *cpu_obj) { int i; @@ -632,8 +667,13 @@ int kvm_arch_init_vcpu(CPUState *cs) if (!object_dynamic_cast(OBJECT(cpu), TYPE_RISCV_CPU_HOST)) { ret = kvm_vcpu_set_machine_ids(cpu, cs); +if (ret != 0) { +return ret; +} } +kvm_riscv_update_cpu_misa_ext(cpu, cs); + return ret; } -- 2.41.0
[PATCH v6 10/20] target/riscv/kvm.c: init 'misa_ext_mask' with scratch CPU
At this moment we're retrieving env->misa_ext during kvm_arch_init_cpu(), leaving env->misa_ext_mask behind. We want to set env->misa_ext_mask, and we want to set it as early as possible. The reason is that we're going to use it in the validation process of the KVM MISA properties we're going to add next. Setting it during arch_init_cpu() is too late for user validation. Move the code to a new helper that is going to be called during init() time, via kvm_riscv_init_user_properties(), like we're already doing for the machine ID properties. Set both misa_ext and misa_ext_mask to the same value retrieved by the 'isa' config reg. Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Acked-by: Alistair Francis --- target/riscv/kvm.c | 34 +++--- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c index 602727cdfd..4d0808cb9a 100644 --- a/target/riscv/kvm.c +++ b/target/riscv/kvm.c @@ -396,6 +396,28 @@ static void kvm_riscv_init_machine_ids(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) } } +static void kvm_riscv_init_misa_ext_mask(RISCVCPU *cpu, + KVMScratchCPU *kvmcpu) +{ +CPURISCVState *env = &cpu->env; +struct kvm_one_reg reg; +int ret; + +reg.id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CONFIG, + KVM_REG_RISCV_CONFIG_REG(isa)); +reg.addr = (uint64_t)&env->misa_ext_mask; +ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, ®); + +if (ret) { +error_report("Unable to fetch ISA register from KVM, " + "error %d", ret); +kvm_riscv_destroy_scratch_vcpu(kvmcpu); +exit(EXIT_FAILURE); +} + +env->misa_ext = env->misa_ext_mask; +} + void kvm_riscv_init_user_properties(Object *cpu_obj) { RISCVCPU *cpu = RISCV_CPU(cpu_obj); @@ -406,6 +428,7 @@ void kvm_riscv_init_user_properties(Object *cpu_obj) } kvm_riscv_init_machine_ids(cpu, &kvmcpu); +kvm_riscv_init_misa_ext_mask(cpu, &kvmcpu); kvm_riscv_destroy_scratch_vcpu(&kvmcpu); } @@ -525,21 +548,10 @@ static int kvm_vcpu_set_machine_ids(RISCVCPU *cpu, CPUState *cs) int kvm_arch_init_vcpu(CPUState *cs) { int ret = 0; -target_ulong isa; RISCVCPU *cpu = RISCV_CPU(cs); -CPURISCVState *env = &cpu->env; -uint64_t id; qemu_add_vm_change_state_handler(kvm_riscv_vm_state_change, cs); -id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CONFIG, - KVM_REG_RISCV_CONFIG_REG(isa)); -ret = kvm_get_one_reg(cs, id, &isa); -if (ret) { -return ret; -} -env->misa_ext = isa; - if (!object_dynamic_cast(OBJECT(cpu), TYPE_RISCV_CPU_HOST)) { ret = kvm_vcpu_set_machine_ids(cpu, cs); } -- 2.41.0
[PATCH v6 00/20] target/riscv, KVM: fixes and enhancements
Hi, This new version has changes suggested by Phil in v5. It also has some changes I made on the fly. The short summary is: - use "#ifndef CONFIG_USER_MODE kvm_enabled() #endif" instead of a helper that wraps them all up. This was suggested by Phil to be more clear about the code that depends on KVM or not. A new patch (15) was added to keep the amount of #ifndefs in control in riscv_cpu_add_user_properties() due to this design change; - misa_ext_info_arr[] is now indexed from 0 to 25 instead of 0 from potentially 1 << 25. Getters for 'name' and 'description' were added. It's worth mentioning a change I decided to make that wasn't mentioned in the v5 review. In patch 12, where we added KVM MISA properties, we were trying to find the property before proceeding with object_property_add() to add the remaining properties. I remembered that we can also use object_property_try_add(). In fact, object_property_add() is a simple call for object_property_try_add() that uses &error_fatal. And it turns out that and the only error being reported by try_add() happens when object_property_find() finds a property with the same name. This means that we can use object_property_try_add() with an error pointer and, if an error is reported, it means that we already have the property. This change makes us do a single object_property_find() instead of 2. Patches 6 and 17 (former 16) had trivial changes, all of them related to the extinct riscv_running_kvm() helper, and I decided to keep their r-bs for reviewer convenience. Series was rebased on top of riscv-to-apply.next. Patches missing reviews: 1, 11, 12, 14, 15. Changes from v5: - trivial changes (R-bs kept): - patch 6: use kvm_enabled() instead of riscv_running_kvm() - patch 17 (former 16): wrap cpu_set_cfg_unavailable() inside '#ifndef CONFIG_USER_MODE' - patch 1: - riscv_running_kvm() removed - rename riscv_cpu_realize_functions() to riscv_cpu_realize_tcg() - use 'tcg_enabled()' before riscv_cpu_realize_tcg() - reword commit msg to indicate that we're checking positive for TCG instead of negative for KVM - patch 11: - misa_ext_info_arr[] is now indexed from 0 (A-A) to 25 (Z-A) - getter functions for 'name' and 'description' were added - misa_ext_info_arr[] is no longer exported in target/riscv/cpu.h - 'name' and 'description' of misa_ext_cfg[] must now be populated after initialization because misa_ext_info_arr[] is no longer a constant array - patch 12: - use object_property_try_add() in riscv_cpu_add_misa_properties(). It'll set an Error pointer if the property already exists. Use that to skip an already created property instead of doing a redundant object_property_find() beforehand - moved the object_property_find() code that was checking for multi-letter properties to patch 14 - use the new getters() from patch 11 instead of reading the array directly - patch 14: - added the code from patch 12 that checks for an existing multi-letter property. Add a kvm_enabled() check around it in preparation for the mock KVM properties patch later on - patch 15 (new): - create satp_mode properties earlier to avoid an extra #ifndef CONFIG_USER_MODE block - v5 link: https://lists.gnu.org/archive/html/qemu-devel/2023-06/msg05984.html Daniel Henrique Barboza (20): target/riscv: skip features setup for KVM CPUs hw/riscv/virt.c: skip 'mmu-type' FDT if satp mode not set target/riscv/cpu.c: restrict 'mvendorid' value target/riscv/cpu.c: restrict 'mimpid' value target/riscv/cpu.c: restrict 'marchid' value target/riscv: use KVM scratch CPUs to init KVM properties target/riscv: read marchid/mimpid in kvm_riscv_init_machine_ids() target/riscv: handle mvendorid/marchid/mimpid for KVM CPUs linux-headers: Update to v6.4-rc1 target/riscv/kvm.c: init 'misa_ext_mask' with scratch CPU target/riscv/cpu: add misa_ext_info_arr[] target/riscv: add KVM specific MISA properties target/riscv/kvm.c: update KVM MISA bits target/riscv/kvm.c: add multi-letter extension KVM properties target/riscv/cpu.c: add satp_mode properties earlier target/riscv/cpu.c: remove priv_ver check from riscv_isa_string_ext() target/riscv/cpu.c: create KVM mock properties target/riscv: update multi-letter extension KVM properties target/riscv/kvm.c: add kvmconfig_get_cfg_addr() helper target/riscv/kvm.c: read/write (cbom|cboz)_blocksize in KVM hw/riscv/virt.c | 14 +- include/standard-headers/linux/const.h| 2 +- include/standard-headers/linux/virtio_blk.h | 18 +- .../standard-headers/linux/virtio_config.h| 6 + include/standard-headers/linux/virtio_net.h | 1 + linux-headers/asm-arm64/kvm.h | 33 ++ linux-headers/asm-riscv/kvm.h | 53 +- linux-headers/asm-riscv/unistd.h | 9 + linux-headers/asm-s390/unistd_32.h| 1 + linux-headers/asm-s390/unistd_64.h| 1 + linux-hea
[PATCH v6 08/20] target/riscv: handle mvendorid/marchid/mimpid for KVM CPUs
After changing user validation for mvendorid/marchid/mimpid to guarantee that the value is validated on user input time, coupled with the work in fetching KVM default values for them by using a scratch CPU, we're certain that the values in cpu->cfg.(mvendorid|marchid|mimpid) are already good to be written back to KVM. There's no need to write the values back for 'host' type CPUs since the values can't be changed, so let's do that just for generic CPUs. Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Acked-by: Alistair Francis --- target/riscv/kvm.c | 31 +++ 1 file changed, 31 insertions(+) diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c index cd2974c663..602727cdfd 100644 --- a/target/riscv/kvm.c +++ b/target/riscv/kvm.c @@ -495,6 +495,33 @@ void kvm_arch_init_irq_routing(KVMState *s) { } +static int kvm_vcpu_set_machine_ids(RISCVCPU *cpu, CPUState *cs) +{ +CPURISCVState *env = &cpu->env; +uint64_t id; +int ret; + +id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CONFIG, + KVM_REG_RISCV_CONFIG_REG(mvendorid)); +ret = kvm_set_one_reg(cs, id, &cpu->cfg.mvendorid); +if (ret != 0) { +return ret; +} + +id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CONFIG, + KVM_REG_RISCV_CONFIG_REG(marchid)); +ret = kvm_set_one_reg(cs, id, &cpu->cfg.marchid); +if (ret != 0) { +return ret; +} + +id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CONFIG, + KVM_REG_RISCV_CONFIG_REG(mimpid)); +ret = kvm_set_one_reg(cs, id, &cpu->cfg.mimpid); + +return ret; +} + int kvm_arch_init_vcpu(CPUState *cs) { int ret = 0; @@ -513,6 +540,10 @@ int kvm_arch_init_vcpu(CPUState *cs) } env->misa_ext = isa; +if (!object_dynamic_cast(OBJECT(cpu), TYPE_RISCV_CPU_HOST)) { +ret = kvm_vcpu_set_machine_ids(cpu, cs); +} + return ret; } -- 2.41.0
[PATCH v6 15/20] target/riscv/cpu.c: add satp_mode properties earlier
riscv_cpu_add_user_properties() ended up with an excess of "#ifndef CONFIG_USER_ONLY" blocks after changes that added KVM properties handling. KVM specific properties are required to be created earlier than their TCG counterparts, but the remaining props can be created at any order. Move riscv_add_satp_mode_properties() to the start of the function, inside the !CONFIG_USER_ONLY block already present there, to remove the last ifndef block. Signed-off-by: Daniel Henrique Barboza --- target/riscv/cpu.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 5428402cfa..b4a6fd8bab 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -1743,6 +1743,8 @@ static void riscv_cpu_add_user_properties(Object *obj) DeviceState *dev = DEVICE(obj); #ifndef CONFIG_USER_ONLY +riscv_add_satp_mode_properties(obj); + if (kvm_enabled()) { kvm_riscv_init_user_properties(obj); } @@ -1761,10 +1763,6 @@ static void riscv_cpu_add_user_properties(Object *obj) #endif qdev_property_add_static(dev, prop); } - -#ifndef CONFIG_USER_ONLY -riscv_add_satp_mode_properties(obj); -#endif } static Property riscv_cpu_properties[] = { -- 2.41.0
[PATCH v6 11/20] target/riscv/cpu: add misa_ext_info_arr[]
Next patch will add KVM specific user properties for both MISA and multi-letter extensions. For MISA extensions we want to make use of what is already available in misa_ext_cfgs[] to avoid code repetition. misa_ext_info_arr[] array will hold name and description for each MISA extension that misa_ext_cfgs[] is declaring. We'll then use this new array in KVM code to avoid duplicating strings. There's nothing holding us back from doing the same with multi-letter extensions. For now doing just with MISA extensions is enough. Suggested-by: Andrew Jones Signed-off-by: Daniel Henrique Barboza --- target/riscv/cpu.c | 83 ++ target/riscv/cpu.h | 7 +++- 2 files changed, 61 insertions(+), 29 deletions(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 2485e820f8..90dd2078ae 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -1558,33 +1558,57 @@ static void cpu_get_misa_ext_cfg(Object *obj, Visitor *v, const char *name, visit_type_bool(v, name, &value, errp); } -static const RISCVCPUMisaExtConfig misa_ext_cfgs[] = { -{.name = "a", .description = "Atomic instructions", - .misa_bit = RVA, .enabled = true}, -{.name = "c", .description = "Compressed instructions", - .misa_bit = RVC, .enabled = true}, -{.name = "d", .description = "Double-precision float point", - .misa_bit = RVD, .enabled = true}, -{.name = "f", .description = "Single-precision float point", - .misa_bit = RVF, .enabled = true}, -{.name = "i", .description = "Base integer instruction set", - .misa_bit = RVI, .enabled = true}, -{.name = "e", .description = "Base integer instruction set (embedded)", - .misa_bit = RVE, .enabled = false}, -{.name = "m", .description = "Integer multiplication and division", - .misa_bit = RVM, .enabled = true}, -{.name = "s", .description = "Supervisor-level instructions", - .misa_bit = RVS, .enabled = true}, -{.name = "u", .description = "User-level instructions", - .misa_bit = RVU, .enabled = true}, -{.name = "h", .description = "Hypervisor", - .misa_bit = RVH, .enabled = true}, -{.name = "x-j", .description = "Dynamic translated languages", - .misa_bit = RVJ, .enabled = false}, -{.name = "v", .description = "Vector operations", - .misa_bit = RVV, .enabled = false}, -{.name = "g", .description = "General purpose (IMAFD_Zicsr_Zifencei)", - .misa_bit = RVG, .enabled = false}, +typedef struct misa_ext_info { +const char *name; +const char *description; +} MISAExtInfo; + +#define MISA_EXT_INFO(_idx, _propname, _descr) \ +[(_idx - 'A')] = {.name = _propname, .description = _descr} + +static const MISAExtInfo misa_ext_info_arr[] = { +MISA_EXT_INFO('A', "a", "Atomic instructions"), +MISA_EXT_INFO('C', "c", "Compressed instructions"), +MISA_EXT_INFO('D', "d", "Double-precision float point"), +MISA_EXT_INFO('F', "f", "Single-precision float point"), +MISA_EXT_INFO('I', "i", "Base integer instruction set"), +MISA_EXT_INFO('E', "e", "Base integer instruction set (embedded)"), +MISA_EXT_INFO('M', "m", "Integer multiplication and division"), +MISA_EXT_INFO('S', "s", "Supervisor-level instructions"), +MISA_EXT_INFO('U', "u", "User-level instructions"), +MISA_EXT_INFO('H', "h", "Hypervisor"), +MISA_EXT_INFO('J', "x-j", "Dynamic translated languages"), +MISA_EXT_INFO('V', "v", "Vector operations"), +MISA_EXT_INFO('G', "g", "General purpose (IMAFD_Zicsr_Zifencei)"), +}; + +const char *riscv_get_misa_ext_name(uint32_t bit) +{ +return misa_ext_info_arr[ctz32(bit)].name; +} + +const char *riscv_get_misa_ext_descr(uint32_t bit) +{ +return misa_ext_info_arr[ctz32(bit)].description; +} + +#define MISA_CFG(_bit, _enabled) \ +{.misa_bit = _bit, .enabled = _enabled} + +static RISCVCPUMisaExtConfig misa_ext_cfgs[] = { +MISA_CFG(RVA, true), +MISA_CFG(RVC, true), +MISA_CFG(RVD, true), +MISA_CFG(RVF, true), +MISA_CFG(RVI, true), +MISA_CFG(RVE, false), +MISA_CFG(RVM, true), +MISA_CFG(RVS, true), +MISA_CFG(RVU, true), +MISA_CFG(RVH, true), +MISA_CFG(RVJ, false), +MISA_CFG(RVV, false), +MISA_CFG(RVG, false), }; static void riscv_cpu_add_misa_properties(Object *cpu_obj) @@ -1592,7 +1616,10 @@ static void riscv_cpu_add_misa_properties(Object *cpu_obj) int i; for (i = 0; i < ARRAY_SIZE(misa_ext_cfgs); i++) { -const RISCVCPUMisaExtConfig *misa_cfg = &misa_ext_cfgs[i]; +RISCVCPUMisaExtConfig *misa_cfg = &misa_ext_cfgs[i]; + +misa_cfg->name = riscv_get_misa_ext_name(misa_cfg->misa_bit); +misa_cfg->description = riscv_get_misa_ext_descr(misa_cfg->misa_bit); object_property_add(cpu_obj, misa_cfg->name, "bool", cpu_get_misa_ext_cfg, diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index cc20ee25a7..acae118b9b 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@
[PATCH v6 04/20] target/riscv/cpu.c: restrict 'mimpid' value
Following the same logic used with 'mvendorid' let's also restrict 'mimpid' for named CPUs. Generic CPUs keep setting the value freely. Note that we're getting rid of the default RISCV_CPU_MARCHID value. The reason is that this is not a good default since it's dynamic, changing with with every QEMU version, regardless of whether the actual implementation of the CPU changed from one QEMU version to the other. Named CPU should set it to a meaningful value instead and generic CPUs can set whatever they want. This is the error thrown for an invalid 'mimpid' value for the veyron-v1 CPU: $ ./qemu-system-riscv64 -M virt -nographic -cpu veyron-v1,mimpid=2 qemu-system-riscv64: can't apply global veyron-v1-riscv-cpu.mimpid=2: Unable to change veyron-v1-riscv-cpu mimpid (0x111) Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Reviewed-by: Alistair Francis --- target/riscv/cpu.c | 34 -- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 652988db25..7ca7b7bbfa 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -43,7 +43,6 @@ #define RISCV_CPU_MARCHID ((QEMU_VERSION_MAJOR << 16) | \ (QEMU_VERSION_MINOR << 8) | \ (QEMU_VERSION_MICRO)) -#define RISCV_CPU_MIMPIDRISCV_CPU_MARCHID static const char riscv_single_letter_exts[] = "IEMAFDQCPVH"; @@ -1731,7 +1730,6 @@ static Property riscv_cpu_properties[] = { DEFINE_PROP_BOOL("debug", RISCVCPU, cfg.debug, true), DEFINE_PROP_UINT64("marchid", RISCVCPU, cfg.marchid, RISCV_CPU_MARCHID), -DEFINE_PROP_UINT64("mimpid", RISCVCPU, cfg.mimpid, RISCV_CPU_MIMPID), #ifndef CONFIG_USER_ONLY DEFINE_PROP_UINT64("resetvec", RISCVCPU, env.resetvec, DEFAULT_RSTVEC), @@ -1850,6 +1848,35 @@ static void cpu_get_mvendorid(Object *obj, Visitor *v, const char *name, visit_type_bool(v, name, &value, errp); } +static void cpu_set_mimpid(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ +bool dynamic_cpu = riscv_cpu_is_dynamic(obj); +RISCVCPU *cpu = RISCV_CPU(obj); +uint64_t prev_val = cpu->cfg.mimpid; +uint64_t value; + +if (!visit_type_uint64(v, name, &value, errp)) { +return; +} + +if (!dynamic_cpu && prev_val != value) { +error_setg(errp, "Unable to change %s mimpid (0x%lx)", + object_get_typename(obj), prev_val); +return; +} + +cpu->cfg.mimpid = value; +} + +static void cpu_get_mimpid(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ +bool value = RISCV_CPU(obj)->cfg.mimpid; + +visit_type_bool(v, name, &value, errp); +} + static void riscv_cpu_class_init(ObjectClass *c, void *data) { RISCVCPUClass *mcc = RISCV_CPU_CLASS(c); @@ -1884,6 +1911,9 @@ static void riscv_cpu_class_init(ObjectClass *c, void *data) object_class_property_add(c, "mvendorid", "uint32", cpu_get_mvendorid, cpu_set_mvendorid, NULL, NULL); +object_class_property_add(c, "mimpid", "uint64", cpu_get_mimpid, + cpu_set_mimpid, NULL, NULL); + device_class_set_props(dc, riscv_cpu_properties); } -- 2.41.0
[PATCH v6 05/20] target/riscv/cpu.c: restrict 'marchid' value
'marchid' shouldn't be set to a different value as previously set for named CPUs. For all other CPUs it shouldn't be freely set either - the spec requires that 'marchid' can't have the MSB (most significant bit) set and every other bit set to zero, i.e. 0x8000 is an invalid 'marchid' value for 32 bit CPUs. As with 'mimpid', setting a default value based on the current QEMU version is not a good idea because it implies that the CPU implementation changes from one QEMU version to the other. Named CPUs should set 'marchid' to a meaningful value instead, and generic CPUs can set to any valid value. For the 'veyron-v1' CPU this is the error thrown if 'marchid' is set to a different val: $ ./build/qemu-system-riscv64 -M virt -nographic -cpu veyron-v1,marchid=0x8000 qemu-system-riscv64: can't apply global veyron-v1-riscv-cpu.marchid=0x8000: Unable to change veyron-v1-riscv-cpu marchid (0x8001) And, for generics CPUs, this is the error when trying to set to an invalid val: $ ./build/qemu-system-riscv64 -M virt -nographic -cpu rv64,marchid=0x8000 qemu-system-riscv64: can't apply global rv64-riscv-cpu.marchid=0x8000: Unable to set marchid with MSB (64) bit set and the remaining bits zero Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Reviewed-by: Alistair Francis --- target/riscv/cpu.c | 60 -- 1 file changed, 53 insertions(+), 7 deletions(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 7ca7b7bbfa..51f055b8be 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -39,11 +39,6 @@ #include "tcg/tcg.h" /* RISC-V CPU definitions */ - -#define RISCV_CPU_MARCHID ((QEMU_VERSION_MAJOR << 16) | \ - (QEMU_VERSION_MINOR << 8) | \ - (QEMU_VERSION_MICRO)) - static const char riscv_single_letter_exts[] = "IEMAFDQCPVH"; struct isa_ext_data { @@ -1729,8 +1724,6 @@ static void riscv_cpu_add_user_properties(Object *obj) static Property riscv_cpu_properties[] = { DEFINE_PROP_BOOL("debug", RISCVCPU, cfg.debug, true), -DEFINE_PROP_UINT64("marchid", RISCVCPU, cfg.marchid, RISCV_CPU_MARCHID), - #ifndef CONFIG_USER_ONLY DEFINE_PROP_UINT64("resetvec", RISCVCPU, env.resetvec, DEFAULT_RSTVEC), #endif @@ -1877,6 +1870,56 @@ static void cpu_get_mimpid(Object *obj, Visitor *v, const char *name, visit_type_bool(v, name, &value, errp); } +static void cpu_set_marchid(Object *obj, Visitor *v, const char *name, +void *opaque, Error **errp) +{ +bool dynamic_cpu = riscv_cpu_is_dynamic(obj); +RISCVCPU *cpu = RISCV_CPU(obj); +uint64_t prev_val = cpu->cfg.marchid; +uint64_t value, invalid_val; +uint32_t mxlen = 0; + +if (!visit_type_uint64(v, name, &value, errp)) { +return; +} + +if (!dynamic_cpu && prev_val != value) { +error_setg(errp, "Unable to change %s marchid (0x%lx)", + object_get_typename(obj), prev_val); +return; +} + +switch (riscv_cpu_mxl(&cpu->env)) { +case MXL_RV32: +mxlen = 32; +break; +case MXL_RV64: +case MXL_RV128: +mxlen = 64; +break; +default: +g_assert_not_reached(); +} + +invalid_val = 1LL << (mxlen - 1); + +if (value == invalid_val) { +error_setg(errp, "Unable to set marchid with MSB (%u) bit set " + "and the remaining bits zero", mxlen); +return; +} + +cpu->cfg.marchid = value; +} + +static void cpu_get_marchid(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ +bool value = RISCV_CPU(obj)->cfg.marchid; + +visit_type_bool(v, name, &value, errp); +} + static void riscv_cpu_class_init(ObjectClass *c, void *data) { RISCVCPUClass *mcc = RISCV_CPU_CLASS(c); @@ -1914,6 +1957,9 @@ static void riscv_cpu_class_init(ObjectClass *c, void *data) object_class_property_add(c, "mimpid", "uint64", cpu_get_mimpid, cpu_set_mimpid, NULL, NULL); +object_class_property_add(c, "marchid", "uint64", cpu_get_marchid, + cpu_set_marchid, NULL, NULL); + device_class_set_props(dc, riscv_cpu_properties); } -- 2.41.0
[PATCH v6 06/20] target/riscv: use KVM scratch CPUs to init KVM properties
Certain validations, such as the validations done for the machine IDs (mvendorid/marchid/mimpid), are done before starting the CPU. Non-dynamic (named) CPUs tries to match user input with a preset default. As it is today we can't prefetch a KVM default for these cases because we're only able to read/write KVM regs after the vcpu is spinning. Our target/arm friends use a concept called "scratch CPU", which consists of creating a vcpu for doing queries and validations and so on, which is discarded shortly after use [1]. This is a suitable solution for what we need so let's implement it in target/riscv as well. kvm_riscv_init_machine_ids() will be used to do any pre-launch setup for KVM CPUs, via riscv_cpu_add_user_properties(). The function will create a KVM scratch CPU, fetch KVM regs that work as default values for user properties, and then discard the scratch CPU afterwards. We're starting by initializing 'mvendorid'. This concept will be used to init other KVM specific properties in the next patches as well. [1] target/arm/kvm.c, kvm_arm_create_scratch_host_vcpu() Suggested-by: Andrew Jones Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Acked-by: Alistair Francis --- target/riscv/cpu.c | 6 +++ target/riscv/kvm.c | 85 target/riscv/kvm_riscv.h | 1 + 3 files changed, 92 insertions(+) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 51f055b8be..2485e820f8 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -1710,6 +1710,12 @@ static void riscv_cpu_add_user_properties(Object *obj) Property *prop; DeviceState *dev = DEVICE(obj); +#ifndef CONFIG_USER_ONLY +if (kvm_enabled()) { +kvm_riscv_init_user_properties(obj); +} +#endif + riscv_cpu_add_misa_properties(obj); for (prop = riscv_cpu_extensions; prop && prop->name; prop++) { diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c index 0f932a5b96..37f0f70794 100644 --- a/target/riscv/kvm.c +++ b/target/riscv/kvm.c @@ -309,6 +309,91 @@ static void kvm_riscv_put_regs_timer(CPUState *cs) env->kvm_timer_dirty = false; } +typedef struct KVMScratchCPU { +int kvmfd; +int vmfd; +int cpufd; +} KVMScratchCPU; + +/* + * Heavily inspired by kvm_arm_create_scratch_host_vcpu() + * from target/arm/kvm.c. + */ +static bool kvm_riscv_create_scratch_vcpu(KVMScratchCPU *scratch) +{ +int kvmfd = -1, vmfd = -1, cpufd = -1; + +kvmfd = qemu_open_old("/dev/kvm", O_RDWR); +if (kvmfd < 0) { +goto err; +} +do { +vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0); +} while (vmfd == -1 && errno == EINTR); +if (vmfd < 0) { +goto err; +} +cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0); +if (cpufd < 0) { +goto err; +} + +scratch->kvmfd = kvmfd; +scratch->vmfd = vmfd; +scratch->cpufd = cpufd; + +return true; + + err: +if (cpufd >= 0) { +close(cpufd); +} +if (vmfd >= 0) { +close(vmfd); +} +if (kvmfd >= 0) { +close(kvmfd); +} + +return false; +} + +static void kvm_riscv_destroy_scratch_vcpu(KVMScratchCPU *scratch) +{ +close(scratch->cpufd); +close(scratch->vmfd); +close(scratch->kvmfd); +} + +static void kvm_riscv_init_machine_ids(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) +{ +CPURISCVState *env = &cpu->env; +struct kvm_one_reg reg; +int ret; + +reg.id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CONFIG, + KVM_REG_RISCV_CONFIG_REG(mvendorid)); +reg.addr = (uint64_t)&cpu->cfg.mvendorid; +ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, ®); +if (ret != 0) { +error_report("Unable to retrieve mvendorid from host, error %d", ret); +} +} + +void kvm_riscv_init_user_properties(Object *cpu_obj) +{ +RISCVCPU *cpu = RISCV_CPU(cpu_obj); +KVMScratchCPU kvmcpu; + +if (!kvm_riscv_create_scratch_vcpu(&kvmcpu)) { +return; +} + +kvm_riscv_init_machine_ids(cpu, &kvmcpu); + +kvm_riscv_destroy_scratch_vcpu(&kvmcpu); +} + const KVMCapabilityInfo kvm_arch_required_capabilities[] = { KVM_CAP_LAST_INFO }; diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h index ed281bdce0..e3ba935808 100644 --- a/target/riscv/kvm_riscv.h +++ b/target/riscv/kvm_riscv.h @@ -19,6 +19,7 @@ #ifndef QEMU_KVM_RISCV_H #define QEMU_KVM_RISCV_H +void kvm_riscv_init_user_properties(Object *cpu_obj); void kvm_riscv_reset_vcpu(RISCVCPU *cpu); void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level); -- 2.41.0
[PATCH v6 12/20] target/riscv: add KVM specific MISA properties
Using all TCG user properties in KVM is tricky. First because KVM supports only a small subset of what TCG provides, so most of the cpu->cfg flags do nothing for KVM. Second, and more important, we don't have a way of telling if any given value is an user input or not. For TCG this has a small impact since we just validating everything and error out if needed. But for KVM it would be good to know if a given value was set by the user or if it's a value already provided by KVM. Otherwise we don't know how to handle failed kvm_set_one_regs() when writing the configurations back. These characteristics make it overly complicated to use the same user facing flags for both KVM and TCG. A simpler approach is to create KVM specific properties that have specialized logic, forking KVM and TCG use cases for those cases only. Fully separating KVM/TCG properties is unneeded at this point - in fact we want the user experience to be as equal as possible, regardless of the acceleration chosen. We'll start this fork with the MISA properties, adding the MISA bits that the KVM driver currently supports. A new KVMCPUConfig type is introduced. It'll hold general information about an extension. For MISA extensions we're going to use the newly created getters of misa_ext_infos[] to populate their name and description. 'offset' holds the MISA bit (RVA, RVC, ...). We're calling it 'offset' instead of 'misa_bit' because this same KVMCPUConfig struct will be used to multi-letter extensions later on. This new type also holds a 'user_set' flag. This flag will be set when the user set an option that's different than what is already configured in the host, requiring KVM intervention to write the regs back during kvm_arch_init_vcpu(). Similar mechanics will be implemented for multi-letter extensions as well. There is no need to duplicate more code than necessary, so we're going to use the existing kvm_riscv_init_user_properties() to add the KVM specific properties. Any code that is adding a TCG user prop is then changed slightly to verify first if there's a KVM prop with the same name already added. Signed-off-by: Daniel Henrique Barboza --- target/riscv/cpu.c | 13 +--- target/riscv/kvm.c | 78 ++ 2 files changed, 87 insertions(+), 4 deletions(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 90dd2078ae..f4b1868466 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -1617,14 +1617,19 @@ static void riscv_cpu_add_misa_properties(Object *cpu_obj) for (i = 0; i < ARRAY_SIZE(misa_ext_cfgs); i++) { RISCVCPUMisaExtConfig *misa_cfg = &misa_ext_cfgs[i]; +Error *local_err = NULL; misa_cfg->name = riscv_get_misa_ext_name(misa_cfg->misa_bit); misa_cfg->description = riscv_get_misa_ext_descr(misa_cfg->misa_bit); -object_property_add(cpu_obj, misa_cfg->name, "bool", -cpu_get_misa_ext_cfg, -cpu_set_misa_ext_cfg, -NULL, (void *)misa_cfg); +object_property_try_add(cpu_obj, misa_cfg->name, "bool", +cpu_get_misa_ext_cfg, cpu_set_misa_ext_cfg, +NULL, (void *)misa_cfg, &local_err); +if (local_err) { +/* Someone (KVM) already created the property */ +continue; +} + object_property_set_description(cpu_obj, misa_cfg->name, misa_cfg->description); object_property_set_bool(cpu_obj, misa_cfg->name, diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c index 4d0808cb9a..0fb63cced3 100644 --- a/target/riscv/kvm.c +++ b/target/riscv/kvm.c @@ -22,8 +22,10 @@ #include #include "qemu/timer.h" +#include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" +#include "qapi/visitor.h" #include "sysemu/sysemu.h" #include "sysemu/kvm.h" #include "sysemu/kvm_int.h" @@ -105,6 +107,81 @@ static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type, } \ } while (0) +typedef struct KVMCPUConfig { +const char *name; +const char *description; +target_ulong offset; +int kvm_reg_id; +bool user_set; +} KVMCPUConfig; + +#define KVM_MISA_CFG(_bit, _reg_id) \ +{.offset = _bit, .kvm_reg_id = _reg_id} + +/* KVM ISA extensions */ +static KVMCPUConfig kvm_misa_ext_cfgs[] = { +KVM_MISA_CFG(RVA, KVM_RISCV_ISA_EXT_A), +KVM_MISA_CFG(RVC, KVM_RISCV_ISA_EXT_C), +KVM_MISA_CFG(RVD, KVM_RISCV_ISA_EXT_D), +KVM_MISA_CFG(RVF, KVM_RISCV_ISA_EXT_F), +KVM_MISA_CFG(RVH, KVM_RISCV_ISA_EXT_H), +KVM_MISA_CFG(RVI, KVM_RISCV_ISA_EXT_I), +KVM_MISA_CFG(RVM, KVM_RISCV_ISA_EXT_M), +}; + +static void kvm_cpu_set_misa_ext_cfg(Object *obj, Visitor *v, + const char *name, + void *opaque, Error **errp) +{ +KVMCPUConfig *misa_ext_cfg = opaque; +target_ulong mis
[PATCH v6 01/20] target/riscv: skip features setup for KVM CPUs
As it is today it's not possible to use '-cpu host' if the RISC-V host has RVH enabled. This is the resulting error: $ sudo ./qemu/build/qemu-system-riscv64 \ -machine virt,accel=kvm -m 2G -smp 1 \ -nographic -snapshot -kernel ./guest_imgs/Image \ -initrd ./guest_imgs/rootfs_kvm_riscv64.img \ -append "earlycon=sbi root=/dev/ram rw" \ -cpu host qemu-system-riscv64: H extension requires priv spec 1.12.0 This happens because we're checking for priv spec for all CPUs, and since we're not setting env->priv_ver for the 'host' CPU, it's being default to zero (i.e. PRIV_SPEC_1_10_0). In reality env->priv_ver does not make sense when running with the KVM 'host' CPU. It's used to gate certain CSRs/extensions during translation to make them unavailable if the hart declares an older spec version. It doesn't have any other use. E.g. OpenSBI version 1.2 retrieves the spec checking if the CSR_MCOUNTEREN, CSR_MCOUNTINHIBIT and CSR_MENVCFG CSRs are available [1]. 'priv_ver' is just one example. We're doing a lot of feature validation and setup during riscv_cpu_realize() that it doesn't apply to KVM CPUs. Validating the feature set for those CPUs is a KVM problem that should be handled in KVM specific code. The new riscv_cpu_realize_tcg() helper contains all validation logic that are applicable to TCG CPUs only. riscv_cpu_realize() verifies if we're running TCG and, if it's the case, proceed with the usual TCG realize() logic. [1] lib/sbi/sbi_hart.c, hart_detect_features() Signed-off-by: Daniel Henrique Barboza --- target/riscv/cpu.c | 35 +-- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index fb8458bf74..bbb201a2b3 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -34,6 +34,7 @@ #include "migration/vmstate.h" #include "fpu/softfloat-helpers.h" #include "sysemu/kvm.h" +#include "sysemu/tcg.h" #include "kvm_riscv.h" #include "tcg/tcg.h" @@ -1308,20 +1309,12 @@ static void riscv_cpu_validate_misa_priv(CPURISCVState *env, Error **errp) } } -static void riscv_cpu_realize(DeviceState *dev, Error **errp) +static void riscv_cpu_realize_tcg(DeviceState *dev, Error **errp) { -CPUState *cs = CPU(dev); RISCVCPU *cpu = RISCV_CPU(dev); CPURISCVState *env = &cpu->env; -RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(dev); Error *local_err = NULL; -cpu_exec_realizefn(cs, &local_err); -if (local_err != NULL) { -error_propagate(errp, local_err); -return; -} - riscv_cpu_validate_misa_mxl(cpu, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); @@ -1356,7 +1349,7 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp) } #ifndef CONFIG_USER_ONLY -cs->tcg_cflags |= CF_PCREL; +CPU(dev)->tcg_cflags |= CF_PCREL; if (cpu->cfg.ext_sstc) { riscv_timer_init(cpu); @@ -1369,6 +1362,28 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp) } } #endif +} + +static void riscv_cpu_realize(DeviceState *dev, Error **errp) +{ +CPUState *cs = CPU(dev); +RISCVCPU *cpu = RISCV_CPU(dev); +RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(dev); +Error *local_err = NULL; + +cpu_exec_realizefn(cs, &local_err); +if (local_err != NULL) { +error_propagate(errp, local_err); +return; +} + +if (tcg_enabled()) { +riscv_cpu_realize_tcg(dev, &local_err); +if (local_err != NULL) { +error_propagate(errp, local_err); +return; +} +} riscv_cpu_finalize_features(cpu, &local_err); if (local_err != NULL) { -- 2.41.0
Re: [PATCH v2 0/2] vfio/migration: Make VFIO migration non-experimental
On Wed, 28 Jun 2023 10:31:10 +0300 Avihai Horon wrote: > Hello, > > The major parts of VFIO migration are supported today in QEMU. This > includes basic VFIO migration, device dirty page tracking and precopy > support. Thus, at this point in time, it seems appropriate to make VFIO > migration non-experimental. > > This short series (which is based on the precopy series [1]) does that > and also adds a few improvements: > - Patch #1 resets bytes_transferred counter properly. > - Patch #2 cleans up the VFIO migration realize flow and makes VFIO > migration non-experimental. > > Note that Zhenzhong's series [2] fixes additional bugs and further > cleans the VFIO migration realize flow. > > Changes from v1 [3]: > * Dropped patch #1 as it's an optimization. (Cedric) > * Added Fixes tag to patch #2. (Cedric) > * Made VFIO device realization fail if migration is not supported and > enable_migration is ON. (Cedric) > * Kept the error message of errno == ENOTTY case as it was in > vfio_migration_query_flags(). (Cedric) > * Added a warn when enable_migration is ON and device dirty tracking is > not supported. (Alex) > * Renamed trace_vfio_migration_probe() to > * trace_vfio_migration_realize(). > > > Thanks. > > [1] > https://lore.kernel.org/qemu-devel/2023062201.29729-1-avih...@nvidia.com/ > > [2] > https://lore.kernel.org/qemu-devel/20230621080204.420723-1-zhenzhong.d...@intel.com/ > > [3] > https://lore.kernel.org/qemu-devel/20230626082353.18535-1-avih...@nvidia.com/ > > Avihai Horon (2): > vfio/migration: Reset bytes_transferred properly > vfio/migration: Make VFIO migration non-experimental > > include/hw/vfio/vfio-common.h | 7 +-- > migration/migration.h | 1 + > hw/vfio/common.c | 16 ++- > hw/vfio/migration.c | 85 --- > hw/vfio/pci.c | 4 +- > migration/migration.c | 1 + > migration/savevm.c| 1 + > migration/target.c| 17 ++- > hw/vfio/trace-events | 2 +- > 9 files changed, 97 insertions(+), 37 deletions(-) > Reviewed-by: Alex Williamson
[QEMU][PATCH v1] tests/qtest: xlnx-canfd-test: Fix code coverity issues
Following are done to fix the coverity issues: 1. Change read_data to fix the CID 1512899: Out-of-bounds access (OVERRUN) 2. Fix match_rx_tx_data to fix CID 1512900: Logically dead code (DEADCODE) 3. Replace rand() in generate_random_data() with g_rand_int() Signed-off-by: Vikram Garhwal --- tests/qtest/xlnx-canfd-test.c | 33 +++-- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/tests/qtest/xlnx-canfd-test.c b/tests/qtest/xlnx-canfd-test.c index 76ee106d4f..78ec9ef2a7 100644 --- a/tests/qtest/xlnx-canfd-test.c +++ b/tests/qtest/xlnx-canfd-test.c @@ -170,23 +170,23 @@ static void generate_random_data(uint32_t *buf_tx, bool is_canfd_frame) /* Generate random TX data for CANFD frame. */ if (is_canfd_frame) { for (int i = 0; i < CANFD_FRAME_SIZE - 2; i++) { -buf_tx[2 + i] = rand(); +buf_tx[2 + i] = g_random_int(); } } else { /* Generate random TX data for CAN frame. */ for (int i = 0; i < CAN_FRAME_SIZE - 2; i++) { -buf_tx[2 + i] = rand(); +buf_tx[2 + i] = g_random_int(); } } } -static void read_data(QTestState *qts, uint64_t can_base_addr, uint32_t *buf_rx) +static void read_data(QTestState *qts, uint64_t can_base_addr, uint32_t *buf_rx, + uint32_t frame_size) { uint32_t int_status; uint32_t fifo_status_reg_value; /* At which RX FIFO the received data is stored. */ uint8_t store_ind = 0; -bool is_canfd_frame = false; /* Read the interrupt on CANFD rx. */ int_status = qtest_readl(qts, can_base_addr + R_ISR_OFFSET) & ISR_RXOK; @@ -207,16 +207,9 @@ static void read_data(QTestState *qts, uint64_t can_base_addr, uint32_t *buf_rx) buf_rx[0] = qtest_readl(qts, can_base_addr + R_RX0_ID_OFFSET); buf_rx[1] = qtest_readl(qts, can_base_addr + R_RX0_DLC_OFFSET); -is_canfd_frame = (buf_rx[1] >> DLC_FD_BIT_SHIFT) & 1; - -if (is_canfd_frame) { -for (int i = 0; i < CANFD_FRAME_SIZE - 2; i++) { -buf_rx[i + 2] = qtest_readl(qts, -can_base_addr + R_RX0_DATA1_OFFSET + 4 * i); -} -} else { -buf_rx[2] = qtest_readl(qts, can_base_addr + R_RX0_DATA1_OFFSET); -buf_rx[3] = qtest_readl(qts, can_base_addr + R_RX0_DATA2_OFFSET); +for (int i = 0; i < frame_size - 2; i++) { +buf_rx[i + 2] = qtest_readl(qts, +can_base_addr + R_RX0_DATA1_OFFSET + 4 * i); } /* Clear the RX interrupt. */ @@ -272,10 +265,6 @@ static void match_rx_tx_data(const uint32_t *buf_tx, const uint32_t *buf_rx, g_assert_cmpint((buf_rx[size] & DLC_FD_BIT_MASK), ==, (buf_tx[size] & DLC_FD_BIT_MASK)); } else { -if (!is_canfd_frame && size == 4) { -break; -} - g_assert_cmpint(buf_rx[size], ==, buf_tx[size]); } @@ -318,7 +307,7 @@ static void test_can_data_transfer(void) write_data(qts, CANFD0_BASE_ADDR, buf_tx, false); send_data(qts, CANFD0_BASE_ADDR); -read_data(qts, CANFD1_BASE_ADDR, buf_rx); +read_data(qts, CANFD1_BASE_ADDR, buf_rx, CAN_FRAME_SIZE); match_rx_tx_data(buf_tx, buf_rx, false); qtest_quit(qts); @@ -358,7 +347,7 @@ static void test_canfd_data_transfer(void) write_data(qts, CANFD0_BASE_ADDR, buf_tx, true); send_data(qts, CANFD0_BASE_ADDR); -read_data(qts, CANFD1_BASE_ADDR, buf_rx); +read_data(qts, CANFD1_BASE_ADDR, buf_rx, CANFD_FRAME_SIZE); match_rx_tx_data(buf_tx, buf_rx, true); qtest_quit(qts); @@ -397,7 +386,7 @@ static void test_can_loopback(void) write_data(qts, CANFD0_BASE_ADDR, buf_tx, true); send_data(qts, CANFD0_BASE_ADDR); -read_data(qts, CANFD0_BASE_ADDR, buf_rx); +read_data(qts, CANFD0_BASE_ADDR, buf_rx, CANFD_FRAME_SIZE); match_rx_tx_data(buf_tx, buf_rx, true); generate_random_data(buf_tx, true); @@ -405,7 +394,7 @@ static void test_can_loopback(void) write_data(qts, CANFD1_BASE_ADDR, buf_tx, true); send_data(qts, CANFD1_BASE_ADDR); -read_data(qts, CANFD1_BASE_ADDR, buf_rx); +read_data(qts, CANFD1_BASE_ADDR, buf_rx, CANFD_FRAME_SIZE); match_rx_tx_data(buf_tx, buf_rx, true); qtest_quit(qts); -- 2.30.2
Re: [PATCH] target/arm: gdbstub: Guard M-profile code with CONFIG_TCG
On 28/6/23 18:48, Fabiano Rosas wrote: This code is only relevant when TCG is present in the build. Building with --disable-tcg --enable-xen on an x86 host we get: $ ../configure --target-list=x86_64-softmmu,aarch64-softmmu --disable-tcg --enable-xen $ make -j$(nproc) ... libqemu-aarch64-softmmu.fa.p/target_arm_gdbstub.c.o: in function `m_sysreg_ptr': ../target/arm/gdbstub.c:358: undefined reference to `arm_v7m_get_sp_ptr' ../target/arm/gdbstub.c:361: undefined reference to `arm_v7m_get_sp_ptr' libqemu-aarch64-softmmu.fa.p/target_arm_gdbstub.c.o: in function `arm_gdb_get_m_systemreg': ../target/arm/gdbstub.c:405: undefined reference to `arm_v7m_mrs_control' I'm a bit confused, isn't this covered by the cross-arm64-xen-only job? Signed-off-by: Fabiano Rosas --- This is a respin of: https://lore.kernel.org/r/20230313151058.19645-5-faro...@suse.de --- target/arm/gdbstub.c | 4 1 file changed, 4 insertions(+)
Re: [PATCH] MAINTAINERS: Promote Cédric to VFIO co-maintainer
On 28/6/23 19:29, Alex Williamson wrote: Cédric has stepped up involvement in vfio, reviewing and managing patches, as well as pull requests. This work deserves gratitude and punishment with a promotion to co-maintainer ;) Signed-off-by: Alex Williamson --- Cédric, I'd also support if you wanted to add a tree entry here. MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Reviewed-by: Philippe Mathieu-Daudé
[PATCH v2 10/16] hw/pci-host/i440fx: Make MemoryRegion pointers accessible as properties
The goal is to eliminate i440fx_init() which is a legacy init function. This neccessitates the memory regions to be properties, like in Q35, which will be assigned in board code. Since i440fx needs different PCI devices in Xen mode, and since i440fx shall be self-contained, the PCI device will be created during realization of the host. Thus the pointers need to be moved to the host structure to be usable as properties. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé --- include/hw/pci-host/i440fx.h | 3 --- hw/pci-host/i440fx.c | 42 +--- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/include/hw/pci-host/i440fx.h b/include/hw/pci-host/i440fx.h index bf57216c78..e3a550021e 100644 --- a/include/hw/pci-host/i440fx.h +++ b/include/hw/pci-host/i440fx.h @@ -25,9 +25,6 @@ struct PCII440FXState { PCIDevice parent_obj; /*< public >*/ -MemoryRegion *system_memory; -MemoryRegion *pci_address_space; -MemoryRegion *ram_memory; PAMMemoryRegion pam_regions[PAM_REGIONS_COUNT]; MemoryRegion smram_region; MemoryRegion smram, low_smram; diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index 9df4688b2e..efc940ba12 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -47,6 +47,10 @@ OBJECT_DECLARE_SIMPLE_TYPE(I440FXState, I440FX_PCI_HOST_BRIDGE) struct I440FXState { PCIHostState parent_obj; + +MemoryRegion *system_memory; +MemoryRegion *pci_address_space; +MemoryRegion *ram_memory; Range pci_hole; uint64_t pci_hole64_size; bool pci_hole64_fix; @@ -214,12 +218,25 @@ static void i440fx_pcihost_get_pci_hole64_end(Object *obj, Visitor *v, static void i440fx_pcihost_initfn(Object *obj) { +I440FXState *s = I440FX_PCI_HOST_BRIDGE(obj); PCIHostState *phb = PCI_HOST_BRIDGE(obj); memory_region_init_io(&phb->conf_mem, obj, &pci_host_conf_le_ops, phb, "pci-conf-idx", 4); memory_region_init_io(&phb->data_mem, obj, &pci_host_data_le_ops, phb, "pci-conf-data", 4); + +object_property_add_link(obj, PCI_HOST_PROP_RAM_MEM, TYPE_MEMORY_REGION, + (Object **) &s->ram_memory, + qdev_prop_allow_set_link_before_realize, 0); + +object_property_add_link(obj, PCI_HOST_PROP_PCI_MEM, TYPE_MEMORY_REGION, + (Object **) &s->pci_address_space, + qdev_prop_allow_set_link_before_realize, 0); + +object_property_add_link(obj, PCI_HOST_PROP_SYSTEM_MEM, TYPE_MEMORY_REGION, + (Object **) &s->system_memory, + qdev_prop_allow_set_link_before_realize, 0); } static void i440fx_pcihost_realize(DeviceState *dev, Error **errp) @@ -255,7 +272,11 @@ PCIBus *i440fx_init(const char *pci_type, PCII440FXState *f; unsigned i; -b = pci_root_bus_new(dev, NULL, pci_address_space, +s->system_memory = address_space_mem; +s->pci_address_space = pci_address_space; +s->ram_memory = ram_memory; + +b = pci_root_bus_new(dev, NULL, s->pci_address_space, address_space_io, 0, TYPE_PCI_BUS); phb->bus = b; object_property_add_child(qdev_get_machine(), "i440fx", OBJECT(dev)); @@ -263,20 +284,17 @@ PCIBus *i440fx_init(const char *pci_type, d = pci_create_simple(b, 0, pci_type); f = I440FX_PCI_DEVICE(d); -f->system_memory = address_space_mem; -f->pci_address_space = pci_address_space; -f->ram_memory = ram_memory; range_set_bounds(&s->pci_hole, below_4g_mem_size, IO_APIC_DEFAULT_ADDRESS - 1); /* setup pci memory mapping */ -pc_pci_as_mapping_init(f->system_memory, f->pci_address_space); +pc_pci_as_mapping_init(s->system_memory, s->pci_address_space); /* if *disabled* show SMRAM to all CPUs */ memory_region_init_alias(&f->smram_region, OBJECT(d), "smram-region", - f->pci_address_space, SMRAM_C_BASE, SMRAM_C_SIZE); -memory_region_add_subregion_overlap(f->system_memory, SMRAM_C_BASE, + s->pci_address_space, SMRAM_C_BASE, SMRAM_C_SIZE); +memory_region_add_subregion_overlap(s->system_memory, SMRAM_C_BASE, &f->smram_region, 1); memory_region_set_enabled(&f->smram_region, true); @@ -284,17 +302,17 @@ PCIBus *i440fx_init(const char *pci_type, memory_region_init(&f->smram, OBJECT(d), "smram", 4 * GiB); memory_region_set_enabled(&f->smram, true); memory_region_init_alias(&f->low_smram, OBJECT(d), "smram-low", - f->ram_memory, SMRAM_C_BASE, SMRAM_C_SIZE); + s->ram_memory, SMRAM_C_BASE, SMRAM_C_SIZE); memory_region_set_enabled(&f->low_smram, true); memory_region_add_subregion(&f->smram, SMRAM_C_BASE, &f->low_smram); object_property_add_con
[PATCH v2 05/16] hw/pci-host/q35: Initialize PCI_HOST_BYPASS_IOMMU property from board code
The Q35 PCI host already has a PCI_HOST_BYPASS_IOMMU property. However, the host initializes this property itself by accessing global machine state, thereby assuming it to be a PC machine. Avoid this by having board code set this property. Signed-off-by: Bernhard Beschow Reviewed-by: Igor Mammedov Reviewed-by: Philippe Mathieu-Daudé --- hw/i386/pc_q35.c | 2 ++ hw/pci-host/q35.c | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 4edc0b35f4..852250e8cb 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -230,6 +230,8 @@ static void pc_q35_init(MachineState *machine) x86ms->below_4g_mem_size, NULL); object_property_set_int(phb, PCI_HOST_ABOVE_4G_MEM_SIZE, x86ms->above_4g_mem_size, NULL); +object_property_set_bool(phb, PCI_HOST_BYPASS_IOMMU, + pcms->default_bus_bypass_iommu, NULL); sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal); /* pci */ diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 0604464074..d2830cee34 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -66,8 +66,7 @@ static void q35_host_realize(DeviceState *dev, Error **errp) s->mch.pci_address_space, s->mch.address_space_io, 0, TYPE_PCIE_BUS); -pci->bypass_iommu = -PC_MACHINE(qdev_get_machine())->default_bus_bypass_iommu; + qdev_realize(DEVICE(&s->mch), BUS(pci->bus), &error_fatal); } -- 2.41.0
[PATCH v2 11/16] hw/pci-host/i440fx: Add PCI_HOST_PROP_IO_MEM property
Introduce the property in anticipation of QOM'ification; Q35 has the same property. Signed-off-by: Bernhard Beschow --- hw/pci-host/i440fx.c | 14 ++ 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index efc940ba12..a740d1762b 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -27,7 +27,6 @@ #include "qemu/range.h" #include "hw/i386/pc.h" #include "hw/pci/pci.h" -#include "hw/pci/pci_bus.h" #include "hw/pci/pci_host.h" #include "hw/pci-host/i440fx.h" #include "hw/qdev-properties.h" @@ -49,6 +48,7 @@ struct I440FXState { PCIHostState parent_obj; MemoryRegion *system_memory; +MemoryRegion *io_memory; MemoryRegion *pci_address_space; MemoryRegion *ram_memory; Range pci_hole; @@ -237,17 +237,22 @@ static void i440fx_pcihost_initfn(Object *obj) object_property_add_link(obj, PCI_HOST_PROP_SYSTEM_MEM, TYPE_MEMORY_REGION, (Object **) &s->system_memory, qdev_prop_allow_set_link_before_realize, 0); + +object_property_add_link(obj, PCI_HOST_PROP_IO_MEM, TYPE_MEMORY_REGION, + (Object **) &s->io_memory, + qdev_prop_allow_set_link_before_realize, 0); } static void i440fx_pcihost_realize(DeviceState *dev, Error **errp) { +I440FXState *s = I440FX_PCI_HOST_BRIDGE(dev); PCIHostState *phb = PCI_HOST_BRIDGE(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); -memory_region_add_subregion(phb->bus->address_space_io, 0xcf8, &phb->conf_mem); +memory_region_add_subregion(s->io_memory, 0xcf8, &phb->conf_mem); sysbus_init_ioports(sbd, 0xcf8, 4); -memory_region_add_subregion(phb->bus->address_space_io, 0xcfc, &phb->data_mem); +memory_region_add_subregion(s->io_memory, 0xcfc, &phb->data_mem); sysbus_init_ioports(sbd, 0xcfc, 4); /* register i440fx 0xcf8 port as coalesced pio */ @@ -273,11 +278,12 @@ PCIBus *i440fx_init(const char *pci_type, unsigned i; s->system_memory = address_space_mem; +s->io_memory = address_space_io; s->pci_address_space = pci_address_space; s->ram_memory = ram_memory; b = pci_root_bus_new(dev, NULL, s->pci_address_space, - address_space_io, 0, TYPE_PCI_BUS); + s->io_memory, 0, TYPE_PCI_BUS); phb->bus = b; object_property_add_child(qdev_get_machine(), "i440fx", OBJECT(dev)); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); -- 2.41.0
[PATCH v2 09/16] hw/pci-host/i440fx: Move i440fx_realize() into PCII440FXState section
i440fx_realize() realizes the PCI device inside the host bridge (PCII440FXState), but is implemented between i440fx_pcihost_realize() and i440fx_init() which deal with the host bridge itself (I440FXState). Since we want to append i440fx_init() to i440fx_pcihost_realize() later let's move i440fx_realize() out of the way. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé --- hw/pci-host/i440fx.c | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index 88beaf99c4..9df4688b2e 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -65,6 +65,15 @@ struct I440FXState { */ #define I440FX_COREBOOT_RAM_SIZE 0x57 +static void i440fx_realize(PCIDevice *dev, Error **errp) +{ +dev->config[I440FX_SMRAM] = 0x02; + +if (object_property_get_bool(qdev_get_machine(), "iommu", NULL)) { +warn_report("i440fx doesn't support emulated iommu"); +} +} + static void i440fx_update_memory_mappings(PCII440FXState *d) { int i; @@ -229,15 +238,6 @@ static void i440fx_pcihost_realize(DeviceState *dev, Error **errp) memory_region_add_coalescing(&phb->conf_mem, 0, 4); } -static void i440fx_realize(PCIDevice *dev, Error **errp) -{ -dev->config[I440FX_SMRAM] = 0x02; - -if (object_property_get_bool(qdev_get_machine(), "iommu", NULL)) { -warn_report("i440fx doesn't support emulated iommu"); -} -} - PCIBus *i440fx_init(const char *pci_type, DeviceState *dev, MemoryRegion *address_space_mem, -- 2.41.0
[PATCH v2 15/16] hw/i386/pc_piix: Turn some local variables into initializers
Eliminates an else branch. Suggested-by: Igor Mammedov Signed-off-by: Bernhard Beschow --- hw/i386/pc_piix.c | 13 - 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 1df309b8e2..d07218a8c9 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -122,11 +122,11 @@ static void pc_init1(MachineState *machine, BusState *idebus[MAX_IDE_BUS]; ISADevice *rtc_state; MemoryRegion *ram_memory; -MemoryRegion *pci_memory; -MemoryRegion *rom_memory; +MemoryRegion *pci_memory = NULL; +MemoryRegion *rom_memory = system_memory; ram_addr_t lowmem; -uint64_t hole64_size; -Object *i440fx_host; +uint64_t hole64_size = 0; +Object *i440fx_host = NULL; /* * Calculate ram split, for memory below and above 4G. It's a bit @@ -205,11 +205,6 @@ static void pc_init1(MachineState *machine, hole64_size = object_property_get_uint(i440fx_host, PCI_HOST_PROP_PCI_HOLE64_SIZE, &error_abort); -} else { -pci_memory = NULL; -rom_memory = system_memory; -i440fx_host = NULL; -hole64_size = 0; } pc_guest_info_init(pcms); -- 2.41.0
[PATCH v2 13/16] hw/pci-host/i440fx: Add I440FX_HOST_PROP_PCI_TYPE property
I440FX needs a different PCI device model if the "igd-passthru" property is enabled. The type name is currently passed as a parameter to i440fx_init(). This parameter will be replaced by a property assignment once i440fx_init() gets resolved. Signed-off-by: Bernhard Beschow --- include/hw/pci-host/i440fx.h | 2 ++ hw/pci-host/i440fx.c | 6 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/hw/pci-host/i440fx.h b/include/hw/pci-host/i440fx.h index 7e38456ebb..2d7bae5a45 100644 --- a/include/hw/pci-host/i440fx.h +++ b/include/hw/pci-host/i440fx.h @@ -15,6 +15,8 @@ #include "hw/pci-host/pam.h" #include "qom/object.h" +#define I440FX_HOST_PROP_PCI_TYPE "pci-type" + #define TYPE_I440FX_PCI_HOST_BRIDGE "i440FX-pcihost" #define TYPE_I440FX_PCI_DEVICE "i440FX" diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index 4dd70e68fa..e8e66afc11 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -57,6 +57,8 @@ struct I440FXState { uint64_t pci_hole64_size; bool pci_hole64_fix; uint32_t short_root_bus; + +char *pci_type; }; #define I440FX_PAM 0x59 @@ -284,6 +286,7 @@ PCIBus *i440fx_init(const char *pci_type, s->ram_memory = ram_memory; s->below_4g_mem_size = below_4g_mem_size; s->above_4g_mem_size = above_4g_mem_size; +s->pci_type = (char *)pci_type; b = pci_root_bus_new(dev, NULL, s->pci_address_space, s->io_memory, 0, TYPE_PCI_BUS); @@ -291,7 +294,7 @@ PCIBus *i440fx_init(const char *pci_type, object_property_add_child(qdev_get_machine(), "i440fx", OBJECT(dev)); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); -d = pci_create_simple(b, 0, pci_type); +d = pci_create_simple(b, 0, s->pci_type); f = I440FX_PCI_DEVICE(d); range_set_bounds(&s->pci_hole, s->below_4g_mem_size, @@ -390,6 +393,7 @@ static Property i440fx_props[] = { DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, I440FXState, above_4g_mem_size, 0), DEFINE_PROP_BOOL("x-pci-hole64-fix", I440FXState, pci_hole64_fix, true), +DEFINE_PROP_STRING(I440FX_HOST_PROP_PCI_TYPE, I440FXState, pci_type), DEFINE_PROP_END_OF_LIST(), }; -- 2.41.0
[PATCH v2 08/16] hw/pci-host/i440fx: Have common names for some local variables
`PCIHostState` is often referred to as `phb`, own device state usually as `s`. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé --- hw/pci-host/i440fx.c | 26 -- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index daa4d11104..88beaf99c4 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -205,28 +205,28 @@ static void i440fx_pcihost_get_pci_hole64_end(Object *obj, Visitor *v, static void i440fx_pcihost_initfn(Object *obj) { -PCIHostState *s = PCI_HOST_BRIDGE(obj); +PCIHostState *phb = PCI_HOST_BRIDGE(obj); -memory_region_init_io(&s->conf_mem, obj, &pci_host_conf_le_ops, s, +memory_region_init_io(&phb->conf_mem, obj, &pci_host_conf_le_ops, phb, "pci-conf-idx", 4); -memory_region_init_io(&s->data_mem, obj, &pci_host_data_le_ops, s, +memory_region_init_io(&phb->data_mem, obj, &pci_host_data_le_ops, phb, "pci-conf-data", 4); } static void i440fx_pcihost_realize(DeviceState *dev, Error **errp) { -PCIHostState *s = PCI_HOST_BRIDGE(dev); +PCIHostState *phb = PCI_HOST_BRIDGE(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); -memory_region_add_subregion(s->bus->address_space_io, 0xcf8, &s->conf_mem); +memory_region_add_subregion(phb->bus->address_space_io, 0xcf8, &phb->conf_mem); sysbus_init_ioports(sbd, 0xcf8, 4); -memory_region_add_subregion(s->bus->address_space_io, 0xcfc, &s->data_mem); +memory_region_add_subregion(phb->bus->address_space_io, 0xcfc, &phb->data_mem); sysbus_init_ioports(sbd, 0xcfc, 4); /* register i440fx 0xcf8 port as coalesced pio */ -memory_region_set_flush_coalesced(&s->data_mem); -memory_region_add_coalescing(&s->conf_mem, 0, 4); +memory_region_set_flush_coalesced(&phb->data_mem); +memory_region_add_coalescing(&phb->conf_mem, 0, 4); } static void i440fx_realize(PCIDevice *dev, Error **errp) @@ -248,17 +248,16 @@ PCIBus *i440fx_init(const char *pci_type, MemoryRegion *pci_address_space, MemoryRegion *ram_memory) { +I440FXState *s = I440FX_PCI_HOST_BRIDGE(dev); +PCIHostState *phb = PCI_HOST_BRIDGE(dev); PCIBus *b; PCIDevice *d; -PCIHostState *s; PCII440FXState *f; unsigned i; -I440FXState *i440fx; -s = PCI_HOST_BRIDGE(dev); b = pci_root_bus_new(dev, NULL, pci_address_space, address_space_io, 0, TYPE_PCI_BUS); -s->bus = b; +phb->bus = b; object_property_add_child(qdev_get_machine(), "i440fx", OBJECT(dev)); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); @@ -268,8 +267,7 @@ PCIBus *i440fx_init(const char *pci_type, f->pci_address_space = pci_address_space; f->ram_memory = ram_memory; -i440fx = I440FX_PCI_HOST_BRIDGE(dev); -range_set_bounds(&i440fx->pci_hole, below_4g_mem_size, +range_set_bounds(&s->pci_hole, below_4g_mem_size, IO_APIC_DEFAULT_ADDRESS - 1); /* setup pci memory mapping */ -- 2.41.0
[PATCH v2 14/16] hw/pci-host/i440fx: Resolve i440fx_init()
i440fx_init() is a legacy init function. The previous patches worked towards TYPE_I440FX_PCI_HOST_BRIDGE to be instantiated the QOM way. Do this now by transforming the parameters passed to i440fx_init() into property assignments. Signed-off-by: Bernhard Beschow --- include/hw/pci-host/i440fx.h | 10 -- hw/i386/pc_piix.c| 30 +- hw/pci-host/i440fx.c | 34 +- 3 files changed, 26 insertions(+), 48 deletions(-) diff --git a/include/hw/pci-host/i440fx.h b/include/hw/pci-host/i440fx.h index 2d7bae5a45..c988f70890 100644 --- a/include/hw/pci-host/i440fx.h +++ b/include/hw/pci-host/i440fx.h @@ -34,14 +34,4 @@ struct PCII440FXState { #define TYPE_IGD_PASSTHROUGH_I440FX_PCI_DEVICE "igd-passthrough-i440FX" -PCIBus *i440fx_init(const char *pci_type, -DeviceState *dev, -MemoryRegion *address_space_mem, -MemoryRegion *address_space_io, -ram_addr_t below_4g_mem_size, -ram_addr_t above_4g_mem_size, -MemoryRegion *pci_memory, -MemoryRegion *ram_memory); - - #endif diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 87bee368fc..1df309b8e2 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -126,7 +126,7 @@ static void pc_init1(MachineState *machine, MemoryRegion *rom_memory; ram_addr_t lowmem; uint64_t hole64_size; -DeviceState *i440fx_host; +Object *i440fx_host; /* * Calculate ram split, for memory below and above 4G. It's a bit @@ -201,8 +201,8 @@ static void pc_init1(MachineState *machine, pci_memory = g_new(MemoryRegion, 1); memory_region_init(pci_memory, NULL, "pci", UINT64_MAX); rom_memory = pci_memory; -i440fx_host = qdev_new(host_type); -hole64_size = object_property_get_uint(OBJECT(i440fx_host), +i440fx_host = OBJECT(qdev_new(host_type)); +hole64_size = object_property_get_uint(i440fx_host, PCI_HOST_PROP_PCI_HOLE64_SIZE, &error_abort); } else { @@ -243,12 +243,24 @@ static void pc_init1(MachineState *machine, PIIX3State *piix3; PCIDevice *pci_dev; -pci_bus = i440fx_init(pci_type, - i440fx_host, - system_memory, system_io, - x86ms->below_4g_mem_size, - x86ms->above_4g_mem_size, - pci_memory, ram_memory); +object_property_add_child(OBJECT(machine), "i440fx", i440fx_host); +object_property_set_link(i440fx_host, PCI_HOST_PROP_RAM_MEM, + OBJECT(ram_memory), &error_fatal); +object_property_set_link(i440fx_host, PCI_HOST_PROP_PCI_MEM, + OBJECT(pci_memory), &error_fatal); +object_property_set_link(i440fx_host, PCI_HOST_PROP_SYSTEM_MEM, + OBJECT(system_memory), &error_fatal); +object_property_set_link(i440fx_host, PCI_HOST_PROP_IO_MEM, + OBJECT(system_io), &error_fatal); +object_property_set_uint(i440fx_host, PCI_HOST_BELOW_4G_MEM_SIZE, + x86ms->below_4g_mem_size, &error_fatal); +object_property_set_uint(i440fx_host, PCI_HOST_ABOVE_4G_MEM_SIZE, + x86ms->above_4g_mem_size, &error_fatal); +object_property_set_str(i440fx_host, I440FX_HOST_PROP_PCI_TYPE, +pci_type, &error_fatal); +sysbus_realize_and_unref(SYS_BUS_DEVICE(i440fx_host), &error_fatal); + +pci_bus = PCI_BUS(qdev_get_child_bus(DEVICE(i440fx_host), "pci.0")); pci_bus_map_irqs(pci_bus, xen_enabled() ? xen_pci_slot_get_pirq : pc_pci_slot_get_pirq); diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index e8e66afc11..62d6287681 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -249,9 +249,14 @@ static void i440fx_pcihost_initfn(Object *obj) static void i440fx_pcihost_realize(DeviceState *dev, Error **errp) { +ERRP_GUARD(); I440FXState *s = I440FX_PCI_HOST_BRIDGE(dev); PCIHostState *phb = PCI_HOST_BRIDGE(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); +PCIBus *b; +PCIDevice *d; +PCII440FXState *f; +unsigned i; memory_region_add_subregion(s->io_memory, 0xcf8, &phb->conf_mem); sysbus_init_ioports(sbd, 0xcf8, 4); @@ -262,37 +267,10 @@ static void i440fx_pcihost_realize(DeviceState *dev, Error **errp) /* register i440fx 0xcf8 port as coalesced pio */ memory_region_set_flush_coalesced(&phb->data_mem); memory_region_add_coalescing(&phb->conf_mem, 0, 4); -} - -PCIBus *i440fx_init(const char *pci_type, -
[PATCH v2 04/16] hw/pci/pci_host: Introduce PCI_HOST_BYPASS_IOMMU macro
Introduce a macro to avoid copy and pasting strings which can easily cause typos. Suggested-by: Michael S. Tsirkin Signed-off-by: Bernhard Beschow Reviewed-by: Igor Mammedov --- include/hw/pci/pci_host.h | 2 ++ hw/pci/pci_host.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/hw/pci/pci_host.h b/include/hw/pci/pci_host.h index c6f4eb4585..e52d8ec2cd 100644 --- a/include/hw/pci/pci_host.h +++ b/include/hw/pci/pci_host.h @@ -31,6 +31,8 @@ #include "hw/sysbus.h" #include "qom/object.h" +#define PCI_HOST_BYPASS_IOMMU "bypass-iommu" + #define TYPE_PCI_HOST_BRIDGE "pci-host-bridge" OBJECT_DECLARE_TYPE(PCIHostState, PCIHostBridgeClass, PCI_HOST_BRIDGE) diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c index dfd185bbb4..7af8afdcbe 100644 --- a/hw/pci/pci_host.c +++ b/hw/pci/pci_host.c @@ -232,7 +232,7 @@ const VMStateDescription vmstate_pcihost = { static Property pci_host_properties_common[] = { DEFINE_PROP_BOOL("x-config-reg-migration-enabled", PCIHostState, mig_enabled, true), -DEFINE_PROP_BOOL("bypass-iommu", PCIHostState, bypass_iommu, false), +DEFINE_PROP_BOOL(PCI_HOST_BYPASS_IOMMU, PCIHostState, bypass_iommu, false), DEFINE_PROP_END_OF_LIST(), }; -- 2.41.0
[PATCH v2 07/16] hw/pci-host/i440fx: Replace magic values by existing constants
Signed-off-by: Bernhard Beschow Reviewed-by: Igor Mammedov Reviewed-by: Philippe Mathieu-Daudé --- hw/pci-host/i440fx.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index 61e7b97ff4..daa4d11104 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -277,8 +277,8 @@ PCIBus *i440fx_init(const char *pci_type, /* if *disabled* show SMRAM to all CPUs */ memory_region_init_alias(&f->smram_region, OBJECT(d), "smram-region", - f->pci_address_space, 0xa, 0x2); -memory_region_add_subregion_overlap(f->system_memory, 0xa, + f->pci_address_space, SMRAM_C_BASE, SMRAM_C_SIZE); +memory_region_add_subregion_overlap(f->system_memory, SMRAM_C_BASE, &f->smram_region, 1); memory_region_set_enabled(&f->smram_region, true); @@ -286,9 +286,9 @@ PCIBus *i440fx_init(const char *pci_type, memory_region_init(&f->smram, OBJECT(d), "smram", 4 * GiB); memory_region_set_enabled(&f->smram, true); memory_region_init_alias(&f->low_smram, OBJECT(d), "smram-low", - f->ram_memory, 0xa, 0x2); + f->ram_memory, SMRAM_C_BASE, SMRAM_C_SIZE); memory_region_set_enabled(&f->low_smram, true); -memory_region_add_subregion(&f->smram, 0xa, &f->low_smram); +memory_region_add_subregion(&f->smram, SMRAM_C_BASE, &f->low_smram); object_property_add_const_link(qdev_get_machine(), "smram", OBJECT(&f->smram)); -- 2.41.0
[PATCH v2 02/16] hw/pci-host/q35: Fix double, contradicting .endianness assignment
Fixes the following clangd warning (-Winitializer-overrides): q35.c:297:19: Initializer overrides prior initialization of this subobject q35.c:292:19: previous initialization is here Settle on little endian which is consistent with using pci_host_conf_le_ops. Fixes: bafc90bdc594 ("q35: implement TSEG") Signed-off-by: Bernhard Beschow --- hw/pci-host/q35.c | 1 - 1 file changed, 1 deletion(-) diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index fd18920e7f..84137b9ad9 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -285,7 +285,6 @@ static void blackhole_write(void *opaque, hwaddr addr, uint64_t val, static const MemoryRegionOps blackhole_ops = { .read = blackhole_read, .write = blackhole_write, -.endianness = DEVICE_NATIVE_ENDIAN, .valid.min_access_size = 1, .valid.max_access_size = 4, .impl.min_access_size = 4, -- 2.41.0
[PATCH v2 16/16] hw/i386/pc_piix: Move i440fx' realize near its qdev_new()
I440FX realization is currently mixed with PIIX3 creation. Furthermore, it is common practice to only set properties between a device's qdev_new() and qdev_realize(). Clean up to resolve both issues. Since I440FX spawns a PCI bus let's also move the pci_bus initialization there. Note that when running `qemu-system-x86_64 -M pc -S` before and after this patch, `info mtree` in the QEMU console doesn't show any differences except that the ordering is different. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé --- hw/i386/pc_piix.c | 57 --- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index d07218a8c9..b18443d3df 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -114,7 +114,7 @@ static void pc_init1(MachineState *machine, X86MachineState *x86ms = X86_MACHINE(machine); MemoryRegion *system_memory = get_system_memory(); MemoryRegion *system_io = get_system_io(); -PCIBus *pci_bus; +PCIBus *pci_bus = NULL; ISABus *isa_bus; int piix3_devfn = -1; qemu_irq smi_irq; @@ -126,7 +126,6 @@ static void pc_init1(MachineState *machine, MemoryRegion *rom_memory = system_memory; ram_addr_t lowmem; uint64_t hole64_size = 0; -Object *i440fx_host = NULL; /* * Calculate ram split, for memory below and above 4G. It's a bit @@ -198,11 +197,37 @@ static void pc_init1(MachineState *machine, } if (pcmc->pci_enabled) { +Object *phb; + pci_memory = g_new(MemoryRegion, 1); memory_region_init(pci_memory, NULL, "pci", UINT64_MAX); rom_memory = pci_memory; -i440fx_host = OBJECT(qdev_new(host_type)); -hole64_size = object_property_get_uint(i440fx_host, + +phb = OBJECT(qdev_new(host_type)); +object_property_add_child(OBJECT(machine), "i440fx", phb); +object_property_set_link(phb, PCI_HOST_PROP_RAM_MEM, + OBJECT(ram_memory), &error_fatal); +object_property_set_link(phb, PCI_HOST_PROP_PCI_MEM, + OBJECT(pci_memory), &error_fatal); +object_property_set_link(phb, PCI_HOST_PROP_SYSTEM_MEM, + OBJECT(system_memory), &error_fatal); +object_property_set_link(phb, PCI_HOST_PROP_IO_MEM, + OBJECT(system_io), &error_fatal); +object_property_set_uint(phb, PCI_HOST_BELOW_4G_MEM_SIZE, + x86ms->below_4g_mem_size, &error_fatal); +object_property_set_uint(phb, PCI_HOST_ABOVE_4G_MEM_SIZE, + x86ms->above_4g_mem_size, &error_fatal); +object_property_set_str(phb, I440FX_HOST_PROP_PCI_TYPE, pci_type, +&error_fatal); +sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal); + +pci_bus = PCI_BUS(qdev_get_child_bus(DEVICE(phb), "pci.0")); +pci_bus_map_irqs(pci_bus, + xen_enabled() ? xen_pci_slot_get_pirq + : pc_pci_slot_get_pirq); +pcms->bus = pci_bus; + +hole64_size = object_property_get_uint(phb, PCI_HOST_PROP_PCI_HOLE64_SIZE, &error_abort); } @@ -238,29 +263,6 @@ static void pc_init1(MachineState *machine, PIIX3State *piix3; PCIDevice *pci_dev; -object_property_add_child(OBJECT(machine), "i440fx", i440fx_host); -object_property_set_link(i440fx_host, PCI_HOST_PROP_RAM_MEM, - OBJECT(ram_memory), &error_fatal); -object_property_set_link(i440fx_host, PCI_HOST_PROP_PCI_MEM, - OBJECT(pci_memory), &error_fatal); -object_property_set_link(i440fx_host, PCI_HOST_PROP_SYSTEM_MEM, - OBJECT(system_memory), &error_fatal); -object_property_set_link(i440fx_host, PCI_HOST_PROP_IO_MEM, - OBJECT(system_io), &error_fatal); -object_property_set_uint(i440fx_host, PCI_HOST_BELOW_4G_MEM_SIZE, - x86ms->below_4g_mem_size, &error_fatal); -object_property_set_uint(i440fx_host, PCI_HOST_ABOVE_4G_MEM_SIZE, - x86ms->above_4g_mem_size, &error_fatal); -object_property_set_str(i440fx_host, I440FX_HOST_PROP_PCI_TYPE, -pci_type, &error_fatal); -sysbus_realize_and_unref(SYS_BUS_DEVICE(i440fx_host), &error_fatal); - -pci_bus = PCI_BUS(qdev_get_child_bus(DEVICE(i440fx_host), "pci.0")); -pci_bus_map_irqs(pci_bus, - xen_enabled() ? xen_pci_slot_get_pirq - : pc_pci_slot_get_pirq); -pcms->bus = pci_bus; - pci_dev = pci_create_simple_multifunction(pci
[PATCH v2 00/16] Q35 and I440FX host bridge QOM cleanup
This series resolves the legacy i440fx_init() function and instantiates the I440FX host bridge the QOM way. As a preparation the Q35 host bridge receives some cleanup as well. Most of the Q35 patches have been submitted under [1] before. This series incorporates only the changes making the two device models consistent with each other. The original plan of [1] was to clean up Q35 first and then submit a separate follow-up I440FX QOM'ification series. This series takes a more direct approach by cutting down on the changes in both device models while still allowing both device models to be instantiated the same way. The remaining patches in [1] would still be doable. The series is structured as follows: The first 5 patches clean up Q35, the next patch massages Q35 to share its property names with I440FX. The rest of the series resolves i440fx_init(). Tesging done: * `make check` * `make check-avocado` * Run `xl create` under Xen with the following config: name = "Manjaro" type = 'hvm' memory = 1536 apic = 1 usb = 1 disk = [ "file:manjaro-kde-21.2.6-220416-linux515.iso,hdc:cdrom,r" ] device_model_override = "/usr/bin/qemu-system-x86_64" vga = "stdvga" sdl = 1 v2: * Rename `address_space_io` to `io_memory` (Phil) * Eliminate one else branch in pc_piix (Igor) * Make Q35's blackhole_ops DEVICE_LITTLE_ENDIAN again (Igor) * Possibly ongoing discussion regarding bringing together i440fx new and realize [1] https://patchew.org/QEMU/20230304152648.103749-1-shen...@gmail.com/ Bernhard Beschow (16): hw/i386/pc_q35: Resolve redundant q35_host variable hw/pci-host/q35: Fix double, contradicting .endianness assignment hw/pci-host/q35: Initialize PCMachineState::bus in board code hw/pci/pci_host: Introduce PCI_HOST_BYPASS_IOMMU macro hw/pci-host/q35: Initialize PCI_HOST_BYPASS_IOMMU property from board code hw/pci-host/q35: Make some property name macros reusable by i440fx hw/pci-host/i440fx: Replace magic values by existing constants hw/pci-host/i440fx: Have common names for some local variables hw/pci-host/i440fx: Move i440fx_realize() into PCII440FXState section hw/pci-host/i440fx: Make MemoryRegion pointers accessible as properties hw/pci-host/i440fx: Add PCI_HOST_PROP_IO_MEM property hw/pci-host/i440fx: Add PCI_HOST_{ABOVE, BELOW}_4G_MEM_SIZE properties hw/pci-host/i440fx: Add I440FX_HOST_PROP_PCI_TYPE property hw/pci-host/i440fx: Resolve i440fx_init() hw/i386/pc_piix: Turn some local variables into initializers hw/i386/pc_piix: Move i440fx' realize near its qdev_new() include/hw/i386/pc.h | 4 ++ include/hw/pci-host/i440fx.h | 16 + include/hw/pci-host/q35.h| 5 -- include/hw/pci/pci_host.h| 2 + hw/i386/pc_piix.c| 59 +--- hw/i386/pc_q35.c | 31 + hw/pci-host/i440fx.c | 128 +++ hw/pci-host/q35.c| 13 ++-- hw/pci/pci_host.c| 2 +- 9 files changed, 135 insertions(+), 125 deletions(-) -- 2.41.0
[PATCH v2 12/16] hw/pci-host/i440fx: Add PCI_HOST_{ABOVE, BELOW}_4G_MEM_SIZE properties
Introduce the properties in anticipation of QOM'ification; Q35 has the same properties. Note that we want to avoid a "ram size" property in the QOM interface since it seems redundant to both properties introduced in this change. Thus the removal of the ram_size parameter. We assume the invariant of both properties to sum up to "ram size" which is already asserted in pc_memory_init(). Under Xen the invariant seems to hold as well, so we now also check it there. Signed-off-by: Bernhard Beschow --- include/hw/pci-host/i440fx.h | 1 - hw/i386/pc_piix.c| 5 - hw/pci-host/i440fx.c | 12 ++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/include/hw/pci-host/i440fx.h b/include/hw/pci-host/i440fx.h index e3a550021e..7e38456ebb 100644 --- a/include/hw/pci-host/i440fx.h +++ b/include/hw/pci-host/i440fx.h @@ -36,7 +36,6 @@ PCIBus *i440fx_init(const char *pci_type, DeviceState *dev, MemoryRegion *address_space_mem, MemoryRegion *address_space_io, -ram_addr_t ram_size, ram_addr_t below_4g_mem_size, ram_addr_t above_4g_mem_size, MemoryRegion *pci_memory, diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index f9947fbc10..87bee368fc 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -227,6 +227,9 @@ static void pc_init1(MachineState *machine, if (!xen_enabled()) { pc_memory_init(pcms, system_memory, rom_memory, hole64_size); } else { +assert(machine->ram_size == x86ms->below_4g_mem_size + +x86ms->above_4g_mem_size); + pc_system_flash_cleanup_unused(pcms); if (machine->kernel_filename != NULL) { /* For xen HVM direct kernel boot, load linux here */ @@ -242,7 +245,7 @@ static void pc_init1(MachineState *machine, pci_bus = i440fx_init(pci_type, i440fx_host, - system_memory, system_io, machine->ram_size, + system_memory, system_io, x86ms->below_4g_mem_size, x86ms->above_4g_mem_size, pci_memory, ram_memory); diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index a740d1762b..4dd70e68fa 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -52,6 +52,8 @@ struct I440FXState { MemoryRegion *pci_address_space; MemoryRegion *ram_memory; Range pci_hole; +uint64_t below_4g_mem_size; +uint64_t above_4g_mem_size; uint64_t pci_hole64_size; bool pci_hole64_fix; uint32_t short_root_bus; @@ -264,7 +266,6 @@ PCIBus *i440fx_init(const char *pci_type, DeviceState *dev, MemoryRegion *address_space_mem, MemoryRegion *address_space_io, -ram_addr_t ram_size, ram_addr_t below_4g_mem_size, ram_addr_t above_4g_mem_size, MemoryRegion *pci_address_space, @@ -281,6 +282,8 @@ PCIBus *i440fx_init(const char *pci_type, s->io_memory = address_space_io; s->pci_address_space = pci_address_space; s->ram_memory = ram_memory; +s->below_4g_mem_size = below_4g_mem_size; +s->above_4g_mem_size = above_4g_mem_size; b = pci_root_bus_new(dev, NULL, s->pci_address_space, s->io_memory, 0, TYPE_PCI_BUS); @@ -291,7 +294,7 @@ PCIBus *i440fx_init(const char *pci_type, d = pci_create_simple(b, 0, pci_type); f = I440FX_PCI_DEVICE(d); -range_set_bounds(&s->pci_hole, below_4g_mem_size, +range_set_bounds(&s->pci_hole, s->below_4g_mem_size, IO_APIC_DEFAULT_ADDRESS - 1); /* setup pci memory mapping */ @@ -322,6 +325,7 @@ PCIBus *i440fx_init(const char *pci_type, PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); } +ram_addr_t ram_size = s->below_4g_mem_size + s->above_4g_mem_size; ram_size = ram_size / 8 / 1024 / 1024; if (ram_size > 255) { ram_size = 255; @@ -381,6 +385,10 @@ static Property i440fx_props[] = { DEFINE_PROP_SIZE(PCI_HOST_PROP_PCI_HOLE64_SIZE, I440FXState, pci_hole64_size, I440FX_PCI_HOST_HOLE64_SIZE_DEFAULT), DEFINE_PROP_UINT32("short_root_bus", I440FXState, short_root_bus, 0), +DEFINE_PROP_SIZE(PCI_HOST_BELOW_4G_MEM_SIZE, I440FXState, + below_4g_mem_size, 0), +DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, I440FXState, + above_4g_mem_size, 0), DEFINE_PROP_BOOL("x-pci-hole64-fix", I440FXState, pci_hole64_fix, true), DEFINE_PROP_END_OF_LIST(), }; -- 2.41.0
[PATCH v2 03/16] hw/pci-host/q35: Initialize PCMachineState::bus in board code
The Q35 PCI host currently sets the PC machine's PCI bus attribute through global state, thereby assuming the machine to be a PC machine. The Q35 machine code already holds on to Q35's pci bus attribute, so can easily set its own property while preserving encapsulation. Signed-off-by: Bernhard Beschow Reviewed-by: Igor Mammedov Reviewed-by: Philippe Mathieu-Daudé --- hw/i386/pc_q35.c | 4 +++- hw/pci-host/q35.c | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index d9f3764184..4edc0b35f4 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -230,10 +230,12 @@ static void pc_q35_init(MachineState *machine) x86ms->below_4g_mem_size, NULL); object_property_set_int(phb, PCI_HOST_ABOVE_4G_MEM_SIZE, x86ms->above_4g_mem_size, NULL); +sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal); /* pci */ -sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal); host_bus = PCI_BUS(qdev_get_child_bus(DEVICE(phb), "pcie.0")); +pcms->bus = host_bus; + /* create ISA bus */ lpc = pci_new_multifunction(PCI_DEVFN(ICH9_LPC_DEV, ICH9_LPC_FUNC), true, TYPE_ICH9_LPC_DEVICE); diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 84137b9ad9..0604464074 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -66,7 +66,6 @@ static void q35_host_realize(DeviceState *dev, Error **errp) s->mch.pci_address_space, s->mch.address_space_io, 0, TYPE_PCIE_BUS); -PC_MACHINE(qdev_get_machine())->bus = pci->bus; pci->bypass_iommu = PC_MACHINE(qdev_get_machine())->default_bus_bypass_iommu; qdev_realize(DEVICE(&s->mch), BUS(pci->bus), &error_fatal); -- 2.41.0
[PATCH v2 01/16] hw/i386/pc_q35: Resolve redundant q35_host variable
The variable is redundant to "phb" and is never used by its real type. Signed-off-by: Bernhard Beschow Reviewed-by: Thomas Huth --- hw/i386/pc_q35.c | 27 +-- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 11a7084ea1..d9f3764184 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -120,8 +120,7 @@ static void pc_q35_init(MachineState *machine) PCMachineState *pcms = PC_MACHINE(machine); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); X86MachineState *x86ms = X86_MACHINE(machine); -Q35PCIHost *q35_host; -PCIHostState *phb; +Object *phb; PCIBus *host_bus; PCIDevice *lpc; DeviceState *lpc_dev; @@ -207,10 +206,10 @@ static void pc_q35_init(MachineState *machine) } /* create pci host bus */ -q35_host = Q35_HOST_DEVICE(qdev_new(TYPE_Q35_HOST_DEVICE)); +phb = OBJECT(qdev_new(TYPE_Q35_HOST_DEVICE)); if (pcmc->pci_enabled) { -pci_hole64_size = object_property_get_uint(OBJECT(q35_host), +pci_hole64_size = object_property_get_uint(phb, PCI_HOST_PROP_PCI_HOLE64_SIZE, &error_abort); } @@ -218,23 +217,23 @@ static void pc_q35_init(MachineState *machine) /* allocate ram and load rom/bios */ pc_memory_init(pcms, system_memory, rom_memory, pci_hole64_size); -object_property_add_child(OBJECT(machine), "q35", OBJECT(q35_host)); -object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_RAM_MEM, +object_property_add_child(OBJECT(machine), "q35", phb); +object_property_set_link(phb, MCH_HOST_PROP_RAM_MEM, OBJECT(machine->ram), NULL); -object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_PCI_MEM, +object_property_set_link(phb, MCH_HOST_PROP_PCI_MEM, OBJECT(pci_memory), NULL); -object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_SYSTEM_MEM, +object_property_set_link(phb, MCH_HOST_PROP_SYSTEM_MEM, OBJECT(system_memory), NULL); -object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_IO_MEM, +object_property_set_link(phb, MCH_HOST_PROP_IO_MEM, OBJECT(system_io), NULL); -object_property_set_int(OBJECT(q35_host), PCI_HOST_BELOW_4G_MEM_SIZE, +object_property_set_int(phb, PCI_HOST_BELOW_4G_MEM_SIZE, x86ms->below_4g_mem_size, NULL); -object_property_set_int(OBJECT(q35_host), PCI_HOST_ABOVE_4G_MEM_SIZE, +object_property_set_int(phb, PCI_HOST_ABOVE_4G_MEM_SIZE, x86ms->above_4g_mem_size, NULL); + /* pci */ -sysbus_realize_and_unref(SYS_BUS_DEVICE(q35_host), &error_fatal); -phb = PCI_HOST_BRIDGE(q35_host); -host_bus = phb->bus; +sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal); +host_bus = PCI_BUS(qdev_get_child_bus(DEVICE(phb), "pcie.0")); /* create ISA bus */ lpc = pci_new_multifunction(PCI_DEVFN(ICH9_LPC_DEV, ICH9_LPC_FUNC), true, TYPE_ICH9_LPC_DEVICE); -- 2.41.0
[PATCH v2 06/16] hw/pci-host/q35: Make some property name macros reusable by i440fx
Signed-off-by: Bernhard Beschow --- include/hw/i386/pc.h | 4 include/hw/pci-host/q35.h | 5 - hw/i386/pc_q35.c | 8 hw/pci-host/q35.c | 8 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 6eec0fc51d..c34c698cdd 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -146,6 +146,10 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level); void pc_guest_info_init(PCMachineState *pcms); +#define PCI_HOST_PROP_RAM_MEM "ram-mem" +#define PCI_HOST_PROP_PCI_MEM "pci-mem" +#define PCI_HOST_PROP_SYSTEM_MEM "system-mem" +#define PCI_HOST_PROP_IO_MEM "io-mem" #define PCI_HOST_PROP_PCI_HOLE_START "pci-hole-start" #define PCI_HOST_PROP_PCI_HOLE_END "pci-hole-end" #define PCI_HOST_PROP_PCI_HOLE64_START "pci-hole64-start" diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index e89329c51e..1d98bbfe0d 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -74,11 +74,6 @@ struct Q35PCIHost { * gmch part */ -#define MCH_HOST_PROP_RAM_MEM "ram-mem" -#define MCH_HOST_PROP_PCI_MEM "pci-mem" -#define MCH_HOST_PROP_SYSTEM_MEM "system-mem" -#define MCH_HOST_PROP_IO_MEM "io-mem" - /* PCI configuration */ #define MCH_HOST_BRIDGE"MCH" diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 852250e8cb..02dd274276 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -218,13 +218,13 @@ static void pc_q35_init(MachineState *machine) pc_memory_init(pcms, system_memory, rom_memory, pci_hole64_size); object_property_add_child(OBJECT(machine), "q35", phb); -object_property_set_link(phb, MCH_HOST_PROP_RAM_MEM, +object_property_set_link(phb, PCI_HOST_PROP_RAM_MEM, OBJECT(machine->ram), NULL); -object_property_set_link(phb, MCH_HOST_PROP_PCI_MEM, +object_property_set_link(phb, PCI_HOST_PROP_PCI_MEM, OBJECT(pci_memory), NULL); -object_property_set_link(phb, MCH_HOST_PROP_SYSTEM_MEM, +object_property_set_link(phb, PCI_HOST_PROP_SYSTEM_MEM, OBJECT(system_memory), NULL); -object_property_set_link(phb, MCH_HOST_PROP_IO_MEM, +object_property_set_link(phb, PCI_HOST_PROP_IO_MEM, OBJECT(system_io), NULL); object_property_set_int(phb, PCI_HOST_BELOW_4G_MEM_SIZE, x86ms->below_4g_mem_size, NULL); diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index d2830cee34..91c46df9ae 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -240,19 +240,19 @@ static void q35_host_initfn(Object *obj) object_property_add_uint64_ptr(obj, PCIE_HOST_MCFG_SIZE, &pehb->size, OBJ_PROP_FLAG_READ); -object_property_add_link(obj, MCH_HOST_PROP_RAM_MEM, TYPE_MEMORY_REGION, +object_property_add_link(obj, PCI_HOST_PROP_RAM_MEM, TYPE_MEMORY_REGION, (Object **) &s->mch.ram_memory, qdev_prop_allow_set_link_before_realize, 0); -object_property_add_link(obj, MCH_HOST_PROP_PCI_MEM, TYPE_MEMORY_REGION, +object_property_add_link(obj, PCI_HOST_PROP_PCI_MEM, TYPE_MEMORY_REGION, (Object **) &s->mch.pci_address_space, qdev_prop_allow_set_link_before_realize, 0); -object_property_add_link(obj, MCH_HOST_PROP_SYSTEM_MEM, TYPE_MEMORY_REGION, +object_property_add_link(obj, PCI_HOST_PROP_SYSTEM_MEM, TYPE_MEMORY_REGION, (Object **) &s->mch.system_memory, qdev_prop_allow_set_link_before_realize, 0); -object_property_add_link(obj, MCH_HOST_PROP_IO_MEM, TYPE_MEMORY_REGION, +object_property_add_link(obj, PCI_HOST_PROP_IO_MEM, TYPE_MEMORY_REGION, (Object **) &s->mch.address_space_io, qdev_prop_allow_set_link_before_realize, 0); } -- 2.41.0
[PATCH] ui/gtk: Make sure the right EGL context is currently bound
Observed a wrong context is bound when changing the scanout mode. To prevent problem, it is needed to make sure to bind the right context in gtk_egl_set_scanout_mode/gtk_gl_area_set_scanout_mode as well as unbind one in the end of gd_egl_update/gd_gl_area_update. Cc: Gerd Hoffmann Cc: Marc-André Lureau Cc: Vivek Kasireddy Signed-off-by: Dongwon Kim --- ui/gtk-egl.c | 4 ui/gtk-gl-area.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c index 19130041bc..79f9f334f2 100644 --- a/ui/gtk-egl.c +++ b/ui/gtk-egl.c @@ -31,6 +31,8 @@ static void gtk_egl_set_scanout_mode(VirtualConsole *vc, bool scanout) vc->gfx.scanout_mode = scanout; if (!vc->gfx.scanout_mode) { +eglMakeCurrent(qemu_egl_display, vc->gfx.esurface, + vc->gfx.esurface, vc->gfx.ectx); egl_fb_destroy(&vc->gfx.guest_fb); if (vc->gfx.surface) { surface_gl_destroy_texture(vc->gfx.gls, vc->gfx.ds); @@ -134,6 +136,8 @@ void gd_egl_update(DisplayChangeListener *dcl, vc->gfx.esurface, vc->gfx.ectx); surface_gl_update_texture(vc->gfx.gls, vc->gfx.ds, x, y, w, h); vc->gfx.glupdates++; +eglMakeCurrent(qemu_egl_display, EGL_NO_SURFACE, + EGL_NO_SURFACE, EGL_NO_CONTEXT); } void gd_egl_refresh(DisplayChangeListener *dcl) diff --git a/ui/gtk-gl-area.c b/ui/gtk-gl-area.c index c384a1516b..285e661071 100644 --- a/ui/gtk-gl-area.c +++ b/ui/gtk-gl-area.c @@ -26,6 +26,7 @@ static void gtk_gl_area_set_scanout_mode(VirtualConsole *vc, bool scanout) vc->gfx.scanout_mode = scanout; if (!vc->gfx.scanout_mode) { +gtk_gl_area_make_current(GTK_GL_AREA(vc->gfx.drawing_area)); egl_fb_destroy(&vc->gfx.guest_fb); if (vc->gfx.surface) { surface_gl_destroy_texture(vc->gfx.gls, vc->gfx.ds); @@ -115,6 +116,7 @@ void gd_gl_area_update(DisplayChangeListener *dcl, gtk_gl_area_make_current(GTK_GL_AREA(vc->gfx.drawing_area)); surface_gl_update_texture(vc->gfx.gls, vc->gfx.ds, x, y, w, h); vc->gfx.glupdates++; +gdk_gl_context_clear_current(); } void gd_gl_area_refresh(DisplayChangeListener *dcl) -- 2.34.1
[PATCH] ui/gtk: skip refresh if new dmabuf is submitted
Skip refresh if a new dmabuf (guest scanout frame) is submitted and ready to be drawn because the scanout will be refreshed with new frame anyway. Also, setting scanout mode is better to be done right before a draw event is scheduled because the mode can be reset anytime after it is set in dpy_gl_scanout_texture by any asynchronouse dpy_refresh call, which eventually cancels the drawing of the guest scanout texture. Cc: Gerd Hoffmann Cc: Marc-André Lureau Cc: Vivek Kasireddy Signed-off-by: Dongwon Kim --- ui/gtk-egl.c | 6 +- ui/gtk-gl-area.c | 6 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c index 19130041bc..62ff6812b6 100644 --- a/ui/gtk-egl.c +++ b/ui/gtk-egl.c @@ -143,6 +143,10 @@ void gd_egl_refresh(DisplayChangeListener *dcl) gd_update_monitor_refresh_rate( vc, vc->window ? vc->window : vc->gfx.drawing_area); +if (vc->gfx.guest_fb.dmabuf && vc->gfx.guest_fb.dmabuf->draw_submitted) { +return; +} + if (!vc->gfx.esurface) { gd_egl_init(vc); if (!vc->gfx.esurface) { @@ -236,7 +240,6 @@ void gd_egl_scanout_texture(DisplayChangeListener *dcl, eglMakeCurrent(qemu_egl_display, vc->gfx.esurface, vc->gfx.esurface, vc->gfx.ectx); -gtk_egl_set_scanout_mode(vc, true); egl_fb_setup_for_tex(&vc->gfx.guest_fb, backing_width, backing_height, backing_id, false); } @@ -344,6 +347,7 @@ void gd_egl_flush(DisplayChangeListener *dcl, if (vc->gfx.guest_fb.dmabuf && !vc->gfx.guest_fb.dmabuf->draw_submitted) { graphic_hw_gl_block(vc->gfx.dcl.con, true); vc->gfx.guest_fb.dmabuf->draw_submitted = true; +gtk_egl_set_scanout_mode(vc, true); gtk_widget_queue_draw_area(area, x, y, w, h); return; } diff --git a/ui/gtk-gl-area.c b/ui/gtk-gl-area.c index c384a1516b..68eff3435b 100644 --- a/ui/gtk-gl-area.c +++ b/ui/gtk-gl-area.c @@ -123,6 +123,10 @@ void gd_gl_area_refresh(DisplayChangeListener *dcl) gd_update_monitor_refresh_rate(vc, vc->window ? vc->window : vc->gfx.drawing_area); +if (vc->gfx.guest_fb.dmabuf && vc->gfx.guest_fb.dmabuf->draw_submitted) { +return; +} + if (!vc->gfx.gls) { if (!gtk_widget_get_realized(vc->gfx.drawing_area)) { return; @@ -261,7 +265,6 @@ void gd_gl_area_scanout_texture(DisplayChangeListener *dcl, return; } -gtk_gl_area_set_scanout_mode(vc, true); egl_fb_setup_for_tex(&vc->gfx.guest_fb, backing_width, backing_height, backing_id, false); } @@ -281,6 +284,7 @@ void gd_gl_area_scanout_flush(DisplayChangeListener *dcl, if (vc->gfx.guest_fb.dmabuf && !vc->gfx.guest_fb.dmabuf->draw_submitted) { graphic_hw_gl_block(vc->gfx.dcl.con, true); vc->gfx.guest_fb.dmabuf->draw_submitted = true; +gtk_gl_area_set_scanout_mode(vc, true); } gtk_gl_area_queue_render(GTK_GL_AREA(vc->gfx.drawing_area)); } -- 2.34.1
Re: [PATCH] Add support for RAPL MSRs in KVM/Qemu
Marcelo Tosatti, Jun 28, 2023 at 19:26: Hi Marcelo, > On Fri, Jun 16, 2023 at 04:08:30PM +0200, Anthony Harivel wrote: > This feature is activated with -accel kvm,rapl=true. > >> I suppose this should be a CPU flag instead? -cpu xxx,rapl=on. It's possible yes then I might need to make sure that '-accel=kvm' is selected because this rapl feature depends on the KVM msr filtering mecanism. Like '-cpu host' is checking that KVM or HVF is selected. Thanks Anthony
[PATCH] MAINTAINERS: Promote Cédric to VFIO co-maintainer
Cédric has stepped up involvement in vfio, reviewing and managing patches, as well as pull requests. This work deserves gratitude and punishment with a promotion to co-maintainer ;) Signed-off-by: Alex Williamson --- Cédric, I'd also support if you wanted to add a tree entry here. MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e07746ac7d45..37e48c72b16a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2051,7 +2051,7 @@ F: hw/usb/dev-serial.c VFIO M: Alex Williamson -R: Cédric Le Goater +M: Cédric Le Goater S: Supported F: hw/vfio/* F: include/hw/vfio/ -- 2.40.1
Re: [PATCH] Add support for RAPL MSRs in KVM/Qemu
On Fri, Jun 16, 2023 at 04:08:30PM +0200, Anthony Harivel wrote: > Starting with the "Sandy Bridge" generation, Intel CPUs provide a RAPL > interface (Running Average Power Limit) for advertising the accumulated > energy consumption of various power domains (e.g. CPU packages, DRAM, > etc.). > > The consumption is reported via MSRs (model specific registers) like > MSR_PKG_ENERGY_STATUS for the CPU package power domain. These MSRs are > 64 bits registers that represent the accumulated energy consumption in > micro Joules. They are updated by microcode every ~1ms. > > For now, KVM always returns 0 when the guest requests the value of > these MSRs. Use the KVM MSR filtering mechanism to allow QEMU handle > these MSRs dynamically in userspace. > > To limit the amount of system calls for every MSR call, create a new > thread in QEMU that updates the "virtual" MSR values asynchronously. > > Each vCPU has its own vMSR to reflect the independence of vCPUs. The > thread updates the vMSR values with the ratio of energy consumed of > the whole physical CPU package the vCPU thread runs on and the > thread's utime and stime values. > > All other non-vCPU threads are also taken into account. Their energy > consumption is evenly distributed among all vCPUs threads running on > the same physical CPU package. > > This feature is activated with -accel kvm,rapl=true. I suppose this should be a CPU flag instead? -cpu xxx,rapl=on.
Re: [PATCH V3 2/2] migration: file URI offset
On Thu, Jun 22, 2023 at 01:37:31PM -0700, Steve Sistare wrote: > Allow an offset option to be specified as part of the file URI, in > the form "file:filename,offset=offset", where offset accepts the common > size suffixes, or the 0x prefix, but not both. Migration data is written > to and read from the file starting at offset. If unspecified, it defaults > to 0. > > This is needed by libvirt to store its own data at the head of the file. > > Suggested-by: Daniel P. Berrange > Signed-off-by: Steve Sistare Ideally I think we should make "," the separator from the start, but I suppose no one will try to apply previous patch only, and then start using a file name contains ",offset=".. so maybe not that a huge deal. Reviewed-by: Peter Xu -- Peter Xu
Re: [PATCH V3 1/2] migration: file URI
On Wed, Jun 28, 2023 at 01:07:24PM -0400, Peter Xu wrote: > On Thu, Jun 22, 2023 at 01:37:30PM -0700, Steve Sistare wrote: > > Extend the migration URI to support file:. This can be used for > > any migration scenario that does not require a reverse path. It can be > > used as an alternative to 'exec:cat > file' in minimized containers that > > do not contain /bin/sh, and it is easier to use than the fd: URI. > > It can be used in HMP commands, and as a qemu command-line parameter. > > > > For best performance, guest ram should be shared and x-ignore-shared > > should be true, so guest pages are not written to the file, in which case > > the guest may remain running. If ram is not so configured, then the user > > is advised to stop the guest first. Otherwise, a busy guest may re-dirty > > the same page, causing it to be appended to the file multiple times, > > and the file may grow unboundedly. That issue is being addressed in the > > "fixed-ram" patch series. > > > > Signed-off-by: Steve Sistare > > Reviewed-by: Fabiano Rosas > > --- > > migration/file.c | 62 > > ++ > > migration/file.h | 14 > > migration/meson.build | 1 + > > migration/migration.c | 5 > > migration/trace-events | 4 > > qemu-options.hx| 6 - > > 6 files changed, 91 insertions(+), 1 deletion(-) > > create mode 100644 migration/file.c > > create mode 100644 migration/file.h > > > > diff --git a/migration/file.c b/migration/file.c > > new file mode 100644 > > index 000..8e35827 > > --- /dev/null > > +++ b/migration/file.c > > @@ -0,0 +1,62 @@ > > +/* > > + * Copyright (c) 2021-2023 Oracle and/or its affiliates. > > + * > > + * This work is licensed under the terms of the GNU GPL, version 2. > > + * See the COPYING file in the top-level directory. > > + */ > > + > > +#include "qemu/osdep.h" > > +#include "channel.h" > > +#include "file.h" > > +#include "migration.h" > > +#include "io/channel-file.h" > > +#include "io/channel-util.h" > > +#include "trace.h" > > + > > +void file_start_outgoing_migration(MigrationState *s, const char *filename, > > + Error **errp) > > +{ > > +g_autoptr(QIOChannelFile) fioc = NULL; > > +QIOChannel *ioc; > > + > > +trace_migration_file_outgoing(filename); > > + > > +fioc = qio_channel_file_new_path(filename, O_CREAT | O_WRONLY | > > O_TRUNC, > > + 0600, errp); > > +if (!fioc) { > > +return; > > +} > > + > > +ioc = QIO_CHANNEL(fioc); > > +qio_channel_set_name(ioc, "migration-file-outgoing"); > > +migration_channel_connect(s, ioc, NULL, NULL); > > Miss one object_unref(ioc)? Never mind, I overlooked the g_autoptr.. all fine: Reviewed-by: Peter Xu -- Peter Xu
Re: [PATCH v3 0/6] net/tap: Fix QEMU frozen issue when the maximum number of file descriptors is very large
17.06.2023 08:36, Bin Meng wrote: Current codes using a brute-force traversal of all file descriptors do not scale on a system where the maximum number of file descriptors is set to a very large value (e.g.: in a Docker container of Manjaro distribution it is set to 1073741816). QEMU just looks frozen during start-up. What's the reason to close all these file descriptors in the first place? No other software I know does this. For some situations, such closing is actually bad, -- think, eg, flock lockfile qemu-system-foo ... -- this one opens a file, locks it using fcntl/flock, and executes the command, keeping the file descriptor open across exec, so the file stays locked until the process terminates. This works and works well. Qemu with its let's-close-everything approach breaks this. Why? :) /mjt
Re: [RFC 7/7] hw/mem/cxl_type3: add read/write support to dynamic capacity
The 05/15/2023 16:22, Jonathan Cameron wrote: > On Thu, 11 May 2023 17:56:40 + > Fan Ni wrote: > > > From: Fan Ni > > > > Before the change, read from or write to dynamic capacity of the memory > > device is not support as 1) no host backed file/memory is provided for > > it; 2) no address space is created for the dynamic capacity. > > Ah nice. I should have read ahead. Probably makes sense to reorder things > so that when we present DCD region it will work. We can back dynamic capacity with host memory/file and create address space for dc regions, but until extents can be added we should not expect any read/write can happen to the dynamic capacity, right? Fan > > > > > With the change, add code to support following: > > 1. add a new property to type3 device "dc-memdev" to point to host > >memory backend for dynamic capacity; > > 2. add a bitmap for each region to track whether a block is host backed, > > which will be used for address check when read/write dynamic capacity; > > 3. add namespace for dynamic capacity for read/write support; > > 4. create cdat entries for each dynamic capacity region; > > > > Signed-off-by: Fan Ni > > --- > > hw/cxl/cxl-mailbox-utils.c | 21 ++- > > hw/mem/cxl_type3.c | 336 +--- > > include/hw/cxl/cxl_device.h | 8 +- > > 3 files changed, 298 insertions(+), 67 deletions(-) > > > > diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c > > index 7212934627..efe61e67fb 100644 > > --- a/hw/cxl/cxl-mailbox-utils.c > > +++ b/hw/cxl/cxl-mailbox-utils.c > > @@ -391,9 +391,11 @@ static CXLRetCode cmd_firmware_update_get_info(struct > > cxl_cmd *cmd, > > char fw_rev4[0x10]; > > } QEMU_PACKED *fw_info; > > QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50); > > + CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate); > > > > if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) || > > -(cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER)) { > > + (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) || > > Keep old alignment > > > + (ct3d->dc.total_dynamic_capicity < CXL_CAPACITY_MULTIPLIER)) { > > We should think about the separation between what goes in cxl_dstate and > directly > in ct3d. That boundary has been blurring for a while and getting some review > comments. > > > return CXL_MBOX_INTERNAL_ERROR; > > } > > > > @@ -534,7 +536,9 @@ static CXLRetCode cmd_identify_memory_device(struct > > cxl_cmd *cmd, > > CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d); > > > > if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) > > || > > -(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, > > CXL_CAPACITY_MULTIPLIER))) { > > + (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, > > CXL_CAPACITY_MULTIPLIER)) || > > + (!QEMU_IS_ALIGNED(ct3d->dc.total_dynamic_capicity, > > + CXL_CAPACITY_MULTIPLIER))) { > > return CXL_MBOX_INTERNAL_ERROR; > > } > > > > @@ -543,7 +547,8 @@ static CXLRetCode cmd_identify_memory_device(struct > > cxl_cmd *cmd, > > > > snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0); > > > > -stq_le_p(&id->total_capacity, cxl_dstate->mem_size / > > CXL_CAPACITY_MULTIPLIER); > > + stq_le_p(&id->total_capacity, > > + cxl_dstate->static_mem_size / CXL_CAPACITY_MULTIPLIER); > > Pull the rename out as a precursor patch. > > > stq_le_p(&id->persistent_capacity, cxl_dstate->pmem_size / > > CXL_CAPACITY_MULTIPLIER); > > stq_le_p(&id->volatile_capacity, cxl_dstate->vmem_size / > > CXL_CAPACITY_MULTIPLIER); > > stl_le_p(&id->lsa_size, cvc->get_lsa_size(ct3d)); > > @@ -568,9 +573,12 @@ static CXLRetCode cmd_ccls_get_partition_info(struct > > cxl_cmd *cmd, > > uint64_t next_pmem; > > } QEMU_PACKED *part_info = (void *)cmd->payload; > > QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20); > > + CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate); > > > > if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) > > || > > -(!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, > > CXL_CAPACITY_MULTIPLIER))) { > > + (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, > > CXL_CAPACITY_MULTIPLIER)) || > > + (!QEMU_IS_ALIGNED(ct3d->dc.total_dynamic_capicity, > > + CXL_CAPACITY_MULTIPLIER))) { > > return CXL_MBOX_INTERNAL_ERROR; > > } > > > > @@ -881,9 +889,8 @@ static CXLRetCode cmd_media_clear_poison(struct cxl_cmd > > *cmd, > > struct clear_poison_pl *in = (void *)cmd->payload; > > > > dpa = ldq_le_p(&in->dpa); > > -if (dpa + 64 > cxl_dstate->mem_size) { > > -return CXL_MBOX_INVALID_PA; > > -} > > + if (dpa + 64 > cxl_dstate->static_mem_size && ct3d->dc.num_regions == 0) > > This test will need expanding to include DPAs in D
Re: [PATCH V3 1/2] migration: file URI
On Thu, Jun 22, 2023 at 01:37:30PM -0700, Steve Sistare wrote: > Extend the migration URI to support file:. This can be used for > any migration scenario that does not require a reverse path. It can be > used as an alternative to 'exec:cat > file' in minimized containers that > do not contain /bin/sh, and it is easier to use than the fd: URI. > It can be used in HMP commands, and as a qemu command-line parameter. > > For best performance, guest ram should be shared and x-ignore-shared > should be true, so guest pages are not written to the file, in which case > the guest may remain running. If ram is not so configured, then the user > is advised to stop the guest first. Otherwise, a busy guest may re-dirty > the same page, causing it to be appended to the file multiple times, > and the file may grow unboundedly. That issue is being addressed in the > "fixed-ram" patch series. > > Signed-off-by: Steve Sistare > Reviewed-by: Fabiano Rosas > --- > migration/file.c | 62 > ++ > migration/file.h | 14 > migration/meson.build | 1 + > migration/migration.c | 5 > migration/trace-events | 4 > qemu-options.hx| 6 - > 6 files changed, 91 insertions(+), 1 deletion(-) > create mode 100644 migration/file.c > create mode 100644 migration/file.h > > diff --git a/migration/file.c b/migration/file.c > new file mode 100644 > index 000..8e35827 > --- /dev/null > +++ b/migration/file.c > @@ -0,0 +1,62 @@ > +/* > + * Copyright (c) 2021-2023 Oracle and/or its affiliates. > + * > + * This work is licensed under the terms of the GNU GPL, version 2. > + * See the COPYING file in the top-level directory. > + */ > + > +#include "qemu/osdep.h" > +#include "channel.h" > +#include "file.h" > +#include "migration.h" > +#include "io/channel-file.h" > +#include "io/channel-util.h" > +#include "trace.h" > + > +void file_start_outgoing_migration(MigrationState *s, const char *filename, > + Error **errp) > +{ > +g_autoptr(QIOChannelFile) fioc = NULL; > +QIOChannel *ioc; > + > +trace_migration_file_outgoing(filename); > + > +fioc = qio_channel_file_new_path(filename, O_CREAT | O_WRONLY | O_TRUNC, > + 0600, errp); > +if (!fioc) { > +return; > +} > + > +ioc = QIO_CHANNEL(fioc); > +qio_channel_set_name(ioc, "migration-file-outgoing"); > +migration_channel_connect(s, ioc, NULL, NULL); Miss one object_unref(ioc)? > +} > + > +static gboolean file_accept_incoming_migration(QIOChannel *ioc, > + GIOCondition condition, > + gpointer opaque) > +{ > +migration_channel_process_incoming(ioc); > +object_unref(OBJECT(ioc)); > +return G_SOURCE_REMOVE; > +} > + > +void file_start_incoming_migration(const char *filename, Error **errp) > +{ > +QIOChannelFile *fioc = NULL; > +QIOChannel *ioc; > + > +trace_migration_file_incoming(filename); > + > +fioc = qio_channel_file_new_path(filename, O_RDONLY, 0, errp); > +if (!fioc) { > +return; > +} > + > +ioc = QIO_CHANNEL(fioc); > +qio_channel_set_name(QIO_CHANNEL(ioc), "migration-file-incoming"); > +qio_channel_add_watch_full(ioc, G_IO_IN, > + file_accept_incoming_migration, > + NULL, NULL, > + g_main_context_get_thread_default()); > +} > diff --git a/migration/file.h b/migration/file.h > new file mode 100644 > index 000..841b94a > --- /dev/null > +++ b/migration/file.h > @@ -0,0 +1,14 @@ > +/* > + * Copyright (c) 2021-2023 Oracle and/or its affiliates. > + * > + * This work is licensed under the terms of the GNU GPL, version 2. > + * See the COPYING file in the top-level directory. > + */ > + > +#ifndef QEMU_MIGRATION_FILE_H > +#define QEMU_MIGRATION_FILE_H > +void file_start_incoming_migration(const char *filename, Error **errp); > + > +void file_start_outgoing_migration(MigrationState *s, const char *filename, > + Error **errp); > +#endif > diff --git a/migration/meson.build b/migration/meson.build > index 8ba6e42..3af817e 100644 > --- a/migration/meson.build > +++ b/migration/meson.build > @@ -16,6 +16,7 @@ softmmu_ss.add(files( >'dirtyrate.c', >'exec.c', >'fd.c', > + 'file.c', >'global_state.c', >'migration-hmp-cmds.c', >'migration.c', > diff --git a/migration/migration.c b/migration/migration.c > index dc05c6f..cfbde86 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -20,6 +20,7 @@ > #include "migration/blocker.h" > #include "exec.h" > #include "fd.h" > +#include "file.h" > #include "socket.h" > #include "sysemu/runstate.h" > #include "sysemu/sysemu.h" > @@ -442,6 +443,8 @@ static void qemu_start_incoming_migration(const char > *uri, Error **errp) >