[PATCH 07/10] exec: Move all RAMBlock functions to 'exec/ramblock.h'
The RAMBlock API was dispersed in 3 different headers. One of these headers, "exec/ram_addr.h", is restricted to target dependent code. However these functions are not target specific. Move all functions into a single place. Now all these functions can be accessed by target-agnostic code. Signed-off-by: Philippe Mathieu-Daudé --- include/exec/cpu-common.h| 24 --- include/exec/ram_addr.h | 105 --- include/exec/ramblock.h | 134 +++ migration/migration.h| 1 + accel/tcg/translate-all.c| 2 - hw/block/nvme.c | 2 +- hw/s390x/s390-stattrib-kvm.c | 1 - hw/s390x/s390-stattrib.c | 1 - hw/s390x/s390-virtio-ccw.c | 1 - hw/virtio/vhost-user.c | 1 + hw/virtio/vhost.c| 1 + hw/virtio/virtio-balloon.c | 1 + memory.c | 1 + migration/migration.c| 1 + migration/postcopy-ram.c | 1 + migration/savevm.c | 1 + stubs/ram-block.c| 2 +- target/ppc/kvm.c | 1 - target/s390x/kvm.c | 1 - util/vfio-helpers.c | 2 +- 20 files changed, 145 insertions(+), 139 deletions(-) diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index b47e5630e7..347ceb603b 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -49,25 +49,6 @@ typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr); void qemu_ram_remap(ram_addr_t addr, ram_addr_t length); /* This should not be used by devices. */ ram_addr_t qemu_ram_addr_from_host(void *ptr); -RAMBlock *qemu_ram_block_by_name(const char *name); -RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset, - ram_addr_t *offset); -ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host); -void qemu_ram_set_idstr(RAMBlock *block, const char *name, DeviceState *dev); -void qemu_ram_unset_idstr(RAMBlock *block); -const char *qemu_ram_get_idstr(RAMBlock *rb); -void *qemu_ram_get_host_addr(RAMBlock *rb); -ram_addr_t qemu_ram_get_offset(RAMBlock *rb); -ram_addr_t qemu_ram_get_used_length(RAMBlock *rb); -bool qemu_ram_is_shared(RAMBlock *rb); -bool qemu_ram_is_uf_zeroable(RAMBlock *rb); -void qemu_ram_set_uf_zeroable(RAMBlock *rb); -bool qemu_ram_is_migratable(RAMBlock *rb); -void qemu_ram_set_migratable(RAMBlock *rb); -void qemu_ram_unset_migratable(RAMBlock *rb); - -size_t qemu_ram_pagesize(RAMBlock *block); -size_t qemu_ram_pagesize_largest(void); void cpu_physical_memory_rw(hwaddr addr, void *buf, hwaddr len, bool is_write); @@ -100,11 +81,6 @@ void qemu_flush_coalesced_mmio_buffer(void); void cpu_flush_icache_range(hwaddr start, hwaddr len); -typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); - -int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); -int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length); - #endif #endif /* CPU_COMMON_H */ diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index c61d5188f7..0deffad66f 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -26,112 +26,7 @@ #include "exec/ramlist.h" #include "exec/ramblock.h" -/** - * clear_bmap_size: calculate clear bitmap size - * - * @pages: number of guest pages - * @shift: guest page number shift - * - * Returns: number of bits for the clear bitmap - */ -static inline long clear_bmap_size(uint64_t pages, uint8_t shift) -{ -return DIV_ROUND_UP(pages, 1UL << shift); -} -/** - * clear_bmap_set: set clear bitmap for the page range - * - * @rb: the ramblock to operate on - * @start: the start page number - * @size: number of pages to set in the bitmap - * - * Returns: None - */ -static inline void clear_bmap_set(RAMBlock *rb, uint64_t start, - uint64_t npages) -{ -uint8_t shift = rb->clear_bmap_shift; - -bitmap_set_atomic(rb->clear_bmap, start >> shift, - clear_bmap_size(npages, shift)); -} - -/** - * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set - * - * @rb: the ramblock to operate on - * @page: the page number to check - * - * Returns: true if the bit was set, false otherwise - */ -static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page) -{ -uint8_t shift = rb->clear_bmap_shift; - -return bitmap_test_and_clear_atomic(rb->clear_bmap, page >> shift, 1); -} - -static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset) -{ -return (b && b->host && offset < b->used_length) ? true : false; -} - -static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset) -{ -assert(offset_in_ramblock(block, offset)); -return (char *)block->host + offset; -} - -bool ramblock_is_pmem(RAMBlock *rb); - -/** - * qemu_ram_alloc_from_file, - * qemu_ram_alloc_from_fd: Allocate a ram block from the specified backing - * file or device - * - *
[PATCH 05/10] exec: Move qemu_minrampagesize/qemu_maxrampagesize to 'qemu-common.h'
Move these generic functions to a more common place, with other functions related to host page size. Document a little. Cc: Alexey Kardashevskiy Signed-off-by: Philippe Mathieu-Daudé --- include/exec/ram_addr.h| 3 --- include/qemu-common.h | 10 ++ hw/ppc/spapr_caps.c| 2 +- hw/s390x/s390-virtio-ccw.c | 1 + hw/vfio/spapr.c| 2 +- 5 files changed, 13 insertions(+), 5 deletions(-) diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 06096e8c6a..195b67d3c8 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -93,9 +93,6 @@ static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr, bool ramblock_is_pmem(RAMBlock *rb); -long qemu_minrampagesize(void); -long qemu_maxrampagesize(void); - /** * qemu_ram_alloc_from_file, * qemu_ram_alloc_from_fd: Allocate a ram block from the specified backing diff --git a/include/qemu-common.h b/include/qemu-common.h index d0142f29ac..2821a6ef76 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -80,6 +80,16 @@ bool set_preferred_target_page_bits(int bits); */ void finalize_target_page_bits(void); +/** + * qemu_minrampagesize: + * qemu_maxrampagesize: + * + * If backed via -memdev, return the device page size, + * else return the host kernel page size. + */ +long qemu_minrampagesize(void); +long qemu_maxrampagesize(void); + /** * Sends a (part of) iovec down a socket, yielding when the socket is full, or * Receives data into a (part of) iovec from a socket, diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c index eb54f94227..33a802a103 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -23,11 +23,11 @@ */ #include "qemu/osdep.h" +#include "qemu-common.h" #include "qemu/error-report.h" #include "qapi/error.h" #include "qapi/visitor.h" #include "sysemu/hw_accel.h" -#include "exec/ram_addr.h" #include "target/ppc/cpu.h" #include "target/ppc/mmu-hash64.h" #include "cpu-models.h" diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index f660070d22..c009384505 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -12,6 +12,7 @@ */ #include "qemu/osdep.h" +#include "qemu-common.h" #include "qapi/error.h" #include "cpu.h" #include "hw/boards.h" diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c index 2900bd1941..c64db940a7 100644 --- a/hw/vfio/spapr.c +++ b/hw/vfio/spapr.c @@ -9,13 +9,13 @@ */ #include "qemu/osdep.h" +#include "qemu-common.h" #include "cpu.h" #include #include #include "hw/vfio/vfio-common.h" #include "hw/hw.h" -#include "exec/ram_addr.h" #include "qemu/error-report.h" #include "qapi/error.h" #include "trace.h" -- 2.21.3
[PATCH 01/10] exec: Rename qemu_ram_writeback() as qemu_ram_msync()
Rename qemu_ram_writeback() as qemu_ram_msync() to better match what it does. Suggested-by: Stefan Hajnoczi Signed-off-by: Philippe Mathieu-Daudé --- include/exec/ram_addr.h | 4 ++-- exec.c | 2 +- hw/block/nvme.c | 3 +-- memory.c| 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 4e05292f91..7b5c24e928 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -136,12 +136,12 @@ void qemu_ram_free(RAMBlock *block); int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp); -void qemu_ram_writeback(RAMBlock *block, ram_addr_t start, ram_addr_t length); +void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length); /* Clear whole block of mem */ static inline void qemu_ram_block_writeback(RAMBlock *block) { -qemu_ram_writeback(block, 0, block->used_length); +qemu_ram_msync(block, 0, block->used_length); } #define DIRTY_CLIENTS_ALL ((1 << DIRTY_MEMORY_NUM) - 1) diff --git a/exec.c b/exec.c index 2874bb5088..f5a8cdf370 100644 --- a/exec.c +++ b/exec.c @@ -2127,7 +2127,7 @@ int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp) * Otherwise no-op. * @Note: this is supposed to be a synchronous op. */ -void qemu_ram_writeback(RAMBlock *block, ram_addr_t start, ram_addr_t length) +void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length) { /* The requested range should fit in within the block range */ g_assert((start + length) <= block->used_length); diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 9b453423cf..21a199e53b 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1207,8 +1207,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) */ if (addr == 0xE08 && (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) { -qemu_ram_writeback(n->pmrdev->mr.ram_block, - 0, n->pmrdev->size); +qemu_ram_msync(n->pmrdev->mr.ram_block, 0, n->pmrdev->size); } memcpy(, ptr + addr, size); } else { diff --git a/memory.c b/memory.c index 601b749906..3e65e33ac4 100644 --- a/memory.c +++ b/memory.c @@ -2205,7 +2205,7 @@ void memory_region_do_writeback(MemoryRegion *mr, hwaddr addr, hwaddr size) * different types of memory regions */ if (mr->ram_block && mr->dirty_log_mask) { -qemu_ram_writeback(mr->ram_block, addr, size); +qemu_ram_msync(mr->ram_block, addr, size); } } -- 2.21.3
[PATCH 00/10] exec: Shear 'exec/ram_addr.h' and make NVMe device target-agnostic
Stefan suggested to make qemu_ram_writeback() target agnostic, Paolo to add memory_region_msync(), and Peter to remove "exec/ram_addr.h" [*]. I let a single function in this file, cpu_physical_memory_sync_dirty_bitmap(), to let the maintainer have the pleasure to remove this header definitively himself :) Based-on: <20200507155813.16169-1-phi...@redhat.com> "Move Xen accelerator code under accel/xen/" https://lists.gnu.org/archive/html/qemu-devel/2020-05/msg01722.html [*] https://www.mail-archive.com/qemu-block@nongnu.org/msg66242.html Philippe Mathieu-Daudé (10): exec: Rename qemu_ram_writeback() as qemu_ram_msync() exec/ramblock: Add missing 'qemu/rcu.h' include exec: Move tb_invalidate_phys_range() to 'exec/exec-all.h' exec/memory-internal: Check CONFIG_SOFTMMU instead of CONFIG_USER_ONLY exec: Move qemu_minrampagesize/qemu_maxrampagesize to 'qemu-common.h' exec: Move ramblock_recv_bitmap_offset() to migration/ram.c exec: Move all RAMBlock functions to 'exec/ramblock.h' hw/block: Let the NVMe emulated device be target-agnostic exec: Update coding style to make checkpatch.pl happy exec: Move cpu_physical_memory_* functions to 'exec/memory-internal.h' include/exec/cpu-common.h | 24 -- include/exec/exec-all.h| 14 +- include/exec/memory-internal.h | 307 +++- include/exec/ram_addr.h| 415 + include/exec/ramblock.h| 135 +++ include/qemu-common.h | 10 + migration/migration.h | 1 + accel/tcg/cputlb.c | 1 - accel/tcg/translate-all.c | 2 - exec.c | 2 +- hw/block/nvme.c| 5 +- hw/ppc/spapr.c | 1 - hw/ppc/spapr_caps.c| 2 +- hw/ppc/spapr_pci.c | 1 - hw/s390x/s390-stattrib-kvm.c | 1 - hw/s390x/s390-stattrib.c | 1 - hw/s390x/s390-virtio-ccw.c | 2 +- hw/vfio/spapr.c| 2 +- hw/virtio/vhost-user.c | 1 + hw/virtio/vhost.c | 1 + hw/virtio/virtio-balloon.c | 1 + memory.c | 4 +- migration/migration.c | 1 + migration/postcopy-ram.c | 1 + migration/ram.c| 8 + migration/savevm.c | 1 + stubs/ram-block.c | 2 +- target/ppc/kvm.c | 1 - target/s390x/kvm.c | 1 - util/vfio-helpers.c| 2 +- hw/block/Makefile.objs | 2 +- 31 files changed, 485 insertions(+), 467 deletions(-) -- 2.21.3
[Bug 1877418] Re: qemu-nbd freezes access to VDI file
** Attachment added: "shell-commands.txt" https://bugs.launchpad.net/qemu/+bug/1877418/+attachment/5367894/+files/shell-commands.txt ** Description changed: Mounted Oracle Virtualbox .vdi drive, which has GTP+BTRFS: + sudo modprobe nbd max_part=16 sudo qemu-nbd -c /dev/nbd0 /storage/btrfs.vdi Then I am operating on the btrfs filesystem and suddenly it freezes. ** Description changed: - Mounted Oracle Virtualbox .vdi drive, which has GTP+BTRFS: + Mounted Oracle Virtualbox .vdi drive (dynamically allocated), which has GTP+BTRFS: sudo modprobe nbd max_part=16 sudo qemu-nbd -c /dev/nbd0 /storage/btrfs.vdi Then I am operating on the btrfs filesystem and suddenly it freezes. -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1877418 Title: qemu-nbd freezes access to VDI file Status in QEMU: New Status in btrfs-progs package in Ubuntu: New Bug description: Mounted Oracle Virtualbox .vdi drive (dynamically allocated), which has GTP+BTRFS: sudo modprobe nbd max_part=16 sudo qemu-nbd -c /dev/nbd0 /storage/btrfs.vdi mount /dev/nbd0p1 /mydata/ Then I am operating on the btrfs filesystem and suddenly it freezes. To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/1877418/+subscriptions
[PATCH 4/6] target/ppc: Add missing braces in ppc_radix64_partition_scoped_xlate()
As per CODING_STYLE. Fixes: d04ea940c597 "target/ppc: Add support for Radix partition-scoped translation" Signed-off-by: Greg Kurz --- target/ppc/mmu-radix64.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index 5e2d912ee346..956519b3ad20 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -282,8 +282,9 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, int rwx, pate.dw0 & PRTBE_R_RPDS, h_raddr, h_page_size, , _cause, _addr) || ppc_radix64_check_prot(cpu, rwx, pte, _cause, h_prot, true)) { -if (pde_addr) /* address being translated was that of a guest pde */ +if (pde_addr) { /* address being translated was that of a guest pde */ fault_cause |= DSISR_PRTABLE_FAULT; +} if (cause_excp) { ppc_radix64_raise_hsi(cpu, rwx, eaddr, g_raddr, fault_cause); }
[Bug 1877418] Re: qemu-nbd freezes access to VDI file
** Attachment added: "dmesg-bug-qemu-nbd-btrfs-vdi.txt" https://bugs.launchpad.net/qemu/+bug/1877418/+attachment/5367895/+files/dmesg-bug-qemu-nbd-btrfs-vdi.txt ** Also affects: btrfs-progs (Ubuntu) Importance: Undecided Status: New ** Description changed: Mounted Oracle Virtualbox .vdi drive (dynamically allocated), which has GTP+BTRFS: sudo modprobe nbd max_part=16 sudo qemu-nbd -c /dev/nbd0 /storage/btrfs.vdi + mount /dev/nbd0p1 /mydata/ Then I am operating on the btrfs filesystem and suddenly it freezes. -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1877418 Title: qemu-nbd freezes access to VDI file Status in QEMU: New Status in btrfs-progs package in Ubuntu: New Bug description: Mounted Oracle Virtualbox .vdi drive (dynamically allocated), which has GTP+BTRFS: sudo modprobe nbd max_part=16 sudo qemu-nbd -c /dev/nbd0 /storage/btrfs.vdi mount /dev/nbd0p1 /mydata/ Then I am operating on the btrfs filesystem and suddenly it freezes. To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/1877418/+subscriptions
[PATCH 3/6] target/ppc: Don't initialize some local variables in ppc_radix64_xlate()
It is the job of the ppc_radix64_get_fully_qualified_addr() function which is called at the beginning of ppc_radix64_xlate() to set both lpid *and* pid. It doesn't buy us anything to initialize them first. Worse, a bug in ppc_radix64_get_fully_qualified_addr(), eg. failing to set either lpid or pid, would be undetectable by static analysis tools like coverity. Signed-off-by: Greg Kurz --- target/ppc/mmu-radix64.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index c76879f65b78..5e2d912ee346 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -433,7 +433,7 @@ static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, int rwx, bool cause_excp) { CPUPPCState *env = >env; -uint64_t lpid = 0, pid = 0; +uint64_t lpid, pid; ppc_v3_pate_t pate; int psize, prot; hwaddr g_raddr;
[PATCH 5/6] target/ppc: Fix arguments to ppc_radix64_partition_scoped_xlate()
The last two arguments have the bool type. Also, we shouldn't raise an exception when using gdbstub. This was found while reading the code. Since it only affects the powernv machine, I didn't dig further to find an actual bug. Fixes: d04ea940c597 "target/ppc: Add support for Radix partition-scoped translation" Signed-off-by: Greg Kurz --- target/ppc/mmu-radix64.c |6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index 956519b3ad20..ceeb3dfe2d49 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -335,7 +335,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, int rwx, */ ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, prtbe_addr, pate, _raddr, _prot, - _page_size, 1, 1); + _page_size, true, + cause_excp); if (ret) { return ret; } @@ -374,7 +375,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, int rwx, do { ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, pte_addr, pate, _raddr, _prot, - _page_size, 1, 1); + _page_size, true, + cause_excp); if (ret) { return ret; }
[PATCH 6/6] target/ppc: Don't update radix PTE R/C bits with gdbstub
gdbstub shouldn't silently change guest visible state when doing address translation. While here drop a not very useful comment. This was found while reading the code. I could verify that this affects both powernv and pseries, but I failed to observe any actual bug. Fixes: d04ea940c597 "target/ppc: Add support for Radix partition-scoped translation" Signed-off-by: Greg Kurz --- target/ppc/mmu-radix64.c | 36 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index ceeb3dfe2d49..bc51cd89a079 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -270,7 +270,8 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, int rwx, ppc_v3_pate_t pate, hwaddr *h_raddr, int *h_prot, int *h_page_size, bool pde_addr, - bool cause_excp) + bool cause_excp, + bool cause_rc_update) { int fault_cause = 0; hwaddr pte_addr; @@ -291,8 +292,9 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, int rwx, return 1; } -/* Update Reference and Change Bits */ -ppc_radix64_set_rc(cpu, rwx, pte, pte_addr, h_prot); +if (cause_rc_update) { +ppc_radix64_set_rc(cpu, rwx, pte, pte_addr, h_prot); +} return 0; } @@ -301,7 +303,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, int rwx, vaddr eaddr, uint64_t pid, ppc_v3_pate_t pate, hwaddr *g_raddr, int *g_prot, int *g_page_size, -bool cause_excp) +bool cause_excp, +bool cause_rc_update) { CPUState *cs = CPU(cpu); CPUPPCState *env = >env; @@ -336,7 +339,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, int rwx, ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, prtbe_addr, pate, _raddr, _prot, _page_size, true, - cause_excp); + cause_excp, + cause_rc_update); if (ret) { return ret; } @@ -376,7 +380,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, int rwx, ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, pte_addr, pate, _raddr, _prot, _page_size, true, - cause_excp); + cause_excp, + cause_rc_update); if (ret) { return ret; } @@ -408,7 +413,9 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, int rwx, return 1; } -ppc_radix64_set_rc(cpu, rwx, pte, pte_addr, g_prot); +if (cause_rc_update) { +ppc_radix64_set_rc(cpu, rwx, pte, pte_addr, g_prot); +} return 0; } @@ -433,7 +440,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, int rwx, static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, int rwx, bool relocation, hwaddr *raddr, int *psizep, int *protp, - bool cause_excp) + bool cause_excp, + bool cause_rc_update) { CPUPPCState *env = >env; uint64_t lpid, pid; @@ -483,7 +491,9 @@ static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, int rwx, if (relocation) { int ret = ppc_radix64_process_scoped_xlate(cpu, rwx, eaddr, pid, pate, _raddr, , - , cause_excp); + , + cause_excp, + cause_rc_update); if (ret) { return ret; } @@ -506,7 +516,9 @@ static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, int rwx, ret = ppc_radix64_partition_scoped_xlate(cpu, rwx, eaddr, g_raddr, pate, raddr, , , - 0, cause_excp); +
[PATCH 2/6] target/ppc: Pass const pointer to ppc_radix64_get_fully_qualified_addr()
This doesn't require write access to the CPU registers. Signed-off-by: Greg Kurz --- target/ppc/mmu-radix64.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index 1404e53deca8..c76879f65b78 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -28,7 +28,8 @@ #include "mmu-radix64.h" #include "mmu-book3s-v3.h" -static bool ppc_radix64_get_fully_qualified_addr(CPUPPCState *env, vaddr eaddr, +static bool ppc_radix64_get_fully_qualified_addr(const CPUPPCState *env, + vaddr eaddr, uint64_t *lpid, uint64_t *pid) { if (msr_hv) { /* MSR[HV] -> Hypervisor/bare metal */
[PATCH v2 4/4] target/arm: Fix tcg_gen_gvec_dup_imm vs DUP (indexed)
DUP (indexed) can duplicate 128-bit elements, so using esz unconditionally can assert in tcg_gen_gvec_dup_imm. Fixes: 8711e71f9cbb Reported-by: Laurent Desnogues Signed-off-by: Richard Henderson --- target/arm/translate-sve.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index c8649283be..83614e9e70 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -2044,7 +2044,11 @@ static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) unsigned nofs = vec_reg_offset(s, a->rn, index, esz); tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz); } else { -tcg_gen_gvec_dup_imm(esz, dofs, vsz, vsz, 0); +/* + * While dup_mem handles 128-bit elements, dup_imm does not. + * Thankfully element size doesn't matter for splatting zero. + */ +tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); } } return true; -- 2.20.1
[PATCH 0/6] target/ppc: Various clean-up and fixes for radix64
First three patches of this series are simple cleanups. The other ones fix some regressions introduced by Cedric's recent addition of partition-scoped translation. -- Greg --- Greg Kurz (6): target/ppc: Pass const pointer to ppc_radix64_get_prot_amr() target/ppc: Pass const pointer to ppc_radix64_get_fully_qualified_addr() target/ppc: Don't initialize some local variables in ppc_radix64_xlate() target/ppc: Add missing braces in ppc_radix64_partition_scoped_xlate() target/ppc: Fix arguments to ppc_radix64_partition_scoped_xlate() target/ppc: Don't update radix PTE R/C bits with gdbstub target/ppc/mmu-radix64.c | 46 +++--- target/ppc/mmu-radix64.h |4 ++-- 2 files changed, 33 insertions(+), 17 deletions(-)
[PATCH 1/6] target/ppc: Pass const pointer to ppc_radix64_get_prot_amr()
This doesn't require write access to the CPU structure. Signed-off-by: Greg Kurz --- target/ppc/mmu-radix64.h |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/ppc/mmu-radix64.h b/target/ppc/mmu-radix64.h index 96228546aa85..f28c5794d071 100644 --- a/target/ppc/mmu-radix64.h +++ b/target/ppc/mmu-radix64.h @@ -55,9 +55,9 @@ static inline int ppc_radix64_get_prot_eaa(uint64_t pte) (pte & R_PTE_EAA_X ? PAGE_EXEC : 0); } -static inline int ppc_radix64_get_prot_amr(PowerPCCPU *cpu) +static inline int ppc_radix64_get_prot_amr(const PowerPCCPU *cpu) { -CPUPPCState *env = >env; +const CPUPPCState *env = >env; int amr = env->spr[SPR_AMR] >> 62; /* We only care about key0 AMR63:62 */ int iamr = env->spr[SPR_IAMR] >> 62; /* We only care about key0 IAMR63:62 */
[PATCH v2 1/4] target/arm: Use tcg_gen_gvec_5_ptr for sve FMLA/FCMLA
Now that we can pass 7 parameters, do not encode register operands within simd_data. Reviewed-by: Alex Bennée Reviewed-by: Taylor Simpson Signed-off-by: Richard Henderson --- v2: Remove gen_helper_sve_fmla typedef (phil). --- target/arm/helper-sve.h| 45 +++ target/arm/sve_helper.c| 157 ++--- target/arm/translate-sve.c | 70 ++--- 3 files changed, 114 insertions(+), 158 deletions(-) diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h index 2f47279155..7a200755ac 100644 --- a/target/arm/helper-sve.h +++ b/target/arm/helper-sve.h @@ -1099,25 +1099,40 @@ DEF_HELPER_FLAGS_6(sve_fcadd_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve_fcadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) -DEF_HELPER_FLAGS_3(sve_fcmla_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve_ftmad_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve_ftmad_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index fdfa652094..33b5a54a47 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -3372,23 +3372,11 @@ DO_ZPZ_FP(sve_ucvt_dd, uint64_t, , uint64_to_float64) #undef DO_ZPZ_FP -/* 4-operand predicated multiply-add. This requires 7 operands to pass - * "properly", so we need to encode some of the registers into DESC. - */ -QEMU_BUILD_BUG_ON(SIMD_DATA_SHIFT + 20 > 32); - -static void do_fmla_zpzzz_h(CPUARMState *env, void *vg, uint32_t desc, +static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, +float_status *status, uint32_t desc, uint16_t neg1, uint16_t neg3) { intptr_t i = simd_oprsz(desc); -unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5); -unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5); -unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5); -unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5); -void *vd = >vfp.zregs[rd]; -void *vn = >vfp.zregs[rn]; -void *vm = >vfp.zregs[rm]; -void *va =
[PATCH v2 3/4] target/arm: Use clear_vec_high more effectively
Do not explicitly store zero to the NEON high part when we can pass !is_q to clear_vec_high. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- target/arm/translate-a64.c | 59 +++--- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index b6feb2b9dc..0947eaee05 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -939,11 +939,10 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size) { /* This always zero-extends and writes to a full 128 bit wide vector */ TCGv_i64 tmplo = tcg_temp_new_i64(); -TCGv_i64 tmphi; +TCGv_i64 tmphi = NULL; if (size < 4) { MemOp memop = s->be_data + size; -tmphi = tcg_const_i64(0); tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop); } else { bool be = s->be_data == MO_BE; @@ -961,12 +960,13 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size) } tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64)); -tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx)); - tcg_temp_free_i64(tmplo); -tcg_temp_free_i64(tmphi); -clear_vec_high(s, true, destidx); +if (tmphi) { +tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx)); +tcg_temp_free_i64(tmphi); +} +clear_vec_high(s, tmphi != NULL, destidx); } /* @@ -6960,8 +6960,8 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn) return; } -tcg_resh = tcg_temp_new_i64(); tcg_resl = tcg_temp_new_i64(); +tcg_resh = NULL; /* Vd gets bits starting at pos bits into Vm:Vn. This is * either extracting 128 bits from a 128:128 concatenation, or @@ -6973,7 +6973,6 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn) read_vec_element(s, tcg_resh, rm, 0, MO_64); do_ext64(s, tcg_resh, tcg_resl, pos); } -tcg_gen_movi_i64(tcg_resh, 0); } else { TCGv_i64 tcg_hh; typedef struct { @@ -6988,6 +6987,7 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn) pos -= 64; } +tcg_resh = tcg_temp_new_i64(); read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64); elt++; read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64); @@ -7003,9 +7003,12 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn) write_vec_element(s, tcg_resl, rd, 0, MO_64); tcg_temp_free_i64(tcg_resl); -write_vec_element(s, tcg_resh, rd, 1, MO_64); -tcg_temp_free_i64(tcg_resh); -clear_vec_high(s, true, rd); + +if (is_q) { +write_vec_element(s, tcg_resh, rd, 1, MO_64); +tcg_temp_free_i64(tcg_resh); +} +clear_vec_high(s, is_q, rd); } /* TBL/TBX @@ -7042,17 +7045,21 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn) * the input. */ tcg_resl = tcg_temp_new_i64(); -tcg_resh = tcg_temp_new_i64(); +tcg_resh = NULL; if (is_tblx) { read_vec_element(s, tcg_resl, rd, 0, MO_64); } else { tcg_gen_movi_i64(tcg_resl, 0); } -if (is_tblx && is_q) { -read_vec_element(s, tcg_resh, rd, 1, MO_64); -} else { -tcg_gen_movi_i64(tcg_resh, 0); + +if (is_q) { +tcg_resh = tcg_temp_new_i64(); +if (is_tblx) { +read_vec_element(s, tcg_resh, rd, 1, MO_64); +} else { +tcg_gen_movi_i64(tcg_resh, 0); +} } tcg_idx = tcg_temp_new_i64(); @@ -7072,9 +7079,12 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn) write_vec_element(s, tcg_resl, rd, 0, MO_64); tcg_temp_free_i64(tcg_resl); -write_vec_element(s, tcg_resh, rd, 1, MO_64); -tcg_temp_free_i64(tcg_resh); -clear_vec_high(s, true, rd); + +if (is_q) { +write_vec_element(s, tcg_resh, rd, 1, MO_64); +tcg_temp_free_i64(tcg_resh); +} +clear_vec_high(s, is_q, rd); } /* ZIP/UZP/TRN @@ -7111,7 +7121,7 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) } tcg_resl = tcg_const_i64(0); -tcg_resh = tcg_const_i64(0); +tcg_resh = is_q ? tcg_const_i64(0) : NULL; tcg_res = tcg_temp_new_i64(); for (i = 0; i < elements; i++) { @@ -7162,9 +7172,12 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) write_vec_element(s, tcg_resl, rd, 0, MO_64); tcg_temp_free_i64(tcg_resl); -write_vec_element(s, tcg_resh, rd, 1, MO_64); -tcg_temp_free_i64(tcg_resh); -clear_vec_high(s, true, rd); + +if (is_q) { +write_vec_element(s, tcg_resh, rd, 1, MO_64); +tcg_temp_free_i64(tcg_resh); +} +clear_vec_high(s, is_q, rd); } /* -- 2.20.1
Re: [PATCH v2 0/4] target/arm: Misc cleanups
On 5/7/20 10:23 AM, Richard Henderson wrote: > Version 2 adds a fix to a just merged patch. Ho hum. This is actually v4. Sigh. r~
[PATCH v2 2/4] target/arm: Use tcg_gen_gvec_mov for clear_vec_high
The 8-byte store for the end a !is_q operation can be merged with the other stores. Use a no-op vector move to trigger the expand_clr portion of tcg_gen_gvec_mov. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- target/arm/translate-a64.c | 10 ++ 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 62e5729904..b6feb2b9dc 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -496,14 +496,8 @@ static void clear_vec_high(DisasContext *s, bool is_q, int rd) unsigned ofs = fp_reg_offset(s, rd, MO_64); unsigned vsz = vec_full_reg_size(s); -if (!is_q) { -TCGv_i64 tcg_zero = tcg_const_i64(0); -tcg_gen_st_i64(tcg_zero, cpu_env, ofs + 8); -tcg_temp_free_i64(tcg_zero); -} -if (vsz > 16) { -tcg_gen_gvec_dup_imm(MO_64, ofs + 16, vsz - 16, vsz - 16, 0); -} +/* Nop move, with side effect of clearing the tail. */ +tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz); } void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v) -- 2.20.1
[PATCH v2 0/4] target/arm: Misc cleanups
Version 2 adds a fix to a just merged patch. r~ Richard Henderson (4): target/arm: Use tcg_gen_gvec_5_ptr for sve FMLA/FCMLA target/arm: Use tcg_gen_gvec_mov for clear_vec_high target/arm: Use clear_vec_high more effectively target/arm: Fix tcg_gen_gvec_dup_imm vs DUP (indexed) target/arm/helper-sve.h| 45 +++ target/arm/sve_helper.c| 157 ++--- target/arm/translate-a64.c | 69 target/arm/translate-sve.c | 76 +++--- 4 files changed, 157 insertions(+), 190 deletions(-) -- 2.20.1
Re: tst-arm-mte bug: PSTATE.TCO is cleared on exceptions
On 5/7/20 2:59 AM, Szabolcs Nagy wrote: > is there some recommended way to turn some form > of tracing on in qemu before i execute the > problematic application? I didn't add any tracing within mte. I can do so if we can guess what we're looking for. > or is it better if i try to extract a reproducer? > (that does not use the network) A reproducer would be most helpful. Something that can help is saving a VM snapshot with the kernel booted and the user logged in, just ready to run the test program. Then you can get back to exactly the state you want before things go wrong, even with a different qemu build. r~
Re: [PULL 04/10] target/arm: Use tcg_gen_gvec_dup_imm
On 5/7/20 7:39 AM, Laurent Desnogues wrote: >> @@ -2044,7 +2044,7 @@ static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) >> unsigned nofs = vec_reg_offset(s, a->rn, index, esz); >> tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz); >> } else { >> -tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0); >> +tcg_gen_gvec_dup_imm(esz, dofs, vsz, vsz, 0); > > For an indexed DUP, size can be 128-bit so that will fail the first > assert in tcg-op-gvec.c:do_dup. Ho hum, quite right. This has already been merged, so I'll send a follow-up fix. r~
Re: [PATCH] ppc/pnv: Fix NMI system reset SRR1 value
On 5/7/20 1:48 PM, Nicholas Piggin wrote: > Commit a77fed5bd926 ("ppc/pnv: Add support for NMI interface") got the > SRR1 setting wrong for sresets that hit outside of power-save states. > > Fix this, better documenting the source for the bit definitions. > > Fixes: a77fed5bd926 ("ppc/pnv: Add support for NMI interface") got the > Cc: Cédric Le Goater > Cc: David Gibson > Signed-off-by: Nicholas Piggin We should introduce some defines like the SRR1_WAKE ones in Linux and cleanup powerpc_reset_wakeup(). This function uses cryptic values. That can be done later on as a followup. Reviewed-by: Cédric Le Goater > --- > > Thanks to Cedric for pointing out concerns with a previous MCE patch > that unearthed this as well. Linux does not actually care what these > SRR1[42:45] bits look like for non-powersave sresets, but we should > follow documented behaviour as far as possible. We should introduce some defines like the SRR1_WAKE ones in Linux and cleanup powerpc_reset_wakeup(). This function uses cryptic values. That can be done later on as a followup. I am currently after a bug which results in a CPU hard lockup because of a pending interrupt. It occurs on a SMP PowerNV machine when it is stressed with IO, such as scp of a big file. I am suspecting more and more an issue with an interrupt being handled when the CPU is coming out of idle. I haven't seen anything wrong in the models. Unless this maybe : /* Pretend to be returning from doze always as we don't lose state */ *msr |= (0x1ull << (63 - 47)); I am not sure how in sync it is with PSSCR. Thanks, C. > hw/ppc/pnv.c | 26 -- > 1 file changed, 20 insertions(+), 6 deletions(-) > > diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c > index a3b7a8d0ff..1b4748ce6d 100644 > --- a/hw/ppc/pnv.c > +++ b/hw/ppc/pnv.c > @@ -1986,12 +1986,26 @@ static void pnv_cpu_do_nmi_on_cpu(CPUState *cs, > run_on_cpu_data arg) > > cpu_synchronize_state(cs); > ppc_cpu_do_system_reset(cs); > -/* > - * SRR1[42:45] is set to 0100 which the ISA defines as implementation > - * dependent. POWER processors use this for xscom triggered interrupts, > - * which come from the BMC or NMI IPIs. > - */ > -env->spr[SPR_SRR1] |= PPC_BIT(43); > +if (env->spr[SPR_SRR1] & PPC_BITMASK(46, 47)) { > +/* > + * Power-save wakeups, as indicated by non-zero SRR1[46:47] put the > + * wakeup reason in SRR1[42:45], system reset is indicated with 0b0100 > + * (PPC_BIT(43)). > + */ > +if (!(env->spr[SPR_SRR1] & PPC_BIT(43))) { > +warn_report("ppc_cpu_do_system_reset does not set system reset > wakeup reason"); > +env->spr[SPR_SRR1] |= PPC_BIT(43); > +} > +} else { > +/* > + * For non-powersave system resets, SRR1[42:45] are defined to be > + * implementation-dependent. The POWER9 User Manual specifies that > + * an external (SCOM driven, which may come from a BMC nmi command or > + * another CPU requesting a NMI IPI) system reset exception should be > + * 0b0010 (PPC_BIT(44)). > + */ > +env->spr[SPR_SRR1] |= PPC_BIT(44); > +} > } > > static void pnv_nmi(NMIState *n, int cpu_index, Error **errp) >
[Bug 1877418] [NEW] qemu-nbd freezes access to VDI file
Public bug reported: Mounted Oracle Virtualbox .vdi drive, which has GTP+BTRFS: sudo qemu-nbd -c /dev/nbd0 /storage/btrfs.vdi Then I am operating on the btrfs filesystem and suddenly it freezes. ** Affects: qemu Importance: Undecided Status: New -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1877418 Title: qemu-nbd freezes access to VDI file Status in QEMU: New Bug description: Mounted Oracle Virtualbox .vdi drive, which has GTP+BTRFS: sudo qemu-nbd -c /dev/nbd0 /storage/btrfs.vdi Then I am operating on the btrfs filesystem and suddenly it freezes. To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/1877418/+subscriptions
Re: device hotplug & file handles
On 5/7/20 9:49 AM, Gerd Hoffmann wrote: Hi, For usb device pass-through (aka -device usb-host) it would be very useful to pass file handles from libvirt to qemu. The workflow would change from ... (1) libvirt enables access to /dev/usb/$bus/$dev (2) libvirt passes $bus + $dev (using hostbus + hostaddr properties) to qemu. (3) qemu opens /dev/usb/$bus/$dev ... to ... (1) libvirt opens /dev/usb/$bus/$dev (2) libvirt passes filehandle to qemu. Question is how can we pass the file descriptor best? My idea would be to simply add an fd property to usb-host: * Coldplug would be "-device usb-host,fd=" (cmd line). * Hotplug would be "device_add usb-host,fd=" (monitor). Will that work from libvirt point of view? Or does anyone have an better idea? Qemu already has -add-fd (both a CLI version, and a QMP version when a Unix socket can pass fds), at which point any existing interface that uses qemu_open() will understand the magic syntax /dev/fdset/NNN to refer to the existing fd previously passed in via -add-fd. Libvirt is already able to use this feature for some cases (for example, see src/qemu/qemu_command.c:qemuBuildChrChardevFileStr). So all that remains is making sure -device usb-host uses qemu_open(), and if it didn't already do so, also making sure libvirt can find a way to introspect when usb-host started supporting fdset usage. Or put another way, let's use the generic fd mechanism that qemu already supports, rather than inventing yet another syntax. thanks, Gerd PS: background: https://bugzilla.redhat.com/show_bug.cgi?id=1595525 -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Re: [RFC PATCH] qom: Implement qom-get HMP command
* Markus Armbruster (arm...@redhat.com) wrote: > Cédric Le Goater writes: > > > From: "Dr. David Alan Gilbert" > > > > Reimplement it based on qmp_qom_get() to avoid converting QObjects back > > to strings. > > > > Inspired-by: Paolo Bonzini > > Signed-off-by: Andreas Färber > > > > Slight fix for bit-rot: > > Signed-off-by: Dr. David Alan Gilbert > > [clg: updates for QEMU 5.0 ] > > Signed-off-by: Cédric Le Goater > > --- > > > > I would like to restart the discussion on qom-get command to understand > > what was the conclusion and see if things have changed since. > > I've since learned more about QOM. I believe we should do HMP qom-get, > but not quite like this patch does. Let me explain. > > A QOM property has ->get() and ->set() methods to expose the property > via the Visitor interface. > > ->get() works with an output visitor. With the QObject output visitor, > you can get the property value as a QObject. QMP qom-get uses this. > With the string output visitor, you can get it as a string. Your > proposed HMP qom-get uses this. > > ->set() works with an input visitor. With the QObject input visitor, > you can set the property value from a QObject. QMP qom-set uses this. > With the string input visitor, you can set it from a string. HMP > qom-set uses this. With the options visitor, you can set it from a > QemuOpts. CLI -object uses this. > > The Visitor interface supports arbitrary QAPI types. The ->get() and > ->set() methods can use them all. > > Some visitors, howerver, carry restrictions: > > * The string output visitor does not implement support for visiting > * QAPI structs, alternates, null, or arbitrary QTypes. It also > * requires a non-null list argument to visit_start_list(). > > * The string input visitor does not implement support for visiting > * QAPI structs, alternates, null, or arbitrary QTypes. Only flat lists > * of integers (except type "size") are supported. > > * The Opts input visitor does not implement support for visiting QAPI > * alternates, numbers (other than integers), null, or arbitrary > * QTypes. It also requires a non-null list argument to > * visit_start_list(). > > If you try to qom-set a property whose ->set() uses something the string > input visitor doesn't support, QEMU crashes. Same for -object, and your > proposed qom-get. Crashing would be bad. > I'm not aware of such a ->set(), but this is a death trap all the same. > > The obvious way to disarm it is removing the restrictions. > > One small step we took towards that goal is visible in the comments > above: support for flat lists of integers. The code for that will make > your eyes bleed. It's been a thorn in my side ever since I inherited > QAPI. Perhaps it could be done better. But there's a reason why it > should not be done at all: it's language design. > > The QObject visitors translate between QAPI types and our internal > representation of JSON. The JSON parser and formatter complete the > translation to and from JSON. Sensible enough. > > The string visitors translate between QAPI and ... well, a barely > documented language of our own "design". Tolerable when the language it > limited to numbers, booleans and strings. Foolish when it grows into > something isomorphic to JSON. > > This is a dead end. > > Can we still implement a safe and tolerably useful HMP qom-get and > qom-set? I think we can. Remember the basic rule of HMP command > implementation: wrap around the QMP command. So let's try that. > > hmp_qom_get() calls qmp_qom_get() and formats the resulting QObject with > qobject_to_json_pretty(). Why don't we *just* do this? OK, we wont fix the qom-set or the CLI, but if we just get hmp_qom_get to call QMP, format it into json and then dump the json to the user, then we get a usable, if not pretty, qom-get command, without having to solve all these hard problems to move it forward? Dave > hmp_qom_get() parses the value with qobject_from_json(), and feeds the > resulting QObject to qmp_qom_set(). To avoid interference with the HMP > parser, we probably want argument type 'S'. > > The two commands then parse / print JSON instead of the string visitors' > language. Is that bad? > > * Integers: qom-set loses support for hexadecimal and octal. > > * Bools: qom-set loses alternate spellings of true and false. > > * Floating-point numbers: no change > > * Strings: gain enclosing quotes. > > * List of integers: gain enclosing square brackets. qom-set loses > support for ranges. > > The only use of lists I can find is memory-backend property > host-nodes. > > * Everything else: lose support for crashing QEMU. > > Again, I'm not aware of properties that crash now. > > I think these changes are just fine. If you disagree, you could try to > mitigate with convenience magic like "if it doesn't parse as JSON, and > it looks hexadecimal, convert to decimal and try again", or "looks kind > of stringy, enclose in
[PULL 09/12] migration/rdma: fix a memleak on error path in rdma_start_incoming_migration
From: Pan Nengyuan 'rdma->host' is malloced in qemu_rdma_data_init, but forgot to free on the error path in rdma_start_incoming_migration(), this patch fix that. The leak stack: Direct leak of 2 byte(s) in 1 object(s) allocated from: #0 0x7fb7add18ae8 in __interceptor_malloc (/lib64/libasan.so.5+0xefae8) #1 0x7fb7ad0df1d5 in g_malloc (/lib64/libglib-2.0.so.0+0x531d5) #2 0x7fb7ad0f8b32 in g_strdup (/lib64/libglib-2.0.so.0+0x6cb32) #3 0x55a0464a0f6f in qemu_rdma_data_init /mnt/sdb/qemu/migration/rdma.c:2647 #4 0x55a0464b0e76 in rdma_start_incoming_migration /mnt/sdb/qemu/migration/rdma.c:4020 #5 0x55a0463f898a in qemu_start_incoming_migration /mnt/sdb/qemu/migration/migration.c:365 #6 0x55a0458c75d3 in qemu_init /mnt/sdb/qemu/softmmu/vl.c:4438 #7 0x55a046a3d811 in main /mnt/sdb/qemu/softmmu/main.c:48 #8 0x7fb7a8417872 in __libc_start_main (/lib64/libc.so.6+0x23872) #9 0x55a04536b26d in _start (/mnt/sdb/qemu/build/x86_64-softmmu/qemu-system-x86_64+0x286926d) Reported-by: Euler Robot Signed-off-by: Pan Nengyuan Message-Id: <20200420102727.17339-1-pannengy...@huawei.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/rdma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/migration/rdma.c b/migration/rdma.c index f61587891b..967fda5b0c 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -4056,6 +4056,7 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp) return; err: error_propagate(errp, local_err); +g_free(rdma->host); g_free(rdma); g_free(rdma_return_path); } -- 2.26.2
[PULL 11/12] migration/multifd: fix memleaks in multifd_new_send_channel_async
From: Pan Nengyuan When error happen in multifd_new_send_channel_async, 'sioc' will not be used to create the multifd_send_thread. Let's free it to avoid a memleak. And also do error_free after migrate_set_error() to avoid another leak in the same place. The leak stack: Direct leak of 2880 byte(s) in 8 object(s) allocated from: #0 0x7f20b5118ae8 in __interceptor_malloc (/lib64/libasan.so.5+0xefae8) #1 0x7f20b44df1d5 in g_malloc (/lib64/libglib-2.0.so.0+0x531d5) #2 0x564133bce18b in object_new_with_type /mnt/sdb/backup/qemu/qom/object.c:683 #3 0x564133eea950 in qio_channel_socket_new /mnt/sdb/backup/qemu/io/channel-socket.c:56 #4 0x5641339cfe4f in socket_send_channel_create /mnt/sdb/backup/qemu/migration/socket.c:37 #5 0x564133a10328 in multifd_save_setup /mnt/sdb/backup/qemu/migration/multifd.c:772 #6 0x5641339cebed in migrate_fd_connect /mnt/sdb/backup/qemu/migration/migration.c:3530 #7 0x5641339d15e4 in migration_channel_connect /mnt/sdb/backup/qemu/migration/channel.c:92 #8 0x5641339cf5b7 in socket_outgoing_migration /mnt/sdb/backup/qemu/migration/socket.c:108 Direct leak of 384 byte(s) in 8 object(s) allocated from: #0 0x7f20b5118cf0 in calloc (/lib64/libasan.so.5+0xefcf0) #1 0x7f20b44df22d in g_malloc0 (/lib64/libglib-2.0.so.0+0x5322d) #2 0x56413406fc17 in error_setv /mnt/sdb/backup/qemu/util/error.c:61 #3 0x564134070464 in error_setg_errno_internal /mnt/sdb/backup/qemu/util/error.c:109 #4 0x5641340851be in inet_connect_addr /mnt/sdb/backup/qemu/util/qemu-sockets.c:379 #5 0x5641340851be in inet_connect_saddr /mnt/sdb/backup/qemu/util/qemu-sockets.c:458 #6 0x5641340870ab in socket_connect /mnt/sdb/backup/qemu/util/qemu-sockets.c:1105 #7 0x564133eeaabf in qio_channel_socket_connect_sync /mnt/sdb/backup/qemu/io/channel-socket.c:145 #8 0x564133eeabf5 in qio_channel_socket_connect_worker /mnt/sdb/backup/qemu/io/channel-socket.c:168 Indirect leak of 360 byte(s) in 8 object(s) allocated from: #0 0x7f20b5118ae8 in __interceptor_malloc (/lib64/libasan.so.5+0xefae8) #1 0x7f20af901817 in __GI___vasprintf_chk (/lib64/libc.so.6+0x10d817) #2 0x7f20b451fa6c in g_vasprintf (/lib64/libglib-2.0.so.0+0x93a6c) #3 0x7f20b44f8cd0 in g_strdup_vprintf (/lib64/libglib-2.0.so.0+0x6ccd0) #4 0x7f20b44f8d8c in g_strdup_printf (/lib64/libglib-2.0.so.0+0x6cd8c) #5 0x56413406fc86 in error_setv /mnt/sdb/backup/qemu/util/error.c:65 #6 0x564134070464 in error_setg_errno_internal /mnt/sdb/backup/qemu/util/error.c:109 #7 0x5641340851be in inet_connect_addr /mnt/sdb/backup/qemu/util/qemu-sockets.c:379 #8 0x5641340851be in inet_connect_saddr /mnt/sdb/backup/qemu/util/qemu-sockets.c:458 #9 0x5641340870ab in socket_connect /mnt/sdb/backup/qemu/util/qemu-sockets.c:1105 #10 0x564133eeaabf in qio_channel_socket_connect_sync /mnt/sdb/backup/qemu/io/channel-socket.c:145 #11 0x564133eeabf5 in qio_channel_socket_connect_worker /mnt/sdb/backup/qemu/io/channel-socket.c:168 Reported-by: Euler Robot Signed-off-by: Pan Nengyuan Message-Id: <20200506095416.26099-2-pannengy...@huawei.com> Reviewed-by: Juan Quintela Signed-off-by: Dr. David Alan Gilbert --- migration/multifd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/migration/multifd.c b/migration/multifd.c index 9123c111a3..e3efd33a0d 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -727,6 +727,8 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) * its status. */ p->quit = true; +object_unref(OBJECT(sioc)); +error_free(local_err); } else { p->c = QIO_CHANNEL(sioc); qio_channel_set_delay(p->c, false); -- 2.26.2
[PULL 07/12] migration/throttle: Add cpu-throttle-tailslow migration parameter
From: Keqian Zhu At the tail stage of throttling, the Guest is very sensitive to CPU percentage while the @cpu-throttle-increment is excessive usually at tail stage. If this parameter is true, we will compute the ideal CPU percentage used by the Guest, which may exactly make the dirty rate match the dirty rate threshold. Then we will choose a smaller throttle increment between the one specified by @cpu-throttle-increment and the one generated by ideal CPU percentage. Therefore, it is compatible to traditional throttling, meanwhile the throttle increment won't be excessive at tail stage. This may make migration time longer, and is disabled by default. Signed-off-by: Keqian Zhu Message-Id: <20200413101508.54793-1-zhukeqi...@huawei.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 13 migration/ram.c | 25 +- monitor/hmp-cmds.c| 8 qapi/migration.json | 48 +++ 4 files changed, 89 insertions(+), 5 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 79f16f6625..f5dbffc442 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -785,6 +785,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; params->has_cpu_throttle_increment = true; params->cpu_throttle_increment = s->parameters.cpu_throttle_increment; +params->has_cpu_throttle_tailslow = true; +params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow; params->has_tls_creds = true; params->tls_creds = g_strdup(s->parameters.tls_creds); params->has_tls_hostname = true; @@ -1327,6 +1329,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params, dest->cpu_throttle_increment = params->cpu_throttle_increment; } +if (params->has_cpu_throttle_tailslow) { +dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow; +} + if (params->has_tls_creds) { assert(params->tls_creds->type == QTYPE_QSTRING); dest->tls_creds = g_strdup(params->tls_creds->u.s); @@ -1415,6 +1421,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) s->parameters.cpu_throttle_increment = params->cpu_throttle_increment; } +if (params->has_cpu_throttle_tailslow) { +s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow; +} + if (params->has_tls_creds) { g_free(s->parameters.tls_creds); assert(params->tls_creds->type == QTYPE_QSTRING); @@ -3597,6 +3607,8 @@ static Property migration_properties[] = { DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, parameters.cpu_throttle_increment, DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), +DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, + parameters.cpu_throttle_tailslow, false), DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, parameters.max_bandwidth, MAX_THROTTLE), DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, @@ -3703,6 +3715,7 @@ static void migration_instance_init(Object *obj) params->has_throttle_trigger_threshold = true; params->has_cpu_throttle_initial = true; params->has_cpu_throttle_increment = true; +params->has_cpu_throttle_tailslow = true; params->has_max_bandwidth = true; params->has_downtime_limit = true; params->has_x_checkpoint_delay = true; diff --git a/migration/ram.c b/migration/ram.c index 53166fc279..52fc032b83 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -616,20 +616,34 @@ static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block, * able to complete migration. Some workloads dirty memory way too * fast and will not effectively converge, even with auto-converge. */ -static void mig_throttle_guest_down(void) +static void mig_throttle_guest_down(uint64_t bytes_dirty_period, +uint64_t bytes_dirty_threshold) { MigrationState *s = migrate_get_current(); uint64_t pct_initial = s->parameters.cpu_throttle_initial; -uint64_t pct_icrement = s->parameters.cpu_throttle_increment; +uint64_t pct_increment = s->parameters.cpu_throttle_increment; +bool pct_tailslow = s->parameters.cpu_throttle_tailslow; int pct_max = s->parameters.max_cpu_throttle; +uint64_t throttle_now = cpu_throttle_get_percentage(); +uint64_t cpu_now, cpu_ideal, throttle_inc; + /* We have not started throttling yet. Let's start it. */ if (!cpu_throttle_active()) { cpu_throttle_set(pct_initial); } else { /* Throttling already on, just increase the rate */ -cpu_throttle_set(MIN(cpu_throttle_get_percentage() + pct_icrement, - pct_max)); +if
[PULL 10/12] migration/xbzrle: add encoding rate
From: Wei Wang Users may need to check the xbzrle encoding rate to know if the guest memory is xbzrle encoding-friendly, and dynamically turn off the encoding if the encoding rate is low. Signed-off-by: Yi Sun Signed-off-by: Wei Wang Message-Id: <1588208375-19556-1-git-send-email-wei.w.w...@intel.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 1 + migration/ram.c | 39 +-- monitor/hmp-cmds.c| 2 ++ qapi/migration.json | 5 - 4 files changed, 44 insertions(+), 3 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index f5dbffc442..0bb042a0f7 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -932,6 +932,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) info->xbzrle_cache->pages = xbzrle_counters.pages; info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss; info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate; +info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate; info->xbzrle_cache->overflow = xbzrle_counters.overflow; } diff --git a/migration/ram.c b/migration/ram.c index 08eb382f53..859f835f1a 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -327,6 +327,10 @@ struct RAMState { uint64_t num_dirty_pages_period; /* xbzrle misses since the beginning of the period */ uint64_t xbzrle_cache_miss_prev; +/* Amount of xbzrle pages since the beginning of the period */ +uint64_t xbzrle_pages_prev; +/* Amount of xbzrle encoded bytes since the beginning of the period */ +uint64_t xbzrle_bytes_prev; /* compression statistics since the beginning of the period */ /* amount of count that no free thread to compress data */ @@ -710,6 +714,18 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, return -1; } +/* + * Reaching here means the page has hit the xbzrle cache, no matter what + * encoding result it is (normal encoding, overflow or skipping the page), + * count the page as encoded. This is used to caculate the encoding rate. + * + * Example: 2 pages (8KB) being encoded, first page encoding generates 2KB, + * 2nd page turns out to be skipped (i.e. no new bytes written to the + * page), the overall encoding rate will be 8KB / 2KB = 4, which has the + * skipped page included. In this way, the encoding rate can tell if the + * guest page is good for xbzrle encoding. + */ +xbzrle_counters.pages++; prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); /* save current buffer into memory */ @@ -740,6 +756,7 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, } else if (encoded_len == -1) { trace_save_xbzrle_page_overflow(); xbzrle_counters.overflow++; +xbzrle_counters.bytes += TARGET_PAGE_SIZE; return -1; } @@ -750,8 +767,12 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, qemu_put_be16(rs->f, encoded_len); qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len); bytes_xbzrle += encoded_len + 1 + 2; -xbzrle_counters.pages++; -xbzrle_counters.bytes += bytes_xbzrle; +/* + * Like compressed_size (please see update_compress_thread_counts), + * the xbzrle encoded bytes don't count the 8 byte header with + * RAM_SAVE_FLAG_CONTINUE. + */ +xbzrle_counters.bytes += bytes_xbzrle - 8; ram_counters.transferred += bytes_xbzrle; return 1; @@ -884,9 +905,23 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) } if (migrate_use_xbzrle()) { +double encoded_size, unencoded_size; + xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss - rs->xbzrle_cache_miss_prev) / page_count; rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss; +unencoded_size = (xbzrle_counters.pages - rs->xbzrle_pages_prev) * + TARGET_PAGE_SIZE; +encoded_size = xbzrle_counters.bytes - rs->xbzrle_bytes_prev; +if (xbzrle_counters.pages == rs->xbzrle_pages_prev) { +xbzrle_counters.encoding_rate = 0; +} else if (!encoded_size) { +xbzrle_counters.encoding_rate = UINT64_MAX; +} else { +xbzrle_counters.encoding_rate = unencoded_size / encoded_size; +} +rs->xbzrle_pages_prev = xbzrle_counters.pages; +rs->xbzrle_bytes_prev = xbzrle_counters.bytes; } if (migrate_use_compression()) { diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index ade1f85e0c..9c61e769ca 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -303,6 +303,8 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info->xbzrle_cache->cache_miss); monitor_printf(mon, "xbzrle cache miss rate:
[PULL 08/12] migration/ram: Consolidate variable reset after placement in ram_load_postcopy()
From: David Hildenbrand Let's consolidate resetting the variables. Cc: "Dr. David Alan Gilbert" Cc: Juan Quintela Cc: Peter Xu Signed-off-by: David Hildenbrand Message-Id: <20200421085300.7734-10-da...@redhat.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert Fixup for context conflicts with 91ba442 --- migration/ram.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 52fc032b83..08eb382f53 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3147,7 +3147,7 @@ static int ram_load_postcopy(QEMUFile *f) /* Temporary page that is later 'placed' */ void *postcopy_host_page = mis->postcopy_tmp_page; void *this_host = NULL; -bool all_zero = false; +bool all_zero = true; int target_pages = 0; while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { @@ -3174,7 +3174,6 @@ static int ram_load_postcopy(QEMUFile *f) addr &= TARGET_PAGE_MASK; trace_ram_load_postcopy_loop((uint64_t)addr, flags); -place_needed = false; if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE | RAM_SAVE_FLAG_COMPRESS_PAGE)) { block = ram_block_from_stream(f, flags); @@ -3199,9 +3198,7 @@ static int ram_load_postcopy(QEMUFile *f) */ page_buffer = postcopy_host_page + ((uintptr_t)host & (block->page_size - 1)); -/* If all TP are zero then we can optimise the place */ if (target_pages == 1) { -all_zero = true; this_host = (void *)QEMU_ALIGN_DOWN((uintptr_t)host, block->page_size); } else { @@ -3221,7 +3218,6 @@ static int ram_load_postcopy(QEMUFile *f) */ if (target_pages == (block->page_size / TARGET_PAGE_SIZE)) { place_needed = true; -target_pages = 0; } place_source = postcopy_host_page; } @@ -3303,6 +3299,10 @@ static int ram_load_postcopy(QEMUFile *f) ret = postcopy_place_page(mis, place_dest, place_source, block); } +place_needed = false; +target_pages = 0; +/* Assume we have a zero page until we detect something different */ +all_zero = true; } } -- 2.26.2
[PULL 06/12] migration/colo: Add missing error-propagation code
From: Philippe Mathieu-Daudé Running the coccinelle script produced: $ spatch \ --macro-file scripts/cocci-macro-file.h --include-headers \ --sp-file scripts/coccinelle/find-missing-error_propagate.cocci \ --keep-comments --smpl-spacing --dir . HANDLING: ./migration/colo.c [[manual check required: error_propagate() might be missing in migrate_set_block_enabled() ./migration/colo.c:439:4]] Add the missing error_propagate() after review. Reviewed-by: Juan Quintela Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20200413205250.687-1-f4...@amsat.org> Signed-off-by: Dr. David Alan Gilbert --- migration/colo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/migration/colo.c b/migration/colo.c index 1b3493729b..d015d4f84e 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -443,6 +443,9 @@ static int colo_do_checkpoint_transaction(MigrationState *s, /* Disable block migration */ migrate_set_block_enabled(false, _err); +if (local_err) { +goto out; +} qemu_mutex_lock_iothread(); #ifdef CONFIG_REPLICATION -- 2.26.2
[PULL 05/12] docs/devel/migration: start a debugging section
From: Marc-André Lureau Explain how to use analyze-migration.py, this may help. Signed-off-by: Marc-André Lureau Message-Id: <20200330174852.456148-1-marcandre.lur...@redhat.com> Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Daniel P. Berrangé Signed-off-by: Dr. David Alan Gilbert --- docs/devel/migration.rst | 20 1 file changed, 20 insertions(+) diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst index e88918f763..2eb08624fc 100644 --- a/docs/devel/migration.rst +++ b/docs/devel/migration.rst @@ -50,6 +50,26 @@ All these migration protocols use the same infrastructure to save/restore state devices. This infrastructure is shared with the savevm/loadvm functionality. +Debugging += + +The migration stream can be analyzed thanks to `scripts/analyze_migration.py`. + +Example usage: + +.. code-block:: shell + + $ qemu-system-x86_64 + (qemu) migrate "exec:cat > mig" + $ ./scripts/analyze_migration.py -f mig + { +"ram (3)": { +"section sizes": { +"pc.ram": "0x0800", + ... + +See also ``analyze_migration.py -h`` help for more options. + Common infrastructure = -- 2.26.2
[PULL 12/12] migration/multifd: Do error_free after migrate_set_error to avoid memleaks
From: Pan Nengyuan When error happen in multifd_send_thread, it use error_copy to set migrate error in multifd_send_terminate_threads(). We should call error_free after it. Similarly, fix another two places in multifd_recv_thread/multifd_save_cleanup. The leak stack: Direct leak of 48 byte(s) in 1 object(s) allocated from: #0 0x7f781af07cf0 in calloc (/lib64/libasan.so.5+0xefcf0) #1 0x7f781a2ce22d in g_malloc0 (/lib64/libglib-2.0.so.0+0x5322d) #2 0x55ee1d075c17 in error_setv /mnt/sdb/backup/qemu/util/error.c:61 #3 0x55ee1d076464 in error_setg_errno_internal /mnt/sdb/backup/qemu/util/error.c:109 #4 0x55ee1cef066e in qio_channel_socket_writev /mnt/sdb/backup/qemu/io/channel-socket.c:569 #5 0x55ee1cee806b in qio_channel_writev /mnt/sdb/backup/qemu/io/channel.c:207 #6 0x55ee1cee806b in qio_channel_writev_all /mnt/sdb/backup/qemu/io/channel.c:171 #7 0x55ee1cee8248 in qio_channel_write_all /mnt/sdb/backup/qemu/io/channel.c:257 #8 0x55ee1ca12c9a in multifd_send_thread /mnt/sdb/backup/qemu/migration/multifd.c:657 #9 0x55ee1d0607fc in qemu_thread_start /mnt/sdb/backup/qemu/util/qemu-thread-posix.c:519 #10 0x7f78159ae2dd in start_thread (/lib64/libpthread.so.0+0x82dd) #11 0x7f78156df4b2 in __GI___clone (/lib64/libc.so.6+0xfc4b2) Indirect leak of 52 byte(s) in 1 object(s) allocated from: #0 0x7f781af07f28 in __interceptor_realloc (/lib64/libasan.so.5+0xeff28) #1 0x7f78156f07d9 in __GI___vasprintf_chk (/lib64/libc.so.6+0x10d7d9) #2 0x7f781a30ea6c in g_vasprintf (/lib64/libglib-2.0.so.0+0x93a6c) #3 0x7f781a2e7cd0 in g_strdup_vprintf (/lib64/libglib-2.0.so.0+0x6ccd0) #4 0x7f781a2e7d8c in g_strdup_printf (/lib64/libglib-2.0.so.0+0x6cd8c) #5 0x55ee1d075c86 in error_setv /mnt/sdb/backup/qemu/util/error.c:65 #6 0x55ee1d076464 in error_setg_errno_internal /mnt/sdb/backup/qemu/util/error.c:109 #7 0x55ee1cef066e in qio_channel_socket_writev /mnt/sdb/backup/qemu/io/channel-socket.c:569 #8 0x55ee1cee806b in qio_channel_writev /mnt/sdb/backup/qemu/io/channel.c:207 #9 0x55ee1cee806b in qio_channel_writev_all /mnt/sdb/backup/qemu/io/channel.c:171 #10 0x55ee1cee8248 in qio_channel_write_all /mnt/sdb/backup/qemu/io/channel.c:257 #11 0x55ee1ca12c9a in multifd_send_thread /mnt/sdb/backup/qemu/migration/multifd.c:657 #12 0x55ee1d0607fc in qemu_thread_start /mnt/sdb/backup/qemu/util/qemu-thread-posix.c:519 #13 0x7f78159ae2dd in start_thread (/lib64/libpthread.so.0+0x82dd) #14 0x7f78156df4b2 in __GI___clone (/lib64/libc.so.6+0xfc4b2) Reported-by: Euler Robot Signed-off-by: Pan Nengyuan Message-Id: <20200506095416.26099-3-pannengy...@huawei.com> Reviewed-by: Juan Quintela Signed-off-by: Dr. David Alan Gilbert --- migration/multifd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/migration/multifd.c b/migration/multifd.c index e3efd33a0d..5a3e4d0d46 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -550,6 +550,7 @@ void multifd_save_cleanup(void) multifd_send_state->ops->send_cleanup(p, _err); if (local_err) { migrate_set_error(migrate_get_current(), local_err); +error_free(local_err); } } qemu_sem_destroy(_send_state->channels_ready); @@ -688,6 +689,7 @@ out: if (local_err) { trace_multifd_send_error(p->id); multifd_send_terminate_threads(local_err); +error_free(local_err); } /* @@ -965,6 +967,7 @@ static void *multifd_recv_thread(void *opaque) if (local_err) { multifd_recv_terminate_threads(local_err); +error_free(local_err); } qemu_mutex_lock(>mutex); p->running = false; -- 2.26.2
[PULL 04/12] migration: move the units of migrate parameters from milliseconds to ms
From: Mao Zhongyi Signed-off-by: Mao Zhongyi Reviewed-by: Juan Quintela Message-Id: <474bb6cf67defb8be9de5035c11aee57a680557a.1585641083.git.maozhon...@cmss.chinamobile.com> Reviewed-by: Stefano Garzarella Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 2 +- monitor/hmp-cmds.c| 8 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 6e079efdcc..79f16f6625 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1214,7 +1214,7 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "downtime_limit", "an integer in the range of 0 to " -stringify(MAX_MIGRATE_DOWNTIME)" milliseconds"); +stringify(MAX_MIGRATE_DOWNTIME)" ms"); return false; } diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index 9bb6946fbf..1552dee489 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -231,18 +231,18 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) monitor_printf(mon, "\n"); } -monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n", +monitor_printf(mon, "total time: %" PRIu64 " ms\n", info->total_time); if (info->has_expected_downtime) { -monitor_printf(mon, "expected downtime: %" PRIu64 " milliseconds\n", +monitor_printf(mon, "expected downtime: %" PRIu64 " ms\n", info->expected_downtime); } if (info->has_downtime) { -monitor_printf(mon, "downtime: %" PRIu64 " milliseconds\n", +monitor_printf(mon, "downtime: %" PRIu64 " ms\n", info->downtime); } if (info->has_setup_time) { -monitor_printf(mon, "setup: %" PRIu64 " milliseconds\n", +monitor_printf(mon, "setup: %" PRIu64 " ms\n", info->setup_time); } } -- 2.26.2
[PULL 03/12] monitor/hmp-cmds: add hmp_handle_error() for hmp_migrate_set_speed()
From: Mao Zhongyi Signed-off-by: Mao Zhongyi Reviewed-by: Juan Quintela Message-Id: <305323f835436023c53d759f5ab18af3ec874183.1585641083.git.maozhon...@cmss.chinamobile.com> Signed-off-by: Dr. David Alan Gilbert --- monitor/hmp-cmds.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index 7f6e982dc8..9bb6946fbf 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -1198,8 +1198,11 @@ void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict) /* Kept for backwards compatibility */ void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict) { +Error *err = NULL; + int64_t value = qdict_get_int(qdict, "value"); -qmp_migrate_set_speed(value, NULL); +qmp_migrate_set_speed(value, ); +hmp_handle_error(mon, err); } void hmp_migrate_set_capability(Monitor *mon, const QDict *qdict) -- 2.26.2
[PULL 02/12] migration/migration: improve error reporting for migrate parameters
From: Mao Zhongyi use QERR_INVALID_PARAMETER_VALUE instead of "Parameter '%s' expects" for consistency. Signed-off-by: Mao Zhongyi Message-Id: <4ce71da4a5f98ad6ead0806ec71043473dcb4c07.1585641083.git.maozhon...@cmss.chinamobile.com> Reviewed-by: Juan Quintela Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 20 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 8f27174ff6..6e079efdcc 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1202,16 +1202,19 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) } if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) { -error_setg(errp, "Parameter 'max_bandwidth' expects an integer in the" - " range of 0 to %zu bytes/second", SIZE_MAX); +error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + "max_bandwidth", + "an integer in the range of 0 to "stringify(SIZE_MAX) + " bytes/second"); return false; } if (params->has_downtime_limit && (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { -error_setg(errp, "Parameter 'downtime_limit' expects an integer in " - "the range of 0 to %d milliseconds", - MAX_MIGRATE_DOWNTIME); +error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + "downtime_limit", + "an integer in the range of 0 to " +stringify(MAX_MIGRATE_DOWNTIME)" milliseconds"); return false; } @@ -2107,9 +2110,10 @@ void qmp_migrate_set_speed(int64_t value, Error **errp) void qmp_migrate_set_downtime(double value, Error **errp) { if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) { -error_setg(errp, "Parameter 'downtime_limit' expects an integer in " - "the range of 0 to %d seconds", - MAX_MIGRATE_DOWNTIME_SECONDS); +error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + "downtime_limit", + "an integer in the range of 0 to " +stringify(MAX_MIGRATE_DOWNTIME_SECONDS)" seconds"); return; } -- 2.26.2
[PULL 00/12] migration queue
From: "Dr. David Alan Gilbert" The following changes since commit 3c7adbc67d9a5c3e992a4dd13b8704464daaad5b: Merge remote-tracking branch 'remotes/berrange/tags/qcrypto-next-pull-request' into staging (2020-05-07 14:30:12 +0100) are available in the Git repository at: git://github.com/dagrh/qemu.git tags/pull-migration-20200507a for you to fetch changes up to 13f2cb21e5fb33e9f8d7db8eee48edc1c67b812f: migration/multifd: Do error_free after migrate_set_error to avoid memleaks (2020-05-07 17:40:24 +0100) Migration pull 2020-05-07 Mostly tidy-ups, but two new features: cpu-throttle-tailslow for making a gentler throttle xbzrle encoding rate measurement for getting a feal for xbzrle performance. David Hildenbrand (1): migration/ram: Consolidate variable reset after placement in ram_load_postcopy() Keqian Zhu (1): migration/throttle: Add cpu-throttle-tailslow migration parameter Mao Zhongyi (4): migration: fix bad indentation in error_report() migration/migration: improve error reporting for migrate parameters monitor/hmp-cmds: add hmp_handle_error() for hmp_migrate_set_speed() migration: move the units of migrate parameters from milliseconds to ms Marc-André Lureau (1): docs/devel/migration: start a debugging section Pan Nengyuan (3): migration/rdma: fix a memleak on error path in rdma_start_incoming_migration migration/multifd: fix memleaks in multifd_new_send_channel_async migration/multifd: Do error_free after migrate_set_error to avoid memleaks Philippe Mathieu-Daudé (1): migration/colo: Add missing error-propagation code Wei Wang (1): migration/xbzrle: add encoding rate docs/devel/migration.rst | 20 + migration/colo.c | 3 ++ migration/migration.c| 44 +++- migration/multifd.c | 5 migration/ram.c | 74 migration/rdma.c | 1 + monitor/hmp-cmds.c | 23 +++ qapi/migration.json | 53 +- 8 files changed, 192 insertions(+), 31 deletions(-)
[PULL 01/12] migration: fix bad indentation in error_report()
From: Mao Zhongyi bad indentation conflicts with CODING_STYLE doc. Signed-off-by: Mao Zhongyi Message-Id: <09f7529c665cac0c6a5e032ac6fdb6ca701f7e37.1585329482.git.maozhon...@cmss.chinamobile.com> Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Juan Quintela Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 177cce9e95..8f27174ff6 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2494,7 +2494,7 @@ retry: if (header_type >= MIG_RP_MSG_MAX || header_type == MIG_RP_MSG_INVALID) { error_report("RP: Received invalid message 0x%04x length 0x%04x", -header_type, header_len); + header_type, header_len); mark_source_rp_bad(ms); goto out; } @@ -2503,9 +2503,9 @@ retry: header_len != rp_cmd_args[header_type].len) || header_len > sizeof(buf)) { error_report("RP: Received '%s' message (0x%04x) with" -"incorrect length %d expecting %zu", -rp_cmd_args[header_type].name, header_type, header_len, -(size_t)rp_cmd_args[header_type].len); + "incorrect length %d expecting %zu", + rp_cmd_args[header_type].name, header_type, header_len, + (size_t)rp_cmd_args[header_type].len); mark_source_rp_bad(ms); goto out; } @@ -2560,7 +2560,7 @@ retry: } if (header_len != expected_len) { error_report("RP: Req_Page_id with length %d expecting %zd", -header_len, expected_len); + header_len, expected_len); mark_source_rp_bad(ms); goto out; } -- 2.26.2
Re: [PATCH] 9pfs: Fix potential deadlock of QEMU mainloop
On Thu, 07 May 2020 17:03:46 +0200 Christian Schoenebeck wrote: > On Donnerstag, 7. Mai 2020 16:33:28 CEST Greg Kurz wrote: > > > I also haven't reviewed QEMU's lock implementations in very detail, but > > > IIRC CoMutexes are completely handled in user space, while QemuMutex uses > > > regular OS mutexes and hence might cost context switches. > > > > ... since the locking would only been exercised with an hypothetical > > client doing stupid things, this is beginning to look like bike-shedding > > to me. :) > > Aha, keep that in mind when you're doing your next review. ;-) > Fair enough. :) > No seriously, like I said, I don't really care too much about Mutex vs. > CoMutex in you patch here. It was actually more about wide-picture thinking, > i.e. other places of (co)mutexes being used or other potential changes that > would make this or other uses more relevant one day. > Then I agree with you that it would be better to use CoMutex if we were experiencing thread pool exhaustion indeed. > > > > > > diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c > > > > > > index 9e046f7acb51..ac84ae804496 100644 > > > > > > --- a/hw/9pfs/9p.c > > > > > > +++ b/hw/9pfs/9p.c > > > > > > @@ -2170,7 +2170,7 @@ static int coroutine_fn > > > > > > v9fs_do_readdir_with_stat(V9fsPDU *pdu, int32_t count = 0; > > > > > > > > > > > > struct stat stbuf; > > > > > > off_t saved_dir_pos; > > > > > > > > > > > > -struct dirent *dent; > > > > > > +struct dirent dent; > > > > > > > > > > > > /* save the directory position */ > > > > > > saved_dir_pos = v9fs_co_telldir(pdu, fidp); > > > > > > > > > > > > @@ -2181,13 +2181,11 @@ static int coroutine_fn > > > > > > v9fs_do_readdir_with_stat(V9fsPDU *pdu, while (1) { > > > > > > > > > > > > v9fs_path_init(); > > > > > > > > > > > > -v9fs_readdir_lock(>fs.dir); > > > > > > - > > > > > > > > > > That's the deadlock fix, but ... > > > > > > > > > > > err = v9fs_co_readdir(pdu, fidp, ); > > > > > > > > > > > > -if (err || !dent) { > > > > > > +if (err <= 0) { > > > > > > > > > > > > break; > > > > > > > > > > > > } > > > > > > > > > > ... even though this code simplification might make sense, I don't > > > > > think > > > > > it > > > > > should be mixed with the deadlock fix together in one patch. They are > > > > > not > > > > > > > > I could possibly split this in two patches, one for returning a copy > > > > and one for moving the locking around, but... > > > > > > > > > related with each other, nor is the code simplification you are aiming > > > > > trivial > > > > > > > > ... this assertion is somewhat wrong: moving the locking to > > > > v9fs_co_readdir() really requires it returns a copy. > > > > > > Yeah, I am also not sure whether a split would make it more trivial enough > > > in this case to be worth the hassle. If you find an acceptable solution, > > > good, if not then leave it one patch. > > > > Another option would be to g_malloc() the dirent in v9fs_co_readdir() and > > g_free() in the callers. This would cause less churn since we could keep > > the same function signature. > > I was actually just going to suggest the same. So yes, looks like a less > invasive change to me. > I'll just do that then :) > Best regards, > Christian Schoenebeck > >
Re: [RFC PATCH 6/6] hw/block/nvme: Make device target agnostic
On 07/05/20 12:04, Stefan Hajnoczi wrote: >>> (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) { >>> -qemu_ram_writeback(n->pmrdev->mr.ram_block, >>> - 0, n->pmrdev->size); >>> +memory_region_do_writeback(>pmrdev->mr, 0, n->pmrdev->size); > qemu_ram_write() is being called because we need to msync or persist > pmem here. > > The memory_region_do_writeback() API is not equivalent, its purpose is > for dirty write logging (which we don't care about here because the > writes themselves will already have been logged when the guest performed > them). > > I think qemu_ram_writeback() should just be made common so that this > code isn't target-specific. Maybe it should be renamed to > qemu_ram_msync() to avoid confusion with dirty write APIs. Yes, we can add qemu_ram_msync and memory_region_msync. Paolo signature.asc Description: OpenPGP digital signature
[PATCH] tests/acceptance/boot_linux: Skip slow Aarch64 'virt' machine TCG test
The BootLinuxAarch64.test_virt_tcg is reported to take >7min to run. Add a possibility to users to skip this particular test, by setting the AVOCADO_SKIP_SLOW_TESTS environment variable: $ AVOCADO_SKIP_SLOW_TESTS=please make check-acceptance ... (05/88) tests/acceptance/boot_linux.py:BootLinuxAarch64.test_virt_tcg: SKIP: Test takes >7min ... Reported-by: Peter Maydell Signed-off-by: Philippe Mathieu-Daudé --- tests/acceptance/boot_linux.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/acceptance/boot_linux.py b/tests/acceptance/boot_linux.py index 075a386300..a8df50d769 100644 --- a/tests/acceptance/boot_linux.py +++ b/tests/acceptance/boot_linux.py @@ -15,6 +15,7 @@ from qemu.accel import kvm_available from qemu.accel import tcg_available +from avocado import skipIf from avocado.utils import cloudinit from avocado.utils import network from avocado.utils import vmimage @@ -159,6 +160,7 @@ def add_common_args(self): self.vm.add_args('-device', 'virtio-rng-pci,rng=rng0') self.vm.add_args('-object', 'rng-random,id=rng0,filename=/dev/urandom') +@skipIf(os.getenv('AVOCADO_SKIP_SLOW_TESTS'), 'Test takes >7min') def test_virt_tcg(self): """ :avocado: tags=accel:tcg -- 2.21.3
Re: device hotplug & file handles
On Thu, May 07, 2020 at 16:49:14 +0200, Gerd Hoffmann wrote: > Hi, > > For usb device pass-through (aka -device usb-host) it would be very > useful to pass file handles from libvirt to qemu. The workflow would > change from ... > > (1) libvirt enables access to /dev/usb/$bus/$dev > (2) libvirt passes $bus + $dev (using hostbus + hostaddr properties) > to qemu. > (3) qemu opens /dev/usb/$bus/$dev > > ... to ... > > (1) libvirt opens /dev/usb/$bus/$dev > (2) libvirt passes filehandle to qemu. > > Question is how can we pass the file descriptor best? My idea would be > to simply add an fd property to usb-host: > > * Coldplug would be "-device usb-host,fd=" (cmd line). > * Hotplug would be "device_add usb-host,fd=" (monitor). We have prior art for both approaches so it's fine. > > Will that work from libvirt point of view? Sure! Just please make sure that the new approach is detectable somehow. Either via device-list-properties or query-qmp-schema. > Or does anyone have an better idea? > > thanks, > Gerd > > PS: background: https://bugzilla.redhat.com/show_bug.cgi?id=1595525 >
[PATCH] target/ppc: Untabify excp_helper.c
Some tabs crept in with a recent change. Fixes: 6dc6b557913f "target/ppc: Improve syscall exception logging" Signed-off-by: Greg Kurz --- target/ppc/excp_helper.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 639cac38f9c3..4db3d9ed9af5 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -70,16 +70,16 @@ static inline void dump_syscall(CPUPPCState *env) static inline void dump_hcall(CPUPPCState *env) { qemu_log_mask(CPU_LOG_INT, "hypercall r3=%016" PRIx64 - " r4=%016" PRIx64 " r5=%016" PRIx64 " r6=%016" PRIx64 - " r7=%016" PRIx64 " r8=%016" PRIx64 " r9=%016" PRIx64 - " r10=%016" PRIx64 " r11=%016" PRIx64 " r12=%016" PRIx64 + " r4=%016" PRIx64 " r5=%016" PRIx64 " r6=%016" PRIx64 + " r7=%016" PRIx64 " r8=%016" PRIx64 " r9=%016" PRIx64 + " r10=%016" PRIx64 " r11=%016" PRIx64 " r12=%016" PRIx64 " nip=" TARGET_FMT_lx "\n", ppc_dump_gpr(env, 3), ppc_dump_gpr(env, 4), - ppc_dump_gpr(env, 5), ppc_dump_gpr(env, 6), - ppc_dump_gpr(env, 7), ppc_dump_gpr(env, 8), - ppc_dump_gpr(env, 9), ppc_dump_gpr(env, 10), - ppc_dump_gpr(env, 11), ppc_dump_gpr(env, 12), - env->nip); + ppc_dump_gpr(env, 5), ppc_dump_gpr(env, 6), + ppc_dump_gpr(env, 7), ppc_dump_gpr(env, 8), + ppc_dump_gpr(env, 9), ppc_dump_gpr(env, 10), + ppc_dump_gpr(env, 11), ppc_dump_gpr(env, 12), + env->nip); } static int powerpc_reset_wakeup(CPUState *cs, CPUPPCState *env, int excp,
Re: [PATCH v5 01/14] qcrypto/core: add generic infrastructure for crypto options amendment
On 5/7/20 7:54 AM, Maxim Levitsky wrote: This will be used first to implement luks keyslot management. block_crypto_amend_opts_init will be used to convert qemu-img cmdline to QCryptoBlockAmendOptions Signed-off-by: Maxim Levitsky Reviewed-by: Daniel P. Berrangé --- +++ b/qapi/crypto.json @@ -309,3 +309,19 @@ 'base': 'QCryptoBlockInfoBase', 'discriminator': 'format', 'data': { 'luks': 'QCryptoBlockInfoLUKS' } } + + + +## +# @QCryptoBlockAmendOptions: +# +# The options that are available for all encryption formats +# when amending encryption settings +# +# Since: 5.0 Looks like our mail crossed, my v4 review landed after you sent v5. We'll still have to scrub this series for s/5.0/5.1/ +## +{ 'union': 'QCryptoBlockAmendOptions', + 'base': 'QCryptoBlockOptionsBase', + 'discriminator': 'format', + 'data': { +} } -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Re: [PATCH v4 6/6] net/colo-compare.c: Correct ordering in complete and finalize
On Thu, 7 May 2020 13:26:11 + "Zhang, Chen" wrote: > > -Original Message- > > From: Lukas Straub > > Sent: Monday, May 4, 2020 6:28 PM > > To: qemu-devel > > Cc: Zhang, Chen ; Li Zhijian > > ; Jason Wang ; Marc- > > André Lureau ; Paolo Bonzini > > > > Subject: [PATCH v4 6/6] net/colo-compare.c: Correct ordering in complete > > and finalize > > > > In colo_compare_complete, insert CompareState into net_compares only > > after everything has been initialized. > > In colo_compare_finalize, remove CompareState from net_compares before > > anything is deinitialized. > > S/deinitialized/finalized > > It looks no dependences on each step on initialization and finalization. > Do you means we just need add/remove each colo-compare module at last in > logic? Yes. While I didn't see any crashes here, there is the possibility that if colo-compare is removed during checkpoint, the destroyed event_bh is called from colo_notify_compares_event. Same with colo_compare_complete (very unlikely) if colo-compare is created while colo is running, colo_notify_compares_event may call the uninitialized event_bh. Regards, Lukas Straub > Or current code have some issue? > > Thanks > Zhang Chen > > > > > Signed-off-by: Lukas Straub > > --- > > net/colo-compare.c | 45 +++-- > > 1 file changed, 23 insertions(+), 22 deletions(-) > > > > diff --git a/net/colo-compare.c b/net/colo-compare.c index > > c7572d75e9..6f80bcece6 100644 > > --- a/net/colo-compare.c > > +++ b/net/colo-compare.c > > @@ -1283,15 +1283,6 @@ static void > > colo_compare_complete(UserCreatable *uc, Error **errp) > > s->vnet_hdr); > > } > > > > -qemu_mutex_lock(_compare_mutex); > > -if (!colo_compare_active) { > > -qemu_mutex_init(_mtx); > > -qemu_cond_init(_complete_cond); > > -colo_compare_active = true; > > -} > > -QTAILQ_INSERT_TAIL(_compares, s, next); > > -qemu_mutex_unlock(_compare_mutex); > > - > > s->out_sendco.s = s; > > s->out_sendco.chr = >chr_out; > > s->out_sendco.notify_remote_frame = false; @@ -1312,6 +1303,16 @@ > > static void colo_compare_complete(UserCreatable *uc, Error **errp) > >connection_destroy); > > > > colo_compare_iothread(s); > > + > > +qemu_mutex_lock(_compare_mutex); > > +if (!colo_compare_active) { > > +qemu_mutex_init(_mtx); > > +qemu_cond_init(_complete_cond); > > +colo_compare_active = true; > > +} > > +QTAILQ_INSERT_TAIL(_compares, s, next); > > +qemu_mutex_unlock(_compare_mutex); > > + > > return; > > } > > > > @@ -1384,19 +1385,6 @@ static void colo_compare_finalize(Object *obj) > > CompareState *s = COLO_COMPARE(obj); > > CompareState *tmp = NULL; > > > > -qemu_chr_fe_deinit(>chr_pri_in, false); > > -qemu_chr_fe_deinit(>chr_sec_in, false); > > -qemu_chr_fe_deinit(>chr_out, false); > > -if (s->notify_dev) { > > -qemu_chr_fe_deinit(>chr_notify_dev, false); > > -} > > - > > -if (s->iothread) { > > -colo_compare_timer_del(s); > > -} > > - > > -qemu_bh_delete(s->event_bh); > > - > > qemu_mutex_lock(_compare_mutex); > > QTAILQ_FOREACH(tmp, _compares, next) { > > if (tmp == s) { > > @@ -1411,6 +1399,19 @@ static void colo_compare_finalize(Object *obj) > > } > > qemu_mutex_unlock(_compare_mutex); > > > > +qemu_chr_fe_deinit(>chr_pri_in, false); > > +qemu_chr_fe_deinit(>chr_sec_in, false); > > +qemu_chr_fe_deinit(>chr_out, false); > > +if (s->notify_dev) { > > +qemu_chr_fe_deinit(>chr_notify_dev, false); > > +} > > + > > +if (s->iothread) { > > +colo_compare_timer_del(s); > > +} > > + > > +qemu_bh_delete(s->event_bh); > > + > > AioContext *ctx = iothread_get_aio_context(s->iothread); > > aio_context_acquire(ctx); > > AIO_WAIT_WHILE(ctx, !s->out_sendco.done); > > -- > > 2.20.1 pgpTILdNe6FCb.pgp Description: OpenPGP digital signature
[PATCH] accel: Move Xen accelerator code under accel/xen/
This code is not related to hardware emulation. Move it under accel/ with the other hypervisors. Signed-off-by: Philippe Mathieu-Daudé --- We could also move the memory management functions from hw/i386/xen/xen-hvm.c but it is not trivial. Necessary step to remove "exec/ram_addr.h" in the next series. --- include/exec/ram_addr.h| 2 +- include/hw/xen/xen.h | 11 -- include/sysemu/xen.h | 40 ++ hw/xen/xen-common.c => accel/xen/xen-all.c | 3 ++ hw/acpi/piix4.c| 2 +- hw/i386/pc.c | 1 + hw/i386/pc_piix.c | 1 + hw/i386/pc_q35.c | 1 + hw/i386/xen/xen-hvm.c | 1 + hw/i386/xen/xen_platform.c | 1 + hw/isa/piix3.c | 1 + hw/pci/msix.c | 1 + migration/savevm.c | 2 +- softmmu/vl.c | 2 +- stubs/xen-hvm.c| 9 - target/i386/cpu.c | 2 +- MAINTAINERS| 2 ++ accel/Makefile.objs| 1 + accel/xen/Makefile.objs| 1 + hw/xen/Makefile.objs | 2 +- 20 files changed, 60 insertions(+), 26 deletions(-) create mode 100644 include/sysemu/xen.h rename hw/xen/xen-common.c => accel/xen/xen-all.c (99%) create mode 100644 accel/xen/Makefile.objs diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 5e59a3d8d7..4e05292f91 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -21,7 +21,7 @@ #ifndef CONFIG_USER_ONLY #include "cpu.h" -#include "hw/xen/xen.h" +#include "sysemu/xen.h" #include "sysemu/tcg.h" #include "exec/ramlist.h" #include "exec/ramblock.h" diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h index 5ac1c6dc55..771dd447f2 100644 --- a/include/hw/xen/xen.h +++ b/include/hw/xen/xen.h @@ -20,13 +20,6 @@ extern uint32_t xen_domid; extern enum xen_mode xen_mode; extern bool xen_domid_restrict; -extern bool xen_allowed; - -static inline bool xen_enabled(void) -{ -return xen_allowed; -} - int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num); void xen_piix3_set_irq(void *opaque, int irq_num, int level); void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len); @@ -39,10 +32,6 @@ void xenstore_store_pv_console_info(int i, struct Chardev *chr); void xen_hvm_init(PCMachineState *pcms, MemoryRegion **ram_memory); -void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, - struct MemoryRegion *mr, Error **errp); -void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length); - void xen_register_framebuffer(struct MemoryRegion *mr); #endif /* QEMU_HW_XEN_H */ diff --git a/include/sysemu/xen.h b/include/sysemu/xen.h new file mode 100644 index 00..609d2d4184 --- /dev/null +++ b/include/sysemu/xen.h @@ -0,0 +1,40 @@ +/* + * QEMU Xen support + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef SYSEMU_XEN_H +#define SYSEMU_XEN_H + +#ifdef CONFIG_XEN + +extern bool xen_allowed; + +#define xen_enabled() (xen_allowed) + +#ifndef CONFIG_USER_ONLY +void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length); +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, + struct MemoryRegion *mr, Error **errp); +#endif + +#else /* !CONFIG_XEN */ + +#define xen_enabled() 0 +#ifndef CONFIG_USER_ONLY +static inline void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length) +{ +abort(); +} +static inline void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, + MemoryRegion *mr, Error **errp) +{ +abort(); +} +#endif + +#endif /* CONFIG_XEN */ + +#endif diff --git a/hw/xen/xen-common.c b/accel/xen/xen-all.c similarity index 99% rename from hw/xen/xen-common.c rename to accel/xen/xen-all.c index a15070f7f6..5c64571cb8 100644 --- a/hw/xen/xen-common.c +++ b/accel/xen/xen-all.c @@ -16,6 +16,7 @@ #include "hw/xen/xen_pt.h" #include "chardev/char.h" #include "sysemu/accel.h" +#include "sysemu/xen.h" #include "sysemu/runstate.h" #include "migration/misc.h" #include "migration/global_state.h" @@ -31,6 +32,8 @@ do { } while (0) #endif +bool xen_allowed; + xc_interface *xen_xc; xenforeignmemory_handle *xen_fmem; xendevicemodel_handle *xen_dmod; diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 964d6f5990..daed273687 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -30,6 +30,7 @@ #include "hw/acpi/acpi.h" #include "sysemu/runstate.h" #include "sysemu/sysemu.h" +#include "sysemu/xen.h" #include "qapi/error.h" #include "qemu/range.h" #include "exec/address-spaces.h" @@ -41,7 +42,6 @@ #include "hw/mem/nvdimm.h" #include
Re: [PATCH 0/2] migration/multifd: fix two memleaks
* Pan Nengyuan (pannengy...@huawei.com) wrote: > Fix two memleaks in multifd_send_thread/multifd_new_send_channel_async when > error happen. > > Pan Nengyuan (2): > migration/multifd: fix memleaks in multifd_new_send_channel_async > migration/multifd: Do error_free after migrate_set_error to avoid > memleaks > > migration/multifd.c | 5 + > 1 file changed, 5 insertions(+) Queued > > -- > 2.18.2 > > -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
Re: [PATCH 2/2] block/block-copy: Simplify block_copy_do_copy()
On 5/7/20 7:11 AM, Philippe Mathieu-Daudé wrote: block_copy_do_copy() is static, only used in block_copy_task_entry with the error_is_read argument set. No need to check for it, simplify. Signed-off-by: Philippe Mathieu-Daudé --- block/block-copy.c | 12 +++- 1 file changed, 3 insertions(+), 9 deletions(-) Reviewed-by: Eric Blake -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Re: [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend
On Thu, May 7, 2020 at 11:30 PM Maxime Coquelin wrote: > > > > On 4/20/20 11:32 AM, Cindy Lu wrote: > > diff --git a/include/hw/virtio/vhost-backend.h > > b/include/hw/virtio/vhost-backend.h > > index 6f6670783f..d81bd9885f 100644 > > --- a/include/hw/virtio/vhost-backend.h > > +++ b/include/hw/virtio/vhost-backend.h > > @@ -17,7 +17,8 @@ typedef enum VhostBackendType { > > VHOST_BACKEND_TYPE_NONE = 0, > > VHOST_BACKEND_TYPE_KERNEL = 1, > > VHOST_BACKEND_TYPE_USER = 2, > > -VHOST_BACKEND_TYPE_MAX = 3, > > +VHOST_BACKEND_TYPE_VDPA = 3, > > +VHOST_BACKEND_TYPE_MAX = 4, > > } VhostBackendType; > > > > typedef enum VhostSetConfigType { > > @@ -112,6 +113,7 @@ typedef int (*vhost_get_inflight_fd_op)(struct > > vhost_dev *dev, > > typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev, > > struct vhost_inflight *inflight); > > > > +typedef int (*vhost_set_state_op)(struct vhost_dev *dev, int state); > > I think state should be of type uint8_t. > ok, I will change this to uint8_t
Re: [PATCH v4 09/13] migration/ram: Consolidate variable reset after placement in ram_load_postcopy()
On 07.05.20 17:42, Dr. David Alan Gilbert wrote: > * Dr. David Alan Gilbert (dgilb...@redhat.com) wrote: >> * David Hildenbrand (da...@redhat.com) wrote: >>> Let's consolidate resetting the variables. >>> >>> Cc: "Dr. David Alan Gilbert" >>> Cc: Juan Quintela >>> Cc: Peter Xu >>> Signed-off-by: David Hildenbrand >> >> Reviewed-by: Dr. David Alan Gilbert > > Queued this one only; I had to do manual patch application due to some > renaming in 91ba442; but I think it's still OK. Thanks, how to best proceed with the other 12 patches in this series? -- Thanks, David / dhildenb
Re: [PATCH v6 3/5] 9pfs: add new function v9fs_co_readdir_many()
On Thu, 07 May 2020 14:16:43 +0200 Christian Schoenebeck wrote: > On Montag, 4. Mai 2020 11:18:34 CEST Greg Kurz wrote: > > On Fri, 01 May 2020 16:04:41 +0200 > > > > Christian Schoenebeck wrote: > > > On Donnerstag, 30. April 2020 15:30:49 CEST Greg Kurz wrote: > > > > > > I agree that a client that issues concurrent readdir requests on the > > > > > > same fid is probably asking for troubles, but this permitted by the > > > > > > spec. Whether we should detect such conditions and warn or even fail > > > > > > is discussion for another thread. > > > > > > > > > > > > The locking is only needed to avoid concurrent accesses to the > > > > > > dirent > > > > > > structure returned by readdir(), otherwise we could return partially > > > > > > overwritten file names to the client. It must be done for each > > > > > > individual > > > > > > call to readdir(), but certainly not for multiple calls. > > > > > > > > > > Yeah, that would resolve this issue more appropriately for 9p2000.L, > > > > > since > > > > > Treaddir specifies an offset, but for 9p2000.u the result of a > > > > > concurrent > > > > > read on a directory (9p2000.u) would still be undefined. > > > > > > > > The bad client behavior you want to tackle has nothing to do with > > > > the locking itself. Since all the code in 9p.c runs serialized in > > > > the context of the QEMU main loop, concurrent readdir requests could > > > > easily be detected up-front with a simple flag in the fid structure. > > > > > > Well, it's fine with me. I don't really see an issue here right now. But > > > that all the code was serialized is not fully true. Most of the 9p.c code > > > is still heavily dispatching between main thread and worker threads back > > > and forth. And for that reason the order of request processing might > > > change quite arbitrarily in between. Just keep that in mind. > > > > Just to make things clear. The code in 9p.c is ALWAYS exclusively run by > > the main thread. Only the code called under v9fs_co_run_in_worker() is > > dispatched on worker threads. So, yes the order of individual backend > > operations may change, but the start of a new client request is necessarily > > serialized with the completion of pending ones, which is the only thing > > we care for actually. > > I just looked at this. 9p.c code is called by main I/O thread only, that's > clear. The start of requests come also in in order, yes, but it seems you > would think that main I/O thread would not grab the next client request from > queue before completing the current/previous client request (that is not > before transmitting result to client). If yes, I am not so sure about this > claim: > Hrm... I don't think I've ever said anything like that :) What I'm saying is that, thanks to the serialization of request start and completion, v9fs_readdir() and v9fs_read() can: - flag the fid as being involved in a readdir request - clear the flag when the request is complete or failed - directly fail or print a warning if the fid already has the flag set (ie. a previous request hasn't completed yet) But again, I'm not a big fan of adding code to handle such an unlikely situation which is even not explicitly forbidden by the 9p spec. > For instance v9fs_path_write_lock() is using a co-mutex, right? So an > unsuccesful lock would cause main I/O thread to grab the next request before > completing the current/previous request. > An unsuccessful lock would cause the main I/O thread to switch to some other task. > And what happens on any run_in_worker({}) call? If there is another client > request in the queue, main I/O thread would pull that request from the queue > before waiting for the worker thread to complete its task, wouldn't it? > The main I/O thread won't wait for anything, so, yes, it will probably start handling the new request until it yields. > Just looked at the code so far, haven't tested it yet ... > > Best regards, > Christian Schoenebeck > >
Re: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check that colo-compare is active
On Thu, 7 May 2020 11:38:04 + "Zhang, Chen" wrote: > > -Original Message- > > From: Lukas Straub > > Sent: Monday, May 4, 2020 6:28 PM > > To: qemu-devel > > Cc: Zhang, Chen ; Li Zhijian > > ; Jason Wang ; Marc- > > André Lureau ; Paolo Bonzini > > > > Subject: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check that colo- > > compare is active > > > > If the colo-compare object is removed before failover and a checkpoint > > happens, qemu crashes because it tries to lock the destroyed event_mtx in > > colo_notify_compares_event. > > > > Fix this by checking if everything is initialized by introducing a new > > variable > > colo_compare_active which is protected by a new mutex > > colo_compare_mutex. The new mutex also protects against concurrent > > access of the net_compares list and makes sure that > > colo_notify_compares_event isn't active while we destroy event_mtx and > > event_complete_cond. > > > > With this it also is again possible to use colo without colo-compare > > (periodic > > mode) and to use multiple colo-compare for multiple network interfaces. > > > > Hi Lukas, > > For this case I think we don't need to touch vl.c code, we can solve this > issue from another perspective: > How to remove colo-compare? > User will use qemu-monitor or QMP command to disable an object, so we just > need return operation failed > When user try to remove colo-compare object while COLO is running. Yeah, but that still leaves the other problem that colo can't be used without colo-compare (qemu crashes then). Regards, Lukas Straub > Thanks > Zhang Chen > > > Signed-off-by: Lukas Straub > > --- > > net/colo-compare.c | 35 +-- > > net/colo-compare.h | 1 + > > softmmu/vl.c | 2 ++ > > 3 files changed, 32 insertions(+), 6 deletions(-) > > > > diff --git a/net/colo-compare.c b/net/colo-compare.c index > > 56db3d3bfc..c7572d75e9 100644 > > --- a/net/colo-compare.c > > +++ b/net/colo-compare.c > > @@ -54,6 +54,8 @@ static NotifierList colo_compare_notifiers = #define > > REGULAR_PACKET_CHECK_MS 3000 #define DEFAULT_TIME_OUT_MS 3000 > > > > +static QemuMutex colo_compare_mutex; > > +static bool colo_compare_active; > > static QemuMutex event_mtx; > > static QemuCond event_complete_cond; > > static int event_unhandled_count; > > @@ -906,6 +908,12 @@ static void check_old_packet_regular(void *opaque) > > void colo_notify_compares_event(void *opaque, int event, Error **errp) { > > CompareState *s; > > +qemu_mutex_lock(_compare_mutex); > > + > > +if (!colo_compare_active) { > > +qemu_mutex_unlock(_compare_mutex); > > +return; > > +} > > > > qemu_mutex_lock(_mtx); > > QTAILQ_FOREACH(s, _compares, next) { @@ -919,6 +927,7 @@ void > > colo_notify_compares_event(void *opaque, int event, Error **errp) > > } > > > > qemu_mutex_unlock(_mtx); > > +qemu_mutex_unlock(_compare_mutex); > > } > > > > static void colo_compare_timer_init(CompareState *s) @@ -1274,7 +1283,14 > > @@ static void colo_compare_complete(UserCreatable *uc, Error **errp) > > s->vnet_hdr); > > } > > > > +qemu_mutex_lock(_compare_mutex); > > +if (!colo_compare_active) { > > +qemu_mutex_init(_mtx); > > +qemu_cond_init(_complete_cond); > > +colo_compare_active = true; > > +} > > QTAILQ_INSERT_TAIL(_compares, s, next); > > +qemu_mutex_unlock(_compare_mutex); > > > > s->out_sendco.s = s; > > s->out_sendco.chr = >chr_out; > > @@ -1290,9 +1306,6 @@ static void colo_compare_complete(UserCreatable > > *uc, Error **errp) > > > > g_queue_init(>conn_list); > > > > -qemu_mutex_init(_mtx); > > -qemu_cond_init(_complete_cond); > > - > > s->connection_track_table = > > g_hash_table_new_full(connection_key_hash, > >connection_key_equal, > >g_free, @@ -1384,12 > > +1397,19 @@ static void > > colo_compare_finalize(Object *obj) > > > > qemu_bh_delete(s->event_bh); > > > > +qemu_mutex_lock(_compare_mutex); > > QTAILQ_FOREACH(tmp, _compares, next) { > > if (tmp == s) { > > QTAILQ_REMOVE(_compares, s, next); > > break; > > } > > } > > +if (QTAILQ_EMPTY(_compares)) { > > +colo_compare_active = false; > > +qemu_mutex_destroy(_mtx); > > +qemu_cond_destroy(_complete_cond); > > +} > > +qemu_mutex_unlock(_compare_mutex); > > > > AioContext *ctx = iothread_get_aio_context(s->iothread); > > aio_context_acquire(ctx); > > @@ -1413,15 +1433,18 @@ static void colo_compare_finalize(Object *obj) > > object_unref(OBJECT(s->iothread)); > > } > > > > -qemu_mutex_destroy(_mtx); > > -qemu_cond_destroy(_complete_cond); > > - > > g_free(s->pri_indev); > > g_free(s->sec_indev); > >
Re: [PULL 13/32] cpus: Fix configure_icount() error API violation
On Wed, 29 Apr 2020 at 08:34, Markus Armbruster wrote: > > The Error ** argument must be NULL, _abort, _fatal, or a > pointer to a variable containing NULL. Passing an argument of the > latter kind twice without clearing it in between is wrong: if the > first call sets an error, it no longer points to NULL for the second > call. > > configure_icount() is wrong that way. Harmless, because its @errp is > always _abort or _fatal. > > Just as wrong (and just as harmless): when it fails, it can still > update global state. Hi; Coverity complains about this change (CID 1428754): > > void configure_icount(QemuOpts *opts, Error **errp) > { > -const char *option; > +const char *option = qemu_opt_get(opts, "shift"); > +bool sleep = qemu_opt_get_bool(opts, "sleep", true); > +bool align = qemu_opt_get_bool(opts, "align", false); > +long time_shift = -1; > char *rem_str = NULL; > > -option = qemu_opt_get(opts, "shift"); > -if (!option) { > -if (qemu_opt_get(opts, "align") != NULL) { > -error_setg(errp, "Please specify shift option when using align"); > -} > +if (!option && qemu_opt_get(opts, "align")) { > +error_setg(errp, "Please specify shift option when using align"); > return; Previously, if option was NULL we would always take this early exit. Now we only take the exit if option is NULL and the qemu_opt_get() returns true, so in some cases execution can continue through the function with a NULL option... > } > > -icount_sleep = qemu_opt_get_bool(opts, "sleep", true); > +if (align && !sleep) { > +error_setg(errp, "align=on and sleep=off are incompatible"); > +return; > +} > + > +if (strcmp(option, "auto") != 0) { ...but here we pass option to strcmp(), which is wrong if it can be NULL. thanks -- PMM
Re: [PATCH 1/2] block/block-copy: Fix uninitialized variable in block_copy_task_entry
On 5/7/20 7:11 AM, Philippe Mathieu-Daudé wrote: Fix when building with -Os: CC block/block-copy.o block/block-copy.c: In function ‘block_copy_task_entry’: block/block-copy.c:428:38: error: ‘error_is_read’ may be used uninitialized in this function [-Werror=maybe-uninitialized] 428 | t->call_state->error_is_read = error_is_read; | ~^~~ Looks like -Os triggered different inlining of block_copy_do_copy(). I confirm that block_copy_do_copy does NOT initialize error_is_read except when returning < 0, but similarly block_copy_task_entry() does not read error_is_read except in the same setups. So it looks like no actual bug was triggered, but we can definitely aid the compiler's analysis by initializing. Signed-off-by: Philippe Mathieu-Daudé --- block/block-copy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Reviewed-by: Eric Blake -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Re: [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend
On Thu, May 7, 2020 at 11:13 PM Maxime Coquelin wrote: > > > > On 4/20/20 11:32 AM, Cindy Lu wrote: > > Currently we have 2 types of vhost backends in QEMU: vhost kernel and > > vhost-user. The above patch provides a generic device for vDPA purpose, > > this vDPA device exposes to user space a non-vendor-specific configuration > > interface for setting up a vhost HW accelerator, this patch set introduces > > a third vhost backend called vhost-vdpa based on the vDPA interface. > > > > Vhost-vdpa usage: > > > > qemu-system-x86_64 -cpu host -enable-kvm \ > > .. > > -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \ > > -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \ > > > > Author: Tiwei Bie > > Signed-off-by: Cindy Lu > > --- > > hw/net/vhost_net.c| 43 > > hw/net/virtio-net.c | 9 + > > hw/virtio/Makefile.objs | 2 +- > > hw/virtio/vhost-backend.c | 3 + > > hw/virtio/vhost-vdpa.c| 379 ++ > > hw/virtio/vhost.c | 5 + > > include/hw/virtio/vhost-backend.h | 6 +- > > include/hw/virtio/vhost-vdpa.h| 14 ++ > > 8 files changed, 459 insertions(+), 2 deletions(-) > > create mode 100644 hw/virtio/vhost-vdpa.c > > create mode 100644 include/hw/virtio/vhost-vdpa.h > > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c > > index 4096d64aaf..0d13fda2fc 100644 > > --- a/hw/net/vhost_net.c > > +++ b/hw/net/vhost_net.c > > @@ -17,8 +17,10 @@ > > #include "net/net.h" > > #include "net/tap.h" > > #include "net/vhost-user.h" > > +#include "net/vhost-vdpa.h" > > > > #include "standard-headers/linux/vhost_types.h" > > +#include "linux-headers/linux/vhost.h" > > #include "hw/virtio/virtio-net.h" > > #include "net/vhost_net.h" > > #include "qemu/error-report.h" > > @@ -85,6 +87,29 @@ static const int user_feature_bits[] = { > > VHOST_INVALID_FEATURE_BIT > > }; > > > > +static const int vdpa_feature_bits[] = { > > +VIRTIO_F_NOTIFY_ON_EMPTY, > > +VIRTIO_RING_F_INDIRECT_DESC, > > +VIRTIO_RING_F_EVENT_IDX, > > +VIRTIO_F_ANY_LAYOUT, > > +VIRTIO_F_VERSION_1, > > +VIRTIO_NET_F_CSUM, > > +VIRTIO_NET_F_GUEST_CSUM, > > +VIRTIO_NET_F_GSO, > > +VIRTIO_NET_F_GUEST_TSO4, > > +VIRTIO_NET_F_GUEST_TSO6, > > +VIRTIO_NET_F_GUEST_ECN, > > +VIRTIO_NET_F_GUEST_UFO, > > +VIRTIO_NET_F_HOST_TSO4, > > +VIRTIO_NET_F_HOST_TSO6, > > +VIRTIO_NET_F_HOST_ECN, > > +VIRTIO_NET_F_HOST_UFO, > > +VIRTIO_NET_F_MRG_RXBUF, > > +VIRTIO_NET_F_MTU, > > +VIRTIO_F_IOMMU_PLATFORM, > > +VIRTIO_NET_F_GUEST_ANNOUNCE, > > +VHOST_INVALID_FEATURE_BIT > > +}; > > static const int *vhost_net_get_feature_bits(struct vhost_net *net) > > { > > const int *feature_bits = 0; > > @@ -96,6 +121,9 @@ static const int *vhost_net_get_feature_bits(struct > > vhost_net *net) > > case NET_CLIENT_DRIVER_VHOST_USER: > > feature_bits = user_feature_bits; > > break; > > +case NET_CLIENT_DRIVER_VHOST_VDPA: > > +feature_bits = vdpa_feature_bits; > > +break; > > default: > > error_report("Feature bits not defined for this type: %d", > > net->nc->info->type); > > @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc) > > assert(vhost_net); > > break; > > #endif > > +case NET_CLIENT_DRIVER_VHOST_VDPA: > > +vhost_net = vhost_vdpa_get_vhost_net(nc); > > +assert(vhost_net); > > +break; > > default: > > break; > > } > > @@ -465,3 +497,14 @@ int vhost_net_set_mtu(struct vhost_net *net, uint16_t > > mtu) > > > > return vhost_ops->vhost_net_set_mtu(>dev, mtu); > > } > > +int vhost_set_state(NetClientState *nc, int state) > > +{ > > +struct vhost_net *net = get_vhost_net(nc); > > +struct vhost_dev *hdev = >dev; > > +if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { > > Maybe checking the vhost_set_state callback is implemented is enough, > and it is not need to restrict that to Vhost-vDPA? Sure, Will remove this > > +if (hdev->vhost_ops->vhost_set_state) { > > +return hdev->vhost_ops->vhost_set_state(hdev, state); > > + } > > +} > > +return 0; > > +} >
Re: [PULL 24/24] block/block-copy: use aio-task-pool API
On Tue, 5 May 2020 at 13:59, Max Reitz wrote: > > From: Vladimir Sementsov-Ogievskiy > > Run block_copy iterations in parallel in aio tasks. > > Changes: > - BlockCopyTask becomes aio task structure. Add zeroes field to pass > it to block_copy_do_copy > - add call state - it's a state of one call of block_copy(), shared > between parallel tasks. For now used only to keep information about > first error: is it read or not. > - convert block_copy_dirty_clusters to aio-task loop. > > Signed-off-by: Vladimir Sementsov-Ogievskiy > Message-Id: <20200429130847.28124-6-vsement...@virtuozzo.com> > Signed-off-by: Max Reitz Hi; this patch seems to introduce a use-after-free (CID 1428756): > @@ -484,6 +554,8 @@ static int coroutine_fn > block_copy_dirty_clusters(BlockCopyState *s, > int ret = 0; > bool found_dirty = false; > int64_t end = offset + bytes; > +AioTaskPool *aio = NULL; > +BlockCopyCallState call_state = {false, false}; > > /* > * block_copy() user is responsible for keeping source and target in same > @@ -495,11 +567,11 @@ static int coroutine_fn > block_copy_dirty_clusters(BlockCopyState *s, > assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); > assert(QEMU_IS_ALIGNED(bytes, s->cluster_size)); > > -while (bytes) { > -g_autofree BlockCopyTask *task = NULL; > +while (bytes && aio_task_pool_status(aio) == 0) { > +BlockCopyTask *task; > int64_t status_bytes; > > -task = block_copy_task_create(s, offset, bytes); > +task = block_copy_task_create(s, _state, offset, bytes); > if (!task) { > /* No more dirty bits in the bitmap */ > trace_block_copy_skip_range(s, offset, bytes); > @@ -519,6 +591,7 @@ static int coroutine_fn > block_copy_dirty_clusters(BlockCopyState *s, > } > if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) { > block_copy_task_end(task, 0); > +g_free(task); This hunk frees 'task' here... > progress_set_remaining(s->progress, > bdrv_get_dirty_count(s->copy_bitmap) + > s->in_flight_bytes); ...but the next lines after this one (just out of context) are: trace_block_copy_skip_range(s, task->offset, task->bytes); offset = task_end(task); which now dereference 'task' after it is freed. thanks -- PMM
Re: [PATCH v3] migration/xbzrle: add encoding rate
* Wei Wang (wei.w.w...@intel.com) wrote: > Users may need to check the xbzrle encoding rate to know if the guest > memory is xbzrle encoding-friendly, and dynamically turn off the > encoding if the encoding rate is low. > > Signed-off-by: Yi Sun > Signed-off-by: Wei Wang Queued > --- > migration/migration.c | 1 + > migration/ram.c | 39 +-- > monitor/hmp-cmds.c| 2 ++ > qapi/migration.json | 5 - > 4 files changed, 44 insertions(+), 3 deletions(-) > > diff --git a/migration/migration.c b/migration/migration.c > index 187ac0410c..e40421353c 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -930,6 +930,7 @@ static void populate_ram_info(MigrationInfo *info, > MigrationState *s) > info->xbzrle_cache->pages = xbzrle_counters.pages; > info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss; > info->xbzrle_cache->cache_miss_rate = > xbzrle_counters.cache_miss_rate; > +info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate; > info->xbzrle_cache->overflow = xbzrle_counters.overflow; > } > > diff --git a/migration/ram.c b/migration/ram.c > index 04f13feb2e..41b75a0a0f 100644 > --- a/migration/ram.c > +++ b/migration/ram.c > @@ -327,6 +327,10 @@ struct RAMState { > uint64_t num_dirty_pages_period; > /* xbzrle misses since the beginning of the period */ > uint64_t xbzrle_cache_miss_prev; > +/* Amount of xbzrle pages since the beginning of the period */ > +uint64_t xbzrle_pages_prev; > +/* Amount of xbzrle encoded bytes since the beginning of the period */ > +uint64_t xbzrle_bytes_prev; > > /* compression statistics since the beginning of the period */ > /* amount of count that no free thread to compress data */ > @@ -696,6 +700,18 @@ static int save_xbzrle_page(RAMState *rs, uint8_t > **current_data, > return -1; > } > > +/* > + * Reaching here means the page has hit the xbzrle cache, no matter what > + * encoding result it is (normal encoding, overflow or skipping the > page), > + * count the page as encoded. This is used to caculate the encoding rate. > + * > + * Example: 2 pages (8KB) being encoded, first page encoding generates > 2KB, > + * 2nd page turns out to be skipped (i.e. no new bytes written to the > + * page), the overall encoding rate will be 8KB / 2KB = 4, which has the > + * skipped page included. In this way, the encoding rate can tell if the > + * guest page is good for xbzrle encoding. > + */ > +xbzrle_counters.pages++; > prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); > > /* save current buffer into memory */ > @@ -726,6 +742,7 @@ static int save_xbzrle_page(RAMState *rs, uint8_t > **current_data, > } else if (encoded_len == -1) { > trace_save_xbzrle_page_overflow(); > xbzrle_counters.overflow++; > +xbzrle_counters.bytes += TARGET_PAGE_SIZE; > return -1; > } > > @@ -736,8 +753,12 @@ static int save_xbzrle_page(RAMState *rs, uint8_t > **current_data, > qemu_put_be16(rs->f, encoded_len); > qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len); > bytes_xbzrle += encoded_len + 1 + 2; > -xbzrle_counters.pages++; > -xbzrle_counters.bytes += bytes_xbzrle; > +/* > + * Like compressed_size (please see update_compress_thread_counts), > + * the xbzrle encoded bytes don't count the 8 byte header with > + * RAM_SAVE_FLAG_CONTINUE. > + */ > +xbzrle_counters.bytes += bytes_xbzrle - 8; > ram_counters.transferred += bytes_xbzrle; > > return 1; > @@ -870,9 +891,23 @@ static void migration_update_rates(RAMState *rs, int64_t > end_time) > } > > if (migrate_use_xbzrle()) { > +double encoded_size, unencoded_size; > + > xbzrle_counters.cache_miss_rate = > (double)(xbzrle_counters.cache_miss - > rs->xbzrle_cache_miss_prev) / page_count; > rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss; > +unencoded_size = (xbzrle_counters.pages - rs->xbzrle_pages_prev) * > + TARGET_PAGE_SIZE; > +encoded_size = xbzrle_counters.bytes - rs->xbzrle_bytes_prev; > +if (xbzrle_counters.pages == rs->xbzrle_pages_prev) { > +xbzrle_counters.encoding_rate = 0; > +} else if (!encoded_size) { > +xbzrle_counters.encoding_rate = UINT64_MAX; > +} else { > +xbzrle_counters.encoding_rate = unencoded_size / encoded_size; > +} > +rs->xbzrle_pages_prev = xbzrle_counters.pages; > +rs->xbzrle_bytes_prev = xbzrle_counters.bytes; > } > > if (migrate_use_compression()) { > diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c > index 9b94e67879..c2a3a667ae 100644 > --- a/monitor/hmp-cmds.c > +++ b/monitor/hmp-cmds.c > @@ -303,6 +303,8 @@ void hmp_info_migrate(Monitor *mon,
Re: [PATCH v4 5/5] block/block-copy: use aio-task-pool API
Am 29.04.2020 um 15:08 hat Vladimir Sementsov-Ogievskiy geschrieben: > Run block_copy iterations in parallel in aio tasks. > > Changes: > - BlockCopyTask becomes aio task structure. Add zeroes field to pass > it to block_copy_do_copy > - add call state - it's a state of one call of block_copy(), shared > between parallel tasks. For now used only to keep information about > first error: is it read or not. > - convert block_copy_dirty_clusters to aio-task loop. > > Signed-off-by: Vladimir Sementsov-Ogievskiy > @@ -519,6 +591,7 @@ static int coroutine_fn > block_copy_dirty_clusters(BlockCopyState *s, > } > if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) { > block_copy_task_end(task, 0); > +g_free(task); > progress_set_remaining(s->progress, > bdrv_get_dirty_count(s->copy_bitmap) + > s->in_flight_bytes); > trace_block_copy_skip_range(s, task->offset, task->bytes); > offset = task_end(task); > bytes = end - offset; Coverity found this use after free for task. Please fix. Kevin
Re: [PATCH v4 3/6] net/colo-compare.c: Fix deadlock in compare_chr_send
On Thu, 7 May 2020 11:00:26 + "Zhang, Chen" wrote: > > -Original Message- > > From: Lukas Straub > > Sent: Monday, May 4, 2020 6:28 PM > > To: qemu-devel > > Cc: Zhang, Chen ; Li Zhijian > > ; Jason Wang ; Marc- > > André Lureau ; Paolo Bonzini > > > > Subject: [PATCH v4 3/6] net/colo-compare.c: Fix deadlock in > > compare_chr_send > > > > The chr_out chardev is connected to a filter-redirector running in the > > main loop. qemu_chr_fe_write_all might block here in compare_chr_send > > if the (socket-)buffer is full. > > If another filter-redirector in the main loop want's to send data to > > chr_pri_in it might also block if the buffer is full. This leads to a > > deadlock because both event loops get blocked. > > > > Fix this by converting compare_chr_send to a coroutine and putting the > > packets in a send queue. > > > > Signed-off-by: Lukas Straub > > --- > > net/colo-compare.c | 187 ++--- > > > > net/colo.c | 7 ++ > > net/colo.h | 1 + > > 3 files changed, 150 insertions(+), 45 deletions(-) > > > > diff --git a/net/colo-compare.c b/net/colo-compare.c index > > 1de4220fe2..2a4e7f7c4e 100644 > > --- a/net/colo-compare.c > > +++ b/net/colo-compare.c > > @@ -32,6 +32,9 @@ > > #include "migration/migration.h" > > #include "util.h" > > > > +#include "block/aio-wait.h" > > +#include "qemu/coroutine.h" > > + > > #define TYPE_COLO_COMPARE "colo-compare" > > #define COLO_COMPARE(obj) \ > > OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE) @@ -77,6 > > +80,23 @@ static int event_unhandled_count; > > *|packet | |packet +|packet | |packet + > > *++ ++++ ++ > > */ > > + > > +typedef struct SendCo { > > +Coroutine *co; > > +struct CompareState *s; > > +CharBackend *chr; > > +GQueue send_list; > > +bool notify_remote_frame; > > +bool done; > > +int ret; > > +} SendCo; > > + > > +typedef struct SendEntry { > > +uint32_t size; > > +uint32_t vnet_hdr_len; > > +uint8_t *buf; > > +} SendEntry; > > + > > typedef struct CompareState { > > Object parent; > > > > @@ -91,6 +111,8 @@ typedef struct CompareState { > > SocketReadState pri_rs; > > SocketReadState sec_rs; > > SocketReadState notify_rs; > > +SendCo out_sendco; > > +SendCo notify_sendco; > > bool vnet_hdr; > > uint32_t compare_timeout; > > uint32_t expired_scan_cycle; > > @@ -124,10 +146,11 @@ enum { > > > > > > static int compare_chr_send(CompareState *s, > > -const uint8_t *buf, > > +uint8_t *buf, > > uint32_t size, > > uint32_t vnet_hdr_len, > > -bool notify_remote_frame); > > +bool notify_remote_frame, > > +bool zero_copy); > > > > static bool packet_matches_str(const char *str, > > const uint8_t *buf, @@ -145,7 +168,7 > > @@ static void notify_remote_frame(CompareState *s) > > char msg[] = "DO_CHECKPOINT"; > > int ret = 0; > > > > -ret = compare_chr_send(s, (uint8_t *)msg, strlen(msg), 0, true); > > +ret = compare_chr_send(s, (uint8_t *)msg, strlen(msg), 0, true, > > + false); > > if (ret < 0) { > > error_report("Notify Xen COLO-frame failed"); > > } > > @@ -272,12 +295,13 @@ static void > > colo_release_primary_pkt(CompareState *s, Packet *pkt) > > pkt->data, > > pkt->size, > > pkt->vnet_hdr_len, > > - false); > > + false, > > + true); > > if (ret < 0) { > > error_report("colo send primary packet failed"); > > } > > trace_colo_compare_main("packet same and release packet"); > > -packet_destroy(pkt, NULL); > > +packet_destroy_partial(pkt, NULL); > > } > > > > /* > > @@ -699,65 +723,115 @@ static void colo_compare_connection(void > > *opaque, void *user_data) > > } > > } > > > > -static int compare_chr_send(CompareState *s, > > -const uint8_t *buf, > > -uint32_t size, > > -uint32_t vnet_hdr_len, > > -bool notify_remote_frame) > > +static void coroutine_fn _compare_chr_send(void *opaque) > > { > > +SendCo *sendco = opaque; > > +CompareState *s = sendco->s; > > int ret = 0; > > -uint32_t len = htonl(size); > > > > -if (!size) { > > -return 0; > > -} > > +while (!g_queue_is_empty(>send_list)) { > > +SendEntry *entry = g_queue_pop_tail(>send_list); > > +uint32_t len = htonl(entry->size); > > > > -if (notify_remote_frame) { > > -ret =
Re: [RFC v3 6/6] hmp: add x-debug-virtio commands
On 5/7/20 6:49 AM, Laurent Vivier wrote: This patch implements HMP version of the virtio QMP commands Signed-off-by: Laurent Vivier Most HMP commands do not use '-' in their name. Also, HMP doesn't promise api compatibility; so we could just name this 'info virtio' or 'debug_virtio' without an x- prefix, with no real loss. --- Makefile| 2 +- Makefile.target | 7 +- docs/system/monitor.rst | 2 + hmp-commands-virtio.hx | 160 + hmp-commands.hx | 10 +++ hw/virtio/virtio.c | 193 +++- include/monitor/hmp.h | 4 + monitor/misc.c | 17 8 files changed, 391 insertions(+), 4 deletions(-) create mode 100644 hmp-commands-virtio.hx +SRST +``x-debug-virtio`` *subcommand* + Show various information about virtio. + + Example: + + List all sub-commands:: + +(qemu) x-debug-virtio +x-debug-virtio query -- List all available virtio devices +x-debug-virtio status path -- Display status of a given virtio device +x-debug-virtio queue-status path queue -- Display status of a given virtio queue +x-debug-virtio queue-element path queue [index] -- Display element of a given virtio queue Oh, you're introducing it as a metacommand (like 'info' already is) with several subcommands. Still, naming it 'virtio' instead of 'x-debug-virtio' would make sense to me, even though the underlying QMP commands are (correctly) in the x-debug- namespace. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Re: [PATCH v5 19/31] qcow2: Add subcluster support to calculate_l2_meta()
On Thu 07 May 2020 05:34:18 PM CEST, Alberto Garcia wrote: > On Wed 06 May 2020 07:39:48 PM CEST, Eric Blake wrote: >> In fact, if we rely on 20/31 checking for invalid subclusters when >> computing nb_clusters, we could probably assert that the start and end >> cluster in this function are not invalid, instead of adding the fail: >> label. > > I think you're right with that, good catch! There's no need to return an > error code in this function. No, wait, I got confused. Patch 20/31 updates qcow2_get_host_offset(), and that's used for reading. The caller of calculate_l2_meta() is qcow2_alloc_cluster_offset() (indirectly), which is used for writing. nb_clusters here is calculated with cluster_needs_new_alloc(), but that doesn't check whether a cluster is valid or not, and certainly doesn't check if every single subcluster is valid. Thinking about it, there are probably faster ways to check for the validity of extended L2 entries, for example: - For QCOW2_CLUSTER_NORMAL, (l2_bitmap >> 32) & l2_bitmap should be 0. - For QCOW2_CLUSTER_UNALLOCATED, l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC should be 0. That's enough to cover all cases of QCOW2_SUBCLUSTER_INVALID that we have at the moment. Berto
Re: [RFC v3 3/6] qmp: decode feature bits in virtio-status
On 5/7/20 6:49 AM, Laurent Vivier wrote: Display feature names instead of a features bitmap for host, guest and backend. Decode features according device type, transport features are on the first line. Undecoded bits (if any) are stored in a separate field. Signed-off-by: Laurent Vivier --- I didn't closely review the code, but for the QAPI parts: +++ b/qapi/virtio.json +## +# @VirtioBlkFeature: +# +# An enumeration of Virtio block features +# +# Since: 5.1 +## + +{ 'enum': 'VirtioBlkFeature', + 'data': [ 'size-max', 'seg-max', 'geometry', 'ro', 'blk-size', 'topology', 'mq', 'discard', 'write-zeroes', 'barrier', 'scsi', 'flush', Missing newline. +## +# @VirtioDeviceFeatures: +# +# An union to define the list of features for a virtio device s/An/A/ (in English, 'an' goes with soft u, 'a' goes with pronounced u. You can remember with "a unicorn gets an umbrella") +# +# Since: 5.1 +## + +{ 'union': 'VirtioDeviceFeatures', + 'data': { +'virtio-serial': [ 'VirtioSerialFeature' ], +'virtio-blk': [ 'VirtioBlkFeature' ], +'virtio-gpu': [ 'VirtioGpuFeature' ], +'virtio-net': [ 'VirtioNetFeature' ], +'virtio-scsi': [ 'VirtioScsiFeature' ], +'virtio-balloon': [ 'VirtioBalloonFeature' ], +'virtio-iommu': [ 'VirtioIommuFeature' ] + } +} This is a legacy union rather than a flat union, which results in more {} in the QMP wire format. Is it worth trying to make this a flat union, by labeling an appropriate member as 'discriminator'? + +## +# @VirtioStatusFeatures: +# +# @transport: the list of transport features of the virtio device +# +# @device: the list of the virtio device specific features +# +# @unknown: virtio bitmap of the undecoded features +# +# Since: 5.1 +## + +{ 'struct': 'VirtioStatusFeatures', + 'data': { 'transport': [ 'VirtioTransportFeature' ], +'device': 'VirtioDeviceFeatures', +'unknown': 'uint64' } +} + ## # @VirtioStatus: # @@ -101,9 +245,9 @@ 'data': { 'device-id': 'int', 'device-endian': 'VirtioStatusEndianness', -'guest-features': 'uint64', -'host-features': 'uint64', -'backend-features': 'uint64', +'guest-features': 'VirtioStatusFeatures', +'host-features': 'VirtioStatusFeatures', +'backend-features': 'VirtioStatusFeatures', This is intra-series churn. Should we be trying to get the right types in place from the get-go? Or at least clarify in the commit message of the earlier patch that the format will be incrementally improved later? 'num-vqs': 'uint16' } } @@ -123,18 +267,40 @@ # # -> { "execute": "x-debug-virtio-status", # "arguments": { -# "path": "/machine/peripheral-anon/device[3]/virtio-backend" +# "path": "/machine/peripheral-anon/device[1]/virtio-backend" # } # } # <- { "return": { -# "backend-features": 0, -# "guest-features": 5111807911, -# "num-vqs": 3, -# "host-features": 6337593319, # "device-endian": "little", -# "device-id": 1 +# "device-id": 3, +# "backend-features": { +# "device": { +# "type": "virtio-serial", +# "data": [] +# }, +# "unknown": 0, +# "transport": [] +# }, If we use a flat union, this could look like: "backend-feature": { "type": "virtio-serial", "features": [], "unknown": 0, "transport": [] }, Should 'unknown' be optional so it can be omitted when zero? Should it be named 'unknown-features'? -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Re: [PATCH v4 09/13] migration/ram: Consolidate variable reset after placement in ram_load_postcopy()
* Dr. David Alan Gilbert (dgilb...@redhat.com) wrote: > * David Hildenbrand (da...@redhat.com) wrote: > > Let's consolidate resetting the variables. > > > > Cc: "Dr. David Alan Gilbert" > > Cc: Juan Quintela > > Cc: Peter Xu > > Signed-off-by: David Hildenbrand > > Reviewed-by: Dr. David Alan Gilbert Queued this one only; I had to do manual patch application due to some renaming in 91ba442; but I think it's still OK. > > --- > > migration/ram.c | 10 +- > > 1 file changed, 5 insertions(+), 5 deletions(-) > > > > diff --git a/migration/ram.c b/migration/ram.c > > index 2a2165b478..7eca3165c8 100644 > > --- a/migration/ram.c > > +++ b/migration/ram.c > > @@ -3150,7 +3150,7 @@ static int ram_load_postcopy(QEMUFile *f) > > /* Temporary page that is later 'placed' */ > > void *postcopy_host_page = mis->postcopy_tmp_page; > > void *host_page = NULL; > > -bool all_zero = false; > > +bool all_zero = true; > > int target_pages = 0; > > > > while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { > > @@ -3176,7 +3176,6 @@ static int ram_load_postcopy(QEMUFile *f) > > addr &= TARGET_PAGE_MASK; > > > > trace_ram_load_postcopy_loop((uint64_t)addr, flags); > > -place_needed = false; > > if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE | > > RAM_SAVE_FLAG_COMPRESS_PAGE)) { > > block = ram_block_from_stream(f, flags); > > @@ -3204,9 +3203,7 @@ static int ram_load_postcopy(QEMUFile *f) > > */ > > page_buffer = postcopy_host_page + > >host_page_offset_from_ram_block_offset(block, > > addr); > > -/* If all TP are zero then we can optimise the place */ > > if (target_pages == 1) { > > -all_zero = true; > > host_page = host_page_from_ram_block_offset(block, addr); > > } else if (host_page != host_page_from_ram_block_offset(block, > > addr)) > > { > > @@ -3223,7 +3220,6 @@ static int ram_load_postcopy(QEMUFile *f) > > */ > > if (target_pages == (block->page_size / TARGET_PAGE_SIZE)) { > > place_needed = true; > > -target_pages = 0; > > } > > place_source = postcopy_host_page; > > } > > @@ -3300,6 +3296,10 @@ static int ram_load_postcopy(QEMUFile *f) > > ret = postcopy_place_page(mis, host_page, place_source, > >block); > > } > > +place_needed = false; > > +target_pages = 0; > > +/* Assume we have a zero page until we detect something > > different */ > > +all_zero = true; > > } > > } > > > > -- > > 2.25.1 > > > -- > Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK > > -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
Re: [RFC v3 1/6] qmp: add QMP command x-debug-query-virtio
On 5/7/20 6:49 AM, Laurent Vivier wrote: This new command lists all the instances of VirtIODevice with their path and virtio type Signed-off-by: Laurent Vivier --- +++ b/qapi/virtio.json @@ -0,0 +1,68 @@ +## +# = Virtio devices +## + +## +# @VirtioType: +# +# An enumeration of Virtio device types. +# +# Since: 5.1.0 Inconsistent on x.y.z vs. +## +{ 'enum': 'VirtioType', + 'data': [ 'unknown', 'virtio-9p', 'virtio-blk', 'virtio-serial', +'virtio-gpu', 'virtio-input', 'virtio-net', 'virtio-scsi', +'vhost-user-fs', 'vhost-vsock', 'virtio-balloon', 'virtio-crypto', +'virtio-iommu', 'virtio-pmem', 'virtio-rng' ] +} + +## +# @VirtioInfo: +# +# Information about a given VirtIODevice +# +# @path: VirtIO device canonical path. +# +# @type: VirtIO device type. +# +# Since: 5.1 x.y. Most of our QAPI seems to have settled on just x.y these days. The x-debug- prefix is nice; it gives us more flexibility to mess with this later as needed. Otherwise: Reviewed-by: Eric Blake -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Re: [PATCH v2 5/5] vhost: add device started check in migration set log
On Wed, May 06, 2020 at 06:08:34PM -0400, Raphael Norwitz wrote: > As you correctly point out, this code needs to be looked at more > carefully so that > if the device does disconnect in the background we can handle the migration > path > gracefully. In particular, we need to decide whether a migration > should be allowed > to continue if a device disconnects durning the migration stage. >From what i see from the code it is allowed. At the start of the hw/virtio/vhost.c:vhost_migration_log() routine there is a check: if (!dev->started) { dev->log_enabled = enable; return 0; } So our changes had the same idea. If device isn't started then 0 can be returned. Please note, that if we want to return error here then the following assert will be hit (hw/virtio/vhost.c) static void vhost_log_global_start(MemoryListener *listener) { int r; r = vhost_migration_log(listener, true); if (r < 0) { abort(); } } But as i mentioned we didn't change this logic, we just propogate it on the whole migration start process during vhost handshake. After it our tests passed successfully. > > mst, any thoughts? > > Have you looked at the suggestion I gave Li Feng to move vhost_dev_cleanup() > into the connection path in vhost-user-blk? I’m not sure if he’s > actively working on it, > but I would prefer if we can find a way to keep some state around > between reconnects > so we aren’t constantly checking dev->started. A device can be stopped > for reasons > other than backend disconnect so I’d rather not reuse this field to > check for backend > disconnect failures. In fact i didn't try to use >started field to signal about disconnect. What i tried to follow is that if device not started (because of disconnect or any other reason), there is no need to continue initialization and we can proceed with the next migration step. > > On Thu, Apr 30, 2020 at 9:57 AM Dima Stepanov wrote: > > > > If vhost-user daemon is used as a backend for the vhost device, then we > > should consider a possibility of disconnect at any moment. If such > > disconnect happened in the vhost_migration_log() routine the vhost > > device structure will be clean up. > > At the start of the vhost_migration_log() function there is a check: > > if (!dev->started) { > > dev->log_enabled = enable; > > return 0; > > } > > To be consistent with this check add the same check after calling the > > vhost_dev_set_log() routine. This in general help not to break a > > Could you point to the specific asserts which are being triggered? Just to be clear here. The assert message i mentioned is described above. I wanted to explain why we followed the "(!dev->started) return 0" logic. And in this case we didn't return error and return 0. But the first error we hit during migration testing was SIGSEGV: Program terminated with signal SIGSEGV, Segmentation fault. #0 0x56354db0a74a in vhost_dev_has_iommu (dev=0x563550562b00) at hw/virtio/vhost.c:299 299 return vdev->dma_as != _space_memory && (gdb) p vdev $1 = (VirtIODevice *) 0x0 (gdb) bt #0 0x56354db0a74a in vhost_dev_has_iommu (dev=0x563550562b00) at hw/virtio/vhost.c:299 #1 0x56354db0bb76 in vhost_dev_set_features (dev=0x563550562b00, enable_log=true) at hw/virtio/vhost.c:777 #2 0x56354db0bc1e in vhost_dev_set_log (dev=0x563550562b00, enable_log=true) at hw/virtio/vhost.c:790 #3 0x56354db0be58 in vhost_migration_log (listener=0x563550562b08, enable=1) at hw/virtio/vhost.c:834 #4 0x56354db0be9b in vhost_log_global_start (listener=0x563550562b08) at hw/virtio/vhost.c:847 #5 0x56354da72e7e in memory_global_dirty_log_start () at memory.c:2611 ... > > > migration due the assert() message. But it looks like that this code > > should be revised to handle these errors more carefully. > > > > In case of vhost-user device backend the fail paths should consider the > > state of the device. In this case we should skip some function calls > > during rollback on the error paths, so not to get the NULL dereference > > errors. > > > > Signed-off-by: Dima Stepanov > > --- > > hw/virtio/vhost.c | 39 +++ > > 1 file changed, 35 insertions(+), 4 deletions(-) > > > > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c > > index 3ee50c4..d5ab96d 100644 > > --- a/hw/virtio/vhost.c > > +++ b/hw/virtio/vhost.c > > @@ -787,6 +787,17 @@ static int vhost_dev_set_features(struct vhost_dev > > *dev, > > static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log) > > { > > int r, i, idx; > > A couple points here > > > (1) This will fail the live migration if the device is disconnected. > That my be the right thing > to do, but if there are cases where migrations can proceed with > a disconnected device, > this may not be desirable. I'm not sure that it is correct. VM could be migrated successfully during
Re: [PATCH v5 19/31] qcow2: Add subcluster support to calculate_l2_meta()
On Wed 06 May 2020 07:39:48 PM CEST, Eric Blake wrote: > In fact, if we rely on 20/31 checking for invalid subclusters when > computing nb_clusters, we could probably assert that the start and end > cluster in this function are not invalid, instead of adding the fail: > label. I think you're right with that, good catch! There's no need to return an error code in this function. Berto
Re: [PATCH v3] migration/throttle: Add cpu-throttle-tailslow migration parameter
* Keqian Zhu (zhukeqi...@huawei.com) wrote: > At the tail stage of throttling, the Guest is very sensitive to > CPU percentage while the @cpu-throttle-increment is excessive > usually at tail stage. > > If this parameter is true, we will compute the ideal CPU percentage > used by the Guest, which may exactly make the dirty rate match the > dirty rate threshold. Then we will choose a smaller throttle increment > between the one specified by @cpu-throttle-increment and the one > generated by ideal CPU percentage. > > Therefore, it is compatible to traditional throttling, meanwhile > the throttle increment won't be excessive at tail stage. This may > make migration time longer, and is disabled by default. > > Signed-off-by: Keqian Zhu Queued > --- > Cc: Juan Quintela > Cc: "Dr. David Alan Gilbert" > Cc: Eric Blake > Cc: Markus Armbruster > --- > migration/migration.c | 13 > migration/ram.c | 25 +- > monitor/hmp-cmds.c| 8 > qapi/migration.json | 48 +++ > 4 files changed, 89 insertions(+), 5 deletions(-) > > diff --git a/migration/migration.c b/migration/migration.c > index 187ac0410c..d478a87290 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -785,6 +785,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error > **errp) > params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; > params->has_cpu_throttle_increment = true; > params->cpu_throttle_increment = s->parameters.cpu_throttle_increment; > +params->has_cpu_throttle_tailslow = true; > +params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow; > params->has_tls_creds = true; > params->tls_creds = g_strdup(s->parameters.tls_creds); > params->has_tls_hostname = true; > @@ -1324,6 +1326,10 @@ static void > migrate_params_test_apply(MigrateSetParameters *params, > dest->cpu_throttle_increment = params->cpu_throttle_increment; > } > > +if (params->has_cpu_throttle_tailslow) { > +dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow; > +} > + > if (params->has_tls_creds) { > assert(params->tls_creds->type == QTYPE_QSTRING); > dest->tls_creds = g_strdup(params->tls_creds->u.s); > @@ -1412,6 +1418,10 @@ static void migrate_params_apply(MigrateSetParameters > *params, Error **errp) > s->parameters.cpu_throttle_increment = > params->cpu_throttle_increment; > } > > +if (params->has_cpu_throttle_tailslow) { > +s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow; > +} > + > if (params->has_tls_creds) { > g_free(s->parameters.tls_creds); > assert(params->tls_creds->type == QTYPE_QSTRING); > @@ -3594,6 +3604,8 @@ static Property migration_properties[] = { > DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, >parameters.cpu_throttle_increment, >DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), > +DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, > + parameters.cpu_throttle_tailslow, false), > DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, >parameters.max_bandwidth, MAX_THROTTLE), > DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, > @@ -3700,6 +3712,7 @@ static void migration_instance_init(Object *obj) > params->has_throttle_trigger_threshold = true; > params->has_cpu_throttle_initial = true; > params->has_cpu_throttle_increment = true; > +params->has_cpu_throttle_tailslow = true; > params->has_max_bandwidth = true; > params->has_downtime_limit = true; > params->has_x_checkpoint_delay = true; > diff --git a/migration/ram.c b/migration/ram.c > index 04f13feb2e..3317c99786 100644 > --- a/migration/ram.c > +++ b/migration/ram.c > @@ -616,20 +616,34 @@ static size_t save_page_header(RAMState *rs, QEMUFile > *f, RAMBlock *block, > * able to complete migration. Some workloads dirty memory way too > * fast and will not effectively converge, even with auto-converge. > */ > -static void mig_throttle_guest_down(void) > +static void mig_throttle_guest_down(uint64_t bytes_dirty_period, > +uint64_t bytes_dirty_threshold) > { > MigrationState *s = migrate_get_current(); > uint64_t pct_initial = s->parameters.cpu_throttle_initial; > -uint64_t pct_icrement = s->parameters.cpu_throttle_increment; > +uint64_t pct_increment = s->parameters.cpu_throttle_increment; > +bool pct_tailslow = s->parameters.cpu_throttle_tailslow; > int pct_max = s->parameters.max_cpu_throttle; > > +uint64_t throttle_now = cpu_throttle_get_percentage(); > +uint64_t cpu_now, cpu_ideal, throttle_inc; > + > /* We have not started throttling yet. Let's start it. */ > if (!cpu_throttle_active()) { >
Re: [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend
On 4/20/20 11:32 AM, Cindy Lu wrote: > diff --git a/include/hw/virtio/vhost-backend.h > b/include/hw/virtio/vhost-backend.h > index 6f6670783f..d81bd9885f 100644 > --- a/include/hw/virtio/vhost-backend.h > +++ b/include/hw/virtio/vhost-backend.h > @@ -17,7 +17,8 @@ typedef enum VhostBackendType { > VHOST_BACKEND_TYPE_NONE = 0, > VHOST_BACKEND_TYPE_KERNEL = 1, > VHOST_BACKEND_TYPE_USER = 2, > -VHOST_BACKEND_TYPE_MAX = 3, > +VHOST_BACKEND_TYPE_VDPA = 3, > +VHOST_BACKEND_TYPE_MAX = 4, > } VhostBackendType; > > typedef enum VhostSetConfigType { > @@ -112,6 +113,7 @@ typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev > *dev, > typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev, > struct vhost_inflight *inflight); > > +typedef int (*vhost_set_state_op)(struct vhost_dev *dev, int state); I think state should be of type uint8_t.
Re: [PATCH] linux-user/sparc64: Translate flushw opcode
On Thu, May 7, 2020 at 4:17 PM Laurent Vivier wrote: > > Hi, > > could someone with SPARC knowledge review this patch? > I'd like to add it to linux-user queue. > Since it's TCG, I'd really preferred a review from Richard. Regards, Artyom > > Le 17/04/2020 à 13:06, LemonBoy a écrit : > > Ping, I forgot to CC the SPARC mantainers in the previous mail. > > > > On 10/04/20 23:14, LemonBoy wrote: > >> From 11d0cfe58d12e0f191b435ade88622cfceb2098a Mon Sep 17 00:00:00 2001 > >> From: LemonBoy > >> Date: Fri, 10 Apr 2020 22:55:26 +0200 > >> Subject: [PATCH] linux-user/sparc64: Translate flushw opcode > >> > >> The ifdef logic should unconditionally compile in the `xop == 0x2b` case > >> when targeting sparc64. > >> > >> Fix the handling of window spill traps by keeping cansave into account > >> when calculating the new CWP. > >> > >> Signed-off-by: Giuseppe Musacchio > >> --- > >> bsd-user/main.c | 4 +++- > >> linux-user/sparc/cpu_loop.c | 4 +++- > >> target/sparc/translate.c| 2 ++ > >> 3 files changed, 8 insertions(+), 2 deletions(-) > >> > >> diff --git a/bsd-user/main.c b/bsd-user/main.c > >> index 770c2b267a..d6b1c997e3 100644 > >> --- a/bsd-user/main.c > >> +++ b/bsd-user/main.c > >> @@ -413,7 +413,9 @@ static void save_window(CPUSPARCState *env) > >> save_window_offset(env, cpu_cwp_dec(env, env->cwp - 2)); > >> env->wim = new_wim; > >> #else > >> -save_window_offset(env, cpu_cwp_dec(env, env->cwp - 2)); > >> +/* cansave is zero if the spill trap handler is triggered by `save` > >> and */ > >> +/* nonzero if triggered by a `flushw` */ > >> +save_window_offset(env, cpu_cwp_dec(env, env->cwp - env->cansave - > >> 2)); > >> env->cansave++; > >> env->canrestore--; > >> #endif > >> diff --git a/linux-user/sparc/cpu_loop.c b/linux-user/sparc/cpu_loop.c > >> index 7645cc04ca..20a7401126 100644 > >> --- a/linux-user/sparc/cpu_loop.c > >> +++ b/linux-user/sparc/cpu_loop.c > >> @@ -69,7 +69,9 @@ static void save_window(CPUSPARCState *env) > >> save_window_offset(env, cpu_cwp_dec(env, env->cwp - 2)); > >> env->wim = new_wim; > >> #else > >> -save_window_offset(env, cpu_cwp_dec(env, env->cwp - 2)); > >> +/* cansave is zero if the spill trap handler is triggered by `save` > >> and */ > >> +/* nonzero if triggered by a `flushw` */ > >> +save_window_offset(env, cpu_cwp_dec(env, env->cwp - env->cansave - > >> 2)); > >> env->cansave++; > >> env->canrestore--; > >> #endif > >> diff --git a/target/sparc/translate.c b/target/sparc/translate.c > >> index 9416a551cf..1a4efd4ed6 100644 > >> --- a/target/sparc/translate.c > >> +++ b/target/sparc/translate.c > >> @@ -3663,6 +3663,8 @@ static void disas_sparc_insn(DisasContext * dc, > >> unsigned int insn) > >> #endif > >> gen_store_gpr(dc, rd, cpu_tmp0); > >> break; > >> +#endif > >> +#if defined(TARGET_SPARC64) || !defined(CONFIG_USER_ONLY) > >> } else if (xop == 0x2b) { /* rdtbr / V9 flushw */ > >> #ifdef TARGET_SPARC64 > >> gen_helper_flushw(cpu_env); > >> > > > -- Regards, Artyom Tarasenko SPARC and PPC PReP under qemu blog: http://tyom.blogspot.com/search/label/qemu
Re: [PATCH v5 00/14] LUKS: encryption slot management using amend interface
Patchew URL: https://patchew.org/QEMU/20200507125414.2151-1-mlevi...@redhat.com/ Hi, This series seems to have some coding style problems. See output below for more information: Message-id: 20200507125414.2151-1-mlevi...@redhat.com Subject: [PATCH v5 00/14] LUKS: encryption slot management using amend interface Type: series === TEST SCRIPT BEGIN === #!/bin/bash git rev-parse base > /dev/null || exit 0 git config --local diff.renamelimit 0 git config --local diff.renames True git config --local diff.algorithm histogram ./scripts/checkpatch.pl --mailback base.. === TEST SCRIPT END === Switched to a new branch 'test' c3de092 iotests: add tests for blockdev-amend 309ee4c block/qcow2: implement blockdev-amend 7779b12 block/crypto: implement blockdev-amend d8486a5 block/core: add generic infrastructure for x-blockdev-amend qmp command f67a1ec iotests: qemu-img tests for luks key management 80686b5 iotests: filter few more luks specific create options 2e8c1b2 block/qcow2: extend qemu-img amend interface with crypto options 92bdda5 block/crypto: implement the encryption key management a324070 block/crypto: rename two functions 4ab0233 block/amend: refactor qcow2 amend options eec91bb block/amend: separate amend and create options for qemu-img b93a373 block/amend: add 'force' option f24e468 qcrypto/luks: implement encryption key management aebcb19 qcrypto/core: add generic infrastructure for crypto options amendment === OUTPUT BEGIN === 1/14 Checking commit aebcb1911ca9 (qcrypto/core: add generic infrastructure for crypto options amendment) 2/14 Checking commit f24e46892c52 (qcrypto/luks: implement encryption key management) 3/14 Checking commit b93a3730838d (block/amend: add 'force' option) 4/14 Checking commit eec91bb69321 (block/amend: separate amend and create options for qemu-img) ERROR: Macros with multiple statements should be enclosed in a do - while loop #31: FILE: block/qcow2.c:5515: +#define QCOW_COMMON_OPTIONS \ +{ \ +.name = BLOCK_OPT_SIZE, \ +.type = QEMU_OPT_SIZE, \ +.help = "Virtual disk size" \ +}, \ +{ \ +.name = BLOCK_OPT_COMPAT_LEVEL, \ +.type = QEMU_OPT_STRING,\ +.help = "Compatibility level (v2 [0.10] or v3 [1.1])" \ +}, \ +{ \ +.name = BLOCK_OPT_BACKING_FILE, \ +.type = QEMU_OPT_STRING,\ +.help = "File name of a base image" \ +}, \ +{ \ +.name = BLOCK_OPT_BACKING_FMT, \ +.type = QEMU_OPT_STRING,\ +.help = "Image format of the base image"\ +}, \ +{ \ +.name = BLOCK_OPT_DATA_FILE,\ +.type = QEMU_OPT_STRING,\ +.help = "File name of an external data file"\ +}, \ +{ \ +.name = BLOCK_OPT_DATA_FILE_RAW,\ +.type = QEMU_OPT_BOOL, \ +.help = "The external data file must stay valid " \ +"as a raw image"\ +}, \ +{ \ +.name = BLOCK_OPT_ENCRYPT, \ +.type = QEMU_OPT_BOOL, \ +.help = "Encrypt the image with format 'aes'. (Deprecated " \ +"in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)",\ +}, \ +{ \ +.name = BLOCK_OPT_ENCRYPT_FORMAT, \ +.type = QEMU_OPT_STRING,\ +.help = "Encrypt the image, format choices: 'aes', 'luks'", \ +}, \ +
Re: [PATCH v4 11/14] block/core: add generic infrastructure for x-blockdev-amend qmp command
On 5/5/20 3:08 PM, Maxim Levitsky wrote: blockdev-amend will be used similiar to blockdev-create to allow on the fly changes of the structure of the format based block devices. Current plan is to first support encryption keyslot management for luks based formats (raw and embedded in qcow2) Signed-off-by: Maxim Levitsky Reviewed-by: Daniel P. Berrangé --- +++ b/qapi/block-core.json @@ -4649,6 +4649,48 @@ 'data': { 'job-id': 'str', 'options': 'BlockdevCreateOptions' } } +## +# @BlockdevAmendOptions: +# +# Options for amending an image format +# +# @driver block driver that is suitable for the image +# +# Since: 5.0 We'll need to scrub patches for s/5.0/5.1/ -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Re: [PATCH-for-5.1 v3 4/7] migration/colo: Add missing error-propagation code
* Philippe Mathieu-Daudé (f4...@amsat.org) wrote: > Running the coccinelle script produced: > > $ spatch \ > --macro-file scripts/cocci-macro-file.h --include-headers \ > --sp-file scripts/coccinelle/find-missing-error_propagate.cocci \ > --keep-comments --smpl-spacing --dir . > HANDLING: ./migration/colo.c > [[manual check required: error_propagate() might be missing in > migrate_set_block_enabled() ./migration/colo.c:439:4]] > > Add the missing error_propagate() after review. > > Reviewed-by: Juan Quintela > Signed-off-by: Philippe Mathieu-Daudé Queued this entry for migration > --- > migration/colo.c | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/migration/colo.c b/migration/colo.c > index a54ac84f41..57b2adb0cc 100644 > --- a/migration/colo.c > +++ b/migration/colo.c > @@ -437,6 +437,9 @@ static int colo_do_checkpoint_transaction(MigrationState > *s, > > /* Disable block migration */ > migrate_set_block_enabled(false, _err); > +if (local_err) { > +goto out; > +} > qemu_mutex_lock_iothread(); > > #ifdef CONFIG_REPLICATION > -- > 2.21.1 > > -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
[PATCH 5/5] docs/system: Document Musca boards
Provide a minimal documentation of the Musca boards. Signed-off-by: Peter Maydell --- docs/system/arm/musca.rst | 31 +++ docs/system/target-arm.rst | 1 + MAINTAINERS| 1 + 3 files changed, 33 insertions(+) create mode 100644 docs/system/arm/musca.rst diff --git a/docs/system/arm/musca.rst b/docs/system/arm/musca.rst new file mode 100644 index 000..8375c5008d5 --- /dev/null +++ b/docs/system/arm/musca.rst @@ -0,0 +1,31 @@ +Arm Musca boards (``musca-a``, ``musca-b1``) + + +The Arm Musca development boards are a reference implementation +of a system using the SSE-200 subsystem for embedded. They are +dual Cortex-M33 systems. + +QEMU provides models of the A and B1 variants of this board. + +Unimplemented devices: + +- SPI +- |I2C| +- |I2S| +- PWM +- QSPI +- Timer +- SCC +- GPIO +- eFlash +- MHU +- PVT +- SDIO +- CryptoCell + +Note that (like the real hardware) the Musca-A machine is +asymmetric: CPU 0 does not have the FPU or DSP extensions, +but CPU 1 does. Also like the real hardware, the memory maps +for the A and B1 variants differ significantly, so guest +software must be built for the right variant. + diff --git a/docs/system/target-arm.rst b/docs/system/target-arm.rst index 15bcf9f81f0..1b86b93c346 100644 --- a/docs/system/target-arm.rst +++ b/docs/system/target-arm.rst @@ -72,6 +72,7 @@ undocumented; you can get a complete list by running arm/integratorcp arm/mps2 + arm/musca arm/realview arm/versatile arm/vexpress diff --git a/MAINTAINERS b/MAINTAINERS index ea7bdd359e0..f8e0fdb4ef2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -706,6 +706,7 @@ M: Peter Maydell L: qemu-...@nongnu.org S: Maintained F: hw/arm/musca.c +F: docs/system/arm/musca.rst Musicpal M: Jan Kiszka -- 2.20.1
[PATCH 4/5] docs/system: Document the various MPS2 models
Add basic documentation of the MPS2 board models. Signed-off-by: Peter Maydell --- docs/system/arm/mps2.rst | 29 + docs/system/target-arm.rst | 1 + MAINTAINERS| 1 + 3 files changed, 31 insertions(+) create mode 100644 docs/system/arm/mps2.rst diff --git a/docs/system/arm/mps2.rst b/docs/system/arm/mps2.rst new file mode 100644 index 000..3a98cb59b0d --- /dev/null +++ b/docs/system/arm/mps2.rst @@ -0,0 +1,29 @@ +Arm MPS2 boards (``mps2-an385``, ``mps2-an505``, ``mps2-an511``, ``mps2-an521``) + + +These board models all use Arm M-profile CPUs. + +The Arm MPS2 and MPS2+ dev boards are FPGA based (the 2+ has a bigger +FPGA but is otherwise the same as the 2). Since the CPU itself +and most of the devices are in the FPGA, the details of the board +as seen by the guest depend significantly on the FPGA image. + +QEMU models the following FPGA images: + +``mps2-an385`` + Cortex-M3 as documented in ARM Application Note AN385 +``mps2-an511`` + Cortex-M3 'DesignStart' as documented in AN511 +``mps2-an505`` + Cortex-M33 as documented in ARM Application Note AN505 +``mps2-an521`` + Dual Cortex-M33 as documented in Application Note AN521 + +Differences between QEMU and real hardware: + +- AN385 remapping of low 16K of memory to either ZBT SSRAM1 or to + block RAM is unimplemented (QEMU always maps this to ZBT SSRAM1, as + if zbt_boot_ctrl is always zero) +- QEMU provides a LAN9118 ethernet rather than LAN9220; the only guest + visible difference is that the LAN9118 doesn't support checksum + offloading diff --git a/docs/system/target-arm.rst b/docs/system/target-arm.rst index 4779293d731..15bcf9f81f0 100644 --- a/docs/system/target-arm.rst +++ b/docs/system/target-arm.rst @@ -71,6 +71,7 @@ undocumented; you can get a complete list by running :maxdepth: 1 arm/integratorcp + arm/mps2 arm/realview arm/versatile arm/vexpress diff --git a/MAINTAINERS b/MAINTAINERS index 74cff1c3818..ea7bdd359e0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -699,6 +699,7 @@ F: hw/misc/armsse-cpuid.c F: include/hw/misc/armsse-cpuid.h F: hw/misc/armsse-mhu.c F: include/hw/misc/armsse-mhu.h +F: docs/system/arm/mps2.rst Musca M: Peter Maydell -- 2.20.1
[PATCH 3/5] docs/system: Document Arm Versatile Express boards
Provide a minimal documentation of the Versatile Express boards (vexpress-a9, vexpress-a15). Signed-off-by: Peter Maydell --- docs/system/arm/vexpress.rst | 60 docs/system/target-arm.rst | 1 + MAINTAINERS | 1 + 3 files changed, 62 insertions(+) create mode 100644 docs/system/arm/vexpress.rst diff --git a/docs/system/arm/vexpress.rst b/docs/system/arm/vexpress.rst new file mode 100644 index 000..7f1bcbef078 --- /dev/null +++ b/docs/system/arm/vexpress.rst @@ -0,0 +1,60 @@ +Arm Versatile Express boards (``vexpress-a9``, ``vexpress-a15``) + + +QEMU models two variants of the Arm Versatile Express development +board family: + +- ``vexpress-a9`` models the combination of the Versatile Express + motherboard and the CoreTile Express A9x4 daughterboard +- ``vexpress-a15`` models the combination of the Versatile Express + motherboard and the CoreTile Express A15x2 daughterboard + +Note that as this hardware does not have PCI, IDE or SCSI, +the only available storage option is emulated SD card. + +Implemented devices: + +- PL041 audio +- PL181 SD controller +- PL050 keyboard and mouse +- PL011 UARTs +- SP804 timers +- I2C controller +- PL031 RTC +- PL111 LCD display controller +- Flash memory +- LAN9118 ethernet + +Unimplemented devices: + +- SP810 system control block +- PCI-express +- USB controller (Philips ISP1761) +- Local DAP ROM +- CoreSight interfaces +- PL301 AXI interconnect +- SCC +- System counter +- HDLCD controller (``vexpress-a15``) +- SP805 watchdog +- PL341 dynamic memory controller +- DMA330 DMA controller +- PL354 static memory controller +- BP147 TrustZone Protection Controller +- TrustZone Address Space Controller + +Other differences between the hardware and the QEMU model: + +- QEMU will default to creating one CPU unless you pass a different + ``-smp`` argument +- QEMU allows the amount of RAM provided to be specified with the + ``-m`` argument +- QEMU defaults to providing a CPU which does not provide either + TrustZone or the Virtualization Extensions: if you want these you + must enable them with ``-machine secure=on`` and ``-machine + virtualization=on`` +- QEMU provides 4 virtio-mmio virtio transports; these start at + address ``0x10013000`` for ``vexpress-a9`` and at ``0x1c13`` for + ``vexpress-a15``, and have IRQs from 40 upwards. If a dtb is + provided on the command line then QEMU will edit it to include + suitable entries describing these transports for the guest. diff --git a/docs/system/target-arm.rst b/docs/system/target-arm.rst index d1196cbe01c..4779293d731 100644 --- a/docs/system/target-arm.rst +++ b/docs/system/target-arm.rst @@ -73,6 +73,7 @@ undocumented; you can get a complete list by running arm/integratorcp arm/realview arm/versatile + arm/vexpress arm/musicpal arm/nseries arm/orangepi diff --git a/MAINTAINERS b/MAINTAINERS index 1f84e3ae2c6..74cff1c3818 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -826,6 +826,7 @@ M: Peter Maydell L: qemu-...@nongnu.org S: Maintained F: hw/arm/vexpress.c +F: docs/system/arm/vexpress.rst Versatile PB M: Peter Maydell -- 2.20.1
[Bug 1877384] [NEW] 9pfs file create with mapped-xattr can fail on overlayfs
Public bug reported: QEMU Version: 3.1.0 as packaged in debian buster, but the code appears to do the same in master. qemu command-line: qemu-system-x86_64 -m 1G -nographic -nic "user,model=virtio-net-pci,tftp=$(pwd),net=10.0.2.0/24,host=10.0.2.2" -fsdev local,id=fs,path=$thisdir/..,security_model=mapped-xattr -device virtio-9p-pci,fsdev=fs,mount_tag=fs -drive "file=$rootdisk,if=virtio,format=raw" -kernel "$kernel" -initrd "$initrd" -append "$append" I'm using CI that runs in a Docker container and runs a qemu VM with code and results shared via virtio 9p. The 9p fsdev is configured with security_model=mapped-xattr When the test code attempts to create a log file in an existing directory, open with O_CREAT fails with -ENOENT. The relevant strace excerpt is: 28791 openat(11, ".", O_RDONLY|O_NOFOLLOW|O_PATH|O_DIRECTORY) = 20 28791 openat(20, "src", O_RDONLY|O_NOCTTY|O_NONBLOCK|O_NOFOLLOW|O_DIRECTORY) = 21 28791 fcntl(21, F_SETFL, O_RDONLY|O_DIRECTORY) = 0 28791 close(20) = 0 28791 openat(21, "client.log", O_WRONLY|O_CREAT|O_NOCTTY|O_NONBLOCK|O_NOFOLLOW, 0600) = 20 28791 fcntl(20, F_SETFL, O_WRONLY|O_CREAT|O_NONBLOCK|O_NOFOLLOW) = 0 28791 lsetxattr("/proc/self/fd/21/client.log", "user.virtfs.uid", "\0\0\0", 4, 0) = -1 ENOENT (No such file or directory) My hypothesis for what's going wrong is since the Docker container's overlayfs copies-up on writes, when it opens the file it's created a new version of the `src` directory containing a `client.log`, but this new src directory isn't accessible by file descriptor 20 and the lsetxattr call is instead attempting to set attributes on the path in the old `src` directory. Looking at the code, a fix would be to change `hw/9pfs/9p-local.c` and change `local_open2` to instead of calling `local_set_xattrat` to set the xattrs by directory file descriptor and file name, to have a version of local_set_xattrat` which uses `fsetxattr` to set the virtfs attributes instead of the `fsetxattrat_nofollow` helper. This reliably happened for me in CI, but I don't have access to the CI host or the time to strip the test down to make a minimal test case, and had difficulty reproducing the error on other machines. ** Affects: qemu Importance: Undecided Status: New -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1877384 Title: 9pfs file create with mapped-xattr can fail on overlayfs Status in QEMU: New Bug description: QEMU Version: 3.1.0 as packaged in debian buster, but the code appears to do the same in master. qemu command-line: qemu-system-x86_64 -m 1G -nographic -nic "user,model=virtio-net-pci,tftp=$(pwd),net=10.0.2.0/24,host=10.0.2.2" -fsdev local,id=fs,path=$thisdir/..,security_model=mapped-xattr -device virtio-9p-pci,fsdev=fs,mount_tag=fs -drive "file=$rootdisk,if=virtio,format=raw" -kernel "$kernel" -initrd "$initrd" -append "$append" I'm using CI that runs in a Docker container and runs a qemu VM with code and results shared via virtio 9p. The 9p fsdev is configured with security_model=mapped-xattr When the test code attempts to create a log file in an existing directory, open with O_CREAT fails with -ENOENT. The relevant strace excerpt is: 28791 openat(11, ".", O_RDONLY|O_NOFOLLOW|O_PATH|O_DIRECTORY) = 20 28791 openat(20, "src", O_RDONLY|O_NOCTTY|O_NONBLOCK|O_NOFOLLOW|O_DIRECTORY) = 21 28791 fcntl(21, F_SETFL, O_RDONLY|O_DIRECTORY) = 0 28791 close(20) = 0 28791 openat(21, "client.log", O_WRONLY|O_CREAT|O_NOCTTY|O_NONBLOCK|O_NOFOLLOW, 0600) = 20 28791 fcntl(20, F_SETFL, O_WRONLY|O_CREAT|O_NONBLOCK|O_NOFOLLOW) = 0 28791 lsetxattr("/proc/self/fd/21/client.log", "user.virtfs.uid", "\0\0\0", 4, 0) = -1 ENOENT (No such file or directory) My hypothesis for what's going wrong is since the Docker container's overlayfs copies-up on writes, when it opens the file it's created a new version of the `src` directory containing a `client.log`, but this new src directory isn't accessible by file descriptor 20 and the lsetxattr call is instead attempting to set attributes on the path in the old `src` directory. Looking at the code, a fix would be to change `hw/9pfs/9p-local.c` and change `local_open2` to instead of calling `local_set_xattrat` to set the xattrs by directory file descriptor and file name, to have a version of local_set_xattrat` which uses `fsetxattr` to set the virtfs attributes instead of the `fsetxattrat_nofollow` helper. This reliably happened for me in CI, but I don't have access to the CI host or the time to strip the test down to make a minimal test case, and had difficulty reproducing the error on other machines. To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/1877384/+subscriptions
Re: [PATCH v2 10/9] qed: Simplify backing reads
On 5/7/20 9:45 AM, Eric Blake wrote: The other four drivers that support backing files (qcow, qcow2, parallels, vmdk) all rely on the block layer to populate zeroes when reading beyond EOF of a short backing file. We can simplify the qed code by doing likewise. Signed-off-by: Eric Blake --- I noticed this during my audit that v1 of Vladimir's series was correct. No change in iotests results (test 274 is currently failing for qed, but for other reasons: +Traceback (most recent call last): + File "274", line 24, in +iotests.verify_image_format(supported_fmts=['qcow2']) +AttributeError: module 'iotests' has no attribute 'verify_image_format' ) That iotest failure was due to a stale branch on my end; after updating to latest git master plus Kevin's latest 'block' branch, 274 is now skipped on qed. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
[PATCH 1/5] docs/system: Add 'Arm' to the Integrator/CP document title
Add 'Arm' to the Integrator/CP document title, for consistency with the titling of the other documentation of Arm devboard models (versatile, realview). Signed-off-by: Peter Maydell --- docs/system/arm/integratorcp.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/system/arm/integratorcp.rst b/docs/system/arm/integratorcp.rst index e6f050f602b..594438008e4 100644 --- a/docs/system/arm/integratorcp.rst +++ b/docs/system/arm/integratorcp.rst @@ -1,5 +1,5 @@ -Integrator/CP (``integratorcp``) - +Arm Integrator/CP (``integratorcp``) + The Arm Integrator/CP board is emulated with the following devices: -- 2.20.1
Re: [PULL 0/5] Misc crypto subsystem fixes
On Thu, 7 May 2020 at 12:59, Daniel P. Berrangé wrote: > > The following changes since commit 609dd53df540edd72faee705205aceca9c42fea5: > > Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20200506' into stag= > ing (2020-05-07 09:45:54 +0100) > > are available in the Git repository at: > > https://github.com/berrange/qemu tags/qcrypto-next-pull-request > > for you to fetch changes up to 6022e15d146d8bba9610a9d134afa4bc6e5d9275: > > crypto: extend hash benchmark to cover more algorithms (2020-05-07 12:52:51= > +0100) > > > Misc crypto subsystem fixes > > * Improve error message for large files when creating LUKS volumes > * Expand crypto hash benchmark coverage > * Misc code refactoring with no functional change > > Applied, thanks. Please update the changelog at https://wiki.qemu.org/ChangeLog/5.1 for any user-visible changes. -- PMM
[PATCH 2/5] docs/system: Sort Arm board index into alphabetical order
Sort the board index into alphabetical order. (Note that we need to sort alphabetically by the title text of each file, which isn't the same ordering as sorting by the filename.) Signed-off-by: Peter Maydell --- docs/system/target-arm.rst | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/system/target-arm.rst b/docs/system/target-arm.rst index 324e2af1cbc..d1196cbe01c 100644 --- a/docs/system/target-arm.rst +++ b/docs/system/target-arm.rst @@ -71,15 +71,15 @@ undocumented; you can get a complete list by running :maxdepth: 1 arm/integratorcp - arm/versatile arm/realview - arm/xscale - arm/palm - arm/nseries - arm/stellaris + arm/versatile arm/musicpal - arm/sx1 + arm/nseries arm/orangepi + arm/palm + arm/xscale + arm/sx1 + arm/stellaris Arm CPU features -- 2.20.1
[PATCH 0/5] docs/system: Document some arm board models
This patchset adds (minimal) documentation of these Arm board models: vexpress-a15 ARM Versatile Express for Cortex-A15 vexpress-a9 ARM Versatile Express for Cortex-A9 mps2-an385 ARM MPS2 with AN385 FPGA image for Cortex-M35 mps2-an505 ARM MPS2 with AN505 FPGA image for Cortex-M33 mps2-an511 ARM MPS2 with AN511 DesignStart FPGA image for Cortex-M3 mps2-an521 ARM MPS2 with AN521 FPGA image for dual Cortex-M33 musca-a ARM Musca-A board (dual Cortex-M33) musca-b1 ARM Musca-B1 board (dual Cortex-M33) to the system emulator manual. Patches 1 and 2 are minor tidyup of the board table-of-contents before we start adding new entries with patches 3-5. I'm aiming more for "at least note that the boards exist" than "fully comprehensive" documentation here -- there are still another 37 Arm board models with no documentation at all... thanks -- PMM Peter Maydell (5): docs/system: Add 'Arm' to the Integrator/CP document title docs/system: Sort Arm board index into alphabetical order docs/system: Document Arm Versatile Express boards docs/system: Document the various MPS2 models docs/system: Document Musca boards docs/system/arm/integratorcp.rst | 4 +-- docs/system/arm/mps2.rst | 29 +++ docs/system/arm/musca.rst| 31 + docs/system/arm/vexpress.rst | 60 docs/system/target-arm.rst | 15 MAINTAINERS | 3 ++ 6 files changed, 134 insertions(+), 8 deletions(-) create mode 100644 docs/system/arm/mps2.rst create mode 100644 docs/system/arm/musca.rst create mode 100644 docs/system/arm/vexpress.rst -- 2.20.1
Re: [PATCH Kernel v18 6/7] vfio iommu: Add migration capability to report supported features
On Thu, 7 May 2020 11:07:26 +0530 Kirti Wankhede wrote: > On 5/7/2020 3:57 AM, Alex Williamson wrote: > > On Mon, 4 May 2020 21:28:58 +0530 > > Kirti Wankhede wrote: > > > >> Added migration capability in IOMMU info chain. > >> User application should check IOMMU info chain for migration capability > >> to use dirty page tracking feature provided by kernel module. > >> > >> Signed-off-by: Kirti Wankhede > >> --- > >> drivers/vfio/vfio_iommu_type1.c | 15 +++ > >> include/uapi/linux/vfio.h | 14 ++ > >> 2 files changed, 29 insertions(+) > >> > >> diff --git a/drivers/vfio/vfio_iommu_type1.c > >> b/drivers/vfio/vfio_iommu_type1.c > >> index 8b27faf1ec38..b38d278d7bff 100644 > >> --- a/drivers/vfio/vfio_iommu_type1.c > >> +++ b/drivers/vfio/vfio_iommu_type1.c > >> @@ -2378,6 +2378,17 @@ static int vfio_iommu_iova_build_caps(struct > >> vfio_iommu *iommu, > >>return ret; > >> } > >> > >> +static int vfio_iommu_migration_build_caps(struct vfio_info_cap *caps) > >> +{ > >> + struct vfio_iommu_type1_info_cap_migration cap_mig; > >> + > >> + cap_mig.header.id = VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION; > >> + cap_mig.header.version = 1; > >> + cap_mig.flags = VFIO_IOMMU_INFO_CAPS_MIGRATION_DIRTY_PAGE_TRACK; > >> + > >> + return vfio_info_add_capability(caps, _mig.header, sizeof(cap_mig)); > >> +} > >> + > >> static long vfio_iommu_type1_ioctl(void *iommu_data, > >> unsigned int cmd, unsigned long arg) > >> { > >> @@ -2427,6 +2438,10 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, > >>if (ret) > >>return ret; > >> > >> + ret = vfio_iommu_migration_build_caps(); > >> + if (ret) > >> + return ret; > >> + > >>if (caps.size) { > >>info.flags |= VFIO_IOMMU_INFO_CAPS; > >> > >> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h > >> index e3cbf8b78623..df9ce8aaafab 100644 > >> --- a/include/uapi/linux/vfio.h > >> +++ b/include/uapi/linux/vfio.h > >> @@ -1013,6 +1013,20 @@ struct vfio_iommu_type1_info_cap_iova_range { > >>struct vfio_iova_range iova_ranges[]; > >> }; > >> > >> +/* > >> + * The migration capability allows to report supported features for > >> migration. > >> + * > >> + * The structures below define version 1 of this capability. > >> + */ > >> +#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION 1 > >> + > >> +struct vfio_iommu_type1_info_cap_migration { > >> + struct vfio_info_cap_header header; > >> + __u32 flags; > >> + /* supports dirty page tracking */ > >> +#define VFIO_IOMMU_INFO_CAPS_MIGRATION_DIRTY_PAGE_TRACK (1 << 0) > >> +}; > >> + > > > > What about exposing the maximum supported dirty bitmap size and the > > supported page sizes? Thanks, > > > > How should user application use that? How does a user application currently discover that only a PAGE_SIZE dirty bitmap granularity is supported or that the when performing an unmap while requesting the dirty bitmap, those unmaps need to be chunked to no more than 2^31 * PAGE_SIZE? I don't see anything in the uapi that expresses these restrictions. It seems we're currently relying on the QEMU implementation to coincide with bitmap granularity support, with no mechanism other than trial and error to validate or expand that support. Likewise, I think it's largely a coincidence that KVM also has the same slot size restrictions, so we're unlikely to see an unmap exceeding this limit, but our api does not restrict an unmap to only cover a single mapping. We probably also need to think about whether we need to expose a mapping chunk limitation separately or if it should be extrapolated from the migration support (we might have non-migration reasons to limit it at some point). If we leave these as assumptions then we risk breaking userspace or limiting the usefulness of expending this support. If we include within the uapi a mechanism for learning about these restrictions, then it becomes a userspace problem to follow the uapi and take advantage of the uapi provided. Thanks, Alex
Re: [PATCH v5 30/31] qcow2: Add subcluster support to qcow2_measure()
On Wed 06 May 2020 08:13:51 PM CEST, Eric Blake wrote: > On 5/5/20 12:38 PM, Alberto Garcia wrote: >> Extended L2 entries are bigger than normal L2 entries so this has an >> impact on the amount of metadata needed for a qcow2 file. >> >> Signed-off-by: Alberto Garcia >> Reviewed-by: Max Reitz >> --- >> block/qcow2.c | 19 --- >> 1 file changed, 12 insertions(+), 7 deletions(-) > > Should this be hoisted earlier in the series, before 28/31? I can do that if I call qcow2_calc_prealloc_size() with extended_l2 always set to false (because there would be no BLOCK_OPT_EXTL2 that the caller could use). Maybe it's not a bad idea. > Should there be iotest coverage? There are already, in the last patch. Berto
Re: [PATCH 3/3] iotests: Mirror with different source/target size
On 5/7/20 9:52 AM, Kevin Wolf wrote: This tests that the mirror job catches situations where the target node has a different size than the source node. It must also forbid resize operations when the job is already running. Signed-off-by: Kevin Wolf --- tests/qemu-iotests/041 | 45 ++ tests/qemu-iotests/041.out | 4 ++-- 2 files changed, 47 insertions(+), 2 deletions(-) Reviewed-by: Eric Blake -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Re: [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend
On 4/20/20 11:32 AM, Cindy Lu wrote: > Currently we have 2 types of vhost backends in QEMU: vhost kernel and > vhost-user. The above patch provides a generic device for vDPA purpose, > this vDPA device exposes to user space a non-vendor-specific configuration > interface for setting up a vhost HW accelerator, this patch set introduces > a third vhost backend called vhost-vdpa based on the vDPA interface. > > Vhost-vdpa usage: > > qemu-system-x86_64 -cpu host -enable-kvm \ > .. > -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \ > -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \ > > Author: Tiwei Bie > Signed-off-by: Cindy Lu > --- > hw/net/vhost_net.c| 43 > hw/net/virtio-net.c | 9 + > hw/virtio/Makefile.objs | 2 +- > hw/virtio/vhost-backend.c | 3 + > hw/virtio/vhost-vdpa.c| 379 ++ > hw/virtio/vhost.c | 5 + > include/hw/virtio/vhost-backend.h | 6 +- > include/hw/virtio/vhost-vdpa.h| 14 ++ > 8 files changed, 459 insertions(+), 2 deletions(-) > create mode 100644 hw/virtio/vhost-vdpa.c > create mode 100644 include/hw/virtio/vhost-vdpa.h > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c > index 4096d64aaf..0d13fda2fc 100644 > --- a/hw/net/vhost_net.c > +++ b/hw/net/vhost_net.c > @@ -17,8 +17,10 @@ > #include "net/net.h" > #include "net/tap.h" > #include "net/vhost-user.h" > +#include "net/vhost-vdpa.h" > > #include "standard-headers/linux/vhost_types.h" > +#include "linux-headers/linux/vhost.h" > #include "hw/virtio/virtio-net.h" > #include "net/vhost_net.h" > #include "qemu/error-report.h" > @@ -85,6 +87,29 @@ static const int user_feature_bits[] = { > VHOST_INVALID_FEATURE_BIT > }; > > +static const int vdpa_feature_bits[] = { > +VIRTIO_F_NOTIFY_ON_EMPTY, > +VIRTIO_RING_F_INDIRECT_DESC, > +VIRTIO_RING_F_EVENT_IDX, > +VIRTIO_F_ANY_LAYOUT, > +VIRTIO_F_VERSION_1, > +VIRTIO_NET_F_CSUM, > +VIRTIO_NET_F_GUEST_CSUM, > +VIRTIO_NET_F_GSO, > +VIRTIO_NET_F_GUEST_TSO4, > +VIRTIO_NET_F_GUEST_TSO6, > +VIRTIO_NET_F_GUEST_ECN, > +VIRTIO_NET_F_GUEST_UFO, > +VIRTIO_NET_F_HOST_TSO4, > +VIRTIO_NET_F_HOST_TSO6, > +VIRTIO_NET_F_HOST_ECN, > +VIRTIO_NET_F_HOST_UFO, > +VIRTIO_NET_F_MRG_RXBUF, > +VIRTIO_NET_F_MTU, > +VIRTIO_F_IOMMU_PLATFORM, > +VIRTIO_NET_F_GUEST_ANNOUNCE, > +VHOST_INVALID_FEATURE_BIT > +}; > static const int *vhost_net_get_feature_bits(struct vhost_net *net) > { > const int *feature_bits = 0; > @@ -96,6 +121,9 @@ static const int *vhost_net_get_feature_bits(struct > vhost_net *net) > case NET_CLIENT_DRIVER_VHOST_USER: > feature_bits = user_feature_bits; > break; > +case NET_CLIENT_DRIVER_VHOST_VDPA: > +feature_bits = vdpa_feature_bits; > +break; > default: > error_report("Feature bits not defined for this type: %d", > net->nc->info->type); > @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc) > assert(vhost_net); > break; > #endif > +case NET_CLIENT_DRIVER_VHOST_VDPA: > +vhost_net = vhost_vdpa_get_vhost_net(nc); > +assert(vhost_net); > +break; > default: > break; > } > @@ -465,3 +497,14 @@ int vhost_net_set_mtu(struct vhost_net *net, uint16_t > mtu) > > return vhost_ops->vhost_net_set_mtu(>dev, mtu); > } > +int vhost_set_state(NetClientState *nc, int state) > +{ > +struct vhost_net *net = get_vhost_net(nc); > +struct vhost_dev *hdev = >dev; > +if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { Maybe checking the vhost_set_state callback is implemented is enough, and it is not need to restrict that to Vhost-vDPA? > +if (hdev->vhost_ops->vhost_set_state) { > +return hdev->vhost_ops->vhost_set_state(hdev, state); > + } > +} > +return 0; > +}
Re: [PATCH 2/3] mirror: Make sure that source and target size match
On 5/7/20 9:52 AM, Kevin Wolf wrote: If the target is shorter than the source, mirror would copy data until it reaches the end of the target and then fail with an I/O error when trying to write past the end. If the target is longer than the source, the mirror job would complete successfully, but the target wouldn't actually be an accurate copy of the source image (it would contain some additional garbage at the end). Fix this by checking that both images have the same size when the job starts. Signed-off-by: Kevin Wolf --- block/mirror.c | 21 - 1 file changed, 12 insertions(+), 9 deletions(-) An alternative would be trying to resize the target (like we have to do with active commit), but I'm fine with being conservative for now by forcing the user to have correct sizing, where we have the option to add magic resizing later only if it proves useful and not introducing more potential issues. Reviewed-by: Eric Blake -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Re: [PATCH 1/3] iotests/229: Use blkdebug to inject an error
On 5/7/20 9:52 AM, Kevin Wolf wrote: 229 relies on the mirror running into an I/O error when the target is smaller than the source. After changing mirror to catch this condition while starting the job, this test case won't get a job that is paused for an I/O error any more. Use blkdebug instead to inject an error. Signed-off-by: Kevin Wolf --- tests/qemu-iotests/229 | 15 +++ tests/qemu-iotests/229.out | 6 +++--- 2 files changed, 14 insertions(+), 7 deletions(-) Reviewed-by: Eric Blake -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Re: [PATCH] 9pfs: Fix potential deadlock of QEMU mainloop
On Donnerstag, 7. Mai 2020 16:33:28 CEST Greg Kurz wrote: > > I also haven't reviewed QEMU's lock implementations in very detail, but > > IIRC CoMutexes are completely handled in user space, while QemuMutex uses > > regular OS mutexes and hence might cost context switches. > > ... since the locking would only been exercised with an hypothetical > client doing stupid things, this is beginning to look like bike-shedding > to me. :) Aha, keep that in mind when you're doing your next review. ;-) No seriously, like I said, I don't really care too much about Mutex vs. CoMutex in you patch here. It was actually more about wide-picture thinking, i.e. other places of (co)mutexes being used or other potential changes that would make this or other uses more relevant one day. > > > > > diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c > > > > > index 9e046f7acb51..ac84ae804496 100644 > > > > > --- a/hw/9pfs/9p.c > > > > > +++ b/hw/9pfs/9p.c > > > > > @@ -2170,7 +2170,7 @@ static int coroutine_fn > > > > > v9fs_do_readdir_with_stat(V9fsPDU *pdu, int32_t count = 0; > > > > > > > > > > struct stat stbuf; > > > > > off_t saved_dir_pos; > > > > > > > > > > -struct dirent *dent; > > > > > +struct dirent dent; > > > > > > > > > > /* save the directory position */ > > > > > saved_dir_pos = v9fs_co_telldir(pdu, fidp); > > > > > > > > > > @@ -2181,13 +2181,11 @@ static int coroutine_fn > > > > > v9fs_do_readdir_with_stat(V9fsPDU *pdu, while (1) { > > > > > > > > > > v9fs_path_init(); > > > > > > > > > > -v9fs_readdir_lock(>fs.dir); > > > > > - > > > > > > > > That's the deadlock fix, but ... > > > > > > > > > err = v9fs_co_readdir(pdu, fidp, ); > > > > > > > > > > -if (err || !dent) { > > > > > +if (err <= 0) { > > > > > > > > > > break; > > > > > > > > > > } > > > > > > > > ... even though this code simplification might make sense, I don't > > > > think > > > > it > > > > should be mixed with the deadlock fix together in one patch. They are > > > > not > > > > > > I could possibly split this in two patches, one for returning a copy > > > and one for moving the locking around, but... > > > > > > > related with each other, nor is the code simplification you are aiming > > > > trivial > > > > > > ... this assertion is somewhat wrong: moving the locking to > > > v9fs_co_readdir() really requires it returns a copy. > > > > Yeah, I am also not sure whether a split would make it more trivial enough > > in this case to be worth the hassle. If you find an acceptable solution, > > good, if not then leave it one patch. > > Another option would be to g_malloc() the dirent in v9fs_co_readdir() and > g_free() in the callers. This would cause less churn since we could keep > the same function signature. I was actually just going to suggest the same. So yes, looks like a less invasive change to me. Best regards, Christian Schoenebeck
Re: Emulating Solaris 10 on SPARC64 sun4u
On Thu, May 7, 2020 at 4:29 PM wrote: > > Just thought I'd chime in with an update. > > We are currently emulating a 16550A UART. The guest sees this as the SU > device, referring to the SuperIO port (a pair of 16550A UARTs). On the > Ultra 5, the machine that Sun4u is modelled against, SuperIO was used > for the keyboard and mouse. The Ultra 5 also had a DB25 (ttya default) > and a DB9 (ttyb default) with a SAB82532 ESSC2. > > Using tracing, I've looked through how the 16550A UART is touched and > it looks like Solaris 10 has no issues identifying the device. I've > matched register accesses with driver code in OpenSolaris and I'm > pretty sure the device is attached successfully. Also, if you boot > Solaris 10 with debugging output, you can see that the device gets > labelled as su0. The only time Solaris is capable of writing to the > console is when OpenBIOS is used as a proxy. > > Rather than Solaris deciding against using SuperIO as a tty, I don't > think there was ever any support for doing so (at least on SPARC > machines). This could be an explanation for why the system appears to > be trucking along just fine despite a seemingly frozen console - there > is no console. I don't think the frozen console is the fault of broken > interrupt routing as the 16550A UART is never programmed to generate > them. At least Fujitsu Siemens PRIMEPOWER250 had a tty attached to the 16550A UART. I think there were more such machines. I don't expect there is anything in the Solaris kernel which would prevent any serial device known to it to be used as a tty. > I've started work on emulating the SAB 82532 ESSC2 but it's > unfortunately way more complex than than the 16550A. For instance, it's > possible to configure different baudrates for receiving and > transmitting. QEMU's chardev interface doesn't appear to handle that. > QEMUSerialSetParams has a single speed value that is passed to > cfsetispeed and cfsetospeed. The chip also has support for stick parity > , which aren't valid options right now either. If I'm wrong on either > of those points please correct me. Unless there is an alternative, > changes to the interface may need to be made if adding this device is > to be considered. Well, theoretically yes, but practically there is just one baudrate which can be specified in the OBP. I think it's perfectly safe to use max(rxrate,txrate), or min(rxrate,txrate), whatever you prefer. Regards, Artyom > > On Sun, 2020-02-09 at 11:26 +, Mark Cave-Ayland wrote: > > On 05/02/2020 06:31, jasper.low...@bt.com wrote: > > > > > I'm currently working towards emulating Solaris 10 on sun4u. > > > > > > > > > > > > The Solaris 10 ISO image I am attempting to boot is the one from > > > the Oracle > > > > > > download page at > > > https://www.oracle.com/solaris/solaris10/downloads/solaris10-get-jsp-downloads.html. > > > > > > Image: sol-10-u11-ga-sparc-dvd.iso > > > > > > MD5: 53e8b066f7f250ce2fd2cef063f8072b > > > > > > > > > > > > I am using QEMU commit 7bd9d0a9e26c7a3c67c0f174f0009ba19969b158. > > > > > > > > > > > > The command I am using to run QEMU is: > > > > > > ./qemu/sparc64-softmmu/qemu-system-sparc64 -bios > > > ./openbios/obj-sparc64/openbios-builtin.elf -cdrom > > > ./iso/solaris/sol-10-u11-ga-sparc-dvd.iso -boot d -nographic -m 3G > > > > > > > > > > > > ``` > > > > > > CPUs: 1 x SUNW,UltraSPARC-IIi > > > > > > UUID: ---- > > > > > > Welcome to OpenBIOS v1.1 built on Feb 5 2020 19:15 > > > > > > Type 'help' for detailed information > > > > > > Trying cdrom:f... > > > > > > Not a bootable ELF image > > > > > > Not a bootable a.out image > > > > > > > > > > > > Loading FCode image... > > > > > > Loaded 7420 bytes > > > > > > entry point is 0x4000 > > > > > > Evaluating FCode... > > > > > > Evaluating FCode... > > > > > > Ignoring failed claim for va 100 memsz af6d6! > > > > > > Ignoring failed claim for va 1402000 memsz 4dcc8! > > > > > > Ignoring failed claim for va 180 memsz 510c8! > > > > > > SunOS Release 5.10 Version Generic_147147-26 64-bit > > > > > > Copyright (c) 1983, 2013, Oracle and/or its affiliates. All rights > > > reserved. > > > > > > could not find debugger-vocabulary-hook>threads:interpret: > > > exception -13 caught > > > > > > interpret \ Copyright (c) 1995-1999 by Sun Microsystems, Inc. > > > > > > \ All rights reserved. > > > > > > \ > > > > > > \ ident "@(#)data64.fth 1.3 00/07/17 SMI" > > > > > > > > > > > > hex > > > > > > > > > > > > only forth also definitions > > > > > > vocabulary kdbg-words > > > > > > also kdbg-words definitions > > > > > > > > > > > > defer p@ > > > > > > defer p! > > > > > > ['] x@ is p@ > > > > > > ['] x! is p! > > > > > > > > > > > > 8 constant ptrsize > > > > > > > > > > > > d# 32 constant nbitsminor > > > > > > h# constant maxmin > > > > > > \ > > > > > > \ Copyright 2008 Sun Microsystems, Inc. All rights reserved. > > > > > > \ Use is subject to license terms. > > > > > > \ > > > > > > > > >
Re: [PATCH 0/5] target/i386: fxtract, fscale fixes
On Thu, 7 May 2020, no-re...@patchew.org wrote: > === OUTPUT BEGIN === > 1/5 Checking commit 69eed0bcaaaf (target/i386: implement special cases for > fxtract) > WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? I don't think any MAINTAINERS update is needed for a new testcase in an existing directory. > ERROR: Use of volatile is usually wrong, please add a comment I think the justification for volatile in such testcase code is obvious without comments in individual cases - to avoid any code movement or optimization that might break what the tests are intending to test (these tests are making heavy use of mixed C and inline asm to test how emulated instructions behave, including on input representations that are not valid long double values in the ABI and with the rounding precision changed behind the compiler's back). I think making everything possibly relevant volatile in these tests is better than trying to produce a fragile argument that in fact certain data does not need to be volatile to avoid problematic code movement. > ERROR: spaces required around that '-' (ctx:VxV) > #139: FILE: tests/tcg/i386/test-i386-fxtract.c:80: > + "0" (0x1p-16445L)); > ^ No, this is a C99 hex float contstant, not a subtraction. There are already such constants in tests/tcg/multiarch/float_helpers.c and tests/tcg/multiarch/float_madds.c at least, so I assume they are OK in QEMU floating-point tests and this style checker should not be objecting to them. -- Joseph S. Myers jos...@codesourcery.com
[PATCH 3/3] iotests: Mirror with different source/target size
This tests that the mirror job catches situations where the target node has a different size than the source node. It must also forbid resize operations when the job is already running. Signed-off-by: Kevin Wolf --- tests/qemu-iotests/041 | 45 ++ tests/qemu-iotests/041.out | 4 ++-- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index 1812dd8479..601c756117 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -240,6 +240,49 @@ class TestSingleBlockdev(TestSingleDrive): target=self.qmp_target) self.assert_qmp(result, 'error/class', 'GenericError') +def do_test_resize(self, device, node): +def pre_finalize(): +if device: +result = self.vm.qmp('block_resize', device=device, size=65536) +self.assert_qmp(result, 'error/class', 'GenericError') + +result = self.vm.qmp('block_resize', node_name=node, size=65536) +self.assert_qmp(result, 'error/class', 'GenericError') + +result = self.vm.qmp(self.qmp_cmd, job_id='job0', device='drive0', + sync='full', target=self.qmp_target, + auto_finalize=False, auto_dismiss=False) +self.assert_qmp(result, 'return', {}) + +result = self.vm.run_job('job0', auto_finalize=False, + pre_finalize=pre_finalize) +self.assertEqual(result, None) + +def test_source_resize(self): +self.do_test_resize('drive0', 'top') + +def test_target_resize(self): +self.do_test_resize(None, self.qmp_target) + +def do_test_target_size(self, size): +result = self.vm.qmp('block_resize', node_name=self.qmp_target, + size=size) +self.assert_qmp(result, 'return', {}) + +result = self.vm.qmp(self.qmp_cmd, job_id='job0', + device='drive0', sync='full', auto_dismiss=False, + target=self.qmp_target) +self.assert_qmp(result, 'return', {}) + +result = self.vm.run_job('job0') +self.assertEqual(result, 'Source and target image have different sizes') + +def test_small_target(self): +self.do_test_target_size(self.image_len // 2) + +def test_large_target(self): +self.do_test_target_size(self.image_len * 2) + test_large_cluster = None test_image_not_found = None test_small_buffer2 = None @@ -251,6 +294,8 @@ class TestSingleDriveZeroLength(TestSingleDrive): class TestSingleBlockdevZeroLength(TestSingleBlockdev): image_len = 0 +test_small_target = None +test_large_target = None class TestSingleDriveUnalignedLength(TestSingleDrive): image_len = 1025 * 1024 diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out index 877b76fd31..53abe11d73 100644 --- a/tests/qemu-iotests/041.out +++ b/tests/qemu-iotests/041.out @@ -1,5 +1,5 @@ -.. + -- -Ran 94 tests +Ran 104 tests OK -- 2.25.3
[PATCH 2/3] mirror: Make sure that source and target size match
If the target is shorter than the source, mirror would copy data until it reaches the end of the target and then fail with an I/O error when trying to write past the end. If the target is longer than the source, the mirror job would complete successfully, but the target wouldn't actually be an accurate copy of the source image (it would contain some additional garbage at the end). Fix this by checking that both images have the same size when the job starts. Signed-off-by: Kevin Wolf --- block/mirror.c | 21 - 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index aca95c9bc9..201ffa26f9 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -872,6 +872,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) BlockDriverState *target_bs = blk_bs(s->target); bool need_drain = true; int64_t length; +int64_t target_length; BlockDriverInfo bdi; char backing_filename[2]; /* we only need 2 characters because we are only checking for a NULL string */ @@ -887,24 +888,26 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) goto immediate_exit; } +target_length = blk_getlength(s->target); +if (target_length < 0) { +ret = target_length; +goto immediate_exit; +} + /* Active commit must resize the base image if its size differs from the * active layer. */ if (s->base == blk_bs(s->target)) { -int64_t base_length; - -base_length = blk_getlength(s->target); -if (base_length < 0) { -ret = base_length; -goto immediate_exit; -} - -if (s->bdev_length > base_length) { +if (s->bdev_length > target_length) { ret = blk_truncate(s->target, s->bdev_length, false, PREALLOC_MODE_OFF, 0, NULL); if (ret < 0) { goto immediate_exit; } } +} else if (s->bdev_length != target_length) { +error_setg(errp, "Source and target image have different sizes"); +ret = -EINVAL; +goto immediate_exit; } if (s->bdev_length == 0) { -- 2.25.3
[PATCH 1/3] iotests/229: Use blkdebug to inject an error
229 relies on the mirror running into an I/O error when the target is smaller than the source. After changing mirror to catch this condition while starting the job, this test case won't get a job that is paused for an I/O error any more. Use blkdebug instead to inject an error. Signed-off-by: Kevin Wolf --- tests/qemu-iotests/229 | 15 +++ tests/qemu-iotests/229.out | 6 +++--- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/tests/qemu-iotests/229 b/tests/qemu-iotests/229 index 866168b236..a5d4e5d4f2 100755 --- a/tests/qemu-iotests/229 +++ b/tests/qemu-iotests/229 @@ -33,6 +33,7 @@ _cleanup() _cleanup_test_img _rm_test_img "$TEST_IMG" _rm_test_img "$DEST_IMG" +rm -f "$TEST_DIR/blkdebug.conf" } trap "_cleanup; exit \$status" 0 1 2 3 15 @@ -49,11 +50,10 @@ _supported_os Linux DEST_IMG="$TEST_DIR/d.$IMGFMT" TEST_IMG="$TEST_DIR/b.$IMGFMT" +BLKDEBUG_CONF="$TEST_DIR/blkdebug.conf" _make_test_img 2M - -# destination for mirror will be too small, causing error -TEST_IMG=$DEST_IMG _make_test_img 1M +TEST_IMG=$DEST_IMG _make_test_img 2M $QEMU_IO -c 'write 0 2M' "$TEST_IMG" | _filter_qemu_io @@ -67,11 +67,18 @@ echo echo '=== Starting drive-mirror, causing error & stop ===' echo +cat > "$BLKDEBUG_CONF" <
[PATCH 0/3] mirror: Make sure that source and target size match
Same thing as the recent fix for backup, except that mirror already forbids resizing during the job. So what remains is checking that the sizes match at the start of the job. Kevin Wolf (3): iotests/229: Use blkdebug to inject an error mirror: Make sure that source and target size match iotests: Mirror with different source/target size block/mirror.c | 21 ++ tests/qemu-iotests/041 | 45 ++ tests/qemu-iotests/041.out | 4 ++-- tests/qemu-iotests/229 | 15 + tests/qemu-iotests/229.out | 6 ++--- 5 files changed, 73 insertions(+), 18 deletions(-) -- 2.25.3
[Bug 1877136] Re: Qemu GDB Arm core registers XML description not valid for M-profile
Patch submitted: https://patchew.org/QEMU/20200507134755.13997-1-peter.mayd...@linaro.org/ ** Changed in: qemu Status: New => In Progress -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1877136 Title: Qemu GDB Arm core registers XML description not valid for M-profile Status in QEMU: In Progress Bug description: When trying to debug an armv7-m binary running on Qemu, GDB makes some mistakes due to mistakenly believing the target is not M-profile. One observable is that backtraces over signal handlers are not handled correctly -- since the special M-profile EXC_RETURN value is not recognised. That happens because GDB doesn't think the target is M-profile. This happens because GDB sees a reported feature set from the Qemu remote connection that includes the feature `org.gnu.gdb.arm.core`. As described in the GDB online docs, for "M-profile targets (e.g. Cortex-M3), the ‘org.gnu.gdb.arm.core’ feature is replaced by ‘org.gnu.gdb.arm.m-profile’" https://sourceware.org/gdb/current/onlinedocs/gdb/ARM-Features.html From a scan of the Qemu source code on commit ea1329bb3a8d5cd25b70e3dbf73e7ded4d5ad756 it seems that when emulating an arm core it uses `arm-core.xml` unconditionally for `CPUClass->gdb_core_xml_file`, and that means the only feature provided is `org.gnu.gdb.arm.core`. Note that even though there is a command to set the architecture in GDB, setting the target architecture to an M-profile core is still not a valid workaround. This is because the target description overrides everything in setting the `is_m` attribute within GDB. Reproduction of the observable: Using the examples here https://git.linaro.org/people/peter.maydell/m-profile-tests.git/tree/ . Build the examples, and run ``` qemu-system-arm -s -S -no-reboot -M lm3s6965evb -m 16 -serial stdio -display none -net nic -net user,restrict=on -d guest_errors,unimp -kernel test3-kern.bin ``` Then in a GDB session ``` vshcmd: > arm-none-eabi-gdb -q (gdb) vshcmd: > file test3-kern.elf Reading symbols from test3-kern.elf... (gdb) vshcmd: > target remote localhost:1234 Remote debugging using localhost:1234 _start () at init-m.S:53 53mov r0, #0 (gdb) vshcmd: > show architecture The target architecture is set automatically (currently armv7) (gdb) vshcmd: > break svc Breakpoint 1 at 0x6fc: svc. (2 locations) (gdb) vshcmd: > cont Continuing. Breakpoint 1, svc () at test3.c:16 16 int test = SEQ(); (gdb) vshcmd: > bt #0 svc () at test3.c:16 #1 0xfff8 in ?? () Backtrace stopped: previous frame identical to this frame (corrupt stack?) (gdb) vshcmd: > print/x $lr $1 = 0xfff9 (gdb) ``` To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/1877136/+subscriptions
device hotplug & file handles
Hi, For usb device pass-through (aka -device usb-host) it would be very useful to pass file handles from libvirt to qemu. The workflow would change from ... (1) libvirt enables access to /dev/usb/$bus/$dev (2) libvirt passes $bus + $dev (using hostbus + hostaddr properties) to qemu. (3) qemu opens /dev/usb/$bus/$dev ... to ... (1) libvirt opens /dev/usb/$bus/$dev (2) libvirt passes filehandle to qemu. Question is how can we pass the file descriptor best? My idea would be to simply add an fd property to usb-host: * Coldplug would be "-device usb-host,fd=" (cmd line). * Hotplug would be "device_add usb-host,fd=" (monitor). Will that work from libvirt point of view? Or does anyone have an better idea? thanks, Gerd PS: background: https://bugzilla.redhat.com/show_bug.cgi?id=1595525
Re: [PATCH] docs/devel/migration: start a debugging section
* Marc-André Lureau (marcandre.lur...@redhat.com) wrote: > Explain how to use analyze-migration.py, this may help. > > Signed-off-by: Marc-André Lureau Queued > --- > docs/devel/migration.rst | 20 > 1 file changed, 20 insertions(+) > > diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst > index e88918f7639..2eb08624fc3 100644 > --- a/docs/devel/migration.rst > +++ b/docs/devel/migration.rst > @@ -50,6 +50,26 @@ All these migration protocols use the same infrastructure > to > save/restore state devices. This infrastructure is shared with the > savevm/loadvm functionality. > > +Debugging > += > + > +The migration stream can be analyzed thanks to > `scripts/analyze_migration.py`. > + > +Example usage: > + > +.. code-block:: shell > + > + $ qemu-system-x86_64 > + (qemu) migrate "exec:cat > mig" > + $ ./scripts/analyze_migration.py -f mig > + { > +"ram (3)": { > +"section sizes": { > +"pc.ram": "0x0800", > + ... > + > +See also ``analyze_migration.py -h`` help for more options. > + > Common infrastructure > = > > -- > 2.26.0.rc2.42.g98cedd0233 > > -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
Re: [PATCH v2 0/3] Improved reporting for migrate parameters
* Mao Zhongyi (maozhon...@cmss.chinamobile.com) wrote: > This series mainly improve the report message of migrate parameters > to make it easier to read. Queued > v2->v1 > -p1: avoid using constants, replace it with stringify(). > > Cc: quint...@redhat.com > Cc: dgilb...@redhat.com > > Mao Zhongyi (3): > migration/migration: improve error reporting for migrate parameters > monitor/hmp-cmds: add hmp_handle_error() for hmp_migrate_set_speed() > migration: move the units of migrate parameters from milliseconds to > ms > > migration/migration.c | 20 > monitor/hmp-cmds.c| 13 - > 2 files changed, 20 insertions(+), 13 deletions(-) > > -- > 2.17.1 > > > > -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
[Bug 1856335] Re: Cache Layout wrong on many Zen Arch CPUs
Yes. Sieger. Please install 5.0 it should work fine. I am not sure about 4.2. -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1856335 Title: Cache Layout wrong on many Zen Arch CPUs Status in QEMU: New Bug description: AMD CPUs have L3 cache per 2, 3 or 4 cores. Currently, TOPOEXT seems to always map Cache ass if it was an 4-Core per CCX CPU, which is incorrect, and costs upwards 30% performance (more realistically 10%) in L3 Cache Layout aware applications. Example on a 4-CCX CPU (1950X /w 8 Cores and no SMT): EPYC-IBPB AMD In windows, coreinfo reports correctly: Unified Cache 1, Level 3,8 MB, Assoc 16, LineSize 64 Unified Cache 6, Level 3,8 MB, Assoc 16, LineSize 64 On a 3-CCX CPU (3960X /w 6 cores and no SMT): EPYC-IBPB AMD in windows, coreinfo reports incorrectly: -- Unified Cache 1, Level 3,8 MB, Assoc 16, LineSize 64 ** Unified Cache 6, Level 3,8 MB, Assoc 16, LineSize 64 Validated against 3.0, 3.1, 4.1 and 4.2 versions of qemu-kvm. With newer Qemu there is a fix (that does behave correctly) in using the dies parameter: The problem is that the dies are exposed differently than how AMD does it natively, they are exposed to Windows as sockets, which means, that if you are nto a business user, you can't ever have a machine with more than two CCX (6 cores) as consumer versions of Windows only supports two sockets. (Should this be reported as a separate bug?) To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/1856335/+subscriptions
[PATCH v2 10/9] qed: Simplify backing reads
The other four drivers that support backing files (qcow, qcow2, parallels, vmdk) all rely on the block layer to populate zeroes when reading beyond EOF of a short backing file. We can simplify the qed code by doing likewise. Signed-off-by: Eric Blake --- I noticed this during my audit that v1 of Vladimir's series was correct. No change in iotests results (test 274 is currently failing for qed, but for other reasons: +Traceback (most recent call last): + File "274", line 24, in +iotests.verify_image_format(supported_fmts=['qcow2']) +AttributeError: module 'iotests' has no attribute 'verify_image_format' ) block/qed.h | 1 - block/qed.c | 64 + 2 files changed, 6 insertions(+), 59 deletions(-) diff --git a/block/qed.h b/block/qed.h index 42c115d8220c..3d12bf78d412 100644 --- a/block/qed.h +++ b/block/qed.h @@ -140,7 +140,6 @@ typedef struct QEDAIOCB { /* Current cluster scatter-gather list */ QEMUIOVector cur_qiov; -QEMUIOVector *backing_qiov; uint64_t cur_pos; /* position on block device, in bytes */ uint64_t cur_cluster; /* cluster offset in image file */ unsigned int cur_nclusters; /* number of clusters being accessed */ diff --git a/block/qed.c b/block/qed.c index 927382995a0c..bea4b9f6cc97 100644 --- a/block/qed.c +++ b/block/qed.c @@ -849,56 +849,18 @@ static BDRVQEDState *acb_to_s(QEDAIOCB *acb) * @s: QED state * @pos:Byte position in device * @qiov: Destination I/O vector - * @backing_qiov: Possibly shortened copy of qiov, to be allocated here - * @cb: Completion function - * @opaque: User data for completion function * * This function reads qiov->size bytes starting at pos from the backing file. * If there is no backing file then zeroes are read. */ static int coroutine_fn qed_read_backing_file(BDRVQEDState *s, uint64_t pos, - QEMUIOVector *qiov, - QEMUIOVector **backing_qiov) + QEMUIOVector *qiov) { -uint64_t backing_length = 0; -size_t size; -int ret; - -/* If there is a backing file, get its length. Treat the absence of a - * backing file like a zero length backing file. - */ if (s->bs->backing) { -int64_t l = bdrv_getlength(s->bs->backing->bs); -if (l < 0) { -return l; -} -backing_length = l; -} - -/* Zero all sectors if reading beyond the end of the backing file */ -if (pos >= backing_length || -pos + qiov->size > backing_length) { -qemu_iovec_memset(qiov, 0, 0, qiov->size); -} - -/* Complete now if there are no backing file sectors to read */ -if (pos >= backing_length) { -return 0; -} - -/* If the read straddles the end of the backing file, shorten it */ -size = MIN((uint64_t)backing_length - pos, qiov->size); - -assert(*backing_qiov == NULL); -*backing_qiov = g_new(QEMUIOVector, 1); -qemu_iovec_init(*backing_qiov, qiov->niov); -qemu_iovec_concat(*backing_qiov, qiov, 0, size); - -BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO); -ret = bdrv_co_preadv(s->bs->backing, pos, size, *backing_qiov, 0); -if (ret < 0) { -return ret; +BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO); +return bdrv_co_preadv(s->bs->backing, pos, qiov->size, qiov, 0); } +qemu_iovec_memset(qiov, 0, 0, qiov->size); return 0; } @@ -915,7 +877,6 @@ static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s, uint64_t offset) { QEMUIOVector qiov; -QEMUIOVector *backing_qiov = NULL; int ret; /* Skip copy entirely if there is no work to do */ @@ -925,13 +886,7 @@ static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s, qemu_iovec_init_buf(, qemu_blockalign(s->bs, len), len); -ret = qed_read_backing_file(s, pos, , _qiov); - -if (backing_qiov) { -qemu_iovec_destroy(backing_qiov); -g_free(backing_qiov); -backing_qiov = NULL; -} +ret = qed_read_backing_file(s, pos, ); if (ret) { goto out; @@ -1339,8 +1294,7 @@ static int coroutine_fn qed_aio_read_data(void *opaque, int ret, qemu_iovec_memset(>cur_qiov, 0, 0, acb->cur_qiov.size); r = 0; } else if (ret != QED_CLUSTER_FOUND) { -r = qed_read_backing_file(s, acb->cur_pos, >cur_qiov, - >backing_qiov); +r = qed_read_backing_file(s, acb->cur_pos, >cur_qiov); } else { BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); r = bdrv_co_preadv(bs->file, offset, acb->cur_qiov.size, @@ -1365,12 +1319,6 @@ static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb) while (1) { trace_qed_aio_next_io(s, acb, 0, acb->cur_pos +