[PATCH 07/10] exec: Move all RAMBlock functions to 'exec/ramblock.h'

2020-05-07 Thread Philippe Mathieu-Daudé
The RAMBlock API was dispersed in 3 different headers.
One of these headers, "exec/ram_addr.h", is restricted
to target dependent code. However these functions are
not target specific. Move all functions into a single
place.  Now all these functions can be accessed by
target-agnostic code.

Signed-off-by: Philippe Mathieu-Daudé 
---
 include/exec/cpu-common.h|  24 ---
 include/exec/ram_addr.h  | 105 ---
 include/exec/ramblock.h  | 134 +++
 migration/migration.h|   1 +
 accel/tcg/translate-all.c|   2 -
 hw/block/nvme.c  |   2 +-
 hw/s390x/s390-stattrib-kvm.c |   1 -
 hw/s390x/s390-stattrib.c |   1 -
 hw/s390x/s390-virtio-ccw.c   |   1 -
 hw/virtio/vhost-user.c   |   1 +
 hw/virtio/vhost.c|   1 +
 hw/virtio/virtio-balloon.c   |   1 +
 memory.c |   1 +
 migration/migration.c|   1 +
 migration/postcopy-ram.c |   1 +
 migration/savevm.c   |   1 +
 stubs/ram-block.c|   2 +-
 target/ppc/kvm.c |   1 -
 target/s390x/kvm.c   |   1 -
 util/vfio-helpers.c  |   2 +-
 20 files changed, 145 insertions(+), 139 deletions(-)

diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index b47e5630e7..347ceb603b 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -49,25 +49,6 @@ typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr 
addr);
 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
 /* This should not be used by devices.  */
 ram_addr_t qemu_ram_addr_from_host(void *ptr);
-RAMBlock *qemu_ram_block_by_name(const char *name);
-RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
-   ram_addr_t *offset);
-ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host);
-void qemu_ram_set_idstr(RAMBlock *block, const char *name, DeviceState *dev);
-void qemu_ram_unset_idstr(RAMBlock *block);
-const char *qemu_ram_get_idstr(RAMBlock *rb);
-void *qemu_ram_get_host_addr(RAMBlock *rb);
-ram_addr_t qemu_ram_get_offset(RAMBlock *rb);
-ram_addr_t qemu_ram_get_used_length(RAMBlock *rb);
-bool qemu_ram_is_shared(RAMBlock *rb);
-bool qemu_ram_is_uf_zeroable(RAMBlock *rb);
-void qemu_ram_set_uf_zeroable(RAMBlock *rb);
-bool qemu_ram_is_migratable(RAMBlock *rb);
-void qemu_ram_set_migratable(RAMBlock *rb);
-void qemu_ram_unset_migratable(RAMBlock *rb);
-
-size_t qemu_ram_pagesize(RAMBlock *block);
-size_t qemu_ram_pagesize_largest(void);
 
 void cpu_physical_memory_rw(hwaddr addr, void *buf,
 hwaddr len, bool is_write);
@@ -100,11 +81,6 @@ void qemu_flush_coalesced_mmio_buffer(void);
 
 void cpu_flush_icache_range(hwaddr start, hwaddr len);
 
-typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque);
-
-int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
-int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
-
 #endif
 
 #endif /* CPU_COMMON_H */
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index c61d5188f7..0deffad66f 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -26,112 +26,7 @@
 #include "exec/ramlist.h"
 #include "exec/ramblock.h"
 
-/**
- * clear_bmap_size: calculate clear bitmap size
- *
- * @pages: number of guest pages
- * @shift: guest page number shift
- *
- * Returns: number of bits for the clear bitmap
- */
-static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
-{
-return DIV_ROUND_UP(pages, 1UL << shift);
-}
 
-/**
- * clear_bmap_set: set clear bitmap for the page range
- *
- * @rb: the ramblock to operate on
- * @start: the start page number
- * @size: number of pages to set in the bitmap
- *
- * Returns: None
- */
-static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
-  uint64_t npages)
-{
-uint8_t shift = rb->clear_bmap_shift;
-
-bitmap_set_atomic(rb->clear_bmap, start >> shift,
-  clear_bmap_size(npages, shift));
-}
-
-/**
- * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set
- *
- * @rb: the ramblock to operate on
- * @page: the page number to check
- *
- * Returns: true if the bit was set, false otherwise
- */
-static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
-{
-uint8_t shift = rb->clear_bmap_shift;
-
-return bitmap_test_and_clear_atomic(rb->clear_bmap, page >> shift, 1);
-}
-
-static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
-{
-return (b && b->host && offset < b->used_length) ? true : false;
-}
-
-static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
-{
-assert(offset_in_ramblock(block, offset));
-return (char *)block->host + offset;
-}
-
-bool ramblock_is_pmem(RAMBlock *rb);
-
-/**
- * qemu_ram_alloc_from_file,
- * qemu_ram_alloc_from_fd:  Allocate a ram block from the specified backing
- *  file or device
- *
- * 

[PATCH 05/10] exec: Move qemu_minrampagesize/qemu_maxrampagesize to 'qemu-common.h'

2020-05-07 Thread Philippe Mathieu-Daudé
Move these generic functions to a more common place, with other
functions related to host page size. Document a little.

Cc: Alexey Kardashevskiy 
Signed-off-by: Philippe Mathieu-Daudé 
---
 include/exec/ram_addr.h|  3 ---
 include/qemu-common.h  | 10 ++
 hw/ppc/spapr_caps.c|  2 +-
 hw/s390x/s390-virtio-ccw.c |  1 +
 hw/vfio/spapr.c|  2 +-
 5 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 06096e8c6a..195b67d3c8 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -93,9 +93,6 @@ static inline unsigned long int 
ramblock_recv_bitmap_offset(void *host_addr,
 
 bool ramblock_is_pmem(RAMBlock *rb);
 
-long qemu_minrampagesize(void);
-long qemu_maxrampagesize(void);
-
 /**
  * qemu_ram_alloc_from_file,
  * qemu_ram_alloc_from_fd:  Allocate a ram block from the specified backing
diff --git a/include/qemu-common.h b/include/qemu-common.h
index d0142f29ac..2821a6ef76 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -80,6 +80,16 @@ bool set_preferred_target_page_bits(int bits);
  */
 void finalize_target_page_bits(void);
 
+/**
+ * qemu_minrampagesize:
+ * qemu_maxrampagesize:
+ *
+ * If backed via -memdev, return the device page size,
+ * else return the host kernel page size.
+ */
+long qemu_minrampagesize(void);
+long qemu_maxrampagesize(void);
+
 /**
  * Sends a (part of) iovec down a socket, yielding when the socket is full, or
  * Receives data into a (part of) iovec from a socket,
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index eb54f94227..33a802a103 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -23,11 +23,11 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu-common.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "qapi/visitor.h"
 #include "sysemu/hw_accel.h"
-#include "exec/ram_addr.h"
 #include "target/ppc/cpu.h"
 #include "target/ppc/mmu-hash64.h"
 #include "cpu-models.h"
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index f660070d22..c009384505 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -12,6 +12,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu-common.h"
 #include "qapi/error.h"
 #include "cpu.h"
 #include "hw/boards.h"
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
index 2900bd1941..c64db940a7 100644
--- a/hw/vfio/spapr.c
+++ b/hw/vfio/spapr.c
@@ -9,13 +9,13 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu-common.h"
 #include "cpu.h"
 #include 
 #include 
 
 #include "hw/vfio/vfio-common.h"
 #include "hw/hw.h"
-#include "exec/ram_addr.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "trace.h"
-- 
2.21.3




[PATCH 01/10] exec: Rename qemu_ram_writeback() as qemu_ram_msync()

2020-05-07 Thread Philippe Mathieu-Daudé
Rename qemu_ram_writeback() as qemu_ram_msync() to better
match what it does.

Suggested-by: Stefan Hajnoczi 
Signed-off-by: Philippe Mathieu-Daudé 
---
 include/exec/ram_addr.h | 4 ++--
 exec.c  | 2 +-
 hw/block/nvme.c | 3 +--
 memory.c| 2 +-
 4 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 4e05292f91..7b5c24e928 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -136,12 +136,12 @@ void qemu_ram_free(RAMBlock *block);
 
 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);
 
-void qemu_ram_writeback(RAMBlock *block, ram_addr_t start, ram_addr_t length);
+void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length);
 
 /* Clear whole block of mem */
 static inline void qemu_ram_block_writeback(RAMBlock *block)
 {
-qemu_ram_writeback(block, 0, block->used_length);
+qemu_ram_msync(block, 0, block->used_length);
 }
 
 #define DIRTY_CLIENTS_ALL ((1 << DIRTY_MEMORY_NUM) - 1)
diff --git a/exec.c b/exec.c
index 2874bb5088..f5a8cdf370 100644
--- a/exec.c
+++ b/exec.c
@@ -2127,7 +2127,7 @@ int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, 
Error **errp)
  * Otherwise no-op.
  * @Note: this is supposed to be a synchronous op.
  */
-void qemu_ram_writeback(RAMBlock *block, ram_addr_t start, ram_addr_t length)
+void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length)
 {
 /* The requested range should fit in within the block range */
 g_assert((start + length) <= block->used_length);
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 9b453423cf..21a199e53b 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1207,8 +1207,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, 
unsigned size)
  */
 if (addr == 0xE08 &&
 (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) {
-qemu_ram_writeback(n->pmrdev->mr.ram_block,
-   0, n->pmrdev->size);
+qemu_ram_msync(n->pmrdev->mr.ram_block, 0, n->pmrdev->size);
 }
 memcpy(, ptr + addr, size);
 } else {
diff --git a/memory.c b/memory.c
index 601b749906..3e65e33ac4 100644
--- a/memory.c
+++ b/memory.c
@@ -2205,7 +2205,7 @@ void memory_region_do_writeback(MemoryRegion *mr, hwaddr 
addr, hwaddr size)
  * different types of memory regions
  */
 if (mr->ram_block && mr->dirty_log_mask) {
-qemu_ram_writeback(mr->ram_block, addr, size);
+qemu_ram_msync(mr->ram_block, addr, size);
 }
 }
 
-- 
2.21.3




[PATCH 00/10] exec: Shear 'exec/ram_addr.h' and make NVMe device target-agnostic

2020-05-07 Thread Philippe Mathieu-Daudé
Stefan suggested to make qemu_ram_writeback() target agnostic,
Paolo to add memory_region_msync(), and Peter to remove
"exec/ram_addr.h" [*].

I let a single function in this file,
cpu_physical_memory_sync_dirty_bitmap(), to let the maintainer
have the pleasure to remove this header definitively himself :)

Based-on: <20200507155813.16169-1-phi...@redhat.com>
"Move Xen accelerator code under accel/xen/"
https://lists.gnu.org/archive/html/qemu-devel/2020-05/msg01722.html

[*] https://www.mail-archive.com/qemu-block@nongnu.org/msg66242.html

Philippe Mathieu-Daudé (10):
  exec: Rename qemu_ram_writeback() as qemu_ram_msync()
  exec/ramblock: Add missing 'qemu/rcu.h' include
  exec: Move tb_invalidate_phys_range() to 'exec/exec-all.h'
  exec/memory-internal: Check CONFIG_SOFTMMU instead of CONFIG_USER_ONLY
  exec: Move qemu_minrampagesize/qemu_maxrampagesize to 'qemu-common.h'
  exec: Move ramblock_recv_bitmap_offset() to migration/ram.c
  exec: Move all RAMBlock functions to 'exec/ramblock.h'
  hw/block: Let the NVMe emulated device be target-agnostic
  exec: Update coding style to make checkpatch.pl happy
  exec: Move cpu_physical_memory_* functions to 'exec/memory-internal.h'

 include/exec/cpu-common.h  |  24 --
 include/exec/exec-all.h|  14 +-
 include/exec/memory-internal.h | 307 +++-
 include/exec/ram_addr.h| 415 +
 include/exec/ramblock.h| 135 +++
 include/qemu-common.h  |  10 +
 migration/migration.h  |   1 +
 accel/tcg/cputlb.c |   1 -
 accel/tcg/translate-all.c  |   2 -
 exec.c |   2 +-
 hw/block/nvme.c|   5 +-
 hw/ppc/spapr.c |   1 -
 hw/ppc/spapr_caps.c|   2 +-
 hw/ppc/spapr_pci.c |   1 -
 hw/s390x/s390-stattrib-kvm.c   |   1 -
 hw/s390x/s390-stattrib.c   |   1 -
 hw/s390x/s390-virtio-ccw.c |   2 +-
 hw/vfio/spapr.c|   2 +-
 hw/virtio/vhost-user.c |   1 +
 hw/virtio/vhost.c  |   1 +
 hw/virtio/virtio-balloon.c |   1 +
 memory.c   |   4 +-
 migration/migration.c  |   1 +
 migration/postcopy-ram.c   |   1 +
 migration/ram.c|   8 +
 migration/savevm.c |   1 +
 stubs/ram-block.c  |   2 +-
 target/ppc/kvm.c   |   1 -
 target/s390x/kvm.c |   1 -
 util/vfio-helpers.c|   2 +-
 hw/block/Makefile.objs |   2 +-
 31 files changed, 485 insertions(+), 467 deletions(-)

-- 
2.21.3




[Bug 1877418] Re: qemu-nbd freezes access to VDI file

2020-05-07 Thread Bump
** Attachment added: "shell-commands.txt"
   
https://bugs.launchpad.net/qemu/+bug/1877418/+attachment/5367894/+files/shell-commands.txt

** Description changed:

  Mounted Oracle Virtualbox .vdi drive, which has GTP+BTRFS:
+ sudo modprobe nbd max_part=16
  sudo qemu-nbd -c /dev/nbd0 /storage/btrfs.vdi
  
  Then I am operating on the btrfs filesystem and suddenly it freezes.

** Description changed:

- Mounted Oracle Virtualbox .vdi drive, which has GTP+BTRFS:
+ Mounted Oracle Virtualbox .vdi drive (dynamically allocated), which has 
GTP+BTRFS:
  sudo modprobe nbd max_part=16
  sudo qemu-nbd -c /dev/nbd0 /storage/btrfs.vdi
  
  Then I am operating on the btrfs filesystem and suddenly it freezes.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1877418

Title:
  qemu-nbd freezes access to VDI file

Status in QEMU:
  New
Status in btrfs-progs package in Ubuntu:
  New

Bug description:
  Mounted Oracle Virtualbox .vdi drive (dynamically allocated), which has 
GTP+BTRFS:
  sudo modprobe nbd max_part=16
  sudo qemu-nbd -c /dev/nbd0 /storage/btrfs.vdi
  mount /dev/nbd0p1 /mydata/

  Then I am operating on the btrfs filesystem and suddenly it freezes.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1877418/+subscriptions



[PATCH 4/6] target/ppc: Add missing braces in ppc_radix64_partition_scoped_xlate()

2020-05-07 Thread Greg Kurz
As per CODING_STYLE.

Fixes: d04ea940c597 "target/ppc: Add support for Radix partition-scoped 
translation"
Signed-off-by: Greg Kurz 
---
 target/ppc/mmu-radix64.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 5e2d912ee346..956519b3ad20 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -282,8 +282,9 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU 
*cpu, int rwx,
   pate.dw0 & PRTBE_R_RPDS, h_raddr, h_page_size,
   , _cause, _addr) ||
 ppc_radix64_check_prot(cpu, rwx, pte, _cause, h_prot, true)) {
-if (pde_addr) /* address being translated was that of a guest pde */
+if (pde_addr) { /* address being translated was that of a guest pde */
 fault_cause |= DSISR_PRTABLE_FAULT;
+}
 if (cause_excp) {
 ppc_radix64_raise_hsi(cpu, rwx, eaddr, g_raddr, fault_cause);
 }




[Bug 1877418] Re: qemu-nbd freezes access to VDI file

2020-05-07 Thread Bump
** Attachment added: "dmesg-bug-qemu-nbd-btrfs-vdi.txt"
   
https://bugs.launchpad.net/qemu/+bug/1877418/+attachment/5367895/+files/dmesg-bug-qemu-nbd-btrfs-vdi.txt

** Also affects: btrfs-progs (Ubuntu)
   Importance: Undecided
   Status: New

** Description changed:

  Mounted Oracle Virtualbox .vdi drive (dynamically allocated), which has 
GTP+BTRFS:
  sudo modprobe nbd max_part=16
  sudo qemu-nbd -c /dev/nbd0 /storage/btrfs.vdi
+ mount /dev/nbd0p1 /mydata/
  
  Then I am operating on the btrfs filesystem and suddenly it freezes.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1877418

Title:
  qemu-nbd freezes access to VDI file

Status in QEMU:
  New
Status in btrfs-progs package in Ubuntu:
  New

Bug description:
  Mounted Oracle Virtualbox .vdi drive (dynamically allocated), which has 
GTP+BTRFS:
  sudo modprobe nbd max_part=16
  sudo qemu-nbd -c /dev/nbd0 /storage/btrfs.vdi
  mount /dev/nbd0p1 /mydata/

  Then I am operating on the btrfs filesystem and suddenly it freezes.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1877418/+subscriptions



[PATCH 3/6] target/ppc: Don't initialize some local variables in ppc_radix64_xlate()

2020-05-07 Thread Greg Kurz
It is the job of the ppc_radix64_get_fully_qualified_addr() function
which is called at the beginning of ppc_radix64_xlate() to set both
lpid *and* pid. It doesn't buy us anything to initialize them first.

Worse, a bug in ppc_radix64_get_fully_qualified_addr(), eg. failing to
set either lpid or pid, would be undetectable by static analysis tools
like coverity.

Signed-off-by: Greg Kurz 
---
 target/ppc/mmu-radix64.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index c76879f65b78..5e2d912ee346 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -433,7 +433,7 @@ static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, 
int rwx,
  bool cause_excp)
 {
 CPUPPCState *env = >env;
-uint64_t lpid = 0, pid = 0;
+uint64_t lpid, pid;
 ppc_v3_pate_t pate;
 int psize, prot;
 hwaddr g_raddr;




[PATCH 5/6] target/ppc: Fix arguments to ppc_radix64_partition_scoped_xlate()

2020-05-07 Thread Greg Kurz
The last two arguments have the bool type. Also, we shouldn't raise an
exception when using gdbstub.

This was found while reading the code. Since it only affects the powernv
machine, I didn't dig further to find an actual bug.

Fixes: d04ea940c597 "target/ppc: Add support for Radix partition-scoped 
translation"
Signed-off-by: Greg Kurz 
---
 target/ppc/mmu-radix64.c |6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 956519b3ad20..ceeb3dfe2d49 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -335,7 +335,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU 
*cpu, int rwx,
  */
 ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, prtbe_addr,
  pate, _raddr, _prot,
- _page_size, 1, 1);
+ _page_size, true,
+ cause_excp);
 if (ret) {
 return ret;
 }
@@ -374,7 +375,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU 
*cpu, int rwx,
 do {
 ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, pte_addr,
  pate, _raddr, _prot,
- _page_size, 1, 1);
+ _page_size, true,
+ cause_excp);
 if (ret) {
 return ret;
 }




[PATCH 6/6] target/ppc: Don't update radix PTE R/C bits with gdbstub

2020-05-07 Thread Greg Kurz
gdbstub shouldn't silently change guest visible state when doing address
translation. While here drop a not very useful comment.

This was found while reading the code. I could verify that this affects
both powernv and pseries, but I failed to observe any actual bug.

Fixes: d04ea940c597 "target/ppc: Add support for Radix partition-scoped 
translation"
Signed-off-by: Greg Kurz 
---
 target/ppc/mmu-radix64.c |   36 
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index ceeb3dfe2d49..bc51cd89a079 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -270,7 +270,8 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU 
*cpu, int rwx,
   ppc_v3_pate_t pate,
   hwaddr *h_raddr, int *h_prot,
   int *h_page_size, bool pde_addr,
-  bool cause_excp)
+  bool cause_excp,
+  bool cause_rc_update)
 {
 int fault_cause = 0;
 hwaddr pte_addr;
@@ -291,8 +292,9 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU 
*cpu, int rwx,
 return 1;
 }
 
-/* Update Reference and Change Bits */
-ppc_radix64_set_rc(cpu, rwx, pte, pte_addr, h_prot);
+if (cause_rc_update) {
+ppc_radix64_set_rc(cpu, rwx, pte, pte_addr, h_prot);
+}
 
 return 0;
 }
@@ -301,7 +303,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU 
*cpu, int rwx,
 vaddr eaddr, uint64_t pid,
 ppc_v3_pate_t pate, hwaddr 
*g_raddr,
 int *g_prot, int *g_page_size,
-bool cause_excp)
+bool cause_excp,
+bool cause_rc_update)
 {
 CPUState *cs = CPU(cpu);
 CPUPPCState *env = >env;
@@ -336,7 +339,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU 
*cpu, int rwx,
 ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, prtbe_addr,
  pate, _raddr, _prot,
  _page_size, true,
- cause_excp);
+ cause_excp,
+ cause_rc_update);
 if (ret) {
 return ret;
 }
@@ -376,7 +380,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU 
*cpu, int rwx,
 ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, pte_addr,
  pate, _raddr, _prot,
  _page_size, true,
- cause_excp);
+ cause_excp,
+ cause_rc_update);
 if (ret) {
 return ret;
 }
@@ -408,7 +413,9 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU 
*cpu, int rwx,
 return 1;
 }
 
-ppc_radix64_set_rc(cpu, rwx, pte, pte_addr, g_prot);
+if (cause_rc_update) {
+ppc_radix64_set_rc(cpu, rwx, pte, pte_addr, g_prot);
+}
 
 return 0;
 }
@@ -433,7 +440,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU 
*cpu, int rwx,
 static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, int rwx,
  bool relocation,
  hwaddr *raddr, int *psizep, int *protp,
- bool cause_excp)
+ bool cause_excp,
+ bool cause_rc_update)
 {
 CPUPPCState *env = >env;
 uint64_t lpid, pid;
@@ -483,7 +491,9 @@ static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, 
int rwx,
 if (relocation) {
 int ret = ppc_radix64_process_scoped_xlate(cpu, rwx, eaddr, pid,
pate, _raddr, ,
-   , cause_excp);
+   ,
+   cause_excp,
+   cause_rc_update);
 if (ret) {
 return ret;
 }
@@ -506,7 +516,9 @@ static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, 
int rwx,
 
 ret = ppc_radix64_partition_scoped_xlate(cpu, rwx, eaddr, g_raddr,
  pate, raddr, , 
,
- 0, cause_excp);
+

[PATCH 2/6] target/ppc: Pass const pointer to ppc_radix64_get_fully_qualified_addr()

2020-05-07 Thread Greg Kurz
This doesn't require write access to the CPU registers.

Signed-off-by: Greg Kurz 
---
 target/ppc/mmu-radix64.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 1404e53deca8..c76879f65b78 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -28,7 +28,8 @@
 #include "mmu-radix64.h"
 #include "mmu-book3s-v3.h"
 
-static bool ppc_radix64_get_fully_qualified_addr(CPUPPCState *env, vaddr eaddr,
+static bool ppc_radix64_get_fully_qualified_addr(const CPUPPCState *env,
+ vaddr eaddr,
  uint64_t *lpid, uint64_t *pid)
 {
 if (msr_hv) { /* MSR[HV] -> Hypervisor/bare metal */




[PATCH v2 4/4] target/arm: Fix tcg_gen_gvec_dup_imm vs DUP (indexed)

2020-05-07 Thread Richard Henderson
DUP (indexed) can duplicate 128-bit elements, so using esz
unconditionally can assert in tcg_gen_gvec_dup_imm.

Fixes: 8711e71f9cbb
Reported-by: Laurent Desnogues 
Signed-off-by: Richard Henderson 
---
 target/arm/translate-sve.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index c8649283be..83614e9e70 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -2044,7 +2044,11 @@ static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
 } else {
-tcg_gen_gvec_dup_imm(esz, dofs, vsz, vsz, 0);
+/*
+ * While dup_mem handles 128-bit elements, dup_imm does not.
+ * Thankfully element size doesn't matter for splatting zero.
+ */
+tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
 }
 }
 return true;
-- 
2.20.1




[PATCH 0/6] target/ppc: Various clean-up and fixes for radix64

2020-05-07 Thread Greg Kurz
First three patches of this series are simple cleanups. The other
ones fix some regressions introduced by Cedric's recent addition
of partition-scoped translation.

--
Greg

---

Greg Kurz (6):
  target/ppc: Pass const pointer to ppc_radix64_get_prot_amr()
  target/ppc: Pass const pointer to ppc_radix64_get_fully_qualified_addr()
  target/ppc: Don't initialize some local variables in ppc_radix64_xlate()
  target/ppc: Add missing braces in ppc_radix64_partition_scoped_xlate()
  target/ppc: Fix arguments to ppc_radix64_partition_scoped_xlate()
  target/ppc: Don't update radix PTE R/C bits with gdbstub


 target/ppc/mmu-radix64.c |   46 +++---
 target/ppc/mmu-radix64.h |4 ++--
 2 files changed, 33 insertions(+), 17 deletions(-)




[PATCH 1/6] target/ppc: Pass const pointer to ppc_radix64_get_prot_amr()

2020-05-07 Thread Greg Kurz
This doesn't require write access to the CPU structure.

Signed-off-by: Greg Kurz 
---
 target/ppc/mmu-radix64.h |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/ppc/mmu-radix64.h b/target/ppc/mmu-radix64.h
index 96228546aa85..f28c5794d071 100644
--- a/target/ppc/mmu-radix64.h
+++ b/target/ppc/mmu-radix64.h
@@ -55,9 +55,9 @@ static inline int ppc_radix64_get_prot_eaa(uint64_t pte)
(pte & R_PTE_EAA_X ? PAGE_EXEC : 0);
 }
 
-static inline int ppc_radix64_get_prot_amr(PowerPCCPU *cpu)
+static inline int ppc_radix64_get_prot_amr(const PowerPCCPU *cpu)
 {
-CPUPPCState *env = >env;
+const CPUPPCState *env = >env;
 int amr = env->spr[SPR_AMR] >> 62; /* We only care about key0 AMR63:62 */
 int iamr = env->spr[SPR_IAMR] >> 62; /* We only care about key0 IAMR63:62 
*/
 




[PATCH v2 1/4] target/arm: Use tcg_gen_gvec_5_ptr for sve FMLA/FCMLA

2020-05-07 Thread Richard Henderson
Now that we can pass 7 parameters, do not encode register
operands within simd_data.

Reviewed-by: Alex Bennée 
Reviewed-by: Taylor Simpson 
Signed-off-by: Richard Henderson 
---
v2: Remove gen_helper_sve_fmla typedef (phil).
---
 target/arm/helper-sve.h|  45 +++
 target/arm/sve_helper.c| 157 ++---
 target/arm/translate-sve.c |  70 ++---
 3 files changed, 114 insertions(+), 158 deletions(-)

diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 2f47279155..7a200755ac 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -1099,25 +1099,40 @@ DEF_HELPER_FLAGS_6(sve_fcadd_s, TCG_CALL_NO_RWG,
 DEF_HELPER_FLAGS_6(sve_fcadd_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, i32)
 
-DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_d, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
 
-DEF_HELPER_FLAGS_3(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
 
-DEF_HELPER_FLAGS_3(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
 
-DEF_HELPER_FLAGS_3(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
 
-DEF_HELPER_FLAGS_3(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fcmla_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_d, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
 
 DEF_HELPER_FLAGS_5(sve_ftmad_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sve_ftmad_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index fdfa652094..33b5a54a47 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -3372,23 +3372,11 @@ DO_ZPZ_FP(sve_ucvt_dd, uint64_t, , 
uint64_to_float64)
 
 #undef DO_ZPZ_FP
 
-/* 4-operand predicated multiply-add.  This requires 7 operands to pass
- * "properly", so we need to encode some of the registers into DESC.
- */
-QEMU_BUILD_BUG_ON(SIMD_DATA_SHIFT + 20 > 32);
-
-static void do_fmla_zpzzz_h(CPUARMState *env, void *vg, uint32_t desc,
+static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
+float_status *status, uint32_t desc,
 uint16_t neg1, uint16_t neg3)
 {
 intptr_t i = simd_oprsz(desc);
-unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5);
-unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5);
-unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5);
-unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5);
-void *vd = >vfp.zregs[rd];
-void *vn = >vfp.zregs[rn];
-void *vm = >vfp.zregs[rm];
-void *va = 

[PATCH v2 3/4] target/arm: Use clear_vec_high more effectively

2020-05-07 Thread Richard Henderson
Do not explicitly store zero to the NEON high part
when we can pass !is_q to clear_vec_high.

Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 target/arm/translate-a64.c | 59 +++---
 1 file changed, 36 insertions(+), 23 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index b6feb2b9dc..0947eaee05 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -939,11 +939,10 @@ static void do_fp_ld(DisasContext *s, int destidx, 
TCGv_i64 tcg_addr, int size)
 {
 /* This always zero-extends and writes to a full 128 bit wide vector */
 TCGv_i64 tmplo = tcg_temp_new_i64();
-TCGv_i64 tmphi;
+TCGv_i64 tmphi = NULL;
 
 if (size < 4) {
 MemOp memop = s->be_data + size;
-tmphi = tcg_const_i64(0);
 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
 } else {
 bool be = s->be_data == MO_BE;
@@ -961,12 +960,13 @@ static void do_fp_ld(DisasContext *s, int destidx, 
TCGv_i64 tcg_addr, int size)
 }
 
 tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
-tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
-
 tcg_temp_free_i64(tmplo);
-tcg_temp_free_i64(tmphi);
 
-clear_vec_high(s, true, destidx);
+if (tmphi) {
+tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
+tcg_temp_free_i64(tmphi);
+}
+clear_vec_high(s, tmphi != NULL, destidx);
 }
 
 /*
@@ -6960,8 +6960,8 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn)
 return;
 }
 
-tcg_resh = tcg_temp_new_i64();
 tcg_resl = tcg_temp_new_i64();
+tcg_resh = NULL;
 
 /* Vd gets bits starting at pos bits into Vm:Vn. This is
  * either extracting 128 bits from a 128:128 concatenation, or
@@ -6973,7 +6973,6 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn)
 read_vec_element(s, tcg_resh, rm, 0, MO_64);
 do_ext64(s, tcg_resh, tcg_resl, pos);
 }
-tcg_gen_movi_i64(tcg_resh, 0);
 } else {
 TCGv_i64 tcg_hh;
 typedef struct {
@@ -6988,6 +6987,7 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn)
 pos -= 64;
 }
 
+tcg_resh = tcg_temp_new_i64();
 read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
 elt++;
 read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
@@ -7003,9 +7003,12 @@ static void disas_simd_ext(DisasContext *s, uint32_t 
insn)
 
 write_vec_element(s, tcg_resl, rd, 0, MO_64);
 tcg_temp_free_i64(tcg_resl);
-write_vec_element(s, tcg_resh, rd, 1, MO_64);
-tcg_temp_free_i64(tcg_resh);
-clear_vec_high(s, true, rd);
+
+if (is_q) {
+write_vec_element(s, tcg_resh, rd, 1, MO_64);
+tcg_temp_free_i64(tcg_resh);
+}
+clear_vec_high(s, is_q, rd);
 }
 
 /* TBL/TBX
@@ -7042,17 +7045,21 @@ static void disas_simd_tb(DisasContext *s, uint32_t 
insn)
  * the input.
  */
 tcg_resl = tcg_temp_new_i64();
-tcg_resh = tcg_temp_new_i64();
+tcg_resh = NULL;
 
 if (is_tblx) {
 read_vec_element(s, tcg_resl, rd, 0, MO_64);
 } else {
 tcg_gen_movi_i64(tcg_resl, 0);
 }
-if (is_tblx && is_q) {
-read_vec_element(s, tcg_resh, rd, 1, MO_64);
-} else {
-tcg_gen_movi_i64(tcg_resh, 0);
+
+if (is_q) {
+tcg_resh = tcg_temp_new_i64();
+if (is_tblx) {
+read_vec_element(s, tcg_resh, rd, 1, MO_64);
+} else {
+tcg_gen_movi_i64(tcg_resh, 0);
+}
 }
 
 tcg_idx = tcg_temp_new_i64();
@@ -7072,9 +7079,12 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn)
 
 write_vec_element(s, tcg_resl, rd, 0, MO_64);
 tcg_temp_free_i64(tcg_resl);
-write_vec_element(s, tcg_resh, rd, 1, MO_64);
-tcg_temp_free_i64(tcg_resh);
-clear_vec_high(s, true, rd);
+
+if (is_q) {
+write_vec_element(s, tcg_resh, rd, 1, MO_64);
+tcg_temp_free_i64(tcg_resh);
+}
+clear_vec_high(s, is_q, rd);
 }
 
 /* ZIP/UZP/TRN
@@ -7111,7 +7121,7 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t 
insn)
 }
 
 tcg_resl = tcg_const_i64(0);
-tcg_resh = tcg_const_i64(0);
+tcg_resh = is_q ? tcg_const_i64(0) : NULL;
 tcg_res = tcg_temp_new_i64();
 
 for (i = 0; i < elements; i++) {
@@ -7162,9 +7172,12 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t 
insn)
 
 write_vec_element(s, tcg_resl, rd, 0, MO_64);
 tcg_temp_free_i64(tcg_resl);
-write_vec_element(s, tcg_resh, rd, 1, MO_64);
-tcg_temp_free_i64(tcg_resh);
-clear_vec_high(s, true, rd);
+
+if (is_q) {
+write_vec_element(s, tcg_resh, rd, 1, MO_64);
+tcg_temp_free_i64(tcg_resh);
+}
+clear_vec_high(s, is_q, rd);
 }
 
 /*
-- 
2.20.1




Re: [PATCH v2 0/4] target/arm: Misc cleanups

2020-05-07 Thread Richard Henderson
On 5/7/20 10:23 AM, Richard Henderson wrote:
> Version 2 adds a fix to a just merged patch.

Ho hum.  This is actually v4.  Sigh.


r~



[PATCH v2 2/4] target/arm: Use tcg_gen_gvec_mov for clear_vec_high

2020-05-07 Thread Richard Henderson
The 8-byte store for the end a !is_q operation can be
merged with the other stores.  Use a no-op vector move
to trigger the expand_clr portion of tcg_gen_gvec_mov.

Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 target/arm/translate-a64.c | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 62e5729904..b6feb2b9dc 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -496,14 +496,8 @@ static void clear_vec_high(DisasContext *s, bool is_q, int 
rd)
 unsigned ofs = fp_reg_offset(s, rd, MO_64);
 unsigned vsz = vec_full_reg_size(s);
 
-if (!is_q) {
-TCGv_i64 tcg_zero = tcg_const_i64(0);
-tcg_gen_st_i64(tcg_zero, cpu_env, ofs + 8);
-tcg_temp_free_i64(tcg_zero);
-}
-if (vsz > 16) {
-tcg_gen_gvec_dup_imm(MO_64, ofs + 16, vsz - 16, vsz - 16, 0);
-}
+/* Nop move, with side effect of clearing the tail. */
+tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
 }
 
 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
-- 
2.20.1




[PATCH v2 0/4] target/arm: Misc cleanups

2020-05-07 Thread Richard Henderson
Version 2 adds a fix to a just merged patch.


r~


Richard Henderson (4):
  target/arm: Use tcg_gen_gvec_5_ptr for sve FMLA/FCMLA
  target/arm: Use tcg_gen_gvec_mov for clear_vec_high
  target/arm: Use clear_vec_high more effectively
  target/arm: Fix tcg_gen_gvec_dup_imm vs DUP (indexed)

 target/arm/helper-sve.h|  45 +++
 target/arm/sve_helper.c| 157 ++---
 target/arm/translate-a64.c |  69 
 target/arm/translate-sve.c |  76 +++---
 4 files changed, 157 insertions(+), 190 deletions(-)

-- 
2.20.1




Re: tst-arm-mte bug: PSTATE.TCO is cleared on exceptions

2020-05-07 Thread Richard Henderson
On 5/7/20 2:59 AM, Szabolcs Nagy wrote:
> is there some recommended way to turn some form
> of tracing on in qemu before i execute the
> problematic application?

I didn't add any tracing within mte.  I can do so if we can guess what we're
looking for.

> or is it better if i try to extract a reproducer?
> (that does not use the network)

A reproducer would be most helpful.

Something that can help is saving a VM snapshot with the kernel booted and the
user logged in, just ready to run the test program.  Then you can get back to
exactly the state you want before things go wrong, even with a different qemu
build.


r~



Re: [PULL 04/10] target/arm: Use tcg_gen_gvec_dup_imm

2020-05-07 Thread Richard Henderson
On 5/7/20 7:39 AM, Laurent Desnogues wrote:
>> @@ -2044,7 +2044,7 @@ static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
>>  unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
>>  tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
>>  } else {
>> -tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
>> +tcg_gen_gvec_dup_imm(esz, dofs, vsz, vsz, 0);
> 
> For an indexed DUP, size can be 128-bit so that will fail the first
> assert in tcg-op-gvec.c:do_dup.

Ho hum, quite right.  This has already been merged, so I'll send a follow-up 
fix.


r~




Re: [PATCH] ppc/pnv: Fix NMI system reset SRR1 value

2020-05-07 Thread Cédric Le Goater
On 5/7/20 1:48 PM, Nicholas Piggin wrote:
> Commit a77fed5bd926 ("ppc/pnv: Add support for NMI interface") got the
> SRR1 setting wrong for sresets that hit outside of power-save states.
> 
> Fix this, better documenting the source for the bit definitions.
> 
> Fixes: a77fed5bd926 ("ppc/pnv: Add support for NMI interface") got the
> Cc: Cédric Le Goater 
> Cc: David Gibson 
> Signed-off-by: Nicholas Piggin 

We should introduce some defines like the SRR1_WAKE ones in Linux and 
cleanup powerpc_reset_wakeup(). This function uses cryptic values. 
That can be done later on as a followup.

Reviewed-by: Cédric Le Goater 

> ---
> 
> Thanks to Cedric for pointing out concerns with a previous MCE patch
> that unearthed this as well. Linux does not actually care what these
> SRR1[42:45] bits look like for non-powersave sresets, but we should
> follow documented behaviour as far as possible.

We should introduce some defines like the SRR1_WAKE ones in Linux and 
cleanup powerpc_reset_wakeup(). This function uses cryptic values. 
That can be done later on as a followup.


I am currently after a bug which results in a CPU hard lockup because 
of a pending interrupt. It occurs on a SMP PowerNV machine when it is 
stressed with IO, such as scp of a big file. 

I am suspecting more and more an issue with an interrupt being handled 
when the CPU is coming out of idle. I haven't seen anything wrong in
the models. Unless this maybe :

/* Pretend to be returning from doze always as we don't lose state */
*msr |= (0x1ull << (63 - 47));

I am not sure how in sync it is with PSSCR.


Thanks, 

C.

>  hw/ppc/pnv.c | 26 --
>  1 file changed, 20 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> index a3b7a8d0ff..1b4748ce6d 100644
> --- a/hw/ppc/pnv.c
> +++ b/hw/ppc/pnv.c
> @@ -1986,12 +1986,26 @@ static void pnv_cpu_do_nmi_on_cpu(CPUState *cs, 
> run_on_cpu_data arg)
>  
>  cpu_synchronize_state(cs);
>  ppc_cpu_do_system_reset(cs);
> -/*
> - * SRR1[42:45] is set to 0100 which the ISA defines as implementation
> - * dependent. POWER processors use this for xscom triggered interrupts,
> - * which come from the BMC or NMI IPIs.
> - */
> -env->spr[SPR_SRR1] |= PPC_BIT(43);
> +if (env->spr[SPR_SRR1] & PPC_BITMASK(46, 47)) {
> +/*
> +  * Power-save wakeups, as indicated by non-zero SRR1[46:47] put the
> +  * wakeup reason in SRR1[42:45], system reset is indicated with 0b0100
> +  * (PPC_BIT(43)).
> +  */
> +if (!(env->spr[SPR_SRR1] & PPC_BIT(43))) {
> +warn_report("ppc_cpu_do_system_reset does not set system reset 
> wakeup reason");
> +env->spr[SPR_SRR1] |= PPC_BIT(43);
> +}
> +} else {
> +/*
> +  * For non-powersave system resets, SRR1[42:45] are defined to be
> +  * implementation-dependent. The POWER9 User Manual specifies that
> +  * an external (SCOM driven, which may come from a BMC nmi command or
> +  * another CPU requesting a NMI IPI) system reset exception should be
> +  * 0b0010 (PPC_BIT(44)).
> + */
> +env->spr[SPR_SRR1] |= PPC_BIT(44);
> +}
>  }
>  
>  static void pnv_nmi(NMIState *n, int cpu_index, Error **errp)
> 




[Bug 1877418] [NEW] qemu-nbd freezes access to VDI file

2020-05-07 Thread Bump
Public bug reported:

Mounted Oracle Virtualbox .vdi drive, which has GTP+BTRFS:
sudo qemu-nbd -c /dev/nbd0 /storage/btrfs.vdi

Then I am operating on the btrfs filesystem and suddenly it freezes.

** Affects: qemu
 Importance: Undecided
 Status: New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1877418

Title:
  qemu-nbd freezes access to VDI file

Status in QEMU:
  New

Bug description:
  Mounted Oracle Virtualbox .vdi drive, which has GTP+BTRFS:
  sudo qemu-nbd -c /dev/nbd0 /storage/btrfs.vdi

  Then I am operating on the btrfs filesystem and suddenly it freezes.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1877418/+subscriptions



Re: device hotplug & file handles

2020-05-07 Thread Eric Blake

On 5/7/20 9:49 AM, Gerd Hoffmann wrote:

   Hi,

For usb device pass-through (aka -device usb-host) it would be very
useful to pass file handles from libvirt to qemu.  The workflow would
change from ...

   (1) libvirt enables access to /dev/usb/$bus/$dev
   (2) libvirt passes $bus + $dev (using hostbus + hostaddr properties)
   to qemu.
   (3) qemu opens /dev/usb/$bus/$dev

... to ...

   (1) libvirt opens /dev/usb/$bus/$dev
   (2) libvirt passes filehandle to qemu.

Question is how can we pass the file descriptor best?  My idea would be
to simply add an fd property to usb-host:

  * Coldplug would be "-device usb-host,fd=" (cmd line).
  * Hotplug would be "device_add usb-host,fd=" (monitor).

Will that work from libvirt point of view?
Or does anyone have an better idea?


Qemu already has -add-fd (both a CLI version, and a QMP version when a 
Unix socket can pass fds), at which point any existing interface that 
uses qemu_open() will understand the magic syntax /dev/fdset/NNN to 
refer to the existing fd previously passed in via -add-fd.  Libvirt is 
already able to use this feature for some cases (for example, see 
src/qemu/qemu_command.c:qemuBuildChrChardevFileStr).  So all that 
remains is making sure -device usb-host uses qemu_open(), and if it 
didn't already do so, also making sure libvirt can find a way to 
introspect when usb-host started supporting fdset usage.


Or put another way, let's use the generic fd mechanism that qemu already 
supports, rather than inventing yet another syntax.




thanks,
   Gerd

PS: background: https://bugzilla.redhat.com/show_bug.cgi?id=1595525



--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [RFC PATCH] qom: Implement qom-get HMP command

2020-05-07 Thread Dr. David Alan Gilbert
* Markus Armbruster (arm...@redhat.com) wrote:
> Cédric Le Goater  writes:
> 
> > From: "Dr. David Alan Gilbert" 
> >
> > Reimplement it based on qmp_qom_get() to avoid converting QObjects back
> > to strings.
> >
> > Inspired-by: Paolo Bonzini 
> > Signed-off-by: Andreas Färber 
> >
> > Slight fix for bit-rot:
> > Signed-off-by: Dr. David Alan Gilbert 
> > [clg: updates for QEMU 5.0 ]
> > Signed-off-by: Cédric Le Goater 
> > ---
> >
> >  I would like to restart the discussion on qom-get command to understand
> >  what was the conclusion and see if things have changed since.
> 
> I've since learned more about QOM.  I believe we should do HMP qom-get,
> but not quite like this patch does.  Let me explain.
> 
> A QOM property has ->get() and ->set() methods to expose the property
> via the Visitor interface.
> 
> ->get() works with an output visitor.  With the QObject output visitor,
> you can get the property value as a QObject.  QMP qom-get uses this.
> With the string output visitor, you can get it as a string.  Your
> proposed HMP qom-get uses this.
> 
> ->set() works with an input visitor.  With the QObject input visitor,
> you can set the property value from a QObject.  QMP qom-set uses this.
> With the string input visitor, you can set it from a string.  HMP
> qom-set uses this.  With the options visitor, you can set it from a
> QemuOpts.  CLI -object uses this.
> 
> The Visitor interface supports arbitrary QAPI types.  The ->get() and
> ->set() methods can use them all.
> 
> Some visitors, howerver, carry restrictions:
> 
>  * The string output visitor does not implement support for visiting
>  * QAPI structs, alternates, null, or arbitrary QTypes.  It also
>  * requires a non-null list argument to visit_start_list().
> 
>  * The string input visitor does not implement support for visiting
>  * QAPI structs, alternates, null, or arbitrary QTypes. Only flat lists
>  * of integers (except type "size") are supported.
> 
>  * The Opts input visitor does not implement support for visiting QAPI
>  * alternates, numbers (other than integers), null, or arbitrary
>  * QTypes.  It also requires a non-null list argument to
>  * visit_start_list().
> 
> If you try to qom-set a property whose ->set() uses something the string
> input visitor doesn't support, QEMU crashes.  Same for -object, and your
> proposed qom-get.

Crashing would be bad.

> I'm not aware of such a ->set(), but this is a death trap all the same.
> 
> The obvious way to disarm it is removing the restrictions.
> 
> One small step we took towards that goal is visible in the comments
> above: support for flat lists of integers.  The code for that will make
> your eyes bleed.  It's been a thorn in my side ever since I inherited
> QAPI.  Perhaps it could be done better.  But there's a reason why it
> should not be done at all: it's language design.
> 
> The QObject visitors translate between QAPI types and our internal
> representation of JSON.  The JSON parser and formatter complete the
> translation to and from JSON.  Sensible enough.
> 
> The string visitors translate between QAPI and ...  well, a barely
> documented language of our own "design".  Tolerable when the language it
> limited to numbers, booleans and strings.  Foolish when it grows into
> something isomorphic to JSON.
> 
> This is a dead end.
> 
> Can we still implement a safe and tolerably useful HMP qom-get and
> qom-set?  I think we can.  Remember the basic rule of HMP command
> implementation: wrap around the QMP command.  So let's try that.
> 
> hmp_qom_get() calls qmp_qom_get() and formats the resulting QObject with
> qobject_to_json_pretty().

Why don't we *just* do this?
OK, we wont fix the qom-set or the CLI, but if we just get hmp_qom_get
to call QMP, format it into json and then dump the json to the user,
then we get a usable, if not pretty, qom-get command, without having to
solve all these hard problems to move it forward?

Dave

> hmp_qom_get() parses the value with qobject_from_json(), and feeds the
> resulting QObject to qmp_qom_set().  To avoid interference with the HMP
> parser, we probably want argument type 'S'.
> 
> The two commands then parse / print JSON instead of the string visitors'
> language.  Is that bad?
> 
> * Integers: qom-set loses support for hexadecimal and octal.
> 
> * Bools: qom-set loses alternate spellings of true and false.
> 
> * Floating-point numbers: no change
> 
> * Strings: gain enclosing quotes.
> 
> * List of integers: gain enclosing square brackets.  qom-set loses
>   support for ranges.
> 
>   The only use of lists I can find is memory-backend property
>   host-nodes.
> 
> * Everything else: lose support for crashing QEMU.
> 
>   Again, I'm not aware of properties that crash now.
> 
> I think these changes are just fine.  If you disagree, you could try to
> mitigate with convenience magic like "if it doesn't parse as JSON, and
> it looks hexadecimal, convert to decimal and try again", or "looks kind
> of stringy, enclose in 

[PULL 09/12] migration/rdma: fix a memleak on error path in rdma_start_incoming_migration

2020-05-07 Thread Dr. David Alan Gilbert (git)
From: Pan Nengyuan 

'rdma->host' is malloced in qemu_rdma_data_init, but forgot to free on the error
path in rdma_start_incoming_migration(), this patch fix that.

The leak stack:
Direct leak of 2 byte(s) in 1 object(s) allocated from:
#0 0x7fb7add18ae8 in __interceptor_malloc (/lib64/libasan.so.5+0xefae8)
#1 0x7fb7ad0df1d5 in g_malloc (/lib64/libglib-2.0.so.0+0x531d5)
#2 0x7fb7ad0f8b32 in g_strdup (/lib64/libglib-2.0.so.0+0x6cb32)
#3 0x55a0464a0f6f in qemu_rdma_data_init /mnt/sdb/qemu/migration/rdma.c:2647
#4 0x55a0464b0e76 in rdma_start_incoming_migration 
/mnt/sdb/qemu/migration/rdma.c:4020
#5 0x55a0463f898a in qemu_start_incoming_migration 
/mnt/sdb/qemu/migration/migration.c:365
#6 0x55a0458c75d3 in qemu_init /mnt/sdb/qemu/softmmu/vl.c:4438
#7 0x55a046a3d811 in main /mnt/sdb/qemu/softmmu/main.c:48
#8 0x7fb7a8417872 in __libc_start_main (/lib64/libc.so.6+0x23872)
#9 0x55a04536b26d in _start 
(/mnt/sdb/qemu/build/x86_64-softmmu/qemu-system-x86_64+0x286926d)

Reported-by: Euler Robot 
Signed-off-by: Pan Nengyuan 
Message-Id: <20200420102727.17339-1-pannengy...@huawei.com>
Reviewed-by: Dr. David Alan Gilbert 
Signed-off-by: Dr. David Alan Gilbert 
---
 migration/rdma.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/migration/rdma.c b/migration/rdma.c
index f61587891b..967fda5b0c 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -4056,6 +4056,7 @@ void rdma_start_incoming_migration(const char *host_port, 
Error **errp)
 return;
 err:
 error_propagate(errp, local_err);
+g_free(rdma->host);
 g_free(rdma);
 g_free(rdma_return_path);
 }
-- 
2.26.2




[PULL 11/12] migration/multifd: fix memleaks in multifd_new_send_channel_async

2020-05-07 Thread Dr. David Alan Gilbert (git)
From: Pan Nengyuan 

When error happen in multifd_new_send_channel_async, 'sioc' will not be used
to create the multifd_send_thread. Let's free it to avoid a memleak. And also
do error_free after migrate_set_error() to avoid another leak in the same place.

The leak stack:
Direct leak of 2880 byte(s) in 8 object(s) allocated from:
#0 0x7f20b5118ae8 in __interceptor_malloc (/lib64/libasan.so.5+0xefae8)
#1 0x7f20b44df1d5 in g_malloc (/lib64/libglib-2.0.so.0+0x531d5)
#2 0x564133bce18b in object_new_with_type 
/mnt/sdb/backup/qemu/qom/object.c:683
#3 0x564133eea950 in qio_channel_socket_new 
/mnt/sdb/backup/qemu/io/channel-socket.c:56
#4 0x5641339cfe4f in socket_send_channel_create 
/mnt/sdb/backup/qemu/migration/socket.c:37
#5 0x564133a10328 in multifd_save_setup 
/mnt/sdb/backup/qemu/migration/multifd.c:772
#6 0x5641339cebed in migrate_fd_connect 
/mnt/sdb/backup/qemu/migration/migration.c:3530
#7 0x5641339d15e4 in migration_channel_connect 
/mnt/sdb/backup/qemu/migration/channel.c:92
#8 0x5641339cf5b7 in socket_outgoing_migration 
/mnt/sdb/backup/qemu/migration/socket.c:108

Direct leak of 384 byte(s) in 8 object(s) allocated from:
#0 0x7f20b5118cf0 in calloc (/lib64/libasan.so.5+0xefcf0)
#1 0x7f20b44df22d in g_malloc0 (/lib64/libglib-2.0.so.0+0x5322d)
#2 0x56413406fc17 in error_setv /mnt/sdb/backup/qemu/util/error.c:61
#3 0x564134070464 in error_setg_errno_internal 
/mnt/sdb/backup/qemu/util/error.c:109
#4 0x5641340851be in inet_connect_addr 
/mnt/sdb/backup/qemu/util/qemu-sockets.c:379
#5 0x5641340851be in inet_connect_saddr 
/mnt/sdb/backup/qemu/util/qemu-sockets.c:458
#6 0x5641340870ab in socket_connect 
/mnt/sdb/backup/qemu/util/qemu-sockets.c:1105
#7 0x564133eeaabf in qio_channel_socket_connect_sync 
/mnt/sdb/backup/qemu/io/channel-socket.c:145
#8 0x564133eeabf5 in qio_channel_socket_connect_worker 
/mnt/sdb/backup/qemu/io/channel-socket.c:168

Indirect leak of 360 byte(s) in 8 object(s) allocated from:
#0 0x7f20b5118ae8 in __interceptor_malloc (/lib64/libasan.so.5+0xefae8)
#1 0x7f20af901817 in __GI___vasprintf_chk (/lib64/libc.so.6+0x10d817)
#2 0x7f20b451fa6c in g_vasprintf (/lib64/libglib-2.0.so.0+0x93a6c)
#3 0x7f20b44f8cd0 in g_strdup_vprintf (/lib64/libglib-2.0.so.0+0x6ccd0)
#4 0x7f20b44f8d8c in g_strdup_printf (/lib64/libglib-2.0.so.0+0x6cd8c)
#5 0x56413406fc86 in error_setv /mnt/sdb/backup/qemu/util/error.c:65
#6 0x564134070464 in error_setg_errno_internal 
/mnt/sdb/backup/qemu/util/error.c:109
#7 0x5641340851be in inet_connect_addr 
/mnt/sdb/backup/qemu/util/qemu-sockets.c:379
#8 0x5641340851be in inet_connect_saddr 
/mnt/sdb/backup/qemu/util/qemu-sockets.c:458
#9 0x5641340870ab in socket_connect 
/mnt/sdb/backup/qemu/util/qemu-sockets.c:1105
#10 0x564133eeaabf in qio_channel_socket_connect_sync 
/mnt/sdb/backup/qemu/io/channel-socket.c:145
#11 0x564133eeabf5 in qio_channel_socket_connect_worker 
/mnt/sdb/backup/qemu/io/channel-socket.c:168

Reported-by: Euler Robot 
Signed-off-by: Pan Nengyuan 
Message-Id: <20200506095416.26099-2-pannengy...@huawei.com>
Reviewed-by: Juan Quintela 
Signed-off-by: Dr. David Alan Gilbert 
---
 migration/multifd.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/migration/multifd.c b/migration/multifd.c
index 9123c111a3..e3efd33a0d 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -727,6 +727,8 @@ static void multifd_new_send_channel_async(QIOTask *task, 
gpointer opaque)
  * its status.
  */
 p->quit = true;
+object_unref(OBJECT(sioc));
+error_free(local_err);
 } else {
 p->c = QIO_CHANNEL(sioc);
 qio_channel_set_delay(p->c, false);
-- 
2.26.2




[PULL 07/12] migration/throttle: Add cpu-throttle-tailslow migration parameter

2020-05-07 Thread Dr. David Alan Gilbert (git)
From: Keqian Zhu 

At the tail stage of throttling, the Guest is very sensitive to
CPU percentage while the @cpu-throttle-increment is excessive
usually at tail stage.

If this parameter is true, we will compute the ideal CPU percentage
used by the Guest, which may exactly make the dirty rate match the
dirty rate threshold. Then we will choose a smaller throttle increment
between the one specified by @cpu-throttle-increment and the one
generated by ideal CPU percentage.

Therefore, it is compatible to traditional throttling, meanwhile
the throttle increment won't be excessive at tail stage. This may
make migration time longer, and is disabled by default.

Signed-off-by: Keqian Zhu 
Message-Id: <20200413101508.54793-1-zhukeqi...@huawei.com>
Reviewed-by: Dr. David Alan Gilbert 
Signed-off-by: Dr. David Alan Gilbert 
---
 migration/migration.c | 13 
 migration/ram.c   | 25 +-
 monitor/hmp-cmds.c|  8 
 qapi/migration.json   | 48 +++
 4 files changed, 89 insertions(+), 5 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 79f16f6625..f5dbffc442 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -785,6 +785,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error 
**errp)
 params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
 params->has_cpu_throttle_increment = true;
 params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
+params->has_cpu_throttle_tailslow = true;
+params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow;
 params->has_tls_creds = true;
 params->tls_creds = g_strdup(s->parameters.tls_creds);
 params->has_tls_hostname = true;
@@ -1327,6 +1329,10 @@ static void 
migrate_params_test_apply(MigrateSetParameters *params,
 dest->cpu_throttle_increment = params->cpu_throttle_increment;
 }
 
+if (params->has_cpu_throttle_tailslow) {
+dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow;
+}
+
 if (params->has_tls_creds) {
 assert(params->tls_creds->type == QTYPE_QSTRING);
 dest->tls_creds = g_strdup(params->tls_creds->u.s);
@@ -1415,6 +1421,10 @@ static void migrate_params_apply(MigrateSetParameters 
*params, Error **errp)
 s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
 }
 
+if (params->has_cpu_throttle_tailslow) {
+s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow;
+}
+
 if (params->has_tls_creds) {
 g_free(s->parameters.tls_creds);
 assert(params->tls_creds->type == QTYPE_QSTRING);
@@ -3597,6 +3607,8 @@ static Property migration_properties[] = {
 DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState,
   parameters.cpu_throttle_increment,
   DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
+DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState,
+  parameters.cpu_throttle_tailslow, false),
 DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState,
   parameters.max_bandwidth, MAX_THROTTLE),
 DEFINE_PROP_UINT64("x-downtime-limit", MigrationState,
@@ -3703,6 +3715,7 @@ static void migration_instance_init(Object *obj)
 params->has_throttle_trigger_threshold = true;
 params->has_cpu_throttle_initial = true;
 params->has_cpu_throttle_increment = true;
+params->has_cpu_throttle_tailslow = true;
 params->has_max_bandwidth = true;
 params->has_downtime_limit = true;
 params->has_x_checkpoint_delay = true;
diff --git a/migration/ram.c b/migration/ram.c
index 53166fc279..52fc032b83 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -616,20 +616,34 @@ static size_t save_page_header(RAMState *rs, QEMUFile *f, 
 RAMBlock *block,
  * able to complete migration. Some workloads dirty memory way too
  * fast and will not effectively converge, even with auto-converge.
  */
-static void mig_throttle_guest_down(void)
+static void mig_throttle_guest_down(uint64_t bytes_dirty_period,
+uint64_t bytes_dirty_threshold)
 {
 MigrationState *s = migrate_get_current();
 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
-uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
+uint64_t pct_increment = s->parameters.cpu_throttle_increment;
+bool pct_tailslow = s->parameters.cpu_throttle_tailslow;
 int pct_max = s->parameters.max_cpu_throttle;
 
+uint64_t throttle_now = cpu_throttle_get_percentage();
+uint64_t cpu_now, cpu_ideal, throttle_inc;
+
 /* We have not started throttling yet. Let's start it. */
 if (!cpu_throttle_active()) {
 cpu_throttle_set(pct_initial);
 } else {
 /* Throttling already on, just increase the rate */
-cpu_throttle_set(MIN(cpu_throttle_get_percentage() + pct_icrement,
- pct_max));
+if 

[PULL 10/12] migration/xbzrle: add encoding rate

2020-05-07 Thread Dr. David Alan Gilbert (git)
From: Wei Wang 

Users may need to check the xbzrle encoding rate to know if the guest
memory is xbzrle encoding-friendly, and dynamically turn off the
encoding if the encoding rate is low.

Signed-off-by: Yi Sun 
Signed-off-by: Wei Wang 
Message-Id: <1588208375-19556-1-git-send-email-wei.w.w...@intel.com>
Reviewed-by: Dr. David Alan Gilbert 
Signed-off-by: Dr. David Alan Gilbert 
---
 migration/migration.c |  1 +
 migration/ram.c   | 39 +--
 monitor/hmp-cmds.c|  2 ++
 qapi/migration.json   |  5 -
 4 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index f5dbffc442..0bb042a0f7 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -932,6 +932,7 @@ static void populate_ram_info(MigrationInfo *info, 
MigrationState *s)
 info->xbzrle_cache->pages = xbzrle_counters.pages;
 info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
 info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
+info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate;
 info->xbzrle_cache->overflow = xbzrle_counters.overflow;
 }
 
diff --git a/migration/ram.c b/migration/ram.c
index 08eb382f53..859f835f1a 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -327,6 +327,10 @@ struct RAMState {
 uint64_t num_dirty_pages_period;
 /* xbzrle misses since the beginning of the period */
 uint64_t xbzrle_cache_miss_prev;
+/* Amount of xbzrle pages since the beginning of the period */
+uint64_t xbzrle_pages_prev;
+/* Amount of xbzrle encoded bytes since the beginning of the period */
+uint64_t xbzrle_bytes_prev;
 
 /* compression statistics since the beginning of the period */
 /* amount of count that no free thread to compress data */
@@ -710,6 +714,18 @@ static int save_xbzrle_page(RAMState *rs, uint8_t 
**current_data,
 return -1;
 }
 
+/*
+ * Reaching here means the page has hit the xbzrle cache, no matter what
+ * encoding result it is (normal encoding, overflow or skipping the page),
+ * count the page as encoded. This is used to caculate the encoding rate.
+ *
+ * Example: 2 pages (8KB) being encoded, first page encoding generates 2KB,
+ * 2nd page turns out to be skipped (i.e. no new bytes written to the
+ * page), the overall encoding rate will be 8KB / 2KB = 4, which has the
+ * skipped page included. In this way, the encoding rate can tell if the
+ * guest page is good for xbzrle encoding.
+ */
+xbzrle_counters.pages++;
 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
 
 /* save current buffer into memory */
@@ -740,6 +756,7 @@ static int save_xbzrle_page(RAMState *rs, uint8_t 
**current_data,
 } else if (encoded_len == -1) {
 trace_save_xbzrle_page_overflow();
 xbzrle_counters.overflow++;
+xbzrle_counters.bytes += TARGET_PAGE_SIZE;
 return -1;
 }
 
@@ -750,8 +767,12 @@ static int save_xbzrle_page(RAMState *rs, uint8_t 
**current_data,
 qemu_put_be16(rs->f, encoded_len);
 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
 bytes_xbzrle += encoded_len + 1 + 2;
-xbzrle_counters.pages++;
-xbzrle_counters.bytes += bytes_xbzrle;
+/*
+ * Like compressed_size (please see update_compress_thread_counts),
+ * the xbzrle encoded bytes don't count the 8 byte header with
+ * RAM_SAVE_FLAG_CONTINUE.
+ */
+xbzrle_counters.bytes += bytes_xbzrle - 8;
 ram_counters.transferred += bytes_xbzrle;
 
 return 1;
@@ -884,9 +905,23 @@ static void migration_update_rates(RAMState *rs, int64_t 
end_time)
 }
 
 if (migrate_use_xbzrle()) {
+double encoded_size, unencoded_size;
+
 xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
 rs->xbzrle_cache_miss_prev) / page_count;
 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
+unencoded_size = (xbzrle_counters.pages - rs->xbzrle_pages_prev) *
+ TARGET_PAGE_SIZE;
+encoded_size = xbzrle_counters.bytes - rs->xbzrle_bytes_prev;
+if (xbzrle_counters.pages == rs->xbzrle_pages_prev) {
+xbzrle_counters.encoding_rate = 0;
+} else if (!encoded_size) {
+xbzrle_counters.encoding_rate = UINT64_MAX;
+} else {
+xbzrle_counters.encoding_rate = unencoded_size / encoded_size;
+}
+rs->xbzrle_pages_prev = xbzrle_counters.pages;
+rs->xbzrle_bytes_prev = xbzrle_counters.bytes;
 }
 
 if (migrate_use_compression()) {
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index ade1f85e0c..9c61e769ca 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -303,6 +303,8 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
info->xbzrle_cache->cache_miss);
 monitor_printf(mon, "xbzrle cache miss rate: 

[PULL 08/12] migration/ram: Consolidate variable reset after placement in ram_load_postcopy()

2020-05-07 Thread Dr. David Alan Gilbert (git)
From: David Hildenbrand 

Let's consolidate resetting the variables.

Cc: "Dr. David Alan Gilbert" 
Cc: Juan Quintela 
Cc: Peter Xu 
Signed-off-by: David Hildenbrand 
Message-Id: <20200421085300.7734-10-da...@redhat.com>
Reviewed-by: Dr. David Alan Gilbert 
Signed-off-by: Dr. David Alan Gilbert 
  Fixup for context conflicts with 91ba442
---
 migration/ram.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index 52fc032b83..08eb382f53 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3147,7 +3147,7 @@ static int ram_load_postcopy(QEMUFile *f)
 /* Temporary page that is later 'placed' */
 void *postcopy_host_page = mis->postcopy_tmp_page;
 void *this_host = NULL;
-bool all_zero = false;
+bool all_zero = true;
 int target_pages = 0;
 
 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
@@ -3174,7 +3174,6 @@ static int ram_load_postcopy(QEMUFile *f)
 addr &= TARGET_PAGE_MASK;
 
 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
-place_needed = false;
 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
  RAM_SAVE_FLAG_COMPRESS_PAGE)) {
 block = ram_block_from_stream(f, flags);
@@ -3199,9 +3198,7 @@ static int ram_load_postcopy(QEMUFile *f)
  */
 page_buffer = postcopy_host_page +
   ((uintptr_t)host & (block->page_size - 1));
-/* If all TP are zero then we can optimise the place */
 if (target_pages == 1) {
-all_zero = true;
 this_host = (void *)QEMU_ALIGN_DOWN((uintptr_t)host,
 block->page_size);
 } else {
@@ -3221,7 +3218,6 @@ static int ram_load_postcopy(QEMUFile *f)
  */
 if (target_pages == (block->page_size / TARGET_PAGE_SIZE)) {
 place_needed = true;
-target_pages = 0;
 }
 place_source = postcopy_host_page;
 }
@@ -3303,6 +3299,10 @@ static int ram_load_postcopy(QEMUFile *f)
 ret = postcopy_place_page(mis, place_dest,
   place_source, block);
 }
+place_needed = false;
+target_pages = 0;
+/* Assume we have a zero page until we detect something different 
*/
+all_zero = true;
 }
 }
 
-- 
2.26.2




[PULL 06/12] migration/colo: Add missing error-propagation code

2020-05-07 Thread Dr. David Alan Gilbert (git)
From: Philippe Mathieu-Daudé 

Running the coccinelle script produced:

  $ spatch \
--macro-file scripts/cocci-macro-file.h --include-headers \
--sp-file scripts/coccinelle/find-missing-error_propagate.cocci \
--keep-comments --smpl-spacing --dir .
  HANDLING: ./migration/colo.c
  [[manual check required: error_propagate() might be missing in 
migrate_set_block_enabled() ./migration/colo.c:439:4]]

Add the missing error_propagate() after review.

Reviewed-by: Juan Quintela 
Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20200413205250.687-1-f4...@amsat.org>
Signed-off-by: Dr. David Alan Gilbert 
---
 migration/colo.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/migration/colo.c b/migration/colo.c
index 1b3493729b..d015d4f84e 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -443,6 +443,9 @@ static int colo_do_checkpoint_transaction(MigrationState *s,
 
 /* Disable block migration */
 migrate_set_block_enabled(false, _err);
+if (local_err) {
+goto out;
+}
 qemu_mutex_lock_iothread();
 
 #ifdef CONFIG_REPLICATION
-- 
2.26.2




[PULL 05/12] docs/devel/migration: start a debugging section

2020-05-07 Thread Dr. David Alan Gilbert (git)
From: Marc-André Lureau 

Explain how to use analyze-migration.py, this may help.

Signed-off-by: Marc-André Lureau 
Message-Id: <20200330174852.456148-1-marcandre.lur...@redhat.com>
Reviewed-by: Dr. David Alan Gilbert 
Reviewed-by: Daniel P. Berrangé 
Signed-off-by: Dr. David Alan Gilbert 
---
 docs/devel/migration.rst | 20 
 1 file changed, 20 insertions(+)

diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst
index e88918f763..2eb08624fc 100644
--- a/docs/devel/migration.rst
+++ b/docs/devel/migration.rst
@@ -50,6 +50,26 @@ All these migration protocols use the same infrastructure to
 save/restore state devices.  This infrastructure is shared with the
 savevm/loadvm functionality.
 
+Debugging
+=
+
+The migration stream can be analyzed thanks to `scripts/analyze_migration.py`.
+
+Example usage:
+
+.. code-block:: shell
+
+  $ qemu-system-x86_64
+   (qemu) migrate "exec:cat > mig"
+  $ ./scripts/analyze_migration.py -f mig
+  {
+"ram (3)": {
+"section sizes": {
+"pc.ram": "0x0800",
+  ...
+
+See also ``analyze_migration.py -h`` help for more options.
+
 Common infrastructure
 =
 
-- 
2.26.2




[PULL 12/12] migration/multifd: Do error_free after migrate_set_error to avoid memleaks

2020-05-07 Thread Dr. David Alan Gilbert (git)
From: Pan Nengyuan 

When error happen in multifd_send_thread, it use error_copy to set migrate 
error in
multifd_send_terminate_threads(). We should call error_free after it.

Similarly, fix another two places in multifd_recv_thread/multifd_save_cleanup.

The leak stack:
Direct leak of 48 byte(s) in 1 object(s) allocated from:
#0 0x7f781af07cf0 in calloc (/lib64/libasan.so.5+0xefcf0)
#1 0x7f781a2ce22d in g_malloc0 (/lib64/libglib-2.0.so.0+0x5322d)
#2 0x55ee1d075c17 in error_setv /mnt/sdb/backup/qemu/util/error.c:61
#3 0x55ee1d076464 in error_setg_errno_internal 
/mnt/sdb/backup/qemu/util/error.c:109
#4 0x55ee1cef066e in qio_channel_socket_writev 
/mnt/sdb/backup/qemu/io/channel-socket.c:569
#5 0x55ee1cee806b in qio_channel_writev 
/mnt/sdb/backup/qemu/io/channel.c:207
#6 0x55ee1cee806b in qio_channel_writev_all 
/mnt/sdb/backup/qemu/io/channel.c:171
#7 0x55ee1cee8248 in qio_channel_write_all 
/mnt/sdb/backup/qemu/io/channel.c:257
#8 0x55ee1ca12c9a in multifd_send_thread 
/mnt/sdb/backup/qemu/migration/multifd.c:657
#9 0x55ee1d0607fc in qemu_thread_start 
/mnt/sdb/backup/qemu/util/qemu-thread-posix.c:519
#10 0x7f78159ae2dd in start_thread (/lib64/libpthread.so.0+0x82dd)
#11 0x7f78156df4b2 in __GI___clone (/lib64/libc.so.6+0xfc4b2)

Indirect leak of 52 byte(s) in 1 object(s) allocated from:
#0 0x7f781af07f28 in __interceptor_realloc (/lib64/libasan.so.5+0xeff28)
#1 0x7f78156f07d9 in __GI___vasprintf_chk (/lib64/libc.so.6+0x10d7d9)
#2 0x7f781a30ea6c in g_vasprintf (/lib64/libglib-2.0.so.0+0x93a6c)
#3 0x7f781a2e7cd0 in g_strdup_vprintf (/lib64/libglib-2.0.so.0+0x6ccd0)
#4 0x7f781a2e7d8c in g_strdup_printf (/lib64/libglib-2.0.so.0+0x6cd8c)
#5 0x55ee1d075c86 in error_setv /mnt/sdb/backup/qemu/util/error.c:65
#6 0x55ee1d076464 in error_setg_errno_internal 
/mnt/sdb/backup/qemu/util/error.c:109
#7 0x55ee1cef066e in qio_channel_socket_writev 
/mnt/sdb/backup/qemu/io/channel-socket.c:569
#8 0x55ee1cee806b in qio_channel_writev 
/mnt/sdb/backup/qemu/io/channel.c:207
#9 0x55ee1cee806b in qio_channel_writev_all 
/mnt/sdb/backup/qemu/io/channel.c:171
#10 0x55ee1cee8248 in qio_channel_write_all 
/mnt/sdb/backup/qemu/io/channel.c:257
#11 0x55ee1ca12c9a in multifd_send_thread 
/mnt/sdb/backup/qemu/migration/multifd.c:657
#12 0x55ee1d0607fc in qemu_thread_start 
/mnt/sdb/backup/qemu/util/qemu-thread-posix.c:519
#13 0x7f78159ae2dd in start_thread (/lib64/libpthread.so.0+0x82dd)
#14 0x7f78156df4b2 in __GI___clone (/lib64/libc.so.6+0xfc4b2)

Reported-by: Euler Robot 
Signed-off-by: Pan Nengyuan 
Message-Id: <20200506095416.26099-3-pannengy...@huawei.com>
Reviewed-by: Juan Quintela 
Signed-off-by: Dr. David Alan Gilbert 
---
 migration/multifd.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/migration/multifd.c b/migration/multifd.c
index e3efd33a0d..5a3e4d0d46 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -550,6 +550,7 @@ void multifd_save_cleanup(void)
 multifd_send_state->ops->send_cleanup(p, _err);
 if (local_err) {
 migrate_set_error(migrate_get_current(), local_err);
+error_free(local_err);
 }
 }
 qemu_sem_destroy(_send_state->channels_ready);
@@ -688,6 +689,7 @@ out:
 if (local_err) {
 trace_multifd_send_error(p->id);
 multifd_send_terminate_threads(local_err);
+error_free(local_err);
 }
 
 /*
@@ -965,6 +967,7 @@ static void *multifd_recv_thread(void *opaque)
 
 if (local_err) {
 multifd_recv_terminate_threads(local_err);
+error_free(local_err);
 }
 qemu_mutex_lock(>mutex);
 p->running = false;
-- 
2.26.2




[PULL 04/12] migration: move the units of migrate parameters from milliseconds to ms

2020-05-07 Thread Dr. David Alan Gilbert (git)
From: Mao Zhongyi 

Signed-off-by: Mao Zhongyi 
Reviewed-by: Juan Quintela 
Message-Id: 
<474bb6cf67defb8be9de5035c11aee57a680557a.1585641083.git.maozhon...@cmss.chinamobile.com>
Reviewed-by: Stefano Garzarella 
Signed-off-by: Dr. David Alan Gilbert 
---
 migration/migration.c | 2 +-
 monitor/hmp-cmds.c| 8 
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 6e079efdcc..79f16f6625 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1214,7 +1214,7 @@ static bool migrate_params_check(MigrationParameters 
*params, Error **errp)
 error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
"downtime_limit",
"an integer in the range of 0 to "
-stringify(MAX_MIGRATE_DOWNTIME)" milliseconds");
+stringify(MAX_MIGRATE_DOWNTIME)" ms");
 return false;
 }
 
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 9bb6946fbf..1552dee489 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -231,18 +231,18 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
 monitor_printf(mon, "\n");
 }
 
-monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
+monitor_printf(mon, "total time: %" PRIu64 " ms\n",
info->total_time);
 if (info->has_expected_downtime) {
-monitor_printf(mon, "expected downtime: %" PRIu64 " 
milliseconds\n",
+monitor_printf(mon, "expected downtime: %" PRIu64 " ms\n",
info->expected_downtime);
 }
 if (info->has_downtime) {
-monitor_printf(mon, "downtime: %" PRIu64 " milliseconds\n",
+monitor_printf(mon, "downtime: %" PRIu64 " ms\n",
info->downtime);
 }
 if (info->has_setup_time) {
-monitor_printf(mon, "setup: %" PRIu64 " milliseconds\n",
+monitor_printf(mon, "setup: %" PRIu64 " ms\n",
info->setup_time);
 }
 }
-- 
2.26.2




[PULL 03/12] monitor/hmp-cmds: add hmp_handle_error() for hmp_migrate_set_speed()

2020-05-07 Thread Dr. David Alan Gilbert (git)
From: Mao Zhongyi 

Signed-off-by: Mao Zhongyi 
Reviewed-by: Juan Quintela 
Message-Id: 
<305323f835436023c53d759f5ab18af3ec874183.1585641083.git.maozhon...@cmss.chinamobile.com>
Signed-off-by: Dr. David Alan Gilbert 
---
 monitor/hmp-cmds.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 7f6e982dc8..9bb6946fbf 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -1198,8 +1198,11 @@ void hmp_migrate_set_cache_size(Monitor *mon, const 
QDict *qdict)
 /* Kept for backwards compatibility */
 void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict)
 {
+Error *err = NULL;
+
 int64_t value = qdict_get_int(qdict, "value");
-qmp_migrate_set_speed(value, NULL);
+qmp_migrate_set_speed(value, );
+hmp_handle_error(mon, err);
 }
 
 void hmp_migrate_set_capability(Monitor *mon, const QDict *qdict)
-- 
2.26.2




[PULL 02/12] migration/migration: improve error reporting for migrate parameters

2020-05-07 Thread Dr. David Alan Gilbert (git)
From: Mao Zhongyi 

use QERR_INVALID_PARAMETER_VALUE instead of
"Parameter '%s' expects" for consistency.

Signed-off-by: Mao Zhongyi 
Message-Id: 
<4ce71da4a5f98ad6ead0806ec71043473dcb4c07.1585641083.git.maozhon...@cmss.chinamobile.com>
Reviewed-by: Juan Quintela 
Signed-off-by: Dr. David Alan Gilbert 
---
 migration/migration.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 8f27174ff6..6e079efdcc 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1202,16 +1202,19 @@ static bool migrate_params_check(MigrationParameters 
*params, Error **errp)
 }
 
 if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) {
-error_setg(errp, "Parameter 'max_bandwidth' expects an integer in the"
- " range of 0 to %zu bytes/second", SIZE_MAX);
+error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+   "max_bandwidth",
+   "an integer in the range of 0 to "stringify(SIZE_MAX)
+   " bytes/second");
 return false;
 }
 
 if (params->has_downtime_limit &&
 (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
-error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
- "the range of 0 to %d milliseconds",
- MAX_MIGRATE_DOWNTIME);
+error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+   "downtime_limit",
+   "an integer in the range of 0 to "
+stringify(MAX_MIGRATE_DOWNTIME)" milliseconds");
 return false;
 }
 
@@ -2107,9 +2110,10 @@ void qmp_migrate_set_speed(int64_t value, Error **errp)
 void qmp_migrate_set_downtime(double value, Error **errp)
 {
 if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) {
-error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
- "the range of 0 to %d seconds",
- MAX_MIGRATE_DOWNTIME_SECONDS);
+error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+   "downtime_limit",
+   "an integer in the range of 0 to "
+stringify(MAX_MIGRATE_DOWNTIME_SECONDS)" seconds");
 return;
 }
 
-- 
2.26.2




[PULL 00/12] migration queue

2020-05-07 Thread Dr. David Alan Gilbert (git)
From: "Dr. David Alan Gilbert" 

The following changes since commit 3c7adbc67d9a5c3e992a4dd13b8704464daaad5b:

  Merge remote-tracking branch 
'remotes/berrange/tags/qcrypto-next-pull-request' into staging (2020-05-07 
14:30:12 +0100)

are available in the Git repository at:

  git://github.com/dagrh/qemu.git tags/pull-migration-20200507a

for you to fetch changes up to 13f2cb21e5fb33e9f8d7db8eee48edc1c67b812f:

  migration/multifd: Do error_free after migrate_set_error to avoid memleaks 
(2020-05-07 17:40:24 +0100)


Migration pull 2020-05-07

Mostly tidy-ups, but two new features:
  cpu-throttle-tailslow for making a gentler throttle
  xbzrle encoding rate measurement for getting a feal for xbzrle
performance.


David Hildenbrand (1):
  migration/ram: Consolidate variable reset after placement in 
ram_load_postcopy()

Keqian Zhu (1):
  migration/throttle: Add cpu-throttle-tailslow migration parameter

Mao Zhongyi (4):
  migration: fix bad indentation in error_report()
  migration/migration: improve error reporting for migrate parameters
  monitor/hmp-cmds: add hmp_handle_error() for hmp_migrate_set_speed()
  migration: move the units of migrate parameters from milliseconds to ms

Marc-André Lureau (1):
  docs/devel/migration: start a debugging section

Pan Nengyuan (3):
  migration/rdma: fix a memleak on error path in 
rdma_start_incoming_migration
  migration/multifd: fix memleaks in multifd_new_send_channel_async
  migration/multifd: Do error_free after migrate_set_error to avoid memleaks

Philippe Mathieu-Daudé (1):
  migration/colo: Add missing error-propagation code

Wei Wang (1):
  migration/xbzrle: add encoding rate

 docs/devel/migration.rst | 20 +
 migration/colo.c |  3 ++
 migration/migration.c| 44 +++-
 migration/multifd.c  |  5 
 migration/ram.c  | 74 
 migration/rdma.c |  1 +
 monitor/hmp-cmds.c   | 23 +++
 qapi/migration.json  | 53 +-
 8 files changed, 192 insertions(+), 31 deletions(-)




[PULL 01/12] migration: fix bad indentation in error_report()

2020-05-07 Thread Dr. David Alan Gilbert (git)
From: Mao Zhongyi 

bad indentation conflicts with CODING_STYLE doc.

Signed-off-by: Mao Zhongyi 
Message-Id: 
<09f7529c665cac0c6a5e032ac6fdb6ca701f7e37.1585329482.git.maozhon...@cmss.chinamobile.com>
Reviewed-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Signed-off-by: Dr. David Alan Gilbert 
---
 migration/migration.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 177cce9e95..8f27174ff6 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2494,7 +2494,7 @@ retry:
 if (header_type >= MIG_RP_MSG_MAX ||
 header_type == MIG_RP_MSG_INVALID) {
 error_report("RP: Received invalid message 0x%04x length 0x%04x",
-header_type, header_len);
+ header_type, header_len);
 mark_source_rp_bad(ms);
 goto out;
 }
@@ -2503,9 +2503,9 @@ retry:
 header_len != rp_cmd_args[header_type].len) ||
 header_len > sizeof(buf)) {
 error_report("RP: Received '%s' message (0x%04x) with"
-"incorrect length %d expecting %zu",
-rp_cmd_args[header_type].name, header_type, header_len,
-(size_t)rp_cmd_args[header_type].len);
+ "incorrect length %d expecting %zu",
+ rp_cmd_args[header_type].name, header_type, 
header_len,
+ (size_t)rp_cmd_args[header_type].len);
 mark_source_rp_bad(ms);
 goto out;
 }
@@ -2560,7 +2560,7 @@ retry:
 }
 if (header_len != expected_len) {
 error_report("RP: Req_Page_id with length %d expecting %zd",
-header_len, expected_len);
+ header_len, expected_len);
 mark_source_rp_bad(ms);
 goto out;
 }
-- 
2.26.2




Re: [PATCH] 9pfs: Fix potential deadlock of QEMU mainloop

2020-05-07 Thread Greg Kurz
On Thu, 07 May 2020 17:03:46 +0200
Christian Schoenebeck  wrote:

> On Donnerstag, 7. Mai 2020 16:33:28 CEST Greg Kurz wrote:
> > > I also haven't reviewed QEMU's lock implementations in very detail, but
> > > IIRC CoMutexes are completely handled in user space, while QemuMutex uses
> > > regular OS mutexes and hence might cost context switches.
> > 
> > ... since the locking would only been exercised with an hypothetical
> > client doing stupid things, this is beginning to look like bike-shedding
> > to me. :)
> 
> Aha, keep that in mind when you're doing your next review. ;-)
> 

Fair enough. :)

> No seriously, like I said, I don't really care too much about Mutex vs. 
> CoMutex in you patch here. It was actually more about wide-picture thinking, 
> i.e. other places of (co)mutexes being used or other potential changes that 
> would make this or other uses more relevant one day.
> 

Then I agree with you that it would be better to use CoMutex if we were
experiencing thread pool exhaustion indeed.

> > > > > > diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
> > > > > > index 9e046f7acb51..ac84ae804496 100644
> > > > > > --- a/hw/9pfs/9p.c
> > > > > > +++ b/hw/9pfs/9p.c
> > > > > > @@ -2170,7 +2170,7 @@ static int coroutine_fn
> > > > > > v9fs_do_readdir_with_stat(V9fsPDU *pdu, int32_t count = 0;
> > > > > > 
> > > > > >  struct stat stbuf;
> > > > > >  off_t saved_dir_pos;
> > > > > > 
> > > > > > -struct dirent *dent;
> > > > > > +struct dirent dent;
> > > > > > 
> > > > > >  /* save the directory position */
> > > > > >  saved_dir_pos = v9fs_co_telldir(pdu, fidp);
> > > > > > 
> > > > > > @@ -2181,13 +2181,11 @@ static int coroutine_fn
> > > > > > v9fs_do_readdir_with_stat(V9fsPDU *pdu, while (1) {
> > > > > > 
> > > > > >  v9fs_path_init();
> > > > > > 
> > > > > > -v9fs_readdir_lock(>fs.dir);
> > > > > > -
> > > > > 
> > > > > That's the deadlock fix, but ...
> > > > > 
> > > > > >  err = v9fs_co_readdir(pdu, fidp, );
> > > > > > 
> > > > > > -if (err || !dent) {
> > > > > > +if (err <= 0) {
> > > > > > 
> > > > > >  break;
> > > > > >  
> > > > > >  }
> > > > > 
> > > > > ... even though this code simplification might make sense, I don't
> > > > > think
> > > > > it
> > > > > should be mixed with the deadlock fix together in one patch. They are
> > > > > not
> > > > 
> > > > I could possibly split this in two patches, one for returning a copy
> > > > and one for moving the locking around, but...
> > > > 
> > > > > related with each other, nor is the code simplification you are aiming
> > > > > trivial
> > > > 
> > > > ... this assertion is somewhat wrong: moving the locking to
> > > > v9fs_co_readdir() really requires it returns a copy.
> > > 
> > > Yeah, I am also not sure whether a split would make it more trivial enough
> > > in this case to be worth the hassle. If you find an acceptable solution,
> > > good, if not then leave it one patch.
> > 
> > Another option would be to g_malloc() the dirent in v9fs_co_readdir() and
> > g_free() in the callers. This would cause less churn since we could keep
> > the same function signature.
> 
> I was actually just going to suggest the same. So yes, looks like a less 
> invasive change to me.
> 

I'll just do that then :)

> Best regards,
> Christian Schoenebeck
> 
> 




Re: [RFC PATCH 6/6] hw/block/nvme: Make device target agnostic

2020-05-07 Thread Paolo Bonzini
On 07/05/20 12:04, Stefan Hajnoczi wrote:
>>>   (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) {
>>> -qemu_ram_writeback(n->pmrdev->mr.ram_block,
>>> -   0, n->pmrdev->size);
>>> +memory_region_do_writeback(>pmrdev->mr, 0, n->pmrdev->size);
> qemu_ram_write() is being called because we need to msync or persist
> pmem here.
> 
> The memory_region_do_writeback() API is not equivalent, its purpose is
> for dirty write logging (which we don't care about here because the
> writes themselves will already have been logged when the guest performed
> them).
> 
> I think qemu_ram_writeback() should just be made common so that this
> code isn't target-specific. Maybe it should be renamed to
> qemu_ram_msync() to avoid confusion with dirty write APIs.

Yes, we can add qemu_ram_msync and memory_region_msync.

Paolo



signature.asc
Description: OpenPGP digital signature


[PATCH] tests/acceptance/boot_linux: Skip slow Aarch64 'virt' machine TCG test

2020-05-07 Thread Philippe Mathieu-Daudé
The BootLinuxAarch64.test_virt_tcg is reported to take >7min to run.
Add a possibility to users to skip this particular test, by setting
the AVOCADO_SKIP_SLOW_TESTS environment variable:

  $ AVOCADO_SKIP_SLOW_TESTS=please make check-acceptance
  ...
(05/88) tests/acceptance/boot_linux.py:BootLinuxAarch64.test_virt_tcg: 
SKIP: Test takes >7min
  ...

Reported-by: Peter Maydell 
Signed-off-by: Philippe Mathieu-Daudé 
---
 tests/acceptance/boot_linux.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/acceptance/boot_linux.py b/tests/acceptance/boot_linux.py
index 075a386300..a8df50d769 100644
--- a/tests/acceptance/boot_linux.py
+++ b/tests/acceptance/boot_linux.py
@@ -15,6 +15,7 @@
 from qemu.accel import kvm_available
 from qemu.accel import tcg_available
 
+from avocado import skipIf
 from avocado.utils import cloudinit
 from avocado.utils import network
 from avocado.utils import vmimage
@@ -159,6 +160,7 @@ def add_common_args(self):
 self.vm.add_args('-device', 'virtio-rng-pci,rng=rng0')
 self.vm.add_args('-object', 'rng-random,id=rng0,filename=/dev/urandom')
 
+@skipIf(os.getenv('AVOCADO_SKIP_SLOW_TESTS'), 'Test takes >7min')
 def test_virt_tcg(self):
 """
 :avocado: tags=accel:tcg
-- 
2.21.3




Re: device hotplug & file handles

2020-05-07 Thread Peter Krempa
On Thu, May 07, 2020 at 16:49:14 +0200, Gerd Hoffmann wrote:
>   Hi,
> 
> For usb device pass-through (aka -device usb-host) it would be very
> useful to pass file handles from libvirt to qemu.  The workflow would
> change from ...
> 
>   (1) libvirt enables access to /dev/usb/$bus/$dev
>   (2) libvirt passes $bus + $dev (using hostbus + hostaddr properties)
>   to qemu.
>   (3) qemu opens /dev/usb/$bus/$dev
> 
> ... to ...
> 
>   (1) libvirt opens /dev/usb/$bus/$dev
>   (2) libvirt passes filehandle to qemu.
> 
> Question is how can we pass the file descriptor best?  My idea would be
> to simply add an fd property to usb-host:
> 
>  * Coldplug would be "-device usb-host,fd=" (cmd line).
>  * Hotplug would be "device_add usb-host,fd=" (monitor).

We have prior art for both approaches so it's fine.

> 
> Will that work from libvirt point of view?

Sure! Just please make sure that the new approach is detectable somehow.
Either via device-list-properties or query-qmp-schema.

> Or does anyone have an better idea?
> 
> thanks,
>   Gerd
> 
> PS: background: https://bugzilla.redhat.com/show_bug.cgi?id=1595525
> 




[PATCH] target/ppc: Untabify excp_helper.c

2020-05-07 Thread Greg Kurz
Some tabs crept in with a recent change.

Fixes: 6dc6b557913f "target/ppc: Improve syscall exception logging"
Signed-off-by: Greg Kurz 
---
 target/ppc/excp_helper.c |   16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 639cac38f9c3..4db3d9ed9af5 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -70,16 +70,16 @@ static inline void dump_syscall(CPUPPCState *env)
 static inline void dump_hcall(CPUPPCState *env)
 {
 qemu_log_mask(CPU_LOG_INT, "hypercall r3=%016" PRIx64
- " r4=%016" PRIx64 " r5=%016" PRIx64 " r6=%016" PRIx64
- " r7=%016" PRIx64 " r8=%016" PRIx64 " r9=%016" PRIx64
- " r10=%016" PRIx64 " r11=%016" PRIx64 " r12=%016" PRIx64
+  " r4=%016" PRIx64 " r5=%016" PRIx64 " r6=%016" PRIx64
+  " r7=%016" PRIx64 " r8=%016" PRIx64 " r9=%016" PRIx64
+  " r10=%016" PRIx64 " r11=%016" PRIx64 " r12=%016" PRIx64
   " nip=" TARGET_FMT_lx "\n",
   ppc_dump_gpr(env, 3), ppc_dump_gpr(env, 4),
- ppc_dump_gpr(env, 5), ppc_dump_gpr(env, 6),
- ppc_dump_gpr(env, 7), ppc_dump_gpr(env, 8),
- ppc_dump_gpr(env, 9), ppc_dump_gpr(env, 10),
- ppc_dump_gpr(env, 11), ppc_dump_gpr(env, 12),
- env->nip);
+  ppc_dump_gpr(env, 5), ppc_dump_gpr(env, 6),
+  ppc_dump_gpr(env, 7), ppc_dump_gpr(env, 8),
+  ppc_dump_gpr(env, 9), ppc_dump_gpr(env, 10),
+  ppc_dump_gpr(env, 11), ppc_dump_gpr(env, 12),
+  env->nip);
 }
 
 static int powerpc_reset_wakeup(CPUState *cs, CPUPPCState *env, int excp,




Re: [PATCH v5 01/14] qcrypto/core: add generic infrastructure for crypto options amendment

2020-05-07 Thread Eric Blake

On 5/7/20 7:54 AM, Maxim Levitsky wrote:

This will be used first to implement luks keyslot management.

block_crypto_amend_opts_init will be used to convert
qemu-img cmdline to QCryptoBlockAmendOptions

Signed-off-by: Maxim Levitsky 
Reviewed-by: Daniel P. Berrangé 
---



+++ b/qapi/crypto.json
@@ -309,3 +309,19 @@
'base': 'QCryptoBlockInfoBase',
'discriminator': 'format',
'data': { 'luks': 'QCryptoBlockInfoLUKS' } }
+
+
+
+##
+# @QCryptoBlockAmendOptions:
+#
+# The options that are available for all encryption formats
+# when amending encryption settings
+#
+# Since: 5.0


Looks like our mail crossed, my v4 review landed after you sent v5. 
We'll still have to scrub this series for s/5.0/5.1/



+##
+{ 'union': 'QCryptoBlockAmendOptions',
+  'base': 'QCryptoBlockOptionsBase',
+  'discriminator': 'format',
+  'data': {
+} }



--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [PATCH v4 6/6] net/colo-compare.c: Correct ordering in complete and finalize

2020-05-07 Thread Lukas Straub
On Thu, 7 May 2020 13:26:11 +
"Zhang, Chen"  wrote:

> > -Original Message-
> > From: Lukas Straub 
> > Sent: Monday, May 4, 2020 6:28 PM
> > To: qemu-devel 
> > Cc: Zhang, Chen ; Li Zhijian
> > ; Jason Wang ; Marc-
> > André Lureau ; Paolo Bonzini
> > 
> > Subject: [PATCH v4 6/6] net/colo-compare.c: Correct ordering in complete
> > and finalize
> > 
> > In colo_compare_complete, insert CompareState into net_compares only
> > after everything has been initialized.
> > In colo_compare_finalize, remove CompareState from net_compares before
> > anything is deinitialized.  
> 
> S/deinitialized/finalized
> 
> It looks no dependences on each step on initialization and finalization.
> Do you means we just need add/remove each colo-compare module at last in 
> logic?

Yes. While I didn't see any crashes here, there is the possibility that if 
colo-compare is removed during checkpoint, the destroyed event_bh is called 
from colo_notify_compares_event. Same with colo_compare_complete (very 
unlikely) if colo-compare is created while colo is running, 
colo_notify_compares_event may call the uninitialized event_bh.

Regards,
Lukas Straub

> Or current code have some issue?
> 
> Thanks
> Zhang Chen
> 
> > 
> > Signed-off-by: Lukas Straub 
> > ---
> >  net/colo-compare.c | 45 +++--
> >  1 file changed, 23 insertions(+), 22 deletions(-)
> > 
> > diff --git a/net/colo-compare.c b/net/colo-compare.c index
> > c7572d75e9..6f80bcece6 100644
> > --- a/net/colo-compare.c
> > +++ b/net/colo-compare.c
> > @@ -1283,15 +1283,6 @@ static void
> > colo_compare_complete(UserCreatable *uc, Error **errp)
> > s->vnet_hdr);
> >  }
> > 
> > -qemu_mutex_lock(_compare_mutex);
> > -if (!colo_compare_active) {
> > -qemu_mutex_init(_mtx);
> > -qemu_cond_init(_complete_cond);
> > -colo_compare_active = true;
> > -}
> > -QTAILQ_INSERT_TAIL(_compares, s, next);
> > -qemu_mutex_unlock(_compare_mutex);
> > -
> >  s->out_sendco.s = s;
> >  s->out_sendco.chr = >chr_out;
> >  s->out_sendco.notify_remote_frame = false; @@ -1312,6 +1303,16 @@
> > static void colo_compare_complete(UserCreatable *uc, Error **errp)
> >connection_destroy);
> > 
> >  colo_compare_iothread(s);
> > +
> > +qemu_mutex_lock(_compare_mutex);
> > +if (!colo_compare_active) {
> > +qemu_mutex_init(_mtx);
> > +qemu_cond_init(_complete_cond);
> > +colo_compare_active = true;
> > +}
> > +QTAILQ_INSERT_TAIL(_compares, s, next);
> > +qemu_mutex_unlock(_compare_mutex);
> > +
> >  return;
> >  }
> > 
> > @@ -1384,19 +1385,6 @@ static void colo_compare_finalize(Object *obj)
> >  CompareState *s = COLO_COMPARE(obj);
> >  CompareState *tmp = NULL;
> > 
> > -qemu_chr_fe_deinit(>chr_pri_in, false);
> > -qemu_chr_fe_deinit(>chr_sec_in, false);
> > -qemu_chr_fe_deinit(>chr_out, false);
> > -if (s->notify_dev) {
> > -qemu_chr_fe_deinit(>chr_notify_dev, false);
> > -}
> > -
> > -if (s->iothread) {
> > -colo_compare_timer_del(s);
> > -}
> > -
> > -qemu_bh_delete(s->event_bh);
> > -
> >  qemu_mutex_lock(_compare_mutex);
> >  QTAILQ_FOREACH(tmp, _compares, next) {
> >  if (tmp == s) {
> > @@ -1411,6 +1399,19 @@ static void colo_compare_finalize(Object *obj)
> >  }
> >  qemu_mutex_unlock(_compare_mutex);
> > 
> > +qemu_chr_fe_deinit(>chr_pri_in, false);
> > +qemu_chr_fe_deinit(>chr_sec_in, false);
> > +qemu_chr_fe_deinit(>chr_out, false);
> > +if (s->notify_dev) {
> > +qemu_chr_fe_deinit(>chr_notify_dev, false);
> > +}
> > +
> > +if (s->iothread) {
> > +colo_compare_timer_del(s);
> > +}
> > +
> > +qemu_bh_delete(s->event_bh);
> > +
> >  AioContext *ctx = iothread_get_aio_context(s->iothread);
> >  aio_context_acquire(ctx);
> >  AIO_WAIT_WHILE(ctx, !s->out_sendco.done);
> > --
> > 2.20.1  



pgpTILdNe6FCb.pgp
Description: OpenPGP digital signature


[PATCH] accel: Move Xen accelerator code under accel/xen/

2020-05-07 Thread Philippe Mathieu-Daudé
This code is not related to hardware emulation.
Move it under accel/ with the other hypervisors.

Signed-off-by: Philippe Mathieu-Daudé 
---
We could also move the memory management functions from
hw/i386/xen/xen-hvm.c but it is not trivial.

Necessary step to remove "exec/ram_addr.h" in the next series.
---
 include/exec/ram_addr.h|  2 +-
 include/hw/xen/xen.h   | 11 --
 include/sysemu/xen.h   | 40 ++
 hw/xen/xen-common.c => accel/xen/xen-all.c |  3 ++
 hw/acpi/piix4.c|  2 +-
 hw/i386/pc.c   |  1 +
 hw/i386/pc_piix.c  |  1 +
 hw/i386/pc_q35.c   |  1 +
 hw/i386/xen/xen-hvm.c  |  1 +
 hw/i386/xen/xen_platform.c |  1 +
 hw/isa/piix3.c |  1 +
 hw/pci/msix.c  |  1 +
 migration/savevm.c |  2 +-
 softmmu/vl.c   |  2 +-
 stubs/xen-hvm.c|  9 -
 target/i386/cpu.c  |  2 +-
 MAINTAINERS|  2 ++
 accel/Makefile.objs|  1 +
 accel/xen/Makefile.objs|  1 +
 hw/xen/Makefile.objs   |  2 +-
 20 files changed, 60 insertions(+), 26 deletions(-)
 create mode 100644 include/sysemu/xen.h
 rename hw/xen/xen-common.c => accel/xen/xen-all.c (99%)
 create mode 100644 accel/xen/Makefile.objs

diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 5e59a3d8d7..4e05292f91 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -21,7 +21,7 @@
 
 #ifndef CONFIG_USER_ONLY
 #include "cpu.h"
-#include "hw/xen/xen.h"
+#include "sysemu/xen.h"
 #include "sysemu/tcg.h"
 #include "exec/ramlist.h"
 #include "exec/ramblock.h"
diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h
index 5ac1c6dc55..771dd447f2 100644
--- a/include/hw/xen/xen.h
+++ b/include/hw/xen/xen.h
@@ -20,13 +20,6 @@ extern uint32_t xen_domid;
 extern enum xen_mode xen_mode;
 extern bool xen_domid_restrict;
 
-extern bool xen_allowed;
-
-static inline bool xen_enabled(void)
-{
-return xen_allowed;
-}
-
 int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num);
 void xen_piix3_set_irq(void *opaque, int irq_num, int level);
 void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len);
@@ -39,10 +32,6 @@ void xenstore_store_pv_console_info(int i, struct Chardev 
*chr);
 
 void xen_hvm_init(PCMachineState *pcms, MemoryRegion **ram_memory);
 
-void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size,
-   struct MemoryRegion *mr, Error **errp);
-void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length);
-
 void xen_register_framebuffer(struct MemoryRegion *mr);
 
 #endif /* QEMU_HW_XEN_H */
diff --git a/include/sysemu/xen.h b/include/sysemu/xen.h
new file mode 100644
index 00..609d2d4184
--- /dev/null
+++ b/include/sysemu/xen.h
@@ -0,0 +1,40 @@
+/*
+ * QEMU Xen support
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef SYSEMU_XEN_H
+#define SYSEMU_XEN_H
+
+#ifdef CONFIG_XEN
+
+extern bool xen_allowed;
+
+#define xen_enabled() (xen_allowed)
+
+#ifndef CONFIG_USER_ONLY
+void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length);
+void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size,
+   struct MemoryRegion *mr, Error **errp);
+#endif
+
+#else /* !CONFIG_XEN */
+
+#define xen_enabled() 0
+#ifndef CONFIG_USER_ONLY
+static inline void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length)
+{
+abort();
+}
+static inline void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size,
+ MemoryRegion *mr, Error **errp)
+{
+abort();
+}
+#endif
+
+#endif /* CONFIG_XEN */
+
+#endif
diff --git a/hw/xen/xen-common.c b/accel/xen/xen-all.c
similarity index 99%
rename from hw/xen/xen-common.c
rename to accel/xen/xen-all.c
index a15070f7f6..5c64571cb8 100644
--- a/hw/xen/xen-common.c
+++ b/accel/xen/xen-all.c
@@ -16,6 +16,7 @@
 #include "hw/xen/xen_pt.h"
 #include "chardev/char.h"
 #include "sysemu/accel.h"
+#include "sysemu/xen.h"
 #include "sysemu/runstate.h"
 #include "migration/misc.h"
 #include "migration/global_state.h"
@@ -31,6 +32,8 @@
 do { } while (0)
 #endif
 
+bool xen_allowed;
+
 xc_interface *xen_xc;
 xenforeignmemory_handle *xen_fmem;
 xendevicemodel_handle *xen_dmod;
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 964d6f5990..daed273687 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -30,6 +30,7 @@
 #include "hw/acpi/acpi.h"
 #include "sysemu/runstate.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/xen.h"
 #include "qapi/error.h"
 #include "qemu/range.h"
 #include "exec/address-spaces.h"
@@ -41,7 +42,6 @@
 #include "hw/mem/nvdimm.h"
 #include 

Re: [PATCH 0/2] migration/multifd: fix two memleaks

2020-05-07 Thread Dr. David Alan Gilbert
* Pan Nengyuan (pannengy...@huawei.com) wrote:
> Fix two memleaks in multifd_send_thread/multifd_new_send_channel_async when 
> error happen.
> 
> Pan Nengyuan (2):
>   migration/multifd: fix memleaks in multifd_new_send_channel_async
>   migration/multifd: Do error_free after migrate_set_error to avoid
> memleaks
> 
>  migration/multifd.c | 5 +
>  1 file changed, 5 insertions(+)

Queued

> 
> -- 
> 2.18.2
> 
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




Re: [PATCH 2/2] block/block-copy: Simplify block_copy_do_copy()

2020-05-07 Thread Eric Blake

On 5/7/20 7:11 AM, Philippe Mathieu-Daudé wrote:

block_copy_do_copy() is static, only used in block_copy_task_entry
with the error_is_read argument set. No need to check for it,
simplify.

Signed-off-by: Philippe Mathieu-Daudé 
---
  block/block-copy.c | 12 +++-
  1 file changed, 3 insertions(+), 9 deletions(-)


Reviewed-by: Eric Blake 

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend

2020-05-07 Thread Cindy Lu
On Thu, May 7, 2020 at 11:30 PM Maxime Coquelin
 wrote:
>
>
>
> On 4/20/20 11:32 AM, Cindy Lu wrote:
> > diff --git a/include/hw/virtio/vhost-backend.h 
> > b/include/hw/virtio/vhost-backend.h
> > index 6f6670783f..d81bd9885f 100644
> > --- a/include/hw/virtio/vhost-backend.h
> > +++ b/include/hw/virtio/vhost-backend.h
> > @@ -17,7 +17,8 @@ typedef enum VhostBackendType {
> >  VHOST_BACKEND_TYPE_NONE = 0,
> >  VHOST_BACKEND_TYPE_KERNEL = 1,
> >  VHOST_BACKEND_TYPE_USER = 2,
> > -VHOST_BACKEND_TYPE_MAX = 3,
> > +VHOST_BACKEND_TYPE_VDPA = 3,
> > +VHOST_BACKEND_TYPE_MAX = 4,
> >  } VhostBackendType;
> >
> >  typedef enum VhostSetConfigType {
> > @@ -112,6 +113,7 @@ typedef int (*vhost_get_inflight_fd_op)(struct 
> > vhost_dev *dev,
> >  typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev,
> >  struct vhost_inflight *inflight);
> >
> > +typedef int (*vhost_set_state_op)(struct vhost_dev *dev, int state);
>
> I think state should be of type uint8_t.
>
ok, I will change this to uint8_t




Re: [PATCH v4 09/13] migration/ram: Consolidate variable reset after placement in ram_load_postcopy()

2020-05-07 Thread David Hildenbrand
On 07.05.20 17:42, Dr. David Alan Gilbert wrote:
> * Dr. David Alan Gilbert (dgilb...@redhat.com) wrote:
>> * David Hildenbrand (da...@redhat.com) wrote:
>>> Let's consolidate resetting the variables.
>>>
>>> Cc: "Dr. David Alan Gilbert" 
>>> Cc: Juan Quintela 
>>> Cc: Peter Xu 
>>> Signed-off-by: David Hildenbrand 
>>
>> Reviewed-by: Dr. David Alan Gilbert 
> 
> Queued this one only; I had to do manual patch application due to some
> renaming in 91ba442; but I think it's still OK.

Thanks, how to best proceed with the other 12 patches in this series?


-- 
Thanks,

David / dhildenb




Re: [PATCH v6 3/5] 9pfs: add new function v9fs_co_readdir_many()

2020-05-07 Thread Greg Kurz
On Thu, 07 May 2020 14:16:43 +0200
Christian Schoenebeck  wrote:

> On Montag, 4. Mai 2020 11:18:34 CEST Greg Kurz wrote:
> > On Fri, 01 May 2020 16:04:41 +0200
> > 
> > Christian Schoenebeck  wrote:
> > > On Donnerstag, 30. April 2020 15:30:49 CEST Greg Kurz wrote:
> > > > > > I agree that a client that issues concurrent readdir requests on the
> > > > > > same fid is probably asking for troubles, but this permitted by the
> > > > > > spec. Whether we should detect such conditions and warn or even fail
> > > > > > is discussion for another thread.
> > > > > > 
> > > > > > The locking is only needed to avoid concurrent accesses to the
> > > > > > dirent
> > > > > > structure returned by readdir(), otherwise we could return partially
> > > > > > overwritten file names to the client. It must be done for each
> > > > > > individual
> > > > > > call to readdir(), but certainly not for multiple calls.
> > > > > 
> > > > > Yeah, that would resolve this issue more appropriately for 9p2000.L,
> > > > > since
> > > > > Treaddir specifies an offset, but for 9p2000.u the result of a
> > > > > concurrent
> > > > > read on a directory (9p2000.u) would still be undefined.
> > > > 
> > > > The bad client behavior you want to tackle has nothing to do with
> > > > the locking itself. Since all the code in 9p.c runs serialized in
> > > > the context of the QEMU main loop, concurrent readdir requests could
> > > > easily be detected up-front with a simple flag in the fid structure.
> > > 
> > > Well, it's fine with me. I don't really see an issue here right now. But
> > > that all the code was serialized is not fully true. Most of the 9p.c code
> > > is still heavily dispatching between main thread and worker threads back
> > > and forth. And for that reason the order of request processing might
> > > change quite arbitrarily in between. Just keep that in mind.
> > 
> > Just to make things clear. The code in 9p.c is ALWAYS exclusively run by
> > the main thread. Only the code called under v9fs_co_run_in_worker() is
> > dispatched on worker threads. So, yes the order of individual backend
> > operations may change, but the start of a new client request is necessarily
> > serialized with the completion of pending ones, which is the only thing
> > we care for actually.
> 
> I just looked at this. 9p.c code is called by main I/O thread only, that's 
> clear. The start of requests come also in in order, yes, but it seems you 
> would think that main I/O thread would not grab the next client request from 
> queue before completing the current/previous client request (that is not 
> before transmitting result to client). If yes, I am not so sure about this 
> claim:
> 

Hrm... I don't think I've ever said anything like that :)

What I'm saying is that, thanks to the serialization of
request start and completion, v9fs_readdir() and v9fs_read()
can:
- flag the fid as being involved in a readdir request
- clear the flag when the request is complete or failed
- directly fail or print a warning if the fid already
  has the flag set (ie. a previous request hasn't
  completed yet)

But again, I'm not a big fan of adding code to handle
such an unlikely situation which is even not explicitly
forbidden by the 9p spec.

> For instance v9fs_path_write_lock() is using a co-mutex, right? So an 
> unsuccesful lock would cause main I/O thread to grab the next request before 
> completing the current/previous request.
> 

An unsuccessful lock would cause the main I/O thread to switch
to some other task.

> And what happens on any run_in_worker({}) call? If there is another client 
> request in the queue, main I/O thread would pull that request from the queue 
> before waiting for the worker thread to complete its task, wouldn't it?
> 

The main I/O thread won't wait for anything, so, yes, it will probably
start handling the new request until it yields.

> Just looked at the code so far, haven't tested it yet ...
> 
> Best regards,
> Christian Schoenebeck
> 
> 




Re: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check that colo-compare is active

2020-05-07 Thread Lukas Straub
On Thu, 7 May 2020 11:38:04 +
"Zhang, Chen"  wrote:

> > -Original Message-
> > From: Lukas Straub 
> > Sent: Monday, May 4, 2020 6:28 PM
> > To: qemu-devel 
> > Cc: Zhang, Chen ; Li Zhijian
> > ; Jason Wang ; Marc-
> > André Lureau ; Paolo Bonzini
> > 
> > Subject: [PATCH v4 5/6] net/colo-compare.c, softmmu/vl.c: Check that colo-
> > compare is active
> > 
> > If the colo-compare object is removed before failover and a checkpoint
> > happens, qemu crashes because it tries to lock the destroyed event_mtx in
> > colo_notify_compares_event.
> > 
> > Fix this by checking if everything is initialized by introducing a new 
> > variable
> > colo_compare_active which is protected by a new mutex
> > colo_compare_mutex. The new mutex also protects against concurrent
> > access of the net_compares list and makes sure that
> > colo_notify_compares_event isn't active while we destroy event_mtx and
> > event_complete_cond.
> > 
> > With this it also is again possible to use colo without colo-compare 
> > (periodic
> > mode) and to use multiple colo-compare for multiple network interfaces.
> >   
> 
> Hi Lukas,
> 
> For this case I think we don't need to touch vl.c code, we can solve this 
> issue from another perspective:
> How to remove colo-compare?
> User will use qemu-monitor or QMP command to disable an object, so we just 
> need return operation failed
> When user try to remove colo-compare object while COLO is running.

Yeah, but that still leaves the other problem that colo can't be used without 
colo-compare (qemu crashes then).

Regards,
Lukas Straub

> Thanks
> Zhang Chen
> 
> > Signed-off-by: Lukas Straub 
> > ---
> >  net/colo-compare.c | 35 +--
> >  net/colo-compare.h |  1 +
> >  softmmu/vl.c   |  2 ++
> >  3 files changed, 32 insertions(+), 6 deletions(-)
> > 
> > diff --git a/net/colo-compare.c b/net/colo-compare.c index
> > 56db3d3bfc..c7572d75e9 100644
> > --- a/net/colo-compare.c
> > +++ b/net/colo-compare.c
> > @@ -54,6 +54,8 @@ static NotifierList colo_compare_notifiers =  #define
> > REGULAR_PACKET_CHECK_MS 3000  #define DEFAULT_TIME_OUT_MS 3000
> > 
> > +static QemuMutex colo_compare_mutex;
> > +static bool colo_compare_active;
> >  static QemuMutex event_mtx;
> >  static QemuCond event_complete_cond;
> >  static int event_unhandled_count;
> > @@ -906,6 +908,12 @@ static void check_old_packet_regular(void *opaque)
> > void colo_notify_compares_event(void *opaque, int event, Error **errp)  {
> >  CompareState *s;
> > +qemu_mutex_lock(_compare_mutex);
> > +
> > +if (!colo_compare_active) {
> > +qemu_mutex_unlock(_compare_mutex);
> > +return;
> > +}
> > 
> >  qemu_mutex_lock(_mtx);
> >  QTAILQ_FOREACH(s, _compares, next) { @@ -919,6 +927,7 @@ void
> > colo_notify_compares_event(void *opaque, int event, Error **errp)
> >  }
> > 
> >  qemu_mutex_unlock(_mtx);
> > +qemu_mutex_unlock(_compare_mutex);
> >  }
> > 
> >  static void colo_compare_timer_init(CompareState *s) @@ -1274,7 +1283,14
> > @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
> > s->vnet_hdr);
> >  }
> > 
> > +qemu_mutex_lock(_compare_mutex);
> > +if (!colo_compare_active) {
> > +qemu_mutex_init(_mtx);
> > +qemu_cond_init(_complete_cond);
> > +colo_compare_active = true;
> > +}
> >  QTAILQ_INSERT_TAIL(_compares, s, next);
> > +qemu_mutex_unlock(_compare_mutex);
> > 
> >  s->out_sendco.s = s;
> >  s->out_sendco.chr = >chr_out;
> > @@ -1290,9 +1306,6 @@ static void colo_compare_complete(UserCreatable
> > *uc, Error **errp)
> > 
> >  g_queue_init(>conn_list);
> > 
> > -qemu_mutex_init(_mtx);
> > -qemu_cond_init(_complete_cond);
> > -
> >  s->connection_track_table =
> > g_hash_table_new_full(connection_key_hash,
> >connection_key_equal,
> >g_free, @@ -1384,12 
> > +1397,19 @@ static void
> > colo_compare_finalize(Object *obj)
> > 
> >  qemu_bh_delete(s->event_bh);
> > 
> > +qemu_mutex_lock(_compare_mutex);
> >  QTAILQ_FOREACH(tmp, _compares, next) {
> >  if (tmp == s) {
> >  QTAILQ_REMOVE(_compares, s, next);
> >  break;
> >  }
> >  }
> > +if (QTAILQ_EMPTY(_compares)) {
> > +colo_compare_active = false;
> > +qemu_mutex_destroy(_mtx);
> > +qemu_cond_destroy(_complete_cond);
> > +}
> > +qemu_mutex_unlock(_compare_mutex);
> > 
> >  AioContext *ctx = iothread_get_aio_context(s->iothread);
> >  aio_context_acquire(ctx);
> > @@ -1413,15 +1433,18 @@ static void colo_compare_finalize(Object *obj)
> >  object_unref(OBJECT(s->iothread));
> >  }
> > 
> > -qemu_mutex_destroy(_mtx);
> > -qemu_cond_destroy(_complete_cond);
> > -
> >  g_free(s->pri_indev);
> >  g_free(s->sec_indev);
> >  

Re: [PULL 13/32] cpus: Fix configure_icount() error API violation

2020-05-07 Thread Peter Maydell
On Wed, 29 Apr 2020 at 08:34, Markus Armbruster  wrote:
>
> The Error ** argument must be NULL, _abort, _fatal, or a
> pointer to a variable containing NULL.  Passing an argument of the
> latter kind twice without clearing it in between is wrong: if the
> first call sets an error, it no longer points to NULL for the second
> call.
>
> configure_icount() is wrong that way.  Harmless, because its @errp is
> always _abort or _fatal.
>
> Just as wrong (and just as harmless): when it fails, it can still
> update global state.

Hi; Coverity complains about this change (CID 1428754):
>
>  void configure_icount(QemuOpts *opts, Error **errp)
>  {
> -const char *option;
> +const char *option = qemu_opt_get(opts, "shift");
> +bool sleep = qemu_opt_get_bool(opts, "sleep", true);
> +bool align = qemu_opt_get_bool(opts, "align", false);
> +long time_shift = -1;
>  char *rem_str = NULL;
>
> -option = qemu_opt_get(opts, "shift");
> -if (!option) {
> -if (qemu_opt_get(opts, "align") != NULL) {
> -error_setg(errp, "Please specify shift option when using align");
> -}
> +if (!option && qemu_opt_get(opts, "align")) {
> +error_setg(errp, "Please specify shift option when using align");
>  return;

Previously, if option was NULL we would always take this early
exit. Now we only take the exit if option is NULL and the
qemu_opt_get() returns true, so in some cases execution
can continue through the function with a NULL option...

>  }
>
> -icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
> +if (align && !sleep) {
> +error_setg(errp, "align=on and sleep=off are incompatible");
> +return;
> +}
> +
> +if (strcmp(option, "auto") != 0) {

...but here we pass option to strcmp(), which is wrong if it
can be NULL.

thanks
-- PMM



Re: [PATCH 1/2] block/block-copy: Fix uninitialized variable in block_copy_task_entry

2020-05-07 Thread Eric Blake

On 5/7/20 7:11 AM, Philippe Mathieu-Daudé wrote:

Fix when building with -Os:

 CC  block/block-copy.o
   block/block-copy.c: In function ‘block_copy_task_entry’:
   block/block-copy.c:428:38: error: ‘error_is_read’ may be used uninitialized 
in this function [-Werror=maybe-uninitialized]
 428 | t->call_state->error_is_read = error_is_read;
 | ~^~~



Looks like -Os triggered different inlining of block_copy_do_copy().  I 
confirm that block_copy_do_copy does NOT initialize error_is_read except 
when returning < 0, but similarly block_copy_task_entry() does not read 
error_is_read except in the same setups.  So it looks like no actual bug 
was triggered, but we can definitely aid the compiler's analysis by 
initializing.



Signed-off-by: Philippe Mathieu-Daudé 
---
  block/block-copy.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)


Reviewed-by: Eric Blake 

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend

2020-05-07 Thread Cindy Lu
On Thu, May 7, 2020 at 11:13 PM Maxime Coquelin
 wrote:
>
>
>
> On 4/20/20 11:32 AM, Cindy Lu wrote:
> > Currently we have 2 types of vhost backends in QEMU: vhost kernel and
> > vhost-user. The above patch provides a generic device for vDPA purpose,
> > this vDPA device exposes to user space a non-vendor-specific configuration
> > interface for setting up a vhost HW accelerator, this patch set introduces
> > a third vhost backend called vhost-vdpa based on the vDPA interface.
> >
> > Vhost-vdpa usage:
> >
> >   qemu-system-x86_64 -cpu host -enable-kvm \
> > ..
> >   -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \
> >   -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \
> >
> > Author: Tiwei Bie
> > Signed-off-by: Cindy Lu 
> > ---
> >  hw/net/vhost_net.c|  43 
> >  hw/net/virtio-net.c   |   9 +
> >  hw/virtio/Makefile.objs   |   2 +-
> >  hw/virtio/vhost-backend.c |   3 +
> >  hw/virtio/vhost-vdpa.c| 379 ++
> >  hw/virtio/vhost.c |   5 +
> >  include/hw/virtio/vhost-backend.h |   6 +-
> >  include/hw/virtio/vhost-vdpa.h|  14 ++
> >  8 files changed, 459 insertions(+), 2 deletions(-)
> >  create mode 100644 hw/virtio/vhost-vdpa.c
> >  create mode 100644 include/hw/virtio/vhost-vdpa.h
> >
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index 4096d64aaf..0d13fda2fc 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> > @@ -17,8 +17,10 @@
> >  #include "net/net.h"
> >  #include "net/tap.h"
> >  #include "net/vhost-user.h"
> > +#include "net/vhost-vdpa.h"
> >
> >  #include "standard-headers/linux/vhost_types.h"
> > +#include "linux-headers/linux/vhost.h"
> >  #include "hw/virtio/virtio-net.h"
> >  #include "net/vhost_net.h"
> >  #include "qemu/error-report.h"
> > @@ -85,6 +87,29 @@ static const int user_feature_bits[] = {
> >  VHOST_INVALID_FEATURE_BIT
> >  };
> >
> > +static const int vdpa_feature_bits[] = {
> > +VIRTIO_F_NOTIFY_ON_EMPTY,
> > +VIRTIO_RING_F_INDIRECT_DESC,
> > +VIRTIO_RING_F_EVENT_IDX,
> > +VIRTIO_F_ANY_LAYOUT,
> > +VIRTIO_F_VERSION_1,
> > +VIRTIO_NET_F_CSUM,
> > +VIRTIO_NET_F_GUEST_CSUM,
> > +VIRTIO_NET_F_GSO,
> > +VIRTIO_NET_F_GUEST_TSO4,
> > +VIRTIO_NET_F_GUEST_TSO6,
> > +VIRTIO_NET_F_GUEST_ECN,
> > +VIRTIO_NET_F_GUEST_UFO,
> > +VIRTIO_NET_F_HOST_TSO4,
> > +VIRTIO_NET_F_HOST_TSO6,
> > +VIRTIO_NET_F_HOST_ECN,
> > +VIRTIO_NET_F_HOST_UFO,
> > +VIRTIO_NET_F_MRG_RXBUF,
> > +VIRTIO_NET_F_MTU,
> > +VIRTIO_F_IOMMU_PLATFORM,
> > +VIRTIO_NET_F_GUEST_ANNOUNCE,
> > +VHOST_INVALID_FEATURE_BIT
> > +};
> >  static const int *vhost_net_get_feature_bits(struct vhost_net *net)
> >  {
> >  const int *feature_bits = 0;
> > @@ -96,6 +121,9 @@ static const int *vhost_net_get_feature_bits(struct 
> > vhost_net *net)
> >  case NET_CLIENT_DRIVER_VHOST_USER:
> >  feature_bits = user_feature_bits;
> >  break;
> > +case NET_CLIENT_DRIVER_VHOST_VDPA:
> > +feature_bits = vdpa_feature_bits;
> > +break;
> >  default:
> >  error_report("Feature bits not defined for this type: %d",
> >  net->nc->info->type);
> > @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc)
> >  assert(vhost_net);
> >  break;
> >  #endif
> > +case NET_CLIENT_DRIVER_VHOST_VDPA:
> > +vhost_net = vhost_vdpa_get_vhost_net(nc);
> > +assert(vhost_net);
> > +break;
> >  default:
> >  break;
> >  }
> > @@ -465,3 +497,14 @@ int vhost_net_set_mtu(struct vhost_net *net, uint16_t 
> > mtu)
> >
> >  return vhost_ops->vhost_net_set_mtu(>dev, mtu);
> >  }
> > +int vhost_set_state(NetClientState *nc, int state)
> > +{
> > +struct vhost_net *net = get_vhost_net(nc);
> > +struct vhost_dev *hdev = >dev;
> > +if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
>
> Maybe checking the vhost_set_state callback is implemented is enough,
> and it is not need to restrict that to Vhost-vDPA?
Sure, Will remove this

> > +if (hdev->vhost_ops->vhost_set_state) {
> > +return hdev->vhost_ops->vhost_set_state(hdev, state);
> > + }
> > +}
> > +return 0;
> > +}
>




Re: [PULL 24/24] block/block-copy: use aio-task-pool API

2020-05-07 Thread Peter Maydell
On Tue, 5 May 2020 at 13:59, Max Reitz  wrote:
>
> From: Vladimir Sementsov-Ogievskiy 
>
> Run block_copy iterations in parallel in aio tasks.
>
> Changes:
>   - BlockCopyTask becomes aio task structure. Add zeroes field to pass
> it to block_copy_do_copy
>   - add call state - it's a state of one call of block_copy(), shared
> between parallel tasks. For now used only to keep information about
> first error: is it read or not.
>   - convert block_copy_dirty_clusters to aio-task loop.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> Message-Id: <20200429130847.28124-6-vsement...@virtuozzo.com>
> Signed-off-by: Max Reitz 

Hi; this patch seems to introduce a use-after-free (CID 1428756):


> @@ -484,6 +554,8 @@ static int coroutine_fn 
> block_copy_dirty_clusters(BlockCopyState *s,
>  int ret = 0;
>  bool found_dirty = false;
>  int64_t end = offset + bytes;
> +AioTaskPool *aio = NULL;
> +BlockCopyCallState call_state = {false, false};
>
>  /*
>   * block_copy() user is responsible for keeping source and target in same
> @@ -495,11 +567,11 @@ static int coroutine_fn 
> block_copy_dirty_clusters(BlockCopyState *s,
>  assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
>  assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
>
> -while (bytes) {
> -g_autofree BlockCopyTask *task = NULL;
> +while (bytes && aio_task_pool_status(aio) == 0) {
> +BlockCopyTask *task;
>  int64_t status_bytes;
>
> -task = block_copy_task_create(s, offset, bytes);
> +task = block_copy_task_create(s, _state, offset, bytes);
>  if (!task) {
>  /* No more dirty bits in the bitmap */
>  trace_block_copy_skip_range(s, offset, bytes);
> @@ -519,6 +591,7 @@ static int coroutine_fn 
> block_copy_dirty_clusters(BlockCopyState *s,
>  }
>  if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) {
>  block_copy_task_end(task, 0);
> +g_free(task);

This hunk frees 'task' here...

>  progress_set_remaining(s->progress,
> bdrv_get_dirty_count(s->copy_bitmap) +
> s->in_flight_bytes);

...but the next lines after this one (just out of context) are:

trace_block_copy_skip_range(s, task->offset, task->bytes);
offset = task_end(task);

which now dereference 'task' after it is freed.

thanks
-- PMM



Re: [PATCH v3] migration/xbzrle: add encoding rate

2020-05-07 Thread Dr. David Alan Gilbert
* Wei Wang (wei.w.w...@intel.com) wrote:
> Users may need to check the xbzrle encoding rate to know if the guest
> memory is xbzrle encoding-friendly, and dynamically turn off the
> encoding if the encoding rate is low.
> 
> Signed-off-by: Yi Sun 
> Signed-off-by: Wei Wang 

Queued

> ---
>  migration/migration.c |  1 +
>  migration/ram.c   | 39 +--
>  monitor/hmp-cmds.c|  2 ++
>  qapi/migration.json   |  5 -
>  4 files changed, 44 insertions(+), 3 deletions(-)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index 187ac0410c..e40421353c 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -930,6 +930,7 @@ static void populate_ram_info(MigrationInfo *info, 
> MigrationState *s)
>  info->xbzrle_cache->pages = xbzrle_counters.pages;
>  info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
>  info->xbzrle_cache->cache_miss_rate = 
> xbzrle_counters.cache_miss_rate;
> +info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate;
>  info->xbzrle_cache->overflow = xbzrle_counters.overflow;
>  }
>  
> diff --git a/migration/ram.c b/migration/ram.c
> index 04f13feb2e..41b75a0a0f 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -327,6 +327,10 @@ struct RAMState {
>  uint64_t num_dirty_pages_period;
>  /* xbzrle misses since the beginning of the period */
>  uint64_t xbzrle_cache_miss_prev;
> +/* Amount of xbzrle pages since the beginning of the period */
> +uint64_t xbzrle_pages_prev;
> +/* Amount of xbzrle encoded bytes since the beginning of the period */
> +uint64_t xbzrle_bytes_prev;
>  
>  /* compression statistics since the beginning of the period */
>  /* amount of count that no free thread to compress data */
> @@ -696,6 +700,18 @@ static int save_xbzrle_page(RAMState *rs, uint8_t 
> **current_data,
>  return -1;
>  }
>  
> +/*
> + * Reaching here means the page has hit the xbzrle cache, no matter what
> + * encoding result it is (normal encoding, overflow or skipping the 
> page),
> + * count the page as encoded. This is used to caculate the encoding rate.
> + *
> + * Example: 2 pages (8KB) being encoded, first page encoding generates 
> 2KB,
> + * 2nd page turns out to be skipped (i.e. no new bytes written to the
> + * page), the overall encoding rate will be 8KB / 2KB = 4, which has the
> + * skipped page included. In this way, the encoding rate can tell if the
> + * guest page is good for xbzrle encoding.
> + */
> +xbzrle_counters.pages++;
>  prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
>  
>  /* save current buffer into memory */
> @@ -726,6 +742,7 @@ static int save_xbzrle_page(RAMState *rs, uint8_t 
> **current_data,
>  } else if (encoded_len == -1) {
>  trace_save_xbzrle_page_overflow();
>  xbzrle_counters.overflow++;
> +xbzrle_counters.bytes += TARGET_PAGE_SIZE;
>  return -1;
>  }
>  
> @@ -736,8 +753,12 @@ static int save_xbzrle_page(RAMState *rs, uint8_t 
> **current_data,
>  qemu_put_be16(rs->f, encoded_len);
>  qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
>  bytes_xbzrle += encoded_len + 1 + 2;
> -xbzrle_counters.pages++;
> -xbzrle_counters.bytes += bytes_xbzrle;
> +/*
> + * Like compressed_size (please see update_compress_thread_counts),
> + * the xbzrle encoded bytes don't count the 8 byte header with
> + * RAM_SAVE_FLAG_CONTINUE.
> + */
> +xbzrle_counters.bytes += bytes_xbzrle - 8;
>  ram_counters.transferred += bytes_xbzrle;
>  
>  return 1;
> @@ -870,9 +891,23 @@ static void migration_update_rates(RAMState *rs, int64_t 
> end_time)
>  }
>  
>  if (migrate_use_xbzrle()) {
> +double encoded_size, unencoded_size;
> +
>  xbzrle_counters.cache_miss_rate = 
> (double)(xbzrle_counters.cache_miss -
>  rs->xbzrle_cache_miss_prev) / page_count;
>  rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
> +unencoded_size = (xbzrle_counters.pages - rs->xbzrle_pages_prev) *
> + TARGET_PAGE_SIZE;
> +encoded_size = xbzrle_counters.bytes - rs->xbzrle_bytes_prev;
> +if (xbzrle_counters.pages == rs->xbzrle_pages_prev) {
> +xbzrle_counters.encoding_rate = 0;
> +} else if (!encoded_size) {
> +xbzrle_counters.encoding_rate = UINT64_MAX;
> +} else {
> +xbzrle_counters.encoding_rate = unencoded_size / encoded_size;
> +}
> +rs->xbzrle_pages_prev = xbzrle_counters.pages;
> +rs->xbzrle_bytes_prev = xbzrle_counters.bytes;
>  }
>  
>  if (migrate_use_compression()) {
> diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
> index 9b94e67879..c2a3a667ae 100644
> --- a/monitor/hmp-cmds.c
> +++ b/monitor/hmp-cmds.c
> @@ -303,6 +303,8 @@ void hmp_info_migrate(Monitor *mon, 

Re: [PATCH v4 5/5] block/block-copy: use aio-task-pool API

2020-05-07 Thread Kevin Wolf
Am 29.04.2020 um 15:08 hat Vladimir Sementsov-Ogievskiy geschrieben:
> Run block_copy iterations in parallel in aio tasks.
> 
> Changes:
>   - BlockCopyTask becomes aio task structure. Add zeroes field to pass
> it to block_copy_do_copy
>   - add call state - it's a state of one call of block_copy(), shared
> between parallel tasks. For now used only to keep information about
> first error: is it read or not.
>   - convert block_copy_dirty_clusters to aio-task loop.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 

> @@ -519,6 +591,7 @@ static int coroutine_fn 
> block_copy_dirty_clusters(BlockCopyState *s,
>  }
>  if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) {
>  block_copy_task_end(task, 0);
> +g_free(task);
>  progress_set_remaining(s->progress,
> bdrv_get_dirty_count(s->copy_bitmap) +
> s->in_flight_bytes);
>  trace_block_copy_skip_range(s, task->offset, task->bytes);
>  offset = task_end(task);
>  bytes = end - offset;

Coverity found this use after free for task. Please fix.

Kevin




Re: [PATCH v4 3/6] net/colo-compare.c: Fix deadlock in compare_chr_send

2020-05-07 Thread Lukas Straub
On Thu, 7 May 2020 11:00:26 +
"Zhang, Chen"  wrote:

> > -Original Message-
> > From: Lukas Straub 
> > Sent: Monday, May 4, 2020 6:28 PM
> > To: qemu-devel 
> > Cc: Zhang, Chen ; Li Zhijian 
> > ; Jason Wang ; Marc- 
> > André Lureau ; Paolo Bonzini 
> > 
> > Subject: [PATCH v4 3/6] net/colo-compare.c: Fix deadlock in 
> > compare_chr_send
> > 
> > The chr_out chardev is connected to a filter-redirector running in the 
> > main loop. qemu_chr_fe_write_all might block here in compare_chr_send 
> > if the (socket-)buffer is full.
> > If another filter-redirector in the main loop want's to send data to 
> > chr_pri_in it might also block if the buffer is full. This leads to a 
> > deadlock because both event loops get blocked.
> > 
> > Fix this by converting compare_chr_send to a coroutine and putting the 
> > packets in a send queue.
> > 
> > Signed-off-by: Lukas Straub 
> > ---
> >  net/colo-compare.c | 187 ++---
> > 
> >  net/colo.c |   7 ++
> >  net/colo.h |   1 +
> >  3 files changed, 150 insertions(+), 45 deletions(-)
> > 
> > diff --git a/net/colo-compare.c b/net/colo-compare.c index 
> > 1de4220fe2..2a4e7f7c4e 100644
> > --- a/net/colo-compare.c
> > +++ b/net/colo-compare.c
> > @@ -32,6 +32,9 @@
> >  #include "migration/migration.h"
> >  #include "util.h"
> > 
> > +#include "block/aio-wait.h"
> > +#include "qemu/coroutine.h"
> > +
> >  #define TYPE_COLO_COMPARE "colo-compare"
> >  #define COLO_COMPARE(obj) \
> >  OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE) @@ -77,6
> > +80,23 @@ static int event_unhandled_count;
> >   *|packet  |  |packet  +|packet  | |packet  +
> >   *++  ++++ ++
> >   */
> > +
> > +typedef struct SendCo {
> > +Coroutine *co;
> > +struct CompareState *s;
> > +CharBackend *chr;
> > +GQueue send_list;
> > +bool notify_remote_frame;
> > +bool done;
> > +int ret;
> > +} SendCo;
> > +
> > +typedef struct SendEntry {
> > +uint32_t size;
> > +uint32_t vnet_hdr_len;
> > +uint8_t *buf;
> > +} SendEntry;
> > +
> >  typedef struct CompareState {
> >  Object parent;
> > 
> > @@ -91,6 +111,8 @@ typedef struct CompareState {
> >  SocketReadState pri_rs;
> >  SocketReadState sec_rs;
> >  SocketReadState notify_rs;
> > +SendCo out_sendco;
> > +SendCo notify_sendco;
> >  bool vnet_hdr;
> >  uint32_t compare_timeout;
> >  uint32_t expired_scan_cycle;
> > @@ -124,10 +146,11 @@ enum {
> > 
> > 
> >  static int compare_chr_send(CompareState *s,
> > -const uint8_t *buf,
> > +uint8_t *buf,
> >  uint32_t size,
> >  uint32_t vnet_hdr_len,
> > -bool notify_remote_frame);
> > +bool notify_remote_frame,
> > +bool zero_copy);
> > 
> >  static bool packet_matches_str(const char *str,
> > const uint8_t *buf, @@ -145,7 +168,7 
> > @@ static void notify_remote_frame(CompareState *s)
> >  char msg[] = "DO_CHECKPOINT";
> >  int ret = 0;
> > 
> > -ret = compare_chr_send(s, (uint8_t *)msg, strlen(msg), 0, true);
> > +ret = compare_chr_send(s, (uint8_t *)msg, strlen(msg), 0, true, 
> > + false);
> >  if (ret < 0) {
> >  error_report("Notify Xen COLO-frame failed");
> >  }
> > @@ -272,12 +295,13 @@ static void
> > colo_release_primary_pkt(CompareState *s, Packet *pkt)
> > pkt->data,
> > pkt->size,
> > pkt->vnet_hdr_len,
> > -   false);
> > +   false,
> > +   true);
> >  if (ret < 0) {
> >  error_report("colo send primary packet failed");
> >  }
> >  trace_colo_compare_main("packet same and release packet");
> > -packet_destroy(pkt, NULL);
> > +packet_destroy_partial(pkt, NULL);
> >  }
> > 
> >  /*
> > @@ -699,65 +723,115 @@ static void colo_compare_connection(void 
> > *opaque, void *user_data)
> >  }
> >  }
> > 
> > -static int compare_chr_send(CompareState *s,
> > -const uint8_t *buf,
> > -uint32_t size,
> > -uint32_t vnet_hdr_len,
> > -bool notify_remote_frame)
> > +static void coroutine_fn _compare_chr_send(void *opaque)
> >  {
> > +SendCo *sendco = opaque;
> > +CompareState *s = sendco->s;
> >  int ret = 0;
> > -uint32_t len = htonl(size);
> > 
> > -if (!size) {
> > -return 0;
> > -}
> > +while (!g_queue_is_empty(>send_list)) {
> > +SendEntry *entry = g_queue_pop_tail(>send_list);
> > +uint32_t len = htonl(entry->size);
> > 
> > -if (notify_remote_frame) {
> > -ret = 

Re: [RFC v3 6/6] hmp: add x-debug-virtio commands

2020-05-07 Thread Eric Blake

On 5/7/20 6:49 AM, Laurent Vivier wrote:

This patch implements HMP version of the virtio QMP commands

Signed-off-by: Laurent Vivier 


Most HMP commands do not use '-' in their name.  Also, HMP doesn't 
promise api compatibility; so we could just name this 'info virtio' or 
'debug_virtio' without an x- prefix, with no real loss.



---
  Makefile|   2 +-
  Makefile.target |   7 +-
  docs/system/monitor.rst |   2 +
  hmp-commands-virtio.hx  | 160 +
  hmp-commands.hx |  10 +++
  hw/virtio/virtio.c  | 193 +++-
  include/monitor/hmp.h   |   4 +
  monitor/misc.c  |  17 
  8 files changed, 391 insertions(+), 4 deletions(-)
  create mode 100644 hmp-commands-virtio.hx




+SRST
+``x-debug-virtio`` *subcommand*
+  Show various information about virtio.
+
+  Example:
+
+  List all sub-commands::
+
+(qemu) x-debug-virtio
+x-debug-virtio query  -- List all available virtio devices
+x-debug-virtio status path -- Display status of a given virtio device
+x-debug-virtio queue-status path queue -- Display status of a given virtio 
queue
+x-debug-virtio queue-element path queue [index] -- Display element of a 
given virtio queue


Oh, you're introducing it as a metacommand (like 'info' already is) with 
several subcommands.  Still, naming it 'virtio' instead of 
'x-debug-virtio' would make sense to me, even though the underlying QMP 
commands are (correctly) in the x-debug- namespace.


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [PATCH v5 19/31] qcow2: Add subcluster support to calculate_l2_meta()

2020-05-07 Thread Alberto Garcia
On Thu 07 May 2020 05:34:18 PM CEST, Alberto Garcia wrote:
> On Wed 06 May 2020 07:39:48 PM CEST, Eric Blake wrote:
>>   In fact, if we rely on 20/31 checking for invalid subclusters when
>> computing nb_clusters, we could probably assert that the start and end
>> cluster in this function are not invalid, instead of adding the fail:
>> label.
>
> I think you're right with that, good catch! There's no need to return an
> error code in this function.

No, wait, I got confused. 

Patch 20/31 updates qcow2_get_host_offset(), and that's used for
reading.

The caller of calculate_l2_meta() is qcow2_alloc_cluster_offset()
(indirectly), which is used for writing.

nb_clusters here is calculated with cluster_needs_new_alloc(), but that
doesn't check whether a cluster is valid or not, and certainly doesn't
check if every single subcluster is valid.

Thinking about it, there are probably faster ways to check for the
validity of extended L2 entries, for example:

- For QCOW2_CLUSTER_NORMAL, (l2_bitmap >> 32) & l2_bitmap should be 0.

- For QCOW2_CLUSTER_UNALLOCATED, l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC
  should be 0.

That's enough to cover all cases of QCOW2_SUBCLUSTER_INVALID that we
have at the moment.

Berto



Re: [RFC v3 3/6] qmp: decode feature bits in virtio-status

2020-05-07 Thread Eric Blake

On 5/7/20 6:49 AM, Laurent Vivier wrote:

Display feature names instead of a features bitmap for host, guest
and backend.

Decode features according device type, transport features are
on the first line. Undecoded bits (if any) are stored in a separate
field.

Signed-off-by: Laurent Vivier 
---


I didn't closely review the code, but for the QAPI parts:


+++ b/qapi/virtio.json



+##
+# @VirtioBlkFeature:
+#
+# An enumeration of Virtio block features
+#
+# Since: 5.1
+##
+
+{ 'enum': 'VirtioBlkFeature',
+  'data': [ 'size-max', 'seg-max', 'geometry', 'ro', 'blk-size', 'topology',   
   'mq', 'discard', 'write-zeroes', 'barrier', 'scsi', 'flush',


Missing newline.


+##
+# @VirtioDeviceFeatures:
+#
+# An union to define the list of features for a virtio device


s/An/A/ (in English, 'an' goes with soft u, 'a' goes with pronounced u. 
You can remember with "a unicorn gets an umbrella")



+#
+# Since: 5.1
+##
+
+{ 'union': 'VirtioDeviceFeatures',
+  'data': {
+'virtio-serial': [ 'VirtioSerialFeature' ],
+'virtio-blk': [ 'VirtioBlkFeature' ],
+'virtio-gpu': [ 'VirtioGpuFeature' ],
+'virtio-net': [ 'VirtioNetFeature' ],
+'virtio-scsi': [ 'VirtioScsiFeature' ],
+'virtio-balloon': [ 'VirtioBalloonFeature' ],
+'virtio-iommu': [ 'VirtioIommuFeature' ]
+  }
+}


This is a legacy union rather than a flat union, which results in more 
{} in the QMP wire format.  Is it worth trying to make this a flat 
union, by labeling an appropriate member as 'discriminator'?



+
+##
+# @VirtioStatusFeatures:
+#
+# @transport: the list of transport features of the virtio device
+#
+# @device: the list of the virtio device specific features
+#
+# @unknown: virtio bitmap of the undecoded features
+#
+# Since: 5.1
+##
+
+{ 'struct': 'VirtioStatusFeatures',
+  'data': { 'transport': [ 'VirtioTransportFeature' ],
+'device': 'VirtioDeviceFeatures',
+'unknown': 'uint64' }
+}
+
  ##
  # @VirtioStatus:
  #
@@ -101,9 +245,9 @@
'data': {
  'device-id': 'int',
  'device-endian': 'VirtioStatusEndianness',
-'guest-features': 'uint64',
-'host-features': 'uint64',
-'backend-features': 'uint64',
+'guest-features': 'VirtioStatusFeatures',
+'host-features': 'VirtioStatusFeatures',
+'backend-features': 'VirtioStatusFeatures',


This is intra-series churn.  Should we be trying to get the right types 
in place from the get-go?  Or at least clarify in the commit message of 
the earlier patch that the format will be incrementally improved later?



  'num-vqs': 'uint16'
}
  }
@@ -123,18 +267,40 @@
  #
  # -> { "execute": "x-debug-virtio-status",
  #  "arguments": {
-#  "path": "/machine/peripheral-anon/device[3]/virtio-backend"
+#  "path": "/machine/peripheral-anon/device[1]/virtio-backend"
  #  }
  #   }
  # <- { "return": {
-#  "backend-features": 0,
-#  "guest-features": 5111807911,
-#  "num-vqs": 3,
-#  "host-features": 6337593319,
  #  "device-endian": "little",
-#  "device-id": 1
+#  "device-id": 3,
+#  "backend-features": {
+#  "device": {
+#  "type": "virtio-serial",
+#  "data": []
+#  },
+#  "unknown": 0,
+#  "transport": []
+#  },


If we use a flat union, this could look like:

"backend-feature": {
  "type": "virtio-serial",
  "features": [],
  "unknown": 0,
  "transport": []
},

Should 'unknown' be optional so it can be omitted when zero?  Should it 
be named 'unknown-features'?


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [PATCH v4 09/13] migration/ram: Consolidate variable reset after placement in ram_load_postcopy()

2020-05-07 Thread Dr. David Alan Gilbert
* Dr. David Alan Gilbert (dgilb...@redhat.com) wrote:
> * David Hildenbrand (da...@redhat.com) wrote:
> > Let's consolidate resetting the variables.
> > 
> > Cc: "Dr. David Alan Gilbert" 
> > Cc: Juan Quintela 
> > Cc: Peter Xu 
> > Signed-off-by: David Hildenbrand 
> 
> Reviewed-by: Dr. David Alan Gilbert 

Queued this one only; I had to do manual patch application due to some
renaming in 91ba442; but I think it's still OK.

> > ---
> >  migration/ram.c | 10 +-
> >  1 file changed, 5 insertions(+), 5 deletions(-)
> > 
> > diff --git a/migration/ram.c b/migration/ram.c
> > index 2a2165b478..7eca3165c8 100644
> > --- a/migration/ram.c
> > +++ b/migration/ram.c
> > @@ -3150,7 +3150,7 @@ static int ram_load_postcopy(QEMUFile *f)
> >  /* Temporary page that is later 'placed' */
> >  void *postcopy_host_page = mis->postcopy_tmp_page;
> >  void *host_page = NULL;
> > -bool all_zero = false;
> > +bool all_zero = true;
> >  int target_pages = 0;
> >  
> >  while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
> > @@ -3176,7 +3176,6 @@ static int ram_load_postcopy(QEMUFile *f)
> >  addr &= TARGET_PAGE_MASK;
> >  
> >  trace_ram_load_postcopy_loop((uint64_t)addr, flags);
> > -place_needed = false;
> >  if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
> >   RAM_SAVE_FLAG_COMPRESS_PAGE)) {
> >  block = ram_block_from_stream(f, flags);
> > @@ -3204,9 +3203,7 @@ static int ram_load_postcopy(QEMUFile *f)
> >   */
> >  page_buffer = postcopy_host_page +
> >host_page_offset_from_ram_block_offset(block, 
> > addr);
> > -/* If all TP are zero then we can optimise the place */
> >  if (target_pages == 1) {
> > -all_zero = true;
> >  host_page = host_page_from_ram_block_offset(block, addr);
> >  } else if (host_page != host_page_from_ram_block_offset(block,
> >  addr)) 
> > {
> > @@ -3223,7 +3220,6 @@ static int ram_load_postcopy(QEMUFile *f)
> >   */
> >  if (target_pages == (block->page_size / TARGET_PAGE_SIZE)) {
> >  place_needed = true;
> > -target_pages = 0;
> >  }
> >  place_source = postcopy_host_page;
> >  }
> > @@ -3300,6 +3296,10 @@ static int ram_load_postcopy(QEMUFile *f)
> >  ret = postcopy_place_page(mis, host_page, place_source,
> >block);
> >  }
> > +place_needed = false;
> > +target_pages = 0;
> > +/* Assume we have a zero page until we detect something 
> > different */
> > +all_zero = true;
> >  }
> >  }
> >  
> > -- 
> > 2.25.1
> > 
> --
> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
> 
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




Re: [RFC v3 1/6] qmp: add QMP command x-debug-query-virtio

2020-05-07 Thread Eric Blake

On 5/7/20 6:49 AM, Laurent Vivier wrote:

This new command lists all the instances of VirtIODevice with
their path and virtio type

Signed-off-by: Laurent Vivier 
---



+++ b/qapi/virtio.json
@@ -0,0 +1,68 @@
+##
+# = Virtio devices
+##
+
+##
+# @VirtioType:
+#
+# An enumeration of Virtio device types.
+#
+# Since: 5.1.0


Inconsistent on x.y.z vs.


+##
+{ 'enum': 'VirtioType',
+  'data': [ 'unknown', 'virtio-9p', 'virtio-blk', 'virtio-serial',
+'virtio-gpu', 'virtio-input', 'virtio-net', 'virtio-scsi',
+'vhost-user-fs', 'vhost-vsock', 'virtio-balloon', 'virtio-crypto',
+'virtio-iommu', 'virtio-pmem', 'virtio-rng' ]
+}
+
+##
+# @VirtioInfo:
+#
+# Information about a given VirtIODevice
+#
+# @path: VirtIO device canonical path.
+#
+# @type: VirtIO device type.
+#
+# Since: 5.1


x.y.  Most of our QAPI seems to have settled on just x.y these days.

The x-debug- prefix is nice; it gives us more flexibility to mess with 
this later as needed.


Otherwise:
Reviewed-by: Eric Blake 

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [PATCH v2 5/5] vhost: add device started check in migration set log

2020-05-07 Thread Dima Stepanov
On Wed, May 06, 2020 at 06:08:34PM -0400, Raphael Norwitz wrote:
> As you correctly point out, this code needs to be looked at more
> carefully so that
> if the device does disconnect in the background we can handle the migration 
> path
> gracefully. In particular, we need to decide whether a migration
> should be allowed
> to continue if a device disconnects durning the migration stage.
>From what i see from the code it is allowed. At the start of the
hw/virtio/vhost.c:vhost_migration_log() routine there is a check:
if (!dev->started) {
dev->log_enabled = enable;
return 0;
}
So our changes had the same idea. If device isn't started then 0 can be
returned. Please note, that if we want to return error here then the
following assert will be hit (hw/virtio/vhost.c)
static void vhost_log_global_start(MemoryListener *listener)
{
int r;

r = vhost_migration_log(listener, true);
if (r < 0) {
abort();
}
}
But as i mentioned we didn't change this logic, we just propogate it on
the whole migration start process during vhost handshake. After it our
tests passed successfully.

> 
> mst, any thoughts?
> 
> Have you looked at the suggestion I gave Li Feng to move vhost_dev_cleanup()
> into the connection path in vhost-user-blk? I’m not sure if he’s
> actively working on it,
> but I would prefer if we can find a way to keep some state around
> between reconnects
> so we aren’t constantly checking dev->started. A device can be stopped
> for reasons
> other than backend disconnect so I’d rather not reuse this field to
> check for backend
> disconnect failures.
In fact i didn't try to use >started field to signal about disconnect.
What i tried to follow is that if device not started (because of
disconnect or any other reason), there is no need to continue
initialization and we can proceed with the next migration step.

> 
> On Thu, Apr 30, 2020 at 9:57 AM Dima Stepanov  wrote:
> >
> > If vhost-user daemon is used as a backend for the vhost device, then we
> > should consider a possibility of disconnect at any moment. If such
> > disconnect happened in the vhost_migration_log() routine the vhost
> > device structure will be clean up.
> > At the start of the vhost_migration_log() function there is a check:
> >   if (!dev->started) {
> >   dev->log_enabled = enable;
> >   return 0;
> >   }
> > To be consistent with this check add the same check after calling the
> > vhost_dev_set_log() routine. This in general help not to break a
> 
> Could you point to the specific asserts which are being triggered?
Just to be clear here. The assert message i mentioned is described
above. I wanted to explain why we followed the "(!dev->started) return 0"
logic. And in this case we didn't return error and return 0.

But the first error we hit during migration testing was SIGSEGV:
Program terminated with signal SIGSEGV, Segmentation fault.
#0  0x56354db0a74a in vhost_dev_has_iommu (dev=0x563550562b00)
at hw/virtio/vhost.c:299
299 return vdev->dma_as != _space_memory &&
(gdb) p vdev
$1 = (VirtIODevice *) 0x0
(gdb) bt
#0  0x56354db0a74a in vhost_dev_has_iommu (dev=0x563550562b00)
at hw/virtio/vhost.c:299
#1  0x56354db0bb76 in vhost_dev_set_features (dev=0x563550562b00, 
enable_log=true)
at hw/virtio/vhost.c:777
#2  0x56354db0bc1e in vhost_dev_set_log (dev=0x563550562b00, 
enable_log=true)
at hw/virtio/vhost.c:790
#3  0x56354db0be58 in vhost_migration_log (listener=0x563550562b08, 
enable=1)
at hw/virtio/vhost.c:834
#4  0x56354db0be9b in vhost_log_global_start (listener=0x563550562b08)
at hw/virtio/vhost.c:847
#5  0x56354da72e7e in memory_global_dirty_log_start ()
at memory.c:2611
...


> 
> > migration due the assert() message. But it looks like that this code
> > should be revised to handle these errors more carefully.
> >
> > In case of vhost-user device backend the fail paths should consider the
> > state of the device. In this case we should skip some function calls
> > during rollback on the error paths, so not to get the NULL dereference
> > errors.
> >
> > Signed-off-by: Dima Stepanov 
> > ---
> >  hw/virtio/vhost.c | 39 +++
> >  1 file changed, 35 insertions(+), 4 deletions(-)
> >
> > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> > index 3ee50c4..d5ab96d 100644
> > --- a/hw/virtio/vhost.c
> > +++ b/hw/virtio/vhost.c
> > @@ -787,6 +787,17 @@ static int vhost_dev_set_features(struct vhost_dev 
> > *dev,
> >  static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
> >  {
> >  int r, i, idx;
> 
> A couple points here
> 
> 
> (1) This will fail the live migration if the device is disconnected.
> That my be the right thing
>   to do, but if there are cases where migrations can proceed with
> a disconnected device,
>   this may not be desirable.
I'm not sure that it is correct. VM could be migrated successfully
during 

Re: [PATCH v5 19/31] qcow2: Add subcluster support to calculate_l2_meta()

2020-05-07 Thread Alberto Garcia
On Wed 06 May 2020 07:39:48 PM CEST, Eric Blake wrote:
>   In fact, if we rely on 20/31 checking for invalid subclusters when
> computing nb_clusters, we could probably assert that the start and end
> cluster in this function are not invalid, instead of adding the fail:
> label.

I think you're right with that, good catch! There's no need to return an
error code in this function.

Berto



Re: [PATCH v3] migration/throttle: Add cpu-throttle-tailslow migration parameter

2020-05-07 Thread Dr. David Alan Gilbert
* Keqian Zhu (zhukeqi...@huawei.com) wrote:
> At the tail stage of throttling, the Guest is very sensitive to
> CPU percentage while the @cpu-throttle-increment is excessive
> usually at tail stage.
> 
> If this parameter is true, we will compute the ideal CPU percentage
> used by the Guest, which may exactly make the dirty rate match the
> dirty rate threshold. Then we will choose a smaller throttle increment
> between the one specified by @cpu-throttle-increment and the one
> generated by ideal CPU percentage.
> 
> Therefore, it is compatible to traditional throttling, meanwhile
> the throttle increment won't be excessive at tail stage. This may
> make migration time longer, and is disabled by default.
> 
> Signed-off-by: Keqian Zhu 

Queued

> ---
> Cc: Juan Quintela 
> Cc: "Dr. David Alan Gilbert" 
> Cc: Eric Blake 
> Cc: Markus Armbruster 
> ---
>  migration/migration.c | 13 
>  migration/ram.c   | 25 +-
>  monitor/hmp-cmds.c|  8 
>  qapi/migration.json   | 48 +++
>  4 files changed, 89 insertions(+), 5 deletions(-)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index 187ac0410c..d478a87290 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -785,6 +785,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error 
> **errp)
>  params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
>  params->has_cpu_throttle_increment = true;
>  params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
> +params->has_cpu_throttle_tailslow = true;
> +params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow;
>  params->has_tls_creds = true;
>  params->tls_creds = g_strdup(s->parameters.tls_creds);
>  params->has_tls_hostname = true;
> @@ -1324,6 +1326,10 @@ static void 
> migrate_params_test_apply(MigrateSetParameters *params,
>  dest->cpu_throttle_increment = params->cpu_throttle_increment;
>  }
>  
> +if (params->has_cpu_throttle_tailslow) {
> +dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow;
> +}
> +
>  if (params->has_tls_creds) {
>  assert(params->tls_creds->type == QTYPE_QSTRING);
>  dest->tls_creds = g_strdup(params->tls_creds->u.s);
> @@ -1412,6 +1418,10 @@ static void migrate_params_apply(MigrateSetParameters 
> *params, Error **errp)
>  s->parameters.cpu_throttle_increment = 
> params->cpu_throttle_increment;
>  }
>  
> +if (params->has_cpu_throttle_tailslow) {
> +s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow;
> +}
> +
>  if (params->has_tls_creds) {
>  g_free(s->parameters.tls_creds);
>  assert(params->tls_creds->type == QTYPE_QSTRING);
> @@ -3594,6 +3604,8 @@ static Property migration_properties[] = {
>  DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState,
>parameters.cpu_throttle_increment,
>DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
> +DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState,
> +  parameters.cpu_throttle_tailslow, false),
>  DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState,
>parameters.max_bandwidth, MAX_THROTTLE),
>  DEFINE_PROP_UINT64("x-downtime-limit", MigrationState,
> @@ -3700,6 +3712,7 @@ static void migration_instance_init(Object *obj)
>  params->has_throttle_trigger_threshold = true;
>  params->has_cpu_throttle_initial = true;
>  params->has_cpu_throttle_increment = true;
> +params->has_cpu_throttle_tailslow = true;
>  params->has_max_bandwidth = true;
>  params->has_downtime_limit = true;
>  params->has_x_checkpoint_delay = true;
> diff --git a/migration/ram.c b/migration/ram.c
> index 04f13feb2e..3317c99786 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -616,20 +616,34 @@ static size_t save_page_header(RAMState *rs, QEMUFile 
> *f,  RAMBlock *block,
>   * able to complete migration. Some workloads dirty memory way too
>   * fast and will not effectively converge, even with auto-converge.
>   */
> -static void mig_throttle_guest_down(void)
> +static void mig_throttle_guest_down(uint64_t bytes_dirty_period,
> +uint64_t bytes_dirty_threshold)
>  {
>  MigrationState *s = migrate_get_current();
>  uint64_t pct_initial = s->parameters.cpu_throttle_initial;
> -uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
> +uint64_t pct_increment = s->parameters.cpu_throttle_increment;
> +bool pct_tailslow = s->parameters.cpu_throttle_tailslow;
>  int pct_max = s->parameters.max_cpu_throttle;
>  
> +uint64_t throttle_now = cpu_throttle_get_percentage();
> +uint64_t cpu_now, cpu_ideal, throttle_inc;
> +
>  /* We have not started throttling yet. Let's start it. */
>  if (!cpu_throttle_active()) {
>  

Re: [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend

2020-05-07 Thread Maxime Coquelin



On 4/20/20 11:32 AM, Cindy Lu wrote:
> diff --git a/include/hw/virtio/vhost-backend.h 
> b/include/hw/virtio/vhost-backend.h
> index 6f6670783f..d81bd9885f 100644
> --- a/include/hw/virtio/vhost-backend.h
> +++ b/include/hw/virtio/vhost-backend.h
> @@ -17,7 +17,8 @@ typedef enum VhostBackendType {
>  VHOST_BACKEND_TYPE_NONE = 0,
>  VHOST_BACKEND_TYPE_KERNEL = 1,
>  VHOST_BACKEND_TYPE_USER = 2,
> -VHOST_BACKEND_TYPE_MAX = 3,
> +VHOST_BACKEND_TYPE_VDPA = 3,
> +VHOST_BACKEND_TYPE_MAX = 4,
>  } VhostBackendType;
>  
>  typedef enum VhostSetConfigType {
> @@ -112,6 +113,7 @@ typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev 
> *dev,
>  typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev,
>  struct vhost_inflight *inflight);
>  
> +typedef int (*vhost_set_state_op)(struct vhost_dev *dev, int state);

I think state should be of type uint8_t.




Re: [PATCH] linux-user/sparc64: Translate flushw opcode

2020-05-07 Thread Artyom Tarasenko
On Thu, May 7, 2020 at 4:17 PM Laurent Vivier  wrote:
>
> Hi,
>
> could someone with SPARC knowledge review this patch?
> I'd like to add it to linux-user queue.
>

Since it's TCG, I'd really preferred a review from Richard.

Regards,
Artyom
>
> Le 17/04/2020 à 13:06, LemonBoy a écrit :
> > Ping, I forgot to CC the SPARC mantainers in the previous mail.
> >
> > On 10/04/20 23:14, LemonBoy wrote:
> >> From 11d0cfe58d12e0f191b435ade88622cfceb2098a Mon Sep 17 00:00:00 2001
> >> From: LemonBoy 
> >> Date: Fri, 10 Apr 2020 22:55:26 +0200
> >> Subject: [PATCH] linux-user/sparc64: Translate flushw opcode
> >>
> >> The ifdef logic should unconditionally compile in the `xop == 0x2b` case
> >> when targeting sparc64.
> >>
> >> Fix the handling of window spill traps by keeping cansave into account
> >> when calculating the new CWP.
> >>
> >> Signed-off-by: Giuseppe Musacchio 
> >> ---
> >>  bsd-user/main.c | 4 +++-
> >>  linux-user/sparc/cpu_loop.c | 4 +++-
> >>  target/sparc/translate.c| 2 ++
> >>  3 files changed, 8 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/bsd-user/main.c b/bsd-user/main.c
> >> index 770c2b267a..d6b1c997e3 100644
> >> --- a/bsd-user/main.c
> >> +++ b/bsd-user/main.c
> >> @@ -413,7 +413,9 @@ static void save_window(CPUSPARCState *env)
> >>  save_window_offset(env, cpu_cwp_dec(env, env->cwp - 2));
> >>  env->wim = new_wim;
> >>  #else
> >> -save_window_offset(env, cpu_cwp_dec(env, env->cwp - 2));
> >> +/* cansave is zero if the spill trap handler is triggered by `save` 
> >> and */
> >> +/* nonzero if triggered by a `flushw` */
> >> +save_window_offset(env, cpu_cwp_dec(env, env->cwp - env->cansave - 
> >> 2));
> >>  env->cansave++;
> >>  env->canrestore--;
> >>  #endif
> >> diff --git a/linux-user/sparc/cpu_loop.c b/linux-user/sparc/cpu_loop.c
> >> index 7645cc04ca..20a7401126 100644
> >> --- a/linux-user/sparc/cpu_loop.c
> >> +++ b/linux-user/sparc/cpu_loop.c
> >> @@ -69,7 +69,9 @@ static void save_window(CPUSPARCState *env)
> >>  save_window_offset(env, cpu_cwp_dec(env, env->cwp - 2));
> >>  env->wim = new_wim;
> >>  #else
> >> -save_window_offset(env, cpu_cwp_dec(env, env->cwp - 2));
> >> +/* cansave is zero if the spill trap handler is triggered by `save` 
> >> and */
> >> +/* nonzero if triggered by a `flushw` */
> >> +save_window_offset(env, cpu_cwp_dec(env, env->cwp - env->cansave - 
> >> 2));
> >>  env->cansave++;
> >>  env->canrestore--;
> >>  #endif
> >> diff --git a/target/sparc/translate.c b/target/sparc/translate.c
> >> index 9416a551cf..1a4efd4ed6 100644
> >> --- a/target/sparc/translate.c
> >> +++ b/target/sparc/translate.c
> >> @@ -3663,6 +3663,8 @@ static void disas_sparc_insn(DisasContext * dc, 
> >> unsigned int insn)
> >>  #endif
> >>  gen_store_gpr(dc, rd, cpu_tmp0);
> >>  break;
> >> +#endif
> >> +#if defined(TARGET_SPARC64) || !defined(CONFIG_USER_ONLY)
> >>  } else if (xop == 0x2b) { /* rdtbr / V9 flushw */
> >>  #ifdef TARGET_SPARC64
> >>  gen_helper_flushw(cpu_env);
> >>
> >
>


-- 
Regards,
Artyom Tarasenko

SPARC and PPC PReP under qemu blog: http://tyom.blogspot.com/search/label/qemu



Re: [PATCH v5 00/14] LUKS: encryption slot management using amend interface

2020-05-07 Thread no-reply
Patchew URL: https://patchew.org/QEMU/20200507125414.2151-1-mlevi...@redhat.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Message-id: 20200507125414.2151-1-mlevi...@redhat.com
Subject: [PATCH v5 00/14] LUKS: encryption slot management using amend interface
Type: series

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Switched to a new branch 'test'
c3de092 iotests: add tests for blockdev-amend
309ee4c block/qcow2: implement blockdev-amend
7779b12 block/crypto: implement blockdev-amend
d8486a5 block/core: add generic infrastructure for x-blockdev-amend qmp command
f67a1ec iotests: qemu-img tests for luks key management
80686b5 iotests: filter few more luks specific create options
2e8c1b2 block/qcow2: extend qemu-img amend interface with crypto options
92bdda5 block/crypto: implement the encryption key management
a324070 block/crypto: rename two functions
4ab0233 block/amend: refactor qcow2 amend options
eec91bb block/amend: separate amend and create options for qemu-img
b93a373 block/amend: add 'force' option
f24e468 qcrypto/luks: implement encryption key management
aebcb19 qcrypto/core: add generic infrastructure for crypto options amendment

=== OUTPUT BEGIN ===
1/14 Checking commit aebcb1911ca9 (qcrypto/core: add generic infrastructure for 
crypto options amendment)
2/14 Checking commit f24e46892c52 (qcrypto/luks: implement encryption key 
management)
3/14 Checking commit b93a3730838d (block/amend: add 'force' option)
4/14 Checking commit eec91bb69321 (block/amend: separate amend and create 
options for qemu-img)
ERROR: Macros with multiple statements should be enclosed in a do - while loop
#31: FILE: block/qcow2.c:5515:
+#define QCOW_COMMON_OPTIONS \
+{   \
+.name = BLOCK_OPT_SIZE, \
+.type = QEMU_OPT_SIZE,  \
+.help = "Virtual disk size" \
+},  \
+{   \
+.name = BLOCK_OPT_COMPAT_LEVEL, \
+.type = QEMU_OPT_STRING,\
+.help = "Compatibility level (v2 [0.10] or v3 [1.1])"   \
+},  \
+{   \
+.name = BLOCK_OPT_BACKING_FILE, \
+.type = QEMU_OPT_STRING,\
+.help = "File name of a base image" \
+},  \
+{   \
+.name = BLOCK_OPT_BACKING_FMT,  \
+.type = QEMU_OPT_STRING,\
+.help = "Image format of the base image"\
+},  \
+{   \
+.name = BLOCK_OPT_DATA_FILE,\
+.type = QEMU_OPT_STRING,\
+.help = "File name of an external data file"\
+},  \
+{   \
+.name = BLOCK_OPT_DATA_FILE_RAW,\
+.type = QEMU_OPT_BOOL,  \
+.help = "The external data file must stay valid "   \
+"as a raw image"\
+},  \
+{   \
+.name = BLOCK_OPT_ENCRYPT,  \
+.type = QEMU_OPT_BOOL,  \
+.help = "Encrypt the image with format 'aes'. (Deprecated " \
+"in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)",\
+},  \
+{   \
+.name = BLOCK_OPT_ENCRYPT_FORMAT,   \
+.type = QEMU_OPT_STRING,\
+.help = "Encrypt the image, format choices: 'aes', 'luks'", \
+},  \
+

Re: [PATCH v4 11/14] block/core: add generic infrastructure for x-blockdev-amend qmp command

2020-05-07 Thread Eric Blake

On 5/5/20 3:08 PM, Maxim Levitsky wrote:

blockdev-amend will be used similiar to blockdev-create
to allow on the fly changes of the structure of the format based block devices.

Current plan is to first support encryption keyslot management for luks
based formats (raw and embedded in qcow2)

Signed-off-by: Maxim Levitsky 
Reviewed-by: Daniel P. Berrangé 
---



+++ b/qapi/block-core.json
@@ -4649,6 +4649,48 @@
'data': { 'job-id': 'str',
  'options': 'BlockdevCreateOptions' } }
  
+##

+# @BlockdevAmendOptions:
+#
+# Options for amending an image format
+#
+# @driver   block driver that is suitable for the image
+#
+# Since: 5.0


We'll need to scrub patches for s/5.0/5.1/

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [PATCH-for-5.1 v3 4/7] migration/colo: Add missing error-propagation code

2020-05-07 Thread Dr. David Alan Gilbert
* Philippe Mathieu-Daudé (f4...@amsat.org) wrote:
> Running the coccinelle script produced:
> 
>   $ spatch \
> --macro-file scripts/cocci-macro-file.h --include-headers \
> --sp-file scripts/coccinelle/find-missing-error_propagate.cocci \
> --keep-comments --smpl-spacing --dir .
>   HANDLING: ./migration/colo.c
>   [[manual check required: error_propagate() might be missing in 
> migrate_set_block_enabled() ./migration/colo.c:439:4]]
> 
> Add the missing error_propagate() after review.
> 
> Reviewed-by: Juan Quintela 
> Signed-off-by: Philippe Mathieu-Daudé 

Queued this entry for migration

> ---
>  migration/colo.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/migration/colo.c b/migration/colo.c
> index a54ac84f41..57b2adb0cc 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -437,6 +437,9 @@ static int colo_do_checkpoint_transaction(MigrationState 
> *s,
>  
>  /* Disable block migration */
>  migrate_set_block_enabled(false, _err);
> +if (local_err) {
> +goto out;
> +}
>  qemu_mutex_lock_iothread();
>  
>  #ifdef CONFIG_REPLICATION
> -- 
> 2.21.1
> 
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




[PATCH 5/5] docs/system: Document Musca boards

2020-05-07 Thread Peter Maydell
Provide a minimal documentation of the Musca boards.

Signed-off-by: Peter Maydell 
---
 docs/system/arm/musca.rst  | 31 +++
 docs/system/target-arm.rst |  1 +
 MAINTAINERS|  1 +
 3 files changed, 33 insertions(+)
 create mode 100644 docs/system/arm/musca.rst

diff --git a/docs/system/arm/musca.rst b/docs/system/arm/musca.rst
new file mode 100644
index 000..8375c5008d5
--- /dev/null
+++ b/docs/system/arm/musca.rst
@@ -0,0 +1,31 @@
+Arm Musca boards (``musca-a``, ``musca-b1``)
+
+
+The Arm Musca development boards are a reference implementation
+of a system using the SSE-200 subsystem for embedded. They are
+dual Cortex-M33 systems.
+
+QEMU provides models of the A and B1 variants of this board.
+
+Unimplemented devices:
+
+- SPI
+- |I2C|
+- |I2S|
+- PWM
+- QSPI
+- Timer
+- SCC
+- GPIO
+- eFlash
+- MHU
+- PVT
+- SDIO
+- CryptoCell
+
+Note that (like the real hardware) the Musca-A machine is
+asymmetric: CPU 0 does not have the FPU or DSP extensions,
+but CPU 1 does. Also like the real hardware, the memory maps
+for the A and B1 variants differ significantly, so guest
+software must be built for the right variant.
+
diff --git a/docs/system/target-arm.rst b/docs/system/target-arm.rst
index 15bcf9f81f0..1b86b93c346 100644
--- a/docs/system/target-arm.rst
+++ b/docs/system/target-arm.rst
@@ -72,6 +72,7 @@ undocumented; you can get a complete list by running
 
arm/integratorcp
arm/mps2
+   arm/musca
arm/realview
arm/versatile
arm/vexpress
diff --git a/MAINTAINERS b/MAINTAINERS
index ea7bdd359e0..f8e0fdb4ef2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -706,6 +706,7 @@ M: Peter Maydell 
 L: qemu-...@nongnu.org
 S: Maintained
 F: hw/arm/musca.c
+F: docs/system/arm/musca.rst
 
 Musicpal
 M: Jan Kiszka 
-- 
2.20.1




[PATCH 4/5] docs/system: Document the various MPS2 models

2020-05-07 Thread Peter Maydell
Add basic documentation of the MPS2 board models.

Signed-off-by: Peter Maydell 
---
 docs/system/arm/mps2.rst   | 29 +
 docs/system/target-arm.rst |  1 +
 MAINTAINERS|  1 +
 3 files changed, 31 insertions(+)
 create mode 100644 docs/system/arm/mps2.rst

diff --git a/docs/system/arm/mps2.rst b/docs/system/arm/mps2.rst
new file mode 100644
index 000..3a98cb59b0d
--- /dev/null
+++ b/docs/system/arm/mps2.rst
@@ -0,0 +1,29 @@
+Arm MPS2 boards (``mps2-an385``, ``mps2-an505``, ``mps2-an511``, 
``mps2-an521``)
+
+
+These board models all use Arm M-profile CPUs.
+
+The Arm MPS2 and MPS2+ dev boards are FPGA based (the 2+ has a bigger
+FPGA but is otherwise the same as the 2). Since the CPU itself
+and most of the devices are in the FPGA, the details of the board
+as seen by the guest depend significantly on the FPGA image.
+
+QEMU models the following FPGA images:
+
+``mps2-an385``
+  Cortex-M3 as documented in ARM Application Note AN385
+``mps2-an511``
+  Cortex-M3 'DesignStart' as documented in AN511
+``mps2-an505``
+  Cortex-M33 as documented in ARM Application Note AN505
+``mps2-an521``
+  Dual Cortex-M33 as documented in Application Note AN521
+
+Differences between QEMU and real hardware:
+
+- AN385 remapping of low 16K of memory to either ZBT SSRAM1 or to
+  block RAM is unimplemented (QEMU always maps this to ZBT SSRAM1, as
+  if zbt_boot_ctrl is always zero)
+- QEMU provides a LAN9118 ethernet rather than LAN9220; the only guest
+  visible difference is that the LAN9118 doesn't support checksum
+  offloading
diff --git a/docs/system/target-arm.rst b/docs/system/target-arm.rst
index 4779293d731..15bcf9f81f0 100644
--- a/docs/system/target-arm.rst
+++ b/docs/system/target-arm.rst
@@ -71,6 +71,7 @@ undocumented; you can get a complete list by running
:maxdepth: 1
 
arm/integratorcp
+   arm/mps2
arm/realview
arm/versatile
arm/vexpress
diff --git a/MAINTAINERS b/MAINTAINERS
index 74cff1c3818..ea7bdd359e0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -699,6 +699,7 @@ F: hw/misc/armsse-cpuid.c
 F: include/hw/misc/armsse-cpuid.h
 F: hw/misc/armsse-mhu.c
 F: include/hw/misc/armsse-mhu.h
+F: docs/system/arm/mps2.rst
 
 Musca
 M: Peter Maydell 
-- 
2.20.1




[PATCH 3/5] docs/system: Document Arm Versatile Express boards

2020-05-07 Thread Peter Maydell
Provide a minimal documentation of the Versatile Express boards
(vexpress-a9, vexpress-a15).

Signed-off-by: Peter Maydell 
---
 docs/system/arm/vexpress.rst | 60 
 docs/system/target-arm.rst   |  1 +
 MAINTAINERS  |  1 +
 3 files changed, 62 insertions(+)
 create mode 100644 docs/system/arm/vexpress.rst

diff --git a/docs/system/arm/vexpress.rst b/docs/system/arm/vexpress.rst
new file mode 100644
index 000..7f1bcbef078
--- /dev/null
+++ b/docs/system/arm/vexpress.rst
@@ -0,0 +1,60 @@
+Arm Versatile Express boards (``vexpress-a9``, ``vexpress-a15``)
+
+
+QEMU models two variants of the Arm Versatile Express development
+board family:
+
+- ``vexpress-a9`` models the combination of the Versatile Express
+  motherboard and the CoreTile Express A9x4 daughterboard
+- ``vexpress-a15`` models the combination of the Versatile Express
+  motherboard and the CoreTile Express A15x2 daughterboard
+
+Note that as this hardware does not have PCI, IDE or SCSI,
+the only available storage option is emulated SD card.
+
+Implemented devices:
+
+- PL041 audio
+- PL181 SD controller
+- PL050 keyboard and mouse
+- PL011 UARTs
+- SP804 timers
+- I2C controller
+- PL031 RTC
+- PL111 LCD display controller
+- Flash memory
+- LAN9118 ethernet
+
+Unimplemented devices:
+
+- SP810 system control block
+- PCI-express
+- USB controller (Philips ISP1761)
+- Local DAP ROM
+- CoreSight interfaces
+- PL301 AXI interconnect
+- SCC
+- System counter
+- HDLCD controller (``vexpress-a15``)
+- SP805 watchdog
+- PL341 dynamic memory controller
+- DMA330 DMA controller
+- PL354 static memory controller
+- BP147 TrustZone Protection Controller
+- TrustZone Address Space Controller
+
+Other differences between the hardware and the QEMU model:
+
+- QEMU will default to creating one CPU unless you pass a different
+  ``-smp`` argument
+- QEMU allows the amount of RAM provided to be specified with the
+  ``-m`` argument
+- QEMU defaults to providing a CPU which does not provide either
+  TrustZone or the Virtualization Extensions: if you want these you
+  must enable them with ``-machine secure=on`` and ``-machine
+  virtualization=on``
+- QEMU provides 4 virtio-mmio virtio transports; these start at
+  address ``0x10013000`` for ``vexpress-a9`` and at ``0x1c13`` for
+  ``vexpress-a15``, and have IRQs from 40 upwards. If a dtb is
+  provided on the command line then QEMU will edit it to include
+  suitable entries describing these transports for the guest.
diff --git a/docs/system/target-arm.rst b/docs/system/target-arm.rst
index d1196cbe01c..4779293d731 100644
--- a/docs/system/target-arm.rst
+++ b/docs/system/target-arm.rst
@@ -73,6 +73,7 @@ undocumented; you can get a complete list by running
arm/integratorcp
arm/realview
arm/versatile
+   arm/vexpress
arm/musicpal
arm/nseries
arm/orangepi
diff --git a/MAINTAINERS b/MAINTAINERS
index 1f84e3ae2c6..74cff1c3818 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -826,6 +826,7 @@ M: Peter Maydell 
 L: qemu-...@nongnu.org
 S: Maintained
 F: hw/arm/vexpress.c
+F: docs/system/arm/vexpress.rst
 
 Versatile PB
 M: Peter Maydell 
-- 
2.20.1




[Bug 1877384] [NEW] 9pfs file create with mapped-xattr can fail on overlayfs

2020-05-07 Thread Fishface60
Public bug reported:

QEMU Version: 3.1.0 as packaged in debian buster, but the code appears to do 
the same in master.
qemu command-line: qemu-system-x86_64 -m 1G -nographic -nic 
"user,model=virtio-net-pci,tftp=$(pwd),net=10.0.2.0/24,host=10.0.2.2" -fsdev 
local,id=fs,path=$thisdir/..,security_model=mapped-xattr -device 
virtio-9p-pci,fsdev=fs,mount_tag=fs -drive 
"file=$rootdisk,if=virtio,format=raw" -kernel "$kernel" -initrd "$initrd" 
-append "$append"


I'm using CI that runs in a Docker container and runs a qemu VM with code and 
results shared via virtio 9p.
The 9p fsdev is configured with security_model=mapped-xattr
When the test code attempts to create a log file in an existing directory, open 
with O_CREAT fails with -ENOENT.

The relevant strace excerpt is:

28791 openat(11, ".", O_RDONLY|O_NOFOLLOW|O_PATH|O_DIRECTORY) = 20
28791 openat(20, "src", O_RDONLY|O_NOCTTY|O_NONBLOCK|O_NOFOLLOW|O_DIRECTORY) = 
21
28791 fcntl(21, F_SETFL, O_RDONLY|O_DIRECTORY) = 0
28791 close(20) = 0
28791 openat(21, "client.log", O_WRONLY|O_CREAT|O_NOCTTY|O_NONBLOCK|O_NOFOLLOW, 
0600) = 20
28791 fcntl(20, F_SETFL, O_WRONLY|O_CREAT|O_NONBLOCK|O_NOFOLLOW) = 0
28791 lsetxattr("/proc/self/fd/21/client.log", "user.virtfs.uid", "\0\0\0", 4, 
0) = -1 ENOENT (No such file or directory)

My hypothesis for what's going wrong is since the Docker container's
overlayfs copies-up on writes, when it opens the file it's created a new
version of the `src` directory containing a `client.log`, but this new
src directory isn't accessible by file descriptor 20 and the lsetxattr
call is instead attempting to set attributes on the path in the old
`src` directory.

Looking at the code, a fix would be to change `hw/9pfs/9p-local.c` and
change `local_open2` to instead of calling `local_set_xattrat` to set
the xattrs by directory file descriptor and file name, to have a version
of local_set_xattrat` which uses `fsetxattr` to set the virtfs
attributes instead of the `fsetxattrat_nofollow` helper.

This reliably happened for me in CI, but I don't have access to the CI
host or the time to strip the test down to make a minimal test case, and
had difficulty reproducing the error on other machines.

** Affects: qemu
 Importance: Undecided
 Status: New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1877384

Title:
  9pfs file create with mapped-xattr can fail on overlayfs

Status in QEMU:
  New

Bug description:
  QEMU Version: 3.1.0 as packaged in debian buster, but the code appears to do 
the same in master.
  qemu command-line: qemu-system-x86_64 -m 1G -nographic -nic 
"user,model=virtio-net-pci,tftp=$(pwd),net=10.0.2.0/24,host=10.0.2.2" -fsdev 
local,id=fs,path=$thisdir/..,security_model=mapped-xattr -device 
virtio-9p-pci,fsdev=fs,mount_tag=fs -drive 
"file=$rootdisk,if=virtio,format=raw" -kernel "$kernel" -initrd "$initrd" 
-append "$append"

  
  I'm using CI that runs in a Docker container and runs a qemu VM with code and 
results shared via virtio 9p.
  The 9p fsdev is configured with security_model=mapped-xattr
  When the test code attempts to create a log file in an existing directory, 
open with O_CREAT fails with -ENOENT.

  The relevant strace excerpt is:

  28791 openat(11, ".", O_RDONLY|O_NOFOLLOW|O_PATH|O_DIRECTORY) = 20
  28791 openat(20, "src", O_RDONLY|O_NOCTTY|O_NONBLOCK|O_NOFOLLOW|O_DIRECTORY) 
= 21
  28791 fcntl(21, F_SETFL, O_RDONLY|O_DIRECTORY) = 0
  28791 close(20) = 0
  28791 openat(21, "client.log", 
O_WRONLY|O_CREAT|O_NOCTTY|O_NONBLOCK|O_NOFOLLOW, 0600) = 20
  28791 fcntl(20, F_SETFL, O_WRONLY|O_CREAT|O_NONBLOCK|O_NOFOLLOW) = 0
  28791 lsetxattr("/proc/self/fd/21/client.log", "user.virtfs.uid", "\0\0\0", 
4, 0) = -1 ENOENT (No such file or directory)

  My hypothesis for what's going wrong is since the Docker container's
  overlayfs copies-up on writes, when it opens the file it's created a
  new version of the `src` directory containing a `client.log`, but this
  new src directory isn't accessible by file descriptor 20 and the
  lsetxattr call is instead attempting to set attributes on the path in
  the old `src` directory.

  Looking at the code, a fix would be to change `hw/9pfs/9p-local.c` and
  change `local_open2` to instead of calling `local_set_xattrat` to set
  the xattrs by directory file descriptor and file name, to have a
  version of local_set_xattrat` which uses `fsetxattr` to set the virtfs
  attributes instead of the `fsetxattrat_nofollow` helper.

  This reliably happened for me in CI, but I don't have access to the CI
  host or the time to strip the test down to make a minimal test case,
  and had difficulty reproducing the error on other machines.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1877384/+subscriptions



Re: [PATCH v2 10/9] qed: Simplify backing reads

2020-05-07 Thread Eric Blake

On 5/7/20 9:45 AM, Eric Blake wrote:

The other four drivers that support backing files (qcow, qcow2,
parallels, vmdk) all rely on the block layer to populate zeroes when
reading beyond EOF of a short backing file.  We can simplify the qed
code by doing likewise.

Signed-off-by: Eric Blake 
---

I noticed this during my audit that v1 of Vladimir's series was correct.

No change in iotests results (test 274 is currently failing for qed,
but for other reasons:
+Traceback (most recent call last):
+  File "274", line 24, in 
+iotests.verify_image_format(supported_fmts=['qcow2'])
+AttributeError: module 'iotests' has no attribute 'verify_image_format'
)


That iotest failure was due to a stale branch on my end; after updating 
to latest git master plus Kevin's latest 'block' branch, 274 is now 
skipped on qed.


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




[PATCH 1/5] docs/system: Add 'Arm' to the Integrator/CP document title

2020-05-07 Thread Peter Maydell
Add 'Arm' to the Integrator/CP document title, for consistency with
the titling of the other documentation of Arm devboard models
(versatile, realview).

Signed-off-by: Peter Maydell 
---
 docs/system/arm/integratorcp.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/system/arm/integratorcp.rst b/docs/system/arm/integratorcp.rst
index e6f050f602b..594438008e4 100644
--- a/docs/system/arm/integratorcp.rst
+++ b/docs/system/arm/integratorcp.rst
@@ -1,5 +1,5 @@
-Integrator/CP (``integratorcp``)
-
+Arm Integrator/CP (``integratorcp``)
+
 
 The Arm Integrator/CP board is emulated with the following devices:
 
-- 
2.20.1




Re: [PULL 0/5] Misc crypto subsystem fixes

2020-05-07 Thread Peter Maydell
On Thu, 7 May 2020 at 12:59, Daniel P. Berrangé  wrote:
>
> The following changes since commit 609dd53df540edd72faee705205aceca9c42fea5:
>
>   Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20200506' into stag=
> ing (2020-05-07 09:45:54 +0100)
>
> are available in the Git repository at:
>
>   https://github.com/berrange/qemu tags/qcrypto-next-pull-request
>
> for you to fetch changes up to 6022e15d146d8bba9610a9d134afa4bc6e5d9275:
>
>   crypto: extend hash benchmark to cover more algorithms (2020-05-07 12:52:51=
>  +0100)
>
> 
> Misc crypto subsystem fixes
>
> * Improve error message for large files when creating LUKS volumes
> * Expand crypto hash benchmark coverage
> * Misc code refactoring with no functional change
>
> 


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/5.1
for any user-visible changes.

-- PMM



[PATCH 2/5] docs/system: Sort Arm board index into alphabetical order

2020-05-07 Thread Peter Maydell
Sort the board index into alphabetical order.  (Note that we need to
sort alphabetically by the title text of each file, which isn't the
same ordering as sorting by the filename.)

Signed-off-by: Peter Maydell 
---
 docs/system/target-arm.rst | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/system/target-arm.rst b/docs/system/target-arm.rst
index 324e2af1cbc..d1196cbe01c 100644
--- a/docs/system/target-arm.rst
+++ b/docs/system/target-arm.rst
@@ -71,15 +71,15 @@ undocumented; you can get a complete list by running
:maxdepth: 1
 
arm/integratorcp
-   arm/versatile
arm/realview
-   arm/xscale
-   arm/palm
-   arm/nseries
-   arm/stellaris
+   arm/versatile
arm/musicpal
-   arm/sx1
+   arm/nseries
arm/orangepi
+   arm/palm
+   arm/xscale
+   arm/sx1
+   arm/stellaris
 
 Arm CPU features
 
-- 
2.20.1




[PATCH 0/5] docs/system: Document some arm board models

2020-05-07 Thread Peter Maydell
This patchset adds (minimal) documentation of these Arm board models:

vexpress-a15 ARM Versatile Express for Cortex-A15
vexpress-a9  ARM Versatile Express for Cortex-A9
mps2-an385   ARM MPS2 with AN385 FPGA image for Cortex-M35
mps2-an505   ARM MPS2 with AN505 FPGA image for Cortex-M33
mps2-an511   ARM MPS2 with AN511 DesignStart FPGA image for Cortex-M3
mps2-an521   ARM MPS2 with AN521 FPGA image for dual Cortex-M33
musca-a  ARM Musca-A board (dual Cortex-M33)
musca-b1 ARM Musca-B1 board (dual Cortex-M33)

to the system emulator manual.

Patches 1 and 2 are minor tidyup of the board table-of-contents
before we start adding new entries with patches 3-5.

I'm aiming more for "at least note that the boards exist" than
"fully comprehensive" documentation here -- there are still another
37 Arm board models with no documentation at all...

thanks
-- PMM

Peter Maydell (5):
  docs/system: Add 'Arm' to the Integrator/CP document title
  docs/system: Sort Arm board index into alphabetical order
  docs/system: Document Arm Versatile Express boards
  docs/system: Document the various MPS2 models
  docs/system: Document Musca boards

 docs/system/arm/integratorcp.rst |  4 +--
 docs/system/arm/mps2.rst | 29 +++
 docs/system/arm/musca.rst| 31 +
 docs/system/arm/vexpress.rst | 60 
 docs/system/target-arm.rst   | 15 
 MAINTAINERS  |  3 ++
 6 files changed, 134 insertions(+), 8 deletions(-)
 create mode 100644 docs/system/arm/mps2.rst
 create mode 100644 docs/system/arm/musca.rst
 create mode 100644 docs/system/arm/vexpress.rst

-- 
2.20.1




Re: [PATCH Kernel v18 6/7] vfio iommu: Add migration capability to report supported features

2020-05-07 Thread Alex Williamson
On Thu, 7 May 2020 11:07:26 +0530
Kirti Wankhede  wrote:

> On 5/7/2020 3:57 AM, Alex Williamson wrote:
> > On Mon, 4 May 2020 21:28:58 +0530
> > Kirti Wankhede  wrote:
> >   
> >> Added migration capability in IOMMU info chain.
> >> User application should check IOMMU info chain for migration capability
> >> to use dirty page tracking feature provided by kernel module.
> >>
> >> Signed-off-by: Kirti Wankhede 
> >> ---
> >>   drivers/vfio/vfio_iommu_type1.c | 15 +++
> >>   include/uapi/linux/vfio.h   | 14 ++
> >>   2 files changed, 29 insertions(+)
> >>
> >> diff --git a/drivers/vfio/vfio_iommu_type1.c 
> >> b/drivers/vfio/vfio_iommu_type1.c
> >> index 8b27faf1ec38..b38d278d7bff 100644
> >> --- a/drivers/vfio/vfio_iommu_type1.c
> >> +++ b/drivers/vfio/vfio_iommu_type1.c
> >> @@ -2378,6 +2378,17 @@ static int vfio_iommu_iova_build_caps(struct 
> >> vfio_iommu *iommu,
> >>return ret;
> >>   }
> >>   
> >> +static int vfio_iommu_migration_build_caps(struct vfio_info_cap *caps)
> >> +{
> >> +  struct vfio_iommu_type1_info_cap_migration cap_mig;
> >> +
> >> +  cap_mig.header.id = VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION;
> >> +  cap_mig.header.version = 1;
> >> +  cap_mig.flags = VFIO_IOMMU_INFO_CAPS_MIGRATION_DIRTY_PAGE_TRACK;
> >> +
> >> +  return vfio_info_add_capability(caps, _mig.header, sizeof(cap_mig));
> >> +}
> >> +
> >>   static long vfio_iommu_type1_ioctl(void *iommu_data,
> >>   unsigned int cmd, unsigned long arg)
> >>   {
> >> @@ -2427,6 +2438,10 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
> >>if (ret)
> >>return ret;
> >>   
> >> +  ret = vfio_iommu_migration_build_caps();
> >> +  if (ret)
> >> +  return ret;
> >> +
> >>if (caps.size) {
> >>info.flags |= VFIO_IOMMU_INFO_CAPS;
> >>   
> >> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> >> index e3cbf8b78623..df9ce8aaafab 100644
> >> --- a/include/uapi/linux/vfio.h
> >> +++ b/include/uapi/linux/vfio.h
> >> @@ -1013,6 +1013,20 @@ struct vfio_iommu_type1_info_cap_iova_range {
> >>struct  vfio_iova_range iova_ranges[];
> >>   };
> >>   
> >> +/*
> >> + * The migration capability allows to report supported features for 
> >> migration.
> >> + *
> >> + * The structures below define version 1 of this capability.
> >> + */
> >> +#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION  1
> >> +
> >> +struct vfio_iommu_type1_info_cap_migration {
> >> +  struct  vfio_info_cap_header header;
> >> +  __u32   flags;
> >> +  /* supports dirty page tracking */
> >> +#define VFIO_IOMMU_INFO_CAPS_MIGRATION_DIRTY_PAGE_TRACK   (1 << 0)
> >> +};
> >> +  
> > 
> > What about exposing the maximum supported dirty bitmap size and the
> > supported page sizes?  Thanks,
> >   
> 
> How should user application use that?

How does a user application currently discover that only a PAGE_SIZE
dirty bitmap granularity is supported or that the when performing an
unmap while requesting the dirty bitmap, those unmaps need to be
chunked to no more than 2^31 * PAGE_SIZE?  I don't see anything in the
uapi that expresses these restrictions.

It seems we're currently relying on the QEMU implementation to
coincide with bitmap granularity support, with no mechanism other than
trial and error to validate or expand that support.  Likewise, I think
it's largely a coincidence that KVM also has the same slot size
restrictions, so we're unlikely to see an unmap exceeding this limit,
but our api does not restrict an unmap to only cover a single mapping.
We probably also need to think about whether we need to expose a
mapping chunk limitation separately or if it should be extrapolated
from the migration support (we might have non-migration reasons to
limit it at some point).

If we leave these as assumptions then we risk breaking userspace or
limiting the usefulness of expending this support.  If we include
within the uapi a mechanism for learning about these restrictions, then
it becomes a userspace problem to follow the uapi and take advantage of
the uapi provided.  Thanks,

Alex




Re: [PATCH v5 30/31] qcow2: Add subcluster support to qcow2_measure()

2020-05-07 Thread Alberto Garcia
On Wed 06 May 2020 08:13:51 PM CEST, Eric Blake wrote:
> On 5/5/20 12:38 PM, Alberto Garcia wrote:
>> Extended L2 entries are bigger than normal L2 entries so this has an
>> impact on the amount of metadata needed for a qcow2 file.
>> 
>> Signed-off-by: Alberto Garcia 
>> Reviewed-by: Max Reitz 
>> ---
>>   block/qcow2.c | 19 ---
>>   1 file changed, 12 insertions(+), 7 deletions(-)
>
> Should this be hoisted earlier in the series, before 28/31?

I can do that if I call qcow2_calc_prealloc_size() with extended_l2
always set to false (because there would be no BLOCK_OPT_EXTL2 that the
caller could use). Maybe it's not a bad idea.

> Should there be iotest coverage?

There are already, in the last patch.

Berto



Re: [PATCH 3/3] iotests: Mirror with different source/target size

2020-05-07 Thread Eric Blake

On 5/7/20 9:52 AM, Kevin Wolf wrote:

This tests that the mirror job catches situations where the target node
has a different size than the source node. It must also forbid resize
operations when the job is already running.

Signed-off-by: Kevin Wolf 
---
  tests/qemu-iotests/041 | 45 ++
  tests/qemu-iotests/041.out |  4 ++--
  2 files changed, 47 insertions(+), 2 deletions(-)


Reviewed-by: Eric Blake 

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [RFC v1 3/4] vhost-vdpa: implement vhost-vdpa backend

2020-05-07 Thread Maxime Coquelin



On 4/20/20 11:32 AM, Cindy Lu wrote:
> Currently we have 2 types of vhost backends in QEMU: vhost kernel and
> vhost-user. The above patch provides a generic device for vDPA purpose,
> this vDPA device exposes to user space a non-vendor-specific configuration
> interface for setting up a vhost HW accelerator, this patch set introduces
> a third vhost backend called vhost-vdpa based on the vDPA interface.
> 
> Vhost-vdpa usage:
> 
>   qemu-system-x86_64 -cpu host -enable-kvm \
> ..
>   -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-id,id=vhost-vdpa0 \
>   -device virtio-net-pci,netdev=vhost-vdpa0,page-per-vq=on \
> 
> Author: Tiwei Bie
> Signed-off-by: Cindy Lu 
> ---
>  hw/net/vhost_net.c|  43 
>  hw/net/virtio-net.c   |   9 +
>  hw/virtio/Makefile.objs   |   2 +-
>  hw/virtio/vhost-backend.c |   3 +
>  hw/virtio/vhost-vdpa.c| 379 ++
>  hw/virtio/vhost.c |   5 +
>  include/hw/virtio/vhost-backend.h |   6 +-
>  include/hw/virtio/vhost-vdpa.h|  14 ++
>  8 files changed, 459 insertions(+), 2 deletions(-)
>  create mode 100644 hw/virtio/vhost-vdpa.c
>  create mode 100644 include/hw/virtio/vhost-vdpa.h
> 
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index 4096d64aaf..0d13fda2fc 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -17,8 +17,10 @@
>  #include "net/net.h"
>  #include "net/tap.h"
>  #include "net/vhost-user.h"
> +#include "net/vhost-vdpa.h"
>  
>  #include "standard-headers/linux/vhost_types.h"
> +#include "linux-headers/linux/vhost.h"
>  #include "hw/virtio/virtio-net.h"
>  #include "net/vhost_net.h"
>  #include "qemu/error-report.h"
> @@ -85,6 +87,29 @@ static const int user_feature_bits[] = {
>  VHOST_INVALID_FEATURE_BIT
>  };
>  
> +static const int vdpa_feature_bits[] = {
> +VIRTIO_F_NOTIFY_ON_EMPTY,
> +VIRTIO_RING_F_INDIRECT_DESC,
> +VIRTIO_RING_F_EVENT_IDX,
> +VIRTIO_F_ANY_LAYOUT,
> +VIRTIO_F_VERSION_1,
> +VIRTIO_NET_F_CSUM,
> +VIRTIO_NET_F_GUEST_CSUM,
> +VIRTIO_NET_F_GSO,
> +VIRTIO_NET_F_GUEST_TSO4,
> +VIRTIO_NET_F_GUEST_TSO6,
> +VIRTIO_NET_F_GUEST_ECN,
> +VIRTIO_NET_F_GUEST_UFO,
> +VIRTIO_NET_F_HOST_TSO4,
> +VIRTIO_NET_F_HOST_TSO6,
> +VIRTIO_NET_F_HOST_ECN,
> +VIRTIO_NET_F_HOST_UFO,
> +VIRTIO_NET_F_MRG_RXBUF,
> +VIRTIO_NET_F_MTU,
> +VIRTIO_F_IOMMU_PLATFORM,
> +VIRTIO_NET_F_GUEST_ANNOUNCE,
> +VHOST_INVALID_FEATURE_BIT
> +};
>  static const int *vhost_net_get_feature_bits(struct vhost_net *net)
>  {
>  const int *feature_bits = 0;
> @@ -96,6 +121,9 @@ static const int *vhost_net_get_feature_bits(struct 
> vhost_net *net)
>  case NET_CLIENT_DRIVER_VHOST_USER:
>  feature_bits = user_feature_bits;
>  break;
> +case NET_CLIENT_DRIVER_VHOST_VDPA:
> +feature_bits = vdpa_feature_bits;
> +break;
>  default:
>  error_report("Feature bits not defined for this type: %d",
>  net->nc->info->type);
> @@ -434,6 +462,10 @@ VHostNetState *get_vhost_net(NetClientState *nc)
>  assert(vhost_net);
>  break;
>  #endif
> +case NET_CLIENT_DRIVER_VHOST_VDPA:
> +vhost_net = vhost_vdpa_get_vhost_net(nc);
> +assert(vhost_net);
> +break;
>  default:
>  break;
>  }
> @@ -465,3 +497,14 @@ int vhost_net_set_mtu(struct vhost_net *net, uint16_t 
> mtu)
>  
>  return vhost_ops->vhost_net_set_mtu(>dev, mtu);
>  }
> +int vhost_set_state(NetClientState *nc, int state)
> +{
> +struct vhost_net *net = get_vhost_net(nc);
> +struct vhost_dev *hdev = >dev;
> +if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {

Maybe checking the vhost_set_state callback is implemented is enough,
and it is not need to restrict that to Vhost-vDPA?

> +if (hdev->vhost_ops->vhost_set_state) {
> +return hdev->vhost_ops->vhost_set_state(hdev, state);
> + }
> +}
> +return 0;
> +}




Re: [PATCH 2/3] mirror: Make sure that source and target size match

2020-05-07 Thread Eric Blake

On 5/7/20 9:52 AM, Kevin Wolf wrote:

If the target is shorter than the source, mirror would copy data until
it reaches the end of the target and then fail with an I/O error when
trying to write past the end.

If the target is longer than the source, the mirror job would complete
successfully, but the target wouldn't actually be an accurate copy of
the source image (it would contain some additional garbage at the end).

Fix this by checking that both images have the same size when the job
starts.

Signed-off-by: Kevin Wolf 
---
  block/mirror.c | 21 -
  1 file changed, 12 insertions(+), 9 deletions(-)


An alternative would be trying to resize the target (like we have to do 
with active commit), but I'm fine with being conservative for now by 
forcing the user to have correct sizing, where we have the option to add 
magic resizing later only if it proves useful and not introducing more 
potential issues.


Reviewed-by: Eric Blake 

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [PATCH 1/3] iotests/229: Use blkdebug to inject an error

2020-05-07 Thread Eric Blake

On 5/7/20 9:52 AM, Kevin Wolf wrote:

229 relies on the mirror running into an I/O error when the target is
smaller than the source. After changing mirror to catch this condition
while starting the job, this test case won't get a job that is paused
for an I/O error any more. Use blkdebug instead to inject an error.

Signed-off-by: Kevin Wolf 
---
  tests/qemu-iotests/229 | 15 +++
  tests/qemu-iotests/229.out |  6 +++---
  2 files changed, 14 insertions(+), 7 deletions(-)


Reviewed-by: Eric Blake 

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [PATCH] 9pfs: Fix potential deadlock of QEMU mainloop

2020-05-07 Thread Christian Schoenebeck
On Donnerstag, 7. Mai 2020 16:33:28 CEST Greg Kurz wrote:
> > I also haven't reviewed QEMU's lock implementations in very detail, but
> > IIRC CoMutexes are completely handled in user space, while QemuMutex uses
> > regular OS mutexes and hence might cost context switches.
> 
> ... since the locking would only been exercised with an hypothetical
> client doing stupid things, this is beginning to look like bike-shedding
> to me. :)

Aha, keep that in mind when you're doing your next review. ;-)

No seriously, like I said, I don't really care too much about Mutex vs. 
CoMutex in you patch here. It was actually more about wide-picture thinking, 
i.e. other places of (co)mutexes being used or other potential changes that 
would make this or other uses more relevant one day.

> > > > > diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
> > > > > index 9e046f7acb51..ac84ae804496 100644
> > > > > --- a/hw/9pfs/9p.c
> > > > > +++ b/hw/9pfs/9p.c
> > > > > @@ -2170,7 +2170,7 @@ static int coroutine_fn
> > > > > v9fs_do_readdir_with_stat(V9fsPDU *pdu, int32_t count = 0;
> > > > > 
> > > > >  struct stat stbuf;
> > > > >  off_t saved_dir_pos;
> > > > > 
> > > > > -struct dirent *dent;
> > > > > +struct dirent dent;
> > > > > 
> > > > >  /* save the directory position */
> > > > >  saved_dir_pos = v9fs_co_telldir(pdu, fidp);
> > > > > 
> > > > > @@ -2181,13 +2181,11 @@ static int coroutine_fn
> > > > > v9fs_do_readdir_with_stat(V9fsPDU *pdu, while (1) {
> > > > > 
> > > > >  v9fs_path_init();
> > > > > 
> > > > > -v9fs_readdir_lock(>fs.dir);
> > > > > -
> > > > 
> > > > That's the deadlock fix, but ...
> > > > 
> > > > >  err = v9fs_co_readdir(pdu, fidp, );
> > > > > 
> > > > > -if (err || !dent) {
> > > > > +if (err <= 0) {
> > > > > 
> > > > >  break;
> > > > >  
> > > > >  }
> > > > 
> > > > ... even though this code simplification might make sense, I don't
> > > > think
> > > > it
> > > > should be mixed with the deadlock fix together in one patch. They are
> > > > not
> > > 
> > > I could possibly split this in two patches, one for returning a copy
> > > and one for moving the locking around, but...
> > > 
> > > > related with each other, nor is the code simplification you are aiming
> > > > trivial
> > > 
> > > ... this assertion is somewhat wrong: moving the locking to
> > > v9fs_co_readdir() really requires it returns a copy.
> > 
> > Yeah, I am also not sure whether a split would make it more trivial enough
> > in this case to be worth the hassle. If you find an acceptable solution,
> > good, if not then leave it one patch.
> 
> Another option would be to g_malloc() the dirent in v9fs_co_readdir() and
> g_free() in the callers. This would cause less churn since we could keep
> the same function signature.

I was actually just going to suggest the same. So yes, looks like a less 
invasive change to me.

Best regards,
Christian Schoenebeck





Re: Emulating Solaris 10 on SPARC64 sun4u

2020-05-07 Thread Artyom Tarasenko
On Thu, May 7, 2020 at 4:29 PM  wrote:
>
> Just thought I'd chime in with an update.
>
> We are currently emulating a 16550A UART. The guest sees this as the SU
> device, referring to the SuperIO port (a pair of 16550A UARTs). On the
> Ultra 5, the machine that Sun4u is modelled against, SuperIO was used
> for the keyboard and mouse. The Ultra 5 also had a DB25 (ttya default)
> and a DB9 (ttyb default) with a SAB82532 ESSC2.
>
> Using tracing, I've looked through how the 16550A UART is touched and
> it looks like Solaris 10 has no issues identifying the device. I've
> matched register accesses with driver code in OpenSolaris and I'm
> pretty sure the device is attached successfully. Also, if you boot
> Solaris 10 with debugging output, you can see that the device gets
> labelled as su0. The only time Solaris is capable of writing to the
> console is when OpenBIOS is used as a proxy.
>
> Rather than Solaris deciding against using SuperIO as a tty, I don't
> think there was ever any support for doing so (at least on SPARC
> machines). This could be an explanation for why the system appears to
> be trucking along just fine despite a seemingly frozen console - there
> is no console. I don't think the frozen console is the fault of broken
> interrupt routing as the 16550A UART is never programmed to generate
> them.

At least Fujitsu Siemens PRIMEPOWER250 had a tty attached to the
16550A UART. I think there were more such machines. I don't expect
there is anything in the Solaris kernel which would prevent any serial
device known to it to be used as a tty.

> I've started work on emulating the SAB 82532 ESSC2 but it's
> unfortunately way more complex than than the 16550A. For instance, it's
> possible to configure different baudrates for receiving and
> transmitting. QEMU's chardev interface doesn't appear to handle that.
> QEMUSerialSetParams has a single speed value that is passed to
> cfsetispeed and cfsetospeed. The chip also has support for stick parity
> , which aren't valid options right now either. If I'm wrong on either
> of those points please correct me. Unless there is an alternative,
> changes to the interface may need to be made if adding this device is
> to be considered.

Well, theoretically yes, but practically there is just one baudrate
which can be specified in the OBP. I think it's perfectly safe to use
max(rxrate,txrate), or min(rxrate,txrate), whatever you prefer.

Regards,
Artyom
>
> On Sun, 2020-02-09 at 11:26 +, Mark Cave-Ayland wrote:
> > On 05/02/2020 06:31, jasper.low...@bt.com wrote:
> >
> > > I'm currently working towards emulating Solaris 10 on sun4u.
> > >
> > >
> > >
> > > The Solaris 10 ISO image I am attempting to boot is the one from
> > > the Oracle
> > >
> > > download page at
> > > https://www.oracle.com/solaris/solaris10/downloads/solaris10-get-jsp-downloads.html.
> > >
> > > Image: sol-10-u11-ga-sparc-dvd.iso
> > >
> > > MD5:   53e8b066f7f250ce2fd2cef063f8072b
> > >
> > >
> > >
> > > I am using QEMU commit 7bd9d0a9e26c7a3c67c0f174f0009ba19969b158.
> > >
> > >
> > >
> > > The command I am using to run QEMU is:
> > >
> > > ./qemu/sparc64-softmmu/qemu-system-sparc64 -bios
> > > ./openbios/obj-sparc64/openbios-builtin.elf -cdrom
> > > ./iso/solaris/sol-10-u11-ga-sparc-dvd.iso -boot d -nographic -m 3G
> > >
> > >
> > >
> > > ```
> > >
> > > CPUs: 1 x SUNW,UltraSPARC-IIi
> > >
> > > UUID: ----
> > >
> > > Welcome to OpenBIOS v1.1 built on Feb 5 2020 19:15
> > >
> > >   Type 'help' for detailed information
> > >
> > > Trying cdrom:f...
> > >
> > > Not a bootable ELF image
> > >
> > > Not a bootable a.out image
> > >
> > >
> > >
> > > Loading FCode image...
> > >
> > > Loaded 7420 bytes
> > >
> > > entry point is 0x4000
> > >
> > > Evaluating FCode...
> > >
> > > Evaluating FCode...
> > >
> > > Ignoring failed claim for va 100 memsz af6d6!
> > >
> > > Ignoring failed claim for va 1402000 memsz 4dcc8!
> > >
> > > Ignoring failed claim for va 180 memsz 510c8!
> > >
> > > SunOS Release 5.10 Version Generic_147147-26 64-bit
> > >
> > > Copyright (c) 1983, 2013, Oracle and/or its affiliates. All rights
> > > reserved.
> > >
> > > could not find debugger-vocabulary-hook>threads:interpret:
> > > exception -13 caught
> > >
> > > interpret \ Copyright (c) 1995-1999 by Sun Microsystems, Inc.
> > >
> > > \ All rights reserved.
> > >
> > > \
> > >
> > > \ ident "@(#)data64.fth  1.3 00/07/17 SMI"
> > >
> > >
> > >
> > > hex
> > >
> > >
> > >
> > > only forth also definitions
> > >
> > > vocabulary kdbg-words
> > >
> > > also kdbg-words definitions
> > >
> > >
> > >
> > > defer p@
> > >
> > > defer p!
> > >
> > > ['] x@ is p@
> > >
> > > ['] x! is p!
> > >
> > >
> > >
> > > 8 constant ptrsize
> > >
> > >
> > >
> > > d# 32 constant nbitsminor
> > >
> > > h#  constant maxmin
> > >
> > > \
> > >
> > > \ Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
> > >
> > > \ Use is subject to license terms.
> > >
> > > \
> > >
> > >
> > >

Re: [PATCH 0/5] target/i386: fxtract, fscale fixes

2020-05-07 Thread Joseph Myers
On Thu, 7 May 2020, no-re...@patchew.org wrote:

> === OUTPUT BEGIN ===
> 1/5 Checking commit 69eed0bcaaaf (target/i386: implement special cases for 
> fxtract)
> WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?

I don't think any MAINTAINERS update is needed for a new testcase in an 
existing directory.

> ERROR: Use of volatile is usually wrong, please add a comment

I think the justification for volatile in such testcase code is obvious 
without comments in individual cases - to avoid any code movement or 
optimization that might break what the tests are intending to test (these 
tests are making heavy use of mixed C and inline asm to test how emulated 
instructions behave, including on input representations that are not valid 
long double values in the ABI and with the rounding precision changed 
behind the compiler's back).  I think making everything possibly relevant 
volatile in these tests is better than trying to produce a fragile 
argument that in fact certain data does not need to be volatile to avoid 
problematic code movement.

> ERROR: spaces required around that '-' (ctx:VxV)
> #139: FILE: tests/tcg/i386/test-i386-fxtract.c:80:
> +  "0" (0x1p-16445L));
> ^

No, this is a C99 hex float contstant, not a subtraction.  There are 
already such constants in tests/tcg/multiarch/float_helpers.c and 
tests/tcg/multiarch/float_madds.c at least, so I assume they are OK in 
QEMU floating-point tests and this style checker should not be objecting 
to them.

-- 
Joseph S. Myers
jos...@codesourcery.com



[PATCH 3/3] iotests: Mirror with different source/target size

2020-05-07 Thread Kevin Wolf
This tests that the mirror job catches situations where the target node
has a different size than the source node. It must also forbid resize
operations when the job is already running.

Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/041 | 45 ++
 tests/qemu-iotests/041.out |  4 ++--
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index 1812dd8479..601c756117 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -240,6 +240,49 @@ class TestSingleBlockdev(TestSingleDrive):
  target=self.qmp_target)
 self.assert_qmp(result, 'error/class', 'GenericError')
 
+def do_test_resize(self, device, node):
+def pre_finalize():
+if device:
+result = self.vm.qmp('block_resize', device=device, size=65536)
+self.assert_qmp(result, 'error/class', 'GenericError')
+
+result = self.vm.qmp('block_resize', node_name=node, size=65536)
+self.assert_qmp(result, 'error/class', 'GenericError')
+
+result = self.vm.qmp(self.qmp_cmd, job_id='job0', device='drive0',
+ sync='full', target=self.qmp_target,
+ auto_finalize=False, auto_dismiss=False)
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.run_job('job0', auto_finalize=False,
+ pre_finalize=pre_finalize)
+self.assertEqual(result, None)
+
+def test_source_resize(self):
+self.do_test_resize('drive0', 'top')
+
+def test_target_resize(self):
+self.do_test_resize(None, self.qmp_target)
+
+def do_test_target_size(self, size):
+result = self.vm.qmp('block_resize', node_name=self.qmp_target,
+ size=size)
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp(self.qmp_cmd, job_id='job0',
+ device='drive0', sync='full', auto_dismiss=False,
+ target=self.qmp_target)
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.run_job('job0')
+self.assertEqual(result, 'Source and target image have different 
sizes')
+
+def test_small_target(self):
+self.do_test_target_size(self.image_len // 2)
+
+def test_large_target(self):
+self.do_test_target_size(self.image_len * 2)
+
 test_large_cluster = None
 test_image_not_found = None
 test_small_buffer2 = None
@@ -251,6 +294,8 @@ class TestSingleDriveZeroLength(TestSingleDrive):
 
 class TestSingleBlockdevZeroLength(TestSingleBlockdev):
 image_len = 0
+test_small_target = None
+test_large_target = None
 
 class TestSingleDriveUnalignedLength(TestSingleDrive):
 image_len = 1025 * 1024
diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out
index 877b76fd31..53abe11d73 100644
--- a/tests/qemu-iotests/041.out
+++ b/tests/qemu-iotests/041.out
@@ -1,5 +1,5 @@
-..
+
 --
-Ran 94 tests
+Ran 104 tests
 
 OK
-- 
2.25.3




[PATCH 2/3] mirror: Make sure that source and target size match

2020-05-07 Thread Kevin Wolf
If the target is shorter than the source, mirror would copy data until
it reaches the end of the target and then fail with an I/O error when
trying to write past the end.

If the target is longer than the source, the mirror job would complete
successfully, but the target wouldn't actually be an accurate copy of
the source image (it would contain some additional garbage at the end).

Fix this by checking that both images have the same size when the job
starts.

Signed-off-by: Kevin Wolf 
---
 block/mirror.c | 21 -
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index aca95c9bc9..201ffa26f9 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -872,6 +872,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
 BlockDriverState *target_bs = blk_bs(s->target);
 bool need_drain = true;
 int64_t length;
+int64_t target_length;
 BlockDriverInfo bdi;
 char backing_filename[2]; /* we only need 2 characters because we are only
  checking for a NULL string */
@@ -887,24 +888,26 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
 goto immediate_exit;
 }
 
+target_length = blk_getlength(s->target);
+if (target_length < 0) {
+ret = target_length;
+goto immediate_exit;
+}
+
 /* Active commit must resize the base image if its size differs from the
  * active layer. */
 if (s->base == blk_bs(s->target)) {
-int64_t base_length;
-
-base_length = blk_getlength(s->target);
-if (base_length < 0) {
-ret = base_length;
-goto immediate_exit;
-}
-
-if (s->bdev_length > base_length) {
+if (s->bdev_length > target_length) {
 ret = blk_truncate(s->target, s->bdev_length, false,
PREALLOC_MODE_OFF, 0, NULL);
 if (ret < 0) {
 goto immediate_exit;
 }
 }
+} else if (s->bdev_length != target_length) {
+error_setg(errp, "Source and target image have different sizes");
+ret = -EINVAL;
+goto immediate_exit;
 }
 
 if (s->bdev_length == 0) {
-- 
2.25.3




[PATCH 1/3] iotests/229: Use blkdebug to inject an error

2020-05-07 Thread Kevin Wolf
229 relies on the mirror running into an I/O error when the target is
smaller than the source. After changing mirror to catch this condition
while starting the job, this test case won't get a job that is paused
for an I/O error any more. Use blkdebug instead to inject an error.

Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/229 | 15 +++
 tests/qemu-iotests/229.out |  6 +++---
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/tests/qemu-iotests/229 b/tests/qemu-iotests/229
index 866168b236..a5d4e5d4f2 100755
--- a/tests/qemu-iotests/229
+++ b/tests/qemu-iotests/229
@@ -33,6 +33,7 @@ _cleanup()
 _cleanup_test_img
 _rm_test_img "$TEST_IMG"
 _rm_test_img "$DEST_IMG"
+rm -f "$TEST_DIR/blkdebug.conf"
 }
 trap "_cleanup; exit \$status" 0 1 2 3 15
 
@@ -49,11 +50,10 @@ _supported_os Linux
 
 DEST_IMG="$TEST_DIR/d.$IMGFMT"
 TEST_IMG="$TEST_DIR/b.$IMGFMT"
+BLKDEBUG_CONF="$TEST_DIR/blkdebug.conf"
 
 _make_test_img 2M
-
-# destination for mirror will be too small, causing error
-TEST_IMG=$DEST_IMG _make_test_img 1M
+TEST_IMG=$DEST_IMG _make_test_img 2M
 
 $QEMU_IO -c 'write 0 2M' "$TEST_IMG" | _filter_qemu_io
 
@@ -67,11 +67,18 @@ echo
 echo '=== Starting drive-mirror, causing error & stop  ==='
 echo
 
+cat > "$BLKDEBUG_CONF" <

[PATCH 0/3] mirror: Make sure that source and target size match

2020-05-07 Thread Kevin Wolf
Same thing as the recent fix for backup, except that mirror already
forbids resizing during the job. So what remains is checking that the
sizes match at the start of the job.

Kevin Wolf (3):
  iotests/229: Use blkdebug to inject an error
  mirror: Make sure that source and target size match
  iotests: Mirror with different source/target size

 block/mirror.c | 21 ++
 tests/qemu-iotests/041 | 45 ++
 tests/qemu-iotests/041.out |  4 ++--
 tests/qemu-iotests/229 | 15 +
 tests/qemu-iotests/229.out |  6 ++---
 5 files changed, 73 insertions(+), 18 deletions(-)

-- 
2.25.3




[Bug 1877136] Re: Qemu GDB Arm core registers XML description not valid for M-profile

2020-05-07 Thread Peter Maydell
Patch submitted:
https://patchew.org/QEMU/20200507134755.13997-1-peter.mayd...@linaro.org/


** Changed in: qemu
   Status: New => In Progress

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1877136

Title:
  Qemu GDB Arm core registers XML description not valid for M-profile

Status in QEMU:
  In Progress

Bug description:
  When trying to debug an armv7-m binary running on Qemu, GDB makes some
  mistakes due to mistakenly believing the target is not M-profile.

  One observable is that backtraces over signal handlers are not handled
  correctly -- since the special M-profile EXC_RETURN value is not
  recognised.  That happens because GDB doesn't think the target is
  M-profile.

  This happens because GDB sees a reported feature set from the Qemu
  remote connection that includes the feature `org.gnu.gdb.arm.core`.

  As described in the GDB online docs, for "M-profile targets (e.g. Cortex-M3), 
the ‘org.gnu.gdb.arm.core’ feature is replaced by ‘org.gnu.gdb.arm.m-profile’"
  https://sourceware.org/gdb/current/onlinedocs/gdb/ARM-Features.html

  From a scan of the Qemu source code on commit
  ea1329bb3a8d5cd25b70e3dbf73e7ded4d5ad756 it seems that when emulating
  an arm core it uses `arm-core.xml` unconditionally for
  `CPUClass->gdb_core_xml_file`, and that means the only feature
  provided is `org.gnu.gdb.arm.core`.

  Note that even though there is a command to set the architecture in GDB, 
setting the target architecture to an M-profile core is still not a valid 
workaround.
  This is because the target description overrides everything in setting the 
`is_m` attribute within GDB.

  Reproduction of the observable:
  Using the examples here 
https://git.linaro.org/people/peter.maydell/m-profile-tests.git/tree/ .
  Build the examples, and run 
  ```
  qemu-system-arm -s -S -no-reboot -M lm3s6965evb -m 16 -serial stdio -display 
none -net nic -net user,restrict=on -d guest_errors,unimp -kernel test3-kern.bin
  ```

  Then in a GDB session
  ```
  vshcmd: > arm-none-eabi-gdb -q


  (gdb)
  vshcmd: > file test3-kern.elf
  Reading symbols from test3-kern.elf...
  (gdb)
  vshcmd: > target remote localhost:1234
  Remote debugging using localhost:1234
  _start () at init-m.S:53
  53mov r0, #0
  (gdb)
  vshcmd: > show architecture
  The target architecture is set automatically (currently armv7)
  (gdb)
  vshcmd: > break svc
  Breakpoint 1 at 0x6fc: svc. (2 locations)
  (gdb)
  vshcmd: > cont
  Continuing.

  Breakpoint 1, svc () at test3.c:16
  16  int test = SEQ();
  (gdb)
  vshcmd: > bt
  #0  svc () at test3.c:16
  #1  0xfff8 in ?? ()
  Backtrace stopped: previous frame identical to this frame (corrupt stack?)
  (gdb)
  vshcmd: > print/x $lr
  $1 = 0xfff9
  (gdb)
  ```

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1877136/+subscriptions



device hotplug & file handles

2020-05-07 Thread Gerd Hoffmann
  Hi,

For usb device pass-through (aka -device usb-host) it would be very
useful to pass file handles from libvirt to qemu.  The workflow would
change from ...

  (1) libvirt enables access to /dev/usb/$bus/$dev
  (2) libvirt passes $bus + $dev (using hostbus + hostaddr properties)
  to qemu.
  (3) qemu opens /dev/usb/$bus/$dev

... to ...

  (1) libvirt opens /dev/usb/$bus/$dev
  (2) libvirt passes filehandle to qemu.

Question is how can we pass the file descriptor best?  My idea would be
to simply add an fd property to usb-host:

 * Coldplug would be "-device usb-host,fd=" (cmd line).
 * Hotplug would be "device_add usb-host,fd=" (monitor).

Will that work from libvirt point of view?
Or does anyone have an better idea?

thanks,
  Gerd

PS: background: https://bugzilla.redhat.com/show_bug.cgi?id=1595525




Re: [PATCH] docs/devel/migration: start a debugging section

2020-05-07 Thread Dr. David Alan Gilbert
* Marc-André Lureau (marcandre.lur...@redhat.com) wrote:
> Explain how to use analyze-migration.py, this may help.
> 
> Signed-off-by: Marc-André Lureau 

Queued

> ---
>  docs/devel/migration.rst | 20 
>  1 file changed, 20 insertions(+)
> 
> diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst
> index e88918f7639..2eb08624fc3 100644
> --- a/docs/devel/migration.rst
> +++ b/docs/devel/migration.rst
> @@ -50,6 +50,26 @@ All these migration protocols use the same infrastructure 
> to
>  save/restore state devices.  This infrastructure is shared with the
>  savevm/loadvm functionality.
>  
> +Debugging
> +=
> +
> +The migration stream can be analyzed thanks to 
> `scripts/analyze_migration.py`.
> +
> +Example usage:
> +
> +.. code-block:: shell
> +
> +  $ qemu-system-x86_64
> +   (qemu) migrate "exec:cat > mig"
> +  $ ./scripts/analyze_migration.py -f mig
> +  {
> +"ram (3)": {
> +"section sizes": {
> +"pc.ram": "0x0800",
> +  ...
> +
> +See also ``analyze_migration.py -h`` help for more options.
> +
>  Common infrastructure
>  =
>  
> -- 
> 2.26.0.rc2.42.g98cedd0233
> 
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




Re: [PATCH v2 0/3] Improved reporting for migrate parameters

2020-05-07 Thread Dr. David Alan Gilbert
* Mao Zhongyi (maozhon...@cmss.chinamobile.com) wrote:
> This series mainly improve the report message of migrate parameters
> to make it easier to read.

Queued

> v2->v1
> -p1: avoid using constants, replace it with stringify().
> 
> Cc: quint...@redhat.com
> Cc: dgilb...@redhat.com
> 
> Mao Zhongyi (3):
>   migration/migration: improve error reporting for migrate parameters
>   monitor/hmp-cmds: add hmp_handle_error() for hmp_migrate_set_speed()
>   migration: move the units of migrate parameters from milliseconds to
> ms
> 
>  migration/migration.c | 20 
>  monitor/hmp-cmds.c| 13 -
>  2 files changed, 20 insertions(+), 13 deletions(-)
> 
> -- 
> 2.17.1
> 
> 
> 
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




[Bug 1856335] Re: Cache Layout wrong on many Zen Arch CPUs

2020-05-07 Thread Babu Moger
Yes. Sieger. Please install 5.0 it should work fine. I am not sure about
4.2.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1856335

Title:
  Cache Layout wrong on many Zen Arch CPUs

Status in QEMU:
  New

Bug description:
  AMD CPUs have L3 cache per 2, 3 or 4 cores. Currently, TOPOEXT seems
  to always map Cache ass if it was an 4-Core per CCX CPU, which is
  incorrect, and costs upwards 30% performance (more realistically 10%)
  in L3 Cache Layout aware applications.

  Example on a 4-CCX CPU (1950X /w 8 Cores and no SMT):

    
  EPYC-IBPB
  AMD
  

  In windows, coreinfo reports correctly:

    Unified Cache 1, Level 3,8 MB, Assoc  16, LineSize  64
    Unified Cache 6, Level 3,8 MB, Assoc  16, LineSize  64

  On a 3-CCX CPU (3960X /w 6 cores and no SMT):

   
  EPYC-IBPB
  AMD
  

  in windows, coreinfo reports incorrectly:

  --  Unified Cache  1, Level 3,8 MB, Assoc  16, LineSize  64
  **  Unified Cache  6, Level 3,8 MB, Assoc  16, LineSize  64

  Validated against 3.0, 3.1, 4.1 and 4.2 versions of qemu-kvm.

  With newer Qemu there is a fix (that does behave correctly) in using the dies 
parameter:
   

  The problem is that the dies are exposed differently than how AMD does
  it natively, they are exposed to Windows as sockets, which means, that
  if you are nto a business user, you can't ever have a machine with
  more than two CCX (6 cores) as consumer versions of Windows only
  supports two sockets. (Should this be reported as a separate bug?)

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1856335/+subscriptions



[PATCH v2 10/9] qed: Simplify backing reads

2020-05-07 Thread Eric Blake
The other four drivers that support backing files (qcow, qcow2,
parallels, vmdk) all rely on the block layer to populate zeroes when
reading beyond EOF of a short backing file.  We can simplify the qed
code by doing likewise.

Signed-off-by: Eric Blake 
---

I noticed this during my audit that v1 of Vladimir's series was correct.

No change in iotests results (test 274 is currently failing for qed,
but for other reasons:
+Traceback (most recent call last):
+  File "274", line 24, in 
+iotests.verify_image_format(supported_fmts=['qcow2'])
+AttributeError: module 'iotests' has no attribute 'verify_image_format'
)

 block/qed.h |  1 -
 block/qed.c | 64 +
 2 files changed, 6 insertions(+), 59 deletions(-)

diff --git a/block/qed.h b/block/qed.h
index 42c115d8220c..3d12bf78d412 100644
--- a/block/qed.h
+++ b/block/qed.h
@@ -140,7 +140,6 @@ typedef struct QEDAIOCB {

 /* Current cluster scatter-gather list */
 QEMUIOVector cur_qiov;
-QEMUIOVector *backing_qiov;
 uint64_t cur_pos;   /* position on block device, in bytes */
 uint64_t cur_cluster;   /* cluster offset in image file */
 unsigned int cur_nclusters; /* number of clusters being accessed */
diff --git a/block/qed.c b/block/qed.c
index 927382995a0c..bea4b9f6cc97 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -849,56 +849,18 @@ static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
  * @s:  QED state
  * @pos:Byte position in device
  * @qiov:   Destination I/O vector
- * @backing_qiov:   Possibly shortened copy of qiov, to be allocated here
- * @cb: Completion function
- * @opaque: User data for completion function
  *
  * This function reads qiov->size bytes starting at pos from the backing file.
  * If there is no backing file then zeroes are read.
  */
 static int coroutine_fn qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
-  QEMUIOVector *qiov,
-  QEMUIOVector **backing_qiov)
+  QEMUIOVector *qiov)
 {
-uint64_t backing_length = 0;
-size_t size;
-int ret;
-
-/* If there is a backing file, get its length.  Treat the absence of a
- * backing file like a zero length backing file.
- */
 if (s->bs->backing) {
-int64_t l = bdrv_getlength(s->bs->backing->bs);
-if (l < 0) {
-return l;
-}
-backing_length = l;
-}
-
-/* Zero all sectors if reading beyond the end of the backing file */
-if (pos >= backing_length ||
-pos + qiov->size > backing_length) {
-qemu_iovec_memset(qiov, 0, 0, qiov->size);
-}
-
-/* Complete now if there are no backing file sectors to read */
-if (pos >= backing_length) {
-return 0;
-}
-
-/* If the read straddles the end of the backing file, shorten it */
-size = MIN((uint64_t)backing_length - pos, qiov->size);
-
-assert(*backing_qiov == NULL);
-*backing_qiov = g_new(QEMUIOVector, 1);
-qemu_iovec_init(*backing_qiov, qiov->niov);
-qemu_iovec_concat(*backing_qiov, qiov, 0, size);
-
-BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
-ret = bdrv_co_preadv(s->bs->backing, pos, size, *backing_qiov, 0);
-if (ret < 0) {
-return ret;
+BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
+return bdrv_co_preadv(s->bs->backing, pos, qiov->size, qiov, 0);
 }
+qemu_iovec_memset(qiov, 0, 0, qiov->size);
 return 0;
 }

@@ -915,7 +877,6 @@ static int coroutine_fn 
qed_copy_from_backing_file(BDRVQEDState *s,
uint64_t offset)
 {
 QEMUIOVector qiov;
-QEMUIOVector *backing_qiov = NULL;
 int ret;

 /* Skip copy entirely if there is no work to do */
@@ -925,13 +886,7 @@ static int coroutine_fn 
qed_copy_from_backing_file(BDRVQEDState *s,

 qemu_iovec_init_buf(, qemu_blockalign(s->bs, len), len);

-ret = qed_read_backing_file(s, pos, , _qiov);
-
-if (backing_qiov) {
-qemu_iovec_destroy(backing_qiov);
-g_free(backing_qiov);
-backing_qiov = NULL;
-}
+ret = qed_read_backing_file(s, pos, );

 if (ret) {
 goto out;
@@ -1339,8 +1294,7 @@ static int coroutine_fn qed_aio_read_data(void *opaque, 
int ret,
 qemu_iovec_memset(>cur_qiov, 0, 0, acb->cur_qiov.size);
 r = 0;
 } else if (ret != QED_CLUSTER_FOUND) {
-r = qed_read_backing_file(s, acb->cur_pos, >cur_qiov,
-  >backing_qiov);
+r = qed_read_backing_file(s, acb->cur_pos, >cur_qiov);
 } else {
 BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
 r = bdrv_co_preadv(bs->file, offset, acb->cur_qiov.size,
@@ -1365,12 +1319,6 @@ static int coroutine_fn qed_aio_next_io(QEDAIOCB *acb)
 while (1) {
 trace_qed_aio_next_io(s, acb, 0, acb->cur_pos + 

<    1   2   3   4   >