Re: [PATCH 4/4] watchdog/aspeed: Fix AST2600 frequency behaviour

2019-11-11 Thread Cédric Le Goater
On 12/11/2019 07:40, Joel Stanley wrote:
> The AST2600 control register sneakily changed the meaning of bit 4
> without anyone noticing. It no longer controls the 1MHz vs APB clock
> select, and instead always runs at 1MHz.
> 
> The AST2500 was always 1MHz too, but it retained bit 4, making it read
> only. We can model both using the same fixed 1MHz calculation.
> 
> Fixes: ea29711f467f ("watchdog/aspeed: Fix AST2600 control reg behaviour")

which commit is that ^ ? Did you mean :

Fixes: 6b2b2a703cad ("hw: wdt_aspeed: Add AST2600 support")

> Signed-off-by: Joel Stanley 

Reviewed-by: Cédric Le Goater 

C.

> ---
>  hw/watchdog/wdt_aspeed.c | 21 +
>  include/hw/watchdog/wdt_aspeed.h |  1 +
>  2 files changed, 18 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/watchdog/wdt_aspeed.c b/hw/watchdog/wdt_aspeed.c
> index 5697ed83325a..f43a3bc88976 100644
> --- a/hw/watchdog/wdt_aspeed.c
> +++ b/hw/watchdog/wdt_aspeed.c
> @@ -93,11 +93,11 @@ static uint64_t aspeed_wdt_read(void *opaque, hwaddr 
> offset, unsigned size)
>  
>  }
>  
> -static void aspeed_wdt_reload(AspeedWDTState *s, bool pclk)
> +static void aspeed_wdt_reload(AspeedWDTState *s)
>  {
>  uint64_t reload;
>  
> -if (pclk) {
> +if (!(s->regs[WDT_CTRL] & WDT_CTRL_1MHZ_CLK)) {
>  reload = muldiv64(s->regs[WDT_RELOAD_VALUE], NANOSECONDS_PER_SECOND,
>s->pclk_freq);
>  } else {
> @@ -109,6 +109,16 @@ static void aspeed_wdt_reload(AspeedWDTState *s, bool 
> pclk)
>  }
>  }
>  
> +static void aspeed_wdt_reload_1mhz(AspeedWDTState *s)
> +{
> +uint64_t reload = s->regs[WDT_RELOAD_VALUE] * 1000ULL;
> +
> +if (aspeed_wdt_is_enabled(s)) {
> +timer_mod(s->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + reload);
> +}
> +}
> +
> +
>  static void aspeed_wdt_write(void *opaque, hwaddr offset, uint64_t data,
>   unsigned size)
>  {
> @@ -130,13 +140,13 @@ static void aspeed_wdt_write(void *opaque, hwaddr 
> offset, uint64_t data,
>  case WDT_RESTART:
>  if ((data & 0x) == WDT_RESTART_MAGIC) {
>  s->regs[WDT_STATUS] = s->regs[WDT_RELOAD_VALUE];
> -aspeed_wdt_reload(s, !(s->regs[WDT_CTRL] & WDT_CTRL_1MHZ_CLK));
> +awc->wdt_reload(s);
>  }
>  break;
>  case WDT_CTRL:
>  if (enable && !aspeed_wdt_is_enabled(s)) {
>  s->regs[WDT_CTRL] = data;
> -aspeed_wdt_reload(s, !(data & WDT_CTRL_1MHZ_CLK));
> +awc->wdt_reload(s);
>  } else if (!enable && aspeed_wdt_is_enabled(s)) {
>  s->regs[WDT_CTRL] = data;
>  timer_del(s->timer);
> @@ -283,6 +293,7 @@ static void aspeed_2400_wdt_class_init(ObjectClass 
> *klass, void *data)
>  awc->offset = 0x20;
>  awc->ext_pulse_width_mask = 0xff;
>  awc->reset_ctrl_reg = SCU_RESET_CONTROL1;
> +awc->wdt_reload = aspeed_wdt_reload;
>  }
>  
>  static const TypeInfo aspeed_2400_wdt_info = {
> @@ -317,6 +328,7 @@ static void aspeed_2500_wdt_class_init(ObjectClass 
> *klass, void *data)
>  awc->ext_pulse_width_mask = 0xf;
>  awc->reset_ctrl_reg = SCU_RESET_CONTROL1;
>  awc->reset_pulse = aspeed_2500_wdt_reset_pulse;
> +awc->wdt_reload = aspeed_wdt_reload_1mhz;
>  }
>  
>  static const TypeInfo aspeed_2500_wdt_info = {
> @@ -336,6 +348,7 @@ static void aspeed_2600_wdt_class_init(ObjectClass 
> *klass, void *data)
>  awc->ext_pulse_width_mask = 0xf; /* TODO */
>  awc->reset_ctrl_reg = AST2600_SCU_RESET_CONTROL1;
>  awc->reset_pulse = aspeed_2500_wdt_reset_pulse;
> +awc->wdt_reload = aspeed_wdt_reload_1mhz;
>  }
>  
>  static const TypeInfo aspeed_2600_wdt_info = {
> diff --git a/include/hw/watchdog/wdt_aspeed.h 
> b/include/hw/watchdog/wdt_aspeed.h
> index dfedd7662dd1..819c22993a6e 100644
> --- a/include/hw/watchdog/wdt_aspeed.h
> +++ b/include/hw/watchdog/wdt_aspeed.h
> @@ -47,6 +47,7 @@ typedef struct AspeedWDTClass {
>  uint32_t ext_pulse_width_mask;
>  uint32_t reset_ctrl_reg;
>  void (*reset_pulse)(AspeedWDTState *s, uint32_t property);
> +void (*wdt_reload)(AspeedWDTState *s);
>  }  AspeedWDTClass;
>  
>  #endif /* WDT_ASPEED_H */
> 




Re: [PATCH 3/4] watchdog/aspeed: Improve watchdog timeout message

2019-11-11 Thread Cédric Le Goater
On 12/11/2019 07:40, Joel Stanley wrote:
> Users benefit from knowing which watchdog timer has expired. The address
> of the watchdog's registers unambiguously indicates which has expired,
> so log that.
> 
> Signed-off-by: Joel Stanley 


The format below should be using HWADDR_PRIx. No need to resend. 
I will fix it. 

Reviewed-by: Cédric Le Goater 

C. 

> ---
>  hw/watchdog/wdt_aspeed.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/watchdog/wdt_aspeed.c b/hw/watchdog/wdt_aspeed.c
> index 145be6f99ce2..5697ed83325a 100644
> --- a/hw/watchdog/wdt_aspeed.c
> +++ b/hw/watchdog/wdt_aspeed.c
> @@ -219,7 +219,8 @@ static void aspeed_wdt_timer_expired(void *dev)
>  return;
>  }
>  
> -qemu_log_mask(CPU_LOG_RESET, "Watchdog timer expired.\n");
> +qemu_log_mask(CPU_LOG_RESET, "Watchdog timer %08lx expired.\n",
> +s->iomem.addr);
>  watchdog_perform_action();
>  timer_del(s->timer);
>  }
> 




Re: [PATCH v2] s390x: Properly fetch the short psw on diag308 subc 0/1

2019-11-11 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/2019152808.13371-1-fran...@linux.ibm.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [PATCH v2] s390x: Properly fetch the short psw on diag308 subc 0/1
Type: series
Message-id: 2019152808.13371-1-fran...@linux.ibm.com

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
74d60dc s390x: Properly fetch the short psw on diag308 subc 0/1

=== OUTPUT BEGIN ===
ERROR: code indent should never use tabs
#46: FILE: target/s390x/cpu.h:268:
+#define PSW_MASK_SHORTPSW^I0x0008ULL$

total: 1 errors, 0 warnings, 25 lines checked

Commit 74d60dc486a0 (s390x: Properly fetch the short psw on diag308 subc 0/1) 
has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/2019152808.13371-1-fran...@linux.ibm.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [PATCH 2/4] aspeed/scu: Fix W1C behavior

2019-11-11 Thread Cédric Le Goater
On 12/11/2019 07:40, Joel Stanley wrote:
> This models the clock write one to clear registers, and fixes up some
> incorrect behavior in all of the write to clear registers.
> 
> There was also a typo in one of the register definitions.
> 
> Signed-off-by: Joel Stanley 


Reviewed-by: Cédric Le Goater 

> ---
>  hw/misc/aspeed_scu.c | 11 ---
>  1 file changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/misc/aspeed_scu.c b/hw/misc/aspeed_scu.c
> index 717509bc5460..aac4645f8c3c 100644
> --- a/hw/misc/aspeed_scu.c
> +++ b/hw/misc/aspeed_scu.c
> @@ -98,7 +98,7 @@
>  #define AST2600_CLK_STOP_CTRL TO_REG(0x80)
>  #define AST2600_CLK_STOP_CTRL_CLR TO_REG(0x84)
>  #define AST2600_CLK_STOP_CTRL2 TO_REG(0x90)
> -#define AST2600_CLK_STOP_CTR2L_CLR TO_REG(0x94)
> +#define AST2600_CLK_STOP_CTRL2_CLR TO_REG(0x94)
>  #define AST2600_SDRAM_HANDSHAKE   TO_REG(0x100)
>  #define AST2600_HPLL_PARAMTO_REG(0x200)
>  #define AST2600_HPLL_EXT  TO_REG(0x204)
> @@ -532,11 +532,12 @@ static uint64_t aspeed_ast2600_scu_read(void *opaque, 
> hwaddr offset,
>  return s->regs[reg];
>  }
>  
> -static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset, uint64_t 
> data,
> +static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset, uint64_t 
> data64,
>   unsigned size)
>  {
>  AspeedSCUState *s = ASPEED_SCU(opaque);
>  int reg = TO_REG(offset);
> +uint32_t data = data64;
>  
>  if (reg >= ASPEED_AST2600_SCU_NR_REGS) {
>  qemu_log_mask(LOG_GUEST_ERROR,
> @@ -563,15 +564,19 @@ static void aspeed_ast2600_scu_write(void *opaque, 
> hwaddr offset, uint64_t data,
>  /* fall through */
>  case AST2600_SYS_RST_CTRL:
>  case AST2600_SYS_RST_CTRL2:
> +case AST2600_CLK_STOP_CTRL:
> +case AST2600_CLK_STOP_CTRL2:
>  /* W1S (Write 1 to set) registers */
>  s->regs[reg] |= data;
>  return;
>  case AST2600_SYS_RST_CTRL_CLR:
>  case AST2600_SYS_RST_CTRL2_CLR:
> +case AST2600_CLK_STOP_CTRL_CLR:
> +case AST2600_CLK_STOP_CTRL2_CLR:
>  case AST2600_HW_STRAP1_CLR:
>  case AST2600_HW_STRAP2_CLR:
>  /* W1C (Write 1 to clear) registers */
> -s->regs[reg] &= ~data;
> +s->regs[reg - 1] &= ~data;
>  return;
>  
>  case AST2600_RNG_DATA:
> 




Re: [PATCH 1/4] aspeed/sdmc: Make ast2600 default 1G

2019-11-11 Thread Cédric Le Goater
On 12/11/2019 07:40, Joel Stanley wrote:
> Most boards have this much.
> 
> Signed-off-by: Joel Stanley 


Reviewed-by: Cédric Le Goater 


> ---
>  hw/misc/aspeed_sdmc.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/misc/aspeed_sdmc.c b/hw/misc/aspeed_sdmc.c
> index f3a63a2e01db..2df3244b53c8 100644
> --- a/hw/misc/aspeed_sdmc.c
> +++ b/hw/misc/aspeed_sdmc.c
> @@ -208,10 +208,10 @@ static int ast2600_rambits(AspeedSDMCState *s)
>  }
>  
>  /* use a common default */
> -warn_report("Invalid RAM size 0x%" PRIx64 ". Using default 512M",
> +warn_report("Invalid RAM size 0x%" PRIx64 ". Using default 1024M",
>  s->ram_size);
> -s->ram_size = 512 << 20;
> -return ASPEED_SDMC_AST2600_512MB;
> +s->ram_size = 1024 << 20;
> +return ASPEED_SDMC_AST2600_1024MB;
>  }
>  
>  static void aspeed_sdmc_reset(DeviceState *dev)
> 




Re: [PATCH] pl031: Expose RTCICR as proper WC register

2019-11-11 Thread Alexander Graf

Hey Peter,

On 08.11.19 17:58, Peter Maydell wrote:

On Mon, 4 Nov 2019 at 11:52, Alexander Graf  wrote:


The current pl031 RTCICR register implementation always clears the IRQ
pending status on a register write, regardless of the value it writes.

To justify that behavior, it references the arm926e documentation
(DDI0287B) and indicates that said document states that any write clears
the internal IRQ state. I could however not find any text in that document
backing the statement. In fact, it explicitly says:

   "Writing 1 to bit 0 of RTCICR clears the RTCINTR flag."

which describes it as much as a write-to-clear register as the PL031 spec
(DDI0224) does:

   "Writing 1 to bit position 0 clears the corresponding interrupt.
Writing 0 has no effect."


DDI0287B page 11-2 section 11.1 says
"The interrupt is cleared by writing any data value to the
interrupt clear register RTCICR". As you note, this contradicts
what it says later on in section 11.2.2.

(Interestingly, the PL030 does have a "write any value to
clear the interrupt" register, RTCEOI.)

I'm fairly sure this patch is right and the DDI0287B document
has an error, since it isn't internally consistent and doesn't
match the proper PL031 TRM.

Did you find this because you had a guest that assumed the
other behaviour? This bug has been in QEMU for a very long time,
and it seems odd for a guest to deliberately perform an action
(writing 0) which is documented to have no effect on the device...


We found this bug by trying to find justification for the behavior in 
the spec and apparently my spec reading skills were lacking. I could not 
find the reference you cited above.


So no, I did not see any guest breakage.

I still think that being consistent with the actual PL031 spec is 
preferable though. If any real world guest breaks because of this, we 
can still revert this patch and document the exact breakage in the 
comment instead.



Alex



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Ralf Herbrich
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879




Re: virtio,iommu_platform=on

2019-11-11 Thread Michael S. Tsirkin
On Tue, Nov 12, 2019 at 02:53:49PM +1100, Alexey Kardashevskiy wrote:
> Hi!
> 
> I am enabling IOMMU for virtio in the pseries firmware (SLOF) and seeing
> problems, one of them is SLOF does SCSI bus scan, then it stops the
> virtio-scsi by clearing MMIO|IO|BUSMASTER from PCI_COMMAND (as SLOF
> stopped using the devices) and when this happens, I see unassigned
> memory access (see below) which happens because disabling busmaster
> disables IOMMU and QEMU cannot access the rings to do some shutdown. And
> when this happens, the device does not come back even if SLOF re-enables it.

In fact clearing bus master should disable ring access even
without the IOMMU.
Once you do this you should not wait for rings to be processed,
it is safe to assume they won't be touched again and just
free up any buffers that have not been used.

Why don't you see this without IOMMU?
It's a bug I think, probably there to work around buggy guests.

So pls fix this in SLOF and then hopefully we can drop the
work arounds and have clearing bus master actually block DMA.

> Hacking SLOF to not clear BUSMASTER makes virtio-scsi work but it is
> hardly a right fix.
> 
> Is this something expected? Thanks,
> 
> 
> Here is the exact command line:
> 
> /home/aik/pbuild/qemu-garrison2-ppc64/ppc64-softmmu/qemu-system-ppc64 \
> 
> -nodefaults \
> 
> -chardev stdio,id=STDIO0,signal=off,mux=on \
> 
> -device spapr-vty,id=svty0,reg=0x71000110,chardev=STDIO0 \
> 
> -mon id=MON0,chardev=STDIO0,mode=readline \
> 
> -nographic \
> 
> -vga none \
> 
> -enable-kvm \
> -m 2G \
> 
> -device
> virtio-scsi-pci,id=vscsi0,iommu_platform=on,disable-modern=off,disable-legacy=on
> \
> -drive id=DRIVE0,if=none,file=img/u1804-64le.qcow2,format=qcow2 \
> 
> -device scsi-disk,id=scsi-disk0,drive=DRIVE0 \
> 
> -snapshot \
> 
> -smp 1 \
> 
> -machine pseries \
> 
> -L /home/aik/t/qemu-ppc64-bios/ \
> 
> -trace events=qemu_trace_events \
> 
> -d guest_errors \
> 
> -chardev socket,id=SOCKET0,server,nowait,path=qemu.mon.ssh59518 \
> 
> -mon chardev=SOCKET0,mode=control
> 
> 
> 
> Here is the backtrace:
> 
> Thread 5 "qemu-system-ppc" hit Breakpoint 8, unassigned_mem_accepts
> (opaque=0x0, addr=0x5802, size=0x2, is_write=0x0, attrs=...) at /home/
> aik/p/qemu/memory.c:1275
> 1275return false;
> #0  unassigned_mem_accepts (opaque=0x0, addr=0x5802, size=0x2,
> is_write=0x0, attrs=...) at /home/aik/p/qemu/memory.c:1275
> #1  0x100a8ac8 in memory_region_access_valid (mr=0x1105c230
> , addr=0x5802, size=0x2, is_write=0x0, attrs=...) at
> /home/aik/p/qemu/memory.c:1377
> #2  0x100a8c88 in memory_region_dispatch_read (mr=0x1105c230
> , addr=0x5802, pval=0x7550d410, op=MO_16,
> attrs=...) at /home/aik/p/qemu/memory.c:1418
> #3  0x1001cad4 in address_space_lduw_internal_cached_slow
> (cache=0x7fff68036fa0, addr=0x2, attrs=..., result=0x0,
> endian=DEVICE_LITTLE_ENDIAN) at /home/aik/p/qemu/memory_ldst.inc.c:211
> #4  0x1001cc84 in address_space_lduw_le_cached_slow
> (cache=0x7fff68036fa0, addr=0x2, attrs=..., result=0x0) at
> /home/aik/p/qemu/memory_ldst.inc.c:249
> #5  0x1019bd80 in address_space_lduw_le_cached
> (cache=0x7fff68036fa0, addr=0x2, attrs=..., result=0x0) at
> /home/aik/p/qemu/include/exec/memory_ldst_cached.inc.h:56
> #6  0x1019c10c in lduw_le_phys_cached (cache=0x7fff68036fa0,
> addr=0x2) at /home/aik/p/qemu/include/exec/memory_ldst_phys.inc.h:91
> #7  0x1019d86c in virtio_lduw_phys_cached (vdev=0x118b9110,
> cache=0x7fff68036fa0, pa=0x2) at
> /home/aik/p/qemu/include/hw/virtio/virtio-access.h:166
> #8  0x1019e648 in vring_avail_idx (vq=0x118c2720) at
> /home/aik/p/qemu/hw/virtio/virtio.c:302
> #9  0x1019f5bc in virtio_queue_split_empty (vq=0x118c2720) at
> /home/aik/p/qemu/hw/virtio/virtio.c:581
> #10 0x1019f838 in virtio_queue_empty (vq=0x118c2720) at
> /home/aik/p/qemu/hw/virtio/virtio.c:612
> #11 0x101a8fa8 in virtio_queue_host_notifier_aio_poll
> (opaque=0x118c2798) at /home/aik/p/qemu/hw/virtio/virtio.c:3389
> #12 0x1092679c in run_poll_handlers_once (ctx=0x11212e40,
> timeout=0x7550d7d8) at /home/aik/p/qemu/util/aio-posix.c:520
> #13 0x10926aec in try_poll_mode (ctx=0x11212e40,
> timeout=0x7550d7d8) at /home/aik/p/qemu/util/aio-posix.c:607
> #14 0x10926c2c in aio_poll (ctx=0x11212e40, blocking=0x1) at
> /home/aik/p/qemu/util/aio-posix.c:639
> #15 0x1091fe0c in aio_wait_bh_oneshot (ctx=0x11212e40,
> cb=0x1016f35c , opaque=0x118b9110) at
> /home/aik/p/qemu/util/aio-wait.c:71
> #16 0x1016fa60 in virtio_scsi_dataplane_stop (vdev=0x118b9110)
> at /home/aik/p/qemu/hw/scsi/virtio-scsi-dataplane.c:211
> #17 0x10684740 in virtio_bus_stop_ioeventfd (bus=0x118b9098) at
> /home/aik/p/qemu/hw/virtio/virtio-bus.c:245
> #18 0x10688290 in virtio_pci_stop_ioeventfd (proxy=0x118b0fa0)
> at /home/aik/p/qemu/hw/virtio/virtio-pci.c:292
> #19 0x106891e8 in virtio_write_config (pci_dev=0x118b0fa0,
> 

[PULL 0/6] qtest and misc patches

2019-11-11 Thread Thomas Huth
 Hi Peter,

the following changes since commit 654efcb511d394c1d3f5292c28503d1d19e5b1d3:

  Merge remote-tracking branch 'remotes/vivier/tags/q800-branch-pull-request' 
into staging (2019-11-11 09:23:46 +)

are available in the Git repository at:

  https://gitlab.com/huth/qemu.git tags/pull-request-2019-11-12

for you to fetch changes up to 623ef637a2e42e023e7436d4bfbdc5159fb36684:

  configure: Check bzip2 is available (2019-11-11 14:35:41 +0100)


- Fix memory leaks for QTESTS
- Update MAINTAINERS file
- Check for the availability of bzip2 in "configure"


Dr. David Alan Gilbert (1):
  tests/migration: Print some debug on bad status

Jan Kiszka (1):
  MAINTAINERS: slirp: Remove myself as maintainer

Marc-André Lureau (2):
  qtest: fix qtest_qmp_device_add leak
  cpu-plug-test: fix leaks

Philippe Mathieu-Daudé (2):
  configure: Only decompress EDK2 blobs for X86/ARM targets
  configure: Check bzip2 is available

 MAINTAINERS|  2 --
 Makefile   |  2 ++
 configure  | 17 +
 tests/cpu-plug-test.c  |  2 ++
 tests/libqtest.c   |  1 +
 tests/migration-test.c |  9 +++--
 6 files changed, 29 insertions(+), 4 deletions(-)




[PULL 5/6] configure: Only decompress EDK2 blobs for X86/ARM targets

2019-11-11 Thread Thomas Huth
From: Philippe Mathieu-Daudé 

The EDK2 firmware blobs only target the X86/ARM architectures.
Define the DECOMPRESS_EDK2_BLOBS variable and only decompress
the blobs when the variable exists.

See also: 536d2173b2b ("roms: build edk2 firmware binaries ...")
Suggested-by: Thomas Huth 
Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20191108114531.21518-2-phi...@redhat.com>
Reviewed-by: Daniel P. Berrangé 
Reviewed-by: Wainer dos Santos Moschetta 
Reviewed-by: Luc Michel 
Reviewed-by: Laszlo Ersek 
Signed-off-by: Thomas Huth 
---
 Makefile  |  2 ++
 configure | 13 +
 2 files changed, 15 insertions(+)

diff --git a/Makefile b/Makefile
index aa9d1a42aa..3430eca532 100644
--- a/Makefile
+++ b/Makefile
@@ -480,7 +480,9 @@ $(SOFTMMU_ALL_RULES): $(chardev-obj-y)
 $(SOFTMMU_ALL_RULES): $(crypto-obj-y)
 $(SOFTMMU_ALL_RULES): $(io-obj-y)
 $(SOFTMMU_ALL_RULES): config-all-devices.mak
+ifdef DECOMPRESS_EDK2_BLOBS
 $(SOFTMMU_ALL_RULES): $(edk2-decompressed)
+endif
 
 .PHONY: $(TARGET_DIRS_RULES)
 # The $(TARGET_DIRS_RULES) are of the form SUBDIR/GOAL, so that
diff --git a/configure b/configure
index efe165edf9..9b322284c3 100755
--- a/configure
+++ b/configure
@@ -427,6 +427,7 @@ softmmu="yes"
 linux_user="no"
 bsd_user="no"
 blobs="yes"
+edk2_blobs="no"
 pkgversion=""
 pie=""
 qom_cast_debug="yes"
@@ -2146,6 +2147,14 @@ case " $target_list " in
   ;;
 esac
 
+for target in $target_list; do
+  case "$target" in
+arm-softmmu | aarch64-softmmu | i386-softmmu | x86_64-softmmu)
+  edk2_blobs="yes"
+  ;;
+  esac
+done
+
 feature_not_found() {
   feature=$1
   remedy=$2
@@ -7526,6 +7535,10 @@ if test "$libudev" != "no"; then
 echo "LIBUDEV_LIBS=$libudev_libs" >> $config_host_mak
 fi
 
+if test "$edk2_blobs" = "yes" ; then
+  echo "DECOMPRESS_EDK2_BLOBS=y" >> $config_host_mak
+fi
+
 # use included Linux headers
 if test "$linux" = "yes" ; then
   mkdir -p linux-headers
-- 
2.23.0




Re: [PULL 0/3] target-arm queue

2019-11-11 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/2019135803.14414-1-peter.mayd...@linaro.org/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [PULL 0/3] target-arm queue
Type: series
Message-id: 2019135803.14414-1-peter.mayd...@linaro.org

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Switched to a new branch 'test'

=== OUTPUT BEGIN ===
checkpatch.pl: no revisions returned for revlist '1'
=== OUTPUT END ===

Test command exited with code: 255


The full log is available at
http://patchew.org/logs/2019135803.14414-1-peter.mayd...@linaro.org/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

[PULL 4/6] tests/migration: Print some debug on bad status

2019-11-11 Thread Thomas Huth
From: "Dr. David Alan Gilbert" 

We're seeing occasional asserts in 'wait_for_migraiton_fail', that
I can't reliably reproduce, and where the cores don't have any useful
state.  Print the 'status' out, so we can see which unexpected state
we're ending up in.

Signed-off-by: Dr. David Alan Gilbert 
Message-Id: <20191108104307.125020-1-dgilb...@redhat.com>
Reviewed-by: Thomas Huth 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Thomas Huth 
---
 tests/migration-test.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/migration-test.c b/tests/migration-test.c
index 59f291c654..ac780dffda 100644
--- a/tests/migration-test.c
+++ b/tests/migration-test.c
@@ -899,8 +899,13 @@ static void wait_for_migration_fail(QTestState *from, bool 
allow_active)
 
 do {
 status = migrate_query_status(from);
-g_assert(!strcmp(status, "setup") || !strcmp(status, "failed") ||
- (allow_active && !strcmp(status, "active")));
+bool result = !strcmp(status, "setup") || !strcmp(status, "failed") ||
+ (allow_active && !strcmp(status, "active"));
+if (!result) {
+fprintf(stderr, "%s: unexpected status status=%s 
allow_active=%d\n",
+__func__, status, allow_active);
+}
+g_assert(result);
 failed = !strcmp(status, "failed");
 g_free(status);
 } while (!failed);
-- 
2.23.0




[PULL 3/6] MAINTAINERS: slirp: Remove myself as maintainer

2019-11-11 Thread Thomas Huth
From: Jan Kiszka 

I haven't been doing anything here for a long time, nor does my git repo
still play a role.

Signed-off-by: Jan Kiszka 
Message-Id: <759f8f44-9a01-a9f1-c7cf-65d40151a...@web.de>
Reviewed-by: Marc-André Lureau 
Acked-by: Samuel Thibault 
Signed-off-by: Thomas Huth 
---
 MAINTAINERS | 2 --
 1 file changed, 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 363e72a467..ff8d0d29f4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2143,13 +2143,11 @@ F: include/hw/registerfields.h
 
 SLIRP
 M: Samuel Thibault 
-M: Jan Kiszka 
 S: Maintained
 F: slirp/
 F: net/slirp.c
 F: include/net/slirp.h
 T: git https://people.debian.org/~sthibault/qemu.git slirp
-T: git git://git.kiszka.org/qemu.git queues/slirp
 
 Stubs
 M: Paolo Bonzini 
-- 
2.23.0




[PULL 6/6] configure: Check bzip2 is available

2019-11-11 Thread Thomas Huth
From: Philippe Mathieu-Daudé 

The bzip2 tool is not included in default installations.
On freshly installed systems, ./configure succeeds but 'make'
might fail later:

BUNZIP2 pc-bios/edk2-i386-secure-code.fd.bz2
  /bin/sh: bzip2: command not found
  make: *** [Makefile:305: pc-bios/edk2-i386-secure-code.fd] Error 127
  make: *** Deleting file 'pc-bios/edk2-i386-secure-code.fd'
  make: *** Waiting for unfinished jobs

Add a check in ./configure to warn the user if bzip2 is missing.

See also: 536d2173b2b ("roms: build edk2 firmware binaries ...")
Reported-by: Thomas Huth 
Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20191108114531.21518-3-phi...@redhat.com>
Reviewed-by: Daniel P. Berrangé 
Reviewed-by: Wainer dos Santos Moschetta 
Reviewed-by: Laszlo Ersek 
Reviewed-by: Luc Michel 
Signed-off-by: Thomas Huth 
---
 configure | 4 
 1 file changed, 4 insertions(+)

diff --git a/configure b/configure
index 9b322284c3..6099be1d84 100755
--- a/configure
+++ b/configure
@@ -2154,6 +2154,10 @@ for target in $target_list; do
   ;;
   esac
 done
+# The EDK2 binaries are compressed with bzip2
+if test "$edk2_blobs" = "yes" && ! has bzip2; then
+  error_exit "The bzip2 program is required for building QEMU"
+fi
 
 feature_not_found() {
   feature=$1
-- 
2.23.0




[PULL 2/6] cpu-plug-test: fix leaks

2019-11-11 Thread Thomas Huth
From: Marc-André Lureau 

Spotted by ASAN.

Signed-off-by: Marc-André Lureau 
Message-Id: <20191107192731.17330-4-marcandre.lur...@redhat.com>
Reviewed-by: Thomas Huth 
Fixes: 021a007efc3 ("cpu-plug-test: fix device_add for pc/q35 machines")
Signed-off-by: Thomas Huth 
---
 tests/cpu-plug-test.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c
index 058cef5ac1..30e514bbfb 100644
--- a/tests/cpu-plug-test.c
+++ b/tests/cpu-plug-test.c
@@ -99,6 +99,7 @@ static void test_plug_with_device_add(gconstpointer data)
 
 cpu = qobject_to(QDict, e);
 if (qdict_haskey(cpu, "qom-path")) {
+qobject_unref(e);
 continue;
 }
 
@@ -107,6 +108,7 @@ static void test_plug_with_device_add(gconstpointer data)
 
 qtest_qmp_device_add_qdict(qts, td->device_model, props);
 hotplugged++;
+qobject_unref(e);
 }
 
 /* make sure that there were hotplugged CPUs */
-- 
2.23.0




[PULL 1/6] qtest: fix qtest_qmp_device_add leak

2019-11-11 Thread Thomas Huth
From: Marc-André Lureau 

Spotted by ASAN.

Signed-off-by: Marc-André Lureau 
Message-Id: <20191107192731.17330-3-marcandre.lur...@redhat.com>
Reviewed-by: Thomas Huth 
Fixes: b4510bb4109f5f ("tests: add qtest_qmp_device_add_qdict() helper")
Signed-off-by: Thomas Huth 
---
 tests/libqtest.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/libqtest.c b/tests/libqtest.c
index 3706bccd8d..91e9cb220c 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -1274,6 +1274,7 @@ void qtest_qmp_device_add(QTestState *qts, const char 
*driver, const char *id,
 qdict_put_str(args, "id", id);
 
 qtest_qmp_device_add_qdict(qts, driver, args);
+qobject_unref(args);
 }
 
 static void device_deleted_cb(void *opaque, const char *name, QDict *data)
-- 
2.23.0




[PATCH 4/4] watchdog/aspeed: Fix AST2600 frequency behaviour

2019-11-11 Thread Joel Stanley
The AST2600 control register sneakily changed the meaning of bit 4
without anyone noticing. It no longer controls the 1MHz vs APB clock
select, and instead always runs at 1MHz.

The AST2500 was always 1MHz too, but it retained bit 4, making it read
only. We can model both using the same fixed 1MHz calculation.

Fixes: ea29711f467f ("watchdog/aspeed: Fix AST2600 control reg behaviour")
Signed-off-by: Joel Stanley 
---
 hw/watchdog/wdt_aspeed.c | 21 +
 include/hw/watchdog/wdt_aspeed.h |  1 +
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/hw/watchdog/wdt_aspeed.c b/hw/watchdog/wdt_aspeed.c
index 5697ed83325a..f43a3bc88976 100644
--- a/hw/watchdog/wdt_aspeed.c
+++ b/hw/watchdog/wdt_aspeed.c
@@ -93,11 +93,11 @@ static uint64_t aspeed_wdt_read(void *opaque, hwaddr 
offset, unsigned size)
 
 }
 
-static void aspeed_wdt_reload(AspeedWDTState *s, bool pclk)
+static void aspeed_wdt_reload(AspeedWDTState *s)
 {
 uint64_t reload;
 
-if (pclk) {
+if (!(s->regs[WDT_CTRL] & WDT_CTRL_1MHZ_CLK)) {
 reload = muldiv64(s->regs[WDT_RELOAD_VALUE], NANOSECONDS_PER_SECOND,
   s->pclk_freq);
 } else {
@@ -109,6 +109,16 @@ static void aspeed_wdt_reload(AspeedWDTState *s, bool pclk)
 }
 }
 
+static void aspeed_wdt_reload_1mhz(AspeedWDTState *s)
+{
+uint64_t reload = s->regs[WDT_RELOAD_VALUE] * 1000ULL;
+
+if (aspeed_wdt_is_enabled(s)) {
+timer_mod(s->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + reload);
+}
+}
+
+
 static void aspeed_wdt_write(void *opaque, hwaddr offset, uint64_t data,
  unsigned size)
 {
@@ -130,13 +140,13 @@ static void aspeed_wdt_write(void *opaque, hwaddr offset, 
uint64_t data,
 case WDT_RESTART:
 if ((data & 0x) == WDT_RESTART_MAGIC) {
 s->regs[WDT_STATUS] = s->regs[WDT_RELOAD_VALUE];
-aspeed_wdt_reload(s, !(s->regs[WDT_CTRL] & WDT_CTRL_1MHZ_CLK));
+awc->wdt_reload(s);
 }
 break;
 case WDT_CTRL:
 if (enable && !aspeed_wdt_is_enabled(s)) {
 s->regs[WDT_CTRL] = data;
-aspeed_wdt_reload(s, !(data & WDT_CTRL_1MHZ_CLK));
+awc->wdt_reload(s);
 } else if (!enable && aspeed_wdt_is_enabled(s)) {
 s->regs[WDT_CTRL] = data;
 timer_del(s->timer);
@@ -283,6 +293,7 @@ static void aspeed_2400_wdt_class_init(ObjectClass *klass, 
void *data)
 awc->offset = 0x20;
 awc->ext_pulse_width_mask = 0xff;
 awc->reset_ctrl_reg = SCU_RESET_CONTROL1;
+awc->wdt_reload = aspeed_wdt_reload;
 }
 
 static const TypeInfo aspeed_2400_wdt_info = {
@@ -317,6 +328,7 @@ static void aspeed_2500_wdt_class_init(ObjectClass *klass, 
void *data)
 awc->ext_pulse_width_mask = 0xf;
 awc->reset_ctrl_reg = SCU_RESET_CONTROL1;
 awc->reset_pulse = aspeed_2500_wdt_reset_pulse;
+awc->wdt_reload = aspeed_wdt_reload_1mhz;
 }
 
 static const TypeInfo aspeed_2500_wdt_info = {
@@ -336,6 +348,7 @@ static void aspeed_2600_wdt_class_init(ObjectClass *klass, 
void *data)
 awc->ext_pulse_width_mask = 0xf; /* TODO */
 awc->reset_ctrl_reg = AST2600_SCU_RESET_CONTROL1;
 awc->reset_pulse = aspeed_2500_wdt_reset_pulse;
+awc->wdt_reload = aspeed_wdt_reload_1mhz;
 }
 
 static const TypeInfo aspeed_2600_wdt_info = {
diff --git a/include/hw/watchdog/wdt_aspeed.h b/include/hw/watchdog/wdt_aspeed.h
index dfedd7662dd1..819c22993a6e 100644
--- a/include/hw/watchdog/wdt_aspeed.h
+++ b/include/hw/watchdog/wdt_aspeed.h
@@ -47,6 +47,7 @@ typedef struct AspeedWDTClass {
 uint32_t ext_pulse_width_mask;
 uint32_t reset_ctrl_reg;
 void (*reset_pulse)(AspeedWDTState *s, uint32_t property);
+void (*wdt_reload)(AspeedWDTState *s);
 }  AspeedWDTClass;
 
 #endif /* WDT_ASPEED_H */
-- 
2.24.0




[PATCH 2/4] aspeed/scu: Fix W1C behavior

2019-11-11 Thread Joel Stanley
This models the clock write one to clear registers, and fixes up some
incorrect behavior in all of the write to clear registers.

There was also a typo in one of the register definitions.

Signed-off-by: Joel Stanley 
---
 hw/misc/aspeed_scu.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/hw/misc/aspeed_scu.c b/hw/misc/aspeed_scu.c
index 717509bc5460..aac4645f8c3c 100644
--- a/hw/misc/aspeed_scu.c
+++ b/hw/misc/aspeed_scu.c
@@ -98,7 +98,7 @@
 #define AST2600_CLK_STOP_CTRL TO_REG(0x80)
 #define AST2600_CLK_STOP_CTRL_CLR TO_REG(0x84)
 #define AST2600_CLK_STOP_CTRL2 TO_REG(0x90)
-#define AST2600_CLK_STOP_CTR2L_CLR TO_REG(0x94)
+#define AST2600_CLK_STOP_CTRL2_CLR TO_REG(0x94)
 #define AST2600_SDRAM_HANDSHAKE   TO_REG(0x100)
 #define AST2600_HPLL_PARAMTO_REG(0x200)
 #define AST2600_HPLL_EXT  TO_REG(0x204)
@@ -532,11 +532,12 @@ static uint64_t aspeed_ast2600_scu_read(void *opaque, 
hwaddr offset,
 return s->regs[reg];
 }
 
-static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset, uint64_t 
data,
+static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset, uint64_t 
data64,
  unsigned size)
 {
 AspeedSCUState *s = ASPEED_SCU(opaque);
 int reg = TO_REG(offset);
+uint32_t data = data64;
 
 if (reg >= ASPEED_AST2600_SCU_NR_REGS) {
 qemu_log_mask(LOG_GUEST_ERROR,
@@ -563,15 +564,19 @@ static void aspeed_ast2600_scu_write(void *opaque, hwaddr 
offset, uint64_t data,
 /* fall through */
 case AST2600_SYS_RST_CTRL:
 case AST2600_SYS_RST_CTRL2:
+case AST2600_CLK_STOP_CTRL:
+case AST2600_CLK_STOP_CTRL2:
 /* W1S (Write 1 to set) registers */
 s->regs[reg] |= data;
 return;
 case AST2600_SYS_RST_CTRL_CLR:
 case AST2600_SYS_RST_CTRL2_CLR:
+case AST2600_CLK_STOP_CTRL_CLR:
+case AST2600_CLK_STOP_CTRL2_CLR:
 case AST2600_HW_STRAP1_CLR:
 case AST2600_HW_STRAP2_CLR:
 /* W1C (Write 1 to clear) registers */
-s->regs[reg] &= ~data;
+s->regs[reg - 1] &= ~data;
 return;
 
 case AST2600_RNG_DATA:
-- 
2.24.0




[PATCH 3/4] watchdog/aspeed: Improve watchdog timeout message

2019-11-11 Thread Joel Stanley
Users benefit from knowing which watchdog timer has expired. The address
of the watchdog's registers unambiguously indicates which has expired,
so log that.

Signed-off-by: Joel Stanley 
---
 hw/watchdog/wdt_aspeed.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/watchdog/wdt_aspeed.c b/hw/watchdog/wdt_aspeed.c
index 145be6f99ce2..5697ed83325a 100644
--- a/hw/watchdog/wdt_aspeed.c
+++ b/hw/watchdog/wdt_aspeed.c
@@ -219,7 +219,8 @@ static void aspeed_wdt_timer_expired(void *dev)
 return;
 }
 
-qemu_log_mask(CPU_LOG_RESET, "Watchdog timer expired.\n");
+qemu_log_mask(CPU_LOG_RESET, "Watchdog timer %08lx expired.\n",
+s->iomem.addr);
 watchdog_perform_action();
 timer_del(s->timer);
 }
-- 
2.24.0




[PATCH 0/4] arm/aspeed: Watchdog and SDRAM fixes

2019-11-11 Thread Joel Stanley
Three of these are fixes for ast2600 models that I found when testing
master. The forth is a usability improvement that is helpful when
diagnosing why a watchdog is biting.

Joel Stanley (4):
  aspeed/sdmc: Make ast2600 default 1G
  aspeed/scu: Fix W1C behavior
  watchdog/aspeed: Improve watchdog timeout message
  watchdog/aspeed: Fix AST2600 frequency behaviour

 hw/misc/aspeed_scu.c | 11 ---
 hw/misc/aspeed_sdmc.c|  6 +++---
 hw/watchdog/wdt_aspeed.c | 24 +++-
 include/hw/watchdog/wdt_aspeed.h |  1 +
 4 files changed, 31 insertions(+), 11 deletions(-)

-- 
2.24.0




[PATCH 1/4] aspeed/sdmc: Make ast2600 default 1G

2019-11-11 Thread Joel Stanley
Most boards have this much.

Signed-off-by: Joel Stanley 
---
 hw/misc/aspeed_sdmc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/misc/aspeed_sdmc.c b/hw/misc/aspeed_sdmc.c
index f3a63a2e01db..2df3244b53c8 100644
--- a/hw/misc/aspeed_sdmc.c
+++ b/hw/misc/aspeed_sdmc.c
@@ -208,10 +208,10 @@ static int ast2600_rambits(AspeedSDMCState *s)
 }
 
 /* use a common default */
-warn_report("Invalid RAM size 0x%" PRIx64 ". Using default 512M",
+warn_report("Invalid RAM size 0x%" PRIx64 ". Using default 1024M",
 s->ram_size);
-s->ram_size = 512 << 20;
-return ASPEED_SDMC_AST2600_512MB;
+s->ram_size = 1024 << 20;
+return ASPEED_SDMC_AST2600_1024MB;
 }
 
 static void aspeed_sdmc_reset(DeviceState *dev)
-- 
2.24.0




RE: [RFC v2 10/22] intel_iommu: add virtual command capability support

2019-11-11 Thread Liu, Yi L
> From: Peter Xu 
> Sent: Wednesday, November 6, 2019 10:01 PM
> To: Liu, Yi L 
> Subject: Re: [RFC v2 10/22] intel_iommu: add virtual command capability 
> support
> 
> On Wed, Nov 06, 2019 at 12:40:41PM +, Liu, Yi L wrote:
> > >
> > > Do you know what should happen on bare-metal from spec-wise that when
> > > the guest e.g. writes 2 bytes to these mmio regions?
> >
> > I've no idea to your question. It is not a bare-metal capability. 
> > Personally, I
> > prefer to have a toggle bit to mark the full written of a cmd to VMCD_REG.
> > Reason is that we have no control on guest software. It may write new cmd
> > to VCMD_REG in a bad manner. e.g. write high 32 bits first and then write 
> > the
> > low 32 bits. Then it will have two traps. Apparently, for the first trap, 
> > it fills
> > in the VCMD_REG and no need to handle it since it is not a full written. I'm
> > checking it and evaluating it. How do you think on it?
> 
> Oh I just noticed that vtd_mem_ops.min_access_size==4 now so writting
> 2B should never happen at least.  Then we'll bail out at
> memory_region_access_valid().  Seems fine.

got it.

> 
> >
> > >
> > > > +if (!vtd_handle_vcmd_write(s, val)) {
> > > > +vtd_set_long(s, addr, val);
> > > > +}
> > > > +break;
> > > > +
> > > >  default:
> > > >  if (size == 4) {
> > > >  vtd_set_long(s, addr, val);
> > > > @@ -3617,7 +3769,8 @@ static void vtd_init(IntelIOMMUState *s)
> > > >  s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
> > > >  } else if (!strcmp(s->scalable_mode, "modern")) {
> > > >  s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_PASID
> > > > -   | VTD_ECAP_FLTS | VTD_ECAP_PSS;
> > > > +   | VTD_ECAP_FLTS | VTD_ECAP_PSS | VTD_ECAP_VCS;
> > > > +s->vccap |= VTD_VCCAP_PAS;
> > > >  }
> > > >  }
> > > >
> > >
> > > [...]
> > >
> > > > +#define VTD_VCMD_CMD_MASK   0xffUL
> > > > +#define VTD_VCMD_PASID_VALUE(val)   (((val) >> 8) & 0xf)
> > > > +
> > > > +#define VTD_VCRSP_RSLT(val) ((val) << 8)
> > > > +#define VTD_VCRSP_SC(val)   (((val) & 0x3) << 1)
> > > > +
> > > > +#define VTD_VCMD_UNDEFINED_CMD 1ULL
> > > > +#define VTD_VCMD_NO_AVAILABLE_PASID2ULL
> > >
> > > According to 10.4.44 - should this be 1?
> >
> > It's 2 now per VT-d spec 3.1 (2019 June). I should have mentioned it in the 
> > cover
> > letter...
> 
> Well you're right... I hope there won't be other "major" things get
> changed otherwise it'll be really a pain of working on all of these
> before things settle...

As far as I know, only this part has significant change. Other parts are 
consistent.
I'll mention spec version next time in the cover letter.

Thanks,
Yi Liu



Re: [virtio-dev] Re: guest / host buffer sharing ...

2019-11-11 Thread Gurchetan Singh
On Tue, Nov 5, 2019 at 2:55 AM Gerd Hoffmann  wrote:
> Each buffer also has some properties to carry metadata, some fixed (id, size, 
> application), but
> also allow free form (name = value, framebuffers would have
> width/height/stride/format for example).

Sounds a lot like the recently added DMA_BUF_SET_NAME ioctls:

https://patchwork.freedesktop.org/patch/310349/

For virtio-wayland + virtio-vdec, the problem is sharing -- not allocation.

As the buffer reaches a kernel boundary, it's properties devolve into
[fd, size].  Userspace can typically handle sharing metadata.  The
issue is the guest dma-buf fd doesn't mean anything on the host.

One scenario could be:

1) Guest userspace (say, gralloc) allocates using virtio-gpu.  When
allocating, we call uuidgen() and then pass that via RESOURCE_CREATE
hypercall to the host.
2) When exporting the dma-buf, we call DMA_BUF_SET_NAME (the buffer
name will be "virtgpu-buffer-${UUID}").
3) When importing, virtio-{vdec, video} reads the dma-buf name in
userspace, and calls fd to handle.  The name is sent to the host via a
hypercall, giving host virtio-{vdec, video} enough information to
identify the buffer.

This solution is entirely userspace -- we can probably come up with
something in kernel space [generate_random_uuid()] if need be.  We
only need two universal IDs: {device ID, buffer ID}.

> On Wed, Nov 6, 2019 at 2:28 PM Geoffrey McRae  wrote:
> The entire point of this for our purposes is due to the fact that we can
> not allocate the buffer, it's either provided by the GPU driver or
> DirectX. If virtio-gpu were to allocate the buffer we might as well
> forget
> all this and continue using the ivshmem device.

We have a similar problem with closed source drivers.  As @lfy
mentioned, it's possible to map memory directory into virtio-gpu's PCI
bar and it's actually a planned feature.  Would that work for your use
case?



Re: [PATCH] enable translating statx syscalls on more arches

2019-11-11 Thread Andrew Kelley
ping

On 10/16/19 5:01 PM, Andrew Kelley wrote:
> Signed-off-by: Andrew Kelley 
> ---
>  linux-user/aarch64/syscall_nr.h | 13 ++
>  linux-user/arm/syscall_nr.h | 38 
>  linux-user/i386/syscall_nr.h| 43 
>  linux-user/mips/cpu_loop.c  |  6 +
>  linux-user/ppc/syscall_nr.h | 44 +
>  5 files changed, 144 insertions(+)
> 
> diff --git a/linux-user/aarch64/syscall_nr.h
> b/linux-user/aarch64/syscall_nr.h
> index f00ffd7fb8..4e8d0bbb15 100644
> --- a/linux-user/aarch64/syscall_nr.h
> +++ b/linux-user/aarch64/syscall_nr.h
> @@ -276,5 +276,18 @@
>  #define TARGET_NR_membarrier 283
>  #define TARGET_NR_mlock2 284
>  #define TARGET_NR_copy_file_range 285
> +#define TARGET_NR_preadv2 286
> +#define TARGET_NR_pwritev2 287
> +#define TARGET_NR_pkey_mprotect 288
> +#define TARGET_NR_pkey_alloc 289
> +#define TARGET_NR_pkey_free 290
> +#define TARGET_NR_statx 291
> +#define TARGET_NR_io_pgetevents 292
> +#define TARGET_NR_rseq 293
> +#define TARGET_NR_kexec_file_load 294
> +#define TARGET_NR_pidfd_send_signal 424
> +#define TARGET_NR_io_uring_setup 425
> +#define TARGET_NR_io_uring_enter 426
> +#define TARGET_NR_io_uring_register 427
> 
>  #endif
> diff --git a/linux-user/arm/syscall_nr.h b/linux-user/arm/syscall_nr.h
> index e7eda0d766..20afa3992a 100644
> --- a/linux-user/arm/syscall_nr.h
> +++ b/linux-user/arm/syscall_nr.h
> @@ -400,4 +400,42 @@
>  #define TARGET_NR_membarrier   (389)
>  #define TARGET_NR_mlock2   (390)
> 
> +#define TARGET_NR_copy_file_range  (391)
> +#define TARGET_NR_preadv2  (392)
> +#define TARGET_NR_pwritev2 (393)
> +#define TARGET_NR_pkey_mprotect(394)
> +#define TARGET_NR_pkey_alloc   (395)
> +#define TARGET_NR_pkey_free(396)
> +#define TARGET_NR_statx(397)
> +#define TARGET_NR_rseq (398)
> +#define TARGET_NR_io_pgetevents(399)
> +#define TARGET_NR_migrate_pages(400)
> +
> +#define TARGET_NR_kexec_file_load  (401)
> +#define TARGET_NR_clock_gettime64  (403)
> +#define TARGET_NR_clock_settime64  (404)
> +#define TARGET_NR_clock_adjtime64  (405)
> +#define TARGET_NR_clock_getres_time64  (406)
> +#define TARGET_NR_clock_nanosleep_time64   (407)
> +#define TARGET_NR_timer_gettime64  (408)
> +#define TARGET_NR_timer_settime64  (409)
> +#define TARGET_NR_timerfd_gettime64(410)
> +
> +#define TARGET_NR_timerfd_settime64(411)
> +#define TARGET_NR_utimensat_time64 (412)
> +#define TARGET_NR_pselect6_time64  (413)
> +#define TARGET_NR_ppoll_time64 (414)
> +#define TARGET_NR_io_pgetevents_time64 (416)
> +#define TARGET_NR_recvmmsg_time64  (417)
> +#define TARGET_NR_mq_timedsend_time64  (418)
> +#define TARGET_NR_mq_timedreceive_time64   (419)
> +#define TARGET_NR_semtimedop_time64(420)
> +
> +#define TARGET_NR_rt_sigtimedwait_time64   (421)
> +#define TARGET_NR_futex_time64 (422)
> +#define TARGET_NR_sched_rr_get_interval_time64 (423)
> +#define TARGET_NR_pidfd_send_signal(424)
> +#define TARGET_NR_io_uring_setup   (425)
> +#define TARGET_NR_io_uring_enter   (426)
> +#define TARGET_NR_io_uring_register(427)
>  #endif
> diff --git a/linux-user/i386/syscall_nr.h b/linux-user/i386/syscall_nr.h
> index 3234ec21c6..e641674daf 100644
> --- a/linux-user/i386/syscall_nr.h
> +++ b/linux-user/i386/syscall_nr.h
> @@ -383,5 +383,48 @@
>  #define TARGET_NR_membarrier375
>  #define TARGET_NR_mlock2376
>  #define TARGET_NR_copy_file_range   377
> +#define TARGET_NR_preadv2 378
> +#define TARGET_NR_pwritev2 379
> +#define TARGET_NR_pkey_mprotect 380
> +#define TARGET_NR_pkey_alloc 381
> +#define TARGET_NR_pkey_free 382
> +#define TARGET_NR_statx 383
> +#define TARGET_NR_arch_prctl 384
> +#define TARGET_NR_io_pgetevents 385
> +#define TARGET_NR_rseq 386
> +#define TARGET_NR_semget 393
> +#define TARGET_NR_semctl 394
> +#define TARGET_NR_shmget 395
> +#define TARGET_NR_shmctl 396
> +#define TARGET_NR_shmat 397
> +#define TARGET_NR_shmdt 398
> +#define TARGET_NR_msgget 399
> +#define TARGET_NR_msgsnd 400
> +#define TARGET_NR_msgrcv 401
> +#define TARGET_NR_msgctl 402
> +#define TARGET_NR_clock_gettime64 403
> +#define TARGET_NR_clock_settime64 404
> +#define TARGET_NR_clock_adjtime64 405
> +#define TARGET_NR_clock_getres_time64 406
> +#define TARGET_NR_clock_nanosleep_time64 407
> +#define TARGET_NR_timer_gettime64 408
> +#define TARGET_NR_timer_settime64 409
> +#define TARGET_NR_timerfd_gettime64 410
> +#define TARGET_NR_timerfd_settime64 411
> +#define TARGET_NR_utimensat_time64 412
> +#define 

virtio,iommu_platform=on

2019-11-11 Thread Alexey Kardashevskiy
Hi!

I am enabling IOMMU for virtio in the pseries firmware (SLOF) and seeing
problems, one of them is SLOF does SCSI bus scan, then it stops the
virtio-scsi by clearing MMIO|IO|BUSMASTER from PCI_COMMAND (as SLOF
stopped using the devices) and when this happens, I see unassigned
memory access (see below) which happens because disabling busmaster
disables IOMMU and QEMU cannot access the rings to do some shutdown. And
when this happens, the device does not come back even if SLOF re-enables it.

Hacking SLOF to not clear BUSMASTER makes virtio-scsi work but it is
hardly a right fix.

Is this something expected? Thanks,


Here is the exact command line:

/home/aik/pbuild/qemu-garrison2-ppc64/ppc64-softmmu/qemu-system-ppc64 \

-nodefaults \

-chardev stdio,id=STDIO0,signal=off,mux=on \

-device spapr-vty,id=svty0,reg=0x71000110,chardev=STDIO0 \

-mon id=MON0,chardev=STDIO0,mode=readline \

-nographic \

-vga none \

-enable-kvm \
-m 2G \

-device
virtio-scsi-pci,id=vscsi0,iommu_platform=on,disable-modern=off,disable-legacy=on
\
-drive id=DRIVE0,if=none,file=img/u1804-64le.qcow2,format=qcow2 \

-device scsi-disk,id=scsi-disk0,drive=DRIVE0 \

-snapshot \

-smp 1 \

-machine pseries \

-L /home/aik/t/qemu-ppc64-bios/ \

-trace events=qemu_trace_events \

-d guest_errors \

-chardev socket,id=SOCKET0,server,nowait,path=qemu.mon.ssh59518 \

-mon chardev=SOCKET0,mode=control



Here is the backtrace:

Thread 5 "qemu-system-ppc" hit Breakpoint 8, unassigned_mem_accepts
(opaque=0x0, addr=0x5802, size=0x2, is_write=0x0, attrs=...) at /home/
aik/p/qemu/memory.c:1275
1275return false;
#0  unassigned_mem_accepts (opaque=0x0, addr=0x5802, size=0x2,
is_write=0x0, attrs=...) at /home/aik/p/qemu/memory.c:1275
#1  0x100a8ac8 in memory_region_access_valid (mr=0x1105c230
, addr=0x5802, size=0x2, is_write=0x0, attrs=...) at
/home/aik/p/qemu/memory.c:1377
#2  0x100a8c88 in memory_region_dispatch_read (mr=0x1105c230
, addr=0x5802, pval=0x7550d410, op=MO_16,
attrs=...) at /home/aik/p/qemu/memory.c:1418
#3  0x1001cad4 in address_space_lduw_internal_cached_slow
(cache=0x7fff68036fa0, addr=0x2, attrs=..., result=0x0,
endian=DEVICE_LITTLE_ENDIAN) at /home/aik/p/qemu/memory_ldst.inc.c:211
#4  0x1001cc84 in address_space_lduw_le_cached_slow
(cache=0x7fff68036fa0, addr=0x2, attrs=..., result=0x0) at
/home/aik/p/qemu/memory_ldst.inc.c:249
#5  0x1019bd80 in address_space_lduw_le_cached
(cache=0x7fff68036fa0, addr=0x2, attrs=..., result=0x0) at
/home/aik/p/qemu/include/exec/memory_ldst_cached.inc.h:56
#6  0x1019c10c in lduw_le_phys_cached (cache=0x7fff68036fa0,
addr=0x2) at /home/aik/p/qemu/include/exec/memory_ldst_phys.inc.h:91
#7  0x1019d86c in virtio_lduw_phys_cached (vdev=0x118b9110,
cache=0x7fff68036fa0, pa=0x2) at
/home/aik/p/qemu/include/hw/virtio/virtio-access.h:166
#8  0x1019e648 in vring_avail_idx (vq=0x118c2720) at
/home/aik/p/qemu/hw/virtio/virtio.c:302
#9  0x1019f5bc in virtio_queue_split_empty (vq=0x118c2720) at
/home/aik/p/qemu/hw/virtio/virtio.c:581
#10 0x1019f838 in virtio_queue_empty (vq=0x118c2720) at
/home/aik/p/qemu/hw/virtio/virtio.c:612
#11 0x101a8fa8 in virtio_queue_host_notifier_aio_poll
(opaque=0x118c2798) at /home/aik/p/qemu/hw/virtio/virtio.c:3389
#12 0x1092679c in run_poll_handlers_once (ctx=0x11212e40,
timeout=0x7550d7d8) at /home/aik/p/qemu/util/aio-posix.c:520
#13 0x10926aec in try_poll_mode (ctx=0x11212e40,
timeout=0x7550d7d8) at /home/aik/p/qemu/util/aio-posix.c:607
#14 0x10926c2c in aio_poll (ctx=0x11212e40, blocking=0x1) at
/home/aik/p/qemu/util/aio-posix.c:639
#15 0x1091fe0c in aio_wait_bh_oneshot (ctx=0x11212e40,
cb=0x1016f35c , opaque=0x118b9110) at
/home/aik/p/qemu/util/aio-wait.c:71
#16 0x1016fa60 in virtio_scsi_dataplane_stop (vdev=0x118b9110)
at /home/aik/p/qemu/hw/scsi/virtio-scsi-dataplane.c:211
#17 0x10684740 in virtio_bus_stop_ioeventfd (bus=0x118b9098) at
/home/aik/p/qemu/hw/virtio/virtio-bus.c:245
#18 0x10688290 in virtio_pci_stop_ioeventfd (proxy=0x118b0fa0)
at /home/aik/p/qemu/hw/virtio/virtio-pci.c:292
#19 0x106891e8 in virtio_write_config (pci_dev=0x118b0fa0,
address=0x4, val=0x100100, len=0x4) at
/home/aik/p/qemu/hw/virtio/virtio-pci.c:613
#20 0x105b7228 in pci_host_config_write_common
(pci_dev=0x118b0fa0, addr=0x4, limit=0x100, val=0x100100, len=0x4) at
/home/aik/p/qemu/hw/pci/pci_host.c:81
#21 0x101f7bdc in finish_write_pci_config (spapr=0x11217200,
buid=0x8002000, addr=0x4, size=0x4, val=0x100100,
rets=0x7e7533e0) at /home/aik/p/qemu/hw/ppc/spapr_pci.c:192
#22 0x101f7cec in rtas_ibm_write_pci_config (cpu=0x11540df0,
spapr=0x11217200, token=0x2017, nargs=0x5, args=0x7e7533cc, nret=0x1,
rets=0x7e7533e0) at /home/aik/p/qemu/hw/ppc/spapr_pci.c:216
#23 0x101f5860 in spapr_rtas_call (cpu=0x11540df0,
spapr=0x11217200, token=0x2017, nargs=0x5, args=0x7e7533cc, nret=0x1,

[PATCH] i386: define the 'flush_l1d' CPUID feature bit (CVE-2018-3646)

2019-11-11 Thread Kyle Copperfield via
New microcode introduces the "Flush L1D Cache" CPUID feature bit.
This needs to be exposed to guest OS to allow them to protect against
CVE-2018-3646.

Signed-off-by: Kyle Copperfield 
---
 docs/qemu-cpu-models.texi | 7 +++
 target/i386/cpu.c | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/docs/qemu-cpu-models.texi b/docs/qemu-cpu-models.texi
index f88a1def0d..1b5349d86a 100644
--- a/docs/qemu-cpu-models.texi
+++ b/docs/qemu-cpu-models.texi
@@ -180,6 +180,13 @@ Must be explicitly turned on for all Intel CPU models.
 Requires the host CPU microcode to support this feature before it
 can be used for guest CPUs.
 
+@item @code{flush_l1d}
+
+Required to enable strong Foreshadow-NG (VMM) (CVE-2018-3646) fixes in
+guests.
+
+Requires the host CPU microcode to support this feature before it
+can be used for guest CPUs.
 
 @item @code{ssbd}
 
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index a624163ac2..1fb6d677e2 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1083,7 +1083,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = 
{
 NULL, NULL, NULL /* pconfig */, NULL,
 NULL, NULL, NULL, NULL,
 NULL, NULL, "spec-ctrl", "stibp",
-NULL, "arch-capabilities", "core-capability", "ssbd",
+"flush_l1d", "arch-capabilities", "core-capability", "ssbd",
 },
 .cpuid = {
 .eax = 7,
-- 
2.24.0




[PATCH qemu] scripts: Detect git worktrees for get_maintainer.pl --git

2019-11-11 Thread Alexey Kardashevskiy
Recent git versions support worktrees where .git is not a directory but
a file with a path to the .git repository; however the get_maintainer.pl
script only recognises the .git directory, let's fix it.

Signed-off-by: Alexey Kardashevskiy 
---
 scripts/get_maintainer.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 71415e3c7061..27991eb1cfb4 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -81,7 +81,7 @@ my %VCS_cmds;
 
 my %VCS_cmds_git = (
 "execute_cmd" => \_execute_cmd,
-"available" => '(which("git") ne "") && (-d ".git")',
+"available" => '(which("git") ne "") && (-e ".git")',
 "find_signers_cmd" =>
"git log --no-color --follow --since=\$email_git_since " .
'--format="GitCommit: %H%n' .
-- 
2.17.1




Re: [PATCH v4] iotests: Test NBD client reconnection

2019-11-11 Thread Andrey Shinkevich



On 11/11/2019 20:53, Eric Blake wrote:
> On 11/11/19 4:04 AM, Andrey Shinkevich wrote:
>> The test for an NBD client. The NBD server is disconnected after the
>> client write request. The NBD client should reconnect and complete
>> the write operation.
>>
>> Suggested-by: Denis V. Lunev 
>> Suggested-by: Vladimir Sementsov-Ogievskiy 
>> Signed-off-by: Andrey Shinkevich 
>> ---
>> v3 was discussed in the email thread with the message ID
>> <1572627272-23359-1-git-send-email-andrey.shinkev...@virtuozzo.com>
>>
> 
>> +import os
>> +import subprocess
>> +import iotests
>> +from iotests import file_path, log
>> +
>> +
>> +nbd_sock, conf_file = file_path('nbd-sock', "nbd-fault-injector.conf")
> 
> Mixing '' and "" in the same line is odd.  I don't know if we have a 
> strong preference for one style over the other.
> 
>> +++ b/tests/qemu-iotests/group
>> @@ -284,3 +284,4 @@
>>  268 rw auto quick
>>  270 rw backing quick
>>  272 rw
>> +277 rw
> 
> The test completes in 3 seconds for me; we could add 'quick'.
> 
> Otherwise, looks reasonable to me.
> 
> Reviewed-by: Eric Blake 
> Tested-by: Eric Blake 
> 
> I'll probably queue this through my NBD tree for rc1, as it adds test 
> coverage of a new feature for the 4.2 release.
> 
Eric,
Thank you very much.
Please let me know when the patch is queued.
-- 
With the best regards,
Andrey Shinkevich




[PATCH v5] iotests: Test NBD client reconnection

2019-11-11 Thread Andrey Shinkevich
The test for an NBD client. The NBD server is disconnected after the
client write request. The NBD client should reconnect and complete
the write operation.

Suggested-by: Denis V. Lunev 
Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Andrey Shinkevich 
Reviewed-by: Eric Blake 
Tested-by: Eric Blake 
---
v5:  "" were replaced with '' in the test except the function comments.
The 'quick' word was added to the 'qroup' file next to the test #277.

 tests/qemu-iotests/277   | 96 
 tests/qemu-iotests/277.out   |  6 ++
 tests/qemu-iotests/group |  1 +
 tests/qemu-iotests/iotests.py|  5 ++
 tests/qemu-iotests/nbd-fault-injector.py |  3 +-
 5 files changed, 110 insertions(+), 1 deletion(-)
 create mode 100755 tests/qemu-iotests/277
 create mode 100644 tests/qemu-iotests/277.out

diff --git a/tests/qemu-iotests/277 b/tests/qemu-iotests/277
new file mode 100755
index 000..1f72dca
--- /dev/null
+++ b/tests/qemu-iotests/277
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+#
+# Test NBD client reconnection
+#
+# Copyright (c) 2019 Virtuozzo International GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+import os
+import subprocess
+import iotests
+from iotests import file_path, log
+
+
+nbd_sock, conf_file = file_path('nbd-sock', 'nbd-fault-injector.conf')
+
+
+def make_conf_file(event):
+"""
+Create configuration file for the nbd-fault-injector.py
+
+:param event: which event the server should close a connection on
+"""
+with open(conf_file, 'w') as conff:
+conff.write('[inject-error]\nevent={}\nwhen=after'.format(event))
+
+
+def start_server_NBD(event):
+make_conf_file(event)
+
+srv = subprocess.Popen(['nbd-fault-injector.py', '--classic-negotiation',
+   nbd_sock, conf_file], stdout=subprocess.PIPE,
+   stderr=subprocess.STDOUT, universal_newlines=True)
+line = srv.stdout.readline()
+if 'Listening on ' in line:
+log('NBD server: started')
+else:
+log('NBD server: ' + line.rstrip())
+
+return srv
+
+
+def start_client_NBD():
+log('NBD client: QEMU-IO write')
+args = iotests.qemu_io_args_no_fmt + \
+['-c', 'write -P 0x7 0 3M', '--image-opts',
+ 'driver=nbd,server.type=unix,server.path={},'
+ 'reconnect-delay=7'.format(nbd_sock)]
+clt = subprocess.Popen(args, stdout=subprocess.PIPE,
+   stderr=subprocess.STDOUT,
+   universal_newlines=True)
+return clt
+
+
+def check_proc_NBD(proc, connector):
+try:
+outs, errs = proc.communicate(timeout=10)
+
+if proc.returncode < 0:
+log('NBD {}: EXIT SIGNAL {}\n'.format(connector, proc.returncode))
+log(outs)
+else:
+msg = outs.split('\n', 1)
+log('NBD {}: {}'.format(connector, msg[0]))
+
+except subprocess.TimeoutExpired:
+proc.kill()
+log('NBD {}: ERROR timeout expired'.format(connector))
+finally:
+if connector == 'server':
+os.remove(nbd_sock)
+os.remove(conf_file)
+
+
+srv = start_server_NBD('data')
+clt = start_client_NBD()
+# The server should close the connection after a client write request
+check_proc_NBD(srv, 'server')
+# Start the NBD server again
+srv = start_server_NBD('reply')
+# The client should reconnect and complete the write operation
+check_proc_NBD(clt, 'client')
+# Make it sure that server terminated
+check_proc_NBD(srv, 'server')
diff --git a/tests/qemu-iotests/277.out b/tests/qemu-iotests/277.out
new file mode 100644
index 000..45404b3
--- /dev/null
+++ b/tests/qemu-iotests/277.out
@@ -0,0 +1,6 @@
+NBD server: started
+NBD client: QEMU-IO write
+NBD server: Closing connection on rule match inject-error
+NBD server: started
+NBD client: wrote 3145728/3145728 bytes at offset 0
+NBD server: Closing connection on rule match inject-error
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 0650403..6a9acfb 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -284,3 +284,4 @@
 268 rw auto quick
 270 rw backing quick
 272 rw
+277 rw quick
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 075f473..295b3e4 100644
--- a/tests/qemu-iotests/iotests.py
+++ 

[PATCH V4] target/i386/kvm: Add Hyper-V direct tlb flush support

2019-11-11 Thread lantianyu1986
From: Tianyu Lan 

Hyper-V direct tlb flush targets KVM on Hyper-V guest.
Enable direct TLB flush for its guests meaning that TLB
flush hypercalls are handled by Level 0 hypervisor (Hyper-V)
bypassing KVM in Level 1. Due to the different ABI for hypercall
parameters between Hyper-V and KVM, KVM capabilities should be
hidden when enable Hyper-V direct tlb flush otherwise KVM
hypercalls may be intercepted by Hyper-V. Add new parameter
"hv-direct-tlbflush". Check expose_kvm and Hyper-V tlb flush
capability status before enabling the feature.

Signed-off-by: Tianyu Lan 
---
Change since v3:
   - Fix logic of Hyper-V passthrough mode with direct
   tlb flush.

Change sicne v2:
   - Update new feature description and name.
   - Change failure print log.

Change since v1:
   - Add direct tlb flush's Hyper-V property and use
   hv_cpuid_check_and_set() to check the dependency of tlbflush
   feature.
   - Make new feature work with Hyper-V passthrough mode.
---
 docs/hyperv.txt   | 10 ++
 target/i386/cpu.c |  2 ++
 target/i386/cpu.h |  1 +
 target/i386/kvm.c | 24 
 4 files changed, 37 insertions(+)

diff --git a/docs/hyperv.txt b/docs/hyperv.txt
index 8fdf25c829..140a5c7e44 100644
--- a/docs/hyperv.txt
+++ b/docs/hyperv.txt
@@ -184,6 +184,16 @@ enabled.
 
 Requires: hv-vpindex, hv-synic, hv-time, hv-stimer
 
+3.18. hv-direct-tlbflush
+===
+Enable direct TLB flush for KVM when it is running as a nested
+hypervisor on top Hyper-V. When enabled, TLB flush hypercalls from L2
+guests are being passed through to L0 (Hyper-V) for handling. Due to ABI
+differences between Hyper-V and KVM hypercalls, L2 guests will not be
+able to issue KVM hypercalls (as those could be mishanled by L0
+Hyper-V), this requires KVM hypervisor signature to be hidden.
+
+Requires: hv-tlbflush, -kvm
 
 4. Development features
 
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 44f1bbdcac..7bc7fee512 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6156,6 +6156,8 @@ static Property x86_cpu_properties[] = {
   HYPERV_FEAT_IPI, 0),
 DEFINE_PROP_BIT64("hv-stimer-direct", X86CPU, hyperv_features,
   HYPERV_FEAT_STIMER_DIRECT, 0),
+DEFINE_PROP_BIT64("hv-direct-tlbflush", X86CPU, hyperv_features,
+  HYPERV_FEAT_DIRECT_TLBFLUSH, 0),
 DEFINE_PROP_BOOL("hv-passthrough", X86CPU, hyperv_passthrough, false),
 
 DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true),
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index eaa5395aa5..3cb105f7d6 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -907,6 +907,7 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
 #define HYPERV_FEAT_EVMCS   12
 #define HYPERV_FEAT_IPI 13
 #define HYPERV_FEAT_STIMER_DIRECT   14
+#define HYPERV_FEAT_DIRECT_TLBFLUSH 15
 
 #ifndef HYPERV_SPINLOCK_NEVER_RETRY
 #define HYPERV_SPINLOCK_NEVER_RETRY 0x
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 11b9c854b5..43f5cbc3f6 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -900,6 +900,10 @@ static struct {
 },
 .dependencies = BIT(HYPERV_FEAT_STIMER)
 },
+[HYPERV_FEAT_DIRECT_TLBFLUSH] = {
+.desc = "direct paravirtualized TLB flush (hv-direct-tlbflush)",
+.dependencies = BIT(HYPERV_FEAT_TLBFLUSH)
+},
 };
 
 static struct kvm_cpuid2 *try_get_hv_cpuid(CPUState *cs, int max)
@@ -1224,6 +1228,7 @@ static int hyperv_handle_properties(CPUState *cs,
 r |= hv_cpuid_check_and_set(cs, cpuid, HYPERV_FEAT_EVMCS);
 r |= hv_cpuid_check_and_set(cs, cpuid, HYPERV_FEAT_IPI);
 r |= hv_cpuid_check_and_set(cs, cpuid, HYPERV_FEAT_STIMER_DIRECT);
+r |= hv_cpuid_check_and_set(cs, cpuid, HYPERV_FEAT_DIRECT_TLBFLUSH);
 
 /* Additional dependencies not covered by kvm_hyperv_properties[] */
 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC) &&
@@ -1243,6 +1248,25 @@ static int hyperv_handle_properties(CPUState *cs,
 goto free;
 }
 
+if (hyperv_feat_enabled(cpu, HYPERV_FEAT_DIRECT_TLBFLUSH)) {
+if (kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_DIRECT_TLBFLUSH, 0, 0)) {
+if (!cpu->hyperv_passthrough) {
+fprintf(stderr,
+"Hyper-V %s is not supported by kernel\n",
+kvm_hyperv_properties[HYPERV_FEAT_DIRECT_TLBFLUSH].desc);
+return -ENOSYS;
+}
+
+cpu->hyperv_features &= ~BIT(HYPERV_FEAT_DIRECT_TLBFLUSH);
+} else if (cpu->expose_kvm) {
+fprintf(stderr,
+"Hyper-V %s requires KVM hypervisor signature "
+"to be hidden (-kvm).\n",
+kvm_hyperv_properties[HYPERV_FEAT_DIRECT_TLBFLUSH].desc);
+return -ENOSYS;
+}
+}
+
 if (cpu->hyperv_passthrough) {
 /* We already copied all feature words from KVM as is */
 

Re: [PATCH v7 3/8] Acceptance tests: use avocado tags for machine type

2019-11-11 Thread Cleber Rosa
On Fri, Nov 08, 2019 at 02:20:45PM +0100, Philippe Mathieu-Daudé wrote:
> On 11/4/19 4:13 PM, Cleber Rosa wrote:
> >   """
> > -self.vm.set_machine('none')
> >   self.vm.add_args('-S')
> >   self.vm.launch()
> > diff --git a/tests/acceptance/linux_initrd.py 
> > b/tests/acceptance/linux_initrd.py
> > index c61d9826a4..3a0ff7b098 100644
> > --- a/tests/acceptance/linux_initrd.py
> > +++ b/tests/acceptance/linux_initrd.py
> > @@ -20,6 +20,7 @@ class LinuxInitrd(Test):
> >   Checks QEMU evaluates correctly the initrd file passed as -initrd 
> > option.
> >   :avocado: tags=arch:x86_64
> > +:avocado: tags=machine:pc
> 
> For some tests we can run on multiple machines (here q35), I was tempted to
> use multiple tags. How could I do that now?
>

I missed this comment: you can add many tag values here to *classify*
the test as being "q35 machine type capable".

But, Avocado will only run a test multiple times with a varianter
plugin active.  In that case, a "machine" *parameter* with different
values will be passed to the tests.  This tag value is being used
as a default value for the parameter, so it has a lower precedence.

We have a pending task[1] to create an initial CIT file for arch and
machine types.

CC'ing Jan Richter, who is supposed to start working on it soon.

- Cleber.

[1] - 
https://trello.com/c/1wvzcxHY/105-create-cit-parameter-for-acceptance-tests




Re: [RFC][PATCH 0/3] IVSHMEM version 2 device for QEMU

2019-11-11 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/cover.1573477032.git.jan.kis...@siemens.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [RFC][PATCH 0/3] IVSHMEM version 2 device for QEMU
Type: series
Message-id: cover.1573477032.git.jan.kis...@siemens.com

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
45625de contrib: Add server for ivshmem revision 2
df18ce0 docs/specs: Add specification of ivshmem device revision 2
ff35318 hw/misc: Add implementation of ivshmem revision 2 device

=== OUTPUT BEGIN ===
1/3 Checking commit ff35318fdf84 (hw/misc: Add implementation of ivshmem 
revision 2 device)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#63: 
new file mode 100644

ERROR: return is not a function, parentheses are not required
#206: FILE: hw/misc/ivshmem2.c:139:
+return (ivs->features & (1 << feature));

ERROR: memory barrier without comment
#250: FILE: hw/misc/ivshmem2.c:183:
+smp_mb();

ERROR: braces {} are necessary for all arms of this statement
#625: FILE: hw/misc/ivshmem2.c:558:
+if (msg->vector == 0)
[...]

WARNING: Block comments use a leading /* on a separate line
#775: FILE: hw/misc/ivshmem2.c:708:
+/* Select the MSI-X vectors used by device.

WARNING: Block comments use a trailing */ on a separate line
#777: FILE: hw/misc/ivshmem2.c:710:
+ * we just enable all vectors on init and after reset. */

total: 3 errors, 3 warnings, 1147 lines checked

Patch 1/3 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

2/3 Checking commit df18ce079161 (docs/specs: Add specification of ivshmem 
device revision 2)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#24: 
new file mode 100644

total: 0 errors, 1 warnings, 333 lines checked

Patch 2/3 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
3/3 Checking commit 45625def0d51 (contrib: Add server for ivshmem revision 2)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#77: 
new file mode 100644

WARNING: Block comments use a leading /* on a separate line
#174: FILE: contrib/ivshmem2-server/ivshmem2-server.c:86:
+/* free a peer when the server advertises a disconnection or when the

WARNING: Block comments use a trailing */ on a separate line
#175: FILE: contrib/ivshmem2-server/ivshmem2-server.c:87:
+ * server is freed */

ERROR: memory barrier without comment
#194: FILE: contrib/ivshmem2-server/ivshmem2-server.c:106:
+smp_mb();

WARNING: Block comments use a leading /* on a separate line
#276: FILE: contrib/ivshmem2-server/ivshmem2-server.c:188:
+/* XXX: this could use id allocation such as Linux IDA, or simply

WARNING: Block comments use a trailing */ on a separate line
#277: FILE: contrib/ivshmem2-server/ivshmem2-server.c:189:
+ * a free-list */

WARNING: Block comments use a leading /* on a separate line
#342: FILE: contrib/ivshmem2-server/ivshmem2-server.c:254:
+/* Try to ftruncate a file to next power of 2 of shmsize.

WARNING: Block comments use a trailing */ on a separate line
#346: FILE: contrib/ivshmem2-server/ivshmem2-server.c:258:
+ * shm_size value. */

WARNING: Block comments use a leading /* on a separate line
#619: FILE: contrib/ivshmem2-server/ivshmem2-server.h:63:
+const char *shm_path;   /**< Path to the shared memory; path

WARNING: Block comments use * on subsequent lines
#620: FILE: contrib/ivshmem2-server/ivshmem2-server.h:64:
+const char *shm_path;   /**< Path to the shared memory; path
+ corresponds to a POSIX shm name or a

WARNING: Block comments use a trailing */ on a separate line
#621: FILE: contrib/ivshmem2-server/ivshmem2-server.h:65:
+ hugetlbfs mount point. */

WARNING: Block comments use a leading /* on a separate line
#622: FILE: contrib/ivshmem2-server/ivshmem2-server.h:66:
+bool use_shm_open;  /**< true to use shm_open, false for

WARNING: Block comments use * on subsequent lines
#623: FILE: contrib/ivshmem2-server/ivshmem2-server.h:67:
+bool use_shm_open;  /**< true to use shm_open, false for
+ file-backed shared memory */

WARNING: Block comments use a trailing */ on a separate line
#623: FILE: contrib/ivshmem2-server/ivshmem2-server.h:67:
+ file-backed shared memory */

ERROR: spaces required around that '*' (ctx:VxV)
#742: FILE: contrib/ivshmem2-server/main.c:22:
+#define 

Re: [Qemu-devel] [PATCH] migration: check length directly to make sure the range is aligned

2019-11-11 Thread Paolo Bonzini
On 29/10/19 09:21, Wei Yang wrote:
> Thanks Dave
> 
> Paolo
> 
> Do you feel comfortable with this?
> 

Queued now, thanks.

Paolo




Re: pcmcia support in windows xp guest

2019-11-11 Thread Paolo Bonzini
On 24/09/19 14:45, Ilver Belletti wrote:
> We would like to install Windows XP 32 bit as a guest operating system
> in a Windows 10 64 bit host operating system. With the QEMU emulator
> will be the pcmcia slot available ?
> 
> We would like to use an epp parallel port in the guest operating system
> by means of Quatech pcmcia card installed in the slot.

No, however CardBus PCMCIA cards are essentially PCI devices so you
could add emulation of a PCI parallel port device.  Note that QEMU does
_not_ come with such a device, only with an ISA parallel port device.
It should not be hard to write one if it follows the same I/O port
structure as the ISA parallel port at 0x378/0x278.

Paolo




Re: [PATCH v7 6/8] Acceptance tests: add the build directory to the system PATH

2019-11-11 Thread Cleber Rosa
On Thu, Nov 07, 2019 at 05:46:13PM -0200, Wainer dos Santos Moschetta wrote:
> 
> On 11/4/19 1:13 PM, Cleber Rosa wrote:
> > So that when binaries such as qemu-img are searched for, those in the
> > build tree will be favored.  As a clarification, SRC_ROOT_DIR is
> > dependent on the location from where tests are executed, so they are
> > equal to the build directory if one is being used.
> > 
> > The original motivation is that Avocado libraries such as
> > avocado.utils.vmimage.get() may use the matching binaries, but it may
> > also apply to any other binary that test code may eventually attempt
> > to execute.
> > 
> > Signed-off-by: Cleber Rosa 
> > ---
> >   tests/acceptance/avocado_qemu/__init__.py | 6 ++
> >   1 file changed, 6 insertions(+)
> > 
> > diff --git a/tests/acceptance/avocado_qemu/__init__.py 
> > b/tests/acceptance/avocado_qemu/__init__.py
> > index 17ce583c87..a4bb796a47 100644
> > --- a/tests/acceptance/avocado_qemu/__init__.py
> > +++ b/tests/acceptance/avocado_qemu/__init__.py
> > @@ -110,6 +110,12 @@ class Test(avocado.Test):
> >   return None
> >   def setUp(self):
> > +# Some utility code uses binaries from the system's PATH.  For
> > +# instance, avocado.utils.vmimage.get() uses qemu-img, to
> > +# create a snapshot image.  This is a transparent way of
> 
> Because PATH is changed in a transparent way, wouldn't be better to also
> self.log.info() that fact?
>

I don't have a problem with logging it, but because it will happen for
*every single* test, it seems like it will become noise.  I think it's
better to properly document this aspect of "avocado_qemu.Test" instead
(which is currently missing here).  Something like:

"Tests based on avocado_qemu.Test will have, as a convenience, the 
QEMU build directory added to their PATH environment variable.  The goal
is to allow tests to seamless use matching built binaries, instead of
binaries installed elsewhere in the system".

How does it sound?

> > +# making sure those utilities find and use binaries on the
> > +# build tree by default.
> > +os.environ['PATH'] = '%s:%s' % (SRC_ROOT_DIR, os.environ['PATH'])
> 
> I think PATH should be set only once at class initialization. Perhaps in
> setUpClass()?
> 
> - Wainer
>

The Avocado test isolation model makes setUpClass() unnecessary,
unsupported and pointless, so we only support setUp().

Every test already runs on its own process, and with the nrunner
model, should be able to run on completely different systems.  That's
why we don't want to support a setUpClass() like approach.

- Cleber.




Re: [PATCH v7 5/8] Acceptance tests: keep a stable reference to the QEMU build dir

2019-11-11 Thread Cleber Rosa
On Thu, Nov 07, 2019 at 05:22:24PM -0200, Wainer dos Santos Moschetta wrote:
> 
> On 11/4/19 1:13 PM, Cleber Rosa wrote:
> > This is related to the the differences in in-tree and out-of-tree
> > builds in QEMU.  For simplification,  means my build directory.
> > 
> > Currently, by running a `make check-acceptance` one gets (in
> > tests/acceptance/avocado_qemu/__init__.py):
> > 
> > SRC_ROOT_DIR: /tests/acceptance/avocado_qemu/../../..
> > 
> > This in itself is problematic, because after the parent directories
> > are applied, one may be left not with a pointer to the build directory
> > as intended, but with the location of the source tree (assuming they
> > differ). Built binaries, such as qemu-img, are of course not there and
> > can't be found.
> > 
> > Given that a Python '__file__' will contain the absolute path to the
> > file backing the module, say:
> > 
> > __file__: /tests/acceptance/avocado_qemu/__init__.py
> > 
> >|  4  | 3|  2 | 1 |
> > 
> > A solution is to not "evaluate" the third parent dir (marked as 4
> > here) because that ends up following the "tests" directory symlink to
> > the source tree.  In fact, there's no need to keep or evaluate any of
> > the parent directories, we can just drop the rightmost 4 components,
> > and we'll keep a stable reference to the build directory (with no
> > symlink being followed).  This works for either a dedicated build
> > directory or also a combined source and build tree.
> > 
> > Signed-off-by: Cleber Rosa 
> > ---
> >   tests/acceptance/avocado_qemu/__init__.py | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/tests/acceptance/avocado_qemu/__init__.py 
> > b/tests/acceptance/avocado_qemu/__init__.py
> > index 6618ea67c1..17ce583c87 100644
> > --- a/tests/acceptance/avocado_qemu/__init__.py
> > +++ b/tests/acceptance/avocado_qemu/__init__.py
> > @@ -16,7 +16,7 @@ import tempfile
> >   import avocado
> > -SRC_ROOT_DIR = os.path.join(os.path.dirname(__file__), '..', '..', '..')
> > +SRC_ROOT_DIR = 
> > os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__
> 
> In this case, wouldn't make sense to rename the constant from SRC_ROOT_DIR
> to BUILD_ROOT_DIR?
>

True.  I remember thinking about doing that as a separate change and
ended up forgetting.  Maybe it's better to just do it here.

> This patch looks good to me besides that.
> 
> - Wainer
>

Thanks!
- Cleber.




Re: [PATCH v7 4/8] Acceptance tests: use relative location for tests

2019-11-11 Thread Cleber Rosa
On Mon, Nov 04, 2019 at 07:26:23PM +0100, Philippe Mathieu-Daudé wrote:
> On 11/4/19 4:13 PM, Cleber Rosa wrote:
> > An Avocado Test ID[1] is composed by a number of components, but it
> > starts with the Test Name, usually a file system location that was
> > given to the loader.
> > 
> > Because the source directory is being given as a prefix to the
> > "tests/acceptance" directory containing the acceptance tests, the test
> > names will needlessly include the directory the user is using to host
> > the QEMU sources (and/or build tree).
> > 
> > Let's remove the source dir (or a build dir) from the path given to
> > the test loader.  This should give more constant names, and when using
> > result servers and databases, it should give the same test names
> > across executions from different people or from different directories.
> 
> Can we strip the full path to directory and only keep the filename in the
> database? (Thinking about out-of-tree tests).
>

Yes, absolutely, but this needs to be done one the Avocado side.  TBH,
I have ideas to make this go even further, such as:

 1) the stripping of the "test_" prefix of the test method

 2) replace the full path to a directory with tests for a "test suite"
alias (default to the directory name itself)

 3) test suite alias will be persisted on test result such as reports
or machine, but ommited from the human UI

 4) full path to directory, exact version of test files (git hash) will
all be metadata and not part of the test ID

Roughly speaking, something which is listed like this currently:

  $ avocado list tests/acceptance/
  INSTRUMENTED 
tests/acceptance/boot_linux_console.py:BootLinuxConsole.test_x86_64_pc
  ...

When executed, would be shown as:

  JOB ID : fb885e9c3e7dc50534ec380a7e988cbf94233f07
  JOB LOG: 
/home/cleber/avocado/job-results/job-2019-11-11T17.07-fb885e9/job.log
   (1/1) acceptance/boot_linux_console.py:BootLinuxConsole.x86_64_pc: PASS 
(2.17 s)
  RESULTS: PASS 1 | ERROR 0 | FAIL 0 | SKIP 0 | WARN 0 | INTERRUPT 0 | 
CANCEL 0
  JOB TIME   : 2.35 s

How does that sound?

- Cleber.




Re: [PATCH v7 3/8] Acceptance tests: use avocado tags for machine type

2019-11-11 Thread Cleber Rosa
On Fri, Nov 08, 2019 at 02:20:45PM +0100, Philippe Mathieu-Daudé wrote:
> > @@ -310,7 +302,7 @@ class BootLinuxConsole(Test):
> >   def test_arm_emcraft_sf2(self):
> >   """
> >   :avocado: tags=arch:arm
> > -:avocado: tags=machine:emcraft_sf2
> > +:avocado: tags=machine:emcraft-sf2
> 
> Maybe add a comment about this change, "Since avocado 72(?) we can ... so
> use ..."
> 

You mean on this specific test docstring?  I'm confused if there's a
special reason for doing it here, of if you're suggesting adding a
similar command to all tag entries which make use of the extended
character set (which I think would be too verbose, repetitve, and hard
to maintain).

> > diff --git a/tests/acceptance/cpu_queries.py 
> > b/tests/acceptance/cpu_queries.py
> > index af47d2795a..293dccb89a 100644
> > --- a/tests/acceptance/cpu_queries.py
> > +++ b/tests/acceptance/cpu_queries.py
> > @@ -20,8 +20,8 @@ class QueryCPUModelExpansion(Test):
> >   def test(self):
> >   """
> >   :avocado: tags=arch:x86_64
> > +:avocado: tags=machine:none
> 
> Not to confuse with None :)
> 

Yep! :)

- Cleber.




Re: [PATCH v7 2/8] Acceptance tests: introduce utility method for tags unique vals

2019-11-11 Thread Cleber Rosa
On Fri, Nov 08, 2019 at 02:14:58PM +0100, Philippe Mathieu-Daudé wrote:
> On 11/4/19 4:13 PM, Cleber Rosa wrote:
> > Currently a test can describe the target architecture binary that it
> > should primarily be run with, be setting a single tag value.
> > 
> > The same approach is expected to be done with other QEMU aspects to be
> > tested, for instance, the machine type and accelerator, so let's
> > generalize the logic into a utility method.
> > 
> > Signed-off-by: Cleber Rosa 
> > ---
> >   tests/acceptance/avocado_qemu/__init__.py | 19 +--
> >   1 file changed, 13 insertions(+), 6 deletions(-)
> > 
> > diff --git a/tests/acceptance/avocado_qemu/__init__.py 
> > b/tests/acceptance/avocado_qemu/__init__.py
> > index 9a57c020d8..e676d9c4e7 100644
> > --- a/tests/acceptance/avocado_qemu/__init__.py
> > +++ b/tests/acceptance/avocado_qemu/__init__.py
> > @@ -100,14 +100,21 @@ def exec_command_and_wait_for_pattern(test, command,
> >   class Test(avocado.Test):
> > +def _get_unique_tag_val(self, tag_name):
> > +"""
> > +Gets a tag value, if unique for a key
> 
> 'Get'?
> 

Not sure what's better, I was thinking along the lines that *this
method* gets (one) a tag value...  But you may be right.

- Cleber.




Re: [PATCH v6 1/3] block: introduce compress filter driver

2019-11-11 Thread Eric Blake

On 11/11/19 10:04 AM, Andrey Shinkevich wrote:

Allow writing all the data compressed through the filter driver.
The written data will be aligned by the cluster size.
Based on the QEMU current implementation, that data can be written to
unallocated clusters only. May be used for a backup job.

Suggested-by: Max Reitz 
Signed-off-by: Andrey Shinkevich 
---
  block/Makefile.objs |   1 +
  block/filter-compress.c | 212 
  qapi/block-core.json|  10 ++-
  3 files changed, 219 insertions(+), 4 deletions(-)
  create mode 100644 block/filter-compress.c



+++ b/qapi/block-core.json
@@ -2884,15 +2884,16 @@
  # @copy-on-read: Since 3.0
  # @blklogwrites: Since 3.0
  # @blkreplay: Since 4.2
+# @compress: Since 4.2


Are we still trying to get this in 4.2, even though soft freeze is past? 
 Or are we going to have to defer it to 5.0 as a new feature?


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




[PATCH] qemu-coroutine-sleep: Silence Coverity warning

2019-11-11 Thread Eric Blake
Coverity warns that we store the address of a stack variable through a
pointer passed in by the caller, which would let the caller trivially
trigger use-after-free if that stored value is still present when we
finish execution.  However, the way coroutines work is that after our
call to qemu_coroutine_yield(), control is temporarily continued in
the caller prior to our function concluding, and in order to resume
our coroutine, the caller must poll until the variable has been set to
NULL.  Thus, we can add an assert that we do not leak stack storage to
the caller on function exit.

Fixes: Coverity CID 1406474
CC: Peter Maydell 
Signed-off-by: Eric Blake 
---

I don't know if this actually shuts Coverity up; Peter, since you
reported the Coverity issue, are you in a better position to test if
this makes a difference?  At any rate, the tests still pass after
this is in place.

I'm not sure if the compiler wants us to insert a 'volatile' in any
of our uses of QemuCoSleepState.user_state_pointer.

 util/qemu-coroutine-sleep.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
index ae91b92b6e78..769a76e57df0 100644
--- a/util/qemu-coroutine-sleep.c
+++ b/util/qemu-coroutine-sleep.c
@@ -68,5 +68,12 @@ void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType 
type, int64_t ns,
 }
 timer_mod(state.ts, qemu_clock_get_ns(type) + ns);
 qemu_coroutine_yield();
+if (sleep_state) {
+/*
+ * Note that *sleep_state is cleared during qemu_co_sleep_wake
+ * before resuming this coroutine.
+ */
+assert(*sleep_state == NULL);
+}
 timer_free(state.ts);
 }
-- 
2.21.0




Re: [PATCH] riscv/virt: Add syscon reboot and poweroff DT nodes

2019-11-11 Thread Alistair Francis
On Mon, Nov 11, 2019 at 5:38 AM Anup Patel  wrote:
>
> The SiFive test device found on virt machine can be used by
> generic syscon reboot and poweroff drivers available in Linux
> kernel.
>
> This patch updates FDT generation in virt machine so that
> Linux kernel can probe and use generic syscon drivers.
>
> Signed-off-by: Anup Patel 

Overall this looks fine. Palmer currently has a patch on list changing
the sifive test string as well. It's probably best to rebase this on
that patch.

We probably also need to make sure this is accepted in the RISC-V
kernel tree first.

Alistair

> ---
>  hw/riscv/virt.c | 28 
>  1 file changed, 24 insertions(+), 4 deletions(-)
>
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index cc8f311e6b..fdfa359713 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -182,11 +182,11 @@ static void create_fdt(RISCVVirtState *s, const struct 
> MemmapEntry *memmap,
>  uint64_t mem_size, const char *cmdline)
>  {
>  void *fdt;
> -int cpu;
> +int cpu, i;
>  uint32_t *cells;
>  char *nodename;
> -uint32_t plic_phandle, phandle = 1;
> -int i;
> +const char test_compat[] = "sifive,test0\0syscon";
> +uint32_t plic_phandle, test_phandle, phandle = 1;
>  hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2;
>  hwaddr flashbase = virt_memmap[VIRT_FLASH].base;
>
> @@ -356,13 +356,33 @@ static void create_fdt(RISCVVirtState *s, const struct 
> MemmapEntry *memmap,
>  create_pcie_irq_map(fdt, nodename, plic_phandle);
>  g_free(nodename);
>
> +test_phandle = phandle++;
>  nodename = g_strdup_printf("/test@%lx",
>  (long)memmap[VIRT_TEST].base);
>  qemu_fdt_add_subnode(fdt, nodename);
> -qemu_fdt_setprop_string(fdt, nodename, "compatible", "sifive,test0");
> +qemu_fdt_setprop(fdt, nodename, "compatible",
> + test_compat, sizeof(test_compat));
>  qemu_fdt_setprop_cells(fdt, nodename, "reg",
>  0x0, memmap[VIRT_TEST].base,
>  0x0, memmap[VIRT_TEST].size);
> +qemu_fdt_setprop_cell(fdt, nodename, "phandle", test_phandle);
> +test_phandle = qemu_fdt_get_phandle(fdt, nodename);
> +g_free(nodename);
> +
> +nodename = g_strdup_printf("/reboot");
> +qemu_fdt_add_subnode(fdt, nodename);
> +qemu_fdt_setprop_string(fdt, nodename, "compatible", "syscon-reboot");
> +qemu_fdt_setprop_cell(fdt, nodename, "regmap", test_phandle);
> +qemu_fdt_setprop_cell(fdt, nodename, "offset", 0x0);
> +qemu_fdt_setprop_cell(fdt, nodename, "value", FINISHER_RESET);
> +g_free(nodename);
> +
> +nodename = g_strdup_printf("/poweroff");
> +qemu_fdt_add_subnode(fdt, nodename);
> +qemu_fdt_setprop_string(fdt, nodename, "compatible", "syscon-poweroff");
> +qemu_fdt_setprop_cell(fdt, nodename, "regmap", test_phandle);
> +qemu_fdt_setprop_cell(fdt, nodename, "offset", 0x0);
> +qemu_fdt_setprop_cell(fdt, nodename, "value", FINISHER_PASS);
>  g_free(nodename);
>
>  nodename = g_strdup_printf("/uart@%lx",
> --
> 2.17.1
>
>



RE: [PATCH v2] tcg plugins: expose an API version concept

2019-11-11 Thread Robert Foley
On Mon, 11 Nov 2019 at 06:35, Alex Bennée  wrote:
>
> This is a very simple versioning API which allows the plugin
> infrastructure to check the API a plugin was built against. We also
> expose a min/cur API version to the plugin via the info block in case
> it wants to avoid using old deprecated APIs in the future.
>
> Signed-off-by: Alex Bennée 
> Reviewed-by: Philippe Mathieu-Daudé 
>
> ---
> v2
>   - error out on missing plugin version symbol
>   - fix missing symbol on hotblocks.so
>   - more verbose error text, avoid bad grammar
> ---

Reviewed-by: Robert Foley 
Thanks,
Rob



[Bug 1852115] [NEW] qemu --static user build fails with fedora rawhide glibc-2.30.9000

2019-11-11 Thread Cole Robinson
Public bug reported:

Building qemu latest git 654efcb511d on fedora rawhide fails with this
configure line:

./configure \
--static \
--disable-system \
--enable-linux-user \
--disable-werror \
--disable-tools \
--disable-capstone

make fails with:

/usr/bin/ld: linux-user/syscall.o: in function `do_syscall1':
/root/qemu.git/linux-user/syscall.c:7769: undefined reference to `stime'
collect2: error: ld returned 1 exit status

Seems related to this glibc change:
https://sourceware.org/git/?p=glibc.git;a=commit;h=12cbde1dae6fa4a9a792b64564c7e0debf7544cc

...

+* The obsolete function stime is no longer available to newly linked
+  binaries and it has been removed from  header.  This function
+  has been deprecated in favor of clock_settime.
+

# rpm -q glibc
glibc-2.30.9000-17.fc32.x86_64


FWIW there's some other messages but I don't think they are fatal:

/usr/bin/ld: 
/usr/lib/gcc/x86_64-redhat-linux/9/../../../../lib64/libglib-2.0.a(gutils.c.o): 
in function `g_get_user_database_entry':
(.text+0x267): warning: Using 'getpwuid' in statically linked applications 
requires at runtime the shared libraries from the glibc version used for linking
/usr/bin/ld: (.text+0xe0): warning: Using 'getpwnam_r' in statically linked 
applications requires at runtime the shared libraries from the glibc version 
used for linking
/usr/bin/ld: (.text+0x11e): warning: Using 'getpwuid_r' in statically linked 
applications requires at runtime the shared libraries from the glibc version 
used for linking


Also, --disable-capstone is required to avoid this error, but it is 
pre-existing, not sure if it's a bug, if so I can file a separate one:

  LINKaarch64-linux-user/qemu-aarch64
/usr/bin/ld: cannot find -lcapstone
collect2: error: ld returned 1 exit status

** Affects: qemu
 Importance: Undecided
 Status: New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1852115

Title:
  qemu --static user build fails with fedora rawhide glibc-2.30.9000

Status in QEMU:
  New

Bug description:
  Building qemu latest git 654efcb511d on fedora rawhide fails with this
  configure line:

  ./configure \
--static \
--disable-system \
--enable-linux-user \
--disable-werror \
--disable-tools \
--disable-capstone

  make fails with:

  /usr/bin/ld: linux-user/syscall.o: in function `do_syscall1':
  /root/qemu.git/linux-user/syscall.c:7769: undefined reference to `stime'
  collect2: error: ld returned 1 exit status

  Seems related to this glibc change:
  
https://sourceware.org/git/?p=glibc.git;a=commit;h=12cbde1dae6fa4a9a792b64564c7e0debf7544cc

  ...

  +* The obsolete function stime is no longer available to newly linked
  +  binaries and it has been removed from  header.  This function
  +  has been deprecated in favor of clock_settime.
  +

  # rpm -q glibc
  glibc-2.30.9000-17.fc32.x86_64

  
  FWIW there's some other messages but I don't think they are fatal:

  /usr/bin/ld: 
/usr/lib/gcc/x86_64-redhat-linux/9/../../../../lib64/libglib-2.0.a(gutils.c.o): 
in function `g_get_user_database_entry':
  (.text+0x267): warning: Using 'getpwuid' in statically linked applications 
requires at runtime the shared libraries from the glibc version used for linking
  /usr/bin/ld: (.text+0xe0): warning: Using 'getpwnam_r' in statically linked 
applications requires at runtime the shared libraries from the glibc version 
used for linking
  /usr/bin/ld: (.text+0x11e): warning: Using 'getpwuid_r' in statically linked 
applications requires at runtime the shared libraries from the glibc version 
used for linking

  
  Also, --disable-capstone is required to avoid this error, but it is 
pre-existing, not sure if it's a bug, if so I can file a separate one:

LINKaarch64-linux-user/qemu-aarch64
  /usr/bin/ld: cannot find -lcapstone
  collect2: error: ld returned 1 exit status

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1852115/+subscriptions



Re: API definition for LUKS key management

2019-11-11 Thread Daniel P . Berrangé
On Mon, Nov 11, 2019 at 05:58:20PM +0200, Maxim Levitsky wrote:
> Hi!
> 
> I would like to discuss the API for LUKS key management.
> 
> First of all very brief overview of LUKS v1 format:
> 
> Each sector of the image is encrypted with same master key, which
> is not stored directly on the disk.
> 
> Instead in the LUKS header we have 8 slots. Each slot optionally stores
> an encrypted version of the master key, encrypted by the user password.
> Knowing the password, you can retrieve the master key from the keyslot.
> Slot can be marked as active or inactive, inactive slots are not considered
> when opening the image.
> 
> In addition to that LUKS header has a hash of the master key, so that
> you can check if the password 'opens' a keyslot by decrypting it
> with given the password, and then checking if 
> the hash of the decrypted master key candidate obtained matches the stored 
> hash.
> 
> That basically means that you can have up to 8 different passwords that will
> open a luks volume and you can change them as you wish without re-encrypting
> everything.
> 
> Now for raw luks volumes you have cryptsetup which allows to manage these
> keyslots, but we also have so called encrypted qcow2 format which
> basically has the luks header, together with keyslots embedded, plus each
> cluster is encrypted with the master key as in raw luks.
> Cryptsetup doesn't support this, thus I implemented this in qemu block layer.

Even for raw luks volumes, the traditional "cryptsetup" tool is
undesirable. eg consider LUKS on an RBD or ISCSI volume where
you are using the in-QEMU RBD/ISCSI client. You don't want to
have to configure the host kernel client just to change the
keyslot info. You don't want to use the in-QEMU clients for
qemu-img.

> 
> Link to bugzilla here: https://bugzilla.redhat.com/show_bug.cgi?id=1662412
> 
> 
> Relevant to the API,
> first of all qemu has the notion of amend (qemu-img amend), which allows
> currently to change format specific extensions of qcow2.
> 
> Since luks, especially luks inside qcow2 is a format on its own, it fits to 
> use that interface to change the 'format' options, in this case,
> the encryption key slots.
> 
> 
> There are the following requirements (they are 100% hardcoded, we might 
> discuss
> to drop some of these):
> 
> 
> 1. ability to add a new password to a free keyslot 
> (best is to let api to pick a free keyslot)
> Also user should not need to know all the passwords in existing keyslots.
> 
> 
> 2. ability to erase a keyslot, usually by giving the password that should be 
> erased, and erasing all
> the keyslots that match the password, or by giving a keyslot index.
> This will usually be done after adding a new password.
> 
> 
> 3. Allow to do so online, that is while qemu is running, but also support 
> offline management.
> Note that online management is even useful for raw luks volumes, since its 
> not safe
> to run cryptsetup on them while qemu is using the images.
> 
> 
> I implemented those requirements using the following interface.
> (I have sent the patches already)
> 
> I will try to explain the interface with bunch of examples:
> 
> 
> # adds a new password, defined by qemu secret 'sec0' to first unused slot
> # give user a error if all keyslots are occupied
> qemu-img amend --secret ... -o key-secret=sec1 image.luks

I think you meant "--object secret," instead of "--secret ..."

Also, this example needs to have 2 secrets provided. The first
secret to unlock the image using the existing password, and the
second secret is the one being added.

> # erases all keyslots that can be opened by password that is contained in a 
> qemu secret 'sec0'
> # active=off means that the given password/keyslot won't be active after the 
> operation
> qemu-img amend --secret ... -o key-secret=sec0,active=off image.luks
> 
> 
> # erase the slot 5 (this is more low level command, less expected to be used)
> qemu-img amend --secret ... -o slot=5,active=off image.luks
> 
> # add new secret to slot 5 (will fail if the slot is already marked as active)
> qemu-img amend --secret ... -o slot=5,key-secret=sec1 image.luks

This also needs two secrets provideed.

> 
> 
> This is basically it.
> 
> The full option syntax is as following:
> 
> active=on/off (optional, default to on) - toggles if we enabling a keyslot or 
> are erasing it.
> 
> slot=number (optional, advanced option) - specifies which exactly slot to 
> erase or which
> slot to put the new key on
> 
> key-secret = id of the secret object - specifies the secret. when slot is 
> enabled,
> it will be put into the new slot. when disabling (erasing a keyslot), all 
> keyslots
> matching that secret will be erased. 
> Specifying both key-secret and slot index is treated as error I think
> 
> 
> As as very advanced option, --force is added to qemu-img to allow to do 
> unsafe operation,
> which in this case is removing last keyslot which will render the encrypted 
> image useless.
> 
> 
> In addition to that, 

Re: [PATCH v4] iotests: Test NBD client reconnection

2019-11-11 Thread Eric Blake

On 11/11/19 4:04 AM, Andrey Shinkevich wrote:

The test for an NBD client. The NBD server is disconnected after the
client write request. The NBD client should reconnect and complete
the write operation.

Suggested-by: Denis V. Lunev 
Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Andrey Shinkevich 
---
v3 was discussed in the email thread with the message ID
<1572627272-23359-1-git-send-email-andrey.shinkev...@virtuozzo.com>




+import os
+import subprocess
+import iotests
+from iotests import file_path, log
+
+
+nbd_sock, conf_file = file_path('nbd-sock', "nbd-fault-injector.conf")


Mixing '' and "" in the same line is odd.  I don't know if we have a 
strong preference for one style over the other.



+++ b/tests/qemu-iotests/group
@@ -284,3 +284,4 @@
 268 rw auto quick
 270 rw backing quick
 272 rw
+277 rw


The test completes in 3 seconds for me; we could add 'quick'.

Otherwise, looks reasonable to me.

Reviewed-by: Eric Blake 
Tested-by: Eric Blake 

I'll probably queue this through my NBD tree for rc1, as it adds test 
coverage of a new feature for the 4.2 release.


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [virtio-dev] Re: guest / host buffer sharing ...

2019-11-11 Thread Liam Girdwood
On Mon, 2019-11-11 at 12:04 +0900, David Stevens wrote:
> Having a centralized buffer allocator device is one way to deal with
> sharing buffers, since it gives a definitive buffer identifier that
> can be used by all drivers/devices to refer to the buffer. That being
> said, I think the device as proposed is insufficient, as such a
> centralized buffer allocator should probably be responsible for
> allocating all shared buffers, not just linear guest ram buffers.

This would work for audio. I need to be able to :-

1) Allocate buffers on guests that I can pass as SG physical pages to
DMA engine (via privileged VM driver) for audio data. Can be any memory
as long as it's DMA-able.

2) Export hardware mailbox memory (in a real device PCI BAR) as RO to
each guest to give guests low latency information on each audio stream.
To support use cases like voice calls, gaming, system notifications and
general audio processing.

Liam




Re: [PULL 0/4] tcg patch queue

2019-11-11 Thread Peter Maydell
On Mon, 11 Nov 2019 at 15:56, Richard Henderson
 wrote:
>
> The following changes since commit 654efcb511d394c1d3f5292c28503d1d19e5b1d3:
>
>   Merge remote-tracking branch 'remotes/vivier/tags/q800-branch-pull-request' 
> into staging (2019-11-11 09:23:46 +)
>
> are available in the Git repository at:
>
>   https://github.com/rth7680/qemu.git tags/pull-tcg-2019
>
> for you to fetch changes up to cb974c95df0e1c9e73a37facd3e13894bd3eedc2:
>
>   tcg/LICENSE: Remove out of date claim about TCG subdirectory licensing 
> (2019-11-11 15:11:21 +0100)
>
> 
> Remove no-longer-true statement that TCG is BSD-licensed
>


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/4.2
for any user-visible changes.

-- PMM



Re: [Resend PATCH 0/3] Add CPU model for intel processor Cooper Lake

2019-11-11 Thread Bruce Rogers
On Wed, 2019-10-23 at 23:52 -0300, Eduardo Habkost wrote:
> On Tue, Oct 22, 2019 at 03:35:25PM +0800, Cathy Zhang wrote:
> > This patchset is to add CPU model for intel processor Cooper Lake.
> > It
> > will inherit features from the existing CPU model Cascadelake-
> > Server,
> > meanwhile, add the platform associated new instruction and feature
> > for speculative execution which the host supports. There are
> > associated
> > feature bit and macro defined here as needed.
> 
> Queued, thanks.
> 
> > Cathy Zhang (3):
> >   i386: Add MSR feature bit for MDS-NO
> >   i386: Add macro for stibp
> >   i386: Add new CPU model Cooperlake
> > 
> >  target/i386/cpu.c | 60
> > +++
> >  target/i386/cpu.h |  3 +++
> >  2 files changed, 63 insertions(+)
> > 

Hi Eduardo,

Is this going to make it into v4.2?

Regards,
Bruce


[PATCH v8 4/4] qemu-iotests: adding LUKS cleanup for non-UTF8 secret error

2019-11-11 Thread Daniel Henrique Barboza
This patch adds a new test file to exercise the case where
qemu-img fails to complete for the LUKS format when a non-UTF8
secret is used.

Signed-off-by: Daniel Henrique Barboza 
---
 tests/qemu-iotests/273 | 67 ++
 tests/qemu-iotests/273.out | 11 +++
 tests/qemu-iotests/group   |  1 +
 3 files changed, 79 insertions(+)
 create mode 100755 tests/qemu-iotests/273
 create mode 100644 tests/qemu-iotests/273.out

diff --git a/tests/qemu-iotests/273 b/tests/qemu-iotests/273
new file mode 100755
index 00..cb362598b4
--- /dev/null
+++ b/tests/qemu-iotests/273
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+#
+# Test qemu-img file cleanup for LUKS when using a non-UTF8 secret
+#
+# Copyright (C) 2019, IBM Corporation.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+status=1   # failure is the default!
+TEST_IMAGE_FILE='vol.img'
+
+_cleanup()
+{
+  _cleanup_test_img
+  rm non_utf8_secret
+  rm -f $TEST_IMAGE_FILE
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt luks
+_supported_proto generic
+_unsupported_proto vxhs
+
+echo "== Create non-UTF8 secret =="
+echo -n -e '\x3a\x3c\x3b\xff' > non_utf8_secret
+SECRET="secret,id=sec0,file=non_utf8_secret"
+
+echo "== Throws an error because of invalid UTF-8 secret =="
+$QEMU_IMG create -f $IMGFMT --object $SECRET -o "key-secret=sec0" 
$TEST_IMAGE_FILE 4M
+
+echo "== Image file should not exist after the error =="
+if test -f "$TEST_IMAGE_FILE"; then
+exit 1
+fi
+
+echo "== Create a stub image file and run qemu-img again =="
+touch $TEST_IMAGE_FILE
+$QEMU_IMG create -f $IMGFMT --object $SECRET -o "key-secret=sec0" 
$TEST_IMAGE_FILE 4M
+
+echo "== Pre-existing image file should also be deleted after the error =="
+if test -f "$TEST_IMAGE_FILE"; then
+exit 1
+fi
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/273.out b/tests/qemu-iotests/273.out
new file mode 100644
index 00..8c6653cd82
--- /dev/null
+++ b/tests/qemu-iotests/273.out
@@ -0,0 +1,11 @@
+QA output created by 273
+== Create non-UTF8 secret ==
+== Throws an error because of invalid UTF-8 secret ==
+qemu-img: vol.img: Data from secret sec0 is not valid UTF-8
+Formatting 'vol.img', fmt=luks size=4194304 key-secret=sec0
+== Image file should not exist after the error ==
+== Create a stub image file and run qemu-img again ==
+qemu-img: vol.img: Data from secret sec0 is not valid UTF-8
+Formatting 'vol.img', fmt=luks size=4194304 key-secret=sec0
+== Pre-existing image file should also be deleted after the error ==
+ *** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 065040398d..fc5a680739 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -284,3 +284,4 @@
 268 rw auto quick
 270 rw backing quick
 272 rw
+273 rw img quick
\ No newline at end of file
-- 
2.21.0




[PATCH v8 2/4] block.c: adding bdrv_co_delete_file

2019-11-11 Thread Daniel Henrique Barboza
Using the new 'bdrv_co_delete_file' interface, a pure co_routine function
'bdrv_co_delete_file' inside block.c can can be used in a way similar of
the existing bdrv_create_file to to clean up a created file.

We're creating a pure co_routine because the only caller of
'bdrv_co_delete_file' will be already in co_routine context, thus there
is no need to add all the machinery to check for qemu_in_coroutine() and
create a separated co_routine to do the job.

Suggested-by: Daniel P. Berrangé 
Signed-off-by: Daniel Henrique Barboza 
---
 block.c   | 26 ++
 include/block/block.h |  1 +
 2 files changed, 27 insertions(+)

diff --git a/block.c b/block.c
index 4cffc2bc35..c325104b8c 100644
--- a/block.c
+++ b/block.c
@@ -548,6 +548,32 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, 
Error **errp)
 return ret;
 }
 
+int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp)
+{
+Error *local_err = NULL;
+int ret;
+
+assert(bs != NULL);
+
+if (!bs->drv) {
+error_setg(errp, "Block node '%s' is not opened", bs->filename);
+return -ENOMEDIUM;
+}
+
+if (!bs->drv->bdrv_co_delete_file) {
+error_setg(errp, "Driver '%s' does not support image deletion",
+   bs->drv->format_name);
+return -ENOTSUP;
+}
+
+ret = bs->drv->bdrv_co_delete_file(bs, _err);
+if (ret < 0) {
+error_propagate(errp, local_err);
+}
+
+return ret;
+}
+
 /**
  * Try to get @bs's logical and physical block size.
  * On success, store them in @bsz struct and return 0.
diff --git a/include/block/block.h b/include/block/block.h
index 1df9848e74..ec0d82f6b0 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -372,6 +372,7 @@ bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, 
BlockDriverState *base,
 int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
   Error **errp);
 void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base);
+int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp);
 
 
 typedef struct BdrvCheckResult {
-- 
2.21.0




[PATCH v8 0/4] delete created files when block_crypto_co_create_opts_luks fails

2019-11-11 Thread Daniel Henrique Barboza
changes from previous version 7 [1], all suggested by Kevin Wolf:

- patch 1:
* removed function comment of raw_co_delete_file;
* removed 'done' label from raw_co_delete_file;
* removed 'local' remark from bdrv_co_delete_file comment. The comment
  is now single-lined;
* added missing space in the commit msg;
- patch 2:
* ditched bdrv_delete_co_entry and bdrv_delete_file, now it's a single
  coroutine_fn bdrv_co_delete_file;
* BlockDriverState != NULL dropped - the caller will need to ensure it
  is not null;
* changed the error message of '!bs->drv' condition;
* s/delete/deletion in the error message of !bs->drv->bdrv_co_delete_file;
* 'out' label removed - function will return immediately on error;
- patch 3:
* check for (ret && bs);
* drop the ENOENT verification;
* do not prepend the filename in the error message;
* removed an extra blank line.


[1] https://lists.gnu.org/archive/html/qemu-devel/2019-09/msg00282.html

Daniel Henrique Barboza (4):
  block: introducing 'bdrv_co_delete_file' interface
  block.c: adding bdrv_co_delete_file
  crypto.c: cleanup created file when block_crypto_co_create_opts_luks
fails
  qemu-iotests: adding LUKS cleanup for non-UTF8 secret error

 block.c| 26 +++
 block/crypto.c | 18 ++
 block/file-posix.c | 23 +
 include/block/block.h  |  1 +
 include/block/block_int.h  |  4 +++
 tests/qemu-iotests/273 | 67 ++
 tests/qemu-iotests/273.out | 11 +++
 tests/qemu-iotests/group   |  1 +
 8 files changed, 151 insertions(+)
 create mode 100755 tests/qemu-iotests/273
 create mode 100644 tests/qemu-iotests/273.out

-- 
2.21.0




[PATCH v8 3/4] crypto.c: cleanup created file when block_crypto_co_create_opts_luks fails

2019-11-11 Thread Daniel Henrique Barboza
When using a non-UTF8 secret to create a volume using qemu-img, the
following error happens:

$ qemu-img create -f luks --object 
secret,id=vol_1_encrypt0,file=vol_resize_pool.vol_1.secret.qzVQrI -o 
key-secret=vol_1_encrypt0 /var/tmp/pool_target/vol_1 10240K

Formatting '/var/tmp/pool_target/vol_1', fmt=luks size=10485760 
key-secret=vol_1_encrypt0
qemu-img: /var/tmp/pool_target/vol_1: Data from secret vol_1_encrypt0 is not 
valid UTF-8

However, the created file '/var/tmp/pool_target/vol_1' is left behind in the
file system after the failure. This behavior can be observed when creating
the volume using Libvirt, via 'virsh vol-create', and then getting "volume
target path already exist" errors when trying to re-create the volume.

The volume file is created inside block_crypto_co_create_opts_luks(), in
block/crypto.c. If the bdrv_create_file() call is successful but any
succeeding step fails*, the existing 'fail' label does not take into
account the created file, leaving it behind.

This patch changes block_crypto_co_create_opts_luks() to delete
'filename' in case of failure. A failure in this point means that
the volume is now truncated/corrupted, so even if 'filename' was an
existing volume before calling qemu-img, it is now unusable. Deleting
the file it is not much worse than leaving it in the filesystem in
this scenario, and we don't have to deal with checking the file
pre-existence in the code.

* in our case, block_crypto_co_create_generic calls qcrypto_block_create,
which calls qcrypto_block_luks_create, and this function fails when
calling qcrypto_secret_lookup_as_utf8.

Reported-by: Srikanth Aithal 
Suggested-by: Kevin Wolf 
Signed-off-by: Daniel Henrique Barboza 
---
 block/crypto.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/block/crypto.c b/block/crypto.c
index 24823835c1..00e8ec537d 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -30,6 +30,7 @@
 #include "qapi/error.h"
 #include "qemu/module.h"
 #include "qemu/option.h"
+#include "qemu/cutils.h"
 #include "crypto.h"
 
 typedef struct BlockCrypto BlockCrypto;
@@ -596,6 +597,23 @@ static int coroutine_fn 
block_crypto_co_create_opts_luks(const char *filename,
 
 ret = 0;
 fail:
+/*
+ * If an error occurred, delete 'filename'. Even if the file existed
+ * beforehand, it has been truncated and corrupted in the process.
+ */
+if (ret && bs) {
+Error *local_delete_err = NULL;
+int r_del = bdrv_co_delete_file(bs, _delete_err);
+/*
+ * ENOTSUP will happen if the block driver doesn't support
+ * the 'bdrv_co_delete_file' interface. This is a predictable
+ * scenario and shouldn't be reported back to the user.
+ */
+if ((r_del < 0) && (r_del != -ENOTSUP)) {
+error_report_err(local_delete_err);
+}
+}
+
 bdrv_unref(bs);
 qapi_free_QCryptoBlockCreateOptions(create_opts);
 qobject_unref(cryptoopts);
-- 
2.21.0




Re: [PATCH v2] s390x: Properly fetch the short psw on diag308 subc 0/1

2019-11-11 Thread David Hildenbrand



> Am 11.11.2019 um 16:28 schrieb Janosch Frank :
> 
> We need to actually fetch the cpu mask and set it. As we invert the
> short psw indication in the mask, SIE will report a specification
> exception, if it wasn't present in the reset psw.
> 
> Signed-off-by: Janosch Frank 
> ---
> target/s390x/cpu.c | 12 ++--
> target/s390x/cpu.h |  1 +
> 2 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
> index 736a7903e2..40aa42e092 100644
> --- a/target/s390x/cpu.c
> +++ b/target/s390x/cpu.c
> @@ -76,8 +76,16 @@ static bool s390_cpu_has_work(CPUState *cs)
> static void s390_cpu_load_normal(CPUState *s)
> {
> S390CPU *cpu = S390_CPU(s);
> -cpu->env.psw.addr = ldl_phys(s->as, 4) & PSW_MASK_ESA_ADDR;
> -cpu->env.psw.mask = PSW_MASK_32 | PSW_MASK_64;
> +uint64_t spsw = ldq_phys(s->as, 0);
> +
> +cpu->env.psw.mask = spsw & 0x8000UL;

ULL

> +/*
> + * Invert short psw indication, so SIE will report a specification
> + * exception if it was not set.
> + */

It would be interesting to know how the PSW mask in the PGM old PSW looks like 
on LPAR. IOW, „you forgot to set the short indication, here is an exception. 
see, the short indication is set now.“ Sounds weird, most probably nobody cares.

> +cpu->env.psw.mask ^= PSW_MASK_SHORTPSW;
> +cpu->env.psw.addr = spsw & 0x7fffUL;

Eventually also ULL

> +
> s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu);
> }
> #endif
> diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
> index 947553386f..2c687185f1 100644
> --- a/target/s390x/cpu.h
> +++ b/target/s390x/cpu.h
> @@ -261,6 +261,7 @@ extern const struct VMStateDescription vmstate_s390_cpu;
> #define PSW_MASK_EXT0x0100ULL
> #define PSW_MASK_KEY0x00F0ULL
> #define PSW_SHIFT_KEY   52
> +#define PSW_MASK_SHORTPSW0x0008ULL
> #define PSW_MASK_MCHECK 0x0004ULL
> #define PSW_MASK_WAIT   0x0002ULL
> #define PSW_MASK_PSTATE 0x0001ULL
> -- 
> 2.20.1
> 

Looks good to me

Reviewed-by: David Hildenbrand 




Re: [PATCH] tests/migration: use the common library function

2019-11-11 Thread Alex Bennée


Peter Maydell  writes:

> On Mon, 11 Nov 2019 at 14:41, Thomas Huth  wrote:
>>
>> On 11/11/2019 15.11, Alex Bennée wrote:
>> >
>> > Thomas Huth  writes:
>> >
>> >> On 11/11/2019 13.55, Alex Bennée wrote:
>> >>> Signed-off-by: Alex Bennée 
>> >>
>> >> Could you please add at least a short patch description? (Why is this
>> >> change necessary / a good idea?)
>> >
>> > It's just a minor clean-up Dave happened to comment on last week. Using
>> > the helper function is preferable given it abstracts away any system
>> > differences for the same information.
>>
>> But this also changes the behavior on non-Linux systems (i.e. the *BSDs
>> and macOS), since they will now use getpid() instead of gettid ... is
>> that the intended change here?
>
> Does the 'stress' program work on those OSes? For that matter,
> does it work on Linux?
>
> As far as I can tell we don't compile stress.c on any host,
> since the only thing that depends on tests/migration/stress$(EXESUF)
> is tests/migration/initrd-stress.img, and nothing depends on that.
>
> Nothing creates tests/migration/ in the build dir so trying
> to build tests/migration/stress in an out-of-tree config fails:
>
>   CC  tests/migration/stress.o
> /home/petmay01/linaro/qemu-from-laptop/qemu/tests/migration/stress.c:359:1:
> fatal error: opening dependency file tests/migration/stress.d: No such
> file or directory
>  }
>  ^
> compilation terminated.
>
> ...and if I fix that by manually creating the directory then
> it fails to link:
>
>   CC  tests/migration/stress.o
>   LINKtests/migration/stress
> tests/migration/stress.o: In function `get_command_arg_str':
> /home/petmay01/linaro/qemu-from-laptop/qemu/tests/migration/stress.c:107:
> undefined reference to `g_strndup'
> /home/petmay01/linaro/qemu-from-laptop/qemu/tests/migration/stress.c:109:
> undefined reference to `g_strdup'
> tests/migration/stress.o: In function `get_command_arg_ull':
> /home/petmay01/linaro/qemu-from-laptop/qemu/tests/migration/stress.c:129:
> undefined reference to `g_free'
> /home/petmay01/linaro/qemu-from-laptop/qemu/tests/migration/stress.c:132:
> undefined reference to `g_free'
> tests/migration/stress.o: In function `stress':
> /home/petmay01/linaro/qemu-from-laptop/qemu/tests/migration/stress.c:253:
> undefined reference to `pthread_create'
> collect2: error: ld returned 1 exit status
> /home/petmay01/linaro/qemu-from-laptop/qemu/tests/Makefile.include:849:
> recipe for target 'tests/migration/stress' failed
>
> Is this dead code ?

It was introduced around 3 years ago by Daniel for stress testing. The
instructions in:

  409437e16df273fc5f78f6cd1cb53023eaeb9b72
  Author: Daniel P. Berrangé 
  AuthorDate: Wed Jul 20 14:23:13 2016 +0100
  Commit: Amit Shah 
  CommitDate: Fri Jul 22 13:23:39 2016 +0530

  tests: introduce a framework for testing migration performance

say to use:

  make tests/migration/initrd-stress.img

And that has indeed bitrotted over time. All the other tweaks since are
passing through clean ups.

>
> thanks
> -- PMM


--
Alex Bennée



[PATCH v8 1/4] block: introducing 'bdrv_co_delete_file' interface

2019-11-11 Thread Daniel Henrique Barboza
Adding to Block Drivers the capability of being able to clean up
its created files can be useful in certain situations. For the
LUKS driver, for instance, a failure in one of its authentication
steps can leave files in the host that weren't there before.

This patch adds the 'bdrv_co_delete_file' interface to block
drivers and add it to the 'file' driver in file-posix.c. The
implementation is given by 'raw_co_delete_file'.

Suggested-by: Daniel P. Berrangé 
Signed-off-by: Daniel Henrique Barboza 
---
 block/file-posix.c| 23 +++
 include/block/block_int.h |  4 
 2 files changed, 27 insertions(+)

diff --git a/block/file-posix.c b/block/file-posix.c
index 1f0f61a02b..692a36a799 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -2386,6 +2386,28 @@ static int coroutine_fn raw_co_create_opts(const char 
*filename, QemuOpts *opts,
 return raw_co_create(, errp);
 }
 
+static int coroutine_fn raw_co_delete_file(BlockDriverState *bs,
+   Error **errp)
+{
+struct stat st;
+int ret;
+
+if (!(stat(bs->filename, ) == 0) || !S_ISREG(st.st_mode)) {
+error_setg_errno(errp, ENOENT, "%s is not a regular file",
+ bs->filename);
+return -ENOENT;
+}
+
+ret = unlink(bs->filename);
+if (ret < 0) {
+ret = -errno;
+error_setg_errno(errp, -ret, "Error when deleting file %s",
+ bs->filename);
+}
+
+return ret;
+}
+
 /*
  * Find allocation range in @bs around offset @start.
  * May change underlying file descriptor's file offset.
@@ -3017,6 +3039,7 @@ BlockDriver bdrv_file = {
 .bdrv_co_block_status = raw_co_block_status,
 .bdrv_co_invalidate_cache = raw_co_invalidate_cache,
 .bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
+.bdrv_co_delete_file = raw_co_delete_file,
 
 .bdrv_co_preadv = raw_co_preadv,
 .bdrv_co_pwritev= raw_co_pwritev,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index dd033d0b37..d938d3e8d2 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -314,6 +314,10 @@ struct BlockDriver {
  */
 int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs);
 
+/* Delete a created file. */
+int coroutine_fn (*bdrv_co_delete_file)(BlockDriverState *bs,
+Error **errp);
+
 /*
  * Flushes all data that was already written to the OS all the way down to
  * the disk (for example file-posix.c calls fsync()).
-- 
2.21.0




Re: [PULL 0/3] target-arm queue

2019-11-11 Thread Peter Maydell
On Mon, 11 Nov 2019 at 13:58, Peter Maydell  wrote:
>
> Arm patches for rc1:
>  * two final "remove the old API" patches for some API transitions
>  * bugfix for raspi/highbank Linux boot
>
> thanks
> -- PMM
>
> The following changes since commit 654efcb511d394c1d3f5292c28503d1d19e5b1d3:
>
>   Merge remote-tracking branch 'remotes/vivier/tags/q800-branch-pull-request' 
> into staging (2019-11-11 09:23:46 +)
>
> are available in the Git repository at:
>
>   https://git.linaro.org/people/pmaydell/qemu-arm.git 
> tags/pull-target-arm-2019
>
> for you to fetch changes up to 45c078f163fd47c35e7505d98928fae63baada7d:
>
>   hw/arm/boot: Set NSACR.{CP11, CP10} in dummy SMC setup routine (2019-11-11 
> 13:44:16 +)
>
> 
> target-arm queue:
>  * Remove old unassigned_access CPU hook API
>  * Remove old ptimer_init_with_bh() API
>  * hw/arm/boot: Set NSACR.{CP11, CP10} in dummy SMC setup routine
>

Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/4.2
for any user-visible changes.

-- PMM



RE: [PATCH] riscv/virt: Add syscon reboot and poweroff DT nodes

2019-11-11 Thread Anup Patel


> -Original Message-
> From: Bin Meng 
> Sent: Monday, November 11, 2019 8:58 PM
> To: Anup Patel 
> Cc: Peter Maydell ; Alistair Francis
> ; Sagar Karandikar ;
> Palmer Dabbelt ; qemu-devel@nongnu.org; Atish
> Patra ; qemu-ri...@nongnu.org; Christoph Hellwig
> ; Anup Patel 
> Subject: Re: [PATCH] riscv/virt: Add syscon reboot and poweroff DT nodes
> 
> On Mon, Nov 11, 2019 at 9:42 PM Anup Patel  wrote:
> >
> > Correct Palmer's email address.
> >
> > > -Original Message-
> > > From: Anup Patel
> > > Sent: Monday, November 11, 2019 7:08 PM
> > > To: Peter Maydell ; Palmer Dabbelt
> > > ; Alistair Francis ;
> > > Sagar Karandikar 
> > > Cc: Atish Patra ; Christoph Hellwig
> > > ; Anup Patel ;
> > > qemu-ri...@nongnu.org; qemu- de...@nongnu.org; Anup Patel
> > > 
> > > Subject: [PATCH] riscv/virt: Add syscon reboot and poweroff DT nodes
> > >
> > > The SiFive test device found on virt machine can be used by generic
> > > syscon reboot and poweroff drivers available in Linux kernel.
> > >
> > > This patch updates FDT generation in virt machine so that Linux
> > > kernel can probe and use generic syscon drivers.
> > >
> > > Signed-off-by: Anup Patel 
> > > ---
> > >  hw/riscv/virt.c | 28 
> > >  1 file changed, 24 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index
> > > cc8f311e6b..fdfa359713
> > > 100644
> > > --- a/hw/riscv/virt.c
> > > +++ b/hw/riscv/virt.c
> > > @@ -182,11 +182,11 @@ static void create_fdt(RISCVVirtState *s,
> > > const struct MemmapEntry *memmap,
> > >  uint64_t mem_size, const char *cmdline)  {
> > >  void *fdt;
> > > -int cpu;
> > > +int cpu, i;
> > >  uint32_t *cells;
> > >  char *nodename;
> > > -uint32_t plic_phandle, phandle = 1;
> > > -int i;
> > > +const char test_compat[] = "sifive,test0\0syscon";
> > > +uint32_t plic_phandle, test_phandle, phandle = 1;
> > >  hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2;
> > >  hwaddr flashbase = virt_memmap[VIRT_FLASH].base;
> > >
> > > @@ -356,13 +356,33 @@ static void create_fdt(RISCVVirtState *s,
> > > const struct MemmapEntry *memmap,
> > >  create_pcie_irq_map(fdt, nodename, plic_phandle);
> > >  g_free(nodename);
> > >
> > > +test_phandle = phandle++;
> > >  nodename = g_strdup_printf("/test@%lx",
> > >  (long)memmap[VIRT_TEST].base);
> > >  qemu_fdt_add_subnode(fdt, nodename);
> > > -qemu_fdt_setprop_string(fdt, nodename, "compatible",
> "sifive,test0");
> > > +qemu_fdt_setprop(fdt, nodename, "compatible",
> > > + test_compat, sizeof(test_compat));
> > >  qemu_fdt_setprop_cells(fdt, nodename, "reg",
> > >  0x0, memmap[VIRT_TEST].base,
> > >  0x0, memmap[VIRT_TEST].size);
> > > +qemu_fdt_setprop_cell(fdt, nodename, "phandle", test_phandle);
> > > +test_phandle = qemu_fdt_get_phandle(fdt, nodename);
> 
> Is this necessary?

I was doubtful about this line myself but I tried to keep code
consistent with other DT nodes (such as PLIC).

Regards,
Anup

> 
> > > +g_free(nodename);
> > > +
> > > +nodename = g_strdup_printf("/reboot");
> > > +qemu_fdt_add_subnode(fdt, nodename);
> > > +qemu_fdt_setprop_string(fdt, nodename, "compatible", "syscon-
> > > reboot");
> > > +qemu_fdt_setprop_cell(fdt, nodename, "regmap", test_phandle);
> > > +qemu_fdt_setprop_cell(fdt, nodename, "offset", 0x0);
> > > +qemu_fdt_setprop_cell(fdt, nodename, "value", FINISHER_RESET);
> > > +g_free(nodename);
> > > +
> > > +nodename = g_strdup_printf("/poweroff");
> > > +qemu_fdt_add_subnode(fdt, nodename);
> > > +qemu_fdt_setprop_string(fdt, nodename, "compatible", "syscon-
> > > poweroff");
> > > +qemu_fdt_setprop_cell(fdt, nodename, "regmap", test_phandle);
> > > +qemu_fdt_setprop_cell(fdt, nodename, "offset", 0x0);
> > > +qemu_fdt_setprop_cell(fdt, nodename, "value", FINISHER_PASS);
> > >  g_free(nodename);
> > >
> > >  nodename = g_strdup_printf("/uart@%lx",
> > > --
> 
> Regards,
> Bin


Re: [RFC][PATCH 2/3] docs/specs: Add specification of ivshmem device revision 2

2019-11-11 Thread Michael S. Tsirkin
On Mon, Nov 11, 2019 at 04:42:52PM +0100, Jan Kiszka wrote:
> On 11.11.19 16:27, Daniel P. Berrangé wrote:
> > On Mon, Nov 11, 2019 at 10:08:20AM -0500, Michael S. Tsirkin wrote:
> > > On Mon, Nov 11, 2019 at 02:59:07PM +0100, Jan Kiszka wrote:
> > > > On 11.11.19 14:45, Michael S. Tsirkin wrote:
> > > > > On Mon, Nov 11, 2019 at 01:57:11PM +0100, Jan Kiszka wrote:
> > > > > > +| Offset | Register   | Content
> > > > > >   |
> > > > > > +|---:|:---|:-|
> > > > > > +|00h | Vendor ID  | 1AF4h  
> > > > > >   |
> > > > > > +|02h | Device ID  | 1110h  
> > > > > >   |
> > > > > 
> > > > > Given it's a virtio vendor ID, please reserve a device ID
> > > > > with the virtio TC.
> > > > 
> > > > Yeah, QEMU's IVSHMEM was always using that. I'm happy to make this 
> > > > finally
> > > > official.
> > > > 
> > > 
> > > And I guess we will just mark it reserved or something right?
> > > Since at least IVSHMEM 1 isn't a virtio device.
> > > And will you be reusing same ID for IVSHMEM 2 or a new one?
> > 
> > 1110h isn't under either of the virtio PCI device ID allowed ranges
> > according to the spec:
> > 
> >"Any PCI device with PCI Vendor ID 0x1AF4, and PCI Device
> > ID 0x1000 through 0x107F inclusive is a virtio device.
> > ...
> > Additionally, devices MAY utilize a Transitional PCI Device
> > ID range, 0x1000 to 0x103F depending on the device type. "
> > 
> > So there's no need to reserve 0x1110h from the virtio spec POV.
> 
> Indeed.
> 
> > 
> > I have, however, ensured it is assigned to ivshmem from POV of
> > Red Hat's own internal tracking of allocated device IDs, under
> > its vendor ID.
> > 
> > If ivshmem 2 is now a virtio device, then it is a good thing that
> > it will get a new/different PCI device ID, to show that it is not
> > compatible with the old device impl.
> 
> At this stage, it is just a PCI device that may be used in combination with
> virtio (stacked on top), but it is not designed like a normal virtio (PCI)
> device. That's because it lacks many properties of regular virtio devices,
> like queues.
> 
> So, if such a device could be come part of the virtio spec, it would be
> separate from the rest, and having an ID from the regular range would likely
> not be helpful in this regard.
> 
> Jan

I agree it needs a separate ID not from the regular range.
It's a distinct transport.
Maybe even a distinct vendor ID - we could easily get another one
if needed.

> -- 
> Siemens AG, Corporate Technology, CT RDA IOT SES-DE
> Corporate Competence Center Embedded Linux




Re: [RFC v4 PATCH 07/49] multi-process: define mpqemu-link object

2019-11-11 Thread Stefan Hajnoczi
On Thu, Oct 24, 2019 at 05:08:48AM -0400, Jagannathan Raman wrote:
> +int mpqemu_msg_recv(MPQemuLinkState *s, MPQemuMsg *msg, MPQemuChannel *chan)
> +{
> +int rc;
> +uint8_t *data;
> +union {
> +char control[CMSG_SPACE(REMOTE_MAX_FDS * sizeof(int))];
> +struct cmsghdr align;
> +} u;
> +struct msghdr hdr;
> +struct cmsghdr *chdr;
> +size_t fdsize;
> +int sock = chan->sock;
> +QemuMutex *lock = >recv_lock;
> +
> +struct iovec iov = {
> +.iov_base = (char *) msg,
> +.iov_len = MPQEMU_MSG_HDR_SIZE,
> +};
> +
> +memset(, 0, sizeof(hdr));
> +memset(, 0, sizeof(u));
> +
> +hdr.msg_iov = 
> +hdr.msg_iovlen = 1;
> +hdr.msg_control = 
> +hdr.msg_controllen = sizeof(u);
> +
> +qemu_mutex_lock(lock);
> +
> +do {
> +rc = recvmsg(sock, , 0);
> +} while (rc < 0 && (errno == EINTR || errno == EAGAIN));
> +
> +if (rc < 0) {
> +qemu_log_mask(LOG_REMOTE_DEBUG, "%s - recvmsg rc is %d, errno is %d,"
> +  " sock %d\n", __func__, rc, errno, sock);
> +qemu_mutex_unlock(lock);
> +return rc;
> +}
> +
> +msg->num_fds = 0;
> +for (chdr = CMSG_FIRSTHDR(); chdr != NULL;
> + chdr = CMSG_NXTHDR(, chdr)) {
> +if ((chdr->cmsg_level == SOL_SOCKET) &&
> +(chdr->cmsg_type == SCM_RIGHTS)) {
> +fdsize = chdr->cmsg_len - CMSG_LEN(0);
> +msg->num_fds = fdsize / sizeof(int);
> +if (msg->num_fds > REMOTE_MAX_FDS) {
> +/*
> + * TODO: Security issue detected. Sender never sends more
> + * than REMOTE_MAX_FDS. This condition should be signaled to
> + * the admin
> + */
> +qemu_log_mask(LOG_REMOTE_DEBUG, "%s: Max FDs exceeded\n", 
> __func__);
> +return -ERANGE;
> +}
> +
> +memcpy(msg->fds, CMSG_DATA(chdr), fdsize);
> +break;
> +}
> +}
> +
> +if (msg->size && msg->bytestream) {
> +msg->data2 = calloc(1, msg->size);
> +data = msg->data2;
> +} else {
> +data = (uint8_t *)>data1;
> +}
> +
> +if (msg->size) {
> +do {
> +rc = read(sock, data, msg->size);
> +} while (rc < 0 && (errno == EINTR || errno == EAGAIN));
> +}
> +
> +qemu_mutex_unlock(lock);
> +
> +return rc;
> +}

This code is still insecure.  Until the communication between processes
is made secure this series does not meet its goal of providing process
isolation.

1. An attacker can overflow msg->data1 easily by setting msg->size but
   not msg->bytestream.
2. An attacker can allocate data2, all mpqemu_msg_recv() callers
   need to free it to prevent memory leaks.
3. mpqemu_msg_recv() callers generally do not validate untrusted msg
   fields.  All the code needs to be audited.

Stefan


signature.asc
Description: PGP signature


Re: [RFC][PATCH 2/3] docs/specs: Add specification of ivshmem device revision 2

2019-11-11 Thread Jan Kiszka

On 11.11.19 17:11, Michael S. Tsirkin wrote:

On Mon, Nov 11, 2019 at 03:27:43PM +, Daniel P. Berrangé wrote:

On Mon, Nov 11, 2019 at 10:08:20AM -0500, Michael S. Tsirkin wrote:

On Mon, Nov 11, 2019 at 02:59:07PM +0100, Jan Kiszka wrote:

On 11.11.19 14:45, Michael S. Tsirkin wrote:

On Mon, Nov 11, 2019 at 01:57:11PM +0100, Jan Kiszka wrote:

+| Offset | Register   | Content
  |
+|---:|:---|:-|
+|00h | Vendor ID  | 1AF4h  
  |
+|02h | Device ID  | 1110h  
  |


Given it's a virtio vendor ID, please reserve a device ID
with the virtio TC.


Yeah, QEMU's IVSHMEM was always using that. I'm happy to make this finally
official.



And I guess we will just mark it reserved or something right?
Since at least IVSHMEM 1 isn't a virtio device.
And will you be reusing same ID for IVSHMEM 2 or a new one?


1110h isn't under either of the virtio PCI device ID allowed ranges
according to the spec:

   "Any PCI device with PCI Vendor ID 0x1AF4, and PCI Device
ID 0x1000 through 0x107F inclusive is a virtio device.
...
Additionally, devices MAY utilize a Transitional PCI Device
ID range, 0x1000 to 0x103F depending on the device type. "

So there's no need to reserve 0x1110h from the virtio spec POV.


Well we do have:

B.3
What Device Number?
Device numbers can be reserved by the OASIS committee: email 
virtio-...@lists.oasis-open.org to secure
a unique one.
Meanwhile for experimental drivers, use 65535 and work backwards.

So it seems it can  in theory conflict at least with experimental virtio 
devices.

Really it's messy that people are reusing the virtio vendor ID for
random stuff - getting a vendor ID is only hard for a hobbyist, any big
company already has an ID - but if it is a hobbyist and they at least
register then doesn't cause much harm.


Note that ivshmem came from a research environment. I do know if there 
was a check for the IDs at the point the code was merged.


That said, I may get a device ID here as well, provided I can explain 
that not a single "product" will own it, but rather an open specification.


Jan

--
Siemens AG, Corporate Technology, CT RDA IOT SES-DE
Corporate Competence Center Embedded Linux



[PATCH v6 1/3] block: introduce compress filter driver

2019-11-11 Thread Andrey Shinkevich
Allow writing all the data compressed through the filter driver.
The written data will be aligned by the cluster size.
Based on the QEMU current implementation, that data can be written to
unallocated clusters only. May be used for a backup job.

Suggested-by: Max Reitz 
Signed-off-by: Andrey Shinkevich 
---
 block/Makefile.objs |   1 +
 block/filter-compress.c | 212 
 qapi/block-core.json|  10 ++-
 3 files changed, 219 insertions(+), 4 deletions(-)
 create mode 100644 block/filter-compress.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index e394fe0..330529b 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -43,6 +43,7 @@ block-obj-y += crypto.o
 
 block-obj-y += aio_task.o
 block-obj-y += backup-top.o
+block-obj-y += filter-compress.o
 
 common-obj-y += stream.o
 
diff --git a/block/filter-compress.c b/block/filter-compress.c
new file mode 100644
index 000..a7b0337
--- /dev/null
+++ b/block/filter-compress.c
@@ -0,0 +1,212 @@
+/*
+ * Compress filter block driver
+ *
+ * Copyright (c) 2019 Virtuozzo International GmbH
+ *
+ * Author:
+ *   Andrey Shinkevich 
+ *   (based on block/copy-on-read.c by Max Reitz)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) any later version of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see .
+ */
+
+#include "qemu/osdep.h"
+#include "block/block_int.h"
+#include "qemu/module.h"
+
+
+static int zip_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
+{
+bs->backing = bdrv_open_child(NULL, options, "file", bs, _file, 
false,
+  errp);
+if (!bs->backing) {
+return -EINVAL;
+}
+
+bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
+BDRV_REQ_WRITE_COMPRESSED |
+(BDRV_REQ_FUA & bs->backing->bs->supported_write_flags);
+
+bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
+((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
+bs->backing->bs->supported_zero_flags);
+
+return 0;
+}
+
+
+#define PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \
+  | BLK_PERM_WRITE \
+  | BLK_PERM_RESIZE)
+#define PERM_UNCHANGED (BLK_PERM_ALL & ~PERM_PASSTHROUGH)
+
+static void zip_child_perm(BlockDriverState *bs, BdrvChild *c,
+const BdrvChildRole *role,
+BlockReopenQueue *reopen_queue,
+uint64_t perm, uint64_t shared,
+uint64_t *nperm, uint64_t *nshared)
+{
+*nperm = perm & PERM_PASSTHROUGH;
+*nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
+
+/*
+ * We must not request write permissions for an inactive node, the child
+ * cannot provide it.
+ */
+if (!(bs->open_flags & BDRV_O_INACTIVE)) {
+*nperm |= BLK_PERM_WRITE_UNCHANGED;
+}
+}
+
+
+static int64_t zip_getlength(BlockDriverState *bs)
+{
+return bdrv_getlength(bs->backing->bs);
+}
+
+
+static int coroutine_fn zip_co_truncate(BlockDriverState *bs, int64_t offset,
+ bool exact, PreallocMode prealloc,
+ Error **errp)
+{
+return bdrv_co_truncate(bs->backing, offset, exact, prealloc, errp);
+}
+
+
+static int coroutine_fn zip_co_preadv(BlockDriverState *bs,
+   uint64_t offset, uint64_t bytes,
+   QEMUIOVector *qiov, int flags)
+{
+return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
+}
+
+
+static int coroutine_fn zip_co_preadv_part(BlockDriverState *bs,
+uint64_t offset, uint64_t bytes,
+QEMUIOVector *qiov,
+size_t qiov_offset,
+int flags)
+{
+return bdrv_co_preadv_part(bs->backing, offset, bytes, qiov, qiov_offset,
+   flags);
+}
+
+
+static int coroutine_fn zip_co_pwritev(BlockDriverState *bs,
+uint64_t offset, uint64_t bytes,
+QEMUIOVector *qiov, int flags)
+{
+return bdrv_co_pwritev(bs->backing, offset, bytes, qiov,
+   flags | BDRV_REQ_WRITE_COMPRESSED);
+}
+
+
+static int coroutine_fn 

Re: [RFC v5 026/126] python: add commit-per-subsystem.py

2019-11-11 Thread Aleksandar Markovic
On Friday, October 11, 2019, Vladimir Sementsov-Ogievskiy <
vsement...@virtuozzo.com> wrote:

> Add script to automatically commit tree-wide changes per-subsystem.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---


Great idea!

Can you just add a comment somewhere close to the top of the file on script
usage? Or "--help" option? If you would like to be the script maintainer,
please change the MAINTAINERS too.

Reviewed-by: Aleksandar Markovic 


>
> CC: Gerd Hoffmann 
> CC: "Gonglei (Arei)" 
> CC: Eduardo Habkost 
> CC: Igor Mammedov 
> CC: Laurent Vivier 
> CC: Amit Shah 
> CC: Kevin Wolf 
> CC: Max Reitz 
> CC: John Snow 
> CC: Ari Sundholm 
> CC: Pavel Dovgalyuk 
> CC: Paolo Bonzini 
> CC: Stefan Hajnoczi 
> CC: Fam Zheng 
> CC: Stefan Weil 
> CC: Ronnie Sahlberg 
> CC: Peter Lieven 
> CC: Eric Blake 
> CC: "Denis V. Lunev" 
> CC: Markus Armbruster 
> CC: Alberto Garcia 
> CC: Jason Dillaman 
> CC: Wen Congyang 
> CC: Xie Changlong 
> CC: Liu Yuan 
> CC: "Richard W.M. Jones" 
> CC: Jeff Cody 
> CC: "Marc-André Lureau" 
> CC: "Daniel P. Berrangé" 
> CC: Richard Henderson 
> CC: Greg Kurz 
> CC: "Michael S. Tsirkin" 
> CC: Marcel Apfelbaum 
> CC: Beniamino Galvani 
> CC: Peter Maydell 
> CC: "Cédric Le Goater" 
> CC: Andrew Jeffery 
> CC: Joel Stanley 
> CC: Andrew Baumann 
> CC: "Philippe Mathieu-Daudé" 
> CC: Antony Pavlov 
> CC: Jean-Christophe Dubois 
> CC: Peter Chubb 
> CC: Subbaraya Sundeep 
> CC: Eric Auger 
> CC: Alistair Francis 
> CC: "Edgar E. Iglesias" 
> CC: Stefano Stabellini 
> CC: Anthony Perard 
> CC: Paul Durrant 
> CC: Paul Burton 
> CC: Aleksandar Rikalo 
> CC: Chris Wulff 
> CC: Marek Vasut 
> CC: David Gibson 
> CC: Cornelia Huck 
> CC: Halil Pasic 
> CC: Christian Borntraeger 
> CC: "Hervé Poussineau" 
> CC: Xiao Guangrong 
> CC: Aurelien Jarno 
> CC: Aleksandar Markovic 
> CC: Mark Cave-Ayland 
> CC: Jason Wang 
> CC: Laszlo Ersek 
> CC: Yuval Shaia 
> CC: Palmer Dabbelt 
> CC: Sagar Karandikar 
> CC: Bastian Koppelmann 
> CC: David Hildenbrand 
> CC: Thomas Huth 
> CC: Eric Farman 
> CC: Matthew Rosato 
> CC: Hannes Reinecke 
> CC: Michael Walle 
> CC: Artyom Tarasenko 
> CC: Stefan Berger 
> CC: Samuel Thibault 
> CC: Alex Williamson 
> CC: Tony Krowiak 
> CC: Pierre Morel 
> CC: Michael Roth 
> CC: Hailiang Zhang 
> CC: Juan Quintela 
> CC: "Dr. David Alan Gilbert" 
> CC: Luigi Rizzo 
> CC: Giuseppe Lettieri 
> CC: Vincenzo Maffione 
> CC: Jan Kiszka 
> CC: Anthony Green 
> CC: Stafford Horne 
> CC: Guan Xuetao 
> CC: Max Filippov 
> CC: qemu-bl...@nongnu.org
> CC: integrat...@gluster.org
> CC: sheep...@lists.wpkg.org
> CC: qemu-...@nongnu.org
> CC: xen-de...@lists.xenproject.org
> CC: qemu-...@nongnu.org
> CC: qemu-s3...@nongnu.org
> CC: qemu-ri...@nongnu.org
>
>  python/commit-per-subsystem.py | 204 +
>  1 file changed, 204 insertions(+)
>  create mode 100755 python/commit-per-subsystem.py
>
> diff --git a/python/commit-per-subsystem.py b/python/commit-per-subsystem.
> py
> new file mode 100755
> index 00..2ccf84cb15
> --- /dev/null
> +++ b/python/commit-per-subsystem.py
> @@ -0,0 +1,204 @@
> +#!/usr/bin/env python3
> +#
> +# Copyright (c) 2019 Virtuozzo International GmbH
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program.  If not, see .
> +#
> +
> +import subprocess
> +import sys
> +import os
> +import glob
> +
> +
> +def git_add(pattern):
> +subprocess.run(['git', 'add', pattern])
> +
> +
> +def git_commit(msg):
> +subprocess.run(['git', 'commit', '-m', msg], capture_output=True)
> +
> +
> +def git_changed_files():
> +ret = subprocess.check_output(['git', 'diff', '--name-only'],
> encoding='utf-8').split('\n')
> +if ret[-1] == '':
> +del ret[-1]
> +return ret
> +
> +
> +maintainers = sys.argv[1]
> +message = sys.argv[2].strip()
> +
> +subsystem = None
> +
> +remap = {
> +'Block layer core': 'block',
> +'Block Jobs': 'block',
> +'Dirty Bitmaps': 'block',
> +'Block QAPI, monitor, command line': 'block',
> +'Block I/O path': 'block',
> +'Throttling infrastructure': 'block',
> +'Architecture support': 's390x',
> +'Guest CPU Cores (KVM)': 'kvm',
> +'Guest CPU Cores (Xen)': 'xen',
> +'Guest CPU cores (TCG)': 'tcg',
> +'Network Block Device (NBD)': 'nbd',
> +'Parallel NOR Flash devices': 'pflash',
> +'Firmware configuration (fw_cfg)': 'fw_cfg',
> +'Block 

Re: [PATCH v3 00/20] nvme: support NVMe v1.3d, SGLs and multiple namespaces

2019-11-11 Thread no-reply
Patchew URL: https://patchew.org/QEMU/2019122545.252478-1-...@irrelevant.dk/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [PATCH v3 00/20] nvme: support NVMe v1.3d, SGLs and multiple namespaces
Type: series
Message-id: 2019122545.252478-1-...@irrelevant.dk

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
d447043 nvme: handle dma errors
363b412 pci: pass along the return value of dma_memory_rw
9931480 nvme: make lba data size configurable
653dfec nvme: remove redundant NvmeCmd pointer parameter
f5d2d55 nvme: bump controller pci device id
899efe1 nvme: support multiple namespaces
4fa538c nvme: add support for scatter gather lists
77a96f7 nvme: allow multiple aios per command
c2e14a0 nvme: refactor prp mapping
8d7ce85 nvme: bump supported specification version to 1.3
3c4c466 nvme: add missing mandatory features
c2ae92c nvme: add logging to error information log page
15f570f nvme: add support for the asynchronous event request command
89736d5 nvme: add support for the get log page command
3e527d8 nvme: refactor device realization
3f13647 nvme: add support for the abort command
29c1323 nvme: allow completion queues in the cmb
c8e3900 nvme: populate the mandatory subnqn and ver fields
e4d9684 nvme: add missing fields in the identify controller data structure
3da94d4 nvme: move device parameters to separate struct
48600f3 nvme: remove superfluous breaks

=== OUTPUT BEGIN ===
1/21 Checking commit 48600f300908 (nvme: remove superfluous breaks)
2/21 Checking commit 3da94d4fe855 (nvme: move device parameters to separate 
struct)
ERROR: Macros with complex values should be enclosed in parenthesis
#177: FILE: hw/block/nvme.h:6:
+#define DEFINE_NVME_PROPERTIES(_state, _props) \
+DEFINE_PROP_STRING("serial", _state, _props.serial), \
+DEFINE_PROP_UINT32("cmb_size_mb", _state, _props.cmb_size_mb, 0), \
+DEFINE_PROP_UINT32("num_queues", _state, _props.num_queues, 64)

total: 1 errors, 0 warnings, 181 lines checked

Patch 2/21 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

3/21 Checking commit e4d968402d62 (nvme: add missing fields in the identify 
controller data structure)
4/21 Checking commit c8e3900b8f9b (nvme: populate the mandatory subnqn and ver 
fields)
5/21 Checking commit 29c13234b204 (nvme: allow completion queues in the cmb)
6/21 Checking commit 3f1364798e4c (nvme: add support for the abort command)
7/21 Checking commit 3e527d850c0c (nvme: refactor device realization)
8/21 Checking commit 89736d5d2575 (nvme: add support for the get log page 
command)
9/21 Checking commit 15f570fc5e87 (nvme: add support for the asynchronous event 
request command)
10/21 Checking commit c2ae92ca1dce (nvme: add logging to error information log 
page)
11/21 Checking commit 3c4c466a54f3 (nvme: add missing mandatory features)
12/21 Checking commit 8d7ce85d02de (nvme: bump supported specification version 
to 1.3)
13/21 Checking commit c2e14a009c64 (nvme: refactor prp mapping)
14/21 Checking commit 77a96f7b0da7 (nvme: allow multiple aios per command)
15/21 Checking commit 4fa538c80d5f (nvme: add support for scatter gather lists)
16/21 Checking commit 899efe1f1b16 (nvme: support multiple namespaces)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#42: 
new file mode 100644

ERROR: Macros with complex values should be enclosed in parenthesis
#218: FILE: hw/block/nvme-ns.h:8:
+#define DEFINE_NVME_NS_PROPERTIES(_state, _props) \
+DEFINE_PROP_DRIVE("drive", _state, blk), \
+DEFINE_PROP_UINT32("nsid", _state, _props.nsid, 0)

total: 1 errors, 1 warnings, 832 lines checked

Patch 16/21 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

17/21 Checking commit f5d2d55972a1 (nvme: bump controller pci device id)
18/21 Checking commit 653dfecb75e9 (nvme: remove redundant NvmeCmd pointer 
parameter)
19/21 Checking commit 9931480ce8e3 (nvme: make lba data size configurable)
20/21 Checking commit 363b412864ea (pci: pass along the return value of 
dma_memory_rw)
21/21 Checking commit d447043cfb9b (nvme: handle dma errors)
WARNING: line over 80 characters
#77: FILE: hw/block/nvme.c:257:
+if (nvme_addr_read(n, prp_ent, (void *) prp_list, 
prp_trans)) {

WARNING: line over 80 characters
#103: FILE: hw/block/nvme.c:428:
+if (nvme_addr_read(n, addr, segment, nsgld * 
sizeof(NvmeSglDescriptor))) {

total: 0 errors, 2 warnings, 148 lines checked

Patch 21/21 has style problems, please review.  If any of these errors
are false positives 

Re: [RFC][PATCH 2/3] docs/specs: Add specification of ivshmem device revision 2

2019-11-11 Thread Jan Kiszka

On 11.11.19 17:14, Michael S. Tsirkin wrote:

On Mon, Nov 11, 2019 at 04:42:52PM +0100, Jan Kiszka wrote:

On 11.11.19 16:27, Daniel P. Berrangé wrote:

On Mon, Nov 11, 2019 at 10:08:20AM -0500, Michael S. Tsirkin wrote:

On Mon, Nov 11, 2019 at 02:59:07PM +0100, Jan Kiszka wrote:

On 11.11.19 14:45, Michael S. Tsirkin wrote:

On Mon, Nov 11, 2019 at 01:57:11PM +0100, Jan Kiszka wrote:

+| Offset | Register   | Content
  |
+|---:|:---|:-|
+|00h | Vendor ID  | 1AF4h  
  |
+|02h | Device ID  | 1110h  
  |


Given it's a virtio vendor ID, please reserve a device ID
with the virtio TC.


Yeah, QEMU's IVSHMEM was always using that. I'm happy to make this finally
official.



And I guess we will just mark it reserved or something right?
Since at least IVSHMEM 1 isn't a virtio device.
And will you be reusing same ID for IVSHMEM 2 or a new one?


1110h isn't under either of the virtio PCI device ID allowed ranges
according to the spec:

"Any PCI device with PCI Vendor ID 0x1AF4, and PCI Device
 ID 0x1000 through 0x107F inclusive is a virtio device.
 ...
 Additionally, devices MAY utilize a Transitional PCI Device
 ID range, 0x1000 to 0x103F depending on the device type. "

So there's no need to reserve 0x1110h from the virtio spec POV.


Indeed.



I have, however, ensured it is assigned to ivshmem from POV of
Red Hat's own internal tracking of allocated device IDs, under
its vendor ID.

If ivshmem 2 is now a virtio device, then it is a good thing that
it will get a new/different PCI device ID, to show that it is not
compatible with the old device impl.


At this stage, it is just a PCI device that may be used in combination with
virtio (stacked on top), but it is not designed like a normal virtio (PCI)
device. That's because it lacks many properties of regular virtio devices,
like queues.

So, if such a device could be come part of the virtio spec, it would be
separate from the rest, and having an ID from the regular range would likely
not be helpful in this regard.

Jan


I agree it needs a separate ID not from the regular range.
It's a distinct transport.
Maybe even a distinct vendor ID - we could easily get another one
if needed.


That might be useful because I've seen the kernel's virtio-pci driver 
grabbing ivshmem devices from time to time. OTOH, that could likely also 
be improved in Linux, at least for future versions.


Jan

--
Siemens AG, Corporate Technology, CT RDA IOT SES-DE
Corporate Competence Center Embedded Linux



Re: [PATCH v2 03/15] block/block: add BDRV flag for io_uring

2019-11-11 Thread Max Reitz
On 11.11.19 11:57, Kevin Wolf wrote:
> Am 25.10.2019 um 18:04 hat Stefan Hajnoczi geschrieben:
>> From: Aarushi Mehta 
>>
>> Signed-off-by: Aarushi Mehta 
>> Reviewed-by: Maxim Levitsky 
>> Signed-off-by: Stefan Hajnoczi 
>> ---
>>  include/block/block.h | 1 +
>>  1 file changed, 1 insertion(+)
>>
>> diff --git a/include/block/block.h b/include/block/block.h
>> index 89606bd9f8..bdb48dcd1b 100644
>> --- a/include/block/block.h
>> +++ b/include/block/block.h
>> @@ -126,6 +126,7 @@ typedef struct HDGeometry {
>>ignoring the format layer */
>>  #define BDRV_O_NO_IO   0x1 /* don't initialize for I/O */
>>  #define BDRV_O_AUTO_RDONLY 0x2 /* degrade to read-only if opening 
>> read-write fails */
>> +#define BDRV_O_IO_URING0x4 /* use io_uring instead of the thread 
>> pool */
> 
> Why do we need a new (global!) flag rather than a driver-specific option
> for file-posix? This looks wrong to me, the flag has no sensible meaning
> for any other driver.
> 
> (Yes, BDRV_O_NATIVE_AIO is wrong, too, but compatibility means we can't
> remove it easily.)

To add to that, Kevin and me had a short talk on IRC, and it seemed like
the reason would be that it’s easier to use this way than an option
would be.

To me, the problem seems to be that it’s only easier to use because of
the option inheritance we have in the block layer (so you can set this
flag on a qcow2 node and its file node will have it, too).  But
naturally this inheritance is a bit of magic (because qemu has to guess
where you probably want the flag to end up), so I’d too rather avoid
adding to it.

OTOH, I wonder whether ease of use is really that important here.  Would
people who want to use io_uring really care about a command line that’s
going to be a bit more complicated than just
-drive file=foo.img,format=$imgfmt,aio=io_uring,if=$interface?  (Namely
file.aio in this case, or maybe even a full-on block graph definition.)

Max



signature.asc
Description: OpenPGP digital signature


[PATCH for-5.0 v2 23/23] iotests: Mirror must not attempt to create loops

2019-11-11 Thread Max Reitz
Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/041 | 235 +
 tests/qemu-iotests/041.out |   4 +-
 2 files changed, 237 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index 9a00cf6f7b..0e43bb699d 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -1246,6 +1246,241 @@ class TestReplaces(iotests.QMPTestCase):
 
 self.vm.assert_block_path('filter0', '/file', 'target')
 
+"""
+See what happens when the @sync/@replaces configuration dictates
+creating a loop.
+"""
+@iotests.skip_if_unsupported(['throttle'])
+def test_loop(self):
+qemu_img('create', '-f', iotests.imgfmt, test_img, str(1 * 1024 * 
1024))
+
+# Dummy group so we can create a NOP filter
+result = self.vm.qmp('object-add', qom_type='throttle-group', id='tg0')
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('blockdev-add', **{
+ 'driver': 'throttle',
+ 'node-name': 'source',
+ 'throttle-group': 'tg0',
+ 'file': {
+ 'driver': iotests.imgfmt,
+ 'node-name': 'filtered',
+ 'file': {
+ 'driver': 'file',
+ 'filename': test_img
+ }
+ }
+ })
+self.assert_qmp(result, 'return', {})
+
+# Block graph is now:
+#   source[throttle] --file--> filtered[imgfmt] --file--> ...
+
+result = self.vm.qmp('drive-mirror', job_id='mirror', device='source',
+ target=target_img, format=iotests.imgfmt,
+ node_name='target', sync='none',
+ replaces='filtered')
+
+"""
+Block graph before mirror exits would be (ignoring mirror_top):
+  source[throttle] --file--> filtered[imgfmt] --file--> ...
+  target[imgfmt] --file--> ...
+
+Then, because of sync=none and drive-mirror in absolute-paths mode,
+the source is attached to the target:
+  source[throttle] --file--> filtered[imgfmt] --file--> ...
+ ^
+  backing
+ |
+target[imgfmt] --file--> ...
+
+Replacing filtered by target would yield:
+  source[throttle] --file--> target[imgfmt] --file--> ...
+ ^|
+ +--- backing +
+
+I.e., a loop.  bdrv_replace_node() detects this and simply
+does not let source's file link point to target.  However,
+that means that target cannot really replace source.
+
+drive-mirror should detect this and not allow this case.
+"""
+
+self.assert_qmp(result, 'error/desc',
+"Replacing 'filtered' by 'target' with this sync " + \
+"mode would result in a loop, because the former " + \
+"would be a child of the latter's backing file " + \
+"('source') after the mirror job")
+
+"""
+Test what happens when there would be no loop with the pre-mirror
+configuration, but something changes during the mirror job that asks
+for a loop to be created during completion.
+"""
+@iotests.skip_if_unsupported(['copy-on-read', 'quorum'])
+def test_loop_during_mirror(self):
+qemu_img('create', '-f', iotests.imgfmt, test_img, str(1 * 1024 * 
1024))
+
+"""
+In this test, we are going to mirror from a node that is a
+filter above some file "common-base".  The target is a quorum
+node (with just an unrelated null-co child).
+
+We will ask the mirror job to replace common-base by the
+target upon completion.  That is a completely valid
+configuration so far.
+
+However, while the job is running, we add common-base as an
+(indirect[1]) child to the target quorum node.  This way,
+completing the job as requested would yield a loop, because
+the target would be supposed to replace common-base -- which
+is its own (indirect) child.
+
+[1] It needs to be an indirect child, because if it were a
+direct child, the mirror job would simply end by effectively
+injecting the target above common-base.  This is the same
+effect as when using sync=none: The target ends up above the
+source.
+
+So only loops that have a length of more than one node are
+forbidden, which means common-base must be an indirect child
+of the target.
+
+(Furthermore, we are going to use x-blockdev-change to add
+common-base as a 

Re: [RFC v4 PATCH 30/49] multi-process: send heartbeat messages to remote

2019-11-11 Thread Stefan Hajnoczi
On Thu, Oct 24, 2019 at 05:09:11AM -0400, Jagannathan Raman wrote:
> +static void broadcast_msg(MPQemuMsg *msg, bool need_reply)
> +{
> +PCIProxyDev *entry;
> +unsigned int pid;
> +int wait;
> +
> +QLIST_FOREACH(entry, _dev_list.devices, next) {
> +if (need_reply) {
> +wait = eventfd(0, EFD_NONBLOCK);
> +msg->num_fds = 1;
> +msg->fds[0] = wait;
> +}
> +
> +mpqemu_msg_send(entry->mpqemu_link, msg, entry->mpqemu_link->com);
> +if (need_reply) {
> +pid = (uint32_t)wait_for_remote(wait);

Sometimes QEMU really needs to wait for the remote process before it can
make progress.  I think this is not one of those cases though.

Since QEMU is event-driven it's problematic to invoke blocking system
calls.  The remote process might not respond for a significant amount of
time.  Other QEMU threads will be held up waiting for the QEMU global
mutex in the meantime (because we hold it!).

Please implement heartbeat/ping asynchronously.  The wait eventfd should
be read by an event loop fd handler instead.  That way QEMU can continue
with running the VM while waiting for the remote process.

This will also improve guest performance because there will be less
jitter (random latency because the event loop is held up waiting for
remote processes for short periods of time).


signature.asc
Description: PGP signature


Re: [RFC v4 PATCH 33/49] multi-process: perform device reset in the remote process

2019-11-11 Thread Stefan Hajnoczi
On Thu, Oct 24, 2019 at 05:09:14AM -0400, Jagannathan Raman wrote:
> +void proxy_device_reset(DeviceState *dev)
> +{
> +PCIProxyDev *pdev = PCI_PROXY_DEV(dev);
> +MPQemuMsg msg;
> +
> +memset(, 0, sizeof(MPQemuMsg));
> +
> +msg.bytestream = 0;
> +msg.size = sizeof(msg.data1);
> +msg.cmd = DEVICE_RESET;
> +
> +mpqemu_msg_send(pdev->mpqemu_link, , pdev->mpqemu_link->com);
> +}

Device reset must wait for the remote process to finish reset, otherwise
the remote device could still be running after proxy_device_reset()
returns from sending the message.

Stefan


signature.asc
Description: PGP signature


[PATCH for-5.0 v2 21/23] iotests: Add tests for invalid Quorum @replaces

2019-11-11 Thread Max Reitz
Add two tests to see that you cannot replace a Quorum child with the
mirror job while the child is in use by a different parent.

Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/041 | 70 +-
 tests/qemu-iotests/041.out |  4 +--
 2 files changed, 71 insertions(+), 3 deletions(-)

diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index 0c1af45639..ab0cb5b42f 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -20,6 +20,7 @@
 
 import time
 import os
+import re
 import iotests
 from iotests import qemu_img, qemu_io
 
@@ -34,6 +35,8 @@ quorum_img3 = os.path.join(iotests.test_dir, 'quorum3.img')
 quorum_repair_img = os.path.join(iotests.test_dir, 'quorum_repair.img')
 quorum_snapshot_file = os.path.join(iotests.test_dir, 'quorum_snapshot.img')
 
+nbd_sock_path = os.path.join(iotests.test_dir, 'nbd.sock')
+
 class TestSingleDrive(iotests.QMPTestCase):
 image_len = 1 * 1024 * 1024 # MB
 qmp_cmd = 'drive-mirror'
@@ -901,7 +904,8 @@ class TestRepairQuorum(iotests.QMPTestCase):
 
 def tearDown(self):
 self.vm.shutdown()
-for i in self.IMAGES + [ quorum_repair_img, quorum_snapshot_file ]:
+for i in self.IMAGES + [ quorum_repair_img, quorum_snapshot_file,
+ nbd_sock_path ]:
 # Do a try/except because the test may have deleted some images
 try:
 os.remove(i)
@@ -1042,6 +1046,70 @@ class TestRepairQuorum(iotests.QMPTestCase):
 self.assert_has_block_node("repair0", quorum_repair_img)
 self.vm.assert_block_path('quorum0', '/children.1', 'repair0')
 
+"""
+Check that we cannot replace a Quorum child when it has other
+parents.
+"""
+def test_with_other_parent(self):
+result = self.vm.qmp('nbd-server-start',
+ addr={
+ 'type': 'unix',
+ 'data': {'path': nbd_sock_path}
+ })
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('nbd-server-add', device='img1')
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('drive-mirror', job_id='mirror', device='quorum0',
+ sync='full', node_name='repair0', replaces='img1',
+ target=quorum_repair_img, format=iotests.imgfmt)
+self.assert_qmp(result, 'error/desc',
+"Cannot replace 'img1' by a node mirrored from "
+"'quorum0', because it cannot be guaranteed that doing 
"
+"so would not lead to an abrupt change of visible 
data")
+
+"""
+The same as test_with_other_parent(), but add the NBD server only
+when the mirror job is already running.
+"""
+def test_with_other_parents_after_mirror_start(self):
+result = self.vm.qmp('nbd-server-start',
+ addr={
+ 'type': 'unix',
+ 'data': {'path': nbd_sock_path}
+ })
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('drive-mirror', job_id='mirror', device='quorum0',
+ sync='full', node_name='repair0', replaces='img1',
+ target=quorum_repair_img, format=iotests.imgfmt)
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('nbd-server-add', device='img1')
+self.assert_qmp(result, 'return', {})
+
+# The full error message goes to stderr, we will check it later
+self.complete_and_wait('mirror',
+   completion_error='Operation not permitted')
+
+# Should not have been replaced
+self.vm.assert_block_path('quorum0', '/children.1', 'img1')
+
+# Check the full error message now
+self.vm.shutdown()
+log = self.vm.get_log()
+log = re.sub(r'^\[I \d+\.\d+\] OPENED\n', '', log)
+log = re.sub(r'^Formatting.*\n', '', log)
+log = re.sub(r'\n\[I \+\d+\.\d+\] CLOSED\n?$', '', log)
+log = re.sub(r'^qemu-system-[^:]*: ', '', log)
+
+self.assertEqual(log,
+ "Can no longer replace 'img1' by 'repair0', because " 
+
+ "it can no longer be guaranteed that doing so would " 
+
+ "not lead to an abrupt change of visible data")
+
+
 # Test mirroring with a source that does not have any parents (not even a
 # BlockBackend)
 class TestOrphanedSource(iotests.QMPTestCase):
diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out
index f496be9197..ffc779b4d1 100644
--- a/tests/qemu-iotests/041.out
+++ b/tests/qemu-iotests/041.out
@@ -1,5 +1,5 @@
-...

Re: [PATCH] tests/migration: use the common library function

2019-11-11 Thread Peter Maydell
On Mon, 11 Nov 2019 at 14:41, Thomas Huth  wrote:
>
> On 11/11/2019 15.11, Alex Bennée wrote:
> >
> > Thomas Huth  writes:
> >
> >> On 11/11/2019 13.55, Alex Bennée wrote:
> >>> Signed-off-by: Alex Bennée 
> >>
> >> Could you please add at least a short patch description? (Why is this
> >> change necessary / a good idea?)
> >
> > It's just a minor clean-up Dave happened to comment on last week. Using
> > the helper function is preferable given it abstracts away any system
> > differences for the same information.
>
> But this also changes the behavior on non-Linux systems (i.e. the *BSDs
> and macOS), since they will now use getpid() instead of gettid ... is
> that the intended change here?

Does the 'stress' program work on those OSes? For that matter,
does it work on Linux?

As far as I can tell we don't compile stress.c on any host,
since the only thing that depends on tests/migration/stress$(EXESUF)
is tests/migration/initrd-stress.img, and nothing depends on that.

Nothing creates tests/migration/ in the build dir so trying
to build tests/migration/stress in an out-of-tree config fails:

  CC  tests/migration/stress.o
/home/petmay01/linaro/qemu-from-laptop/qemu/tests/migration/stress.c:359:1:
fatal error: opening dependency file tests/migration/stress.d: No such
file or directory
 }
 ^
compilation terminated.

...and if I fix that by manually creating the directory then
it fails to link:

  CC  tests/migration/stress.o
  LINKtests/migration/stress
tests/migration/stress.o: In function `get_command_arg_str':
/home/petmay01/linaro/qemu-from-laptop/qemu/tests/migration/stress.c:107:
undefined reference to `g_strndup'
/home/petmay01/linaro/qemu-from-laptop/qemu/tests/migration/stress.c:109:
undefined reference to `g_strdup'
tests/migration/stress.o: In function `get_command_arg_ull':
/home/petmay01/linaro/qemu-from-laptop/qemu/tests/migration/stress.c:129:
undefined reference to `g_free'
/home/petmay01/linaro/qemu-from-laptop/qemu/tests/migration/stress.c:132:
undefined reference to `g_free'
tests/migration/stress.o: In function `stress':
/home/petmay01/linaro/qemu-from-laptop/qemu/tests/migration/stress.c:253:
undefined reference to `pthread_create'
collect2: error: ld returned 1 exit status
/home/petmay01/linaro/qemu-from-laptop/qemu/tests/Makefile.include:849:
recipe for target 'tests/migration/stress' failed

Is this dead code ?

thanks
-- PMM



[PATCH for-5.0 v2 19/23] iotests: Resolve TODOs in 041

2019-11-11 Thread Max Reitz
Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/041 | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index 2ab59e9c53..d636cb7f1d 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -918,8 +918,7 @@ class TestRepairQuorum(iotests.QMPTestCase):
 
 self.complete_and_wait(drive="job0")
 self.assert_has_block_node("repair0", quorum_repair_img)
-# TODO: a better test requiring some QEMU infrastructure will be added
-#   to check that this file is really driven by quorum
+self.vm.assert_block_path('quorum0', '/children.1', 'repair0')
 self.vm.shutdown()
 self.assertTrue(iotests.compare_images(quorum_img2, quorum_repair_img),
 'target image does not match source after mirroring')
@@ -1041,9 +1040,7 @@ class TestRepairQuorum(iotests.QMPTestCase):
 
 self.complete_and_wait('job0')
 self.assert_has_block_node("repair0", quorum_repair_img)
-# TODO: a better test requiring some QEMU infrastructure will be added
-#   to check that this file is really driven by quorum
-self.vm.shutdown()
+self.vm.assert_block_path('quorum0', '/children.1', 'repair0')
 
 # Test mirroring with a source that does not have any parents (not even a
 # BlockBackend)
-- 
2.23.0




Re: [RFC v4 PATCH 32/49] multi-process: Use separate MMIO communication channel

2019-11-11 Thread Stefan Hajnoczi
On Thu, Oct 24, 2019 at 05:09:13AM -0400, Jagannathan Raman wrote:
> Using a separate communication channel for MMIO helps
> with improving Performance

Why?


signature.asc
Description: PGP signature


Re: [RFC][PATCH 2/3] docs/specs: Add specification of ivshmem device revision 2

2019-11-11 Thread Michael S. Tsirkin
On Mon, Nov 11, 2019 at 03:27:43PM +, Daniel P. Berrangé wrote:
> On Mon, Nov 11, 2019 at 10:08:20AM -0500, Michael S. Tsirkin wrote:
> > On Mon, Nov 11, 2019 at 02:59:07PM +0100, Jan Kiszka wrote:
> > > On 11.11.19 14:45, Michael S. Tsirkin wrote:
> > > > On Mon, Nov 11, 2019 at 01:57:11PM +0100, Jan Kiszka wrote:
> > > > > +| Offset | Register   | Content  
> > > > > |
> > > > > +|---:|:---|:-|
> > > > > +|00h | Vendor ID  | 1AF4h
> > > > > |
> > > > > +|02h | Device ID  | 1110h
> > > > > |
> > > > 
> > > > Given it's a virtio vendor ID, please reserve a device ID
> > > > with the virtio TC.
> > > 
> > > Yeah, QEMU's IVSHMEM was always using that. I'm happy to make this finally
> > > official.
> > > 
> > 
> > And I guess we will just mark it reserved or something right?
> > Since at least IVSHMEM 1 isn't a virtio device.
> > And will you be reusing same ID for IVSHMEM 2 or a new one?
> 
> 1110h isn't under either of the virtio PCI device ID allowed ranges
> according to the spec:
> 
>   "Any PCI device with PCI Vendor ID 0x1AF4, and PCI Device
>ID 0x1000 through 0x107F inclusive is a virtio device.
>...
>Additionally, devices MAY utilize a Transitional PCI Device 
>ID range, 0x1000 to 0x103F depending on the device type. "
> 
> So there's no need to reserve 0x1110h from the virtio spec POV.

Well we do have:

B.3
What Device Number?
Device numbers can be reserved by the OASIS committee: email 
virtio-...@lists.oasis-open.org to secure
a unique one.
Meanwhile for experimental drivers, use 65535 and work backwards.

So it seems it can  in theory conflict at least with experimental virtio 
devices.

Really it's messy that people are reusing the virtio vendor ID for
random stuff - getting a vendor ID is only hard for a hobbyist, any big
company already has an ID - but if it is a hobbyist and they at least
register then doesn't cause much harm.

E.g. Red Hat switched to 1b36 for new non virtio devices and I think that's
nicer.


> I have, however, ensured it is assigned to ivshmem from POV of
> Red Hat's own internal tracking of allocated device IDs, under
> its vendor ID.

Thanks!

> If ivshmem 2 is now a virtio device, then it is a good thing that
> it will get a new/different PCI device ID, to show that it is not
> compatible with the old device impl.
> 
> Regards,
> Daniel
> -- 
> |: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [RFC v4 PATCH 45/49] multi-process/mig: Synchronize runstate of remote process

2019-11-11 Thread Stefan Hajnoczi
On Thu, Oct 24, 2019 at 05:09:26AM -0400, Jagannathan Raman wrote:
> @@ -656,6 +657,19 @@ static void init_proxy(PCIDevice *dev, char *command, 
> bool need_spawn, Error **e
>  }
>  }
>  
> +static void proxy_vm_state_change(void *opaque, int running, RunState state)
> +{
> +PCIProxyDev *dev = opaque;
> +MPQemuMsg msg = { 0 };
> +
> +msg.cmd = RUNSTATE_SET;
> +msg.bytestream = 0;
> +msg.size = sizeof(msg.data1);
> +msg.data1.runstate.state = state;
> +
> +mpqemu_msg_send(dev->mpqemu_link, , dev->mpqemu_link->com);
> +}

Changing vm state is a barrier operation - devices must not dirty memory
afterwards.  This function doesn't have barrier semantics, it sends off
the message without waiting for the remote process to finish processing
it.  This means there is a race condition where QEMU has changes the vm
state but devices could still dirty memory.  Please wait for a reply to
prevent this.

Stefan


signature.asc
Description: PGP signature


[PATCH v6 3/3] tests/qemu-iotests: add case to write compressed data of multiple clusters

2019-11-11 Thread Andrey Shinkevich
Add the case to the iotest #214 that checks possibility of writing
compressed data of more than one cluster size. The test case involves
the compress filter driver showing a sample usage of that.

Signed-off-by: Andrey Shinkevich 
---
 tests/qemu-iotests/214 | 43 +++
 tests/qemu-iotests/214.out | 14 ++
 2 files changed, 57 insertions(+)

diff --git a/tests/qemu-iotests/214 b/tests/qemu-iotests/214
index 21ec8a2..5012112 100755
--- a/tests/qemu-iotests/214
+++ b/tests/qemu-iotests/214
@@ -89,6 +89,49 @@ _check_test_img -r all
 $QEMU_IO -c "read  -P 0x11  0 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | 
_filter_testdir
 $QEMU_IO -c "read  -P 0x22 4M 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | 
_filter_testdir
 
+echo
+echo "=== Write compressed data of multiple clusters ==="
+echo
+cluster_size=0x1
+_make_test_img 2M -o cluster_size=$cluster_size
+
+echo "Write uncompressed data:"
+let data_size="8 * $cluster_size"
+$QEMU_IO -c "write -P 0xaa 0 $data_size" "$TEST_IMG" \
+ 2>&1 | _filter_qemu_io | _filter_testdir
+sizeA=$($QEMU_IMG info --output=json "$TEST_IMG" |
+sed -n '/"actual-size":/ s/[^0-9]//gp')
+
+_make_test_img 2M -o cluster_size=$cluster_size
+echo "Write compressed data:"
+let data_size="3 * $cluster_size + ($cluster_size / 2)"
+# Set compress=on. That will align the written data
+# by the cluster size and will write them compressed.
+QEMU_IO_OPTIONS=$QEMU_IO_OPTIONS_NO_FMT \
+$QEMU_IO -c "write -P 0xbb 0 $data_size" --image-opts \
+ 
"driver=compress,file.driver=$IMGFMT,file.file.driver=file,file.file.filename=$TEST_IMG"
 \
+ 2>&1 | _filter_qemu_io | _filter_testdir
+
+let offset="4 * $cluster_size"
+QEMU_IO_OPTIONS=$QEMU_IO_OPTIONS_NO_FMT \
+$QEMU_IO -c "write -P 0xcc $offset $data_size" "json:{\
+'driver': 'compress',
+'file': {'driver': '$IMGFMT',
+ 'file': {'driver': 'file',
+  'filename': '$TEST_IMG'}}}" | \
+  _filter_qemu_io | _filter_testdir
+
+sizeB=$($QEMU_IMG info --output=json "$TEST_IMG" |
+sed -n '/"actual-size":/ s/[^0-9]//gp')
+
+if [ $sizeA -le $sizeB ]
+then
+echo "Compression ERROR"
+fi
+
+$QEMU_IMG check --output=json "$TEST_IMG" |
+  sed -n 's/,$//; /"compressed-clusters":/ s/^ *//p'
+
 # success, all done
 echo '*** done'
 rm -f $seq.full
diff --git a/tests/qemu-iotests/214.out b/tests/qemu-iotests/214.out
index 0fcd8dc..4a2ec33 100644
--- a/tests/qemu-iotests/214.out
+++ b/tests/qemu-iotests/214.out
@@ -32,4 +32,18 @@ read 4194304/4194304 bytes at offset 0
 4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 4194304/4194304 bytes at offset 4194304
 4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+=== Write compressed data of multiple clusters ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2097152
+Write uncompressed data:
+wrote 524288/524288 bytes at offset 0
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2097152
+Write compressed data:
+wrote 229376/229376 bytes at offset 0
+224 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 229376/229376 bytes at offset 262144
+224 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+"compressed-clusters": 8
 *** done
-- 
1.8.3.1




[PATCH for-5.0 v2 17/23] iotests: Use skip_if_unsupported decorator in 041

2019-11-11 Thread Max Reitz
We can use this decorator above TestRepairQuorum.setUp() to skip all
quorum tests with a single line.

Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/041 | 39 +++
 1 file changed, 3 insertions(+), 36 deletions(-)

diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index 3c60c07b01..2ab59e9c53 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -871,6 +871,7 @@ class TestRepairQuorum(iotests.QMPTestCase):
 image_len = 1 * 1024 * 1024 # MB
 IMAGES = [ quorum_img1, quorum_img2, quorum_img3 ]
 
+@iotests.skip_if_unsupported(['quorum'])
 def setUp(self):
 self.vm = iotests.VM()
 
@@ -894,9 +895,8 @@ class TestRepairQuorum(iotests.QMPTestCase):
 #assemble the quorum block device from the individual files
 args = { "driver": "quorum", "node-name": "quorum0",
  "vote-threshold": 2, "children": [ "img0", "img1", "img2" ] }
-if iotests.supports_quorum():
-result = self.vm.qmp("blockdev-add", **args)
-self.assert_qmp(result, 'return', {})
+result = self.vm.qmp("blockdev-add", **args)
+self.assert_qmp(result, 'return', {})
 
 
 def tearDown(self):
@@ -909,9 +909,6 @@ class TestRepairQuorum(iotests.QMPTestCase):
 pass
 
 def test_complete(self):
-if not iotests.supports_quorum():
-return
-
 self.assert_no_active_block_jobs()
 
 result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0',
@@ -928,9 +925,6 @@ class TestRepairQuorum(iotests.QMPTestCase):
 'target image does not match source after mirroring')
 
 def test_cancel(self):
-if not iotests.supports_quorum():
-return
-
 self.assert_no_active_block_jobs()
 
 result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0',
@@ -945,9 +939,6 @@ class TestRepairQuorum(iotests.QMPTestCase):
 self.vm.shutdown()
 
 def test_cancel_after_ready(self):
-if not iotests.supports_quorum():
-return
-
 self.assert_no_active_block_jobs()
 
 result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0',
@@ -964,9 +955,6 @@ class TestRepairQuorum(iotests.QMPTestCase):
 'target image does not match source after mirroring')
 
 def test_pause(self):
-if not iotests.supports_quorum():
-return
-
 self.assert_no_active_block_jobs()
 
 result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0',
@@ -992,9 +980,6 @@ class TestRepairQuorum(iotests.QMPTestCase):
 'target image does not match source after mirroring')
 
 def test_medium_not_found(self):
-if not iotests.supports_quorum():
-return
-
 if iotests.qemu_default_machine != 'pc':
 return
 
@@ -1006,9 +991,6 @@ class TestRepairQuorum(iotests.QMPTestCase):
 self.assert_qmp(result, 'error/class', 'GenericError')
 
 def test_image_not_found(self):
-if not iotests.supports_quorum():
-return
-
 result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0',
  sync='full', node_name='repair0', replaces='img1',
  mode='existing', target=quorum_repair_img,
@@ -1016,9 +998,6 @@ class TestRepairQuorum(iotests.QMPTestCase):
 self.assert_qmp(result, 'error/class', 'GenericError')
 
 def test_device_not_found(self):
-if not iotests.supports_quorum():
-return
-
 result = self.vm.qmp('drive-mirror', job_id='job0',
  device='nonexistent', sync='full',
  node_name='repair0',
@@ -1027,9 +1006,6 @@ class TestRepairQuorum(iotests.QMPTestCase):
 self.assert_qmp(result, 'error/class', 'GenericError')
 
 def test_wrong_sync_mode(self):
-if not iotests.supports_quorum():
-return
-
 result = self.vm.qmp('drive-mirror', device='quorum0', job_id='job0',
  node_name='repair0',
  replaces='img1',
@@ -1037,27 +1013,18 @@ class TestRepairQuorum(iotests.QMPTestCase):
 self.assert_qmp(result, 'error/class', 'GenericError')
 
 def test_no_node_name(self):
-if not iotests.supports_quorum():
-return
-
 result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0',
  sync='full', replaces='img1',
  target=quorum_repair_img, format=iotests.imgfmt)
 self.assert_qmp(result, 'error/class', 'GenericError')
 
 def test_nonexistent_replaces(self):
-if not iotests.supports_quorum():
-return
-
 result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0',
  sync='full', node_name='repair0', 
replaces='img77',
  

Re: [PATCH v3 4/6] iotests: Skip "make check-block" if QEMU does not support virtio-blk

2019-11-11 Thread Max Reitz
On 11.11.19 15:02, Thomas Huth wrote:
> On 30/10/2019 12.21, Max Reitz wrote:
>> On 22.10.19 09:21, Thomas Huth wrote:
>>> The next patch is going to add some python-based tests to the "auto"
>>> group, and these tests require virtio-blk to work properly. Running
>>> iotests without virtio-blk likely does not make too much sense anyway,
>>> so instead of adding a check for the availability of virtio-blk to each
>>> and every test (which does not sound very appealing), let's rather add
>>> a check for this at the top level in the check-block.sh script instead
>>> (so that it is possible to run "make check" without the "check-block"
>>> part for qemu-system-tricore for example).
>>>
>>> Reviewed-by: Max Reitz 
>>> Signed-off-by: Thomas Huth 
>>> ---
>>>  tests/check-block.sh | 16 +++-
>>>  1 file changed, 15 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/tests/check-block.sh b/tests/check-block.sh
>>> index 679aedec50..e9e2978818 100755
>>> --- a/tests/check-block.sh
>>> +++ b/tests/check-block.sh
>>> @@ -26,10 +26,24 @@ if grep -q "CFLAGS.*-fsanitize" config-host.mak 
>>> 2>/dev/null ; then
>>>  exit 0
>>>  fi
>>>  
>>> -if [ -z "$(find . -name 'qemu-system-*' -print)" ]; then
>>> +if [ -n "$QEMU_PROG" ]; then
>>> +qemu_prog="$QEMU_PROG"
>>> +else
>>> +for binary in *-softmmu/qemu-system-* ; do
>>
>> Hm, I know I’ve already given my R-b, but looking at this again – what
>> if the user builds qemu for multiple targets?  Then this will just test
>> any target, whereas the iotests might test something else, because the
>> algorithm there is slightly different:
>>
>> First, check $QEMU_PROG (same as here).
>>
>> Second, check $build_iotests/qemu.  I think we can do this here, because
>> we know that $build_iotests is $PWD/tests/qemu-iotests (or invoking
>> ./check below wouldn’t work).
>>
>> Third, and this is actually important, I think, is that we first look
>> for the qemu that matches the host architecture (uname -m, with an
>> exception for ppc64).  I think we really should do that here, too.
>>
>> Fourth, look for any qemu, as is done here.
>>
>> So I think we could do without #2, but it probably doesn’t hurt to check
>> that, too.  I don’t think we should do without #3, though.
> 
> Maybe we should simply move the check into tests/qemu-iotests/check to
> avoid duplication of that logic?
> We could then also only simply skip the python tests instead of skipping
> everything, in case the chosen QEMU binary does not support virtio-blk.

You mean by adding a new flag e.g. -batch that’s supposed to generally
skip cases when in doubt that they can be run on the current host?
Sounds good to me.

Max



signature.asc
Description: OpenPGP digital signature


Re: [RFC v4 PATCH 47/49] multi-process: Enable support for multiple devices in remote

2019-11-11 Thread Stefan Hajnoczi
On Thu, Oct 24, 2019 at 05:09:28AM -0400, Jagannathan Raman wrote:
> @@ -93,7 +94,8 @@ static void process_config_write(MPQemuMsg *msg)
>  struct conf_data_msg *conf = (struct conf_data_msg *)msg->data2;
>  
>  qemu_mutex_lock_iothread();
> -pci_default_write_config(remote_pci_dev, conf->addr, conf->val, conf->l);
> +pci_default_write_config(remote_pci_devs[msg->id], conf->addr, conf->val,
> + conf->l);
>  qemu_mutex_unlock_iothread();
>  }
>  
> @@ -106,7 +108,8 @@ static void process_config_read(MPQemuMsg *msg)
>  wait = msg->fds[0];
>  
>  qemu_mutex_lock_iothread();
> -val = pci_default_read_config(remote_pci_dev, conf->addr, conf->l);
> +val = pci_default_read_config(remote_pci_devs[msg->id], conf->addr,
> +  conf->l);
>  qemu_mutex_unlock_iothread();
>  
>  notify_proxy(wait, val);

msg->id was read from a socket and hasn't been validated before indexing
into remote_pci_devs[].


signature.asc
Description: PGP signature


[PATCH v6 2/3] qcow2: Allow writing compressed data of multiple clusters

2019-11-11 Thread Andrey Shinkevich
QEMU currently supports writing compressed data of the size equal to
one cluster. This patch allows writing QCOW2 compressed data that
exceed one cluster. Now, we split buffered data into separate clusters
and write them compressed using the existing functionality.

Suggested-by: Pavel Butsykin 
Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/qcow2.c | 102 ++
 1 file changed, 75 insertions(+), 27 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 7c18721..0e03a1a 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -4222,10 +4222,8 @@ fail:
 return ret;
 }
 
-/* XXX: put compressed sectors first, then all the cluster aligned
-   tables to avoid losing bytes in alignment */
 static coroutine_fn int
-qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
+qcow2_co_pwritev_compressed_task(BlockDriverState *bs,
  uint64_t offset, uint64_t bytes,
  QEMUIOVector *qiov, size_t qiov_offset)
 {
@@ -4235,32 +4233,11 @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
 uint8_t *buf, *out_buf;
 uint64_t cluster_offset;
 
-if (has_data_file(bs)) {
-return -ENOTSUP;
-}
-
-if (bytes == 0) {
-/* align end of file to a sector boundary to ease reading with
-   sector based I/Os */
-int64_t len = bdrv_getlength(bs->file->bs);
-if (len < 0) {
-return len;
-}
-return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, NULL);
-}
-
-if (offset_into_cluster(s, offset)) {
-return -EINVAL;
-}
+assert(bytes == s->cluster_size || (bytes < s->cluster_size &&
+   (offset + bytes == bs->total_sectors << BDRV_SECTOR_BITS)));
 
 buf = qemu_blockalign(bs, s->cluster_size);
-if (bytes != s->cluster_size) {
-if (bytes > s->cluster_size ||
-offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS)
-{
-qemu_vfree(buf);
-return -EINVAL;
-}
+if (bytes < s->cluster_size) {
 /* Zero-pad last write if image size is not cluster aligned */
 memset(buf + bytes, 0, s->cluster_size - bytes);
 }
@@ -4309,6 +4286,77 @@ fail:
 return ret;
 }
 
+static coroutine_fn int qcow2_co_pwritev_compressed_task_entry(AioTask *task)
+{
+Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
+
+assert(!t->cluster_type && !t->l2meta);
+
+return qcow2_co_pwritev_compressed_task(t->bs, t->offset, t->bytes, 
t->qiov,
+t->qiov_offset);
+}
+
+/*
+ * XXX: put compressed sectors first, then all the cluster aligned
+ * tables to avoid losing bytes in alignment
+ */
+static coroutine_fn int
+qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset)
+{
+BDRVQcow2State *s = bs->opaque;
+AioTaskPool *aio = NULL;
+int ret = 0;
+
+if (has_data_file(bs)) {
+return -ENOTSUP;
+}
+
+if (bytes == 0) {
+/*
+ * align end of file to a sector boundary to ease reading with
+ * sector based I/Os
+ */
+int64_t len = bdrv_getlength(bs->file->bs);
+if (len < 0) {
+return len;
+}
+return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, NULL);
+}
+
+if (offset_into_cluster(s, offset)) {
+return -EINVAL;
+}
+
+while (bytes && aio_task_pool_status(aio) == 0) {
+uint64_t chunk_size = MIN(bytes, s->cluster_size);
+
+if (!aio && chunk_size != bytes) {
+aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
+}
+
+ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_compressed_task_entry,
+ 0, 0, offset, chunk_size, qiov, qiov_offset, 
NULL);
+if (ret < 0) {
+break;
+}
+qiov_offset += chunk_size;
+offset += chunk_size;
+bytes -= chunk_size;
+}
+
+if (aio) {
+aio_task_pool_wait_all(aio);
+if (ret == 0) {
+ret = aio_task_pool_status(aio);
+}
+g_free(aio);
+}
+
+return ret;
+}
+
 static int coroutine_fn
 qcow2_co_preadv_compressed(BlockDriverState *bs,
uint64_t file_cluster_offset,
-- 
1.8.3.1




[PATCH for-5.0 v2 16/23] iotests: Use complete_and_wait() in 155

2019-11-11 Thread Max Reitz
This way, we get to see errors during the completion phase.

Signed-off-by: Max Reitz 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/155 | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155
index e19485911c..d7ef2579d3 100755
--- a/tests/qemu-iotests/155
+++ b/tests/qemu-iotests/155
@@ -163,12 +163,7 @@ class MirrorBaseClass(BaseClass):
 
 self.assert_qmp(result, 'return', {})
 
-self.vm.event_wait('BLOCK_JOB_READY')
-
-result = self.vm.qmp('block-job-complete', device='mirror-job')
-self.assert_qmp(result, 'return', {})
-
-self.vm.event_wait('BLOCK_JOB_COMPLETED')
+self.complete_and_wait('mirror-job')
 
 def testFull(self):
 self.runMirror('full')
-- 
2.23.0




[PATCH for-5.0 v2 15/23] mirror: Prevent loops

2019-11-11 Thread Max Reitz
While bdrv_replace_node() will not follow through with it, a specific
@replaces asks the mirror job to create a loop.

For example, say both the source and the target share a child where the
source is a filter; by letting @replaces point to the common child, you
ask for a loop.

Or if you use @replaces in drive-mirror with sync=none and
mode=absolute-paths, you generally ask for a loop (@replaces must point
to a child of the source, and sync=none makes the source the backing
file of the target after the job).

bdrv_replace_node() will not create those loops, but by doing so, it
ignores the user-requested configuration, which is not ideally either.
(In the first example above, the target's child will remain what it was,
which may still be reasonable.  But in the second example, the target
will just not become a child of the source, which is precisely what was
requested with @replaces.)

So prevent such configurations, both before the job, and before it
actually completes.

Signed-off-by: Max Reitz 
---
 block.c   | 30 
 block/mirror.c| 19 +++-
 blockdev.c| 48 ++-
 include/block/block_int.h |  3 +++
 4 files changed, 98 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index 0159f8e510..e3922a0474 100644
--- a/block.c
+++ b/block.c
@@ -6259,6 +6259,36 @@ out:
 return to_replace_bs;
 }
 
+/*
+ * Return true iff @child is a (recursive) child of @parent, with at
+ * least @min_level edges between them.
+ *
+ * (If @min_level == 0, return true if @child == @parent.  For
+ * @min_level == 1, @child needs to be at least a real child; for
+ * @min_level == 2, it needs to be at least a grand-child; and so on.)
+ */
+bool bdrv_is_child_of(BlockDriverState *child, BlockDriverState *parent,
+  int min_level)
+{
+BdrvChild *c;
+
+if (child == parent && min_level <= 0) {
+return true;
+}
+
+if (!parent) {
+return false;
+}
+
+QLIST_FOREACH(c, >children, next) {
+if (bdrv_is_child_of(child, c->bs, min_level - 1)) {
+return true;
+}
+}
+
+return false;
+}
+
 /**
  * Iterates through the list of runtime option keys that are said to
  * be "strong" for a BDS.  An option is called "strong" if it changes
diff --git a/block/mirror.c b/block/mirror.c
index 68a4404666..b258c7e98b 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -701,7 +701,24 @@ static int mirror_exit_common(Job *job)
  * there.
  */
 if (bdrv_recurse_can_replace(src, to_replace)) {
-bdrv_replace_node(to_replace, target_bs, _err);
+/*
+ * It is OK for @to_replace to be an immediate child of
+ * @target_bs, because that is what happens with
+ * drive-mirror sync=none mode=absolute-paths: target_bs's
+ * backing file will be the source node, which is also
+ * to_replace (by default).
+ * bdrv_replace_node() handles this case by not letting
+ * target_bs->backing point to itself, but to the source
+ * still.
+ */
+if (!bdrv_is_child_of(to_replace, target_bs, 2)) {
+bdrv_replace_node(to_replace, target_bs, _err);
+} else {
+error_setg(_err, "Can no longer replace '%s' by '%s', "
+   "because the former is now a child of the latter, "
+   "and doing so would thus create a loop",
+   to_replace->node_name, target_bs->node_name);
+}
 } else {
 error_setg(_err, "Can no longer replace '%s' by '%s', "
"because it can no longer be guaranteed that doing so "
diff --git a/blockdev.c b/blockdev.c
index 9dc2238bf3..d29f147f72 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3824,7 +3824,7 @@ static void blockdev_mirror_common(const char *job_id, 
BlockDriverState *bs,
 }
 
 if (has_replaces) {
-BlockDriverState *to_replace_bs;
+BlockDriverState *to_replace_bs, *target_backing_bs;
 AioContext *replace_aio_context;
 int64_t bs_size, replace_size;
 
@@ -3839,6 +3839,52 @@ static void blockdev_mirror_common(const char *job_id, 
BlockDriverState *bs,
 return;
 }
 
+if (bdrv_is_child_of(to_replace_bs, target, 1)) {
+error_setg(errp, "Replacing %s by %s would result in a loop, "
+   "because the former is a child of the latter",
+   to_replace_bs->node_name, target->node_name);
+return;
+}
+
+if (backing_mode == MIRROR_SOURCE_BACKING_CHAIN ||
+backing_mode == MIRROR_OPEN_BACKING_CHAIN)
+{
+/*
+ * While we do not quite know what OPEN_BACKING_CHAIN
+ * (used for mode=existing) will yield, it is probably
+ * best to 

[PATCH v6 0/3] qcow2: advanced compression options

2019-11-11 Thread Andrey Shinkevich
The compression filter driver is introduced as suggested by Max.
A sample usage of the filter can be found in the test #214.
Now, multiple clusters can be written compressed.
It is useful for the backup job.

v6: The new approach to write compressed data was applied. The patch
v5 4/4 with the block-stream compress test case was removed from
the series.
Discussed in the email thread with the message ID
<1571603828-185910-1-git-send-email-andrey.shinkev...@virtuozzo.com>

Andrey Shinkevich (3):
  block: introduce compress filter driver
  qcow2: Allow writing compressed data of multiple clusters
  tests/qemu-iotests: add case to write compressed data of multiple
clusters

 block/Makefile.objs|   1 +
 block/filter-compress.c| 212 +
 block/qcow2.c  | 102 --
 qapi/block-core.json   |  10 ++-
 tests/qemu-iotests/214 |  43 +
 tests/qemu-iotests/214.out |  14 +++
 6 files changed, 351 insertions(+), 31 deletions(-)
 create mode 100644 block/filter-compress.c

-- 
1.8.3.1




[PATCH for-5.0 v2 13/23] mirror: Double-check immediately before replacing

2019-11-11 Thread Max Reitz
There is no guarantee that we can still replace the node we want to
replace at the end of the mirror job.  Double-check by calling
bdrv_recurse_can_replace().

Signed-off-by: Max Reitz 
---
 block/mirror.c | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/block/mirror.c b/block/mirror.c
index f0f2d9dff1..68a4404666 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -695,7 +695,19 @@ static int mirror_exit_common(Job *job)
  * drain potential other users of the BDS before changing the graph. */
 assert(s->in_drain);
 bdrv_drained_begin(target_bs);
-bdrv_replace_node(to_replace, target_bs, _err);
+/*
+ * Cannot use check_to_replace_node() here, because that would
+ * check for an op blocker on @to_replace, and we have our own
+ * there.
+ */
+if (bdrv_recurse_can_replace(src, to_replace)) {
+bdrv_replace_node(to_replace, target_bs, _err);
+} else {
+error_setg(_err, "Can no longer replace '%s' by '%s', "
+   "because it can no longer be guaranteed that doing so "
+   "would not lead to an abrupt change of visible data",
+   to_replace->node_name, target_bs->node_name);
+}
 bdrv_drained_end(target_bs);
 if (local_err) {
 error_report_err(local_err);
-- 
2.23.0




[PATCH for-5.0 v2 14/23] quorum: Stop marking it as a filter

2019-11-11 Thread Max Reitz
Quorum is not a filter, for example because it cannot guarantee which of
its children will serve the next request.  Thus, any of its children may
differ from the data visible to quorum's parents.

We have other filters with multiple children, but they differ in this
aspect:

- blkverify quits the whole qemu process if its children differ.  As
  such, we can always skip it when we want to skip it (as a filter node)
  by going to any of its children.  Both have the same data.

- replication generally serves requests from bs->file, so this is its
  only actually filtered child.

- Block job filters currently only have one child, but they will
  probably get more children in the future.  Still, they will always
  have only one actually filtered child.

Having "filters" as a dedicated node category only makes sense if you
can skip them by going to a one fixed child that always shows the same
data as the filter node.  Quorum cannot fulfill this, so it is not a
filter.

Signed-off-by: Max Reitz 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/quorum.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/block/quorum.c b/block/quorum.c
index 1974e2ffa8..8cd13a7b91 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -1237,7 +1237,6 @@ static BlockDriver bdrv_quorum = {
 
 .bdrv_child_perm= quorum_child_perm,
 
-.is_filter  = true,
 .bdrv_recurse_can_replace   = quorum_recurse_can_replace,
 
 .strong_runtime_opts= quorum_strong_runtime_opts,
-- 
2.23.0




[PATCH for-5.0 v2 20/23] iotests: Use self.image_len in TestRepairQuorum

2019-11-11 Thread Max Reitz
041's TestRepairQuorum has its own image_len, no need to refer to
TestSingleDrive.  (This patch allows commenting out TestSingleDrive to
speed up 041 during test testing.)

Signed-off-by: Max Reitz 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/041 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index d636cb7f1d..0c1af45639 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -881,7 +881,7 @@ class TestRepairQuorum(iotests.QMPTestCase):
 # Add each individual quorum images
 for i in self.IMAGES:
 qemu_img('create', '-f', iotests.imgfmt, i,
- str(TestSingleDrive.image_len))
+ str(self.image_len))
 # Assign a node name to each quorum image in order to manipulate
 # them
 opts = "node-name=img%i" % self.IMAGES.index(i)
-- 
2.23.0




[PATCH for-5.0 v2 22/23] iotests: Check that @replaces can replace filters

2019-11-11 Thread Max Reitz
Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/041 | 46 ++
 tests/qemu-iotests/041.out |  4 ++--
 2 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index ab0cb5b42f..9a00cf6f7b 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -1200,6 +1200,52 @@ class TestOrphanedSource(iotests.QMPTestCase):
 self.assertFalse('mirror-filter' in nodes,
  'Mirror filter node did not disappear')
 
+# Test cases for @replaces that do not necessarily involve Quorum
+class TestReplaces(iotests.QMPTestCase):
+# Each of these test cases needs their own block graph, so do not
+# create any nodes here
+def setUp(self):
+self.vm = iotests.VM()
+self.vm.launch()
+
+def tearDown(self):
+self.vm.shutdown()
+for img in (test_img, target_img):
+try:
+os.remove(img)
+except OSError:
+pass
+
+"""
+Check that we can replace filter nodes.
+"""
+@iotests.skip_if_unsupported(['copy-on-read'])
+def test_replace_filter(self):
+result = self.vm.qmp('blockdev-add', **{
+ 'driver': 'copy-on-read',
+ 'node-name': 'filter0',
+ 'file': {
+ 'driver': 'copy-on-read',
+ 'node-name': 'filter1',
+ 'file': {
+ 'driver': 'null-co'
+ }
+ }
+ })
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('blockdev-add',
+ node_name='target', driver='null-co')
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('blockdev-mirror', job_id='mirror', 
device='filter0',
+ target='target', sync='full', replaces='filter1')
+self.assert_qmp(result, 'return', {})
+
+self.complete_and_wait('mirror')
+
+self.vm.assert_block_path('filter0', '/file', 'target')
+
 if __name__ == '__main__':
 iotests.main(supported_fmts=['qcow2', 'qed'],
  supported_protocols=['file'])
diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out
index ffc779b4d1..877b76fd31 100644
--- a/tests/qemu-iotests/041.out
+++ b/tests/qemu-iotests/041.out
@@ -1,5 +1,5 @@
-.
+..
 --
-Ran 93 tests
+Ran 94 tests
 
 OK
-- 
2.23.0




[PATCH for-5.0 v2 06/23] block: Add bdrv_recurse_can_replace()

2019-11-11 Thread Max Reitz
After a couple of follow-up patches, this function will replace
bdrv_recurse_is_first_non_filter() in check_to_replace_node().

bdrv_recurse_is_first_non_filter() is both not sufficiently specific for
check_to_replace_node() (it allows cases that should not be allowed,
like replacing child nodes of quorum with dissenting data that have more
parents than just quorum), and it is too restrictive (it is perfectly
fine to replace filters).

Signed-off-by: Max Reitz 
---
 block.c   | 38 ++
 include/block/block_int.h | 10 ++
 2 files changed, 48 insertions(+)

diff --git a/block.c b/block.c
index 9b1049786a..de53addeb0 100644
--- a/block.c
+++ b/block.c
@@ -6205,6 +6205,44 @@ bool bdrv_recurse_is_first_non_filter(BlockDriverState 
*bs,
 return false;
 }
 
+/*
+ * This function checks whether the given @to_replace is allowed to be
+ * replaced by a node that always shows the same data as @bs.  This is
+ * used for example to verify whether the mirror job can replace
+ * @to_replace by the target mirrored from @bs.
+ * To be replaceable, @bs and @to_replace may either be guaranteed to
+ * always show the same data (because they are only connected through
+ * filters), or some driver may allow replacing one of its children
+ * because it can guarantee that this child's data is not visible at
+ * all (for example, for dissenting quorum children that have no other
+ * parents).
+ */
+bool bdrv_recurse_can_replace(BlockDriverState *bs,
+  BlockDriverState *to_replace)
+{
+if (!bs || !bs->drv) {
+return false;
+}
+
+if (bs == to_replace) {
+return true;
+}
+
+/* See what the driver can do */
+if (bs->drv->bdrv_recurse_can_replace) {
+return bs->drv->bdrv_recurse_can_replace(bs, to_replace);
+}
+
+/* For filters without an own implementation, we can recurse on our own */
+if (bs->drv->is_filter) {
+BdrvChild *child = bs->file ?: bs->backing;
+return bdrv_recurse_can_replace(child->bs, to_replace);
+}
+
+/* Safe default */
+return false;
+}
+
 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
 const char *node_name, Error **errp)
 {
diff --git a/include/block/block_int.h b/include/block/block_int.h
index dd033d0b37..75f03dcc38 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -102,6 +102,13 @@ struct BlockDriver {
  */
 bool (*bdrv_recurse_is_first_non_filter)(BlockDriverState *bs,
  BlockDriverState *candidate);
+/*
+ * Return true if @to_replace can be replaced by a BDS with the
+ * same data as @bs without it affecting @bs's behavior (that is,
+ * without it being visible to @bs's parents).
+ */
+bool (*bdrv_recurse_can_replace)(BlockDriverState *bs,
+ BlockDriverState *to_replace);
 
 int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
 int (*bdrv_probe_device)(const char *filename);
@@ -1264,6 +1271,9 @@ void bdrv_format_default_perms(BlockDriverState *bs, 
BdrvChild *c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared);
 
+bool bdrv_recurse_can_replace(BlockDriverState *bs,
+  BlockDriverState *to_replace);
+
 /*
  * Default implementation for drivers to pass bdrv_co_block_status() to
  * their file.
-- 
2.23.0




[PATCH for-5.0 v2 09/23] quorum: Add QuorumChild.to_be_replaced

2019-11-11 Thread Max Reitz
We will need this to verify that Quorum can let one of its children be
replaced without breaking anything else.

Signed-off-by: Max Reitz 
---
 block/quorum.c | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/block/quorum.c b/block/quorum.c
index 59cd524502..3a824e77e3 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -67,6 +67,13 @@ typedef struct QuorumVotes {
 
 typedef struct QuorumChild {
 BdrvChild *child;
+
+/*
+ * If set, check whether this node can be replaced without any
+ * other parent noticing: Unshare CONSISTENT_READ, and take the
+ * WRITE permission.
+ */
+bool to_be_replaced;
 } QuorumChild;
 
 /* the following structure holds the state of one quorum instance */
@@ -1128,6 +1135,16 @@ static void quorum_child_perm(BlockDriverState *bs, 
BdrvChild *c,
   uint64_t perm, uint64_t shared,
   uint64_t *nperm, uint64_t *nshared)
 {
+BDRVQuorumState *s = bs->opaque;
+int i;
+
+for (i = 0; i < s->num_children; i++) {
+if (s->children[i].child == c) {
+break;
+}
+}
+assert(!c || i < s->num_children);
+
 *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
 
 /*
@@ -1137,6 +1154,12 @@ static void quorum_child_perm(BlockDriverState *bs, 
BdrvChild *c,
 *nshared = (shared & (BLK_PERM_CONSISTENT_READ |
   BLK_PERM_WRITE_UNCHANGED))
  | DEFAULT_PERM_UNCHANGED;
+
+if (c && s->children[i].to_be_replaced) {
+/* Prepare for sudden data changes */
+*nperm |= BLK_PERM_WRITE;
+*nshared &= ~BLK_PERM_CONSISTENT_READ;
+}
 }
 
 static const char *const quorum_strong_runtime_opts[] = {
-- 
2.23.0




[PATCH for-5.0 v2 12/23] block: Remove bdrv_recurse_is_first_non_filter()

2019-11-11 Thread Max Reitz
It no longer has any users.

Signed-off-by: Max Reitz 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block.c   | 33 -
 block/blkverify.c | 15 ---
 block/copy-on-read.c  |  9 -
 block/quorum.c| 18 --
 block/replication.c   |  7 ---
 block/throttle.c  |  8 
 include/block/block.h |  4 
 include/block/block_int.h |  8 
 8 files changed, 102 deletions(-)

diff --git a/block.c b/block.c
index 7608f21570..0159f8e510 100644
--- a/block.c
+++ b/block.c
@@ -6172,39 +6172,6 @@ int bdrv_amend_options(BlockDriverState *bs, QemuOpts 
*opts,
 return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque, errp);
 }
 
-/* This function will be called by the bdrv_recurse_is_first_non_filter method
- * of block filter and by bdrv_is_first_non_filter.
- * It is used to test if the given bs is the candidate or recurse more in the
- * node graph.
- */
-bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
-  BlockDriverState *candidate)
-{
-/* return false if basic checks fails */
-if (!bs || !bs->drv) {
-return false;
-}
-
-/* the code reached a non block filter driver -> check if the bs is
- * the same as the candidate. It's the recursion termination condition.
- */
-if (!bs->drv->is_filter) {
-return bs == candidate;
-}
-/* Down this path the driver is a block filter driver */
-
-/* If the block filter recursion method is defined use it to recurse down
- * the node graph.
- */
-if (bs->drv->bdrv_recurse_is_first_non_filter) {
-return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
-}
-
-/* the driver is a block filter but don't allow to recurse -> return false
- */
-return false;
-}
-
 /*
  * This function checks whether the given @to_replace is allowed to be
  * replaced by a node that always shows the same data as @bs.  This is
diff --git a/block/blkverify.c b/block/blkverify.c
index 0add3ab483..ba6b1853ae 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -268,20 +268,6 @@ static int blkverify_co_flush(BlockDriverState *bs)
 return bdrv_co_flush(s->test_file->bs);
 }
 
-static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
-  BlockDriverState *candidate)
-{
-BDRVBlkverifyState *s = bs->opaque;
-
-bool perm = bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
-
-if (perm) {
-return true;
-}
-
-return bdrv_recurse_is_first_non_filter(s->test_file->bs, candidate);
-}
-
 static bool blkverify_recurse_can_replace(BlockDriverState *bs,
   BlockDriverState *to_replace)
 {
@@ -341,7 +327,6 @@ static BlockDriver bdrv_blkverify = {
 .bdrv_co_flush= blkverify_co_flush,
 
 .is_filter= true,
-.bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
 .bdrv_recurse_can_replace = blkverify_recurse_can_replace,
 };
 
diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index e95223d3cb..242d3ff055 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -118,13 +118,6 @@ static void cor_lock_medium(BlockDriverState *bs, bool 
locked)
 }
 
 
-static bool cor_recurse_is_first_non_filter(BlockDriverState *bs,
-BlockDriverState *candidate)
-{
-return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
-}
-
-
 static BlockDriver bdrv_copy_on_read = {
 .format_name= "copy-on-read",
 
@@ -143,8 +136,6 @@ static BlockDriver bdrv_copy_on_read = {
 
 .bdrv_co_block_status   = bdrv_co_block_status_from_file,
 
-.bdrv_recurse_is_first_non_filter   = cor_recurse_is_first_non_filter,
-
 .has_variable_length= true,
 .is_filter  = true,
 };
diff --git a/block/quorum.c b/block/quorum.c
index 8ee03e9baf..1974e2ffa8 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -808,23 +808,6 @@ static coroutine_fn int quorum_co_flush(BlockDriverState 
*bs)
 return result;
 }
 
-static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
-   BlockDriverState *candidate)
-{
-BDRVQuorumState *s = bs->opaque;
-int i;
-
-for (i = 0; i < s->num_children; i++) {
-bool perm = bdrv_recurse_is_first_non_filter(s->children[i].child->bs,
- candidate);
-if (perm) {
-return true;
-}
-}
-
-return false;
-}
-
 static bool quorum_recurse_can_replace(BlockDriverState *bs,
BlockDriverState *to_replace)
 {
@@ -1255,7 +1238,6 @@ static BlockDriver bdrv_quorum = {
 .bdrv_child_perm

[PATCH for-5.0 v2 07/23] blkverify: Implement .bdrv_recurse_can_replace()

2019-11-11 Thread Max Reitz
Signed-off-by: Max Reitz 
---
 block/blkverify.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/block/blkverify.c b/block/blkverify.c
index 304b0a1368..0add3ab483 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -282,6 +282,20 @@ static bool 
blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
 return bdrv_recurse_is_first_non_filter(s->test_file->bs, candidate);
 }
 
+static bool blkverify_recurse_can_replace(BlockDriverState *bs,
+  BlockDriverState *to_replace)
+{
+BDRVBlkverifyState *s = bs->opaque;
+
+/*
+ * blkverify quits the whole qemu process if there is a mismatch
+ * between bs->file->bs and s->test_file->bs.  Therefore, we know
+ * know that both must match bs and we can recurse down to either.
+ */
+return bdrv_recurse_can_replace(bs->file->bs, to_replace) ||
+   bdrv_recurse_can_replace(s->test_file->bs, to_replace);
+}
+
 static void blkverify_refresh_filename(BlockDriverState *bs)
 {
 BDRVBlkverifyState *s = bs->opaque;
@@ -328,6 +342,7 @@ static BlockDriver bdrv_blkverify = {
 
 .is_filter= true,
 .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
+.bdrv_recurse_can_replace = blkverify_recurse_can_replace,
 };
 
 static void bdrv_blkverify_init(void)
-- 
2.23.0




[PATCH for-5.0 v2 18/23] iotests: Add VM.assert_block_path()

2019-11-11 Thread Max Reitz
Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/iotests.py | 59 +++
 1 file changed, 59 insertions(+)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index d34305ce69..3e03320ce3 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -681,6 +681,65 @@ class VM(qtest.QEMUQtestMachine):
 
 return fields.items() <= ret.items()
 
+"""
+Check whether the node under the given path in the block graph is
+@expected_node.
+
+@root is the node name of the node where the @path is rooted.
+
+@path is a string that consists of child names separated by
+slashes.  It must begin with a slash.
+
+Examples for @root + @path:
+  - root="qcow2-node", path="/backing/file"
+  - root="quorum-node", path="/children.2/file"
+
+Hypothetically, @path could be empty, in which case it would point
+to @root.  However, in practice this case is not useful and hence
+not allowed.
+
+@expected_node may be None.
+
+@graph may be None or the result of an x-debug-query-block-graph
+call that has already been performed.
+"""
+def assert_block_path(self, root, path, expected_node, graph=None):
+if graph is None:
+graph = self.qmp('x-debug-query-block-graph')['return']
+
+iter_path = iter(path.split('/'))
+
+# Must start with a /
+assert next(iter_path) == ''
+
+node = next((node for node in graph['nodes'] if node['name'] == root),
+None)
+
+for path_node in iter_path:
+assert node is not None, 'Cannot follow path %s' % path
+
+try:
+node_id = next(edge['child'] for edge in graph['edges'] \
+ if edge['parent'] == node['id'] 
and
+edge['name'] == path_node)
+
+node = next(node for node in graph['nodes'] \
+ if node['id'] == node_id)
+except StopIteration:
+node = None
+
+assert node is not None or expected_node is None, \
+   'No node found under %s (but expected %s)' % \
+   (path, expected_node)
+
+assert expected_node is not None or node is None, \
+   'Found node %s under %s (but expected none)' % \
+   (node['name'], path)
+
+if node is not None and expected_node is not None:
+assert node['name'] == expected_node, \
+   'Found node %s under %s (but expected %s)' % \
+   (node['name'], path, expected_node)
 
 index_re = re.compile(r'([^\[]+)\[([^\]]+)\]')
 
-- 
2.23.0




[PATCH for-5.0 v2 08/23] quorum: Store children in own structure

2019-11-11 Thread Max Reitz
This will be useful when we want to store additional attributes for each
child.

Signed-off-by: Max Reitz 
---
 block/quorum.c | 64 --
 1 file changed, 36 insertions(+), 28 deletions(-)

diff --git a/block/quorum.c b/block/quorum.c
index 17b439056f..59cd524502 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -65,9 +65,13 @@ typedef struct QuorumVotes {
 bool (*compare)(QuorumVoteValue *a, QuorumVoteValue *b);
 } QuorumVotes;
 
+typedef struct QuorumChild {
+BdrvChild *child;
+} QuorumChild;
+
 /* the following structure holds the state of one quorum instance */
 typedef struct BDRVQuorumState {
-BdrvChild **children;  /* children BlockDriverStates */
+QuorumChild *children;
 int num_children;  /* children count */
 unsigned next_child_index;  /* the index of the next child that should
  * be added
@@ -264,7 +268,7 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
 }
 QLIST_FOREACH(item, >items, next) {
 quorum_report_bad(QUORUM_OP_TYPE_READ, acb->offset, acb->bytes,
-  s->children[item->index]->bs->node_name, 0);
+  s->children[item->index].child->bs->node_name, 
0);
 }
 }
 }
@@ -279,7 +283,7 @@ static void quorum_rewrite_entry(void *opaque)
  * corrupted data.
  * Mask out BDRV_REQ_WRITE_UNCHANGED because this overwrites the
  * area with different data from the other children. */
-bdrv_co_pwritev(s->children[co->idx], acb->offset, acb->bytes,
+bdrv_co_pwritev(s->children[co->idx].child, acb->offset, acb->bytes,
 acb->qiov, acb->flags & ~BDRV_REQ_WRITE_UNCHANGED);
 
 /* Wake up the caller after the last rewrite */
@@ -578,8 +582,8 @@ static void read_quorum_children_entry(void *opaque)
 int i = co->idx;
 QuorumChildRequest *sacb = >qcrs[i];
 
-sacb->bs = s->children[i]->bs;
-sacb->ret = bdrv_co_preadv(s->children[i], acb->offset, acb->bytes,
+sacb->bs = s->children[i].child->bs;
+sacb->ret = bdrv_co_preadv(s->children[i].child, acb->offset, acb->bytes,
>qcrs[i].qiov, 0);
 
 if (sacb->ret == 0) {
@@ -605,7 +609,8 @@ static int read_quorum_children(QuorumAIOCB *acb)
 
 acb->children_read = s->num_children;
 for (i = 0; i < s->num_children; i++) {
-acb->qcrs[i].buf = qemu_blockalign(s->children[i]->bs, 
acb->qiov->size);
+acb->qcrs[i].buf = qemu_blockalign(s->children[i].child->bs,
+   acb->qiov->size);
 qemu_iovec_init(>qcrs[i].qiov, acb->qiov->niov);
 qemu_iovec_clone(>qcrs[i].qiov, acb->qiov, acb->qcrs[i].buf);
 }
@@ -647,8 +652,8 @@ static int read_fifo_child(QuorumAIOCB *acb)
 /* We try to read the next child in FIFO order if we failed to read */
 do {
 n = acb->children_read++;
-acb->qcrs[n].bs = s->children[n]->bs;
-ret = bdrv_co_preadv(s->children[n], acb->offset, acb->bytes,
+acb->qcrs[n].bs = s->children[n].child->bs;
+ret = bdrv_co_preadv(s->children[n].child, acb->offset, acb->bytes,
  acb->qiov, 0);
 if (ret < 0) {
 quorum_report_bad_acb(>qcrs[n], ret);
@@ -688,8 +693,8 @@ static void write_quorum_entry(void *opaque)
 int i = co->idx;
 QuorumChildRequest *sacb = >qcrs[i];
 
-sacb->bs = s->children[i]->bs;
-sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
+sacb->bs = s->children[i].child->bs;
+sacb->ret = bdrv_co_pwritev(s->children[i].child, acb->offset, acb->bytes,
 acb->qiov, acb->flags);
 if (sacb->ret == 0) {
 acb->success_count++;
@@ -743,12 +748,12 @@ static int64_t quorum_getlength(BlockDriverState *bs)
 int i;
 
 /* check that all file have the same length */
-result = bdrv_getlength(s->children[0]->bs);
+result = bdrv_getlength(s->children[0].child->bs);
 if (result < 0) {
 return result;
 }
 for (i = 1; i < s->num_children; i++) {
-int64_t value = bdrv_getlength(s->children[i]->bs);
+int64_t value = bdrv_getlength(s->children[i].child->bs);
 if (value < 0) {
 return value;
 }
@@ -774,10 +779,10 @@ static coroutine_fn int quorum_co_flush(BlockDriverState 
*bs)
 error_votes.compare = quorum_64bits_compare;
 
 for (i = 0; i < s->num_children; i++) {
-result = bdrv_co_flush(s->children[i]->bs);
+result = bdrv_co_flush(s->children[i].child->bs);
 if (result) {
 quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0, 0,
-  s->children[i]->bs->node_name, result);
+  s->children[i].child->bs->node_name, result);
 result_value.l = result;
 quorum_count_vote(_votes, _value, i);
 } else {
@@ -803,7 +808,7 @@ static bool 

[PATCH for-5.0 v2 05/23] quorum: Fix child permissions

2019-11-11 Thread Max Reitz
Quorum cannot share WRITE or RESIZE on its children.  Presumably, it
only does so because as a filter, it seemed intuitively correct to point
its .bdrv_child_perm to bdrv_filter_default_perm().

However, it is not really a filter, and bdrv_filter_default_perm() does
not work for it, so we have to provide a custom .bdrv_child_perm
implementation.

Signed-off-by: Max Reitz 
---
 block/quorum.c | 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/block/quorum.c b/block/quorum.c
index df68adcfaa..17b439056f 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -1114,6 +1114,23 @@ static char *quorum_dirname(BlockDriverState *bs, Error 
**errp)
 return NULL;
 }
 
+static void quorum_child_perm(BlockDriverState *bs, BdrvChild *c,
+  const BdrvChildRole *role,
+  BlockReopenQueue *reopen_queue,
+  uint64_t perm, uint64_t shared,
+  uint64_t *nperm, uint64_t *nshared)
+{
+*nperm = perm & DEFAULT_PERM_PASSTHROUGH;
+
+/*
+ * We cannot share RESIZE or WRITE, as this would make the
+ * children differ from each other.
+ */
+*nshared = (shared & (BLK_PERM_CONSISTENT_READ |
+  BLK_PERM_WRITE_UNCHANGED))
+ | DEFAULT_PERM_UNCHANGED;
+}
+
 static const char *const quorum_strong_runtime_opts[] = {
 QUORUM_OPT_VOTE_THRESHOLD,
 QUORUM_OPT_BLKVERIFY,
@@ -1143,7 +1160,7 @@ static BlockDriver bdrv_quorum = {
 .bdrv_add_child = quorum_add_child,
 .bdrv_del_child = quorum_del_child,
 
-.bdrv_child_perm= bdrv_filter_default_perms,
+.bdrv_child_perm= quorum_child_perm,
 
 .is_filter  = true,
 .bdrv_recurse_is_first_non_filter   = quorum_recurse_is_first_non_filter,
-- 
2.23.0




[PATCH for-5.0 v2 04/23] iotests: Let 041 use -blockdev for quorum children

2019-11-11 Thread Max Reitz
Using -drive with default options means that a virtio-blk drive will be
created that has write access to the to-be quorum children.  Quorum
should have exclusive write access to them, so we should use -blockdev
instead.

Signed-off-by: Max Reitz 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/041 | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index d7be30b62b..3c60c07b01 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -884,7 +884,10 @@ class TestRepairQuorum(iotests.QMPTestCase):
 # Assign a node name to each quorum image in order to manipulate
 # them
 opts = "node-name=img%i" % self.IMAGES.index(i)
-self.vm = self.vm.add_drive(i, opts)
+opts += ',driver=%s' % iotests.imgfmt
+opts += ',file.driver=file'
+opts += ',file.filename=%s' % i
+self.vm = self.vm.add_blockdev(opts)
 
 self.vm.launch()
 
-- 
2.23.0




[PATCH for-5.0 v2 02/23] blockdev: Allow resizing everywhere

2019-11-11 Thread Max Reitz
Block nodes that do not allow resizing should not share BLK_PERM_RESIZE.
It does not matter whether they are the first non-filter in their chain
or not.

Signed-off-by: Max Reitz 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 blockdev.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index ab78230d23..9dc2238bf3 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3177,11 +3177,6 @@ void qmp_block_resize(bool has_device, const char 
*device,
 aio_context = bdrv_get_aio_context(bs);
 aio_context_acquire(aio_context);
 
-if (!bdrv_is_first_non_filter(bs)) {
-error_setg(errp, QERR_FEATURE_DISABLED, "resize");
-goto out;
-}
-
 if (size < 0) {
 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "size", "a >0 size");
 goto out;
-- 
2.23.0




[PATCH for-5.0 v2 11/23] block: Use bdrv_recurse_can_replace()

2019-11-11 Thread Max Reitz
Let check_to_replace_node() use the more specialized
bdrv_recurse_can_replace() instead of
bdrv_recurse_is_first_non_filter(), which is too restrictive.

Signed-off-by: Max Reitz 
---
 block.c | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index de53addeb0..7608f21570 100644
--- a/block.c
+++ b/block.c
@@ -6243,6 +6243,17 @@ bool bdrv_recurse_can_replace(BlockDriverState *bs,
 return false;
 }
 
+/*
+ * Check whether the given @node_name can be replaced by a node that
+ * has the same data as @parent_bs.  If so, return @node_name's BDS;
+ * NULL otherwise.
+ *
+ * @node_name must be a (recursive) *child of @parent_bs (or this
+ * function will return NULL).
+ *
+ * The result (whether the node can be replaced or not) is only valid
+ * for as long as no graph changes occur.
+ */
 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
 const char *node_name, Error **errp)
 {
@@ -6267,8 +6278,11 @@ BlockDriverState *check_to_replace_node(BlockDriverState 
*parent_bs,
  * Another benefit is that this tests exclude backing files which are
  * blocked by the backing blockers.
  */
-if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
-error_setg(errp, "Only top most non filter can be replaced");
+if (!bdrv_recurse_can_replace(parent_bs, to_replace_bs)) {
+error_setg(errp, "Cannot replace '%s' by a node mirrored from '%s', "
+   "because it cannot be guaranteed that doing so would not "
+   "lead to an abrupt change of visible data",
+   node_name, parent_bs->node_name);
 to_replace_bs = NULL;
 goto out;
 }
-- 
2.23.0




[PATCH for-5.0 v2 03/23] block: Drop bdrv_is_first_non_filter()

2019-11-11 Thread Max Reitz
It is unused now.  (And it was ugly because it needed to explore all BDS
chains from the top.)

Signed-off-by: Max Reitz 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block.c   | 26 --
 include/block/block.h |  1 -
 2 files changed, 27 deletions(-)

diff --git a/block.c b/block.c
index ae279ff21f..9b1049786a 100644
--- a/block.c
+++ b/block.c
@@ -6205,32 +6205,6 @@ bool bdrv_recurse_is_first_non_filter(BlockDriverState 
*bs,
 return false;
 }
 
-/* This function checks if the candidate is the first non filter bs down it's
- * bs chain. Since we don't have pointers to parents it explore all bs chains
- * from the top. Some filters can choose not to pass down the recursion.
- */
-bool bdrv_is_first_non_filter(BlockDriverState *candidate)
-{
-BlockDriverState *bs;
-BdrvNextIterator it;
-
-/* walk down the bs forest recursively */
-for (bs = bdrv_first(); bs; bs = bdrv_next()) {
-bool perm;
-
-/* try to recurse in this top level bs */
-perm = bdrv_recurse_is_first_non_filter(bs, candidate);
-
-/* candidate is the first non filter */
-if (perm) {
-bdrv_next_cleanup();
-return true;
-}
-}
-
-return false;
-}
-
 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
 const char *node_name, Error **errp)
 {
diff --git a/include/block/block.h b/include/block/block.h
index e9dcfef7fa..8f6a0cad9c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -404,7 +404,6 @@ int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts 
*opts,
 /* external snapshots */
 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
   BlockDriverState *candidate);
-bool bdrv_is_first_non_filter(BlockDriverState *candidate);
 
 /* check if a named node can be replaced when doing drive-mirror */
 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
-- 
2.23.0




Re: [PATCH 1/2] tests/tcg/multiarch: fix code style in function main of test-mmap.c

2019-11-11 Thread Alex Bennée


Wei Yang  writes:

> On Mon, Nov 11, 2019 at 10:25:43AM +, Alex Benn??e wrote:
>>
>>Wei Yang  writes:
>>
>>> This file uses quite a different code style and changing just one line
>>> would leads to some awkward appearance.
>>>
>>> This is a preparation for the following replacement of
>>> sysconf(_SC_PAGESIZE).
>>>
>>> BTW, to depress ERROR message from checkpatch.pl, this patch replaces
>>> strtoul with qemu_strtoul.
>>
>>
>>NACK I'm afraid.
>>
>>The tests/tcg directory all build against glibc only to make them easier
>>to cross-compile for the various targets. If you run check-tcg and have
>>a non-native cross compiler setup you'll notice this fails to build:
>>
>>BUILD   aarch64-linux-user guest-tests with aarch64-linux-gnu-gcc
>>  /home/alex/lsrc/qemu.git/tests/tcg/multiarch/test-mmap.c: In function 
>> ???main???:
>>  /home/alex/lsrc/qemu.git/tests/tcg/multiarch/test-mmap.c:467:9: warning: 
>> implicit declaration of function ???qemu_strtoul???; did you mean 
>> ???strtoul [-Wimplicit-function-declaration]
>>   qemu_strtoul(argv[1], NULL, 0, );
>>   ^~~~
>>   strtoul
>>  /home/alex/lsrc/qemu.git/tests/tcg/multiarch/test-mmap.c:469:20: error: 
>> ???qemu_real_host_page_size??? undeclared (first use in this function)
>>   pagesize = qemu_real_host_page_size;
>>  ^~~~
>>  /home/alex/lsrc/qemu.git/tests/tcg/multiarch/test-mmap.c:469:20: note: each 
>> undeclared identifier is reported only once for each function it appears in
>>  make[2]: *** [../Makefile.target:103: test-mmap] Error 1
>>  make[1]: *** [/home/alex/lsrc/qemu.git/tests/tcg/Makefile.qemu:33: 
>> cross-build-guest-tests] Error 2
>>  make: *** [/home/alex/lsrc/qemu.git/tests/Makefile.include:1094: 
>> build-tcg-tests-aarch64-linux-user] Error 2
>>  make: *** Waiting for unfinished jobs
>>
>
> This output is from "make test" ?

make check-tcg

>
>>>
>>> Signed-off-by: Wei Yang 
>>> ---
>>>  tests/tcg/multiarch/test-mmap.c | 67 ++---
>>>  1 file changed, 36 insertions(+), 31 deletions(-)
>>>
>>> diff --git a/tests/tcg/multiarch/test-mmap.c 
>>> b/tests/tcg/multiarch/test-mmap.c
>>> index 11d0e777b1..9ea49e2307 100644
>>> --- a/tests/tcg/multiarch/test-mmap.c
>>> +++ b/tests/tcg/multiarch/test-mmap.c
>>> @@ -456,49 +456,54 @@ void check_invalid_mmaps(void)
>>>
>>>  int main(int argc, char **argv)
>>>  {
>>> -   char tempname[] = "/tmp/.cmmapXX";
>>> -   unsigned int i;
>>> -
>>> -   /* Trust the first argument, otherwise probe the system for our
>>> -  pagesize.  */
>>> -   if (argc > 1)
>>> -   pagesize = strtoul(argv[1], NULL, 0);
>>> -   else
>>> -   pagesize = sysconf(_SC_PAGESIZE);
>>> +char tempname[] = "/tmp/.cmmapXX";
>>> +unsigned int i;
>>> +
>>> +/*
>>> + * Trust the first argument, otherwise probe the system for our
>>> + * pagesize.
>>> + */
>>> +if (argc > 1) {
>>> +qemu_strtoul(argv[1], NULL, 0, );
>>> +} else {
>>> +pagesize = sysconf(_SC_PAGESIZE);
>>> +}
>>>
>>> -   /* Assume pagesize is a power of two.  */
>>> -   pagemask = pagesize - 1;
>>> -   dummybuf = malloc (pagesize);
>>> -   printf ("pagesize=%u pagemask=%x\n", pagesize, pagemask);
>>> +/* Assume pagesize is a power of two.  */
>>> +pagemask = pagesize - 1;
>>> +dummybuf = malloc(pagesize);
>>> +printf("pagesize=%u pagemask=%x\n", pagesize, pagemask);
>>>
>>> -   test_fd = mkstemp(tempname);
>>> -   unlink(tempname);
>>> +test_fd = mkstemp(tempname);
>>> +unlink(tempname);
>>>
>>> -   /* Fill the file with int's counting from zero and up.  */
>>> +/* Fill the file with int's counting from zero and up.  */
>>>  for (i = 0; i < (pagesize * 4) / sizeof i; i++) {
>>>  checked_write(test_fd, , sizeof i);
>>>  }
>>>
>>> -   /* Append a few extra writes to make the file end at non
>>> -  page boundary.  */
>>> +/*
>>> + * Append a few extra writes to make the file end at non
>>> + * page boundary.
>>> + */
>>>  checked_write(test_fd, , sizeof i); i++;
>>>  checked_write(test_fd, , sizeof i); i++;
>>>  checked_write(test_fd, , sizeof i); i++;
>>>
>>> -   test_fsize = lseek(test_fd, 0, SEEK_CUR);
>>> +test_fsize = lseek(test_fd, 0, SEEK_CUR);
>>>
>>> -   /* Run the tests.  */
>>> -   check_aligned_anonymous_unfixed_mmaps();
>>> -   check_aligned_anonymous_unfixed_colliding_mmaps();
>>> -   check_aligned_anonymous_fixed_mmaps();
>>> -   check_file_unfixed_mmaps();
>>> -   check_file_fixed_mmaps();
>>> -   check_file_fixed_eof_mmaps();
>>> -   check_file_unfixed_eof_mmaps();
>>> -   check_invalid_mmaps();
>>> +/* Run the tests.  */
>>> +check_aligned_anonymous_unfixed_mmaps();
>>> +check_aligned_anonymous_unfixed_colliding_mmaps();
>>> +check_aligned_anonymous_fixed_mmaps();
>>> +check_file_unfixed_mmaps();
>>> +check_file_fixed_mmaps();
>>> +

[PATCH for-5.0 v2 10/23] quorum: Implement .bdrv_recurse_can_replace()

2019-11-11 Thread Max Reitz
Signed-off-by: Max Reitz 
---
 block/quorum.c | 62 ++
 1 file changed, 62 insertions(+)

diff --git a/block/quorum.c b/block/quorum.c
index 3a824e77e3..8ee03e9baf 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -825,6 +825,67 @@ static bool 
quorum_recurse_is_first_non_filter(BlockDriverState *bs,
 return false;
 }
 
+static bool quorum_recurse_can_replace(BlockDriverState *bs,
+   BlockDriverState *to_replace)
+{
+BDRVQuorumState *s = bs->opaque;
+int i;
+
+for (i = 0; i < s->num_children; i++) {
+/*
+ * We have no idea whether our children show the same data as
+ * this node (@bs).  It is actually highly likely that
+ * @to_replace does not, because replacing a broken child is
+ * one of the main use cases here.
+ *
+ * We do know that the new BDS will match @bs, so replacing
+ * any of our children by it will be safe.  It cannot change
+ * the data this quorum node presents to its parents.
+ *
+ * However, replacing @to_replace by @bs in any of our
+ * children's chains may change visible data somewhere in
+ * there.  We therefore cannot recurse down those chains with
+ * bdrv_recurse_can_replace().
+ * (More formally, bdrv_recurse_can_replace() requires that
+ * @to_replace will be replaced by something matching the @bs
+ * passed to it.  We cannot guarantee that.)
+ *
+ * Thus, we can only check whether any of our immediate
+ * children matches @to_replace.
+ *
+ * (In the future, we might add a function to recurse down a
+ * chain that checks that nothing there cares about a change
+ * in data from the respective child in question.  For
+ * example, most filters do not care when their child's data
+ * suddenly changes, as long as their parents do not care.)
+ */
+if (s->children[i].child->bs == to_replace) {
+Error *local_err = NULL;
+
+/*
+ * We now have to ensure that there is no other parent
+ * that cares about replacing this child by a node with
+ * potentially different data.
+ */
+s->children[i].to_be_replaced = true;
+bdrv_child_refresh_perms(bs, s->children[i].child, _err);
+
+/* Revert permissions */
+s->children[i].to_be_replaced = false;
+bdrv_child_refresh_perms(bs, s->children[i].child, _abort);
+
+if (local_err) {
+error_free(local_err);
+return false;
+}
+
+return true;
+}
+}
+
+return false;
+}
+
 static int quorum_valid_threshold(int threshold, int num_children, Error 
**errp)
 {
 
@@ -1195,6 +1256,7 @@ static BlockDriver bdrv_quorum = {
 
 .is_filter  = true,
 .bdrv_recurse_is_first_non_filter   = quorum_recurse_is_first_non_filter,
+.bdrv_recurse_can_replace   = quorum_recurse_can_replace,
 
 .strong_runtime_opts= quorum_strong_runtime_opts,
 };
-- 
2.23.0




[PATCH for-5.0 v2 01/23] blockdev: Allow external snapshots everywhere

2019-11-11 Thread Max Reitz
There is no good reason why we would allow external snapshots only on
the first non-filter node in a chain.  Parent BDSs should not care
whether their child is replaced by a snapshot.  (If they do care, they
should announce that via freezing the chain, which is checked in
bdrv_append() through bdrv_set_backing_hd().)

Before we had bdrv_is_first_non_filter() here (since 212a5a8f095), there
was a special function bdrv_check_ext_snapshot() that allowed snapshots
by default, but block drivers could override this.  Only blkverify did
so, however.

It is not clear to me why blkverify would do so; maybe just so that the
testee block driver would not be replaced.  The introducing commit
f6186f49e2c does not explain why.  Maybe because 08b24cfe376 would have
been the correct solution?  (Which adds a .supports_backing check.)

Signed-off-by: Max Reitz 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 blockdev.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 8e029e9c01..ab78230d23 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1595,11 +1595,6 @@ static void external_snapshot_prepare(BlkActionState 
*common,
 }
 }
 
-if (!bdrv_is_first_non_filter(state->old_bs)) {
-error_setg(errp, QERR_FEATURE_DISABLED, "snapshot");
-goto out;
-}
-
 if (action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC) {
 BlockdevSnapshotSync *s = action->u.blockdev_snapshot_sync.data;
 const char *format = s->has_format ? s->format : "qcow2";
-- 
2.23.0




  1   2   3   >