[PATCH v3 7/7] block: check availability for preadv/pwritev on mac

2020-10-27 Thread Joelle van Dyne
macOS 11/iOS 14 added preadv/pwritev APIs. Due to weak linking, configure
will succeed with CONFIG_PREADV even when targeting a lower OS version. We
therefore need to check at run time if we can actually use these APIs.

Signed-off-by: Joelle van Dyne 
---
 block/file-posix.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/block/file-posix.c b/block/file-posix.c
index 5560fd20ac..b5a7ce483d 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1394,12 +1394,24 @@ static bool preadv_present = true;
 static ssize_t
 qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
 {
+#ifdef CONFIG_DARWIN /* preadv introduced in macOS 11 */
+if (!__builtin_available(macOS 11, iOS 14, watchOS 7, tvOS 14, *)) {
+preadv_present = false;
+return -ENOSYS;
+} else
+#endif
 return preadv(fd, iov, nr_iov, offset);
 }
 
 static ssize_t
 qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
 {
+#ifdef CONFIG_DARWIN /* pwritev introduced in macOS 11 */
+if (!__builtin_available(macOS 11, iOS 14, watchOS 7, tvOS 14, *)) {
+preadv_present = false;
+return -ENOSYS;
+} else
+#endif
 return pwritev(fd, iov, nr_iov, offset);
 }
 
-- 
2.28.0




[PATCH v3 3/7] qemu: add support for iOS host

2020-10-27 Thread Joelle van Dyne
This introduces support for building for iOS hosts. When the correct Xcode
toolchain is used, iOS host will be detected automatically.

block: disable features not supported by iOS sandbox
slirp: disable SMB features for iOS
target: disable system() calls for iOS
tcg: use sys_icache_invalidate() instead of GCC builtin for iOS
tests: disable tests on iOS which use system()
Signed-off-by: Joelle van Dyne 
---
 configure  | 43 +-
 meson.build|  2 +-
 tcg/aarch64/tcg-target.h   |  7 ++
 block.c|  2 +-
 block/file-posix.c | 31 ++--
 net/slirp.c| 16 ++---
 qga/commands-posix.c   |  6 +
 target/arm/arm-semi.c  |  2 ++
 target/m68k/m68k-semi.c|  2 ++
 target/nios2/nios2-semi.c  |  2 ++
 tests/qtest/libqos/virtio-9p.c |  8 +++
 tests/qtest/meson.build|  7 +++---
 12 files changed, 101 insertions(+), 27 deletions(-)

diff --git a/configure b/configure
index e211d11971..7fbaefc255 100755
--- a/configure
+++ b/configure
@@ -557,6 +557,19 @@ EOF
   compile_object
 }
 
+check_ios() {
+  cat > $TMPC < $TMPC <
@@ -599,7 +612,11 @@ elif check_define __DragonFly__ ; then
 elif check_define __NetBSD__; then
   targetos='NetBSD'
 elif check_define __APPLE__; then
-  targetos='Darwin'
+  if check_ios ; then
+targetos='iOS'
+  else
+targetos='Darwin'
+  fi
 else
   # This is a fatal error, but don't report it yet, because we
   # might be going to just print the --help text, or it might
@@ -776,6 +793,22 @@ Darwin)
   # won't work when we're compiling with gcc as a C compiler.
   QEMU_CFLAGS="-DOS_OBJECT_USE_OBJC=0 $QEMU_CFLAGS"
 ;;
+iOS)
+  bsd="yes"
+  darwin="yes"
+  ios="yes"
+  if [ "$cpu" = "x86_64" ] ; then
+QEMU_CFLAGS="-arch x86_64 $QEMU_CFLAGS"
+QEMU_LDFLAGS="-arch x86_64 $QEMU_LDFLAGS"
+  fi
+  host_block_device_support="no"
+  audio_drv_list=""
+  audio_possible_drivers=""
+  QEMU_LDFLAGS="-framework CoreFoundation $QEMU_LDFLAGS"
+  # Disable attempts to use ObjectiveC features in os/object.h since they
+  # won't work when we're compiling with gcc as a C compiler.
+  QEMU_CFLAGS="-DOS_OBJECT_USE_OBJC=0 $QEMU_CFLAGS"
+;;
 SunOS)
   solaris="yes"
   make="${MAKE-gmake}"
@@ -5956,6 +5989,10 @@ if test "$darwin" = "yes" ; then
   echo "CONFIG_DARWIN=y" >> $config_host_mak
 fi
 
+if test "$ios" = "yes" ; then
+  echo "CONFIG_IOS=y" >> $config_host_mak
+fi
+
 if test "$solaris" = "yes" ; then
   echo "CONFIG_SOLARIS=y" >> $config_host_mak
 fi
@@ -6924,6 +6961,7 @@ echo "cpp_link_args = [${LDFLAGS:+$(meson_quote 
$LDFLAGS)}]" >> $cross
 echo "[binaries]" >> $cross
 echo "c = [$(meson_quote $cc)]" >> $cross
 test -n "$cxx" && echo "cpp = [$(meson_quote $cxx)]" >> $cross
+test -n "$objcc" && echo "objc = [$(meson_quote $objcc)]" >> $cross
 echo "ar = [$(meson_quote $ar)]" >> $cross
 echo "nm = [$(meson_quote $nm)]" >> $cross
 echo "pkgconfig = [$(meson_quote $pkg_config_exe)]" >> $cross
@@ -6942,6 +6980,9 @@ if test "$cross_compile" = "yes"; then
 if test "$linux" = "yes" ; then
 echo "system = 'linux'" >> $cross
 fi
+if test "$darwin" = "yes" ; then
+echo "system = 'darwin'" >> $cross
+fi
 case "$ARCH" in
 i386|x86_64)
 echo "cpu_family = 'x86'" >> $cross
diff --git a/meson.build b/meson.build
index e880274b7c..48c95150fe 100644
--- a/meson.build
+++ b/meson.build
@@ -176,7 +176,7 @@ if targetos == 'windows'
   include_directories: 
include_directories('.'))
 elif targetos == 'darwin'
   coref = dependency('appleframeworks', modules: 'CoreFoundation')
-  iokit = dependency('appleframeworks', modules: 'IOKit')
+  iokit = dependency('appleframeworks', modules: 'IOKit', required: 
'CONFIG_IOS' not in config_host)
   cocoa = dependency('appleframeworks', modules: 'Cocoa', required: 
get_option('cocoa'))
 elif targetos == 'sunos'
   socket = [cc.find_library('socket'),
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index f605257ed5..6b5b3216fa 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -149,12 +149,19 @@ typedef enum {
 #define TCG_TARGET_HAS_MEMORY_BSWAP 1
 
 #if defined(__APPLE__)
+void sys_icache_invalidate(void *start, size_t len);
 void sys_dcache_flush(void *start, size_t len);
 #endif
 
 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
+#if defined(__APPLE__)
+sys_icache_invalidate((char *)start, stop - start);
+#elif defined(__GNUC__)
 __builtin___clear_cache((char *)start, (char *)stop);
+#else
+#error "Missing builtin to flush instruction cache"
+#endif
 }
 
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
diff --git a/block.c b/block.c
index 430edf79bb..5d49869d02 100644
--- a/block.c
+++ b/block.c
@@ -53,7 +53,7 @@
 #ifdef CONFIG_BSD
 #include 
 #include 
-#ifndef __DragonFly__
+#if 

[PATCH v3 1/7] configure: option to disable host block devices

2020-10-27 Thread Joelle van Dyne
Some hosts (iOS) have a sandboxed filesystem and do not provide low-level
APIs for interfacing with host block devices.

Signed-off-by: Joelle van Dyne 
---
 configure  | 4 
 meson.build| 1 +
 block/file-posix.c | 8 +++-
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index 71bbe82ac5..4e68a5fefe 100755
--- a/configure
+++ b/configure
@@ -448,6 +448,7 @@ ninja=""
 skip_meson=no
 gettext=""
 mirror_jit="no"
+host_block_device_support="yes"
 
 bogus_os="no"
 malloc_trim="auto"
@@ -5901,6 +5902,9 @@ if test "$default_devices" = "yes" ; then
 else
   echo "CONFIG_MINIKCONF_MODE=--allnoconfig" >> $config_host_mak
 fi
+if test "$host_block_device_support" = "yes" ; then
+  echo "CONFIG_HOST_BLOCK_DEVICE=y" >> $config_host_mak
+fi
 if test "$debug_tcg" = "yes" ; then
   echo "CONFIG_DEBUG_TCG=y" >> $config_host_mak
 fi
diff --git a/meson.build b/meson.build
index 0a56fef146..e880274b7c 100644
--- a/meson.build
+++ b/meson.build
@@ -2149,6 +2149,7 @@ summary_info += {'vvfat support': 
config_host.has_key('CONFIG_VVFAT')}
 summary_info += {'qed support':   config_host.has_key('CONFIG_QED')}
 summary_info += {'parallels support': config_host.has_key('CONFIG_PARALLELS')}
 summary_info += {'sheepdog support':  config_host.has_key('CONFIG_SHEEPDOG')}
+summary_info += {'host block dev support': 
config_host.has_key('CONFIG_HOST_BLOCK_DEVICE')}
 summary_info += {'capstone':  capstone_opt == 'disabled' ? false : 
capstone_opt}
 summary_info += {'libpmem support':   config_host.has_key('CONFIG_LIBPMEM')}
 summary_info += {'libdaxctl support': config_host.has_key('CONFIG_LIBDAXCTL')}
diff --git a/block/file-posix.c b/block/file-posix.c
index c63926d592..52f7c20525 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -41,7 +41,7 @@
 #include "scsi/pr-manager.h"
 #include "scsi/constants.h"
 
-#if defined(__APPLE__) && (__MACH__)
+#if defined(CONFIG_HOST_BLOCK_DEVICE) && defined(__APPLE__) && (__MACH__)
 #include 
 #include 
 #include 
@@ -3247,6 +3247,8 @@ BlockDriver bdrv_file = {
 /***/
 /* host device */
 
+#if defined(CONFIG_HOST_BLOCK_DEVICE)
+
 #if defined(__APPLE__) && defined(__MACH__)
 static kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath,
 CFIndex maxPathSize, int flags);
@@ -3872,6 +3874,8 @@ static BlockDriver bdrv_host_cdrom = {
 };
 #endif /* __FreeBSD__ */
 
+#endif /* CONFIG_HOST_BLOCK_DEVICE */
+
 static void bdrv_file_init(void)
 {
 /*
@@ -3879,6 +3883,7 @@ static void bdrv_file_init(void)
  * registered last will get probed first.
  */
 bdrv_register(&bdrv_file);
+#if defined(CONFIG_HOST_BLOCK_DEVICE)
 bdrv_register(&bdrv_host_device);
 #ifdef __linux__
 bdrv_register(&bdrv_host_cdrom);
@@ -3886,6 +3891,7 @@ static void bdrv_file_init(void)
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
 bdrv_register(&bdrv_host_cdrom);
 #endif
+#endif /* CONFIG_HOST_BLOCK_DEVICE */
 }
 
 block_init(bdrv_file_init);
-- 
2.28.0




Re: [PATCH] hw/sd: Zero out function selection fields before being populated

2020-10-27 Thread Bin Meng
Hi Niek,

On Wed, Oct 28, 2020 at 3:55 AM Niek Linnenbank
 wrote:
>
> Hello Philippe, Bin,
>
> Thanks for your support on this. I've just tried this patch and unfortunately 
> it doesn't seem to
> resolve the issue, at least on my machine. This is the output that I get when 
> running the avocado test for NetBSD9.0:
>
>  (5/5) 
> tests/acceptance/boot_linux_console.py:BootLinuxConsole.test_arm_orangepi_uboot_netbsd9:
>  |console: U-Boot SPL 2020.01+dfsg-1 (Jan 08 2020 - 08:19:44 +)
> console: DRAM: 1024 MiB
> console: Failed to set core voltage! Can't set CPU frequency
> console: Trying to boot from MMC1
> console: U-Boot 2020.01+dfsg-1 (Jan 08 2020 - 08:19:44 +) Allwinner 
> Technology
> console: CPU:   Allwinner H3 (SUN8I )
> ...
> console: [   1.2957642] sdmmc0: SD card status: 4-bit, C0
> console: [   1.3094731] ld0 at sdmmc0: 
> <0xaa:0x5859:QEMU!:0x01:0xdeadbeef:0x062>
> console: [   1.3159383] ld0: 1024 MB, 1040 cyl, 32 head, 63 sec, 512 
> bytes/sect x 2097152 sectors
> console: [   1.3763222] ld0: 4-bit width, High-Speed/SDR25, 50.000 MHz
> console: [   2.0592109] WARNING: 4 errors while detecting hardware; check 
> system log.
> console: [   2.0693403] boot device: ld0
> console: [   2.0798960] root on ld0a dumps on ld0b
> console: [   2.0994141] vfs_mountroot: can't open root device
> console: [   2.0994141] cannot mount root, error = 6
> 
>
> When starting NetBSD 9.0 manually, it shows clearly that the SD card is 
> recognized with 1GiB size, also from U-Boot:
> $ qemu-system-arm -M orangepi-pc -nographic -nic user -sd ./armv7.img
> WARNING: Image format was not specified for './armv7.img' and probing guessed 
> raw.
>  Automatically detecting the format is dangerous for raw images, 
> write operations on block 0 will be restricted.
>  Specify the 'raw' format explicitly to remove the restrictions.
>
> U-Boot SPL 2020.07-00610-g610e1487c8 (Jul 11 2020 - 22:31:58 +0200)
> DRAM: 1024 MiB
> Failed to set core voltage! Can't set CPU frequency
> Trying to boot from MMC1
>
> U-Boot 2020.07-00610-g610e1487c8 (Jul 11 2020 - 22:31:58 +0200) Allwinner 
> Technology
>
> CPU:   Allwinner H3 (SUN8I )
> Model: Xunlong Orange Pi PC
> DRAM:  1 GiB
> MMC:   mmc@1c0f000: 0
> ...
> Hit any key to stop autoboot:  0
> => mmc info
> Device: mmc@1c0f000
> Manufacturer ID: aa
> OEM: 5859
> Name: QEMU!
> Bus Speed: 5000
> Mode: SD High Speed (50MHz)
> Rd Block Len: 512
> SD version 2.0
> High Capacity: No
> Capacity: 1 GiB
> Bus Width: 4-bit
> Erase Group Size: 512 Bytes
> =>
> => boot
> 8846552 bytes read in 931 ms (9.1 MiB/s)
> ...
> [   1.3447558] sdmmc0: SD card status: 4-bit, C0
> [   1.3546801] ld0 at sdmmc0: <0xaa:0x5859:QEMU!:0x01:0xdeadbeef:0x062>
> [   1.3647790] ld0: 1024 MB, 1040 cyl, 32 head, 63 sec, 512 bytes/sect x 
> 2097152 sectors
> [   1.4150230] ld0: 4-bit width, High-Speed/SDR25, 50.000 MHz
> [   2.0800893] WARNING: 4 errors while detecting hardware; check system log.
> [   2.0800893] boot device: ld0
> [   2.0900792] root on ld0a dumps on ld0b
> [   2.1004160] vfs_mountroot: can't open root device
> [   2.1004160] cannot mount root, error = 6
> [   2.1004160] root device (default ld0a):
> 
>
> Note that the image has been resized to 2GiB with qemu-img:
> $ ls -alh armv7.img
> -rw-rw-r-- 1 user user 2,0G okt 28 22:45 armv7.img
>
> The previous patch proposed by Bin did resolve the error ("hw/sd: Fix 2GiB 
> card CSD register values" ):
>  https://lists.gnu.org/archive/html/qemu-devel/2020-10/msg07318.html

Correct. The patch above has not been applied yet, and only this patch
is now in mainline, so you will still see errors in the NetBSD 9.0
test.

>
> Although I see that this patch is now in master 
> (89c6700fe7eed9195f10055751edbc6d5e7ab940),
> can you please confirm that the issue is still present when testing this on 
> your machine as well?
>

Regards,
Bin



RE: [PATCH 1/2] hw/block/m25p80: Fix Numonyx dummy cycle register behavior

2020-10-27 Thread Joe Komlodi
Hi Francisco,

Comments marked with [Joe]

-Original Message-
From: Francisco Iglesias  
Sent: Tuesday, October 20, 2020 6:50 AM
To: Joe Komlodi 
Cc: qemu-de...@nongnu.org; alist...@alistair23.me; kw...@redhat.com; 
mre...@redhat.com; qemu-block@nongnu.org
Subject: Re: [PATCH 1/2] hw/block/m25p80: Fix Numonyx dummy cycle register 
behavior

Hi Joe,

On Tue, Sep 29, 2020 at 05:28:35PM -0700, Joe Komlodi wrote:
> Numonyx chips determine the number of cycles to wait based on bits 7:4 
> in the volatile configuration register.
> 
> However, if these bits are 0x0 or 0xF, the number of dummy cycles to 
> wait is
> 10 on a QIOR or QIOR4 command, or 8 on any other currently supported 
> fast read command. [1]
> 
> [1] http://www.micron.com/-/media/client/global/documents/products/
> data-sheet/nor-flash/serial-nor/n25q/n25q_512mb_1_8v_65nm.pdf
> 
> Page 22 note 2, and page 30 notes 5 and 10.
> 
> Signed-off-by: Joe Komlodi 
> ---
>  hw/block/m25p80.c | 26 +++---
>  1 file changed, 23 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c index 
> 483925f..43830c9 100644
> --- a/hw/block/m25p80.c
> +++ b/hw/block/m25p80.c
> @@ -820,6 +820,26 @@ static void reset_memory(Flash *s)
>  trace_m25p80_reset_done(s);
>  }
>  
> +static uint8_t numonyx_fast_read_num_dummies(Flash *s)

Should we rename the function to something like 
'numonyx_extract_cfg_num_dummies' (since it is not only used inside 
'decode_fast_read_cmd')?

[Joe] Yeah, that name makes more sense.

> +{
> +uint8_t cycle_count;
> +uint8_t num_dummies;
> +assert(get_man(s) == MAN_NUMONYX);
> +
> +cycle_count = extract32(s->volatile_cfg, 4, 4);
> +if (cycle_count == 0x0 || cycle_count == 0x0F) {
> +if (s->cmd_in_progress == QIOR || s->cmd_in_progress == 
> + QIOR4) {

QOR and QOR4 also have 10 dummy cycles by default, so we will have to check for 
those as well; perhaps something similar to the below might work:  

uint8_t n_dummies = extract32(s->volatile_cfg, 4, 4);

if (!n_dummies || n_dummies == 0xF) {
switch(s->cmd_in_progress){
case QOR:
case QOR4
case QIOR:
case QIOR4:
n_dummies = 10;
break;
default:
n_dummies = 8;
break;
}
}

return n_dummies;

[Joe] As talked about offline, the datasheet in the commit message just has 
confusing wording.
8 dummies for QOR seem to be correct, and I'll update the datasheet in the 
commit message with one that's clearer.

Thanks!
Joe

Best regards,
Francisco Iglesias

> +num_dummies = 10;
> +} else {
> +num_dummies = 8;
> +}
> +} else {
> +num_dummies = cycle_count;
> +}
> +
> +return num_dummies;
> +}
> +
>  static void decode_fast_read_cmd(Flash *s)  {
>  s->needed_bytes = get_addr_length(s); @@ -829,7 +849,7 @@ static 
> void decode_fast_read_cmd(Flash *s)
>  s->needed_bytes += 8;
>  break;
>  case MAN_NUMONYX:
> -s->needed_bytes += extract32(s->volatile_cfg, 4, 4);
> +s->needed_bytes += numonyx_fast_read_num_dummies(s);
>  break;
>  case MAN_MACRONIX:
>  if (extract32(s->volatile_cfg, 6, 2) == 1) { @@ -868,7 +888,7 
> @@ static void decode_dio_read_cmd(Flash *s)
>  );
>  break;
>  case MAN_NUMONYX:
> -s->needed_bytes += extract32(s->volatile_cfg, 4, 4);
> +s->needed_bytes += numonyx_fast_read_num_dummies(s);
>  break;
>  case MAN_MACRONIX:
>  switch (extract32(s->volatile_cfg, 6, 2)) { @@ -908,7 +928,7 
> @@ static void decode_qio_read_cmd(Flash *s)
>  );
>  break;
>  case MAN_NUMONYX:
> -s->needed_bytes += extract32(s->volatile_cfg, 4, 4);
> +s->needed_bytes += numonyx_fast_read_num_dummies(s);
>  break;
>  case MAN_MACRONIX:
>  switch (extract32(s->volatile_cfg, 6, 2)) {
> --
> 2.7.4
> 


[PATCH v2 1/1] hw/block/m25p80: Fix Numonyx fast read dummy cycle count

2020-10-27 Thread Joe Komlodi
Numonyx chips determine the number of cycles to wait based on bits 7:4 in the
volatile configuration register.

However, if these bits are 0x0 or 0xF, the number of dummy cycles to wait is
10 on a QIOR or QIOR4 command, or 8 on any other currently supported
fast read command. [1]

[1] https://www.micron.com/-/media/client/global/documents/products/data-sheet/
nor-flash/serial-nor/mt25q/die-rev-b/mt25q_qlkt_u_02g_cbb_0.pdf
?rev=9b167fbf2b3645efba6385949a72e453
Page 34, page 39 note 5

Signed-off-by: Joe Komlodi 
---
 hw/block/m25p80.c | 26 +++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index 483925f..302ed9d 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -820,6 +820,26 @@ static void reset_memory(Flash *s)
 trace_m25p80_reset_done(s);
 }
 
+static uint8_t numonyx_extract_cfg_num_dummies(Flash *s)
+{
+uint8_t cycle_count;
+uint8_t num_dummies;
+assert(get_man(s) == MAN_NUMONYX);
+
+cycle_count = extract32(s->volatile_cfg, 4, 4);
+if (cycle_count == 0x0 || cycle_count == 0x0F) {
+if (s->cmd_in_progress == QIOR || s->cmd_in_progress == QIOR4) {
+num_dummies = 10;
+} else {
+num_dummies = 8;
+}
+} else {
+num_dummies = cycle_count;
+}
+
+return num_dummies;
+}
+
 static void decode_fast_read_cmd(Flash *s)
 {
 s->needed_bytes = get_addr_length(s);
@@ -829,7 +849,7 @@ static void decode_fast_read_cmd(Flash *s)
 s->needed_bytes += 8;
 break;
 case MAN_NUMONYX:
-s->needed_bytes += extract32(s->volatile_cfg, 4, 4);
+s->needed_bytes += numonyx_extract_cfg_num_dummies(s);
 break;
 case MAN_MACRONIX:
 if (extract32(s->volatile_cfg, 6, 2) == 1) {
@@ -868,7 +888,7 @@ static void decode_dio_read_cmd(Flash *s)
 );
 break;
 case MAN_NUMONYX:
-s->needed_bytes += extract32(s->volatile_cfg, 4, 4);
+s->needed_bytes += numonyx_extract_cfg_num_dummies(s);
 break;
 case MAN_MACRONIX:
 switch (extract32(s->volatile_cfg, 6, 2)) {
@@ -908,7 +928,7 @@ static void decode_qio_read_cmd(Flash *s)
 );
 break;
 case MAN_NUMONYX:
-s->needed_bytes += extract32(s->volatile_cfg, 4, 4);
+s->needed_bytes += numonyx_extract_cfg_num_dummies(s);
 break;
 case MAN_MACRONIX:
 switch (extract32(s->volatile_cfg, 6, 2)) {
-- 
2.7.4




[PATCH v2 0/1] hw/block/m25p80: Fix Numonyx flash dummy cycle register behavior

2020-10-27 Thread Joe Komlodi
Changelog:
v1 -> v2
 - 1/2: Change function name to be more accurate
 - 2/2: Dropped

Hi all,

The series fixes the behavior of the dummy cycle register for Numonyx flashes so
it's closer to how hardware behaves.

On hardware, the dummy cycles for fast read commands are set to a specific value
(8 or 10) if the register is all 0s or 1s.
If the register value isn't all 0s or 1s, then the flash expects the amount of
cycles sent to be equal to the count in the register.

Thanks!
Joe

Joe Komlodi (1):
  hw/block/m25p80: Fix Numonyx fast read dummy cycle count

 hw/block/m25p80.c | 26 +++---
 1 file changed, 23 insertions(+), 3 deletions(-)

-- 
2.7.4




[PULL 12/12] nbd: Add 'qemu-nbd -A' to expose allocation depth

2020-10-27 Thread Eric Blake
Allow the server to expose an additional metacontext to be requested
by savvy clients.  qemu-nbd adds a new option -A to expose the
qemu:allocation-depth metacontext through NBD_CMD_BLOCK_STATUS; this
can also be set via QMP when using block-export-add.

qemu as client is hacked into viewing the key aspects of this new
context by abusing the already-experimental x-dirty-bitmap option to
collapse all depths greater than 2, which results in a tri-state value
visible in the output of 'qemu-img map --output=json' (yes, that means
x-dirty-bitmap is now a bit of a misnomer, but I didn't feel like
renaming it as it would introduce a needless break of back-compat,
even though we make no compat guarantees with x- members):

unallocated (depth 0) => "zero":false, "data":true
local (depth 1)   => "zero":false, "data":false
backing (depth 2+)=> "zero":true,  "data":true

libnbd as client is probably a nicer way to get at the information
without having to decipher such hacks in qemu as client. ;)

Signed-off-by: Eric Blake 
Message-Id: <20201027050556.269064-11-ebl...@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 docs/tools/qemu-nbd.rst|  8 +++-
 qapi/block-core.json   |  7 +++-
 qapi/block-export.json |  7 +++-
 block/nbd.c| 26 ++---
 nbd/server.c   |  2 +
 qemu-nbd.c | 12 +-
 tests/qemu-iotests/309 | 77 ++
 tests/qemu-iotests/309.out | 22 +++
 tests/qemu-iotests/group   |  1 +
 9 files changed, 151 insertions(+), 11 deletions(-)
 create mode 100755 tests/qemu-iotests/309
 create mode 100644 tests/qemu-iotests/309.out

diff --git a/docs/tools/qemu-nbd.rst b/docs/tools/qemu-nbd.rst
index 667861cb22e9..fe41336dc550 100644
--- a/docs/tools/qemu-nbd.rst
+++ b/docs/tools/qemu-nbd.rst
@@ -72,10 +72,16 @@ driver options if ``--image-opts`` is specified.

   Export the disk as read-only.

+.. option:: -A, --allocation-depth
+
+  Expose allocation depth information via the
+  ``qemu:allocation-depth`` metadata context accessible through
+  NBD_OPT_SET_META_CONTEXT.
+
 .. option:: -B, --bitmap=NAME

   If *filename* has a qcow2 persistent bitmap *NAME*, expose
-  that bitmap via the ``qemu:dirty-bitmap:NAME`` context
+  that bitmap via the ``qemu:dirty-bitmap:NAME`` metadata context
   accessible through NBD_OPT_SET_META_CONTEXT.

 .. option:: -s, --snapshot
diff --git a/qapi/block-core.json b/qapi/block-core.json
index e00fc27b5ea4..1b8b4156b4b9 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3905,9 +3905,12 @@
 #
 # @tls-creds: TLS credentials ID
 #
-# @x-dirty-bitmap: A "qemu:dirty-bitmap:NAME" string to query in place of
+# @x-dirty-bitmap: A metadata context name such as "qemu:dirty-bitmap:NAME"
+#  or "qemu:allocation-depth" to query in place of the
 #  traditional "base:allocation" block status (see
-#  NBD_OPT_LIST_META_CONTEXT in the NBD protocol) (since 3.0)
+#  NBD_OPT_LIST_META_CONTEXT in the NBD protocol; and
+#  yes, naming this option x-context would have made
+#  more sense) (since 3.0)
 #
 # @reconnect-delay: On an unexpected disconnect, the nbd client tries to
 #   connect again until succeeding or encountering a serious
diff --git a/qapi/block-export.json b/qapi/block-export.json
index c4125f4d2104..a9f488f99c1a 100644
--- a/qapi/block-export.json
+++ b/qapi/block-export.json
@@ -90,11 +90,16 @@
 #   the metadata context name "qemu:dirty-bitmap:BITMAP" to inspect
 #   each bitmap.
 #
+# @allocation-depth: Also export the allocation depth map for @device, so
+#the NBD client can use NBD_OPT_SET_META_CONTEXT with
+#the metadata context name "qemu:allocation-depth" to
+#inspect allocation details. (since 5.2)
+#
 # Since: 5.2
 ##
 { 'struct': 'BlockExportOptionsNbd',
   'base': 'BlockExportOptionsNbdBase',
-  'data': { '*bitmaps': ['str'] } }
+  'data': { '*bitmaps': ['str'], '*allocation-depth': 'bool' } }

 ##
 # @BlockExportOptionsVhostUserBlk:
diff --git a/block/nbd.c b/block/nbd.c
index 4548046cd7cd..42536702b6f9 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -135,6 +135,7 @@ typedef struct BDRVNBDState {
 QCryptoTLSCreds *tlscreds;
 const char *hostname;
 char *x_dirty_bitmap;
+bool alloc_depth;

 bool wait_connect;
 NBDConnectThread *connect_thread;
@@ -961,6 +962,16 @@ static int nbd_parse_blockstatus_payload(BDRVNBDState *s,
 trace_nbd_parse_blockstatus_compliance("extent length too large");
 }

+/*
+ * HACK: if we are using x-dirty-bitmaps to access
+ * qemu:allocation-depth, treat all depths > 2 the same as 2,
+ * since nbd_client_co_block_status is only expecting the low two
+ * bits to be set.
+ */
+if (s->alloc_depth && extent->flags > 2) {
+extent->flags = 2;
+}
+
 return 0;
 }

@@ 

[PULL 10/12] block: Return depth level during bdrv_is_allocated_above

2020-10-27 Thread Eric Blake
When checking for allocation across a chain, it's already easy to
count the depth within the chain at which the allocation is found.
Instead of throwing that information away, return it to the caller.
Existing callers only cared about allocated/non-allocated, but having
a depth available will be used by NBD in the next patch.

Signed-off-by: Eric Blake 
Message-Id: <20201027050556.269064-9-ebl...@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/coroutines.h |  6 --
 block/io.c | 29 ++---
 block/commit.c |  2 +-
 block/mirror.c |  2 +-
 block/stream.c |  2 +-
 5 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/block/coroutines.h b/block/coroutines.h
index 1cb3128b942c..4cfb4946e65e 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -47,7 +47,8 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
   int64_t bytes,
   int64_t *pnum,
   int64_t *map,
-  BlockDriverState **file);
+  BlockDriverState **file,
+  int *depth);
 int generated_co_wrapper
 bdrv_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
@@ -57,7 +58,8 @@ bdrv_common_block_status_above(BlockDriverState *bs,
int64_t bytes,
int64_t *pnum,
int64_t *map,
-   BlockDriverState **file);
+   BlockDriverState **file,
+   int *depth);

 int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs,
QEMUIOVector *qiov, int64_t pos);
diff --git a/block/io.c b/block/io.c
index 02528b3823fe..7751cdb81948 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2349,20 +2349,28 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
   int64_t bytes,
   int64_t *pnum,
   int64_t *map,
-  BlockDriverState **file)
+  BlockDriverState **file,
+  int *depth)
 {
 int ret;
 BlockDriverState *p;
 int64_t eof = 0;
+int dummy;

 assert(!include_base || base); /* Can't include NULL base */

+if (!depth) {
+depth = &dummy;
+}
+*depth = 0;
+
 if (!include_base && bs == base) {
 *pnum = bytes;
 return 0;
 }

 ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
+++*depth;
 if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) {
 return ret;
 }
@@ -2379,6 +2387,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
 {
 ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
file);
+++*depth;
 if (ret < 0) {
 return ret;
 }
@@ -2437,7 +2446,7 @@ int bdrv_block_status_above(BlockDriverState *bs, 
BlockDriverState *base,
 int64_t *map, BlockDriverState **file)
 {
 return bdrv_common_block_status_above(bs, base, false, true, offset, bytes,
-  pnum, map, file);
+  pnum, map, file, NULL);
 }

 int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
@@ -2455,7 +2464,7 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, 
int64_t offset,

 ret = bdrv_common_block_status_above(bs, bs, true, false, offset,
  bytes, pnum ? pnum : , NULL,
- NULL);
+ NULL, NULL);
 if (ret < 0) {
 return ret;
 }
@@ -2465,8 +2474,9 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, 
int64_t offset,
 /*
  * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
  *
- * Return 1 if (a prefix of) the given range is allocated in any image
- * between BASE and TOP (BASE is only included if include_base is set).
+ * Return a positive depth if (a prefix of) the given range is allocated
+ * in any image between BASE and TOP (BASE is only included if include_base
+ * is set).  Depth 1 is TOP, 2 is the first backing layer, and so forth.
  * BASE can be NULL to check if the given offset is allocated in any
  * image of the chain.  Return 0 otherwise, or negative errno on
  * failure.
@@ -2483,13 +2493,18 @@ int bdrv_is_allocated_above(BlockDriverState *top,
 bool include_base, int64_t offset,
 int64_t bytes, int64_t *pnum)
 {
+int depth;
 int ret = bdrv_common_block_status_above(top, base, include_base, false,
-   

[PULL 06/12] nbd: Update qapi to support exporting multiple bitmaps

2020-10-27 Thread Eric Blake
Since 'block-export-add' is new to 5.2, we can still tweak the
interface; there, allowing 'bitmaps':['str'] is nicer than
'bitmap':'str'.  This wires up the qapi and qemu-nbd changes to permit
passing multiple bitmaps as distinct metadata contexts that the NBD
client may request, but the actual support for more than one will
require a further patch to the server.

Note that there are no changes made to the existing deprecated
'nbd-server-add' command; this required splitting the QAPI type
BlockExportOptionsNbd, which fortunately does not affect QMP
introspection.

Signed-off-by: Eric Blake 
Message-Id: <20201027050556.269064-5-ebl...@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Peter Krempa 
---
 docs/system/deprecated.rst |  3 ++-
 qapi/block-export.json | 41 +++---
 blockdev-nbd.c |  6 +-
 nbd/server.c   | 19 --
 qemu-nbd.c | 18 -
 5 files changed, 58 insertions(+), 29 deletions(-)

diff --git a/docs/system/deprecated.rst b/docs/system/deprecated.rst
index 0ebce37a1919..32a0e620dbb9 100644
--- a/docs/system/deprecated.rst
+++ b/docs/system/deprecated.rst
@@ -257,7 +257,8 @@ the 'wait' field, which is only applicable to sockets in 
server mode
 

 Use the more generic commands ``block-export-add`` and ``block-export-del``
-instead.
+instead.  As part of this deprecation, where ``nbd-server-add`` used a
+single ``bitmap``, the new ``block-export-add`` uses a list of ``bitmaps``.

 Human Monitor Protocol (HMP) commands
 -
diff --git a/qapi/block-export.json b/qapi/block-export.json
index 480c497690b0..c4125f4d2104 100644
--- a/qapi/block-export.json
+++ b/qapi/block-export.json
@@ -63,10 +63,10 @@
 '*max-connections': 'uint32' } }

 ##
-# @BlockExportOptionsNbd:
+# @BlockExportOptionsNbdBase:
 #
-# An NBD block export (options shared between nbd-server-add and the NBD branch
-# of block-export-add).
+# An NBD block export (common options shared between nbd-server-add and
+# the NBD branch of block-export-add).
 #
 # @name: Export name. If unspecified, the @device parameter is used as the
 #export name. (Since 2.12)
@@ -74,15 +74,27 @@
 # @description: Free-form description of the export, up to 4096 bytes.
 #   (Since 5.0)
 #
-# @bitmap: Also export the dirty bitmap reachable from @device, so the
-#  NBD client can use NBD_OPT_SET_META_CONTEXT with
-#  "qemu:dirty-bitmap:NAME" to inspect the bitmap. (since 4.0)
-#
 # Since: 5.0
 ##
+{ 'struct': 'BlockExportOptionsNbdBase',
+  'data': { '*name': 'str', '*description': 'str' } }
+
+##
+# @BlockExportOptionsNbd:
+#
+# An NBD block export (distinct options used in the NBD branch of
+# block-export-add).
+#
+# @bitmaps: Also export each of the named dirty bitmaps reachable from
+#   @device, so the NBD client can use NBD_OPT_SET_META_CONTEXT with
+#   the metadata context name "qemu:dirty-bitmap:BITMAP" to inspect
+#   each bitmap.
+#
+# Since: 5.2
+##
 { 'struct': 'BlockExportOptionsNbd',
-  'data': { '*name': 'str', '*description': 'str',
-'*bitmap': 'str' } }
+  'base': 'BlockExportOptionsNbdBase',
+  'data': { '*bitmaps': ['str'] } }

 ##
 # @BlockExportOptionsVhostUserBlk:
@@ -106,19 +118,24 @@
 ##
 # @NbdServerAddOptions:
 #
-# An NBD block export.
+# An NBD block export, per legacy nbd-server-add command.
 #
 # @device: The device name or node name of the node to be exported
 #
 # @writable: Whether clients should be able to write to the device via the
 #NBD connection (default false).
 #
+# @bitmap: Also export a single dirty bitmap reachable from @device, so the
+#  NBD client can use NBD_OPT_SET_META_CONTEXT with the metadata
+#  context name "qemu:dirty-bitmap:BITMAP" to inspect the bitmap
+#  (since 4.0).
+#
 # Since: 5.0
 ##
 { 'struct': 'NbdServerAddOptions',
-  'base': 'BlockExportOptionsNbd',
+  'base': 'BlockExportOptionsNbdBase',
   'data': { 'device': 'str',
-'*writable': 'bool' } }
+'*writable': 'bool', '*bitmap': 'str' } }

 ##
 # @nbd-server-add:
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index cee9134b12eb..d8443d235b73 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -209,8 +209,12 @@ void qmp_nbd_server_add(NbdServerAddOptions *arg, Error 
**errp)
 .has_writable   = arg->has_writable,
 .writable   = arg->writable,
 };
-QAPI_CLONE_MEMBERS(BlockExportOptionsNbd, &export_opts->u.nbd,
+QAPI_CLONE_MEMBERS(BlockExportOptionsNbdBase, &export_opts->u.nbd,
qapi_NbdServerAddOptions_base(arg));
+if (arg->has_bitmap) {
+export_opts->u.nbd.has_bitmaps = true;
+QAPI_LIST_PREPEND(export_opts->u.nbd.bitmaps, g_strdup(arg->bitmap));
+}

 /*
  * nbd-server-add doesn't complain when a read-only 

[PULL 07/12] nbd: Simplify qemu bitmap context name

2020-10-27 Thread Eric Blake
Each dirty bitmap already knows its name; by reducing the scope of the
places where we construct "qemu:dirty-bitmap:NAME" strings, tracking
the name is more localized, and there are fewer per-export fields to
worry about.  This in turn will make it easier for an upcoming patch
to export more than one bitmap at once.

Signed-off-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20201027050556.269064-6-ebl...@redhat.com>
---
 nbd/server.c | 19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 8d01662b4511..77fdecdf9dec 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -95,7 +95,6 @@ struct NBDExport {
 Notifier eject_notifier;

 BdrvDirtyBitmap *export_bitmap;
-char *export_bitmap_context;
 };

 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
@@ -871,14 +870,15 @@ static bool nbd_meta_qemu_query(NBDClient *client, 
NBDExportMetaContexts *meta,
 }

 if (nbd_strshift(&query, "dirty-bitmap:")) {
+const char *bm_name;
+
 trace_nbd_negotiate_meta_query_parse("dirty-bitmap:");
 if (!meta->exp->export_bitmap) {
 trace_nbd_negotiate_meta_query_skip("no dirty-bitmap exported");
 return true;
 }
-if (nbd_meta_empty_or_pattern(client,
-  meta->exp->export_bitmap_context +
-  strlen("qemu:dirty-bitmap:"), query)) {
+bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmap);
+if (nbd_meta_empty_or_pattern(client, bm_name, query)) {
 meta->bitmap = true;
 }
 return true;
@@ -1004,8 +1004,11 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
 }

 if (meta->bitmap) {
-ret = nbd_negotiate_send_meta_context(client,
-  meta->exp->export_bitmap_context,
+const char *bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmap);
+g_autofree char *context = g_strdup_printf("qemu:dirty-bitmap:%s",
+   bm_name);
+
+ret = nbd_negotiate_send_meta_context(client, context,
   NBD_META_ID_DIRTY_BITMAP,
   errp);
 if (ret < 0) {
@@ -1576,9 +1579,6 @@ static int nbd_export_create(BlockExport *blk_exp, 
BlockExportOptions *exp_args,
 bdrv_dirty_bitmap_set_busy(bm, true);
 exp->export_bitmap = bm;
 assert(strlen(bitmap) <= BDRV_BITMAP_MAX_NAME_SIZE);
-exp->export_bitmap_context = g_strdup_printf("qemu:dirty-bitmap:%s",
- bitmap);
-assert(strlen(exp->export_bitmap_context) < NBD_MAX_STRING_SIZE);
 }

 blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
@@ -1656,7 +1656,6 @@ static void nbd_export_delete(BlockExport *blk_exp)

 if (exp->export_bitmap) {
 bdrv_dirty_bitmap_set_busy(exp->export_bitmap, false);
-g_free(exp->export_bitmap_context);
 }
 }

-- 
2.29.0




Re: [PATCH v3 00/15] python: create installable package

2020-10-27 Thread John Snow

Ping O:-)

Looking for feedback from at least Cleber and Eduardo before I barge 
ahead and send a PR to include this on master. Additional packaging and 
versioning feedback from Dan would be nice.


(I know we have a very busy two weeks here; I will continue pinging, but 
I have every intention of merging this prior to 5.2.)


--js

On 10/20/20 3:35 PM, John Snow wrote:

Based-on: https://gitlab.com/jsnow/qemu/-/tree/python

This series factors the python/qemu directory as an installable
package. It does not yet change the mechanics of how any other python
source in the tree consumes it, beyond the import path.

The point of this series is primarily to formalize our dependencies on
mypy, flake8, isort, and pylint alongside versions that are known to
work. It also adds explicitly pinned versions of these dependencies that
should behave in a repeatable and known way for developers and CI
environments both.

With the python tooling as a proper package, you can install this
package in editable or production mode to a virtual environment, your
local user environment, or your system packages. The primary benefit of
this is to gain access to QMP tooling regardless of CWD, without needing
to battle sys.path.

For example: when developing, you may go to qemu/python/ and invoke
`pipenv shell` to activate a virtual environment that contains the qemu
packages.  This package will always reflect the current version of the
source files in the tree. When you are finished, you can simply exit the
shell to remove these packages from your python environment.

When not developing, you could install a version of this package to your
environment outright to gain access to the QMP and QEMUMachine classes
for lightweight scripting and testing by using pip: "pip install [--user] ."

Finally, this package is formatted in such a way that it COULD be
uploaded to https://pypi.org/project/qemu and installed independently of
qemu.git with `pip install qemu`, but that button remains unpushed.

TESTING THIS SERIES:

CD to qemu/python first, and then:

1. Try "pipenv shell" to get a venv with the package installed to it in
editable mode. Ctrl+d exits this venv shell. While in this shell, any
python script that uses "from qemu.[qmp|machine] import ..." should work
correctly regardless of your CWD.

2. Try "pipenv sync --dev" to create/update the venv with the
development packages without actually entering the venv. This should
install isort, mypy, flake8 and pylint to the venv.

3. After the above sync, try "pipenv shell" again, and from the python
project root, try any of the following:

   - pylint qemu
   - flake8 qemu
   - isort -c qemu
   - mypy qemu

4. Leave any venv you are in, and from the project root, try the
following commands:

   - pipenv run pylint qemu
   - pipenv run flake8 qemu
   - pipenv run isort -c qemu
   - pipenv run mypy qemu

V3:
  - Changed "qemu.core" to "qemu.qmp" and "qemu.machine",
Partly to accommodate forthcoming work which would benefit from a separate
qemu.qmp namespace.
  - Changed the initial version from 5.2.0a1 to 0.5.2.0a1, to allow for
more rapid development while we smooth out the initial kinks.
  - 001: Renamed patch title; differences implement the new names.
  - 002: Readme changes for above.
  - 003: Version change.
  - 004: New readme for the new qmp directory.
  - 006: A few more import exceptions for pylint, hopefully temporary.
  - 009: Updated flake8 config comment to match qapi's
  - 012: Added namespace_package configuration value

001/15:[down] 'python: create qemu packages'
002/15:[0009] [FC] 'python: add qemu package installer'
003/15:[0002] [FC] 'python: add VERSION file'
004/15:[0015] [FC] 'python: add directory structure README.rst files'
005/15:[----] [--] 'python: Add pipenv support'
006/15:[down] 'python: add pylint import exceptions'
007/15:[----] [--] 'python: move pylintrc into setup.cfg'
008/15:[----] [--] 'python: add pylint to pipenv'
009/15:[0002] [FC] 'python: move flake8 config to setup.cfg'
010/15:[----] [--] 'python: Add flake8 to pipenv'
011/15:[----] [-C] 'python: move mypy.ini into setup.cfg'
012/15:[0001] [FC] 'python: add mypy to pipenv'
013/15:[----] [--] 'python: move .isort.cfg into setup.cfg'
014/15:[----] [--] 'python/qemu: add isort to pipenv'
015/15:[----] [--] 'python/qemu: add qemu package itself to pipenv'

John Snow (15):
   python: create qemu packages
   python: add qemu package installer
   python: add VERSION file
   python: add directory structure README.rst files
   python: Add pipenv support
   python: add pylint import exceptions
   python: move pylintrc into setup.cfg
   python: add pylint to pipenv
   python: move flake8 config to setup.cfg
   python: Add flake8 to pipenv
   python: move mypy.ini into setup.cfg
   python: add mypy to pipenv
   python: move .isort.cfg into setup.cfg
   python/qemu: add isort to pipenv
   python/qemu: add qemu package itself to pipenv

  python/PACKAGE.rst 

[PULL 02/12] iotests/291: Stop NBD server

2020-10-27 Thread Eric Blake
From: Max Reitz 

nbd_server_start_unix_socket() includes an implicit nbd_server_stop(),
but we still need an explicit one at the end of the test (where there
follows no next nbd_server_start_unix_socket()), or qemu-nbd will linger
until the test exits.

This will become important when enabling this test to run on FUSE
exports, because then the export (which is the image used by qemu-nbd)
will go away before qemu-nbd exits, which will lead to qemu-nbd
complaining that it cannot flush the bitmaps in the image.

Signed-off-by: Max Reitz 
Message-Id: <20201027164416.144115-3-mre...@redhat.com>
Reviewed-by: Eric Blake 
Signed-off-by: Eric Blake 
---
 tests/qemu-iotests/291 | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/qemu-iotests/291 b/tests/qemu-iotests/291
index 77fa38f93d31..b7320bc7adf2 100755
--- a/tests/qemu-iotests/291
+++ b/tests/qemu-iotests/291
@@ -128,6 +128,8 @@ nbd_server_start_unix_socket -r -f qcow2 -B b3 "$TEST_IMG"
 $QEMU_IMG map --output=json --image-opts \
 "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:b3" | _filter_qemu_img_map

+nbd_server_stop
+
 # success, all done
 echo '*** done'
 rm -f $seq.full
-- 
2.29.0




[PULL 08/12] nbd: Refactor counting of metadata contexts

2020-10-27 Thread Eric Blake
Rather than open-code the count of negotiated contexts at several
sites, embed it directly into the struct.  This will make it easier
for upcoming commits to support even more simultaneous contexts.

Signed-off-by: Eric Blake 
Message-Id: <20201027050556.269064-7-ebl...@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 nbd/server.c | 26 +++---
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 77fdecdf9dec..42d494bc9616 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -104,8 +104,7 @@ static QTAILQ_HEAD(, NBDExport) exports = 
QTAILQ_HEAD_INITIALIZER(exports);
  * NBD_OPT_LIST_META_CONTEXT. */
 typedef struct NBDExportMetaContexts {
 NBDExport *exp;
-bool valid; /* means that negotiation of the option finished without
-   errors */
+size_t count; /* number of negotiated contexts */
 bool base_allocation; /* export base:allocation context (block status) */
 bool bitmap; /* export qemu:dirty-bitmap: */
 } NBDExportMetaContexts;
@@ -445,7 +444,9 @@ static int nbd_negotiate_handle_list(NBDClient *client, 
Error **errp)

 static void nbd_check_meta_export(NBDClient *client)
 {
-client->export_meta.valid &= client->exp == client->export_meta.exp;
+if (client->exp != client->export_meta.exp) {
+client->export_meta.count = 0;
+}
 }

 /* Send a reply to NBD_OPT_EXPORT_NAME.
@@ -945,6 +946,7 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
 NBDExportMetaContexts local_meta;
 uint32_t nb_queries;
 int i;
+size_t count = 0;

 if (!client->structured_reply) {
 return nbd_opt_invalid(client, errp,
@@ -1001,6 +1003,7 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
 if (ret < 0) {
 return ret;
 }
+count++;
 }

 if (meta->bitmap) {
@@ -1014,11 +1017,12 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
 if (ret < 0) {
 return ret;
 }
+count++;
 }

 ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
 if (ret == 0) {
-meta->valid = true;
+meta->count = count;
 }

 return ret;
@@ -2337,18 +2341,16 @@ static coroutine_fn int nbd_handle_request(NBDClient 
*client,
 return nbd_send_generic_reply(client, request->handle, -EINVAL,
   "need non-zero length", errp);
 }
-if (client->export_meta.valid &&
-(client->export_meta.base_allocation ||
- client->export_meta.bitmap))
-{
+if (client->export_meta.count) {
 bool dont_fragment = request->flags & NBD_CMD_FLAG_REQ_ONE;
+int contexts_remaining = client->export_meta.count;

 if (client->export_meta.base_allocation) {
 ret = nbd_co_send_block_status(client, request->handle,
blk_bs(exp->common.blk),
request->from,
request->len, dont_fragment,
-   !client->export_meta.bitmap,
+   !--contexts_remaining,
NBD_META_ID_BASE_ALLOCATION,
errp);
 if (ret < 0) {
@@ -2360,13 +2362,15 @@ static coroutine_fn int nbd_handle_request(NBDClient 
*client,
 ret = nbd_co_send_bitmap(client, request->handle,
  client->exp->export_bitmap,
  request->from, request->len,
- dont_fragment,
- true, NBD_META_ID_DIRTY_BITMAP, errp);
+ dont_fragment, !--contexts_remaining,
+ NBD_META_ID_DIRTY_BITMAP, errp);
 if (ret < 0) {
 return ret;
 }
 }

+assert(!contexts_remaining);
+
 return 0;
 } else {
 return nbd_send_generic_reply(client, request->handle, -EINVAL,
-- 
2.29.0




[PULL 11/12] nbd: Add new qemu:allocation-depth metadata context

2020-10-27 Thread Eric Blake
'qemu-img map' provides a way to determine which extents of an image
come from the top layer vs. inherited from a backing chain.  This is
useful information worth exposing over NBD.  There is a proposal to
add a QMP command block-dirty-bitmap-populate which can create a dirty
bitmap that reflects allocation information, at which point the
qemu:dirty-bitmap:NAME metadata context can expose that information
via the creation of a temporary bitmap, but we can shorten the effort
by adding a new qemu:allocation-depth metadata context that does the
same thing without an intermediate bitmap (this patch does not
eliminate the need for that proposal, as it will have other uses as
well).

While documenting things, remember that although the NBD protocol has
NBD_OPT_SET_META_CONTEXT, the rest of its documentation refers to
'metadata context', which is a more apt description of what is
actually being used by NBD_CMD_BLOCK_STATUS: the user is requesting
metadata by passing one or more context names.  So I also touched up
some existing wording to prefer the term 'metadata context' where it
makes sense.

Note that this patch does not actually enable any way to request a
server to enable this context; that will come in the next patch.

Signed-off-by: Eric Blake 
Message-Id: <20201027050556.269064-10-ebl...@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 docs/interop/nbd.txt | 23 +++
 include/block/nbd.h  |  8 +++--
 nbd/server.c | 70 +---
 3 files changed, 89 insertions(+), 12 deletions(-)

diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt
index f3b3cacc9621..10ce098a29bf 100644
--- a/docs/interop/nbd.txt
+++ b/docs/interop/nbd.txt
@@ -17,19 +17,31 @@ namespace "qemu".

 == "qemu" namespace ==

-The "qemu" namespace currently contains only one type of context,
-related to exposing the contents of a dirty bitmap alongside the
-associated disk contents.  That context has the following form:
+The "qemu" namespace currently contains two available metadata context
+types.  The first is related to exposing the contents of a dirty
+bitmap alongside the associated disk contents.  That metadata context
+is named with the following form:

 qemu:dirty-bitmap:

 Each dirty-bitmap metadata context defines only one flag for extents
 in reply for NBD_CMD_BLOCK_STATUS:

-bit 0: NBD_STATE_DIRTY, means that the extent is "dirty"
+bit 0: NBD_STATE_DIRTY, set when the extent is "dirty"
+
+The second is related to exposing the source of various extents within
+the image, with a single metadata context named:
+
+qemu:allocation-depth
+
+In the allocation depth context, the entire 32-bit value represents a
+depth of which layer in a thin-provisioned backing chain provided the
+data (0 for unallocated, 1 for the active layer, 2 for the first
+backing layer, and so forth).

 For NBD_OPT_LIST_META_CONTEXT the following queries are supported
-in addition to "qemu:dirty-bitmap:":
+in addition to the specific "qemu:allocation-depth" and
+"qemu:dirty-bitmap:":

 * "qemu:" - returns list of all available metadata contexts in the
 namespace.
@@ -55,3 +67,4 @@ the operation of that feature.
 NBD_CMD_BLOCK_STATUS for "qemu:dirty-bitmap:", NBD_CMD_CACHE
 * 4.2: NBD_FLAG_CAN_MULTI_CONN for shareable read-only exports,
 NBD_CMD_FLAG_FAST_ZERO
+* 5.2: NBD_CMD_BLOCK_STATUS for "qemu:allocation-depth"
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 3dd9a04546ec..4a52a43ef598 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (C) 2016-2019 Red Hat, Inc.
+ *  Copyright (C) 2016-2020 Red Hat, Inc.
  *  Copyright (C) 2005  Anthony Liguori 
  *
  *  Network Block Device
@@ -47,7 +47,7 @@ typedef struct NBDOptionReply NBDOptionReply;
 typedef struct NBDOptionReplyMetaContext {
 NBDOptionReply h; /* h.type = NBD_REP_META_CONTEXT, h.length > 4 */
 uint32_t context_id;
-/* meta context name follows */
+/* metadata context name follows */
 } QEMU_PACKED NBDOptionReplyMetaContext;

 /* Transmission phase structs
@@ -229,7 +229,7 @@ enum {
 #define NBD_MAX_BUFFER_SIZE (32 * 1024 * 1024)

 /*
- * Maximum size of a protocol string (export name, meta context name,
+ * Maximum size of a protocol string (export name, metadata context name,
  * etc.).  Use malloc rather than stack allocation for storage of a
  * string.
  */
@@ -259,6 +259,8 @@ enum {
 /* Extent flags for qemu:dirty-bitmap in NBD_REPLY_TYPE_BLOCK_STATUS */
 #define NBD_STATE_DIRTY (1 << 0)

+/* No flags needed for qemu:allocation-depth in NBD_REPLY_TYPE_BLOCK_STATUS */
+
 static inline bool nbd_reply_type_is_error(int type)
 {
 return type & (1 << 15);
diff --git a/nbd/server.c b/nbd/server.c
index b6841e455414..ebbefcb6d3ec 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -27,8 +27,9 @@
 #include "qemu/units.h"

 #define NBD_META_ID_BASE_ALLOCATION 0
+#define NBD_META_ID_ALLOCATION_DEPTH 1
 /* Dirty bitmaps use 

[PULL 05/12] nbd: Utilize QAPI_CLONE for type conversion

2020-10-27 Thread Eric Blake
Rather than open-coding the translation from the deprecated
NbdServerAddOptions type to the preferred BlockExportOptionsNbd, it's
better to utilize QAPI_CLONE_MEMBERS.  This solves a couple of issues:
first, if we do any more refactoring of the base type (which an
upcoming patch plans to do), we don't have to revisit the open-coding.
Second, our assignment to arg->name is fishy: the generated QAPI code
for qapi_free_NbdServerAddOptions does not visit arg->name if
arg->has_name is false, but if it DID visit it, we would have
introduced a double-free situation when arg is finally freed.

Signed-off-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Markus Armbruster 
Message-Id: <20201027050556.269064-4-ebl...@redhat.com>
---
 blockdev-nbd.c | 15 ++-
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index 8174023e5c47..cee9134b12eb 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -14,6 +14,8 @@
 #include "sysemu/block-backend.h"
 #include "hw/block/block.h"
 #include "qapi/error.h"
+#include "qapi/clone-visitor.h"
+#include "qapi/qapi-visit-block-export.h"
 #include "qapi/qapi-commands-block-export.h"
 #include "block/nbd.h"
 #include "io/channel-socket.h"
@@ -195,7 +197,8 @@ void qmp_nbd_server_add(NbdServerAddOptions *arg, Error 
**errp)
  * the device name as a default here for compatibility.
  */
 if (!arg->has_name) {
-arg->name = arg->device;
+arg->has_name = true;
+arg->name = g_strdup(arg->device);
 }

 export_opts = g_new(BlockExportOptions, 1);
@@ -205,15 +208,9 @@ void qmp_nbd_server_add(NbdServerAddOptions *arg, Error 
**errp)
 .node_name  = g_strdup(bdrv_get_node_name(bs)),
 .has_writable   = arg->has_writable,
 .writable   = arg->writable,
-.u.nbd = {
-.has_name   = true,
-.name   = g_strdup(arg->name),
-.has_description= arg->has_description,
-.description= g_strdup(arg->description),
-.has_bitmap = arg->has_bitmap,
-.bitmap = g_strdup(arg->bitmap),
-},
 };
+QAPI_CLONE_MEMBERS(BlockExportOptionsNbd, &export_opts->u.nbd,
+   qapi_NbdServerAddOptions_base(arg));

 /*
  * nbd-server-add doesn't complain when a read-only device should be
-- 
2.29.0




[PULL 03/12] block: Simplify QAPI_LIST_ADD

2020-10-27 Thread Eric Blake
There is no need to rely on the verbosity of the gcc/clang compiler
extension of g_new(typeof(X), 1) when we can instead use the standard
g_malloc(sizeof(X)).  In general, we like g_new over g_malloc for
returning type X rather than void* to let the compiler catch more
potential typing mistakes, but in this particular macro, our other use
of typeof on the same line already ensures we are getting correct
results.

Suggested-by: Markus Armbruster 
Signed-off-by: Eric Blake 
Message-Id: <20201027050556.269064-2-ebl...@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Markus Armbruster 
---
 block.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block.c b/block.c
index 430edf79bb10..857387f3912f 100644
--- a/block.c
+++ b/block.c
@@ -5231,7 +5231,7 @@ BlockDeviceInfoList *bdrv_named_nodes_list(bool flat,
 }

 #define QAPI_LIST_ADD(list, element) do { \
-typeof(list) _tmp = g_new(typeof(*(list)), 1); \
+typeof(list) _tmp = g_malloc(sizeof(*(list))); \
 _tmp->value = (element); \
 _tmp->next = (list); \
 (list) = _tmp; \
-- 
2.29.0




[PULL 09/12] nbd: Allow export of multiple bitmaps for one device

2020-10-27 Thread Eric Blake
With this, 'qemu-nbd -B b0 -B b1 -f qcow2 img.qcow2' can let you sniff
out multiple bitmaps from one server.  qemu-img as client can still
only read one bitmap per client connection, but other NBD clients
(hello libnbd) can now read multiple bitmaps in a single pass.

Signed-off-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20201027050556.269064-8-ebl...@redhat.com>
---
 nbd/server.c   | 100 -
 tests/qemu-iotests/291 |   6 +--
 2 files changed, 72 insertions(+), 34 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 42d494bc9616..b6841e455414 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -27,6 +27,7 @@
 #include "qemu/units.h"

 #define NBD_META_ID_BASE_ALLOCATION 0
+/* Dirty bitmaps use 'NBD_META_ID_DIRTY_BITMAP + i', so keep this id last. */
 #define NBD_META_ID_DIRTY_BITMAP 1

 /*
@@ -94,7 +95,8 @@ struct NBDExport {
 BlockBackend *eject_notifier_blk;
 Notifier eject_notifier;

-BdrvDirtyBitmap *export_bitmap;
+BdrvDirtyBitmap **export_bitmaps;
+size_t nr_export_bitmaps;
 };

 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
@@ -106,7 +108,10 @@ typedef struct NBDExportMetaContexts {
 NBDExport *exp;
 size_t count; /* number of negotiated contexts */
 bool base_allocation; /* export base:allocation context (block status) */
-bool bitmap; /* export qemu:dirty-bitmap: */
+bool *bitmaps; /*
+* export qemu:dirty-bitmap:,
+* sized by exp->nr_export_bitmaps
+*/
 } NBDExportMetaContexts;

 struct NBDClient {
@@ -857,6 +862,8 @@ static bool nbd_meta_base_query(NBDClient *client, 
NBDExportMetaContexts *meta,
 static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
 const char *query)
 {
+size_t i;
+
 if (!nbd_strshift(&query, "qemu:")) {
 return false;
 }
@@ -864,24 +871,33 @@ static bool nbd_meta_qemu_query(NBDClient *client, 
NBDExportMetaContexts *meta,

 if (!*query) {
 if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
-meta->bitmap = !!meta->exp->export_bitmap;
+memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
 }
 trace_nbd_negotiate_meta_query_parse("empty");
 return true;
 }

 if (nbd_strshift(&query, "dirty-bitmap:")) {
-const char *bm_name;
-
 trace_nbd_negotiate_meta_query_parse("dirty-bitmap:");
-if (!meta->exp->export_bitmap) {
-trace_nbd_negotiate_meta_query_skip("no dirty-bitmap exported");
+if (!*query) {
+if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
+memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
+}
+trace_nbd_negotiate_meta_query_parse("empty");
 return true;
 }
-bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmap);
-if (nbd_meta_empty_or_pattern(client, bm_name, query)) {
-meta->bitmap = true;
+
+for (i = 0; i < meta->exp->nr_export_bitmaps; i++) {
+const char *bm_name;
+
+bm_name = bdrv_dirty_bitmap_name(meta->exp->export_bitmaps[i]);
+if (strcmp(bm_name, query) == 0) {
+meta->bitmaps[i] = true;
+trace_nbd_negotiate_meta_query_parse(query);
+return true;
+}
 }
+trace_nbd_negotiate_meta_query_skip("no dirty-bitmap match");
 return true;
 }

@@ -943,9 +959,10 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
 {
 int ret;
 g_autofree char *export_name = NULL;
-NBDExportMetaContexts local_meta;
+g_autofree bool *bitmaps = NULL;
+NBDExportMetaContexts local_meta = {0};
 uint32_t nb_queries;
-int i;
+size_t i;
 size_t count = 0;

 if (!client->structured_reply) {
@@ -960,6 +977,7 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
 meta = &local_meta;
 }

+g_free(meta->bitmaps);
 memset(meta, 0, sizeof(*meta));

 ret = nbd_opt_read_name(client, &export_name, NULL, errp);
@@ -974,6 +992,10 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
 return nbd_opt_drop(client, NBD_REP_ERR_UNKNOWN, errp,
 "export '%s' not present", sane_name);
 }
+meta->bitmaps = g_new0(bool, meta->exp->nr_export_bitmaps);
+if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
+bitmaps = meta->bitmaps;
+}

 ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), false, errp);
 if (ret <= 0) {
@@ -986,7 +1008,7 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
 if (client->opt == NBD_OPT_LIST_META_CONTEXT && !nb_queries) {
 /* enable all known contexts */
 meta->base_allocation = true;
-meta->bitmap = !!meta->exp->export_bitmap;
+memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
 } else {
 for 

[PULL 04/12] qapi: Add QAPI_LIST_PREPEND() macro

2020-10-27 Thread Eric Blake
block.c has a useful macro QAPI_LIST_ADD() for inserting at the front
of any QAPI-generated list; move it from block.c to qapi/util.h so
more places can use it, including one earlier place in block.c, and
rename it to something more obvious (since we also have a lot of
places that append, rather than prepend, to a list).

There are many more places in the codebase that can benefit from using
the macro, but converting them will be left to later patches.

In theory, all QAPI list types are child classes of GenericList; but
in practice, that relationship is not explicitly spelled out in the C
type declarations (rather, it is something that happens implicitly due
to C compatible layouts), and the macro does not actually depend on
the GenericList type.  We considered moving GenericList from visitor.h
into util.h to group related code; however, such a move would be
awkward if we do not also move GenericAlternate.  Unfortunately,
moving GenericAlternate would introduce its own problems of
declaration circularity (qapi-builtin-types.h needs a complete
definition of QEnumLookup from util.h, but GenericAlternate needs a
complete definition of QType from qapi-builtin-types.h).

Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Markus Armbruster 
Message-Id: <20201027050556.269064-3-ebl...@redhat.com>
[eblake: s/ADD/PREPEND/ per suggestion by Markus]
---
 include/qapi/util.h | 13 +
 block.c | 22 ++
 2 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/include/qapi/util.h b/include/qapi/util.h
index a7c3c6414874..bc312e90aa0c 100644
--- a/include/qapi/util.h
+++ b/include/qapi/util.h
@@ -22,4 +22,17 @@ int qapi_enum_parse(const QEnumLookup *lookup, const char 
*buf,

 int parse_qapi_name(const char *name, bool complete);

+/*
+ * For any GenericList @list, insert @element at the front.
+ *
+ * Note that this macro evaluates @element exactly once, so it is safe
+ * to have side-effects with that argument.
+ */
+#define QAPI_LIST_PREPEND(list, element) do { \
+typeof(list) _tmp = g_malloc(sizeof(*(list))); \
+_tmp->value = (element); \
+_tmp->next = (list); \
+(list) = _tmp; \
+} while (0)
+
 #endif
diff --git a/block.c b/block.c
index 857387f3912f..28be6f336ff3 100644
--- a/block.c
+++ b/block.c
@@ -5211,7 +5211,7 @@ BlockDriverState *bdrv_find_node(const char *node_name)
 BlockDeviceInfoList *bdrv_named_nodes_list(bool flat,
Error **errp)
 {
-BlockDeviceInfoList *list, *entry;
+BlockDeviceInfoList *list;
 BlockDriverState *bs;

 list = NULL;
@@ -5221,22 +5221,12 @@ BlockDeviceInfoList *bdrv_named_nodes_list(bool flat,
 qapi_free_BlockDeviceInfoList(list);
 return NULL;
 }
-entry = g_malloc0(sizeof(*entry));
-entry->value = info;
-entry->next = list;
-list = entry;
+QAPI_LIST_PREPEND(list, info);
 }

 return list;
 }

-#define QAPI_LIST_ADD(list, element) do { \
-typeof(list) _tmp = g_malloc(sizeof(*(list))); \
-_tmp->value = (element); \
-_tmp->next = (list); \
-(list) = _tmp; \
-} while (0)
-
 typedef struct XDbgBlockGraphConstructor {
 XDbgBlockGraph *graph;
 GHashTable *graph_nodes;
@@ -5291,7 +5281,7 @@ static void xdbg_graph_add_node(XDbgBlockGraphConstructor 
*gr, void *node,
 n->type = type;
 n->name = g_strdup(name);

-QAPI_LIST_ADD(gr->graph->nodes, n);
+QAPI_LIST_PREPEND(gr->graph->nodes, n);
 }

 static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent,
@@ -5310,14 +5300,14 @@ static void 
xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent,
 uint64_t flag = bdrv_qapi_perm_to_blk_perm(qapi_perm);

 if (flag & child->perm) {
-QAPI_LIST_ADD(edge->perm, qapi_perm);
+QAPI_LIST_PREPEND(edge->perm, qapi_perm);
 }
 if (flag & child->shared_perm) {
-QAPI_LIST_ADD(edge->shared_perm, qapi_perm);
+QAPI_LIST_PREPEND(edge->shared_perm, qapi_perm);
 }
 }

-QAPI_LIST_ADD(gr->graph->edges, edge);
+QAPI_LIST_PREPEND(gr->graph->edges, edge);
 }


-- 
2.29.0




[PULL 01/12] iotests/291: Filter irrelevant parts of img-info

2020-10-27 Thread Eric Blake
From: Max Reitz 

We need to let _img_info emit the format-specific information so we get
the list of bitmaps we want, but we do not need anything but the
bitmaps.  So filter out everything that is irrelevant to us.  (Ideally,
this would be a generalized function in common.filters that takes a list
of things to keep, but that would require implementing an anti-bitmap
filter, which would be hard, and which we do not need here.  So that is
why this function is just a local hack.)

This lets 291 pass with qcow2 options like refcount_bits or data_file
again.

Fixes: 14f16bf9474c860ecc127a66a86961942319f7af
   ("qemu-img: Support bitmap --merge into backing image")
Signed-off-by: Max Reitz 
Message-Id: <20201027164416.144115-2-mre...@redhat.com>
Reviewed-by: Eric Blake 
Signed-off-by: Eric Blake 
---
 tests/qemu-iotests/291 | 14 +++---
 tests/qemu-iotests/291.out | 20 
 2 files changed, 11 insertions(+), 23 deletions(-)

diff --git a/tests/qemu-iotests/291 b/tests/qemu-iotests/291
index 4f837b205655..77fa38f93d31 100755
--- a/tests/qemu-iotests/291
+++ b/tests/qemu-iotests/291
@@ -42,6 +42,14 @@ _require_command QEMU_NBD
 # compat=0.10 does not support bitmaps
 _unsupported_imgopts 'compat=0.10'

+# Filter irrelevant format-specific information from the qemu-img info
+# output (we only want the bitmaps, basically)
+_filter_irrelevant_img_info()
+{
+grep -v -e 'compat' -e 'compression type' -e 'data file' -e 'extended l2' \
+-e 'lazy refcounts' -e 'refcount bits'
+}
+
 echo
 echo "=== Initial image setup ==="
 echo
@@ -79,7 +87,7 @@ echo

 # Only bitmaps from the active layer are copied
 $QEMU_IMG convert --bitmaps -O qcow2 "$TEST_IMG.orig" "$TEST_IMG"
-_img_info --format-specific
+_img_info --format-specific | _filter_irrelevant_img_info
 # But we can also merge in bitmaps from other layers.  This test is a bit
 # contrived to cover more code paths, in reality, you could merge directly
 # into b0 without going through tmp
@@ -89,7 +97,7 @@ $QEMU_IMG bitmap --add --merge b0 -b "$TEST_IMG.base" -F $IMGFMT \
 $QEMU_IMG bitmap --merge tmp -f $IMGFMT "$TEST_IMG" b0
 $QEMU_IMG bitmap --remove --image-opts \
 driver=$IMGFMT,file.driver=file,file.filename="$TEST_IMG" tmp
-_img_info --format-specific
+_img_info --format-specific | _filter_irrelevant_img_info

 echo
 echo "=== Merge from top layer into backing image ==="
@@ -98,7 +106,7 @@ echo
 $QEMU_IMG rebase -u -F qcow2 -b "$TEST_IMG.base" "$TEST_IMG"
 $QEMU_IMG bitmap --add --merge b2 -b "$TEST_IMG" -F $IMGFMT \
  -f $IMGFMT "$TEST_IMG.base" b3
-_img_info --format-specific --backing-chain
+_img_info --format-specific --backing-chain | _filter_irrelevant_img_info

 echo
 echo "=== Check bitmap contents ==="
diff --git a/tests/qemu-iotests/291.out b/tests/qemu-iotests/291.out
index 3990f7aacc7b..23411c0ff4d9 100644
--- a/tests/qemu-iotests/291.out
+++ b/tests/qemu-iotests/291.out
@@ -26,9 +26,6 @@ file format: IMGFMT
 virtual size: 10 MiB (10485760 bytes)
 cluster_size: 65536
 Format specific information:
-compat: 1.1
-compression type: zlib
-lazy refcounts: false
 bitmaps:
 [0]:
 flags:
@@ -39,17 +36,12 @@ Format specific information:
 [0]: auto
 name: b2
 granularity: 65536
-refcount bits: 16
 corrupt: false
-extended l2: false
 image: TEST_DIR/t.IMGFMT
 file format: IMGFMT
 virtual size: 10 MiB (10485760 bytes)
 cluster_size: 65536
 Format specific information:
-compat: 1.1
-compression type: zlib
-lazy refcounts: false
 bitmaps:
 [0]:
 flags:
@@ -64,9 +56,7 @@ Format specific information:
 flags:
 name: b0
 granularity: 65536
-refcount bits: 16
 corrupt: false
-extended l2: false

 === Merge from top layer into backing image ===

@@ -77,9 +67,6 @@ cluster_size: 65536
 backing file: TEST_DIR/t.IMGFMT.base
 backing file format: IMGFMT
 Format specific information:
-compat: 1.1
-compression type: zlib
-lazy refcounts: false
 bitmaps:
 [0]:
 flags:
@@ -94,18 +81,13 @@ Format specific information:
 flags:
 name: b0
 granularity: 65536
-refcount bits: 16
 corrupt: false
-extended l2: false

 image: TEST_DIR/t.IMGFMT.base
 file format: IMGFMT
 virtual size: 10 MiB (10485760 bytes)
 cluster_size: 65536
 Format specific information:
-compat: 1.1
-compression type: zlib
-lazy refcounts: false
 bitmaps:
 [0]:
 flags:
@@ -117,9 +99,7 @@ Format specific information:
 [0]: auto
 name: b3
 granularity: 65536
-refcount bits: 16
 corrupt: false
-extended l2: false

 === Check bitmap contents ===

-- 
2.29.0




Re: [PATCH] hw/sd: Zero out function selection fields before being populated

2020-10-27 Thread Niek Linnenbank
Hello Philippe, Bin,

Thanks for your support on this. I've just tried this patch and
unfortunately it doesn't seem to
resolve the issue, at least on my machine. This is the output that I get
when running the avocado test for NetBSD9.0:

 (5/5)
tests/acceptance/boot_linux_console.py:BootLinuxConsole.test_arm_orangepi_uboot_netbsd9:
|console: U-Boot SPL 2020.01+dfsg-1 (Jan 08 2020 - 08:19:44 +0000)
console: DRAM: 1024 MiB
console: Failed to set core voltage! Can't set CPU frequency
console: Trying to boot from MMC1
console: U-Boot 2020.01+dfsg-1 (Jan 08 2020 - 08:19:44 +0000) Allwinner
Technology
console: CPU:   Allwinner H3 (SUN8I )
...
console: [   1.2957642] sdmmc0: SD card status: 4-bit, C0
console: [   1.3094731] ld0 at sdmmc0:
<0xaa:0x5859:QEMU!:0x01:0xdeadbeef:0x062>
console: [   1.3159383] ld0: 1024 MB, 1040 cyl, 32 head, 63 sec, 512
bytes/sect x 2097152 sectors
console: [   1.3763222] ld0: 4-bit width, High-Speed/SDR25, 50.000 MHz
console: [   2.0592109] WARNING: 4 errors while detecting hardware; check
system log.
console: [   2.0693403] boot device: ld0
console: [   2.0798960] root on ld0a dumps on ld0b
console: [   2.0994141] vfs_mountroot: can't open root device
console: [   2.0994141] cannot mount root, error = 6


When starting NetBSD 9.0 manually, it shows clearly that the SD card is
recognized with 1GiB size, also from U-Boot:
$ qemu-system-arm -M orangepi-pc -nographic -nic user -sd ./armv7.img
WARNING: Image format was not specified for './armv7.img' and probing
guessed raw.
 Automatically detecting the format is dangerous for raw images,
write operations on block 0 will be restricted.
 Specify the 'raw' format explicitly to remove the restrictions.

U-Boot SPL 2020.07-00610-g610e1487c8 (Jul 11 2020 - 22:31:58 +0200)
DRAM: 1024 MiB
Failed to set core voltage! Can't set CPU frequency
Trying to boot from MMC1

U-Boot 2020.07-00610-g610e1487c8 (Jul 11 2020 - 22:31:58 +0200) Allwinner
Technology

CPU:   Allwinner H3 (SUN8I )
Model: Xunlong Orange Pi PC
DRAM:  1 GiB
MMC:   mmc@1c0f000: 0
...
Hit any key to stop autoboot:  0
=> mmc info
Device: mmc@1c0f000
Manufacturer ID: aa
OEM: 5859
Name: QEMU!
Bus Speed: 50000000
Mode: SD High Speed (50MHz)
Rd Block Len: 512
SD version 2.0
High Capacity: No
Capacity: 1 GiB
Bus Width: 4-bit
Erase Group Size: 512 Bytes
=>
=> boot
8846552 bytes read in 931 ms (9.1 MiB/s)
...
[   1.3447558] sdmmc0: SD card status: 4-bit, C0
[   1.3546801] ld0 at sdmmc0: <0xaa:0x5859:QEMU!:0x01:0xdeadbeef:0x062>
[   1.3647790] ld0: 1024 MB, 1040 cyl, 32 head, 63 sec, 512 bytes/sect x
2097152 sectors
[   1.4150230] ld0: 4-bit width, High-Speed/SDR25, 50.000 MHz
[   2.0800893] WARNING: 4 errors while detecting hardware; check system log.
[   2.0800893] boot device: ld0
[   2.0900792] root on ld0a dumps on ld0b
[   2.1004160] vfs_mountroot: can't open root device
[   2.1004160] cannot mount root, error = 6
[   2.1004160] root device (default ld0a):


Note that the image has been resized to 2GiB with qemu-img:
$ ls -alh armv7.img
-rw-rw-r-- 1 user user 2,0G okt 28 22:45 armv7.img

The previous patch proposed by Bin did resolve the error ("hw/sd: Fix 2GiB
card CSD register values" ):
 https://lists.gnu.org/archive/html/qemu-devel/2020-10/msg07318.html

Although I see that this patch is now in master
(89c6700fe7eed9195f10055751edbc6d5e7ab940),
can you please confirm that the issue is still present when testing this on
your machine as well?

With kind regards,
Niek


On Mon, Oct 26, 2020 at 9:10 AM Philippe Mathieu-Daudé 
wrote:

> On 10/24/20 3:49 AM, Bin Meng wrote:
> > From: Bin Meng 
> >
> > The function selection fields (399:376) should be zeroed out to
> > prevent leftover from being or'ed into the switch function status
> > data structure.
> >
> > This fixes the boot failure as seen in the acceptance testing on
> > the orangepi target.
> >
> > Fixes: b638627c723a ("hw/sd: Fix incorrect populated function switch
> status data structure")
> > Reported-by: Michael Roth 
> > Signed-off-by: Bin Meng 
> > ---
> >
> >   hw/sd/sd.c | 1 +
> >   1 file changed, 1 insertion(+)
> >
> > diff --git a/hw/sd/sd.c b/hw/sd/sd.c
> > index c3febed243..bd10ec8fc4 100644
> > --- a/hw/sd/sd.c
> > +++ b/hw/sd/sd.c
> > @@ -824,6 +824,7 @@ static void sd_function_switch(SDState *sd, uint32_t
> arg)
> >   sd->data[12] = 0x80;/* Supported group 1 functions */
> >   sd->data[13] = 0x03;
> >
> > +memset(&sd->data[14], 0, 3);
> >   for (i = 0; i < 6; i ++) {
> >   new_func = (arg >> (i * 4)) & 0x0f;
> >   if (mode && new_func != 0x0f)
> >
>
> Thanks, series applied to sd-next tree.
>
>

-- 
Niek Linnenbank


Re: [Libguestfs] [libnbd PATCH] info: Add support for new 'qemu-nbd -A' qemu:allocation-depth

2020-10-27 Thread Richard W.M. Jones
On Tue, Oct 27, 2020 at 10:33:48AM -0500, Eric Blake wrote:
> On 10/16/20 10:23 AM, Eric Blake wrote:
> > A rather trivial decoding; we may enhance it further if qemu extends
> > things to give an integer depth alongside its tri-state encoding.
> > ---
> > 
> > I'll wait to push this to libnbd until the counterpart qemu patches
> > land upstream, although it looks like I've got positive review.
> 
> Whoops, I accidentally pushed this before qemu stuff landed upstream,
> and in the meantime, we changed our minds on what to expose over
> qemu:allocation-depth to be a bare integer rather than a tri-state.
> I'll push this followup (but this time, wait for the actual qemu patch
> to land).  In fact, I should probably add test-suite coverage...

ACK.  I have a patch which touches this file but it's a simple merge
to combine the two changes.

Rich.

> >From eba8734654e6fd340e18b3e07c3213ed1a0ab9e8 Mon Sep 17 00:00:00 2001
> From: Eric Blake 
> Date: Tue, 27 Oct 2020 10:27:25 -0500
> Subject: [libnbd PATCH] info: Adjust to actual 'qemu-nbd -A' semantics
> 
> Review on the qemu list has led to an altered definition of what
> 'qemu:allocation-depth' should report: rather than a tri-state value,
> it is an actual depth.  It's time to match what actually got committed
> into qemu, which in turn means a slight refactoring to use a malloc'd
> string for a description.
> 
> Fixes: 71455c021
> ---
>  info/nbdinfo.c | 27 +++
>  1 file changed, 15 insertions(+), 12 deletions(-)
> 
> diff --git a/info/nbdinfo.c b/info/nbdinfo.c
> index 2b22f51..b152f28 100644
> --- a/info/nbdinfo.c
> +++ b/info/nbdinfo.c
> @@ -767,28 +767,30 @@ get_content (struct nbd_handle *nbd, int64_t size)
>  }
> 
>  /* Callback handling --map. */
> -static const char *
> +static char *
>  extent_description (const char *metacontext, uint32_t type)
>  {
> +  char *ret;
> +
>if (strcmp (metacontext, "base:allocation") == 0) {
>  switch (type) {
> -case 0: return "allocated";
> -case 1: return "hole";
> -case 2: return "zero";
> -case 3: return "hole,zero";
> +case 0: return strdup ("allocated");
> +case 1: return strdup ("hole");
> +case 2: return strdup ("zero");
> +case 3: return strdup ("hole,zero");
>  }
>}
>else if (strncmp (metacontext, "qemu:dirty-bitmap:", 18) == 0) {
>  switch (type) {
> -case 0: return "clean";
> -case 1: return "dirty";
> +case 0: return strdup ("clean");
> +case 1: return strdup ("dirty");
>  }
>}
>else if (strcmp (metacontext, "qemu:allocation-depth") == 0) {
> -switch (type & 3) {
> -case 0: return "unallocated";
> -case 1: return "local";
> -case 2: return "backing";
> +switch (type) {
> +case 0: return strdup ("unallocated");
> +case 1: return strdup ("local");
> +case 2: asprintf (&ret, "backing depth %d", type); return ret;
>  }
>}
> 
> @@ -810,7 +812,7 @@ extent_callback (void *user_data, const char
> *metacontext,
> 
>/* Print the entries received. */
>for (i = 0; i < nr_entries; i += 2) {
> -const char *descr = extent_description (map, entries[i+1]);
> +char *descr = extent_description (map, entries[i+1]);
> 
>  if (!json_output) {
>fprintf (fp, "%10" PRIu64 "  "
> @@ -837,6 +839,7 @@ extent_callback (void *user_data, const char
> *metacontext,
>comma = true;
>  }
> 
> +free (descr);
>  offset += entries[i];
>}
> 
> -- 
> 2.29.0
> 
> 
> 
> -- 
> Eric Blake, Principal Software Engineer
> Red Hat, Inc.   +1-919-301-3226
> Virtualization:  qemu.org | libvirt.org
> 
> ___
> Libguestfs mailing list
> libgues...@redhat.com
> https://www.redhat.com/mailman/listinfo/libguestfs

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
libguestfs lets you edit virtual machines.  Supports shell scripting,
bindings from many languages.  http://libguestfs.org




[PATCH for-6.0 v3 20/20] iotests/308: Add test for FUSE exports

2020-10-27 Thread Max Reitz
We have good coverage of the normal I/O paths now, but what remains is a
test that tests some more special cases: Exporting an image on itself
(thus turning a formatted image into a raw one), some error cases, and
non-writable and non-growable exports.

Signed-off-by: Max Reitz 
Reviewed-by: Kevin Wolf 
---
 tests/qemu-iotests/308 | 339 +
 tests/qemu-iotests/308.out |  97 +++
 tests/qemu-iotests/group   |   1 +
 3 files changed, 437 insertions(+)
 create mode 100755 tests/qemu-iotests/308
 create mode 100644 tests/qemu-iotests/308.out

diff --git a/tests/qemu-iotests/308 b/tests/qemu-iotests/308
new file mode 100755
index 0000000000..b30f4400f6
--- /dev/null
+++ b/tests/qemu-iotests/308
@@ -0,0 +1,339 @@
+#!/usr/bin/env bash
+#
+# Test FUSE exports (in ways that are not captured by the generic
+# tests)
+#
+# Copyright (C) 2020 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+seq=$(basename "$0")
+echo "QA output created by $seq"
+
+status=1   # failure is the default!
+
+_cleanup()
+{
+_cleanup_qemu
+_cleanup_test_img
+rmdir "$EXT_MP" 2>/dev/null
+rm -f "$EXT_MP"
+rm -f "$COPIED_IMG"
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.qemu
+
+# Generic format, but needs a plain filename
+_supported_fmt generic
+if [ "$IMGOPTSSYNTAX" = "true" ]; then
+_unsupported_fmt $IMGFMT
+fi
+# We need the image to have exactly the specified size, and VPC does
+# not allow that by default
+_unsupported_fmt vpc
+
+_supported_proto file # We create the FUSE export manually
+_supported_os Linux # We need /dev/urandom
+
+# $1: Export ID
+# $2: Options (beyond the node-name and ID)
+# $3: Expected return value (defaults to 'return')
+# $4: Node to export (defaults to 'node-format')
+fuse_export_add()
+{
+_send_qemu_cmd $QEMU_HANDLE \
+"{'execute': 'block-export-add',
+  'arguments': {
+  'type': 'fuse',
+  'id': '$1',
+  'node-name': '${4:-node-format}',
+  $2
+  } }" \
+"${3:-return}" \
+| _filter_imgfmt
+}
+
+# $1: Export ID
+fuse_export_del()
+{
+_send_qemu_cmd $QEMU_HANDLE \
+"{'execute': 'block-export-del',
+  'arguments': {
+  'id': '$1'
+  } }" \
+'return'
+
+_send_qemu_cmd $QEMU_HANDLE \
+'' \
+'BLOCK_EXPORT_DELETED'
+}
+
+# Return the length of the protocol file
+# $1: Protocol node export mount point
+# $2: Original file (to compare)
+get_proto_len()
+{
+len1=$(stat -c '%s' "$1")
+len2=$(stat -c '%s' "$2")
+
+if [ "$len1" != "$len2" ]; then
+echo 'ERROR: Length of export and original differ:' >&2
+echo "$len1 != $len2" >&2
+else
+echo '(OK: Lengths of export and original are the same)' >&2
+fi
+
+echo "$len1"
+}
+
+COPIED_IMG="$TEST_IMG.copy"
+EXT_MP="$TEST_IMG.fuse"
+
+echo '=== Set up ==='
+
+# Create image with random data
+_make_test_img 64M
+$QEMU_IO -c 'write -s /dev/urandom 0 64M' "$TEST_IMG" | _filter_qemu_io
+
+_launch_qemu
+_send_qemu_cmd $QEMU_HANDLE \
+"{'execute': 'qmp_capabilities'}" \
+'return'
+
+# Separate blockdev-add calls for format and protocol so we can remove
+# the format layer later on
+_send_qemu_cmd $QEMU_HANDLE \
+"{'execute': 'blockdev-add',
+  'arguments': {
+  'driver': 'file',
+  'node-name': 'node-protocol',
+  'filename': '$TEST_IMG'
+  } }" \
+'return'
+
+_send_qemu_cmd $QEMU_HANDLE \
+"{'execute': 'blockdev-add',
+  'arguments': {
+  'driver': '$IMGFMT',
+  'node-name': 'node-format',
+  'file': 'node-protocol'
+  } }" \
+'return'
+
+echo
+echo '=== Mountpoint not present ==='
+
+rmdir "$EXT_MP" 2>/dev/null
+rm -f "$EXT_MP"
+output=$(fuse_export_add 'export-err' "'mountpoint': '$EXT_MP'" error)
+
+if echo "$output" | grep -q "Invalid parameter 'fuse'"; then
+_notrun 'No FUSE support'
+fi
+
+echo "$output"
+
+echo
+echo '=== Mountpoint is a directory ==='
+
+mkdir "$EXT_MP"
+fuse_export_add 'export-err' "'mountpoint': '$EXT_MP'" error
+rmdir "$EXT_MP"
+
+echo
+echo '=== Mountpoint is a regular file ==='
+
+touch "$EXT_MP"
+fuse_export_add 'export-mp' "'mountpoint': '$EXT_MP'"
+
+# Check that the export presents 

[PATCH for-6.0 v3 19/20] iotests: Enable fuse for many tests

2020-10-27 Thread Max Reitz
Many tests (that do not support generic protocols) can run just fine
with FUSE-exported images, so allow them to.  Note that this is no
attempt at being definitely complete.  There are some tests that might
be modified to run on FUSE, but this patch still skips them.  This patch
only tries to pick the rather low-hanging fruits.

Note that 221 and 250 only pass when .lseek is correctly implemented,
which is only possible with a libfuse that is 3.8 or newer.

Signed-off-by: Max Reitz 
Reviewed-by: Kevin Wolf 
---
 tests/qemu-iotests/025 | 2 +-
 tests/qemu-iotests/026 | 2 +-
 tests/qemu-iotests/028 | 2 +-
 tests/qemu-iotests/031 | 2 +-
 tests/qemu-iotests/034 | 2 +-
 tests/qemu-iotests/036 | 2 +-
 tests/qemu-iotests/037 | 2 +-
 tests/qemu-iotests/038 | 2 +-
 tests/qemu-iotests/039 | 2 +-
 tests/qemu-iotests/046 | 2 +-
 tests/qemu-iotests/050 | 2 +-
 tests/qemu-iotests/054 | 2 +-
 tests/qemu-iotests/060 | 2 +-
 tests/qemu-iotests/071 | 2 +-
 tests/qemu-iotests/079 | 2 +-
 tests/qemu-iotests/080 | 2 +-
 tests/qemu-iotests/089 | 2 +-
 tests/qemu-iotests/090 | 2 +-
 tests/qemu-iotests/091 | 2 +-
 tests/qemu-iotests/095 | 2 +-
 tests/qemu-iotests/097 | 2 +-
 tests/qemu-iotests/098 | 2 +-
 tests/qemu-iotests/102 | 2 +-
 tests/qemu-iotests/103 | 2 +-
 tests/qemu-iotests/106 | 2 +-
 tests/qemu-iotests/107 | 2 +-
 tests/qemu-iotests/108 | 2 +-
 tests/qemu-iotests/111 | 2 +-
 tests/qemu-iotests/112 | 2 +-
 tests/qemu-iotests/115 | 2 +-
 tests/qemu-iotests/117 | 2 +-
 tests/qemu-iotests/120 | 2 +-
 tests/qemu-iotests/121 | 2 +-
 tests/qemu-iotests/127 | 2 +-
 tests/qemu-iotests/133 | 2 +-
 tests/qemu-iotests/137 | 2 +-
 tests/qemu-iotests/138 | 2 +-
 tests/qemu-iotests/140 | 2 +-
 tests/qemu-iotests/154 | 2 +-
 tests/qemu-iotests/161 | 2 +-
 tests/qemu-iotests/171 | 2 +-
 tests/qemu-iotests/175 | 2 +-
 tests/qemu-iotests/176 | 2 +-
 tests/qemu-iotests/177 | 2 +-
 tests/qemu-iotests/179 | 2 +-
 tests/qemu-iotests/183 | 2 +-
 tests/qemu-iotests/186 | 2 +-
 tests/qemu-iotests/187 | 2 +-
 tests/qemu-iotests/191 | 2 +-
 tests/qemu-iotests/195 | 2 +-
 tests/qemu-iotests/200 | 2 +-
 tests/qemu-iotests/204 | 2 +-
 tests/qemu-iotests/214 | 2 +-
 tests/qemu-iotests/217 | 2 +-
 tests/qemu-iotests/220 | 2 +-
 tests/qemu-iotests/221 | 2 +-
 tests/qemu-iotests/229 | 2 +-
 tests/qemu-iotests/247 | 2 +-
 tests/qemu-iotests/249 | 2 +-
 tests/qemu-iotests/250 | 2 +-
 tests/qemu-iotests/252 | 2 +-
 tests/qemu-iotests/265 | 2 +-
 tests/qemu-iotests/268 | 2 +-
 tests/qemu-iotests/272 | 2 +-
 tests/qemu-iotests/273 | 2 +-
 tests/qemu-iotests/279 | 2 +-
 tests/qemu-iotests/286 | 2 +-
 tests/qemu-iotests/287 | 2 +-
 tests/qemu-iotests/289 | 2 +-
 tests/qemu-iotests/290 | 2 +-
 tests/qemu-iotests/291 | 2 +-
 tests/qemu-iotests/292 | 2 +-
 tests/qemu-iotests/293 | 2 +-
 tests/qemu-iotests/294 | 2 +-
 tests/qemu-iotests/305 | 2 +-
 75 files changed, 75 insertions(+), 75 deletions(-)

diff --git a/tests/qemu-iotests/025 b/tests/qemu-iotests/025
index e05d833452..1569d912f4 100755
--- a/tests/qemu-iotests/025
+++ b/tests/qemu-iotests/025
@@ -38,7 +38,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 . ./common.pattern
 
 _supported_fmt raw qcow2 qed luks
-_supported_proto file sheepdog rbd nfs
+_supported_proto file sheepdog rbd nfs fuse
 
 echo "=== Creating image"
 echo
diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026
index b9713eb591..9ecc5880b1 100755
--- a/tests/qemu-iotests/026
+++ b/tests/qemu-iotests/026
@@ -41,7 +41,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 
 # Currently only qcow2 supports rebasing
 _supported_fmt qcow2
-_supported_proto file
+_supported_proto file fuse
 _default_cache_mode writethrough
 _supported_cache_modes writethrough none
 # The refcount table tests expect a certain minimum width for refcount entries
diff --git a/tests/qemu-iotests/028 b/tests/qemu-iotests/028
index 864dc4a4e2..57d34aae99 100755
--- a/tests/qemu-iotests/028
+++ b/tests/qemu-iotests/028
@@ -46,7 +46,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 # Any format supporting backing files except vmdk and qcow which do not support
 # smaller backing files.
 _supported_fmt qcow2 qed
-_supported_proto file
+_supported_proto file fuse
 _supported_os Linux
 
 # Choose a size that is not necessarily a cluster size multiple for image
diff --git a/tests/qemu-iotests/031 b/tests/qemu-iotests/031
index 646ecd593f..2bcbc5886e 100755
--- a/tests/qemu-iotests/031
+++ b/tests/qemu-iotests/031
@@ -39,7 +39,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 
 # This tests qcow2-specific low-level functionality
 _supported_fmt qcow2
-_supported_proto file
+_supported_proto file fuse
 # We want to test compat=0.10, which does not support external data
 # files or refcount widths other than 16
 _unsupported_imgopts data_file 'refcount_bits=\([^1]\|.\([^6]\|$\)\)'
diff --git a/tests/qemu-iotests/034 b/tests/qemu-iotests/034
index ac2d687c71..08f7aea6d5 100755
--- a/tests/qemu-iotests/034
+++ b/tests/qemu-iotests/034
@@ -37,7 +37,7 @@ trap "_cleanup; exit 

[PATCH for-6.0 v3 18/20] iotests: Allow testing FUSE exports

2020-10-27 Thread Max Reitz
This pretends FUSE exports are a kind of protocol.  As such, they are
always tested under the format node.  This is probably the best way to
test them, actually, because this will generate more I/O load and more
varied patterns.

Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/check |   6 ++
 tests/qemu-iotests/common.filter |   5 +-
 tests/qemu-iotests/common.rc | 124 +++
 3 files changed, 134 insertions(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index 3c1fa4435a..952762d5ed 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -270,6 +270,7 @@ image protocol options
 -rbdtest rbd
 -sheepdog   test sheepdog
 -nbdtest nbd
+-fuse   test fuse
 -sshtest ssh
 -nfstest nfs
 
@@ -382,6 +383,11 @@ testlist options
 xpand=false
 ;;
 
+-fuse)
+IMGPROTO=fuse
+xpand=false
+;;
+
 -ssh)
 IMGPROTO=ssh
 xpand=false
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index 838ed15793..172ea5752e 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -44,7 +44,8 @@ _filter_qom_path()
 _filter_testdir()
 {
 $SED -e "s#$TEST_DIR/#TEST_DIR/#g" \
- -e "s#$SOCK_DIR/#SOCK_DIR/#g"
+ -e "s#$SOCK_DIR/#SOCK_DIR/#g" \
+ -e "s#SOCK_DIR/fuse-#TEST_DIR/#g"
 }
 
 # replace occurrences of the actual IMGFMT value with IMGFMT
@@ -127,6 +128,7 @@ _filter_img_create_filenames()
 -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \
 -e "s#$TEST_DIR#TEST_DIR#g" \
 -e "s#$SOCK_DIR#SOCK_DIR#g" \
+-e 's#SOCK_DIR/fuse-#TEST_DIR/#g' \
 -e "s#$IMGFMT#IMGFMT#g" \
 -e 's#nbd:127.0.0.1:[0-9]\\+#TEST_DIR/t.IMGFMT#g' \
 -e 's#nbd+unix:///\??socket=SOCK_DIR/nbd#TEST_DIR/t.IMGFMT#g'
@@ -227,6 +229,7 @@ _filter_img_info()
 -e "s#$IMGFMT#IMGFMT#g" \
 -e 's#nbd://127.0.0.1:[0-9]\\+$#TEST_DIR/t.IMGFMT#g' \
 -e 's#nbd+unix:///\??socket=SOCK_DIR/nbd#TEST_DIR/t.IMGFMT#g' \
+-e 's#SOCK_DIR/fuse-#TEST_DIR/#g' \
 -e "/encrypted: yes/d" \
 -e "/cluster_size: [0-9]\\+/d" \
 -e "/table_size: [0-9]\\+/d" \
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 20589e59a5..29354654cc 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -257,6 +257,9 @@ if [ "$IMGOPTSSYNTAX" = "true" ]; then
 TEST_IMG_FILE=$TEST_DIR/t.$IMGFMT
 TEST_IMG="$DRIVER,file.driver=nbd,file.type=unix"
 TEST_IMG="$TEST_IMG,file.path=$SOCK_DIR/nbd"
+elif [ "$IMGPROTO" = "fuse" ]; then
+TEST_IMG_FILE=$TEST_DIR/t.$IMGFMT
+TEST_IMG="$DRIVER,file.filename=$SOCK_DIR/fuse-t.$IMGFMT"
 elif [ "$IMGPROTO" = "ssh" ]; then
 TEST_IMG_FILE=$TEST_DIR/t.$IMGFMT
 
TEST_IMG="$DRIVER,file.driver=ssh,file.host=127.0.0.1,file.path=$TEST_IMG_FILE"
@@ -273,6 +276,9 @@ else
 elif [ "$IMGPROTO" = "nbd" ]; then
 TEST_IMG_FILE=$TEST_DIR/t.$IMGFMT
 TEST_IMG="nbd+unix:///?socket=$SOCK_DIR/nbd"
+elif [ "$IMGPROTO" = "fuse" ]; then
+TEST_IMG_FILE=$TEST_DIR/t.$IMGFMT
+TEST_IMG="$SOCK_DIR/fuse-t.$IMGFMT"
 elif [ "$IMGPROTO" = "ssh" ]; then
 TEST_IMG_FILE=$TEST_DIR/t.$IMGFMT
 
REMOTE_TEST_DIR="ssh://\\($USER@\\)\\?127.0.0.1\\(:[0-9]\\+\\)\\?$TEST_DIR"
@@ -288,6 +294,9 @@ fi
 ORIG_TEST_IMG_FILE=$TEST_IMG_FILE
 ORIG_TEST_IMG="$TEST_IMG"
 
+FUSE_PIDS=()
+FUSE_EXPORTS=()
+
 if [ -z "$TEST_DIR" ]; then
 TEST_DIR=$PWD/scratch
 fi
@@ -357,6 +366,10 @@ _test_img_to_test_img_file()
 echo "$1"
 ;;
 
+fuse)
+echo "$1" | sed -e "s#$SOCK_DIR/fuse-#$TEST_DIR/#"
+;;
+
 nfs)
 echo "$1" | sed -e "s#nfs://127.0.0.1##"
 ;;
@@ -385,6 +398,11 @@ _make_test_img()
 local opts_param=false
 local misc_params=()
 
+if [[ $IMGPROTO == fuse && $TEST_IMG == $SOCK_DIR/fuse-* ]]; then
+# The caller may be trying to overwrite an existing image
+_rm_test_img "$TEST_IMG"
+fi
+
 if [ -z "$TEST_IMG_FILE" ]; then
 img_name=$TEST_IMG
 elif [ "$IMGOPTSSYNTAX" != "true" -a \
@@ -469,11 +487,105 @@ _make_test_img()
 eval "$QEMU_NBD -v -t -k '$SOCK_DIR/nbd' -f $IMGFMT -e 42 -x '' 
$TEST_IMG_FILE >/dev/null &"
 sleep 1 # FIXME: qemu-nbd needs to be listening before we continue
 fi
+
+if [ $IMGPROTO = "fuse" -a -f "$img_name" ]; then
+local export_mp
+local pid
+local pidfile
+local timeout
+
+export_mp=$(echo "$img_name" | sed -e "s#$TEST_DIR/#$SOCK_DIR/fuse-#")
+if ! echo "$export_mp" | grep -q "^$SOCK_DIR"; then
+echo 'Cannot use FUSE exports with images outside of TEST_DIR' >&2
+ 

[PATCH for-6.0 v3 17/20] iotests: Give access to the qemu-storage-daemon

2020-10-27 Thread Max Reitz
Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/check | 11 +++
 tests/qemu-iotests/common.rc | 17 +
 2 files changed, 28 insertions(+)

diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index 678b6e4910..3c1fa4435a 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -644,6 +644,17 @@ if [ -z $QEMU_NBD_PROG ]; then
 fi
 export QEMU_NBD_PROG="$(type -p "$QEMU_NBD_PROG")"
 
+if [ -z "$QSD_PROG" ]; then
+if [ -x "$build_iotests/qemu-storage-daemon" ]; then
+export QSD_PROG="$build_iotests/qemu-storage-daemon"
+elif [ -x "$build_root/storage-daemon/qemu-storage-daemon" ]; then
+export QSD_PROG="$build_root/storage-daemon/qemu-storage-daemon"
+else
+_init_error "qemu-storage-daemon not found"
+fi
+fi
+export QSD_PROG="$(type -p "$QSD_PROG")"
+
 if [ -x "$build_iotests/socket_scm_helper" ]
 then
 export SOCKET_SCM_HELPER="$build_iotests/socket_scm_helper"
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 23f46da2db..20589e59a5 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -124,6 +124,7 @@ fi
 : ${VALGRIND_QEMU_IMG=$VALGRIND_QEMU}
 : ${VALGRIND_QEMU_IO=$VALGRIND_QEMU}
 : ${VALGRIND_QEMU_NBD=$VALGRIND_QEMU}
+: ${VALGRIND_QSD=$VALGRIND_QEMU}
 
 # The Valgrind own parameters may be set with
 # its environment variable VALGRIND_OPTS, e.g.
@@ -211,6 +212,21 @@ _qemu_nbd_wrapper()
 return $RETVAL
 }
 
+_qemu_storage_daemon_wrapper()
+{
+local VALGRIND_LOGFILE="${TEST_DIR}"/$$.valgrind
+(
+if [ -n "${QSD_NEED_PID}" ]; then
+echo $BASHPID > "${QEMU_TEST_DIR}/qemu-storage-daemon.pid"
+fi
+VALGRIND_QEMU="${VALGRIND_QSD}" _qemu_proc_exec "${VALGRIND_LOGFILE}" \
+"$QSD_PROG" $QSD_OPTIONS "$@"
+)
+RETVAL=$?
+_qemu_proc_valgrind_log "${VALGRIND_LOGFILE}" $RETVAL
+return $RETVAL
+}
+
 # Valgrind bug #409141 https://bugs.kde.org/show_bug.cgi?id=409141
 # Until valgrind 3.16+ is ubiquitous, we must work around a hang in
 # valgrind when issuing sigkill. Disable valgrind for this invocation.
@@ -223,6 +239,7 @@ export QEMU=_qemu_wrapper
 export QEMU_IMG=_qemu_img_wrapper
 export QEMU_IO=_qemu_io_wrapper
 export QEMU_NBD=_qemu_nbd_wrapper
+export QSD=_qemu_storage_daemon_wrapper
 
 if [ "$IMGOPTSSYNTAX" = "true" ]; then
 DRIVER="driver=$IMGFMT"
-- 
2.26.2




[PATCH for-6.0 v3 10/20] iotests/046: Avoid renaming images

2020-10-27 Thread Max Reitz
This generally does not work on non-file protocols.  It is better to
create the image with the final name from the start, and most tests do
this already.  Let 046 follow suit.

Signed-off-by: Max Reitz 
Reviewed-by: Kevin Wolf 
---
 tests/qemu-iotests/046 | 5 +++--
 tests/qemu-iotests/046.out | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/qemu-iotests/046 b/tests/qemu-iotests/046
index 88b3363c19..40a9f30087 100755
--- a/tests/qemu-iotests/046
+++ b/tests/qemu-iotests/046
@@ -47,6 +47,8 @@ size=128M
 echo
 echo "== creating backing file for COW tests =="
 
+TEST_IMG_SAVE=$TEST_IMG
+TEST_IMG="$TEST_IMG.base"
 _make_test_img $size
 
 backing_io()
@@ -67,8 +69,7 @@ backing_io()
 
 backing_io 0 32 write | $QEMU_IO "$TEST_IMG" | _filter_qemu_io
 
-mv "$TEST_IMG" "$TEST_IMG.base"
-
+TEST_IMG=$TEST_IMG_SAVE
 _make_test_img -b "$TEST_IMG.base" -F $IMGFMT 6G
 
 echo
diff --git a/tests/qemu-iotests/046.out b/tests/qemu-iotests/046.out
index b022bcddd5..66ad987ab3 100644
--- a/tests/qemu-iotests/046.out
+++ b/tests/qemu-iotests/046.out
@@ -1,7 +1,7 @@
 QA output created by 046
 
 == creating backing file for COW tests ==
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134217728
 wrote 65536/65536 bytes at offset 0
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 65536/65536 bytes at offset 65536
-- 
2.26.2




[PATCH for-6.0 v3 12/20] iotests/091: Use _cleanup_qemu instead of "wait"

2020-10-27 Thread Max Reitz
If the test environment has some other child processes running (like a
storage daemon that provides a FUSE export), then "wait" will never
finish.  Use wait=yes _cleanup_qemu instead.

(We need to discard the output so there is no change to the reference
output.)

Signed-off-by: Max Reitz 
Reviewed-by: Kevin Wolf 
---
 tests/qemu-iotests/091 | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/091 b/tests/qemu-iotests/091
index 68fbfd777b..8a4ce5b7e2 100755
--- a/tests/qemu-iotests/091
+++ b/tests/qemu-iotests/091
@@ -96,7 +96,8 @@ _send_qemu_cmd $h2 'qemu-io disk flush' "(qemu)"
 _send_qemu_cmd $h2 'quit' ""
 _send_qemu_cmd $h1 'quit' ""
 
-wait
+wait=yes _cleanup_qemu >/dev/null
+
 echo "Check image pattern"
 ${QEMU_IO} -c "read -P 0x22 0 4M" "${TEST_IMG}" | _filter_testdir | 
_filter_qemu_io
 
-- 
2.26.2




[PATCH for-6.0 v3 13/20] iotests: Restrict some Python tests to file

2020-10-27 Thread Max Reitz
Most Python tests are restricted to the file protocol (without
explicitly saying so), but these are the ones that would break
./check -fuse -qcow2.

Signed-off-by: Max Reitz 
Reviewed-by: Kevin Wolf 
---
 tests/qemu-iotests/206 | 3 ++-
 tests/qemu-iotests/242 | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/206 b/tests/qemu-iotests/206
index 11bc51f256..0a3ee5ef00 100755
--- a/tests/qemu-iotests/206
+++ b/tests/qemu-iotests/206
@@ -23,7 +23,8 @@
 import iotests
 from iotests import imgfmt
 
-iotests.script_initialize(supported_fmts=['qcow2'])
+iotests.script_initialize(supported_fmts=['qcow2'],
+  supported_protocols=['file'])
 iotests.verify_working_luks()
 
 with iotests.FilePath('t.qcow2') as disk_path, \
diff --git a/tests/qemu-iotests/242 b/tests/qemu-iotests/242
index 64f1bd95e4..a16de3085f 100755
--- a/tests/qemu-iotests/242
+++ b/tests/qemu-iotests/242
@@ -24,7 +24,8 @@ import struct
 from iotests import qemu_img_create, qemu_io, qemu_img_pipe, \
 file_path, img_info_log, log, filter_qemu_io
 
-iotests.script_initialize(supported_fmts=['qcow2'])
+iotests.script_initialize(supported_fmts=['qcow2'],
+  supported_protocols=['file'])
 
 disk = file_path('disk')
 chunk = 256 * 1024
-- 
2.26.2




[PATCH for-6.0 v3 16/20] storage-daemon: Call bdrv_close_all() on exit

2020-10-27 Thread Max Reitz
Otherwise, exports and block devices are not properly shut down and
closed, unless the user explicitly issues blockdev-del and
block-export-del commands for each of them.

Signed-off-by: Max Reitz 
Reviewed-by: Kevin Wolf 
---
 storage-daemon/qemu-storage-daemon.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c
index e419ba9f19..a213edea66 100644
--- a/storage-daemon/qemu-storage-daemon.c
+++ b/storage-daemon/qemu-storage-daemon.c
@@ -317,6 +317,9 @@ int main(int argc, char *argv[])
 main_loop_wait(false);
 }
 
+bdrv_drain_all_begin();
+bdrv_close_all();
+
 monitor_cleanup();
 qemu_chr_cleanup();
 user_creatable_cleanup();
-- 
2.26.2




[PATCH for-6.0 v3 15/20] iotests/287: Clean up subshell test image

2020-10-27 Thread Max Reitz
287 creates an image in a subshell (thanks to the pipe) to see whether
that is possible with compression_type=zstd.  If _make_test_img were to
modify any global state, this global state would then be lost before we
could clean up the image.

When using FUSE as the test protocol, this global state is important, so
clean up the image before the state is lost.

Signed-off-by: Max Reitz 
Reviewed-by: Kevin Wolf 
---
 tests/qemu-iotests/287 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/287 b/tests/qemu-iotests/287
index f98a4cadc1..036cc09e82 100755
--- a/tests/qemu-iotests/287
+++ b/tests/qemu-iotests/287
@@ -51,8 +51,8 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 CLUSTER_SIZE=65536
 
 # Check if we can run this test.
-if IMGOPTS='compression_type=zstd' _make_test_img 64M |
-grep "Invalid parameter 'zstd'"; then
+output=$(_make_test_img -o 'compression_type=zstd' 64M; _cleanup_test_img)
+if echo "$output" | grep -q "Invalid parameter 'zstd'"; then
 _notrun "ZSTD is disabled"
 fi
 
-- 
2.26.2




[PATCH for-6.0 v3 11/20] iotests: Derive image names from $TEST_IMG

2020-10-27 Thread Max Reitz
Avoid creating images with custom filenames in $TEST_DIR, because
non-file protocols may want to keep $TEST_IMG (and all other test
images) in some other directory.

Signed-off-by: Max Reitz 
Reviewed-by: Kevin Wolf 
---
 tests/qemu-iotests/200 | 3 +--
 tests/qemu-iotests/200.out | 4 ++--
 tests/qemu-iotests/229 | 3 +--
 tests/qemu-iotests/229.out | 6 +++---
 4 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/tests/qemu-iotests/200 b/tests/qemu-iotests/200
index 59f7854b9f..a7aabbd032 100755
--- a/tests/qemu-iotests/200
+++ b/tests/qemu-iotests/200
@@ -44,8 +44,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 _supported_fmt qcow2 qed
 _supported_proto file
 
-BACKING_IMG="${TEST_DIR}/backing.img"
-TEST_IMG="${TEST_DIR}/test.img"
+BACKING_IMG="$TEST_IMG.base"
 
 TEST_IMG="$BACKING_IMG" _make_test_img 512M
 _make_test_img -F $IMGFMT -b "$BACKING_IMG" 512M
diff --git a/tests/qemu-iotests/200.out b/tests/qemu-iotests/200.out
index a6776070e4..5883f16ac3 100644
--- a/tests/qemu-iotests/200.out
+++ b/tests/qemu-iotests/200.out
@@ -1,6 +1,6 @@
 QA output created by 200
-Formatting 'TEST_DIR/backing.img', fmt=IMGFMT size=536870912
-Formatting 'TEST_DIR/test.img', fmt=IMGFMT size=536870912 
backing_file=TEST_DIR/backing.img backing_fmt=IMGFMT
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=536870912
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=536870912 
backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT
 wrote 314572800/314572800 bytes at offset 512
 300 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 
diff --git a/tests/qemu-iotests/229 b/tests/qemu-iotests/229
index 89a5359f32..5f759fa587 100755
--- a/tests/qemu-iotests/229
+++ b/tests/qemu-iotests/229
@@ -51,8 +51,7 @@ _supported_os Linux
 _unsupported_imgopts data_file
 
 
-DEST_IMG="$TEST_DIR/d.$IMGFMT"
-TEST_IMG="$TEST_DIR/b.$IMGFMT"
+DEST_IMG="$TEST_IMG.dest"
 BLKDEBUG_CONF="$TEST_DIR/blkdebug.conf"
 
 _make_test_img 2M
diff --git a/tests/qemu-iotests/229.out b/tests/qemu-iotests/229.out
index 4de6dfaa28..7eed393013 100644
--- a/tests/qemu-iotests/229.out
+++ b/tests/qemu-iotests/229.out
@@ -1,6 +1,6 @@
 QA output created by 229
-Formatting 'TEST_DIR/b.IMGFMT', fmt=IMGFMT size=2097152
-Formatting 'TEST_DIR/d.IMGFMT', fmt=IMGFMT size=2097152
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2097152
+Formatting 'TEST_DIR/t.IMGFMT.dest', fmt=IMGFMT size=2097152
 wrote 2097152/2097152 bytes at offset 0
 2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 {'execute': 'qmp_capabilities'}
@@ -8,7 +8,7 @@ wrote 2097152/2097152 bytes at offset 0
 
 === Starting drive-mirror, causing error & stop  ===
 
-{'execute': 'drive-mirror', 'arguments': {'device': 'testdisk', 'format': 
'IMGFMT', 'target': 'blkdebug:TEST_DIR/blkdebug.conf:TEST_DIR/d.IMGFMT', 
'sync': 'full', 'mode': 'existing', 'on-source-error': 'stop', 
'on-target-error': 'stop' }}
+{'execute': 'drive-mirror', 'arguments': {'device': 'testdisk', 'format': 
'IMGFMT', 'target': 'blkdebug:TEST_DIR/blkdebug.conf:TEST_DIR/t.IMGFMT.dest', 
'sync': 'full', 'mode': 'existing', 'on-source-error': 'stop', 
'on-target-error': 'stop' }}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"JOB_STATUS_CHANGE", "data": {"status": "created", "id": "testdisk"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"JOB_STATUS_CHANGE", "data": {"status": "running", "id": "testdisk"}}
 {"return": {}}
-- 
2.26.2




[PATCH for-6.0 v3 00/20] block/export: Allow exporting BDSs via FUSE

2020-10-27 Thread Max Reitz
v1: https://lists.nongnu.org/archive/html/qemu-block/2019-12/msg00451.html
v2: https://lists.nongnu.org/archive/html/qemu-block/2020-09/msg01611.html

Branch: https://github.com/XanClic/qemu.git fuse-exports-v3
Branch: https://git.xanclic.moe/XanClic/qemu.git fuse-exports-v3


Hi,

Ever since I found out that you can mount FUSE filesystems on regular
files (not just directories), I had the idea of adding FUSE block
exports to qemu where you can export block nodes as raw images.  The
best thing is that you’d be able to mount an image on itself, so
whatever format it may be in, qemu lets it appear as a raw image (and
you can then use regular tools like dd on it).

The performance is quite bad so far, but we can always try to improve it
if the need arises.  For now I consider it mostly a cute feature to get
easy access to the raw contents of image files in any format (without
requiring root rights).


This series does the following:

First, add the FUSE export module (block/export/fuse.c) that implements
the basic file access functions.  (Note that you need libfuse 3.8.0 or
later for SEEK_HOLE/SEEK_DATA.)

Second, it allows using FUSE exports as a protocol in the iotests and
makes many iotests work with it.  (The file node is exported by a
background qemu instance to $SOCK_DIR.)

This gives us a lot of coverage for, well, not free (it does take twelve
patches), but for cheap; but there are still some more specialized
things we want to test, so third and last, this series adds an iotest
dedicated to FUSE exports.


Changes from v2:
- Let meson handle the libfuse and feature (lseek) detection
- Rebase on top of vhost-user-blk export
- Patch 2:
  - %s/5\.2/6.0/
  - Renamed init_fuse() to init_exports_table(), so I can add a
fuse_init() without being too confusing
  - Set max read/write request sizes (using that fuse_init() function,
and mount options that libfuse still needs for max_read)
  - Run fuse_session_receive_buf() in a loop until something other than
EINTR is returned
  - Let setup_fuse_export() clean up
  - Unmount and destroy the FUSE session only in fuse_export_delete()
(after all I/O has settled and thus all references have been
dropped)
  - Add MAINTAINERS entry
- Patch 3:
  - Don't use bdrv_query_image_info() when all we want is
bdrv_get_allocated_file_size()
  - Optionally let fuse_do_truncate() require zeroes in the added areas
(there is one fallocate() case where we don't need zeroes there)
  - Error out if the max read/write request sizes have been exceeded
instead of (wrongly) limiting the request size
  - Rename fuse_flush() to fuse_fsync(), and let a new fuse_flush()
invoke fuse_fsync()
- Patch 4:
  - Keep RESIZE permission for growable exports
- Patch 5:
  - Fix two bugs where I forgot to increment the offset when iterating
over some area
- Patch 17:
  - %s/QEMU_STGD/QSD/
- Patch 18:
  - %s/QEMU_STGD/QSD/
  - Drop superfluous -T from df invocation


git-backport-diff against v2:

Key:
[----] : patches are identical
[####] : number of functional differences between upstream/downstream patch
[down] : patch is downstream-only
The flags [FC] indicate (F)unctional and (C)ontextual differences, respectively

001/20:[down] 'meson: Detect libfuse'
002/20:[0103] [FC] 'fuse: Allow exporting BDSs via FUSE'
003/20:[0058] [FC] 'fuse: Implement standard FUSE operations'
004/20:[0034] [FC] 'fuse: Allow growable exports'
005/20:[0011] [FC] 'fuse: (Partially) implement fallocate()'
006/20:[0063] [FC] 'fuse: Implement hole detection through lseek'
007/20:[----] [--] 'iotests: Do not needlessly filter _make_test_img'
008/20:[----] [--] 'iotests: Do not pipe _make_test_img'
009/20:[----] [--] 'iotests: Use convert -n in some cases'
010/20:[----] [--] 'iotests/046: Avoid renaming images'
011/20:[----] [--] 'iotests: Derive image names from $TEST_IMG'
012/20:[----] [--] 'iotests/091: Use _cleanup_qemu instad of "wait"'
013/20:[----] [--] 'iotests: Restrict some Python tests to file'
014/20:[----] [--] 'iotests: Let _make_test_img guess $TEST_IMG_FILE'
015/20:[----] [--] 'iotests/287: Clean up subshell test image'
016/20:[----] [--] 'storage-daemon: Call bdrv_close_all() on exit'
017/20:[0018] [FC] 'iotests: Give access to the qemu-storage-daemon'
018/20:[0004] [FC] 'iotests: Allow testing FUSE exports'
019/20:[----] [--] 'iotests: Enable fuse for many tests'
020/20:[----] [--] 'iotests/308: Add test for FUSE exports'


Max Reitz (20):
  meson: Detect libfuse
  fuse: Allow exporting BDSs via FUSE
  fuse: Implement standard FUSE operations
  fuse: Allow growable exports
  fuse: (Partially) implement fallocate()
  fuse: Implement hole detection through lseek
  iotests: Do not needlessly filter _make_test_img
  iotests: Do not pipe _make_test_img
  iotests: Use convert -n in some cases
  iotests/046: Avoid renaming images
  iotests: Derive image names from $TEST_IMG
  iotests/091: Use _cleanup_qemu instad of "wait"
  iotests: Restrict some Python tests to file
  iotests: Let 

[PATCH for-6.0 v3 14/20] iotests: Let _make_test_img guess $TEST_IMG_FILE

2020-10-27 Thread Max Reitz
When most iotests want to create a test image that is named differently
from the default $TEST_IMG, they do something like this:

TEST_IMG="$TEST_IMG.base" _make_test_img $options

This works fine with the "file" protocol, but not so much for anything
else: _make_test_img tries to create an image under $TEST_IMG_FILE
first, and only under $TEST_IMG if the former is not set; and on
everything but "file", $TEST_IMG_FILE is set.

There are two ways we can fix this: First, we could make all tests
adjust not only TEST_IMG, but also TEST_IMG_FILE if that is present
(e.g. with something like _set_test_img_suffix $suffix that would affect
not only TEST_IMG but also TEST_IMG_FILE, if necessary).  This is a
pretty clean solution, and this is maybe what we should have done from
the start.

But it would also require changes to most existing bash tests.  So the
alternative is this: Let _make_test_img see whether $TEST_IMG_FILE still
points to the original value.  If so, it is possible that the caller has
adjusted $TEST_IMG but not $TEST_IMG_FILE.  In such a case, we can (for
most protocols) derive the corresponding $TEST_IMG_FILE value from
$TEST_IMG value and thus work around what technically is the caller
misbehaving.

This second solution is less clean, but it is robust against people
keeping their old habit of adjusting TEST_IMG only, and requires far
fewer changes.  So this patch implements it.

Signed-off-by: Max Reitz 
Reviewed-by: Kevin Wolf 
---
 tests/qemu-iotests/common.rc | 40 +---
 1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 494490a272..23f46da2db 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -268,6 +268,7 @@ else
 TEST_IMG=$IMGPROTO:$TEST_DIR/t.$IMGFMT
 fi
 fi
+ORIG_TEST_IMG_FILE=$TEST_IMG_FILE
 ORIG_TEST_IMG="$TEST_IMG"
 
 if [ -z "$TEST_DIR" ]; then
@@ -330,6 +331,30 @@ _get_data_file()
 | sed -e "s#\\\$TEST_IMG#$1#"
 }
 
+# Translate a $TEST_IMG to its corresponding $TEST_IMG_FILE for
+# different protocols
+_test_img_to_test_img_file()
+{
+case "$IMGPROTO" in
+file)
+echo "$1"
+;;
+
+nfs)
+echo "$1" | sed -e "s#nfs://127.0.0.1##"
+;;
+
+ssh)
+echo "$1" | \
+sed -e "s#ssh://\\($USER@\\)\\?127.0.0.1\\(:[0-9]\\+\\)\\?##"
+;;
+
+*)
+return 1
+;;
+esac
+}
+
 _make_test_img()
 {
 # extra qemu-img options can be added by tests
@@ -343,10 +368,19 @@ _make_test_img()
 local opts_param=false
 local misc_params=()
 
-if [ -n "$TEST_IMG_FILE" ]; then
-img_name=$TEST_IMG_FILE
-else
+if [ -z "$TEST_IMG_FILE" ]; then
 img_name=$TEST_IMG
+elif [ "$IMGOPTSSYNTAX" != "true" -a \
+   "$TEST_IMG_FILE" = "$ORIG_TEST_IMG_FILE" ]; then
+# Handle cases of tests only updating TEST_IMG, but not TEST_IMG_FILE
+img_name=$(_test_img_to_test_img_file "$TEST_IMG")
+if [ "$?" != 0 ]; then
+img_name=$TEST_IMG_FILE
+fi
+else
+# $TEST_IMG_FILE is not the default value, so it definitely has been
+# modified by the test
+img_name=$TEST_IMG_FILE
 fi
 
 if [ -n "$IMGOPTS" ]; then
-- 
2.26.2




[PATCH for-6.0 v3 04/20] fuse: Allow growable exports

2020-10-27 Thread Max Reitz
These will behave more like normal files in that writes beyond the EOF
will automatically grow the export size.

As an optimization, keep the RESIZE permission for growable exports so
we do not have to take it for every post-EOF write.  (This permission is
not released when the export is destroyed, because at that point the
BlockBackend is destroyed altogether anyway.)

Signed-off-by: Max Reitz 
---
 qapi/block-export.json |  6 +-
 block/export/fuse.c| 44 ++
 2 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/qapi/block-export.json b/qapi/block-export.json
index aecf052c07..140ba0d221 100644
--- a/qapi/block-export.json
+++ b/qapi/block-export.json
@@ -112,10 +112,14 @@
 # @mountpoint: Path on which to export the block device via FUSE.
 #  This must point to an existing regular file.
 #
+# @growable: Whether writes beyond the EOF should grow the block node
+#accordingly. (default: false)
+#
 # Since: 6.0
 ##
 { 'struct': 'BlockExportOptionsFuse',
-  'data': { 'mountpoint': 'str' },
+  'data': { 'mountpoint': 'str',
+'*growable': 'bool' },
   'if': 'defined(CONFIG_FUSE)' }
 
 ##
diff --git a/block/export/fuse.c b/block/export/fuse.c
index d995829ab7..92d2f50bcc 100644
--- a/block/export/fuse.c
+++ b/block/export/fuse.c
@@ -45,6 +45,7 @@ typedef struct FuseExport {
 
 char *mountpoint;
 bool writable;
+bool growable;
 } FuseExport;
 
 static GHashTable *exports;
@@ -72,6 +73,19 @@ static int fuse_export_create(BlockExport *blk_exp,
 
 assert(blk_exp_args->type == BLOCK_EXPORT_TYPE_FUSE);
 
+/* For growable exports, take the RESIZE permission */
+if (args->growable) {
+uint64_t blk_perm, blk_shared_perm;
+
+blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
+
+ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
+   blk_shared_perm, errp);
+if (ret < 0) {
+return ret;
+}
+}
+
 init_exports_table();
 
 /*
@@ -102,6 +116,7 @@ static int fuse_export_create(BlockExport *blk_exp,
 
 exp->mountpoint = g_strdup(args->mountpoint);
 exp->writable = blk_exp_args->writable;
+exp->growable = args->growable;
 
 ret = setup_fuse_export(exp, args->mountpoint, errp);
 if (ret < 0) {
@@ -349,19 +364,24 @@ static int fuse_do_truncate(const FuseExport *exp, 
int64_t size,
 truncate_flags |= BDRV_REQ_ZERO_WRITE;
 }
 
-blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
+/* Growable exports have a permanent RESIZE permission */
+if (!exp->growable) {
+blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
 
-ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
-   blk_shared_perm, NULL);
-if (ret < 0) {
-return ret;
+ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
+   blk_shared_perm, NULL);
+if (ret < 0) {
+return ret;
+}
 }
 
 ret = blk_truncate(exp->common.blk, size, true, prealloc,
truncate_flags, NULL);
 
-/* Must succeed, because we are only giving up the RESIZE permission */
-blk_set_perm(exp->common.blk, blk_perm, blk_shared_perm, &error_abort);
+if (!exp->growable) {
+/* Must succeed, because we are only giving up the RESIZE permission */
+blk_set_perm(exp->common.blk, blk_perm, blk_shared_perm, &error_abort);
+}
 
 return ret;
 }
@@ -482,7 +502,15 @@ static void fuse_write(fuse_req_t req, fuse_ino_t inode, 
const char *buf,
 }
 
 if (offset + size > length) {
-size = length - offset;
+if (exp->growable) {
+ret = fuse_do_truncate(exp, offset + size, true, 
PREALLOC_MODE_OFF);
+if (ret < 0) {
+fuse_reply_err(req, -ret);
+return;
+}
+} else {
+size = length - offset;
+}
 }
 
 ret = blk_pwrite(exp->common.blk, offset, buf, size, 0);
-- 
2.26.2




[PATCH for-6.0 v3 05/20] fuse: (Partially) implement fallocate()

2020-10-27 Thread Max Reitz
This allows allocating areas after the (old) EOF as part of a growing
resize, writing zeroes, and discarding.

Signed-off-by: Max Reitz 
---
 block/export/fuse.c | 84 +
 1 file changed, 84 insertions(+)

diff --git a/block/export/fuse.c b/block/export/fuse.c
index 92d2f50bcc..0b9d226b2f 100644
--- a/block/export/fuse.c
+++ b/block/export/fuse.c
@@ -521,6 +521,89 @@ static void fuse_write(fuse_req_t req, fuse_ino_t inode, 
const char *buf,
 }
 }
 
+/**
+ * Let clients perform various fallocate() operations.
+ */
+static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
+   off_t offset, off_t length,
+   struct fuse_file_info *fi)
+{
+FuseExport *exp = fuse_req_userdata(req);
+int64_t blk_len;
+int ret;
+
+if (!exp->writable) {
+fuse_reply_err(req, EACCES);
+return;
+}
+
+blk_len = blk_getlength(exp->common.blk);
+if (blk_len < 0) {
+fuse_reply_err(req, -blk_len);
+return;
+}
+
+if (mode & FALLOC_FL_KEEP_SIZE) {
+length = MIN(length, blk_len - offset);
+}
+
+if (mode & FALLOC_FL_PUNCH_HOLE) {
+if (!(mode & FALLOC_FL_KEEP_SIZE)) {
+fuse_reply_err(req, EINVAL);
+return;
+}
+
+do {
+int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
+
+ret = blk_pdiscard(exp->common.blk, offset, size);
+offset += size;
+length -= size;
+} while (ret == 0 && length > 0);
+} else if (mode & FALLOC_FL_ZERO_RANGE) {
+if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) {
+/* No need for zeroes, we are going to write them ourselves */
+ret = fuse_do_truncate(exp, offset + length, false,
+   PREALLOC_MODE_OFF);
+if (ret < 0) {
+fuse_reply_err(req, -ret);
+return;
+}
+}
+
+do {
+int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
+
+ret = blk_pwrite_zeroes(exp->common.blk,
+offset, size, 0);
+offset += size;
+length -= size;
+} while (ret == 0 && length > 0);
+} else if (!mode) {
+/* We can only fallocate at the EOF with a truncate */
+if (offset < blk_len) {
+fuse_reply_err(req, EOPNOTSUPP);
+return;
+}
+
+if (offset > blk_len) {
+/* No preallocation needed here */
+ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF);
+if (ret < 0) {
+fuse_reply_err(req, -ret);
+return;
+}
+}
+
+ret = fuse_do_truncate(exp, offset + length, true,
+   PREALLOC_MODE_FALLOC);
+} else {
+ret = -EOPNOTSUPP;
+}
+
+fuse_reply_err(req, ret < 0 ? -ret : 0);
+}
+
 /**
  * Let clients fsync the exported image.
  */
@@ -552,6 +635,7 @@ static const struct fuse_lowlevel_ops fuse_ops = {
 .open   = fuse_open,
 .read   = fuse_read,
 .write  = fuse_write,
+.fallocate  = fuse_fallocate,
 .flush  = fuse_flush,
 .fsync  = fuse_fsync,
 };
-- 
2.26.2




[PATCH for-6.0 v3 09/20] iotests: Use convert -n in some cases

2020-10-27 Thread Max Reitz
qemu-img convert (without -n) can often be replaced by a combination of
_make_test_img + qemu-img convert -n.  Doing so allows converting to
protocols that do not allow direct file creation, such as FUSE exports.
The only problem is that for formats other than qcow2 and qed (qcow1 at
least), this may lead to high disk usage for some reason, so we cannot
do it everywhere.

But we can do it in 028 and 089, so let us do that so they can run on
FUSE exports.  Also, in 028 this allows us to remove a 9-line comment
that used to explain why we cannot safely filter drive-backup's image
creation output.

Signed-off-by: Max Reitz 
Reviewed-by: Kevin Wolf 
---
 tests/qemu-iotests/028 | 14 --
 tests/qemu-iotests/028.out |  3 +++
 tests/qemu-iotests/089 |  3 ++-
 tests/qemu-iotests/089.out |  1 +
 4 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/tests/qemu-iotests/028 b/tests/qemu-iotests/028
index 6dd3ae09a3..864dc4a4e2 100755
--- a/tests/qemu-iotests/028
+++ b/tests/qemu-iotests/028
@@ -116,16 +116,10 @@ else
 QEMU_COMM_TIMEOUT=1
 fi
 
-# Silence output since it contains the disk image path and QEMU's readline
-# character echoing makes it very hard to filter the output. Plus, there
-# is no telling how many times the command will repeat before succeeding.
-# (Note that creating the image results in a "Formatting..." message over
-# stdout, which is the same channel the monitor uses.  We cannot reliably
-# wait for it because the monitor output may interact with it in such a
-# way that _timed_wait_for cannot read it.  However, once the block job is
-# done, we know that the "Formatting..." message must have appeared
-# already, so the output is still deterministic.)
-silent=y _send_qemu_cmd $h "drive_backup disk ${TEST_IMG}.copy" "(qemu)"
+TEST_IMG="$TEST_IMG.copy" _make_test_img $image_size
+_send_qemu_cmd $h "drive_backup -n disk ${TEST_IMG}.copy" "(qemu)" \
+| _filter_imgfmt
+
 silent=y qemu_cmd_repeat=20 _send_qemu_cmd $h "info block-jobs" "No active 
jobs"
 _send_qemu_cmd $h "info block-jobs" "No active jobs"
 _send_qemu_cmd $h 'quit' ""
diff --git a/tests/qemu-iotests/028.out b/tests/qemu-iotests/028.out
index 5a68de5c46..e580488216 100644
--- a/tests/qemu-iotests/028.out
+++ b/tests/qemu-iotests/028.out
@@ -468,6 +468,9 @@ No errors were found on the image.
 
 block-backup
 
+Formatting 'TEST_DIR/t.IMGFMT.copy', fmt=IMGFMT size=4294968832
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) drive_backup -n disk TEST_DIR/t.IMGFMT.copy
 (qemu) info block-jobs
 No active jobs
 === IO: pattern 195
diff --git a/tests/qemu-iotests/089 b/tests/qemu-iotests/089
index 66c5415abe..03a2ccf1e8 100755
--- a/tests/qemu-iotests/089
+++ b/tests/qemu-iotests/089
@@ -62,7 +62,8 @@ TEST_IMG="$TEST_IMG.base" _make_test_img $IMG_SIZE
 $QEMU_IO -c 'write -P 42 0 512' -c 'write -P 23 512 512' \
  -c 'write -P 66 1024 512' "$TEST_IMG.base" | _filter_qemu_io
 
-$QEMU_IMG convert -f raw -O $IMGFMT "$TEST_IMG.base" "$TEST_IMG"
+_make_test_img $IMG_SIZE
+$QEMU_IMG convert -f raw -O $IMGFMT -n "$TEST_IMG.base" "$TEST_IMG"
 
 $QEMU_IO_PROG --cache $CACHEMODE --aio $AIOMODE \
  -c 'read -P 42 0 512' -c 'read -P 23 512 512' \
diff --git a/tests/qemu-iotests/089.out b/tests/qemu-iotests/089.out
index 15682c2886..c53fc4823a 100644
--- a/tests/qemu-iotests/089.out
+++ b/tests/qemu-iotests/089.out
@@ -9,6 +9,7 @@ wrote 512/512 bytes at offset 512
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 512/512 bytes at offset 1024
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
 read 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 512/512 bytes at offset 512
-- 
2.26.2




[PATCH for-6.0 v3 08/20] iotests: Do not pipe _make_test_img

2020-10-27 Thread Max Reitz
Executing _make_test_img as part of a pipe will undo all variable
changes it has done.  As such, this could not work with FUSE (because
we want to remember all of our exports and their qemu instances).

Replace the pipe by a temporary file in 071 and 174 (the two tests that
can run on FUSE).

Signed-off-by: Max Reitz 
Reviewed-by: Kevin Wolf 
---
 tests/qemu-iotests/071 | 19 +++
 tests/qemu-iotests/174 | 10 +-
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/tests/qemu-iotests/071 b/tests/qemu-iotests/071
index 88faebcc1d..18fe9054b0 100755
--- a/tests/qemu-iotests/071
+++ b/tests/qemu-iotests/071
@@ -61,8 +61,17 @@ echo
 echo "=== Testing blkverify through filename ==="
 echo
 
-TEST_IMG="$TEST_IMG.base" IMGFMT="raw" _make_test_img --no-opts $IMG_SIZE |\
-_filter_imgfmt
+# _make_test_img may set variables that we need to retain.  Everything
+# in a pipe is executed in a subshell, so doing so would throw away
+# all changes.  Therefore, we have to store the output in some temp
+# file and filter that.
+scratch_out="$TEST_DIR/img-create.out"
+
+TEST_IMG="$TEST_IMG.base" IMGFMT="raw" _make_test_img --no-opts $IMG_SIZE \
+>"$scratch_out"
+_filter_imgfmt <"$scratch_out"
+rm -f "$scratch_out"
+
 _make_test_img $IMG_SIZE
 $QEMU_IO -c "open -o 
driver=raw,file.driver=blkverify,file.raw.filename=$TEST_IMG.base $TEST_IMG" \
  -c 'read 0 512' -c 'write -P 42 0x38000 512' -c 'read -P 42 0x38000 
512' | _filter_qemu_io
@@ -76,8 +85,10 @@ echo
 echo "=== Testing blkverify through file blockref ==="
 echo
 
-TEST_IMG="$TEST_IMG.base" IMGFMT="raw" _make_test_img --no-opts $IMG_SIZE |\
-_filter_imgfmt
+TEST_IMG="$TEST_IMG.base" IMGFMT="raw" _make_test_img --no-opts $IMG_SIZE \
+>"$scratch_out"
+_filter_imgfmt <"$scratch_out"
+
 _make_test_img $IMG_SIZE
 $QEMU_IO -c "open -o 
driver=raw,file.driver=blkverify,file.raw.filename=$TEST_IMG.base,file.test.driver=$IMGFMT,file.test.file.filename=$TEST_IMG"
 \
  -c 'read 0 512' -c 'write -P 42 0x38000 512' -c 'read -P 42 0x38000 
512' | _filter_qemu_io
diff --git a/tests/qemu-iotests/174 b/tests/qemu-iotests/174
index e2f14a38c6..1b0dd2e8b7 100755
--- a/tests/qemu-iotests/174
+++ b/tests/qemu-iotests/174
@@ -40,7 +40,15 @@ _unsupported_fmt raw
 
 
 size=256K
-IMGFMT=raw IMGKEYSECRET= _make_test_img --no-opts $size | _filter_imgfmt
+
+# _make_test_img may set variables that we need to retain.  Everything
+# in a pipe is executed in a subshell, so doing so would throw away
+# all changes.  Therefore, we have to store the output in some temp
+# file and filter that.
+scratch_out="$TEST_DIR/img-create.out"
+IMGFMT=raw IMGKEYSECRET= _make_test_img --no-opts $size >"$scratch_out"
+_filter_imgfmt <"$scratch_out"
+rm -f "$scratch_out"
 
 echo
 echo "== reading wrong format should fail =="
-- 
2.26.2




Re: [PATCH 4/4] qemu-storage-daemon: QAPIfy --chardev

2020-10-27 Thread Eric Blake
On 10/26/20 5:10 AM, Markus Armbruster wrote:
> From: Kevin Wolf 
> 
> This removes the dependency on QemuOpts from the --chardev option of
> the storage daemon.
> 
> Help on option parameters is still wrong.  Marked FIXME.
> 
> There are quite a few differences between qemu-system-FOO -chardev,
> QMP chardev-add, and qemu-storage-daemon --chardev:
> 
> * QMP chardev-add wraps arguments other than "id" in a "backend"
>   object.  Parameters other than "type" are further wrapped in a
>   "data" object.  Example:
> 
> {"execute": "chardev-add",
>  "arguments": {
>  "id":"sock0",
>  "backend": {
>  "type": "socket",
>  "data": {
>  "addr": {
>  "type": "inet",
>...
> }
> 
>   qemu-system-FOO -chardev does not wrap.  Neither does
>   qemu-storage-daemon --chardev.
> 
> * qemu-system-FOO -chardev parameter "backend" corresponds to QMP
>   chardev-add "backend" member "type".  qemu-storage-daemon names it
>   "backend".
> 
> * qemu-system-FOO -chardev parameter "backend" recognizes a few
>   additional aliases for compatibility.  QMP chardev-add does not.
>   Neither does qemu-storage-daemon --chardev.
> 
> * qemu-system-FOO -chardev with types "serial", "parallel" and "pipe"
>   parameter "path" corresponds to QMP chardev-add member "device".
>   qemu-storage-daemon --chardev follows QMP.
> 
> * Backend type "socket":
> 
>   - Intentionally different defaults (documented as such):
> qemu-system-FOO -chardev defaults to server=false and
> wait=true (if server=true), but QMP chardev-add defaults to
> server=true and wait=false.  qemu-storage-daemon --chardev follows
> QMP.
> 
>   - Accidentally different defaults: qemu-system-FOO -chardev defaults
> to tight=true, QMP chardev-add defaults to tight=false in
> QMP (this is a bug in commit 776b97d3).  qemu-storage-daemon
> follows QMP.

Should we be fixing that bug for 5.2?

> 
>   - QMP chardev-add wraps socket address arguments "path", "host",
> "port", etc in a "data" object.  qemu-system-FOO -chardev does not
> wrap.  Neither does qemu-storage-daemon --chardev.
> 
>   - qemu-system-FOO -chardev parameter "delay" corresponds to QMP
> chardev-add member "nodelay" with the sense reversed.
> qemu-storage-daemon --chardev follows QMP.
> 
> * Backend type "udp":
> 
>   - QMP chardev-add wraps remote and local address arguments in a
> "remote" and a "local" object, respectively.  qemu-system-FOO
> -chardev does not wrap, but prefixes the local address parameter
> names with "local" instead.
> 
>   - QMP chardev-add wraps socket address arguments in a "data" object.
> qemu-system-FOO -chardev does not wrap.  Neither does
> qemu-storage-daemon --chardev.  Same as for type "socket".
> 
> * I'm not sure qemu-system-FOO -chardev supports everything QMP
>   chardev-add does.  I am sure qemu-storage-daemon --chardev does.

Quite the list, but it is a good start for what remains to merge things
in the correct direction for 6.0.

> 
> Signed-off-by: Kevin Wolf 
> Signed-off-by: Markus Armbruster 
> ---
>  storage-daemon/qemu-storage-daemon.c | 37 +---
>  1 file changed, 28 insertions(+), 9 deletions(-)
> 
> diff --git a/storage-daemon/qemu-storage-daemon.c 
> b/storage-daemon/qemu-storage-daemon.c
> index e419ba9f19..f1f3bdc320 100644
> --- a/storage-daemon/qemu-storage-daemon.c
> +++ b/storage-daemon/qemu-storage-daemon.c
> @@ -37,10 +37,13 @@
>  #include "qapi/error.h"
>  #include "qapi/qapi-visit-block-core.h"
>  #include "qapi/qapi-visit-block-export.h"
> +#include "qapi/qapi-visit-char.h"
> +#include "qapi/qapi-visit-char.h"

Duplicate.

>  #include "qapi/qapi-visit-control.h"
>  #include "qapi/qmp/qdict.h"
>  #include "qapi/qmp/qstring.h"
>  #include "qapi/qobject-input-visitor.h"
> +#include "qapi/qobject-output-visitor.h"
>  
>  #include "qemu-common.h"
>  #include "qemu-version.h"
> @@ -207,18 +210,34 @@ static void process_options(int argc, char *argv[])
>  }
>  case OPTION_CHARDEV:
>  {
> -/* TODO This interface is not stable until we QAPIfy it */
> -QemuOpts *opts = qemu_opts_parse_noisily(&qemu_chardev_opts,
> - optarg, true);
> -if (opts == NULL) {
> -exit(EXIT_FAILURE);
> -}
> +QDict *args;
> +Visitor *v;
> +ChardevOptions *chr;
> +q_obj_chardev_add_arg *arg;
> +bool help;
>  
> -if (!qemu_chr_new_from_opts(opts, NULL, &error_fatal)) {
> -/* No error, but NULL returned means help was printed */
> +args = keyval_parse(optarg, "backend", &help, &error_fatal);
> +if (help) {
> +if (qdict_haskey(args, "backend")) {
> 

[PATCH for-6.0 v3 06/20] fuse: Implement hole detection through lseek

2020-10-27 Thread Max Reitz
This is a relatively new feature in libfuse (available since 3.8.0,
which was released in November 2019), so we have to add a dedicated
check whether it is available before making use of it.

Signed-off-by: Max Reitz 
---
 configure   |  8 -
 meson.build | 20 
 block/export/fuse.c | 77 +
 meson_options.txt   |  2 ++
 4 files changed, 106 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index fc315deebe..f74b7ef3f4 100755
--- a/configure
+++ b/configure
@@ -449,6 +449,7 @@ ninja=""
 skip_meson=no
 gettext=""
 fuse="auto"
+fuse_lseek="auto"
 
 bogus_os="no"
 malloc_trim="auto"
@@ -1524,6 +1525,10 @@ for opt do
   ;;
   --disable-fuse) fuse="disabled"
   ;;
+  --enable-fuse-lseek) fuse_lseek="enabled"
+  ;;
+  --disable-fuse-lseek) fuse_lseek="disabled"
+  ;;
   *)
   echo "ERROR: unknown option $opt"
   echo "Try '$0 --help' for more information"
@@ -1847,6 +1852,7 @@ disabled with --disable-FEATURE, default is enabled if 
available:
  rng-none        dummy RNG, avoid using /dev/(u)random and getrandom()
  libdaxctl       libdaxctl support
  fuse            FUSE block device export
+  fuse-lseek      SEEK_HOLE/SEEK_DATA support for FUSE exports
 
 NOTE: The object files are built at the place where configure is launched
 EOF
@@ -6988,7 +6994,7 @@ NINJA=$ninja $meson setup \
 -Dcapstone=$capstone -Dslirp=$slirp -Dfdt=$fdt \
 -Diconv=$iconv -Dcurses=$curses -Dlibudev=$libudev\
 -Ddocs=$docs -Dsphinx_build=$sphinx_build -Dinstall_blobs=$blobs \
--Dfuse=$fuse \
+-Dfuse=$fuse -Dfuse_lseek=$fuse_lseek \
 $cross_arg \
 "$PWD" "$source_path"
 
diff --git a/meson.build b/meson.build
index 4e8436b456..ea1a68b46d 100644
--- a/meson.build
+++ b/meson.build
@@ -736,10 +736,28 @@ if not has_malloc_trim and 
get_option('malloc_trim').enabled()
   endif
 endif
 
+if get_option('fuse').disabled() and get_option('fuse_lseek').enabled()
+  error('Cannot enable fuse-lseek while fuse is disabled')
+endif
+
 fuse = dependency('fuse3', required: get_option('fuse'),
   version: '>=3.1', method: 'pkg-config',
   static: enable_static)
 
+fuse_lseek = not_found
+if not get_option('fuse_lseek').disabled()
+  if fuse.version().version_compare('>=3.8')
+# Dummy dependency
+fuse_lseek = declare_dependency()
+  elif get_option('fuse_lseek').enabled()
+if fuse.found()
+  error('fuse-lseek requires libfuse >=3.8, found ' + fuse.version())
+else
+  error('fuse-lseek requires libfuse, which was not found')
+endif
+  endif
+endif
+
 #
 # config-host.h #
 #
@@ -773,6 +791,7 @@ config_host_data.set('CONFIG_KEYUTILS', keyutils.found())
 config_host_data.set('CONFIG_GETTID', has_gettid)
 config_host_data.set('CONFIG_MALLOC_TRIM', has_malloc_trim)
 config_host_data.set('CONFIG_FUSE', fuse.found())
+config_host_data.set('CONFIG_FUSE_LSEEK', fuse_lseek.found())
 config_host_data.set('QEMU_VERSION', '"@0@"'.format(meson.project_version()))
 config_host_data.set('QEMU_VERSION_MAJOR', 
meson.project_version().split('.')[0])
 config_host_data.set('QEMU_VERSION_MINOR', 
meson.project_version().split('.')[1])
@@ -2169,6 +2188,7 @@ summary_info += {'thread sanitizer':  
config_host.has_key('CONFIG_TSAN')}
 summary_info += {'rng-none':  config_host.has_key('CONFIG_RNG_NONE')}
 summary_info += {'Linux keyring': 
config_host.has_key('CONFIG_SECRET_KEYRING')}
 summary_info += {'FUSE exports':  fuse.found()}
+summary_info += {'FUSE lseek':fuse_lseek.found()}
 summary(summary_info, bool_yn: true)
 
 if not supported_cpus.contains(cpu)
diff --git a/block/export/fuse.c b/block/export/fuse.c
index 0b9d226b2f..38f74c94da 100644
--- a/block/export/fuse.c
+++ b/block/export/fuse.c
@@ -627,6 +627,80 @@ static void fuse_flush(fuse_req_t req, fuse_ino_t inode,
 fuse_fsync(req, inode, 1, fi);
 }
 
+#ifdef CONFIG_FUSE_LSEEK
+/**
+ * Let clients inquire allocation status.
+ */
+static void fuse_lseek(fuse_req_t req, fuse_ino_t inode, off_t offset,
+   int whence, struct fuse_file_info *fi)
+{
+FuseExport *exp = fuse_req_userdata(req);
+
+if (whence != SEEK_HOLE && whence != SEEK_DATA) {
+fuse_reply_err(req, EINVAL);
+return;
+}
+
+while (true) {
+int64_t pnum;
+int ret;
+
+ret = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
+  offset, INT64_MAX, &pnum, NULL, NULL);
+if (ret < 0) {
+fuse_reply_err(req, -ret);
+return;
+}
+
+if (!pnum && (ret & BDRV_BLOCK_EOF)) {
+int64_t blk_len;
+
+/*
+ * If blk_getlength() rounds (e.g. by sectors), then the
+ * export length will be rounded, too.  However,
+ * bdrv_block_status_above() may return EOF at unaligned
+ * offsets.  We must 

[PATCH for-6.0 v3 03/20] fuse: Implement standard FUSE operations

2020-10-27 Thread Max Reitz
This makes the export actually useful instead of only producing errors
whenever it is accessed.

Signed-off-by: Max Reitz 
---
 block/export/fuse.c | 242 
 1 file changed, 242 insertions(+)

diff --git a/block/export/fuse.c b/block/export/fuse.c
index 0553bcd630..d995829ab7 100644
--- a/block/export/fuse.c
+++ b/block/export/fuse.c
@@ -282,8 +282,250 @@ static void fuse_init(void *userdata, struct 
fuse_conn_info *conn)
 conn->max_write = MIN_NON_ZERO(BDRV_REQUEST_MAX_BYTES, conn->max_write);
 }
 
+/**
+ * Let clients look up files.  Always return ENOENT because we only
+ * care about the mountpoint itself.
+ */
+static void fuse_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
+{
+fuse_reply_err(req, ENOENT);
+}
+
+/**
+ * Let clients get file attributes (i.e., stat() the file).
+ */
+static void fuse_getattr(fuse_req_t req, fuse_ino_t inode,
+ struct fuse_file_info *fi)
+{
+struct stat statbuf;
+int64_t length, allocated_blocks;
+time_t now = time(NULL);
+FuseExport *exp = fuse_req_userdata(req);
+mode_t mode;
+
+length = blk_getlength(exp->common.blk);
+if (length < 0) {
+fuse_reply_err(req, -length);
+return;
+}
+
+allocated_blocks = bdrv_get_allocated_file_size(blk_bs(exp->common.blk));
+if (allocated_blocks <= 0) {
+allocated_blocks = DIV_ROUND_UP(length, 512);
+} else {
+allocated_blocks = DIV_ROUND_UP(allocated_blocks, 512);
+}
+
+mode = S_IFREG | S_IRUSR;
+if (exp->writable) {
+mode |= S_IWUSR;
+}
+
+statbuf = (struct stat) {
+.st_ino = inode,
+.st_mode= mode,
+.st_nlink   = 1,
+.st_uid = getuid(),
+.st_gid = getgid(),
+.st_size= length,
+.st_blksize = blk_bs(exp->common.blk)->bl.request_alignment,
+.st_blocks  = allocated_blocks,
+.st_atime   = now,
+.st_mtime   = now,
+.st_ctime   = now,
+};
+
+fuse_reply_attr(req, &statbuf, 1.);
+}
+
+static int fuse_do_truncate(const FuseExport *exp, int64_t size,
+bool req_zero_write, PreallocMode prealloc)
+{
+uint64_t blk_perm, blk_shared_perm;
+BdrvRequestFlags truncate_flags = 0;
+int ret;
+
+if (req_zero_write) {
+truncate_flags |= BDRV_REQ_ZERO_WRITE;
+}
+
+blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
+
+ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
+   blk_shared_perm, NULL);
+if (ret < 0) {
+return ret;
+}
+
+ret = blk_truncate(exp->common.blk, size, true, prealloc,
+   truncate_flags, NULL);
+
+/* Must succeed, because we are only giving up the RESIZE permission */
+blk_set_perm(exp->common.blk, blk_perm, blk_shared_perm, &error_abort);
+
+return ret;
+}
+
+/**
+ * Let clients set file attributes.  Only resizing is supported.
+ */
+static void fuse_setattr(fuse_req_t req, fuse_ino_t inode, struct stat 
*statbuf,
+ int to_set, struct fuse_file_info *fi)
+{
+FuseExport *exp = fuse_req_userdata(req);
+int ret;
+
+if (!exp->writable) {
+fuse_reply_err(req, EACCES);
+return;
+}
+
+if (to_set & ~FUSE_SET_ATTR_SIZE) {
+fuse_reply_err(req, ENOTSUP);
+return;
+}
+
+ret = fuse_do_truncate(exp, statbuf->st_size, true, PREALLOC_MODE_OFF);
+if (ret < 0) {
+fuse_reply_err(req, -ret);
+return;
+}
+
+fuse_getattr(req, inode, fi);
+}
+
+/**
+ * Let clients open a file (i.e., the exported image).
+ */
+static void fuse_open(fuse_req_t req, fuse_ino_t inode,
+  struct fuse_file_info *fi)
+{
+fuse_reply_open(req, fi);
+}
+
+/**
+ * Handle client reads from the exported image.
+ */
+static void fuse_read(fuse_req_t req, fuse_ino_t inode,
+  size_t size, off_t offset, struct fuse_file_info *fi)
+{
+FuseExport *exp = fuse_req_userdata(req);
+int64_t length;
+void *buf;
+int ret;
+
+/* Limited by max_read, should not happen */
+if (size > FUSE_MAX_BOUNCE_BYTES) {
+fuse_reply_err(req, EINVAL);
+return;
+}
+
+/**
+ * Clients will expect short reads at EOF, so we have to limit
+ * offset+size to the image length.
+ */
+length = blk_getlength(exp->common.blk);
+if (length < 0) {
+fuse_reply_err(req, -length);
+return;
+}
+
+if (offset + size > length) {
+size = length - offset;
+}
+
+buf = qemu_try_blockalign(blk_bs(exp->common.blk), size);
+if (!buf) {
+fuse_reply_err(req, ENOMEM);
+return;
+}
+
+ret = blk_pread(exp->common.blk, offset, buf, size);
+if (ret >= 0) {
+fuse_reply_buf(req, buf, size);
+} else {
+fuse_reply_err(req, -ret);
+}
+
+qemu_vfree(buf);
+}
+
+/**
+ * Handle client writes to the exported 

[PATCH for-6.0 v3 07/20] iotests: Do not needlessly filter _make_test_img

2020-10-27 Thread Max Reitz
In most cases, _make_test_img does not need a _filter_imgfmt on top.  It
does that by itself.

(The exception is when IMGFMT has been overwritten but TEST_IMG has not.
In such cases, we do need a _filter_imgfmt on top to filter the test's
original IMGFMT from TEST_IMG.)

Signed-off-by: Max Reitz 
Reviewed-by: Kevin Wolf 
---
 tests/qemu-iotests/161 | 12 ++--
 tests/qemu-iotests/175 |  6 +++---
 tests/qemu-iotests/249 |  6 +++---
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/tests/qemu-iotests/161 b/tests/qemu-iotests/161
index e270976d87..bbf7dbbc5c 100755
--- a/tests/qemu-iotests/161
+++ b/tests/qemu-iotests/161
@@ -48,9 +48,9 @@ _supported_os Linux
 IMG_SIZE=1M
 
 # Create the images
-TEST_IMG="$TEST_IMG.base" _make_test_img $IMG_SIZE | _filter_imgfmt
-TEST_IMG="$TEST_IMG.int" _make_test_img -b "$TEST_IMG.base" -F $IMGFMT | 
_filter_imgfmt
-_make_test_img -b "$TEST_IMG.int" -F $IMGFMT -F $IMGFMT | _filter_imgfmt
+TEST_IMG="$TEST_IMG.base" _make_test_img $IMG_SIZE
+TEST_IMG="$TEST_IMG.int" _make_test_img -b "$TEST_IMG.base" -F $IMGFMT
+_make_test_img -b "$TEST_IMG.int" -F $IMGFMT -F $IMGFMT
 
 # First test: reopen $TEST.IMG changing the detect-zeroes option on
 # its backing file ($TEST_IMG.int).
@@ -105,9 +105,9 @@ echo
 echo "*** Commit and then change an option on the backing file"
 echo
 # Create the images again
-TEST_IMG="$TEST_IMG.base" _make_test_img $IMG_SIZE | _filter_imgfmt
-TEST_IMG="$TEST_IMG.int" _make_test_img -b "$TEST_IMG.base" -F $IMGFMT| 
_filter_imgfmt
-_make_test_img -b "$TEST_IMG.int" -F $IMGFMT | _filter_imgfmt
+TEST_IMG="$TEST_IMG.base" _make_test_img $IMG_SIZE
+TEST_IMG="$TEST_IMG.int" _make_test_img -b "$TEST_IMG.base" -F $IMGFMT
+_make_test_img -b "$TEST_IMG.int" -F $IMGFMT
 
 _launch_qemu -drive if=none,file="${TEST_IMG}"
 _send_qemu_cmd $QEMU_HANDLE \
diff --git a/tests/qemu-iotests/175 b/tests/qemu-iotests/175
index 00a626aa63..c3c2aed653 100755
--- a/tests/qemu-iotests/175
+++ b/tests/qemu-iotests/175
@@ -89,20 +89,20 @@ min_blocks=$(stat -c '%b' "$TEST_DIR/empty")
 
 echo
 echo "== creating image with default preallocation =="
-_make_test_img -o extent_size_hint=0 $size | _filter_imgfmt
+_make_test_img -o extent_size_hint=0 $size
 stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks 
$min_blocks $size
 
 for mode in off full falloc; do
 echo
 echo "== creating image with preallocation $mode =="
-_make_test_img -o preallocation=$mode,extent_size_hint=0 $size | 
_filter_imgfmt
+_make_test_img -o preallocation=$mode,extent_size_hint=0 $size
 stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks 
$min_blocks $size
 done
 
 for new_size in 4096 1048576; do
 echo
 echo "== resize empty image with block_resize =="
-_make_test_img -o extent_size_hint=0 0 | _filter_imgfmt
+_make_test_img -o extent_size_hint=0 0
 _block_resize $TEST_IMG $new_size >/dev/null
 stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks 
$min_blocks $new_size
 done
diff --git a/tests/qemu-iotests/249 b/tests/qemu-iotests/249
index 68f13ed328..a9aa9303eb 100755
--- a/tests/qemu-iotests/249
+++ b/tests/qemu-iotests/249
@@ -48,9 +48,9 @@ _supported_os Linux
 IMG_SIZE=1M
 
 # Create the images: base <- int <- active
-TEST_IMG="$TEST_IMG.base" _make_test_img $IMG_SIZE | _filter_imgfmt
-TEST_IMG="$TEST_IMG.int" _make_test_img -b "$TEST_IMG.base" -F $IMGFMT | 
_filter_imgfmt
-_make_test_img -b "$TEST_IMG.int" -F $IMGFMT | _filter_imgfmt
+TEST_IMG="$TEST_IMG.base" _make_test_img $IMG_SIZE
+TEST_IMG="$TEST_IMG.int" _make_test_img -b "$TEST_IMG.base" -F $IMGFMT
+_make_test_img -b "$TEST_IMG.int" -F $IMGFMT
 
 # Launch QEMU with these two drives:
 # none0: base (read-only)
-- 
2.26.2




[PATCH for-6.0 v3 02/20] fuse: Allow exporting BDSs via FUSE

2020-10-27 Thread Max Reitz
block-export-add type=fuse allows mounting block graph nodes via FUSE on
some existing regular file.  That file should then appears like a raw
disk image, and accesses to it result in accesses to the exported BDS.

Right now, we only implement the necessary block export functions to set
it up and shut it down.  We do not implement any access functions, so
accessing the mount point only results in errors.  This will be
addressed by a followup patch.

We keep a hash table of exported mount points, because we want to be
able to detect when users try to use a mount point twice.  This is
because we invoke stat() to check whether the given mount point is a
regular file, but if that file is served by ourselves (because it is
already used as a mount point), then this stat() would have to be served
by ourselves, too, which is impossible to do while we (as the caller)
are waiting for it to settle.  Therefore, keep track of mount point
paths to at least catch the most obvious instances of that problem.

Signed-off-by: Max Reitz 
---
 qapi/block-export.json   |  23 ++-
 include/block/fuse.h |  30 
 block.c  |   1 +
 block/export/export.c|   4 +
 block/export/fuse.c  | 295 +++
 MAINTAINERS  |   6 +
 block/export/meson.build |   1 +
 7 files changed, 358 insertions(+), 2 deletions(-)
 create mode 100644 include/block/fuse.h
 create mode 100644 block/export/fuse.c

diff --git a/qapi/block-export.json b/qapi/block-export.json
index 480c497690..aecf052c07 100644
--- a/qapi/block-export.json
+++ b/qapi/block-export.json
@@ -103,6 +103,21 @@
'*logical-block-size': 'size',
 '*num-queues': 'uint16'} }
 
+##
+# @BlockExportOptionsFuse:
+#
+# Options for exporting a block graph node on some (file) mountpoint
+# as a raw image.
+#
+# @mountpoint: Path on which to export the block device via FUSE.
+#  This must point to an existing regular file.
+#
+# Since: 6.0
+##
+{ 'struct': 'BlockExportOptionsFuse',
+  'data': { 'mountpoint': 'str' },
+  'if': 'defined(CONFIG_FUSE)' }
+
 ##
 # @NbdServerAddOptions:
 #
@@ -200,11 +215,13 @@
 #
 # @nbd: NBD export
 # @vhost-user-blk: vhost-user-blk export (since 5.2)
+# @fuse: FUSE export (since: 6.0)
 #
 # Since: 4.2
 ##
 { 'enum': 'BlockExportType',
-  'data': [ 'nbd', 'vhost-user-blk' ] }
+  'data': [ 'nbd', 'vhost-user-blk',
+{ 'name': 'fuse', 'if': 'defined(CONFIG_FUSE)' } ] }
 
 ##
 # @BlockExportOptions:
@@ -245,7 +262,9 @@
   'discriminator': 'type',
   'data': {
   'nbd': 'BlockExportOptionsNbd',
-  'vhost-user-blk': 'BlockExportOptionsVhostUserBlk'
+  'vhost-user-blk': 'BlockExportOptionsVhostUserBlk',
+  'fuse': { 'type': 'BlockExportOptionsFuse',
+'if': 'defined(CONFIG_FUSE)' }
} }
 
 ##
diff --git a/include/block/fuse.h b/include/block/fuse.h
new file mode 100644
index 00..ffa91fe364
--- /dev/null
+++ b/include/block/fuse.h
@@ -0,0 +1,30 @@
+/*
+ * Present a block device as a raw image through FUSE
+ *
+ * Copyright (c) 2020 Max Reitz 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 or later of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef BLOCK_FUSE_H
+#define BLOCK_FUSE_H
+
+#ifdef CONFIG_FUSE
+
+#include "block/export.h"
+
+extern const BlockExportDriver blk_exp_fuse;
+
+#endif /* CONFIG_FUSE */
+
+#endif
diff --git a/block.c b/block.c
index ee5b28a979..82b0f1887a 100644
--- a/block.c
+++ b/block.c
@@ -26,6 +26,7 @@
 #include "block/trace.h"
 #include "block/block_int.h"
 #include "block/blockjob.h"
+#include "block/fuse.h"
 #include "block/nbd.h"
 #include "block/qdict.h"
 #include "qemu/error-report.h"
diff --git a/block/export/export.c b/block/export/export.c
index c3478c6c97..778adc428e 100644
--- a/block/export/export.c
+++ b/block/export/export.c
@@ -17,6 +17,7 @@
 #include "sysemu/block-backend.h"
 #include "sysemu/iothread.h"
 #include "block/export.h"
+#include "block/fuse.h"
 #include "block/nbd.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-block-export.h"
@@ -31,6 +32,9 @@ static const BlockExportDriver *blk_exp_drivers[] = {
 #if defined(CONFIG_LINUX) && defined(CONFIG_VHOST_USER)
 &blk_exp_vhost_user_blk,
 #endif
+#ifdef CONFIG_FUSE
+&blk_exp_fuse,
+#endif
 };
 
 /* Only accessed from the main thread */
diff --git a/block/export/fuse.c b/block/export/fuse.c
new file mode 100644
index 00..0553bcd630
--- /dev/null
+++ b/block/export/fuse.c
@@ -0,0 

[PATCH for-6.0 v3 01/20] meson: Detect libfuse

2020-10-27 Thread Max Reitz
Signed-off-by: Max Reitz 
---
 configure | 7 +++
 meson.build   | 6 ++
 meson_options.txt | 2 ++
 3 files changed, 15 insertions(+)

diff --git a/configure b/configure
index 83610b0db8..fc315deebe 100755
--- a/configure
+++ b/configure
@@ -448,6 +448,7 @@ meson=""
 ninja=""
 skip_meson=no
 gettext=""
+fuse="auto"
 
 bogus_os="no"
 malloc_trim="auto"
@@ -1519,6 +1520,10 @@ for opt do
   ;;
   --disable-libdaxctl) libdaxctl=no
   ;;
+  --enable-fuse) fuse="enabled"
+  ;;
+  --disable-fuse) fuse="disabled"
+  ;;
   *)
   echo "ERROR: unknown option $opt"
   echo "Try '$0 --help' for more information"
@@ -1841,6 +1846,7 @@ disabled with --disable-FEATURE, default is enabled if 
available:
   xkbcommon   xkbcommon support
   rng-nonedummy RNG, avoid using /dev/(u)random and getrandom()
   libdaxctl   libdaxctl support
+  fuseFUSE block device export
 
 NOTE: The object files are built at the place where configure is launched
 EOF
@@ -6982,6 +6988,7 @@ NINJA=$ninja $meson setup \
 -Dcapstone=$capstone -Dslirp=$slirp -Dfdt=$fdt \
 -Diconv=$iconv -Dcurses=$curses -Dlibudev=$libudev\
 -Ddocs=$docs -Dsphinx_build=$sphinx_build -Dinstall_blobs=$blobs \
+-Dfuse=$fuse \
 $cross_arg \
 "$PWD" "$source_path"
 
diff --git a/meson.build b/meson.build
index 47e32e1fcb..4e8436b456 100644
--- a/meson.build
+++ b/meson.build
@@ -736,6 +736,10 @@ if not has_malloc_trim and 
get_option('malloc_trim').enabled()
   endif
 endif
 
+fuse = dependency('fuse3', required: get_option('fuse'),
+  version: '>=3.1', method: 'pkg-config',
+  static: enable_static)
+
 #
 # config-host.h #
 #
@@ -768,6 +772,7 @@ config_host_data.set('CONFIG_XKBCOMMON', xkbcommon.found())
 config_host_data.set('CONFIG_KEYUTILS', keyutils.found())
 config_host_data.set('CONFIG_GETTID', has_gettid)
 config_host_data.set('CONFIG_MALLOC_TRIM', has_malloc_trim)
+config_host_data.set('CONFIG_FUSE', fuse.found())
 config_host_data.set('QEMU_VERSION', '"@0@"'.format(meson.project_version()))
 config_host_data.set('QEMU_VERSION_MAJOR', 
meson.project_version().split('.')[0])
 config_host_data.set('QEMU_VERSION_MINOR', 
meson.project_version().split('.')[1])
@@ -2163,6 +2168,7 @@ endif
 summary_info += {'thread sanitizer':  config_host.has_key('CONFIG_TSAN')}
 summary_info += {'rng-none':  config_host.has_key('CONFIG_RNG_NONE')}
 summary_info += {'Linux keyring': 
config_host.has_key('CONFIG_SECRET_KEYRING')}
+summary_info += {'FUSE exports':  fuse.found()}
 summary(summary_info, bool_yn: true)
 
 if not supported_cpus.contains(cpu)
diff --git a/meson_options.txt b/meson_options.txt
index b4f1801875..2ef4ec628f 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -64,6 +64,8 @@ option('xkbcommon', type : 'feature', value : 'auto',
description: 'xkbcommon support')
 option('virtiofsd', type: 'feature', value: 'auto',
description: 'build virtiofs daemon (virtiofsd)')
+option('fuse', type: 'feature', value: 'auto',
+   description: 'FUSE block device export')
 
 option('capstone', type: 'combo', value: 'auto',
choices: ['disabled', 'enabled', 'auto', 'system', 'internal'],
-- 
2.26.2




Re: [PATCH v6 11/11] qapi: Use QAPI_LIST_ADD() where possible

2020-10-27 Thread Eric Blake
On 10/27/20 10:36 AM, Markus Armbruster wrote:
> Eric Blake  writes:
> 
>> On 10/27/20 5:09 AM, Markus Armbruster wrote:
>>> Eric Blake  writes:
>>>
 Anywhere we create a list of just one item or by prepending items
 (typically because order doesn't matter), we can use the now-public
 macro.  But places where we must keep the list in order by appending
 remain open-coded.
>>>
>>> Should we rename the macro to QAPI_LIST_PREPEND()?
>>
>> That would make sense if we add a counterpart QAPI_LIST_APPEND.
> 
> It may make sense even if we don't.  QAPI_LIST_ADD() leaves the reader
> guessing whether we prepend or append.

That's a strong enough argument for me to make the rename in patch 2/11,
with minor rebase fallout in the rest of the series, and then this patch
gets a major rewrite (but I'm already not trying to get this patch into
5.2).


 @@ -1224,10 +1224,7 @@ GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error 
 **errp)
  QTAILQ_FOREACH(mount, &mounts, next) {
  g_debug("Building guest fsinfo for '%s'", mount->dirname);

 -new = g_malloc0(sizeof(*ret));
>>>
>>> Ugh!  Glad you get rid of this.
>>
>> Yep, C++ reserved words as a C variable name is always awkward.  It was
>> fun cleaning that up (several places in this patch).
> 
> I don't give a rat's ass about C++, actually.  I'm glad you got rid of
> the tacit "@new points to the same type as @ret does".
> 
> Clean:
> 
>  new = g_malloc0(sizeof(*new));
>  new = g_new0(GuestFilesystemInfoList, 1);
> 
> Clean (but I'd use g_new0() instead):
> 
>  new = g_malloc0(sizeof(GuestFilesystemInfoList));
> 
> Dirty:
> 
>  new = g_malloc0(sizeof(X));
> 
> where X is anything else.

Ah, I hadn't even spotted what you disliked, but yes, it makes total
sense that allocating for assignment to one variable by utilizing the
type from another puts unnecessary linkage that the two variables must
have the same type.


>>> Did you miss the spot where we add to this list?
>>>
>>>/* Go through each extent */
>>>for (i = 0; i < extents->NumberOfDiskExtents; i++) {
>>>disk = g_malloc0(sizeof(GuestDiskAddress));
>>>
>>>/* Disk numbers directly correspond to numbers used in UNCs
>>> *
>>> * See documentation for DISK_EXTENT:
>>> * 
>>> https://docs.microsoft.com/en-us/windows/desktop/api/winioctl/ns-winioctl-_disk_extent
>>> *
>>> * See also Naming Files, Paths and Namespaces:
>>> * 
>>> https://docs.microsoft.com/en-us/windows/desktop/FileIO/naming-a-file#win32-device-namespaces
>>> */
>>>disk->has_dev = true;
>>>disk->dev = g_strdup_printf(".\\PhysicalDrive%lu",
>>>extents->Extents[i].DiskNumber);
>>>
>>>get_single_disk_info(extents->Extents[i].DiskNumber, disk, 
>>> &local_err);
>>>if (local_err) {
>>>error_propagate(errp, local_err);
>>>goto out;
>>>}
>>>cur_item = g_malloc0(sizeof(*list));
>>>cur_item->value = disk;
>>>disk = NULL;
>>>cur_item->next = list;
>>> --->   list = cur_item;
>>>}
>>
>> This is appending, not prepending.
> 
> One of us is blind, and it might be me :)

Oh, I indeed misread this.  Yes, this is prepending after all, so I'll
use the macro here.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [PATCH 3/4] char: Flat alternative to overly nested chardev-add arguments

2020-10-27 Thread Eric Blake
On 10/26/20 5:10 AM, Markus Armbruster wrote:
> chardev-add's arguments use an annoying amount of nesting.  Example:
> 
> {"execute": "chardev-add",
>  "arguments": {
>  "id":"sock0",
>"backend": {
>"type": "socket",
>"data": {
>"addr": {
>"type": "inet",
>"data": {
>"host": "0.0.0.0",
>"port": "2445"}}
> 
> This is because chardev-add predates QAPI features that enable flatter
> data structures, both on the wire and in C: base types, flat unions,
> commands taking a union or alternate as 'data'.
> 
> The nesting would be even more annoying in dotted key syntax:
> 
> id=sock0,\
> backend.type=socket,\
> backend.data.addr.type=inet,\
> backend.data.addr.data.host=0.0.0.0,\
> backend.data.addr.data.port=2445
> 
> Relevant, because the next commit will QAPIfy qemu-storage-daemon
> --chardev.  We really want this instead:
> 
> --chardev socket,id=sock0,\
> addr.type=inet,\
> addr.host=0.0.0.0,\
> addr.port=2445
> 
> To get it, define a new QAPI type ChardevOptions that is the flat
> equivalent to chardev-add's arguments.
> 
> What we should do now is convert the internal interfaces to take this
> new type, and limit the nested old type to the external interface,
> similar to what commit bd269ebc82 "sockets: Limit SocketAddressLegacy
> to external interfaces" did.  But we're too close to the freeze to
> pull that off safely.
> 
> What I can do now is convert the new type to the old nested type, and
> promise to replace this by what should be done in the next development
> cycle.

Nice evaluation of the trade-off.

> 
> In more detail:
> 
> * Flat union ChardevOptions corresponds to chardev-add's implicit
>   arguments type.  It flattens a struct containing a simple union into
>   a flat union.
> 
> * The flat union's discriminator is named @backend, not @type.  This
>   avoids clashing with member @type of ChardevSpiceChannel.  For what
>   it's worth, -chardev also uses this name.
> 
> * Its branches @socket, @udp use ChardevSocketFlat, ChardevUdpFlat
>   instead of ChardevSocket, ChardevUdp.  This flattens simple union
>   SocketAddressLegacy members to flat union SocketAddress members.
> 
> * New chardev_options_crumple() converts ChardevOptions to
>   chardev-add's implict arguments type.

implicit

> 
> Only one existing QAPI definition is affected: some of ChardevSocket's
> members get moved to a new base type ChardevSocketBase, to reduce
> duplication.  No change to the generated C type and the wire format.
> 
> Signed-off-by: Markus Armbruster 
> ---
>  qapi/char.json | 106 ---
>  include/chardev/char.h |   5 ++
>  include/qemu/sockets.h |   3 +
>  chardev/char-legacy.c  | 140 +
>  chardev/char-socket.c  |   3 +-
>  util/qemu-sockets.c|  38 +++
>  chardev/meson.build|   1 +
>  7 files changed, 287 insertions(+), 9 deletions(-)
>  create mode 100644 chardev/char-legacy.c

Big but worth it.  I'm liking the simplicity of this alternative over
Kevin's proposal, especially if we're aiming to get this in 5.2 soft freeze.

> 
> diff --git a/qapi/char.json b/qapi/char.json
> index 43486d1daa..31b693bbb2 100644
> --- a/qapi/char.json
> +++ b/qapi/char.json
> @@ -244,12 +244,8 @@
>'base': 'ChardevCommon' }
>  
>  ##
> -# @ChardevSocket:
> +# @ChardevSocketBase:
>  #
> -# Configuration info for (stream) socket chardevs.
> -#
> -# @addr: socket address to listen on (server=true)
> -#or connect to (server=false)
>  # @tls-creds: the ID of the TLS credentials object (since 2.6)
>  # @tls-authz: the ID of the QAuthZ authorization object against which
>  # the client's x509 distinguished name will be validated. This
> @@ -274,9 +270,8 @@
>  #
>  # Since: 1.4
>  ##
> -{ 'struct': 'ChardevSocket',
> -  'data': { 'addr': 'SocketAddressLegacy',
> -'*tls-creds': 'str',
> +{ 'struct': 'ChardevSocketBase',
> +  'data': { '*tls-creds': 'str',
>  '*tls-authz'  : 'str',
>  '*server': 'bool',
>  '*wait': 'bool',
> @@ -287,6 +282,35 @@
>  '*reconnect': 'int' },
>'base': 'ChardevCommon' }

Here we are subdividing ChardevSocket into everything that is already
flat, and excluding the awkward 'addr'...

>  
> +##
> +# @ChardevSocket:
> +#
> +# Configuration info for (stream) socket chardevs.
> +#
> +# @addr: socket address to listen on (server=true)
> +#or connect to (server=false)
> +#
> +# Since: 1.4
> +##
> +{ 'struct': 'ChardevSocket',
> +  # Do not add to 'data', it breaks chardev_options_crumple()!  Add to
> +  # ChardevSocketBase's 'data' instead.
> +  'data': { 'addr': 'SocketAddressLegacy' },
> +  'base': 'ChardevSocketBase' }

...legacy use pulls in the legacy 'addr'...

> +
> +##
> +# @ChardevSocketFlat:
> +#
> +# Note: This type should eventually 

Re: [PATCH 0/4] qemu-storage-daemon: QAPIfy --chardev the stupid way

2020-10-27 Thread Paolo Bonzini
On 26/10/20 11:10, Markus Armbruster wrote:
> Kevin's "[PATCH v2 0/6] qemu-storage-daemon: QAPIfy --chardev"
> involves surgery to the QAPI generator.  Some (most?) of it should go
> away if we deprecate the "data" wrappers due to simple unions in QMP.
> 
> Do we really need to mess with the code generator to solve the problem
> at hand?
> 
> 
> Let's recapitulate the problem:
> 
> * We want to QAPIfy --chardev, i.e. define its argument as a QAPI
>   type.

Considering that this is not 5.2 stuff at this point, I would like to
suggest again moving chardevs to -object, and ask you to evaluate that
option with the agreement that I do the work instead of you. :)

Paolo




Re: [PATCH v12 14/14] block: apply COR-filter to block-stream jobs

2020-10-27 Thread Andrey Shinkevich



On 27.10.2020 20:57, Vladimir Sementsov-Ogievskiy wrote:

27.10.2020 20:48, Andrey Shinkevich wrote:


On 27.10.2020 19:13, Vladimir Sementsov-Ogievskiy wrote:

22.10.2020 21:13, Andrey Shinkevich wrote:

This patch completes the series with the COR-filter insertion for
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
which will reduce the disk overuse.
The COR-filter insertion incurs changes in the iotests case
245:test_block_stream_4 that reopens the backing chain during a
block-stream job. There are changes in the iotests #030 as well.
The iotests case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. The base backing node for one job is the top node for another
job. It may change due to the filter node inserted into the backing
chain while both jobs are running. Another issue is that the parts of
the backing chain are being frozen by the running job and may not be
changed by the concurrent job when needed. The concept of the parallel
jobs with common nodes is no longer considered vital.

Signed-off-by: Andrey Shinkevich 
---
  block/stream.c | 98 
++

  tests/qemu-iotests/030 | 51 +++-
  tests/qemu-iotests/030.out |  4 +-
  tests/qemu-iotests/141.out |  2 +-
  tests/qemu-iotests/245 | 22 +++
  5 files changed, 87 insertions(+), 90 deletions(-)

diff --git a/block/stream.c b/block/stream.c



[...]

+    s = block_job_create(job_id, _job_driver, NULL, 
cor_filter_bs,

+ BLK_PERM_CONSISTENT_READ,
+ basic_flags | BLK_PERM_WRITE | 
BLK_PERM_GRAPH_MOD,


I think that BLK_PERM_GRAPH_MOD is something outdated. We have 
chain-freeze, what BLK_PERM_GRAPH_MOD adds to it? I don't know, and 
doubt that somebody knows.




That is true for the commit/mirror jobs also. If we agree to remove 
the flag BLK_PERM_GRAPH_MOD from all these jobs, it will be made in a 
separate series, won't it?


Hmm. At least, let's not implement new logic based on 
BLK_PERM_GRAPH_MOD. In original code it's only block_job_create's perm, 
not in shared_perm, not somewhere else.. So, if we keep it, let's keep 
it as is: only in perm in block_job_create, not implementing additional 
perm/shared_perm logic.




With @perm=0 in the block_job_add_bdrv(&s->common, "active node"...), it 
won't.





   speed, creation_flags, NULL, NULL, errp);
  if (!s) {
  goto fail;
  }
+    /*
+ * Prevent concurrent jobs trying to modify the graph structure 
here, we
+ * already have our own plans. Also don't allow resize as the 
image size is

+ * queried only at the job start and then cached.
+ */
+    if (block_job_add_bdrv(>common, "active node", bs,
+   basic_flags | BLK_PERM_GRAPH_MOD,


why not 0, like for other nodes? We don't use this BdrvChild at all, 
why require permissions?




Yes, '0' is right.

+   basic_flags | BLK_PERM_WRITE, 
&error_abort)) {

+    goto fail;
+    }
+
  /* Block all intermediate nodes between bs and base, because 



[...]


diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index dcb4b5d..0064590 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -227,61 +227,20 @@ class TestParallelOps(iotests.QMPTestCase):
  for img in self.imgs:
  os.remove(img)
-    # Test that it's possible to run several block-stream operations
-    # in parallel in the same snapshot chain
-    @unittest.skipIf(os.environ.get('QEMU_CHECK_BLOCK_AUTO'), 
'disabled in CI')

-    def test_stream_parallel(self):


Didn't we agree to add a "bottom" parameter to QMP? Then this test-case 
can be rewritten using

node-names and new "bottom" stream argument.



I guess it will not help for the whole test. Particularly, there is an 
issue with freezing the child link to the COR-filter of the concurrent 
job, then it fails to finish first.


We should not have such frozen link, as our bottom node should be above 
COR-filter of concurrent job.





The bdrv_freeze_backing_chain(bs, above_base, errp) does that job. Max 
insisted on keeping it.


Andrey



[PATCH v7 2/3] nvme: add namespace I/O optimization fields to shared header

2020-10-27 Thread Klaus Jensen
From: Klaus Jensen 

This adds the NPWG, NPWA, NPDG, NPDA and NOWS family of fields to the
shared nvme.h header for use by later patches.

Signed-off-by: Klaus Jensen 
Cc: Stefan Hajnoczi 
Cc: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
---
 include/block/nvme.h | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/block/nvme.h b/include/block/nvme.h
index 966c3bb304bd..e95ff6ca9b37 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -990,7 +990,12 @@ typedef struct QEMU_PACKED NvmeIdNs {
 uint16_tnabspf;
 uint16_tnoiob;
 uint8_t nvmcap[16];
-uint8_t rsvd64[40];
+uint16_tnpwg;
+uint16_tnpwa;
+uint16_tnpdg;
+uint16_tnpda;
+uint16_tnows;
+uint8_t rsvd74[30];
 uint8_t nguid[16];
 uint64_teui64;
 NvmeLBAFlbaf[16];
-- 
2.29.1




Re: qcow2 overlay performance

2020-10-27 Thread Alberto Garcia
On Thu 22 Oct 2020 10:56:46 PM CEST, Yoonho Park wrote:
> I am still seeing the performance degradation, but I did find something
> interesting (and promising) with qemu 5.1.50. Enabling the subcluster
> allocation support in qemu 5.1.50 (extended_l2=on) eliminates the
> performance degradation of adding an overlay. Without subcluster allocation
> enabled, I still see the performance degradation in qemu 5.1.50 when adding
> an overlay. For these experiments, I used 64K blocks and 2M qcow2 cluster
> size.

Well, 2MB clusters have 64KB subclusters, so your request size is equal
to the subcluster size. If the requests are aligned there should be no
copy-on-write and therefore no performance degradation if you have
backing images.

Berto



[PATCH v7 3/3] hw/block/nvme: add the dataset management command

2020-10-27 Thread Klaus Jensen
From: Klaus Jensen 

Add support for the Dataset Management command and the Deallocate
attribute. Deallocation results in discards being sent to the underlying
block device. Whether or not the blocks are actually deallocated is
affected by the same factors as Write Zeroes (see previous commit).

 format | discard | dsm (512B)  dsm (4KiB)  dsm (64KiB)

  qcow2ignore   n   n   n
  qcow2unmapn   n   y
  raw  ignore   n   n   n
  raw  unmapn   y   y

Again, a raw format and 4KiB LBAs are preferable.

In order to set the Namespace Preferred Deallocate Granularity and
Alignment fields (NPDG and NPDA), choose a sane minimum discard
granularity of 4KiB. If we are using a passthru device supporting
discard at a 512B granularity, user should set the discard_granularity
property explicitly. NPDG and NPDA will also account for the
cluster_size of the block driver if required (i.e. for QCOW2).

See NVM Express 1.3d, Section 6.7 ("Dataset Management command").

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.h|   2 +
 hw/block/nvme-ns.c |  30 +++--
 hw/block/nvme.c| 102 -
 3 files changed, 129 insertions(+), 5 deletions(-)

diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index e080a2318a50..574333caa3f9 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -28,6 +28,7 @@ typedef struct NvmeRequest {
 struct NvmeNamespace*ns;
 BlockAIOCB  *aiocb;
 uint16_tstatus;
+void*opaque;
 NvmeCqe cqe;
 NvmeCmd cmd;
 BlockAcctCookie acct;
@@ -60,6 +61,7 @@ static inline const char *nvme_io_opc_str(uint8_t opc)
 case NVME_CMD_WRITE:return "NVME_NVM_CMD_WRITE";
 case NVME_CMD_READ: return "NVME_NVM_CMD_READ";
 case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES";
+case NVME_CMD_DSM:  return "NVME_NVM_CMD_DSM";
 default:return "NVME_NVM_CMD_UNKNOWN";
 }
 }
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index f1cc734c60f5..2d69b5177b51 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -28,10 +28,14 @@
 #include "nvme.h"
 #include "nvme-ns.h"
 
-static void nvme_ns_init(NvmeNamespace *ns)
+#define MIN_DISCARD_GRANULARITY (4 * KiB)
+
+static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
 {
+BlockDriverInfo bdi;
 NvmeIdNs *id_ns = >id_ns;
 int lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
+int npdg;
 
 ns->id_ns.dlfeat = 0x9;
 
@@ -43,8 +47,19 @@ static void nvme_ns_init(NvmeNamespace *ns)
 id_ns->ncap = id_ns->nsze;
 id_ns->nuse = id_ns->ncap;
 
-/* support DULBE */
-id_ns->nsfeat |= 0x4;
+/* support DULBE and I/O optimization fields */
+id_ns->nsfeat |= (0x4 | 0x10);
+
+npdg = ns->blkconf.discard_granularity / ns->blkconf.logical_block_size;
+
+if (bdrv_get_info(blk_bs(ns->blkconf.blk), ) >= 0 &&
+bdi.cluster_size > ns->blkconf.discard_granularity) {
+npdg = bdi.cluster_size / ns->blkconf.logical_block_size;
+}
+
+id_ns->npda = id_ns->npdg = npdg - 1;
+
+return 0;
 }
 
 static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
@@ -59,6 +74,11 @@ static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, 
Error **errp)
 return -1;
 }
 
+if (ns->blkconf.discard_granularity == -1) {
+ns->blkconf.discard_granularity =
+MAX(ns->blkconf.logical_block_size, MIN_DISCARD_GRANULARITY);
+}
+
 ns->size = blk_getlength(ns->blkconf.blk);
 if (ns->size < 0) {
 error_setg_errno(errp, -ns->size, "could not get blockdev size");
@@ -92,7 +112,9 @@ int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error 
**errp)
 return -1;
 }
 
-nvme_ns_init(ns);
+if (nvme_ns_init(ns, errp)) {
+return -1;
+}
 
 if (nvme_register_namespace(n, ns, errp)) {
 return -1;
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 44db841374eb..7c5e3cc3de0c 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -967,6 +967,104 @@ static void nvme_rw_cb(void *opaque, int ret)
 nvme_enqueue_req_completion(nvme_cq(req), req);
 }
 
+static void nvme_aio_discard_cb(void *opaque, int ret)
+{
+NvmeRequest *req = opaque;
+uintptr_t *discards = (uintptr_t *)>opaque;
+
+trace_pci_nvme_aio_discard_cb(nvme_cid(req));
+
+if (ret) {
+req->status = NVME_INTERNAL_DEV_ERROR;
+trace_pci_nvme_err_aio(nvme_cid(req), strerror(ret),
+   req->status);
+}
+
+(*discards)--;
+
+if (*discards) {
+return;
+}
+
+req->opaque = NULL;
+
+nvme_enqueue_req_completion(nvme_cq(req), req);
+}
+
+static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req)
+{
+NvmeNamespace 

[PATCH v7 1/3] hw/block/nvme: add dulbe support

2020-10-27 Thread Klaus Jensen
From: Klaus Jensen 

Add support for reporting the Deallocated or Unwritten Logical Block
Error (DULBE).

Rely on the block status flags reported by the block layer and consider
any block with the BDRV_BLOCK_ZERO flag to be deallocated.

Multiple factors affect when a Write Zeroes command results in
deallocation of blocks.

  * the underlying file system block size
  * the blockdev format
  * the 'discard' and 'logical_block_size' parameters

 format | discard | wz (512B)  wz (4KiB)  wz (64KiB)
-
  qcow2ignore   n  n  y
  qcow2unmapn  n  y
  raw  ignore   n  y  y
  raw  unmapn  y  y

So, this works best with an image in raw format and 4KiB LBAs, since
holes can then be punched on a per-block basis (this assumes a file
system with a 4KiB block size, YMMV). A qcow2 image uses a cluster size
of 64KiB by default and blocks will only be marked deallocated if a full
cluster is zeroed or discarded. However, this *is* consistent with the
spec since Write Zeroes "should" deallocate the block if the Deallocate
attribute is set and "may" deallocate if the Deallocate attribute is not
set. Thus, we always try to deallocate (the BDRV_REQ_MAY_UNMAP flag is
always set).

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme-ns.h|  4 ++
 include/block/nvme.h  |  5 +++
 hw/block/nvme-ns.c|  8 ++--
 hw/block/nvme.c   | 91 ++-
 hw/block/trace-events |  4 ++
 5 files changed, 107 insertions(+), 5 deletions(-)

diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index 83734f4606e1..44bf6271b744 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -31,6 +31,10 @@ typedef struct NvmeNamespace {
 NvmeIdNs id_ns;
 
 NvmeNamespaceParams params;
+
+struct {
+uint32_t err_rec;
+} features;
 } NvmeNamespace;
 
 static inline uint32_t nvme_nsid(NvmeNamespace *ns)
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 8a46d9cf015f..966c3bb304bd 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -687,6 +687,7 @@ enum NvmeStatusCodes {
 NVME_E2E_REF_ERROR  = 0x0284,
 NVME_CMP_FAILURE= 0x0285,
 NVME_ACCESS_DENIED  = 0x0286,
+NVME_DULB   = 0x0287,
 NVME_MORE   = 0x2000,
 NVME_DNR= 0x4000,
 NVME_NO_COMPLETE= 0x,
@@ -903,6 +904,9 @@ enum NvmeIdCtrlLpa {
 #define NVME_AEC_NS_ATTR(aec)   ((aec >> 8) & 0x1)
 #define NVME_AEC_FW_ACTIVATION(aec) ((aec >> 9) & 0x1)
 
+#define NVME_ERR_REC_TLER(err_rec)  (err_rec & 0x)
+#define NVME_ERR_REC_DULBE(err_rec) (err_rec & 0x1)
+
 enum NvmeFeatureIds {
 NVME_ARBITRATION= 0x1,
 NVME_POWER_MANAGEMENT   = 0x2,
@@ -1023,6 +1027,7 @@ enum NvmeNsIdentifierType {
 
 
 #define NVME_ID_NS_NSFEAT_THIN(nsfeat)  ((nsfeat & 0x1))
+#define NVME_ID_NS_NSFEAT_DULBE(nsfeat) ((nsfeat >> 2) & 0x1)
 #define NVME_ID_NS_FLBAS_EXTENDED(flbas)((flbas >> 4) & 0x1)
 #define NVME_ID_NS_FLBAS_INDEX(flbas)   ((flbas & 0xf))
 #define NVME_ID_NS_MC_SEPARATE(mc)  ((mc >> 1) & 0x1)
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index 31c80cdf5b5f..f1cc734c60f5 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -33,9 +33,7 @@ static void nvme_ns_init(NvmeNamespace *ns)
 NvmeIdNs *id_ns = >id_ns;
 int lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
 
-if (blk_get_flags(ns->blkconf.blk) & BDRV_O_UNMAP) {
-ns->id_ns.dlfeat = 0x9;
-}
+ns->id_ns.dlfeat = 0x9;
 
 id_ns->lbaf[lba_index].ds = 31 - clz32(ns->blkconf.logical_block_size);
 
@@ -44,6 +42,9 @@ static void nvme_ns_init(NvmeNamespace *ns)
 /* no thin provisioning */
 id_ns->ncap = id_ns->nsze;
 id_ns->nuse = id_ns->ncap;
+
+/* support DULBE */
+id_ns->nsfeat |= 0x4;
 }
 
 static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
@@ -92,6 +93,7 @@ int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error 
**errp)
 }
 
 nvme_ns_init(ns);
+
 if (nvme_register_namespace(n, ns, errp)) {
 return -1;
 }
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index fa2cba744b57..44db841374eb 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -105,6 +105,7 @@ static const bool nvme_feature_support[NVME_FID_MAX] = {
 
 static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
 [NVME_TEMPERATURE_THRESHOLD]= NVME_FEAT_CAP_CHANGE,
+[NVME_ERROR_RECOVERY]   = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS,
 [NVME_VOLATILE_WRITE_CACHE] = NVME_FEAT_CAP_CHANGE,
 [NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE,
 [NVME_ASYNCHRONOUS_EVENT_CONF]  = NVME_FEAT_CAP_CHANGE,
@@ -878,6 +879,49 @@ static inline uint16_t nvme_check_bounds(NvmeCtrl *n, 
NvmeNamespace *ns,
 return NVME_SUCCESS;
 }
 
+static uint16_t 

Re: [PATCH v12 14/14] block: apply COR-filter to block-stream jobs

2020-10-27 Thread Vladimir Sementsov-Ogievskiy

27.10.2020 20:48, Andrey Shinkevich wrote:


On 27.10.2020 19:13, Vladimir Sementsov-Ogievskiy wrote:

22.10.2020 21:13, Andrey Shinkevich wrote:

This patch completes the series with the COR-filter insertion for
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
what will reduce the disk overuse.
The COR-filter insertion incurs changes in the iotests case
245:test_block_stream_4 that reopens the backing chain during a
block-stream job. There are changes in the iotests #030 as well.
The iotests case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. The base backing node for one job is the top node for another
job. It may change due to the filter node inserted into the backing
chain while both jobs are running. Another issue is that the parts of
the backing chain are being frozen by the running job and may not be
changed by the concurrent job when needed. The concept of the parallel
jobs with common nodes is considered vital no more.

Signed-off-by: Andrey Shinkevich 
---
  block/stream.c | 98 ++
  tests/qemu-iotests/030 | 51 +++-
  tests/qemu-iotests/030.out |  4 +-
  tests/qemu-iotests/141.out |  2 +-
  tests/qemu-iotests/245 | 22 +++
  5 files changed, 87 insertions(+), 90 deletions(-)

diff --git a/block/stream.c b/block/stream.c



[...]


+    s = block_job_create(job_id, _job_driver, NULL, cor_filter_bs,
+ BLK_PERM_CONSISTENT_READ,
+ basic_flags | BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD,


I think that BLK_PERM_GRAPH_MOD is something outdated. We have chain-freeze; 
what does BLK_PERM_GRAPH_MOD add to it? I don't know, and I doubt that anybody knows.



That is true for the commit/mirror jobs also. If we agree to remove the flag 
BLK_PERM_GRAPH_MOD from all these jobs, it will be made in a separate series, 
won't it?


Hmm. At least, let's not implement new logic based on BLK_PERM_GRAPH_MOD. In 
original code it's only block_job_create's perm, not in shared_perm, not 
somewhere else.. So, if we keep it, let's keep it as is: only in perm in 
block_job_create, not implementing additional perm/shared_perm logic.




   speed, creation_flags, NULL, NULL, errp);
  if (!s) {
  goto fail;
  }
+    /*
+ * Prevent concurrent jobs trying to modify the graph structure here, we
+ * already have our own plans. Also don't allow resize as the image size is
+ * queried only at the job start and then cached.
+ */
+    if (block_job_add_bdrv(>common, "active node", bs,
+   basic_flags | BLK_PERM_GRAPH_MOD,


Why not 0, like for other nodes? We don't use this BdrvChild at all, so why 
require permissions?



Yes, '0' is right.


+   basic_flags | BLK_PERM_WRITE, _abort)) {
+    goto fail;
+    }
+
  /* Block all intermediate nodes between bs and base, because 



[...]


diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index dcb4b5d..0064590 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -227,61 +227,20 @@ class TestParallelOps(iotests.QMPTestCase):
  for img in self.imgs:
  os.remove(img)
-    # Test that it's possible to run several block-stream operations
-    # in parallel in the same snapshot chain
-    @unittest.skipIf(os.environ.get('QEMU_CHECK_BLOCK_AUTO'), 'disabled in CI')
-    def test_stream_parallel(self):


Didn't we agree to add a "bottom" parameter to QMP? Then this test-case can be 
rewritten using
node-names and new "bottom" stream argument.



I guess it will not help for the whole test. Particularly, there is an issue 
with freezing the child link to the COR-filter of the concurrent job, then it fails 
to finish first.


We should not have such frozen link, as our bottom node should be above 
COR-filter of concurrent job.


--
Best regards,
Vladimir



[PATCH v7 0/3] hw/block/nvme: dulbe and dsm support

2020-10-27 Thread Klaus Jensen
From: Klaus Jensen 

This adds support for the Deallocated or Unwritten Logical Block error
recovery feature as well as the Dataset Management command.

v7:
  - Handle negative return value from bdrv_block_status.
  - bdrv_get_info may not be supported on all block drivers, so do not
consider it a fatal error.

v6:
  - Skip the allocation of the discards integer and just use the opaque
value directly (Philippe)
  - Split changes to include/block/nvme.h into a separate patch
(Philippe)
  - Clean up some convoluted checks on the discards value (Philippe)
  - Use unambiguous units in the commit messages (Philippe)
  - Stack allocate the range array (Keith)

v5:
  - Restore status code from callback (Keith)

v4:
  - Removed mixed declaration and code (Keith)
  - Set NPDG and NPDA and account for the blockdev cluster size.

Klaus Jensen (3):
  hw/block/nvme: add dulbe support
  nvme: add namespace I/O optimization fields to shared header
  hw/block/nvme: add the dataset management command

 hw/block/nvme-ns.h|   4 +
 hw/block/nvme.h   |   2 +
 include/block/nvme.h  |  12 ++-
 hw/block/nvme-ns.c|  34 ++--
 hw/block/nvme.c   | 193 +-
 hw/block/trace-events |   4 +
 6 files changed, 240 insertions(+), 9 deletions(-)

-- 
2.29.1




Re: [PATCH v12 14/14] block: apply COR-filter to block-stream jobs

2020-10-27 Thread Andrey Shinkevich



On 27.10.2020 19:13, Vladimir Sementsov-Ogievskiy wrote:

22.10.2020 21:13, Andrey Shinkevich wrote:

This patch completes the series with the COR-filter insertion for
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
what will reduce the disk overuse.
The COR-filter insertion incurs changes in the iotests case
245:test_block_stream_4 that reopens the backing chain during a
block-stream job. There are changes in the iotests #030 as well.
The iotests case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. The base backing node for one job is the top node for another
job. It may change due to the filter node inserted into the backing
chain while both jobs are running. Another issue is that the parts of
the backing chain are being frozen by the running job and may not be
changed by the concurrent job when needed. The concept of the parallel
jobs with common nodes is considered vital no more.

Signed-off-by: Andrey Shinkevich 
---
  block/stream.c | 98 
++

  tests/qemu-iotests/030 | 51 +++-
  tests/qemu-iotests/030.out |  4 +-
  tests/qemu-iotests/141.out |  2 +-
  tests/qemu-iotests/245 | 22 +++
  5 files changed, 87 insertions(+), 90 deletions(-)

diff --git a/block/stream.c b/block/stream.c



[...]

+    s = block_job_create(job_id, _job_driver, NULL, 
cor_filter_bs,

+ BLK_PERM_CONSISTENT_READ,
+ basic_flags | BLK_PERM_WRITE | 
BLK_PERM_GRAPH_MOD,


I think that BLK_PERM_GRAPH_MOD is something outdated. We have 
chain-freeze; what does BLK_PERM_GRAPH_MOD add to it? I don't know, and I doubt 
that anybody knows.




That is true for the commit/mirror jobs also. If we agree to remove the 
flag BLK_PERM_GRAPH_MOD from all these jobs, it will be made in a 
separate series, won't it?



   speed, creation_flags, NULL, NULL, errp);
  if (!s) {
  goto fail;
  }
+    /*
+ * Prevent concurrent jobs trying to modify the graph structure 
here, we
+ * already have our own plans. Also don't allow resize as the 
image size is

+ * queried only at the job start and then cached.
+ */
+    if (block_job_add_bdrv(>common, "active node", bs,
+   basic_flags | BLK_PERM_GRAPH_MOD,


Why not 0, like for other nodes? We don't use this BdrvChild at all, so why 
require permissions?




Yes, '0' is right.

+   basic_flags | BLK_PERM_WRITE, 
_abort)) {

+    goto fail;
+    }
+
  /* Block all intermediate nodes between bs and base, because 



[...]


diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index dcb4b5d..0064590 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -227,61 +227,20 @@ class TestParallelOps(iotests.QMPTestCase):
  for img in self.imgs:
  os.remove(img)
-    # Test that it's possible to run several block-stream operations
-    # in parallel in the same snapshot chain
-    @unittest.skipIf(os.environ.get('QEMU_CHECK_BLOCK_AUTO'), 
'disabled in CI')

-    def test_stream_parallel(self):


Didn't we agree to add a "bottom" parameter to QMP? Then this test-case can 
be rewritten using

node-names and new "bottom" stream argument.



I guess it will not help for the whole test. Particularly, there is an 
issue with freezing the child link to the COR-filter of the concurrent job, 
then it fails to finish first.


Andrey



[PATCH 11/12] vhost-user-blk-test: drop unused return value

2020-10-27 Thread Stefan Hajnoczi
The sock_path return value was unused and bogus (it doesn't make sense
when there are multiple drives because only the last path is arbitrarily
returned).

Signed-off-by: Stefan Hajnoczi 
---
 tests/qtest/vhost-user-blk-test.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/qtest/vhost-user-blk-test.c 
b/tests/qtest/vhost-user-blk-test.c
index 15daf8ccbc..0d056cc189 100644
--- a/tests/qtest/vhost-user-blk-test.c
+++ b/tests/qtest/vhost-user-blk-test.c
@@ -705,8 +705,8 @@ static void quit_storage_daemon(void *qmp_test_state)
 g_free(qmp_test_state);
 }
 
-static char *start_vhost_user_blk(GString *cmd_line, int vus_instances,
-  int num_queues)
+static void start_vhost_user_blk(GString *cmd_line, int vus_instances,
+ int num_queues)
 {
 const char *vhost_user_blk_bin = qtest_qemu_storage_daemon_binary();
 int fd, qmp_fd, i;
@@ -774,7 +774,6 @@ static char *start_vhost_user_blk(GString *cmd_line, int 
vus_instances,
 g_test_queue_destroy(quit_storage_daemon, qmp_test_state);
 
 qobject_unref(qtest_qmp(qmp_test_state, "{'execute': 
'qmp_capabilities'}"));
-return sock_path;
 }
 
 static void *vhost_user_blk_test_setup(GString *cmd_line, void *arg)
-- 
2.26.2



[PATCH 10/12] vhost-user-blk-test: close fork child file descriptors

2020-10-27 Thread Stefan Hajnoczi
Do not leave stdin, stdout, stderr open after fork. stdout is the
tap-driver.pl pipe. If we keep the pipe open then tap-driver.pl will not
detect that qos-test has terminated and it will hang.

Signed-off-by: Stefan Hajnoczi 
---
 tests/qtest/vhost-user-blk-test.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/tests/qtest/vhost-user-blk-test.c 
b/tests/qtest/vhost-user-blk-test.c
index f05f14c192..15daf8ccbc 100644
--- a/tests/qtest/vhost-user-blk-test.c
+++ b/tests/qtest/vhost-user-blk-test.c
@@ -749,6 +749,17 @@ static char *start_vhost_user_blk(GString *cmd_line, int 
vus_instances,
storage_daemon_command->str);
 pid_t pid = fork();
 if (pid == 0) {
+/*
+ * Close standard file descriptors so tap-driver.pl pipe detects when
+ * our parent terminates.
+ */
+close(0);
+close(1);
+close(2);
+open("/dev/null", O_RDONLY);
+open("/dev/null", O_WRONLY);
+open("/dev/null", O_WRONLY);
+
 execlp("/bin/sh", "sh", "-c", storage_daemon_command->str, NULL);
 exit(1);
 }
-- 
2.26.2



[PATCH 08/12] libqtest: add qtest_socket_server()

2020-10-27 Thread Stefan Hajnoczi
There is a qtest_socket_client() API. Add an equivalent
qtest_socket_server() API that returns a new UNIX domain socket in the
listen state. The code for this was already there but only used
internally in init_socket().

Signed-off-by: Stefan Hajnoczi 
---
 tests/qtest/libqos/libqtest.h |  8 +++
 tests/qtest/libqtest.c| 40 ---
 2 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/tests/qtest/libqos/libqtest.h b/tests/qtest/libqos/libqtest.h
index 241b5f89fb..699be8c2a2 100644
--- a/tests/qtest/libqos/libqtest.h
+++ b/tests/qtest/libqos/libqtest.h
@@ -132,6 +132,14 @@ void qtest_qmp_send(QTestState *s, const char *fmt, ...)
 void qtest_qmp_send_raw(QTestState *s, const char *fmt, ...)
 GCC_FMT_ATTR(2, 3);
 
+/**
+ * qtest_socket_server:
+ * @socket_path: the UNIX domain socket path
+ *
+ * Create and return a listen socket file descriptor, or abort on failure.
+ */
+int qtest_socket_server(const char *socket_path);
+
 /**
  * qtest_socket_client:
  * @server_socket_path: the socket server's path
diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c
index ab34075f2b..d652ffc90d 100644
--- a/tests/qtest/libqtest.c
+++ b/tests/qtest/libqtest.c
@@ -82,24 +82,8 @@ static void qtest_client_set_rx_handler(QTestState *s, 
QTestRecvFn recv);
 
 static int init_socket(const char *socket_path)
 {
-struct sockaddr_un addr;
-int sock;
-int ret;
-
-sock = socket(PF_UNIX, SOCK_STREAM, 0);
-g_assert_cmpint(sock, !=, -1);
-
-addr.sun_family = AF_UNIX;
-snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", socket_path);
+int sock = qtest_socket_server(socket_path);
 qemu_set_cloexec(sock);
-
-do {
-ret = bind(sock, (struct sockaddr *), sizeof(addr));
-} while (ret == -1 && errno == EINTR);
-g_assert_cmpint(ret, !=, -1);
-ret = listen(sock, 1);
-g_assert_cmpint(ret, !=, -1);
-
 return sock;
 }
 
@@ -638,6 +622,28 @@ QTestState *qtest_create_state_with_qmp_fd(int fd)
 return qmp_test_state;
 }
 
+int qtest_socket_server(const char *socket_path)
+{
+struct sockaddr_un addr;
+int sock;
+int ret;
+
+sock = socket(PF_UNIX, SOCK_STREAM, 0);
+g_assert_cmpint(sock, !=, -1);
+
+addr.sun_family = AF_UNIX;
+snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", socket_path);
+
+do {
+ret = bind(sock, (struct sockaddr *), sizeof(addr));
+} while (ret == -1 && errno == EINTR);
+g_assert_cmpint(ret, !=, -1);
+ret = listen(sock, 1);
+g_assert_cmpint(ret, !=, -1);
+
+return sock;
+}
+
 int qtest_socket_client(char *server_socket_path)
 {
 struct sockaddr_un serv_addr;
-- 
2.26.2



[PATCH 12/12] vhost-user-blk-test: fix races by using fd passing

2020-10-27 Thread Stefan Hajnoczi
Pass the QMP and vhost-user-blk server sockets as file descriptors. That
way the sockets are already open and in a listen state when the QEMU
process is launched.

This solves the race with qemu-storage-daemon startup where the UNIX
domain sockets may not be ready yet when QEMU attempts to connect. It
also saves us sleeping for 1 second if the qemu-storage-daemon QMP
socket is not ready yet.

Signed-off-by: Stefan Hajnoczi 
---
 tests/qtest/vhost-user-blk-test.c | 42 +++
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/tests/qtest/vhost-user-blk-test.c 
b/tests/qtest/vhost-user-blk-test.c
index 0d056cc189..9589f90b14 100644
--- a/tests/qtest/vhost-user-blk-test.c
+++ b/tests/qtest/vhost-user-blk-test.c
@@ -683,8 +683,22 @@ static char *drive_create(void)
 return t_path;
 }
 
-static char sock_path_tempate[] = "/tmp/qtest.vhost_user_blk.XX";
-static char qmp_sock_path_tempate[] = "/tmp/qtest.vhost_user_blk.qmp.XX";
+static char *create_listen_socket(int *fd)
+{
+int tmp_fd;
+char *path;
+
+/* No race because our pid makes the path unique */
+path = g_strdup_printf("/tmp/qtest-%d-sock.XX", getpid());
+tmp_fd = mkstemp(path);
+g_assert_cmpint(tmp_fd, >=, 0);
+close(tmp_fd);
+unlink(path);
+
+*fd = qtest_socket_server(path);
+g_test_queue_destroy(destroy_file, path);
+return path;
+}
 
 static void quit_storage_daemon(void *qmp_test_state)
 {
@@ -709,37 +723,33 @@ static void start_vhost_user_blk(GString *cmd_line, int 
vus_instances,
  int num_queues)
 {
 const char *vhost_user_blk_bin = qtest_qemu_storage_daemon_binary();
-int fd, qmp_fd, i;
+int qmp_fd, i;
 QTestState *qmp_test_state;
 gchar *img_path;
-char *sock_path = NULL;
-char *qmp_sock_path = g_strdup(qmp_sock_path_tempate);
+char *qmp_sock_path;
 GString *storage_daemon_command = g_string_new(NULL);
 
-qmp_fd = mkstemp(qmp_sock_path);
-g_assert_cmpint(qmp_fd, >=, 0);
-g_test_queue_destroy(destroy_file, qmp_sock_path);
+qmp_sock_path = create_listen_socket(_fd);
 
 g_string_append_printf(storage_daemon_command,
 "exec %s "
-"--chardev socket,id=qmp,path=%s,server,nowait --monitor 
chardev=qmp ",
-vhost_user_blk_bin, qmp_sock_path);
+"--chardev socket,id=qmp,fd=%d,server,nowait --monitor chardev=qmp 
",
+vhost_user_blk_bin, qmp_fd);
 
 g_string_append_printf(cmd_line,
 " -object memory-backend-memfd,id=mem,size=256M,share=on -M 
memory-backend=mem ");
 
 for (i = 0; i < vus_instances; i++) {
-sock_path = g_strdup(sock_path_tempate);
-fd = mkstemp(sock_path);
-g_assert_cmpint(fd, >=, 0);
-g_test_queue_destroy(drive_file, sock_path);
+int fd;
+char *sock_path = create_listen_socket();
+
 /* create image file */
 img_path = drive_create();
 g_string_append_printf(storage_daemon_command,
 "--blockdev driver=file,node-name=disk%d,filename=%s "
-"--export 
type=vhost-user-blk,id=disk%d,addr.type=unix,addr.path=%s,"
+"--export type=vhost-user-blk,id=disk%d,addr.type=fd,addr.str=%d,"
 "node-name=disk%i,writable=on,num-queues=%d ",
-i, img_path, i, sock_path, i, num_queues);
+i, img_path, i, fd, i, num_queues);
 
 g_string_append_printf(cmd_line, "-chardev socket,id=char%d,path=%s ",
i + 1, sock_path);
-- 
2.26.2



[PATCH 06/12] test: new qTest case to test the vhost-user-blk-server

2020-10-27 Thread Stefan Hajnoczi
From: Coiby Xu 

This test case has the same tests as tests/virtio-blk-test.c except for
the tests that use block_resize. Since a vhost-user server can only serve
one client at a time, two instances of vhost-user-blk-server are started by
qemu-storage-daemon for the hotplug test.

In order to not block scripts/tap-driver.pl, vhost-user-blk-server will
send "quit" command to qemu-storage-daemon's QMP monitor. So a function
is added to libqtest.c to establish socket connection with socket
server.

Suggested-by: Thomas Huth 
Signed-off-by: Coiby Xu 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Marc-André Lureau 
Message-id: 20200918080912.321299-7-coiby...@gmail.com
[Update meson.build to only test when CONFIG_TOOLS has built
qemu-storage-daemon. This prevents CI failures with --disable-tools.
Also bump RAM to 256 MB because that is the minimum RAM granularity on
ppc64 spapr machines.
--Stefan]
Signed-off-by: Stefan Hajnoczi 
---
 tests/qtest/libqos/libqtest.h   |  17 +
 tests/qtest/libqos/vhost-user-blk.h |  48 ++
 tests/qtest/libqos/vhost-user-blk.c | 129 +
 tests/qtest/libqtest.c  |  36 +-
 tests/qtest/vhost-user-blk-test.c   | 751 
 tests/qtest/libqos/meson.build  |   1 +
 tests/qtest/meson.build |   2 +
 7 files changed, 982 insertions(+), 2 deletions(-)
 create mode 100644 tests/qtest/libqos/vhost-user-blk.h
 create mode 100644 tests/qtest/libqos/vhost-user-blk.c
 create mode 100644 tests/qtest/vhost-user-blk-test.c

diff --git a/tests/qtest/libqos/libqtest.h b/tests/qtest/libqos/libqtest.h
index 5c959f1853..241b5f89fb 100644
--- a/tests/qtest/libqos/libqtest.h
+++ b/tests/qtest/libqos/libqtest.h
@@ -132,6 +132,23 @@ void qtest_qmp_send(QTestState *s, const char *fmt, ...)
 void qtest_qmp_send_raw(QTestState *s, const char *fmt, ...)
 GCC_FMT_ATTR(2, 3);
 
+/**
+ * qtest_socket_client:
+ * @server_socket_path: the socket server's path
+ *
+ * Connect to a socket server.
+ */
+int qtest_socket_client(char *server_socket_path);
+
+/**
+ * qtest_create_state_with_qmp_fd:
+ * @fd: socket fd
+ *
+ * Wrap socket fd in QTestState to make use of qtest_qmp*
+ * functions
+ */
+QTestState *qtest_create_state_with_qmp_fd(int fd);
+
 /**
  * qtest_vqmp_fds:
  * @s: #QTestState instance to operate on.
diff --git a/tests/qtest/libqos/vhost-user-blk.h 
b/tests/qtest/libqos/vhost-user-blk.h
new file mode 100644
index 00..2a03456a45
--- /dev/null
+++ b/tests/qtest/libqos/vhost-user-blk.h
@@ -0,0 +1,48 @@
+/*
+ * libqos driver framework
+ *
+ * Based on tests/qtest/libqos/virtio-blk.c
+ *
+ * Copyright (c) 2020 Coiby Xu 
+ *
+ * Copyright (c) 2018 Emanuele Giuseppe Esposito 
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see 
+ */
+
+#ifndef TESTS_LIBQOS_VHOST_USER_BLK_H
+#define TESTS_LIBQOS_VHOST_USER_BLK_H
+
+#include "qgraph.h"
+#include "virtio.h"
+#include "virtio-pci.h"
+
+typedef struct QVhostUserBlk QVhostUserBlk;
+typedef struct QVhostUserBlkPCI QVhostUserBlkPCI;
+typedef struct QVhostUserBlkDevice QVhostUserBlkDevice;
+
+struct QVhostUserBlk {
+QVirtioDevice *vdev;
+};
+
+struct QVhostUserBlkPCI {
+QVirtioPCIDevice pci_vdev;
+QVhostUserBlk blk;
+};
+
+struct QVhostUserBlkDevice {
+QOSGraphObject obj;
+QVhostUserBlk blk;
+};
+
+#endif
diff --git a/tests/qtest/libqos/vhost-user-blk.c 
b/tests/qtest/libqos/vhost-user-blk.c
new file mode 100644
index 00..58c7e1eb69
--- /dev/null
+++ b/tests/qtest/libqos/vhost-user-blk.c
@@ -0,0 +1,129 @@
+/*
+ * libqos driver framework
+ *
+ * Based on tests/qtest/libqos/virtio-blk.c
+ *
+ * Copyright (c) 2020 Coiby Xu 
+ *
+ * Copyright (c) 2018 Emanuele Giuseppe Esposito 
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest.h"
+#include "qemu/module.h"
+#include "standard-headers/linux/virtio_blk.h"
+#include "vhost-user-blk.h"
+

[PATCH 09/12] vhost-user-blk-test: rename destroy_drive() to destroy_file()

2020-10-27 Thread Stefan Hajnoczi
The function is used not just for image files but also for UNIX domain
sockets (QMP monitor and vhost-user-blk). Reflect that in the name.

Signed-off-by: Stefan Hajnoczi 
---
 tests/qtest/vhost-user-blk-test.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/qtest/vhost-user-blk-test.c 
b/tests/qtest/vhost-user-blk-test.c
index 31f2335f97..f05f14c192 100644
--- a/tests/qtest/vhost-user-blk-test.c
+++ b/tests/qtest/vhost-user-blk-test.c
@@ -658,7 +658,8 @@ static const char *qtest_qemu_storage_daemon_binary(void)
 return qemu_storage_daemon_bin;
 }
 
-static void drive_destroy(void *path)
+/* g_test_queue_destroy() cleanup function for files */
+static void destroy_file(void *path)
 {
 unlink(path);
 g_free(path);
@@ -678,7 +679,7 @@ static char *drive_create(void)
 g_assert_cmpint(ret, ==, 0);
 close(fd);
 
-g_test_queue_destroy(drive_destroy, t_path);
+g_test_queue_destroy(destroy_file, t_path);
 return t_path;
 }
 
@@ -717,7 +718,7 @@ static char *start_vhost_user_blk(GString *cmd_line, int 
vus_instances,
 
 qmp_fd = mkstemp(qmp_sock_path);
 g_assert_cmpint(qmp_fd, >=, 0);
-g_test_queue_destroy(drive_destroy, qmp_sock_path);
+g_test_queue_destroy(destroy_file, qmp_sock_path);
 
 g_string_append_printf(storage_daemon_command,
 "exec %s "
@@ -731,7 +732,7 @@ static char *start_vhost_user_blk(GString *cmd_line, int 
vus_instances,
 sock_path = g_strdup(sock_path_tempate);
 fd = mkstemp(sock_path);
 g_assert_cmpint(fd, >=, 0);
-g_test_queue_destroy(drive_destroy, sock_path);
+g_test_queue_destroy(destroy_file, sock_path);
 /* create image file */
 img_path = drive_create();
 g_string_append_printf(storage_daemon_command,
-- 
2.26.2



[PATCH 07/12] tests/qtest: add multi-queue test case to vhost-user-blk-test

2020-10-27 Thread Stefan Hajnoczi
Signed-off-by: Stefan Hajnoczi 
Message-id: 20201001144604.559733-3-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 tests/qtest/vhost-user-blk-test.c | 81 +--
 1 file changed, 76 insertions(+), 5 deletions(-)

diff --git a/tests/qtest/vhost-user-blk-test.c 
b/tests/qtest/vhost-user-blk-test.c
index e7e44f9bf0..31f2335f97 100644
--- a/tests/qtest/vhost-user-blk-test.c
+++ b/tests/qtest/vhost-user-blk-test.c
@@ -559,6 +559,67 @@ static void pci_hotplug(void *obj, void *data, 
QGuestAllocator *t_alloc)
 qpci_unplug_acpi_device_test(qts, "drv1", PCI_SLOT_HP);
 }
 
+static void multiqueue(void *obj, void *data, QGuestAllocator *t_alloc)
+{
+QVirtioPCIDevice *pdev1 = obj;
+QVirtioDevice *dev1 = &pdev1->vdev;
+QVirtioPCIDevice *pdev8;
+QVirtioDevice *dev8;
+QTestState *qts = pdev1->pdev->bus->qts;
+uint64_t features;
+uint16_t num_queues;
+
+/*
+ * The primary device has 1 queue and VIRTIO_BLK_F_MQ is not enabled. The
+ * VIRTIO specification allows VIRTIO_BLK_F_MQ to be enabled when there is
+ * only 1 virtqueue, but --device vhost-user-blk-pci doesn't do this (which
+ * is also spec-compliant).
+ */
+features = qvirtio_get_features(dev1);
+g_assert_cmpint(features & (1u << VIRTIO_BLK_F_MQ), ==, 0);
+features = features & ~(QVIRTIO_F_BAD_FEATURE |
+(1u << VIRTIO_RING_F_INDIRECT_DESC) |
+(1u << VIRTIO_F_NOTIFY_ON_EMPTY) |
+(1u << VIRTIO_BLK_F_SCSI));
+qvirtio_set_features(dev1, features);
+
+/* Hotplug a secondary device with 8 queues */
+qtest_qmp_device_add(qts, "vhost-user-blk-pci", "drv1",
+ "{'addr': %s, 'chardev': 'char2', 'num-queues': 8}",
+ stringify(PCI_SLOT_HP) ".0");
+
+pdev8 = virtio_pci_new(pdev1->pdev->bus,
+   &(QPCIAddress) {
+   .devfn = QPCI_DEVFN(PCI_SLOT_HP, 0)
+   });
+g_assert_nonnull(pdev8);
+g_assert_cmpint(pdev8->vdev.device_type, ==, VIRTIO_ID_BLOCK);
+
+qos_object_start_hw(&pdev8->obj);
+
+dev8 = &pdev8->vdev;
+features = qvirtio_get_features(dev8);
+g_assert_cmpint(features & (1u << VIRTIO_BLK_F_MQ),
+==,
+(1u << VIRTIO_BLK_F_MQ));
+features = features & ~(QVIRTIO_F_BAD_FEATURE |
+(1u << VIRTIO_RING_F_INDIRECT_DESC) |
+(1u << VIRTIO_F_NOTIFY_ON_EMPTY) |
+(1u << VIRTIO_BLK_F_SCSI) |
+(1u << VIRTIO_BLK_F_MQ));
+qvirtio_set_features(dev8, features);
+
+num_queues = qvirtio_config_readw(dev8,
+offsetof(struct virtio_blk_config, num_queues));
+g_assert_cmpint(num_queues, ==, 8);
+
+qvirtio_pci_device_disable(pdev8);
+qos_object_destroy(&pdev8->obj);
+
+/* unplug secondary disk */
+qpci_unplug_acpi_device_test(qts, "drv1", PCI_SLOT_HP);
+}
+
 /*
  * Check that setting the vring addr on a non-existent virtqueue does
  * not crash.
@@ -643,7 +704,8 @@ static void quit_storage_daemon(void *qmp_test_state)
 g_free(qmp_test_state);
 }
 
-static char *start_vhost_user_blk(GString *cmd_line, int vus_instances)
+static char *start_vhost_user_blk(GString *cmd_line, int vus_instances,
+  int num_queues)
 {
 const char *vhost_user_blk_bin = qtest_qemu_storage_daemon_binary();
 int fd, qmp_fd, i;
@@ -675,8 +737,8 @@ static char *start_vhost_user_blk(GString *cmd_line, int 
vus_instances)
 g_string_append_printf(storage_daemon_command,
 "--blockdev driver=file,node-name=disk%d,filename=%s "
 "--export 
type=vhost-user-blk,id=disk%d,addr.type=unix,addr.path=%s,"
-"node-name=disk%i,writable=on ",
-i, img_path, i, sock_path, i);
+"node-name=disk%i,writable=on,num-queues=%d ",
+i, img_path, i, sock_path, i, num_queues);
 
 g_string_append_printf(cmd_line, "-chardev socket,id=char%d,path=%s ",
i + 1, sock_path);
@@ -705,7 +767,7 @@ static char *start_vhost_user_blk(GString *cmd_line, int 
vus_instances)
 
 static void *vhost_user_blk_test_setup(GString *cmd_line, void *arg)
 {
-start_vhost_user_blk(cmd_line, 1);
+start_vhost_user_blk(cmd_line, 1, 1);
 return arg;
 }
 
@@ -719,7 +781,13 @@ static void *vhost_user_blk_test_setup(GString *cmd_line, 
void *arg)
 static void *vhost_user_blk_hotplug_test_setup(GString *cmd_line, void *arg)
 {
 /* "-chardev socket,id=char2" is used for pci_hotplug*/
-start_vhost_user_blk(cmd_line, 2);
+start_vhost_user_blk(cmd_line, 2, 1);
+return arg;
+}
+
+static void *vhost_user_blk_multiqueue_test_setup(GString *cmd_line, void *arg)
+{
+start_vhost_user_blk(cmd_line, 2, 8);
 return arg;
 }
 
@@ -746,6 +814,9 @@ static void register_vhost_user_blk_test(void)
 
 

[PATCH 04/12] block/export: fix vhost-user-blk get_config() information leak

2020-10-27 Thread Stefan Hajnoczi
Refuse get_config() requests in excess of sizeof(struct virtio_blk_config).

Signed-off-by: Stefan Hajnoczi 
---
 block/export/vhost-user-blk-server.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/block/export/vhost-user-blk-server.c 
b/block/export/vhost-user-blk-server.c
index 33cc0818b8..62672d1cb9 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -266,6 +266,9 @@ vu_blk_get_config(VuDev *vu_dev, uint8_t *config, uint32_t 
len)
 {
 VuServer *server = container_of(vu_dev, VuServer, vu_dev);
 VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
+
+g_return_val_if_fail(len <= sizeof(struct virtio_blk_config), -1);
+
 memcpy(config, &vexp->blkcfg, len);
 return 0;
 }
-- 
2.26.2



[PATCH 05/12] contrib/vhost-user-blk: fix get_config() information leak

2020-10-27 Thread Stefan Hajnoczi
Refuse get_config() in excess of sizeof(struct virtio_blk_config).

Signed-off-by: Stefan Hajnoczi 
---
 contrib/vhost-user-blk/vhost-user-blk.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/contrib/vhost-user-blk/vhost-user-blk.c 
b/contrib/vhost-user-blk/vhost-user-blk.c
index 25eccd02b5..caad88637e 100644
--- a/contrib/vhost-user-blk/vhost-user-blk.c
+++ b/contrib/vhost-user-blk/vhost-user-blk.c
@@ -404,6 +404,8 @@ vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
 VugDev *gdev;
 VubDev *vdev_blk;
 
+g_return_val_if_fail(len <= sizeof(struct virtio_blk_config), -1);
+
 gdev = container_of(vu_dev, VugDev, parent);
 vdev_blk = container_of(gdev, VubDev, parent);
 memcpy(config, &vdev_blk->blkcfg, len);
-- 
2.26.2



[PATCH 03/12] block/export: make vhost-user-blk config space little-endian

2020-10-27 Thread Stefan Hajnoczi
VIRTIO 1.0 devices have little-endian configuration space. The
vhost-user-blk-server.c code already uses little-endian for virtqueue
processing but not for the configuration space fields. Fix this so the
vhost-user-blk export works on big-endian hosts.

Signed-off-by: Stefan Hajnoczi 
---
 block/export/vhost-user-blk-server.c | 25 -
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/block/export/vhost-user-blk-server.c 
b/block/export/vhost-user-blk-server.c
index 41f4933d6e..33cc0818b8 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -264,7 +264,6 @@ static uint64_t vu_blk_get_protocol_features(VuDev *dev)
 static int
 vu_blk_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
 {
-/* TODO blkcfg must be little-endian for VIRTIO 1.0 */
 VuServer *server = container_of(vu_dev, VuServer, vu_dev);
 VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
 memcpy(config, &vexp->blkcfg, len);
@@ -343,18 +342,18 @@ vu_blk_initialize_config(BlockDriverState *bs,
  uint32_t blk_size,
  uint16_t num_queues)
 {
-config->capacity = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
-config->blk_size = blk_size;
-config->size_max = 0;
-config->seg_max = 128 - 2;
-config->min_io_size = 1;
-config->opt_io_size = 1;
-config->num_queues = num_queues;
-config->max_discard_sectors = 32768;
-config->max_discard_seg = 1;
-config->discard_sector_alignment = config->blk_size >> 9;
-config->max_write_zeroes_sectors = 32768;
-config->max_write_zeroes_seg = 1;
+config->capacity = cpu_to_le64(bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
+config->blk_size = cpu_to_le32(blk_size);
+config->size_max = cpu_to_le32(0);
+config->seg_max = cpu_to_le32(128 - 2);
+config->min_io_size = cpu_to_le16(1);
+config->opt_io_size = cpu_to_le32(1);
+config->num_queues = cpu_to_le16(num_queues);
+config->max_discard_sectors = cpu_to_le32(32768);
+config->max_discard_seg = cpu_to_le32(1);
+config->discard_sector_alignment = cpu_to_le32(blk_size >> 9);
+config->max_write_zeroes_sectors = cpu_to_le32(32768);
+config->max_write_zeroes_seg = cpu_to_le32(1);
 }
 
 static void vu_blk_exp_request_shutdown(BlockExport *exp)
-- 
2.26.2



[PATCH 01/12] libvhost-user: follow QEMU comment style

2020-10-27 Thread Stefan Hajnoczi
Signed-off-by: Stefan Hajnoczi 
---
 contrib/libvhost-user/libvhost-user.h | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/contrib/libvhost-user/libvhost-user.h 
b/contrib/libvhost-user/libvhost-user.h
index 3bbeae8587..a1539dbb69 100644
--- a/contrib/libvhost-user/libvhost-user.h
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -392,7 +392,8 @@ struct VuDev {
 bool broken;
 uint16_t max_queues;
 
-/* @read_msg: custom method to read vhost-user message
+/*
+ * @read_msg: custom method to read vhost-user message
  *
  * Read data from vhost_user socket fd and fill up
  * the passed VhostUserMsg *vmsg struct.
@@ -409,15 +410,19 @@ struct VuDev {
  *
  */
 vu_read_msg_cb read_msg;
-/* @set_watch: add or update the given fd to the watch set,
- * call cb when condition is met */
+
+/*
+ * @set_watch: add or update the given fd to the watch set,
+ * call cb when condition is met.
+ */
 vu_set_watch_cb set_watch;
 
 /* @remove_watch: remove the given fd from the watch set */
 vu_remove_watch_cb remove_watch;
 
-/* @panic: encountered an unrecoverable error, you may try to
- * re-initialize */
+/*
+ * @panic: encountered an unrecoverable error, you may try to re-initialize
+ */
 vu_panic_cb panic;
 const VuDevIface *iface;
 
-- 
2.26.2



[PATCH 02/12] configure: introduce --enable-vhost-user-blk-server

2020-10-27 Thread Stefan Hajnoczi
Make it possible to compile out the vhost-user-blk server. It is enabled
by default on Linux.

Note that vhost-user-server.c depends on libvhost-user, which requires
CONFIG_LINUX. The CONFIG_VHOST_USER dependency was erroneous since that
option controls vhost-user frontends (previously known as "master") and
not device backends (previously known as "slave").

Signed-off-by: Stefan Hajnoczi 
---
 configure| 15 +++
 block/export/export.c|  4 ++--
 block/export/meson.build |  2 +-
 util/meson.build |  2 +-
 4 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/configure b/configure
index 55e07c82dd..b455ca8c7f 100755
--- a/configure
+++ b/configure
@@ -328,6 +328,7 @@ vhost_crypto=""
 vhost_scsi=""
 vhost_vsock=""
 vhost_user=""
+vhost_user_blk_server=""
 vhost_user_fs=""
 kvm="auto"
 hax="auto"
@@ -1240,6 +1241,10 @@ for opt do
   ;;
   --enable-vhost-vsock) vhost_vsock="yes"
   ;;
+  --disable-vhost-user-blk-server) vhost_user_blk_server="no"
+  ;;
+  --enable-vhost-user-blk-server) vhost_user_blk_server="yes"
+  ;;
   --disable-vhost-user-fs) vhost_user_fs="no"
   ;;
   --enable-vhost-user-fs) vhost_user_fs="yes"
@@ -1784,6 +1789,7 @@ disabled with --disable-FEATURE, default is enabled if 
available:
   vhost-crypto    vhost-user-crypto backend support
   vhost-kernel    vhost kernel backend support
   vhost-user      vhost-user backend support
+  vhost-user-blk-server    vhost-user-blk server support
   vhost-vdpa      vhost-vdpa kernel backend support
   spice           spice
   rbd             rados block device (rbd)
@@ -2375,6 +2381,12 @@ if test "$vhost_net" = ""; then
   test "$vhost_kernel" = "yes" && vhost_net=yes
 fi
 
+# libvhost-user is Linux-only
+test "$vhost_user_blk_server" = "" && vhost_user_blk_server=$linux
+if test "$vhost_user_blk_server" = "yes" && test "$linux" = "no"; then
+  error_exit "--enable-vhost-user-blk-server is only available on Linux"
+fi
+
 ##
 # pkg-config probe
 
@@ -6260,6 +6272,9 @@ fi
 if test "$vhost_vdpa" = "yes" ; then
   echo "CONFIG_VHOST_VDPA=y" >> $config_host_mak
 fi
+if test "$vhost_user_blk_server" = "yes" ; then
+  echo "CONFIG_VHOST_USER_BLK_SERVER=y" >> $config_host_mak
+fi
 if test "$vhost_user_fs" = "yes" ; then
   echo "CONFIG_VHOST_USER_FS=y" >> $config_host_mak
 fi
diff --git a/block/export/export.c b/block/export/export.c
index c3478c6c97..bad6f21b1c 100644
--- a/block/export/export.c
+++ b/block/export/export.c
@@ -22,13 +22,13 @@
 #include "qapi/qapi-commands-block-export.h"
 #include "qapi/qapi-events-block-export.h"
 #include "qemu/id.h"
-#if defined(CONFIG_LINUX) && defined(CONFIG_VHOST_USER)
+#ifdef CONFIG_VHOST_USER_BLK_SERVER
 #include "vhost-user-blk-server.h"
 #endif
 
 static const BlockExportDriver *blk_exp_drivers[] = {
 &blk_exp_nbd,
-#if defined(CONFIG_LINUX) && defined(CONFIG_VHOST_USER)
+#ifdef CONFIG_VHOST_USER_BLK_SERVER
 &blk_exp_vhost_user_blk,
 #endif
 };
diff --git a/block/export/meson.build b/block/export/meson.build
index 9fb4fbf81d..19526435d8 100644
--- a/block/export/meson.build
+++ b/block/export/meson.build
@@ -1,2 +1,2 @@
 blockdev_ss.add(files('export.c'))
-blockdev_ss.add(when: ['CONFIG_LINUX', 'CONFIG_VHOST_USER'], if_true: 
files('vhost-user-blk-server.c'))
+blockdev_ss.add(when: 'CONFIG_VHOST_USER_BLK_SERVER', if_true: 
files('vhost-user-blk-server.c'))
diff --git a/util/meson.build b/util/meson.build
index c5159ad79d..f359af0d46 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -66,7 +66,7 @@ if have_block
   util_ss.add(files('main-loop.c'))
   util_ss.add(files('nvdimm-utils.c'))
   util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 
'qemu-coroutine-io.c'))
-  util_ss.add(when: ['CONFIG_LINUX', 'CONFIG_VHOST_USER'], if_true: [
+  util_ss.add(when: 'CONFIG_LINUX', if_true: [
 files('vhost-user-server.c'), vhost_user
   ])
   util_ss.add(files('block-helpers.c'))
-- 
2.26.2



[PATCH 00/12] block/export: vhost-user-blk server cleanups and tests

2020-10-27 Thread Stefan Hajnoczi
This patch series solves some issues with the new vhost-user-blk-server and
adds the qtest test case. The test case was not included in the pull request
that introduced the vhost-user-blk server because of reliability issues that
are fixed in this patch series.

Coiby Xu (1):
  test: new qTest case to test the vhost-user-blk-server

Stefan Hajnoczi (11):
  libvhost-user: follow QEMU comment style
  configure: introduce --enable-vhost-user-blk-server
  block/export: make vhost-user-blk config space little-endian
  block/export: fix vhost-user-blk get_config() information leak
  contrib/vhost-user-blk: fix get_config() information leak
  tests/qtest: add multi-queue test case to vhost-user-blk-test
  libqtest: add qtest_socket_server()
  vhost-user-blk-test: rename destroy_drive() to destroy_file()
  vhost-user-blk-test: close fork child file descriptors
  vhost-user-blk-test: drop unused return value
  vhost-user-blk-test: fix races by using fd passing

 configure   |  15 +
 contrib/libvhost-user/libvhost-user.h   |  15 +-
 tests/qtest/libqos/libqtest.h   |  25 +
 tests/qtest/libqos/vhost-user-blk.h |  48 ++
 block/export/export.c   |   4 +-
 block/export/vhost-user-blk-server.c|  28 +-
 contrib/vhost-user-blk/vhost-user-blk.c |   2 +
 tests/qtest/libqos/vhost-user-blk.c | 129 
 tests/qtest/libqtest.c  |  76 ++-
 tests/qtest/vhost-user-blk-test.c   | 843 
 block/export/meson.build|   2 +-
 tests/qtest/libqos/meson.build  |   1 +
 tests/qtest/meson.build |   2 +
 util/meson.build|   2 +-
 14 files changed, 1151 insertions(+), 41 deletions(-)
 create mode 100644 tests/qtest/libqos/vhost-user-blk.h
 create mode 100644 tests/qtest/libqos/vhost-user-blk.c
 create mode 100644 tests/qtest/vhost-user-blk-test.c

-- 
2.26.2



Re: [PATCH v2] xen: rework pci_piix3_xen_ide_unplug

2020-10-27 Thread John Snow

On 10/27/20 11:40 AM, Anthony PERARD wrote:

From: Anthony PERARD 

This is to allow IDE disks to be unplugged when added to QEMU via:
 -drive file=/root/disk_file,if=none,id=ide-disk0,format=raw
 -device ide-hd,drive=ide-disk0,bus=ide.0,unit=0

as the current code only works for disks added with:
 -drive file=/root/disk_file,if=ide,index=0,media=disk,format=raw

Since the code already has the IDE controller as `dev`, we don't need
to use the legacy DriveInfo to find all the drives we want to unplug.
We can simply use `blk` from the controller, as it was more or less already
assumed to be the same, by setting it to NULL.

Signed-off-by: Anthony PERARD 


Acked-by: John Snow 

Do you need me to send a PR for this?

--js



---
v2: coding style

CC: Paul Durrant 
CC: Stefano Stabellini 
---
  hw/ide/piix.c | 27 +--
  1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/hw/ide/piix.c b/hw/ide/piix.c
index b402a936362b..b9860e35a5c4 100644
--- a/hw/ide/piix.c
+++ b/hw/ide/piix.c
@@ -164,30 +164,29 @@ static void pci_piix_ide_realize(PCIDevice *dev, Error 
**errp)
  int pci_piix3_xen_ide_unplug(DeviceState *dev, bool aux)
  {
  PCIIDEState *pci_ide;
-DriveInfo *di;
  int i;
  IDEDevice *idedev;
+IDEBus *idebus;
+BlockBackend *blk;
  
  pci_ide = PCI_IDE(dev);
  
  for (i = aux ? 1 : 0; i < 4; i++) {

-di = drive_get_by_index(IF_IDE, i);
-if (di != NULL && !di->media_cd) {
-BlockBackend *blk = blk_by_legacy_dinfo(di);
-DeviceState *ds = blk_get_attached_dev(blk);
+idebus = &pci_ide->bus[i / 2];
+blk = idebus->ifs[i % 2].blk;
  
-blk_drain(blk);

-blk_flush(blk);
-
-if (ds) {
-blk_detach_dev(blk, ds);
-}
-pci_ide->bus[di->bus].ifs[di->unit].blk = NULL;
+if (blk && idebus->ifs[i % 2].drive_kind != IDE_CD) {
  if (!(i % 2)) {
-idedev = pci_ide->bus[di->bus].master;
+idedev = idebus->master;
  } else {
-idedev = pci_ide->bus[di->bus].slave;
+idedev = idebus->slave;
  }
+
+blk_drain(blk);
+blk_flush(blk);
+
+blk_detach_dev(blk, DEVICE(idedev));
+idebus->ifs[i % 2].blk = NULL;
  idedev->conf.blk = NULL;
  monitor_remove_blk(blk);
  blk_unref(blk);






Re: [PATCH 1/3] iotests.py: Fix type check errors in wait_migration()

2020-10-27 Thread John Snow

On 10/27/20 12:38 PM, Kevin Wolf wrote:

Commit 1847a4a8c20 clarified that event_wait() can return None (though
only with timeout=0) and commit f12a282ff47 annotated it as returning
Optional[QMPMessage].

Type checks in wait_migration() fail because of the unexpected optional
return type:

iotests.py:750: error: Value of type variable "Msg" of "log" cannot be 
"Optional[Dict[str, Any]]"
iotests.py:751: error: Value of type "Optional[Dict[str, Any]]" is not indexable
iotests.py:754: error: Value of type "Optional[Dict[str, Any]]" is not indexable

Fortunately, the non-zero default timeout is used in the event_wait()
call, so we can make mypy happy by just asserting this.

Signed-off-by: Kevin Wolf 


Reviewed-by: John Snow 


---
  tests/qemu-iotests/iotests.py | 4 
  1 file changed, 4 insertions(+)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 63d2ace93c..28388a0fbc 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -747,6 +747,10 @@ class VM(qtest.QEMUQtestMachine):
  def wait_migration(self, expect_runstate: Optional[str]) -> bool:
  while True:
  event = self.event_wait('MIGRATION')
+# We use the default timeout, and with a timeout, event_wait()
+# never returns None
+assert event
+
  log(event, filters=[filter_qmp_event])
  if event['data']['status'] in ('completed', 'failed'):
  break



I tried, briefly, to see if I could overload the function to mypy to 
make it Do The Right Thing, but I don't think mypy supports overloading 
on float literals, or not well.


I tried to do this:

@overload
def events_wait(self, events: Sequence[Tuple[str, Any]]) -> QMPMessage: ...

@overload
def events_wait(self, events: Sequence[Tuple[str, Any]],
timeout: Literal[0]) -> Optional[QMPMessage]: ...

@overload
def events_wait(self, events: Sequence[Tuple[str, Any]],
timeout: float = 60.0) -> QMPMessage: ...

but ultimately mypy doesn't like this:

qemu/machine/machine.py:655: error: Overloaded function implementation 
does not accept all possible arguments of signature 2

Found 1 error in 1 file (checked 7 source files)

Trying literal 0.0 works even less well, because the Literal system does 
not appear to like floats at all. Hmph.


... So much for trying to be clever about this, I guess.

(The event system is pretty wonky anyway; especially type-wise. I think 
it's in need of an overhaul, so I'll put it on the list of things to 
investigate at some point. There might be something nice that can be 
done with asyncio and events that might make more sense type-wise. 
Problems for later.)





Re: [PATCH 0/2] iotests/291: Two fixes

2020-10-27 Thread Eric Blake
On 10/27/20 11:44 AM, Max Reitz wrote:
> Hi,
> 
> This series fixes two things about 291, one of which is actually a
> problem right now (patch 1), and one of which is more or less of a style
> question currently, but will become a problem with my FUSE export series
> (patch 2).
> 
> (The issues were different enough that I decided to split the fixes into
> two patches.)
> 
> 
> Max Reitz (2):
>   iotests/291: Filter irrelevant parts of img-info
>   iotests/291: Stop NBD server

Series:
Reviewed-by: Eric Blake 

I'm happy to include this on my NBD pull request for soft freeze.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [PATCH v2] qcow2: Document and enforce the QCowL2Meta invariants

2020-10-27 Thread Alberto Garcia
ping

On Wed 07 Oct 2020 06:13:23 PM CEST, Alberto Garcia wrote:
> The QCowL2Meta structure is used to store information about a part of
> a write request that touches clusters that need changes in their L2
> entries. This happens with newly-allocated clusters or subclusters.
>
> This structure has changed a bit since it was first created and its
> current documentation is not quite up-to-date.
>
> A write request can span a region consisting of a combination of
> clusters of different types, and qcow2_alloc_host_offset() can
> repeatedly call handle_copied() and handle_alloc() to add more
> clusters to the mix as long as they all are contiguous on the image
> file.
>
> Because of this a write request has a list of QCowL2Meta structures,
> one for each part of the request that needs changes in the L2
> metadata.
>
> Each one of them spans nb_clusters and has two copy-on-write regions
> located immediately before and after the middle region touched by that
> part of the write request. Even when those regions themselves are
> empty their offsets must be correct because they are used to know the
> location of the middle region.
>
> This was not always the case but it is not a problem anymore
> because the only two places where QCowL2Meta structures are created
> (calculate_l2_meta() and qcow2_co_truncate()) ensure that the
> copy-on-write regions are correctly defined, and so do assertions like
> the ones in perform_cow().
>
> The conditional initialization of the 'written_to' variable is
> therefore unnecessary and is removed by this patch.
>
> Signed-off-by: Alberto Garcia 
> Reviewed-by: Eric Blake 
> Reviewed-by: Vladimir Sementsov-Ogievskiy 



Re: [PATCH 08/25] block/nvme: Simplify device reset

2020-10-27 Thread Keith Busch
On Tue, Oct 27, 2020 at 04:53:31PM +0100, Philippe Mathieu-Daudé wrote:
> On 10/27/20 3:58 PM, Keith Busch wrote:
> > On Tue, Oct 27, 2020 at 02:55:30PM +0100, Philippe Mathieu-Daudé wrote:
> >> Avoid multiple endianness conversion by using device endianness.
> >>
> >> Signed-off-by: Philippe Mathieu-Daudé 
> >> ---
> >>  block/nvme.c | 2 +-
> >>  1 file changed, 1 insertion(+), 1 deletion(-)
> >>
> >> diff --git a/block/nvme.c b/block/nvme.c
> >> index e95d59d3126..be14350f959 100644
> >> --- a/block/nvme.c
> >> +++ b/block/nvme.c
> >> @@ -755,7 +755,7 @@ static int nvme_init(BlockDriverState *bs, const char 
> >> *device, int namespace,
> >>  timeout_ms = MIN(500 * NVME_CAP_TO(cap), 30000);
> >>  
> >>  /* Reset device to get a clean state. */
> >> -regs->cc = cpu_to_le32(le32_to_cpu(regs->cc) & 0xFE);
> >> +regs->cc &= const_le32(0xFE);
> > 
> > This doesn't look right. The 'regs' is an MMIO address, correct? Memory
> > mappings use the CPU native access.
> 
> cc is little-endian uint32_t.

Well, yes and no. PCI is defined as a little endian transport, so all
CPUs have to automatically convert from their native format when
accessing memory mapped addresses over that transport, so you always use
the arch native format from the host software.

This isn't just for CC. This includes all memory mapped registers, so
this driver's CSTS, AQA, doorbells, etc... shouldn't have any endian
swapping.

See also: every other nvme driver. :)



[PATCH 2/2] iotests/291: Stop NBD server

2020-10-27 Thread Max Reitz
nbd_server_start_unix_socket() includes an implicit nbd_server_stop(),
but we still need an explicit one at the end of the test (where there
follows no next nbd_server_start_unix_socket()), or qemu-nbd will linger
until the test exits.

This will become important when enabling this test to run on FUSE
exports, because then the export (which is the image used by qemu-nbd)
will go away before qemu-nbd exits, which will lead to qemu-nbd
complaining that it cannot flush the bitmaps in the image.

Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/291 | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/qemu-iotests/291 b/tests/qemu-iotests/291
index 77fa38f93d..b7320bc7ad 100755
--- a/tests/qemu-iotests/291
+++ b/tests/qemu-iotests/291
@@ -128,6 +128,8 @@ nbd_server_start_unix_socket -r -f qcow2 -B b3 "$TEST_IMG"
 $QEMU_IMG map --output=json --image-opts \
 "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:b3" | _filter_qemu_img_map
 
+nbd_server_stop
+
 # success, all done
 echo '*** done'
 rm -f $seq.full
-- 
2.26.2




Re: [PATCH 3/3] iotests: Use Python 3 style super()

2020-10-27 Thread John Snow

On 10/27/20 12:38 PM, Kevin Wolf wrote:

pylint complains about the use of super with the current class and
instance as arguments in VM.__init__():

iotests.py:546:8: R1725: Consider using Python 3 style super() without 
arguments (super-with-arguments)

No reason not to follow the advice and make it happy, so let's do this.

Signed-off-by: Kevin Wolf 
---
  tests/qemu-iotests/iotests.py | 8 
  1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 28388a0fbc..814804a4c6 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -543,10 +543,10 @@ class VM(qtest.QEMUQtestMachine):
  
  def __init__(self, path_suffix=''):

  name = "qemu%s-%d" % (path_suffix, os.getpid())
-super(VM, self).__init__(qemu_prog, qemu_opts, name=name,
- test_dir=test_dir,
- socket_scm_helper=socket_scm_helper,
- sock_dir=sock_dir)
+super().__init__(qemu_prog, qemu_opts, name=name,
+ test_dir=test_dir,
+ socket_scm_helper=socket_scm_helper,
+ sock_dir=sock_dir)
  self._num_drives = 0
  
  def add_object(self, opts):




Reviewed-by: John Snow 




Re: [PATCH 2/3] iotests: Disable unsubscriptable-object in pylint

2020-10-27 Thread John Snow

On 10/27/20 12:38 PM, Kevin Wolf wrote:

When run with Python 3.9, pylint incorrectly warns about things like
Optional[foo] because it doesn't recognise Optional as unsubscriptable.
This is a known pylint bug:

 https://github.com/PyCQA/pylint/issues/3882

Just disable this check to get rid of the warnings.

Disabling this shouldn't make us miss any real bug because mypy also
has a similar check ("... is not indexable").

Signed-off-by: Kevin Wolf 
---
  tests/qemu-iotests/pylintrc | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/tests/qemu-iotests/pylintrc b/tests/qemu-iotests/pylintrc
index 5481afe528..cd3702e23c 100644
--- a/tests/qemu-iotests/pylintrc
+++ b/tests/qemu-iotests/pylintrc
@@ -17,6 +17,8 @@ disable=invalid-name,
  too-many-lines,
  too-many-locals,
  too-many-public-methods,
+# pylint warns about Optional[] etc. as unsubscriptable in 3.9
+unsubscriptable-object,
  # These are temporary, and should be removed:
  missing-docstring,
  



Reviewed-by: John Snow 




[PATCH 0/2] iotests/291: Two fixes

2020-10-27 Thread Max Reitz
Hi,

This series fixes two things about 291, one of which is actually a
problem right now (patch 1), and one of which is more or less of a style
question currently, but will become a problem with my FUSE export series
(patch 2).

(The issues were different enough that I decided to split the fixes into
two patches.)


Max Reitz (2):
  iotests/291: Filter irrelevant parts of img-info
  iotests/291: Stop NBD server

 tests/qemu-iotests/291 | 16 +---
 tests/qemu-iotests/291.out | 20 
 2 files changed, 13 insertions(+), 23 deletions(-)

-- 
2.26.2




Re: [PATCH v12 13/14] stream: skip filters when writing backing file name to QCOW2 header

2020-10-27 Thread Vladimir Sementsov-Ogievskiy

27.10.2020 19:42, Andrey Shinkevich wrote:

On 27.10.2020 19:21, Vladimir Sementsov-Ogievskiy wrote:

27.10.2020 19:01, Andrey Shinkevich wrote:

On 27.10.2020 18:09, Vladimir Sementsov-Ogievskiy wrote:

22.10.2020 21:13, Andrey Shinkevich wrote:

Avoid writing a filter JSON file name and a filter format name to QCOW2
image when the backing file is changed after the block stream job.
A user is still able to assign the 'backing-file' parameter for a
block-stream job keeping in mind the possible issue mentioned above.
If the user does not specify the 'backing-file' parameter, QEMU will
assign it automatically.

Signed-off-by: Andrey Shinkevich 
---
  block/stream.c | 15 +--
  blockdev.c |  9 ++---
  2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index e0540ee..1ba74ab 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -65,6 +65,7 @@ static int stream_prepare(Job *job)
  BlockDriverState *bs = blk_bs(bjob->blk);
  BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
  BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
+    BlockDriverState *base_unfiltered = NULL;
  Error *local_err = NULL;
  int ret = 0;
@@ -75,8 +76,18 @@ static int stream_prepare(Job *job)
  const char *base_id = NULL, *base_fmt = NULL;
  if (base) {
  base_id = s->backing_file_str;
-    if (base->drv) {
-    base_fmt = base->drv->format_name;
+    if (base_id) {
+    if (base->drv) {
+    base_fmt = base->drv->format_name;


hmm. this doesn't make real sense: so, we assume that user specified 
backing_file_str, which may not relate to base, but we use 
base->drv->format_name? But it may be name of the filter driver, which would be 
wrong..

Any ideas?

1. we can use base_fmt=NULL, to provoke probing on next open of the qcow2 file..


I would choose this item #1 but have to check the probing code logic... 
Particularly, I do not remember now if the probing is able to recognize a 
protocol.
The logic for the format_name in the existing QEMU code (I have kept it here in 
the patch) is a slippery way for an imprudent user. That's why I staked on the 
backing_file_str deprecation in the previous version.


2. we can do probing now
3. we can at least check, if backing_file_str == 


Not bad for the sanity check but we will search a node by the file name again - 
not good ((


Not search, but only check one very likely option.


Yes, just strcmp(). And why a user may not merely specify a desired backing 
file as the base?


*shrug*





Additionally to 1. or 3. (or combined), or even keeping things as is (i.e. 
wrong, but it is preexisting), we can:

  - add backing-format argument to qapi as pair for backing-file
  - deprecate using backing-file without backing-format.

Then, after the deprecation period, we'll have correct code. This may be done 
separately.




base_unfiltered->filename, in this case we can use 
base_unfiltered->drv->format_name



+    }
+    } else {
+    base_unfiltered = bdrv_skip_filters(base);
+    if (base_unfiltered) {
+    base_id = base_unfiltered->filename;
+    if (base_unfiltered->drv) {
+    base_fmt = base_unfiltered->drv->format_name;
+    }
+    }
  }
  }
  bdrv_set_backing_hd(unfiltered_bs, base, &local_err);
diff --git a/blockdev.c b/blockdev.c
index c917625..0e9c783 100644
--- a/blockdev.c
+++ b/blockdev.c


[...]


-    stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
+    stream_start(has_job_id ? job_id : NULL, bs, base_bs,
+ has_backing_file ? backing_file : NULL,


backing_file should be NULL if has_backing_file is false, so you can use just 
backing_file instead of ternary operator.



Yes, if reliable. I have kept it consistent with the ternary operator at the 
first parameter above.

Andrey


   job_flags, has_speed ? speed : 0, on_error,
   filter_node_name, &local_err);
  if (local_err) {










--
Best regards,
Vladimir



[PATCH 3/3] iotests: Use Python 3 style super()

2020-10-27 Thread Kevin Wolf
pylint complains about the use of super with the current class and
instance as arguments in VM.__init__():

iotests.py:546:8: R1725: Consider using Python 3 style super() without 
arguments (super-with-arguments)

No reason not to follow the advice and make it happy, so let's do this.

Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/iotests.py | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 28388a0fbc..814804a4c6 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -543,10 +543,10 @@ class VM(qtest.QEMUQtestMachine):
 
 def __init__(self, path_suffix=''):
 name = "qemu%s-%d" % (path_suffix, os.getpid())
-super(VM, self).__init__(qemu_prog, qemu_opts, name=name,
- test_dir=test_dir,
- socket_scm_helper=socket_scm_helper,
- sock_dir=sock_dir)
+super().__init__(qemu_prog, qemu_opts, name=name,
+ test_dir=test_dir,
+ socket_scm_helper=socket_scm_helper,
+ sock_dir=sock_dir)
 self._num_drives = 0
 
 def add_object(self, opts):
-- 
2.28.0




[PATCH 1/3] iotests.py: Fix type check errors in wait_migration()

2020-10-27 Thread Kevin Wolf
Commit 1847a4a8c20 clarified that event_wait() can return None (though
only with timeout=0) and commit f12a282ff47 annotated it as returning
Optional[QMPMessage].

Type checks in wait_migration() fail because of the unexpected optional
return type:

iotests.py:750: error: Value of type variable "Msg" of "log" cannot be 
"Optional[Dict[str, Any]]"
iotests.py:751: error: Value of type "Optional[Dict[str, Any]]" is not indexable
iotests.py:754: error: Value of type "Optional[Dict[str, Any]]" is not indexable

Fortunately, the non-zero default timeout is used in the event_wait()
call, so we can make mypy happy by just asserting this.

Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/iotests.py | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 63d2ace93c..28388a0fbc 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -747,6 +747,10 @@ class VM(qtest.QEMUQtestMachine):
 def wait_migration(self, expect_runstate: Optional[str]) -> bool:
 while True:
 event = self.event_wait('MIGRATION')
+# We use the default timeout, and with a timeout, event_wait()
+# never returns None
+assert event
+
 log(event, filters=[filter_qmp_event])
 if event['data']['status'] in ('completed', 'failed'):
 break
-- 
2.28.0




[PATCH 1/2] iotests/291: Filter irrelevant parts of img-info

2020-10-27 Thread Max Reitz
We need to let _img_info emit the format-specific information so we get
the list of bitmaps we want, but we do not need anything but the
bitmaps.  So filter out everything that is irrelevant to us.  (Ideally,
this would be a generalized function in common.filters that takes a list
of things to keep, but that would require implementing an anti-bitmap
filter, which would be hard, and which we do not need here.  So that is
why this function is just a local hack.)

This lets 291 pass with qcow2 options like refcount_bits or data_file
again.

Fixes: 14f16bf9474c860ecc127a66a86961942319f7af
   ("qemu-img: Support bitmap --merge into backing image")
Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/291 | 14 +++---
 tests/qemu-iotests/291.out | 20 
 2 files changed, 11 insertions(+), 23 deletions(-)

diff --git a/tests/qemu-iotests/291 b/tests/qemu-iotests/291
index 4f837b2056..77fa38f93d 100755
--- a/tests/qemu-iotests/291
+++ b/tests/qemu-iotests/291
@@ -42,6 +42,14 @@ _require_command QEMU_NBD
 # compat=0.10 does not support bitmaps
 _unsupported_imgopts 'compat=0.10'
 
+# Filter irrelevant format-specific information from the qemu-img info
+# output (we only want the bitmaps, basically)
+_filter_irrelevant_img_info()
+{
+grep -v -e 'compat' -e 'compression type' -e 'data file' -e 'extended l2' \
+-e 'lazy refcounts' -e 'refcount bits'
+}
+
 echo
 echo "=== Initial image setup ==="
 echo
@@ -79,7 +87,7 @@ echo
 
 # Only bitmaps from the active layer are copied
 $QEMU_IMG convert --bitmaps -O qcow2 "$TEST_IMG.orig" "$TEST_IMG"
-_img_info --format-specific
+_img_info --format-specific | _filter_irrelevant_img_info
 # But we can also merge in bitmaps from other layers.  This test is a bit
 # contrived to cover more code paths, in reality, you could merge directly
 # into b0 without going through tmp
@@ -89,7 +97,7 @@ $QEMU_IMG bitmap --add --merge b0 -b "$TEST_IMG.base" -F 
$IMGFMT \
 $QEMU_IMG bitmap --merge tmp -f $IMGFMT "$TEST_IMG" b0
 $QEMU_IMG bitmap --remove --image-opts \
 driver=$IMGFMT,file.driver=file,file.filename="$TEST_IMG" tmp
-_img_info --format-specific
+_img_info --format-specific | _filter_irrelevant_img_info
 
 echo
 echo "=== Merge from top layer into backing image ==="
@@ -98,7 +106,7 @@ echo
 $QEMU_IMG rebase -u -F qcow2 -b "$TEST_IMG.base" "$TEST_IMG"
 $QEMU_IMG bitmap --add --merge b2 -b "$TEST_IMG" -F $IMGFMT \
  -f $IMGFMT "$TEST_IMG.base" b3
-_img_info --format-specific --backing-chain
+_img_info --format-specific --backing-chain | _filter_irrelevant_img_info
 
 echo
 echo "=== Check bitmap contents ==="
diff --git a/tests/qemu-iotests/291.out b/tests/qemu-iotests/291.out
index 3990f7aacc..23411c0ff4 100644
--- a/tests/qemu-iotests/291.out
+++ b/tests/qemu-iotests/291.out
@@ -26,9 +26,6 @@ file format: IMGFMT
 virtual size: 10 MiB (10485760 bytes)
 cluster_size: 65536
 Format specific information:
-compat: 1.1
-compression type: zlib
-lazy refcounts: false
 bitmaps:
 [0]:
 flags:
@@ -39,17 +36,12 @@ Format specific information:
 [0]: auto
 name: b2
 granularity: 65536
-refcount bits: 16
 corrupt: false
-extended l2: false
 image: TEST_DIR/t.IMGFMT
 file format: IMGFMT
 virtual size: 10 MiB (10485760 bytes)
 cluster_size: 65536
 Format specific information:
-compat: 1.1
-compression type: zlib
-lazy refcounts: false
 bitmaps:
 [0]:
 flags:
@@ -64,9 +56,7 @@ Format specific information:
 flags:
 name: b0
 granularity: 65536
-refcount bits: 16
 corrupt: false
-extended l2: false
 
 === Merge from top layer into backing image ===
 
@@ -77,9 +67,6 @@ cluster_size: 65536
 backing file: TEST_DIR/t.IMGFMT.base
 backing file format: IMGFMT
 Format specific information:
-compat: 1.1
-compression type: zlib
-lazy refcounts: false
 bitmaps:
 [0]:
 flags:
@@ -94,18 +81,13 @@ Format specific information:
 flags:
 name: b0
 granularity: 65536
-refcount bits: 16
 corrupt: false
-extended l2: false
 
 image: TEST_DIR/t.IMGFMT.base
 file format: IMGFMT
 virtual size: 10 MiB (10485760 bytes)
 cluster_size: 65536
 Format specific information:
-compat: 1.1
-compression type: zlib
-lazy refcounts: false
 bitmaps:
 [0]:
 flags:
@@ -117,9 +99,7 @@ Format specific information:
 [0]: auto
 name: b3
 granularity: 65536
-refcount bits: 16
 corrupt: false
-extended l2: false
 
 === Check bitmap contents ===
 
-- 
2.26.2




Re: [PATCH v12 13/14] stream: skip filters when writing backing file name to QCOW2 header

2020-10-27 Thread Andrey Shinkevich

On 27.10.2020 19:21, Vladimir Sementsov-Ogievskiy wrote:

27.10.2020 19:01, Andrey Shinkevich wrote:

On 27.10.2020 18:09, Vladimir Sementsov-Ogievskiy wrote:

22.10.2020 21:13, Andrey Shinkevich wrote:

Avoid writing a filter JSON file name and a filter format name to QCOW2
image when the backing file is changed after the block stream job.
A user is still able to assign the 'backing-file' parameter for a
block-stream job keeping in mind the possible issue mentioned above.
If the user does not specify the 'backing-file' parameter, QEMU will
assign it automatically.

Signed-off-by: Andrey Shinkevich 
---
  block/stream.c | 15 +--
  blockdev.c |  9 ++---
  2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index e0540ee..1ba74ab 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -65,6 +65,7 @@ static int stream_prepare(Job *job)
  BlockDriverState *bs = blk_bs(bjob->blk);
  BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
  BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
+    BlockDriverState *base_unfiltered = NULL;
  Error *local_err = NULL;
  int ret = 0;
@@ -75,8 +76,18 @@ static int stream_prepare(Job *job)
  const char *base_id = NULL, *base_fmt = NULL;
  if (base) {
  base_id = s->backing_file_str;
-    if (base->drv) {
-    base_fmt = base->drv->format_name;
+    if (base_id) {
+    if (base->drv) {
+    base_fmt = base->drv->format_name;


hmm. this doesn't make real sense: so, we assume that user specified 
backing_file_str, which may not relate to base, but we use 
base->drv->format_name? But it may be name of the filter driver, 
which would be wrong..


Any ideas?

1. we can use base_fmt=NULL, to provoke probing on next open of the 
qcow2 file..


I would choose this item #1 but have to check the probing code 
logic... Particularly, I do not remember now if the probing is able to 
recognize a protocol.
The logic for the format_name in the existing QEMU code (I have kept it 
here in the patch) is a slippery way for an imprudent user. That's why 
I staked on the backing_file_str deprecation in the previous version.



2. we can do probing now
3. we can at least check, if backing_file_str == 


Not bad for the sanity check but we will search a node by the file 
name again - not good ((


Not search, but only check one very likely option.


Yes, just strcmp(). And why a user may not merely specify a desired 
backing file as the base?




Additionally to 1. or 3. (or combined), or even keeping things as is 
(i.e. wrong, but it is preexisting), we can:


  - add backing-format argument to qapi as pair for backing-file
  - deprecate using backing-file without backing-format.

Then, after the deprecation period, we'll have correct code. This may be done 
separately.




base_unfiltered->filename, in this case we can use 
base_unfiltered->drv->format_name




+    }
+    } else {
+    base_unfiltered = bdrv_skip_filters(base);
+    if (base_unfiltered) {
+    base_id = base_unfiltered->filename;
+    if (base_unfiltered->drv) {
+    base_fmt = base_unfiltered->drv->format_name;
+    }
+    }
  }
  }
  bdrv_set_backing_hd(unfiltered_bs, base, &local_err);
diff --git a/blockdev.c b/blockdev.c
index c917625..0e9c783 100644
--- a/blockdev.c
+++ b/blockdev.c


[...]


-    stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
+    stream_start(has_job_id ? job_id : NULL, bs, base_bs,
+ has_backing_file ? backing_file : NULL,


backing_file should be NULL if has_backing_file is false, so you can 
use just backing_file instead of ternary operator.




Yes, if reliable. I have kept it consistent with the ternary 
operator at the first parameter above.


Andrey


   job_flags, has_speed ? speed : 0, on_error,
   filter_node_name, &local_err);
  if (local_err) {











[PATCH 2/3] iotests: Disable unsubscriptable-object in pylint

2020-10-27 Thread Kevin Wolf
When run with Python 3.9, pylint incorrectly warns about things like
Optional[foo] because it doesn't recognise Optional as subscriptable.
This is a known pylint bug:

https://github.com/PyCQA/pylint/issues/3882

Just disable this check to get rid of the warnings.

Disabling this shouldn't make us miss any real bug because mypy also
has a similar check ("... is not indexable").

Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/pylintrc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/qemu-iotests/pylintrc b/tests/qemu-iotests/pylintrc
index 5481afe528..cd3702e23c 100644
--- a/tests/qemu-iotests/pylintrc
+++ b/tests/qemu-iotests/pylintrc
@@ -17,6 +17,8 @@ disable=invalid-name,
 too-many-lines,
 too-many-locals,
 too-many-public-methods,
+# pylint warns about Optional[] etc. as unsubscriptable in 3.9
+unsubscriptable-object,
 # These are temporary, and should be removed:
 missing-docstring,
 
-- 
2.28.0




Re: [PATCH v12 13/14] stream: skip filters when writing backing file name to QCOW2 header

2020-10-27 Thread Vladimir Sementsov-Ogievskiy

27.10.2020 19:01, Andrey Shinkevich wrote:

On 27.10.2020 18:09, Vladimir Sementsov-Ogievskiy wrote:

22.10.2020 21:13, Andrey Shinkevich wrote:

Avoid writing a filter JSON file name and a filter format name to QCOW2
image when the backing file is changed after the block stream job.
A user is still able to assign the 'backing-file' parameter for a
block-stream job keeping in mind the possible issue mentioned above.
If the user does not specify the 'backing-file' parameter, QEMU will
assign it automatically.

Signed-off-by: Andrey Shinkevich 
---
  block/stream.c | 15 +--
  blockdev.c |  9 ++---
  2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index e0540ee..1ba74ab 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -65,6 +65,7 @@ static int stream_prepare(Job *job)
  BlockDriverState *bs = blk_bs(bjob->blk);
  BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
  BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
+    BlockDriverState *base_unfiltered = NULL;
  Error *local_err = NULL;
  int ret = 0;
@@ -75,8 +76,18 @@ static int stream_prepare(Job *job)
  const char *base_id = NULL, *base_fmt = NULL;
  if (base) {
  base_id = s->backing_file_str;
-    if (base->drv) {
-    base_fmt = base->drv->format_name;
+    if (base_id) {
+    if (base->drv) {
+    base_fmt = base->drv->format_name;


hmm. this doesn't make real sense: so, we assume that user specified 
backing_file_str, which may not relate to base, but we use 
base->drv->format_name? But it may be name of the filter driver, which would be 
wrong..

Any ideas?

1. we can use base_fmt=NULL, to provoke probing on next open of the qcow2 file..


I would choose this item #1 but have to check the probing code logic... 
Particularly, I do not remember now if the probing is able to recognize a 
protocol.
The logic for the format_name in the existing QEMU code (I have kept it here in 
the patch) is a slippery way for an imprudent user. That's why I staked on the 
backing_file_str deprecation in the previous version.


2. we can do probing now
3. we can at least check, if backing_file_str == 


Not bad for the sanity check but we will search a node by the file name again - 
not good ((


Not search, but only check one very likely option.

Additionally to 1. or 3. (or combined), or even keeping things as is (i.e. 
wrong, but it is preexisting), we can:

 - add backing-format argument to qapi as pair for backing-file
 - deprecate using backing-file without backing-format.

Then, after the deprecation period, we'll have correct code. This may be done 
separately.




base_unfiltered->filename, in this case we can use 
base_unfiltered->drv->format_name



+    }
+    } else {
+    base_unfiltered = bdrv_skip_filters(base);
+    if (base_unfiltered) {
+    base_id = base_unfiltered->filename;
+    if (base_unfiltered->drv) {
+    base_fmt = base_unfiltered->drv->format_name;
+    }
+    }
  }
  }
  bdrv_set_backing_hd(unfiltered_bs, base, &local_err);
diff --git a/blockdev.c b/blockdev.c
index c917625..0e9c783 100644
--- a/blockdev.c
+++ b/blockdev.c


[...]


-    stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
+    stream_start(has_job_id ? job_id : NULL, bs, base_bs,
+ has_backing_file ? backing_file : NULL,


backing_file should be NULL if has_backing_file is false, so you can use just 
backing_file instead of ternary operator.



Yes, if reliable. I have kept it consistent with the ternary operator at the 
first parameter above.

Andrey


   job_flags, has_speed ? speed : 0, on_error,
   filter_node_name, &local_err);
  if (local_err) {







--
Best regards,
Vladimir



Re: [PATCH v12 14/14] block: apply COR-filter to block-stream jobs

2020-10-27 Thread Vladimir Sementsov-Ogievskiy

22.10.2020 21:13, Andrey Shinkevich wrote:

This patch completes the series with the COR-filter insertion for
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
which will reduce disk overuse.
The COR-filter insertion incurs changes in the iotests case
245:test_block_stream_4 that reopens the backing chain during a
block-stream job. There are changes in the iotests #030 as well.
The iotests case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. The base backing node for one job is the top node for another
job. It may change due to the filter node inserted into the backing
chain while both jobs are running. Another issue is that the parts of
the backing chain are being frozen by the running job and may not be
changed by the concurrent job when needed. The concept of the parallel
jobs with common nodes is no longer considered vital.

Signed-off-by: Andrey Shinkevich 
---
  block/stream.c | 98 ++
  tests/qemu-iotests/030 | 51 +++-
  tests/qemu-iotests/030.out |  4 +-
  tests/qemu-iotests/141.out |  2 +-
  tests/qemu-iotests/245 | 22 +++
  5 files changed, 87 insertions(+), 90 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index 1ba74ab..f6ed315 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -17,8 +17,10 @@
  #include "block/blockjob_int.h"
  #include "qapi/error.h"
  #include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
  #include "qemu/ratelimit.h"
  #include "sysemu/block-backend.h"
+#include "block/copy-on-read.h"
  
  enum {

  /*
@@ -33,6 +35,8 @@ typedef struct StreamBlockJob {
  BlockJob common;
  BlockDriverState *base_overlay; /* COW overlay (stream from this) */
  BlockDriverState *above_base;   /* Node directly above the base */
+BlockDriverState *cor_filter_bs;
+BlockDriverState *target_bs;
  BlockdevOnError on_error;
  char *backing_file_str;
  bool bs_read_only;
@@ -44,8 +48,7 @@ static int coroutine_fn stream_populate(BlockBackend *blk,
  {
  assert(bytes < SIZE_MAX);
  
-return blk_co_preadv(blk, offset, bytes, NULL,

- BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
+return blk_co_preadv(blk, offset, bytes, NULL, BDRV_REQ_PREFETCH);
  }
  
  static void stream_abort(Job *job)

@@ -53,23 +56,20 @@ static void stream_abort(Job *job)
  StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
  
  if (s->chain_frozen) {

-BlockJob *bjob = &s->common;
-bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
  }
  }
  
  static int stream_prepare(Job *job)

  {
  StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
-BlockJob *bjob = &s->common;
-BlockDriverState *bs = blk_bs(bjob->blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
  BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
  BlockDriverState *base_unfiltered = NULL;
  Error *local_err = NULL;
  int ret = 0;
  
-bdrv_unfreeze_backing_chain(bs, s->above_base);

+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
  s->chain_frozen = false;
  
  if (bdrv_cow_child(unfiltered_bs)) {

@@ -105,15 +105,16 @@ static void stream_clean(Job *job)
  {
  StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
  BlockJob *bjob = &s->common;
-BlockDriverState *bs = blk_bs(bjob->blk);
  
  /* Reopen the image back in read-only mode if necessary */

  if (s->bs_read_only) {
  /* Give up write permissions before making it read-only */
  blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
-bdrv_reopen_set_read_only(bs, true, NULL);
+bdrv_reopen_set_read_only(s->target_bs, true, NULL);
  }
  
+bdrv_cor_filter_drop(s->cor_filter_bs);

+
  g_free(s->backing_file_str);
  }
  
@@ -121,9 +122,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)

  {
  StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
  BlockBackend *blk = s->common.blk;
-BlockDriverState *bs = blk_bs(blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
-bool enable_cor = !bdrv_cow_child(s->base_overlay);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
  int64_t len;
  int64_t offset = 0;
  uint64_t delay_ns = 0;
@@ -135,21 +134,12 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
  return 0;
  }
  
-len = bdrv_getlength(bs);

+len = bdrv_getlength(s->target_bs);
  if (len < 0) {
  return len;
  }
  job_progress_set_remaining(&s->common.job, len);
  
-/* Turn on 

RE: [PATCH v2] xen: rework pci_piix3_xen_ide_unplug

2020-10-27 Thread Paul Durrant
> -Original Message-
> From: Anthony PERARD 
> Sent: 27 October 2020 15:41
> To: qemu-de...@nongnu.org
> Cc: Anthony PERARD ; Paul Durrant ; 
> Stefano Stabellini
> ; John Snow ; qemu-block@nongnu.org
> Subject: [PATCH v2] xen: rework pci_piix3_xen_ide_unplug
> 
> From: Anthony PERARD 
> 
> This is to allow IDE disks to be unplugged when added to QEMU via:
> -drive file=/root/disk_file,if=none,id=ide-disk0,format=raw
> -device ide-hd,drive=ide-disk0,bus=ide.0,unit=0
> 
> as the current code only works for disk added with:
> -drive file=/root/disk_file,if=ide,index=0,media=disk,format=raw
> 
> Since the code already has the IDE controller as `dev`, we don't need
> to use the legacy DriveInfo to find all the drives we want to unplug.
> We can simply use `blk` from the controller, as it was kind of already
> assumed to be the same, by setting it to NULL.
> 
> Signed-off-by: Anthony PERARD 
> 

Reviewed-by: Paul Durrant 

> ---
> v2: coding style
> 
> CC: Paul Durrant 
> CC: Stefano Stabellini 
> ---
>  hw/ide/piix.c | 27 +--
>  1 file changed, 13 insertions(+), 14 deletions(-)
> 
> diff --git a/hw/ide/piix.c b/hw/ide/piix.c
> index b402a936362b..b9860e35a5c4 100644
> --- a/hw/ide/piix.c
> +++ b/hw/ide/piix.c
> @@ -164,30 +164,29 @@ static void pci_piix_ide_realize(PCIDevice *dev, Error 
> **errp)
>  int pci_piix3_xen_ide_unplug(DeviceState *dev, bool aux)
>  {
>  PCIIDEState *pci_ide;
> -DriveInfo *di;
>  int i;
>  IDEDevice *idedev;
> +IDEBus *idebus;
> +BlockBackend *blk;
> 
>  pci_ide = PCI_IDE(dev);
> 
>  for (i = aux ? 1 : 0; i < 4; i++) {
> -di = drive_get_by_index(IF_IDE, i);
> -if (di != NULL && !di->media_cd) {
> -BlockBackend *blk = blk_by_legacy_dinfo(di);
> -DeviceState *ds = blk_get_attached_dev(blk);
> +idebus = &pci_ide->bus[i / 2];
> +blk = idebus->ifs[i % 2].blk;
> 
> -blk_drain(blk);
> -blk_flush(blk);
> -
> -if (ds) {
> -blk_detach_dev(blk, ds);
> -}
> -pci_ide->bus[di->bus].ifs[di->unit].blk = NULL;
> +if (blk && idebus->ifs[i % 2].drive_kind != IDE_CD) {
>  if (!(i % 2)) {
> -idedev = pci_ide->bus[di->bus].master;
> +idedev = idebus->master;
>  } else {
> -idedev = pci_ide->bus[di->bus].slave;
> +idedev = idebus->slave;
>  }
> +
> +blk_drain(blk);
> +blk_flush(blk);
> +
> +blk_detach_dev(blk, DEVICE(idedev));
> +idebus->ifs[i % 2].blk = NULL;
>  idedev->conf.blk = NULL;
>  monitor_remove_blk(blk);
>  blk_unref(blk);
> --
> Anthony PERARD





Re: [PATCH v12 13/14] stream: skip filters when writing backing file name to QCOW2 header

2020-10-27 Thread Andrey Shinkevich

On 27.10.2020 18:09, Vladimir Sementsov-Ogievskiy wrote:

22.10.2020 21:13, Andrey Shinkevich wrote:

Avoid writing a filter JSON file name and a filter format name to QCOW2
image when the backing file is changed after the block stream job.
A user is still able to assign the 'backing-file' parameter for a
block-stream job keeping in mind the possible issue mentioned above.
If the user does not specify the 'backing-file' parameter, QEMU will
assign it automatically.

Signed-off-by: Andrey Shinkevich 
---
  block/stream.c | 15 +--
  blockdev.c |  9 ++---
  2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index e0540ee..1ba74ab 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -65,6 +65,7 @@ static int stream_prepare(Job *job)
  BlockDriverState *bs = blk_bs(bjob->blk);
  BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
  BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
+    BlockDriverState *base_unfiltered = NULL;
  Error *local_err = NULL;
  int ret = 0;
@@ -75,8 +76,18 @@ static int stream_prepare(Job *job)
  const char *base_id = NULL, *base_fmt = NULL;
  if (base) {
  base_id = s->backing_file_str;
-    if (base->drv) {
-    base_fmt = base->drv->format_name;
+    if (base_id) {
+    if (base->drv) {
+    base_fmt = base->drv->format_name;


hmm. this doesn't make real sense: so, we assume that user specified 
backing_file_str, which may not relate to base, but we use 
base->drv->format_name? But it may be name of the filter driver, which 
would be wrong..


Any ideas?

1. we can use base_fmt=NULL, to provoke probing on next open of the 
qcow2 file..


I would choose this item #1 but have to check the probing code logic... 
Particularly, I do not remember now if the probing is able to recognize 
a protocol.
The logic for the format_name in the existing QEMU code (I have kept it 
here in the patch) is a slippery way for an imprudent user. That's why I 
staked on the backing_file_str deprecation in the previous version.



2. we can do probing now
3. we can at least check, if backing_file_str == 


Not bad for the sanity check but we will search a node by the file name 
again - not good ((


base_unfiltered->filename, in this case we can use 
base_unfiltered->drv->format_name




+    }
+    } else {
+    base_unfiltered = bdrv_skip_filters(base);
+    if (base_unfiltered) {
+    base_id = base_unfiltered->filename;
+    if (base_unfiltered->drv) {
+    base_fmt = base_unfiltered->drv->format_name;
+    }
+    }
  }
  }
  bdrv_set_backing_hd(unfiltered_bs, base, &local_err);
diff --git a/blockdev.c b/blockdev.c
index c917625..0e9c783 100644
--- a/blockdev.c
+++ b/blockdev.c


[...]


-    stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
+    stream_start(has_job_id ? job_id : NULL, bs, base_bs,
+ has_backing_file ? backing_file : NULL,


backing_file should be NULL if has_backing_file is false, so you can use 
just backing_file instead of ternary operator.




Yes, if reliable. I have kept it consistent with the ternary operator 
at the first parameter above.


Andrey


   job_flags, has_speed ? speed : 0, on_error,
   filter_node_name, &local_err);
  if (local_err) {








[PATCH-for-5.2] block/nvme: Fix nvme_submit_command() on big-endian host

2020-10-27 Thread Philippe Mathieu-Daudé
The Completion Queue Command Identifier is a 16-bit value,
so nvme_submit_command() is unlikely to work on big-endian
hosts, as the relevant bits are truncated.

The "Completion Queue Entry: DW 2" describes it as:

  This identifier is assigned by host software when
  the command is submitted to the Submission

As this is just an opaque cookie, it is pointless to byte-swap it.

Fixes: bdd6a90a9e5 ("block: Add VFIO based NVMe driver")
Reported-by: Keith Busch 
Signed-off-by: Philippe Mathieu-Daudé 
---
Based-on: <20201027135547.374946-1-phi...@redhat.com>
---
 block/nvme.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index ff645eefe6a..d9b2245db40 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -343,7 +343,7 @@ static inline int nvme_translate_error(const NvmeCqe *c)
 trace_nvme_error(le32_to_cpu(c->result),
  le16_to_cpu(c->sq_head),
  le16_to_cpu(c->sq_id),
- le16_to_cpu(c->cid),
+ c->cid,
  le16_to_cpu(status));
 }
 switch (status) {
@@ -400,7 +400,7 @@ static bool nvme_process_completion(NVMeQueuePair *q)
 if (!q->cq.head) {
 q->cq_phase = !q->cq_phase;
 }
-cid = le16_to_cpu(c->cid);
+cid = c->cid;
 if (cid == 0 || cid > NVME_QUEUE_SIZE) {
 warn_report("NVMe: Unexpected CID in completion queue: %"PRIu32", "
 "queue size: %u", cid, NVME_QUEUE_SIZE);
@@ -468,7 +468,7 @@ static void nvme_submit_command(NVMeQueuePair *q, 
NVMeRequest *req,
 assert(!req->cb);
 req->cb = cb;
 req->opaque = opaque;
-cmd->cid = cpu_to_le32(req->cid);
+cmd->cid = req->cid;
 
 trace_nvme_submit_command(q->s, q->index, req->cid);
 nvme_trace_command(cmd);
-- 
2.26.2




Re: [PATCH 03/25] block/nvme: Report warning with warn_report()

2020-10-27 Thread Philippe Mathieu-Daudé
On 10/27/20 4:33 PM, Philippe Mathieu-Daudé wrote:
> On 10/27/20 3:45 PM, Keith Busch wrote:
>> On Tue, Oct 27, 2020 at 02:55:25PM +0100, Philippe Mathieu-Daudé wrote:
>>> Instead of displaying warning on stderr, use warn_report()
>>> which also displays it on the monitor.
>>>
>>> Signed-off-by: Philippe Mathieu-Daudé 
>>> ---
>>>  block/nvme.c | 4 ++--
>>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/block/nvme.c b/block/nvme.c
>>> index 739a0a700cb..6f1d7f9b2a1 100644
>>> --- a/block/nvme.c
>>> +++ b/block/nvme.c
>>> @@ -399,8 +399,8 @@ static bool nvme_process_completion(NVMeQueuePair *q)
>>>  }
>>>  cid = le16_to_cpu(c->cid);
>>
>> Not related to your patch, but it stands out as odd that this is treated
>> as an endian type. The field is just an opaque cookie, so there shouldn't
>> be a need for byte swapping. It in fact looks like this is broken on a
>> big-endian host, as the swapping on submission uses a 32-bit value. Won't
>> that truncate the relevant bits?
> 
> You are right, thanks for having a look and catching this bug :)
> 
> I suppose we never tested on big-endian host yet.

FYI we barely have 64-bit testing on x86_64 and aarch64.




[PATCH v2] xen: rework pci_piix3_xen_ide_unplug

2020-10-27 Thread Anthony PERARD via
From: Anthony PERARD 

This is to allow IDE disks to be unplugged when added to QEMU via:
-drive file=/root/disk_file,if=none,id=ide-disk0,format=raw
-device ide-hd,drive=ide-disk0,bus=ide.0,unit=0

as the current code only works for disk added with:
-drive file=/root/disk_file,if=ide,index=0,media=disk,format=raw

Since the code already has the IDE controller as `dev`, we don't need
to use the legacy DriveInfo to find all the drives we want to unplug.
We can simply use `blk` from the controller, as the old code already
implicitly assumed they were the same when it set the controller's
`blk` to NULL.

Signed-off-by: Anthony PERARD 

---
v2: coding style

CC: Paul Durrant 
CC: Stefano Stabellini 
---
 hw/ide/piix.c | 27 +--
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/hw/ide/piix.c b/hw/ide/piix.c
index b402a936362b..b9860e35a5c4 100644
--- a/hw/ide/piix.c
+++ b/hw/ide/piix.c
@@ -164,30 +164,29 @@ static void pci_piix_ide_realize(PCIDevice *dev, Error 
**errp)
 int pci_piix3_xen_ide_unplug(DeviceState *dev, bool aux)
 {
 PCIIDEState *pci_ide;
-DriveInfo *di;
 int i;
 IDEDevice *idedev;
+IDEBus *idebus;
+BlockBackend *blk;
 
 pci_ide = PCI_IDE(dev);
 
 for (i = aux ? 1 : 0; i < 4; i++) {
-di = drive_get_by_index(IF_IDE, i);
-if (di != NULL && !di->media_cd) {
-BlockBackend *blk = blk_by_legacy_dinfo(di);
-DeviceState *ds = blk_get_attached_dev(blk);
+idebus = &pci_ide->bus[i / 2];
+blk = idebus->ifs[i % 2].blk;
 
-blk_drain(blk);
-blk_flush(blk);
-
-if (ds) {
-blk_detach_dev(blk, ds);
-}
-pci_ide->bus[di->bus].ifs[di->unit].blk = NULL;
+if (blk && idebus->ifs[i % 2].drive_kind != IDE_CD) {
 if (!(i % 2)) {
-idedev = pci_ide->bus[di->bus].master;
+idedev = idebus->master;
 } else {
-idedev = pci_ide->bus[di->bus].slave;
+idedev = idebus->slave;
 }
+
+blk_drain(blk);
+blk_flush(blk);
+
+blk_detach_dev(blk, DEVICE(idedev));
+idebus->ifs[i % 2].blk = NULL;
 idedev->conf.blk = NULL;
 monitor_remove_blk(blk);
 blk_unref(blk);
-- 
Anthony PERARD




Re: [PATCH v2] block: End quiescent sections when a BDS is deleted

2020-10-27 Thread Kevin Wolf
Am 27.10.2020 um 16:24 hat Greg Kurz geschrieben:
> On Tue, 27 Oct 2020 13:54:04 +
> Stefan Hajnoczi  wrote:
> 
> > On Fri, Oct 23, 2020 at 05:01:10PM +0200, Greg Kurz wrote:
> > > +/**
> > > + * End all quiescent sections started by bdrv_drain_all_begin(). This is
> > > + * only needed when deleting a BDS before bdrv_drain_all_end() is called.
> > > + */
> > > +void bdrv_drain_all_end_quiesce(BlockDriverState *bs);
> > 
> > This function is only called from block.c. Can it be moved to the
> > private block_int.h header?
> > 
> 
> Ha, I wasn't aware of block_int.h... It seems to be a very good idea.
> 
> > The code is not clear on whether bdrv_drain_all_end_quiesce() is an API
> > that others can use or an internal helper function that must only be
> > called by bdrv_close(). I came to the conclusion that the latter is true
> > after reviewing the patch.
> > 
> 
> Yes it is.
> 
> > Please update the bdrv_drain_all_end_quiesce() doc comment to clarify
> > that this function is an internal helper for bdrv_close() - no one else
> > needs to worry about it.
> 
> I'll do that.
> 
> Thanks for the suggestions Stefan.

I already sent a pull request, so if you're going to change something,
please make it a follow-up patch rather than a new patch version.

Kevin


signature.asc
Description: PGP signature


Re: [PATCH v6 11/11] qapi: Use QAPI_LIST_ADD() where possible

2020-10-27 Thread Markus Armbruster
Eric Blake  writes:

> On 10/27/20 5:09 AM, Markus Armbruster wrote:
>> Eric Blake  writes:
>> 
>>> Anywhere we create a list of just one item or by prepending items
>>> (typically because order doesn't matter), we can use the now-public
>>> macro.  But places where we must keep the list in order by appending
>>> remain open-coded.
>> 
>> Should we rename the macro to QAPI_LIST_PREPEND()?
>
> That would make sense if we add a counterpart QAPI_LIST_APPEND.

It may make sense even if we don't.  QAPI_LIST_ADD() leaves the reader
guessing whether we prepend or append.

>> How many places append?  If it's more than just a few, an attempt to
>> factor out the common code is in order.  Not in this patch, of course.
>> Not even in this series.
>
> Quite a few.  The most common pattern for appending is like this from
> qemu-img.c:
>
> ImageInfoList *head = NULL, *elem;
> ImageInfoList **last = &head;
> ...
> while (...) {
> elem = g_new0(ImageInfoList, 1);
> elem->value = info;
> *last = elem;
> last = &elem->next;
> }
>
> although I saw several other patterns as well.  And we frequently have
> this comment, such as from block/qapi.c:
> /* XXX: waiting for the qapi to support qemu-queue.h types */
>
> Several of the existing append spots could be switched to prepend with
> no change to semantics (the resulting list would be presented to the
> user in the opposite order, but the semantics of that item were a set
> rather than an ordered list so other than tweaking the testsuite, it
> would not matter), while others absolutely have to append to maintain
> correct order.
>
> Part of me wonders if it would be worth adjusting the QAPI generator to
> create a head and tail pointer for _every_ FOOList member, rather than
> just a head pointer.

Changes the C type we generate for a QAPI array from pointer to struct.
Could be awkward.

>   Or to create a function for an O(n) reversal of an
> existing list, then flipping spots to construct lists in reverse order
> followed by a list reverse (no change in big-O complexity, more code
> reuse, but slightly more CPU time).

I'd expect the cost of reversing the list to be negligible.

Another option is to simply factor out the common part of the common way
to build a list by appending: "append to list, update tail pointer".
Similar to how QAPI_LIST_ADD() factors out the common "prepend to list,
update head pointer".

> But as you observe, that quickly
> goes beyond the scope of this series.

Definitely.

>>> +++ b/docs/devel/writing-qmp-commands.txt
>>> @@ -531,15 +531,10 @@ TimerAlarmMethodList *qmp_query_alarm_methods(Error 
>>> **errp)
>>>  bool current = true;
>>>
>>>  for (p = alarm_timers; p->name; p++) {
>>> -TimerAlarmMethodList *info = g_malloc0(sizeof(*info));
>
> [1]
>
>>> -info->value = g_malloc0(sizeof(*info->value));
>
> [2]
>
>>> -info->value->method_name = g_strdup(p->name);
>>> -info->value->current = current;
>>> -
>>> -current = false;
>>> -
>>> -info->next = method_list;
>>> -method_list = info;
>>> +   TimerAlarmMethod *value = g_new0(TimerAlarmMethod, 1);
>> 
>> Can just as well use g_new(), as QAPI_LIST_ADD() will set both members
>> of @value.  Same elsewhere.
>
> Not quite.  Allocation [1] can use g_new() instead of g_malloc0()
> because we fill both members of info, but allocation [2] is unchanged by
> this code transformation (I did not want to research whether the code
> was filling all members of info->value (probably true, but it was
> unrelated to my rewrite)).  Switching to QAPI_LIST_ADD is what moves
> allocation [1] into the macro (where it indeed uses g_new), but
> QAPI_LIST_ADD has no impact on the contents of value in allocation [2]
> (which is the only allocation left locally in this hunk).

You're right.

> However, the fact that I changed from g_malloc0(sizeof(*info->value)) to
> g_new0(TimerAlarmMethod, 1), instead of keeping it as
> g_malloc0(sizeof(*value)), is indeed a case of me doing a bit more than
> a strict mechanical conversion; this was one of the hunks I touched
> earlier in my audit.
>
>
>>> @@ -655,15 +656,9 @@ static int 
>>> qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
>>>  qemu_opts_del(opts);
>>>  }
>>>
>>> -if (gconf->server == NULL) {
>>> -gconf->server = g_new0(SocketAddressList, 1);
>>> -gconf->server->value = gsconf;
>>> -curr = gconf->server;
>>> -} else {
>>> -curr->next = g_new0(SocketAddressList, 1);
>>> -curr->next->value = gsconf;
>>> -curr = curr->next;
>>> -}
>>> +*curr = g_new0(SocketAddressList, 1);
>>> +(*curr)->value = gsconf;
>>> +curr = &(*curr)->next;
>>>  gsconf = NULL;
>>>
>>>  qobject_unref(backing_options);
>> 
>> The change to qemu_gluster_parse_json() looks unrelated.
>
> Indeed, this is also one of the 

Re: [Libguestfs] [libnbd PATCH] info: Add support for new 'qemu-nbd -A' qemu:allocation-depth

2020-10-27 Thread Eric Blake
On 10/16/20 10:23 AM, Eric Blake wrote:
> A rather trivial decoding; we may enhance it further if qemu extends
> things to give an integer depth alongside its tri-state encoding.
> ---
> 
> I'll wait to push this to libnbd until the counterpart qemu patches
> land upstream, although it looks like I've got positive review.

Whoops, I accidentally pushed this before qemu stuff landed upstream,
and in the meantime, we changed our minds on what to expose over
qemu:allocation-depth to be a bare integer rather than a tri-state.
I'll push this followup (but this time, wait for the actual qemu patch
to land).  In fact, I should probably add test-suite coverage...


>From eba8734654e6fd340e18b3e07c3213ed1a0ab9e8 Mon Sep 17 00:00:00 2001
From: Eric Blake 
Date: Tue, 27 Oct 2020 10:27:25 -0500
Subject: [libnbd PATCH] info: Adjust to actual 'qemu-nbd -A' semantics

Review on the qemu list has led to an altered definition of what
'qemu:allocation-depth' should report: rather than a tri-state value,
it is an actual depth.  It's time to match what actually got committed
into qemu, which in turn means a slight refactoring to use a malloc'd
string for a description.

Fixes: 71455c021
---
 info/nbdinfo.c | 27 +++
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/info/nbdinfo.c b/info/nbdinfo.c
index 2b22f51..b152f28 100644
--- a/info/nbdinfo.c
+++ b/info/nbdinfo.c
@@ -767,28 +767,30 @@ get_content (struct nbd_handle *nbd, int64_t size)
 }

 /* Callback handling --map. */
-static const char *
+static char *
 extent_description (const char *metacontext, uint32_t type)
 {
+  char *ret;
+
   if (strcmp (metacontext, "base:allocation") == 0) {
 switch (type) {
-case 0: return "allocated";
-case 1: return "hole";
-case 2: return "zero";
-case 3: return "hole,zero";
+case 0: return strdup ("allocated");
+case 1: return strdup ("hole");
+case 2: return strdup ("zero");
+case 3: return strdup ("hole,zero");
 }
   }
   else if (strncmp (metacontext, "qemu:dirty-bitmap:", 18) == 0) {
 switch (type) {
-case 0: return "clean";
-case 1: return "dirty";
+case 0: return strdup ("clean");
+case 1: return strdup ("dirty");
 }
   }
   else if (strcmp (metacontext, "qemu:allocation-depth") == 0) {
-switch (type & 3) {
-case 0: return "unallocated";
-case 1: return "local";
-case 2: return "backing";
+switch (type) {
+case 0: return strdup ("unallocated");
+case 1: return strdup ("local");
+case 2: asprintf (&ret, "backing depth %d", type); return ret;
 }
   }

@@ -810,7 +812,7 @@ extent_callback (void *user_data, const char
*metacontext,

   /* Print the entries received. */
   for (i = 0; i < nr_entries; i += 2) {
-const char *descr = extent_description (map, entries[i+1]);
+char *descr = extent_description (map, entries[i+1]);

 if (!json_output) {
   fprintf (fp, "%10" PRIu64 "  "
@@ -837,6 +839,7 @@ extent_callback (void *user_data, const char
*metacontext,
   comma = true;
 }

+free (descr);
 offset += entries[i];
   }

-- 
2.29.0



-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




[PULL 5/5] block: End quiescent sections when a BDS is deleted

2020-10-27 Thread Kevin Wolf
From: Greg Kurz 

If a BDS gets deleted during blk_drain_all(), it might miss a
call to bdrv_do_drained_end(). This means missing a call to
aio_enable_external() and the AIO context remains disabled
forever. This can cause a device to become unresponsive and to
disrupt the guest execution, i.e. hang, loop forever or worse.

This scenario is quite easy to encounter with virtio-scsi
on POWER when punching multiple blockdev-create QMP commands
while the guest is booting and it is still running the SLOF
firmware. This happens because SLOF disables/re-enables PCI
devices multiple times via IO/MEM/MASTER bits of PCI_COMMAND
register after the initial probe/feature negotiation, as it
tends to work with a single device at a time at various stages
like probing and running block/network bootloaders without
doing a full reset in-between. This naturally generates many
dataplane stops and starts, and thus many drain sections that
can race with blockdev_create_run(). In the end, SLOF bails
out.

It is somewhat reproducible on x86 but it requires generating
artificial dataplane start/stop activity with stop/cont QMP
commands. In this case, seabios ends up looping forever,
waiting for the virtio-scsi device to send a response to
a command it never received.

Add a helper that pairs all previously called bdrv_do_drained_begin()
with a bdrv_do_drained_end() and call it from bdrv_close().
While at it, update the "/bdrv-drain/graph-change/drain_all"
test in test-bdrv-drain so that it can catch the issue.

BugId: https://bugzilla.redhat.com/show_bug.cgi?id=1874441
Signed-off-by: Greg Kurz 
Message-Id: <160346526998.272601.9045392804399803158.st...@bahia.lan>
Signed-off-by: Kevin Wolf 
---
 include/block/block.h   |  6 ++
 block.c |  9 +
 block/io.c  | 13 +
 tests/test-bdrv-drain.c |  1 +
 4 files changed, 29 insertions(+)

diff --git a/include/block/block.h b/include/block/block.h
index c9d7c58765..4bfe3b546b 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -781,6 +781,12 @@ void bdrv_drained_end(BlockDriverState *bs);
  */
 void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter);
 
+/**
+ * End all quiescent sections started by bdrv_drain_all_begin(). This is
+ * only needed when deleting a BDS before bdrv_drain_all_end() is called.
+ */
+void bdrv_drain_all_end_quiesce(BlockDriverState *bs);
+
 /**
  * End a quiescent section started by bdrv_subtree_drained_begin().
  */
diff --git a/block.c b/block.c
index 430edf79bb..ee5b28a979 100644
--- a/block.c
+++ b/block.c
@@ -4458,6 +4458,15 @@ static void bdrv_close(BlockDriverState *bs)
 }
 QLIST_INIT(&bs->aio_notifiers);
 bdrv_drained_end(bs);
+
+/*
+ * If we're still inside some bdrv_drain_all_begin()/end() sections, end
+ * them now since this BDS won't exist anymore when bdrv_drain_all_end()
+ * gets called.
+ */
+if (bs->quiesce_counter) {
+bdrv_drain_all_end_quiesce(bs);
+}
 }
 
 void bdrv_close_all(void)
diff --git a/block/io.c b/block/io.c
index c33cecd58d..9918f2499c 100644
--- a/block/io.c
+++ b/block/io.c
@@ -633,6 +633,19 @@ void bdrv_drain_all_begin(void)
 }
 }
 
+void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
+{
+int drained_end_counter = 0;
+
+g_assert(bs->quiesce_counter > 0);
+g_assert(!bs->refcnt);
+
+while (bs->quiesce_counter) {
+bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
+}
+BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
+}
+
 void bdrv_drain_all_end(void)
 {
 BlockDriverState *bs = NULL;
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index 1595bbc92e..8a29e33e00 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -594,6 +594,7 @@ static void test_graph_change_drain_all(void)
 
 g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
 g_assert_cmpint(b_s->drain_count, ==, 0);
+g_assert_cmpint(qemu_get_aio_context()->external_disable_cnt, ==, 0);
 
 bdrv_unref(bs_b);
 blk_unref(blk_b);
-- 
2.28.0




Re: [PATCH v12 11/14] copy-on-read: add support for read flags to COR-filter

2020-10-27 Thread Vladimir Sementsov-Ogievskiy

27.10.2020 17:46, Vladimir Sementsov-Ogievskiy wrote:

22.10.2020 21:13, Andrey Shinkevich wrote:

Add the BDRV_REQ_COPY_ON_READ and BDRV_REQ_PREFETCH flags to the
supported_read_flags of the COR-filter.

Signed-off-by: Andrey Shinkevich 
---
  block/copy-on-read.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 8178a91..a2b180a 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -50,6 +50,8 @@ static int cor_open(BlockDriverState *bs, QDict *options, int 
flags,
  return -EINVAL;
  }
+    bs->supported_read_flags = BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH;
+
  bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
  (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);



This should be merged with the following patch, otherwise it doesn't make 
sense. You mark filter as supporting PREFETCH, but actually it just ignores it 
(and may crash on trying to read into qiov=NULL).



Ah, no, the problem is not qiov=NULL, but that we will just pass PREFETCH to 
bs->file, which may not support it and will hit the new abort() in block/io.c 
from patch 10.


Also, is there any reason to add support for BDRV_REQ_COPY_ON_READ? What would 
it mean for the COR filter? I don't know. It makes sense only for the generic 
layer and is handled there. It is never passed to the driver, so let's not 
declare support for it.

--
Best regards,
Vladimir



Re: [PATCH 03/25] block/nvme: Report warning with warn_report()

2020-10-27 Thread Philippe Mathieu-Daudé
On 10/27/20 3:45 PM, Keith Busch wrote:
> On Tue, Oct 27, 2020 at 02:55:25PM +0100, Philippe Mathieu-Daudé wrote:
>> Instead of displaying warning on stderr, use warn_report()
>> which also displays it on the monitor.
>>
>> Signed-off-by: Philippe Mathieu-Daudé 
>> ---
>>  block/nvme.c | 4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/block/nvme.c b/block/nvme.c
>> index 739a0a700cb..6f1d7f9b2a1 100644
>> --- a/block/nvme.c
>> +++ b/block/nvme.c
>> @@ -399,8 +399,8 @@ static bool nvme_process_completion(NVMeQueuePair *q)
>>  }
>>  cid = le16_to_cpu(c->cid);
> 
> Not related to your patch, but it stands out as odd that this is treated
> as an endian type. The field is just an opaque cookie, so there shouldn't
> be a need for byte swapping. It in fact looks like this is broken on a
> big-endian host, as the swapping on submission uses a 32-bit value. Won't
> that truncate the relevant bits?

You are right, thanks for having a look and catching this bug :)

I suppose we never tested on big-endian host yet.

> 
>>  if (cid == 0 || cid > NVME_QUEUE_SIZE) {
>> -fprintf(stderr, "Unexpected CID in completion queue: %" PRIu32 
>> "\n",
>> -cid);
>> +warn_report("NVMe: Unexpected CID in completion queue: 
>> %"PRIu32", "
>> +"queue size: %u", cid, NVME_QUEUE_SIZE);
>>  continue;
>>  }
>>  trace_nvme_complete_command(s, q->index, cid);
> 




[PULL 0/5] Block layer patches

2020-10-27 Thread Kevin Wolf
The following changes since commit d55450df995d6223486db11c66491cbf6c131523:

  Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20201026a' 
into staging (2020-10-27 10:25:42 +)

are available in the Git repository at:

  git://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to 1a6d3bd229d429879a85a9105fb84cae049d083c:

  block: End quiescent sections when a BDS is deleted (2020-10-27 15:26:20 
+0100)


Block layer patches:

- qcow2: Skip copy-on-write when allocating a zero cluster
- qemu-img: add support for rate limit in qemu-img convert/commit
- Fix deadlock when deleting a block node during drain_all


Alberto Garcia (2):
  qcow2: Report BDRV_BLOCK_ZERO more accurately in bdrv_co_block_status()
  qcow2: Skip copy-on-write when allocating a zero cluster

Greg Kurz (1):
  block: End quiescent sections when a BDS is deleted

Zhengui Li (2):
  qemu-img: add support for rate limit in qemu-img commit
  qemu-img: add support for rate limit in qemu-img convert

 docs/tools/qemu-img.rst | 10 --
 include/block/block.h   |  8 
 block.c |  9 +
 block/io.c  | 48 
 block/qcow2.c   | 35 +++
 qemu-img.c  | 38 +++---
 tests/test-bdrv-drain.c |  1 +
 qemu-img-cmds.hx|  8 
 8 files changed, 128 insertions(+), 29 deletions(-)




Re: [PATCH v2] block: End quiescent sections when a BDS is deleted

2020-10-27 Thread Greg Kurz
On Tue, 27 Oct 2020 13:54:04 +
Stefan Hajnoczi  wrote:

> On Fri, Oct 23, 2020 at 05:01:10PM +0200, Greg Kurz wrote:
> > +/**
> > + * End all quiescent sections started by bdrv_drain_all_begin(). This is
> > + * only needed when deleting a BDS before bdrv_drain_all_end() is called.
> > + */
> > +void bdrv_drain_all_end_quiesce(BlockDriverState *bs);
> 
> This function is only called from block.c. Can it be moved to the
> private block_int.h header?
> 

Ha, I wasn't aware of block_int.h... It seems to be a very good idea.

> The code is not clear on whether bdrv_drain_all_end_quiesce() is an API
> that others can use or an internal helper function that must only be
> called by bdrv_close(). I came to the conclusion that the latter is true
> after reviewing the patch.
> 

Yes it is.

> Please update the bdrv_drain_all_end_quiesce() doc comment to clarify
> that this function is an internal helper for bdrv_close() - no one else
> needs to worry about it.

I'll do that.

Thanks for the suggestions Stefan.

Cheers,

--
Greg


pgp05wwZFvnyO.pgp
Description: OpenPGP digital signature


[PULL 2/5] qemu-img: add support for rate limit in qemu-img convert

2020-10-27 Thread Kevin Wolf
From: Zhengui 

add support for rate limit in qemu-img convert.

Signed-off-by: Zhengui 
Message-Id: <1603205264-17424-3-git-send-email-lizhen...@huawei.com>
Reviewed-by: Alberto Garcia 
Signed-off-by: Kevin Wolf 
---
 docs/tools/qemu-img.rst |  6 +-
 qemu-img.c  | 27 ++-
 qemu-img-cmds.hx|  4 ++--
 3 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index bcb11b0899..b615aa8419 100644
--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
@@ -188,6 +188,10 @@ Parameters to convert subcommand:
   allocated target image depending on the host support for getting allocation
   information.
 
+.. option:: -r
+
+   Rate limit for the convert process
+
 .. option:: --salvage
 
   Try to ignore I/O errors when reading.  Unless in quiet mode (``-q``), errors
@@ -410,7 +414,7 @@ Command description:
   4
 Error on reading data
 
-.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] 
[--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t 
CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o OPTIONS] [-l 
SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-m NUM_COROUTINES] [-W] FILENAME [FILENAME2 
[...]] OUTPUT_FILENAME
+.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] 
[--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t 
CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o OPTIONS] [-l 
SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m NUM_COROUTINES] [-W] 
FILENAME [FILENAME2 [...]] OUTPUT_FILENAME
 
   Convert the disk image *FILENAME* or a snapshot *SNAPSHOT_PARAM*
   to disk image *OUTPUT_FILENAME* using format *OUTPUT_FMT*. It can
diff --git a/qemu-img.c b/qemu-img.c
index 3023abea8b..a968c74cba 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -50,6 +50,8 @@
 #include "block/qapi.h"
 #include "crypto/init.h"
 #include "trace/control.h"
+#include "qemu/throttle.h"
+#include "block/throttle-groups.h"
 
 #define QEMU_IMG_VERSION "qemu-img version " QEMU_FULL_VERSION \
   "\n" QEMU_COPYRIGHT "\n"
@@ -1669,6 +1671,7 @@ enum ImgConvertBlockStatus {
 };
 
 #define MAX_COROUTINES 16
+#define CONVERT_THROTTLE_GROUP "img_convert"
 
 typedef struct ImgConvertState {
 BlockBackend **src;
@@ -2184,6 +2187,17 @@ static int convert_copy_bitmaps(BlockDriverState *src, 
BlockDriverState *dst)
 
 #define MAX_BUF_SECTORS 32768
 
+static void set_rate_limit(BlockBackend *blk, int64_t rate_limit)
+{
+ThrottleConfig cfg;
+
+throttle_config_init(&cfg);
+cfg.buckets[THROTTLE_BPS_WRITE].avg = rate_limit;
+
+blk_io_limits_enable(blk, CONVERT_THROTTLE_GROUP);
+blk_set_io_limits(blk, &cfg);
+}
+
 static int img_convert(int argc, char **argv)
 {
 int c, bs_i, flags, src_flags = 0;
@@ -2204,6 +2218,7 @@ static int img_convert(int argc, char **argv)
 bool force_share = false;
 bool explict_min_sparse = false;
 bool bitmaps = false;
+int64_t rate_limit = 0;
 
 ImgConvertState s = (ImgConvertState) {
 /* Need at least 4k of zeros for sparse detection */
@@ -2226,7 +2241,7 @@ static int img_convert(int argc, char **argv)
 {"bitmaps", no_argument, 0, OPTION_BITMAPS},
 {0, 0, 0, 0}
 };
-c = getopt_long(argc, argv, ":hf:O:B:Cco:l:S:pt:T:qnm:WU",
+c = getopt_long(argc, argv, ":hf:O:B:Cco:l:S:pt:T:qnm:WUr:",
 long_options, NULL);
 if (c == -1) {
 break;
@@ -2323,6 +2338,12 @@ static int img_convert(int argc, char **argv)
 case 'U':
 force_share = true;
 break;
+case 'r':
+rate_limit = cvtnum("rate limit", optarg);
+if (rate_limit < 0) {
+goto fail_getopt;
+}
+break;
 case OPTION_OBJECT: {
 QemuOpts *object_opts;
 object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
@@ -2712,6 +2733,10 @@ static int img_convert(int argc, char **argv)
 s.cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
 }
 
+if (rate_limit) {
+set_rate_limit(s.target, rate_limit);
+}
+
 ret = convert_do_copy(&s);
 
 /* Now copy the bitmaps */
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 965c1e3e59..b3620f29e5 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -46,9 +46,9 @@ SRST
 ERST
 
 DEF("convert", img_convert,
-"convert [--object objectdef] [--image-opts] [--target-image-opts] 
[--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f fmt] [-t 
cache] [-T src_cache] [-O output_fmt] [-B backing_file] [-o options] [-l 
snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] [--salvage] filename 
[filename2 [...]] output_filename")
+"convert [--object objectdef] [--image-opts] [--target-image-opts] 
[--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f fmt] [-t 
cache] [-T src_cache] [-O output_fmt] 

[PULL 3/5] qcow2: Report BDRV_BLOCK_ZERO more accurately in bdrv_co_block_status()

2020-10-27 Thread Kevin Wolf
From: Alberto Garcia 

If a BlockDriverState supports backing files but has none then any
unallocated area reads back as zeroes.

bdrv_co_block_status() is only reporting this if want_zero is true,
but this is an inexpensive test and there is no reason not to do it in
all cases.

Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Alberto Garcia 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: 
<66fa0914a0e2b727ab6d1b63ca773d7cd29a9a9e.1603731354.git.be...@igalia.com>
Signed-off-by: Kevin Wolf 
---
 block/io.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/block/io.c b/block/io.c
index 02528b3823..6fe1b275b6 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2282,17 +2282,17 @@ static int coroutine_fn 
bdrv_co_block_status(BlockDriverState *bs,
 
 if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
 ret |= BDRV_BLOCK_ALLOCATED;
-} else if (want_zero && bs->drv->supports_backing) {
+} else if (bs->drv->supports_backing) {
 BlockDriverState *cow_bs = bdrv_cow_bs(bs);
 
-if (cow_bs) {
+if (!cow_bs) {
+ret |= BDRV_BLOCK_ZERO;
+} else if (want_zero) {
 int64_t size2 = bdrv_getlength(cow_bs);
 
 if (size2 >= 0 && offset >= size2) {
 ret |= BDRV_BLOCK_ZERO;
 }
-} else {
-ret |= BDRV_BLOCK_ZERO;
 }
 }
 
-- 
2.28.0




  1   2   >