date:20200904

[PATCH v5 09/11] meson: Fixes qapi tests.

2020-09-04 Thread Yonggang Luo

The error are:
+@end table
+
+@end deftypefn
+
make: *** [Makefile.mtest:63: check-qapi-schema] Error 1

Signed-off-by: Yonggang Luo 
---
 tests/qapi-schema/meson.build | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/qapi-schema/meson.build b/tests/qapi-schema/meson.build
index c87d141417..67ba0a5ebd 100644
--- a/tests/qapi-schema/meson.build
+++ b/tests/qapi-schema/meson.build
@@ -220,6 +220,7 @@ qapi_doc = custom_target('QAPI doc',
 
 # "full_path()" needed here to work around
 # https://github.com/mesonbuild/meson/issues/7585
-test('QAPI doc', diff, args: ['-u', files('doc-good.texi'), 
qapi_doc[0].full_path()],
+test('QAPI doc', diff, args: ['--strip-trailing-cr',
+  '-u', files('doc-good.texi'), 
qapi_doc[0].full_path()],
  depends: qapi_doc,
  suite: ['qapi-schema', 'qapi-doc'])
-- 
2.28.0.windows.1

[PATCH v5 08/11] osdep: These function are only available on Non-Win32 system.

2020-09-04 Thread Yonggang Luo

int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive);
int qemu_unlock_fd(int fd, int64_t start, int64_t len);
int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive);
bool qemu_has_ofd_lock(void);

Signed-off-by: Yonggang Luo 
---
 include/qemu/osdep.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 412962d91a..e80fddd1e8 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -502,11 +502,11 @@ int qemu_close(int fd);
 int qemu_unlink(const char *name);
 #ifndef _WIN32
 int qemu_dup(int fd);
-#endif
 int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive);
 int qemu_unlock_fd(int fd, int64_t start, int64_t len);
 int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive);
 bool qemu_has_ofd_lock(void);
+#endif
 
 #if defined(__HAIKU__) && defined(__i386__)
 #define FMT_pid "%ld"
-- 
2.28.0.windows.1

[PATCH v5 06/11] tests: Trying fixes test-replication.c on msys2.

2020-09-04 Thread Yonggang Luo

Signed-off-by: Yonggang Luo 
---
 tests/test-replication.c | 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/tests/test-replication.c b/tests/test-replication.c
index 9ab3666a90..d0e06f8d77 100644
--- a/tests/test-replication.c
+++ b/tests/test-replication.c
@@ -23,14 +23,18 @@
 
 /* primary */
 #define P_ID "primary-id"
-static char p_local_disk[] = "/tmp/p_local_disk.XX";
+#define P_LOCAL_DISK "%s/p_local_disk.XX"
+static char p_local_disk[PATH_MAX];
 
 /* secondary */
 #define S_ID "secondary-id"
 #define S_LOCAL_DISK_ID "secondary-local-disk-id"
-static char s_local_disk[] = "/tmp/s_local_disk.XX";
-static char s_active_disk[] = "/tmp/s_active_disk.XX";
-static char s_hidden_disk[] = "/tmp/s_hidden_disk.XX";
+#define S_LOCAL_DISK "%s/s_local_disk.XX"
+static char s_local_disk[PATH_MAX];
+#define S_ACTIVE_DISK "%s/s_active_disk.XX"
+static char s_active_disk[PATH_MAX];
+#define S_HIDDEN_DISK "%s/s_hidden_disk.XX"
+static char s_hidden_disk[PATH_MAX];
 
 /* FIXME: steal from blockdev.c */
 QemuOptsList qemu_drive_opts = {
@@ -571,7 +575,12 @@ static void setup_sigabrt_handler(void)
 int main(int argc, char **argv)
 {
 int ret;
+const char *tmpdir = g_get_tmp_dir();
 qemu_init_main_loop(&error_fatal);
+sprintf(p_local_disk, P_LOCAL_DISK, tmpdir);
+sprintf(s_local_disk, S_LOCAL_DISK, tmpdir);
+sprintf(s_active_disk, S_ACTIVE_DISK, tmpdir);
+sprintf(s_hidden_disk, S_HIDDEN_DISK, tmpdir);
 bdrv_init();
 
 g_test_init(&argc, &argv, NULL);
-- 
2.28.0.windows.1

[PATCH v5 07/11] block: get file-win32.c handle locking option consistence with file-posix.c

2020-09-04 Thread Yonggang Luo

Signed-off-by: Yonggang Luo 
---
 block/file-win32.c | 23 +--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/block/file-win32.c b/block/file-win32.c
index ab69bd811a..14e5f5c3b5 100644
--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -299,6 +299,11 @@ static QemuOptsList raw_runtime_opts = {
 .type = QEMU_OPT_STRING,
 .help = "host AIO implementation (threads, native)",
 },
+{
+.name = "locking",
+.type = QEMU_OPT_STRING,
+.help = "file locking mode (on/off/auto, default: auto)",
+},
 { /* end of list */ }
 },
 };
@@ -334,6 +339,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, 
int flags,
 const char *filename;
 bool use_aio;
 int ret;
+OnOffAuto locking;
 
 s->type = FTYPE_FILE;
 
@@ -342,11 +348,24 @@ static int raw_open(BlockDriverState *bs, QDict *options, 
int flags,
 ret = -EINVAL;
 goto fail;
 }
-
-if (qdict_get_try_bool(options, "locking", false)) {
+locking = qapi_enum_parse(&OnOffAuto_lookup,
+  qemu_opt_get(opts, "locking"),
+  ON_OFF_AUTO_AUTO, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+ret = -EINVAL;
+goto fail;
+}
+switch (locking) {
+case ON_OFF_AUTO_ON:
 error_setg(errp, "locking=on is not supported on Windows");
 ret = -EINVAL;
 goto fail;
+case ON_OFF_AUTO_OFF:
+case ON_OFF_AUTO_AUTO:
+break;
+default:
+g_assert_not_reached();
 }
 
 filename = qemu_opt_get(opts, "filename");
-- 
2.28.0.windows.1

[PATCH v5 05/11] ci: Enable msys2 ci in cirrus

2020-09-04 Thread Yonggang Luo

Install msys2 in a proper way refer to 
https://github.com/cirruslabs/cirrus-ci-docs/issues/699
The https://wiki.qemu.org/Hosts/W32#Native_builds_with_MSYS2 need to be updated.
There is no need of --cross-prefix, open mingw64.exe instead of msys2.exe then 
we don't
need the --cross-prefix, besides we using environment variable settings:
MSYS: winsymlinks:nativestrict
MSYSTEM: MINGW64
CHERE_INVOKING: 1
to opening mingw64 native shell.
We now running tests with make -i check to skip tests errors.

Signed-off-by: Yonggang Luo 
---
 .cirrus.yml | 24 +
 scripts/ci/windows/msys2-build.sh   | 27 +++
 scripts/ci/windows/msys2-install.sh | 33 +
 3 files changed, 84 insertions(+)
 create mode 100644 scripts/ci/windows/msys2-build.sh
 create mode 100644 scripts/ci/windows/msys2-install.sh

diff --git a/.cirrus.yml b/.cirrus.yml
index 3dd9fcff7f..49335e68c9 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -63,3 +63,27 @@ macos_xcode_task:
--enable-werror --cc=clang || { cat config.log; exit 1; }
 - gmake -j$(sysctl -n hw.ncpu)
 - gmake check
+
+windows_msys2_task:
+  windows_container:
+image: cirrusci/windowsservercore:cmake
+os_version: 2019
+cpu: 8
+memory: 8G
+  env:
+MSYS: winsymlinks:nativestrict
+MSYSTEM: MINGW64
+CHERE_INVOKING: 1
+  printenv_script:
+- C:\tools\msys64\usr\bin\bash.exe -lc 'printenv'
+  install_script:
+- C:\tools\msys64\usr\bin\bash.exe -lc "cd /c/tools && curl -O 
http://repo.msys2.org/msys/x86_64/msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz";
+- C:\tools\msys64\usr\bin\bash.exe -lc "cd /c/tools && curl -O 
http://repo.msys2.org/msys/x86_64/msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz.sig";
+- C:\tools\msys64\usr\bin\bash.exe -lc "cd /c/tools && pacman -U 
--noconfirm msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz"
+- C:\tools\msys64\usr\bin\bash.exe -lc "pacman -Sy --noconfirm"
+- C:\tools\msys64\usr\bin\bash.exe -lc "pacman --needed --noconfirm -S 
bash pacman pacman-mirrors msys2-runtime"
+- taskkill /F /IM gpg-agent.exe
+- C:\tools\msys64\usr\bin\bash.exe -lc "pacman --noconfirm -Su"
+- C:\tools\msys64\usr\bin\bash.exe -lc "sh 
scripts/ci/windows/msys2-install.sh"
+  script:
+- C:\tools\msys64\usr\bin\bash.exe -lc "sh 
scripts/ci/windows/msys2-build.sh"
diff --git a/scripts/ci/windows/msys2-build.sh 
b/scripts/ci/windows/msys2-build.sh
new file mode 100644
index 00..532cb847c0
--- /dev/null
+++ b/scripts/ci/windows/msys2-build.sh
@@ -0,0 +1,27 @@
+mkdir build
+cd build
+../configure \
+--python=python3 \
+--enable-stack-protector \
+--enable-guest-agent \
+--disable-pie \
+--enable-gnutls --enable-nettle \
+--enable-sdl --enable-sdl-image --enable-gtk --disable-vte --disable-curses 
--enable-iconv \
+--enable-vnc --enable-vnc-sasl --enable-vnc-jpeg --enable-vnc-png \
+--enable-slirp=git \
+--disable-brlapi --enable-curl \
+--enable-fdt \
+--disable-kvm --enable-hax --enable-whpx \
+--enable-libnfs --enable-libusb --enable-live-block-migration 
--enable-usb-redir \
+--enable-lzo --enable-snappy --enable-bzip2 --enable-zstd \
+--enable-membarrier --enable-coroutine-pool \
+--enable-libssh --enable-libxml2 \
+--enable-jemalloc --enable-avx2 \
+--enable-replication \
+--enable-tools \
+--enable-bochs --enable-cloop --enable-dmg --enable-qcow1 --enable-vdi 
--enable-vvfat --enable-qed --enable-parallels \
+--enable-sheepdog \
+--enable-capstone=git
+
+make -j$NUMBER_OF_PROCESSORS
+make -i -j$NUMBER_OF_PROCESSORS check
diff --git a/scripts/ci/windows/msys2-install.sh 
b/scripts/ci/windows/msys2-install.sh
new file mode 100644
index 00..6086452399
--- /dev/null
+++ b/scripts/ci/windows/msys2-install.sh
@@ -0,0 +1,33 @@
+pacman --noconfirm -S --needed \
+base-devel \
+git \
+mingw-w64-x86_64-python \
+mingw-w64-x86_64-python-setuptools \
+mingw-w64-x86_64-toolchain \
+mingw-w64-x86_64-SDL2 \
+mingw-w64-x86_64-SDL2_image \
+mingw-w64-x86_64-gtk3 \
+mingw-w64-x86_64-glib2 \
+mingw-w64-x86_64-ninja \
+mingw-w64-x86_64-make \
+mingw-w64-x86_64-jemalloc \
+mingw-w64-x86_64-lzo2 \
+mingw-w64-x86_64-zstd \
+mingw-w64-x86_64-libjpeg-turbo \
+mingw-w64-x86_64-pixman \
+mingw-w64-x86_64-libgcrypt \
+mingw-w64-x86_64-capstone \
+mingw-w64-x86_64-libpng \
+mingw-w64-x86_64-libssh \
+mingw-w64-x86_64-libxml2 \
+mingw-w64-x86_64-snappy \
+mingw-w64-x86_64-libusb \
+mingw-w64-x86_64-usbredir \
+mingw-w64-x86_64-libtasn1 \
+mingw-w64-x86_64-libnfs \
+mingw-w64-x86_64-nettle \
+mingw-w64-x86_64-cyrus-sasl \
+mingw-w64-x86_64-curl \
+mingw-w64-x86_64-gnutls \
+mingw-w64-x86_64-zstd \
+
-- 
2.28.0.windows.1

[PATCH v5 04/11] meson: upgrade meson for execute custom ninjatool under msys2 properly

2020-09-04 Thread Yonggang Luo

The ninja options now have no need anymore.

Signed-off-by: Yonggang Luo 
---
 meson | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/meson b/meson
index 68ed748f84..492afe50a4 16
--- a/meson
+++ b/meson
@@ -1 +1 @@
-Subproject commit 68ed748f84f14c2d4e62dcbd123816e5898eb04c
+Subproject commit 492afe50a439d70df99d6e3e59572aff55e14c6b
-- 
2.28.0.windows.1

[PATCH v5 11/11] ci: Enable Github actions.

2020-09-04 Thread Yonggang Luo

Signed-off-by: Yonggang Luo 
---
 .github/workflows/main.yml| 31 +++
 scripts/ci/windows/msys2-download.bat |  4 
 2 files changed, 35 insertions(+)
 create mode 100644 .github/workflows/main.yml
 create mode 100644 scripts/ci/windows/msys2-download.bat

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
new file mode 100644
index 00..758bbf6641
--- /dev/null
+++ b/.github/workflows/main.yml
@@ -0,0 +1,31 @@
+# This is a basic workflow to help you get started with Actions
+
+name: CI
+
+# Controls when the action will run. Triggers the workflow on push or pull 
request
+# events but only for the master branch
+on:
+  push:
+branches: [ master, msys2 ]
+  pull_request:
+branches: [ master ]
+
+# A workflow run is made up of one or more jobs that can run sequentially or 
in parallel
+jobs:
+  msys2-build:
+name: C++ msys2 (Windows)
+runs-on: windows-latest
+strategy:
+  fail-fast: false
+steps:
+  # Checks-out your repository under $GITHUB_WORKSPACE, so your job can 
access it
+  - uses: actions/checkout@v2
+  - name: Install MSYS2
+run: scripts/ci/windows/msys2-download
+  - name: Build
+env:
+  MSYS: winsymlinks:nativestrict
+  MSYSTEM: MINGW64
+  CHERE_INVOKING: 1
+run: C:\tools\msys64\usr\bin\bash.exe -lc "sh 
scripts/ci/windows/msys2-build.sh"
+
diff --git a/scripts/ci/windows/msys2-download.bat 
b/scripts/ci/windows/msys2-download.bat
new file mode 100644
index 00..2c7c41899e
--- /dev/null
+++ b/scripts/ci/windows/msys2-download.bat
@@ -0,0 +1,4 @@
+mkdir C:\tools
+cd /d C:\tools
+curl -LJ -s 
https://github.com/lygstate/qemu/releases/download/v5.1.0/msys64-v5.1.0.7z 
--output msys64.7z
+7z -mmt8 x msys64.7z
-- 
2.28.0.windows.1

[PATCH v5 03/11] ci: fixes msys2 build by upgrading capstone to 4.0.2

2020-09-04 Thread Yonggang Luo

Signed-off-by: Yonggang Luo 
---
 capstone  | 2 +-
 configure | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/capstone b/capstone
index 22ead3e0bf..1d23053284 16
--- a/capstone
+++ b/capstone
@@ -1 +1 @@
-Subproject commit 22ead3e0bfdb87516656453336160e0a37b066bf
+Subproject commit 1d230532840a37ac032c6ab80128238fc930c6c1
diff --git a/configure b/configure
index 5d8bf4d8bb..f8cbd2898c 100755
--- a/configure
+++ b/configure
@@ -5117,7 +5117,7 @@ case "$capstone" in
   LIBCAPSTONE=libcapstone.a
 fi
 capstone_libs="-Lcapstone -lcapstone"
-capstone_cflags="-I${source_path}/capstone/include"
+capstone_cflags="-I${source_path}/capstone/include 
-I${source_path}/capstone/include/capstone"
 ;;
 
   system)
-- 
2.28.0.windows.1

[PATCH v5 10/11] docker: Add win32/msys2/mingw64 docker

2020-09-04 Thread Yonggang Luo

Signed-off-by: Yonggang Luo 
---
 tests/docker/dockerfiles/msys2.docker | 11 +++
 1 file changed, 11 insertions(+)
 create mode 100644 tests/docker/dockerfiles/msys2.docker

diff --git a/tests/docker/dockerfiles/msys2.docker 
b/tests/docker/dockerfiles/msys2.docker
new file mode 100644
index 00..f898e0803d
--- /dev/null
+++ b/tests/docker/dockerfiles/msys2.docker
@@ -0,0 +1,11 @@
+FROM cirrusci/windowsservercore:cmake
+RUN echo | choco install -y --no-progress --ignore-package-exit-codes --params 
"/NoUpdate /InstallDir:C:\tools\msys64" msys2
+COPY msys2_install.sh C:/tools
+RUN C:\tools\msys64\usr\bin\bash.exe -lc "grep -rl 'repo.msys2.org/' 
/etc/pacman.d/mirrorlist.* | xargs sed -i 
's/repo.msys2.org\//mirrors.ustc.edu.cn\/msys2\//g'"
+RUN C:\tools\msys64\usr\bin\bash.exe -lc "pacman -Sy"
+RUN C:\tools\msys64\usr\bin\bash.exe -lc "sh /c/tools/msys2_install.sh"
+RUN C:\tools\msys64\usr\bin\bash.exe -lc "rm -rf /var/cache/pacman/pkg/*"
+
+# docker build --tag lygstate/windowsservercore:msys2 -f 
"../../../tests/docker/dockerfiles/msys2.docker" .
+# docker run -it lygstate/windowsservercore:msys2 cmd
+# docker push lygstate/windowsservercore:msys2
-- 
2.28.0.windows.1

[PATCH v5 02/11] block: Fixes nfs on msys2/mingw

2020-09-04 Thread Yonggang Luo

Signed-off-by: Yonggang Luo 
---
 block/nfs.c | 26 +-
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/block/nfs.c b/block/nfs.c
index 61a249a9fc..34b2cd5708 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -24,7 +24,9 @@
 
 #include "qemu/osdep.h"
 
+#if !defined(_WIN32)
 #include 
+#endif
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
@@ -51,6 +53,12 @@
 #define QEMU_NFS_MAX_PAGECACHE_SIZE (8388608 / NFS_BLKSIZE)
 #define QEMU_NFS_MAX_DEBUG_LEVEL 2
 
+#if defined (_WIN32)
+#define nfs_stat __stat64
+#else
+#define nfs_stat stat
+#endif
+
 typedef struct NFSClient {
 struct nfs_context *context;
 struct nfsfh *fh;
@@ -58,7 +66,7 @@ typedef struct NFSClient {
 bool has_zero_init;
 AioContext *aio_context;
 QemuMutex mutex;
-blkcnt_t st_blocks;
+int64_t st_size;
 bool cache_used;
 NFSServer *server;
 char *path;
@@ -70,7 +78,7 @@ typedef struct NFSRPC {
 int ret;
 int complete;
 QEMUIOVector *iov;
-struct stat *st;
+struct nfs_stat *st;
 Coroutine *co;
 NFSClient *client;
 } NFSRPC;
@@ -419,7 +427,7 @@ static int64_t nfs_client_open(NFSClient *client, 
BlockdevOptionsNfs *opts,
int flags, int open_flags, Error **errp)
 {
 int64_t ret = -EINVAL;
-struct stat st;
+struct nfs_stat st;
 char *file = NULL, *strp = NULL;
 
 qemu_mutex_init(&client->mutex);
@@ -545,7 +553,7 @@ static int64_t nfs_client_open(NFSClient *client, 
BlockdevOptionsNfs *opts,
 }
 
 ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
-client->st_blocks = st.st_blocks;
+client->st_size = st.st_size;
 client->has_zero_init = S_ISREG(st.st_mode);
 *strp = '/';
 goto out;
@@ -729,11 +737,11 @@ static int64_t 
nfs_get_allocated_file_size(BlockDriverState *bs)
 {
 NFSClient *client = bs->opaque;
 NFSRPC task = {0};
-struct stat st;
+struct nfs_stat st;
 
 if (bdrv_is_read_only(bs) &&
 !(bs->open_flags & BDRV_O_NOCACHE)) {
-return client->st_blocks * 512;
+return client->st_size;
 }
 
 task.bs = bs;
@@ -746,7 +754,7 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState 
*bs)
 nfs_set_events(client);
 BDRV_POLL_WHILE(bs, !task.complete);
 
-return (task.ret < 0 ? task.ret : st.st_blocks * 512);
+return (task.ret < 0 ? task.ret : st.st_size);
 }
 
 static int coroutine_fn
@@ -778,7 +786,7 @@ static int nfs_reopen_prepare(BDRVReopenState *state,
   BlockReopenQueue *queue, Error **errp)
 {
 NFSClient *client = state->bs->opaque;
-struct stat st;
+struct nfs_stat st;
 int ret = 0;
 
 if (state->flags & BDRV_O_RDWR && bdrv_is_read_only(state->bs)) {
@@ -800,7 +808,7 @@ static int nfs_reopen_prepare(BDRVReopenState *state,
nfs_get_error(client->context));
 return ret;
 }
-client->st_blocks = st.st_blocks;
+client->st_size = st.st_size;
 }
 
 return 0;
-- 
2.28.0.windows.1

[PATCH v5 01/11] Revert "configure: add --ninja option"

2020-09-04 Thread Yonggang Luo

This reverts commit 48328880fddf0145bdccc499160fb24dfabfbd41.

Signed-off-by: Yonggang Luo 
---
 configure | 16 +---
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/configure b/configure
index d3495e107f..5d8bf4d8bb 100755
--- a/configure
+++ b/configure
@@ -517,7 +517,6 @@ rng_none="no"
 secret_keyring=""
 libdaxctl=""
 meson=""
-ninja=""
 skip_meson=no
 gettext=""
 
@@ -984,8 +983,6 @@ for opt do
   ;;
   --meson=*) meson="$optarg"
   ;;
-  --ninja=*) ninja="$optarg"
-  ;;
   --smbd=*) smbd="$optarg"
   ;;
   --extra-cflags=*)
@@ -1758,7 +1755,6 @@ Advanced options (experts only):
   --python=PYTHON  use specified python [$python]
   --sphinx-build=SPHINXuse specified sphinx-build [$sphinx_build]
   --meson=MESONuse specified meson [$meson]
-  --ninja=NINJAuse specified ninja [$ninja]
   --smbd=SMBD  use specified smbd [$smbd]
   --with-git=GIT   use specified git [$git]
   --static enable static build [$static]
@@ -1995,16 +1991,6 @@ case "$meson" in
 *) meson=$(command -v meson) ;;
 esac
 
-# Probe for ninja (used for compdb)
-
-if test -z "$ninja"; then
-for c in ninja ninja-build samu; do
-if has $c; then
-ninja=$(command -v "$c")
-break
-fi
-done
-fi
 
 # Check that the C compiler works. Doing this here before testing
 # the host CPU ensures that we had a valid CC to autodetect the
@@ -7917,7 +7903,7 @@ fi
 mv $cross config-meson.cross
 
 rm -rf meson-private meson-info meson-logs
-NINJA=${ninja:-$PWD/ninjatool} $meson setup \
+NINJA=$PWD/ninjatool $meson setup \
 --prefix "${pre_prefix}$prefix" \
 --libdir "${pre_prefix}$libdir" \
 --libexecdir "${pre_prefix}$libexecdir" \
-- 
2.28.0.windows.1

[PATCH v5 00/11] Green the msys2 CI make

2020-09-04 Thread Yonggang Luo

Also it's fixes issues about make check

Yonggang Luo (11):
  Revert "configure: add --ninja option"
  block: Fixes nfs on msys2/mingw
  ci: fixes msys2 build by upgrading capstone to 4.0.2
  meson: upgrade meson for execute custom ninjatool under msys2 properly
  ci: Enable msys2 ci in cirrus
  tests: Trying fixes test-replication.c on msys2.
  block: get file-win32.c handle locking option consistence with
file-posix.c
  osdep: These function are only available on Non-Win32 system.
  meson: Fixes qapi tests.
  docker: Add win32/msys2/mingw64 docker
  ci: Enable Github actions.

 .cirrus.yml   | 24 +++
 .github/workflows/main.yml| 31 +
 block/file-win32.c| 23 +--
 block/nfs.c   | 26 +
 capstone  |  2 +-
 configure | 18 ++-
 include/qemu/osdep.h  |  2 +-
 meson |  2 +-
 scripts/ci/windows/msys2-build.sh | 27 ++
 scripts/ci/windows/msys2-download.bat |  4 
 scripts/ci/windows/msys2-install.sh   | 33 +++
 tests/docker/dockerfiles/msys2.docker | 11 +
 tests/qapi-schema/meson.build |  3 ++-
 tests/test-replication.c  | 17 ++
 14 files changed, 188 insertions(+), 35 deletions(-)
 create mode 100644 .github/workflows/main.yml
 create mode 100644 scripts/ci/windows/msys2-build.sh
 create mode 100644 scripts/ci/windows/msys2-download.bat
 create mode 100644 scripts/ci/windows/msys2-install.sh
 create mode 100644 tests/docker/dockerfiles/msys2.docker

-- 
2.28.0.windows.1

Re: make -i check resut for msys2

2020-09-04 Thread Yonggang Luo

On Fri, Sep 4, 2020 at 4:51 PM Kevin Wolf  wrote:

> Am 04.09.2020 um 08:03 hat Thomas Huth geschrieben:
> > On 04/09/2020 00.53, 罗勇刚(Yonggang Luo) wrote:
> > >
> > >
> > > On Thu, Sep 3, 2020 at 10:33 PM Thomas Huth  > > > wrote:
> > >
> > > On 03/09/2020 11.18, 罗勇刚(Yonggang Luo) wrote:
> > > [...]
> > > >   TESTcheck-unit: tests/test-replication.exe
> > > > **
> > > > ERROR:C:/work/xemu/qemu/tests/test-replication.c:136:make_temp:
> > > > assertion failed: (fd >= 0)
> > > > ERROR test-replication.exe - Bail out!
> > > > ERROR:C:/work/xemu/qemu/tests/test-replication.c:136:make_temp:
> > > > assertion failed: (fd >= 0)
> > >
> > > At least this one should be easy to fix: The test uses /tmp as
> > > hard-coded directory for temporary files. I think it should use
> > > g_get_tmp_dir() from glib to get that directory instead.
> > >
> > >  Thomas
> > >
> > > After fixes tmp path, how to fixes following error:
> > > $ tests/test-replication.exe
>
> > >
> > >
> > >
> > > # random seed: R02Sdf2e4ffc0e6fbe96624598386b538927
> > > 1..13
> > > # Start of replication tests
> > > # Start of primary tests
> > > Unexpected error in bdrv_open_inherit() at ../block.c:3456:
> > > Block protocol 'file' doesn't support the option 'locking'
> >
> > Not sure ... as a temporary test, try to remove the "locking=off"
> > strings from the test. If it then works, it might be worth discussing
> > with the block layer folks how to handle this test on Windows in the
> > best way. If it still does not work, it's maybe simply not worth the
> > effort to try to get this test running on Windows - and thus mark it
> > with CONFIG_POSIX in the Makefile / meson.build.
>
> This is a bug in file-win32. It reads "locking" from the options QDict,
> but doesn't delete it from it.
>
> Does the following help? (Only compile-tested.)
>
> If it works for you, I'll send it as a proper patch.
>
> Kevin
>
> diff --git a/block/file-win32.c b/block/file-win32.c
> index ab69bd811a..e2900c3a51 100644
> --- a/block/file-win32.c
> +++ b/block/file-win32.c
> @@ -299,6 +299,11 @@ static QemuOptsList raw_runtime_opts = {
>  .type = QEMU_OPT_STRING,
>  .help = "host AIO implementation (threads, native)",
>  },
> +{
> +.name = "locking",
> +.type = QEMU_OPT_STRING,
> +.help = "file locking mode (on/off/auto, default: auto)",
> +},
>  { /* end of list */ }
>  },
>  };
> @@ -333,6 +338,7 @@ static int raw_open(BlockDriverState *bs, QDict
> *options, int flags,
>  Error *local_err = NULL;
>  const char *filename;
>  bool use_aio;
> +OnOffAuto locking;
>  int ret;
>
>  s->type = FTYPE_FILE;
> @@ -343,10 +349,24 @@ static int raw_open(BlockDriverState *bs, QDict
> *options, int flags,
>  goto fail;
>  }
>
> -if (qdict_get_try_bool(options, "locking", false)) {
> +locking = qapi_enum_parse(&OnOffAuto_lookup,
> +  qemu_opt_get(opts, "locking"),
> +  ON_OFF_AUTO_AUTO, &local_err);
> +if (local_err) {
> +error_propagate(errp, local_err);
> +ret = -EINVAL;
> +goto fail;
> +}
> +switch (locking) {
> +case ON_OFF_AUTO_ON:
>  error_setg(errp, "locking=on is not supported on Windows");
>  ret = -EINVAL;
>  goto fail;
> +case ON_OFF_AUTO_OFF:
> +case ON_OFF_AUTO_AUTO:
> +break;
> +default:
> +g_assert_not_reached();
>  }
>
>  filename = qemu_opt_get(opts, "filename");
>
> Partial error fixed, new error are coming:
$ ./tests/test-replication.exe
# random seed: R02S3f4d1c01af2b0a046990e0235c481faf
1..13
# Start of replication tests
# Start of primary tests
ok 1 /replication/primary/read
ok 2 /replication/primary/write
ok 3 /replication/primary/start
ok 4 /replication/primary/stop
ok 5 /replication/primary/do_checkpoint
ok 6 /replication/primary/get_error_all
# End of primary tests
# Start of secondary tests
ok 7 /replication/secondary/read
ok 8 /replication/secondary/write
Unexpected error in bdrv_reopen_prepare() at ../block.c:4191:
Block format 'file' used by node '#block4287' does not support reopening
files

-- 
 此致
礼
罗勇刚
Yours
sincerely,
Yonggang Luo

Re: [RFC PATCH 00/12] hw: Forbid DMA write accesses to MMIO regions

2020-09-04 Thread Li Qiang

Philippe Mathieu-Daudé  于2020年9月3日周四 下午7:09写道：
>
> Hi,
>
> I'm not suppose to work on this but I couldn't sleep so kept
> wondering about this problem the whole night and eventually
> woke up to write this quickly, so comments are scarce, sorry.
>
> The first part is obvious anyway, simply pass MemTxAttrs argument.
>
> The main patch is:
> "exec/memattrs: Introduce MemTxAttrs::direct_access field".
> This way we can restrict accesses to ROM/RAM by setting the
> 'direct_access' field. Illegal accesses return MEMTX_BUS_ERROR.
>
> Next patch restrict PCI DMA accesses by setting the direct_access
> field.
>
> Finally we add an assertion for any DMA write access to indirect
> memory to kill a class of bug recently found by Alexander while
> fuzzing.
>

Hi Philippe,

I have reviewed your patches.
Your patch just deny the DMA write to MMIO for PCI device.

1. The DMA write to MMIO is allowed for P2P. Unconditionally deny
is not right I think. Maybe we can add some flag for the device as property
so the device can indicate whether it supports DMA to MMIO.
But this method needs define we should apply the restrict to
DMA to MMIO initiator or target. If the target, we need to find the
target PCI device.

2. I think the MMIO read maybe also suffers the reentrant issue If the
MMIO read handler
does complicated work.

3. As your patch just consider the PCI case. This reentrant is quite
complicated if we consider
the no-PCI the qemu_irq cases. I agree to address the PCI cases first.

Thanks,
Li Qiang



> Regards,
>
> Phil.
>
> Klaus Jensen (1):
>   pci: pass along the return value of dma_memory_rw
>
> Philippe Mathieu-Daudé (11):
>   dma: Let dma_memory_valid() take MemTxAttrs argument
>   dma: Let dma_memory_set() take MemTxAttrs argument
>   dma: Let dma_memory_rw_relaxed() take MemTxAttrs argument
>   dma: Let dma_memory_rw() take MemTxAttrs argument
>   dma: Let dma_memory_read/write() take MemTxAttrs argument
>   dma: Let dma_memory_map() take MemTxAttrs argument
>   docs/devel/loads-stores: Add regexp for DMA functions
>   dma: Let load/store DMA functions take MemTxAttrs argument
>   exec/memattrs: Introduce MemTxAttrs::direct_access field
>   hw/pci: Only allow PCI slave devices to write to direct memory
>   dma: Assert when device writes to indirect memory (such MMIO regions)
>
>  docs/devel/loads-stores.rst   |  2 ++
>  include/exec/memattrs.h   |  3 ++
>  include/hw/pci/pci.h  | 21 ++---
>  include/hw/ppc/spapr_vio.h| 26 +--
>  include/sysemu/dma.h  | 59 +--
>  dma-helpers.c | 12 ---
>  exec.c|  8 +
>  hw/arm/musicpal.c | 13 
>  hw/arm/smmu-common.c  |  3 +-
>  hw/arm/smmuv3.c   | 14 ++---
>  hw/core/generic-loader.c  |  3 +-
>  hw/display/virtio-gpu.c   |  8 +++--
>  hw/dma/pl330.c| 12 ---
>  hw/dma/sparc32_dma.c  | 16 ++
>  hw/dma/xlnx-zynq-devcfg.c |  6 ++--
>  hw/dma/xlnx_dpdma.c   | 10 +++---
>  hw/hyperv/vmbus.c |  8 +++--
>  hw/i386/amd_iommu.c   | 16 +-
>  hw/i386/intel_iommu.c | 28 ++---
>  hw/ide/ahci.c |  9 --
>  hw/ide/macio.c|  2 +-
>  hw/intc/pnv_xive.c|  7 +++--
>  hw/intc/spapr_xive.c  |  3 +-
>  hw/intc/xive.c|  7 +++--
>  hw/misc/bcm2835_property.c|  3 +-
>  hw/misc/macio/mac_dbdma.c | 10 +++---
>  hw/net/allwinner-sun8i-emac.c | 21 -
>  hw/net/ftgmac100.c| 25 +--
>  hw/net/imx_fec.c  | 32 ---
>  hw/nvram/fw_cfg.c | 16 ++
>  hw/pci-host/pnv_phb3.c|  5 +--
>  hw/pci-host/pnv_phb3_msi.c|  9 --
>  hw/pci-host/pnv_phb4.c|  7 +++--
>  hw/sd/allwinner-sdhost.c  | 14 +
>  hw/sd/sdhci.c | 35 +
>  hw/usb/hcd-dwc2.c |  8 ++---
>  hw/usb/hcd-ehci.c |  6 ++--
>  hw/usb/hcd-ohci.c | 28 ++---
>  hw/usb/libhw.c|  3 +-
>  hw/virtio/virtio.c|  6 ++--
>  trace-events  |  1 +
>  41 files changed, 334 insertions(+), 191 deletions(-)
>
> --
> 2.26.2
>
>

Re: [PATCH 08/17] hw/block/nvme: refactor aio submission

2020-09-04 Thread Klaus Jensen

On Sep  4 14:15, Keith Busch wrote:
> On Fri, Sep 04, 2020 at 10:38:39PM +0200, Klaus Jensen wrote:
> > On Sep  4 12:47, Keith Busch wrote:
> > > On Fri, Sep 04, 2020 at 04:19:47PM +0200, Klaus Jensen wrote:
> > > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > > index bfac3385cb64..3e32f39c7c1d 100644
> > > > --- a/hw/block/nvme.c
> > > > +++ b/hw/block/nvme.c
> > > > @@ -110,6 +110,7 @@ static const uint32_t 
> > > > nvme_feature_cap[NVME_FID_MAX] = {
> > > >  };
> > > >  
> > > >  static void nvme_process_sq(void *opaque);
> > > > +static void nvme_aio_cb(void *opaque, int ret);
> > > 
> > > You don't need the forward declaration here. Just move the
> > > implementation above where it's used. It looks safe: nvme_aio_cb()
> > > doesn't have any circular dependencies.
> > 
> > You are right, of course. But it is getting a circular dependency in a
> > later patch. I left it there to reduce code movement later.
> 
> Is that coming in a future patch? Not finding it in this series.
> 
> About the whole patch in general, are multiple aio's per-request coming
> in later patch as well? I didn't see any use for it here, and the
> overhead of dynamic allocation and zeroing a new struct in the IO path
> is a bit concerning for performance. I'd like to see your intended use
> for this.

Intended use-case was parallel aios. There are a lot of use cases for
this, DSM, metadata, block allocation tracking and zns zoneinfo.

But I'll rip it out of the series and repost so we can focus on multiple
namespaces.

Re: [PATCH 08/17] hw/block/nvme: refactor aio submission

2020-09-04 Thread Keith Busch

On Fri, Sep 04, 2020 at 10:38:39PM +0200, Klaus Jensen wrote:
> On Sep  4 12:47, Keith Busch wrote:
> > On Fri, Sep 04, 2020 at 04:19:47PM +0200, Klaus Jensen wrote:
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index bfac3385cb64..3e32f39c7c1d 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -110,6 +110,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] 
> > > = {
> > >  };
> > >  
> > >  static void nvme_process_sq(void *opaque);
> > > +static void nvme_aio_cb(void *opaque, int ret);
> > 
> > You don't need the forward declaration here. Just move the
> > implementation above where it's used. It looks safe: nvme_aio_cb()
> > doesn't have any circular dependencies.
> 
> You are right, of course. But it is getting a circular dependency in a
> later patch. I left it there to reduce code movement later.

Is that coming in a future patch? Not finding it in this series.

About the whole patch in general, are multiple aio's per-request coming
in later patch as well? I didn't see any use for it here, and the
overhead of dynamic allocation and zeroing a new struct in the IO path
is a bit concerning for performance. I'd like to see your intended use
for this.

Re: [PATCH 08/17] hw/block/nvme: refactor aio submission

2020-09-04 Thread Klaus Jensen

On Sep  4 12:47, Keith Busch wrote:
> On Fri, Sep 04, 2020 at 04:19:47PM +0200, Klaus Jensen wrote:
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index bfac3385cb64..3e32f39c7c1d 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -110,6 +110,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
> >  };
> >  
> >  static void nvme_process_sq(void *opaque);
> > +static void nvme_aio_cb(void *opaque, int ret);
> 
> You don't need the forward declaration here. Just move the
> implementation above where it's used. It looks safe: nvme_aio_cb()
> doesn't have any circular dependencies.

You are right, of course. But it is getting a circular dependency in a
later patch. I left it there to reduce code movement later.

Re: [PATCH 08/17] hw/block/nvme: refactor aio submission

2020-09-04 Thread Keith Busch

On Fri, Sep 04, 2020 at 04:19:47PM +0200, Klaus Jensen wrote:
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index bfac3385cb64..3e32f39c7c1d 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -110,6 +110,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
>  };
>  
>  static void nvme_process_sq(void *opaque);
> +static void nvme_aio_cb(void *opaque, int ret);

You don't need the forward declaration here. Just move the
implementation above where it's used. It looks safe: nvme_aio_cb()
doesn't have any circular dependencies.

Re: [PATCH 00/17] hw/block/nvme: multiple namespaces support

2020-09-04 Thread Klaus Jensen

On Sep  4 18:12, Philippe Mathieu-Daudé wrote:
> Hi Klaus,
> 
> On 9/4/20 4:19 PM, Klaus Jensen wrote:
> > From: Klaus Jensen 
> > 
> > This is the next round of my patches for the nvme device.
> > 
> > This includes a bit of cleanup and three new features:
> > 
> >   * refactored aio submission
> > This also adds support for multiple parallel AIOs per request which is 
> > in
> > preparation for DULBE, ZNS and metadata support. If it is found
> > controversial, it can easily be dropped from this series.
> > 
> >   * support for scatter/gather lists
> > 
> >   * multiple namespaces support through a new nvme-ns device
> > 
> > Finally, the series ends with changing the PCI vendor and device ID to get 
> > rid
> > of the internal Intel id and as a side-effect get rid of some Linux kernel
> > quirks that no longer applies.
> > 
> > "pci: pass along the return value of dma_memory_rw" has already been posted 
> > by
> > Philippe in another series, but since it is not applied yet, I am including 
> > it
> > here.
> > 
> > Gollu Appalanaidu (1):
> >   hw/block/nvme: add support for sgl bit bucket descriptor
> > 
> > Klaus Jensen (16):
> >   pci: pass along the return value of dma_memory_rw
> >   hw/block/nvme: handle dma errors
> >   hw/block/nvme: commonize nvme_rw error handling
> >   hw/block/nvme: alignment style fixes
> >   hw/block/nvme: add a lba to bytes helper
> >   hw/block/nvme: fix endian conversion
> >   hw/block/nvme: add symbolic command name to trace events
> >   hw/block/nvme: refactor aio submission
> >   hw/block/nvme: default request status to success
> >   hw/block/nvme: support multiple parallel aios per request
> >   hw/block/nvme: harden cmb access
> >   hw/block/nvme: add support for scatter gather lists
> >   hw/block/nvme: refactor identify active namespace id list
> >   hw/block/nvme: support multiple namespaces
> >   pci: allocate pci id for nvme
> >   hw/block/nvme: change controller pci id
> > 
> >  MAINTAINERS|   1 +
> >  docs/specs/nvme.txt|  23 +
> >  docs/specs/pci-ids.txt |   1 +
> >  hw/block/meson.build   |   2 +-
> >  hw/block/nvme-ns.c | 185 +
> >  hw/block/nvme-ns.h |  74 
> >  hw/block/nvme.c| 923 +++--
> >  hw/block/nvme.h| 126 +-
> >  hw/block/trace-events  |  21 +-
> >  hw/core/machine.c  |   1 +
> >  include/block/nvme.h   |   8 +-
> >  include/hw/pci/pci.h   |   4 +-
> 
> To ease the review, consider setup'ing scripts/git.orderfile,
> as it avoid reviewers to scroll patches in their email client.
> 

Oh wow. You learn something new everyday. That's really neat!

Thanks!


signature.asc
Description: PGP signature

Re: [PATCH 00/17] hw/block/nvme: multiple namespaces support

2020-09-04 Thread Philippe Mathieu-Daudé

Hi Klaus,

On 9/4/20 4:19 PM, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> This is the next round of my patches for the nvme device.
> 
> This includes a bit of cleanup and three new features:
> 
>   * refactored aio submission
> This also adds support for multiple parallel AIOs per request which is in
> preparation for DULBE, ZNS and metadata support. If it is found
> controversial, it can easily be dropped from this series.
> 
>   * support for scatter/gather lists
> 
>   * multiple namespaces support through a new nvme-ns device
> 
> Finally, the series ends with changing the PCI vendor and device ID to get rid
> of the internal Intel id and as a side-effect get rid of some Linux kernel
> quirks that no longer applies.
> 
> "pci: pass along the return value of dma_memory_rw" has already been posted by
> Philippe in another series, but since it is not applied yet, I am including it
> here.
> 
> Gollu Appalanaidu (1):
>   hw/block/nvme: add support for sgl bit bucket descriptor
> 
> Klaus Jensen (16):
>   pci: pass along the return value of dma_memory_rw
>   hw/block/nvme: handle dma errors
>   hw/block/nvme: commonize nvme_rw error handling
>   hw/block/nvme: alignment style fixes
>   hw/block/nvme: add a lba to bytes helper
>   hw/block/nvme: fix endian conversion
>   hw/block/nvme: add symbolic command name to trace events
>   hw/block/nvme: refactor aio submission
>   hw/block/nvme: default request status to success
>   hw/block/nvme: support multiple parallel aios per request
>   hw/block/nvme: harden cmb access
>   hw/block/nvme: add support for scatter gather lists
>   hw/block/nvme: refactor identify active namespace id list
>   hw/block/nvme: support multiple namespaces
>   pci: allocate pci id for nvme
>   hw/block/nvme: change controller pci id
> 
>  MAINTAINERS|   1 +
>  docs/specs/nvme.txt|  23 +
>  docs/specs/pci-ids.txt |   1 +
>  hw/block/meson.build   |   2 +-
>  hw/block/nvme-ns.c | 185 +
>  hw/block/nvme-ns.h |  74 
>  hw/block/nvme.c| 923 +++--
>  hw/block/nvme.h| 126 +-
>  hw/block/trace-events  |  21 +-
>  hw/core/machine.c  |   1 +
>  include/block/nvme.h   |   8 +-
>  include/hw/pci/pci.h   |   4 +-

To ease the review, consider setup'ing scripts/git.orderfile,
as it avoid reviewers to scroll patches in their email client.

Thanks,

Phil.

[PATCH 13/13] dma: Let dma_memory_map() take MemTxAttrs argument

2020-09-04 Thread Philippe Mathieu-Daudé

Let devices specify transaction attributes when calling
dma_memory_map().

Patch created mechanically using spatch with this script:

  @@
  expression E1, E2, E3, E4;
  @@
  - dma_memory_map(E1, E2, E3, E4)
  + dma_memory_map(E1, E2, E3, E4, MEMTXATTRS_UNSPECIFIED)

Signed-off-by: Philippe Mathieu-Daudé 
---
 include/hw/pci/pci.h| 3 ++-
 include/sysemu/dma.h| 5 +++--
 dma-helpers.c   | 3 ++-
 hw/display/virtio-gpu.c | 8 ++--
 hw/hyperv/vmbus.c   | 8 +---
 hw/ide/ahci.c   | 9 ++---
 hw/usb/libhw.c  | 3 ++-
 hw/virtio/virtio.c  | 6 --
 8 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 0c3217e019c..a221dfb3b08 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -831,7 +831,8 @@ static inline void *pci_dma_map(PCIDevice *dev, dma_addr_t 
addr,
 {
 void *buf;
 
-buf = dma_memory_map(pci_get_address_space(dev), addr, plen, dir);
+buf = dma_memory_map(pci_get_address_space(dev), addr, plen, dir,
+ MEMTXATTRS_UNSPECIFIED);
 return buf;
 }
 
diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index b9cb9c8944b..bb8b0a059f5 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -203,16 +203,17 @@ MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t 
addr,
  * @addr: address within that address space
  * @len: pointer to length of buffer; updated on return
  * @dir: indicates the transfer direction
+ * @attrs: memory attributes
  */
 static inline void *dma_memory_map(AddressSpace *as,
dma_addr_t addr, dma_addr_t *len,
-   DMADirection dir)
+   DMADirection dir, MemTxAttrs attrs)
 {
 hwaddr xlen = *len;
 void *p;
 
 p = address_space_map(as, addr, &xlen, dir == DMA_DIRECTION_FROM_DEVICE,
-  MEMTXATTRS_UNSPECIFIED);
+  attrs);
 *len = xlen;
 return p;
 }
diff --git a/dma-helpers.c b/dma-helpers.c
index 6c3b2200f16..0507a6f95b9 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -143,7 +143,8 @@ static void dma_blk_cb(void *opaque, int ret)
 while (dbs->sg_cur_index < dbs->sg->nsg) {
 cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
 cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
-mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir);
+mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir,
+ MEMTXATTRS_UNSPECIFIED);
 /*
  * Make reads deterministic in icount mode. Windows sometimes issues
  * disk read requests with overlapping SGs. It leads
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 5f0dd7c1500..be7f5cdee46 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -648,7 +648,9 @@ int virtio_gpu_create_mapping_iov(VirtIOGPU *g,
 hwaddr len = l;
 (*iov)[i].iov_len = l;
 (*iov)[i].iov_base = dma_memory_map(VIRTIO_DEVICE(g)->dma_as,
-a, &len, DMA_DIRECTION_TO_DEVICE);
+a, &len,
+DMA_DIRECTION_TO_DEVICE,
+MEMTXATTRS_UNSPECIFIED);
 if (addr) {
 (*addr)[i] = a;
 }
@@ -1049,7 +1051,9 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, 
size_t size,
 hwaddr len = res->iov[i].iov_len;
 res->iov[i].iov_base =
 dma_memory_map(VIRTIO_DEVICE(g)->dma_as,
-   res->addrs[i], &len, DMA_DIRECTION_TO_DEVICE);
+   res->addrs[i], &len,
+   DMA_DIRECTION_TO_DEVICE,
+   MEMTXATTRS_UNSPECIFIED);
 
 if (!res->iov[i].iov_base || len != res->iov[i].iov_len) {
 /* Clean up the half-a-mapping we just created... */
diff --git a/hw/hyperv/vmbus.c b/hw/hyperv/vmbus.c
index 75af6b83dde..56621d72e5b 100644
--- a/hw/hyperv/vmbus.c
+++ b/hw/hyperv/vmbus.c
@@ -372,7 +372,8 @@ static ssize_t gpadl_iter_io(GpadlIter *iter, void *buf, 
uint32_t len)
 
 maddr = (iter->gpadl->gfns[idx] << TARGET_PAGE_BITS) | off_in_page;
 
-iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir);
+iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir,
+   MEMTXATTRS_UNSPECIFIED);
 if (mlen != pgleft) {
 dma_memory_unmap(iter->as, iter->map, mlen, iter->dir, 0);
 iter->map = NULL;
@@ -488,7 +489,8 @@ int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, 
struct iovec *iov,
 goto err;
 }
 
-iov[ret_cnt].iov_base = dma_memory_map(sgl->as, a, &l, dir);
+iov[ret_cnt].io

[PATCH 12/13] dma: Let dma_memory_read/write() take MemTxAttrs argument

2020-09-04 Thread Philippe Mathieu-Daudé

Let devices specify transaction attributes when calling
dma_memory_read() or dma_memory_write().

Patch created mechanically using spatch with this script:

  @@
  expression E1, E2, E3, E4;
  @@
  (
  - dma_memory_read(E1, E2, E3, E4)
  + dma_memory_read(E1, E2, E3, E4, MEMTXATTRS_UNSPECIFIED)
  |
  - dma_memory_write(E1, E2, E3, E4)
  + dma_memory_write(E1, E2, E3, E4, MEMTXATTRS_UNSPECIFIED)
  )

Signed-off-by: Philippe Mathieu-Daudé 
---
 include/hw/ppc/spapr_vio.h|  6 --
 include/sysemu/dma.h  | 20 
 hw/arm/musicpal.c | 13 +++--
 hw/arm/smmu-common.c  |  3 ++-
 hw/arm/smmuv3.c   | 14 +-
 hw/core/generic-loader.c  |  3 ++-
 hw/dma/pl330.c| 12 
 hw/dma/sparc32_dma.c  | 16 ++--
 hw/dma/xlnx-zynq-devcfg.c |  6 --
 hw/dma/xlnx_dpdma.c   | 10 ++
 hw/i386/amd_iommu.c   | 16 +---
 hw/i386/intel_iommu.c | 28 +---
 hw/ide/macio.c|  2 +-
 hw/intc/xive.c|  7 ---
 hw/misc/bcm2835_property.c|  3 ++-
 hw/misc/macio/mac_dbdma.c | 10 ++
 hw/net/allwinner-sun8i-emac.c | 21 ++---
 hw/net/ftgmac100.c| 25 -
 hw/net/imx_fec.c  | 32 
 hw/nvram/fw_cfg.c |  9 ++---
 hw/pci-host/pnv_phb3.c|  5 +++--
 hw/pci-host/pnv_phb3_msi.c|  9 ++---
 hw/pci-host/pnv_phb4.c|  7 ---
 hw/sd/allwinner-sdhost.c  | 14 --
 hw/sd/sdhci.c | 35 ++-
 hw/usb/hcd-dwc2.c |  8 
 hw/usb/hcd-ehci.c |  6 --
 hw/usb/hcd-ohci.c | 18 +++---
 28 files changed, 221 insertions(+), 137 deletions(-)

diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h
index 6e5c0840248..8168f4fc5a5 100644
--- a/include/hw/ppc/spapr_vio.h
+++ b/include/hw/ppc/spapr_vio.h
@@ -102,14 +102,16 @@ static inline bool spapr_vio_dma_valid(SpaprVioDevice 
*dev, uint64_t taddr,
 static inline int spapr_vio_dma_read(SpaprVioDevice *dev, uint64_t taddr,
  void *buf, uint32_t size)
 {
-return (dma_memory_read(&dev->as, taddr, buf, size) != 0) ?
+return (dma_memory_read(&dev->as, taddr,
+buf, size, MEMTXATTRS_UNSPECIFIED) != 0) ?
 H_DEST_PARM : H_SUCCESS;
 }
 
 static inline int spapr_vio_dma_write(SpaprVioDevice *dev, uint64_t taddr,
   const void *buf, uint32_t size)
 {
-return (dma_memory_write(&dev->as, taddr, buf, size) != 0) ?
+return (dma_memory_write(&dev->as, taddr,
+ buf, size, MEMTXATTRS_UNSPECIFIED) != 0) ?
 H_DEST_PARM : H_SUCCESS;
 }
 
diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index 0695d430119..b9cb9c8944b 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -143,12 +143,14 @@ static inline MemTxResult dma_memory_rw(AddressSpace *as, 
dma_addr_t addr,
  * @addr: address within that address space
  * @buf: buffer with the data transferred
  * @len: length of the data transferred
+ * @attrs: memory transaction attributes
  */
 static inline MemTxResult dma_memory_read(AddressSpace *as, dma_addr_t addr,
-  void *buf, dma_addr_t len)
+  void *buf, dma_addr_t len,
+  MemTxAttrs attrs)
 {
 return dma_memory_rw(as, addr, buf, len,
- DMA_DIRECTION_TO_DEVICE, MEMTXATTRS_UNSPECIFIED);
+ DMA_DIRECTION_TO_DEVICE, attrs);
 }
 
 /**
@@ -162,12 +164,14 @@ static inline MemTxResult dma_memory_read(AddressSpace 
*as, dma_addr_t addr,
  * @addr: address within that address space
  * @buf: buffer with the data transferred
  * @len: the number of bytes to write
+ * @attrs: memory transaction attributes
  */
 static inline MemTxResult dma_memory_write(AddressSpace *as, dma_addr_t addr,
-   const void *buf, dma_addr_t len)
+   const void *buf, dma_addr_t len,
+   MemTxAttrs attrs)
 {
 return dma_memory_rw(as, addr, (void *)buf, len,
- DMA_DIRECTION_FROM_DEVICE, MEMTXATTRS_UNSPECIFIED);
+ DMA_DIRECTION_FROM_DEVICE, attrs);
 }
 
 /**
@@ -240,7 +244,7 @@ static inline void dma_memory_unmap(AddressSpace *as,
 dma_addr_t addr) \
 {   \
 uint##_bits##_t val;\
-dma_memory_read(as, addr, &val, (_bits) / 8);   \
+dma_memory_read(as, addr, &val, (

[PATCH 10/13] dma: Let dma_memory_rw_relaxed() take MemTxAttrs argument

2020-09-04 Thread Philippe Mathieu-Daudé

We will add the MemTxAttrs argument to dma_memory_rw() in
the next commit. Since dma_memory_rw_relaxed() is only used
by dma_memory_rw(), modify it first in a separate commit to
keep the next commit easier to review.

Signed-off-by: Philippe Mathieu-Daudé 
---
 include/sysemu/dma.h | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index d0381f9ae9b..59331ec0bd3 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -83,9 +83,10 @@ static inline bool dma_memory_valid(AddressSpace *as,
 static inline MemTxResult dma_memory_rw_relaxed(AddressSpace *as,
 dma_addr_t addr,
 void *buf, dma_addr_t len,
-DMADirection dir)
+DMADirection dir,
+MemTxAttrs attrs)
 {
-return address_space_rw(as, addr, MEMTXATTRS_UNSPECIFIED,
+return address_space_rw(as, addr, attrs,
 buf, len, dir == DMA_DIRECTION_FROM_DEVICE);
 }
 
@@ -93,7 +94,9 @@ static inline MemTxResult 
dma_memory_read_relaxed(AddressSpace *as,
   dma_addr_t addr,
   void *buf, dma_addr_t len)
 {
-return dma_memory_rw_relaxed(as, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
+return dma_memory_rw_relaxed(as, addr, buf, len,
+ DMA_DIRECTION_TO_DEVICE,
+ MEMTXATTRS_UNSPECIFIED);
 }
 
 static inline MemTxResult dma_memory_write_relaxed(AddressSpace *as,
@@ -102,7 +105,8 @@ static inline MemTxResult 
dma_memory_write_relaxed(AddressSpace *as,
dma_addr_t len)
 {
 return dma_memory_rw_relaxed(as, addr, (void *)buf, len,
- DMA_DIRECTION_FROM_DEVICE);
+ DMA_DIRECTION_FROM_DEVICE,
+ MEMTXATTRS_UNSPECIFIED);
 }
 
 /**
@@ -124,7 +128,8 @@ static inline MemTxResult dma_memory_rw(AddressSpace *as, 
dma_addr_t addr,
 {
 dma_barrier(as, dir);
 
-return dma_memory_rw_relaxed(as, addr, buf, len, dir);
+return dma_memory_rw_relaxed(as, addr, buf, len, dir,
+ MEMTXATTRS_UNSPECIFIED);
 }
 
 /**
-- 
2.26.2

[PATCH 07/13] dma: Let dma_memory_write() propagate MemTxResult

2020-09-04 Thread Philippe Mathieu-Daudé

dma_memory_rw_relaxed() returns a MemTxResult type.
Do not discard it, return it to the caller.

Signed-off-by: Philippe Mathieu-Daudé 
---
 include/sysemu/dma.h | 22 ++
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index 2961a96ad67..f4ade067a46 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -96,8 +96,10 @@ static inline MemTxResult 
dma_memory_read_relaxed(AddressSpace *as,
 return dma_memory_rw_relaxed(as, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
 }
 
-static inline int dma_memory_write_relaxed(AddressSpace *as, dma_addr_t addr,
-   const void *buf, dma_addr_t len)
+static inline MemTxResult dma_memory_write_relaxed(AddressSpace *as,
+   dma_addr_t addr,
+   const void *buf,
+   dma_addr_t len)
 {
 return dma_memory_rw_relaxed(as, addr, (void *)buf, len,
  DMA_DIRECTION_FROM_DEVICE);
@@ -143,8 +145,20 @@ static inline MemTxResult dma_memory_read(AddressSpace 
*as, dma_addr_t addr,
 return dma_memory_rw(as, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
 }
 
-static inline int dma_memory_write(AddressSpace *as, dma_addr_t addr,
-   const void *buf, dma_addr_t len)
+/**
+ * address_space_write: Write to address space from DMA controller.
+ *
+ * Return a MemTxResult indicating whether the operation succeeded
+ * or failed (eg unassigned memory, device rejected the transaction,
+ * IOMMU fault).
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @buf: buffer with the data transferred
+ * @len: the number of bytes to write
+ */
+static inline MemTxResult dma_memory_write(AddressSpace *as, dma_addr_t addr,
+   const void *buf, dma_addr_t len)
 {
 return dma_memory_rw(as, addr, (void *)buf, len,
  DMA_DIRECTION_FROM_DEVICE);
-- 
2.26.2

[PATCH 11/13] dma: Let dma_memory_rw() take MemTxAttrs argument

2020-09-04 Thread Philippe Mathieu-Daudé

Let devices specify transaction attributes when calling
dma_memory_rw().

Signed-off-by: Philippe Mathieu-Daudé 
---
 include/hw/pci/pci.h |  3 ++-
 include/sysemu/dma.h | 11 ++-
 dma-helpers.c|  3 ++-
 hw/intc/spapr_xive.c |  3 ++-
 hw/usb/hcd-ohci.c| 10 ++
 5 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 896cef9ad47..0c3217e019c 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -788,7 +788,8 @@ static inline AddressSpace *pci_get_address_space(PCIDevice 
*dev)
 static inline int pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
  void *buf, dma_addr_t len, DMADirection dir)
 {
-return dma_memory_rw(pci_get_address_space(dev), addr, buf, len, dir);
+return dma_memory_rw(pci_get_address_space(dev), addr, buf, len,
+ dir, MEMTXATTRS_UNSPECIFIED);
 }
 
 static inline int pci_dma_read(PCIDevice *dev, dma_addr_t addr,
diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index 59331ec0bd3..0695d430119 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -121,15 +121,15 @@ static inline MemTxResult 
dma_memory_write_relaxed(AddressSpace *as,
  * @buf: buffer with the data transferred
  * @len: the number of bytes to read or write
  * @dir: indicates the transfer direction
+ * @attrs: memory transaction attributes
  */
 static inline MemTxResult dma_memory_rw(AddressSpace *as, dma_addr_t addr,
 void *buf, dma_addr_t len,
-DMADirection dir)
+DMADirection dir, MemTxAttrs attrs)
 {
 dma_barrier(as, dir);
 
-return dma_memory_rw_relaxed(as, addr, buf, len, dir,
- MEMTXATTRS_UNSPECIFIED);
+return dma_memory_rw_relaxed(as, addr, buf, len, dir, attrs);
 }
 
 /**
@@ -147,7 +147,8 @@ static inline MemTxResult dma_memory_rw(AddressSpace *as, 
dma_addr_t addr,
 static inline MemTxResult dma_memory_read(AddressSpace *as, dma_addr_t addr,
   void *buf, dma_addr_t len)
 {
-return dma_memory_rw(as, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
+return dma_memory_rw(as, addr, buf, len,
+ DMA_DIRECTION_TO_DEVICE, MEMTXATTRS_UNSPECIFIED);
 }
 
 /**
@@ -166,7 +167,7 @@ static inline MemTxResult dma_memory_write(AddressSpace 
*as, dma_addr_t addr,
const void *buf, dma_addr_t len)
 {
 return dma_memory_rw(as, addr, (void *)buf, len,
- DMA_DIRECTION_FROM_DEVICE);
+ DMA_DIRECTION_FROM_DEVICE, MEMTXATTRS_UNSPECIFIED);
 }
 
 /**
diff --git a/dma-helpers.c b/dma-helpers.c
index 6a9735386dc..6c3b2200f16 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -305,7 +305,8 @@ static uint64_t dma_buf_rw(uint8_t *ptr, int32_t len, 
QEMUSGList *sg,
 while (len > 0) {
 ScatterGatherEntry entry = sg->sg[sg_cur_index++];
 int32_t xfer = MIN(len, entry.len);
-dma_memory_rw(sg->as, entry.base, ptr, xfer, dir);
+dma_memory_rw(sg->as, entry.base, ptr, xfer, dir,
+  MEMTXATTRS_UNSPECIFIED);
 ptr += xfer;
 len -= xfer;
 resid -= xfer;
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
index 4bd0d606ba1..dbf73a8bf47 100644
--- a/hw/intc/spapr_xive.c
+++ b/hw/intc/spapr_xive.c
@@ -1666,7 +1666,8 @@ static target_ulong h_int_esb(PowerPCCPU *cpu,
 mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset;
 
 if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8,
-  (flags & SPAPR_XIVE_ESB_STORE))) {
+  (flags & SPAPR_XIVE_ESB_STORE),
+  MEMTXATTRS_UNSPECIFIED)) {
 qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%"
   HWADDR_PRIx "\n", mmio_addr);
 return H_HARDWARE;
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index 1e6e85e86a8..bac1adf439c 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -586,7 +586,8 @@ static int ohci_copy_td(OHCIState *ohci, struct ohci_td *td,
 if (n > len)
 n = len;
 
-if (dma_memory_rw(ohci->as, ptr + ohci->localmem_base, buf, n, dir)) {
+if (dma_memory_rw(ohci->as, ptr + ohci->localmem_base, buf,
+  n, dir, MEMTXATTRS_UNSPECIFIED)) {
 return -1;
 }
 if (n == len) {
@@ -595,7 +596,7 @@ static int ohci_copy_td(OHCIState *ohci, struct ohci_td *td,
 ptr = td->be & ~0xfffu;
 buf += n;
 if (dma_memory_rw(ohci->as, ptr + ohci->localmem_base, buf,
-  len - n, dir)) {
+  len - n, dir, MEMTXATTRS_UNSPECIFIED)) {
 return -1;
 }
 return 0;
@@ -613,7 +614,8 @@ static int ohci_copy_iso_td(OHCIState *ohci,
 if (n > len)
 n = len;
 
-if (dma_memory_rw(ohc

[PATCH 06/13] dma: Let dma_memory_read() propagate MemTxResult

2020-09-04 Thread Philippe Mathieu-Daudé

dma_memory_rw_relaxed() returns a MemTxResult type.
Do not discard it, return it to the caller.

Signed-off-by: Philippe Mathieu-Daudé 
---
 include/sysemu/dma.h | 21 +
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index 661d7d0ca88..2961a96ad67 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -89,8 +89,9 @@ static inline MemTxResult dma_memory_rw_relaxed(AddressSpace 
*as,
 buf, len, dir == DMA_DIRECTION_FROM_DEVICE);
 }
 
-static inline int dma_memory_read_relaxed(AddressSpace *as, dma_addr_t addr,
-  void *buf, dma_addr_t len)
+static inline MemTxResult dma_memory_read_relaxed(AddressSpace *as,
+  dma_addr_t addr,
+  void *buf, dma_addr_t len)
 {
 return dma_memory_rw_relaxed(as, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
 }
@@ -124,8 +125,20 @@ static inline MemTxResult dma_memory_rw(AddressSpace *as, 
dma_addr_t addr,
 return dma_memory_rw_relaxed(as, addr, buf, len, dir);
 }
 
-static inline int dma_memory_read(AddressSpace *as, dma_addr_t addr,
-  void *buf, dma_addr_t len)
+/**
+ * dma_memory_read: Read from an address space from DMA controller.
+ *
+ * Return a MemTxResult indicating whether the operation succeeded
+ * or failed (eg unassigned memory, device rejected the transaction,
+ * IOMMU fault).  Called within RCU critical section.
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @buf: buffer with the data transferred
+ * @len: length of the data transferred
+ */
+static inline MemTxResult dma_memory_read(AddressSpace *as, dma_addr_t addr,
+  void *buf, dma_addr_t len)
 {
 return dma_memory_rw(as, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
 }
-- 
2.26.2

[PATCH 08/13] dma: Let dma_memory_valid() take MemTxAttrs argument

2020-09-04 Thread Philippe Mathieu-Daudé

Let devices specify transaction attributes when calling
dma_memory_valid().

Signed-off-by: Philippe Mathieu-Daudé 
---
 include/hw/ppc/spapr_vio.h | 2 +-
 include/sysemu/dma.h   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h
index bed7df60e35..f134f6cf574 100644
--- a/include/hw/ppc/spapr_vio.h
+++ b/include/hw/ppc/spapr_vio.h
@@ -96,7 +96,7 @@ static inline void spapr_vio_irq_pulse(SpaprVioDevice *dev)
 static inline bool spapr_vio_dma_valid(SpaprVioDevice *dev, uint64_t taddr,
uint32_t size, DMADirection dir)
 {
-return dma_memory_valid(&dev->as, taddr, size, dir);
+return dma_memory_valid(&dev->as, taddr, size, dir, 
MEMTXATTRS_UNSPECIFIED);
 }
 
 static inline int spapr_vio_dma_read(SpaprVioDevice *dev, uint64_t taddr,
diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index f4ade067a46..b322aa5947b 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -73,11 +73,11 @@ static inline void dma_barrier(AddressSpace *as, 
DMADirection dir)
  * dma_memory_{read,write}() and check for errors */
 static inline bool dma_memory_valid(AddressSpace *as,
 dma_addr_t addr, dma_addr_t len,
-DMADirection dir)
+DMADirection dir, MemTxAttrs attrs)
 {
 return address_space_access_valid(as, addr, len,
   dir == DMA_DIRECTION_FROM_DEVICE,
-  MEMTXATTRS_UNSPECIFIED);
+  attrs);
 }
 
 static inline MemTxResult dma_memory_rw_relaxed(AddressSpace *as,
-- 
2.26.2

[PATCH 09/13] dma: Let dma_memory_set() take MemTxAttrs argument

2020-09-04 Thread Philippe Mathieu-Daudé

Let devices specify transaction attributes when calling
dma_memory_set().

Signed-off-by: Philippe Mathieu-Daudé 
---
 include/hw/ppc/spapr_vio.h | 3 ++-
 include/sysemu/dma.h   | 3 ++-
 dma-helpers.c  | 5 ++---
 hw/nvram/fw_cfg.c  | 3 ++-
 4 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h
index f134f6cf574..6e5c0840248 100644
--- a/include/hw/ppc/spapr_vio.h
+++ b/include/hw/ppc/spapr_vio.h
@@ -116,7 +116,8 @@ static inline int spapr_vio_dma_write(SpaprVioDevice *dev, 
uint64_t taddr,
 static inline int spapr_vio_dma_set(SpaprVioDevice *dev, uint64_t taddr,
 uint8_t c, uint32_t size)
 {
-return (dma_memory_set(&dev->as, taddr, c, size) != 0) ?
+return (dma_memory_set(&dev->as, taddr,
+   c, size, MEMTXATTRS_UNSPECIFIED) != 0) ?
 H_DEST_PARM : H_SUCCESS;
 }
 
diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index b322aa5947b..d0381f9ae9b 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -175,9 +175,10 @@ static inline MemTxResult dma_memory_write(AddressSpace 
*as, dma_addr_t addr,
  * @addr: address within that address space
  * @c: constant byte to fill the memory
  * @len: the number of bytes to fill with the constant byte
+ * @attrs: memory transaction attributes
  */
 MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr,
-   uint8_t c, dma_addr_t len);
+   uint8_t c, dma_addr_t len, MemTxAttrs attrs);
 
 /**
  * address_space_map: Map a physical memory region into a DMA controller
diff --git a/dma-helpers.c b/dma-helpers.c
index 4a9e37d6d06..6a9735386dc 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -19,7 +19,7 @@
 /* #define DEBUG_IOMMU */
 
 MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr,
-   uint8_t c, dma_addr_t len)
+   uint8_t c, dma_addr_t len, MemTxAttrs attrs)
 {
 dma_barrier(as, DMA_DIRECTION_FROM_DEVICE);
 
@@ -31,8 +31,7 @@ MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr,
 memset(fillbuf, c, FILLBUF_SIZE);
 while (len > 0) {
 l = len < FILLBUF_SIZE ? len : FILLBUF_SIZE;
-error |= address_space_write(as, addr, MEMTXATTRS_UNSPECIFIED,
- fillbuf, l);
+error |= address_space_write(as, addr, attrs, fillbuf, l);
 len -= l;
 addr += l;
 }
diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index f3a4728288e..a15de06a10c 100644
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -397,7 +397,8 @@ static void fw_cfg_dma_transfer(FWCfgState *s)
  * tested before.
  */
 if (read) {
-if (dma_memory_set(s->dma_as, dma.address, 0, len)) {
+if (dma_memory_set(s->dma_as, dma.address, 0, len,
+   MEMTXATTRS_UNSPECIFIED)) {
 dma.control |= FW_CFG_DMA_CTL_ERROR;
 }
 }
-- 
2.26.2

[PATCH 04/13] dma: Let dma_memory_set() propagate MemTxResult

2020-09-04 Thread Philippe Mathieu-Daudé

address_space_write() returns a MemTxResult type.
Do not discard it, return it to the caller.

Signed-off-by: Philippe Mathieu-Daudé 
---
 include/sysemu/dma.h | 15 ++-
 dma-helpers.c|  7 ---
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index 19bc9ad1b69..ad8a3f82f47 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -123,7 +123,20 @@ static inline int dma_memory_write(AddressSpace *as, 
dma_addr_t addr,
  DMA_DIRECTION_FROM_DEVICE);
 }
 
-int dma_memory_set(AddressSpace *as, dma_addr_t addr, uint8_t c, dma_addr_t 
len);
+/**
+ * dma_memory_set: Fill memory with a constant byte from DMA controller.
+ *
+ * Return a MemTxResult indicating whether the operation succeeded
+ * or failed (eg unassigned memory, device rejected the transaction,
+ * IOMMU fault).
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @c: constant byte to fill the memory
+ * @len: the number of bytes to fill with the constant byte
+ */
+MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr,
+   uint8_t c, dma_addr_t len);
 
 /**
  * address_space_map: Map a physical memory region into a DMA controller
diff --git a/dma-helpers.c b/dma-helpers.c
index 41ef24a63b6..4a9e37d6d06 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -1,7 +1,7 @@
 /*
  * DMA helper functions
  *
- * Copyright (c) 2009 Red Hat
+ * Copyright (c) 2009,2020 Red Hat
  *
  * This work is licensed under the terms of the GNU General Public License
  * (GNU GPL), version 2 or later.
@@ -18,14 +18,15 @@
 
 /* #define DEBUG_IOMMU */
 
-int dma_memory_set(AddressSpace *as, dma_addr_t addr, uint8_t c, dma_addr_t 
len)
+MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr,
+   uint8_t c, dma_addr_t len)
 {
 dma_barrier(as, DMA_DIRECTION_FROM_DEVICE);
 
 #define FILLBUF_SIZE 512
 uint8_t fillbuf[FILLBUF_SIZE];
 int l;
-bool error = false;
+MemTxResult error = MEMTX_OK;
 
 memset(fillbuf, c, FILLBUF_SIZE);
 while (len > 0) {
-- 
2.26.2

[PATCH 05/13] dma: Let dma_memory_rw() propagate MemTxResult

2020-09-04 Thread Philippe Mathieu-Daudé

address_space_rw() returns a MemTxResult type.
Do not discard it, return it to the caller.

Signed-off-by: Philippe Mathieu-Daudé 
---
 include/sysemu/dma.h | 30 ++
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index ad8a3f82f47..661d7d0ca88 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -80,12 +80,13 @@ static inline bool dma_memory_valid(AddressSpace *as,
   MEMTXATTRS_UNSPECIFIED);
 }
 
-static inline int dma_memory_rw_relaxed(AddressSpace *as, dma_addr_t addr,
-void *buf, dma_addr_t len,
-DMADirection dir)
+static inline MemTxResult dma_memory_rw_relaxed(AddressSpace *as,
+dma_addr_t addr,
+void *buf, dma_addr_t len,
+DMADirection dir)
 {
-return (bool)address_space_rw(as, addr, MEMTXATTRS_UNSPECIFIED,
-  buf, len, dir == DMA_DIRECTION_FROM_DEVICE);
+return address_space_rw(as, addr, MEMTXATTRS_UNSPECIFIED,
+buf, len, dir == DMA_DIRECTION_FROM_DEVICE);
 }
 
 static inline int dma_memory_read_relaxed(AddressSpace *as, dma_addr_t addr,
@@ -101,9 +102,22 @@ static inline int dma_memory_write_relaxed(AddressSpace 
*as, dma_addr_t addr,
  DMA_DIRECTION_FROM_DEVICE);
 }
 
-static inline int dma_memory_rw(AddressSpace *as, dma_addr_t addr,
-void *buf, dma_addr_t len,
-DMADirection dir)
+/**
+ * dma_memory_rw: Read from or write to an address space from DMA controller.
+ *
+ * Return a MemTxResult indicating whether the operation succeeded
+ * or failed (eg unassigned memory, device rejected the transaction,
+ * IOMMU fault).
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @buf: buffer with the data transferred
+ * @len: the number of bytes to read or write
+ * @dir: indicates the transfer direction
+ */
+static inline MemTxResult dma_memory_rw(AddressSpace *as, dma_addr_t addr,
+void *buf, dma_addr_t len,
+DMADirection dir)
 {
 dma_barrier(as, dir);
 
-- 
2.26.2

[PATCH 01/13] pci: pass along the return value of dma_memory_rw

2020-09-04 Thread Philippe Mathieu-Daudé

From: Klaus Jensen 

Some might actually care about the return value of dma_memory_rw. So
let us pass it along instead of ignoring it.

There are no existing users of the return value, so this patch should be
safe.

Signed-off-by: Klaus Jensen 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
Acked-by: Keith Busch 
Message-Id: <20191011070141.188713-2-...@irrelevant.dk>
Signed-off-by: Philippe Mathieu-Daudé 
---
 include/hw/pci/pci.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 4ca7258b5b7..896cef9ad47 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -788,8 +788,7 @@ static inline AddressSpace *pci_get_address_space(PCIDevice 
*dev)
 static inline int pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
  void *buf, dma_addr_t len, DMADirection dir)
 {
-dma_memory_rw(pci_get_address_space(dev), addr, buf, len, dir);
-return 0;
+return dma_memory_rw(pci_get_address_space(dev), addr, buf, len, dir);
 }
 
 static inline int pci_dma_read(PCIDevice *dev, dma_addr_t addr,
-- 
2.26.2

[PATCH 03/13] dma: Document address_space_map/address_space_unmap() prototypes

2020-09-04 Thread Philippe Mathieu-Daudé

Add documentation based on address_space_map / address_space_unmap.

Signed-off-by: Philippe Mathieu-Daudé 
---
 include/sysemu/dma.h | 30 +-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index 80c5bc3e02d..19bc9ad1b69 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -1,7 +1,7 @@
 /*
  * DMA helper functions
  *
- * Copyright (c) 2009 Red Hat
+ * Copyright (c) 2009, 2020 Red Hat
  *
  * This work is licensed under the terms of the GNU General Public License
  * (GNU GPL), version 2 or later.
@@ -125,6 +125,20 @@ static inline int dma_memory_write(AddressSpace *as, 
dma_addr_t addr,
 
 int dma_memory_set(AddressSpace *as, dma_addr_t addr, uint8_t c, dma_addr_t 
len);
 
+/**
+ * address_space_map: Map a physical memory region into a DMA controller
+ *virtual address
+ *
+ * May map a subset of the requested range, given by and returned in @plen.
+ * May return %NULL and set *@plen to zero(0), if resources needed to perform
+ * the mapping are exhausted.
+ * Use only for reads OR writes - not for read-modify-write operations.
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @len: pointer to length of buffer; updated on return
+ * @dir: indicates the transfer direction
+ */
 static inline void *dma_memory_map(AddressSpace *as,
dma_addr_t addr, dma_addr_t *len,
DMADirection dir)
@@ -138,6 +152,20 @@ static inline void *dma_memory_map(AddressSpace *as,
 return p;
 }
 
+/**
+ * address_space_unmap: Unmaps a memory region previously mapped
+ *  by dma_memory_map()
+ *
+ * Will also mark the memory as dirty if @dir == %DMA_DIRECTION_FROM_DEVICE.
+ * @access_len gives the amount of memory that was actually read or written
+ * by the caller.
+ *
+ * @as: #AddressSpace used
+ * @buffer: host pointer as returned by address_space_map()
+ * @len: buffer length as returned by address_space_map()
+ * @dir: indicates the transfer direction
+ * @access_len: amount of data actually transferred
+ */
 static inline void dma_memory_unmap(AddressSpace *as,
 void *buffer, dma_addr_t len,
 DMADirection dir, dma_addr_t access_len)
-- 
2.26.2

[PATCH 02/13] docs/devel/loads-stores: Add regexp for DMA functions

2020-09-04 Thread Philippe Mathieu-Daudé

Signed-off-by: Philippe Mathieu-Daudé 
---
 docs/devel/loads-stores.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/devel/loads-stores.rst b/docs/devel/loads-stores.rst
index 9a944ef1af6..5b20f907e4d 100644
--- a/docs/devel/loads-stores.rst
+++ b/docs/devel/loads-stores.rst
@@ -477,6 +477,8 @@ make sure our existing code is doing things correctly.
 
 Regexes for git grep
  - ``\``
+ - ``\``
+ - ``\``
 
 ``pci_dma_*`` and ``{ld,st}*_pci_dma``
 ~~
-- 
2.26.2

[PATCH 00/13] dma: Let the DMA API take MemTxAttrs argument and propagate MemTxResult

2020-09-04 Thread Philippe Mathieu-Daudé

Salvaging cleanups patches from the RFC series "Forbid DMA write
accesses to MMIO regions" [*], propagating MemTxResult and
adding documentation.

[*] https://www.mail-archive.com/qemu-block@nongnu.org/msg72924.html

Klaus Jensen (1):
  pci: pass along the return value of dma_memory_rw

Philippe Mathieu-Daudé (12):
  docs/devel/loads-stores: Add regexp for DMA functions
  dma: Document address_space_map/address_space_unmap() prototypes
  dma: Let dma_memory_set() propagate MemTxResult
  dma: Let dma_memory_rw() propagate MemTxResult
  dma: Let dma_memory_read() propagate MemTxResult
  dma: Let dma_memory_write() propagate MemTxResult
  dma: Let dma_memory_valid() take MemTxAttrs argument
  dma: Let dma_memory_set() take MemTxAttrs argument
  dma: Let dma_memory_rw_relaxed() take MemTxAttrs argument
  dma: Let dma_memory_rw() take MemTxAttrs argument
  dma: Let dma_memory_read/write() take MemTxAttrs argument
  dma: Let dma_memory_map() take MemTxAttrs argument

 docs/devel/loads-stores.rst   |   2 +
 include/hw/pci/pci.h  |   7 +-
 include/hw/ppc/spapr_vio.h|  11 ++-
 include/sysemu/dma.h  | 156 +++---
 dma-helpers.c |  16 ++--
 hw/arm/musicpal.c |  13 +--
 hw/arm/smmu-common.c  |   3 +-
 hw/arm/smmuv3.c   |  14 +--
 hw/core/generic-loader.c  |   3 +-
 hw/display/virtio-gpu.c   |   8 +-
 hw/dma/pl330.c|  12 ++-
 hw/dma/sparc32_dma.c  |  16 ++--
 hw/dma/xlnx-zynq-devcfg.c |   6 +-
 hw/dma/xlnx_dpdma.c   |  10 ++-
 hw/hyperv/vmbus.c |   8 +-
 hw/i386/amd_iommu.c   |  16 ++--
 hw/i386/intel_iommu.c |  28 +++---
 hw/ide/ahci.c |   9 +-
 hw/ide/macio.c|   2 +-
 hw/intc/spapr_xive.c  |   3 +-
 hw/intc/xive.c|   7 +-
 hw/misc/bcm2835_property.c|   3 +-
 hw/misc/macio/mac_dbdma.c |  10 ++-
 hw/net/allwinner-sun8i-emac.c |  21 +++--
 hw/net/ftgmac100.c|  25 --
 hw/net/imx_fec.c  |  32 ---
 hw/nvram/fw_cfg.c |  12 ++-
 hw/pci-host/pnv_phb3.c|   5 +-
 hw/pci-host/pnv_phb3_msi.c|   9 +-
 hw/pci-host/pnv_phb4.c|   7 +-
 hw/sd/allwinner-sdhost.c  |  14 +--
 hw/sd/sdhci.c |  35 +---
 hw/usb/hcd-dwc2.c |   8 +-
 hw/usb/hcd-ehci.c |   6 +-
 hw/usb/hcd-ohci.c |  28 +++---
 hw/usb/libhw.c|   3 +-
 hw/virtio/virtio.c|   6 +-
 37 files changed, 385 insertions(+), 189 deletions(-)

-- 
2.26.2

Re: [PATCH 0/3] block/nvme: Use NvmeBar structure from "block/nvme.h"

2020-09-04 Thread Klaus Jensen

On Sep  4 14:41, Philippe Mathieu-Daudé wrote:
> Cleanups in the NVMeRegs structure:
> - Use the already existing NvmeBar structure from "block/nvme.h"
> - Pair doorbell registers
> 
> Based-on: <20200903122803.405265-1-phi...@redhat.com>
> 
> Philippe Mathieu-Daudé (3):
>   block/nvme: Group controller registers in NVMeRegs structure
>   block/nvme: Use generic NvmeBar structure
>   block/nvme: Pair doorbell registers
> 
>  block/nvme.c | 43 +++
>  1 file changed, 15 insertions(+), 28 deletions(-)
> 

Reviewed-by: Klaus Jensen 


signature.asc
Description: PGP signature

Re: [PATCH 0/3] block/nvme: Use NvmeBar structure from "block/nvme.h"

2020-09-04 Thread Fam Zheng

On 2020-09-04 14:41, Philippe Mathieu-Daudé wrote:
> Cleanups in the NVMeRegs structure:
> - Use the already existing NvmeBar structure from "block/nvme.h"
> - Pair doorbell registers
> 
> Based-on: <20200903122803.405265-1-phi...@redhat.com>
> 
> Philippe Mathieu-Daudé (3):
>   block/nvme: Group controller registers in NVMeRegs structure
>   block/nvme: Use generic NvmeBar structure
>   block/nvme: Pair doorbell registers
> 
>  block/nvme.c | 43 +++
>  1 file changed, 15 insertions(+), 28 deletions(-)
> 
> -- 
> 2.26.2
> 
> 

Reviewed-by: Fam Zheng

[PATCH 15/17] hw/block/nvme: support multiple namespaces

2020-09-04 Thread Klaus Jensen

From: Klaus Jensen 

This adds support for multiple namespaces by introducing a new 'nvme-ns'
device model. The nvme device creates a bus named from the device name
('id'). The nvme-ns devices then connect to this and registers
themselves with the nvme device.

This changes how an nvme device is created. Example with two namespaces:

  -drive file=nvme0n1.img,if=none,id=disk1
  -drive file=nvme0n2.img,if=none,id=disk2
  -device nvme,serial=deadbeef,id=nvme0
  -device nvme-ns,drive=disk1,bus=nvme0,nsid=1
  -device nvme-ns,drive=disk2,bus=nvme0,nsid=2

The drive property is kept on the nvme device to keep the change
backward compatible, but the property is now optional. Specifying a
drive for the nvme device will always create the namespace with nsid 1.

Signed-off-by: Klaus Jensen 
Signed-off-by: Klaus Jensen 
Reviewed-by: Keith Busch 
Reviewed-by: Minwoo Im 
---
 hw/block/meson.build  |   2 +-
 hw/block/nvme-ns.c| 185 +++
 hw/block/nvme-ns.h|  74 +
 hw/block/nvme.c   | 247 +++---
 hw/block/nvme.h   |  46 
 hw/block/trace-events |   8 +-
 6 files changed, 447 insertions(+), 115 deletions(-)
 create mode 100644 hw/block/nvme-ns.c
 create mode 100644 hw/block/nvme-ns.h

diff --git a/hw/block/meson.build b/hw/block/meson.build
index 78cad8f7cba1..602ca6c8541d 100644
--- a/hw/block/meson.build
+++ b/hw/block/meson.build
@@ -13,7 +13,7 @@ softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: 
files('m25p80.c'))
 softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c'))
 softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c'))
 softmmu_ss.add(when: 'CONFIG_SH4', if_true: files('tc58128.c'))
-softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c'))
+softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c'))
 
 specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c'))
 specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: 
files('vhost-user-blk.c'))
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
new file mode 100644
index ..0d65b0998617
--- /dev/null
+++ b/hw/block/nvme-ns.c
@@ -0,0 +1,185 @@
+/*
+ * QEMU NVM Express Virtual Namespace
+ *
+ * Copyright (c) 2019 CNEX Labs
+ * Copyright (c) 2020 Samsung Electronics
+ *
+ * Authors:
+ *  Klaus Jensen  
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qemu/cutils.h"
+#include "qemu/log.h"
+#include "hw/block/block.h"
+#include "hw/pci/pci.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/qdev-core.h"
+
+#include "nvme.h"
+#include "nvme-ns.h"
+
+static void nvme_ns_init(NvmeNamespace *ns)
+{
+NvmeIdNs *id_ns = &ns->id_ns;
+
+if (blk_get_flags(ns->blk) & BDRV_O_UNMAP) {
+ns->id_ns.dlfeat = 0x9;
+}
+
+id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
+
+id_ns->nsze = cpu_to_le64(nvme_ns_nlbas(ns));
+
+/* no thin provisioning */
+id_ns->ncap = id_ns->nsze;
+id_ns->nuse = id_ns->ncap;
+}
+
+static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
+{
+uint64_t perm, shared_perm;
+
+Error *local_err = NULL;
+int ret;
+
+perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
+shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+BLK_PERM_GRAPH_MOD;
+
+ret = blk_set_perm(ns->blk, perm, shared_perm, &local_err);
+if (ret) {
+error_propagate_prepend(errp, local_err,
+"could not set block permissions: ");
+return ret;
+}
+
+ns->size = blk_getlength(ns->blk);
+if (ns->size < 0) {
+error_setg_errno(errp, -ns->size, "could not get blockdev size");
+return -1;
+}
+
+switch (n->conf.wce) {
+case ON_OFF_AUTO_ON:
+n->features.vwc = 1;
+break;
+case ON_OFF_AUTO_OFF:
+n->features.vwc = 0;
+break;
+case ON_OFF_AUTO_AUTO:
+n->features.vwc = blk_enable_write_cache(ns->blk);
+break;
+default:
+abort();
+}
+
+blk_set_enable_write_cache(ns->blk, n->features.vwc);
+
+return 0;
+}
+
+static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp)
+{
+if (!ns->blk) {
+error_setg(errp, "block backend not configured");
+return -1;
+}
+
+return 0;
+}
+
+int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
+{
+if (nvme_ns_check_constraints(ns, errp)) {
+return -1;
+}
+
+if (nvme_ns_init_blk(n, ns, errp)) {
+return -1;
+}
+
+nvme_ns_init(ns);
+if (nvme_register_namespace(n, ns, errp)) {
+return -1;
+}
+
+return 0;
+}
+
+void nvme_ns_drain(NvmeNamespace *ns)
+{
+blk_drain(ns->blk);
+}
+
+void nvme_ns_flush(NvmeNamespace *ns)
+{
+bl

[PATCH 13/17] hw/block/nvme: add support for sgl bit bucket descriptor

2020-09-04 Thread Klaus Jensen

From: Gollu Appalanaidu 

This adds support for SGL descriptor type 0x1 (bit bucket descriptor).
See the NVM Express v1.3d specification, Section 4.4 ("Scatter Gather
List (SGL)").

Signed-off-by: Gollu Appalanaidu 
Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c | 33 +++--
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 9bdcfeb901d9..6340af226341 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -431,6 +431,10 @@ static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList 
*qsg,
 uint8_t type = NVME_SGL_TYPE(segment[i].type);
 
 switch (type) {
+case NVME_SGL_DESCR_TYPE_BIT_BUCKET:
+if (req->cmd.opcode == NVME_CMD_WRITE) {
+continue;
+}
 case NVME_SGL_DESCR_TYPE_DATA_BLOCK:
 break;
 case NVME_SGL_DESCR_TYPE_SEGMENT:
@@ -441,6 +445,7 @@ static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList 
*qsg,
 }
 
 dlen = le32_to_cpu(segment[i].len);
+
 if (!dlen) {
 continue;
 }
@@ -461,6 +466,11 @@ static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList 
*qsg,
 }
 
 trans_len = MIN(*len, dlen);
+
+if (type == NVME_SGL_DESCR_TYPE_BIT_BUCKET) {
+goto next;
+}
+
 addr = le64_to_cpu(segment[i].addr);
 
 if (UINT64_MAX - addr < dlen) {
@@ -472,6 +482,7 @@ static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList 
*qsg,
 return status;
 }
 
+next:
 *len -= trans_len;
 }
 
@@ -541,7 +552,8 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, 
QEMUIOVector *iov,
 seg_len = le32_to_cpu(sgld->len);
 
 /* check the length of the (Last) Segment descriptor */
-if (!seg_len || seg_len & 0xf) {
+if ((!seg_len || seg_len & 0xf) &&
+(NVME_SGL_TYPE(sgld->type) != NVME_SGL_DESCR_TYPE_BIT_BUCKET)) {
 return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
 }
 
@@ -578,19 +590,27 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList 
*qsg, QEMUIOVector *iov,
 
 last_sgld = &segment[nsgld - 1];
 
-/* if the segment ends with a Data Block, then we are done */
-if (NVME_SGL_TYPE(last_sgld->type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) {
+/*
+ * If the segment ends with a Data Block or Bit Bucket Descriptor Type,
+ * then we are done.
+ */
+switch (NVME_SGL_TYPE(last_sgld->type)) {
+case NVME_SGL_DESCR_TYPE_DATA_BLOCK:
+case NVME_SGL_DESCR_TYPE_BIT_BUCKET:
 status = nvme_map_sgl_data(n, qsg, iov, segment, nsgld, &len, req);
 if (status) {
 goto unmap;
 }
 
 goto out;
+
+default:
+break;
 }
 
 /*
- * If the last descriptor was not a Data Block, then the current
- * segment must not be a Last Segment.
+ * If the last descriptor was not a Data Block or Bit Bucket, then the
+ * current segment must not be a Last Segment.
  */
 if (NVME_SGL_TYPE(sgld->type) == NVME_SGL_DESCR_TYPE_LAST_SEGMENT) {
 status = NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
@@ -2721,7 +2741,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->nn = cpu_to_le32(n->num_namespaces);
 id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP |
NVME_ONCS_FEATURES);
-id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN);
+id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN |
+   NVME_CTRL_SGLS_BITBUCKET);
 
 subnqn = g_strdup_printf("nqn.2019-08.org.qemu:%s", n->params.serial);
 strpadcpy((char *)id->subnqn, sizeof(id->subnqn), subnqn, '\0');
-- 
2.28.0

[PATCH 17/17] hw/block/nvme: change controller pci id

2020-09-04 Thread Klaus Jensen

From: Klaus Jensen 

There are two reasons for changing this:

  1. The nvme device currently uses an internal Intel device id.

  2. Since commits "nvme: fix write zeroes offset and count" and "nvme:
 support multiple namespaces" the controller device no longer has
 the quirks that the Linux kernel think it has.

 As the quirks are applied based on pci vendor and device id, change
 them to get rid of the quirks.

To keep backward compatibility, add a new 'x-use-intel-id' parameter to
the nvme device to force use of the Intel vendor and device id. This is
off by default but add a compat property to set this for 5.1 machines
and older.

Signed-off-by: Klaus Jensen 
Reviewed-by: Keith Busch 
Reviewed-by: Maxim Levitsky 
---
 hw/block/nvme.c   | 12 ++--
 hw/block/nvme.h   |  1 +
 hw/core/machine.c |  1 +
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 453d3a89d475..8018f8679366 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -2749,6 +2749,15 @@ static void nvme_init_pci(NvmeCtrl *n, PCIDevice 
*pci_dev, Error **errp)
 
 pci_conf[PCI_INTERRUPT_PIN] = 1;
 pci_config_set_prog_interface(pci_conf, 0x2);
+
+if (n->params.use_intel_id) {
+pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
+pci_config_set_device_id(pci_conf, 0x5846);
+} else {
+pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_REDHAT);
+pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_REDHAT_NVME);
+}
+
 pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS);
 pcie_endpoint_cap_init(pci_dev, 0x80);
 
@@ -2903,6 +2912,7 @@ static Property nvme_props[] = {
 DEFINE_PROP_UINT8("aerl", NvmeCtrl, params.aerl, 3),
 DEFINE_PROP_UINT32("aer_max_queued", NvmeCtrl, params.aer_max_queued, 64),
 DEFINE_PROP_UINT8("mdts", NvmeCtrl, params.mdts, 7),
+DEFINE_PROP_BOOL("x-use-intel-id", NvmeCtrl, params.use_intel_id, false),
 DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -2919,8 +2929,6 @@ static void nvme_class_init(ObjectClass *oc, void *data)
 pc->realize = nvme_realize;
 pc->exit = nvme_exit;
 pc->class_id = PCI_CLASS_STORAGE_EXPRESS;
-pc->vendor_id = PCI_VENDOR_ID_INTEL;
-pc->device_id = 0x5845;
 pc->revision = 2;
 
 set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 72260f2e8ea9..a734a5e1370d 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -15,6 +15,7 @@ typedef struct NvmeParams {
 uint8_t  aerl;
 uint32_t aer_max_queued;
 uint8_t  mdts;
+bool use_intel_id;
 } NvmeParams;
 
 typedef struct NvmeAsyncEvent {
diff --git a/hw/core/machine.c b/hw/core/machine.c
index ea26d612374d..67990232528c 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -34,6 +34,7 @@ GlobalProperty hw_compat_5_1[] = {
 { "vhost-user-scsi", "num_queues", "1"},
 { "virtio-blk-device", "num-queues", "1"},
 { "virtio-scsi-device", "num_queues", "1"},
+{ "nvme", "x-use-intel-id", "on"},
 };
 const size_t hw_compat_5_1_len = G_N_ELEMENTS(hw_compat_5_1);
 
-- 
2.28.0

[PATCH 11/17] hw/block/nvme: harden cmb access

2020-09-04 Thread Klaus Jensen

From: Klaus Jensen 

Since the controller has only supported PRPs so far it has not been
required to check the ending address (addr + len - 1) of the CMB access
for validity since it has been guaranteed to be in range of the CMB.

This changes when the controller adds support for SGLs (next patch), so
add that check.

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 36ec8cbb1168..6ef4dc762b80 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -143,7 +143,12 @@ static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr 
addr)
 
 static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
 {
-if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr)) {
+hwaddr hi = addr + size - 1;
+if (hi < addr) {
+return 1;
+}
+
+if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr) && nvme_addr_is_cmb(n, hi)) {
 memcpy(buf, nvme_addr_to_cmb(n, addr), size);
 return 0;
 }
-- 
2.28.0

[PATCH 12/17] hw/block/nvme: add support for scatter gather lists

2020-09-04 Thread Klaus Jensen

From: Klaus Jensen 

For now, support the Data Block, Segment and Last Segment descriptor
types.

See NVM Express 1.3d, Section 4.4 ("Scatter Gather List (SGL)").

Signed-off-by: Klaus Jensen 
Signed-off-by: Klaus Jensen 
Acked-by: Keith Busch 
---
 hw/block/nvme.c   | 329 ++
 hw/block/trace-events |   4 +
 include/block/nvme.h  |   6 +-
 3 files changed, 279 insertions(+), 60 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 6ef4dc762b80..9bdcfeb901d9 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -414,13 +414,262 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, 
uint64_t prp2,
 return NVME_SUCCESS;
 }
 
-static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
- uint64_t prp1, uint64_t prp2, DMADirection dir,
+/*
+ * Map 'nsgld' data descriptors from 'segment'. The function will subtract the
+ * number of bytes mapped in len.
+ */
+static uint16_t nvme_map_sgl_data(NvmeCtrl *n, QEMUSGList *qsg,
+  QEMUIOVector *iov,
+  NvmeSglDescriptor *segment, uint64_t nsgld,
+  size_t *len, NvmeRequest *req)
+{
+dma_addr_t addr, trans_len;
+uint32_t dlen;
+uint16_t status;
+
+for (int i = 0; i < nsgld; i++) {
+uint8_t type = NVME_SGL_TYPE(segment[i].type);
+
+switch (type) {
+case NVME_SGL_DESCR_TYPE_DATA_BLOCK:
+break;
+case NVME_SGL_DESCR_TYPE_SEGMENT:
+case NVME_SGL_DESCR_TYPE_LAST_SEGMENT:
+return NVME_INVALID_NUM_SGL_DESCRS | NVME_DNR;
+default:
+return NVME_SGL_DESCR_TYPE_INVALID | NVME_DNR;
+}
+
+dlen = le32_to_cpu(segment[i].len);
+if (!dlen) {
+continue;
+}
+
+if (*len == 0) {
+/*
+ * All data has been mapped, but the SGL contains additional
+ * segments and/or descriptors. The controller might accept
+ * ignoring the rest of the SGL.
+ */
+uint16_t sgls = le16_to_cpu(n->id_ctrl.sgls);
+if (sgls & NVME_CTRL_SGLS_EXCESS_LENGTH) {
+break;
+}
+
+trace_pci_nvme_err_invalid_sgl_excess_length(nvme_cid(req));
+return NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
+}
+
+trans_len = MIN(*len, dlen);
+addr = le64_to_cpu(segment[i].addr);
+
+if (UINT64_MAX - addr < dlen) {
+return NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
+}
+
+status = nvme_map_addr(n, qsg, iov, addr, trans_len);
+if (status) {
+return status;
+}
+
+*len -= trans_len;
+}
+
+return NVME_SUCCESS;
+}
+
+static uint16_t nvme_map_sgl(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
+ NvmeSglDescriptor sgl, size_t len,
  NvmeRequest *req)
+{
+/*
+ * Read the segment in chunks of 256 descriptors (one 4k page) to avoid
+ * dynamically allocating a potentially huge SGL. The spec allows the SGL
+ * to be larger (as in number of bytes required to describe the SGL
+ * descriptors and segment chain) than the command transfer size, so it is
+ * not bounded by MDTS.
+ */
+const int SEG_CHUNK_SIZE = 256;
+
+NvmeSglDescriptor segment[SEG_CHUNK_SIZE], *sgld, *last_sgld;
+uint64_t nsgld;
+uint32_t seg_len;
+uint16_t status;
+bool sgl_in_cmb = false;
+hwaddr addr;
+int ret;
+
+sgld = &sgl;
+addr = le64_to_cpu(sgl.addr);
+
+trace_pci_nvme_map_sgl(nvme_cid(req), NVME_SGL_TYPE(sgl.type), len);
+
+/*
+ * If the entire transfer can be described with a single data block it can
+ * be mapped directly.
+ */
+if (NVME_SGL_TYPE(sgl.type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) {
+status = nvme_map_sgl_data(n, qsg, iov, sgld, 1, &len, req);
+if (status) {
+goto unmap;
+}
+
+goto out;
+}
+
+/*
+ * If the segment is located in the CMB, the submission queue of the
+ * request must also reside there.
+ */
+if (nvme_addr_is_cmb(n, addr)) {
+if (!nvme_addr_is_cmb(n, req->sq->dma_addr)) {
+return NVME_INVALID_USE_OF_CMB | NVME_DNR;
+}
+
+sgl_in_cmb = true;
+}
+
+for (;;) {
+switch (NVME_SGL_TYPE(sgld->type)) {
+case NVME_SGL_DESCR_TYPE_SEGMENT:
+case NVME_SGL_DESCR_TYPE_LAST_SEGMENT:
+break;
+default:
+return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
+}
+
+seg_len = le32_to_cpu(sgld->len);
+
+/* check the length of the (Last) Segment descriptor */
+if (!seg_len || seg_len & 0xf) {
+return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR;
+}
+
+if (UINT64_MAX - addr < seg_len) {
+return NVME_DATA_SGL_LEN_INVALID | NVME_DNR;
+}
+
+

[PATCH 14/17] hw/block/nvme: refactor identify active namespace id list

2020-09-04 Thread Klaus Jensen

From: Klaus Jensen 

Prepare to support inactive namespaces.

Signed-off-by: Klaus Jensen 
Reviewed-by: Maxim Levitsky 
---
 hw/block/nvme.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 6340af226341..4155cb797856 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1538,7 +1538,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, 
NvmeRequest *req)
 uint32_t min_nsid = le32_to_cpu(c->nsid);
 uint32_t *list;
 uint16_t ret;
-int i, j = 0;
+int j = 0;
 
 trace_pci_nvme_identify_nslist(min_nsid);
 
@@ -1553,11 +1553,11 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, 
NvmeRequest *req)
 }
 
 list = g_malloc0(data_len);
-for (i = 0; i < n->num_namespaces; i++) {
-if (i < min_nsid) {
+for (int i = 1; i <= n->num_namespaces; i++) {
+if (i <= min_nsid) {
 continue;
 }
-list[j++] = cpu_to_le32(i + 1);
+list[j++] = cpu_to_le32(i);
 if (j == data_len / sizeof(uint32_t)) {
 break;
 }
-- 
2.28.0

[PATCH 16/17] pci: allocate pci id for nvme

2020-09-04 Thread Klaus Jensen

From: Klaus Jensen 

The emulated nvme device (hw/block/nvme.c) is currently using an
internal Intel device id.

Prepare to change that by allocating a device id under the 1b36 (Red
Hat, Inc.) vendor id.

Signed-off-by: Klaus Jensen 
Acked-by: Keith Busch 
Acked-by: Gerd Hoffmann 
Reviewed-by: Maxim Levitsky 
---
 MAINTAINERS|  1 +
 docs/specs/nvme.txt| 23 +++
 docs/specs/pci-ids.txt |  1 +
 include/hw/pci/pci.h   |  1 +
 4 files changed, 26 insertions(+)
 create mode 100644 docs/specs/nvme.txt

diff --git a/MAINTAINERS b/MAINTAINERS
index b233da2a7379..5de612aae381 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1861,6 +1861,7 @@ L: qemu-block@nongnu.org
 S: Supported
 F: hw/block/nvme*
 F: tests/qtest/nvme-test.c
+F: docs/specs/nvme.txt
 T: git git://git.infradead.org/qemu-nvme.git nvme-next
 
 megasas
diff --git a/docs/specs/nvme.txt b/docs/specs/nvme.txt
new file mode 100644
index ..56d393884e7a
--- /dev/null
+++ b/docs/specs/nvme.txt
@@ -0,0 +1,23 @@
+NVM Express Controller
+==
+
+The nvme device (-device nvme) emulates an NVM Express Controller.
+
+
+Reference Specifications
+
+
+The device currently implements most mandatory features of NVMe v1.3d, see
+
+  https://nvmexpress.org/resources/specifications/
+
+for the specification.
+
+
+Known issues
+
+
+* The accounting numbers in the SMART/Health are reset across power cycles
+
+* Interrupt Coalescing is not supported and is disabled by default in volation
+  of the specification.
diff --git a/docs/specs/pci-ids.txt b/docs/specs/pci-ids.txt
index 4d53e5c7d9d5..abbdbca6be38 100644
--- a/docs/specs/pci-ids.txt
+++ b/docs/specs/pci-ids.txt
@@ -63,6 +63,7 @@ PCI devices (other than virtio):
 1b36:000b  PCIe Expander Bridge (-device pxb-pcie)
 1b36:000d  PCI xhci usb host adapter
 1b36:000f  mdpy (mdev sample device), linux/samples/vfio-mdev/mdpy.c
+1b36:0010  PCIe NVMe device (-device nvme)
 
 All these devices are documented in docs/specs.
 
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 896cef9ad476..f7123c5b8a2e 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -105,6 +105,7 @@ extern bool pci_available;
 #define PCI_DEVICE_ID_REDHAT_XHCI0x000d
 #define PCI_DEVICE_ID_REDHAT_PCIE_BRIDGE 0x000e
 #define PCI_DEVICE_ID_REDHAT_MDPY0x000f
+#define PCI_DEVICE_ID_REDHAT_NVME0x0010
 #define PCI_DEVICE_ID_REDHAT_QXL 0x0100
 
 #define FMT_PCIBUS  PRIx64
-- 
2.28.0

[PATCH 07/17] hw/block/nvme: add symbolic command name to trace events

2020-09-04 Thread Klaus Jensen

From: Klaus Jensen 

Add the symbolic command name to the pci_nvme_{io,admin}_cmd and
pci_nvme_rw trace events.

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c   |  8 +---
 hw/block/nvme.h   | 28 
 hw/block/trace-events |  6 +++---
 3 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 50851ab8d90f..bfac3385cb64 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -678,7 +678,8 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
 enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
 uint16_t status;
 
-trace_pci_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba);
+trace_pci_nvme_rw(nvme_cid(req), nvme_io_opc_str(rw->opcode), nlb,
+  data_size, slba);
 
 status = nvme_check_mdts(n, data_size);
 if (status) {
@@ -727,7 +728,7 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
 uint32_t nsid = le32_to_cpu(req->cmd.nsid);
 
 trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req),
-  req->cmd.opcode);
+  req->cmd.opcode, nvme_io_opc_str(req->cmd.opcode));
 
 if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
 trace_pci_nvme_err_invalid_ns(nsid, n->num_namespaces);
@@ -1584,7 +1585,8 @@ static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req)
 
 static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
 {
-trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode);
+trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode,
+ nvme_adm_opc_str(req->cmd.opcode));
 
 switch (req->cmd.opcode) {
 case NVME_ADM_CMD_DELETE_SQ:
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 1675c1e0755c..ce9e931420d7 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -32,6 +32,34 @@ typedef struct NvmeRequest {
 QTAILQ_ENTRY(NvmeRequest)entry;
 } NvmeRequest;
 
+static inline const char *nvme_adm_opc_str(uint8_t opc)
+{
+switch (opc) {
+case NVME_ADM_CMD_DELETE_SQ:return "NVME_ADM_CMD_DELETE_SQ";
+case NVME_ADM_CMD_CREATE_SQ:return "NVME_ADM_CMD_CREATE_SQ";
+case NVME_ADM_CMD_GET_LOG_PAGE: return "NVME_ADM_CMD_GET_LOG_PAGE";
+case NVME_ADM_CMD_DELETE_CQ:return "NVME_ADM_CMD_DELETE_CQ";
+case NVME_ADM_CMD_CREATE_CQ:return "NVME_ADM_CMD_CREATE_CQ";
+case NVME_ADM_CMD_IDENTIFY: return "NVME_ADM_CMD_IDENTIFY";
+case NVME_ADM_CMD_ABORT:return "NVME_ADM_CMD_ABORT";
+case NVME_ADM_CMD_SET_FEATURES: return "NVME_ADM_CMD_SET_FEATURES";
+case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES";
+case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ";
+default:return "NVME_ADM_CMD_UNKNOWN";
+}
+}
+
+static inline const char *nvme_io_opc_str(uint8_t opc)
+{
+switch (opc) {
+case NVME_CMD_FLUSH:return "NVME_NVM_CMD_FLUSH";
+case NVME_CMD_WRITE:return "NVME_NVM_CMD_WRITE";
+case NVME_CMD_READ: return "NVME_NVM_CMD_READ";
+case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES";
+default:return "NVME_NVM_CMD_UNKNOWN";
+}
+}
+
 typedef struct NvmeSQueue {
 struct NvmeCtrl *ctrl;
 uint16_tsqid;
diff --git a/hw/block/trace-events b/hw/block/trace-events
index 50d5702e6b80..0823d0fb47c5 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -36,9 +36,9 @@ pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, 
prp1=0x%"PRIx64" prp2
 pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
 pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len 
%"PRIu64""
 pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t 
prp2, int num_prps) "trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 
0x%"PRIx64" num_prps %d"
-pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode) 
"cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8""
-pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode) "cid %"PRIu16" 
sqid %"PRIu16" opc 0x%"PRIx8""
-pci_nvme_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, 
uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64""
+pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, 
const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" 
opname \"%s\""
+pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char 
*opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname \"%s\""
+pci_nvme_rw(uint16_t cid, const char *verb, uint32_t nlb, uint64_t count, 
uint64_t lba) "cid %"PRIu16" \"%s\" nlb %"PRIu32" count %"PRIu64" lba 
0x%"PRIx64""
 pci_nvme_rw_cb(uint16_t cid) "cid %"PRIu16""
 pci_nvme_write_zeroes(uint16_t cid, uint64_t slb

[PATCH 10/17] hw/block/nvme: support multiple parallel aios per request

2020-09-04 Thread Klaus Jensen

From: Klaus Jensen 

Move the BlockAIOCB to NvmeAIO and add a queue of pending AIOs to the
NvmeRequest. Only when the queue is empty is a completion enqueued.

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c | 44 ++--
 hw/block/nvme.h |  6 --
 2 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 64c8f232e3ea..36ec8cbb1168 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -629,10 +629,8 @@ static void nvme_aio_destroy(NvmeAIO *aio)
 
 static uint16_t nvme_do_aio(NvmeAIO *aio)
 {
-NvmeRequest *req = aio->req;
-
 BlockBackend *blk = aio->blk;
-BlockAcctCookie *acct = &req->acct;
+BlockAcctCookie *acct = &aio->acct;
 BlockAcctStats *stats = blk_get_stats(blk);
 
 bool is_write;
@@ -640,12 +638,12 @@ static uint16_t nvme_do_aio(NvmeAIO *aio)
 switch (aio->opc) {
 case NVME_AIO_OPC_FLUSH:
 block_acct_start(stats, acct, 0, BLOCK_ACCT_FLUSH);
-req->aiocb = blk_aio_flush(blk, nvme_aio_cb, aio);
+aio->aiocb = blk_aio_flush(blk, nvme_aio_cb, aio);
 break;
 
 case NVME_AIO_OPC_WRITE_ZEROES:
 block_acct_start(stats, acct, aio->len, BLOCK_ACCT_WRITE);
-req->aiocb = blk_aio_pwrite_zeroes(blk, aio->offset, aio->len,
+aio->aiocb = blk_aio_pwrite_zeroes(blk, aio->offset, aio->len,
BDRV_REQ_MAY_UNMAP, nvme_aio_cb,
aio);
 break;
@@ -661,20 +659,20 @@ static uint16_t nvme_do_aio(NvmeAIO *aio)
 QEMUSGList *qsg = (QEMUSGList *)aio->payload;
 
 if (is_write) {
-req->aiocb = dma_blk_write(blk, qsg, aio->offset,
+aio->aiocb = dma_blk_write(blk, qsg, aio->offset,
BDRV_SECTOR_SIZE, nvme_aio_cb, aio);
 } else {
-req->aiocb = dma_blk_read(blk, qsg, aio->offset,
+aio->aiocb = dma_blk_read(blk, qsg, aio->offset,
   BDRV_SECTOR_SIZE, nvme_aio_cb, aio);
 }
 } else {
 QEMUIOVector *iov = (QEMUIOVector *)aio->payload;
 
 if (is_write) {
-req->aiocb = blk_aio_pwritev(blk, aio->offset, iov, 0,
+aio->aiocb = blk_aio_pwritev(blk, aio->offset, iov, 0,
  nvme_aio_cb, aio);
 } else {
-req->aiocb = blk_aio_preadv(blk, aio->offset, iov, 0,
+aio->aiocb = blk_aio_preadv(blk, aio->offset, iov, 0,
 nvme_aio_cb, aio);
 }
 }
@@ -693,6 +691,8 @@ static uint16_t nvme_aio_add(NvmeRequest *req, NvmeAIO *aio)
aio->offset, aio->len, nvme_aio_opc_str(aio),
req);
 
+QTAILQ_INSERT_TAIL(&req->aio_tailq, aio, entry);
+
 return nvme_do_aio(aio);
 }
 
@@ -702,7 +702,7 @@ static void nvme_aio_cb(void *opaque, int ret)
 NvmeRequest *req = aio->req;
 
 BlockBackend *blk = aio->blk;
-BlockAcctCookie *acct = &req->acct;
+BlockAcctCookie *acct = &aio->acct;
 BlockAcctStats *stats = blk_get_stats(blk);
 
 Error *local_err = NULL;
@@ -710,6 +710,8 @@ static void nvme_aio_cb(void *opaque, int ret)
 trace_pci_nvme_aio_cb(nvme_cid(req), aio, blk_name(blk), aio->offset,
   aio->len, nvme_aio_opc_str(aio), req);
 
+QTAILQ_REMOVE(&req->aio_tailq, aio, entry);
+
 if (!ret) {
 block_acct_done(stats, acct);
 } else {
@@ -738,10 +740,19 @@ static void nvme_aio_cb(void *opaque, int ret)
 error_setg_errno(&local_err, -ret, "aio failed");
 error_report_err(local_err);
 
-req->status = status;
+/*
+ * An Internal Error trumps all other errors. For other errors, only
+ * set the first encountered.
+ */
+if (!req->status || (status & 0xfff) == NVME_INTERNAL_DEV_ERROR) {
+req->status = status;
+}
+}
+
+if (QTAILQ_EMPTY(&req->aio_tailq)) {
+nvme_enqueue_req_completion(nvme_cq(req), req);
 }
 
-nvme_enqueue_req_completion(nvme_cq(req), req);
 nvme_aio_destroy(aio);
 }
 
@@ -872,6 +883,7 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
 NvmeRequest *r, *next;
 NvmeSQueue *sq;
 NvmeCQueue *cq;
+NvmeAIO *aio;
 uint16_t qid = le16_to_cpu(c->qid);
 
 if (unlikely(!qid || nvme_check_sqid(n, qid))) {
@@ -884,8 +896,11 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
 sq = n->sq[qid];
 while (!QTAILQ_EMPTY(&sq->out_req_list)) {
 r = QTAILQ_FIRST(&sq->out_req_list);
-assert(r->aiocb);
-blk_aio_cancel(r->aiocb);
+while (!QTAILQ_EMPTY(&r->aio_tailq)) {
+aio = QTAILQ_FIRST(&r->aio_tailq);
+assert(aio->aiocb);
+blk_aio_cancel(aio->aiocb);
+}

[PATCH 08/17] hw/block/nvme: refactor aio submission

2020-09-04 Thread Klaus Jensen

From: Klaus Jensen 

This pulls block layer aio submission to a common function an introduces
the NvmeAIO structure that encapsulates this.

This adds more code with no immediate benefit, but is in preparation for
supporting multiple aios per request.

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c   | 191 --
 hw/block/nvme.h   |  51 +++
 hw/block/trace-events |   3 +
 3 files changed, 203 insertions(+), 42 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index bfac3385cb64..3e32f39c7c1d 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -110,6 +110,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
 };
 
 static void nvme_process_sq(void *opaque);
+static void nvme_aio_cb(void *opaque, int ret);
 
 static uint16_t nvme_cid(NvmeRequest *req)
 {
@@ -611,39 +612,151 @@ static inline uint16_t nvme_check_bounds(NvmeCtrl *n, 
NvmeNamespace *ns,
 return NVME_SUCCESS;
 }
 
-static void nvme_rw_cb(void *opaque, int ret)
+static NvmeAIO *nvme_aio_new(NvmeAIOOp opc, BlockBackend *blk, int64_t offset)
 {
-NvmeRequest *req = opaque;
-NvmeSQueue *sq = req->sq;
-NvmeCtrl *n = sq->ctrl;
-NvmeCQueue *cq = n->cq[sq->cqid];
+NvmeAIO *aio = g_new0(NvmeAIO, 1);
 
-trace_pci_nvme_rw_cb(nvme_cid(req));
+aio->opc = opc;
+aio->blk = blk;
+aio->offset = offset;
 
-if (!ret) {
-block_acct_done(blk_get_stats(n->conf.blk), &req->acct);
-req->status = NVME_SUCCESS;
-} else {
-block_acct_failed(blk_get_stats(n->conf.blk), &req->acct);
-req->status = NVME_INTERNAL_DEV_ERROR;
+return aio;
+}
+
+static void nvme_aio_destroy(NvmeAIO *aio)
+{
+g_free(aio);
+}
+
+static uint16_t nvme_do_aio(NvmeAIO *aio)
+{
+NvmeRequest *req = aio->req;
+
+BlockBackend *blk = aio->blk;
+BlockAcctCookie *acct = &req->acct;
+BlockAcctStats *stats = blk_get_stats(blk);
+
+bool is_write;
+
+switch (aio->opc) {
+case NVME_AIO_OPC_FLUSH:
+block_acct_start(stats, acct, 0, BLOCK_ACCT_FLUSH);
+req->aiocb = blk_aio_flush(blk, nvme_aio_cb, aio);
+break;
+
+case NVME_AIO_OPC_WRITE_ZEROES:
+block_acct_start(stats, acct, aio->len, BLOCK_ACCT_WRITE);
+req->aiocb = blk_aio_pwrite_zeroes(blk, aio->offset, aio->len,
+   BDRV_REQ_MAY_UNMAP, nvme_aio_cb,
+   aio);
+break;
+
+case NVME_AIO_OPC_READ:
+case NVME_AIO_OPC_WRITE:
+is_write = (aio->opc == NVME_AIO_OPC_WRITE);
+
+block_acct_start(stats, acct, aio->len,
+ is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
+
+if (aio->flags & NVME_AIO_DMA) {
+QEMUSGList *qsg = (QEMUSGList *)aio->payload;
+
+if (is_write) {
+req->aiocb = dma_blk_write(blk, qsg, aio->offset,
+   BDRV_SECTOR_SIZE, nvme_aio_cb, aio);
+} else {
+req->aiocb = dma_blk_read(blk, qsg, aio->offset,
+  BDRV_SECTOR_SIZE, nvme_aio_cb, aio);
+}
+} else {
+QEMUIOVector *iov = (QEMUIOVector *)aio->payload;
+
+if (is_write) {
+req->aiocb = blk_aio_pwritev(blk, aio->offset, iov, 0,
+ nvme_aio_cb, aio);
+} else {
+req->aiocb = blk_aio_preadv(blk, aio->offset, iov, 0,
+nvme_aio_cb, aio);
+}
+}
+
+break;
 }
 
-nvme_enqueue_req_completion(cq, req);
+return NVME_NO_COMPLETE;
+}
+
+static uint16_t nvme_aio_add(NvmeRequest *req, NvmeAIO *aio)
+{
+aio->req = req;
+
+trace_pci_nvme_aio_add(nvme_cid(req), aio, blk_name(aio->blk),
+   aio->offset, aio->len, nvme_aio_opc_str(aio),
+   req);
+
+return nvme_do_aio(aio);
+}
+
+static void nvme_aio_cb(void *opaque, int ret)
+{
+NvmeAIO *aio = opaque;
+NvmeRequest *req = aio->req;
+
+BlockBackend *blk = aio->blk;
+BlockAcctCookie *acct = &req->acct;
+BlockAcctStats *stats = blk_get_stats(blk);
+
+Error *local_err = NULL;
+
+trace_pci_nvme_aio_cb(nvme_cid(req), aio, blk_name(blk), aio->offset,
+  aio->len, nvme_aio_opc_str(aio), req);
+
+if (!ret) {
+block_acct_done(stats, acct);
+req->status = NVME_SUCCESS;
+} else {
+uint16_t status;
+
+block_acct_failed(stats, acct);
+
+switch (aio->opc) {
+case NVME_AIO_OPC_READ:
+status = NVME_UNRECOVERED_READ;
+break;
+case NVME_AIO_OPC_FLUSH:
+case NVME_AIO_OPC_WRITE:
+case NVME_AIO_OPC_WRITE_ZEROES:
+status = NVME_WRITE_FAULT;
+break;
+default:
+status = NVME_INTERNAL_DEV_ERROR;
+break;
+

[PATCH 09/17] hw/block/nvme: default request status to success

2020-09-04 Thread Klaus Jensen

From: Klaus Jensen 

Make the default request status NVME_SUCCESS so only error status codes
has to be set.

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 3e32f39c7c1d..64c8f232e3ea 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -231,6 +231,7 @@ static void nvme_req_clear(NvmeRequest *req)
 {
 req->ns = NULL;
 memset(&req->cqe, 0x0, sizeof(req->cqe));
+req->status = NVME_SUCCESS;
 }
 
 static void nvme_req_exit(NvmeRequest *req)
@@ -547,8 +548,6 @@ static void nvme_process_aers(void *opaque)
 result->log_page = event->result.log_page;
 g_free(event);
 
-req->status = NVME_SUCCESS;
-
 trace_pci_nvme_aer_post_cqe(result->event_type, result->event_info,
 result->log_page);
 
@@ -713,7 +712,6 @@ static void nvme_aio_cb(void *opaque, int ret)
 
 if (!ret) {
 block_acct_done(stats, acct);
-req->status = NVME_SUCCESS;
 } else {
 uint16_t status;
 
-- 
2.28.0

[PATCH 05/17] hw/block/nvme: add a lba to bytes helper

2020-09-04 Thread Klaus Jensen

From: Klaus Jensen 

Add the nvme_l2b helper and use it for converting NLB and SLBA to byte
counts and offsets.

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c | 12 
 hw/block/nvme.h |  6 ++
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 88b4e6288bea..08f824dd807d 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -644,12 +644,10 @@ static uint16_t nvme_write_zeroes(NvmeCtrl *n, 
NvmeRequest *req)
 {
 NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
 NvmeNamespace *ns = req->ns;
-const uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
-const uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
 uint64_t slba = le64_to_cpu(rw->slba);
 uint32_t nlb  = le16_to_cpu(rw->nlb) + 1;
-uint64_t offset = slba << data_shift;
-uint32_t count = nlb << data_shift;
+uint64_t offset = nvme_l2b(ns, slba);
+uint32_t count = nvme_l2b(ns, nlb);
 uint16_t status;
 
 trace_pci_nvme_write_zeroes(nvme_cid(req), slba, nlb);
@@ -674,10 +672,8 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
 uint32_t nlb  = le32_to_cpu(rw->nlb) + 1;
 uint64_t slba = le64_to_cpu(rw->slba);
 
-uint8_t lba_index  = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
-uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
-uint64_t data_size = (uint64_t)nlb << data_shift;
-uint64_t data_offset = slba << data_shift;
+uint64_t data_size = nvme_l2b(ns, nlb);
+uint64_t data_offset = nvme_l2b(ns, slba);
 int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
 enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
 uint16_t status;
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 52ba794f2e9a..1675c1e0755c 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -77,6 +77,12 @@ static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns)
 return nvme_ns_lbaf(ns)->ds;
 }
 
+/* convert an LBA to the equivalent in bytes */
+static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
+{
+return lba << nvme_ns_lbads(ns);
+}
+
 #define TYPE_NVME "nvme"
 #define NVME(obj) \
 OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)
-- 
2.28.0

[PATCH 06/17] hw/block/nvme: fix endian conversion

2020-09-04 Thread Klaus Jensen

From: Klaus Jensen 

The raw NLB field is a 16 bit value, so use le16_to_cpu instead of
le32_to_cpu and cast to uint32_t before incrementing the value to not
wrap around.

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 08f824dd807d..50851ab8d90f 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -645,7 +645,7 @@ static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest 
*req)
 NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
 NvmeNamespace *ns = req->ns;
 uint64_t slba = le64_to_cpu(rw->slba);
-uint32_t nlb  = le16_to_cpu(rw->nlb) + 1;
+uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
 uint64_t offset = nvme_l2b(ns, slba);
 uint32_t count = nvme_l2b(ns, nlb);
 uint16_t status;
@@ -669,7 +669,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
 {
 NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
 NvmeNamespace *ns = req->ns;
-uint32_t nlb  = le32_to_cpu(rw->nlb) + 1;
+uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
 uint64_t slba = le64_to_cpu(rw->slba);
 
 uint64_t data_size = nvme_l2b(ns, nlb);
-- 
2.28.0

[PATCH 03/17] hw/block/nvme: commonize nvme_rw error handling

2020-09-04 Thread Klaus Jensen

From: Klaus Jensen 

Move common error handling to a label.

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 49bcdf31ced6..a94e648a80e4 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -687,20 +687,18 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
 status = nvme_check_mdts(n, data_size);
 if (status) {
 trace_pci_nvme_err_mdts(nvme_cid(req), data_size);
-block_acct_invalid(blk_get_stats(n->conf.blk), acct);
-return status;
+goto invalid;
 }
 
 status = nvme_check_bounds(n, ns, slba, nlb);
 if (status) {
 trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
-block_acct_invalid(blk_get_stats(n->conf.blk), acct);
-return status;
+goto invalid;
 }
 
-if (nvme_map_dptr(n, data_size, req)) {
-block_acct_invalid(blk_get_stats(n->conf.blk), acct);
-return NVME_INVALID_FIELD | NVME_DNR;
+status = nvme_map_dptr(n, data_size, req);
+if (status) {
+goto invalid;
 }
 
 if (req->qsg.nsg > 0) {
@@ -722,6 +720,10 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
 }
 
 return NVME_NO_COMPLETE;
+
+invalid:
+block_acct_invalid(blk_get_stats(n->conf.blk), acct);
+return status;
 }
 
 static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
-- 
2.28.0

[PATCH 02/17] hw/block/nvme: handle dma errors

2020-09-04 Thread Klaus Jensen

From: Klaus Jensen 

Handling DMA errors gracefully is required for the device to pass the
block/011 test ("disable PCI device while doing I/O") in the blktests
suite.

With this patch the device passes the test by retrying "critical"
transfers (posting of completion entries and processing of submission
queue entries).

If DMA errors occur at any other point in the execution of the command
(say, while mapping the PRPs), the command is aborted with a Data
Transfer Error status code.

Signed-off-by: Klaus Jensen 
Acked-by: Keith Busch 
Reviewed-by: Maxim Levitsky 
---
 hw/block/nvme.c   | 43 ---
 hw/block/trace-events |  2 ++
 include/block/nvme.h  |  2 +-
 3 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 63078f600920..49bcdf31ced6 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -140,14 +140,14 @@ static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr 
addr)
 return &n->cmbuf[addr - n->ctrl_mem.addr];
 }
 
-static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
+static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
 {
 if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr)) {
 memcpy(buf, nvme_addr_to_cmb(n, addr), size);
-return;
+return 0;
 }
 
-pci_dma_read(&n->parent_obj, addr, buf, size);
+return pci_dma_read(&n->parent_obj, addr, buf, size);
 }
 
 static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
@@ -253,7 +253,7 @@ static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector 
*iov, hwaddr addr,
 trace_pci_nvme_map_addr_cmb(addr, len);
 
 if (!nvme_addr_is_cmb(n, addr) || !nvme_addr_is_cmb(n, addr + len - 1)) {
-return NVME_DATA_TRAS_ERROR;
+return NVME_DATA_TRANSFER_ERROR;
 }
 
 qemu_iovec_add(iov, nvme_addr_to_cmb(n, addr), len);
@@ -307,6 +307,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, 
uint64_t prp2,
 int num_prps = (len >> n->page_bits) + 1;
 uint16_t status;
 bool prp_list_in_cmb = false;
+int ret;
 
 QEMUSGList *qsg = &req->qsg;
 QEMUIOVector *iov = &req->iov;
@@ -347,7 +348,11 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, 
uint64_t prp2,
 
 nents = (len + n->page_size - 1) >> n->page_bits;
 prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
-nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
+ret = nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
+if (ret) {
+trace_pci_nvme_err_addr_read(prp2);
+return NVME_DATA_TRANSFER_ERROR;
+}
 while (len != 0) {
 uint64_t prp_ent = le64_to_cpu(prp_list[i]);
 
@@ -364,8 +369,12 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, 
uint64_t prp2,
 i = 0;
 nents = (len + n->page_size - 1) >> n->page_bits;
 prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
-nvme_addr_read(n, prp_ent, (void *)prp_list,
-prp_trans);
+ret = nvme_addr_read(n, prp_ent, (void *)prp_list,
+ prp_trans);
+if (ret) {
+trace_pci_nvme_err_addr_read(prp_ent);
+return NVME_DATA_TRANSFER_ERROR;
+}
 prp_ent = le64_to_cpu(prp_list[i]);
 }
 
@@ -457,6 +466,7 @@ static void nvme_post_cqes(void *opaque)
 NvmeCQueue *cq = opaque;
 NvmeCtrl *n = cq->ctrl;
 NvmeRequest *req, *next;
+int ret;
 
 QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
 NvmeSQueue *sq;
@@ -466,15 +476,21 @@ static void nvme_post_cqes(void *opaque)
 break;
 }
 
-QTAILQ_REMOVE(&cq->req_list, req, entry);
 sq = req->sq;
 req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase);
 req->cqe.sq_id = cpu_to_le16(sq->sqid);
 req->cqe.sq_head = cpu_to_le16(sq->head);
 addr = cq->dma_addr + cq->tail * n->cqe_size;
+ret = pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
+sizeof(req->cqe));
+if (ret) {
+trace_pci_nvme_err_addr_write(addr);
+timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
+  500 * SCALE_MS);
+break;
+}
+QTAILQ_REMOVE(&cq->req_list, req, entry);
 nvme_inc_cq_tail(cq);
-pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
-sizeof(req->cqe));
 nvme_req_exit(req);
 QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
 }
@@ -1611,7 +1627,12 @@ static void nvme_process_sq(void *opaque)
 
 while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) {
 addr = sq->dma_addr + sq->head * n->sqe_size;
-nvme_addr_read(n,

[PATCH 04/17] hw/block/nvme: alignment style fixes

2020-09-04 Thread Klaus Jensen

From: Klaus Jensen 

Style fixes.

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c | 25 +
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index a94e648a80e4..88b4e6288bea 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -634,7 +634,7 @@ static void nvme_rw_cb(void *opaque, int ret)
 static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
 {
 block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
- BLOCK_ACCT_FLUSH);
+ BLOCK_ACCT_FLUSH);
 req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, req);
 
 return NVME_NO_COMPLETE;
@@ -663,7 +663,7 @@ static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest 
*req)
 block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
  BLOCK_ACCT_WRITE);
 req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, offset, count,
-BDRV_REQ_MAY_UNMAP, nvme_rw_cb, req);
+   BDRV_REQ_MAY_UNMAP, nvme_rw_cb, req);
 return NVME_NO_COMPLETE;
 }
 
@@ -803,7 +803,7 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
 }
 
 static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
-uint16_t sqid, uint16_t cqid, uint16_t size)
+ uint16_t sqid, uint16_t cqid, uint16_t size)
 {
 int i;
 NvmeCQueue *cq;
@@ -1058,7 +1058,8 @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req)
 }
 
 static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
-uint16_t cqid, uint16_t vector, uint16_t size, uint16_t irq_enabled)
+ uint16_t cqid, uint16_t vector, uint16_t size,
+ uint16_t irq_enabled)
 {
 int ret;
 
@@ -1118,7 +1119,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest 
*req)
 
 cq = g_malloc0(sizeof(*cq));
 nvme_init_cq(cq, n, prp1, cqid, vector, qsize + 1,
-NVME_CQ_FLAGS_IEN(qflags));
+ NVME_CQ_FLAGS_IEN(qflags));
 
 /*
  * It is only required to set qs_created when creating a completion queue;
@@ -1520,7 +1521,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest 
*req)
 }
 
 if (((n->temperature >= n->features.temp_thresh_hi) ||
-(n->temperature <= n->features.temp_thresh_low)) &&
+ (n->temperature <= n->features.temp_thresh_low)) &&
 NVME_AEC_SMART(n->features.async_config) & NVME_SMART_TEMPERATURE) 
{
 nvme_enqueue_event(n, NVME_AER_TYPE_SMART,
NVME_AER_INFO_SMART_TEMP_THRESH,
@@ -1770,9 +1771,9 @@ static int nvme_start_ctrl(NvmeCtrl *n)
 n->cqe_size = 1 << NVME_CC_IOCQES(n->bar.cc);
 n->sqe_size = 1 << NVME_CC_IOSQES(n->bar.cc);
 nvme_init_cq(&n->admin_cq, n, n->bar.acq, 0, 0,
-NVME_AQA_ACQS(n->bar.aqa) + 1, 1);
+ NVME_AQA_ACQS(n->bar.aqa) + 1, 1);
 nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0,
-NVME_AQA_ASQS(n->bar.aqa) + 1);
+ NVME_AQA_ASQS(n->bar.aqa) + 1);
 
 nvme_set_timestamp(n, 0ULL);
 
@@ -1782,7 +1783,7 @@ static int nvme_start_ctrl(NvmeCtrl *n)
 }
 
 static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
-unsigned size)
+   unsigned size)
 {
 if (unlikely(offset & (sizeof(uint32_t) - 1))) {
 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_misaligned32,
@@ -1925,7 +1926,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
"invalid write to PMRSWTP register, ignored");
 return;
 case 0xE14: /* TODO PMRMSC */
- break;
+break;
 default:
 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_invalid,
"invalid MMIO write,"
@@ -2101,7 +2102,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int 
val)
 }
 
 static void nvme_mmio_write(void *opaque, hwaddr addr, uint64_t data,
-unsigned size)
+unsigned size)
 {
 NvmeCtrl *n = (NvmeCtrl *)opaque;
 
@@ -2125,7 +2126,7 @@ static const MemoryRegionOps nvme_mmio_ops = {
 };
 
 static void nvme_cmb_write(void *opaque, hwaddr addr, uint64_t data,
-unsigned size)
+   unsigned size)
 {
 NvmeCtrl *n = (NvmeCtrl *)opaque;
 stn_le_p(&n->cmbuf[addr], size, data);
-- 
2.28.0

[PATCH 00/17] hw/block/nvme: multiple namespaces support

2020-09-04 Thread Klaus Jensen

From: Klaus Jensen 

This is the next round of my patches for the nvme device.

This includes a bit of cleanup and three new features:

  * refactored aio submission
This also adds support for multiple parallel AIOs per request which is in
preparation for DULBE, ZNS and metadata support. If it is found
controversial, it can easily be dropped from this series.

  * support for scatter/gather lists

  * multiple namespaces support through a new nvme-ns device

Finally, the series ends with changing the PCI vendor and device ID to get rid
of the internal Intel id and as a side-effect get rid of some Linux kernel
quirks that no longer applies.

"pci: pass along the return value of dma_memory_rw" has already been posted by
Philippe in another series, but since it is not applied yet, I am including it
here.

Gollu Appalanaidu (1):
  hw/block/nvme: add support for sgl bit bucket descriptor

Klaus Jensen (16):
  pci: pass along the return value of dma_memory_rw
  hw/block/nvme: handle dma errors
  hw/block/nvme: commonize nvme_rw error handling
  hw/block/nvme: alignment style fixes
  hw/block/nvme: add a lba to bytes helper
  hw/block/nvme: fix endian conversion
  hw/block/nvme: add symbolic command name to trace events
  hw/block/nvme: refactor aio submission
  hw/block/nvme: default request status to success
  hw/block/nvme: support multiple parallel aios per request
  hw/block/nvme: harden cmb access
  hw/block/nvme: add support for scatter gather lists
  hw/block/nvme: refactor identify active namespace id list
  hw/block/nvme: support multiple namespaces
  pci: allocate pci id for nvme
  hw/block/nvme: change controller pci id

 MAINTAINERS|   1 +
 docs/specs/nvme.txt|  23 +
 docs/specs/pci-ids.txt |   1 +
 hw/block/meson.build   |   2 +-
 hw/block/nvme-ns.c | 185 +
 hw/block/nvme-ns.h |  74 
 hw/block/nvme.c| 923 +++--
 hw/block/nvme.h| 126 +-
 hw/block/trace-events  |  21 +-
 hw/core/machine.c  |   1 +
 include/block/nvme.h   |   8 +-
 include/hw/pci/pci.h   |   4 +-
 12 files changed, 1108 insertions(+), 261 deletions(-)
 create mode 100644 docs/specs/nvme.txt
 create mode 100644 hw/block/nvme-ns.c
 create mode 100644 hw/block/nvme-ns.h

-- 
2.28.0

[PATCH 01/17] pci: pass along the return value of dma_memory_rw

2020-09-04 Thread Klaus Jensen

From: Klaus Jensen 

Some devices might want to know the return value of dma_memory_rw, so
pass it along instead of ignoring it.

There are no existing users of the return value, so this patch should be
safe.

Signed-off-by: Klaus Jensen 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Michael S. Tsirkin 
Acked-by: Keith Busch 
---
 include/hw/pci/pci.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 4ca7258b5b71..896cef9ad476 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -788,8 +788,7 @@ static inline AddressSpace *pci_get_address_space(PCIDevice 
*dev)
 static inline int pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
  void *buf, dma_addr_t len, DMADirection dir)
 {
-dma_memory_rw(pci_get_address_space(dev), addr, buf, len, dir);
-return 0;
+return dma_memory_rw(pci_get_address_space(dev), addr, buf, len, dir);
 }
 
 static inline int pci_dma_read(PCIDevice *dev, dma_addr_t addr,
-- 
2.28.0

Re: [PATCH 0/4] nbd reconnect new fixes

2020-09-04 Thread Vladimir Sementsov-Ogievskiy


04.09.2020 16:34, no-re...@patchew.org wrote:

Patchew URL: 
https://patchew.org/QEMU/20200903190301.367620-1-vsement...@virtuozzo.com/



Hi,

This series failed the docker-mingw@fedora build test. Please find the testing 
commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.






The full log is available at
http://patchew.org/logs/20200903190301.367620-1-vsement...@virtuozzo.com/testing.docker-mingw@fedora/?type=message.



bad link. it shows:

N/A. Internal error while reading log file


---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com




--
Best regards,
Vladimir

Re: [PATCH 0/4] nbd reconnect new fixes

2020-09-04 Thread Vladimir Sementsov-Ogievskiy


04.09.2020 16:32, no-re...@patchew.org wrote:

Patchew URL: 
https://patchew.org/QEMU/20200903190301.367620-1-vsement...@virtuozzo.com/



Hi,

This series failed build test on FreeBSD host. Please find the details below.






The full log is available at
http://patchew.org/logs/20200903190301.367620-1-vsement...@virtuozzo.com/testing.FreeBSD/?type=message.


bad link. it shows:

N/A. Internal error while reading log file


---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com




--
Best regards,
Vladimir

Re: [PATCH v8 5/7] copy-on-read: limit guest writes to base in COR driver

2020-09-04 Thread Vladimir Sementsov-Ogievskiy


04.09.2020 15:50, Max Reitz wrote:

On 28.08.20 18:52, Andrey Shinkevich wrote:

Limit the guest's COR operations by the base node in the backing chain
during a stream job.


I don’t understand.   Shouldn’t we limit the areas where we set the COR
flag?


Signed-off-by: Andrey Shinkevich 
---
  block/copy-on-read.c | 49 +
  1 file changed, 49 insertions(+)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 1f858bb..ecbd1f8 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -57,6 +57,37 @@ static BlockDriverState *get_base_by_name(BlockDriverState 
*bs,
  return base_bs;
  }
  
+/*

+ * Returns 1 if the block is allocated in a node between 
cor_filter_bs->file->bs
+ * and the base_bs in the backing chain. Otherwise, returns 0.
+ * The COR operation is allowed if the base_bs is not specified - return 1.
+ * Returns negative errno on failure.
+ */
+static int node_above_base(BlockDriverState *cor_filter_bs, uint64_t offset,
+   uint64_t bytes)
+{
+int ret;
+int64_t dummy;
+BlockDriverState *file = NULL;
+BDRVStateCOR *state = cor_filter_bs->opaque;
+
+if (!state->base_bs) {
+return 1;
+}
+
+ret = bdrv_block_status_above(cor_filter_bs->file->bs, state->base_bs,
+  offset, bytes, &dummy, NULL, &file);
+if (ret < 0) {
+return ret;
+}
+
+if (file) {


Why check file and not the return value?


+return 1;
+}
+
+return 0;


“dummy” should really not be called that way, it should be evaluated
whether it’s smaller than bytes.

First, [offset, offset + dummy) may not be allocated above the base –
but [offset + dummy, offset + bytes) may be.  Then this function returns
0 here, even though there is something in that range that’s allocated.

Second, in that case we still shouldn’t return 1, but return the
shrunken offset instead.  Or, if there are multiple distinct allocated
areas, they should probably even all be copied separately.


(But all of that of course only if this function is intended to be used
to limit where we set the COR flag, because I don’t understand why we’d
want to limit where something can be written.)



Agree to all.

1. Write path shouldn't be changed: it's a copy-on-read filter.

2. On read we need is_allocated_above-driven loop, to insert the flag only to 
regions allocated above base
 (and other regions we read just without the flag, don't skip them). 
qiov_offset will help very well.

3. Like in many other places, let's ignore block-status errors (and just add 
the flag if block_status fails)


+}
+
  static int cor_open(BlockDriverState *bs, QDict *options, int flags,
  Error **errp)
  {
@@ -153,6 +184,12 @@ static int coroutine_fn 
cor_co_pwritev_part(BlockDriverState *bs,
  QEMUIOVector *qiov,
  size_t qiov_offset, int flags)
  {
+int ret = node_above_base(bs, offset, bytes);
+
+if (!ret || ret < 0) {
+return ret;
+}
+
  return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
  flags);
  }
@@ -162,6 +199,12 @@ static int coroutine_fn 
cor_co_pwrite_zeroes(BlockDriverState *bs,
   int64_t offset, int bytes,
   BdrvRequestFlags flags)
  {
+int ret = node_above_base(bs, offset, bytes);
+
+if (!ret || ret < 0) {
+return ret;
+}
+
  return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
  }
  
@@ -178,6 +221,12 @@ static int coroutine_fn cor_co_pwritev_compressed(BlockDriverState *bs,

uint64_t bytes,
QEMUIOVector *qiov)
  {
+int ret = node_above_base(bs, offset, bytes);
+
+if (!ret || ret < 0) {
+return ret;
+}
+
  return bdrv_co_pwritev(bs->file, offset, bytes, qiov,
 BDRV_REQ_WRITE_COMPRESSED);
  }







--
Best regards,
Vladimir

Re: [PATCH v8 6/7] block-stream: freeze link to base node during stream job

2020-09-04 Thread Vladimir Sementsov-Ogievskiy


04.09.2020 16:21, Max Reitz wrote:

On 28.08.20 18:52, Andrey Shinkevich wrote:

To keep the base node unchanged during the block-stream operation,
freeze it as the other part of the backing chain with the intermediate
nodes related to the job.
This patch revers the change made with the commit c624b015bf as the
correct base file name and its format have to be written down to the
QCOW2 header on the disk when the backing file is being changed after
the stream job completes.
This reversion incurs changes in the tests 030, 245 and discards the
test 258 where concurrent stream/commit jobs are tested. When the link
to a base node is frozen, the concurrent job cannot change the common
backing chain.

Signed-off-by: Andrey Shinkevich 
---
  block/stream.c |  29 ++--
  tests/qemu-iotests/030 |  10 +--
  tests/qemu-iotests/245 |   2 +-
  tests/qemu-iotests/258 | 161 -
  tests/qemu-iotests/258.out |  33 --
  5 files changed, 14 insertions(+), 221 deletions(-)
  delete mode 100755 tests/qemu-iotests/258
  delete mode 100644 tests/qemu-iotests/258.out


Does this need to be in this series?  (I’m not entirely sure, based on
what I can see in patch 7.)

When doing this, should we introduce a @bottom-node option alongside, so
that we can make all the tests that are deleted here pass still, just
with changes?



That's a question to discuss, and I asked Andrey to make this patch in this
simple way to see, how much damage we have with this change.

Honestly, I doubt that we need the new option. Previously, we decided that
we can make this change without a deprecation. If we still going to do it,
we shouldn't care about these use cases. So, if we freeze base again wituhout
a depreaction and:

1. add bottom-node

 - we keep the iotests
 - If (unlikely) someone will came and say: hey, you've broken my working scenario, we 
will say "use bottom-node option, sorry"
 - Most likely we'll have unused option and corresponding unused logic, making code more 
complex for nothing (and we'll have to say "sorry" anyway)

2. without option

 - we loose the iotests. this looks scary, but it is honest: we drop use-cases 
and corresponding iotests
 - If (unlikely) someone will came and say: hey, you've broken my working 
scenario, he will have to wait for next release / package version / or update 
the downstream himself
 - Most likely all will be OK.


Hmm. OK, and the hard-way:

3. Enable all the new logic (filter insertion, freezing base, etc.) only when 
filter-node-name option specified. And immediately deprecate not-specifying the 
option.
 [Note, that in way [3] we don't need bottom-node option]



--
Best regards,
Vladimir

Re: [PATCH v8 7/7] block: apply COR-filter to block-stream jobs

2020-09-04 Thread Max Reitz

On 28.08.20 18:52, Andrey Shinkevich wrote:
> This patch completes the series with the COR-filter insertion for
> block-stream operations. Adding the filter makes it possible for copied
> regions to be discarded in backing files during the block-stream job,
> what will reduce the disk overuse.
> The COR-filter insertion incurs changes in the iotests case
> 245:test_block_stream_4 that reopens the backing chain during a
> block-stream job. There are changes in the iotests #030 as well.
> The iotests case 030:test_stream_parallel was deleted due to multiple
> conflicts between the concurrent job operations over the same backing
> chain. The base backing node for one job is the top node for another
> job. It may change due to the filter node inserted into the backing
> chain while both jobs are running. Another issue is that the parts of
> the backing chain are being frozen by the running job and may not be
> changed by the concurrent job when needed. The concept of the parallel
> jobs with common nodes is considered vital no more.
> 
> Signed-off-by: Andrey Shinkevich 
> ---
>  block/stream.c | 78 
> +++---
>  tests/qemu-iotests/030 | 50 +++--
>  tests/qemu-iotests/030.out |  4 +--
>  tests/qemu-iotests/245 | 19 ---
>  4 files changed, 67 insertions(+), 84 deletions(-)

I’m not sure I can really review this, when I don’t know what the COR
filter really does.  For example, as it is in this version, it
unconditionally performs COR, and so on guest reads will copy everything
from the bottom layers even when they are below the base node.

Max



signature.asc
Description: OpenPGP digital signature

Re: [PATCH 0/4] nbd reconnect new fixes

2020-09-04 Thread no-reply

Patchew URL: 
https://patchew.org/QEMU/20200903190301.367620-1-vsement...@virtuozzo.com/



Hi,

This series failed the docker-mingw@fedora build test. Please find the testing 
commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.






The full log is available at
http://patchew.org/logs/20200903190301.367620-1-vsement...@virtuozzo.com/testing.docker-mingw@fedora/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [PATCH 0/4] nbd reconnect new fixes

2020-09-04 Thread no-reply

Patchew URL: 
https://patchew.org/QEMU/20200903190301.367620-1-vsement...@virtuozzo.com/



Hi,

This series failed build test on FreeBSD host. Please find the details below.






The full log is available at
http://patchew.org/logs/20200903190301.367620-1-vsement...@virtuozzo.com/testing.FreeBSD/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [PATCH v8 6/7] block-stream: freeze link to base node during stream job

2020-09-04 Thread Max Reitz

On 28.08.20 18:52, Andrey Shinkevich wrote:
> To keep the base node unchanged during the block-stream operation,
> freeze it as the other part of the backing chain with the intermediate
> nodes related to the job.
> This patch revers the change made with the commit c624b015bf as the
> correct base file name and its format have to be written down to the
> QCOW2 header on the disk when the backing file is being changed after
> the stream job completes.
> This reversion incurs changes in the tests 030, 245 and discards the
> test 258 where concurrent stream/commit jobs are tested. When the link
> to a base node is frozen, the concurrent job cannot change the common
> backing chain.
> 
> Signed-off-by: Andrey Shinkevich 
> ---
>  block/stream.c |  29 ++--
>  tests/qemu-iotests/030 |  10 +--
>  tests/qemu-iotests/245 |   2 +-
>  tests/qemu-iotests/258 | 161 
> -
>  tests/qemu-iotests/258.out |  33 --
>  5 files changed, 14 insertions(+), 221 deletions(-)
>  delete mode 100755 tests/qemu-iotests/258
>  delete mode 100644 tests/qemu-iotests/258.out

Does this need to be in this series?  (I’m not entirely sure, based on
what I can see in patch 7.)

When doing this, should we introduce a @bottom-node option alongside, so
that we can make all the tests that are deleted here pass still, just
with changes?

Max



signature.asc
Description: OpenPGP digital signature

Re: [PATCH v8 5/7] copy-on-read: limit guest writes to base in COR driver

2020-09-04 Thread Max Reitz

On 28.08.20 18:52, Andrey Shinkevich wrote:
> Limit the guest's COR operations by the base node in the backing chain
> during a stream job.

I don’t understand.   Shouldn’t we limit the areas where we set the COR
flag?

> Signed-off-by: Andrey Shinkevich 
> ---
>  block/copy-on-read.c | 49 +
>  1 file changed, 49 insertions(+)
> 
> diff --git a/block/copy-on-read.c b/block/copy-on-read.c
> index 1f858bb..ecbd1f8 100644
> --- a/block/copy-on-read.c
> +++ b/block/copy-on-read.c
> @@ -57,6 +57,37 @@ static BlockDriverState *get_base_by_name(BlockDriverState 
> *bs,
>  return base_bs;
>  }
>  
> +/*
> + * Returns 1 if the block is allocated in a node between 
> cor_filter_bs->file->bs
> + * and the base_bs in the backing chain. Otherwise, returns 0.
> + * The COR operation is allowed if the base_bs is not specified - return 1.
> + * Returns negative errno on failure.
> + */
> +static int node_above_base(BlockDriverState *cor_filter_bs, uint64_t offset,
> +   uint64_t bytes)
> +{
> +int ret;
> +int64_t dummy;
> +BlockDriverState *file = NULL;
> +BDRVStateCOR *state = cor_filter_bs->opaque;
> +
> +if (!state->base_bs) {
> +return 1;
> +}
> +
> +ret = bdrv_block_status_above(cor_filter_bs->file->bs, state->base_bs,
> +  offset, bytes, &dummy, NULL, &file);
> +if (ret < 0) {
> +return ret;
> +}
> +
> +if (file) {

Why check file and not the return value?

> +return 1;
> +}
> +
> +return 0;

“dummy” should really not be called that way, it should be evaluated
whether it’s smaller than bytes.

First, [offset, offset + dummy) may not be allocated above the base –
but [offset + dummy, offset + bytes) may be.  Then this function returns
0 here, even though there is something in that range that’s allocated.

Second, in that case we still shouldn’t return 1, but return the
shrunken offset instead.  Or, if there are multiple distinct allocated
areas, they should probably even all be copied separately.


(But all of that of course only if this function is intended to be used
to limit where we set the COR flag, because I don’t understand why we’d
want to limit where something can be written.)

Max

> +}
> +
>  static int cor_open(BlockDriverState *bs, QDict *options, int flags,
>  Error **errp)
>  {
> @@ -153,6 +184,12 @@ static int coroutine_fn 
> cor_co_pwritev_part(BlockDriverState *bs,
>  QEMUIOVector *qiov,
>  size_t qiov_offset, int flags)
>  {
> +int ret = node_above_base(bs, offset, bytes);
> +
> +if (!ret || ret < 0) {
> +return ret;
> +}
> +
>  return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
>  flags);
>  }
> @@ -162,6 +199,12 @@ static int coroutine_fn 
> cor_co_pwrite_zeroes(BlockDriverState *bs,
>   int64_t offset, int bytes,
>   BdrvRequestFlags flags)
>  {
> +int ret = node_above_base(bs, offset, bytes);
> +
> +if (!ret || ret < 0) {
> +return ret;
> +}
> +
>  return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
>  }
>  
> @@ -178,6 +221,12 @@ static int coroutine_fn 
> cor_co_pwritev_compressed(BlockDriverState *bs,
>uint64_t bytes,
>QEMUIOVector *qiov)
>  {
> +int ret = node_above_base(bs, offset, bytes);
> +
> +if (!ret || ret < 0) {
> +return ret;
> +}
> +
>  return bdrv_co_pwritev(bs->file, offset, bytes, qiov,
> BDRV_REQ_WRITE_COMPRESSED);
>  }
> 




signature.asc
Description: OpenPGP digital signature

Re: [PATCH v7 1/8] Introduce yank feature

2020-09-04 Thread Eric Blake


On 9/4/20 7:33 AM, Lukas Straub wrote:


+##
+# @YankInstances:
+#
+# @instances: List of yank instances.
+#
+# Yank instances are named after the following schema:
+# "blockdev:", "chardev:" and "migration"
+#
+# Since: 5.1
+##
+{ 'struct': 'YankInstances', 'data': {'instances': ['str'] } }


I'm afraid this is a problematic QMP interface.

By making YankInstances a struct, you keep the door open to adding more
members, which is good.

But by making its 'instances' member a ['str'], you close the door to
using anything but a single string for the individual instances.  Not so
good.

The single string encodes information which QMP client will need to
parse from the string.  We frown on that in QMP.  Use QAPI complex types
capabilities for structured data.

Could you use something like this instead?

{ 'enum': 'YankInstanceType',
   'data': { 'block-node', 'chardev', 'migration' } }

{ 'struct': 'YankInstanceBlockNode',
   'data': { 'node-name': 'str' } }

{ 'struct': 'YankInstanceChardev',
   'data' { 'label': 'str' } }

{ 'union': 'YankInstance',
   'base': { 'type': 'YankInstanceType' },
   'discriminator': 'type',
   'data': {
   'block-node': 'YankInstanceBlockNode',
   'chardev': 'YankInstanceChardev' } }

{ 'command': 'yank',
   'data': { 'instances': ['YankInstance'] },
   'allow-oob': true }


This proposal looks good to me. Does everyone agree?


Yes; this is also more introspectible, so that if we add more yank 
instances down the road, or even more optional features to existing yank 
instances, it becomes easier to detect whether a particular qemu has 
those additions.


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org

[PATCH 3/3] block/nvme: Pair doorbell registers

2020-09-04 Thread Philippe Mathieu-Daudé

For each queue doorbell registers are paired as:
- Submission Queue Tail Doorbell
- Completion Queue Head Doorbell

Reflect that in the NVMeRegs structure, and adapt
nvme_create_queue_pair() accordingly.

Signed-off-by: Philippe Mathieu-Daudé 
---
 block/nvme.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index a216cc407f6..f4f27b6da7d 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -84,7 +84,10 @@ typedef struct {
 /* Memory mapped registers */
 typedef volatile struct {
 NvmeBar ctrl;
-uint32_t doorbells[];
+struct {
+uint32_t sq_tail;
+uint32_t cq_head;
+} doorbells[];
 } NVMeRegs;
 
 #define INDEX_ADMIN 0
@@ -244,14 +247,14 @@ static NVMeQueuePair 
*nvme_create_queue_pair(BDRVNVMeState *s,
 error_propagate(errp, local_err);
 goto fail;
 }
-q->sq.doorbell = &s->regs->doorbells[idx * 2 * s->doorbell_scale];
+q->sq.doorbell = &s->regs->doorbells[idx * s->doorbell_scale].sq_tail;
 
 nvme_init_queue(s, &q->cq, size, NVME_CQ_ENTRY_BYTES, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
 goto fail;
 }
-q->cq.doorbell = &s->regs->doorbells[(idx * 2 + 1) * s->doorbell_scale];
+q->cq.doorbell = &s->regs->doorbells[idx * s->doorbell_scale].cq_head;
 
 return q;
 fail:
-- 
2.26.2

[PATCH 1/3] block/nvme: Group controller registers in NVMeRegs structure

2020-09-04 Thread Philippe Mathieu-Daudé

We want to use the NvmeBar structure from "block/nvme.h" in the
next commit. As a preliminary step, group all the NVMe controller
registers in the 'ctrl' field, keeping the doorbells registers
out of it.

Signed-off-by: Philippe Mathieu-Daudé 
---
 block/nvme.c | 48 +---
 1 file changed, 25 insertions(+), 23 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index 24e6e7f0866..c9c3fc02fed 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -83,21 +83,23 @@ typedef struct {
 
 /* Memory mapped registers */
 typedef volatile struct {
-uint64_t cap;
-uint32_t vs;
-uint32_t intms;
-uint32_t intmc;
-uint32_t cc;
-uint32_t reserved0;
-uint32_t csts;
-uint32_t nssr;
-uint32_t aqa;
-uint64_t asq;
-uint64_t acq;
-uint32_t cmbloc;
-uint32_t cmbsz;
-uint8_t  reserved1[0xec0];
-uint8_t  cmd_set_specfic[0x100];
+struct {
+uint64_t cap;
+uint32_t vs;
+uint32_t intms;
+uint32_t intmc;
+uint32_t cc;
+uint32_t reserved0;
+uint32_t csts;
+uint32_t nssr;
+uint32_t aqa;
+uint64_t asq;
+uint64_t acq;
+uint32_t cmbloc;
+uint32_t cmbsz;
+uint8_t  reserved1[0xec0];
+uint8_t  cmd_set_specfic[0x100];
+} ctrl;
 uint32_t doorbells[];
 } NVMeRegs;
 
@@ -734,7 +736,7 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
 /* Perform initialize sequence as described in NVMe spec "7.6.1
  * Initialization". */
 
-cap = le64_to_cpu(s->regs->cap);
+cap = le64_to_cpu(s->regs->ctrl.cap);
 if (!(cap & (1ULL << 37))) {
 error_setg(errp, "Device doesn't support NVMe command set");
 ret = -EINVAL;
@@ -747,10 +749,10 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
 timeout_ms = MIN(500 * ((cap >> 24) & 0xFF), 3);
 
 /* Reset device to get a clean state. */
-s->regs->cc = cpu_to_le32(le32_to_cpu(s->regs->cc) & 0xFE);
+s->regs->ctrl.cc = cpu_to_le32(le32_to_cpu(s->regs->ctrl.cc) & 0xFE);
 /* Wait for CSTS.RDY = 0. */
 deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS;
-while (le32_to_cpu(s->regs->csts) & 0x1) {
+while (le32_to_cpu(s->regs->ctrl.csts) & 0x1) {
 if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
 error_setg(errp, "Timeout while waiting for device to reset (%"
  PRId64 " ms)",
@@ -771,18 +773,18 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
 }
 s->nr_queues = 1;
 QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000);
-s->regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
-s->regs->asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
-s->regs->acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
+s->regs->ctrl.aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
+s->regs->ctrl.asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
+s->regs->ctrl.acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
 
 /* After setting up all control registers we can enable device now. */
-s->regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
+s->regs->ctrl.cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
   (ctz32(NVME_SQ_ENTRY_BYTES) << 16) |
   0x1);
 /* Wait for CSTS.RDY = 1. */
 now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
 deadline = now + timeout_ms * 100;
-while (!(le32_to_cpu(s->regs->csts) & 0x1)) {
+while (!(le32_to_cpu(s->regs->ctrl.csts) & 0x1)) {
 if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
 error_setg(errp, "Timeout while waiting for device to start (%"
  PRId64 " ms)",
-- 
2.26.2

[PATCH 2/3] block/nvme: Use generic NvmeBar structure

2020-09-04 Thread Philippe Mathieu-Daudé

Commit f3c507adcd7 ("NVMe: Initial commit for new storage interface")
introduced the NvmeBar structure. Unfortunately in commit bdd6a90a9e5
("block: Add VFIO based NVMe driver") we duplicated it.

Apparently in commit a3d9a352d48 ("block: Move NVMe constants to
a separate header") we tried to unify headers but forgot to remove
the structure declared in the block/nvme.c source file.

Do it now, and remove the structure size check which is redundant
with the header check added in commit 74e18435c0e ("hw/block/nvme:
Align I/O BAR to 4 KiB").

Signed-off-by: Philippe Mathieu-Daudé 
---
 block/nvme.c | 20 +---
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index c9c3fc02fed..a216cc407f6 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -83,28 +83,10 @@ typedef struct {
 
 /* Memory mapped registers */
 typedef volatile struct {
-struct {
-uint64_t cap;
-uint32_t vs;
-uint32_t intms;
-uint32_t intmc;
-uint32_t cc;
-uint32_t reserved0;
-uint32_t csts;
-uint32_t nssr;
-uint32_t aqa;
-uint64_t asq;
-uint64_t acq;
-uint32_t cmbloc;
-uint32_t cmbsz;
-uint8_t  reserved1[0xec0];
-uint8_t  cmd_set_specfic[0x100];
-} ctrl;
+NvmeBar ctrl;
 uint32_t doorbells[];
 } NVMeRegs;
 
-QEMU_BUILD_BUG_ON(offsetof(NVMeRegs, doorbells) != 0x1000);
-
 #define INDEX_ADMIN 0
 #define INDEX_IO(n) (1 + n)
 
-- 
2.26.2

[PATCH 0/3] block/nvme: Use NvmeBar structure from "block/nvme.h"

2020-09-04 Thread Philippe Mathieu-Daudé

Cleanups in the NVMeRegs structure:
- Use the already existing NvmeBar structure from "block/nvme.h"
- Pair doorbell registers

Based-on: <20200903122803.405265-1-phi...@redhat.com>

Philippe Mathieu-Daudé (3):
  block/nvme: Group controller registers in NVMeRegs structure
  block/nvme: Use generic NvmeBar structure
  block/nvme: Pair doorbell registers

 block/nvme.c | 43 +++
 1 file changed, 15 insertions(+), 28 deletions(-)

-- 
2.26.2

Re: [PATCH v7 1/8] Introduce yank feature

2020-09-04 Thread Lukas Straub

On Thu, 27 Aug 2020 14:37:00 +0200
Markus Armbruster  wrote:

> I apologize for not reviewing this much earlier.
> 
> Lukas Straub  writes:
> 
> > The yank feature allows to recover from hanging qemu by "yanking"
> > at various parts. Other qemu systems can register themselves and
> > multiple yank functions. Then all yank functions for selected
> > instances can be called by the 'yank' out-of-band qmp command.
> > Available instances can be queried by a 'query-yank' oob command.
> >
> > Signed-off-by: Lukas Straub 
> > Acked-by: Stefan Hajnoczi 
> > ---
> ...
> > diff --git a/qapi/misc.json b/qapi/misc.json
> > index 9d32820dc1..0d6a8f20b7 100644
> > --- a/qapi/misc.json
> > +++ b/qapi/misc.json
> > @@ -1615,3 +1615,48 @@
> >  ##
> >  { 'command': 'query-vm-generation-id', 'returns': 'GuidInfo' }
> >
> > +##
> > +# @YankInstances:
> > +#
> > +# @instances: List of yank instances.
> > +#
> > +# Yank instances are named after the following schema:
> > +# "blockdev:", "chardev:" and "migration"
> > +#
> > +# Since: 5.1
> > +##
> > +{ 'struct': 'YankInstances', 'data': {'instances': ['str'] } }  
> 
> I'm afraid this is a problematic QMP interface.
> 
> By making YankInstances a struct, you keep the door open to adding more
> members, which is good.
> 
> But by making its 'instances' member a ['str'], you close the door to
> using anything but a single string for the individual instances.  Not so
> good.
> 
> The single string encodes information which QMP client will need to
> parse from the string.  We frown on that in QMP.  Use QAPI complex types
> capabilities for structured data.
> 
> Could you use something like this instead?
> 
> { 'enum': 'YankInstanceType',
>   'data': { 'block-node', 'chardev', 'migration' } }
> 
> { 'struct': 'YankInstanceBlockNode',
>   'data': { 'node-name': 'str' } }
> 
> { 'struct': 'YankInstanceChardev',
>   'data' { 'label': 'str' } }
> 
> { 'union': 'YankInstance',
>   'base': { 'type': 'YankInstanceType' },
>   'discriminator': 'type',
>   'data': {
>   'block-node': 'YankInstanceBlockNode',
>   'chardev': 'YankInstanceChardev' } }
> 
> { 'command': 'yank',
>   'data': { 'instances': ['YankInstance'] },
>   'allow-oob': true }

This proposal looks good to me. Does everyone agree?

Regards,
Lukas Straub

> If you're confident nothing will ever be added to YankInstanceBlockNode
> and YankInstanceChardev, you could use str instead.
> 
> > +
> > +##
> > +# @yank:
> > +#
> > +# Recover from hanging qemu by yanking the specified instances.  
> 
> What's an "instance", and what does it mean to "yank" it?
> 
> The documentation of YankInstances above gives a clue on what an
> "instance" is: presumably a block node, a character device or the
> migration job.
> 
> I guess a YankInstance is whatever the code chooses to make one, and the
> current code makes these three kinds.
> 
> Does it make every block node a YankInstance?  If not, which ones?
> 
> Does it make every character device a YankInstance?  If not, which ones?
> 
> Does it make migration always a YankInstance?  If not, when?
> 
> > +#
> > +# Takes @YankInstances as argument.
> > +#
> > +# Returns: nothing.
> > +#
> > +# Example:
> > +#
> > +# -> { "execute": "yank", "arguments": { "instances": ["blockdev:nbd0"] } }
> > +# <- { "return": {} }
> > +#
> > +# Since: 5.1
> > +##
> > +{ 'command': 'yank', 'data': 'YankInstances', 'allow-oob': true }
> > +
> > +##
> > +# @query-yank:
> > +#
> > +# Query yank instances.
> > +#
> > +# Returns: @YankInstances
> > +#
> > +# Example:
> > +#
> > +# -> { "execute": "query-yank" }
> > +# <- { "return": { "instances": ["blockdev:nbd0"] } }
> > +#
> > +# Since: 5.1
> > +##
> > +{ 'command': 'query-yank', 'returns': 'YankInstances', 'allow-oob': true }
> ...


pgpCnC6MLDYNR.pgp
Description: OpenPGP digital signature

Re: [PATCH v8 4/7] copy-on-read: pass base file name to COR driver

2020-09-04 Thread Vladimir Sementsov-Ogievskiy


04.09.2020 15:17, Max Reitz wrote:

On 28.08.20 18:52, Andrey Shinkevich wrote:

To limit the guest's COR operations by the base node in the backing
chain during stream job, pass the base file name to the copy-on-read


Does it have to be a filename?  That sounds really bad to me.


Agree, it should be node-name. Filename-based interfaces is a headache.




driver. The rest of the functionality will be implemented in the patch
that follows.

Signed-off-by: Andrey Shinkevich 
---
  block/copy-on-read.c | 41 -
  block/copy-on-read.h |  1 +
  2 files changed, 41 insertions(+), 1 deletion(-)


Furthermore, I believe that this option should become an externally
visible option for the copy-on-read filter (i.e., part of its
BlockdevOptions) – but that definitely won’t be viable if @base contains
a filename.

Can’t we let the stream job invoke bdrv_find_backing_image() to
translate a filename into a node name that’s then passed to the COR filter?


diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 0ede7aa..1f858bb 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -24,19 +24,45 @@
  #include "block/block_int.h"
  #include "qemu/module.h"
  #include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
  #include "qapi/qmp/qdict.h"
  #include "block/copy-on-read.h"
  
  
  typedef struct BDRVStateCOR {

  bool active;
+BlockDriverState *base_bs;
  } BDRVStateCOR;
  
+/*

+ * Non-zero pointers are the caller's responsibility.
+ */
+static BlockDriverState *get_base_by_name(BlockDriverState *bs,
+  const char *base_name, Error **errp)
+{
+BlockDriverState *base_bs = NULL;
+AioContext *aio_context;
+
+base_bs = bdrv_find_backing_image(bs, base_name);
+if (base_bs == NULL) {
+error_setg(errp, QERR_BASE_NOT_FOUND, base_name);
+return NULL;
+}
+
+aio_context = bdrv_get_aio_context(bs);
+aio_context_acquire(aio_context);
+assert(bdrv_get_aio_context(base_bs) == aio_context);
+aio_context_release(aio_context);


Er.  OK.  But why?  Isn’t this just guaranteed by the block layer?  I
don’t think we need an explicit assertion for this, especially if it
means having to acquire an AioContext.

Furthermore, I don’t even know why we’d need the AioContext.  On one
hand, we don’t need to acquire a context just to get it or compare it;
on the other, this I would have thought that .bdrv_open() runs in the
BDS’s AioContext anyway (or the caller already has it acquired at least).

Max




--
Best regards,
Vladimir

Re: [PATCH v8 4/7] copy-on-read: pass base file name to COR driver

2020-09-04 Thread Max Reitz

On 28.08.20 18:52, Andrey Shinkevich wrote:
> To limit the guest's COR operations by the base node in the backing
> chain during stream job, pass the base file name to the copy-on-read

Does it have to be a filename?  That sounds really bad to me.

> driver. The rest of the functionality will be implemented in the patch
> that follows.
> 
> Signed-off-by: Andrey Shinkevich 
> ---
>  block/copy-on-read.c | 41 -
>  block/copy-on-read.h |  1 +
>  2 files changed, 41 insertions(+), 1 deletion(-)

Furthermore, I believe that this option should become an externally
visible option for the copy-on-read filter (i.e., part of its
BlockdevOptions) – but that definitely won’t be viable if @base contains
a filename.

Can’t we let the stream job invoke bdrv_find_backing_image() to
translate a filename into a node name that’s then passed to the COR filter?

> diff --git a/block/copy-on-read.c b/block/copy-on-read.c
> index 0ede7aa..1f858bb 100644
> --- a/block/copy-on-read.c
> +++ b/block/copy-on-read.c
> @@ -24,19 +24,45 @@
>  #include "block/block_int.h"
>  #include "qemu/module.h"
>  #include "qapi/error.h"
> +#include "qapi/qmp/qerror.h"
>  #include "qapi/qmp/qdict.h"
>  #include "block/copy-on-read.h"
>  
>  
>  typedef struct BDRVStateCOR {
>  bool active;
> +BlockDriverState *base_bs;
>  } BDRVStateCOR;
>  
> +/*
> + * Non-zero pointers are the caller's responsibility.
> + */
> +static BlockDriverState *get_base_by_name(BlockDriverState *bs,
> +  const char *base_name, Error 
> **errp)
> +{
> +BlockDriverState *base_bs = NULL;
> +AioContext *aio_context;
> +
> +base_bs = bdrv_find_backing_image(bs, base_name);
> +if (base_bs == NULL) {
> +error_setg(errp, QERR_BASE_NOT_FOUND, base_name);
> +return NULL;
> +}
> +
> +aio_context = bdrv_get_aio_context(bs);
> +aio_context_acquire(aio_context);
> +assert(bdrv_get_aio_context(base_bs) == aio_context);
> +aio_context_release(aio_context);

Er.  OK.  But why?  Isn’t this just guaranteed by the block layer?  I
don’t think we need an explicit assertion for this, especially if it
means having to acquire an AioContext.

Furthermore, I don’t even know why we’d need the AioContext.  On one
hand, we don’t need to acquire a context just to get it or compare it;
on the other, this I would have thought that .bdrv_open() runs in the
BDS’s AioContext anyway (or the caller already has it acquired at least).

Max

signature.asc
Description: OpenPGP digital signature

Re: [PATCH] iotests: Remove 030 from the auto group

2020-09-04 Thread Thomas Huth

On 04/09/2020 12.38, Max Reitz wrote:
> On 04.09.20 12:14, Thomas Huth wrote:
>> On 04/09/2020 10.25, Kevin Wolf wrote:
>>> Am 04.09.2020 um 07:57 hat Thomas Huth geschrieben:
 Test 030 is still occasionally failing in the CI ... so for the
 time being, let's disable it in the "auto" group. We can add it
 back once it got more stable.

 Signed-off-by: Thomas Huth 
>>>
>>> I would rather just disable this one test function as 030 is a pretty
>>> important one that tends to catch bugs.
>>
>> Ok, ... should it always get disabled, or shall we try to come up with
>> some magic checks so that it only gets disabled in the CI pipelines (...
>> though I don't have a clue how to check for Peter's merge test
>> environment...)?
> 
> I suppose we could let check-block.sh set some environment variable.

Sounds like a plan! I'll try to cook a patch.

 Thomas

Re: [PATCH v8 2/7] copy-on-read: add filter append/drop functions

2020-09-04 Thread Max Reitz

On 28.08.20 18:52, Andrey Shinkevich wrote:
> Provide API for the COR-filter insertion/removal.

Hm.  Why?

I see the implementation is just bdrv_open() + bdrv_replace_node(),
which is what I would have expected.  Why can’t the caller just do that?

Or maybe it would even make sense to just make it a generic block driver
function, like

BlockDriverState *
bdrv_insert_node(BlockDriverState *bs, const char *node_driver,
 const char *node_name, QDict *node_options,
 Error **errp);

?

(Similarly for dropping a node from a chain.)

> Also, drop the filter child permissions for an inactive state when the
> filter node is being removed.

Do we need .active for that?  Shouldn’t it be sufficient to just not
require BLK_PERM_WRITE_UNCHANGED when no permissions are taken on the
node (i.e. perm == 0 in cor_child_perm())?

Of course, using an .active field works, too.  But Vladimir wanted a
similar field in the preallocate filter, and there already is in
backup-top.  I feel like there shouldn’t be a need for this.

.bdrv_child_perm() should generally be able to decide what permissions
it needs based on the information it gets.  If every driver needs to
keep track of whether it has any parents and feed that information into
.bdrv_child_perm() as external state, then something feels wrong.

If perm == 0 is not sufficient to rule out any parents, we should just
explicitly add a boolean that tells .bdrv_child_perm() whether there are
any parents or not – shouldn’t be too difficult.

> Signed-off-by: Andrey Shinkevich 
> Reviewed-by: Vladimir Sementsov-Ogievskiy 
> ---
>  block/copy-on-read.c | 104 
> +++
>  block/copy-on-read.h |  35 +
>  2 files changed, 139 insertions(+)
>  create mode 100644 block/copy-on-read.h

[...]

>  block_init(bdrv_copy_on_read_init);
> diff --git a/block/copy-on-read.h b/block/copy-on-read.h
> new file mode 100644
> index 000..1686e4e
> --- /dev/null
> +++ b/block/copy-on-read.h
> @@ -0,0 +1,35 @@
> +/*
> + * Copy-on-read filter block driver
> + *
> + * The filter driver performs Copy-On-Read (COR) operations
> + *
> + * Copyright (c) 2018-2020 Virtuozzo International GmbH.
> + *
> + * Author:
> + *   Andrey Shinkevich 
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program. If not, see .
> + */
> +
> +#ifndef COPY_ON_READ_FILTER
> +#define COPY_ON_READ_FILTER

Bit of a weird include guard, seeing that most header files in qemu
(certainly under block/) base their name on their filename.  So this
would be BLOCK_COPY_ON_READ_H (or COPY_ON_READ_H).

(It’s just that COPY_ON_READ_FILTER kind of sounds like a configured
option, i.e. whether the filter is present or not.)

Max

> +
> +#include "block/block_int.h"
> +
> +BlockDriverState *bdrv_cor_filter_append(BlockDriverState *bs,
> + const char *filter_node_name,
> + Error **errp);
> +void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs);
> +
> +#endif /* COPY_ON_READ_FILTER */
> 

signature.asc
Description: OpenPGP digital signature

Re: [PATCH] iotests: Remove 030 from the auto group

2020-09-04 Thread Max Reitz

On 04.09.20 12:14, Thomas Huth wrote:
> On 04/09/2020 10.25, Kevin Wolf wrote:
>> Am 04.09.2020 um 07:57 hat Thomas Huth geschrieben:
>>> Test 030 is still occasionally failing in the CI ... so for the
>>> time being, let's disable it in the "auto" group. We can add it
>>> back once it got more stable.
>>>
>>> Signed-off-by: Thomas Huth 
>>
>> I would rather just disable this one test function as 030 is a pretty
>> important one that tends to catch bugs.
> 
> Ok, ... should it always get disabled, or shall we try to come up with
> some magic checks so that it only gets disabled in the CI pipelines (...
> though I don't have a clue how to check for Peter's merge test
> environment...)?

I suppose we could let check-block.sh set some environment variable.

Max



signature.asc
Description: OpenPGP digital signature

Re: [PATCH] iotests: Remove 030 from the auto group

2020-09-04 Thread Kevin Wolf

Am 04.09.2020 um 12:14 hat Thomas Huth geschrieben:
> On 04/09/2020 10.25, Kevin Wolf wrote:
> > Am 04.09.2020 um 07:57 hat Thomas Huth geschrieben:
> >> Test 030 is still occasionally failing in the CI ... so for the
> >> time being, let's disable it in the "auto" group. We can add it
> >> back once it got more stable.
> >>
> >> Signed-off-by: Thomas Huth 
> > 
> > I would rather just disable this one test function as 030 is a pretty
> > important one that tends to catch bugs.
> 
> Ok, ... should it always get disabled, or shall we try to come up with
> some magic checks so that it only gets disabled in the CI pipelines (...
> though I don't have a clue how to check for Peter's merge test
> environment...)?

Maybe we can detect whether we're run as part of the "auto" group and
skip the test then (as in QMPTestCase.case_skip)?

Kevin

Re: [PATCH] iotests: Remove 030 from the auto group

2020-09-04 Thread Thomas Huth

On 04/09/2020 10.25, Kevin Wolf wrote:
> Am 04.09.2020 um 07:57 hat Thomas Huth geschrieben:
>> Test 030 is still occasionally failing in the CI ... so for the
>> time being, let's disable it in the "auto" group. We can add it
>> back once it got more stable.
>>
>> Signed-off-by: Thomas Huth 
> 
> I would rather just disable this one test function as 030 is a pretty
> important one that tends to catch bugs.

Ok, ... should it always get disabled, or shall we try to come up with
some magic checks so that it only gets disabled in the CI pipelines (...
though I don't have a clue how to check for Peter's merge test
environment...)?

 Thomas

Re: [PATCH v2 3/3] qcow2: Rewrite the documentation of qcow2_alloc_cluster_offset()

2020-09-04 Thread Max Reitz

On 04.09.20 11:36, Alberto Garcia wrote:
> On Fri 04 Sep 2020 11:28:18 AM CEST, Max Reitz wrote:
>>> + * If any clusters or subclusters were allocated then @m contains a
>>> + * list with the information of all the affected regions. Note that
>>> + * this can happen regardless of whether this function succeeds or
>>> + * not. The caller is responsible for updating the L2 metadata of the
>>> + * allocated clusters (on success) or freeing them (on failure), and
>>> + * for clearing the contents of @m afterwards in both cases.
>>
>> It’s too bad that preallocate_co() doesn’t do that then, isn’t it...
> 
> I was planning to have a closer look at that function next week.

Great! :)

Max



signature.asc
Description: OpenPGP digital signature

Re: [PATCH v2 3/3] qcow2: Rewrite the documentation of qcow2_alloc_cluster_offset()

2020-09-04 Thread Alberto Garcia

On Fri 04 Sep 2020 11:28:18 AM CEST, Max Reitz wrote:
>> + * If any clusters or subclusters were allocated then @m contains a
>> + * list with the information of all the affected regions. Note that
>> + * this can happen regardless of whether this function succeeds or
>> + * not. The caller is responsible for updating the L2 metadata of the
>> + * allocated clusters (on success) or freeing them (on failure), and
>> + * for clearing the contents of @m afterwards in both cases.
>
> It’s too bad that preallocate_co() doesn’t do that then, isn’t it...

I was planning to have a closer look at that function next week.

Berto

[PATCH v4 6/7] tests/qtest/vhost-user-test: add migrate_reconnect test

2020-09-04 Thread Dima Stepanov

Add new migrate_reconnect test for the vhost-user-blk device. Perform a
disconnect after sending response for the VHOST_USER_SET_LOG_BASE
command.

Signed-off-by: Dima Stepanov 
---
 tests/qtest/vhost-user-test.c | 25 +
 1 file changed, 25 insertions(+)

diff --git a/tests/qtest/vhost-user-test.c b/tests/qtest/vhost-user-test.c
index a8af613..4b715d3 100644
--- a/tests/qtest/vhost-user-test.c
+++ b/tests/qtest/vhost-user-test.c
@@ -146,6 +146,7 @@ static VhostUserMsg m __attribute__ ((unused));
 enum {
 TEST_FLAGS_OK,
 TEST_FLAGS_DISCONNECT,
+TEST_FLAGS_MIGRATE_DISCONNECT,
 TEST_FLAGS_BAD,
 TEST_FLAGS_END,
 };
@@ -436,6 +437,15 @@ static void chr_read(void *opaque, const uint8_t *buf, int 
size)
 qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE);
 
 g_cond_broadcast(&s->data_cond);
+/*
+ * Perform disconnect after sending a response. In this
+ * case the next write command on the QEMU side (for now
+ * it is SET_FEATURES will return -1, because of disconnect.
+ */
+if (s->test_flags == TEST_FLAGS_MIGRATE_DISCONNECT) {
+qemu_chr_fe_disconnect(chr);
+s->test_flags = TEST_FLAGS_BAD;
+}
 break;
 
 case VHOST_USER_SET_VRING_BASE:
@@ -737,6 +747,17 @@ static void *vhost_user_test_setup_memfd(GString 
*cmd_line, void *arg)
 return server;
 }
 
+static void *vhost_user_test_setup_migrate_reconnect(GString *cmd_line,
+void *arg)
+{
+TestServer *server;
+
+server = vhost_user_test_setup_memfd(cmd_line, arg);
+server->test_flags = TEST_FLAGS_MIGRATE_DISCONNECT;
+
+return server;
+}
+
 static void test_read_guest_mem(void *obj, void *arg, QGuestAllocator *alloc)
 {
 TestServer *server = arg;
@@ -1150,5 +1171,9 @@ static void register_vhost_user_test(void)
 opts.before = vhost_user_test_setup_memfd;
 qos_add_test("migrate", "vhost-user-blk",
  test_migrate, &opts);
+
+opts.before = vhost_user_test_setup_migrate_reconnect;
+qos_add_test("migrate_reconnect", "vhost-user-blk",
+ test_migrate, &opts);
 }
 libqos_init(register_vhost_user_test);
-- 
2.7.4

[PATCH v4 4/7] tests/qtest/libqos/virtio-blk: add support for vhost-user-blk

2020-09-04 Thread Dima Stepanov

Add support for the vhost-user-blk-pci device. This node can be used by
the vhost-user-blk tests. Tests for the vhost-user-blk device are added
in the following patches.

Signed-off-by: Dima Stepanov 
---
 tests/qtest/libqos/virtio-blk.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/tests/qtest/libqos/virtio-blk.c b/tests/qtest/libqos/virtio-blk.c
index 5da0259..959c5dc 100644
--- a/tests/qtest/libqos/virtio-blk.c
+++ b/tests/qtest/libqos/virtio-blk.c
@@ -36,6 +36,9 @@ static void *qvirtio_blk_get_driver(QVirtioBlk *v_blk,
 if (!g_strcmp0(interface, "virtio")) {
 return v_blk->vdev;
 }
+if (!g_strcmp0(interface, "vhost-user-blk")) {
+return v_blk;
+}
 
 fprintf(stderr, "%s not present in virtio-blk-device\n", interface);
 g_assert_not_reached();
@@ -120,6 +123,17 @@ static void virtio_blk_register_nodes(void)
 qos_node_produces("virtio-blk-pci", "virtio-blk");
 
 g_free(arg);
+
+/* vhost-user-blk-pci */
+arg = g_strdup_printf("id=drv0,chardev=chdev0,addr=%x.%x",
+PCI_SLOT, PCI_FN);
+opts.extra_device_opts = arg;
+add_qpci_address(&opts, &addr);
+qos_node_create_driver("vhost-user-blk-pci", virtio_blk_pci_create);
+qos_node_consumes("vhost-user-blk-pci", "pci-bus", &opts);
+qos_node_produces("vhost-user-blk-pci", "vhost-user-blk");
+
+g_free(arg);
 }
 
 libqos_init(virtio_blk_register_nodes);
-- 
2.7.4

[PATCH v4 0/7] vhost-user-blk: fix the migration issue and enhance qtests

2020-09-04 Thread Dima Stepanov

v3 -> v4:
  - vhost: recheck dev state in the vhost_migration_log routine
Reviewed-by: Raphael Norwitz
  - vhost: check queue state in the vhost_dev_set_log routine
Use "continue" instead of "break" to handle non-initialized
virtqueue case.

v2 -> v3:
  - update commit message for the 
"vhost: recheck dev state in the vhost_migration_log routine" commit
  - rename "started" field of the VhostUserBlk structure to
"started_vu", so there will be no confustion with the VHOST started
field
  - update vhost-user-test.c to always initialize nq local variable
(spotted by patchew)

v1 -> v2:
  - add comments to connected/started fields in the header file
  - move the "s->started" logic from the vhost_user_blk_disconnect
routine to the vhost_user_blk_stop routine

Reference e-mail threads:
  - https://lists.gnu.org/archive/html/qemu-devel/2020-05/msg01509.html
  - https://lists.gnu.org/archive/html/qemu-devel/2020-05/msg05241.html

If vhost-user daemon is used as a backend for the vhost device, then we
should consider a possibility of disconnect at any moment. There was a general
question here: should we consider it as an error or okay state for the 
vhost-user
devices during migration process?
I think the disconnect event for the vhost-user devices should not break the
migration process, because:
  - the device will be in the stopped state, so it will not be changed
during migration
  - if reconnect will be made the migration log will be reinitialized as
part of reconnect/init process:
#0  vhost_log_global_start (listener=0x563989cf7be0)
at hw/virtio/vhost.c:920
#1  0x56398603d8bc in listener_add_address_space 
(listener=0x563989cf7be0,
as=0x563986ea4340 )
at softmmu/memory.c:2664
#2  0x56398603dd30 in memory_listener_register (listener=0x563989cf7be0,
as=0x563986ea4340 )
at softmmu/memory.c:2740
#3  0x563985fd6956 in vhost_dev_init (hdev=0x563989cf7bd8,
opaque=0x563989cf7e30, backend_type=VHOST_BACKEND_TYPE_USER,
busyloop_timeout=0)
at hw/virtio/vhost.c:1385
#4  0x563985f7d0b8 in vhost_user_blk_connect (dev=0x563989cf7990)
at hw/block/vhost-user-blk.c:315
#5  0x563985f7d3f6 in vhost_user_blk_event (opaque=0x563989cf7990,
event=CHR_EVENT_OPENED)
at hw/block/vhost-user-blk.c:379
The first patch in the patchset fixes this issue by setting vhost device to the
stopped state in the disconnect handler and check it the vhost_migration_log()
routine before returning from the function.
qtest framework was updated to test vhost-user-blk functionality. The
vhost-user-blk/vhost-user-blk-tests/migrate_reconnect test was added to 
reproduce
the original issue found.

Dima Stepanov (7):
  vhost: recheck dev state in the vhost_migration_log routine
  vhost: check queue state in the vhost_dev_set_log routine
  tests/qtest/vhost-user-test: prepare the tests for adding new dev
class
  tests/qtest/libqos/virtio-blk: add support for vhost-user-blk
  tests/qtest/vhost-user-test: add support for the vhost-user-blk device
  tests/qtest/vhost-user-test: add migrate_reconnect test
  tests/qtest/vhost-user-test: enable the reconnect tests

 hw/block/vhost-user-blk.c  |  19 ++-
 hw/virtio/vhost.c  |  39 -
 include/hw/virtio/vhost-user-blk.h |  10 ++
 tests/qtest/libqos/virtio-blk.c|  14 ++
 tests/qtest/vhost-user-test.c  | 290 +++--
 5 files changed, 323 insertions(+), 49 deletions(-)

-- 
2.7.4

[PATCH v4 5/7] tests/qtest/vhost-user-test: add support for the vhost-user-blk device

2020-09-04 Thread Dima Stepanov

Add vhost_user_ops structure for the vhost-user-blk device class. Add
the test_reconnect and test_migrate tests for this device.

Signed-off-by: Dima Stepanov 
---
 tests/qtest/vhost-user-test.c | 139 +-
 1 file changed, 137 insertions(+), 2 deletions(-)

diff --git a/tests/qtest/vhost-user-test.c b/tests/qtest/vhost-user-test.c
index 3df5322..a8af613 100644
--- a/tests/qtest/vhost-user-test.c
+++ b/tests/qtest/vhost-user-test.c
@@ -24,6 +24,7 @@
 #include "libqos/libqos.h"
 #include "libqos/pci-pc.h"
 #include "libqos/virtio-pci.h"
+#include "libqos/virtio-blk.h"
 
 #include "libqos/malloc-pc.h"
 #include "hw/virtio/virtio-net.h"
@@ -31,6 +32,7 @@
 #include "standard-headers/linux/vhost_types.h"
 #include "standard-headers/linux/virtio_ids.h"
 #include "standard-headers/linux/virtio_net.h"
+#include "standard-headers/linux/virtio_blk.h"
 
 #ifdef CONFIG_LINUX
 #include 
@@ -43,6 +45,7 @@
 " -numa node,memdev=mem"
 #define QEMU_CMD_CHR" -chardev socket,id=%s,path=%s%s"
 #define QEMU_CMD_NETDEV " -netdev vhost-user,id=hs0,chardev=%s,vhostforce"
+#define QEMU_CMD_BLKCHR " -chardev socket,id=chdev0,path=%s%s"
 
 #define HUGETLBFS_MAGIC   0x958458f6
 
@@ -55,6 +58,7 @@
 #define VHOST_USER_PROTOCOL_F_MQ 0
 #define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
 #define VHOST_USER_PROTOCOL_F_CROSS_ENDIAN   6
+#define VHOST_USER_PROTOCOL_F_CONFIG 9
 
 #define VHOST_LOG_PAGE 0x1000
 
@@ -78,6 +82,8 @@ typedef enum VhostUserRequest {
 VHOST_USER_SET_PROTOCOL_FEATURES = 16,
 VHOST_USER_GET_QUEUE_NUM = 17,
 VHOST_USER_SET_VRING_ENABLE = 18,
+VHOST_USER_GET_CONFIG = 24,
+VHOST_USER_SET_CONFIG = 25,
 VHOST_USER_MAX
 } VhostUserRequest;
 
@@ -99,6 +105,14 @@ typedef struct VhostUserLog {
 uint64_t mmap_offset;
 } VhostUserLog;
 
+#define VHOST_USER_MAX_CONFIG_SIZE 256
+typedef struct VhostUserConfig {
+uint32_t offset;
+uint32_t size;
+uint32_t flags;
+uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+} VhostUserConfig;
+
 typedef struct VhostUserMsg {
 VhostUserRequest request;
 
@@ -114,6 +128,7 @@ typedef struct VhostUserMsg {
 struct vhost_vring_addr addr;
 VhostUserMemory memory;
 VhostUserLog log;
+VhostUserConfig config;
 } payload;
 } QEMU_PACKED VhostUserMsg;
 
@@ -137,6 +152,7 @@ enum {
 
 enum {
 VHOST_USER_NET,
+VHOST_USER_BLK,
 };
 
 typedef struct TestServer {
@@ -166,12 +182,15 @@ struct vhost_user_ops {
 int type;
 void (*append_opts)(TestServer *s, GString *cmd_line,
 const char *chr_opts);
+void (*driver_init)(void *obj, QGuestAllocator *alloc);
 
 /* VHOST-USER commands. */
 void (*set_features)(TestServer *s, CharBackend *chr,
 VhostUserMsg *msg);
 void (*get_protocol_features)(TestServer *s,
 CharBackend *chr, VhostUserMsg *msg);
+void (*get_config)(TestServer *s, CharBackend *chr,
+VhostUserMsg *msg);
 };
 
 static const char *init_hugepagefs(void);
@@ -194,6 +213,14 @@ static void append_vhost_net_opts(TestServer *s, GString 
*cmd_line,
chr_opts, s->chr_name);
 }
 
+static void append_vhost_blk_opts(TestServer *s, GString *cmd_line,
+ const char *chr_opts)
+{
+g_string_append_printf(cmd_line, QEMU_CMD_BLKCHR,
+   s->socket_path,
+   chr_opts);
+}
+
 static void append_mem_opts(TestServer *server, GString *cmd_line,
 int size, enum test_memfd memfd)
 {
@@ -425,6 +452,12 @@ static void chr_read(void *opaque, const uint8_t *buf, int 
size)
 qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
 break;
 
+case VHOST_USER_GET_CONFIG:
+if (s->vu_ops->get_config) {
+s->vu_ops->get_config(s, chr, &msg);
+}
+break;
+
 default:
 break;
 }
@@ -727,6 +760,9 @@ static void test_migrate(void *obj, void *arg, 
QGuestAllocator *alloc)
 guint8 *log;
 guint64 size;
 
+if (s->vu_ops->driver_init) {
+s->vu_ops->driver_init(obj, alloc);
+}
 if (!wait_for_fds(s)) {
 return;
 }
@@ -796,6 +832,24 @@ static void test_migrate(void *obj, void *arg, 
QGuestAllocator *alloc)
 g_string_free(dest_cmdline, true);
 }
 
+static void vu_blk_driver_init(void *obj, QGuestAllocator *alloc)
+{
+QVirtioBlk *blk_if;
+QVirtioDevice *dev;
+QVirtQueue *vq;
+uint64_t features;
+
+blk_if = obj;
+dev = blk_if->vdev;
+features = qvirtio_get_features(dev);
+qvirtio_set_features(dev, features);
+
+vq = qvirtqueue_setup(dev, alloc, 0);
+g_assert(vq);
+
+qvirtio_set_driver_ok(dev);
+}
+
 static void wait_for_rings_started(TestServer *s, size_t count)
 {
 gint64 end_time;
@@ -857,12 +911,21 @@ static void test_reconnect(void *obj, void *arg, 
QGuestAllocator *alloc)
 {
 TestServer *s = arg;
 GSource *src;
+int nq;

[PATCH v4 1/7] vhost: recheck dev state in the vhost_migration_log routine

2020-09-04 Thread Dima Stepanov

vhost-user devices can get a disconnect in the middle of the VHOST-USER
handshake on the migration start. If disconnect event happened right
before sending next VHOST-USER command, then the vhost_dev_set_log()
call in the vhost_migration_log() function will return error. This error
will lead to the assert() and close the QEMU migration source process.
For the vhost-user devices the disconnect event should not break the
migration process, because:
  - the device will be in the stopped state, so it will not be changed
during migration
  - if reconnect will be made the migration log will be reinitialized as
part of reconnect/init process:
#0  vhost_log_global_start (listener=0x563989cf7be0)
at hw/virtio/vhost.c:920
#1  0x56398603d8bc in listener_add_address_space 
(listener=0x563989cf7be0,
as=0x563986ea4340 )
at softmmu/memory.c:2664
#2  0x56398603dd30 in memory_listener_register (listener=0x563989cf7be0,
as=0x563986ea4340 )
at softmmu/memory.c:2740
#3  0x563985fd6956 in vhost_dev_init (hdev=0x563989cf7bd8,
opaque=0x563989cf7e30, backend_type=VHOST_BACKEND_TYPE_USER,
busyloop_timeout=0)
at hw/virtio/vhost.c:1385
#4  0x563985f7d0b8 in vhost_user_blk_connect (dev=0x563989cf7990)
at hw/block/vhost-user-blk.c:315
#5  0x563985f7d3f6 in vhost_user_blk_event (opaque=0x563989cf7990,
event=CHR_EVENT_OPENED)
at hw/block/vhost-user-blk.c:379
Update the vhost-user-blk device with the internal started_vu field which
will be used for initialization (vhost_user_blk_start) and clean up
(vhost_user_blk_stop). This additional flag in the VhostUserBlk structure
will be used to track whether the device really needs to be stopped and
cleaned up on a vhost-user level.
The disconnect event will set the overall VHOST device (not vhost-user) to
the stopped state, so it can be used by the general vhost_migration_log
routine.
Such approach could be propogated to the other vhost-user devices, but
better idea is just to make the same connect/disconnect code for all the
vhost-user devices.

This migration issue was slightly discussed earlier:
  - https://lists.gnu.org/archive/html/qemu-devel/2020-05/msg01509.html
  - https://lists.gnu.org/archive/html/qemu-devel/2020-05/msg05241.html

Signed-off-by: Dima Stepanov 
Reviewed-by: Raphael Norwitz 
---
 hw/block/vhost-user-blk.c  | 19 ---
 hw/virtio/vhost.c  | 27 ---
 include/hw/virtio/vhost-user-blk.h | 10 ++
 3 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 39aec42..a076b1e 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -150,6 +150,7 @@ static int vhost_user_blk_start(VirtIODevice *vdev)
 error_report("Error starting vhost: %d", -ret);
 goto err_guest_notifiers;
 }
+s->started_vu = true;
 
 /* guest_notifier_mask/pending not used yet, so just unmask
  * everything here. virtio-pci will do the right thing by
@@ -175,6 +176,11 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
 int ret;
 
+if (!s->started_vu) {
+return;
+}
+s->started_vu = false;
+
 if (!k->set_guest_notifiers) {
 return;
 }
@@ -341,9 +347,7 @@ static void vhost_user_blk_disconnect(DeviceState *dev)
 }
 s->connected = false;
 
-if (s->dev.started) {
-vhost_user_blk_stop(vdev);
-}
+vhost_user_blk_stop(vdev);
 
 vhost_dev_cleanup(&s->dev);
 }
@@ -399,6 +403,15 @@ static void vhost_user_blk_event(void *opaque, 
QEMUChrEvent event)
 NULL, NULL, false);
 aio_bh_schedule_oneshot(ctx, vhost_user_blk_chr_closed_bh, opaque);
 }
+
+/*
+ * Move vhost device to the stopped state. The vhost-user device
+ * will be clean up and disconnected in BH. This can be useful in
+ * the vhost migration code. If disconnect was caught there is an
+ * option for the general vhost code to get the dev state without
+ * knowing its type (in this case vhost-user).
+ */
+s->dev.started = false;
 break;
 case CHR_EVENT_BREAK:
 case CHR_EVENT_MUX_IN:
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 1a1384e..ffef7ab 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -861,21 +861,42 @@ static int vhost_migration_log(MemoryListener *listener, 
bool enable)
 dev->log_enabled = enable;
 return 0;
 }
+
+r = 0;
 if (!enable) {
 r = vhost_dev_set_log(dev, false);
 if (r < 0) {
-return r;
+goto check_dev_state;
 }
 vhost_log_put(dev, false);
 } else {
 vhost_dev_log_resize(dev, vhost_get_log_size(dev));
 r = vhost_dev_set_log(dev, true);
 if (r < 0) {
-return r;
+

[PATCH v4 7/7] tests/qtest/vhost-user-test: enable the reconnect tests

2020-09-04 Thread Dima Stepanov

For now a QTEST_VHOST_USER_FIXME environment variable is used to
separate reconnect tests for the vhost-user-net device. Looks like the
reconnect functionality is pretty stable, so this separation is
deprecated.
Remove it and enable these tests for the default run.

Signed-off-by: Dima Stepanov 
---
 tests/qtest/vhost-user-test.c | 25 +++--
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/tests/qtest/vhost-user-test.c b/tests/qtest/vhost-user-test.c
index 4b715d3..4b96312 100644
--- a/tests/qtest/vhost-user-test.c
+++ b/tests/qtest/vhost-user-test.c
@@ -1140,20 +1140,17 @@ static void register_vhost_user_test(void)
  "virtio-net",
  test_migrate, &opts);
 
-/* keeps failing on build-system since Aug 15 2017 */
-if (getenv("QTEST_VHOST_USER_FIXME")) {
-opts.before = vhost_user_test_setup_reconnect;
-qos_add_test("vhost-user/reconnect", "virtio-net",
- test_reconnect, &opts);
-
-opts.before = vhost_user_test_setup_connect_fail;
-qos_add_test("vhost-user/connect-fail", "virtio-net",
- test_vhost_user_started, &opts);
-
-opts.before = vhost_user_test_setup_flags_mismatch;
-qos_add_test("vhost-user/flags-mismatch", "virtio-net",
- test_vhost_user_started, &opts);
-}
+opts.before = vhost_user_test_setup_reconnect;
+qos_add_test("vhost-user/reconnect", "virtio-net",
+ test_reconnect, &opts);
+
+opts.before = vhost_user_test_setup_connect_fail;
+qos_add_test("vhost-user/connect-fail", "virtio-net",
+ test_vhost_user_started, &opts);
+
+opts.before = vhost_user_test_setup_flags_mismatch;
+qos_add_test("vhost-user/flags-mismatch", "virtio-net",
+ test_vhost_user_started, &opts);
 
 opts.before = vhost_user_test_setup_multiqueue;
 opts.edge.extra_device_opts = "mq=on";
-- 
2.7.4

[PATCH v4 2/7] vhost: check queue state in the vhost_dev_set_log routine

2020-09-04 Thread Dima Stepanov

If the vhost-user-blk daemon provides only one virtqueue, but device was
added with several queues, then QEMU will send more VHOST-USER command
than expected by daemon side. The vhost_virtqueue_start() routine
handles such case by checking the return value from the
virtio_queue_get_desc_addr() function call. Add the same check to the
vhost_dev_set_log() routine.

Signed-off-by: Dima Stepanov 
---
 hw/virtio/vhost.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index ffef7ab..a08b7d8 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -825,12 +825,24 @@ static int vhost_dev_set_features(struct vhost_dev *dev,
 static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
 {
 int r, i, idx;
+hwaddr addr;
+
 r = vhost_dev_set_features(dev, enable_log);
 if (r < 0) {
 goto err_features;
 }
 for (i = 0; i < dev->nvqs; ++i) {
 idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i);
+addr = virtio_queue_get_desc_addr(dev->vdev, idx);
+if (!addr) {
+/*
+ * The queue might not be ready for start. If this
+ * is the case there is no reason to continue the process.
+ * The similar logic is used by the vhost_virtqueue_start()
+ * routine.
+ */
+continue;
+}
 r = vhost_virtqueue_set_addr(dev, dev->vqs + i, idx,
  enable_log);
 if (r < 0) {
-- 
2.7.4

[PATCH v4 3/7] tests/qtest/vhost-user-test: prepare the tests for adding new dev class

2020-09-04 Thread Dima Stepanov

For now only vhost-user-net device is supported by the test. Other
vhost-user devices are not tested. As a first step make source code
refactoring so new devices can reuse the same test routines. To make
this provide a new vhost_user_ops structure with the methods to
initialize device, its command line or make a proper vhost-user
responses.

Signed-off-by: Dima Stepanov 
---
 tests/qtest/vhost-user-test.c | 105 ++
 1 file changed, 76 insertions(+), 29 deletions(-)

diff --git a/tests/qtest/vhost-user-test.c b/tests/qtest/vhost-user-test.c
index 9ee0f1e..3df5322 100644
--- a/tests/qtest/vhost-user-test.c
+++ b/tests/qtest/vhost-user-test.c
@@ -135,6 +135,10 @@ enum {
 TEST_FLAGS_END,
 };
 
+enum {
+VHOST_USER_NET,
+};
+
 typedef struct TestServer {
 gchar *socket_path;
 gchar *mig_path;
@@ -154,10 +158,25 @@ typedef struct TestServer {
 bool test_fail;
 int test_flags;
 int queues;
+struct vhost_user_ops *vu_ops;
 } TestServer;
 
+struct vhost_user_ops {
+/* Device types. */
+int type;
+void (*append_opts)(TestServer *s, GString *cmd_line,
+const char *chr_opts);
+
+/* VHOST-USER commands. */
+void (*set_features)(TestServer *s, CharBackend *chr,
+VhostUserMsg *msg);
+void (*get_protocol_features)(TestServer *s,
+CharBackend *chr, VhostUserMsg *msg);
+};
+
 static const char *init_hugepagefs(void);
-static TestServer *test_server_new(const gchar *name);
+static TestServer *test_server_new(const gchar *name,
+struct vhost_user_ops *ops);
 static void test_server_free(TestServer *server);
 static void test_server_listen(TestServer *server);
 
@@ -167,7 +186,7 @@ enum test_memfd {
 TEST_MEMFD_NO,
 };
 
-static void append_vhost_opts(TestServer *s, GString *cmd_line,
+static void append_vhost_net_opts(TestServer *s, GString *cmd_line,
  const char *chr_opts)
 {
 g_string_append_printf(cmd_line, QEMU_CMD_CHR QEMU_CMD_NETDEV,
@@ -332,25 +351,15 @@ static void chr_read(void *opaque, const uint8_t *buf, 
int size)
 break;
 
 case VHOST_USER_SET_FEATURES:
-g_assert_cmpint(msg.payload.u64 & (0x1ULL << 
VHOST_USER_F_PROTOCOL_FEATURES),
-!=, 0ULL);
-if (s->test_flags == TEST_FLAGS_DISCONNECT) {
-qemu_chr_fe_disconnect(chr);
-s->test_flags = TEST_FLAGS_BAD;
+if (s->vu_ops->set_features) {
+s->vu_ops->set_features(s, chr, &msg);
 }
 break;
 
 case VHOST_USER_GET_PROTOCOL_FEATURES:
-/* send back features to qemu */
-msg.flags |= VHOST_USER_REPLY_MASK;
-msg.size = sizeof(m.payload.u64);
-msg.payload.u64 = 1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
-msg.payload.u64 |= 1 << VHOST_USER_PROTOCOL_F_CROSS_ENDIAN;
-if (s->queues > 1) {
-msg.payload.u64 |= 1 << VHOST_USER_PROTOCOL_F_MQ;
+if (s->vu_ops->get_protocol_features) {
+s->vu_ops->get_protocol_features(s, chr, &msg);
 }
-p = (uint8_t *) &msg;
-qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
 break;
 
 case VHOST_USER_GET_VRING_BASE:
@@ -467,7 +476,8 @@ static const char *init_hugepagefs(void)
 #endif
 }
 
-static TestServer *test_server_new(const gchar *name)
+static TestServer *test_server_new(const gchar *name,
+struct vhost_user_ops *ops)
 {
 TestServer *server = g_new0(TestServer, 1);
 char template[] = "/tmp/vhost-test-XX";
@@ -495,6 +505,7 @@ static TestServer *test_server_new(const gchar *name)
 
 server->log_fd = -1;
 server->queues = 1;
+server->vu_ops = ops;
 
 return server;
 }
@@ -669,11 +680,11 @@ static void vhost_user_test_cleanup(void *s)
 
 static void *vhost_user_test_setup(GString *cmd_line, void *arg)
 {
-TestServer *server = test_server_new("vhost-user-test");
+TestServer *server = test_server_new("vhost-user-test", arg);
 test_server_listen(server);
 
 append_mem_opts(server, cmd_line, 256, TEST_MEMFD_AUTO);
-append_vhost_opts(server, cmd_line, "");
+server->vu_ops->append_opts(server, cmd_line, "");
 
 g_test_queue_destroy(vhost_user_test_cleanup, server);
 
@@ -682,11 +693,11 @@ static void *vhost_user_test_setup(GString *cmd_line, 
void *arg)
 
 static void *vhost_user_test_setup_memfd(GString *cmd_line, void *arg)
 {
-TestServer *server = test_server_new("vhost-user-test");
+TestServer *server = test_server_new("vhost-user-test", arg);
 test_server_listen(server);
 
 append_mem_opts(server, cmd_line, 256, TEST_MEMFD_YES);
-append_vhost_opts(server, cmd_line, "");
+server->vu_ops->append_opts(server, cmd_line, "");
 
 g_test_queue_destroy(vhost_user_test_cleanup, server);
 
@@ -720,7 +731,7 @@ static void test_migrate(void *obj, void *arg, 
QGuestAllocator *alloc)
 return;
 }
 
-dest = test_server_new("dest");
+dest = test_serve

Re: [PATCH v2 0/3] qcow2: Fix removal of list members from BDRVQcow2State.cluster_allocs

2020-09-04 Thread Max Reitz

On 03.09.20 18:37, Alberto Garcia wrote:
> Hi,
> 
> here are the changes from v1:
> 
> - Split changes into three different patches.
> - Rewrite the documentation of qcow2_alloc_cluster_offset() [Max]
> - Use peek_file_be in the test case to read the offset of the refcount
>   table [Max].
> - Extend the list of unsupported options for the test case [Max].
> 
> Berto

Thanks, applied to my block branch:

https://git.xanclic.moe/XanClic/qemu/commits/branch/block



signature.asc
Description: OpenPGP digital signature

Re: [PATCH v2 3/3] qcow2: Rewrite the documentation of qcow2_alloc_cluster_offset()

2020-09-04 Thread Max Reitz

On 03.09.20 18:37, Alberto Garcia wrote:
> The current text corresponds to an earlier, simpler version of this
> function and it does not explain how it works now.
> 
> Signed-off-by: Alberto Garcia 
> ---
>  block/qcow2-cluster.c | 24 ++--
>  1 file changed, 14 insertions(+), 10 deletions(-)
> 
> diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
> index 25e38daa78..f1ce6afcf5 100644
> --- a/block/qcow2-cluster.c
> +++ b/block/qcow2-cluster.c
> @@ -1713,18 +1713,22 @@ out:
>  }
>  
>  /*
> - * alloc_cluster_offset
> + * For a given area on the virtual disk defined by @offset and @bytes,
> + * find the corresponding area on the qcow2 image, allocating new
> + * clusters (or subclusters) if necessary. The result can span a
> + * combination of allocated and previously unallocated clusters.
>   *
> - * For a given offset on the virtual disk, find the cluster offset in qcow2
> - * file. If the offset is not found, allocate a new cluster.
> + * On return, @host_offset is set to the beginning of the requested
> + * area. This area is guaranteed to be contiguous on the qcow2 file
> + * but it can be smaller than initially requested. In this case @bytes
> + * is updated with the actual size.
>   *
> - * If the cluster was already allocated, m->nb_clusters is set to 0 and
> - * other fields in m are meaningless.
> - *
> - * If the cluster is newly allocated, m->nb_clusters is set to the number of
> - * contiguous clusters that have been allocated. In this case, the other
> - * fields of m are valid and contain information about the first allocated
> - * cluster.
> + * If any clusters or subclusters were allocated then @m contains a
> + * list with the information of all the affected regions. Note that
> + * this can happen regardless of whether this function succeeds or
> + * not. The caller is responsible for updating the L2 metadata of the
> + * allocated clusters (on success) or freeing them (on failure), and
> + * for clearing the contents of @m afterwards in both cases.

It’s too bad that preallocate_co() doesn’t do that then, isn’t it...

Max



signature.asc
Description: OpenPGP digital signature

Re: make -i check resut for msys2

2020-09-04 Thread Kevin Wolf

Am 04.09.2020 um 08:03 hat Thomas Huth geschrieben:
> On 04/09/2020 00.53, 罗勇刚(Yonggang Luo) wrote:
> > 
> > 
> > On Thu, Sep 3, 2020 at 10:33 PM Thomas Huth  > > wrote:
> > 
> > On 03/09/2020 11.18, 罗勇刚(Yonggang Luo) wrote:
> > [...]
> > >   TEST    check-unit: tests/test-replication.exe
> > > **
> > > ERROR:C:/work/xemu/qemu/tests/test-replication.c:136:make_temp:
> > > assertion failed: (fd >= 0)
> > > ERROR test-replication.exe - Bail out!
> > > ERROR:C:/work/xemu/qemu/tests/test-replication.c:136:make_temp:
> > > assertion failed: (fd >= 0)
> > 
> > At least this one should be easy to fix: The test uses /tmp as
> > hard-coded directory for temporary files. I think it should use
> > g_get_tmp_dir() from glib to get that directory instead.
> > 
> >  Thomas
> > 
> > After fixes tmp path, how to fixes following error:
> > $ tests/test-replication.exe                                            
> >                                                                        
> >                                                                        
> >          
> > # random seed: R02Sdf2e4ffc0e6fbe96624598386b538927
> > 1..13
> > # Start of replication tests
> > # Start of primary tests
> > Unexpected error in bdrv_open_inherit() at ../block.c:3456:
> > Block protocol 'file' doesn't support the option 'locking' 
> 
> Not sure ... as a temporary test, try to remove the "locking=off"
> strings from the test. If it then works, it might be worth discussing
> with the block layer folks how to handle this test on Windows in the
> best way. If it still does not work, it's maybe simply not worth the
> effort to try to get this test running on Windows - and thus mark it
> with CONFIG_POSIX in the Makefile / meson.build.

This is a bug in file-win32. It reads "locking" from the options QDict,
but doesn't delete it from it.

Does the following help? (Only compile-tested.)

If it works for you, I'll send it as a proper patch.

Kevin

diff --git a/block/file-win32.c b/block/file-win32.c
index ab69bd811a..e2900c3a51 100644
--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -299,6 +299,11 @@ static QemuOptsList raw_runtime_opts = {
 .type = QEMU_OPT_STRING,
 .help = "host AIO implementation (threads, native)",
 },
+{
+.name = "locking",
+.type = QEMU_OPT_STRING,
+.help = "file locking mode (on/off/auto, default: auto)",
+},
 { /* end of list */ }
 },
 };
@@ -333,6 +338,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, 
int flags,
 Error *local_err = NULL;
 const char *filename;
 bool use_aio;
+OnOffAuto locking;
 int ret;

 s->type = FTYPE_FILE;
@@ -343,10 +349,24 @@ static int raw_open(BlockDriverState *bs, QDict *options, 
int flags,
 goto fail;
 }

-if (qdict_get_try_bool(options, "locking", false)) {
+locking = qapi_enum_parse(&OnOffAuto_lookup,
+  qemu_opt_get(opts, "locking"),
+  ON_OFF_AUTO_AUTO, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+ret = -EINVAL;
+goto fail;
+}
+switch (locking) {
+case ON_OFF_AUTO_ON:
 error_setg(errp, "locking=on is not supported on Windows");
 ret = -EINVAL;
 goto fail;
+case ON_OFF_AUTO_OFF:
+case ON_OFF_AUTO_AUTO:
+break;
+default:
+g_assert_not_reached();
 }

 filename = qemu_opt_get(opts, "filename");

Re: [PATCH] iotests: Remove 030 from the auto group

2020-09-04 Thread Max Reitz

On 04.09.20 07:57, Thomas Huth wrote:
> Test 030 is still occasionally failing in the CI ... so for the
> time being, let's disable it in the "auto" group. We can add it
> back once it got more stable.
> 
> Signed-off-by: Thomas Huth 
> ---
>  I just saw the problem here:
>   https://cirrus-ci.com/task/5449330930745344?command=main#L6482
>  and Peter hit it a couple of weeks ago:
>   https://lists.gnu.org/archive/html/qemu-devel/2020-08/msg00136.html
> 
>  tests/qemu-iotests/group | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Thanks, applied to my block branch:

https://git.xanclic.moe/XanClic/qemu/commits/branch/block



signature.asc
Description: OpenPGP digital signature

Re: [PATCH] iotests: Remove 030 from the auto group

2020-09-04 Thread Max Reitz

On 04.09.20 10:31, Max Reitz wrote:
> On 04.09.20 07:57, Thomas Huth wrote:
>> Test 030 is still occasionally failing in the CI ... so for the
>> time being, let's disable it in the "auto" group. We can add it
>> back once it got more stable.
>>
>> Signed-off-by: Thomas Huth 
>> ---
>>  I just saw the problem here:
>>   https://cirrus-ci.com/task/5449330930745344?command=main#L6482
>>  and Peter hit it a couple of weeks ago:
>>   https://lists.gnu.org/archive/html/qemu-devel/2020-08/msg00136.html
>>
>>  tests/qemu-iotests/group | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> Thanks, applied to my block branch:
> 
> https://git.xanclic.moe/XanClic/qemu/commits/branch/block

Or maybe not O:)



signature.asc
Description: OpenPGP digital signature

Re: [PATCH] iotests: Remove 030 from the auto group

2020-09-04 Thread Kevin Wolf

Am 04.09.2020 um 07:57 hat Thomas Huth geschrieben:
> Test 030 is still occasionally failing in the CI ... so for the
> time being, let's disable it in the "auto" group. We can add it
> back once it got more stable.
> 
> Signed-off-by: Thomas Huth 

I would rather just disable this one test function as 030 is a pretty
important one that tends to catch bugs.

>  I just saw the problem here:
>   https://cirrus-ci.com/task/5449330930745344?command=main#L6482
>  and Peter hit it a couple of weeks ago:
>   https://lists.gnu.org/archive/html/qemu-devel/2020-08/msg00136.html

I wonder how this can still happen. The test should have more than
enough time to complete now. Except if the throttling doesn't work as
expected.

I can't seem to reproduce this even if I add rather long delays. After
40 seconds, all jobs have moved either by 512k (which is STREAM_CHUNK)
or not at all.

What is interesting is that in both cases it's stream-node8, which is
the job streaming from node6 to node8, and node8 is the top-level node.
It's also the last job to be changed to full speed, so all others did
succeed before.

Kevin

Re: [PATCH v2] tests: Trying fixes test-replication.c on msys2.

2020-09-04 Thread Paolo Bonzini

On 04/09/20 00:06, Yonggang Luo wrote:
> Signed-off-by: Yonggang Luo 
> ---
>  tests/test-replication.c | 17 +
>  1 file changed, 13 insertions(+), 4 deletions(-)
> 
> diff --git a/tests/test-replication.c b/tests/test-replication.c
> index 9ab3666a90..d0e06f8d77 100644
> --- a/tests/test-replication.c
> +++ b/tests/test-replication.c
> @@ -23,14 +23,18 @@
>  
>  /* primary */
>  #define P_ID "primary-id"
> -static char p_local_disk[] = "/tmp/p_local_disk.XX";
> +#define P_LOCAL_DISK "%s/p_local_disk.XX"
> +static char p_local_disk[PATH_MAX];
>  
>  /* secondary */
>  #define S_ID "secondary-id"
>  #define S_LOCAL_DISK_ID "secondary-local-disk-id"
> -static char s_local_disk[] = "/tmp/s_local_disk.XX";
> -static char s_active_disk[] = "/tmp/s_active_disk.XX";
> -static char s_hidden_disk[] = "/tmp/s_hidden_disk.XX";
> +#define S_LOCAL_DISK "%s/s_local_disk.XX"
> +static char s_local_disk[PATH_MAX];
> +#define S_ACTIVE_DISK "%s/s_active_disk.XX"
> +static char s_active_disk[PATH_MAX];
> +#define S_HIDDEN_DISK "%s/s_hidden_disk.XX"
> +static char s_hidden_disk[PATH_MAX];
>  
>  /* FIXME: steal from blockdev.c */
>  QemuOptsList qemu_drive_opts = {
> @@ -571,7 +575,12 @@ static void setup_sigabrt_handler(void)
>  int main(int argc, char **argv)
>  {
>  int ret;
> +const char *tmpdir = g_get_tmp_dir();
>  qemu_init_main_loop(&error_fatal);
> +sprintf(p_local_disk, P_LOCAL_DISK, tmpdir);
> +sprintf(s_local_disk, S_LOCAL_DISK, tmpdir);
> +sprintf(s_active_disk, S_ACTIVE_DISK, tmpdir);
> +sprintf(s_hidden_disk, S_HIDDEN_DISK, tmpdir);
>  bdrv_init();
>  
>  g_test_init(&argc, &argv, NULL);
> 

Cc: qemu-block@nongnu.org

Re: [PATCH v6 1/8] monitor: simplify functions for getting a dup'd fdset entry

2020-09-04 Thread Markus Armbruster

Daniel P. Berrangé  writes:

> Currently code has to call monitor_fdset_get_fd, then dup
> the return fd, and then add the duplicate FD back into the
> fdset. This dance is overly verbose for the caller and
> introduces extra failure modes which can be avoided by
> folding all the logic into monitor_fdset_dup_fd_add and
> removing monitor_fdset_get_fd entirely.
>
> Signed-off-by: Daniel P. Berrangé 

Reviewed-by: Markus Armbruster

94 matches

Mail list logo