date:20220624

hw/nvme: why schedule sq timer when cq is full?

2022-06-24 Thread Jinhao Fan

Hi Keith,

I just came across this piece of code in nvme_process_db() that I found weird:

start_sqs = nvme_cq_full(cq) ? 1 : 0;
...
if (start_sqs) {
NvmeSQueue *sq;
QTAILQ_FOREACH(sq, &cq->sq_list, entry) {
timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
}
timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
}


The logic seems to be “If CQ is full, schedule SQ timer to produce more 
completions”. I cannot understand this. I think it would make more sense
with “If CQ is NOT full, schedule SQ timer to produce more completions”. Am 
I missing something?

Thanks,
Jinhao Fan

[PATCH v2 09/11] bsd-user: Implement pathconf, lpathconf and fpathconf

2022-06-24 Thread Warner Losh

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/bsd-file.h   | 32 
 bsd-user/freebsd/os-syscall.c | 12 
 2 files changed, 44 insertions(+)

diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h
index 4b2f6dcc1dc..065f576dfe8 100644
--- a/bsd-user/bsd-file.h
+++ b/bsd-user/bsd-file.h
@@ -894,4 +894,36 @@ static abi_long do_bsd_mkfifoat(abi_long arg1, abi_long 
arg2,
 return ret;
 }
 
+/* pathconf(2) */
+static abi_long do_bsd_pathconf(abi_long arg1, abi_long arg2)
+{
+abi_long ret;
+void *p;
+
+LOCK_PATH(p, arg1);
+ret = get_errno(pathconf(p, arg2)); /* XXX path(p)? */
+UNLOCK_PATH(p, arg1);
+
+return ret;
+}
+
+/* lpathconf(2) */
+static abi_long do_bsd_lpathconf(abi_long arg1, abi_long arg2)
+{
+abi_long ret;
+void *p;
+
+LOCK_PATH(p, arg1);
+ret = get_errno(lpathconf(p, arg2)); /* XXX path(p)? */
+UNLOCK_PATH(p, arg1);
+
+return ret;
+}
+
+/* fpathconf(2) */
+static abi_long do_bsd_fpathconf(abi_long arg1, abi_long arg2)
+{
+return get_errno(fpathconf(arg1, arg2));
+}
+
 #endif /* BSD_FILE_H */
diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index be225195fbd..7de4c40bb16 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -475,6 +475,18 @@ static abi_long freebsd_syscall(void *cpu_env, int num, 
abi_long arg1,
 ret = do_bsd_mkfifoat(arg1, arg2, arg3);
 break;
 
+case TARGET_FREEBSD_NR_pathconf: /* pathconf(2) */
+ret = do_bsd_pathconf(arg1, arg2);
+break;
+
+case TARGET_FREEBSD_NR_lpathconf: /* lpathconf(2) */
+ret = do_bsd_lpathconf(arg1, arg2);
+break;
+
+case TARGET_FREEBSD_NR_fpathconf: /* fpathconf(2) */
+ret = do_bsd_fpathconf(arg1, arg2);
+break;
+
 default:
 qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num);
 ret = -TARGET_ENOSYS;
-- 
2.33.1

[PATCH v2 11/11] bsd-user: Remove stray 'inline' from do_bsd_close

2022-06-24 Thread Warner Losh

In the last series, I inadvertently didn't remove this inline, but did
all the others. Remove it for consistency.

Signed-off-by: Warner Losh 
---
 bsd-user/bsd-file.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h
index 108a5061850..588e0c50d45 100644
--- a/bsd-user/bsd-file.h
+++ b/bsd-user/bsd-file.h
@@ -252,7 +252,7 @@ static abi_long do_bsd_openat(abi_long arg1, abi_long arg2,
 }
 
 /* close(2) */
-static inline abi_long do_bsd_close(abi_long arg1)
+static abi_long do_bsd_close(abi_long arg1)
 {
 return get_errno(close(arg1));
 }
-- 
2.33.1

[PATCH v2 06/11] bsd-user: Implement chflags, lchflags and fchflags

2022-06-24 Thread Warner Losh

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/bsd-file.h   | 32 
 bsd-user/freebsd/os-syscall.c | 12 
 2 files changed, 44 insertions(+)

diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h
index ac171c409ca..a1c80428d98 100644
--- a/bsd-user/bsd-file.h
+++ b/bsd-user/bsd-file.h
@@ -816,4 +816,36 @@ static abi_long do_bsd_fchownat(abi_long arg1, abi_long 
arg2,
 return ret;
 }
 
+/* chflags(2) */
+static abi_long do_bsd_chflags(abi_long arg1, abi_long arg2)
+{
+abi_long ret;
+void *p;
+
+LOCK_PATH(p, arg1);
+ret = get_errno(chflags(p, arg2)); /* XXX path(p)? */
+UNLOCK_PATH(p, arg1);
+
+return ret;
+}
+
+/* lchflags(2) */
+static abi_long do_bsd_lchflags(abi_long arg1, abi_long arg2)
+{
+abi_long ret;
+void *p;
+
+LOCK_PATH(p, arg1);
+ret = get_errno(lchflags(p, arg2)); /* XXX path(p)? */
+UNLOCK_PATH(p, arg1);
+
+return ret;
+}
+
+/* fchflags(2) */
+static abi_long do_bsd_fchflags(abi_long arg1, abi_long arg2)
+{
+return get_errno(fchflags(arg1, arg2));
+}
+
 #endif /* BSD_FILE_H */
diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index 8090666b0d9..06bc76a326b 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -447,6 +447,18 @@ static abi_long freebsd_syscall(void *cpu_env, int num, 
abi_long arg1,
 ret = do_bsd_fchownat(arg1, arg2, arg3, arg4, arg5);
 break;
 
+case TARGET_FREEBSD_NR_chflags: /* chflags(2) */
+ret = do_bsd_chflags(arg1, arg2);
+break;
+
+case TARGET_FREEBSD_NR_lchflags: /* lchflags(2) */
+ret = do_bsd_lchflags(arg1, arg2);
+break;
+
+case TARGET_FREEBSD_NR_fchflags: /* fchflags(2) */
+ret = do_bsd_fchflags(arg1, arg2);
+break;
+
 default:
 qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num);
 ret = -TARGET_ENOSYS;
-- 
2.33.1

[PATCH v2 08/11] bsd-user: Implement mkfifo and mkfifoat

2022-06-24 Thread Warner Losh

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/bsd-file.h   | 27 +++
 bsd-user/freebsd/os-syscall.c |  8 
 2 files changed, 35 insertions(+)

diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h
index c24054fed11..4b2f6dcc1dc 100644
--- a/bsd-user/bsd-file.h
+++ b/bsd-user/bsd-file.h
@@ -867,4 +867,31 @@ static abi_long do_bsd_flock(abi_long arg1, abi_long arg2)
 return get_errno(flock(arg1, arg2));
 }
 
+/* mkfifo(2) */
+static abi_long do_bsd_mkfifo(abi_long arg1, abi_long arg2)
+{
+abi_long ret;
+void *p;
+
+LOCK_PATH(p, arg1);
+ret = get_errno(mkfifo(p, arg2)); /* XXX path(p)? */
+UNLOCK_PATH(p, arg1);
+
+return ret;
+}
+
+/* mkfifoat(2) */
+static abi_long do_bsd_mkfifoat(abi_long arg1, abi_long arg2,
+abi_long arg3)
+{
+abi_long ret;
+void *p;
+
+LOCK_PATH(p, arg2);
+ret = get_errno(mkfifoat(arg1, p, arg3));
+UNLOCK_PATH(p, arg2);
+
+return ret;
+}
+
 #endif /* BSD_FILE_H */
diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index d252fb40737..be225195fbd 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -467,6 +467,14 @@ static abi_long freebsd_syscall(void *cpu_env, int num, 
abi_long arg1,
 ret = do_bsd_flock(arg1, arg2);
 break;
 
+case TARGET_FREEBSD_NR_mkfifo: /* mkfifo(2) */
+ret = do_bsd_mkfifo(arg1, arg2);
+break;
+
+case TARGET_FREEBSD_NR_mkfifoat: /* mkfifoat(2) */
+ret = do_bsd_mkfifoat(arg1, arg2, arg3);
+break;
+
 default:
 qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num);
 ret = -TARGET_ENOSYS;
-- 
2.33.1

[PATCH v2 10/11] bsd-user: Implement undelete

2022-06-24 Thread Warner Losh

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/bsd-file.h   | 13 +
 bsd-user/freebsd/os-syscall.c |  4 
 2 files changed, 17 insertions(+)

diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h
index 065f576dfe8..108a5061850 100644
--- a/bsd-user/bsd-file.h
+++ b/bsd-user/bsd-file.h
@@ -926,4 +926,17 @@ static abi_long do_bsd_fpathconf(abi_long arg1, abi_long 
arg2)
 return get_errno(fpathconf(arg1, arg2));
 }
 
+/* undelete(2) */
+static abi_long do_bsd_undelete(abi_long arg1)
+{
+abi_long ret;
+void *p;
+
+LOCK_PATH(p, arg1);
+ret = get_errno(undelete(p)); /* XXX path(p)? */
+UNLOCK_PATH(p, arg1);
+
+return ret;
+}
+
 #endif /* BSD_FILE_H */
diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index 7de4c40bb16..57996cad8ae 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -487,6 +487,10 @@ static abi_long freebsd_syscall(void *cpu_env, int num, 
abi_long arg1,
 ret = do_bsd_fpathconf(arg1, arg2);
 break;
 
+case TARGET_FREEBSD_NR_undelete: /* undelete(2) */
+ret = do_bsd_undelete(arg1);
+break;
+
 default:
 qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num);
 ret = -TARGET_ENOSYS;
-- 
2.33.1

[PATCH v2 04/11] bsd-user: Implement freebsd11_mknod, freebsd11_mknodat and mknodat

2022-06-24 Thread Warner Losh

These implement both the old pre-INO64 mknod variations, as well as the
now current INO64 variant. To implement the old stuff, we use some
linker magic to bind to the old versions of these functions.

Signed-off-by: Stacey Son 
Signed-off-by: Michal Meloun 
Signed-off-by: Warner Losh 
---
 bsd-user/bsd-file.h   | 47 +++
 bsd-user/freebsd/os-syscall.c | 13 ++
 2 files changed, 60 insertions(+)

diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h
index 1af79866fc6..b05d3cbb717 100644
--- a/bsd-user/bsd-file.h
+++ b/bsd-user/bsd-file.h
@@ -721,4 +721,51 @@ static abi_long do_bsd_fchmodat(abi_long arg1, abi_long 
arg2,
 return ret;
 }
 
+/* pre-ino64 mknod(2) */
+static abi_long do_bsd_freebsd11_mknod(abi_long arg1, abi_long arg2, abi_long 
arg3)
+{
+abi_long ret;
+void *p;
+
+LOCK_PATH(p, arg1);
+ret = get_errno(syscall(SYS_freebsd11_mknod, p, arg2, arg3));
+UNLOCK_PATH(p, arg1);
+
+return ret;
+}
+
+/* pre-ino64 mknodat(2) */
+static abi_long do_bsd_freebsd11_mknodat(abi_long arg1, abi_long arg2,
+abi_long arg3, abi_long arg4)
+{
+abi_long ret;
+void *p;
+
+LOCK_PATH(p, arg2);
+ret = get_errno(syscall(SYS_freebsd11_mknodat, arg1, p, arg3, arg4));
+UNLOCK_PATH(p, arg2);
+
+return ret;
+}
+
+/* post-ino64 mknodat(2) */
+static abi_long do_bsd_mknodat(void *cpu_env, abi_long arg1,
+abi_long arg2, abi_long arg3, abi_long arg4, abi_long arg5,
+abi_long arg6)
+{
+abi_long ret;
+void *p;
+
+LOCK_PATH(p, arg2);
+   /* 32-bit arch's use two 32 registers for 64 bit return value */
+if (regpairs_aligned(cpu_env) != 0) {
+ret = get_errno(mknodat(arg1, p, arg3, target_arg64(arg5, arg6)));
+} else {
+ret = get_errno(mknodat(arg1, p, arg3, target_arg64(arg4, arg5)));
+}
+UNLOCK_PATH(p, arg2);
+
+return ret;
+}
+
 #endif /* BSD_FILE_H */
diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index b33d548a4b6..d3125f340f7 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -32,6 +32,7 @@
 #include "qemu/cutils.h"
 #include "qemu/path.h"
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -418,6 +419,18 @@ static abi_long freebsd_syscall(void *cpu_env, int num, 
abi_long arg1,
 ret = do_bsd_fchmodat(arg1, arg2, arg3, arg4);
 break;
 
+case TARGET_FREEBSD_NR_freebsd11_mknod: /* mknod(2) */
+ret = do_bsd_freebsd11_mknod(arg1, arg2, arg3);
+break;
+
+case TARGET_FREEBSD_NR_freebsd11_mknodat: /* mknodat(2) */
+ret = do_bsd_freebsd11_mknodat(arg1, arg2, arg3, arg4);
+break;
+
+case TARGET_FREEBSD_NR_mknodat: /* mknodat(2) */
+ret = do_bsd_mknodat(cpu_env, arg1, arg2, arg3, arg4, arg5, arg6);
+break;
+
 default:
 qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num);
 ret = -TARGET_ENOSYS;
-- 
2.33.1

[PATCH v2 05/11] bsd-user: Implement chown, fchown, lchown and fchownat

2022-06-24 Thread Warner Losh

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/bsd-file.h   | 48 +++
 bsd-user/freebsd/os-syscall.c | 16 
 2 files changed, 64 insertions(+)

diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h
index b05d3cbb717..ac171c409ca 100644
--- a/bsd-user/bsd-file.h
+++ b/bsd-user/bsd-file.h
@@ -768,4 +768,52 @@ static abi_long do_bsd_mknodat(void *cpu_env, abi_long 
arg1,
 return ret;
 }
 
+/* chown(2) */
+static abi_long do_bsd_chown(abi_long arg1, abi_long arg2, abi_long arg3)
+{
+abi_long ret;
+void *p;
+
+LOCK_PATH(p, arg1);
+ret = get_errno(chown(p, arg2, arg3)); /* XXX path(p)? */
+UNLOCK_PATH(p, arg1);
+
+return ret;
+}
+
+/* fchown(2) */
+static abi_long do_bsd_fchown(abi_long arg1, abi_long arg2,
+abi_long arg3)
+{
+return get_errno(fchown(arg1, arg2, arg3));
+}
+
+/* lchown(2) */
+static abi_long do_bsd_lchown(abi_long arg1, abi_long arg2,
+abi_long arg3)
+{
+abi_long ret;
+void *p;
+
+LOCK_PATH(p, arg1);
+ret = get_errno(lchown(p, arg2, arg3)); /* XXX path(p)? */
+UNLOCK_PATH(p, arg1);
+
+return ret;
+}
+
+/* fchownat(2) */
+static abi_long do_bsd_fchownat(abi_long arg1, abi_long arg2,
+abi_long arg3, abi_long arg4, abi_long arg5)
+{
+abi_long ret;
+void *p;
+
+LOCK_PATH(p, arg2);
+ret = get_errno(fchownat(arg1, p, arg3, arg4, arg5)); /* XXX path(p)? */
+UNLOCK_PATH(p, arg2);
+
+return ret;
+}
+
 #endif /* BSD_FILE_H */
diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index d3125f340f7..8090666b0d9 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -431,6 +431,22 @@ static abi_long freebsd_syscall(void *cpu_env, int num, 
abi_long arg1,
 ret = do_bsd_mknodat(cpu_env, arg1, arg2, arg3, arg4, arg5, arg6);
 break;
 
+case TARGET_FREEBSD_NR_chown: /* chown(2) */
+ret = do_bsd_chown(arg1, arg2, arg3);
+break;
+
+case TARGET_FREEBSD_NR_fchown: /* fchown(2) */
+ret = do_bsd_fchown(arg1, arg2, arg3);
+break;
+
+case TARGET_FREEBSD_NR_lchown: /* lchown(2) */
+ret = do_bsd_lchown(arg1, arg2, arg3);
+break;
+
+case TARGET_FREEBSD_NR_fchownat: /* fchownat(2) */
+ret = do_bsd_fchownat(arg1, arg2, arg3, arg4, arg5);
+break;
+
 default:
 qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num);
 ret = -TARGET_ENOSYS;
-- 
2.33.1

[PATCH v6 15/15] block: refactor bdrv_remove_file_or_backing_child to bdrv_remove_child

2022-06-24 Thread Vladimir Sementsov-Ogievskiy

Now the function can remove any child, so give it a more generic name.
Drop the assertions and the bs argument, which becomes unused. The function
will be reused in a later commit.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block.c | 27 +--
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/block.c b/block.c
index 6b08d20d8c..ddd043f556 100644
--- a/block.c
+++ b/block.c
@@ -92,9 +92,7 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
 
 static void bdrv_replace_child_noperm(BdrvChild *child,
   BlockDriverState *new_bs);
-static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
-  BdrvChild *child,
-  Transaction *tran);
+static void bdrv_remove_child(BdrvChild *child, Transaction *tran);
 static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
 Transaction *tran);
 
@@ -3335,7 +,7 @@ static int 
bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs,
 
 if (child) {
 bdrv_unset_inherits_from(parent_bs, child, tran);
-bdrv_remove_file_or_backing_child(parent_bs, child, tran);
+bdrv_remove_child(child, tran);
 }
 
 if (!child_bs) {
@@ -5019,26 +5017,19 @@ static bool should_update_child(BdrvChild *c, 
BlockDriverState *to)
 return ret;
 }
 
-static void bdrv_remove_filter_or_cow_child_commit(void *opaque)
+static void bdrv_remove_child_commit(void *opaque)
 {
 GLOBAL_STATE_CODE();
 bdrv_child_free(opaque);
 }
 
-static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = {
-.commit = bdrv_remove_filter_or_cow_child_commit,
+static TransactionActionDrv bdrv_remove_child_drv = {
+.commit = bdrv_remove_child_commit,
 };
 
-/*
- * A function to remove backing or file child of @bs.
- * Function doesn't update permissions, caller is responsible for this.
- */
-static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
-  BdrvChild *child,
-  Transaction *tran)
+/* Function doesn't update permissions, caller is responsible for this. */
+static void bdrv_remove_child(BdrvChild *child, Transaction *tran)
 {
-assert(child == bs->backing || child == bs->file);
-
 if (!child) {
 return;
 }
@@ -5047,7 +5038,7 @@ static void 
bdrv_remove_file_or_backing_child(BlockDriverState *bs,
 bdrv_replace_child_tran(child, NULL, tran);
 }
 
-tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, child);
+tran_add(tran, &bdrv_remove_child_drv, child);
 }
 
 /*
@@ -5058,7 +5049,7 @@ static void 
bdrv_remove_file_or_backing_child(BlockDriverState *bs,
 static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
 Transaction *tran)
 {
-bdrv_remove_file_or_backing_child(bs, bdrv_filter_or_cow_child(bs), tran);
+bdrv_remove_child(bdrv_filter_or_cow_child(bs), tran);
 }
 
 static int bdrv_replace_node_noperm(BlockDriverState *from,
-- 
2.25.1

[PATCH v2 07/11] bsd-user: Implement chroot and flock

2022-06-24 Thread Warner Losh

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/bsd-file.h   | 19 +++
 bsd-user/freebsd/os-syscall.c |  8 
 2 files changed, 27 insertions(+)

diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h
index a1c80428d98..c24054fed11 100644
--- a/bsd-user/bsd-file.h
+++ b/bsd-user/bsd-file.h
@@ -848,4 +848,23 @@ static abi_long do_bsd_fchflags(abi_long arg1, abi_long 
arg2)
 return get_errno(fchflags(arg1, arg2));
 }
 
+/* chroot(2) */
+static abi_long do_bsd_chroot(abi_long arg1)
+{
+abi_long ret;
+void *p;
+
+LOCK_PATH(p, arg1);
+ret = get_errno(chroot(p)); /* XXX path(p)? */
+UNLOCK_PATH(p, arg1);
+
+return ret;
+}
+
+/* flock(2) */
+static abi_long do_bsd_flock(abi_long arg1, abi_long arg2)
+{
+return get_errno(flock(arg1, arg2));
+}
+
 #endif /* BSD_FILE_H */
diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index 06bc76a326b..d252fb40737 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -459,6 +459,14 @@ static abi_long freebsd_syscall(void *cpu_env, int num, 
abi_long arg1,
 ret = do_bsd_fchflags(arg1, arg2);
 break;
 
+case TARGET_FREEBSD_NR_chroot: /* chroot(2) */
+ret = do_bsd_chroot(arg1);
+break;
+
+case TARGET_FREEBSD_NR_flock: /* flock(2) */
+ret = do_bsd_flock(arg1, arg2);
+break;
+
 default:
 qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num);
 ret = -TARGET_ENOSYS;
-- 
2.33.1

[PATCH v2 03/11] bsd-user: implement chmod, fchmod, lchmod and fchmodat

2022-06-24 Thread Warner Losh

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/bsd-file.h   | 46 +++
 bsd-user/freebsd/os-syscall.c | 16 
 2 files changed, 62 insertions(+)

diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h
index 635ac8d0e62..1af79866fc6 100644
--- a/bsd-user/bsd-file.h
+++ b/bsd-user/bsd-file.h
@@ -675,4 +675,50 @@ static abi_long do_bsd_readlinkat(abi_long arg1, abi_long 
arg2,
 return ret;
 }
 
+/* chmod(2) */
+static abi_long do_bsd_chmod(abi_long arg1, abi_long arg2)
+{
+abi_long ret;
+void *p;
+
+LOCK_PATH(p, arg1);
+ret = get_errno(chmod(p, arg2)); /* XXX path(p)? */
+UNLOCK_PATH(p, arg1);
+
+return ret;
+}
+
+/* fchmod(2) */
+static abi_long do_bsd_fchmod(abi_long arg1, abi_long arg2)
+{
+return get_errno(fchmod(arg1, arg2));
+}
+
+/* lchmod(2) */
+static abi_long do_bsd_lchmod(abi_long arg1, abi_long arg2)
+{
+abi_long ret;
+void *p;
+
+LOCK_PATH(p, arg1);
+ret = get_errno(lchmod(p, arg2)); /* XXX path(p)? */
+UNLOCK_PATH(p, arg1);
+
+return ret;
+}
+
+/* fchmodat(2) */
+static abi_long do_bsd_fchmodat(abi_long arg1, abi_long arg2,
+abi_long arg3, abi_long arg4)
+{
+abi_long ret;
+void *p;
+
+LOCK_PATH(p, arg2);
+ret = get_errno(fchmodat(arg1, p, arg3, arg4));
+UNLOCK_PATH(p, arg2);
+
+return ret;
+}
+
 #endif /* BSD_FILE_H */
diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index 80ec9dd4954..b33d548a4b6 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -402,6 +402,22 @@ static abi_long freebsd_syscall(void *cpu_env, int num, 
abi_long arg1,
 ret = do_bsd_readlinkat(arg1, arg2, arg3, arg4);
 break;
 
+case TARGET_FREEBSD_NR_chmod: /* chmod(2) */
+ret = do_bsd_chmod(arg1, arg2);
+break;
+
+case TARGET_FREEBSD_NR_fchmod: /* fchmod(2) */
+ret = do_bsd_fchmod(arg1, arg2);
+break;
+
+case TARGET_FREEBSD_NR_lchmod: /* lchmod(2) */
+ret = do_bsd_lchmod(arg1, arg2);
+break;
+
+case TARGET_FREEBSD_NR_fchmodat: /* fchmodat(2) */
+ret = do_bsd_fchmodat(arg1, arg2, arg3, arg4);
+break;
+
 default:
 qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num);
 ret = -TARGET_ENOSYS;
-- 
2.33.1

[PATCH v2 02/11] bsd-user: Implement symlink, symlinkat, readlink and readlinkat

2022-06-24 Thread Warner Losh

Signed-off-by: Stacey Son 
Signed-off-by: Jung-uk Kim 
Signed-off-by: Warner Losh 
---
 bsd-user/bsd-file.h   | 74 +++
 bsd-user/freebsd/os-syscall.c | 16 
 2 files changed, 90 insertions(+)

diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h
index a0f03102639..635ac8d0e62 100644
--- a/bsd-user/bsd-file.h
+++ b/bsd-user/bsd-file.h
@@ -601,4 +601,78 @@ static abi_long do_bsd_nmount(abi_long arg1, abi_long 
count,
 return ret;
 }
 
+/* symlink(2) */
+static abi_long do_bsd_symlink(abi_long arg1, abi_long arg2)
+{
+abi_long ret;
+void *p1, *p2;
+
+LOCK_PATH2(p1, arg1, p2, arg2);
+ret = get_errno(symlink(p1, p2)); /* XXX path(p1), path(p2) */
+UNLOCK_PATH2(p1, arg1, p2, arg2);
+
+return ret;
+}
+
+/* symlinkat(2) */
+static abi_long do_bsd_symlinkat(abi_long arg1, abi_long arg2,
+abi_long arg3)
+{
+abi_long ret;
+void *p1, *p2;
+
+LOCK_PATH2(p1, arg1, p2, arg3);
+ret = get_errno(symlinkat(p1, arg2, p2)); /* XXX path(p1), path(p2) */
+UNLOCK_PATH2(p1, arg1, p2, arg3);
+
+return ret;
+}
+
+/* readlink(2) */
+static abi_long do_bsd_readlink(CPUArchState *env, abi_long arg1,
+abi_long arg2, abi_long arg3)
+{
+abi_long ret;
+void *p1, *p2;
+
+LOCK_PATH(p1, arg1);
+p2 = lock_user(VERIFY_WRITE, arg2, arg3, 0);
+if (p2 == NULL) {
+UNLOCK_PATH(p1, arg1);
+return -TARGET_EFAULT;
+}
+if (strcmp(p1, "/proc/curproc/file") == 0) {
+CPUState *cpu = env_cpu(env);
+TaskState *ts = (TaskState *)cpu->opaque;
+strncpy(p2, ts->bprm->fullpath, arg3);
+ret = MIN((abi_long)strlen(ts->bprm->fullpath), arg3);
+} else {
+ret = get_errno(readlink(path(p1), p2, arg3));
+}
+unlock_user(p2, arg2, ret);
+UNLOCK_PATH(p1, arg1);
+
+return ret;
+}
+
+/* readlinkat(2) */
+static abi_long do_bsd_readlinkat(abi_long arg1, abi_long arg2,
+abi_long arg3, abi_long arg4)
+{
+abi_long ret;
+void *p1, *p2;
+
+LOCK_PATH(p1, arg2);
+p2 = lock_user(VERIFY_WRITE, arg3, arg4, 0);
+if (p2 == NULL) {
+UNLOCK_PATH(p1, arg2);
+return -TARGET_EFAULT;
+}
+ret = get_errno(readlinkat(arg1, p1, p2, arg4));
+unlock_user(p2, arg3, ret);
+UNLOCK_PATH(p1, arg2);
+
+return ret;
+}
+
 #endif /* BSD_FILE_H */
diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index bd4dfa6ddc7..80ec9dd4954 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -386,6 +386,22 @@ static abi_long freebsd_syscall(void *cpu_env, int num, 
abi_long arg1,
 ret = do_bsd_nmount(arg1, arg2, arg3);
 break;
 
+case TARGET_FREEBSD_NR_symlink: /* symlink(2) */
+ret = do_bsd_symlink(arg1, arg2);
+break;
+
+case TARGET_FREEBSD_NR_symlinkat: /* symlinkat(2) */
+ret = do_bsd_symlinkat(arg1, arg2, arg3);
+break;
+
+case TARGET_FREEBSD_NR_readlink: /* readlink(2) */
+ret = do_bsd_readlink(cpu_env, arg1, arg2, arg3);
+break;
+
+case TARGET_FREEBSD_NR_readlinkat: /* readlinkat(2) */
+ret = do_bsd_readlinkat(arg1, arg2, arg3, arg4);
+break;
+
 default:
 qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num);
 ret = -TARGET_ENOSYS;
-- 
2.33.1

[PATCH v2 00/11] bsd-user: More file-related system calls

2022-06-24 Thread Warner Losh

A second round of mostly BSD-independent filesystem calls: mount, unmount,
nmount, symlink, symlinkat, readlink, readlinkat, chmod, fchmod, lchmod,
fchmodat, freebsd11_mknod, freebsd11_mknodat, mknodat, chown, fchown, lchown,
fchownat, chflags, lchflags, fchflags, chroot, flock, mkfifo, mkfifoat,
pathconf, lpathconf, fpathconf, undelete.

These are all non-reentrant system calls, so these wrappers are pretty simple
and no safe_* versions need to be created.

In addition, a small correction to an earlier series is included.

V2: Updated with review comments.
Reworked freebsd11_mknod* stuff after unifdef BSD_HAVE_INO64
Fixed comments that had too many words
Added one more hunk to remove a stray 'inline' that slipped through earlier

Need reviews on:
   bsd-user: Implement symlink, symlinkat, readlink and readlinkat
   bsd-user: Implement freebsd11_mknod, freebsd11_mknodat and mknodat
   bsd-user: Remove stray 'inline' from do_bsd_close

Warner Losh (11):
  bsd-user: Implement mount, umount and nmount
  bsd-user: Implement symlink, symlinkat, readlink and readlinkat
  bsd-user: implement chmod, fchmod, lchmod and fchmodat
  bsd-user: Implement freebsd11_mknod, freebsd11_mknodat and mknodat
  bsd-user: Implement chown, fchown, lchown and fchownat
  bsd-user: Implement chflags, lchflags and fchflags
  bsd-user: Implement chroot and flock
  bsd-user: Implement mkfifo and mkfifoat
  bsd-user: Implement pathconf, lpathconf and fpathconf
  bsd-user: Implement undelete
  bsd-user: Remove stray 'inline' from do_bsd_close

 bsd-user/bsd-file.h   | 392 +-
 bsd-user/freebsd/os-syscall.c | 118 ++
 2 files changed, 509 insertions(+), 1 deletion(-)

-- 
2.33.1

[PATCH v6 13/15] block: Manipulate bs->file / bs->backing pointers in .attach/.detach

2022-06-24 Thread Vladimir Sementsov-Ogievskiy

bs->file and bs->backing are a kind of duplication of part of
bs->children. But a very useful duplication, so let's not drop them at
all:)

We should manage bs->file and bs->backing in same place, where we
manage bs->children, to keep them in sync.

Moreover, generic io paths are unprepared for a BdrvChild without a bs, so
it's doubly good to clear bs->file / bs->backing when we detach the
child.

Detach is simple: if we detach bs->file or bs->backing child, just
set corresponding field to NULL.

Attach is a bit more complicated. But we can still precisely detect
whether we should set one of bs->file / bs->backing or not:

- if role is BDRV_CHILD_COW, we definitely deal with bs->backing
- else, if role is BDRV_CHILD_FILTERED (it must be also
  BDRV_CHILD_PRIMARY), it's a filtered child. Use
  bs->drv->filtered_child_is_backing to choose the pointer field to
  modify.
- else, if role is BDRV_CHILD_PRIMARY, we deal with bs->file
- in all other cases, it's neither bs->backing nor bs->file. It's some
  other child and we shouldn't care

OK. This change brings one more good thing: we can (and should) get rid
of all indirect pointers in the block-graph-change transactions:

bdrv_attach_child_common() stores BdrvChild** into transaction to clear
it on abort.

bdrv_attach_child_common() has two callers: bdrv_attach_child_noperm()
just passes this feature through, while bdrv_root_attach_child() doesn't
need the feature.

Look at bdrv_attach_child_noperm() callers:
  - bdrv_attach_child() doesn't need the feature
  - bdrv_set_file_or_backing_noperm() uses the feature to manage
bs->file and bs->backing, we don't want it anymore
  - bdrv_append() uses the feature to manage bs->backing, again we
don't want it anymore

So, we should drop this stuff! Great!

We could probably keep BdrvChild** argument to keep the int return
value, but it seems not worth the complexity.

Finally, we now set .file / .backing automatically in generic code and
want to restrict setting them by hand outside of .attach/.detach.
So, this patch cleans up all remaining places where they were set.
To find such places I use:

  git grep '\->file ='
  git grep '\->backing ='
  git grep '&.*\<backing\>'
  git grep '&.*\<file\>'

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block.c  | 234 ++-
 block/raw-format.c   |   4 +-
 block/snapshot-access.c  |   6 +-
 block/snapshot.c |   1 -
 include/block/block_int-common.h |  15 +-
 tests/unit/test-bdrv-drain.c |  10 +-
 6 files changed, 126 insertions(+), 144 deletions(-)

diff --git a/block.c b/block.c
index ca86cd86d3..6b08d20d8c 100644
--- a/block.c
+++ b/block.c
@@ -1438,9 +1438,39 @@ static void bdrv_child_cb_attach(BdrvChild *child)
 
 assert_bdrv_graph_writable(bs);
 QLIST_INSERT_HEAD(&bs->children, child, next);
-
-if (child->role & BDRV_CHILD_COW) {
+if (bs->drv->is_filter || (child->role & BDRV_CHILD_FILTERED)) {
+/*
+ * Here we handle filters and block/raw-format.c when it behave like
+ * filter. They generally have a single PRIMARY child, which is also 
the
+ * FILTERED child, and that they may have multiple more children, which
+ * are neither PRIMARY nor FILTERED. And never we have a COW child 
here.
+ * So bs->file will be the PRIMARY child, unless the PRIMARY child goes
+ * into bs->backing on exceptional cases; and bs->backing will be
+ * nothing else.
+ */
+assert(!(child->role & BDRV_CHILD_COW));
+if (child->role & BDRV_CHILD_PRIMARY) {
+assert(child->role & BDRV_CHILD_FILTERED);
+assert(!bs->backing);
+assert(!bs->file);
+
+if (bs->drv->filtered_child_is_backing) {
+bs->backing = child;
+} else {
+bs->file = child;
+}
+} else {
+assert(!(child->role & BDRV_CHILD_FILTERED));
+}
+} else if (child->role & BDRV_CHILD_COW) {
+assert(bs->drv->supports_backing);
+assert(!(child->role & BDRV_CHILD_PRIMARY));
+assert(!bs->backing);
+bs->backing = child;
 bdrv_backing_attach(child);
+} else if (child->role & BDRV_CHILD_PRIMARY) {
+assert(!bs->file);
+bs->file = child;
 }
 
 bdrv_apply_subtree_drain(child, bs);
@@ -1458,6 +1488,12 @@ static void bdrv_child_cb_detach(BdrvChild *child)
 
 assert_bdrv_graph_writable(bs);
 QLIST_REMOVE(child, next);
+if (child == bs->backing) {
+assert(child != bs->file);
+bs->backing = NULL;
+} else if (child == bs->file) {
+bs->file = NULL;
+}
 }
 
 static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
@@ -1663,7 +1699,7 @@ open_failed:
 bs->drv = NULL;
 if (bs->file != NULL) {
 bdrv_unref_child(bs, bs->file);
-bs->file = NULL;
+assert(!bs->file);
 }
 g_free(bs->opaque);
 bs->opaque = NULL;

[PATCH v2 01/11] bsd-user: Implement mount, umount and nmount

2022-06-24 Thread Warner Losh

Signed-off-by: Stacey Son 
Signed-off-by: Jung-uk Kim 
Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/bsd-file.h   | 52 +++
 bsd-user/freebsd/os-syscall.c | 13 +
 2 files changed, 65 insertions(+)

diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h
index b2dca586129..a0f03102639 100644
--- a/bsd-user/bsd-file.h
+++ b/bsd-user/bsd-file.h
@@ -549,4 +549,56 @@ static abi_long do_bsd_sync(void)
 return 0;
 }
 
+/* mount(2) */
+static abi_long do_bsd_mount(abi_long arg1, abi_long arg2, abi_long arg3,
+abi_long arg4)
+{
+abi_long ret;
+void *p1, *p2;
+
+LOCK_PATH2(p1, arg1, p2, arg2);
+/*
+ * XXX arg4 should be locked, but it isn't clear how to do that since it 
may
+ * be not be a NULL-terminated string.
+ */
+if (arg4 == 0) {
+ret = get_errno(mount(p1, p2, arg3, NULL)); /* XXX path(p2)? */
+} else {
+ret = get_errno(mount(p1, p2, arg3, g2h_untagged(arg4))); /* XXX 
path(p2)? */
+}
+UNLOCK_PATH2(p1, arg1, p2, arg2);
+
+return ret;
+}
+
+/* unmount(2) */
+static abi_long do_bsd_unmount(abi_long arg1, abi_long arg2)
+{
+abi_long ret;
+void *p;
+
+LOCK_PATH(p, arg1);
+ret = get_errno(unmount(p, arg2)); /* XXX path(p)? */
+UNLOCK_PATH(p, arg1);
+
+return ret;
+}
+
+/* nmount(2) */
+static abi_long do_bsd_nmount(abi_long arg1, abi_long count,
+abi_long flags)
+{
+abi_long ret;
+struct iovec *vec = lock_iovec(VERIFY_READ, arg1, count, 1);
+
+if (vec != NULL) {
+ret = get_errno(nmount(vec, count, flags));
+unlock_iovec(vec, arg1, count, 0);
+} else {
+return -TARGET_EFAULT;
+}
+
+return ret;
+}
+
 #endif /* BSD_FILE_H */
diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index 2623caf8007..bd4dfa6ddc7 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -33,6 +33,7 @@
 #include "qemu/path.h"
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -373,6 +374,18 @@ static abi_long freebsd_syscall(void *cpu_env, int num, 
abi_long arg1,
 ret = do_bsd_sync();
 break;
 
+case TARGET_FREEBSD_NR_mount: /* mount(2) */
+ret = do_bsd_mount(arg1, arg2, arg3, arg4);
+break;
+
+case TARGET_FREEBSD_NR_unmount: /* unmount(2) */
+ret = do_bsd_unmount(arg1, arg2);
+break;
+
+case TARGET_FREEBSD_NR_nmount: /* nmount(2) */
+ret = do_bsd_nmount(arg1, arg2, arg3);
+break;
+
 default:
 qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num);
 ret = -TARGET_ENOSYS;
-- 
2.33.1

[PATCH v6 12/15] Revert "block: Pass BdrvChild ** to replace_child_noperm"

2022-06-24 Thread Vladimir Sementsov-Ogievskiy

That was a preparation for the previously reverted
"block: Let replace_child_noperm free children". Drop it too, we don't
need it for the new approach.

This reverts commit be64bbb0149748f3999c49b13976aafb8330ea86.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block.c | 23 +++
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/block.c b/block.c
index 3ac332a460..ca86cd86d3 100644
--- a/block.c
+++ b/block.c
@@ -90,7 +90,7 @@ static BlockDriverState *bdrv_open_inherit(const char 
*filename,
 static bool bdrv_recurse_has_child(BlockDriverState *bs,
BlockDriverState *child);
 
-static void bdrv_replace_child_noperm(BdrvChild **child,
+static void bdrv_replace_child_noperm(BdrvChild *child,
   BlockDriverState *new_bs);
 static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
   BdrvChild *child,
@@ -2352,7 +2352,7 @@ static void bdrv_replace_child_abort(void *opaque)
 
 GLOBAL_STATE_CODE();
 /* old_bs reference is transparently moved from @s to @s->child */
-bdrv_replace_child_noperm(&s->child, s->old_bs);
+bdrv_replace_child_noperm(s->child, s->old_bs);
 bdrv_unref(new_bs);
 }
 
@@ -2382,7 +2382,7 @@ static void bdrv_replace_child_tran(BdrvChild *child, 
BlockDriverState *new_bs,
 if (new_bs) {
 bdrv_ref(new_bs);
 }
-bdrv_replace_child_noperm(&child, new_bs);
+bdrv_replace_child_noperm(child, new_bs);
 /* old_bs reference is transparently moved from @child to @s */
 }
 
@@ -2764,10 +2764,9 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission 
qapi_perm)
 return permissions[qapi_perm];
 }
 
-static void bdrv_replace_child_noperm(BdrvChild **childp,
+static void bdrv_replace_child_noperm(BdrvChild *child,
   BlockDriverState *new_bs)
 {
-BdrvChild *child = *childp;
 BlockDriverState *old_bs = child->bs;
 int new_bs_quiesce_counter;
 int drain_saldo;
@@ -2865,7 +2864,7 @@ static void bdrv_attach_child_common_abort(void *opaque)
 BlockDriverState *bs = child->bs;
 
 GLOBAL_STATE_CODE();
-bdrv_replace_child_noperm(s->child, NULL);
+bdrv_replace_child_noperm(child, NULL);
 
 if (bdrv_get_aio_context(bs) != s->old_child_ctx) {
 bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort);
@@ -2966,7 +2965,7 @@ static int bdrv_attach_child_common(BlockDriverState 
*child_bs,
 }
 
 bdrv_ref(child_bs);
-bdrv_replace_child_noperm(&new_child, child_bs);
+bdrv_replace_child_noperm(new_child, child_bs);
 
 *child = new_child;
 
@@ -3022,13 +3021,13 @@ static int bdrv_attach_child_noperm(BlockDriverState 
*parent_bs,
 return 0;
 }
 
-static void bdrv_detach_child(BdrvChild **childp)
+static void bdrv_detach_child(BdrvChild *child)
 {
-BlockDriverState *old_bs = (*childp)->bs;
+BlockDriverState *old_bs = child->bs;
 
 GLOBAL_STATE_CODE();
-bdrv_replace_child_noperm(childp, NULL);
-bdrv_child_free(*childp);
+bdrv_replace_child_noperm(child, NULL);
+bdrv_child_free(child);
 
 if (old_bs) {
 /*
@@ -3140,7 +3139,7 @@ void bdrv_root_unref_child(BdrvChild *child)
 GLOBAL_STATE_CODE();
 
 child_bs = child->bs;
-bdrv_detach_child(&child);
+bdrv_detach_child(child);
 bdrv_unref(child_bs);
 }
 
-- 
2.25.1

Re: [PATCH v3 22/51] target/arm: Trap AdvSIMD usage when Streaming SVE is active

2022-06-24 Thread Peter Maydell

On Fri, 24 Jun 2022 at 21:34, Richard Henderson
 wrote:
>
> On 6/24/22 08:30, Peter Maydell wrote:
> > So the thing that worries me about structuring this this way
> > is that the SME supplement appendix includes this caution:
> >
> > # The instruction encoding tables in this section [...] will
> > # require correction if subsequent versions of the A64 ISA
> > # add new instructions which overlap with these encodings.
> >
> > My guess (based on how the H.a Arm ARM has incorporated
> > SME) is that these tables aren't going to be included
> > in the Arm ARM and updated going forward. Instead the
> > behaviour will be documented based on whether (existing
> > and new) instructions call CheckNonStreamingSVEEnabled()
> > or CheckSVEEnabled() in their pseudocode.
>
> I agree that this would be cleaner and more correct long-term.
>
> > So I'm a bit uncertain about how awkward it's going to be
> > in future to maintain this transliteration of the SME
> > supplement tables into decodetree: we might find that
> > we have to look at new instructions and kind of
> > reverse-engineer back out any required changes to the
> > tables here, rather than simply "write the trans_ function
> > for the new insn, looking at the pseudocode to see which
> > _access_check() function it should be calling"...
>
> I thought about this, and if it were simply a matter of annotating the 
> trans_* functions
> within translate-sve.c, I would have done it.  But I would need to adjust A64 
> AdvSIMD as
> well, which is still done with the by-hand decoder.
>
> Can we use this solution in the short term, and fix up advsimd while 
> converting it to
> decodetree?  I'm more and more convinced we'll want this sooner than later.

Yeah, I guess so. Is it possible to do the SVE stuff the right
long-term way and have the short-term fix only for the A64 AdvSIMD,
or do we need to do both the same way ?

-- PMM

[PATCH v6 10/15] Revert "block: Let replace_child_tran keep indirect pointer"

2022-06-24 Thread Vladimir Sementsov-Ogievskiy

That commit was a preparation for the previously reverted
"block: Let replace_child_noperm free children". Drop it too; we don't
need it for the new approach.

This reverts commit 82b54cf51656bf3cd5ed1ac549e8a1085a0e3290.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block.c | 81 +++--
 1 file changed, 10 insertions(+), 71 deletions(-)

diff --git a/block.c b/block.c
index 34ca046470..a83845b120 100644
--- a/block.c
+++ b/block.c
@@ -2334,7 +2334,6 @@ static int bdrv_drv_set_perm(BlockDriverState *bs, 
uint64_t perm,
 
 typedef struct BdrvReplaceChildState {
 BdrvChild *child;
-BdrvChild **childp;
 BlockDriverState *old_bs;
 } BdrvReplaceChildState;
 
@@ -2352,29 +2351,7 @@ static void bdrv_replace_child_abort(void *opaque)
 BlockDriverState *new_bs = s->child->bs;
 
 GLOBAL_STATE_CODE();
-/*
- * old_bs reference is transparently moved from @s to s->child.
- *
- * Pass &s->child here instead of s->childp, because:
- * (1) s->old_bs must be non-NULL, so bdrv_replace_child_noperm() will not
- * modify the BdrvChild * pointer we indirectly pass to it, i.e. it
- * will not modify s->child.  From that perspective, it does not matter
- * whether we pass s->childp or &s->child.
- * (TODO: Right now, bdrv_replace_child_noperm() never modifies that
- * pointer anyway (though it will in the future), so at this point it
- * absolutely does not matter whether we pass s->childp or &s->child.)
- * (2) If new_bs is not NULL, s->childp will be NULL.  We then cannot use
- * it here.
- * (3) If new_bs is NULL, *s->childp will have been NULLed by
- * bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we
- * must not pass a NULL *s->childp here.
- * (TODO: In its current state, bdrv_replace_child_noperm() will not
- * have NULLed *s->childp, so this does not apply yet.  It will in the
- * future.)
- *
- * So whether new_bs was NULL or not, we cannot pass s->childp here; and in
- * any case, there is no reason to pass it anyway.
- */
+/* old_bs reference is transparently moved from @s to @s->child */
 bdrv_replace_child_noperm(&s->child, s->old_bs);
 bdrv_unref(new_bs);
 }
@@ -2391,32 +2368,22 @@ static TransactionActionDrv bdrv_replace_child_drv = {
  * Note: real unref of old_bs is done only on commit.
  *
  * The function doesn't update permissions, caller is responsible for this.
- *
- * Note that if new_bs == NULL, @childp is stored in a state object attached
- * to @tran, so that the old child can be reinstated in the abort handler.
- * Therefore, if @new_bs can be NULL, @childp must stay valid until the
- * transaction is committed or aborted.
- *
- * (TODO: The reinstating does not happen yet, but it will once
- * bdrv_replace_child_noperm() NULLs *childp when new_bs is NULL.)
  */
-static void bdrv_replace_child_tran(BdrvChild **childp,
-BlockDriverState *new_bs,
+static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
 Transaction *tran)
 {
 BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
 *s = (BdrvReplaceChildState) {
-.child = *childp,
-.childp = new_bs == NULL ? childp : NULL,
-.old_bs = (*childp)->bs,
+.child = child,
+.old_bs = child->bs,
 };
 tran_add(tran, &bdrv_replace_child_drv, s);
 
 if (new_bs) {
 bdrv_ref(new_bs);
 }
-bdrv_replace_child_noperm(childp, new_bs);
-/* old_bs reference is transparently moved from *childp to @s */
+bdrv_replace_child_noperm(&child, new_bs);
+/* old_bs reference is transparently moved from @child to @s */
 }
 
 /*
@@ -5041,7 +5008,6 @@ static bool should_update_child(BdrvChild *c, 
BlockDriverState *to)
 
 typedef struct BdrvRemoveFilterOrCowChild {
 BdrvChild *child;
-BlockDriverState *bs;
 bool is_backing;
 } BdrvRemoveFilterOrCowChild;
 
@@ -5071,19 +5037,10 @@ static void bdrv_remove_filter_or_cow_child_commit(void 
*opaque)
 bdrv_child_free(s->child);
 }
 
-static void bdrv_remove_filter_or_cow_child_clean(void *opaque)
-{
-BdrvRemoveFilterOrCowChild *s = opaque;
-
-/* Drop the bs reference after the transaction is done */
-bdrv_unref(s->bs);
-g_free(s);
-}
-
 static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = {
 .abort = bdrv_remove_filter_or_cow_child_abort,
 .commit = bdrv_remove_filter_or_cow_child_commit,
-.clean = bdrv_remove_filter_or_cow_child_clean,
+.clean = g_free,
 };
 
 /*
@@ -5101,11 +5058,6 @@ static void 
bdrv_remove_file_or_backing_child(BlockDriverState *bs,
 return;
 }
 
-/*
- * Keep a reference to @bs so @childp will stay valid throughout the
- * transaction (required by bdrv_replace_child_tran())
- */
-bdrv_ref(bs);
 if (child == bs->back

Re: [PATCH] aspeed: i2c: Fix DMA len write-enable bit handling

2022-06-24 Thread Cédric Le Goater


On 6/24/22 22:34, Peter Delevoryas wrote:




On Jun 24, 2022, at 1:31 PM, Peter Delevoryas  wrote:

I noticed i2c rx transfers were getting shortened to "1" on Zephyr. It
seems to be because the Zephyr i2c driver sets the RX DMA len with the
RX field write-enable bit set (bit 31) to avoid a read-modify-write. [1]

/* 0x1C : I2CM Master DMA Transfer Length Register   */

I think we should be checking the write-enable bits on the incoming
value, not checking the register array. I'm not sure we're even writing
the write-enable bits to the register array, actually.

[1] 
https://github.com/AspeedTech-BMC/zephyr/blob/db3dbcc9c52e67a47180890ac938ed380b33f91c/drivers/i2c/i2c_aspeed.c#L145-L148


Arg, forgot this:

Fixes: ba2cccd64e90f34 ("aspeed: i2c: Add new mode support")

Should I resend as v2?



No. patchwork did it :

http://patchwork.ozlabs.org/project/qemu-devel/patch/20220624203151.2026355-1-p...@fb.com/

Thanks,

C.



Thanks,
Peter



Signed-off-by: Peter Delevoryas 
---
hw/i2c/aspeed_i2c.c | 8 
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hw/i2c/aspeed_i2c.c b/hw/i2c/aspeed_i2c.c
index 37ae1f2e04..c4fce7474a 100644
--- a/hw/i2c/aspeed_i2c.c
+++ b/hw/i2c/aspeed_i2c.c
@@ -644,18 +644,18 @@ static void aspeed_i2c_bus_new_write(AspeedI2CBus *bus, 
hwaddr offset,
  RX_BUF_LEN) + 1;
 break;
 case A_I2CM_DMA_LEN:
-w1t = ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, RX_BUF_LEN_W1T) ||
-   ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, TX_BUF_LEN_W1T);
+w1t = FIELD_EX32(value, I2CM_DMA_LEN, RX_BUF_LEN_W1T) ||
+  FIELD_EX32(value, I2CM_DMA_LEN, TX_BUF_LEN_W1T);
 /* If none of the w1t bits are set, just write to the reg as normal. */
 if (!w1t) {
 bus->regs[R_I2CM_DMA_LEN] = value;
 break;
 }
-if (ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, RX_BUF_LEN_W1T)) {
+if (FIELD_EX32(value, I2CM_DMA_LEN, RX_BUF_LEN_W1T)) {
 ARRAY_FIELD_DP32(bus->regs, I2CM_DMA_LEN, RX_BUF_LEN,
  FIELD_EX32(value, I2CM_DMA_LEN, RX_BUF_LEN));
 }
-if (ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, TX_BUF_LEN_W1T)) {
+if (FIELD_EX32(value, I2CM_DMA_LEN, TX_BUF_LEN_W1T)) {
 ARRAY_FIELD_DP32(bus->regs, I2CM_DMA_LEN, TX_BUF_LEN,
  FIELD_EX32(value, I2CM_DMA_LEN, TX_BUF_LEN));
 }
--
2.30.2

[PATCH v6 09/15] Revert "block: Let replace_child_noperm free children"

2022-06-24 Thread Vladimir Sementsov-Ogievskiy

We are going to reimplement this behavior (clear bs->file / bs->backing
pointers automatically when child->bs is cleared) in a nicer way, see
further commit
"block: Manipulate bs->file / bs->backing pointers in .attach/.detach".

With this revert we bring back a problem that was fixed by b0a9f6fed3d8.
Still the problem was mostly theoretical, we don't have concrete bugs
fixed by b0a9f6fed3d8, we don't have a specific test. Probably some
accidental failures of iotests are related.

Alternatively, we may merge this and following three reverts into final
"block: Manipulate ..." to avoid any kind of regression. But seems that
in this case having separate clear revert commits is better.

This reverts commit b0a9f6fed3d80de610dcd04a7e66f9f30a04174f.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block.c | 102 +---
 1 file changed, 23 insertions(+), 79 deletions(-)

diff --git a/block.c b/block.c
index 883b1bb242..34ca046470 100644
--- a/block.c
+++ b/block.c
@@ -90,10 +90,8 @@ static BlockDriverState *bdrv_open_inherit(const char 
*filename,
 static bool bdrv_recurse_has_child(BlockDriverState *bs,
BlockDriverState *child);
 
-static void bdrv_child_free(BdrvChild *child);
 static void bdrv_replace_child_noperm(BdrvChild **child,
-  BlockDriverState *new_bs,
-  bool free_empty_child);
+  BlockDriverState *new_bs);
 static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
   BdrvChild *child,
   Transaction *tran);
@@ -2338,7 +2336,6 @@ typedef struct BdrvReplaceChildState {
 BdrvChild *child;
 BdrvChild **childp;
 BlockDriverState *old_bs;
-bool free_empty_child;
 } BdrvReplaceChildState;
 
 static void bdrv_replace_child_commit(void *opaque)
@@ -2346,9 +2343,6 @@ static void bdrv_replace_child_commit(void *opaque)
 BdrvReplaceChildState *s = opaque;
 GLOBAL_STATE_CODE();
 
-if (s->free_empty_child && !s->child->bs) {
-bdrv_child_free(s->child);
-}
 bdrv_unref(s->old_bs);
 }
 
@@ -2366,26 +2360,22 @@ static void bdrv_replace_child_abort(void *opaque)
  * modify the BdrvChild * pointer we indirectly pass to it, i.e. it
  * will not modify s->child.  From that perspective, it does not matter
  * whether we pass s->childp or &s->child.
+ * (TODO: Right now, bdrv_replace_child_noperm() never modifies that
+ * pointer anyway (though it will in the future), so at this point it
+ * absolutely does not matter whether we pass s->childp or &s->child.)
  * (2) If new_bs is not NULL, s->childp will be NULL.  We then cannot use
  * it here.
  * (3) If new_bs is NULL, *s->childp will have been NULLed by
  * bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we
  * must not pass a NULL *s->childp here.
+ * (TODO: In its current state, bdrv_replace_child_noperm() will not
+ * have NULLed *s->childp, so this does not apply yet.  It will in the
+ * future.)
  *
  * So whether new_bs was NULL or not, we cannot pass s->childp here; and in
  * any case, there is no reason to pass it anyway.
  */
-bdrv_replace_child_noperm(&s->child, s->old_bs, true);
-/*
- * The child was pre-existing, so s->old_bs must be non-NULL, and
- * s->child thus must not have been freed
- */
-assert(s->child != NULL);
-if (!new_bs) {
-/* As described above, *s->childp was cleared, so restore it */
-assert(s->childp != NULL);
-*s->childp = s->child;
-}
+bdrv_replace_child_noperm(&s->child, s->old_bs);
 bdrv_unref(new_bs);
 }
 
@@ -2402,44 +2392,30 @@ static TransactionActionDrv bdrv_replace_child_drv = {
  *
  * The function doesn't update permissions, caller is responsible for this.
  *
- * (*childp)->bs must not be NULL.
- *
  * Note that if new_bs == NULL, @childp is stored in a state object attached
  * to @tran, so that the old child can be reinstated in the abort handler.
  * Therefore, if @new_bs can be NULL, @childp must stay valid until the
  * transaction is committed or aborted.
  *
- * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is
- * freed (on commit).  @free_empty_child should only be false if the
- * caller will free the BDrvChild themselves (which may be important
- * if this is in turn called in another transactional context).
+ * (TODO: The reinstating does not happen yet, but it will once
+ * bdrv_replace_child_noperm() NULLs *childp when new_bs is NULL.)
  */
 static void bdrv_replace_child_tran(BdrvChild **childp,
 BlockDriverState *new_bs,
-Transaction *tran,
-bool free_empty_ch

[PATCH v6 06/15] test-bdrv-graph-mod: fix filters to be filters

2022-06-24 Thread Vladimir Sementsov-Ogievskiy

bdrv_pass_through is used as a filter; even all the node variables have
corresponding names. We want to append it, so it should be a
backing-child-based filter like mirror_top.
So, in test_update_perm_tree, the first child should be DATA, as we don't
want filters with two filtered children.

bdrv_exclusive_writer is used as a filter once. So it should be a filter
anyway. We want to append it, so it should be a backing-child-based
filter too.

Make all FILTERED children to be PRIMARY as well. We are going to force
this rule by assertion soon.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Hanna Reitz 
---
 include/block/block_int-common.h |  5 +++--
 tests/unit/test-bdrv-graph-mod.c | 24 +---
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 9d91ccbcbf..d68adc6ff3 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -122,8 +122,9 @@ struct BlockDriver {
 /*
  * Only make sense for filter drivers, for others must be false.
  * If true, filtered child is bs->backing. Otherwise it's bs->file.
- * Only two internal filters use bs->backing as filtered child and has this
- * field set to true: mirror_top and commit_top.
+ * Two internal filters use bs->backing as filtered child and has this
+ * field set to true: mirror_top and commit_top. There also two such test
+ * filters in tests/unit/test-bdrv-graph-mod.c.
  *
  * Never create any more such filters!
  *
diff --git a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c
index e2f1355af1..c522591531 100644
--- a/tests/unit/test-bdrv-graph-mod.c
+++ b/tests/unit/test-bdrv-graph-mod.c
@@ -26,6 +26,8 @@
 
 static BlockDriver bdrv_pass_through = {
 .format_name = "pass-through",
+.is_filter = true,
+.filtered_child_is_backing = true,
 .bdrv_child_perm = bdrv_default_perms,
 };
 
@@ -57,6 +59,8 @@ static void exclusive_write_perms(BlockDriverState *bs, 
BdrvChild *c,
 
 static BlockDriver bdrv_exclusive_writer = {
 .format_name = "exclusive-writer",
+.is_filter = true,
+.filtered_child_is_backing = true,
 .bdrv_child_perm = exclusive_write_perms,
 };
 
@@ -134,7 +138,7 @@ static void test_update_perm_tree(void)
 blk_insert_bs(root, bs, &error_abort);
 
 bdrv_attach_child(filter, bs, "child", &child_of_bds,
-  BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, &error_abort);
+  BDRV_CHILD_DATA, &error_abort);
 
 ret = bdrv_append(filter, bs, NULL);
 g_assert_cmpint(ret, <, 0);
@@ -228,11 +232,14 @@ static void test_parallel_exclusive_write(void)
  */
 bdrv_ref(base);
 
-bdrv_attach_child(top, fl1, "backing", &child_of_bds, BDRV_CHILD_DATA,
+bdrv_attach_child(top, fl1, "backing", &child_of_bds,
+  BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
   &error_abort);
-bdrv_attach_child(fl1, base, "backing", &child_of_bds, BDRV_CHILD_FILTERED,
+bdrv_attach_child(fl1, base, "backing", &child_of_bds,
+  BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
   &error_abort);
-bdrv_attach_child(fl2, base, "backing", &child_of_bds, BDRV_CHILD_FILTERED,
+bdrv_attach_child(fl2, base, "backing", &child_of_bds,
+  BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
   &error_abort);
 
 bdrv_replace_node(fl1, fl2, &error_abort);
@@ -344,9 +351,11 @@ static void test_parallel_perm_update(void)
   BDRV_CHILD_DATA, &error_abort);
 c_fl2 = bdrv_attach_child(ws, fl2, "second", &child_of_bds,
   BDRV_CHILD_DATA, &error_abort);
-bdrv_attach_child(fl1, base, "backing", &child_of_bds, BDRV_CHILD_FILTERED,
+bdrv_attach_child(fl1, base, "backing", &child_of_bds,
+  BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
   &error_abort);
-bdrv_attach_child(fl2, base, "backing", &child_of_bds, BDRV_CHILD_FILTERED,
+bdrv_attach_child(fl2, base, "backing", &child_of_bds,
+  BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
   &error_abort);
 
 /* Select fl1 as first child to be active */
@@ -397,7 +406,8 @@ static void test_append_greedy_filter(void)
 BlockDriverState *base = no_perm_node("base");
 BlockDriverState *fl = exclusive_writer_node("fl1");
 
-bdrv_attach_child(top, base, "backing", &child_of_bds, BDRV_CHILD_COW,
+bdrv_attach_child(top, base, "backing", &child_of_bds,
+  BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
   &error_abort);
 
 bdrv_append(fl, base, &error_abort);
-- 
2.25.1

[PATCH v6 14/15] block/snapshot: drop indirection around bdrv_snapshot_fallback_ptr

2022-06-24 Thread Vladimir Sementsov-Ogievskiy

Now the indirection is not actually used, we can safely reduce it to
simple pointer. For consistency do a bit of refactoring to get rid of
_ptr suffixes that become meaningless.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/snapshot.c | 38 --
 1 file changed, 16 insertions(+), 22 deletions(-)

diff --git a/block/snapshot.c b/block/snapshot.c
index f3971ac2bd..e22ac3eac6 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -151,34 +151,29 @@ bool bdrv_snapshot_find_by_id_and_name(BlockDriverState 
*bs,
 }
 
 /**
- * Return a pointer to the child BDS pointer to which we can fall
+ * Return a pointer to child of given BDS to which we can fall
  * back if the given BDS does not support snapshots.
  * Return NULL if there is no BDS to (safely) fall back to.
- *
- * We need to return an indirect pointer because bdrv_snapshot_goto()
- * has to modify the BdrvChild pointer.
  */
-static BdrvChild **bdrv_snapshot_fallback_ptr(BlockDriverState *bs)
+static BdrvChild *bdrv_snapshot_fallback_child(BlockDriverState *bs)
 {
-BdrvChild **fallback;
-BdrvChild *child = bdrv_primary_child(bs);
+BdrvChild *fallback = bdrv_primary_child(bs);
+BdrvChild *child;
 
 /* We allow fallback only to primary child */
-if (!child) {
+if (!fallback) {
 return NULL;
 }
-fallback = (child == bs->file ? &bs->file : &bs->backing);
-assert(*fallback == child);
 
 /*
  * Check that there are no other children that would need to be
  * snapshotted.  If there are, it is not safe to fall back to
- * *fallback.
+ * fallback.
  */
 QLIST_FOREACH(child, &bs->children, next) {
 if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
BDRV_CHILD_FILTERED) &&
-child != *fallback)
+child != fallback)
 {
 return NULL;
 }
@@ -189,8 +184,7 @@ static BdrvChild 
**bdrv_snapshot_fallback_ptr(BlockDriverState *bs)
 
 static BlockDriverState *bdrv_snapshot_fallback(BlockDriverState *bs)
 {
-BdrvChild **child_ptr = bdrv_snapshot_fallback_ptr(bs);
-return child_ptr ? (*child_ptr)->bs : NULL;
+return child_bs(bdrv_snapshot_fallback_child(bs));
 }
 
 int bdrv_can_snapshot(BlockDriverState *bs)
@@ -237,7 +231,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
Error **errp)
 {
 BlockDriver *drv = bs->drv;
-BdrvChild **fallback_ptr;
+BdrvChild *fallback;
 int ret, open_ret;
 
 GLOBAL_STATE_CODE();
@@ -260,13 +254,13 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
 return ret;
 }
 
-fallback_ptr = bdrv_snapshot_fallback_ptr(bs);
-if (fallback_ptr) {
+fallback = bdrv_snapshot_fallback_child(bs);
+if (fallback) {
 QDict *options;
 QDict *file_options;
 Error *local_err = NULL;
-BlockDriverState *fallback_bs = (*fallback_ptr)->bs;
-char *subqdict_prefix = g_strdup_printf("%s.", (*fallback_ptr)->name);
+BlockDriverState *fallback_bs = fallback->bs;
+char *subqdict_prefix = g_strdup_printf("%s.", fallback->name);
 
 options = qdict_clone_shallow(bs->options);
 
@@ -277,8 +271,8 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
 qobject_unref(file_options);
 g_free(subqdict_prefix);
 
-/* Force .bdrv_open() below to re-attach fallback_bs on *fallback_ptr 
*/
-qdict_put_str(options, (*fallback_ptr)->name,
+/* Force .bdrv_open() below to re-attach fallback_bs on fallback */
+qdict_put_str(options, fallback->name,
   bdrv_get_node_name(fallback_bs));
 
 /* Now close bs, apply the snapshot on fallback_bs, and re-open bs */
@@ -287,7 +281,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
 }
 
 /* .bdrv_open() will re-attach it */
-bdrv_unref_child(bs, *fallback_ptr);
+bdrv_unref_child(bs, fallback);
 
 ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp);
 open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err);
-- 
2.25.1

[PATCH v6 11/15] Revert "block: Restructure remove_file_or_backing_child()"

2022-06-24 Thread Vladimir Sementsov-Ogievskiy

That commit was a preparation for the previously reverted
"block: Let replace_child_noperm free children". Drop it too; we don't
need it for the new approach.

This reverts commit 562bda8bb41879eeda0bd484dd3d55134579b28e.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block.c | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/block.c b/block.c
index a83845b120..3ac332a460 100644
--- a/block.c
+++ b/block.c
@@ -5051,33 +5051,30 @@ static void 
bdrv_remove_file_or_backing_child(BlockDriverState *bs,
   BdrvChild *child,
   Transaction *tran)
 {
-BdrvChild **childp;
 BdrvRemoveFilterOrCowChild *s;
 
+assert(child == bs->backing || child == bs->file);
+
 if (!child) {
 return;
 }
 
-if (child == bs->backing) {
-childp = &bs->backing;
-} else if (child == bs->file) {
-childp = &bs->file;
-} else {
-g_assert_not_reached();
-}
-
 if (child->bs) {
-bdrv_replace_child_tran(*childp, NULL, tran);
+bdrv_replace_child_tran(child, NULL, tran);
 }
 
 s = g_new(BdrvRemoveFilterOrCowChild, 1);
 *s = (BdrvRemoveFilterOrCowChild) {
 .child = child,
-.is_backing = (childp == &bs->backing),
+.is_backing = (child == bs->backing),
 };
 tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s);
 
-*childp = NULL;
+if (s->is_backing) {
+bs->backing = NULL;
+} else {
+bs->file = NULL;
+}
 }
 
 /*
-- 
2.25.1

[PATCH v6 04/15] test-bdrv-graph-mod: update test_parallel_perm_update test case

2022-06-24 Thread Vladimir Sementsov-Ogievskiy

test_parallel_perm_update() does three things that we are going to
restrict in the near future:

1. It updates bs->file field by hand. bs->file will be managed
   automatically by generic code (together with bs->children list).

   Let's better refactor our "tricky" bds to have own state where one
   of children is linked as "selected".
   This also looks less "tricky", so avoid using this word.

2. It creates FILTERED children that are not PRIMARY. Except for tests,
   all FILTERED children in the QEMU block layer are always PRIMARY as
   well.  We are going to formalize this rule, so let's better use DATA
   children here.

3. It creates more than one FILTERED child, which is already abandoned
   in BDRV_CHILD_FILTERED's description.

While being here, update the picture to better correspond to the test
code.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 tests/unit/test-bdrv-graph-mod.c | 80 +++-
 1 file changed, 49 insertions(+), 31 deletions(-)

diff --git a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c
index a6e3bb79be..e2f1355af1 100644
--- a/tests/unit/test-bdrv-graph-mod.c
+++ b/tests/unit/test-bdrv-graph-mod.c
@@ -241,13 +241,26 @@ static void test_parallel_exclusive_write(void)
 bdrv_unref(top);
 }
 
-static void write_to_file_perms(BlockDriverState *bs, BdrvChild *c,
- BdrvChildRole role,
- BlockReopenQueue *reopen_queue,
- uint64_t perm, uint64_t shared,
- uint64_t *nperm, uint64_t *nshared)
+/*
+ * write-to-selected node may have several DATA children, one of them may be
+ * "selected". Exclusive write permission is taken on selected child.
+ *
+ * We don't realize write handler itself, as we need only to test how 
permission
+ * update works.
+ */
+typedef struct BDRVWriteToSelectedState {
+BdrvChild *selected;
+} BDRVWriteToSelectedState;
+
+static void write_to_selected_perms(BlockDriverState *bs, BdrvChild *c,
+BdrvChildRole role,
+BlockReopenQueue *reopen_queue,
+uint64_t perm, uint64_t shared,
+uint64_t *nperm, uint64_t *nshared)
 {
-if (bs->file && c == bs->file) {
+BDRVWriteToSelectedState *s = bs->opaque;
+
+if (s->selected && c == s->selected) {
 *nperm = BLK_PERM_WRITE;
 *nshared = BLK_PERM_ALL & ~BLK_PERM_WRITE;
 } else {
@@ -256,9 +269,10 @@ static void write_to_file_perms(BlockDriverState *bs, 
BdrvChild *c,
 }
 }
 
-static BlockDriver bdrv_write_to_file = {
-.format_name = "tricky-perm",
-.bdrv_child_perm = write_to_file_perms,
+static BlockDriver bdrv_write_to_selected = {
+.format_name = "write-to-selected",
+.instance_size = sizeof(BDRVWriteToSelectedState),
+.bdrv_child_perm = write_to_selected_perms,
 };
 
 
@@ -266,15 +280,18 @@ static BlockDriver bdrv_write_to_file = {
  * The following test shows that topological-sort order is required for
  * permission update, simple DFS is not enough.
  *
- * Consider the block driver which has two filter children: one active
- * with exclusive write access and one inactive with no specific
- * permissions.
+ * Consider the block driver (write-to-selected) which has two children: one is
+ * selected so we have exclusive write access to it and for the other one we
+ * don't need any specific permissions.
  *
  * And, these two children has a common base child, like this:
+ *   (additional "top" on top is used in test just because the only public
+ *function to update permission should get a specific child to update.
+ *Making bdrv_refresh_perms() public just for this test isn't worth it)
  *
- * ┌─┐ ┌──┐
- * │ fl2 │ ◀── │ top  │
- * └─┘ └──┘
+ * ┌─┐ ┌───┐ ┌─┐
+ * │ fl2 │ ◀── │ write-to-selected │ ◀── │ top │
+ * └─┘ └───┘ └─┘
  *   │   │
  *   │   │ w
  *   │   ▼
@@ -290,14 +307,14 @@ static BlockDriver bdrv_write_to_file = {
  *
  * So, exclusive write is propagated.
  *
- * Assume, we want to make fl2 active instead of fl1.
- * So, we set some option for top driver and do permission update.
+ * Assume, we want to select fl2 instead of fl1.
+ * So, we set some option for write-to-selected driver and do permission 
update.
  *
  * With simple DFS, if permission update goes first through
- * top->fl1->base branch it will succeed: it firstly drop exclusive write
- * permissions and than apply them for another BdrvChildren.
- * But if permission update goes first through top->fl2->base branch it
- * will fail, as when we try to update fl2->base child, old not yet
+ * write-to-selected -> fl1 -> base branch it will succeed: it firstly drop
+ * exclusive write permissions and than apply them for another BdrvChildren.
+ * But if

[PATCH v6 08/15] block/snapshot: stress that we fallback to primary child

2022-06-24 Thread Vladimir Sementsov-Ogievskiy

Actually what we chose is a primary child. Let's stress it in the code.

We are going to drop indirect pointer logic here in future. Actually
this commit simplifies the future work: we drop use of indirection in
the assertion now.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Hanna Reitz 
---
 block/snapshot.c | 30 ++
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/block/snapshot.c b/block/snapshot.c
index d6f53c3065..75e8d3a937 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -161,21 +161,14 @@ bool bdrv_snapshot_find_by_id_and_name(BlockDriverState 
*bs,
 static BdrvChild **bdrv_snapshot_fallback_ptr(BlockDriverState *bs)
 {
 BdrvChild **fallback;
-BdrvChild *child;
+BdrvChild *child = bdrv_primary_child(bs);
 
-/*
- * The only BdrvChild pointers that are safe to modify (and which
- * we can thus return a reference to) are bs->file and
- * bs->backing.
- */
-fallback = &bs->file;
-if (!*fallback && bs->drv && bs->drv->is_filter) {
-fallback = &bs->backing;
-}
-
-if (!*fallback) {
+/* We allow fallback only to primary child */
+if (!child) {
 return NULL;
 }
+fallback = (child == bs->file ? &bs->file : &bs->backing);
+assert(*fallback == child);
 
 /*
  * Check that there are no other children that would need to be
@@ -309,15 +302,12 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
 }
 
 /*
- * fallback_ptr is &bs->file or &bs->backing.  *fallback_ptr
- * was closed above and set to NULL, but the .bdrv_open() call
- * has opened it again, because we set the respective option
- * (with the qdict_put_str() call above).
- * Assert that .bdrv_open() has attached some child on
- * *fallback_ptr, and that it has attached the one we wanted
- * it to (i.e., fallback_bs).
+ * fallback was a primary child. It was closed above and set to NULL,
+ * but the .bdrv_open() call has opened it again, because we set the
+ * respective option (with the qdict_put_str() call above).
+ * Assert that .bdrv_open() has attached the right BDS as primary 
child.
  */
-assert(*fallback_ptr && fallback_bs == (*fallback_ptr)->bs);
+assert(bdrv_primary_bs(bs) == fallback_bs);
 bdrv_unref(fallback_bs);
 return ret;
 }
-- 
2.25.1

[PATCH v6 00/15] block: cleanup backing and file handling

2022-06-24 Thread Vladimir Sementsov-Ogievskiy

Hi all!

That's the first part of
"[PATCH v5 00/45] Transactional block-graph modifying API",
updated and almost reviewed.

One commit (15) is added to the original scope of
"block: cleanup backing and file handling", as it's related.

01: add Hanna's r-b
02: - mention snapshot-access in commit msg
- return ret in compress_open instead of EINVAL
- add Hanna's r-b
03: add Hanna's r-b
04: - add case in commit msg
- fix comments
05: - fix type in commit msg
- add Hanna's r-b
06: add Hanna's r-b
07: wording improvements
08: - fix wording
- add Hanna's r-b

09: I added a description of why we allow a degradation. Still, it's
up to the maintainers: it's OK to merge 09-13 into one big commit

13: - fix s/|/||/
- improve comment
- more readable logic when handle filters in bdrv_child_cb_attach()
- don't keep **child indirection, move to just returning a child ptr
  (honestly, I didn't check whether all the callers need this int value.
Do you think it's needed?)
- handle snapshot-access.c
14: get rid of _ptr
15: update comment

Vladimir Sementsov-Ogievskiy (15):
  block: BlockDriver: add .filtered_child_is_backing field
  block: introduce bdrv_open_file_child() helper
  block/blklogwrites: don't care to remove bs->file child on failure
  test-bdrv-graph-mod: update test_parallel_perm_update test case
  tests-bdrv-drain: bdrv_replace_test driver: declare supports_backing
  test-bdrv-graph-mod: fix filters to be filters
  block: document connection between child roles and
bs->backing/bs->file
  block/snapshot: stress that we fallback to primary child
  Revert "block: Let replace_child_noperm free children"
  Revert "block: Let replace_child_tran keep indirect pointer"
  Revert "block: Restructure remove_file_or_backing_child()"
  Revert "block: Pass BdrvChild ** to replace_child_noperm"
  block: Manipulate bs->file / bs->backing pointers in .attach/.detach
  block/snapshot: drop indirection around bdrv_snapshot_fallback_ptr
  block: refactor bdrv_remove_file_or_backing_child to bdrv_remove_child

 block.c| 435 ++---
 block/blkdebug.c   |   9 +-
 block/blklogwrites.c   |  11 +-
 block/blkreplay.c  |   7 +-
 block/blkverify.c  |   9 +-
 block/bochs.c  |   7 +-
 block/cloop.c  |   7 +-
 block/commit.c |   1 +
 block/copy-before-write.c  |   9 +-
 block/copy-on-read.c   |   9 +-
 block/crypto.c |  11 +-
 block/dmg.c|   7 +-
 block/filter-compress.c|   8 +-
 block/mirror.c |   1 +
 block/parallels.c  |   7 +-
 block/preallocate.c|   9 +-
 block/qcow.c   |   6 +-
 block/qcow2.c  |   8 +-
 block/qed.c|   8 +-
 block/raw-format.c |   4 +-
 block/replication.c|   8 +-
 block/snapshot-access.c|   6 +-
 block/snapshot.c   |  59 ++--
 block/throttle.c   |   8 +-
 block/vdi.c|   7 +-
 block/vhdx.c   |   7 +-
 block/vmdk.c   |   7 +-
 block/vpc.c|   7 +-
 include/block/block-common.h   |  39 +++
 include/block/block-global-state.h |   3 +
 include/block/block_int-common.h   |  29 +-
 tests/unit/test-bdrv-drain.c   |  11 +-
 tests/unit/test-bdrv-graph-mod.c   | 104 ---
 33 files changed, 389 insertions(+), 479 deletions(-)

-- 
2.25.1

[PATCH v6 07/15] block: document connection between child roles and bs->backing/bs->file

2022-06-24 Thread Vladimir Sementsov-Ogievskiy

Make the informal rules formal. In a further commit we'll add the
corresponding assertions.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/block/block-common.h | 39 
 1 file changed, 39 insertions(+)

diff --git a/include/block/block-common.h b/include/block/block-common.h
index fdb7306e78..fda67a7c38 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -313,6 +313,45 @@ enum {
  *
  * At least one of DATA, METADATA, FILTERED, or COW must be set for
  * every child.
+ *
+ *
+ * = Connection with bs->children, bs->file and bs->backing fields =
+ *
+ * 1. Filters
+ *
+ * Filter drivers have drv->is_filter = true.
+ *
+ * Filter node has exactly one FILTERED|PRIMARY child, and may have other
+ * children which must not have these bits (one example is the
+ * copy-before-write filter, which also has its target DATA child).
+ *
+ * Filter nodes never have COW children.
+ *
+ * For most filters, the filtered child is linked in bs->file, bs->backing is
+ * NULL.  For some filters (as an exception), it is the other way around; those
+ * drivers will have drv->filtered_child_is_backing set to true (see that
+ * field’s documentation for what drivers this concerns)
+ *
+ * 2. "raw" driver (block/raw-format.c)
+ *
+ * Formally it's not a filter (drv->is_filter = false)
+ *
+ * bs->backing is always NULL
+ *
+ * Only has one child, linked in bs->file. Its role is either FILTERED|PRIMARY
+ * (like filter) or DATA|PRIMARY depending on options.
+ *
+ * 3. Other drivers
+ *
+ * Don't have any FILTERED children.
+ *
+ * May have at most one COW child. In this case it's linked in bs->backing.
+ * Otherwise bs->backing is NULL. COW child is never PRIMARY.
+ *
+ * May have at most one PRIMARY child. In this case it's linked in bs->file.
+ * Otherwise bs->file is NULL.
+ *
+ * May also have some other children that don't have the PRIMARY or COW bit 
set.
  */
 enum BdrvChildRoleBits {
 /*
-- 
2.25.1

[PATCH v6 02/15] block: introduce bdrv_open_file_child() helper

2022-06-24 Thread Vladimir Sementsov-Ogievskiy

Almost all drivers call bdrv_open_child() similarly. Let's create a
helper for this.

The only drivers that call bdrv_open_child() to set bs->file and are
not updated here are raw-format and snapshot-access:
raw-format sometimes wants to have a filtered child but
doesn't set drv->is_filter to true.
snapshot-access wants only DATA | PRIMARY

Possibly we should implement a drv->is_filter_func() handler, to consider
raw-format a filter when it works as a filter. But that's another story.

Note also that we reduce the number of assignments to bs->file in the code:
this helps us restrict modification of this field in a further commit.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Hanna Reitz 
---
 block.c| 21 +
 block/blkdebug.c   |  9 +++--
 block/blklogwrites.c   |  7 ++-
 block/blkreplay.c  |  7 ++-
 block/blkverify.c  |  9 +++--
 block/bochs.c  |  7 +++
 block/cloop.c  |  7 +++
 block/copy-before-write.c  |  9 -
 block/copy-on-read.c   |  9 -
 block/crypto.c | 11 ++-
 block/dmg.c|  7 +++
 block/filter-compress.c|  8 +++-
 block/parallels.c  |  7 +++
 block/preallocate.c|  9 -
 block/qcow.c   |  6 ++
 block/qcow2.c  |  8 
 block/qed.c|  8 
 block/replication.c|  8 +++-
 block/throttle.c   |  8 +++-
 block/vdi.c|  7 +++
 block/vhdx.c   |  7 +++
 block/vmdk.c   |  7 +++
 block/vpc.c|  7 +++
 include/block/block-global-state.h |  3 +++
 24 files changed, 95 insertions(+), 101 deletions(-)

diff --git a/block.c b/block.c
index 2c0080..883b1bb242 100644
--- a/block.c
+++ b/block.c
@@ -3666,6 +3666,27 @@ BdrvChild *bdrv_open_child(const char *filename,
  errp);
 }
 
+/*
+ * Wrapper on bdrv_open_child() for most popular case: open primary child of 
bs.
+ */
+int bdrv_open_file_child(const char *filename,
+ QDict *options, const char *bdref_key,
+ BlockDriverState *parent, Error **errp)
+{
+BdrvChildRole role;
+
+/* commit_top and mirror_top don't use this function */
+assert(!parent->drv->filtered_child_is_backing);
+
+role = parent->drv->is_filter ?
+(BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY) : BDRV_CHILD_IMAGE;
+
+parent->file = bdrv_open_child(filename, options, bdref_key, parent,
+   &child_of_bds, role, false, errp);
+
+return parent->file ? 0 : -EINVAL;
+}
+
 /*
  * TODO Future callers may need to specify parent/child_class in order for
  * option inheritance to work. Existing callers use it for the root node.
diff --git a/block/blkdebug.c b/block/blkdebug.c
index bbf2948703..5fcfc8ac6f 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -503,12 +503,9 @@ static int blkdebug_open(BlockDriverState *bs, QDict 
*options, int flags,
 }
 
 /* Open the image file */
-bs->file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, "image",
-   bs, &child_of_bds,
-   BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
-   false, errp);
-if (!bs->file) {
-ret = -EINVAL;
+ret = bdrv_open_file_child(qemu_opt_get(opts, "x-image"), options, "image",
+   bs, errp);
+if (ret < 0) {
 goto out;
 }
 
diff --git a/block/blklogwrites.c b/block/blklogwrites.c
index f7a251e91f..f66a617eb3 100644
--- a/block/blklogwrites.c
+++ b/block/blklogwrites.c
@@ -155,11 +155,8 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict 
*options, int flags,
 }
 
 /* Open the file */
-bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
-   BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, false,
-   errp);
-if (!bs->file) {
-ret = -EINVAL;
+ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
+if (ret < 0) {
 goto fail;
 }
 
diff --git a/block/blkreplay.c b/block/blkreplay.c
index dcbe780ddb..76a0b8d12a 100644
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -26,11 +26,8 @@ static int blkreplay_open(BlockDriverState *bs, QDict 
*options, int flags,
 int ret;
 
 /* Open the image file */
-bs->file = bdrv_open_child(NULL, options, "image", bs, &child_of_bds,
-   BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
-   false, errp);
-if (!bs->file) {
-ret = -EINVAL;
+ret = bdrv_open_file_child(NULL, options, "image", bs, errp);
+if (ret

[PATCH v6 01/15] block: BlockDriver: add .filtered_child_is_backing field

2022-06-24 Thread Vladimir Sementsov-Ogievskiy

Unfortunately not all filters use the .file child as their filtered child.
The two exceptions are mirror_top and commit_top. Happily they are both
private filters. The bad thing is that this inconsistency is observable
through the qmp commands query-block / query-named-block-nodes. So, whether
we could just change mirror_top and commit_top to use the file child like
all other filter drivers is an open question. Probably, we could do that
with some kind of deprecation period, but how to warn users during it?

For now, let's just add a field so we can distinguish them in generic
code, it will be used in further commits.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Hanna Reitz 
---
 block/commit.c   |  1 +
 block/mirror.c   |  1 +
 include/block/block_int-common.h | 13 +
 3 files changed, 15 insertions(+)

diff --git a/block/commit.c b/block/commit.c
index 851d1c557a..7722a392af 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -238,6 +238,7 @@ static BlockDriver bdrv_commit_top = {
 .bdrv_child_perm= bdrv_commit_top_child_perm,
 
 .is_filter  = true,
+.filtered_child_is_backing  = true,
 };
 
 void commit_start(const char *job_id, BlockDriverState *bs,
diff --git a/block/mirror.c b/block/mirror.c
index d8ecb9efa2..824b273fc7 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1578,6 +1578,7 @@ static BlockDriver bdrv_mirror_top = {
 .bdrv_child_perm= bdrv_mirror_top_child_perm,
 
 .is_filter  = true,
+.filtered_child_is_backing  = true,
 };
 
 static BlockJob *mirror_start_job(
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 8947abab76..9d91ccbcbf 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -119,6 +119,19 @@ struct BlockDriver {
  * (And this filtered child must then be bs->file or bs->backing.)
  */
 bool is_filter;
+/*
+ * Only make sense for filter drivers, for others must be false.
+ * If true, filtered child is bs->backing. Otherwise it's bs->file.
+ * Only two internal filters use bs->backing as filtered child and has this
+ * field set to true: mirror_top and commit_top.
+ *
+ * Never create any more such filters!
+ *
+ * TODO: imagine how to deprecate this behavior and make all filters work
+ * similarly using bs->file as filtered child.
+ */
+bool filtered_child_is_backing;
+
 /*
  * Set to true if the BlockDriver is a format driver.  Format nodes
  * generally do not expect their children to be other format nodes
-- 
2.25.1

[PATCH v6 05/15] tests-bdrv-drain: bdrv_replace_test driver: declare supports_backing

2022-06-24 Thread Vladimir Sementsov-Ogievskiy

We do add a COW child to the node.  In the future we are going to forbid
adding a COW child to a node that doesn't support backing. So, fix it
here now.

Don't worry about setting bs->backing itself: in further commit we'll
update the block-layer to automatically set/unset this field in generic
code.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Hanna Reitz 
---
 tests/unit/test-bdrv-drain.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 36be84ae55..23d425a494 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -1948,6 +1948,7 @@ static void coroutine_fn 
bdrv_replace_test_co_drain_end(BlockDriverState *bs)
 static BlockDriver bdrv_replace_test = {
 .format_name= "replace_test",
 .instance_size  = sizeof(BDRVReplaceTestState),
+.supports_backing   = true,
 
 .bdrv_close = bdrv_replace_test_close,
 .bdrv_co_preadv = bdrv_replace_test_co_preadv,
-- 
2.25.1

[PATCH v6 03/15] block/blklogwrites: don't care to remove bs->file child on failure

2022-06-24 Thread Vladimir Sementsov-Ogievskiy

We don't need to remove bs->file, generic layer takes care of it. No
other driver cares to remove bs->file on failure by hand.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Hanna Reitz 
---
 block/blklogwrites.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/block/blklogwrites.c b/block/blklogwrites.c
index f66a617eb3..7d25df97cc 100644
--- a/block/blklogwrites.c
+++ b/block/blklogwrites.c
@@ -254,10 +254,6 @@ fail_log:
 s->log_file = NULL;
 }
 fail:
-if (ret < 0) {
-bdrv_unref_child(bs, bs->file);
-bs->file = NULL;
-}
 qemu_opts_del(opts);
 return ret;
 }
-- 
2.25.1

[PATCH] Align Raspberry Pi DMA interrupts with Linux DTS

2022-06-24 Thread Andrey Makarov

All Raspberry Pi models 1-3 (based on bcm2835) have the following in their
Linux device tree (arch/arm/boot/dts/bcm2835-common.dtsi +25):

/* dma channel 11-14 share one irq */

which does not match the QEMU model.
In this patch channels 0--10 and 11--14 are handled separately.

Signed-off-by: Andrey Makarov 
---
 hw/arm/bcm2835_peripherals.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c
index 48538c9360..3d808b0e31 100644
--- a/hw/arm/bcm2835_peripherals.c
+++ b/hw/arm/bcm2835_peripherals.c
@@ -322,13 +322,21 @@ static void bcm2835_peripherals_realize(DeviceState *dev, 
Error **errp)
 memory_region_add_subregion(&s->peri_mr, DMA15_OFFSET,
 sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->dma), 1));
 
-for (n = 0; n <= 12; n++) {
+for (n = 0; n <= 10; n++) {
 sysbus_connect_irq(SYS_BUS_DEVICE(&s->dma), n,
qdev_get_gpio_in_named(DEVICE(&s->ic),
   BCM2835_IC_GPU_IRQ,
   INTERRUPT_DMA0 + n));
 }
 
+/* According to DTS, dma channels 11-14 share one irq */
+for (n = 11; n <= 14; n++) {
+sysbus_connect_irq(SYS_BUS_DEVICE(&s->dma), n,
+   qdev_get_gpio_in_named(DEVICE(&s->ic),
+  BCM2835_IC_GPU_IRQ,
+  INTERRUPT_DMA0 + 11));
+}
+
 /* THERMAL */
 if (!sysbus_realize(SYS_BUS_DEVICE(&s->thermal), errp)) {
 return;
-- 
2.30.2

Re: [PATCH v3 22/51] target/arm: Trap AdvSIMD usage when Streaming SVE is active

2022-06-24 Thread Richard Henderson


On 6/24/22 08:30, Peter Maydell wrote:

So the thing that worries me about structuring this this way
is that the SME supplement appendix includes this caution:

# The instruction encoding tables in this section [...] will
# require correction if subsequent versions of the A64 ISA
# add new instructions which overlap with these encodings.

My guess (based on how the H.a Arm ARM has incorporated
SME) is that these tables aren't going to be included
in the Arm ARM and updated going forward. Instead the
behaviour will be documented based on whether (existing
and new) instructions call CheckNonStreamingSVEEnabled()
or CheckSVEEnabled() in their pseudocode.


I agree that this would be cleaner and more correct long-term.


So I'm a bit uncertain about how awkward it's going to be
in future to maintain this transliteration of the SME
supplement tables into decodetree: we might find that
we have to look at new instructions and kind of
reverse-engineer back out any required changes to the
tables here, rather than simply "write the trans_ function
for the new insn, looking at the pseudocode to see which
_access_check() function it should be calling"...


I thought about this, and if it were simply a matter of annotating the trans_* functions 
within translate-sve.c, I would have done it.  But I would need to adjust A64 AdvSIMD as 
well, which is still done with the by-hand decoder.


Can we use this solution in the short term, and fix up advsimd while converting it to 
decodetree?  I'm more and more convinced we'll want this sooner than later.



r~

Re: [PULL v2 00/20] Block layer patches

2022-06-24 Thread Richard Henderson


On 6/24/22 08:40, Kevin Wolf wrote:

The following changes since commit 3a821c52e1a30ecd9a436f2c67cc66b5628c829f:

   Merge tag 'nvme-next-pull-request' of git://git.infradead.org/qemu-nvme into 
staging (2022-06-23 14:52:30 -0700)

are available in the Git repository at:

   git://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to 779d82e1d305f2a9cbd7f48cf6555ad58145e04a:

   vduse-blk: Add name option (2022-06-24 17:07:06 +0200)


Block layer patches

- Add vduse-blk export
- Dirty bitmaps: Fix and improve bitmap merge
- gluster: correctly set max_pdiscard
- rbd: report a better error when namespace does not exist
- aio_wait_kick: add missing memory barrier
- Code cleanups


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/7.1 as 
appropriate.


r~






Emanuele Giuseppe Esposito (1):
   aio_wait_kick: add missing memory barrier

Eric Blake (1):
   nbd: Drop dead code spotted by Coverity

Fabian Ebner (1):
   block/gluster: correctly set max_pdiscard

Stefan Hajnoczi (3):
   block: drop unused bdrv_co_drain() API
   block: get rid of blk->guest_block_size
   qsd: document vduse-blk exports

Stefano Garzarella (1):
   block/rbd: report a better error when namespace does not exist

Vladimir Sementsov-Ogievskiy (3):
   block: block_dirty_bitmap_merge(): fix error path
   block: improve block_dirty_bitmap_merge(): don't allocate extra bitmap
   block: simplify handling of try to merge different sized bitmaps

Xie Yongji (10):
   block: Support passing NULL ops to blk_set_dev_ops()
   block/export: Fix incorrect length passed to vu_queue_push()
   block/export: Abstract out the logic of virtio-blk I/O process
   linux-headers: Add vduse.h
   libvduse: Add VDUSE (vDPA Device in Userspace) library
   vduse-blk: Implement vduse-blk export
   vduse-blk: Add vduse-blk resize support
   libvduse: Add support for reconnecting
   vduse-blk: Add serial option
   vduse-blk: Add name option

  qapi/block-export.json  |   29 +-
  docs/tools/qemu-storage-daemon.rst  |   22 +
  meson_options.txt   |4 +
  block/export/vduse-blk.h|   20 +
  block/export/virtio-blk-handler.h   |   37 +
  include/block/aio-wait.h|2 +
  include/block/block-io.h|1 -
  include/block/block_int-io.h|2 +-
  include/qemu/hbitmap.h  |   15 +-
  include/sysemu/block-backend-io.h   |1 -
  linux-headers/linux/vduse.h |  306 ++
  subprojects/libvduse/include/atomic.h   |1 +
  subprojects/libvduse/include/compiler.h |1 +
  subprojects/libvduse/libvduse.h |  247 +
  block/backup.c  |6 +-
  block/block-backend.c   |   12 +-
  block/dirty-bitmap.c|   26 +-
  block/export/export.c   |6 +
  block/export/vduse-blk.c|  374 
  block/export/vhost-user-blk-server.c|  263 +
  block/export/virtio-blk-handler.c   |  240 +
  block/gluster.c |2 +-
  block/io.c  |   15 -
  block/monitor/bitmap-qmp-cmds.c |   40 +-
  block/nbd.c |8 +-
  block/rbd.c |   24 +
  hw/block/virtio-blk.c   |1 -
  hw/block/xen-block.c|1 -
  hw/ide/core.c   |1 -
  hw/scsi/scsi-disk.c |1 -
  hw/scsi/scsi-generic.c  |1 -
  storage-daemon/qemu-storage-daemon.c|   10 +
  subprojects/libvduse/libvduse.c | 1375 +++
  util/aio-wait.c |   16 +-
  util/hbitmap.c  |   25 +-
  MAINTAINERS |9 +
  block/export/meson.build|7 +-
  meson.build |   34 +
  scripts/meson-buildoptions.sh   |7 +
  scripts/update-linux-headers.sh |2 +-
  subprojects/libvduse/linux-headers/linux|1 +
  subprojects/libvduse/meson.build|   10 +
  subprojects/libvduse/standard-headers/linux |1 +
  43 files changed, 2852 insertions(+), 354 deletions(-)
  create mode 100644 block/export/vduse-blk.h
  create mode 100644 block/export/virtio-blk-handler.h
  create mode 100644 linux-headers/linux/vduse.h
  create mode 120000 subprojects/libvduse/include/atomic.h
  create mode 120000 subprojects/libvduse/include/compiler.h
  create mode 100644 subprojects/libvduse/libvduse.h
  create mode 100644 block/exp

Re: [PATCH] aspeed: i2c: Fix DMA len write-enable bit handling

2022-06-24 Thread Peter Delevoryas



> On Jun 24, 2022, at 1:31 PM, Peter Delevoryas  wrote:
> 
> I noticed i2c rx transfers were getting shortened to "1" on Zephyr. It
> seems to be because the Zephyr i2c driver sets the RX DMA len with the
> RX field write-enable bit set (bit 31) to avoid a read-modify-write. [1]
> 
> /* 0x1C : I2CM Master DMA Transfer Length Register   */
> 
> I think we should be checking the write-enable bits on the incoming
> value, not checking the register array. I'm not sure we're even writing
> the write-enable bits to the register array, actually.
> 
> [1] 
> https://github.com/AspeedTech-BMC/zephyr/blob/db3dbcc9c52e67a47180890ac938ed380b33f91c/drivers/i2c/i2c_aspeed.c#L145-L148

Arg, forgot this:

Fixes: ba2cccd64e90f34 ("aspeed: i2c: Add new mode support")

Should I resend as v2?

Thanks,
Peter

> 
> Signed-off-by: Peter Delevoryas 
> ---
> hw/i2c/aspeed_i2c.c | 8 
> 1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/i2c/aspeed_i2c.c b/hw/i2c/aspeed_i2c.c
> index 37ae1f2e04..c4fce7474a 100644
> --- a/hw/i2c/aspeed_i2c.c
> +++ b/hw/i2c/aspeed_i2c.c
> @@ -644,18 +644,18 @@ static void aspeed_i2c_bus_new_write(AspeedI2CBus *bus, 
> hwaddr offset,
>  RX_BUF_LEN) + 1;
> break;
> case A_I2CM_DMA_LEN:
> -w1t = ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, RX_BUF_LEN_W1T) ||
> -   ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, TX_BUF_LEN_W1T);
> +w1t = FIELD_EX32(value, I2CM_DMA_LEN, RX_BUF_LEN_W1T) ||
> +  FIELD_EX32(value, I2CM_DMA_LEN, TX_BUF_LEN_W1T);
> /* If none of the w1t bits are set, just write to the reg as normal. 
> */
> if (!w1t) {
> bus->regs[R_I2CM_DMA_LEN] = value;
> break;
> }
> -if (ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, RX_BUF_LEN_W1T)) {
> +if (FIELD_EX32(value, I2CM_DMA_LEN, RX_BUF_LEN_W1T)) {
> ARRAY_FIELD_DP32(bus->regs, I2CM_DMA_LEN, RX_BUF_LEN,
>  FIELD_EX32(value, I2CM_DMA_LEN, RX_BUF_LEN));
> }
> -if (ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, TX_BUF_LEN_W1T)) {
> +if (FIELD_EX32(value, I2CM_DMA_LEN, TX_BUF_LEN_W1T)) {
> ARRAY_FIELD_DP32(bus->regs, I2CM_DMA_LEN, TX_BUF_LEN,
>  FIELD_EX32(value, I2CM_DMA_LEN, TX_BUF_LEN));
> }
> -- 
> 2.30.2
>

[PATCH] aspeed: i2c: Fix DMA len write-enable bit handling

2022-06-24 Thread Peter Delevoryas

I noticed i2c rx transfers were getting shortened to "1" on Zephyr. It
seems to be because the Zephyr i2c driver sets the RX DMA len with the
RX field write-enable bit set (bit 31) to avoid a read-modify-write. [1]

/* 0x1C : I2CM Master DMA Transfer Length Register   */

I think we should be checking the write-enable bits on the incoming
value, not checking the register array. I'm not sure we're even writing
the write-enable bits to the register array, actually.

[1] 
https://github.com/AspeedTech-BMC/zephyr/blob/db3dbcc9c52e67a47180890ac938ed380b33f91c/drivers/i2c/i2c_aspeed.c#L145-L148

Signed-off-by: Peter Delevoryas 
---
 hw/i2c/aspeed_i2c.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hw/i2c/aspeed_i2c.c b/hw/i2c/aspeed_i2c.c
index 37ae1f2e04..c4fce7474a 100644
--- a/hw/i2c/aspeed_i2c.c
+++ b/hw/i2c/aspeed_i2c.c
@@ -644,18 +644,18 @@ static void aspeed_i2c_bus_new_write(AspeedI2CBus *bus, 
hwaddr offset,
  RX_BUF_LEN) + 1;
 break;
 case A_I2CM_DMA_LEN:
-w1t = ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, RX_BUF_LEN_W1T) ||
-   ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, TX_BUF_LEN_W1T);
+w1t = FIELD_EX32(value, I2CM_DMA_LEN, RX_BUF_LEN_W1T) ||
+  FIELD_EX32(value, I2CM_DMA_LEN, TX_BUF_LEN_W1T);
 /* If none of the w1t bits are set, just write to the reg as normal. */
 if (!w1t) {
 bus->regs[R_I2CM_DMA_LEN] = value;
 break;
 }
-if (ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, RX_BUF_LEN_W1T)) {
+if (FIELD_EX32(value, I2CM_DMA_LEN, RX_BUF_LEN_W1T)) {
 ARRAY_FIELD_DP32(bus->regs, I2CM_DMA_LEN, RX_BUF_LEN,
  FIELD_EX32(value, I2CM_DMA_LEN, RX_BUF_LEN));
 }
-if (ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, TX_BUF_LEN_W1T)) {
+if (FIELD_EX32(value, I2CM_DMA_LEN, TX_BUF_LEN_W1T)) {
 ARRAY_FIELD_DP32(bus->regs, I2CM_DMA_LEN, TX_BUF_LEN,
  FIELD_EX32(value, I2CM_DMA_LEN, TX_BUF_LEN));
 }
-- 
2.30.2

Re: [PATCH qemu v2 1/2] ppc: Define SETFIELD for the ppc target

2022-06-24 Thread Daniel Henrique Barboza


Alexey,

The newer version of this patch is having trouble with Gitlab runners, as
you can read in my feedback there.

I've tested this one just in case. The same problems happen. E.g. for the
cross-armel-system runner:


In file included from ../hw/intc/pnv_xive.c:14:
../hw/intc/pnv_xive.c: In function ‘pnv_xive_block_id’:
/builds/danielhb/qemu/target/ppc/cpu.h:45:33: error: conversion from ‘long long 
unsigned int’ to ‘long unsigned int’ changes value from ‘4222124650659840’ to 
‘0’ [-Werror=overflow]
   45 | #define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | 
PPC_BIT(bs))
  | 
^~~
/builds/danielhb/qemu/target/ppc/cpu.h:51:42: note: in definition of macro 
‘GETFIELD’
   51 | (((word) & (mask)) >> __builtin_ctzl(mask))
  |  ^~~~
../hw/intc/pnv_xive_regs.h:77:41: note: in expansion of macro ‘PPC_BITMASK’
   77 | #define  PC_TCTXT_CHIPIDPPC_BITMASK(12, 15)
  | ^~~
../hw/intc/pnv_xive.c:80:24: note: in expansion of macro ‘PC_TCTXT_CHIPID’
   80 | blk = GETFIELD(PC_TCTXT_CHIPID, cfg_val);
  |^~~
../hw/intc/pnv_xive.c: In function ‘pnv_xive_vst_addr’:
/builds/danielhb/qemu/target/ppc/cpu.h:45:33: error: conversion from ‘long long 
unsigned int’ to ‘long unsigned int’ changes value from ‘13835058055282163712’ 
to ‘0’ [-Werror=overflow]
   45 | #define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | 
PPC_BIT(bs))
  | 
^~~
/builds/danielhb/qemu/target/ppc/cpu.h:51:42: note: in definition of macro 
‘GETFIELD’
   51 | (((word) & (mask)) >> __builtin_ctzl(mask))
  |  ^~~~
../hw/intc/pnv_xive_regs.h:230:33: note: in expansion of macro ‘PPC_BITMASK’
  230 | #define VSD_MODEPPC_BITMASK(0, 1)
  | ^~~
../hw/intc/pnv_xive.c:226:18: note: in expansion of macro ‘VSD_MODE’
  226 | if (GETFIELD(VSD_MODE, vsd) == VSD_MODE_FORWARD) {
  |  ^~~~
../hw/intc/pnv_xive.c: In function ‘pnv_xive_end_update’:


Link:

https://gitlab.com/danielhb/qemu/-/jobs/2637716673


I don't know how to deal with that.


For the record: if this is too troublesome to fix, I am ok with just 
consolidating
the GETFIELD and SETFIELD inlines we already have, under cpu.h, keeping them 
exactly
as they are today (functions, not macros).


Thanks,


Daniel



On 6/17/22 03:07, Alexey Kardashevskiy wrote:

It keeps repeating, move it to the header. This uses __builtin_ctzl() to
allow using the macros in #define.

Signed-off-by: Alexey Kardashevskiy 
---
  include/hw/pci-host/pnv_phb3_regs.h | 16 
  target/ppc/cpu.h|  5 +
  hw/intc/pnv_xive.c  | 20 
  hw/intc/pnv_xive2.c | 20 
  hw/pci-host/pnv_phb4.c  | 16 
  5 files changed, 5 insertions(+), 72 deletions(-)

diff --git a/include/hw/pci-host/pnv_phb3_regs.h 
b/include/hw/pci-host/pnv_phb3_regs.h
index a174ef1f7045..38f8ce9d7406 100644
--- a/include/hw/pci-host/pnv_phb3_regs.h
+++ b/include/hw/pci-host/pnv_phb3_regs.h
@@ -12,22 +12,6 @@
  
  #include "qemu/host-utils.h"
  
-/*

- * QEMU version of the GETFIELD/SETFIELD macros
- *
- * These are common with the PnvXive model.
- */
-static inline uint64_t GETFIELD(uint64_t mask, uint64_t word)
-{
-return (word & mask) >> ctz64(mask);
-}
-
-static inline uint64_t SETFIELD(uint64_t mask, uint64_t word,
-uint64_t value)
-{
-return (word & ~mask) | ((value << ctz64(mask)) & mask);
-}
-
  /*
   * PBCQ XSCOM registers
   */
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 6d78078f379d..9a1f1ea3 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -47,6 +47,11 @@
   PPC_BIT32(bs))
  #define PPC_BITMASK8(bs, be)((PPC_BIT8(bs) - PPC_BIT8(be)) | PPC_BIT8(bs))
  
+#define GETFIELD(mask, word)   \

+(((word) & (mask)) >> __builtin_ctzl(mask))
+#define SETFIELD(mask, word, val)   \
+(((word) & ~(mask)) | (((uint64_t)(val) << __builtin_ctzl(mask)) & (mask)))
+
  
/*/
  /* Exception vectors definitions 
*/
  enum {
diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c
index 1ce1d7b07d63..c7b75ed12ee0 100644
--- a/hw/intc/pnv_xive.c
+++ b/hw/intc/pnv_xive.c
@@ -66,26 +66,6 @@ static const XiveVstInfo vst_infos[] = {
  qemu_log_mask(LOG_GUEST_ERROR, "XIVE[%x] - " fmt "\n",  \
(xive)->chip->chip_id, ## __VA_ARGS__);
  
-/*

- * QEMU version of the GETFIELD/SETFIELD macros
- *
- * TODO: It might be better to use the existing extract64

Re: [PATCH v2 3/3] target/ppc: Check page dir/table base alignment

2022-06-24 Thread Leandro Lupori


On 6/24/22 15:04, Richard Henderson wrote:


On 6/24/22 10:16, Leandro Lupori wrote:

Check if each page dir/table base address is properly aligned and
log a guest error if not, as real hardware behaves incorrectly in
this case.

Signed-off-by: Leandro Lupori 
---
  target/ppc/mmu-radix64.c | 15 +++
  1 file changed, 15 insertions(+)

diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 339cf5b4d8..1e7d932893 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -280,6 +280,14 @@ static int ppc_radix64_next_level(AddressSpace 
*as, vaddr eaddr,

  *psize -= *nls;
  if (!(pde & R_PTE_LEAF)) { /* Prepare for next iteration */
  *nls = pde & R_PDE_NLS;
+
+    if ((pde & R_PDE_NLB) & MAKE_64BIT_MASK(0, *nls + 3)) {
+    qemu_log_mask(LOG_GUEST_ERROR,
+    "%s: misaligned page dir/table base: 0x%"VADDR_PRIx
+    " page dir size: 0x"TARGET_FMT_lx"\n",
+    __func__, (pde & R_PDE_NLB), BIT(*nls + 3));
+    }
+
  index = eaddr >> (*psize - *nls);   /* Shift */
  index &= ((1UL << *nls) - 1);   /* Mask */
  *pte_addr = (pde & R_PDE_NLB) + (index * sizeof(pde));


In your response to my question on v1, you said that it appears that the 
cpu ignores bits
*nls+3. This isn't ignoring them -- it's including [nls+2 : nls] into 
pte_addr.


It would be better to compute this as

     index = ...
     index &= ...
     *pte_addr = ...
     if (*pte_addr & 7) {
     qemu_log(...);
     }



Right, I wanted to warn about the invalid alignment but I ended up 
forgetting to make QEMU match the CPU behavior.


The CPU seems to ignore bits [nls+2 : 0] of NLB.

The multiplication of index by sizeof(pde) discards the 3 lower bits and 
it's not possible for NLB to have its 8 lower bits set, as these are 
used for NLS plus some reserved bits in the PDE.

Then we need to make sure that bits [nls+2 : 8] of NLB are also 0.

So maybe something like this would do it:

index = eaddr >> (*psize - *nls);   /* Shift */
index &= ((1UL << *nls) - 1);   /* Mask */
*pte_addr = pde & R_PDE_NLB;
mask = MAKE_64BIT_MASK(0, *nls + 3);
if (*pte_addr & mask) {
qemu_log(...);
*pte_addr &= ~mask;
}
*pte_addr += index * sizeof(pde);

Thanks,
Leandro



r~

[PATCH] python: QEMUMachine: enable qmp accept timeout by default

2022-06-24 Thread Vladimir Sementsov-Ogievskiy

I've spent a lot of time trying to debug a hanging pipeline in gitlab. I
started from the idea that I had a problem in the code of my series (which
has some timeouts). Finally I found that the problem is that I had used the
QEMUMachine class directly to avoid qtest, and didn't add the necessary
arguments. QEMU fails and we wait for qmp accept endlessly. In gitlab
it's just stopped by a timeout (one hour) with no sign of what's going
wrong.

With the timeout enabled, gitlab doesn't wait for an hour and prints all
the needed information.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---

Hi all!

Just compare this
  https://gitlab.com/vsementsov/qemu/-/pipelines/572232557
and this
  https://gitlab.com/vsementsov/qemu/-/pipelines/572526252

and you'll see that the latter is much better.

 python/qemu/machine/machine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py
index 37191f433b..01a12f6f73 100644
--- a/python/qemu/machine/machine.py
+++ b/python/qemu/machine/machine.py
@@ -131,7 +131,7 @@ def __init__(self,
  drain_console: bool = False,
  console_log: Optional[str] = None,
  log_dir: Optional[str] = None,
- qmp_timer: Optional[float] = None):
+ qmp_timer: float = 30):
 '''
 Initialize a QEMUMachine
 
-- 
2.25.1

Re: [PATCH RESEND v2 2/2] target/ppc: Implement ISA 3.00 tlbie[l]

2022-06-24 Thread Richard Henderson


On 6/24/22 12:14, Leandro Lupori wrote:

This initial version supports the invalidation of one or all
TLB entries. Flush by PID/LPID, or based on process/partition
scope, is not supported, because it would make using the
generic QEMU TLB implementation hard. In these cases, all
entries are flushed.

Signed-off-by: Leandro Lupori 
---
  target/ppc/helper.h  |  18 +++
  target/ppc/mmu_helper.c  | 154 +++
  target/ppc/translate/storage-ctrl-impl.c.inc |  15 ++
  3 files changed, 187 insertions(+)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index d627cfe6ed..5e663a0a50 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -672,6 +672,24 @@ DEF_HELPER_FLAGS_1(tlbia, TCG_CALL_NO_RWG, void, env)
  DEF_HELPER_FLAGS_2(tlbie, TCG_CALL_NO_RWG, void, env, tl)
  DEF_HELPER_FLAGS_2(tlbiva, TCG_CALL_NO_RWG, void, env, tl)
  #if defined(TARGET_PPC64)
+
+/*
+ * tlbie[l] helper flags
+ *
+ * RIC, PRS, R and local are passed as flags in the last argument.
+ */
+#define TLBIE_F_RIC_SHIFT   0
+#define TLBIE_F_PRS_SHIFT   2
+#define TLBIE_F_R_SHIFT 3
+#define TLBIE_F_LOCAL_SHIFT 4
+
+#define TLBIE_F_RIC_MASK(3 << TLBIE_F_RIC_SHIFT)
+#define TLBIE_F_PRS (1 << TLBIE_F_PRS_SHIFT)
+#define TLBIE_F_R   (1 << TLBIE_F_R_SHIFT)
+#define TLBIE_F_LOCAL   (1 << TLBIE_F_LOCAL_SHIFT)
+


Better to put these somewhere else -- internal.h probably -- helper.h is included multiple 
times.


r~

[PATCH RESEND v2 2/2] target/ppc: Implement ISA 3.00 tlbie[l]

2022-06-24 Thread Leandro Lupori

This initial version supports the invalidation of one or all
TLB entries. Flush by PID/LPID, or based on process/partition
scope, is not supported, because it would make using the
generic QEMU TLB implementation hard. In these cases, all
entries are flushed.

Signed-off-by: Leandro Lupori 
---
 target/ppc/helper.h  |  18 +++
 target/ppc/mmu_helper.c  | 154 +++
 target/ppc/translate/storage-ctrl-impl.c.inc |  15 ++
 3 files changed, 187 insertions(+)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index d627cfe6ed..5e663a0a50 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -672,6 +672,24 @@ DEF_HELPER_FLAGS_1(tlbia, TCG_CALL_NO_RWG, void, env)
 DEF_HELPER_FLAGS_2(tlbie, TCG_CALL_NO_RWG, void, env, tl)
 DEF_HELPER_FLAGS_2(tlbiva, TCG_CALL_NO_RWG, void, env, tl)
 #if defined(TARGET_PPC64)
+
+/*
+ * tlbie[l] helper flags
+ *
+ * RIC, PRS, R and local are passed as flags in the last argument.
+ */
+#define TLBIE_F_RIC_SHIFT   0
+#define TLBIE_F_PRS_SHIFT   2
+#define TLBIE_F_R_SHIFT 3
+#define TLBIE_F_LOCAL_SHIFT 4
+
+#define TLBIE_F_RIC_MASK(3 << TLBIE_F_RIC_SHIFT)
+#define TLBIE_F_PRS (1 << TLBIE_F_PRS_SHIFT)
+#define TLBIE_F_R   (1 << TLBIE_F_R_SHIFT)
+#define TLBIE_F_LOCAL   (1 << TLBIE_F_LOCAL_SHIFT)
+
+DEF_HELPER_FLAGS_4(tlbie_isa300, TCG_CALL_NO_WG, void, \
+env, tl, tl, i32)
 DEF_HELPER_FLAGS_3(store_slb, TCG_CALL_NO_RWG, void, env, tl, tl)
 DEF_HELPER_2(load_slb_esid, tl, env, tl)
 DEF_HELPER_2(load_slb_vsid, tl, env, tl)
diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c
index 15239dc95b..b881aee23f 100644
--- a/target/ppc/mmu_helper.c
+++ b/target/ppc/mmu_helper.c
@@ -429,6 +429,160 @@ void helper_tlbie(CPUPPCState *env, target_ulong addr)
 ppc_tlb_invalidate_one(env, addr);
 }
 
+#if defined(TARGET_PPC64)
+
+/* Invalidation Selector */
+#define TLBIE_IS_VA 0
+#define TLBIE_IS_PID1
+#define TLBIE_IS_LPID   2
+#define TLBIE_IS_ALL3
+
+/* Radix Invalidation Control */
+#define TLBIE_RIC_TLB   0
+#define TLBIE_RIC_PWC   1
+#define TLBIE_RIC_ALL   2
+#define TLBIE_RIC_GRP   3
+
+/* Radix Actual Page sizes */
+#define TLBIE_R_AP_4K   0
+#define TLBIE_R_AP_64K  5
+#define TLBIE_R_AP_2M   1
+#define TLBIE_R_AP_1G   2
+
+/* RB field masks */
+#define TLBIE_RB_EPN_MASK   PPC_BITMASK(0, 51)
+#define TLBIE_RB_IS_MASKPPC_BITMASK(52, 53)
+#define TLBIE_RB_AP_MASKPPC_BITMASK(56, 58)
+
+void helper_tlbie_isa300(CPUPPCState *env, target_ulong rb, target_ulong rs,
+ uint32_t flags)
+{
+unsigned ric = (flags & TLBIE_F_RIC_MASK) >> TLBIE_F_RIC_SHIFT;
+/*
+ * With the exception of the checks for invalid instruction forms,
+ * PRS is currently ignored, because we don't know if a given TLB entry
+ * is process or partition scoped.
+ */
+bool prs = flags & TLBIE_F_PRS;
+bool r = flags & TLBIE_F_R;
+bool local = flags & TLBIE_F_LOCAL;
+bool effR;
+unsigned is = extract64(rb, PPC_BIT_NR(53), 2), set;
+unsigned ap;/* actual page size */
+target_ulong addr, pgoffs_mask;
+
+qemu_log_mask(CPU_LOG_MMU,
+"%s: local=%d addr=" TARGET_FMT_lx " ric=%u prs=%d r=%d is=%u\n",
+__func__, local, rb & TARGET_PAGE_MASK, ric, prs, r, is);
+
+effR = FIELD_EX64(env->msr, MSR, HV) ? r : env->spr[SPR_LPCR] & LPCR_HR;
+
+/* Partial TLB invalidation is supported for Radix only for now. */
+if (!effR) {
+goto inval_all;
+}
+
+/* Check for invalid instruction forms (effR=1). */
+if (unlikely(ric == TLBIE_RIC_GRP ||
+ ((ric == TLBIE_RIC_PWC || ric == TLBIE_RIC_ALL) &&
+   is == TLBIE_IS_VA) ||
+ (!prs && is == TLBIE_IS_PID))) {
+qemu_log_mask(LOG_GUEST_ERROR,
+"%s: invalid instruction form: ric=%u prs=%d r=%d is=%u\n",
+__func__, ric, prs, r, is);
+goto invalid;
+}
+
+/* We don't cache Page Walks. */
+if (ric == TLBIE_RIC_PWC) {
+if (local) {
+set = extract64(rb, PPC_BIT_NR(51), 12);
+if (set != 0) {
+qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid set: %d\n",
+  __func__, set);
+goto invalid;
+}
+}
+return;
+}
+
+/*
+ * Invalidation by LPID or PID is not supported, so fallback
+ * to full TLB flush in these cases.
+ */
+if (is != TLBIE_IS_VA) {
+goto inval_all;
+}
+
+/*
+ * The results of an attempt to invalidate a translation outside of
+ * quadrant 0 for Radix Tree translation (effR=1, RIC=0, PRS=1, IS=0,
+ * and EA 0:1 != 0b00) are boundedly undefined.
+ */
+if (unlikely(ric == TLBIE_RIC_TLB && prs && is == TLBIE_IS_VA &&
+ (rb & R_EADDR_QUADRANT) != R_EADDR_QUADRANT0)) {
+

[PATCH RESEND v2 1/2] target/ppc: Move tlbie[l] to decode tree

2022-06-24 Thread Leandro Lupori

Also decode RIC, PRS and R operands.

Signed-off-by: Leandro Lupori 
---
 target/ppc/cpu_init.c|  4 +-
 target/ppc/insn32.decode |  8 ++
 target/ppc/translate.c   | 64 +-
 target/ppc/translate/storage-ctrl-impl.c.inc | 87 
 4 files changed, 99 insertions(+), 64 deletions(-)
 create mode 100644 target/ppc/translate/storage-ctrl-impl.c.inc

diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index c16cb8dbe7..8d7e77f778 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -6368,7 +6368,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data)
PPC_FLOAT_EXT |
PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
PPC_MEM_SYNC | PPC_MEM_EIEIO |
-   PPC_MEM_TLBSYNC |
+   PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
PPC_64B | PPC_64H | PPC_64BX | PPC_ALTIVEC |
PPC_SEGMENT_64B | PPC_SLBI |
PPC_POPCNTB | PPC_POPCNTWD |
@@ -6585,7 +6585,7 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data)
PPC_FLOAT_EXT |
PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
PPC_MEM_SYNC | PPC_MEM_EIEIO |
-   PPC_MEM_TLBSYNC |
+   PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
PPC_64B | PPC_64H | PPC_64BX | PPC_ALTIVEC |
PPC_SEGMENT_64B | PPC_SLBI |
PPC_POPCNTB | PPC_POPCNTWD |
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 6ea48d5163..2b985249b8 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -809,3 +809,11 @@ VMODSD  000100 . . . 1001011@VX
 VMODUD  000100 . . . 11011001011@VX
 VMODSQ  000100 . . . 1111011@VX
 VMODUQ  000100 . . . 1101011@VX
+
+## TLB Management Instructions
+
+&X_tlbierb rs ric prs:bool r:bool
+@X_tlbie.. rs:5 - ric:2 prs:1 r:1 rb:5 .. . &X_tlbie
+
+TLBIE   01 . - .. . . . 0100110010 -@X_tlbie
+TLBIEL  01 . - .. . . . 0100010010 -@X_tlbie
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 1d6daa4608..4fcb311c2d 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -5424,64 +5424,6 @@ static void gen_tlbia(DisasContext *ctx)
 #endif  /* defined(CONFIG_USER_ONLY) */
 }
 
-/* tlbiel */
-static void gen_tlbiel(DisasContext *ctx)
-{
-#if defined(CONFIG_USER_ONLY)
-GEN_PRIV;
-#else
-bool psr = (ctx->opcode >> 17) & 0x1;
-
-if (ctx->pr || (!ctx->hv && !psr && ctx->hr)) {
-/*
- * tlbiel is privileged except when PSR=0 and HR=1, making it
- * hypervisor privileged.
- */
-GEN_PRIV;
-}
-
-gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]);
-#endif /* defined(CONFIG_USER_ONLY) */
-}
-
-/* tlbie */
-static void gen_tlbie(DisasContext *ctx)
-{
-#if defined(CONFIG_USER_ONLY)
-GEN_PRIV;
-#else
-bool psr = (ctx->opcode >> 17) & 0x1;
-TCGv_i32 t1;
-
-if (ctx->pr) {
-/* tlbie is privileged... */
-GEN_PRIV;
-} else if (!ctx->hv) {
-if (!ctx->gtse || (!psr && ctx->hr)) {
-/*
- * ... except when GTSE=0 or when PSR=0 and HR=1, making it
- * hypervisor privileged.
- */
-GEN_PRIV;
-}
-}
-
-if (NARROW_MODE(ctx)) {
-TCGv t0 = tcg_temp_new();
-tcg_gen_ext32u_tl(t0, cpu_gpr[rB(ctx->opcode)]);
-gen_helper_tlbie(cpu_env, t0);
-tcg_temp_free(t0);
-} else {
-gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]);
-}
-t1 = tcg_temp_new_i32();
-tcg_gen_ld_i32(t1, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
-tcg_gen_ori_i32(t1, t1, TLB_NEED_GLOBAL_FLUSH);
-tcg_gen_st_i32(t1, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
-tcg_temp_free_i32(t1);
-#endif /* defined(CONFIG_USER_ONLY) */
-}
-
 /* tlbsync */
 static void gen_tlbsync(DisasContext *ctx)
 {
@@ -6699,6 +6641,8 @@ static bool resolve_PLS_D(DisasContext *ctx, arg_D *d, 
arg_PLS_D *a)
 
 #include "translate/branch-impl.c.inc"
 
+#include "translate/storage-ctrl-impl.c.inc"
+
 /* Handles lfdp */
 static void gen_dform39(DisasContext *ctx)
 {
@@ -6937,10 +6881,6 @@ GEN_HANDLER(tlbia, 0x1F, 0x12, 0x0B, 0x03FFFC01, 
PPC_MEM_TLBIA),
  * XXX Those instructions will need to be handled differently for
  * different ISA versions
  */
-GEN_HANDLER(tlbiel, 0x1F, 0x12, 0x08, 0x001F0001, PPC_MEM_TLBIE),
-GEN_HANDLER(tlbie, 0x1F, 0x12, 0x09, 0x001F0001, PPC_MEM_TLBIE),
-GEN_HANDLER_E(tlbiel, 0x1F, 0x12, 0x08, 0x0011, PPC_NONE, PPC2_ISA300),
-GEN_HANDLER_E(tlbie, 0x1F, 0x12, 0x09, 0x0011, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER(tlbsync, 0x1

[PATCH RESEND v2 0/2] ppc: Implement ISA 3.00 tlbie[l]

2022-06-24 Thread Leandro Lupori

Resent after rebasing and fixing conflicts with master.

Changes from v1:
- squashed first 2 commits into 1, because adding PPC_MEM_TLBIE to
  P9/P10's insns_flags and moving only tlbie (and not tlbiel) to
  decode tree breaks PowerPC64 instruction decoder initialization.

Leandro Lupori (2):
  target/ppc: Move tlbie[l] to decode tree
  target/ppc: Implement ISA 3.00 tlbie[l]

 target/ppc/cpu_init.c|   4 +-
 target/ppc/helper.h  |  18 +++
 target/ppc/insn32.decode |   8 +
 target/ppc/mmu_helper.c  | 154 +++
 target/ppc/translate.c   |  64 +---
 target/ppc/translate/storage-ctrl-impl.c.inc | 102 
 6 files changed, 286 insertions(+), 64 deletions(-)
 create mode 100644 target/ppc/translate/storage-ctrl-impl.c.inc

-- 
2.25.1

Re: [PATCH v2 3/3] target/ppc: Check page dir/table base alignment

2022-06-24 Thread Fabiano Rosas

Leandro Lupori  writes:

> Check if each page dir/table base address is properly aligned and
> log a guest error if not, as real hardware behave incorrectly in
> this case.

I think the commit message could be clearer, something like:

According to PowerISA 3.1B, Book III 6.7.6 programming note, the page
directory base addresses are expected to be aligned to their size. Real
hardware seems to rely on that and will access the wrong address if they
are misaligned. This results in a translation failure even if the page
tables seem to be properly populated.

Let's make sure we capture this assumption in the code to help anyone
implementing page tables.

>
> Signed-off-by: Leandro Lupori 
> ---
>  target/ppc/mmu-radix64.c | 15 +++
>  1 file changed, 15 insertions(+)
>
> diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
> index 339cf5b4d8..1e7d932893 100644
> --- a/target/ppc/mmu-radix64.c
> +++ b/target/ppc/mmu-radix64.c
> @@ -280,6 +280,14 @@ static int ppc_radix64_next_level(AddressSpace *as, 
> vaddr eaddr,
>  *psize -= *nls;
>  if (!(pde & R_PTE_LEAF)) { /* Prepare for next iteration */
>  *nls = pde & R_PDE_NLS;
> +
> +if ((pde & R_PDE_NLB) & MAKE_64BIT_MASK(0, *nls + 3)) {
> +qemu_log_mask(LOG_GUEST_ERROR,
> +"%s: misaligned page dir/table base: 0x%"VADDR_PRIx
> +" page dir size: 0x"TARGET_FMT_lx"\n",
> +__func__, (pde & R_PDE_NLB), BIT(*nls + 3));
> +}
> +
>  index = eaddr >> (*psize - *nls);   /* Shift */
>  index &= ((1UL << *nls) - 1);   /* Mask */
>  *pte_addr = (pde & R_PDE_NLB) + (index * sizeof(pde));
> @@ -295,6 +303,13 @@ static int ppc_radix64_walk_tree(AddressSpace *as, vaddr 
> eaddr,
>  uint64_t index, pde, rpn, mask;
>  int level = 0;
>  
> +if (base_addr & MAKE_64BIT_MASK(0, nls + 3)) {
> +qemu_log_mask(LOG_GUEST_ERROR,
> +"%s: misaligned page dir base: 0x%"VADDR_PRIx
> +" page dir size: 0x"TARGET_FMT_lx"\n",
> +__func__, base_addr, BIT(nls + 3));
> +}
> +
>  index = eaddr >> (*psize - nls);/* Shift */
>  index &= ((1UL << nls) - 1);   /* Mask */
>  *pte_addr = base_addr + (index * sizeof(pde));

Re: [PATCH v2 1/3] ppc: Check partition and process table alignment

2022-06-24 Thread Fabiano Rosas

Leandro Lupori  writes:

> Check if partition and process tables are properly aligned, in
> their size, according to PowerISA 3.1B, Book III 6.7.6 programming
> note. Hardware and KVM also raise an exception in these cases.
>
> Signed-off-by: Leandro Lupori 

Reviewed-by: Fabiano Rosas

Re: [PATCH v2 2/3] target/ppc: Improve Radix xlate level validation

2022-06-24 Thread Fabiano Rosas

Leandro Lupori  writes:

> Check if the number and size of Radix levels are valid on
> POWER9/POWER10 CPUs, according to the supported Radix Tree
> Configurations described in their User Manuals.
>
> Signed-off-by: Leandro Lupori 
> ---
>  target/ppc/mmu-radix64.c | 51 +++-
>  1 file changed, 40 insertions(+), 11 deletions(-)
>
> diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
> index 9a8a2e2875..339cf5b4d8 100644
> --- a/target/ppc/mmu-radix64.c
> +++ b/target/ppc/mmu-radix64.c
> @@ -236,17 +236,39 @@ static void ppc_radix64_set_rc(PowerPCCPU *cpu, 
> MMUAccessType access_type,
>  }
>  }
>  
> +static bool ppc_radix64_is_valid_level(int level, int psize, uint64_t nls)
> +{
> +/*
> + * Check if this is a valid level, according to POWER9 and POWER10
> + * Processor User's Manuals, sections 4.10.4.1 and 5.10.6.1, 
> respectively:
> + * Supported Radix Tree Configurations and Resulting Page Sizes.
> + *
> + * NOTE: these checks are valid for POWER9 and POWER10 CPUs only. If
> + *   new CPUs that support other Radix configurations are added
> + *   (e.g., Microwatt), then a new method should be added to
> + *   PowerPCCPUClass, with this function being the POWER9/POWER10
> + *   implementation.
> + */

Sorry, this got too specific now. I could not respond in time before you
sent the v2. Let's cut the mentions of the code:

  Note: these checks are specific to POWER9 and POWER10 CPUs. Any future
  CPUs that support a different Radix MMU configuration will need their
  own implementation.

> +switch (level) {
> +case 0: /* Root Page Dir */
> +return psize == 52 && nls == 13;
> +case 1:
> +case 2:
> +return nls == 9;
> +case 3:
> +return nls == 9 || nls == 5;
> +default:
> +qemu_log_mask(LOG_GUEST_ERROR, "invalid radix level: %d\n", level);
> +return false;
> +}
> +}
> +
>  static int ppc_radix64_next_level(AddressSpace *as, vaddr eaddr,
>uint64_t *pte_addr, uint64_t *nls,
>int *psize, uint64_t *pte, int 
> *fault_cause)
>  {
>  uint64_t index, pde;
>  
> -if (*nls < 5) { /* Directory maps less than 2**5 entries */
> -*fault_cause |= DSISR_R_BADCONFIG;
> -return 1;
> -}
> -
>  /* Read page  entry from guest address space */
>  pde = ldq_phys(as, *pte_addr);
>  if (!(pde & R_PTE_VALID)) { /* Invalid Entry */
> @@ -270,12 +292,8 @@ static int ppc_radix64_walk_tree(AddressSpace *as, vaddr 
> eaddr,
>   hwaddr *raddr, int *psize, uint64_t *pte,
>   int *fault_cause, hwaddr *pte_addr)
>  {
> -uint64_t index, pde, rpn , mask;
> -
> -if (nls < 5) { /* Directory maps less than 2**5 entries */
> -*fault_cause |= DSISR_R_BADCONFIG;
> -return 1;
> -}
> +uint64_t index, pde, rpn, mask;
> +int level = 0;
>  
>  index = eaddr >> (*psize - nls);/* Shift */
>  index &= ((1UL << nls) - 1);   /* Mask */
> @@ -283,6 +301,11 @@ static int ppc_radix64_walk_tree(AddressSpace *as, vaddr 
> eaddr,
>  do {
>  int ret;
>  
> +if (!ppc_radix64_is_valid_level(level++, *psize, nls)) {
> +*fault_cause |= DSISR_R_BADCONFIG;
> +return 1;
> +}
> +
>  ret = ppc_radix64_next_level(as, eaddr, pte_addr, &nls, psize, &pde,
>   fault_cause);
>  if (ret) {
> @@ -456,6 +479,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU 
> *cpu,
>  }
>  } else {
>  uint64_t rpn, mask;
> +int level = 0;
>  
>  index = (eaddr & R_EADDR_MASK) >> (*g_page_size - nls); /* Shift */
>  index &= ((1UL << nls) - 1);/* Mask */
> @@ -475,6 +499,11 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU 
> *cpu,
>  return ret;
>  }
>  
> +if (!ppc_radix64_is_valid_level(level++, *g_page_size, nls)) {
> +fault_cause |= DSISR_R_BADCONFIG;
> +return 1;
> +}
> +
>  ret = ppc_radix64_next_level(cs->as, eaddr & R_EADDR_MASK, 
> &h_raddr,
>   &nls, g_page_size, &pte, 
> &fault_cause);
>  if (ret) {

[PATCH v4] hw: m25p80: add tests for write protect (WP# and SRWD bit)

2022-06-24 Thread Iris Chen

Signed-off-by: Iris Chen 
---
Adding the Signed-off-by tag -- sorry I missed that!

 tests/qtest/aspeed_smc-test.c | 62 +++
 1 file changed, 62 insertions(+)

diff --git a/tests/qtest/aspeed_smc-test.c b/tests/qtest/aspeed_smc-test.c
index ec233315e6..7786addfb8 100644
--- a/tests/qtest/aspeed_smc-test.c
+++ b/tests/qtest/aspeed_smc-test.c
@@ -56,7 +56,9 @@ enum {
 BULK_ERASE = 0xc7,
 READ = 0x03,
 PP = 0x02,
+WRSR = 0x1,
 WREN = 0x6,
+SRWD = 0x80,
 RESET_ENABLE = 0x66,
 RESET_MEMORY = 0x99,
 EN_4BYTE_ADDR = 0xB7,
@@ -390,6 +392,64 @@ static void test_read_status_reg(void)
 flash_reset();
 }
 
+static void test_status_reg_write_protection(void)
+{
+uint8_t r;
+
+spi_conf(CONF_ENABLE_W0);
+
+/* default case: WP# is high and SRWD is low -> status register writable */
+spi_ctrl_start_user();
+writeb(ASPEED_FLASH_BASE, WREN);
+/* test ability to write SRWD */
+writeb(ASPEED_FLASH_BASE, WRSR);
+writeb(ASPEED_FLASH_BASE, SRWD);
+writeb(ASPEED_FLASH_BASE, RDSR);
+r = readb(ASPEED_FLASH_BASE);
+spi_ctrl_stop_user();
+g_assert_cmphex(r & SRWD, ==, SRWD);
+
+/* WP# high and SRWD high -> status register writable */
+spi_ctrl_start_user();
+writeb(ASPEED_FLASH_BASE, WREN);
+/* test ability to write SRWD */
+writeb(ASPEED_FLASH_BASE, WRSR);
+writeb(ASPEED_FLASH_BASE, 0);
+writeb(ASPEED_FLASH_BASE, RDSR);
+r = readb(ASPEED_FLASH_BASE);
+spi_ctrl_stop_user();
+g_assert_cmphex(r & SRWD, ==, 0);
+
+/* WP# low and SRWD low -> status register writable */
+qtest_set_irq_in(global_qtest,
+ "/machine/soc/fmc/ssi.0/child[0]", "WP#", 0, 0);
+spi_ctrl_start_user();
+writeb(ASPEED_FLASH_BASE, WREN);
+/* test ability to write SRWD */
+writeb(ASPEED_FLASH_BASE, WRSR);
+writeb(ASPEED_FLASH_BASE, SRWD);
+writeb(ASPEED_FLASH_BASE, RDSR);
+r = readb(ASPEED_FLASH_BASE);
+spi_ctrl_stop_user();
+g_assert_cmphex(r & SRWD, ==, SRWD);
+
+/* WP# low and SRWD high -> status register NOT writable */
+spi_ctrl_start_user();
+writeb(ASPEED_FLASH_BASE, WREN);
+/* test ability to write SRWD */
+writeb(ASPEED_FLASH_BASE, WRSR);
+writeb(ASPEED_FLASH_BASE, 0);
+writeb(ASPEED_FLASH_BASE, RDSR);
+r = readb(ASPEED_FLASH_BASE);
+spi_ctrl_stop_user();
+/* write is not successful */
+g_assert_cmphex(r & SRWD, ==, SRWD);
+
+qtest_set_irq_in(global_qtest,
+ "/machine/soc/fmc/ssi.0/child[0]", "WP#", 0, 1);
+flash_reset();
+}
+
 static char tmp_path[] = "/tmp/qtest.m25p80.XX";
 
 int main(int argc, char **argv)
@@ -416,6 +476,8 @@ int main(int argc, char **argv)
 qtest_add_func("/ast2400/smc/read_page_mem", test_read_page_mem);
 qtest_add_func("/ast2400/smc/write_page_mem", test_write_page_mem);
 qtest_add_func("/ast2400/smc/read_status_reg", test_read_status_reg);
+qtest_add_func("/ast2400/smc/status_reg_write_protection",
+   test_status_reg_write_protection);
 
 ret = g_test_run();
 
-- 
2.30.2

[RFC PATCH] tests/9p: introduce declarative function calls

2022-06-24 Thread Christian Schoenebeck

There are currently 3 different functions for sending a 9p 'Twalk'
request. They are all doing the same thing, just in a slightly different
way and with slightly different function arguments.

Merge those 3 functions into a single function by using a struct for
function call arguments and use designated initializers when calling this
function to turn usage into a declarative approach, which is more
readable and easier to maintain.

Signed-off-by: Christian Schoenebeck 
---
  Before working on actual new stuff, I looked at the current unit test code
  and thought it's probably a good time to make the overall test code more
  readable before piling up more test code soon.

  In this patch I am suggesting to use named function arguments. For instance
 
 do_walk_expect_error(v9p, "non-existent", ENOENT);

  is probably a bit hard to tell what it is supposed to be doing without
  looking up the function prototype, whereas
  
Twalk((TWalkOpt) {
  .client = v9p, .path = "non-existent", .expectErr = ENOENT
});

  should make it immediately clear (provided you have some knowledge about the
  9p network protocol). I'm using this coding style of declarative function
  calls a lot nowadays, which makes sense especially in the context of unit
  test code, as such code very often passes literals as function arguments, as
  shown above. But it is also beneficial in other contexts, as it
  allows various combinations of possible function arguments to be
  used / omitted on function calls while still being handled by only one
  function implementation.
  
  The caller has great flexibility in which function arguments to use, and is
  also completely free in the order in which the arguments are specified.

  Another benefit is that you can also extend functionality later on, without
  breaking existing function calls. So this avoids a lot of refactoring work
  on the long-term.

  With C++ you could also define specific default values for omitted function
  arguments. In C, unfortunately, it is just the language's default initializer,
  which usually is simply zero.

  Obviously with a large number of possible function arguments provided, some
  combinations make sense and some simply don't. In this patch for instance
  this is handled with assertion faults like:
  
/* you can expect either Rwalk or Rlerror, but obviously not both */
g_assert(!opt.expectErr || !(opt.Rwalk.nwqid || opt.Rwalk.wqid));

  So this would be a runtime error. In C++ you could turn the function into
  a constexpr and make that a compile error instead, in C there is
  
_Static_assert(...)

  but as there is no constexpr, that would probably be hard to achieve.

  Thoughts?
---
 tests/qtest/virtio-9p-test.c | 79 +++-
 1 file changed, 42 insertions(+), 37 deletions(-)

diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
index 25305a4cf7..6a7f1f6252 100644
--- a/tests/qtest/virtio-9p-test.c
+++ b/tests/qtest/virtio-9p-test.c
@@ -669,50 +669,51 @@ static void do_version(QVirtio9P *v9p)
 g_assert_cmpmem(server_version, server_len, version, strlen(version));
 }
 
+/* options for 'Twalk' 9p request */
+typedef struct TWalkOpt {
+/* 9P client being used (mandatory) */
+QVirtio9P *client;
+/* path to walk to (mandatory) */
+const char *path;
+/* data being received from 9p server as 'Rwalk' response (optional) */
+struct {
+uint16_t *nwqid;
+v9fs_qid **wqid;
+} Rwalk;
+/* do we expect an Rlerror response, if yes which error code? (optional) */
+uint32_t expectErr;
+} TWalkOpt;
+
 /*
  * utility function: walk to requested dir and return fid for that dir and
  * the QIDs of server response
  */
-static uint32_t do_walk_rqids(QVirtio9P *v9p, const char *path, uint16_t 
*nwqid,
-  v9fs_qid **wqid)
+static uint32_t Twalk(TWalkOpt opt)
 {
 char **wnames;
 P9Req *req;
+uint32_t err;
 const uint32_t fid = genfid();
 
-int nwnames = split(path, "/", &wnames);
-
-req = v9fs_twalk(v9p, 0, fid, nwnames, wnames, 0);
-v9fs_req_wait_for_reply(req, NULL);
-v9fs_rwalk(req, nwqid, wqid);
-
-split_free(&wnames);
-return fid;
-}
+g_assert(opt.client);
+g_assert(opt.path);
+/* you can expect either Rwalk or Rlerror, but obviously not both */
+g_assert(!opt.expectErr || !(opt.Rwalk.nwqid || opt.Rwalk.wqid));
 
-/* utility function: walk to requested dir and return fid for that dir */
-static uint32_t do_walk(QVirtio9P *v9p, const char *path)
-{
-return do_walk_rqids(v9p, path, NULL, NULL);
-}
+int nwnames = split(opt.path, "/", &wnames);
 
-/* utility function: walk to requested dir and expect passed error response */
-static void do_walk_expect_error(QVirtio9P *v9p, const char *path, uint32_t 
err)
-{
-char **wnames;
-P9Req *req;
-uint32_t _err;
-const uint32_t fid = genfid();
-
-int nwnames = split(path, "/", &wnames);
-

Re: [PATCH v7 01/18] job.c: make job_mutex and job_lock/unlock() public

2022-06-24 Thread Vladimir Sementsov-Ogievskiy


I've already acked this (honestly, because Stefan did), but I still want to
clarify:

On 6/16/22 16:18, Emanuele Giuseppe Esposito wrote:

job mutex will be used to protect the job struct elements and list,
replacing AioContext locks.

Right now use a shared lock for all jobs, in order to keep things
simple. Once the AioContext lock is gone, we can introduce per-job
locks.

To simplify the switch from aiocontext to job lock, introduce
*nop*  lock/unlock functions and macros.
We want to always call job_lock/unlock outside the AioContext locks,
and not vice-versa, otherwise we might get a deadlock.


Could you describe here, why we get a deadlock?

As I understand, we'll deadlock if two code paths exist simultaneously:

1. we take job mutex under aiocontext lock
2. we take aiocontext lock under job mutex

If these paths exist, it's possible that one thread goes through [1] and
another through [2]. If thread [1] holds job-mutex and wants to take
aiocontext-lock, and at the same time thread [2] holds aiocontext-lock and wants
to take job-mutex, that's a deadlock.

If you say that we must avoid [1], do you have in mind that we have [2]
somewhere? If so, this should be mentioned here.

If not, could we just make a normal mutex, not a noop?


This is not
straightforward to do, and that's why we start with nop functions.
Once everything is protected by job_lock/unlock, we can change the nop into
an actual mutex and remove the aiocontext lock.

Since job_mutex is already being used, add static
real_job_{lock/unlock} for the existing usage.

Signed-off-by: Emanuele Giuseppe Esposito
Reviewed-by: Stefan Hajnoczi



--
Best regards,
Vladimir

Re: [PATCH v2 3/3] target/ppc: Check page dir/table base alignment

2022-06-24 Thread Richard Henderson


On 6/24/22 10:16, Leandro Lupori wrote:

Check if each page dir/table base address is properly aligned and
log a guest error if not, as real hardware behave incorrectly in
this case.

Signed-off-by: Leandro Lupori 
---
  target/ppc/mmu-radix64.c | 15 +++
  1 file changed, 15 insertions(+)

diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 339cf5b4d8..1e7d932893 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -280,6 +280,14 @@ static int ppc_radix64_next_level(AddressSpace *as, vaddr 
eaddr,
  *psize -= *nls;
  if (!(pde & R_PTE_LEAF)) { /* Prepare for next iteration */
  *nls = pde & R_PDE_NLS;
+
+if ((pde & R_PDE_NLB) & MAKE_64BIT_MASK(0, *nls + 3)) {
+qemu_log_mask(LOG_GUEST_ERROR,
+"%s: misaligned page dir/table base: 0x%"VADDR_PRIx
+" page dir size: 0x"TARGET_FMT_lx"\n",
+__func__, (pde & R_PDE_NLB), BIT(*nls + 3));
+}
+
  index = eaddr >> (*psize - *nls);   /* Shift */
  index &= ((1UL << *nls) - 1);   /* Mask */
  *pte_addr = (pde & R_PDE_NLB) + (index * sizeof(pde));


In your response to my question on v1, you said that it appears that the cpu ignores bits 
*nls+3. This isn't ignoring them -- it's including [nls+2 : nls] into pte_addr.


It would be better to compute this as

index = ...
index &= ...
*pte_addr = ...
if (*pte_addr & 7) {
qemu_log(...);
}


r~

Re: [PATCH v11 2/2] qtest/cxl: Add aarch64 virt test for CXL

2022-06-24 Thread Jonathan Cameron via

On Fri, 24 Jun 2022 17:12:25 +0100
Peter Maydell  wrote:

> On Thu, 16 Jun 2022 at 15:20, Jonathan Cameron
>  wrote:
> >
> > Add a single complex case for aarch64 virt machine.
> >
> > Signed-off-by: Jonathan Cameron 
> > ---
> >  tests/qtest/cxl-test.c  | 48 +
> >  tests/qtest/meson.build |  1 +
> >  2 files changed, 40 insertions(+), 9 deletions(-)
> >
> > diff --git a/tests/qtest/cxl-test.c b/tests/qtest/cxl-test.c
> > index 2133e973f4..1015d0e7c2 100644
> > --- a/tests/qtest/cxl-test.c
> > +++ b/tests/qtest/cxl-test.c
> > @@ -17,6 +17,11 @@
> >"-device pxb-cxl,id=cxl.1,bus=pcie.0,bus_nr=53 " \
> >"-M 
> > cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.targets.1=cxl.1,cxl-fmw.0.size=4G "
> >
> > +#define QEMU_VIRT_2PXB_CMD "-machine virt,cxl=on "  \
> > +  "-device pxb-cxl,id=cxl.0,bus=pcie.0,bus_nr=52 "  \
> > +  "-device pxb-cxl,id=cxl.1,bus=pcie.0,bus_nr=53 "  \
> > +  "-M 
> > cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.targets.1=cxl.1,cxl-fmw.0.size=4G "
> > +  
> 
> If CXL requires booting via UEFI, what does this test case do?
> It doesn't seem to be passing in a BIOS image.

Not a lot beyond checking device creation is valid etc and the machine boots.

There is a bios tables test that checks we pass the right tables to the BIOS 
image.
I didn't duplicate that for ARM on the basis it's more or less identical, but
perhaps that is worth adding.

To do any useful functional testing will require a mass of complex OS
handling after booting. That testing is definitely something I'd like to
add, but the userspace tooling isn't all in place yet. Final kernel series 
that's
needed to get to the point where you can use the non volatile memory had
a new version posted yesterday.

Jonathan

> 
> thanks
> -- PMM

Re: [PULL 0/3] Linux user for 7.1 patches

2022-06-24 Thread Richard Henderson


On 6/24/22 02:49, Laurent Vivier wrote:

The following changes since commit c8b2d413761af732a0798d8df45ce968732083fe:

   Merge tag 'bsd-user-syscall-2022q2-pull-request' of 
ssh://github.com/qemu-bsd-user/qemu-bsd-user into staging (2022-06-19 13:56:13 
-0700)

are available in the Git repository at:

   https://gitlab.com/laurent_vivier/qemu.git 
tags/linux-user-for-7.1-pull-request

for you to fetch changes up to 9a7f682c26acae5bc8bfd1f7c774070da54f1625:

   linux-user: Adjust child_tidptr on set_tid_address() syscall (2022-06-24 
10:00:01 +0200)


linux-user pull request 20220624


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/7.1 as 
appropriate.


r~






Helge Deller (1):
   linux-user: Adjust child_tidptr on set_tid_address() syscall

Ilya Leoshkevich (1):
   linux-user: Add partial support for MADV_DONTNEED

Richard Henderson (1):
   linux-user/x86_64: Fix ELF_PLATFORM

  linux-user/elfload.c| 30 +
  linux-user/mmap.c   | 64 +
  linux-user/syscall.c| 20 ++--
  linux-user/user-internals.h |  1 +
  linux-user/user-mmap.h  |  1 +
  5 files changed, 92 insertions(+), 24 deletions(-)

[PATCH v9 4/4] module: Use bundle mechanism

2022-06-24 Thread Akihiko Odaki

Before this change, the directory of the executable was being added to
resolve modules in the build tree. However, get_relocated_path() can now
resolve them with the new bundle mechanism.

Signed-off-by: Akihiko Odaki 
---
 util/module.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/util/module.c b/util/module.c
index 6bb4ad915a1..8ddb0e18f51 100644
--- a/util/module.c
+++ b/util/module.c
@@ -274,7 +274,6 @@ bool module_load_one(const char *prefix, const char 
*lib_name, bool mayfail)
 dirs[n_dirs++] = g_strdup_printf("%s", search_dir);
 }
 dirs[n_dirs++] = get_relocated_path(CONFIG_QEMU_MODDIR);
-dirs[n_dirs++] = g_strdup(qemu_get_exec_dir());
 
 #ifdef CONFIG_MODULE_UPGRADES
 version_dir = g_strcanon(g_strdup(QEMU_PKGVERSION),
-- 
2.32.1 (Apple Git-133)

Re: [PATCH 2/2] target/arm: Check V7VE as well as LPAE in arm_pamax

2022-06-24 Thread Richard Henderson


On 6/24/22 09:27, Peter Maydell wrote:

+/*
+ * In machvirt_init, we call arm_pamax on a cpu that is not fully
+ * initialized, so we can't rely on the propagation done in realize.
+ */
+if (arm_feature(&cpu->env, ARM_FEATURE_LPAE) ||
+arm_feature(&cpu->env, ARM_FEATURE_V7VE)) {
  /* v7 with LPAE */
  return 40;


I guess this is expedient, so on that basis
Reviewed-by: Peter Maydell 

but as I mentioned in the gitlab issue it's kind of bogus
that the virt board is doing stuff to a non-realized CPU object.


My first look suggested that the virt board wasn't even setting all of the cpu properties 
properly, so realization might not help.  I meant to go back again and spend more time, 
but that hasn't happened yet.



r~

[PATCH v9 2/4] cutils: Introduce bundle mechanism

2022-06-24 Thread Akihiko Odaki

Developers often run QEMU without installing. The bundle mechanism
allows to look up files which should be present in installation even in
such a situation.

It is a general mechanism and can find any files in the installation
tree. The build tree will have a new directory, qemu-bundle, to
represent what files the installation tree would have for reference by
the executables.

Note that it abandons compatibility with Windows older than 8. The
extended support for the prior version, 7 ended more than 2 years ago,
and it is unlikely that someone would like to run the latest QEMU on
such an old system.

Signed-off-by: Akihiko Odaki 
Suggested-by: Paolo Bonzini 
---
 docs/about/build-platforms.rst  |  2 +-
 include/qemu/cutils.h   | 18 +++--
 include/qemu/osdep.h|  2 +-
 meson.build |  4 ++
 scripts/symlink-install-tree.py | 37 ++
 util/cutils.c   | 68 +++--
 util/meson.build|  1 +
 7 files changed, 106 insertions(+), 26 deletions(-)
 create mode 100755 scripts/symlink-install-tree.py

diff --git a/docs/about/build-platforms.rst b/docs/about/build-platforms.rst
index 1958edb4305..ebde20f9815 100644
--- a/docs/about/build-platforms.rst
+++ b/docs/about/build-platforms.rst
@@ -88,7 +88,7 @@ Windows
 
 The project aims to support the two most recent versions of Windows that are
 still supported by the vendor. The minimum Windows API that is currently
-targeted is "Windows 7", so theoretically the QEMU binaries can still be run
+targeted is "Windows 8", so theoretically the QEMU binaries can still be run
 on older versions of Windows, too. However, such old versions of Windows are
 not tested anymore, so it is recommended to use one of the latest versions of
 Windows instead.
diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h
index d3e532b64c8..92c436d8c70 100644
--- a/include/qemu/cutils.h
+++ b/include/qemu/cutils.h
@@ -224,9 +224,21 @@ const char *qemu_get_exec_dir(void);
  * @dir: the directory (typically a `CONFIG_*DIR` variable) to be relocated.
  *
  * Returns a path for @dir that uses the directory of the running executable
- * as the prefix.  For example, if `bindir` is `/usr/bin` and @dir is
- * `/usr/share/qemu`, the function will append `../share/qemu` to the
- * directory that contains the running executable and return the result.
+ * as the prefix.
+ *
+ * When a directory named `qemu-bundle` exists in the directory of the running
+ * executable, the path to the directory will be prepended to @dir. For
+ * example, if the directory of the running executable is `/qemu/build` @dir
+ * is `/usr/share/qemu`, the result will be
+ * `/qemu/build/qemu-bundle/usr/share/qemu`. The directory is expected to exist
+ * in the build tree.
+ *
+ * Otherwise, the directory of the running executable will be used as the
+ * prefix and it appends the relative path from `bindir` to @dir. For example,
+ * if the directory of the running executable is `/opt/qemu/bin`, `bindir` is
+ * `/usr/bin` and @dir is `/usr/share/qemu`, the result will be
+ * `/opt/qemu/bin/../share/qemu`.
+ *
  * The returned string should be freed by the caller.
  */
 char *get_relocated_path(const char *dir);
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index b1c161c035a..84f8b9d0243 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -75,7 +75,7 @@ QEMU_EXTERN_C int daemon(int, int);
 #ifdef _WIN32
 /* as defined in sdkddkver.h */
 #ifndef _WIN32_WINNT
-#define _WIN32_WINNT 0x0601 /* Windows 7 API (should be in sync with glib) */
+#define _WIN32_WINNT 0x0602 /* Windows 8 API (should be in sync with glib) */
 #endif
 /* reduces the number of implicitly included headers */
 #ifndef WIN32_LEAN_AND_MEAN
diff --git a/meson.build b/meson.build
index 9efcb175d16..c49f5ebfc37 100644
--- a/meson.build
+++ b/meson.build
@@ -7,6 +7,8 @@ add_test_setup('quick', exclude_suites: ['slow', 'thorough'], 
is_default: true)
 add_test_setup('slow', exclude_suites: ['thorough'], env: ['G_TEST_SLOW=1', 
'SPEED=slow'])
 add_test_setup('thorough', env: ['G_TEST_SLOW=1', 'SPEED=thorough'])
 
+meson.add_postconf_script('scripts/symlink-install-tree.py')
+
 not_found = dependency('', required: false)
 keyval = import('keyval')
 ss = import('sourceset')
@@ -356,10 +358,12 @@ nvmm =not_found
 hvf = not_found
 midl = not_found
 widl = not_found
+pathcch = not_found
 host_dsosuf = '.so'
 if targetos == 'windows'
   midl = find_program('midl', required: false)
   widl = find_program('widl', required: false)
+  pathcch = cc.find_library('pathcch')
   socket = cc.find_library('ws2_32')
   winmm = cc.find_library('winmm')
 
diff --git a/scripts/symlink-install-tree.py b/scripts/symlink-install-tree.py
new file mode 100755
index 000..f35aa176404
--- /dev/null
+++ b/scripts/symlink-install-tree.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+
+from pathlib import Path
+import errno
+import json
+import os
+import subprocess
+imp

[PATCH v9 3/4] datadir: Use bundle mechanism

2022-06-24 Thread Akihiko Odaki

softmmu/datadir.c had its own implementation to find files in the
build tree, but now bundle mechanism provides the unified
implementation which works for datadir and the other files.

Signed-off-by: Akihiko Odaki 
---
 .travis.yml |  2 +-
 pc-bios/keymaps/meson.build | 21 ++---
 pc-bios/meson.build | 13 +++--
 scripts/oss-fuzz/build.sh   |  2 +-
 softmmu/datadir.c   | 22 +-
 tests/qtest/fuzz/fuzz.c | 15 ---
 6 files changed, 12 insertions(+), 63 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 9afc4a54b8f..4fdc9a67855 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -223,7 +223,7 @@ jobs:
 - BUILD_RC=0 && make -j${JOBS} || BUILD_RC=$?
 - |
   if [ "$BUILD_RC" -eq 0 ] ; then
-  mv pc-bios/s390-ccw/*.img pc-bios/ ;
+  mv pc-bios/s390-ccw/*.img qemu-bundle/usr/local/share/qemu ;
   ${TEST_CMD} ;
   else
   $(exit $BUILD_RC);
diff --git a/pc-bios/keymaps/meson.build b/pc-bios/keymaps/meson.build
index 44247a12b54..2837eb34f4e 100644
--- a/pc-bios/keymaps/meson.build
+++ b/pc-bios/keymaps/meson.build
@@ -40,9 +40,9 @@ else
 endif
 cp = find_program('cp')
 
-t = []
-foreach km, args: keymaps
-  if native_qemu_keymap.found()
+if native_qemu_keymap.found()
+  t = []
+  foreach km, args: keymaps
 # generate with qemu-kvm
 t += custom_target(km,
build_by_default: true,
@@ -50,20 +50,11 @@ foreach km, args: keymaps
command: [native_qemu_keymap, '-f', '@OUTPUT@', 
args.split()],
install: true,
install_dir: qemu_datadir / 'keymaps')
-  else
-# copy from source tree
-t += custom_target(km,
-   build_by_default: true,
-   input: km,
-   output: km,
-   command: [cp, '@INPUT@', '@OUTPUT@'],
-   install: true,
-   install_dir: qemu_datadir / 'keymaps')
-  endif
-endforeach
+  endforeach
 
-if native_qemu_keymap.found()
   alias_target('update-keymaps', t)
+else
+  install_data(keymaps.keys(), install_dir: qemu_datadir / 'keymaps')
 endif
 
 install_data(['sl', 'sv'], install_dir: qemu_datadir / 'keymaps')
diff --git a/pc-bios/meson.build b/pc-bios/meson.build
index 41ba1c0ec7b..388e0db6e40 100644
--- a/pc-bios/meson.build
+++ b/pc-bios/meson.build
@@ -85,16 +85,9 @@ blobs = [
   'vof-nvram.bin',
 ]
 
-ln_s = [find_program('ln', required: true), '-sf']
-foreach f : blobs
-  roms += custom_target(f,
-build_by_default: have_system,
-output: f,
-input: files('meson.build'),# dummy input
-install: get_option('install_blobs'),
-install_dir: qemu_datadir,
-command: [ ln_s, meson.project_source_root() / 'pc-bios' / f, 
'@OUTPUT@' ])
-endforeach
+if get_option('install_blobs')
+  install_data(blobs, install_dir: qemu_datadir)
+endif
 
 subdir('descriptors')
 subdir('keymaps')
diff --git a/scripts/oss-fuzz/build.sh b/scripts/oss-fuzz/build.sh
index 98b56e05210..16316b25662 100755
--- a/scripts/oss-fuzz/build.sh
+++ b/scripts/oss-fuzz/build.sh
@@ -88,7 +88,7 @@ if [ "$GITLAB_CI" != "true" ]; then
 fi
 
 # Copy over the datadir
-cp  -r ../pc-bios/ "$DEST_DIR/pc-bios"
+cp  -r ../pc-bios/ "$DEST_DIR/data"
 
 targets=$(./qemu-fuzz-i386 | awk '$1 ~ /\*/  {print $2}')
 base_copy="$DEST_DIR/qemu-fuzz-i386-target-$(echo "$targets" | head -n 1)"
diff --git a/softmmu/datadir.c b/softmmu/datadir.c
index 160cac999a6..697cffea932 100644
--- a/softmmu/datadir.c
+++ b/softmmu/datadir.c
@@ -83,26 +83,6 @@ void qemu_add_data_dir(char *path)
 data_dir[data_dir_idx++] = path;
 }
 
-/*
- * Find a likely location for support files using the location of the binary.
- * When running from the build tree this will be "$bindir/pc-bios".
- * Otherwise, this is CONFIG_QEMU_DATADIR (possibly relocated).
- *
- * The caller must use g_free() to free the returned data when it is
- * no longer required.
- */
-static char *find_datadir(void)
-{
-g_autofree char *dir = NULL;
-
-dir = g_build_filename(qemu_get_exec_dir(), "pc-bios", NULL);
-if (g_file_test(dir, G_FILE_TEST_IS_DIR)) {
-return g_steal_pointer(&dir);
-}
-
-return get_relocated_path(CONFIG_QEMU_DATADIR);
-}
-
 void qemu_add_default_firmwarepath(void)
 {
 char **dirs;
@@ -116,7 +96,7 @@ void qemu_add_default_firmwarepath(void)
 g_strfreev(dirs);
 
 /* try to find datadir relative to the executable path */
-qemu_add_data_dir(find_datadir());
+qemu_add_data_dir(get_relocated_path(CONFIG_QEMU_DATADIR));
 }
 
 void qemu_list_data_dirs(void)
diff --git a/tests/qtest/fuzz/fuzz.c b/tests/qtest/fuzz/fuzz.c
index 0ad4ba9e94d..2062b40d82b 100644
--- a/tests/qtest/fuzz/fuzz.c
+++ b/tests/qtest/fuzz/fuzz.c
@@ -174,21 +174,6 @@ int LLVMFuzzerInitialize(int *a

[PATCH v9 1/4] tests/vm: do not specify -bios option

2022-06-24 Thread Akihiko Odaki

From: Paolo Bonzini 

When running from the build tree, the executable is able to find
the BIOS on its own; when running from the source tree, a firmware
blob should already be installed and there is no guarantee that
the one in the source tree works with the QEMU that is being used for
the installation.

Just remove the -bios option, since it is unnecessary and in fact
there are other x86 VM tests that do not bother specifying it.

Signed-off-by: Paolo Bonzini 
Reviewed-by: Daniel P. Berrangé 
Reviewed-by: Thomas Huth 
Signed-off-by: Akihiko Odaki 
Message-Id: <20220616083025.116902-1-pbonz...@redhat.com>
---
 tests/vm/fedora  | 1 -
 tests/vm/freebsd | 1 -
 tests/vm/netbsd  | 1 -
 tests/vm/openbsd | 1 -
 4 files changed, 4 deletions(-)

diff --git a/tests/vm/fedora b/tests/vm/fedora
index 92b78d6e2c9..12eca919a08 100755
--- a/tests/vm/fedora
+++ b/tests/vm/fedora
@@ -79,7 +79,6 @@ class FedoraVM(basevm.BaseVM):
 self.exec_qemu_img("create", "-f", "qcow2", img_tmp, self.size)
 self.print_step("Booting installer")
 self.boot(img_tmp, extra_args = [
-"-bios", "pc-bios/bios-256k.bin",
 "-machine", "graphics=off",
 "-device", "VGA",
 "-cdrom", iso
diff --git a/tests/vm/freebsd b/tests/vm/freebsd
index 805db759d67..cd1fabde523 100755
--- a/tests/vm/freebsd
+++ b/tests/vm/freebsd
@@ -95,7 +95,6 @@ class FreeBSDVM(basevm.BaseVM):
 
 self.print_step("Booting installer")
 self.boot(img_tmp, extra_args = [
-"-bios", "pc-bios/bios-256k.bin",
 "-machine", "graphics=off",
 "-device", "VGA",
 "-cdrom", iso
diff --git a/tests/vm/netbsd b/tests/vm/netbsd
index 45aa9a7fda7..aa883ec23c9 100755
--- a/tests/vm/netbsd
+++ b/tests/vm/netbsd
@@ -86,7 +86,6 @@ class NetBSDVM(basevm.BaseVM):
 
 self.print_step("Booting installer")
 self.boot(img_tmp, extra_args = [
-"-bios", "pc-bios/bios-256k.bin",
 "-machine", "graphics=off",
 "-cdrom", iso
 ])
diff --git a/tests/vm/openbsd b/tests/vm/openbsd
index 13c82542140..6f1b6f5b98a 100755
--- a/tests/vm/openbsd
+++ b/tests/vm/openbsd
@@ -82,7 +82,6 @@ class OpenBSDVM(basevm.BaseVM):
 
 self.print_step("Booting installer")
 self.boot(img_tmp, extra_args = [
-"-bios", "pc-bios/bios-256k.bin",
 "-machine", "graphics=off",
 "-device", "VGA",
 "-cdrom", iso
-- 
2.32.1 (Apple Git-133)

[PATCH v9 0/4] cutils: Introduce bundle mechanism

2022-06-24 Thread Akihiko Odaki

Developers often run QEMU without installing. The bundle mechanism
allows to look up files which should be present in installation even in
such a situation.

It is a general mechanism and can find any files located relative
to the installation tree. The build tree must have a new directory,
qemu-bundle, to represent what files the installation tree would
have for reference by the executables.

Note that this abandons compatibility with Windows older than 8 to use
PathCchSkipRoot(). The extended support for the prior version, 7 ended
more than 2 years ago, and it is unlikely that anyone would like to run
the latest QEMU on such an old system.

v9:
* Update _WIN32_WINNT in include/qemu/osdep.h (Thomas Huth)

v8:
* Pass absolute paths to get_relocated_path() (Paolo Bonzini)
* Use meson introspection (Paolo Bonzini)
* Drop "qga: Relocate a path emitted in the help text" as it is no longer
  relevant for the bundle mechanism.

v7: Properly fix --firmwarepath (Daniel P. Berrangé)

v6: Reuse get_relocated_path() in find_bundle() (Paolo Bonzini)

v5:
* Prefer qemu-bundle if it exists. (Daniel P. Berrangé)
* Check install_blobs option before installing BIOSes (Paolo Bonzini)
* Add common code to set up qemu-bundle to the top level meson.build
  (Paolo Bonzini)

v4:
* Add Daniel P. Berrangé to CC. Hopefully this helps merging his patch:
  https://mail.gnu.org/archive/html/qemu-devel/2022-06/msg02276.html
* Rebased to the latest QEMU.

v3:
* Note that the bundle mechanism is for any files located relative to the
  installation tree including but not limited to datadir. (Peter Maydell)
* Fix "bridge" typo (Philippe Mathieu-Daudé)

v2: Rebased to the latest QEMU.

Akihiko Odaki (3):
  cutils: Introduce bundle mechanism
  datadir: Use bundle mechanism
  module: Use bundle mechanism

Paolo Bonzini (1):
  tests/vm: do not specify -bios option

 .travis.yml |  2 +-
 docs/about/build-platforms.rst  |  2 +-
 include/qemu/cutils.h   | 18 +++--
 include/qemu/osdep.h|  2 +-
 meson.build |  4 ++
 pc-bios/keymaps/meson.build | 21 +++---
 pc-bios/meson.build | 13 ++-
 scripts/oss-fuzz/build.sh   |  2 +-
 scripts/symlink-install-tree.py | 37 ++
 softmmu/datadir.c   | 22 +--
 tests/qtest/fuzz/fuzz.c | 15 
 tests/vm/fedora |  1 -
 tests/vm/freebsd|  1 -
 tests/vm/netbsd |  1 -
 tests/vm/openbsd|  1 -
 util/cutils.c   | 68 +++--
 util/meson.build|  1 +
 util/module.c   |  1 -
 18 files changed, 118 insertions(+), 94 deletions(-)
 create mode 100755 scripts/symlink-install-tree.py

-- 
2.32.1 (Apple Git-133)

Re: [PATCH v7 10/18] jobs: rename static functions called with job_mutex held

2022-06-24 Thread Emanuele Giuseppe Esposito

Am 24/06/2022 um 17:28 schrieb Paolo Bonzini:
> On 6/24/22 16:29, Kevin Wolf wrote:
>> Yes, I think Vladimir is having the same difficulties with reading the
>> series as I had. And I believe his suggestion would make the
>> intermediate states less impossible to review. The question is how much
>> work it would be and whether you're willing to do this. As I said, if
>> reorganising is too hard, I'm okay with just ignoring the intermediate
>> state and reviewing the series as if it were a single patch.
> 
> I think we've tried different intermediate states for each of the
> previous 6 versions, and none of them were really satisfactory. :(
> 

Yes. v7 in this case basically means that we tried at least 4-5 times to
reorganize patches.

Nevertheless I could give it a try. I just hope I won't regret it :)

If I don't manage, I will just give up and re-send the series with
Vladimir's nitpicks.

But yeah, I guess we all agree that this is the last time I reorganize
this series.

Feedback is always very welcome, but not anymore on reordering
please ;)

Thank you,
Emanuele

[PATCH v2 3/3] target/ppc: Check page dir/table base alignment

2022-06-24 Thread Leandro Lupori

Check if each page dir/table base address is properly aligned and
log a guest error if not, as real hardware behaves incorrectly in
this case.

Signed-off-by: Leandro Lupori 
---
 target/ppc/mmu-radix64.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 339cf5b4d8..1e7d932893 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -280,6 +280,14 @@ static int ppc_radix64_next_level(AddressSpace *as, vaddr 
eaddr,
 *psize -= *nls;
 if (!(pde & R_PTE_LEAF)) { /* Prepare for next iteration */
 *nls = pde & R_PDE_NLS;
+
+if ((pde & R_PDE_NLB) & MAKE_64BIT_MASK(0, *nls + 3)) {
+qemu_log_mask(LOG_GUEST_ERROR,
+"%s: misaligned page dir/table base: 0x%"VADDR_PRIx
+" page dir size: 0x"TARGET_FMT_lx"\n",
+__func__, (pde & R_PDE_NLB), BIT(*nls + 3));
+}
+
 index = eaddr >> (*psize - *nls);   /* Shift */
 index &= ((1UL << *nls) - 1);   /* Mask */
 *pte_addr = (pde & R_PDE_NLB) + (index * sizeof(pde));
@@ -295,6 +303,13 @@ static int ppc_radix64_walk_tree(AddressSpace *as, vaddr 
eaddr,
 uint64_t index, pde, rpn, mask;
 int level = 0;
 
+if (base_addr & MAKE_64BIT_MASK(0, nls + 3)) {
+qemu_log_mask(LOG_GUEST_ERROR,
+"%s: misaligned page dir base: 0x%"VADDR_PRIx
+" page dir size: 0x"TARGET_FMT_lx"\n",
+__func__, base_addr, BIT(nls + 3));
+}
+
 index = eaddr >> (*psize - nls);/* Shift */
 index &= ((1UL << nls) - 1);   /* Mask */
 *pte_addr = base_addr + (index * sizeof(pde));
-- 
2.25.1

[PATCH v2 2/3] target/ppc: Improve Radix xlate level validation

2022-06-24 Thread Leandro Lupori

Check if the number and size of Radix levels are valid on
POWER9/POWER10 CPUs, according to the supported Radix Tree
Configurations described in their User Manuals.

Signed-off-by: Leandro Lupori 
---
 target/ppc/mmu-radix64.c | 51 +++-
 1 file changed, 40 insertions(+), 11 deletions(-)

diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 9a8a2e2875..339cf5b4d8 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -236,17 +236,39 @@ static void ppc_radix64_set_rc(PowerPCCPU *cpu, 
MMUAccessType access_type,
 }
 }
 
+static bool ppc_radix64_is_valid_level(int level, int psize, uint64_t nls)
+{
+/*
+ * Check if this is a valid level, according to POWER9 and POWER10
+ * Processor User's Manuals, sections 4.10.4.1 and 5.10.6.1, respectively:
+ * Supported Radix Tree Configurations and Resulting Page Sizes.
+ *
+ * NOTE: these checks are valid for POWER9 and POWER10 CPUs only. If
+ *   new CPUs that support other Radix configurations are added
+ *   (e.g., Microwatt), then a new method should be added to
+ *   PowerPCCPUClass, with this function being the POWER9/POWER10
+ *   implementation.
+ */
+switch (level) {
+case 0: /* Root Page Dir */
+return psize == 52 && nls == 13;
+case 1:
+case 2:
+return nls == 9;
+case 3:
+return nls == 9 || nls == 5;
+default:
+qemu_log_mask(LOG_GUEST_ERROR, "invalid radix level: %d\n", level);
+return false;
+}
+}
+
 static int ppc_radix64_next_level(AddressSpace *as, vaddr eaddr,
   uint64_t *pte_addr, uint64_t *nls,
   int *psize, uint64_t *pte, int *fault_cause)
 {
 uint64_t index, pde;
 
-if (*nls < 5) { /* Directory maps less than 2**5 entries */
-*fault_cause |= DSISR_R_BADCONFIG;
-return 1;
-}
-
 /* Read page  entry from guest address space */
 pde = ldq_phys(as, *pte_addr);
 if (!(pde & R_PTE_VALID)) { /* Invalid Entry */
@@ -270,12 +292,8 @@ static int ppc_radix64_walk_tree(AddressSpace *as, vaddr 
eaddr,
  hwaddr *raddr, int *psize, uint64_t *pte,
  int *fault_cause, hwaddr *pte_addr)
 {
-uint64_t index, pde, rpn , mask;
-
-if (nls < 5) { /* Directory maps less than 2**5 entries */
-*fault_cause |= DSISR_R_BADCONFIG;
-return 1;
-}
+uint64_t index, pde, rpn, mask;
+int level = 0;
 
 index = eaddr >> (*psize - nls);/* Shift */
 index &= ((1UL << nls) - 1);   /* Mask */
@@ -283,6 +301,11 @@ static int ppc_radix64_walk_tree(AddressSpace *as, vaddr 
eaddr,
 do {
 int ret;
 
+if (!ppc_radix64_is_valid_level(level++, *psize, nls)) {
+*fault_cause |= DSISR_R_BADCONFIG;
+return 1;
+}
+
 ret = ppc_radix64_next_level(as, eaddr, pte_addr, &nls, psize, &pde,
  fault_cause);
 if (ret) {
@@ -456,6 +479,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
 }
 } else {
 uint64_t rpn, mask;
+int level = 0;
 
 index = (eaddr & R_EADDR_MASK) >> (*g_page_size - nls); /* Shift */
 index &= ((1UL << nls) - 1);/* Mask */
@@ -475,6 +499,11 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU 
*cpu,
 return ret;
 }
 
+if (!ppc_radix64_is_valid_level(level++, *g_page_size, nls)) {
+fault_cause |= DSISR_R_BADCONFIG;
+return 1;
+}
+
 ret = ppc_radix64_next_level(cs->as, eaddr & R_EADDR_MASK, 
&h_raddr,
  &nls, g_page_size, &pte, 
&fault_cause);
 if (ret) {
-- 
2.25.1

[PATCH v2 1/3] ppc: Check partition and process table alignment

2022-06-24 Thread Leandro Lupori

Check if partition and process tables are properly aligned, in
their size, according to PowerISA 3.1B, Book III 6.7.6 programming
note. Hardware and KVM also raise an exception in these cases.

Signed-off-by: Leandro Lupori 
---
 hw/ppc/spapr.c |  5 +
 hw/ppc/spapr_hcall.c   |  9 +
 target/ppc/mmu-book3s-v3.c |  5 +
 target/ppc/mmu-radix64.c   | 17 +
 4 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index fd4942e881..4b1f346087 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1329,6 +1329,11 @@ static bool spapr_get_pate(PPCVirtualHypervisor *vhyp, 
PowerPCCPU *cpu,
 patb = spapr->nested_ptcr & PTCR_PATB;
 pats = spapr->nested_ptcr & PTCR_PATS;
 
+/* Check if partition table is properly aligned */
+if (patb & MAKE_64BIT_MASK(0, pats + 12)) {
+return false;
+}
+
 /* Calculate number of entries */
 pats = 1ull << (pats + 12 - 4);
 if (pats <= lpid) {
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index d761a7d0c3..a8d4a6bcf0 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -920,6 +920,7 @@ static target_ulong h_register_process_table(PowerPCCPU 
*cpu,
 target_ulong page_size = args[2];
 target_ulong table_size = args[3];
 target_ulong update_lpcr = 0;
+target_ulong table_byte_size;
 uint64_t cproc;
 
 if (flags & ~FLAGS_MASK) { /* Check no reserved bits are set */
@@ -927,6 +928,14 @@ static target_ulong h_register_process_table(PowerPCCPU 
*cpu,
 }
 if (flags & FLAG_MODIFY) {
 if (flags & FLAG_REGISTER) {
+/* Check process table alignment */
+table_byte_size = 1ULL << (table_size + 12);
+if (proc_tbl & (table_byte_size - 1)) {
+qemu_log_mask(LOG_GUEST_ERROR,
+"%s: process table not properly aligned: proc_tbl 0x"
+TARGET_FMT_lx" proc_tbl_size 0x"TARGET_FMT_lx"\n",
+__func__, proc_tbl, table_byte_size);
+}
 if (flags & FLAG_RADIX) { /* Register new RADIX process table */
 if (proc_tbl & 0xfff || proc_tbl >> 60) {
 return H_P2;
diff --git a/target/ppc/mmu-book3s-v3.c b/target/ppc/mmu-book3s-v3.c
index f4985bae78..c8f69b3df9 100644
--- a/target/ppc/mmu-book3s-v3.c
+++ b/target/ppc/mmu-book3s-v3.c
@@ -28,6 +28,11 @@ bool ppc64_v3_get_pate(PowerPCCPU *cpu, target_ulong lpid, 
ppc_v3_pate_t *entry)
 uint64_t patb = cpu->env.spr[SPR_PTCR] & PTCR_PATB;
 uint64_t pats = cpu->env.spr[SPR_PTCR] & PTCR_PATS;
 
+/* Check if partition table is properly aligned */
+if (patb & MAKE_64BIT_MASK(0, pats + 12)) {
+return false;
+}
+
 /* Calculate number of entries */
 pats = 1ull << (pats + 12 - 4);
 if (pats <= lpid) {
diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 21ac958e48..9a8a2e2875 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -383,7 +383,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
 {
 CPUState *cs = CPU(cpu);
 CPUPPCState *env = &cpu->env;
-uint64_t offset, size, prtbe_addr, prtbe0, base_addr, nls, index, pte;
+uint64_t offset, size, prtb, prtbe_addr, prtbe0, base_addr, nls, index, 
pte;
 int fault_cause = 0, h_page_size, h_prot;
 hwaddr h_raddr, pte_addr;
 int ret;
@@ -393,9 +393,18 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU 
*cpu,
   __func__, access_str(access_type),
   eaddr, mmu_idx, pid);
 
+prtb = (pate.dw1 & PATE1_R_PRTB);
+size = 1ULL << ((pate.dw1 & PATE1_R_PRTS) + 12);
+if (prtb & (size - 1)) {
+/* Process Table not properly aligned */
+if (guest_visible) {
+ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_R_BADCONFIG);
+}
+return 1;
+}
+
 /* Index Process Table by PID to Find Corresponding Process Table Entry */
 offset = pid * sizeof(struct prtb_entry);
-size = 1ULL << ((pate.dw1 & PATE1_R_PRTS) + 12);
 if (offset >= size) {
 /* offset exceeds size of the process table */
 if (guest_visible) {
@@ -403,7 +412,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
 }
 return 1;
 }
-prtbe_addr = (pate.dw1 & PATE1_R_PRTB) + offset;
+prtbe_addr = prtb + offset;
 
 if (vhyp_flat_addressing(cpu)) {
 prtbe0 = ldq_phys(cs->as, prtbe_addr);
@@ -568,7 +577,7 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr 
eaddr,
 return false;
 }
 
-/* Get Process Table */
+/* Get Partition Table */
 if (cpu->vhyp) {
 PPCVirtualHypervisorClass *vhc;
 vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
-- 
2.25.1

[PATCH v2 0/3] ppc: Check for bad Radix configs

2022-06-24 Thread Leandro Lupori

Changes from v1:
- Use proper format defines in logs
- Optimized ppc_radix64_is_valid_level() and added a comment with
  instructions on how to proceed when adding new Radix CPUs with
  different configurations
- Moved calls to ppc_radix64_is_valid_level() outside of
  ppc_radix64_next_level(). This also avoids calling it twice for
  level 0, through ppc_radix64_walk_tree().
- Removed debug ifdefs from PDE/PTE alignment checks

Leandro Lupori (3):
  ppc: Check partition and process table alignment
  target/ppc: Improve Radix xlate level validation
  target/ppc: Check page dir/table base alignment

 hw/ppc/spapr.c |  5 +++
 hw/ppc/spapr_hcall.c   |  9 +
 target/ppc/mmu-book3s-v3.c |  5 +++
 target/ppc/mmu-radix64.c   | 79 +++---
 4 files changed, 85 insertions(+), 13 deletions(-)

-- 
2.25.1

Re: [PATCH 12/14] aspeed: Make aspeed_board_init_flashes public

2022-06-24 Thread Cédric Le Goater


On 6/23/22 20:43, Peter Delevoryas wrote:




On Jun 23, 2022, at 8:09 AM, Cédric Le Goater  wrote:

On 6/23/22 12:26, Peter Delevoryas wrote:

Signed-off-by: Peter Delevoryas 


Let's start simple without flash support. We should be able to
load FW blobs in each CPU address space using loader devices.


Actually, I was unable to do this, perhaps because the fb OpenBMC
boot sequence is a little weird. I specifically _needed_ to have
a flash device which maps the firmware in at 0x2000_0000, because
the fb OpenBMC U-Boot SPL jumps to that address to start executing
from flash? I think this is also why fb OpenBMC machines can be so slow.

$ ./build/qemu-system-arm -machine fby35 \
 -device loader,file=fby35.mtd,addr=0,cpu-num=0 -nographic \
 -d int -drive file=fby35.mtd,format=raw,if=mtd




Ideally we should be booting from the flash device directly using
the machine option '-M ast2600-evb,execute-in-place=true' like HW
does. Instructions are fetched using SPI transfers. But the amount
of code generated is tremendous. See some profiling below for a
run which barely reaches DRAM training in U-Boot.

C.


* execute-in-place=true

Each sample counts as 0.01 seconds.
  %   cumulative   self  self total
 time   seconds   secondscalls  ns/call  ns/call  name
100.00  0.02 0.02   164276   121.75   121.75  
memory_region_init_rom_device
  0.00  0.02 0.00 1610346008 0.00 0.00  tcg_code_capacity
  0.00  0.02 0.00 567612621 0.00 0.00  
type_register_static_array
  0.00  0.02 0.00 328886191 0.00 0.00  do_common_semihosting
  0.00  0.02 0.00 297215811 0.00 0.00  container_get
  0.00  0.02 0.00 292670030 0.00 0.00  arm_cpu_tlb_fill
  0.00  0.02 0.00 195416119 0.00 0.00  
arm_cpu_register_gdb_regs_for_features
  0.00  0.02 0.00 193326677 0.00 0.00  
object_type_get_instance_size
  0.00  0.02 0.00 182365829 0.00 0.00  tcg_op_insert_after
  0.00  0.02 0.00 150668458 0.00 0.00  plugin_gen_tb_end
  0.00  0.02 0.00 142171940 0.00 0.00  gen_new_label
  0.00  0.02 0.00 133200628 0.00 0.00  
smbios_build_type_38_table
  0.00  0.02 0.00 130540338 0.00 0.00  
object_dynamic_cast_assert
  0.00  0.02 0.00 129223195 0.00 0.00  cpu_loop_exit_atomic
  0.00  0.02 0.00 121759298 0.00 0.00  tcg_remove_ops_after
  0.00  0.02 0.00 116887887 0.00 0.00  in_code_gen_buffer
  0.00  0.02 0.00 111803833 0.00 0.00  tcg_emit_op
  0.00  0.02 0.00 106052221 0.00 0.00  
object_class_dynamic_cast_assert
  0.00  0.02 0.00 99704054 0.00 0.00  __jit_debug_register_code
  0.00  0.02 0.00 97812458 0.00 0.00  object_get_class
  0.00  0.02 0.00 88952594 0.00 0.00  tcg_splitwx_to_rx
  0.00  0.02 0.00 85790920 0.00 0.00  object_class_dynamic_cast
  0.00  0.02 0.00 73780673 0.00 0.00  helper_exit_atomic
  0.00  0.02 0.00 65337482 0.00 0.00  tcg_op_supported
  0.00  0.02 0.00 61213619 0.00 0.00  tcg_func_start
  0.00  0.02 0.00 54477684 0.00 0.00  tcg_flush_softmmu_tlb
  0.00  0.02 0.00 53968980 0.00 0.00  tcg_temp_new_internal
  0.00  0.02 0.00 51526008 0.00 0.00  qemu_in_vcpu_thread
  0.00  0.02 0.00 40750952 0.00 0.00  pflash_cfi02_register
  0.00  0.02 0.00 38039442 0.00 0.00  tcg_gen_op2
  0.00  0.02 0.00 37068039 0.00 0.00  tcg_gen_op1
  0.00  0.02 0.00 36473276 0.00 0.00  tcg_gen_op3
  0.00  0.02 0.00 36310225 0.00 0.00  gen_gvec_uaba
  0.00  0.02 0.00 30985436 0.00 0.00  tb_set_jmp_target
  0.00  0.02 0.00 30291796 0.00 0.00  tcg_constant_internal
  0.00  0.02 0.00 29857950 0.00 0.00  ssi_transfer

* execute-in-place=false

Each sample counts as 0.01 seconds.
  %   cumulative   self  self total
 time   seconds   secondscalls  ns/call  ns/call  name
 40.00  0.02 0.02   55114936.2936.29  aspeed_board_init_flashes
 20.00  0.03 0.01  3937238 2.54 2.54  
register_cp_regs_for_features
 20.00  0.04 0.01   67409614.8314.83  gen_gvec_uaba
 20.00  0.05 0.01   45746121.8621.86  finalize_target_page_bits
  0.00  0.05 0.00  5364258 0.00 0.00  arm_gt_hvtimer_cb
  0.00  0.05 0.00  2467532 0.00 0.00  helper_neon_narrow_sat_s8
  0.00  0.05 0.00  2431860 0.00 0.00  opb_opb2fsi_address
  0.00  0.05 0.00  1828453 0.00 0.00  cpsr_read
  0.00  0.05 0.00  1820659 0.00 0.00  cpu_get_tb_cpu_state
  0.00  0.05 0.00  1441344 0.00 0.00  arm_cpu_tlb_fill
  0.00  0.05 0.00  1427177 0.00 0.00  cxl_usp_to_cstate

Re: [PATCH 0/2] target/arm: Fix issue 1078

2022-06-24 Thread Peter Maydell

On Sun, 19 Jun 2022 at 01:16, Richard Henderson
 wrote:
>
> Nicely summarized by the reporter, but I thought it would be
> nicer to pull all of the logic into arm_pamax, rather than
> leave it separated.

Applied to target-arm.next, thanks.

I think the hang He Zhe reports as still present is an
unrelated issue (I should check the v7 LPAE spec about
block descriptors I guess).

-- PMM

Re: [PATCH v6 0/7] cutils: Introduce bundle mechanism

2022-06-24 Thread Akihiko Odaki


On 2022/06/16 18:18, Paolo Bonzini wrote:

+def destdir_join(d1: str, d2: str) -> str:
+    if not d1:
+    return d2
+    if not os.path.isabs(d2):
+    return os.path.join(d1, d2)
+
+    # c:\destdir + c:\prefix must produce c:\destdir\prefix
+    if len(d2) > 1 and d2[1] == ':':
+    return d1 + d2[2:]
+    return d1 + d2


This is taken from Meson, but it is buggy, so I fixed it and opened a pull
request for Meson:

https://github.com/mesonbuild/meson/pull/10531

The script included in v8 has the fixed version of destdir_join.

Regards,
Akihiko Odaki

Re: [PATCH 2/2] target/arm: Check V7VE as well as LPAE in arm_pamax

2022-06-24 Thread Peter Maydell

On Sun, 19 Jun 2022 at 01:18, Richard Henderson
 wrote:
>
> In machvirt_init we create a cpu but do not fully initialize it.
> Thus the propagation of V7VE to LPAE has not been done, and we
> compute the wrong value for some v7 cpus, e.g. cortex-a15.
>
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1078
> Signed-off-by: Richard Henderson 
> ---
>  target/arm/ptw.c | 8 +++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/target/arm/ptw.c b/target/arm/ptw.c
> index 07f7a21861..da478104f0 100644
> --- a/target/arm/ptw.c
> +++ b/target/arm/ptw.c
> @@ -47,7 +47,13 @@ unsigned int arm_pamax(ARMCPU *cpu)
>  assert(parange < ARRAY_SIZE(pamax_map));
>  return pamax_map[parange];
>  }
> -if (arm_feature(&cpu->env, ARM_FEATURE_LPAE)) {
> +
> +/*
> + * In machvirt_init, we call arm_pamax on a cpu that is not fully
> + * initialized, so we can't rely on the propagation done in realize.
> + */
> +if (arm_feature(&cpu->env, ARM_FEATURE_LPAE) ||
> +arm_feature(&cpu->env, ARM_FEATURE_V7VE)) {
>  /* v7 with LPAE */
>  return 40;

I guess this is expedient, so on that basis
Reviewed-by: Peter Maydell 

but as I mentioned in the gitlab issue it's kind of bogus
that the virt board is doing stuff to a non-realized CPU object.

thanks
-- PMM

Re: [PATCH 1/2] target/arm: Extend arm_pamax to more than aarch64

2022-06-24 Thread Peter Maydell

On Sun, 19 Jun 2022 at 01:16, Richard Henderson
 wrote:
>
> Move the code from hw/arm/virt.c that is supposed
> to handle v7 into the one function.
>
> Signed-off-by: Richard Henderson 
> ---
>  hw/arm/virt.c| 10 +-
>  target/arm/ptw.c | 24 

Reviewed-by: Peter Maydell 

thanks
-- PMM

Re: [PATCH v11 2/2] qtest/cxl: Add aarch64 virt test for CXL

2022-06-24 Thread Peter Maydell

On Thu, 16 Jun 2022 at 15:20, Jonathan Cameron
 wrote:
>
> Add a single complex case for aarch64 virt machine.
>
> Signed-off-by: Jonathan Cameron 
> ---
>  tests/qtest/cxl-test.c  | 48 +
>  tests/qtest/meson.build |  1 +
>  2 files changed, 40 insertions(+), 9 deletions(-)
>
> diff --git a/tests/qtest/cxl-test.c b/tests/qtest/cxl-test.c
> index 2133e973f4..1015d0e7c2 100644
> --- a/tests/qtest/cxl-test.c
> +++ b/tests/qtest/cxl-test.c
> @@ -17,6 +17,11 @@
>"-device pxb-cxl,id=cxl.1,bus=pcie.0,bus_nr=53 " \
>"-M 
> cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.targets.1=cxl.1,cxl-fmw.0.size=4G "
>
> +#define QEMU_VIRT_2PXB_CMD "-machine virt,cxl=on "  \
> +  "-device pxb-cxl,id=cxl.0,bus=pcie.0,bus_nr=52 "  \
> +  "-device pxb-cxl,id=cxl.1,bus=pcie.0,bus_nr=53 "  \
> +  "-M 
> cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.targets.1=cxl.1,cxl-fmw.0.size=4G "
> +

If CXL requires booting via UEFI, what does this test case do?
It doesn't seem to be passing in a BIOS image.

thanks
-- PMM

Re: [PATCH] meson: Prefix each element of firmware path

2022-06-24 Thread Paolo Bonzini


Queued, thanks!

Paolo

On 6/24/22 17:40, Akihiko Odaki wrote:

Signed-off-by: Akihiko Odaki 
---
  configure | 23 +++
  meson.build   | 10 --
  meson_options.txt |  2 +-
  scripts/meson-buildoptions.py |  7 +--
  scripts/meson-buildoptions.sh |  6 +++---
  softmmu/datadir.c |  8 +---
  6 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/configure b/configure
index 4b12a8094ca..ab952e7ce3b 100755
--- a/configure
+++ b/configure
@@ -675,6 +675,29 @@ fi
  
  werror=""
  
+meson_option_build_array() {

+  local a
+  local ifs
+
+  if test "$targetos" == windows; then
+ifs=\;
+  else
+ifs=:
+  fi
+
+  echo -n "["
+
+  while IFS="$ifs" read -ra a; do
+for e in "${a[@]}"; do
+  echo -n '"""'
+  echo -n "$e" | sed 's/\\//g; s/"/\\"/g'
+  echo -n '"""',
+done
+  done <<< "$1"
+
+  echo "]"
+}
+
  . $source_path/scripts/meson-buildoptions.sh
  
  meson_options=

diff --git a/meson.build b/meson.build
index 0c2e11ff071..40111ce4053 100644
--- a/meson.build
+++ b/meson.build
@@ -1684,7 +1684,13 @@ config_host_data.set_quoted('CONFIG_PREFIX', 
get_option('prefix'))
  config_host_data.set_quoted('CONFIG_QEMU_CONFDIR', get_option('prefix') / 
qemu_confdir)
  config_host_data.set_quoted('CONFIG_QEMU_DATADIR', get_option('prefix') / 
qemu_datadir)
  config_host_data.set_quoted('CONFIG_QEMU_DESKTOPDIR', get_option('prefix') / 
qemu_desktopdir)
-config_host_data.set_quoted('CONFIG_QEMU_FIRMWAREPATH', get_option('prefix') / 
get_option('qemu_firmwarepath'))
+
+qemu_firmwarepath = ''
+foreach k : get_option('qemu_firmwarepath')
+  qemu_firmwarepath += '"' + get_option('prefix') / k + '", '
+endforeach
+config_host_data.set('CONFIG_QEMU_FIRMWAREPATH', qemu_firmwarepath)
+
  config_host_data.set_quoted('CONFIG_QEMU_HELPERDIR', get_option('prefix') / 
get_option('libexecdir'))
  config_host_data.set_quoted('CONFIG_QEMU_ICONDIR', get_option('prefix') / 
qemu_icondir)
  config_host_data.set_quoted('CONFIG_QEMU_LOCALEDIR', get_option('prefix') / 
get_option('localedir'))
@@ -3622,7 +3628,7 @@ endif
  summary_info = {}
  summary_info += {'Install prefix':get_option('prefix')}
  summary_info += {'BIOS directory':qemu_datadir}
-summary_info += {'firmware path': get_option('prefix') / 
get_option('qemu_firmwarepath')}
+summary_info += {'firmware path': qemu_firmwarepath}
  summary_info += {'binary directory':  get_option('prefix') / 
get_option('bindir')}
  summary_info += {'library directory': get_option('prefix') / 
get_option('libdir')}
  summary_info += {'module directory':  qemu_moddir}
diff --git a/meson_options.txt b/meson_options.txt
index 0e8197386b9..8ad5cd73819 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -6,7 +6,7 @@ option('qemu_suffix', type : 'string', value: 'qemu',
 description: 'Suffix for QEMU data/modules/config directories (can be 
empty)')
  option('docdir', type : 'string', value : 'share/doc',
 description: 'Base directory for documentation installation (can be 
empty)')
-option('qemu_firmwarepath', type : 'string', value : 'qemu-firmware',
+option('qemu_firmwarepath', type : 'array', value : ['qemu-firmware'],
 description: 'search PATH for firmware files')
  option('pkgversion', type : 'string', value : '',
 description: 'use specified string as sub-version of the package')
diff --git a/scripts/meson-buildoptions.py b/scripts/meson-buildoptions.py
index e624c16b01a..3e2b4785388 100755
--- a/scripts/meson-buildoptions.py
+++ b/scripts/meson-buildoptions.py
@@ -156,7 +156,7 @@ def cli_metavar(opt):
  if opt["type"] == "string":
  return "VALUE"
  if opt["type"] == "array":
-return "CHOICES"
+return "CHOICES" if "choices" in opt else "VALUES"
  return "CHOICE"
  
  
@@ -199,7 +199,10 @@ def print_parse(options):

  key = cli_option(opt)
  name = opt["name"]
  if require_arg(opt):
-print(f'--{key}=*) quote_sh "-D{name}=$2" ;;')
+if opt["type"] == "array" and not "choices" in opt:
+print(f'--{key}=*) quote_sh 
"-D{name}=$(meson_option_build_array $2)" ;;')
+else:
+print(f'--{key}=*) quote_sh "-D{name}=$2" ;;')
  elif opt["type"] == "boolean":
  print(f'--enable-{key}) printf "%s" -D{name}=true ;;')
  print(f'--disable-{key}) printf "%s" -D{name}=false ;;')
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 1fc1d2e2c36..238bab162bd 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -42,12 +42,12 @@ meson_options_help() {
printf "%s\n" '  --enable-trace-backends=CHOICES'
printf "%s\n" '   Set available tracing backends 
[log] (choices:'
printf "%s\n" '   
dtrace/ftrace/log/nop/simple/syslog/ust)'
-  printf "%s\n" '  --firm

Re: [PATCH v3 2/2] docs/system/devices/canokey: Document limitations on usb-ehci

2022-06-24 Thread Hongren Zheng

On Fri, Jun 24, 2022 at 11:29:40PM +0800, MkfsSion wrote:
>  
> -Another limitation is that this device is not compatible with ``qemu-xhci``,
> -in that this device would hang when there are FIDO2 packets (traffic on
> -interrupt endpoints). If you do not use FIDO2 then it works as intended,
> -but for full functionality you should use old uhci/ehci bus and attach 
> canokey
> -to it, for example
> -
> -.. parsed-literal::
> -
> -   |qemu_system| -device piix3-usb-uhci,id=uhci -device canokey,bus=uhci.0

Hi, the fix for the limitation on qemu-xhci has not landed yet, so the text
describing that limitation should not be removed by this patch.

> +Another limitation is that this device is not compatible with ``usb-ehci``
> +since we removed high-speed mode support. When a full-speed device attach
> +to a high-speed port, ``usb-ehci`` would complain about speed mismatch.

I think we could rephrase the limitation here. Instead of saying "we
removed the high-speed mode", how about saying "this device only
provides the full-speed mode".

Also you should list the changes between two PATCH versions
below the --- line just after your commit message
so the mailing list can track what happened.

Regards,

Hongren

[PULL v2 20/20] vduse-blk: Add name option

2022-06-24 Thread Kevin Wolf

From: Xie Yongji 

Currently we use 'id' option as the name of VDUSE device.
It's a bit confusing since we use one value for two different
purposes: the ID to identify the export within QEMU (must be
distinct from any other exports in the same QEMU process, but
can overlap with names used by other processes), and the VDUSE
name to uniquely identify it on the host (must be distinct from
other VDUSE devices on the same host, but can overlap with other
export types like NBD in the same process). To make it clear,
this patch adds a separate 'name' option to specify the VDUSE
name for the vduse-blk export instead.

Signed-off-by: Xie Yongji 
Message-Id: <20220614051532.92-7-xieyon...@bytedance.com>
Signed-off-by: Kevin Wolf 
---
 qapi/block-export.json   | 7 ---
 docs/tools/qemu-storage-daemon.rst   | 5 +++--
 block/export/vduse-blk.c | 4 ++--
 storage-daemon/qemu-storage-daemon.c | 8 
 4 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/qapi/block-export.json b/qapi/block-export.json
index 618a6367c9..4627bbc4e6 100644
--- a/qapi/block-export.json
+++ b/qapi/block-export.json
@@ -183,6 +183,7 @@
 #
 # A vduse-blk block export.
 #
+# @name: the name of VDUSE device (must be unique across the host).
 # @num-queues: the number of virtqueues. Defaults to 1.
 # @queue-size: the size of virtqueue. Defaults to 256.
 # @logical-block-size: Logical block size in bytes. Range [512, PAGE_SIZE]
@@ -192,7 +193,8 @@
 # Since: 7.1
 ##
 { 'struct': 'BlockExportOptionsVduseBlk',
-  'data': { '*num-queues': 'uint16',
+  'data': { 'name': 'str',
+'*num-queues': 'uint16',
 '*queue-size': 'uint16',
 '*logical-block-size': 'size',
 '*serial': 'str' } }
@@ -320,8 +322,7 @@
 # Describes a block export, i.e. how single node should be exported on an
 # external interface.
 #
-# @id: A unique identifier for the block export (across the host for vduse-blk
-#  export type or across all export types for other types)
+# @id: A unique identifier for the block export (across all export types)
 #
 # @node-name: The node name of the block node to be exported (since: 5.2)
 #
diff --git a/docs/tools/qemu-storage-daemon.rst 
b/docs/tools/qemu-storage-daemon.rst
index 034f2809a6..ea00149a63 100644
--- a/docs/tools/qemu-storage-daemon.rst
+++ b/docs/tools/qemu-storage-daemon.rst
@@ -77,7 +77,7 @@ Standard options:
   --export 
[type=]vhost-user-blk,id=,node-name=,addr.type=unix,addr.path=[,writable=on|off][,logical-block-size=][,num-queues=]
   --export 
[type=]vhost-user-blk,id=,node-name=,addr.type=fd,addr.str=[,writable=on|off][,logical-block-size=][,num-queues=]
   --export 
[type=]fuse,id=,node-name=,mountpoint=[,growable=on|off][,writable=on|off][,allow-other=on|off|auto]
-  --export 
[type=]vduse-blk,id=,node-name=[,writable=on|off][,num-queues=][,queue-size=][,logical-block-size=][,serial=]
+  --export 
[type=]vduse-blk,id=,node-name=,name=[,writable=on|off][,num-queues=][,queue-size=][,logical-block-size=][,serial=]
 
   is a block export definition. ``node-name`` is the block node that should be
   exported. ``writable`` determines whether or not the export allows write
@@ -111,7 +111,8 @@ Standard options:
   ``allow-other`` to auto (the default) will try enabling this option, and on
   error fall back to disabling it.
 
-  The ``vduse-blk`` export type uses the ``id`` as the VDUSE device name.
+  The ``vduse-blk`` export type takes a ``name`` (must be unique across the 
host)
+  to create the VDUSE device.
   ``num-queues`` sets the number of virtqueues (the default is 1).
   ``queue-size`` sets the virtqueue descriptor table size (the default is 256).
 
diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c
index 066e088b00..f101c24c3f 100644
--- a/block/export/vduse-blk.c
+++ b/block/export/vduse-blk.c
@@ -300,7 +300,7 @@ static int vduse_blk_exp_create(BlockExport *exp, 
BlockExportOptions *opts,
 features |= 1ULL << VIRTIO_BLK_F_RO;
 }
 
-vblk_exp->dev = vduse_dev_create(exp->id, VIRTIO_ID_BLOCK, 0,
+vblk_exp->dev = vduse_dev_create(vblk_opts->name, VIRTIO_ID_BLOCK, 0,
  features, num_queues,
  sizeof(struct virtio_blk_config),
  (char *)&config, &vduse_blk_ops,
@@ -312,7 +312,7 @@ static int vduse_blk_exp_create(BlockExport *exp, 
BlockExportOptions *opts,
 }
 
 vblk_exp->recon_file = g_strdup_printf("%s/vduse-blk-%s",
-   g_get_tmp_dir(), exp->id);
+   g_get_tmp_dir(), vblk_opts->name);
 if (vduse_set_reconnect_log_file(vblk_exp->dev, vblk_exp->recon_file)) {
 error_setg(errp, "failed to set reconnect log file");
 ret = -EINVAL;
diff --git a/storage-daemon/qemu-storage-daemon.c 
b/storage-daemon/qemu-storage-daemon.c
index 4e18d3fc85..b8e910f220 100644
--- a/storage-daemon/qemu-storage-daemon

[PULL v2 18/20] nbd: Drop dead code spotted by Coverity

2022-06-24 Thread Kevin Wolf

From: Eric Blake 

CID 1488362 points out that the second 'rc >= 0' check is now dead
code.

Reported-by: Peter Maydell 
Fixes: 172f5f1a40(nbd: remove peppering of nbd_client_connected)
Signed-off-by: Eric Blake 
Message-Id: <20220516210519.76135-1-ebl...@redhat.com>
Reviewed-by: Peter Maydell 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Kevin Wolf 
---
 block/nbd.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/block/nbd.c b/block/nbd.c
index 6085ab1d2c..7f5f50ec46 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -521,12 +521,8 @@ static int coroutine_fn 
nbd_co_send_request(BlockDriverState *bs,
 if (qiov) {
 qio_channel_set_cork(s->ioc, true);
 rc = nbd_send_request(s->ioc, request);
-if (rc >= 0) {
-if (qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov,
-   NULL) < 0) {
-rc = -EIO;
-}
-} else if (rc >= 0) {
+if (rc >= 0 && qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov,
+  NULL) < 0) {
 rc = -EIO;
 }
 qio_channel_set_cork(s->ioc, false);
-- 
2.35.3

Re: [RFC v2] Adding block layer APIs resembling Linux ZoneBlockDevice ioctls.

2022-06-24 Thread Sam Li

Stefan Hajnoczi  于2022年6月24日周五 23:50写道：
>
> On Fri, Jun 24, 2022 at 11:14:32AM +0800, Sam Li wrote:
> > Hi Stefan,
> >
> > Stefan Hajnoczi  于2022年6月20日周一 15:55写道：
> > >
> > > On Mon, Jun 20, 2022 at 11:36:11AM +0800, Sam Li wrote:
> > >
> > > Hi Sam,
> > > Is this version 2 of "[RFC v1] Add support for zoned device"? Please
> > > keep the email subject line the same (except for "v2", "v3", etc) so
> > > that it's clear which patch series this new version replaces.
> > >
> > > > Fix some mistakes before. It can report a range of zones now.
> > >
> > > This looks like the description of what changed compared to v1. Please
> > > put the changelog below "---" in the future. When patch emails are
> > > merged by git-am(1) it keeps the text above "---" and discards the text
> > > below "---". The changelog is usually no longer useful once the patches
> > > are merged, so it should be located below the "---" line.
> > >
> > > The text above the "---" is the commit description (an explanation of
> > > why this commit is necessary). In this case the commit description
> > > should explain that this patch adds .bdrv_co_zone_report() and
> > > .bdrv_co_zone_mgmt() to BlockDriver so that zoned block devices can be
> > > supported.
> > >
> > > >
> > > > Signed-off-by: Sam Li 
> > > > ---
> > > >  block/block-backend.c |  22 
> > > >  block/coroutines.h|   5 +
> > > >  block/file-posix.c| 182 ++
> > > >  block/io.c|  23 
> > > >  include/block/block-common.h  |  43 ++-
> > > >  include/block/block-io.h  |  13 +++
> > > >  include/block/block_int-common.h  |  20 
> > > >  qemu-io-cmds.c| 118 +++
> > > >  tests/qemu-iotests/tests/zoned.sh |  52 +
> > > >  9 files changed, 477 insertions(+), 1 deletion(-)
> > > >  create mode 100644 tests/qemu-iotests/tests/zoned.sh
> > > >
> > > > diff --git a/block/block-backend.c b/block/block-backend.c
> > > > index e0e1aff4b1..20248e4a35 100644
> > > > --- a/block/block-backend.c
> > > > +++ b/block/block-backend.c
> > > > @@ -104,6 +104,8 @@ typedef struct BlockBackendAIOCB {
> > > >  int ret;
> > > >  } BlockBackendAIOCB;
> > > >
> > > > +
> > > > +
> > >
> > > Please avoid whitespace changes in code that is otherwise untouched by
> > > your patch. Code changes can cause merge conflicts and they make it
> > > harder to use git-annotate(1), so only changes that are necessary should
> > > be included in a patch.
> > >
> > > >  static const AIOCBInfo block_backend_aiocb_info = {
> > > >  .get_aio_context = blk_aiocb_get_aio_context,
> > > >  .aiocb_size = sizeof(BlockBackendAIOCB),
> > > > @@ -1810,6 +1812,25 @@ int blk_flush(BlockBackend *blk)
> > > >  return ret;
> > > >  }
> > > >
> > >
> > > Please add a documentation comment for blk_co_zone_report() that
> > > explains how to use the functions and the purpose of the arguments. For
> > > example, does offset have to be the first byte in a zone or can it be
> > > any byte offset? What are the alignment requirements of offset and len?
> > > Why is nr_zones a pointer?
> > >
> > > > +int blk_co_zone_report(BlockBackend *blk, int64_t offset, int64_t len,
> > >
> > > Functions that run in coroutine context must be labeled with
> > > coroutine_fn:
> > >
> > > int coroutine_fn blk_co_zone_report(...)
> > >
> > > This tells humans and tools that the function can only be called from a
> > > coroutine. There is a blog post about coroutines in QEMU here:
> > > https://blog.vmsplice.net/2014/01/coroutines-in-qemu-basics.html
> > >
> > > > +   int64_t *nr_zones,
> > > > +   struct BlockZoneDescriptor *zones)
> > >
> > > QEMU coding style uses typedefs when defining structs, so "struct
> > > BlockZoneDescriptor *zones" should be written as "BlockZoneDescriptor
> > > *zones".
> > >
> > > > +{
> > > > +int ret;
> > >
> > > This function is called from the I/O code path, please mark it with:
> > >
> > >   IO_CODE();
> > >
> > > From include/block/block-io.h:
> > >
> > >   * I/O API functions. These functions are thread-safe, and therefore
> > >   * can run in any thread as long as the thread has called
> > >   * aio_context_acquire/release().
> > >   *
> > >   * These functions can only call functions from I/O and Common 
> > > categories,
> > >   * but can be invoked by GS, "I/O or GS" and I/O APIs.
> > >   *
> > >   * All functions in this category must use the macro
> > >   * IO_CODE();
> > >   * to catch when they are accidentally called by the wrong API.
> > >
> > > > +ret = bdrv_co_zone_report(blk->root->bs, offset, len, nr_zones, 
> > > > zones);
> > >
> > > Please add blk_inc_in_flight(blk) and blk_dec_in_flight(blk) around this
> > > function call to ensure that zone report requests finish before I/O is
> > > drained (see bdrv_drained_begin()). This is necessary so that it's
> > > possible to wait for I/O req

[PATCH] artist: set memory region owners for buffers to the artist device

2022-06-24 Thread Mark Cave-Ayland

This fixes the output of "info qom-tree" so that the buffers appear as children
of the artist device, rather than underneath the "unattached" container.

Signed-off-by: Mark Cave-Ayland 
---
 hw/display/artist.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/display/artist.c b/hw/display/artist.c
index eadaef0d46..fde050c882 100644
--- a/hw/display/artist.c
+++ b/hw/display/artist.c
@@ -1358,7 +1358,7 @@ static void artist_create_buffer(ARTISTState *s, const 
char *name,
 {
 struct vram_buffer *buf = s->vram_buffer + idx;
 
-memory_region_init_ram(&buf->mr, NULL, name, width * height,
+memory_region_init_ram(&buf->mr, OBJECT(s), name, width * height,
&error_fatal);
 memory_region_add_subregion_overlap(&s->mem_as_root, *offset, &buf->mr, 0);
 
-- 
2.30.2

[PULL v2 10/20] libvduse: Add VDUSE (vDPA Device in Userspace) library

2022-06-24 Thread Kevin Wolf

From: Xie Yongji 

VDUSE [1] is a linux framework that makes it possible to implement
software-emulated vDPA devices in userspace. This adds a library
as a subproject to help implementing VDUSE backends in QEMU.

[1] https://www.kernel.org/doc/html/latest/userspace-api/vduse.html

Signed-off-by: Xie Yongji 
Message-Id: <20220523084611.91-6-xieyon...@bytedance.com>
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 meson_options.txt   |2 +
 subprojects/libvduse/include/atomic.h   |1 +
 subprojects/libvduse/include/compiler.h |1 +
 subprojects/libvduse/libvduse.h |  235 
 subprojects/libvduse/libvduse.c | 1150 +++
 MAINTAINERS |5 +
 meson.build |   15 +
 scripts/meson-buildoptions.sh   |3 +
 subprojects/libvduse/linux-headers/linux|1 +
 subprojects/libvduse/meson.build|   10 +
 subprojects/libvduse/standard-headers/linux |1 +
 11 files changed, 1424 insertions(+)
 create mode 12 subprojects/libvduse/include/atomic.h
 create mode 12 subprojects/libvduse/include/compiler.h
 create mode 100644 subprojects/libvduse/libvduse.h
 create mode 100644 subprojects/libvduse/libvduse.c
 create mode 12 subprojects/libvduse/linux-headers/linux
 create mode 100644 subprojects/libvduse/meson.build
 create mode 12 subprojects/libvduse/standard-headers/linux

diff --git a/meson_options.txt b/meson_options.txt
index f3e2f22c1e..23a9f440f7 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -257,6 +257,8 @@ option('virtfs', type: 'feature', value: 'auto',
description: 'virtio-9p support')
 option('virtiofsd', type: 'feature', value: 'auto',
description: 'build virtiofs daemon (virtiofsd)')
+option('libvduse', type: 'feature', value: 'auto',
+   description: 'build VDUSE Library')
 
 option('capstone', type: 'feature', value: 'auto',
description: 'Whether and how to find the capstone library')
diff --git a/subprojects/libvduse/include/atomic.h 
b/subprojects/libvduse/include/atomic.h
new file mode 12
index 00..8c2be64f7b
--- /dev/null
+++ b/subprojects/libvduse/include/atomic.h
@@ -0,0 +1 @@
+../../../include/qemu/atomic.h
\ No newline at end of file
diff --git a/subprojects/libvduse/include/compiler.h 
b/subprojects/libvduse/include/compiler.h
new file mode 12
index 00..de7b70697c
--- /dev/null
+++ b/subprojects/libvduse/include/compiler.h
@@ -0,0 +1 @@
+../../../include/qemu/compiler.h
\ No newline at end of file
diff --git a/subprojects/libvduse/libvduse.h b/subprojects/libvduse/libvduse.h
new file mode 100644
index 00..6c2fe98213
--- /dev/null
+++ b/subprojects/libvduse/libvduse.h
@@ -0,0 +1,235 @@
+/*
+ * VDUSE (vDPA Device in Userspace) library
+ *
+ * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights 
reserved.
+ *
+ * Author:
+ *   Xie Yongji 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef LIBVDUSE_H
+#define LIBVDUSE_H
+
+#include 
+#include 
+
+#define VIRTQUEUE_MAX_SIZE 1024
+
+/* VDUSE device structure */
+typedef struct VduseDev VduseDev;
+
+/* Virtqueue structure */
+typedef struct VduseVirtq VduseVirtq;
+
+/* Some operation of VDUSE backend */
+typedef struct VduseOps {
+/* Called when virtqueue can be processed */
+void (*enable_queue)(VduseDev *dev, VduseVirtq *vq);
+/* Called when virtqueue processing should be stopped */
+void (*disable_queue)(VduseDev *dev, VduseVirtq *vq);
+} VduseOps;
+
+/* Describing elements of the I/O buffer */
+typedef struct VduseVirtqElement {
+/* Descriptor table index */
+unsigned int index;
+/* Number of physically-contiguous device-readable descriptors */
+unsigned int out_num;
+/* Number of physically-contiguous device-writable descriptors */
+unsigned int in_num;
+/* Array to store physically-contiguous device-writable descriptors */
+struct iovec *in_sg;
+/* Array to store physically-contiguous device-readable descriptors */
+struct iovec *out_sg;
+} VduseVirtqElement;
+
+
+/**
+ * vduse_get_virtio_features:
+ *
+ * Get supported virtio features
+ *
+ * Returns: supported feature bits
+ */
+uint64_t vduse_get_virtio_features(void);
+
+/**
+ * vduse_queue_get_dev:
+ * @vq: specified virtqueue
+ *
+ * Get corresponding VDUSE device from the virtqueue.
+ *
+ * Returns: a pointer to VDUSE device on success, NULL on failure.
+ */
+VduseDev *vduse_queue_get_dev(VduseVirtq *vq);
+
+/**
+ * vduse_queue_get_fd:
+ * @vq: specified virtqueue
+ *
+ * Get the kick fd for the virtqueue.
+ *
+ * Returns: file descriptor on success, -1 on failure.
+ */
+int vduse_queue_get_fd(VduseVirtq *vq);
+
+/**
+ * vduse_queue_pop:
+ * @vq: specified virtqueue
+ * @sz: the size of struct to return (must be >= VduseVirtqElement)
+ *
+ * Pop an elem

[PULL v2 16/20] block/gluster: correctly set max_pdiscard

2022-06-24 Thread Kevin Wolf

From: Fabian Ebner 

On 64-bit platforms, assigning SIZE_MAX to the int64_t max_pdiscard
results in a negative value, and the following assertion would trigger
down the line (it's not the same max_pdiscard, but computed from the
other one):
qemu-system-x86_64: ../block/io.c:3166: bdrv_co_pdiscard: Assertion
`max_pdiscard >= bs->bl.request_alignment' failed.

On 32-bit platforms, it's fine to keep using SIZE_MAX.

The assertion in qemu_gluster_co_pdiscard() is checking that the value
of 'bytes' can safely be passed to glfs_discard_async(), which takes a
size_t for the argument in question, so it is kept as is. And since
max_pdiscard is still <= SIZE_MAX, relying on max_pdiscard is still
fine.

Fixes: 0c8022876f ("block: use int64_t instead of int in driver discard 
handlers")
Cc: qemu-sta...@nongnu.org
Signed-off-by: Fabian Ebner 
Message-Id: <20220520075922.43972-1-f.eb...@proxmox.com>
Reviewed-by: Eric Blake 
Reviewed-by: Stefano Garzarella 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Kevin Wolf 
---
 block/gluster.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/gluster.c b/block/gluster.c
index 398976bc66..b60213ab80 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -891,7 +891,7 @@ out:
 static void qemu_gluster_refresh_limits(BlockDriverState *bs, Error **errp)
 {
 bs->bl.max_transfer = GLUSTER_MAX_TRANSFER;
-bs->bl.max_pdiscard = SIZE_MAX;
+bs->bl.max_pdiscard = MIN(SIZE_MAX, INT64_MAX);
 }
 
 static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
-- 
2.35.3

Re: [PATCH v11 1/2] hw/arm/virt: Basic CXL enablement on pci_expander_bridge instances pxb-cxl

2022-06-24 Thread Jonathan Cameron via

On Fri, 24 Jun 2022 16:01:42 +0100
Peter Maydell  wrote:

> On Fri, 24 Jun 2022 at 15:54, Jonathan Cameron
>  wrote:
> > Just occurred to me there is another barrier to an approach that adds
> > DT bindings.
> > I fairly sure hw/pci-bridge/pci_expander_bridge.c (PXB)
> > only works on ACPI platforms and is the only host bridge supported
> > for CXL emulation in QEMU.  
> 
> Isn't it probeable like any other PCI device/bridge ?

Nope - PXB is a really weird device. (I tested it quickly in case I was
wrong and indeed, no sign of device on the downstream side without a
suitable BIOS / ACPI)
There is no driver support for it as such, rather it presents
as two things.
1) An EP on the main host bridge - which is used for interrupt routing
   and possibly a few other things.  Linux has no idea that's what it
   is though so attaches no driver to it.  lspci shows this as
   Red Hat, Inc, QEMU PCIe Expander Bridge

2) A host bridge with firmware described characteristics (bus number
   range and similar).  Host bridges as defined in ACPI are a concept
   rather than actual hardware and presented to the OS via firmware
   descriptions (ACPI DSDT stuff in this case).

You could probably add dt description via
pci-host-ecam-generic bindings though but it would be an interesting
late bit of dt addition in the virt_machine_done() function. Similar
to the fw_cfg and ACPI stuff done at that stage to deal with PXB
devices becoming visible.

So my gut feeling is that PXB could be made to work with DT, but doesn't today.
Given the main use case for PXB is typically NUMA description, I guess no one
noticed on DT platforms.

Jonathan

> 
> -- PMM

Re: [PULL 00/14] (Mostly) build system changes for 2022-06-24

2022-06-24 Thread Richard Henderson


On 6/24/22 01:27, Paolo Bonzini wrote:

The following changes since commit 2b049d2c8dc01de750410f8f1a4eac498c04c723:

   Merge tag 'pull-aspeed-20220622' of https://github.com/legoater/qemu into 
staging (2022-06-22 07:27:06 -0700)

are available in the Git repository at:

   https://gitlab.com/bonzini/qemu.git tags/for-upstream

for you to fetch changes up to 72da35fec9a9ba91a5b2cb9ee00843a94fa9413d:

   accel: kvm: Fix memory leak in find_stats_descriptors (2022-06-24 10:19:17 
+0200)


* fuzzing fixes
* fix cross compilation CFLAGS and compiler choice
* do not specify -bios option for tests/vm
* miscellaneous fixes


Build failure here.  I have ubuntu 22.04,

crossbuild-essential-arm64/jammy,jammy,now 12.9ubuntu3 all [installed]

crossbuild-essential-armhf/jammy,jammy,now 12.9ubuntu3 all [installed]

crossbuild-essential-i386/jammy,jammy,now 12.9ubuntu3 all [installed]

crossbuild-essential-mips64el/jammy,jammy,now 12.9 all [installed]

crossbuild-essential-ppc64el/jammy,jammy,now 12.9ubuntu3 all [installed]

crossbuild-essential-riscv64/jammy,jammy,now 12.9ubuntu3 all [installed]

crossbuild-essential-s390x/jammy,jammy,now 12.9ubuntu3 all [installed]


which is properly detected during configure,

  Cross compilers

aarch64  : aarch64-linux-gnu-gcc

alpha: $(DOCKER_SCRIPT) cc --cc alpha-linux-gnu-gcc -i 
qemu/debian-alpha-cross -s /home/rth/qemu-publish/src --


arm  : arm-linux-gnueabihf-gcc

i386 : i686-linux-gnu-gcc

nios2: $(DOCKER_SCRIPT) cc --cc nios2-linux-gnu-gcc -i 
qemu/debian-nios2-cross -s /home/rth/qemu-publish/src --


x86_64   : cc

...


But then the i386 cross-compiler isn't used:

$ cat tests/tcg/config-i386-softmmu.mak

# Automatically generated by configure - do not modify

TARGET_NAME=i386

BUILD_STATIC=

EXTRA_CFLAGS=-m32

CC=cc

CCAS=cc

AR=ar

AS=as

LD=ld

NM=nm

OBJCOPY=objcopy

RANLIB=ranlib

STRIP=strip

QEMU=/home/rth/qemu-publish/bld/qemu-system-i386


leading to failure:

cc -nostdlib -ggdb -O0 -isystem /home/rth/qemu-publish/src/tests/tcg/minilib -m32 
-ffreestanding /home/rth/qemu-publish/src/tests/tcg/multiarch/system/hello.c -o hello 
-Wl,-T/home/rth/qemu-publish/src/tests/tcg/i386/system/kernel.ld -Wl,-melf_i386 -static 
-nostdlib boot.o  printf.o -lgcc


/usr/bin/ld: skipping incompatible /usr/lib/gcc/x86_64-linux-gnu/11/libgcc.a when 
searching for -lgcc


/usr/bin/ld: cannot find -lgcc: No such file or directory

collect2: error: ld returned 1 exit status

make[1]: *** [/home/rth/qemu-publish/src/tests/tcg/i386/Makefile.softmmu-target:32: hello] 
Error 1




r~

[PULL v2 15/20] block/rbd: report a better error when namespace does not exist

2022-06-24 Thread Kevin Wolf

From: Stefano Garzarella 

If the namespace does not exist, rbd_create() fails with -ENOENT and
QEMU reports a generic "error rbd create: No such file or directory":

$ qemu-img create rbd:rbd/namespace/image 1M
Formatting 'rbd:rbd/namespace/image', fmt=raw size=1048576
qemu-img: rbd:rbd/namespace/image: error rbd create: No such file or 
directory

Unfortunately rados_ioctx_set_namespace() does not fail if the namespace
does not exist, so let's use rbd_namespace_exists() in qemu_rbd_connect()
to check if the namespace exists, reporting a more understandable error:

$ qemu-img create rbd:rbd/namespace/image 1M
Formatting 'rbd:rbd/namespace/image', fmt=raw size=1048576
qemu-img: rbd:rbd/namespace/image: namespace 'namespace' does not exist

Reported-by: Tingting Mao 
Reviewed-by: Ilya Dryomov 
Signed-off-by: Stefano Garzarella 
Message-Id: <20220517071012.6120-1-sgarz...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 block/rbd.c | 24 
 meson.build |  6 ++
 2 files changed, 30 insertions(+)

diff --git a/block/rbd.c b/block/rbd.c
index 6caf35cbba..f826410f40 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -831,6 +831,26 @@ static int qemu_rbd_connect(rados_t *cluster, 
rados_ioctx_t *io_ctx,
 error_setg_errno(errp, -r, "error opening pool %s", opts->pool);
 goto failed_shutdown;
 }
+
+#ifdef HAVE_RBD_NAMESPACE_EXISTS
+if (opts->has_q_namespace && strlen(opts->q_namespace) > 0) {
+bool exists;
+
+r = rbd_namespace_exists(*io_ctx, opts->q_namespace, &exists);
+if (r < 0) {
+error_setg_errno(errp, -r, "error checking namespace");
+goto failed_ioctx_destroy;
+}
+
+if (!exists) {
+error_setg(errp, "namespace '%s' does not exist",
+   opts->q_namespace);
+r = -ENOENT;
+goto failed_ioctx_destroy;
+}
+}
+#endif
+
 /*
  * Set the namespace after opening the io context on the pool,
  * if nspace == NULL or if nspace == "", it is just as we did nothing
@@ -840,6 +860,10 @@ static int qemu_rbd_connect(rados_t *cluster, 
rados_ioctx_t *io_ctx,
 r = 0;
 goto out;
 
+#ifdef HAVE_RBD_NAMESPACE_EXISTS
+failed_ioctx_destroy:
+rados_ioctx_destroy(*io_ctx);
+#endif
 failed_shutdown:
 rados_shutdown(*cluster);
 out:
diff --git a/meson.build b/meson.build
index 397ca1d60a..a113078f1a 100644
--- a/meson.build
+++ b/meson.build
@@ -1903,6 +1903,12 @@ config_host_data.set('HAVE_GETIFADDRS', 
cc.has_function('getifaddrs'))
 config_host_data.set('HAVE_OPENPTY', cc.has_function('openpty', dependencies: 
util))
 config_host_data.set('HAVE_STRCHRNUL', cc.has_function('strchrnul'))
 config_host_data.set('HAVE_SYSTEM_FUNCTION', cc.has_function('system', prefix: 
'#include <stdlib.h>'))
+if rbd.found()
+  config_host_data.set('HAVE_RBD_NAMESPACE_EXISTS',
+   cc.has_function('rbd_namespace_exists',
+   dependencies: rbd,
+   prefix: '#include <rbd/librbd.h>'))
+endif
 if rdma.found()
   config_host_data.set('HAVE_IBV_ADVISE_MR',
cc.has_function('ibv_advise_mr',
-- 
2.35.3

[PULL v2 13/20] libvduse: Add support for reconnecting

2022-06-24 Thread Kevin Wolf

From: Xie Yongji 

To support reconnecting after restart or crash, VDUSE backend
might need to resubmit inflight I/Os. This stores the metadata
such as the index of inflight I/O's descriptors to a shm file so
that VDUSE backend can restore them during reconnecting.

Signed-off-by: Xie Yongji 
Message-Id: <20220523084611.91-9-xieyon...@bytedance.com>
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 subprojects/libvduse/libvduse.h |  12 ++
 block/export/vduse-blk.c|  19 ++-
 subprojects/libvduse/libvduse.c | 235 +++-
 3 files changed, 260 insertions(+), 6 deletions(-)

diff --git a/subprojects/libvduse/libvduse.h b/subprojects/libvduse/libvduse.h
index 6c2fe98213..32f19e7b48 100644
--- a/subprojects/libvduse/libvduse.h
+++ b/subprojects/libvduse/libvduse.h
@@ -173,6 +173,18 @@ int vduse_dev_update_config(VduseDev *dev, uint32_t size,
  */
 int vduse_dev_setup_queue(VduseDev *dev, int index, int max_size);
 
+/**
+ * vduse_set_reconnect_log_file:
+ * @dev: VDUSE device
+ * @filename: filename of reconnect log
+ *
+ * Specify the file to store log for reconnecting. It should
+ * be called before vduse_dev_setup_queue().
+ *
+ * Returns: 0 on success, -errno on failure.
+ */
+int vduse_set_reconnect_log_file(VduseDev *dev, const char *filename);
+
 /**
  * vduse_dev_create_by_fd:
  * @fd: passed file descriptor
diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c
index cab1904234..251d73c841 100644
--- a/block/export/vduse-blk.c
+++ b/block/export/vduse-blk.c
@@ -30,6 +30,7 @@ typedef struct VduseBlkExport {
 VirtioBlkHandler handler;
 VduseDev *dev;
 uint16_t num_queues;
+char *recon_file;
 unsigned int inflight;
 } VduseBlkExport;
 
@@ -125,6 +126,8 @@ static void vduse_blk_enable_queue(VduseDev *dev, 
VduseVirtq *vq)
 
 aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
true, on_vduse_vq_kick, NULL, NULL, NULL, vq);
+/* Make sure we don't miss any kick after reconnecting */
+eventfd_write(vduse_queue_get_fd(vq), 1);
 }
 
 static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq)
@@ -306,6 +309,15 @@ static int vduse_blk_exp_create(BlockExport *exp, 
BlockExportOptions *opts,
 return -ENOMEM;
 }
 
+vblk_exp->recon_file = g_strdup_printf("%s/vduse-blk-%s",
+   g_get_tmp_dir(), exp->id);
+if (vduse_set_reconnect_log_file(vblk_exp->dev, vblk_exp->recon_file)) {
+error_setg(errp, "failed to set reconnect log file");
+vduse_dev_destroy(vblk_exp->dev);
+g_free(vblk_exp->recon_file);
+return -EINVAL;
+}
+
 for (i = 0; i < num_queues; i++) {
 vduse_dev_setup_queue(vblk_exp->dev, i, queue_size);
 }
@@ -324,11 +336,16 @@ static int vduse_blk_exp_create(BlockExport *exp, 
BlockExportOptions *opts,
 static void vduse_blk_exp_delete(BlockExport *exp)
 {
 VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
+int ret;
 
 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
 vblk_exp);
 blk_set_dev_ops(exp->blk, NULL, NULL);
-vduse_dev_destroy(vblk_exp->dev);
+ret = vduse_dev_destroy(vblk_exp->dev);
+if (ret != -EBUSY) {
+unlink(vblk_exp->recon_file);
+}
+g_free(vblk_exp->recon_file);
 }
 
 static void vduse_blk_exp_request_shutdown(BlockExport *exp)
diff --git a/subprojects/libvduse/libvduse.c b/subprojects/libvduse/libvduse.c
index 78e1e5cf90..9a2bcec282 100644
--- a/subprojects/libvduse/libvduse.c
+++ b/subprojects/libvduse/libvduse.c
@@ -42,6 +42,8 @@
 #define VDUSE_VQ_ALIGN 4096
 #define MAX_IOVA_REGIONS 256
 
+#define LOG_ALIGNMENT 64
+
 /* Round number down to multiple */
 #define ALIGN_DOWN(n, m) ((n) / (m) * (m))
 
@@ -52,6 +54,31 @@
 #define unlikely(x)   __builtin_expect(!!(x), 0)
 #endif
 
+typedef struct VduseDescStateSplit {
+uint8_t inflight;
+uint8_t padding[5];
+uint16_t next;
+uint64_t counter;
+} VduseDescStateSplit;
+
+typedef struct VduseVirtqLogInflight {
+uint64_t features;
+uint16_t version;
+uint16_t desc_num;
+uint16_t last_batch_head;
+uint16_t used_idx;
+VduseDescStateSplit desc[];
+} VduseVirtqLogInflight;
+
+typedef struct VduseVirtqLog {
+VduseVirtqLogInflight inflight;
+} VduseVirtqLog;
+
+typedef struct VduseVirtqInflightDesc {
+uint16_t index;
+uint64_t counter;
+} VduseVirtqInflightDesc;
+
 typedef struct VduseRing {
 unsigned int num;
 uint64_t desc_addr;
@@ -74,6 +101,10 @@ struct VduseVirtq {
 bool ready;
 int fd;
 VduseDev *dev;
+VduseVirtqInflightDesc *resubmit_list;
+uint16_t resubmit_num;
+uint64_t counter;
+VduseVirtqLog *log;
 };
 
 typedef struct VduseIovaRegion {
@@ -97,8 +128,36 @@ struct VduseDev {
 int fd;
 int ctrl_fd;
 void *priv;
+void *log;
 };
 
+static inline size_t vduse_vq_log_size(uint16_t queue_size)
+{
+

[PULL v2 07/20] block/export: Fix incorrect length passed to vu_queue_push()

2022-06-24 Thread Kevin Wolf

From: Xie Yongji 

Currently req->size is set to the correct value only
when handling a VIRTIO_BLK_T_GET_ID request. This patch
sets it correctly for all request types.

Signed-off-by: Xie Yongji 
Message-Id: <20220523084611.91-3-xieyon...@bytedance.com>
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 block/export/vhost-user-blk-server.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/block/export/vhost-user-blk-server.c 
b/block/export/vhost-user-blk-server.c
index b2e458ade3..19c6ee51d3 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -60,8 +60,7 @@ static void vu_blk_req_complete(VuBlkReq *req)
 {
 VuDev *vu_dev = &req->server->vu_dev;
 
-/* IO size with 1 extra status byte */
-vu_queue_push(vu_dev, req->vq, &req->elem, req->size + 1);
+vu_queue_push(vu_dev, req->vq, &req->elem, req->size);
 vu_queue_notify(vu_dev, req->vq);
 
 free(req);
@@ -207,6 +206,7 @@ static void coroutine_fn vu_blk_virtio_process_req(void 
*opaque)
 goto err;
 }
 
+req->size = iov_size(in_iov, in_num);
 /* We always touch the last byte, so just see how big in_iov is.  */
 req->in = (void *)in_iov[in_num - 1].iov_base
   + in_iov[in_num - 1].iov_len
@@ -267,7 +267,6 @@ static void coroutine_fn vu_blk_virtio_process_req(void 
*opaque)
   VIRTIO_BLK_ID_BYTES);
 snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
 req->in->status = VIRTIO_BLK_S_OK;
-req->size = elem->in_sg[0].iov_len;
 break;
 }
 case VIRTIO_BLK_T_DISCARD:
-- 
2.35.3

[PULL v2 14/20] qsd: document vduse-blk exports

2022-06-24 Thread Kevin Wolf

From: Stefan Hajnoczi 

Document vduse-blk exports in qemu-storage-daemon --help and the
qemu-storage-daemon(1) man page.

Based-on: <20220523084611.91-1-xieyon...@bytedance.com>
Cc: Xie Yongji 
Signed-off-by: Stefan Hajnoczi 
Message-Id: <20220525121947.859820-1-stefa...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 docs/tools/qemu-storage-daemon.rst   | 21 +
 storage-daemon/qemu-storage-daemon.c |  9 +
 2 files changed, 30 insertions(+)

diff --git a/docs/tools/qemu-storage-daemon.rst 
b/docs/tools/qemu-storage-daemon.rst
index 8b97592663..fbeaf76954 100644
--- a/docs/tools/qemu-storage-daemon.rst
+++ b/docs/tools/qemu-storage-daemon.rst
@@ -77,6 +77,7 @@ Standard options:
   --export 
[type=]vhost-user-blk,id=,node-name=,addr.type=unix,addr.path=[,writable=on|off][,logical-block-size=][,num-queues=]
   --export 
[type=]vhost-user-blk,id=,node-name=,addr.type=fd,addr.str=[,writable=on|off][,logical-block-size=][,num-queues=]
   --export 
[type=]fuse,id=,node-name=,mountpoint=[,growable=on|off][,writable=on|off][,allow-other=on|off|auto]
+  --export 
[type=]vduse-blk,id=<id>,node-name=<node-name>[,writable=on|off][,num-queues=<num-queues>][,queue-size=<queue-size>][,logical-block-size=<logical-block-size>]
 
   is a block export definition. ``node-name`` is the block node that should be
   exported. ``writable`` determines whether or not the export allows write
@@ -110,6 +111,26 @@ Standard options:
   ``allow-other`` to auto (the default) will try enabling this option, and on
   error fall back to disabling it.
 
+  The ``vduse-blk`` export type uses the ``id`` as the VDUSE device name.
+  ``num-queues`` sets the number of virtqueues (the default is 1).
+  ``queue-size`` sets the virtqueue descriptor table size (the default is 256).
+
+  The instantiated VDUSE device must then be added to the vDPA bus using the
+  vdpa(8) command from the iproute2 project::
+
+  # vdpa dev add name <id> mgmtdev vduse
+
+  The device can be removed from the vDPA bus later as follows::
+
+  # vdpa dev del <id>
+
+  For more information about attaching vDPA devices to the host with
+  virtio_vdpa.ko or attaching them to guests with vhost_vdpa.ko, see
+  https://vdpa-dev.gitlab.io/.
+
+  For more information about VDUSE, see
+  https://docs.kernel.org/userspace-api/vduse.html.
+
 .. option:: --monitor MONITORDEF
 
   is a QMP monitor definition. See the :manpage:`qemu(1)` manual page for
diff --git a/storage-daemon/qemu-storage-daemon.c 
b/storage-daemon/qemu-storage-daemon.c
index c104817cdd..17fd3f2f5f 100644
--- a/storage-daemon/qemu-storage-daemon.c
+++ b/storage-daemon/qemu-storage-daemon.c
@@ -121,6 +121,15 @@ static void help(void)
 " vhost-user-blk device over file descriptor\n"
 "\n"
 #endif /* CONFIG_VHOST_USER_BLK_SERVER */
+#ifdef CONFIG_VDUSE_BLK_EXPORT
+"  --export [type=]vduse-blk,id=,node-name=\n"
+"   [,writable=on|off][,num-queues=]\n"
+"   [,queue-size=]\n"
+"   [,logical-block-size=]\n"
+" export the specified block node as a vduse-blk\n"
+" device using the id as the VDUSE device name\n"
+"\n"
+#endif /* CONFIG_VDUSE_BLK_EXPORT */
 "  --monitor [chardev=]name[,mode=control][,pretty[=on|off]]\n"
 " configure a QMP monitor\n"
 "\n"
-- 
2.35.3

Re: [RFC v2] Adding block layer APIs resembling Linux ZoneBlockDevice ioctls.

2022-06-24 Thread Stefan Hajnoczi

On Fri, Jun 24, 2022 at 11:14:32AM +0800, Sam Li wrote:
> Hi Stefan,
> 
> Stefan Hajnoczi  于2022年6月20日周一 15:55写道：
> >
> > On Mon, Jun 20, 2022 at 11:36:11AM +0800, Sam Li wrote:
> >
> > Hi Sam,
> > Is this version 2 of "[RFC v1] Add support for zoned device"? Please
> > keep the email subject line the same (except for "v2", "v3", etc) so
> > that it's clear which patch series this new version replaces.
> >
> > > Fix some mistakes before. It can report a range of zones now.
> >
> > This looks like the description of what changed compared to v1. Please
> > put the changelog below "---" in the future. When patch emails are
> > merged by git-am(1) it keeps the text above "---" and discards the text
> > below "---". The changelog is usually no longer useful once the patches
> > are merged, so it should be located below the "---" line.
> >
> > The text above the "---" is the commit description (an explanation of
> > why this commit is necessary). In this case the commit description
> > should explain that this patch adds .bdrv_co_zone_report() and
> > .bdrv_co_zone_mgmt() to BlockDriver so that zoned block devices can be
> > supported.
> >
> > >
> > > Signed-off-by: Sam Li 
> > > ---
> > >  block/block-backend.c |  22 
> > >  block/coroutines.h|   5 +
> > >  block/file-posix.c| 182 ++
> > >  block/io.c|  23 
> > >  include/block/block-common.h  |  43 ++-
> > >  include/block/block-io.h  |  13 +++
> > >  include/block/block_int-common.h  |  20 
> > >  qemu-io-cmds.c| 118 +++
> > >  tests/qemu-iotests/tests/zoned.sh |  52 +
> > >  9 files changed, 477 insertions(+), 1 deletion(-)
> > >  create mode 100644 tests/qemu-iotests/tests/zoned.sh
> > >
> > > diff --git a/block/block-backend.c b/block/block-backend.c
> > > index e0e1aff4b1..20248e4a35 100644
> > > --- a/block/block-backend.c
> > > +++ b/block/block-backend.c
> > > @@ -104,6 +104,8 @@ typedef struct BlockBackendAIOCB {
> > >  int ret;
> > >  } BlockBackendAIOCB;
> > >
> > > +
> > > +
> >
> > Please avoid whitespace changes in code that is otherwise untouched by
> > your patch. Code changes can cause merge conflicts and they make it
> > harder to use git-annotate(1), so only changes that are necessary should
> > be included in a patch.
> >
> > >  static const AIOCBInfo block_backend_aiocb_info = {
> > >  .get_aio_context = blk_aiocb_get_aio_context,
> > >  .aiocb_size = sizeof(BlockBackendAIOCB),
> > > @@ -1810,6 +1812,25 @@ int blk_flush(BlockBackend *blk)
> > >  return ret;
> > >  }
> > >
> >
> > Please add a documentation comment for blk_co_zone_report() that
> > explains how to use the functions and the purpose of the arguments. For
> > example, does offset have to be the first byte in a zone or can it be
> > any byte offset? What are the alignment requirements of offset and len?
> > Why is nr_zones a pointer?
> >
> > > +int blk_co_zone_report(BlockBackend *blk, int64_t offset, int64_t len,
> >
> > Functions that run in coroutine context must be labeled with
> > coroutine_fn:
> >
> > int coroutine_fn blk_co_zone_report(...)
> >
> > This tells humans and tools that the function can only be called from a
> > coroutine. There is a blog post about coroutines in QEMU here:
> > https://blog.vmsplice.net/2014/01/coroutines-in-qemu-basics.html
> >
> > > +   int64_t *nr_zones,
> > > +   struct BlockZoneDescriptor *zones)
> >
> > QEMU coding style uses typedefs when defining structs, so "struct
> > BlockZoneDescriptor *zones" should be written as "BlockZoneDescriptor
> > *zones".
> >
> > > +{
> > > +int ret;
> >
> > This function is called from the I/O code path, please mark it with:
> >
> >   IO_CODE();
> >
> > From include/block/block-io.h:
> >
> >   * I/O API functions. These functions are thread-safe, and therefore
> >   * can run in any thread as long as the thread has called
> >   * aio_context_acquire/release().
> >   *
> >   * These functions can only call functions from I/O and Common categories,
> >   * but can be invoked by GS, "I/O or GS" and I/O APIs.
> >   *
> >   * All functions in this category must use the macro
> >   * IO_CODE();
> >   * to catch when they are accidentally called by the wrong API.
> >
> > > +ret = bdrv_co_zone_report(blk->root->bs, offset, len, nr_zones, 
> > > zones);
> >
> > Please add blk_inc_in_flight(blk) and blk_dec_in_flight(blk) around this
> > function call to ensure that zone report requests finish before I/O is
> > drained (see bdrv_drained_begin()). This is necessary so that it's
> > possible to wait for I/O requests, including zone report, to complete.
> >
> > Similar to blk_co_do_preadv() we need blk_wait_while_drained(blk),
> > blk_check_byte_request(), and bdrv_inc_in_flight(bs) before calling
> > bdrv_co_zone_report(). bdrv_dec_in_flight(bs) needs to be called after
> > bdrv_co_zon

[PULL v2 06/20] block: Support passing NULL ops to blk_set_dev_ops()

2022-06-24 Thread Kevin Wolf

From: Xie Yongji 

This supports passing NULL ops to blk_set_dev_ops()
so that we can remove stale ops in some cases.

Signed-off-by: Xie Yongji 
Reviewed-by: Stefan Hajnoczi 
Message-Id: <20220523084611.91-2-xieyon...@bytedance.com>
Signed-off-by: Kevin Wolf 
---
 block/block-backend.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index d4abdf8faa..f425b00793 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1058,7 +1058,7 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps 
*ops,
 blk->dev_opaque = opaque;
 
 /* Are we currently quiesced? Should we enforce this right now? */
-if (blk->quiesce_counter && ops->drained_begin) {
+if (blk->quiesce_counter && ops && ops->drained_begin) {
 ops->drained_begin(opaque);
 }
 }
-- 
2.35.3

[PULL v2 11/20] vduse-blk: Implement vduse-blk export

2022-06-24 Thread Kevin Wolf

From: Xie Yongji 

This implements a VDUSE block backend based on
the libvduse library. We can use it to export the BDSs
for both VM and container (host) usage.

The new command-line syntax is:

$ qemu-storage-daemon \
--blockdev file,node-name=drive0,filename=test.img \
--export vduse-blk,node-name=drive0,id=vduse-export0,writable=on

After the qemu-storage-daemon has started, we need to use
the "vdpa" command to attach the device to the vDPA bus:

$ vdpa dev add name vduse-export0 mgmtdev vduse

Also the device must be removed via the "vdpa" command
before we stop the qemu-storage-daemon.

Signed-off-by: Xie Yongji 
Reviewed-by: Stefan Hajnoczi 
Message-Id: <20220523084611.91-7-xieyon...@bytedance.com>
Signed-off-by: Kevin Wolf 
---
 qapi/block-export.json|  28 ++-
 meson_options.txt |   2 +
 block/export/vduse-blk.h  |  20 +++
 block/export/export.c |   6 +
 block/export/vduse-blk.c  | 329 ++
 MAINTAINERS   |   4 +-
 block/export/meson.build  |   5 +
 meson.build   |  13 ++
 scripts/meson-buildoptions.sh |   4 +
 9 files changed, 407 insertions(+), 4 deletions(-)
 create mode 100644 block/export/vduse-blk.h
 create mode 100644 block/export/vduse-blk.c

diff --git a/qapi/block-export.json b/qapi/block-export.json
index 8afb1b65b3..99c34a6965 100644
--- a/qapi/block-export.json
+++ b/qapi/block-export.json
@@ -178,6 +178,23 @@
 '*allow-other': 'FuseExportAllowOther' },
   'if': 'CONFIG_FUSE' }
 
+##
+# @BlockExportOptionsVduseBlk:
+#
+# A vduse-blk block export.
+#
+# @num-queues: the number of virtqueues. Defaults to 1.
+# @queue-size: the size of virtqueue. Defaults to 256.
+# @logical-block-size: Logical block size in bytes. Range [512, PAGE_SIZE]
+#  and must be power of 2. Defaults to 512 bytes.
+#
+# Since: 7.1
+##
+{ 'struct': 'BlockExportOptionsVduseBlk',
+  'data': { '*num-queues': 'uint16',
+'*queue-size': 'uint16',
+'*logical-block-size': 'size'} }
+
 ##
 # @NbdServerAddOptions:
 #
@@ -284,6 +301,7 @@
 # @nbd: NBD export
 # @vhost-user-blk: vhost-user-blk export (since 5.2)
 # @fuse: FUSE export (since: 6.0)
+# @vduse-blk: vduse-blk export (since 7.1)
 #
 # Since: 4.2
 ##
@@ -291,7 +309,8 @@
   'data': [ 'nbd',
 { 'name': 'vhost-user-blk',
   'if': 'CONFIG_VHOST_USER_BLK_SERVER' },
-{ 'name': 'fuse', 'if': 'CONFIG_FUSE' } ] }
+{ 'name': 'fuse', 'if': 'CONFIG_FUSE' },
+{ 'name': 'vduse-blk', 'if': 'CONFIG_VDUSE_BLK_EXPORT' } ] }
 
 ##
 # @BlockExportOptions:
@@ -299,7 +318,8 @@
 # Describes a block export, i.e. how single node should be exported on an
 # external interface.
 #
-# @id: A unique identifier for the block export (across all export types)
+# @id: A unique identifier for the block export (across the host for vduse-blk
+#  export type or across all export types for other types)
 #
 # @node-name: The node name of the block node to be exported (since: 5.2)
 #
@@ -335,7 +355,9 @@
   'vhost-user-blk': { 'type': 'BlockExportOptionsVhostUserBlk',
   'if': 'CONFIG_VHOST_USER_BLK_SERVER' },
   'fuse': { 'type': 'BlockExportOptionsFuse',
-'if': 'CONFIG_FUSE' }
+'if': 'CONFIG_FUSE' },
+  'vduse-blk': { 'type': 'BlockExportOptionsVduseBlk',
+ 'if': 'CONFIG_VDUSE_BLK_EXPORT' }
} }
 
 ##
diff --git a/meson_options.txt b/meson_options.txt
index 23a9f440f7..97c38109b1 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -259,6 +259,8 @@ option('virtiofsd', type: 'feature', value: 'auto',
description: 'build virtiofs daemon (virtiofsd)')
 option('libvduse', type: 'feature', value: 'auto',
description: 'build VDUSE Library')
+option('vduse_blk_export', type: 'feature', value: 'auto',
+   description: 'VDUSE block export support')
 
 option('capstone', type: 'feature', value: 'auto',
description: 'Whether and how to find the capstone library')
diff --git a/block/export/vduse-blk.h b/block/export/vduse-blk.h
new file mode 100644
index 00..c4eeb1b70e
--- /dev/null
+++ b/block/export/vduse-blk.h
@@ -0,0 +1,20 @@
+/*
+ * Export QEMU block device via VDUSE
+ *
+ * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights 
reserved.
+ *
+ * Author:
+ *   Xie Yongji 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef VDUSE_BLK_H
+#define VDUSE_BLK_H
+
+#include "block/export.h"
+
+extern const BlockExportDriver blk_exp_vduse_blk;
+
+#endif /* VDUSE_BLK_H */
diff --git a/block/export/export.c b/block/export/export.c
index 7253af3bc3..4744862915 100644
--- a/block/export/export.c
+++ b/block/export/export.c
@@ -26,6 +26,9 @@
 #ifdef CONFIG_VHOST_USER_BLK_SERVER
 #include "vhost-user-blk-server.h"
 #endif
+#ifdef CONFIG_VDUSE_BLK_EXPORT
+#include "vduse

[PULL v2 05/20] block: simplify handling of try to merge different sized bitmaps

2022-06-24 Thread Kevin Wolf

From: Vladimir Sementsov-Ogievskiy 

We have too much logic to simply check that bitmaps are of the same
size. Let's just define that hbitmap_merge() and
bdrv_dirty_bitmap_merge_internal() require their argument bitmaps be of
same size, this simplifies things.

Let's look through the callers:

For backup_init_bcs_bitmap() we already assert that merge can't fail.

In bdrv_reclaim_dirty_bitmap_locked() we gracefully handle the error
that can't happen: successor always has same size as its parent, drop
this logic.

In bdrv_merge_dirty_bitmap() we already have an assertion and a separate
check. Make the check explicit and improve the error message.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Nikita Lapshin 
Reviewed-by: Kevin Wolf 
Message-Id: <20220517111206.23585-4-v.sementsov...@mail.ru>
Signed-off-by: Kevin Wolf 
---
 include/block/block_int-io.h |  2 +-
 include/qemu/hbitmap.h   | 15 ++-
 block/backup.c   |  6 ++
 block/dirty-bitmap.c | 26 +++---
 util/hbitmap.c   | 25 +++--
 5 files changed, 23 insertions(+), 51 deletions(-)

diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
index bb454200e5..ded29e7494 100644
--- a/include/block/block_int-io.h
+++ b/include/block/block_int-io.h
@@ -102,7 +102,7 @@ bool blk_dev_is_tray_open(BlockBackend *blk);
 void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
 
 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
-bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
+void bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
   const BdrvDirtyBitmap *src,
   HBitmap **backup, bool lock);
 
diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index 5bd986aa44..af4e4ab746 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -76,20 +76,9 @@ void hbitmap_truncate(HBitmap *hb, uint64_t size);
  *
  * Store result of merging @a and @b into @result.
  * @result is allowed to be equal to @a or @b.
- *
- * Return true if the merge was successful,
- *false if it was not attempted.
- */
-bool hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result);
-
-/**
- * hbitmap_can_merge:
- *
- * hbitmap_can_merge(a, b) && hbitmap_can_merge(a, result) is sufficient and
- * necessary for hbitmap_merge will not fail.
- *
+ * All bitmaps must have same size.
  */
-bool hbitmap_can_merge(const HBitmap *a, const HBitmap *b);
+void hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result);
 
 /**
  * hbitmap_empty:
diff --git a/block/backup.c b/block/backup.c
index 5cfd0b999c..b2b649e305 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -228,15 +228,13 @@ out:
 
 static void backup_init_bcs_bitmap(BackupBlockJob *job)
 {
-bool ret;
 uint64_t estimate;
 BdrvDirtyBitmap *bcs_bitmap = block_copy_dirty_bitmap(job->bcs);
 
 if (job->sync_mode == MIRROR_SYNC_MODE_BITMAP) {
 bdrv_clear_dirty_bitmap(bcs_bitmap, NULL);
-ret = bdrv_dirty_bitmap_merge_internal(bcs_bitmap, job->sync_bitmap,
-   NULL, true);
-assert(ret);
+bdrv_dirty_bitmap_merge_internal(bcs_bitmap, job->sync_bitmap, NULL,
+ true);
 } else if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
 /*
  * We can't hog the coroutine to initialize this thoroughly.
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index da1b91166f..bf3dc0512a 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -309,10 +309,7 @@ BdrvDirtyBitmap 
*bdrv_reclaim_dirty_bitmap_locked(BdrvDirtyBitmap *parent,
 return NULL;
 }
 
-if (!hbitmap_merge(parent->bitmap, successor->bitmap, parent->bitmap)) {
-error_setg(errp, "Merging of parent and successor bitmap failed");
-return NULL;
-}
+hbitmap_merge(parent->bitmap, successor->bitmap, parent->bitmap);
 
 parent->disabled = successor->disabled;
 parent->busy = false;
@@ -912,13 +909,15 @@ bool bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const 
BdrvDirtyBitmap *src,
 goto out;
 }
 
-if (!hbitmap_can_merge(dest->bitmap, src->bitmap)) {
-error_setg(errp, "Bitmaps are incompatible and can't be merged");
+if (bdrv_dirty_bitmap_size(src) != bdrv_dirty_bitmap_size(dest)) {
+error_setg(errp, "Bitmaps are of different sizes (destination size is 
%"
+   PRId64 ", source size is %" PRId64 ") and can't be merged",
+   bdrv_dirty_bitmap_size(dest), bdrv_dirty_bitmap_size(src));
 goto out;
 }
 
-ret = bdrv_dirty_bitmap_merge_internal(dest, src, backup, false);
-assert(ret);
+bdrv_dirty_bitmap_merge_internal(dest, src, backup, false);
+ret = true;
 
 out:
 bdrv_dirty_bitmaps_unlock(dest->bs);
@@ -932,17 +931,16 @@ out:
 /**
  * bdrv_dirty_bit

[PULL v2 19/20] vduse-blk: Add serial option

2022-06-24 Thread Kevin Wolf

From: Xie Yongji 

Add a 'serial' option to allow the user to specify the serial number
explicitly. The default value is changed to an empty string, matching
what is done in "hw/block/virtio-blk.c".

Signed-off-by: Xie Yongji 
Message-Id: <20220614051532.92-6-xieyon...@bytedance.com>
Signed-off-by: Kevin Wolf 
---
 qapi/block-export.json   |  4 +++-
 docs/tools/qemu-storage-daemon.rst   |  2 +-
 block/export/virtio-blk-handler.h|  2 +-
 block/export/vduse-blk.c | 20 ++--
 block/export/vhost-user-blk-server.c |  4 +++-
 storage-daemon/qemu-storage-daemon.c |  1 +
 6 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/qapi/block-export.json b/qapi/block-export.json
index 99c34a6965..618a6367c9 100644
--- a/qapi/block-export.json
+++ b/qapi/block-export.json
@@ -187,13 +187,15 @@
 # @queue-size: the size of virtqueue. Defaults to 256.
 # @logical-block-size: Logical block size in bytes. Range [512, PAGE_SIZE]
 #  and must be power of 2. Defaults to 512 bytes.
+# @serial: the serial number of virtio block device. Defaults to empty string.
 #
 # Since: 7.1
 ##
 { 'struct': 'BlockExportOptionsVduseBlk',
   'data': { '*num-queues': 'uint16',
 '*queue-size': 'uint16',
-'*logical-block-size': 'size'} }
+'*logical-block-size': 'size',
+'*serial': 'str' } }
 
 ##
 # @NbdServerAddOptions:
diff --git a/docs/tools/qemu-storage-daemon.rst 
b/docs/tools/qemu-storage-daemon.rst
index fbeaf76954..034f2809a6 100644
--- a/docs/tools/qemu-storage-daemon.rst
+++ b/docs/tools/qemu-storage-daemon.rst
@@ -77,7 +77,7 @@ Standard options:
   --export 
[type=]vhost-user-blk,id=,node-name=,addr.type=unix,addr.path=[,writable=on|off][,logical-block-size=][,num-queues=]
   --export 
[type=]vhost-user-blk,id=,node-name=,addr.type=fd,addr.str=[,writable=on|off][,logical-block-size=][,num-queues=]
   --export 
[type=]fuse,id=,node-name=,mountpoint=[,growable=on|off][,writable=on|off][,allow-other=on|off|auto]
-  --export 
[type=]vduse-blk,id=,node-name=[,writable=on|off][,num-queues=][,queue-size=][,logical-block-size=]
+  --export 
[type=]vduse-blk,id=,node-name=[,writable=on|off][,num-queues=][,queue-size=][,logical-block-size=][,serial=]
 
   is a block export definition. ``node-name`` is the block node that should be
   exported. ``writable`` determines whether or not the export allows write
diff --git a/block/export/virtio-blk-handler.h 
b/block/export/virtio-blk-handler.h
index 1c7a5e32ad..150d44cff2 100644
--- a/block/export/virtio-blk-handler.h
+++ b/block/export/virtio-blk-handler.h
@@ -23,7 +23,7 @@
 
 typedef struct {
 BlockBackend *blk;
-const char *serial;
+char *serial;
 uint32_t logical_block_size;
 bool writable;
 } VirtioBlkHandler;
diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c
index 251d73c841..066e088b00 100644
--- a/block/export/vduse-blk.c
+++ b/block/export/vduse-blk.c
@@ -235,7 +235,7 @@ static int vduse_blk_exp_create(BlockExport *exp, 
BlockExportOptions *opts,
 Error *local_err = NULL;
 struct virtio_blk_config config = { 0 };
 uint64_t features;
-int i;
+int i, ret;
 
 if (vblk_opts->has_num_queues) {
 num_queues = vblk_opts->num_queues;
@@ -265,7 +265,8 @@ static int vduse_blk_exp_create(BlockExport *exp, 
BlockExportOptions *opts,
 }
 vblk_exp->num_queues = num_queues;
 vblk_exp->handler.blk = exp->blk;
-vblk_exp->handler.serial = exp->id;
+vblk_exp->handler.serial = g_strdup(vblk_opts->has_serial ?
+vblk_opts->serial : "");
 vblk_exp->handler.logical_block_size = logical_block_size;
 vblk_exp->handler.writable = opts->writable;
 
@@ -306,16 +307,16 @@ static int vduse_blk_exp_create(BlockExport *exp, 
BlockExportOptions *opts,
  vblk_exp);
 if (!vblk_exp->dev) {
 error_setg(errp, "failed to create vduse device");
-return -ENOMEM;
+ret = -ENOMEM;
+goto err_dev;
 }
 
 vblk_exp->recon_file = g_strdup_printf("%s/vduse-blk-%s",
g_get_tmp_dir(), exp->id);
 if (vduse_set_reconnect_log_file(vblk_exp->dev, vblk_exp->recon_file)) {
 error_setg(errp, "failed to set reconnect log file");
-vduse_dev_destroy(vblk_exp->dev);
-g_free(vblk_exp->recon_file);
-return -EINVAL;
+ret = -EINVAL;
+goto err;
 }
 
 for (i = 0; i < num_queues; i++) {
@@ -331,6 +332,12 @@ static int vduse_blk_exp_create(BlockExport *exp, 
BlockExportOptions *opts,
 blk_set_dev_ops(exp->blk, &vduse_block_ops, exp);
 
 return 0;
+err:
+vduse_dev_destroy(vblk_exp->dev);
+g_free(vblk_exp->recon_file);
+err_dev:
+g_free(vblk_exp->handler.serial);
+return ret;
 }
 
 static void vduse_blk_exp_delete(BlockExport *exp)
@@ -346,6 +353,7 @@ static void vduse_blk_exp_delete(BlockExport *exp)
 unlink(vblk_exp->re

[PULL v2 03/20] block: block_dirty_bitmap_merge(): fix error path

2022-06-24 Thread Kevin Wolf

From: Vladimir Sementsov-Ogievskiy 

At the end we ignore a failure of bdrv_merge_dirty_bitmap() and report
success, even though errp has been set. That's wrong.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Nikita Lapshin 
Reviewed-by: Kevin Wolf 
Message-Id: <20220517111206.23585-2-v.sementsov...@mail.ru>
Signed-off-by: Kevin Wolf 
---
 block/monitor/bitmap-qmp-cmds.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/block/monitor/bitmap-qmp-cmds.c b/block/monitor/bitmap-qmp-cmds.c
index 2b677c4a2f..bd10468596 100644
--- a/block/monitor/bitmap-qmp-cmds.c
+++ b/block/monitor/bitmap-qmp-cmds.c
@@ -309,7 +309,10 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char 
*node, const char *target,
 }
 
 /* Merge into dst; dst is unchanged on failure. */
-bdrv_merge_dirty_bitmap(dst, anon, backup, errp);
+if (!bdrv_merge_dirty_bitmap(dst, anon, backup, errp)) {
+dst = NULL;
+goto out;
+}
 
  out:
 bdrv_release_dirty_bitmap(anon);
-- 
2.35.3

[PULL v2 09/20] linux-headers: Add vduse.h

2022-06-24 Thread Kevin Wolf

From: Xie Yongji 

This adds vduse header to linux headers so that the
relevant VDUSE API can be used in subsequent patches.

Signed-off-by: Xie Yongji 
Reviewed-by: Stefan Hajnoczi 
Message-Id: <20220523084611.91-5-xieyon...@bytedance.com>
Signed-off-by: Kevin Wolf 
---
 linux-headers/linux/vduse.h | 306 
 scripts/update-linux-headers.sh |   2 +-
 2 files changed, 307 insertions(+), 1 deletion(-)
 create mode 100644 linux-headers/linux/vduse.h

diff --git a/linux-headers/linux/vduse.h b/linux-headers/linux/vduse.h
new file mode 100644
index 00..d47b004ce6
--- /dev/null
+++ b/linux-headers/linux/vduse.h
@@ -0,0 +1,306 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _VDUSE_H_
+#define _VDUSE_H_
+
+#include <linux/types.h>
+
+#define VDUSE_BASE 0x81
+
+/* The ioctls for control device (/dev/vduse/control) */
+
+#define VDUSE_API_VERSION  0
+
+/*
+ * Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION).
+ * This is used for future extension.
+ */
+#define VDUSE_GET_API_VERSION  _IOR(VDUSE_BASE, 0x00, __u64)
+
+/* Set the version of VDUSE API that userspace supported. */
+#define VDUSE_SET_API_VERSION  _IOW(VDUSE_BASE, 0x01, __u64)
+
+/**
+ * struct vduse_dev_config - basic configuration of a VDUSE device
+ * @name: VDUSE device name, needs to be NUL terminated
+ * @vendor_id: virtio vendor id
+ * @device_id: virtio device id
+ * @features: virtio features
+ * @vq_num: the number of virtqueues
+ * @vq_align: the allocation alignment of virtqueue's metadata
+ * @reserved: for future use, needs to be initialized to zero
+ * @config_size: the size of the configuration space
+ * @config: the buffer of the configuration space
+ *
+ * Structure used by VDUSE_CREATE_DEV ioctl to create VDUSE device.
+ */
+struct vduse_dev_config {
+#define VDUSE_NAME_MAX 256
+   char name[VDUSE_NAME_MAX];
+   __u32 vendor_id;
+   __u32 device_id;
+   __u64 features;
+   __u32 vq_num;
+   __u32 vq_align;
+   __u32 reserved[13];
+   __u32 config_size;
+   __u8 config[];
+};
+
+/* Create a VDUSE device which is represented by a char device 
(/dev/vduse/$NAME) */
+#define VDUSE_CREATE_DEV   _IOW(VDUSE_BASE, 0x02, struct vduse_dev_config)
+
+/*
+ * Destroy a VDUSE device. Make sure there are no more references
+ * to the char device (/dev/vduse/$NAME).
+ */
+#define VDUSE_DESTROY_DEV  _IOW(VDUSE_BASE, 0x03, char[VDUSE_NAME_MAX])
+
+/* The ioctls for VDUSE device (/dev/vduse/$NAME) */
+
+/**
+ * struct vduse_iotlb_entry - entry of IOTLB to describe one IOVA region 
[start, last]
+ * @offset: the mmap offset on returned file descriptor
+ * @start: start of the IOVA region
+ * @last: last of the IOVA region
+ * @perm: access permission of the IOVA region
+ *
+ * Structure used by VDUSE_IOTLB_GET_FD ioctl to find an overlapped IOVA 
region.
+ */
+struct vduse_iotlb_entry {
+   __u64 offset;
+   __u64 start;
+   __u64 last;
+#define VDUSE_ACCESS_RO 0x1
+#define VDUSE_ACCESS_WO 0x2
+#define VDUSE_ACCESS_RW 0x3
+   __u8 perm;
+};
+
+/*
+ * Find the first IOVA region that overlaps with the range [start, last]
+ * and return the corresponding file descriptor. Return -EINVAL means the
+ * IOVA region doesn't exist. Caller should set start and last fields.
+ */
+#define VDUSE_IOTLB_GET_FD _IOWR(VDUSE_BASE, 0x10, struct 
vduse_iotlb_entry)
+
+/*
+ * Get the negotiated virtio features. It's a subset of the features in
+ * struct vduse_dev_config which can be accepted by virtio driver. It's
+ * only valid after FEATURES_OK status bit is set.
+ */
+#define VDUSE_DEV_GET_FEATURES _IOR(VDUSE_BASE, 0x11, __u64)
+
+/**
+ * struct vduse_config_data - data used to update configuration space
+ * @offset: the offset from the beginning of configuration space
+ * @length: the length to write to configuration space
+ * @buffer: the buffer used to write from
+ *
+ * Structure used by VDUSE_DEV_SET_CONFIG ioctl to update device
+ * configuration space.
+ */
+struct vduse_config_data {
+   __u32 offset;
+   __u32 length;
+   __u8 buffer[];
+};
+
+/* Set device configuration space */
+#define VDUSE_DEV_SET_CONFIG   _IOW(VDUSE_BASE, 0x12, struct vduse_config_data)
+
+/*
+ * Inject a config interrupt. It's usually used to notify virtio driver
+ * that device configuration space has changed.
+ */
+#define VDUSE_DEV_INJECT_CONFIG_IRQ _IO(VDUSE_BASE, 0x13)
+
+/**
+ * struct vduse_vq_config - basic configuration of a virtqueue
+ * @index: virtqueue index
+ * @max_size: the max size of virtqueue
+ * @reserved: for future use, needs to be initialized to zero
+ *
+ * Structure used by VDUSE_VQ_SETUP ioctl to setup a virtqueue.
+ */
+struct vduse_vq_config {
+   __u32 index;
+   __u16 max_size;
+   __u16 reserved[13];
+};
+
+/*
+ * Setup the specified virtqueue. Make sure all virtqueues have been
+ * configured before the device is attached to vDPA bus.
+ */
+#define VDUSE_VQ_SETUP _IOW(VDUSE_BASE,

[PULL v2 17/20] aio_wait_kick: add missing memory barrier

2022-06-24 Thread Kevin Wolf

From: Emanuele Giuseppe Esposito 

It seems that aio_wait_kick always required a memory barrier
or atomic operation in the caller, but nobody actually
took care of doing it.

Let's put the barrier in the function instead, and pair it
with another one in AIO_WAIT_WHILE. See the comment in
aio_wait_kick() for further explanation.

Suggested-by: Paolo Bonzini 
Signed-off-by: Emanuele Giuseppe Esposito 
Message-Id: <20220524173054.12651-1-eespo...@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 include/block/aio-wait.h |  2 ++
 util/aio-wait.c  | 16 +++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
index b39eefb38d..54840f8622 100644
--- a/include/block/aio-wait.h
+++ b/include/block/aio-wait.h
@@ -81,6 +81,8 @@ extern AioWait global_aio_wait;
 AioContext *ctx_ = (ctx);  \
 /* Increment wait_->num_waiters before evaluating cond. */ \
 qatomic_inc(&wait_->num_waiters);  \
+/* Paired with smp_mb in aio_wait_kick(). */   \
+smp_mb();  \
 if (ctx_ && in_aio_context_home_thread(ctx_)) {\
 while ((cond)) {   \
 aio_poll(ctx_, true);  \
diff --git a/util/aio-wait.c b/util/aio-wait.c
index bdb3d3af22..98c5accd29 100644
--- a/util/aio-wait.c
+++ b/util/aio-wait.c
@@ -35,7 +35,21 @@ static void dummy_bh_cb(void *opaque)
 
 void aio_wait_kick(void)
 {
-/* The barrier (or an atomic op) is in the caller.  */
+/*
+ * Paired with smp_mb in AIO_WAIT_WHILE. Here we have:
+ * write(condition);
+ * aio_wait_kick() {
+ *  smp_mb();
+ *  read(num_waiters);
+ * }
+ *
+ * And in AIO_WAIT_WHILE:
+ * write(num_waiters);
+ * smp_mb();
+ * read(condition);
+ */
+smp_mb();
+
 if (qatomic_read(&global_aio_wait.num_waiters)) {
 aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
 }
-- 
2.35.3

[PULL v2 08/20] block/export: Abstract out the logic of virtio-blk I/O process

2022-06-24 Thread Kevin Wolf

From: Xie Yongji 

Abstract the common logic of virtio-blk I/O processing into a function
named virtio_blk_process_req(). It is needed by the following commit.

Signed-off-by: Xie Yongji 
Message-Id: <20220523084611.91-4-xieyon...@bytedance.com>
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 block/export/virtio-blk-handler.h|  37 
 block/export/vhost-user-blk-server.c | 259 +++
 block/export/virtio-blk-handler.c| 240 +
 MAINTAINERS  |   2 +
 block/export/meson.build |   2 +-
 5 files changed, 301 insertions(+), 239 deletions(-)
 create mode 100644 block/export/virtio-blk-handler.h
 create mode 100644 block/export/virtio-blk-handler.c

diff --git a/block/export/virtio-blk-handler.h 
b/block/export/virtio-blk-handler.h
new file mode 100644
index 00..1c7a5e32ad
--- /dev/null
+++ b/block/export/virtio-blk-handler.h
@@ -0,0 +1,37 @@
+/*
+ * Handler for virtio-blk I/O
+ *
+ * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights 
reserved.
+ *
+ * Author:
+ *   Xie Yongji 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef VIRTIO_BLK_HANDLER_H
+#define VIRTIO_BLK_HANDLER_H
+
+#include "sysemu/block-backend.h"
+
+#define VIRTIO_BLK_SECTOR_BITS 9
+#define VIRTIO_BLK_SECTOR_SIZE (1ULL << VIRTIO_BLK_SECTOR_BITS)
+
+#define VIRTIO_BLK_MAX_DISCARD_SECTORS 32768
+#define VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS 32768
+
+typedef struct {
+BlockBackend *blk;
+const char *serial;
+uint32_t logical_block_size;
+bool writable;
+} VirtioBlkHandler;
+
+int coroutine_fn virtio_blk_process_req(VirtioBlkHandler *handler,
+struct iovec *in_iov,
+struct iovec *out_iov,
+unsigned int in_num,
+unsigned int out_num);
+
+#endif /* VIRTIO_BLK_HANDLER_H */
diff --git a/block/export/vhost-user-blk-server.c 
b/block/export/vhost-user-blk-server.c
index 19c6ee51d3..c9c290cc4c 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -17,31 +17,15 @@
 #include "vhost-user-blk-server.h"
 #include "qapi/error.h"
 #include "qom/object_interfaces.h"
-#include "sysemu/block-backend.h"
 #include "util/block-helpers.h"
-
-/*
- * Sector units are 512 bytes regardless of the
- * virtio_blk_config->blk_size value.
- */
-#define VIRTIO_BLK_SECTOR_BITS 9
-#define VIRTIO_BLK_SECTOR_SIZE (1ull << VIRTIO_BLK_SECTOR_BITS)
+#include "virtio-blk-handler.h"
 
 enum {
 VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1,
-VHOST_USER_BLK_MAX_DISCARD_SECTORS = 32768,
-VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS = 32768,
-};
-struct virtio_blk_inhdr {
-unsigned char status;
 };
 
 typedef struct VuBlkReq {
 VuVirtqElement elem;
-int64_t sector_num;
-size_t size;
-struct virtio_blk_inhdr *in;
-struct virtio_blk_outhdr out;
 VuServer *server;
 struct VuVirtq *vq;
 } VuBlkReq;
@@ -50,247 +34,44 @@ typedef struct VuBlkReq {
 typedef struct {
 BlockExport export;
 VuServer vu_server;
-uint32_t blk_size;
+VirtioBlkHandler handler;
 QIOChannelSocket *sioc;
 struct virtio_blk_config blkcfg;
-bool writable;
 } VuBlkExport;
 
-static void vu_blk_req_complete(VuBlkReq *req)
+static void vu_blk_req_complete(VuBlkReq *req, size_t in_len)
 {
 VuDev *vu_dev = &req->server->vu_dev;
 
-vu_queue_push(vu_dev, req->vq, &req->elem, req->size);
+vu_queue_push(vu_dev, req->vq, &req->elem, in_len);
 vu_queue_notify(vu_dev, req->vq);
 
 free(req);
 }
 
-static bool vu_blk_sect_range_ok(VuBlkExport *vexp, uint64_t sector,
- size_t size)
-{
-uint64_t nb_sectors;
-uint64_t total_sectors;
-
-if (size % VIRTIO_BLK_SECTOR_SIZE) {
-return false;
-}
-
-nb_sectors = size >> VIRTIO_BLK_SECTOR_BITS;
-
-QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != VIRTIO_BLK_SECTOR_SIZE);
-if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-return false;
-}
-if ((sector << VIRTIO_BLK_SECTOR_BITS) % vexp->blk_size) {
-return false;
-}
-blk_get_geometry(vexp->export.blk, &total_sectors);
-if (sector > total_sectors || nb_sectors > total_sectors - sector) {
-return false;
-}
-return true;
-}
-
-static int coroutine_fn
-vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov,
-uint32_t iovcnt, uint32_t type)
-{
-BlockBackend *blk = vexp->export.blk;
-struct virtio_blk_discard_write_zeroes desc;
-ssize_t size;
-uint64_t sector;
-uint32_t num_sectors;
-uint32_t max_sectors;
-uint32_t flags;
-int bytes;
-
-/* Only one desc is currently supported */
-if (unlikely(iov_size(iov, iovcnt) > sizeof(desc))) {
-return VIRTIO_BLK_S_UNSUPP;
-

[PULL v2 01/20] block: drop unused bdrv_co_drain() API

2022-06-24 Thread Kevin Wolf

From: Stefan Hajnoczi 

bdrv_co_drain() has not been used since commit 9a0cec664eef ("mirror:
use bdrv_drained_begin/bdrv_drained_end") in 2016. Remove it so there
are fewer drain scenarios to worry about.

Use bdrv_drained_begin()/bdrv_drained_end() instead. They are "mixed"
functions that can be called from coroutine context. Unlike
bdrv_co_drain(), these functions provide control of the length of the
drained section, which is usually the right thing.

Signed-off-by: Stefan Hajnoczi 
Message-Id: <20220521122714.3837731-1-stefa...@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito 
Reviewed-by: Alberto Faria 
Signed-off-by: Kevin Wolf 
---
 include/block/block-io.h |  1 -
 block/io.c   | 15 ---
 2 files changed, 16 deletions(-)

diff --git a/include/block/block-io.h b/include/block/block-io.h
index 62c84f0519..053a27141a 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -270,7 +270,6 @@ void bdrv_drained_end_no_poll(BlockDriverState *bs, int 
*drained_end_counter);
cond); })
 
 void bdrv_drain(BlockDriverState *bs);
-void coroutine_fn bdrv_co_drain(BlockDriverState *bs);
 
 int generated_co_wrapper
 bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
diff --git a/block/io.c b/block/io.c
index 789e6373d5..1e9bf09a49 100644
--- a/block/io.c
+++ b/block/io.c
@@ -588,21 +588,6 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, 
BlockDriverState *old_parent)
 BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0);
 }
 
-/*
- * Wait for pending requests to complete on a single BlockDriverState subtree,
- * and suspend block driver's internal I/O until next request arrives.
- *
- * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
- * AioContext.
- */
-void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
-{
-IO_OR_GS_CODE();
-assert(qemu_in_coroutine());
-bdrv_drained_begin(bs);
-bdrv_drained_end(bs);
-}
-
 void bdrv_drain(BlockDriverState *bs)
 {
 IO_OR_GS_CODE();
-- 
2.35.3

[PULL v2 00/20] Block layer patches

2022-06-24 Thread Kevin Wolf

The following changes since commit 3a821c52e1a30ecd9a436f2c67cc66b5628c829f:

  Merge tag 'nvme-next-pull-request' of git://git.infradead.org/qemu-nvme into 
staging (2022-06-23 14:52:30 -0700)

are available in the Git repository at:

  git://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to 779d82e1d305f2a9cbd7f48cf6555ad58145e04a:

  vduse-blk: Add name option (2022-06-24 17:07:06 +0200)


Block layer patches

- Add vduse-blk export
- Dirty bitmaps: Fix and improve bitmap merge
- gluster: correctly set max_pdiscard
- rbd: report a better error when namespace does not exist
- aio_wait_kick: add missing memory barrier
- Code cleanups


Emanuele Giuseppe Esposito (1):
  aio_wait_kick: add missing memory barrier

Eric Blake (1):
  nbd: Drop dead code spotted by Coverity

Fabian Ebner (1):
  block/gluster: correctly set max_pdiscard

Stefan Hajnoczi (3):
  block: drop unused bdrv_co_drain() API
  block: get rid of blk->guest_block_size
  qsd: document vduse-blk exports

Stefano Garzarella (1):
  block/rbd: report a better error when namespace does not exist

Vladimir Sementsov-Ogievskiy (3):
  block: block_dirty_bitmap_merge(): fix error path
  block: improve block_dirty_bitmap_merge(): don't allocate extra bitmap
  block: simplify handling of try to merge different sized bitmaps

Xie Yongji (10):
  block: Support passing NULL ops to blk_set_dev_ops()
  block/export: Fix incorrect length passed to vu_queue_push()
  block/export: Abstract out the logic of virtio-blk I/O process
  linux-headers: Add vduse.h
  libvduse: Add VDUSE (vDPA Device in Userspace) library
  vduse-blk: Implement vduse-blk export
  vduse-blk: Add vduse-blk resize support
  libvduse: Add support for reconnecting
  vduse-blk: Add serial option
  vduse-blk: Add name option

 qapi/block-export.json  |   29 +-
 docs/tools/qemu-storage-daemon.rst  |   22 +
 meson_options.txt   |4 +
 block/export/vduse-blk.h|   20 +
 block/export/virtio-blk-handler.h   |   37 +
 include/block/aio-wait.h|2 +
 include/block/block-io.h|1 -
 include/block/block_int-io.h|2 +-
 include/qemu/hbitmap.h  |   15 +-
 include/sysemu/block-backend-io.h   |1 -
 linux-headers/linux/vduse.h |  306 ++
 subprojects/libvduse/include/atomic.h   |1 +
 subprojects/libvduse/include/compiler.h |1 +
 subprojects/libvduse/libvduse.h |  247 +
 block/backup.c  |6 +-
 block/block-backend.c   |   12 +-
 block/dirty-bitmap.c|   26 +-
 block/export/export.c   |6 +
 block/export/vduse-blk.c|  374 
 block/export/vhost-user-blk-server.c|  263 +
 block/export/virtio-blk-handler.c   |  240 +
 block/gluster.c |2 +-
 block/io.c  |   15 -
 block/monitor/bitmap-qmp-cmds.c |   40 +-
 block/nbd.c |8 +-
 block/rbd.c |   24 +
 hw/block/virtio-blk.c   |1 -
 hw/block/xen-block.c|1 -
 hw/ide/core.c   |1 -
 hw/scsi/scsi-disk.c |1 -
 hw/scsi/scsi-generic.c  |1 -
 storage-daemon/qemu-storage-daemon.c|   10 +
 subprojects/libvduse/libvduse.c | 1375 +++
 util/aio-wait.c |   16 +-
 util/hbitmap.c  |   25 +-
 MAINTAINERS |9 +
 block/export/meson.build|7 +-
 meson.build |   34 +
 scripts/meson-buildoptions.sh   |7 +
 scripts/update-linux-headers.sh |2 +-
 subprojects/libvduse/linux-headers/linux|1 +
 subprojects/libvduse/meson.build|   10 +
 subprojects/libvduse/standard-headers/linux |1 +
 43 files changed, 2852 insertions(+), 354 deletions(-)
 create mode 100644 block/export/vduse-blk.h
 create mode 100644 block/export/virtio-blk-handler.h
 create mode 100644 linux-headers/linux/vduse.h
 create mode 12 subprojects/libvduse/include/atomic.h
 create mode 12 subprojects/libvduse/include/compiler.h
 create mode 100644 subprojects/libvduse/libvduse.h
 create mode 100644 block/export/vduse-blk.c
 create mode 100644 block/export/virtio-blk-handler.c
 create mode 100644 subprojects/libvduse/libvduse.c
 create mode 12 subprojects/libvduse/linux-headers/linux
 create mode 100644 subpr

[PULL v2 12/20] vduse-blk: Add vduse-blk resize support

2022-06-24 Thread Kevin Wolf

From: Xie Yongji 

To support block resize, use vduse_dev_update_config() in the block
resize callback to update the capacity field in the configuration
space and inject a config interrupt.

Signed-off-by: Xie Yongji 
Reviewed-by: Stefan Hajnoczi 
Message-Id: <20220523084611.91-8-xieyon...@bytedance.com>
Signed-off-by: Kevin Wolf 
---
 block/export/vduse-blk.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c
index 04be16c133..cab1904234 100644
--- a/block/export/vduse-blk.c
+++ b/block/export/vduse-blk.c
@@ -204,6 +204,23 @@ static void blk_aio_detach(void *opaque)
 vblk_exp->export.ctx = NULL;
 }
 
+static void vduse_blk_resize(void *opaque)
+{
+BlockExport *exp = opaque;
+VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
+struct virtio_blk_config config;
+
+config.capacity =
+cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
+vduse_dev_update_config(vblk_exp->dev, sizeof(config.capacity),
+offsetof(struct virtio_blk_config, capacity),
+(char *)&config.capacity);
+}
+
+static const BlockDevOps vduse_block_ops = {
+.resize_cb = vduse_blk_resize,
+};
+
 static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
 Error **errp)
 {
@@ -299,6 +316,8 @@ static int vduse_blk_exp_create(BlockExport *exp, 
BlockExportOptions *opts,
 blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
  vblk_exp);
 
+blk_set_dev_ops(exp->blk, &vduse_block_ops, exp);
+
 return 0;
 }
 
@@ -308,6 +327,7 @@ static void vduse_blk_exp_delete(BlockExport *exp)
 
 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
 vblk_exp);
+blk_set_dev_ops(exp->blk, NULL, NULL);
 vduse_dev_destroy(vblk_exp->dev);
 }
 
-- 
2.35.3

[PULL v2 02/20] block: get rid of blk->guest_block_size

2022-06-24 Thread Kevin Wolf

From: Stefan Hajnoczi 

Commit 1b7fd729559c ("block: rename buffer_alignment to
guest_block_size") noted:

  At this point, the field is set by the device emulation, but completely
  ignored by the block layer.

The last time the value of buffer_alignment/guest_block_size was
actually used was before commit 339064d50639 ("block: Don't use guest
sector size for qemu_blockalign()").

This value has not been used since 2013. Get rid of it.

Cc: Xie Yongji 
Signed-off-by: Stefan Hajnoczi 
Message-Id: <20220518130945.2657905-1-stefa...@redhat.com>
Reviewed-by: Paul Durrant 
Reviewed-by: Eric Blake 
Reviewed-by: Alberto Faria 
Signed-off-by: Kevin Wolf 
---
 include/sysemu/block-backend-io.h|  1 -
 block/block-backend.c| 10 --
 block/export/vhost-user-blk-server.c |  1 -
 hw/block/virtio-blk.c|  1 -
 hw/block/xen-block.c |  1 -
 hw/ide/core.c|  1 -
 hw/scsi/scsi-disk.c  |  1 -
 hw/scsi/scsi-generic.c   |  1 -
 8 files changed, 17 deletions(-)

diff --git a/include/sysemu/block-backend-io.h 
b/include/sysemu/block-backend-io.h
index 6517c39295..ccef514023 100644
--- a/include/sysemu/block-backend-io.h
+++ b/include/sysemu/block-backend-io.h
@@ -72,7 +72,6 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction 
action,
 void blk_iostatus_set_err(BlockBackend *blk, int error);
 int blk_get_max_iov(BlockBackend *blk);
 int blk_get_max_hw_iov(BlockBackend *blk);
-void blk_set_guest_block_size(BlockBackend *blk, int align);
 
 void blk_io_plug(BlockBackend *blk);
 void blk_io_unplug(BlockBackend *blk);
diff --git a/block/block-backend.c b/block/block-backend.c
index e0e1aff4b1..d4abdf8faa 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -56,9 +56,6 @@ struct BlockBackend {
 const BlockDevOps *dev_ops;
 void *dev_opaque;
 
-/* the block size for which the guest device expects atomicity */
-int guest_block_size;
-
 /* If the BDS tree is removed, some of its options are stored here (which
  * can be used to restore those options in the new BDS on insert) */
 BlockBackendRootState root_state;
@@ -998,7 +995,6 @@ void blk_detach_dev(BlockBackend *blk, DeviceState *dev)
 blk->dev = NULL;
 blk->dev_ops = NULL;
 blk->dev_opaque = NULL;
-blk->guest_block_size = 512;
 blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
 blk_unref(blk);
 }
@@ -2100,12 +2096,6 @@ int blk_get_max_iov(BlockBackend *blk)
 return blk->root->bs->bl.max_iov;
 }
 
-void blk_set_guest_block_size(BlockBackend *blk, int align)
-{
-IO_CODE();
-blk->guest_block_size = align;
-}
-
 void *blk_try_blockalign(BlockBackend *blk, size_t size)
 {
 IO_CODE();
diff --git a/block/export/vhost-user-blk-server.c 
b/block/export/vhost-user-blk-server.c
index a129204c44..b2e458ade3 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -495,7 +495,6 @@ static int vu_blk_exp_create(BlockExport *exp, 
BlockExportOptions *opts,
 return -EINVAL;
 }
 vexp->blk_size = logical_block_size;
-blk_set_guest_block_size(exp->blk, logical_block_size);
 
 if (vu_opts->has_num_queues) {
 num_queues = vu_opts->num_queues;
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index cd804795c6..e9ba752f6b 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -1228,7 +1228,6 @@ static void virtio_blk_device_realize(DeviceState *dev, 
Error **errp)
 
 s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
 blk_set_dev_ops(s->blk, &virtio_block_ops, s);
-blk_set_guest_block_size(s->blk, s->conf.conf.logical_block_size);
 
 blk_iostatus_enable(s->blk);
 
diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index 674953f1ad..345b284d70 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -243,7 +243,6 @@ static void xen_block_realize(XenDevice *xendev, Error 
**errp)
 }
 
 blk_set_dev_ops(blk, &xen_block_dev_ops, blockdev);
-blk_set_guest_block_size(blk, conf->logical_block_size);
 
 if (conf->discard_granularity == -1) {
 conf->discard_granularity = conf->physical_block_size;
diff --git a/hw/ide/core.c b/hw/ide/core.c
index c2caa54285..7cbc0a54a7 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -2548,7 +2548,6 @@ int ide_init_drive(IDEState *s, BlockBackend *blk, 
IDEDriveKind kind,
 s->smart_selftest_count = 0;
 if (kind == IDE_CD) {
 blk_set_dev_ops(blk, &ide_cd_block_ops, s);
-blk_set_guest_block_size(blk, 2048);
 } else {
 if (!blk_is_inserted(s->blk)) {
 error_setg(errp, "Device needs media, but drive is empty");
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 072686ed58..91acb5c0ce 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -2419,7 +2419,6 @@ static void scsi_realize(SCSIDevice *dev, Error **errp)
 } else {
 blk_set_dev_ops(s->qdev.conf.

[PULL v2 04/20] block: improve block_dirty_bitmap_merge(): don't allocate extra bitmap

2022-06-24 Thread Kevin Wolf

From: Vladimir Sementsov-Ogievskiy 

We don't need an extra bitmap. All we need is to back up the original
bitmap when we do the first merge. So, drop the extra temporary bitmap
and work directly with the target and the backup.

Still, to keep the old semantics, where on failure the target is
unchanged and the user doesn't need to restore it, we need a
local_backup variable and must do the restore ourselves on the failure
path.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20220517111206.23585-3-v.sementsov...@mail.ru>
Reviewed-by: Eric Blake 
Signed-off-by: Kevin Wolf 
---
 block/monitor/bitmap-qmp-cmds.c | 41 +
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/block/monitor/bitmap-qmp-cmds.c b/block/monitor/bitmap-qmp-cmds.c
index bd10468596..282363606f 100644
--- a/block/monitor/bitmap-qmp-cmds.c
+++ b/block/monitor/bitmap-qmp-cmds.c
@@ -261,8 +261,9 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, 
const char *target,
   HBitmap **backup, Error **errp)
 {
 BlockDriverState *bs;
-BdrvDirtyBitmap *dst, *src, *anon;
+BdrvDirtyBitmap *dst, *src;
 BlockDirtyBitmapOrStrList *lst;
+HBitmap *local_backup = NULL;
 
 GLOBAL_STATE_CODE();
 
@@ -271,12 +272,6 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char 
*node, const char *target,
 return NULL;
 }
 
-anon = bdrv_create_dirty_bitmap(bs, bdrv_dirty_bitmap_granularity(dst),
-NULL, errp);
-if (!anon) {
-return NULL;
-}
-
 for (lst = bms; lst; lst = lst->next) {
 switch (lst->value->type) {
 const char *name, *node;
@@ -285,8 +280,7 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, 
const char *target,
 src = bdrv_find_dirty_bitmap(bs, name);
 if (!src) {
 error_setg(errp, "Dirty bitmap '%s' not found", name);
-dst = NULL;
-goto out;
+goto fail;
 }
 break;
 case QTYPE_QDICT:
@@ -294,29 +288,36 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char 
*node, const char *target,
 name = lst->value->u.external.name;
 src = block_dirty_bitmap_lookup(node, name, NULL, errp);
 if (!src) {
-dst = NULL;
-goto out;
+goto fail;
 }
 break;
 default:
 abort();
 }
 
-if (!bdrv_merge_dirty_bitmap(anon, src, NULL, errp)) {
-dst = NULL;
-goto out;
+/* We do backup only for first merge operation */
+if (!bdrv_merge_dirty_bitmap(dst, src,
+ local_backup ? NULL : &local_backup,
+ errp))
+{
+goto fail;
 }
 }
 
-/* Merge into dst; dst is unchanged on failure. */
-if (!bdrv_merge_dirty_bitmap(dst, anon, backup, errp)) {
-dst = NULL;
-goto out;
+if (backup) {
+*backup = local_backup;
+} else {
+hbitmap_free(local_backup);
 }
 
- out:
-bdrv_release_dirty_bitmap(anon);
 return dst;
+
+fail:
+if (local_backup) {
+bdrv_restore_dirty_bitmap(dst, local_backup);
+}
+
+return NULL;
 }
 
 void qmp_block_dirty_bitmap_merge(const char *node, const char *target,
-- 
2.35.3

[PATCH] meson: Prefix each element of firmware path

2022-06-24 Thread Akihiko Odaki

Signed-off-by: Akihiko Odaki 
---
 configure | 23 +++
 meson.build   | 10 --
 meson_options.txt |  2 +-
 scripts/meson-buildoptions.py |  7 +--
 scripts/meson-buildoptions.sh |  6 +++---
 softmmu/datadir.c |  8 +---
 6 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/configure b/configure
index 4b12a8094ca..ab952e7ce3b 100755
--- a/configure
+++ b/configure
@@ -675,6 +675,29 @@ fi
 
 werror=""
 
+meson_option_build_array() {
+  local a
+  local ifs
+
+  if test "$targetos" == windows; then
+ifs=\;
+  else
+ifs=:
+  fi
+
+  echo -n "["
+
+  while IFS="$ifs" read -ra a; do
+for e in "${a[@]}"; do
+  echo -n '"""'
+  echo -n "$e" | sed 's/\\//g; s/"/\\"/g'
+  echo -n '"""',
+done
+  done <<< "$1"
+
+  echo "]"
+}
+
 . $source_path/scripts/meson-buildoptions.sh
 
 meson_options=
diff --git a/meson.build b/meson.build
index 0c2e11ff071..40111ce4053 100644
--- a/meson.build
+++ b/meson.build
@@ -1684,7 +1684,13 @@ config_host_data.set_quoted('CONFIG_PREFIX', 
get_option('prefix'))
 config_host_data.set_quoted('CONFIG_QEMU_CONFDIR', get_option('prefix') / 
qemu_confdir)
 config_host_data.set_quoted('CONFIG_QEMU_DATADIR', get_option('prefix') / 
qemu_datadir)
 config_host_data.set_quoted('CONFIG_QEMU_DESKTOPDIR', get_option('prefix') / 
qemu_desktopdir)
-config_host_data.set_quoted('CONFIG_QEMU_FIRMWAREPATH', get_option('prefix') / 
get_option('qemu_firmwarepath'))
+
+qemu_firmwarepath = ''
+foreach k : get_option('qemu_firmwarepath')
+  qemu_firmwarepath += '"' + get_option('prefix') / k + '", '
+endforeach
+config_host_data.set('CONFIG_QEMU_FIRMWAREPATH', qemu_firmwarepath)
+
 config_host_data.set_quoted('CONFIG_QEMU_HELPERDIR', get_option('prefix') / 
get_option('libexecdir'))
 config_host_data.set_quoted('CONFIG_QEMU_ICONDIR', get_option('prefix') / 
qemu_icondir)
 config_host_data.set_quoted('CONFIG_QEMU_LOCALEDIR', get_option('prefix') / 
get_option('localedir'))
@@ -3622,7 +3628,7 @@ endif
 summary_info = {}
 summary_info += {'Install prefix':get_option('prefix')}
 summary_info += {'BIOS directory':qemu_datadir}
-summary_info += {'firmware path': get_option('prefix') / 
get_option('qemu_firmwarepath')}
+summary_info += {'firmware path': qemu_firmwarepath}
 summary_info += {'binary directory':  get_option('prefix') / 
get_option('bindir')}
 summary_info += {'library directory': get_option('prefix') / 
get_option('libdir')}
 summary_info += {'module directory':  qemu_moddir}
diff --git a/meson_options.txt b/meson_options.txt
index 0e8197386b9..8ad5cd73819 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -6,7 +6,7 @@ option('qemu_suffix', type : 'string', value: 'qemu',
description: 'Suffix for QEMU data/modules/config directories (can be 
empty)')
 option('docdir', type : 'string', value : 'share/doc',
description: 'Base directory for documentation installation (can be 
empty)')
-option('qemu_firmwarepath', type : 'string', value : 'qemu-firmware',
+option('qemu_firmwarepath', type : 'array', value : ['qemu-firmware'],
description: 'search PATH for firmware files')
 option('pkgversion', type : 'string', value : '',
description: 'use specified string as sub-version of the package')
diff --git a/scripts/meson-buildoptions.py b/scripts/meson-buildoptions.py
index e624c16b01a..3e2b4785388 100755
--- a/scripts/meson-buildoptions.py
+++ b/scripts/meson-buildoptions.py
@@ -156,7 +156,7 @@ def cli_metavar(opt):
 if opt["type"] == "string":
 return "VALUE"
 if opt["type"] == "array":
-return "CHOICES"
+return "CHOICES" if "choices" in opt else "VALUES"
 return "CHOICE"
 
 
@@ -199,7 +199,10 @@ def print_parse(options):
 key = cli_option(opt)
 name = opt["name"]
 if require_arg(opt):
-print(f'--{key}=*) quote_sh "-D{name}=$2" ;;')
+if opt["type"] == "array" and not "choices" in opt:
+print(f'--{key}=*) quote_sh 
"-D{name}=$(meson_option_build_array $2)" ;;')
+else:
+print(f'--{key}=*) quote_sh "-D{name}=$2" ;;')
 elif opt["type"] == "boolean":
 print(f'--enable-{key}) printf "%s" -D{name}=true ;;')
 print(f'--disable-{key}) printf "%s" -D{name}=false ;;')
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 1fc1d2e2c36..238bab162bd 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -42,12 +42,12 @@ meson_options_help() {
   printf "%s\n" '  --enable-trace-backends=CHOICES'
   printf "%s\n" '   Set available tracing backends 
[log] (choices:'
   printf "%s\n" '   
dtrace/ftrace/log/nop/simple/syslog/ust)'
-  printf "%s\n" '  --firmwarepath=VALUE search PATH for firmware files 
[qemu-firmware]'
+  printf "%s\n" '  --firmwarepath=VALUESs

Re: [PATCH v3 22/51] target/arm: Trap AdvSIMD usage when Streaming SVE is active

2022-06-24 Thread Peter Maydell

On Mon, 20 Jun 2022 at 19:09, Richard Henderson
 wrote:
>
> This new behaviour is in the ARM pseudocode function
> AArch64.CheckFPAdvSIMDEnabled, which applies to AArch32
> via AArch32.CheckAdvSIMDOrFPEnabled when the EL to which
> the trap would be delivered is in AArch64 mode.
>
> Given that ARMv9 drops support for AArch32 outside EL0,
> the trap EL detection ought to be trivially true, but
> the pseudocode still contains a number of conditions,
> and QEMU has not yet committed to dropping A32 support
> for EL[12] when v9 features are present.
>
> Since the computation of SME_TRAP_SIMD is necessarily
> different for the two modes, we might as well preserve
> bits within TBFLAG_ANY and allocate separate bits within
> TBFLAG_A32 and TBFLAG_A64 instead.

> +# These patterns are taken from Appendix E1.1 of DDI0616 A.a,
> +# Arm Architecture Reference Manual Supplement,
> +# The Scalable Matrix Extension (SME), for Armv9-A

So the thing that worries me about structuring this this way
is that the SME supplement appendix includes this caution:

# The instruction encoding tables in this section [...] will
# require correction if subsequent versions of the A64 ISA
# add new instructions which overlap with these encodings.

My guess (based on how the H.a Arm ARM has incorporated
SME) is that these tables aren't going to be included
in the Arm ARM and updated going forward. Instead the
behaviour will be documented based on whether (existing
and new) instructions call CheckNonStreamingSVEEnabled()
or CheckSVEEnabled() in their pseudocode.

So I'm a bit uncertain about how awkward it's going to be
in future to maintain this transliteration of the SME
supplement tables into decodetree: we might find that
we have to look at new instructions and kind of
reverse-engineer back out any required changes to the
tables here, rather than simply "write the trans_ function
for the new insn, looking at the pseudocode to see which
_access_check() function it should be calling"...

thanks
-- PMM

Re: [PATCH qemu v2] ppc: Define SETFIELD for the ppc target

2022-06-24 Thread Daniel Henrique Barboza


Alexey,

Gitlab does not like what you're doing here. Several cross-compile runners
fail with errors like these (this is from cross-win64-system):


../hw/intc/pnv_xive.c: In function 'pnv_xive_block_id':
3328/builds/danielhb/qemu/target/ppc/cpu.h:45:33: error: overflow in conversion 
from 'long long unsigned int' to 'long int' changes value from 
'4222124650659840' to '0' [-Werror=overflow]
3329   45 | #define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | 
PPC_BIT(bs))
3330  | 
^~~
3331/builds/danielhb/qemu/target/ppc/cpu.h:57:49: note: in definition of macro 
'MASK_TO_LSH'
3332   57 | #define MASK_TO_LSH(m)  (__builtin_ffsl(m) - 1)
  | ^
3334../hw/intc/pnv_xive.c:80:15: note: in expansion of macro 'GETFIELD'
3335   80 | blk = GETFIELD(PC_TCTXT_CHIPID, cfg_val);
3336  |   ^~~~
3337../hw/intc/pnv_xive_regs.h:77:41: note: in expansion of macro 'PPC_BITMASK'
3338   77 | #define  PC_TCTXT_CHIPIDPPC_BITMASK(12, 15)
3339  | ^~~
3340../hw/intc/pnv_xive.c:80:24: note: in expansion of macro 'PC_TCTXT_CHIPID'
3341   80 | blk = GETFIELD(PC_TCTXT_CHIPID, cfg_val);
3342  |^~~
3343/builds/danielhb/qemu/target/ppc/cpu.h:58:46: error: right shift count is 
negative [-Werror=shift-count-negative]
3344   58 | #define GETFIELD(m, v)  (((v) & (m)) >> MASK_TO_LSH(m))
3345  |  ^~
3346../hw/intc/pnv_xive.c:80:15: note: in expansion of macro 'GETFIELD'
3347   80 | blk = GETFIELD(PC_TCTXT_CHIPID, cfg_val);
3348  |   ^~~~
../hw/intc/pnv_xive.c: In function 'pnv_xive_vst_addr':
3350/builds/danielhb/qemu/target/ppc/cpu.h:45:33: error: overflow in conversion 
from 'long long unsigned int' to 'long int' changes value from 
'13835058055282163712' to '0' [-Werror=overflow]
3351   45 | #define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | 
PPC_BIT(bs))
3352  | 
^~~
3353/builds/danielhb/qemu/target/ppc/cpu.h:57:49: note: in definition of macro 
'MASK_TO_LSH'
3354   57 | #define MASK_TO_LSH(m)  (__builtin_ffsl(m) - 1)
3355  | ^
3356../hw/intc/pnv_xive.c:226:9: note: in expansion of macro 'GETFIELD'
3357  226 | if (GETFIELD(VSD_MODE, vsd) == VSD_MODE_FORWARD) {
3358  | ^~~~
3359../hw/intc/pnv_xive_regs.h:230:33: note: in expansion of macro 'PPC_BITMASK'
3360  230 | #define VSD_MODEPPC_BITMASK(0, 1)
3361  | ^~~
3362../hw/intc/pnv_xive.c:226:18: note: in expansion of macro 'VSD_MODE'
3363  226 | if (GETFIELD(VSD_MODE, vsd) == VSD_MODE_FORWARD) {
3364  |  ^~~~
3365/builds/danielhb/qemu/target/ppc/cpu.h:58:46: error: right shift count is 
negative [-Werror=shift-count-negative]
3366   58 | #define GETFIELD(m, v)  (((v) & (m)) >> MASK_TO_LSH(m))
3367  |  ^~
3368../hw/intc/pnv_xive.c:226:9: note: in expansion of macro 'GETFIELD'
3369  226 | if (GETFIELD(VSD_MODE, vsd) == VSD_MODE_FORWARD) {
3370  | ^~~~


You can see the results here:

https://gitlab.com/danielhb/qemu/-/jobs/2636585317

Other failing runners include cross-armel-system and cross-mips-system, so I
don't think that the error is related to Windows specifics.


I guess we're missing a uint64_t cast somewhere, like you did in the v2 of
this patch. The skiboot macros as-is will not cut it.


Thanks,


Daniel


On 6/22/22 02:08, Alexey Kardashevskiy wrote:

It keeps repeating, move it to the header. This uses __builtin_ffsl() to
allow using the macros in #define.

This is not using the QEMU's FIELD macros as this would require changing
all such macros found in skiboot (the PPC PowerNV firmware).

Signed-off-by: Alexey Kardashevskiy 
---
Changes:
v2:
* preserved the comment about skiboot
* copied the actual macros from skiboot:
https://github.com/open-power/skiboot/blob/master/include/bitutils.h#L31
---
  include/hw/pci-host/pnv_phb3_regs.h | 16 
  target/ppc/cpu.h| 12 
  hw/intc/pnv_xive.c  | 20 
  hw/intc/pnv_xive2.c | 20 
  hw/pci-host/pnv_phb4.c  | 16 
  5 files changed, 12 insertions(+), 72 deletions(-)

diff --git a/include/hw/pci-host/pnv_phb3_regs.h 
b/include/hw/pci-host/pnv_phb3_regs.h
index a174ef1f7045..38f8ce9d7406 100644
--- a/include/hw/pci-host/pnv_phb3_regs.h
+++ b/include/hw/pci-host/pnv_phb3_regs.h
@@ -12,22 +12,6 @@
  
  #include "qemu/host-utils.h"
  
-/*

- * QEMU version of the GETFIELD/SETFIELD macros
- *
- * These are commo

1 2 3 >

1 - 100 of 294 matches

Mail list logo