hw/nvme: why schedule sq timer when cq is full?
Hi Keith, I just came across this piece of code in nvme_process_db() that I found weird: start_sqs = nvme_cq_full(cq) ? 1 : 0; ... if (start_sqs) { NvmeSQueue *sq; QTAILQ_FOREACH(sq, &cq->sq_list, entry) { timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); } timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); } The logic seems to be “If CQ is full, schedule SQ timer to produce more completions”. I cannot understand this. I think it would make more sense with “If CQ is NOT full, schedule SQ timer to produce more completions”. Am I missing something? Thanks, Jinhao Fan
[PATCH v2 09/11] bsd-user: Implement pathconf, lpathconf and fpathconf
Signed-off-by: Stacey Son Signed-off-by: Warner Losh Reviewed-by: Richard Henderson --- bsd-user/bsd-file.h | 32 bsd-user/freebsd/os-syscall.c | 12 2 files changed, 44 insertions(+) diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h index 4b2f6dcc1dc..065f576dfe8 100644 --- a/bsd-user/bsd-file.h +++ b/bsd-user/bsd-file.h @@ -894,4 +894,36 @@ static abi_long do_bsd_mkfifoat(abi_long arg1, abi_long arg2, return ret; } +/* pathconf(2) */ +static abi_long do_bsd_pathconf(abi_long arg1, abi_long arg2) +{ +abi_long ret; +void *p; + +LOCK_PATH(p, arg1); +ret = get_errno(pathconf(p, arg2)); /* XXX path(p)? */ +UNLOCK_PATH(p, arg1); + +return ret; +} + +/* lpathconf(2) */ +static abi_long do_bsd_lpathconf(abi_long arg1, abi_long arg2) +{ +abi_long ret; +void *p; + +LOCK_PATH(p, arg1); +ret = get_errno(lpathconf(p, arg2)); /* XXX path(p)? */ +UNLOCK_PATH(p, arg1); + +return ret; +} + +/* fpathconf(2) */ +static abi_long do_bsd_fpathconf(abi_long arg1, abi_long arg2) +{ +return get_errno(fpathconf(arg1, arg2)); +} + #endif /* BSD_FILE_H */ diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c index be225195fbd..7de4c40bb16 100644 --- a/bsd-user/freebsd/os-syscall.c +++ b/bsd-user/freebsd/os-syscall.c @@ -475,6 +475,18 @@ static abi_long freebsd_syscall(void *cpu_env, int num, abi_long arg1, ret = do_bsd_mkfifoat(arg1, arg2, arg3); break; +case TARGET_FREEBSD_NR_pathconf: /* pathconf(2) */ +ret = do_bsd_pathconf(arg1, arg2); +break; + +case TARGET_FREEBSD_NR_lpathconf: /* lpathconf(2) */ +ret = do_bsd_lpathconf(arg1, arg2); +break; + +case TARGET_FREEBSD_NR_fpathconf: /* fpathconf(2) */ +ret = do_bsd_fpathconf(arg1, arg2); +break; + default: qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num); ret = -TARGET_ENOSYS; -- 2.33.1
[PATCH v2 11/11] bsd-user: Remove stray 'inline' from do_bsd_close
In the last series, I inadvertently didn't remove this inline, but did all the others. Remove it for consistency. Signed-off-by: Warner Losh --- bsd-user/bsd-file.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h index 108a5061850..588e0c50d45 100644 --- a/bsd-user/bsd-file.h +++ b/bsd-user/bsd-file.h @@ -252,7 +252,7 @@ static abi_long do_bsd_openat(abi_long arg1, abi_long arg2, } /* close(2) */ -static inline abi_long do_bsd_close(abi_long arg1) +static abi_long do_bsd_close(abi_long arg1) { return get_errno(close(arg1)); } -- 2.33.1
[PATCH v2 06/11] bsd-user: Implement chflags, lchflags and fchflags
Signed-off-by: Stacey Son Signed-off-by: Warner Losh Reviewed-by: Richard Henderson --- bsd-user/bsd-file.h | 32 bsd-user/freebsd/os-syscall.c | 12 2 files changed, 44 insertions(+) diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h index ac171c409ca..a1c80428d98 100644 --- a/bsd-user/bsd-file.h +++ b/bsd-user/bsd-file.h @@ -816,4 +816,36 @@ static abi_long do_bsd_fchownat(abi_long arg1, abi_long arg2, return ret; } +/* chflags(2) */ +static abi_long do_bsd_chflags(abi_long arg1, abi_long arg2) +{ +abi_long ret; +void *p; + +LOCK_PATH(p, arg1); +ret = get_errno(chflags(p, arg2)); /* XXX path(p)? */ +UNLOCK_PATH(p, arg1); + +return ret; +} + +/* lchflags(2) */ +static abi_long do_bsd_lchflags(abi_long arg1, abi_long arg2) +{ +abi_long ret; +void *p; + +LOCK_PATH(p, arg1); +ret = get_errno(lchflags(p, arg2)); /* XXX path(p)? */ +UNLOCK_PATH(p, arg1); + +return ret; +} + +/* fchflags(2) */ +static abi_long do_bsd_fchflags(abi_long arg1, abi_long arg2) +{ +return get_errno(fchflags(arg1, arg2)); +} + #endif /* BSD_FILE_H */ diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c index 8090666b0d9..06bc76a326b 100644 --- a/bsd-user/freebsd/os-syscall.c +++ b/bsd-user/freebsd/os-syscall.c @@ -447,6 +447,18 @@ static abi_long freebsd_syscall(void *cpu_env, int num, abi_long arg1, ret = do_bsd_fchownat(arg1, arg2, arg3, arg4, arg5); break; +case TARGET_FREEBSD_NR_chflags: /* chflags(2) */ +ret = do_bsd_chflags(arg1, arg2); +break; + +case TARGET_FREEBSD_NR_lchflags: /* lchflags(2) */ +ret = do_bsd_lchflags(arg1, arg2); +break; + +case TARGET_FREEBSD_NR_fchflags: /* fchflags(2) */ +ret = do_bsd_fchflags(arg1, arg2); +break; + default: qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num); ret = -TARGET_ENOSYS; -- 2.33.1
[PATCH v2 08/11] bsd-user: Implement mkfifo and mkfifoat
Signed-off-by: Stacey Son Signed-off-by: Warner Losh Reviewed-by: Richard Henderson --- bsd-user/bsd-file.h | 27 +++ bsd-user/freebsd/os-syscall.c | 8 2 files changed, 35 insertions(+) diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h index c24054fed11..4b2f6dcc1dc 100644 --- a/bsd-user/bsd-file.h +++ b/bsd-user/bsd-file.h @@ -867,4 +867,31 @@ static abi_long do_bsd_flock(abi_long arg1, abi_long arg2) return get_errno(flock(arg1, arg2)); } +/* mkfifo(2) */ +static abi_long do_bsd_mkfifo(abi_long arg1, abi_long arg2) +{ +abi_long ret; +void *p; + +LOCK_PATH(p, arg1); +ret = get_errno(mkfifo(p, arg2)); /* XXX path(p)? */ +UNLOCK_PATH(p, arg1); + +return ret; +} + +/* mkfifoat(2) */ +static abi_long do_bsd_mkfifoat(abi_long arg1, abi_long arg2, +abi_long arg3) +{ +abi_long ret; +void *p; + +LOCK_PATH(p, arg2); +ret = get_errno(mkfifoat(arg1, p, arg3)); +UNLOCK_PATH(p, arg2); + +return ret; +} + #endif /* BSD_FILE_H */ diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c index d252fb40737..be225195fbd 100644 --- a/bsd-user/freebsd/os-syscall.c +++ b/bsd-user/freebsd/os-syscall.c @@ -467,6 +467,14 @@ static abi_long freebsd_syscall(void *cpu_env, int num, abi_long arg1, ret = do_bsd_flock(arg1, arg2); break; +case TARGET_FREEBSD_NR_mkfifo: /* mkfifo(2) */ +ret = do_bsd_mkfifo(arg1, arg2); +break; + +case TARGET_FREEBSD_NR_mkfifoat: /* mkfifoat(2) */ +ret = do_bsd_mkfifoat(arg1, arg2, arg3); +break; + default: qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num); ret = -TARGET_ENOSYS; -- 2.33.1
[PATCH v2 10/11] bsd-user: Implement undelete
Signed-off-by: Stacey Son Signed-off-by: Warner Losh Reviewed-by: Richard Henderson --- bsd-user/bsd-file.h | 13 + bsd-user/freebsd/os-syscall.c | 4 2 files changed, 17 insertions(+) diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h index 065f576dfe8..108a5061850 100644 --- a/bsd-user/bsd-file.h +++ b/bsd-user/bsd-file.h @@ -926,4 +926,17 @@ static abi_long do_bsd_fpathconf(abi_long arg1, abi_long arg2) return get_errno(fpathconf(arg1, arg2)); } +/* undelete(2) */ +static abi_long do_bsd_undelete(abi_long arg1) +{ +abi_long ret; +void *p; + +LOCK_PATH(p, arg1); +ret = get_errno(undelete(p)); /* XXX path(p)? */ +UNLOCK_PATH(p, arg1); + +return ret; +} + #endif /* BSD_FILE_H */ diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c index 7de4c40bb16..57996cad8ae 100644 --- a/bsd-user/freebsd/os-syscall.c +++ b/bsd-user/freebsd/os-syscall.c @@ -487,6 +487,10 @@ static abi_long freebsd_syscall(void *cpu_env, int num, abi_long arg1, ret = do_bsd_fpathconf(arg1, arg2); break; +case TARGET_FREEBSD_NR_undelete: /* undelete(2) */ +ret = do_bsd_undelete(arg1); +break; + default: qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num); ret = -TARGET_ENOSYS; -- 2.33.1
[PATCH v2 04/11] bsd-user: Implement freebsd11_mknod, freebsd11_mknodat and mknodat
These implement both the old-pre INO64 mknod variations, as well as the now current INO64 variant. To implement the old stuff, we use some linker magic to bind to the old versions of these functions. Signed-off-by: Stacey Son Signed-off-by: Michal Meloun Signed-off-by: Warner Losh --- bsd-user/bsd-file.h | 47 +++ bsd-user/freebsd/os-syscall.c | 13 ++ 2 files changed, 60 insertions(+) diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h index 1af79866fc6..b05d3cbb717 100644 --- a/bsd-user/bsd-file.h +++ b/bsd-user/bsd-file.h @@ -721,4 +721,51 @@ static abi_long do_bsd_fchmodat(abi_long arg1, abi_long arg2, return ret; } +/* pre-ino64 mknod(2) */ +static abi_long do_bsd_freebsd11_mknod(abi_long arg1, abi_long arg2, abi_long arg3) +{ +abi_long ret; +void *p; + +LOCK_PATH(p, arg1); +ret = get_errno(syscall(SYS_freebsd11_mknod, p, arg2, arg3)); +UNLOCK_PATH(p, arg1); + +return ret; +} + +/* pre-ino64 mknodat(2) */ +static abi_long do_bsd_freebsd11_mknodat(abi_long arg1, abi_long arg2, +abi_long arg3, abi_long arg4) +{ +abi_long ret; +void *p; + +LOCK_PATH(p, arg2); +ret = get_errno(syscall(SYS_freebsd11_mknodat, arg1, p, arg3, arg4)); +UNLOCK_PATH(p, arg2); + +return ret; +} + +/* post-ino64 mknodat(2) */ +static abi_long do_bsd_mknodat(void *cpu_env, abi_long arg1, +abi_long arg2, abi_long arg3, abi_long arg4, abi_long arg5, +abi_long arg6) +{ +abi_long ret; +void *p; + +LOCK_PATH(p, arg2); + /* 32-bit arch's use two 32 registers for 64 bit return value */ +if (regpairs_aligned(cpu_env) != 0) { +ret = get_errno(mknodat(arg1, p, arg3, target_arg64(arg5, arg6))); +} else { +ret = get_errno(mknodat(arg1, p, arg3, target_arg64(arg4, arg5))); +} +UNLOCK_PATH(p, arg2); + +return ret; +} + #endif /* BSD_FILE_H */ diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c index b33d548a4b6..d3125f340f7 100644 --- a/bsd-user/freebsd/os-syscall.c +++ b/bsd-user/freebsd/os-syscall.c @@ -32,6 +32,7 @@ #include "qemu/cutils.h" #include "qemu/path.h" #include 
+#include #include #include #include @@ -418,6 +419,18 @@ static abi_long freebsd_syscall(void *cpu_env, int num, abi_long arg1, ret = do_bsd_fchmodat(arg1, arg2, arg3, arg4); break; +case TARGET_FREEBSD_NR_freebsd11_mknod: /* mknod(2) */ +ret = do_bsd_freebsd11_mknod(arg1, arg2, arg3); +break; + +case TARGET_FREEBSD_NR_freebsd11_mknodat: /* mknodat(2) */ +ret = do_bsd_freebsd11_mknodat(arg1, arg2, arg3, arg4); +break; + +case TARGET_FREEBSD_NR_mknodat: /* mknodat(2) */ +ret = do_bsd_mknodat(cpu_env, arg1, arg2, arg3, arg4, arg5, arg6); +break; + default: qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num); ret = -TARGET_ENOSYS; -- 2.33.1
[PATCH v2 05/11] bsd-user: Implement chown, fchown, lchown and fchownat
Signed-off-by: Stacey Son Signed-off-by: Warner Losh Reviewed-by: Richard Henderson --- bsd-user/bsd-file.h | 48 +++ bsd-user/freebsd/os-syscall.c | 16 2 files changed, 64 insertions(+) diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h index b05d3cbb717..ac171c409ca 100644 --- a/bsd-user/bsd-file.h +++ b/bsd-user/bsd-file.h @@ -768,4 +768,52 @@ static abi_long do_bsd_mknodat(void *cpu_env, abi_long arg1, return ret; } +/* chown(2) */ +static abi_long do_bsd_chown(abi_long arg1, abi_long arg2, abi_long arg3) +{ +abi_long ret; +void *p; + +LOCK_PATH(p, arg1); +ret = get_errno(chown(p, arg2, arg3)); /* XXX path(p)? */ +UNLOCK_PATH(p, arg1); + +return ret; +} + +/* fchown(2) */ +static abi_long do_bsd_fchown(abi_long arg1, abi_long arg2, +abi_long arg3) +{ +return get_errno(fchown(arg1, arg2, arg3)); +} + +/* lchown(2) */ +static abi_long do_bsd_lchown(abi_long arg1, abi_long arg2, +abi_long arg3) +{ +abi_long ret; +void *p; + +LOCK_PATH(p, arg1); +ret = get_errno(lchown(p, arg2, arg3)); /* XXX path(p)? */ +UNLOCK_PATH(p, arg1); + +return ret; +} + +/* fchownat(2) */ +static abi_long do_bsd_fchownat(abi_long arg1, abi_long arg2, +abi_long arg3, abi_long arg4, abi_long arg5) +{ +abi_long ret; +void *p; + +LOCK_PATH(p, arg2); +ret = get_errno(fchownat(arg1, p, arg3, arg4, arg5)); /* XXX path(p)? 
*/ +UNLOCK_PATH(p, arg2); + +return ret; +} + #endif /* BSD_FILE_H */ diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c index d3125f340f7..8090666b0d9 100644 --- a/bsd-user/freebsd/os-syscall.c +++ b/bsd-user/freebsd/os-syscall.c @@ -431,6 +431,22 @@ static abi_long freebsd_syscall(void *cpu_env, int num, abi_long arg1, ret = do_bsd_mknodat(cpu_env, arg1, arg2, arg3, arg4, arg5, arg6); break; +case TARGET_FREEBSD_NR_chown: /* chown(2) */ +ret = do_bsd_chown(arg1, arg2, arg3); +break; + +case TARGET_FREEBSD_NR_fchown: /* fchown(2) */ +ret = do_bsd_fchown(arg1, arg2, arg3); +break; + +case TARGET_FREEBSD_NR_lchown: /* lchown(2) */ +ret = do_bsd_lchown(arg1, arg2, arg3); +break; + +case TARGET_FREEBSD_NR_fchownat: /* fchownat(2) */ +ret = do_bsd_fchownat(arg1, arg2, arg3, arg4, arg5); +break; + default: qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num); ret = -TARGET_ENOSYS; -- 2.33.1
[PATCH v6 15/15] block: refactor bdrv_remove_file_or_backing_child to bdrv_remove_child
Now the function can remove any child, so give it more common name. Drop assertions and drop bs argument which becomes unused. Function would be reused in a further commit. Signed-off-by: Vladimir Sementsov-Ogievskiy --- block.c | 27 +-- 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/block.c b/block.c index 6b08d20d8c..ddd043f556 100644 --- a/block.c +++ b/block.c @@ -92,9 +92,7 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, static void bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs); -static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, - BdrvChild *child, - Transaction *tran); +static void bdrv_remove_child(BdrvChild *child, Transaction *tran); static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, Transaction *tran); @@ -3335,7 +,7 @@ static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs, if (child) { bdrv_unset_inherits_from(parent_bs, child, tran); -bdrv_remove_file_or_backing_child(parent_bs, child, tran); +bdrv_remove_child(child, tran); } if (!child_bs) { @@ -5019,26 +5017,19 @@ static bool should_update_child(BdrvChild *c, BlockDriverState *to) return ret; } -static void bdrv_remove_filter_or_cow_child_commit(void *opaque) +static void bdrv_remove_child_commit(void *opaque) { GLOBAL_STATE_CODE(); bdrv_child_free(opaque); } -static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = { -.commit = bdrv_remove_filter_or_cow_child_commit, +static TransactionActionDrv bdrv_remove_child_drv = { +.commit = bdrv_remove_child_commit, }; -/* - * A function to remove backing or file child of @bs. - * Function doesn't update permissions, caller is responsible for this. - */ -static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, - BdrvChild *child, - Transaction *tran) +/* Function doesn't update permissions, caller is responsible for this. 
*/ +static void bdrv_remove_child(BdrvChild *child, Transaction *tran) { -assert(child == bs->backing || child == bs->file); - if (!child) { return; } @@ -5047,7 +5038,7 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, bdrv_replace_child_tran(child, NULL, tran); } -tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, child); +tran_add(tran, &bdrv_remove_child_drv, child); } /* @@ -5058,7 +5049,7 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, Transaction *tran) { -bdrv_remove_file_or_backing_child(bs, bdrv_filter_or_cow_child(bs), tran); +bdrv_remove_child(bdrv_filter_or_cow_child(bs), tran); } static int bdrv_replace_node_noperm(BlockDriverState *from, -- 2.25.1
[PATCH v2 07/11] bsd-user: Implement chroot and flock
Signed-off-by: Stacey Son Signed-off-by: Warner Losh Reviewed-by: Richard Henderson --- bsd-user/bsd-file.h | 19 +++ bsd-user/freebsd/os-syscall.c | 8 2 files changed, 27 insertions(+) diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h index a1c80428d98..c24054fed11 100644 --- a/bsd-user/bsd-file.h +++ b/bsd-user/bsd-file.h @@ -848,4 +848,23 @@ static abi_long do_bsd_fchflags(abi_long arg1, abi_long arg2) return get_errno(fchflags(arg1, arg2)); } +/* chroot(2) */ +static abi_long do_bsd_chroot(abi_long arg1) +{ +abi_long ret; +void *p; + +LOCK_PATH(p, arg1); +ret = get_errno(chroot(p)); /* XXX path(p)? */ +UNLOCK_PATH(p, arg1); + +return ret; +} + +/* flock(2) */ +static abi_long do_bsd_flock(abi_long arg1, abi_long arg2) +{ +return get_errno(flock(arg1, arg2)); +} + #endif /* BSD_FILE_H */ diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c index 06bc76a326b..d252fb40737 100644 --- a/bsd-user/freebsd/os-syscall.c +++ b/bsd-user/freebsd/os-syscall.c @@ -459,6 +459,14 @@ static abi_long freebsd_syscall(void *cpu_env, int num, abi_long arg1, ret = do_bsd_fchflags(arg1, arg2); break; +case TARGET_FREEBSD_NR_chroot: /* chroot(2) */ +ret = do_bsd_chroot(arg1); +break; + +case TARGET_FREEBSD_NR_flock: /* flock(2) */ +ret = do_bsd_flock(arg1, arg2); +break; + default: qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num); ret = -TARGET_ENOSYS; -- 2.33.1
[PATCH v2 03/11] bsd-user: implement chmod, fchmod, lchmod and fchmodat
Signed-off-by: Stacey Son Signed-off-by: Warner Losh Reviewed-by: Richard Henderson --- bsd-user/bsd-file.h | 46 +++ bsd-user/freebsd/os-syscall.c | 16 2 files changed, 62 insertions(+) diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h index 635ac8d0e62..1af79866fc6 100644 --- a/bsd-user/bsd-file.h +++ b/bsd-user/bsd-file.h @@ -675,4 +675,50 @@ static abi_long do_bsd_readlinkat(abi_long arg1, abi_long arg2, return ret; } +/* chmod(2) */ +static abi_long do_bsd_chmod(abi_long arg1, abi_long arg2) +{ +abi_long ret; +void *p; + +LOCK_PATH(p, arg1); +ret = get_errno(chmod(p, arg2)); /* XXX path(p)? */ +UNLOCK_PATH(p, arg1); + +return ret; +} + +/* fchmod(2) */ +static abi_long do_bsd_fchmod(abi_long arg1, abi_long arg2) +{ +return get_errno(fchmod(arg1, arg2)); +} + +/* lchmod(2) */ +static abi_long do_bsd_lchmod(abi_long arg1, abi_long arg2) +{ +abi_long ret; +void *p; + +LOCK_PATH(p, arg1); +ret = get_errno(lchmod(p, arg2)); /* XXX path(p)? */ +UNLOCK_PATH(p, arg1); + +return ret; +} + +/* fchmodat(2) */ +static abi_long do_bsd_fchmodat(abi_long arg1, abi_long arg2, +abi_long arg3, abi_long arg4) +{ +abi_long ret; +void *p; + +LOCK_PATH(p, arg2); +ret = get_errno(fchmodat(arg1, p, arg3, arg4)); +UNLOCK_PATH(p, arg2); + +return ret; +} + #endif /* BSD_FILE_H */ diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c index 80ec9dd4954..b33d548a4b6 100644 --- a/bsd-user/freebsd/os-syscall.c +++ b/bsd-user/freebsd/os-syscall.c @@ -402,6 +402,22 @@ static abi_long freebsd_syscall(void *cpu_env, int num, abi_long arg1, ret = do_bsd_readlinkat(arg1, arg2, arg3, arg4); break; +case TARGET_FREEBSD_NR_chmod: /* chmod(2) */ +ret = do_bsd_chmod(arg1, arg2); +break; + +case TARGET_FREEBSD_NR_fchmod: /* fchmod(2) */ +ret = do_bsd_fchmod(arg1, arg2); +break; + +case TARGET_FREEBSD_NR_lchmod: /* lchmod(2) */ +ret = do_bsd_lchmod(arg1, arg2); +break; + +case TARGET_FREEBSD_NR_fchmodat: /* fchmodat(2) */ +ret = do_bsd_fchmodat(arg1, arg2, arg3, arg4); +break; + 
default: qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num); ret = -TARGET_ENOSYS; -- 2.33.1
[PATCH v2 02/11] bsd-user: Implement symlink, symlinkat, readlink and readlinkat
Signed-off-by: Stacey Son Signed-off-by: Jung-uk Kim Signed-off-by: Warner Losh --- bsd-user/bsd-file.h | 74 +++ bsd-user/freebsd/os-syscall.c | 16 2 files changed, 90 insertions(+) diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h index a0f03102639..635ac8d0e62 100644 --- a/bsd-user/bsd-file.h +++ b/bsd-user/bsd-file.h @@ -601,4 +601,78 @@ static abi_long do_bsd_nmount(abi_long arg1, abi_long count, return ret; } +/* symlink(2) */ +static abi_long do_bsd_symlink(abi_long arg1, abi_long arg2) +{ +abi_long ret; +void *p1, *p2; + +LOCK_PATH2(p1, arg1, p2, arg2); +ret = get_errno(symlink(p1, p2)); /* XXX path(p1), path(p2) */ +UNLOCK_PATH2(p1, arg1, p2, arg2); + +return ret; +} + +/* symlinkat(2) */ +static abi_long do_bsd_symlinkat(abi_long arg1, abi_long arg2, +abi_long arg3) +{ +abi_long ret; +void *p1, *p2; + +LOCK_PATH2(p1, arg1, p2, arg3); +ret = get_errno(symlinkat(p1, arg2, p2)); /* XXX path(p1), path(p2) */ +UNLOCK_PATH2(p1, arg1, p2, arg3); + +return ret; +} + +/* readlink(2) */ +static abi_long do_bsd_readlink(CPUArchState *env, abi_long arg1, +abi_long arg2, abi_long arg3) +{ +abi_long ret; +void *p1, *p2; + +LOCK_PATH(p1, arg1); +p2 = lock_user(VERIFY_WRITE, arg2, arg3, 0); +if (p2 == NULL) { +UNLOCK_PATH(p1, arg1); +return -TARGET_EFAULT; +} +if (strcmp(p1, "/proc/curproc/file") == 0) { +CPUState *cpu = env_cpu(env); +TaskState *ts = (TaskState *)cpu->opaque; +strncpy(p2, ts->bprm->fullpath, arg3); +ret = MIN((abi_long)strlen(ts->bprm->fullpath), arg3); +} else { +ret = get_errno(readlink(path(p1), p2, arg3)); +} +unlock_user(p2, arg2, ret); +UNLOCK_PATH(p1, arg1); + +return ret; +} + +/* readlinkat(2) */ +static abi_long do_bsd_readlinkat(abi_long arg1, abi_long arg2, +abi_long arg3, abi_long arg4) +{ +abi_long ret; +void *p1, *p2; + +LOCK_PATH(p1, arg2); +p2 = lock_user(VERIFY_WRITE, arg3, arg4, 0); +if (p2 == NULL) { +UNLOCK_PATH(p1, arg2); +return -TARGET_EFAULT; +} +ret = get_errno(readlinkat(arg1, p1, p2, arg4)); +unlock_user(p2, arg3, ret); 
+UNLOCK_PATH(p1, arg2); + +return ret; +} + #endif /* BSD_FILE_H */ diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c index bd4dfa6ddc7..80ec9dd4954 100644 --- a/bsd-user/freebsd/os-syscall.c +++ b/bsd-user/freebsd/os-syscall.c @@ -386,6 +386,22 @@ static abi_long freebsd_syscall(void *cpu_env, int num, abi_long arg1, ret = do_bsd_nmount(arg1, arg2, arg3); break; +case TARGET_FREEBSD_NR_symlink: /* symlink(2) */ +ret = do_bsd_symlink(arg1, arg2); +break; + +case TARGET_FREEBSD_NR_symlinkat: /* symlinkat(2) */ +ret = do_bsd_symlinkat(arg1, arg2, arg3); +break; + +case TARGET_FREEBSD_NR_readlink: /* readlink(2) */ +ret = do_bsd_readlink(cpu_env, arg1, arg2, arg3); +break; + +case TARGET_FREEBSD_NR_readlinkat: /* readlinkat(2) */ +ret = do_bsd_readlinkat(arg1, arg2, arg3, arg4); +break; + default: qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num); ret = -TARGET_ENOSYS; -- 2.33.1
[PATCH v2 00/11] bsd-user: More file-related system calls
A second round of mostly BSD-independent filesystem calls: mount, unmount, nmount, symlink, symlinkat, readlink, readlinkat, chmod, fchmod, lchmod, fchmodat, freebsd11_mknod, freebsd11_mknodat, mknodat, chown, fchown, lchown, fchownat, chflags, lchflags, fchflags, chroot, flock, mkfifo, mkfifoat, pathconf, lpathconf, fpathconf, undelete. These are all non-reentrant system calls, so these wrappers are pretty simple and no safe_* versions need to be created. In addition, a small correction to an earlier series is included. V2: Updated with review comments. Reworked freebsd11_mknod* stuff after unifdef BSD_HAVE_INO64 Fixed comments that had too many words Added one more hunk to remove a stray 'inline' that slipped through earlier Need reviews on: bsd-user: Implement symlink, symlinkat, readlink and readlinkat bsd-user: Implement freebsd11_mknod, freebsd11_mknodat and mknodat bsd-user: Remove stray 'inline' from do_bsd_close Warner Losh (11): bsd-user: Implement mount, umount and nmount bsd-user: Implement symlink, symlinkat, readlink and readlinkat bsd-user: implement chmod, fchmod, lchmod and fchmodat bsd-user: Implement freebsd11_mknod, freebsd11_mknodat and mknodat bsd-user: Implement chown, fchown, lchown and fchownat bsd-user: Implement chflags, lchflags and fchflags bsd-user: Implement chroot and flock bsd-user: Implement mkfifo and mkfifoat bsd-user: Implement pathconf, lpathconf and fpathconf bsd-user: Implement undelete bsd-user: Remove stray 'inline' from do_bsd_close bsd-user/bsd-file.h | 392 +- bsd-user/freebsd/os-syscall.c | 118 ++ 2 files changed, 509 insertions(+), 1 deletion(-) -- 2.33.1
[PATCH v6 13/15] block: Manipulate bs->file / bs->backing pointers in .attach/.detach
bs->file and bs->backing are a kind of duplication of part of bs->children. But a very useful duplication, so let's not drop them at all:) We should manage bs->file and bs->backing in the same place where we manage bs->children, to keep them in sync. Moreover, generic io paths are unprepared for a BdrvChild without a bs, so it's doubly good to clear bs->file / bs->backing when we detach the child. Detach is simple: if we detach bs->file or bs->backing child, just set the corresponding field to NULL. Attach is a bit more complicated. But we still can precisely detect whether we should set one of bs->file / bs->backing or not: - if role is BDRV_CHILD_COW, we definitely deal with bs->backing - else, if role is BDRV_CHILD_FILTERED (it must be also BDRV_CHILD_PRIMARY), it's a filtered child. Use bs->drv->filtered_child_is_backing to choose the pointer field to modify. - else, if role is BDRV_CHILD_PRIMARY, we deal with bs->file - in all other cases, it's neither bs->backing nor bs->file. It's some other child and we shouldn't care OK. This change brings one more good thing: we can (and should) get rid of all indirect pointers in the block-graph-change transactions: bdrv_attach_child_common() stores BdrvChild** into transaction to clear it on abort. bdrv_attach_child_common() has two callers: bdrv_attach_child_noperm() just passes through this feature, bdrv_root_attach_child() doesn't need the feature. Look at bdrv_attach_child_noperm() callers: - bdrv_attach_child() doesn't need the feature - bdrv_set_file_or_backing_noperm() uses the feature to manage bs->file and bs->backing, we don't want it anymore - bdrv_append() uses the feature to manage bs->backing, again we don't want it anymore So, we should drop this stuff! Great! We could probably keep BdrvChild** argument to keep the int return value, but it seems not worth the complexity. Finally, we now set .file / .backing automatically in generic code and want to restrict setting them by hand outside of .attach/.detach.
So, this patch cleanups all remaining places where they were set. To find such places I use: git grep '\->file =' git grep '\->backing =' git grep '&.*\' git grep '&.*\' Signed-off-by: Vladimir Sementsov-Ogievskiy --- block.c | 234 ++- block/raw-format.c | 4 +- block/snapshot-access.c | 6 +- block/snapshot.c | 1 - include/block/block_int-common.h | 15 +- tests/unit/test-bdrv-drain.c | 10 +- 6 files changed, 126 insertions(+), 144 deletions(-) diff --git a/block.c b/block.c index ca86cd86d3..6b08d20d8c 100644 --- a/block.c +++ b/block.c @@ -1438,9 +1438,39 @@ static void bdrv_child_cb_attach(BdrvChild *child) assert_bdrv_graph_writable(bs); QLIST_INSERT_HEAD(&bs->children, child, next); - -if (child->role & BDRV_CHILD_COW) { +if (bs->drv->is_filter || (child->role & BDRV_CHILD_FILTERED)) { +/* + * Here we handle filters and block/raw-format.c when it behave like + * filter. They generally have a single PRIMARY child, which is also the + * FILTERED child, and that they may have multiple more children, which + * are neither PRIMARY nor FILTERED. And never we have a COW child here. + * So bs->file will be the PRIMARY child, unless the PRIMARY child goes + * into bs->backing on exceptional cases; and bs->backing will be + * nothing else. 
+ */ +assert(!(child->role & BDRV_CHILD_COW)); +if (child->role & BDRV_CHILD_PRIMARY) { +assert(child->role & BDRV_CHILD_FILTERED); +assert(!bs->backing); +assert(!bs->file); + +if (bs->drv->filtered_child_is_backing) { +bs->backing = child; +} else { +bs->file = child; +} +} else { +assert(!(child->role & BDRV_CHILD_FILTERED)); +} +} else if (child->role & BDRV_CHILD_COW) { +assert(bs->drv->supports_backing); +assert(!(child->role & BDRV_CHILD_PRIMARY)); +assert(!bs->backing); +bs->backing = child; bdrv_backing_attach(child); +} else if (child->role & BDRV_CHILD_PRIMARY) { +assert(!bs->file); +bs->file = child; } bdrv_apply_subtree_drain(child, bs); @@ -1458,6 +1488,12 @@ static void bdrv_child_cb_detach(BdrvChild *child) assert_bdrv_graph_writable(bs); QLIST_REMOVE(child, next); +if (child == bs->backing) { +assert(child != bs->file); +bs->backing = NULL; +} else if (child == bs->file) { +bs->file = NULL; +} } static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, @@ -1663,7 +1699,7 @@ open_failed: bs->drv = NULL; if (bs->file != NULL) { bdrv_unref_child(bs, bs->file); -bs->file = NULL; +assert(!bs->file); } g_free(bs->opaque); bs->opaque = NULL;
[PATCH v2 01/11] bsd-user: Implement mount, umount and nmount
Signed-off-by: Stacey Son Signed-off-by: Jung-uk Kim Signed-off-by: Warner Losh Reviewed-by: Richard Henderson --- bsd-user/bsd-file.h | 52 +++ bsd-user/freebsd/os-syscall.c | 13 + 2 files changed, 65 insertions(+) diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h index b2dca586129..a0f03102639 100644 --- a/bsd-user/bsd-file.h +++ b/bsd-user/bsd-file.h @@ -549,4 +549,56 @@ static abi_long do_bsd_sync(void) return 0; } +/* mount(2) */ +static abi_long do_bsd_mount(abi_long arg1, abi_long arg2, abi_long arg3, +abi_long arg4) +{ +abi_long ret; +void *p1, *p2; + +LOCK_PATH2(p1, arg1, p2, arg2); +/* + * XXX arg4 should be locked, but it isn't clear how to do that since it may + * be not be a NULL-terminated string. + */ +if (arg4 == 0) { +ret = get_errno(mount(p1, p2, arg3, NULL)); /* XXX path(p2)? */ +} else { +ret = get_errno(mount(p1, p2, arg3, g2h_untagged(arg4))); /* XXX path(p2)? */ +} +UNLOCK_PATH2(p1, arg1, p2, arg2); + +return ret; +} + +/* unmount(2) */ +static abi_long do_bsd_unmount(abi_long arg1, abi_long arg2) +{ +abi_long ret; +void *p; + +LOCK_PATH(p, arg1); +ret = get_errno(unmount(p, arg2)); /* XXX path(p)? 
*/ +UNLOCK_PATH(p, arg1); + +return ret; +} + +/* nmount(2) */ +static abi_long do_bsd_nmount(abi_long arg1, abi_long count, +abi_long flags) +{ +abi_long ret; +struct iovec *vec = lock_iovec(VERIFY_READ, arg1, count, 1); + +if (vec != NULL) { +ret = get_errno(nmount(vec, count, flags)); +unlock_iovec(vec, arg1, count, 0); +} else { +return -TARGET_EFAULT; +} + +return ret; +} + #endif /* BSD_FILE_H */ diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c index 2623caf8007..bd4dfa6ddc7 100644 --- a/bsd-user/freebsd/os-syscall.c +++ b/bsd-user/freebsd/os-syscall.c @@ -33,6 +33,7 @@ #include "qemu/path.h" #include #include +#include #include #include @@ -373,6 +374,18 @@ static abi_long freebsd_syscall(void *cpu_env, int num, abi_long arg1, ret = do_bsd_sync(); break; +case TARGET_FREEBSD_NR_mount: /* mount(2) */ +ret = do_bsd_mount(arg1, arg2, arg3, arg4); +break; + +case TARGET_FREEBSD_NR_unmount: /* unmount(2) */ +ret = do_bsd_unmount(arg1, arg2); +break; + +case TARGET_FREEBSD_NR_nmount: /* nmount(2) */ +ret = do_bsd_nmount(arg1, arg2, arg3); +break; + default: qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num); ret = -TARGET_ENOSYS; -- 2.33.1
[PATCH v6 12/15] Revert "block: Pass BdrvChild ** to replace_child_noperm"
That's a preparation to previously reverted "block: Let replace_child_noperm free children". Drop it too, we don't need it for a new approach. This reverts commit be64bbb0149748f3999c49b13976aafb8330ea86. Signed-off-by: Vladimir Sementsov-Ogievskiy --- block.c | 23 +++ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/block.c b/block.c index 3ac332a460..ca86cd86d3 100644 --- a/block.c +++ b/block.c @@ -90,7 +90,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, static bool bdrv_recurse_has_child(BlockDriverState *bs, BlockDriverState *child); -static void bdrv_replace_child_noperm(BdrvChild **child, +static void bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs); static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, BdrvChild *child, @@ -2352,7 +2352,7 @@ static void bdrv_replace_child_abort(void *opaque) GLOBAL_STATE_CODE(); /* old_bs reference is transparently moved from @s to @s->child */ -bdrv_replace_child_noperm(&s->child, s->old_bs); +bdrv_replace_child_noperm(s->child, s->old_bs); bdrv_unref(new_bs); } @@ -2382,7 +2382,7 @@ static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, if (new_bs) { bdrv_ref(new_bs); } -bdrv_replace_child_noperm(&child, new_bs); +bdrv_replace_child_noperm(child, new_bs); /* old_bs reference is transparently moved from @child to @s */ } @@ -2764,10 +2764,9 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) return permissions[qapi_perm]; } -static void bdrv_replace_child_noperm(BdrvChild **childp, +static void bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs) { -BdrvChild *child = *childp; BlockDriverState *old_bs = child->bs; int new_bs_quiesce_counter; int drain_saldo; @@ -2865,7 +2864,7 @@ static void bdrv_attach_child_common_abort(void *opaque) BlockDriverState *bs = child->bs; GLOBAL_STATE_CODE(); -bdrv_replace_child_noperm(s->child, NULL); +bdrv_replace_child_noperm(child, NULL); if 
(bdrv_get_aio_context(bs) != s->old_child_ctx) { bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort); @@ -2966,7 +2965,7 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, } bdrv_ref(child_bs); -bdrv_replace_child_noperm(&new_child, child_bs); +bdrv_replace_child_noperm(new_child, child_bs); *child = new_child; @@ -3022,13 +3021,13 @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, return 0; } -static void bdrv_detach_child(BdrvChild **childp) +static void bdrv_detach_child(BdrvChild *child) { -BlockDriverState *old_bs = (*childp)->bs; +BlockDriverState *old_bs = child->bs; GLOBAL_STATE_CODE(); -bdrv_replace_child_noperm(childp, NULL); -bdrv_child_free(*childp); +bdrv_replace_child_noperm(child, NULL); +bdrv_child_free(child); if (old_bs) { /* @@ -3140,7 +3139,7 @@ void bdrv_root_unref_child(BdrvChild *child) GLOBAL_STATE_CODE(); child_bs = child->bs; -bdrv_detach_child(&child); +bdrv_detach_child(child); bdrv_unref(child_bs); } -- 2.25.1
Re: [PATCH v3 22/51] target/arm: Trap AdvSIMD usage when Streaming SVE is active
On Fri, 24 Jun 2022 at 21:34, Richard Henderson wrote: > > On 6/24/22 08:30, Peter Maydell wrote: > > So the thing that worries me about structuring this this way > > is that the SME supplement appendix includes this caution: > > > > # The instruction encoding tables in this section [...] will > > # require correction if subsequent versions of the A64 ISA > > # add new instructions which overlap with these encodings. > > > > My guess (based on how the H.a Arm ARM has incorporated > > SME) is that these tables aren't going to be included > > in the Arm ARM and updated going forward. Instead the > > behaviour will be documented based on whether (existing > > and new) instructions call CheckNonStreamingSVEEnabled() > > or CheckSVEEnabled() in their pseudocode. > > I agree that this would be cleaner and more correct long-term. > > > So I'm a bit uncertain about how awkward it's going to be > > in future to maintain this transliteration of the SME > > supplement tables into decodetree: we might find that > > we have to look at new instructions and kind of > > reverse-engineer back out any required changes to the > > tables here, rather than simply "write the trans_ function > > for the new insn, looking at the pseudocode to see which > > _access_check() function it should be calling"... > > I thought about this, and if it were simply a matter of annotating the > trans_* functions > within translate-sve.c, I would have done it. But I would need to adjust A64 > AdvSIMD as > well, which is still done with the by-hand decoder. > > Can we use this solution in the short term, and fix up advsimd while > converting it to > decodetree? I'm more and more convinced we'll want this sooner than later. Yeah, I guess so. Is it possible to do the SVE stuff the right long-term way and have the short-term fix only for the A64 AdvSIMD, or do we need to do both the same way? -- PMM
[PATCH v6 10/15] Revert "block: Let replace_child_tran keep indirect pointer"
That's a preparation to previously reverted "block: Let replace_child_noperm free children". Drop it too, we don't need it for a new approach. This reverts commit 82b54cf51656bf3cd5ed1ac549e8a1085a0e3290. Signed-off-by: Vladimir Sementsov-Ogievskiy --- block.c | 81 +++-- 1 file changed, 10 insertions(+), 71 deletions(-) diff --git a/block.c b/block.c index 34ca046470..a83845b120 100644 --- a/block.c +++ b/block.c @@ -2334,7 +2334,6 @@ static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, typedef struct BdrvReplaceChildState { BdrvChild *child; -BdrvChild **childp; BlockDriverState *old_bs; } BdrvReplaceChildState; @@ -2352,29 +2351,7 @@ static void bdrv_replace_child_abort(void *opaque) BlockDriverState *new_bs = s->child->bs; GLOBAL_STATE_CODE(); -/* - * old_bs reference is transparently moved from @s to s->child. - * - * Pass &s->child here instead of s->childp, because: - * (1) s->old_bs must be non-NULL, so bdrv_replace_child_noperm() will not - * modify the BdrvChild * pointer we indirectly pass to it, i.e. it - * will not modify s->child. From that perspective, it does not matter - * whether we pass s->childp or &s->child. - * (TODO: Right now, bdrv_replace_child_noperm() never modifies that - * pointer anyway (though it will in the future), so at this point it - * absolutely does not matter whether we pass s->childp or &s->child.) - * (2) If new_bs is not NULL, s->childp will be NULL. We then cannot use - * it here. - * (3) If new_bs is NULL, *s->childp will have been NULLed by - * bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we - * must not pass a NULL *s->childp here. - * (TODO: In its current state, bdrv_replace_child_noperm() will not - * have NULLed *s->childp, so this does not apply yet. It will in the - * future.) - * - * So whether new_bs was NULL or not, we cannot pass s->childp here; and in - * any case, there is no reason to pass it anyway. 
- */ +/* old_bs reference is transparently moved from @s to @s->child */ bdrv_replace_child_noperm(&s->child, s->old_bs); bdrv_unref(new_bs); } @@ -2391,32 +2368,22 @@ static TransactionActionDrv bdrv_replace_child_drv = { * Note: real unref of old_bs is done only on commit. * * The function doesn't update permissions, caller is responsible for this. - * - * Note that if new_bs == NULL, @childp is stored in a state object attached - * to @tran, so that the old child can be reinstated in the abort handler. - * Therefore, if @new_bs can be NULL, @childp must stay valid until the - * transaction is committed or aborted. - * - * (TODO: The reinstating does not happen yet, but it will once - * bdrv_replace_child_noperm() NULLs *childp when new_bs is NULL.) */ -static void bdrv_replace_child_tran(BdrvChild **childp, -BlockDriverState *new_bs, +static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, Transaction *tran) { BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); *s = (BdrvReplaceChildState) { -.child = *childp, -.childp = new_bs == NULL ? 
childp : NULL, -.old_bs = (*childp)->bs, +.child = child, +.old_bs = child->bs, }; tran_add(tran, &bdrv_replace_child_drv, s); if (new_bs) { bdrv_ref(new_bs); } -bdrv_replace_child_noperm(childp, new_bs); -/* old_bs reference is transparently moved from *childp to @s */ +bdrv_replace_child_noperm(&child, new_bs); +/* old_bs reference is transparently moved from @child to @s */ } /* @@ -5041,7 +5008,6 @@ static bool should_update_child(BdrvChild *c, BlockDriverState *to) typedef struct BdrvRemoveFilterOrCowChild { BdrvChild *child; -BlockDriverState *bs; bool is_backing; } BdrvRemoveFilterOrCowChild; @@ -5071,19 +5037,10 @@ static void bdrv_remove_filter_or_cow_child_commit(void *opaque) bdrv_child_free(s->child); } -static void bdrv_remove_filter_or_cow_child_clean(void *opaque) -{ -BdrvRemoveFilterOrCowChild *s = opaque; - -/* Drop the bs reference after the transaction is done */ -bdrv_unref(s->bs); -g_free(s); -} - static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = { .abort = bdrv_remove_filter_or_cow_child_abort, .commit = bdrv_remove_filter_or_cow_child_commit, -.clean = bdrv_remove_filter_or_cow_child_clean, +.clean = g_free, }; /* @@ -5101,11 +5058,6 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, return; } -/* - * Keep a reference to @bs so @childp will stay valid throughout the - * transaction (required by bdrv_replace_child_tran()) - */ -bdrv_ref(bs); if (child == bs->back
Re: [PATCH] aspeed: i2c: Fix DMA len write-enable bit handling
On 6/24/22 22:34, Peter Delevoryas wrote: On Jun 24, 2022, at 1:31 PM, Peter Delevoryas wrote: I noticed i2c rx transfers were getting shortened to "1" on Zephyr. It seems to be because the Zephyr i2c driver sets the RX DMA len with the RX field write-enable bit set (bit 31) to avoid a read-modify-write. [1] /* 0x1C : I2CM Master DMA Transfer Length Register */ I think we should be checking the write-enable bits on the incoming value, not checking the register array. I'm not sure we're even writing the write-enable bits to the register array, actually. [1] https://github.com/AspeedTech-BMC/zephyr/blob/db3dbcc9c52e67a47180890ac938ed380b33f91c/drivers/i2c/i2c_aspeed.c#L145-L148 Arg, forgot this: Fixes: ba2cccd64e90f34 ("aspeed: i2c: Add new mode support”) Should I resend as v2? No. patchwork did it : http://patchwork.ozlabs.org/project/qemu-devel/patch/20220624203151.2026355-1-p...@fb.com/ Thanks, C. Thanks, Peter Signed-off-by: Peter Delevoryas --- hw/i2c/aspeed_i2c.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/i2c/aspeed_i2c.c b/hw/i2c/aspeed_i2c.c index 37ae1f2e04..c4fce7474a 100644 --- a/hw/i2c/aspeed_i2c.c +++ b/hw/i2c/aspeed_i2c.c @@ -644,18 +644,18 @@ static void aspeed_i2c_bus_new_write(AspeedI2CBus *bus, hwaddr offset, RX_BUF_LEN) + 1; break; case A_I2CM_DMA_LEN: -w1t = ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, RX_BUF_LEN_W1T) || - ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, TX_BUF_LEN_W1T); +w1t = FIELD_EX32(value, I2CM_DMA_LEN, RX_BUF_LEN_W1T) || + FIELD_EX32(value, I2CM_DMA_LEN, TX_BUF_LEN_W1T); /* If none of the w1t bits are set, just write to the reg as normal. 
*/ if (!w1t) { bus->regs[R_I2CM_DMA_LEN] = value; break; } -if (ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, RX_BUF_LEN_W1T)) { +if (FIELD_EX32(value, I2CM_DMA_LEN, RX_BUF_LEN_W1T)) { ARRAY_FIELD_DP32(bus->regs, I2CM_DMA_LEN, RX_BUF_LEN, FIELD_EX32(value, I2CM_DMA_LEN, RX_BUF_LEN)); } -if (ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, TX_BUF_LEN_W1T)) { +if (FIELD_EX32(value, I2CM_DMA_LEN, TX_BUF_LEN_W1T)) { ARRAY_FIELD_DP32(bus->regs, I2CM_DMA_LEN, TX_BUF_LEN, FIELD_EX32(value, I2CM_DMA_LEN, TX_BUF_LEN)); } -- 2.30.2
[PATCH v6 09/15] Revert "block: Let replace_child_noperm free children"
We are going to reimplement this behavior (clear bs->file / bs->backing pointers automatically when child->bs is cleared) in a nicer way, see further commit "block: Manipulate bs->file / bs->backing pointers in .attach/.detach". With this revert we bring back a problem that was fixed by b0a9f6fed3d8. Still the problem was mostly theoretical, we don't have concrete bugs fixed by b0a9f6fed3d8, we don't have a specific test. Probably some accidental failures of iotests are related. Alternatively, we may merge this and following three reverts into final "block: Manipulate ..." to avoid any kind of regression. But seems that in this case having separate clear revert commits is better. This reverts commit b0a9f6fed3d80de610dcd04a7e66f9f30a04174f. Signed-off-by: Vladimir Sementsov-Ogievskiy --- block.c | 102 +--- 1 file changed, 23 insertions(+), 79 deletions(-) diff --git a/block.c b/block.c index 883b1bb242..34ca046470 100644 --- a/block.c +++ b/block.c @@ -90,10 +90,8 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, static bool bdrv_recurse_has_child(BlockDriverState *bs, BlockDriverState *child); -static void bdrv_child_free(BdrvChild *child); static void bdrv_replace_child_noperm(BdrvChild **child, - BlockDriverState *new_bs, - bool free_empty_child); + BlockDriverState *new_bs); static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, BdrvChild *child, Transaction *tran); @@ -2338,7 +2336,6 @@ typedef struct BdrvReplaceChildState { BdrvChild *child; BdrvChild **childp; BlockDriverState *old_bs; -bool free_empty_child; } BdrvReplaceChildState; static void bdrv_replace_child_commit(void *opaque) @@ -2346,9 +2343,6 @@ static void bdrv_replace_child_commit(void *opaque) BdrvReplaceChildState *s = opaque; GLOBAL_STATE_CODE(); -if (s->free_empty_child && !s->child->bs) { -bdrv_child_free(s->child); -} bdrv_unref(s->old_bs); } @@ -2366,26 +2360,22 @@ static void bdrv_replace_child_abort(void *opaque) * modify the BdrvChild * pointer we 
indirectly pass to it, i.e. it * will not modify s->child. From that perspective, it does not matter * whether we pass s->childp or &s->child. + * (TODO: Right now, bdrv_replace_child_noperm() never modifies that + * pointer anyway (though it will in the future), so at this point it + * absolutely does not matter whether we pass s->childp or &s->child.) * (2) If new_bs is not NULL, s->childp will be NULL. We then cannot use * it here. * (3) If new_bs is NULL, *s->childp will have been NULLed by * bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we * must not pass a NULL *s->childp here. + * (TODO: In its current state, bdrv_replace_child_noperm() will not + * have NULLed *s->childp, so this does not apply yet. It will in the + * future.) * * So whether new_bs was NULL or not, we cannot pass s->childp here; and in * any case, there is no reason to pass it anyway. */ -bdrv_replace_child_noperm(&s->child, s->old_bs, true); -/* - * The child was pre-existing, so s->old_bs must be non-NULL, and - * s->child thus must not have been freed - */ -assert(s->child != NULL); -if (!new_bs) { -/* As described above, *s->childp was cleared, so restore it */ -assert(s->childp != NULL); -*s->childp = s->child; -} +bdrv_replace_child_noperm(&s->child, s->old_bs); bdrv_unref(new_bs); } @@ -2402,44 +2392,30 @@ static TransactionActionDrv bdrv_replace_child_drv = { * * The function doesn't update permissions, caller is responsible for this. * - * (*childp)->bs must not be NULL. - * * Note that if new_bs == NULL, @childp is stored in a state object attached * to @tran, so that the old child can be reinstated in the abort handler. * Therefore, if @new_bs can be NULL, @childp must stay valid until the * transaction is committed or aborted. * - * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is - * freed (on commit). 
@free_empty_child should only be false if the - * caller will free the BDrvChild themselves (which may be important - * if this is in turn called in another transactional context). + * (TODO: The reinstating does not happen yet, but it will once + * bdrv_replace_child_noperm() NULLs *childp when new_bs is NULL.) */ static void bdrv_replace_child_tran(BdrvChild **childp, BlockDriverState *new_bs, -Transaction *tran, -bool free_empty_ch
[PATCH v6 06/15] test-bdrv-graph-mod: fix filters to be filters
bdrv_pass_through is used as a filter; even all the node variables have corresponding names. We want to append it, so it should be a backing-child-based filter like mirror_top. So, in test_update_perm_tree, the first child should be DATA, as we don't want filters with two filtered children. bdrv_exclusive_writer is used as a filter once. So it should be a filter anyway. We want to append it, so it should be a backing-child-based filter too. Make all FILTERED children PRIMARY as well. We are going to enforce this rule by assertion soon. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz --- include/block/block_int-common.h | 5 +++-- tests/unit/test-bdrv-graph-mod.c | 24 +--- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 9d91ccbcbf..d68adc6ff3 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -122,8 +122,9 @@ struct BlockDriver { /* * Only make sense for filter drivers, for others must be false. * If true, filtered child is bs->backing. Otherwise it's bs->file. - * Only two internal filters use bs->backing as filtered child and has this - * field set to true: mirror_top and commit_top. + * Two internal filters use bs->backing as filtered child and has this + * field set to true: mirror_top and commit_top. There also two such test + * filters in tests/unit/test-bdrv-graph-mod.c. * * Never create any more such filters! 
* diff --git a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c index e2f1355af1..c522591531 100644 --- a/tests/unit/test-bdrv-graph-mod.c +++ b/tests/unit/test-bdrv-graph-mod.c @@ -26,6 +26,8 @@ static BlockDriver bdrv_pass_through = { .format_name = "pass-through", +.is_filter = true, +.filtered_child_is_backing = true, .bdrv_child_perm = bdrv_default_perms, }; @@ -57,6 +59,8 @@ static void exclusive_write_perms(BlockDriverState *bs, BdrvChild *c, static BlockDriver bdrv_exclusive_writer = { .format_name = "exclusive-writer", +.is_filter = true, +.filtered_child_is_backing = true, .bdrv_child_perm = exclusive_write_perms, }; @@ -134,7 +138,7 @@ static void test_update_perm_tree(void) blk_insert_bs(root, bs, &error_abort); bdrv_attach_child(filter, bs, "child", &child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, &error_abort); + BDRV_CHILD_DATA, &error_abort); ret = bdrv_append(filter, bs, NULL); g_assert_cmpint(ret, <, 0); @@ -228,11 +232,14 @@ static void test_parallel_exclusive_write(void) */ bdrv_ref(base); -bdrv_attach_child(top, fl1, "backing", &child_of_bds, BDRV_CHILD_DATA, +bdrv_attach_child(top, fl1, "backing", &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, &error_abort); -bdrv_attach_child(fl1, base, "backing", &child_of_bds, BDRV_CHILD_FILTERED, +bdrv_attach_child(fl1, base, "backing", &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, &error_abort); -bdrv_attach_child(fl2, base, "backing", &child_of_bds, BDRV_CHILD_FILTERED, +bdrv_attach_child(fl2, base, "backing", &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, &error_abort); bdrv_replace_node(fl1, fl2, &error_abort); @@ -344,9 +351,11 @@ static void test_parallel_perm_update(void) BDRV_CHILD_DATA, &error_abort); c_fl2 = bdrv_attach_child(ws, fl2, "second", &child_of_bds, BDRV_CHILD_DATA, &error_abort); -bdrv_attach_child(fl1, base, "backing", &child_of_bds, BDRV_CHILD_FILTERED, +bdrv_attach_child(fl1, base, "backing", &child_of_bds, + 
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, &error_abort); -bdrv_attach_child(fl2, base, "backing", &child_of_bds, BDRV_CHILD_FILTERED, +bdrv_attach_child(fl2, base, "backing", &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, &error_abort); /* Select fl1 as first child to be active */ @@ -397,7 +406,8 @@ static void test_append_greedy_filter(void) BlockDriverState *base = no_perm_node("base"); BlockDriverState *fl = exclusive_writer_node("fl1"); -bdrv_attach_child(top, base, "backing", &child_of_bds, BDRV_CHILD_COW, +bdrv_attach_child(top, base, "backing", &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, &error_abort); bdrv_append(fl, base, &error_abort); -- 2.25.1
[PATCH v6 14/15] block/snapshot: drop indirection around bdrv_snapshot_fallback_ptr
Now the indirection is not actually used, we can safely reduce it to simple pointer. For consistency do a bit of refactoring to get rid of _ptr suffixes that become meaningless. Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/snapshot.c | 38 -- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/block/snapshot.c b/block/snapshot.c index f3971ac2bd..e22ac3eac6 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -151,34 +151,29 @@ bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs, } /** - * Return a pointer to the child BDS pointer to which we can fall + * Return a pointer to child of given BDS to which we can fall * back if the given BDS does not support snapshots. * Return NULL if there is no BDS to (safely) fall back to. - * - * We need to return an indirect pointer because bdrv_snapshot_goto() - * has to modify the BdrvChild pointer. */ -static BdrvChild **bdrv_snapshot_fallback_ptr(BlockDriverState *bs) +static BdrvChild *bdrv_snapshot_fallback_child(BlockDriverState *bs) { -BdrvChild **fallback; -BdrvChild *child = bdrv_primary_child(bs); +BdrvChild *fallback = bdrv_primary_child(bs); +BdrvChild *child; /* We allow fallback only to primary child */ -if (!child) { +if (!fallback) { return NULL; } -fallback = (child == bs->file ? &bs->file : &bs->backing); -assert(*fallback == child); /* * Check that there are no other children that would need to be * snapshotted. If there are, it is not safe to fall back to - * *fallback. + * fallback. */ QLIST_FOREACH(child, &bs->children, next) { if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA | BDRV_CHILD_FILTERED) && -child != *fallback) +child != fallback) { return NULL; } @@ -189,8 +184,7 @@ static BdrvChild **bdrv_snapshot_fallback_ptr(BlockDriverState *bs) static BlockDriverState *bdrv_snapshot_fallback(BlockDriverState *bs) { -BdrvChild **child_ptr = bdrv_snapshot_fallback_ptr(bs); -return child_ptr ? 
(*child_ptr)->bs : NULL; +return child_bs(bdrv_snapshot_fallback_child(bs)); } int bdrv_can_snapshot(BlockDriverState *bs) @@ -237,7 +231,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs, Error **errp) { BlockDriver *drv = bs->drv; -BdrvChild **fallback_ptr; +BdrvChild *fallback; int ret, open_ret; GLOBAL_STATE_CODE(); @@ -260,13 +254,13 @@ int bdrv_snapshot_goto(BlockDriverState *bs, return ret; } -fallback_ptr = bdrv_snapshot_fallback_ptr(bs); -if (fallback_ptr) { +fallback = bdrv_snapshot_fallback_child(bs); +if (fallback) { QDict *options; QDict *file_options; Error *local_err = NULL; -BlockDriverState *fallback_bs = (*fallback_ptr)->bs; -char *subqdict_prefix = g_strdup_printf("%s.", (*fallback_ptr)->name); +BlockDriverState *fallback_bs = fallback->bs; +char *subqdict_prefix = g_strdup_printf("%s.", fallback->name); options = qdict_clone_shallow(bs->options); @@ -277,8 +271,8 @@ int bdrv_snapshot_goto(BlockDriverState *bs, qobject_unref(file_options); g_free(subqdict_prefix); -/* Force .bdrv_open() below to re-attach fallback_bs on *fallback_ptr */ -qdict_put_str(options, (*fallback_ptr)->name, +/* Force .bdrv_open() below to re-attach fallback_bs on fallback */ +qdict_put_str(options, fallback->name, bdrv_get_node_name(fallback_bs)); /* Now close bs, apply the snapshot on fallback_bs, and re-open bs */ @@ -287,7 +281,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs, } /* .bdrv_open() will re-attach it */ -bdrv_unref_child(bs, *fallback_ptr); +bdrv_unref_child(bs, fallback); ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp); open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err); -- 2.25.1
[PATCH v6 11/15] Revert "block: Restructure remove_file_or_backing_child()"
That's a preparation to previously reverted "block: Let replace_child_noperm free children". Drop it too, we don't need it for a new approach. This reverts commit 562bda8bb41879eeda0bd484dd3d55134579b28e. Signed-off-by: Vladimir Sementsov-Ogievskiy --- block.c | 21 + 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/block.c b/block.c index a83845b120..3ac332a460 100644 --- a/block.c +++ b/block.c @@ -5051,33 +5051,30 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, BdrvChild *child, Transaction *tran) { -BdrvChild **childp; BdrvRemoveFilterOrCowChild *s; +assert(child == bs->backing || child == bs->file); + if (!child) { return; } -if (child == bs->backing) { -childp = &bs->backing; -} else if (child == bs->file) { -childp = &bs->file; -} else { -g_assert_not_reached(); -} - if (child->bs) { -bdrv_replace_child_tran(*childp, NULL, tran); +bdrv_replace_child_tran(child, NULL, tran); } s = g_new(BdrvRemoveFilterOrCowChild, 1); *s = (BdrvRemoveFilterOrCowChild) { .child = child, -.is_backing = (childp == &bs->backing), +.is_backing = (child == bs->backing), }; tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s); -*childp = NULL; +if (s->is_backing) { +bs->backing = NULL; +} else { +bs->file = NULL; +} } /* -- 2.25.1
[PATCH v6 04/15] test-bdrv-graph-mod: update test_parallel_perm_update test case
test_parallel_perm_update() does two things that we are going to restrict in the near future: 1. It updates bs->file field by hand. bs->file will be managed automatically by generic code (together with bs->children list). Let's better refactor our "tricky" bds to have own state where one of children is linked as "selected". This also looks less "tricky", so avoid using this word. 2. It create FILTERED children that are not PRIMARY. Except for tests all FILTERED children in the Qemu block layer are always PRIMARY as well. We are going to formalize this rule, so let's better use DATA children here. 3. It creates more than one FILTERED child, which is already abandoned in BDRV_CHILD_FILTERED's description. While being here, update the picture to better correspond to the test code. Signed-off-by: Vladimir Sementsov-Ogievskiy --- tests/unit/test-bdrv-graph-mod.c | 80 +++- 1 file changed, 49 insertions(+), 31 deletions(-) diff --git a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c index a6e3bb79be..e2f1355af1 100644 --- a/tests/unit/test-bdrv-graph-mod.c +++ b/tests/unit/test-bdrv-graph-mod.c @@ -241,13 +241,26 @@ static void test_parallel_exclusive_write(void) bdrv_unref(top); } -static void write_to_file_perms(BlockDriverState *bs, BdrvChild *c, - BdrvChildRole role, - BlockReopenQueue *reopen_queue, - uint64_t perm, uint64_t shared, - uint64_t *nperm, uint64_t *nshared) +/* + * write-to-selected node may have several DATA children, one of them may be + * "selected". Exclusive write permission is taken on selected child. + * + * We don't realize write handler itself, as we need only to test how permission + * update works. 
+ */ +typedef struct BDRVWriteToSelectedState { +BdrvChild *selected; +} BDRVWriteToSelectedState; + +static void write_to_selected_perms(BlockDriverState *bs, BdrvChild *c, +BdrvChildRole role, +BlockReopenQueue *reopen_queue, +uint64_t perm, uint64_t shared, +uint64_t *nperm, uint64_t *nshared) { -if (bs->file && c == bs->file) { +BDRVWriteToSelectedState *s = bs->opaque; + +if (s->selected && c == s->selected) { *nperm = BLK_PERM_WRITE; *nshared = BLK_PERM_ALL & ~BLK_PERM_WRITE; } else { @@ -256,9 +269,10 @@ static void write_to_file_perms(BlockDriverState *bs, BdrvChild *c, } } -static BlockDriver bdrv_write_to_file = { -.format_name = "tricky-perm", -.bdrv_child_perm = write_to_file_perms, +static BlockDriver bdrv_write_to_selected = { +.format_name = "write-to-selected", +.instance_size = sizeof(BDRVWriteToSelectedState), +.bdrv_child_perm = write_to_selected_perms, }; @@ -266,15 +280,18 @@ static BlockDriver bdrv_write_to_file = { * The following test shows that topological-sort order is required for * permission update, simple DFS is not enough. * - * Consider the block driver which has two filter children: one active - * with exclusive write access and one inactive with no specific - * permissions. + * Consider the block driver (write-to-selected) which has two children: one is + * selected so we have exclusive write access to it and for the other one we + * don't need any specific permissions. * * And, these two children has a common base child, like this: + * (additional "top" on top is used in test just because the only public + *function to update permission should get a specific child to update. + *Making bdrv_refresh_perms() public just for this test isn't worth it) * - * ┌─┐ ┌──┐ - * │ fl2 │ ◀── │ top │ - * └─┘ └──┘ + * ┌─┐ ┌───┐ ┌─┐ + * │ fl2 │ ◀── │ write-to-selected │ ◀── │ top │ + * └─┘ └───┘ └─┘ * │ │ * │ │ w * │ ▼ @@ -290,14 +307,14 @@ static BlockDriver bdrv_write_to_file = { * * So, exclusive write is propagated. 
* - * Assume, we want to make fl2 active instead of fl1. - * So, we set some option for top driver and do permission update. + * Assume, we want to select fl2 instead of fl1. + * So, we set some option for write-to-selected driver and do permission update. * * With simple DFS, if permission update goes first through - * top->fl1->base branch it will succeed: it firstly drop exclusive write - * permissions and than apply them for another BdrvChildren. - * But if permission update goes first through top->fl2->base branch it - * will fail, as when we try to update fl2->base child, old not yet + * write-to-selected -> fl1 -> base branch it will succeed: it firstly drop + * exclusive write permissions and than apply them for another BdrvChildren. + * But if
[PATCH v6 08/15] block/snapshot: stress that we fallback to primary child
Actually what we chose is a primary child. Let's stress it in the code. We are going to drop indirect pointer logic here in future. Actually this commit simplifies the future work: we drop use of indirection in the assertion now. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz --- block/snapshot.c | 30 ++ 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/block/snapshot.c b/block/snapshot.c index d6f53c3065..75e8d3a937 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -161,21 +161,14 @@ bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs, static BdrvChild **bdrv_snapshot_fallback_ptr(BlockDriverState *bs) { BdrvChild **fallback; -BdrvChild *child; +BdrvChild *child = bdrv_primary_child(bs); -/* - * The only BdrvChild pointers that are safe to modify (and which - * we can thus return a reference to) are bs->file and - * bs->backing. - */ -fallback = &bs->file; -if (!*fallback && bs->drv && bs->drv->is_filter) { -fallback = &bs->backing; -} - -if (!*fallback) { +/* We allow fallback only to primary child */ +if (!child) { return NULL; } +fallback = (child == bs->file ? &bs->file : &bs->backing); +assert(*fallback == child); /* * Check that there are no other children that would need to be @@ -309,15 +302,12 @@ int bdrv_snapshot_goto(BlockDriverState *bs, } /* - * fallback_ptr is &bs->file or &bs->backing. *fallback_ptr - * was closed above and set to NULL, but the .bdrv_open() call - * has opened it again, because we set the respective option - * (with the qdict_put_str() call above). - * Assert that .bdrv_open() has attached some child on - * *fallback_ptr, and that it has attached the one we wanted - * it to (i.e., fallback_bs). + * fallback was a primary child. It was closed above and set to NULL, + * but the .bdrv_open() call has opened it again, because we set the + * respective option (with the qdict_put_str() call above). + * Assert that .bdrv_open() has attached the right BDS as primary child. 
*/ -assert(*fallback_ptr && fallback_bs == (*fallback_ptr)->bs); +assert(bdrv_primary_bs(bs) == fallback_bs); bdrv_unref(fallback_bs); return ret; } -- 2.25.1
[PATCH v6 00/15] block: cleanup backing and file handling
Hi all! That's the first part of "[PATCH v5 00/45] Transactional block-graph modifying API", updated and almost reviewed. One commit (15) is added to the original scope of "block: cleanup backing and file handling", as it's related. 01: add Hanna's r-b 02: - mention snapshot-access in commit msg - return ret in compress_open instead of EINVAL - add Hanna's r-b 03: add Hanna's r-b 04: - add case in commit msg - fix comments 05: - fix typo in commit msg - add Hanna's r-b 06: add Hanna's r-b 07: wording improvements 08: - fix wording - add Hanna's r-b 09: I add the description, why we allow a degradation. Still, up to maintainers: it's OK to merge 09-13 into one big commit 13: - fix s/|/||/ - improve comment - more readable logic when handling filters in bdrv_child_cb_attach() - don't keep **child indirection, move to just returning a child ptr (honestly, I didn't analyze all the callers do they need this int value. Do you think it's needed?) - handle snapshot-access.c 14: get rid of _ptr 15: update comment Vladimir Sementsov-Ogievskiy (15): block: BlockDriver: add .filtered_child_is_backing field block: introduce bdrv_open_file_child() helper block/blklogwrites: don't care to remove bs->file child on failure test-bdrv-graph-mod: update test_parallel_perm_update test case tests-bdrv-drain: bdrv_replace_test driver: declare supports_backing test-bdrv-graph-mod: fix filters to be filters block: document connection between child roles and bs->backing/bs->file block/snapshot: stress that we fallback to primary child Revert "block: Let replace_child_noperm free children" Revert "block: Let replace_child_tran keep indirect pointer" Revert "block: Restructure remove_file_or_backing_child()" Revert "block: Pass BdrvChild ** to replace_child_noperm" block: Manipulate bs->file / bs->backing pointers in .attach/.detach block/snapshot: drop indirection around bdrv_snapshot_fallback_ptr block: refactor bdrv_remove_file_or_backing_child to bdrv_remove_child block.c| 435 ++--- 
block/blkdebug.c | 9 +- block/blklogwrites.c | 11 +- block/blkreplay.c | 7 +- block/blkverify.c | 9 +- block/bochs.c | 7 +- block/cloop.c | 7 +- block/commit.c | 1 + block/copy-before-write.c | 9 +- block/copy-on-read.c | 9 +- block/crypto.c | 11 +- block/dmg.c| 7 +- block/filter-compress.c| 8 +- block/mirror.c | 1 + block/parallels.c | 7 +- block/preallocate.c| 9 +- block/qcow.c | 6 +- block/qcow2.c | 8 +- block/qed.c| 8 +- block/raw-format.c | 4 +- block/replication.c| 8 +- block/snapshot-access.c| 6 +- block/snapshot.c | 59 ++-- block/throttle.c | 8 +- block/vdi.c| 7 +- block/vhdx.c | 7 +- block/vmdk.c | 7 +- block/vpc.c| 7 +- include/block/block-common.h | 39 +++ include/block/block-global-state.h | 3 + include/block/block_int-common.h | 29 +- tests/unit/test-bdrv-drain.c | 11 +- tests/unit/test-bdrv-graph-mod.c | 104 --- 33 files changed, 389 insertions(+), 479 deletions(-) -- 2.25.1
[PATCH v6 07/15] block: document connection between child roles and bs->backing/bs->file
Make the informal rules formal. In further commit we'll add corresponding assertions. Signed-off-by: Vladimir Sementsov-Ogievskiy --- include/block/block-common.h | 39 1 file changed, 39 insertions(+) diff --git a/include/block/block-common.h b/include/block/block-common.h index fdb7306e78..fda67a7c38 100644 --- a/include/block/block-common.h +++ b/include/block/block-common.h @@ -313,6 +313,45 @@ enum { * * At least one of DATA, METADATA, FILTERED, or COW must be set for * every child. + * + * + * = Connection with bs->children, bs->file and bs->backing fields = + * + * 1. Filters + * + * Filter drivers have drv->is_filter = true. + * + * Filter node has exactly one FILTERED|PRIMARY child, and may have other + * children which must not have these bits (one example is the + * copy-before-write filter, which also has its target DATA child). + * + * Filter nodes never have COW children. + * + * For most filters, the filtered child is linked in bs->file, bs->backing is + * NULL. For some filters (as an exception), it is the other way around; those + * drivers will have drv->filtered_child_is_backing set to true (see that + * field’s documentation for what drivers this concerns) + * + * 2. "raw" driver (block/raw-format.c) + * + * Formally it's not a filter (drv->is_filter = false) + * + * bs->backing is always NULL + * + * Only has one child, linked in bs->file. Its role is either FILTERED|PRIMARY + * (like filter) or DATA|PRIMARY depending on options. + * + * 3. Other drivers + * + * Don't have any FILTERED children. + * + * May have at most one COW child. In this case it's linked in bs->backing. + * Otherwise bs->backing is NULL. COW child is never PRIMARY. + * + * May have at most one PRIMARY child. In this case it's linked in bs->file. + * Otherwise bs->file is NULL. + * + * May also have some other children that don't have the PRIMARY or COW bit set. */ enum BdrvChildRoleBits { /* -- 2.25.1
[PATCH v6 02/15] block: introduce bdrv_open_file_child() helper
Almost all drivers call bdrv_open_child() similarly. Let's create a helper for this. The only not updated drivers that call bdrv_open_child() to set bs->file are raw-format and snapshot-access: raw-format sometimes want to have filtered child but don't set drv->is_filter to true. snapshot-access wants only DATA | PRIMARY Possibly we should implement drv->is_filter_func() handler, to consider raw-format as filter when it works as filter.. But it's another story. Note also, that we decrease assignments to bs->file in code: it helps us restrict modifying this field in further commit. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz --- block.c| 21 + block/blkdebug.c | 9 +++-- block/blklogwrites.c | 7 ++- block/blkreplay.c | 7 ++- block/blkverify.c | 9 +++-- block/bochs.c | 7 +++ block/cloop.c | 7 +++ block/copy-before-write.c | 9 - block/copy-on-read.c | 9 - block/crypto.c | 11 ++- block/dmg.c| 7 +++ block/filter-compress.c| 8 +++- block/parallels.c | 7 +++ block/preallocate.c| 9 - block/qcow.c | 6 ++ block/qcow2.c | 8 block/qed.c| 8 block/replication.c| 8 +++- block/throttle.c | 8 +++- block/vdi.c| 7 +++ block/vhdx.c | 7 +++ block/vmdk.c | 7 +++ block/vpc.c| 7 +++ include/block/block-global-state.h | 3 +++ 24 files changed, 95 insertions(+), 101 deletions(-) diff --git a/block.c b/block.c index 2c0080..883b1bb242 100644 --- a/block.c +++ b/block.c @@ -3666,6 +3666,27 @@ BdrvChild *bdrv_open_child(const char *filename, errp); } +/* + * Wrapper on bdrv_open_child() for most popular case: open primary child of bs. + */ +int bdrv_open_file_child(const char *filename, + QDict *options, const char *bdref_key, + BlockDriverState *parent, Error **errp) +{ +BdrvChildRole role; + +/* commit_top and mirror_top don't use this function */ +assert(!parent->drv->filtered_child_is_backing); + +role = parent->drv->is_filter ? 
+(BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY) : BDRV_CHILD_IMAGE; + +parent->file = bdrv_open_child(filename, options, bdref_key, parent, + &child_of_bds, role, false, errp); + +return parent->file ? 0 : -EINVAL; +} + /* * TODO Future callers may need to specify parent/child_class in order for * option inheritance to work. Existing callers use it for the root node. diff --git a/block/blkdebug.c b/block/blkdebug.c index bbf2948703..5fcfc8ac6f 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -503,12 +503,9 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, } /* Open the image file */ -bs->file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, "image", - bs, &child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - false, errp); -if (!bs->file) { -ret = -EINVAL; +ret = bdrv_open_file_child(qemu_opt_get(opts, "x-image"), options, "image", + bs, errp); +if (ret < 0) { goto out; } diff --git a/block/blklogwrites.c b/block/blklogwrites.c index f7a251e91f..f66a617eb3 100644 --- a/block/blklogwrites.c +++ b/block/blklogwrites.c @@ -155,11 +155,8 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, } /* Open the file */ -bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, false, - errp); -if (!bs->file) { -ret = -EINVAL; +ret = bdrv_open_file_child(NULL, options, "file", bs, errp); +if (ret < 0) { goto fail; } diff --git a/block/blkreplay.c b/block/blkreplay.c index dcbe780ddb..76a0b8d12a 100644 --- a/block/blkreplay.c +++ b/block/blkreplay.c @@ -26,11 +26,8 @@ static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags, int ret; /* Open the image file */ -bs->file = bdrv_open_child(NULL, options, "image", bs, &child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - false, errp); -if (!bs->file) { -ret = -EINVAL; +ret = bdrv_open_file_child(NULL, options, "image", bs, errp); +if (ret
[PATCH v6 01/15] block: BlockDriver: add .filtered_child_is_backing field
Unfortunately not all filters use .file child as filtered child. Two exclusions are mirror_top and commit_top. Happily they both are private filters. Bad thing is that this inconsistency is observable through qmp commands query-block / query-named-block-nodes. So, whether we could just change mirror_top and commit_top to use the file child, as all other filter drivers do, is an open question. Probably, we could do that with some kind of deprecation period, but how to warn users during it? For now, let's just add a field so we can distinguish them in generic code, it will be used in further commits. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz --- block/commit.c | 1 + block/mirror.c | 1 + include/block/block_int-common.h | 13 + 3 files changed, 15 insertions(+) diff --git a/block/commit.c b/block/commit.c index 851d1c557a..7722a392af 100644 --- a/block/commit.c +++ b/block/commit.c @@ -238,6 +238,7 @@ static BlockDriver bdrv_commit_top = { .bdrv_child_perm= bdrv_commit_top_child_perm, .is_filter = true, +.filtered_child_is_backing = true, }; void commit_start(const char *job_id, BlockDriverState *bs, diff --git a/block/mirror.c b/block/mirror.c index d8ecb9efa2..824b273fc7 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1578,6 +1578,7 @@ static BlockDriver bdrv_mirror_top = { .bdrv_child_perm= bdrv_mirror_top_child_perm, .is_filter = true, +.filtered_child_is_backing = true, }; static BlockJob *mirror_start_job( diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 8947abab76..9d91ccbcbf 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -119,6 +119,19 @@ struct BlockDriver { * (And this filtered child must then be bs->file or bs->backing.) */ bool is_filter; +/* + * Only make sense for filter drivers, for others must be false. + * If true, filtered child is bs->backing. Otherwise it's bs->file. 
+ * Only two internal filters use bs->backing as filtered child and has this + * field set to true: mirror_top and commit_top. + * + * Never create any more such filters! + * + * TODO: imagine how to deprecate this behavior and make all filters work + * similarly using bs->file as filtered child. + */ +bool filtered_child_is_backing; + /* * Set to true if the BlockDriver is a format driver. Format nodes * generally do not expect their children to be other format nodes -- 2.25.1
[PATCH v6 05/15] tests-bdrv-drain: bdrv_replace_test driver: declare supports_backing
We do add COW child to the node. In future we are going to forbid adding COW child to the node that doesn't support backing. So, fix it here now. Don't worry about setting bs->backing itself: in further commit we'll update the block-layer to automatically set/unset this field in generic code. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz --- tests/unit/test-bdrv-drain.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c index 36be84ae55..23d425a494 100644 --- a/tests/unit/test-bdrv-drain.c +++ b/tests/unit/test-bdrv-drain.c @@ -1948,6 +1948,7 @@ static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs) static BlockDriver bdrv_replace_test = { .format_name= "replace_test", .instance_size = sizeof(BDRVReplaceTestState), +.supports_backing = true, .bdrv_close = bdrv_replace_test_close, .bdrv_co_preadv = bdrv_replace_test_co_preadv, -- 2.25.1
[PATCH v6 03/15] block/blklogwrites: don't care to remove bs->file child on failure
We don't need to remove bs->file, generic layer takes care of it. No other driver cares to remove bs->file on failure by hand. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz --- block/blklogwrites.c | 4 1 file changed, 4 deletions(-) diff --git a/block/blklogwrites.c b/block/blklogwrites.c index f66a617eb3..7d25df97cc 100644 --- a/block/blklogwrites.c +++ b/block/blklogwrites.c @@ -254,10 +254,6 @@ fail_log: s->log_file = NULL; } fail: -if (ret < 0) { -bdrv_unref_child(bs, bs->file); -bs->file = NULL; -} qemu_opts_del(opts); return ret; } -- 2.25.1
[PATCH] Align Raspberry Pi DMA interrupts with Linux DTS
All Raspberry Pi models 1-3 (based on bcm2835) have Linux device tree (arch/arm/boot/dts/bcm2835-common.dtsi +25): /* dma channel 11-14 share one irq */ which mismatched the Qemu model. In this patch channels 0--10 and 11--14 are handled separately. Signed-off-by: Andrey Makarov --- hw/arm/bcm2835_peripherals.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c index 48538c9360..3d808b0e31 100644 --- a/hw/arm/bcm2835_peripherals.c +++ b/hw/arm/bcm2835_peripherals.c @@ -322,13 +322,21 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) memory_region_add_subregion(&s->peri_mr, DMA15_OFFSET, sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->dma), 1)); -for (n = 0; n <= 12; n++) { +for (n = 0; n <= 10; n++) { sysbus_connect_irq(SYS_BUS_DEVICE(&s->dma), n, qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, INTERRUPT_DMA0 + n)); } +/* According to DTS, dma channels 11-14 share one irq */ +for (n = 11; n <= 14; n++) { +sysbus_connect_irq(SYS_BUS_DEVICE(&s->dma), n, + qdev_get_gpio_in_named(DEVICE(&s->ic), + BCM2835_IC_GPU_IRQ, + INTERRUPT_DMA0 + 11)); +} + /* THERMAL */ if (!sysbus_realize(SYS_BUS_DEVICE(&s->thermal), errp)) { return; -- 2.30.2
Re: [PATCH v3 22/51] target/arm: Trap AdvSIMD usage when Streaming SVE is active
On 6/24/22 08:30, Peter Maydell wrote: So the thing that worries me about structuring this this way is that the SME supplement appendix includes this caution: # The instruction encoding tables in this section [...] will # require correction if subsequent versions of the A64 ISA # add new instructions which overlap with these encodings. My guess (based on how the H.a Arm ARM has incorporated SME) is that these tables aren't going to be included in the Arm ARM and updated going forward. Instead the behaviour will be documented based on whether (existing and new) instructions call CheckNonStreamingSVEEnabled() or CheckSVEEnabled() in their pseudocode. I agree that this would be cleaner and more correct long-term. So I'm a bit uncertain about how awkward it's going to be in future to maintain this transliteration of the SME supplement tables into decodetree: we might find that we have to look at new instructions and kind of reverse-engineer back out any required changes to the tables here, rather than simply "write the trans_ function for the new insn, looking at the pseudocode to see which _access_check() function it should be calling"... I thought about this, and if it were simply a matter of annotating the trans_* functions within translate-sve.c, I would have done it. But I would need to adjust A64 AdvSIMD as well, which is still done with the by-hand decoder. Can we use this solution in the short term, and fix up advsimd while coverting it to decodetree? I'm more and more convinced we'll want this sooner than later. r~
Re: [PULL v2 00/20] Block layer patches
On 6/24/22 08:40, Kevin Wolf wrote: The following changes since commit 3a821c52e1a30ecd9a436f2c67cc66b5628c829f: Merge tag 'nvme-next-pull-request' of git://git.infradead.org/qemu-nvme into staging (2022-06-23 14:52:30 -0700) are available in the Git repository at: git://repo.or.cz/qemu/kevin.git tags/for-upstream for you to fetch changes up to 779d82e1d305f2a9cbd7f48cf6555ad58145e04a: vduse-blk: Add name option (2022-06-24 17:07:06 +0200) Block layer patches - Add vduse-blk export - Dirty bitmaps: Fix and improve bitmap merge - gluster: correctly set max_pdiscard - rbd: report a better error when namespace does not exist - aio_wait_kick: add missing memory barrier - Code cleanups Applied, thanks. Please update https://wiki.qemu.org/ChangeLog/7.1 as appropriate. r~ Emanuele Giuseppe Esposito (1): aio_wait_kick: add missing memory barrier Eric Blake (1): nbd: Drop dead code spotted by Coverity Fabian Ebner (1): block/gluster: correctly set max_pdiscard Stefan Hajnoczi (3): block: drop unused bdrv_co_drain() API block: get rid of blk->guest_block_size qsd: document vduse-blk exports Stefano Garzarella (1): block/rbd: report a better error when namespace does not exist Vladimir Sementsov-Ogievskiy (3): block: block_dirty_bitmap_merge(): fix error path block: improve block_dirty_bitmap_merge(): don't allocate extra bitmap block: simplify handling of try to merge different sized bitmaps Xie Yongji (10): block: Support passing NULL ops to blk_set_dev_ops() block/export: Fix incorrect length passed to vu_queue_push() block/export: Abstract out the logic of virtio-blk I/O process linux-headers: Add vduse.h libvduse: Add VDUSE (vDPA Device in Userspace) library vduse-blk: Implement vduse-blk export vduse-blk: Add vduse-blk resize support libvduse: Add support for reconnecting vduse-blk: Add serial option vduse-blk: Add name option qapi/block-export.json | 29 +- docs/tools/qemu-storage-daemon.rst | 22 + meson_options.txt |4 + block/export/vduse-blk.h| 20 + 
block/export/virtio-blk-handler.h | 37 + include/block/aio-wait.h|2 + include/block/block-io.h|1 - include/block/block_int-io.h|2 +- include/qemu/hbitmap.h | 15 +- include/sysemu/block-backend-io.h |1 - linux-headers/linux/vduse.h | 306 ++ subprojects/libvduse/include/atomic.h |1 + subprojects/libvduse/include/compiler.h |1 + subprojects/libvduse/libvduse.h | 247 + block/backup.c |6 +- block/block-backend.c | 12 +- block/dirty-bitmap.c| 26 +- block/export/export.c |6 + block/export/vduse-blk.c| 374 block/export/vhost-user-blk-server.c| 263 + block/export/virtio-blk-handler.c | 240 + block/gluster.c |2 +- block/io.c | 15 - block/monitor/bitmap-qmp-cmds.c | 40 +- block/nbd.c |8 +- block/rbd.c | 24 + hw/block/virtio-blk.c |1 - hw/block/xen-block.c|1 - hw/ide/core.c |1 - hw/scsi/scsi-disk.c |1 - hw/scsi/scsi-generic.c |1 - storage-daemon/qemu-storage-daemon.c| 10 + subprojects/libvduse/libvduse.c | 1375 +++ util/aio-wait.c | 16 +- util/hbitmap.c | 25 +- MAINTAINERS |9 + block/export/meson.build|7 +- meson.build | 34 + scripts/meson-buildoptions.sh |7 + scripts/update-linux-headers.sh |2 +- subprojects/libvduse/linux-headers/linux|1 + subprojects/libvduse/meson.build| 10 + subprojects/libvduse/standard-headers/linux |1 + 43 files changed, 2852 insertions(+), 354 deletions(-) create mode 100644 block/export/vduse-blk.h create mode 100644 block/export/virtio-blk-handler.h create mode 100644 linux-headers/linux/vduse.h create mode 12 subprojects/libvduse/include/atomic.h create mode 12 subprojects/libvduse/include/compiler.h create mode 100644 subprojects/libvduse/libvduse.h create mode 100644 block/exp
Re: [PATCH] aspeed: i2c: Fix DMA len write-enable bit handling
> On Jun 24, 2022, at 1:31 PM, Peter Delevoryas wrote: > > I noticed i2c rx transfers were getting shortened to "1" on Zephyr. It > seems to be because the Zephyr i2c driver sets the RX DMA len with the > RX field write-enable bit set (bit 31) to avoid a read-modify-write. [1] > > /* 0x1C : I2CM Master DMA Transfer Length Register */ > > I think we should be checking the write-enable bits on the incoming > value, not checking the register array. I'm not sure we're even writing > the write-enable bits to the register array, actually. > > [1] > https://github.com/AspeedTech-BMC/zephyr/blob/db3dbcc9c52e67a47180890ac938ed380b33f91c/drivers/i2c/i2c_aspeed.c#L145-L148 Arg, forgot this: Fixes: ba2cccd64e90f34 ("aspeed: i2c: Add new mode support”) Should I resend as v2? Thanks, Peter > > Signed-off-by: Peter Delevoryas > --- > hw/i2c/aspeed_i2c.c | 8 > 1 file changed, 4 insertions(+), 4 deletions(-) > > diff --git a/hw/i2c/aspeed_i2c.c b/hw/i2c/aspeed_i2c.c > index 37ae1f2e04..c4fce7474a 100644 > --- a/hw/i2c/aspeed_i2c.c > +++ b/hw/i2c/aspeed_i2c.c > @@ -644,18 +644,18 @@ static void aspeed_i2c_bus_new_write(AspeedI2CBus *bus, > hwaddr offset, > RX_BUF_LEN) + 1; > break; > case A_I2CM_DMA_LEN: > -w1t = ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, RX_BUF_LEN_W1T) || > - ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, TX_BUF_LEN_W1T); > +w1t = FIELD_EX32(value, I2CM_DMA_LEN, RX_BUF_LEN_W1T) || > + FIELD_EX32(value, I2CM_DMA_LEN, TX_BUF_LEN_W1T); > /* If none of the w1t bits are set, just write to the reg as normal. 
> */ > if (!w1t) { > bus->regs[R_I2CM_DMA_LEN] = value; > break; > } > -if (ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, RX_BUF_LEN_W1T)) { > +if (FIELD_EX32(value, I2CM_DMA_LEN, RX_BUF_LEN_W1T)) { > ARRAY_FIELD_DP32(bus->regs, I2CM_DMA_LEN, RX_BUF_LEN, > FIELD_EX32(value, I2CM_DMA_LEN, RX_BUF_LEN)); > } > -if (ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, TX_BUF_LEN_W1T)) { > +if (FIELD_EX32(value, I2CM_DMA_LEN, TX_BUF_LEN_W1T)) { > ARRAY_FIELD_DP32(bus->regs, I2CM_DMA_LEN, TX_BUF_LEN, > FIELD_EX32(value, I2CM_DMA_LEN, TX_BUF_LEN)); > } > -- > 2.30.2 >
[PATCH] aspeed: i2c: Fix DMA len write-enable bit handling
I noticed i2c rx transfers were getting shortened to "1" on Zephyr. It seems to be because the Zephyr i2c driver sets the RX DMA len with the RX field write-enable bit set (bit 31) to avoid a read-modify-write. [1] /* 0x1C : I2CM Master DMA Transfer Length Register */ I think we should be checking the write-enable bits on the incoming value, not checking the register array. I'm not sure we're even writing the write-enable bits to the register array, actually. [1] https://github.com/AspeedTech-BMC/zephyr/blob/db3dbcc9c52e67a47180890ac938ed380b33f91c/drivers/i2c/i2c_aspeed.c#L145-L148 Signed-off-by: Peter Delevoryas --- hw/i2c/aspeed_i2c.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/i2c/aspeed_i2c.c b/hw/i2c/aspeed_i2c.c index 37ae1f2e04..c4fce7474a 100644 --- a/hw/i2c/aspeed_i2c.c +++ b/hw/i2c/aspeed_i2c.c @@ -644,18 +644,18 @@ static void aspeed_i2c_bus_new_write(AspeedI2CBus *bus, hwaddr offset, RX_BUF_LEN) + 1; break; case A_I2CM_DMA_LEN: -w1t = ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, RX_BUF_LEN_W1T) || - ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, TX_BUF_LEN_W1T); +w1t = FIELD_EX32(value, I2CM_DMA_LEN, RX_BUF_LEN_W1T) || + FIELD_EX32(value, I2CM_DMA_LEN, TX_BUF_LEN_W1T); /* If none of the w1t bits are set, just write to the reg as normal. */ if (!w1t) { bus->regs[R_I2CM_DMA_LEN] = value; break; } -if (ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, RX_BUF_LEN_W1T)) { +if (FIELD_EX32(value, I2CM_DMA_LEN, RX_BUF_LEN_W1T)) { ARRAY_FIELD_DP32(bus->regs, I2CM_DMA_LEN, RX_BUF_LEN, FIELD_EX32(value, I2CM_DMA_LEN, RX_BUF_LEN)); } -if (ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, TX_BUF_LEN_W1T)) { +if (FIELD_EX32(value, I2CM_DMA_LEN, TX_BUF_LEN_W1T)) { ARRAY_FIELD_DP32(bus->regs, I2CM_DMA_LEN, TX_BUF_LEN, FIELD_EX32(value, I2CM_DMA_LEN, TX_BUF_LEN)); } -- 2.30.2
Re: [PATCH qemu v2 1/2] ppc: Define SETFIELD for the ppc target
Alexey, The newer version of this patch is having trouble with Gitlab runners, as you can read in my feedback there. I've tested this one just in case. The same problems happen. E.g. for the cross-armel-system runner: In file included from ../hw/intc/pnv_xive.c:14: ../hw/intc/pnv_xive.c: In function ‘pnv_xive_block_id’: /builds/danielhb/qemu/target/ppc/cpu.h:45:33: error: conversion from ‘long long unsigned int’ to ‘long unsigned int’ changes value from ‘4222124650659840’ to ‘0’ [-Werror=overflow] 45 | #define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs)) | ^~~ /builds/danielhb/qemu/target/ppc/cpu.h:51:42: note: in definition of macro ‘GETFIELD’ 51 | (((word) & (mask)) >> __builtin_ctzl(mask)) | ^~~~ ../hw/intc/pnv_xive_regs.h:77:41: note: in expansion of macro ‘PPC_BITMASK’ 77 | #define PC_TCTXT_CHIPIDPPC_BITMASK(12, 15) | ^~~ ../hw/intc/pnv_xive.c:80:24: note: in expansion of macro ‘PC_TCTXT_CHIPID’ 80 | blk = GETFIELD(PC_TCTXT_CHIPID, cfg_val); |^~~ ../hw/intc/pnv_xive.c: In function ‘pnv_xive_vst_addr’: /builds/danielhb/qemu/target/ppc/cpu.h:45:33: error: conversion from ‘long long unsigned int’ to ‘long unsigned int’ changes value from ‘13835058055282163712’ to ‘0’ [-Werror=overflow] 45 | #define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs)) | ^~~ /builds/danielhb/qemu/target/ppc/cpu.h:51:42: note: in definition of macro ‘GETFIELD’ 51 | (((word) & (mask)) >> __builtin_ctzl(mask)) | ^~~~ ../hw/intc/pnv_xive_regs.h:230:33: note: in expansion of macro ‘PPC_BITMASK’ 230 | #define VSD_MODEPPC_BITMASK(0, 1) | ^~~ ../hw/intc/pnv_xive.c:226:18: note: in expansion of macro ‘VSD_MODE’ 226 | if (GETFIELD(VSD_MODE, vsd) == VSD_MODE_FORWARD) { | ^~~~ ../hw/intc/pnv_xive.c: In function ‘pnv_xive_end_update’: Link: https://gitlab.com/danielhb/qemu/-/jobs/2637716673 I don´t know how to deal with that. 
For the record: if this is too troublesome to fix, I am ok with just consolidating the GETFIELD and SETFIELD inlines we already have, under cpu.h, keeping them exactly as they are today (functions, not macros). Thanks, Daniel On 6/17/22 03:07, Alexey Kardashevskiy wrote: It keeps repeating, move it to the header. This uses __builtin_ctzl() to allow using the macros in #define. Signed-off-by: Alexey Kardashevskiy --- include/hw/pci-host/pnv_phb3_regs.h | 16 target/ppc/cpu.h| 5 + hw/intc/pnv_xive.c | 20 hw/intc/pnv_xive2.c | 20 hw/pci-host/pnv_phb4.c | 16 5 files changed, 5 insertions(+), 72 deletions(-) diff --git a/include/hw/pci-host/pnv_phb3_regs.h b/include/hw/pci-host/pnv_phb3_regs.h index a174ef1f7045..38f8ce9d7406 100644 --- a/include/hw/pci-host/pnv_phb3_regs.h +++ b/include/hw/pci-host/pnv_phb3_regs.h @@ -12,22 +12,6 @@ #include "qemu/host-utils.h" -/* - * QEMU version of the GETFIELD/SETFIELD macros - * - * These are common with the PnvXive model. - */ -static inline uint64_t GETFIELD(uint64_t mask, uint64_t word) -{ -return (word & mask) >> ctz64(mask); -} - -static inline uint64_t SETFIELD(uint64_t mask, uint64_t word, -uint64_t value) -{ -return (word & ~mask) | ((value << ctz64(mask)) & mask); -} - /* * PBCQ XSCOM registers */ diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 6d78078f379d..9a1f1ea3 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -47,6 +47,11 @@ PPC_BIT32(bs)) #define PPC_BITMASK8(bs, be)((PPC_BIT8(bs) - PPC_BIT8(be)) | PPC_BIT8(bs)) +#define GETFIELD(mask, word) \ +(((word) & (mask)) >> __builtin_ctzl(mask)) +#define SETFIELD(mask, word, val) \ +(((word) & ~(mask)) | (((uint64_t)(val) << __builtin_ctzl(mask)) & (mask))) + /*/ /* Exception vectors definitions */ enum { diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c index 1ce1d7b07d63..c7b75ed12ee0 100644 --- a/hw/intc/pnv_xive.c +++ b/hw/intc/pnv_xive.c @@ -66,26 +66,6 @@ static const XiveVstInfo vst_infos[] = { qemu_log_mask(LOG_GUEST_ERROR, "XIVE[%x] - " fmt "\n", \ 
(xive)->chip->chip_id, ## __VA_ARGS__); -/* - * QEMU version of the GETFIELD/SETFIELD macros - * - * TODO: It might be better to use the existing extract64
Re: [PATCH v2 3/3] target/ppc: Check page dir/table base alignment
On 6/24/22 15:04, Richard Henderson wrote: On 6/24/22 10:16, Leandro Lupori wrote: Check if each page dir/table base address is properly aligned and log a guest error if not, as real hardware behave incorrectly in this case. Signed-off-by: Leandro Lupori --- target/ppc/mmu-radix64.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index 339cf5b4d8..1e7d932893 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -280,6 +280,14 @@ static int ppc_radix64_next_level(AddressSpace *as, vaddr eaddr, *psize -= *nls; if (!(pde & R_PTE_LEAF)) { /* Prepare for next iteration */ *nls = pde & R_PDE_NLS; + + if ((pde & R_PDE_NLB) & MAKE_64BIT_MASK(0, *nls + 3)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: misaligned page dir/table base: 0x%"VADDR_PRIx + " page dir size: 0x"TARGET_FMT_lx"\n", + __func__, (pde & R_PDE_NLB), BIT(*nls + 3)); + } + index = eaddr >> (*psize - *nls); /* Shift */ index &= ((1UL << *nls) - 1); /* Mask */ *pte_addr = (pde & R_PDE_NLB) + (index * sizeof(pde)); In your response to my question on v1, you said that it appears that the cpu ignores bits *nls+3. This isn't ignoring them -- it's including [nls+2 : nls] into pte_addr. It would be better to compute this as index = ... index &= ... *pte_addr = ... if (*pte_addr & 7) { qemu_log(...); } Right, I wanted to warn about the invalid alignment but I ended up forgetting to make QEMU match the CPU behavior. The CPU seems to ignore bits [nls+2 : 0] of NLB. The multiplication of index by sizeof(pde) discards the 3 lower bits and it's not possible for NLB to have its 8 lower bits set, as these are used for NLS plus some reserved bits in the PDE. Then we need to make sure that bits [nls+2 : 8] of NLB are also 0. 
So maybe something like this would do it: index = eaddr >> (*psize - *nls); /* Shift */ index &= ((1UL << *nls) - 1); /* Mask */ *pte_addr = pde & R_PDE_NLB; mask = MAKE_64BIT_MASK(0, *nls + 3); if (*pte_addr & mask) { qemu_log(...); *pte_addr &= ~mask; } *pte_addr += index * sizeof(pde); Thanks, Leandro r~
[PATCH] python: QEMUMachine: enable qmp accept timeout by default
I've spent much time trying to debug a hanging pipeline in gitlab. I started from an idea that I have a problem in code in my series (which has some timeouts). Finally I found that the problem is that I've used QEMUMachine class directly to avoid qtest, and didn't add necessary arguments. Qemu fails and we wait for qmp accept endlessly. In gitlab it's just stopped by timeout (one hour) with no sign of what's going wrong. With timeout enabled, gitlab doesn't wait for an hour and prints all needed information. Signed-off-by: Vladimir Sementsov-Ogievskiy --- Hi all! Just compare this https://gitlab.com/vsementsov/qemu/-/pipelines/572232557 and this https://gitlab.com/vsementsov/qemu/-/pipelines/572526252 and you'll see that the latter is much better. python/qemu/machine/machine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py index 37191f433b..01a12f6f73 100644 --- a/python/qemu/machine/machine.py +++ b/python/qemu/machine/machine.py @@ -131,7 +131,7 @@ def __init__(self, drain_console: bool = False, console_log: Optional[str] = None, log_dir: Optional[str] = None, - qmp_timer: Optional[float] = None): + qmp_timer: float = 30): ''' Initialize a QEMUMachine -- 2.25.1
Re: [PATCH RESEND v2 2/2] target/ppc: Implement ISA 3.00 tlbie[l]
On 6/24/22 12:14, Leandro Lupori wrote: This initial version supports the invalidation of one or all TLB entries. Flush by PID/LPID, or based in process/partition scope is not supported, because it would make using the generic QEMU TLB implementation hard. In these cases, all entries are flushed. Signed-off-by: Leandro Lupori --- target/ppc/helper.h | 18 +++ target/ppc/mmu_helper.c | 154 +++ target/ppc/translate/storage-ctrl-impl.c.inc | 15 ++ 3 files changed, 187 insertions(+) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index d627cfe6ed..5e663a0a50 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -672,6 +672,24 @@ DEF_HELPER_FLAGS_1(tlbia, TCG_CALL_NO_RWG, void, env) DEF_HELPER_FLAGS_2(tlbie, TCG_CALL_NO_RWG, void, env, tl) DEF_HELPER_FLAGS_2(tlbiva, TCG_CALL_NO_RWG, void, env, tl) #if defined(TARGET_PPC64) + +/* + * tlbie[l] helper flags + * + * RIC, PRS, R and local are passed as flags in the last argument. + */ +#define TLBIE_F_RIC_SHIFT 0 +#define TLBIE_F_PRS_SHIFT 2 +#define TLBIE_F_R_SHIFT 3 +#define TLBIE_F_LOCAL_SHIFT 4 + +#define TLBIE_F_RIC_MASK(3 << TLBIE_F_RIC_SHIFT) +#define TLBIE_F_PRS (1 << TLBIE_F_PRS_SHIFT) +#define TLBIE_F_R (1 << TLBIE_F_R_SHIFT) +#define TLBIE_F_LOCAL (1 << TLBIE_F_LOCAL_SHIFT) + Better to put these somewhere else -- internal.h probably -- helper.h is included multiple times. r~
[PATCH RESEND v2 2/2] target/ppc: Implement ISA 3.00 tlbie[l]
This initial version supports the invalidation of one or all TLB entries. Flush by PID/LPID, or based in process/partition scope is not supported, because it would make using the generic QEMU TLB implementation hard. In these cases, all entries are flushed. Signed-off-by: Leandro Lupori --- target/ppc/helper.h | 18 +++ target/ppc/mmu_helper.c | 154 +++ target/ppc/translate/storage-ctrl-impl.c.inc | 15 ++ 3 files changed, 187 insertions(+) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index d627cfe6ed..5e663a0a50 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -672,6 +672,24 @@ DEF_HELPER_FLAGS_1(tlbia, TCG_CALL_NO_RWG, void, env) DEF_HELPER_FLAGS_2(tlbie, TCG_CALL_NO_RWG, void, env, tl) DEF_HELPER_FLAGS_2(tlbiva, TCG_CALL_NO_RWG, void, env, tl) #if defined(TARGET_PPC64) + +/* + * tlbie[l] helper flags + * + * RIC, PRS, R and local are passed as flags in the last argument. + */ +#define TLBIE_F_RIC_SHIFT 0 +#define TLBIE_F_PRS_SHIFT 2 +#define TLBIE_F_R_SHIFT 3 +#define TLBIE_F_LOCAL_SHIFT 4 + +#define TLBIE_F_RIC_MASK(3 << TLBIE_F_RIC_SHIFT) +#define TLBIE_F_PRS (1 << TLBIE_F_PRS_SHIFT) +#define TLBIE_F_R (1 << TLBIE_F_R_SHIFT) +#define TLBIE_F_LOCAL (1 << TLBIE_F_LOCAL_SHIFT) + +DEF_HELPER_FLAGS_4(tlbie_isa300, TCG_CALL_NO_WG, void, \ +env, tl, tl, i32) DEF_HELPER_FLAGS_3(store_slb, TCG_CALL_NO_RWG, void, env, tl, tl) DEF_HELPER_2(load_slb_esid, tl, env, tl) DEF_HELPER_2(load_slb_vsid, tl, env, tl) diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c index 15239dc95b..b881aee23f 100644 --- a/target/ppc/mmu_helper.c +++ b/target/ppc/mmu_helper.c @@ -429,6 +429,160 @@ void helper_tlbie(CPUPPCState *env, target_ulong addr) ppc_tlb_invalidate_one(env, addr); } +#if defined(TARGET_PPC64) + +/* Invalidation Selector */ +#define TLBIE_IS_VA 0 +#define TLBIE_IS_PID1 +#define TLBIE_IS_LPID 2 +#define TLBIE_IS_ALL3 + +/* Radix Invalidation Control */ +#define TLBIE_RIC_TLB 0 +#define TLBIE_RIC_PWC 1 +#define TLBIE_RIC_ALL 2 +#define 
TLBIE_RIC_GRP 3 + +/* Radix Actual Page sizes */ +#define TLBIE_R_AP_4K 0 +#define TLBIE_R_AP_64K 5 +#define TLBIE_R_AP_2M 1 +#define TLBIE_R_AP_1G 2 + +/* RB field masks */ +#define TLBIE_RB_EPN_MASK PPC_BITMASK(0, 51) +#define TLBIE_RB_IS_MASKPPC_BITMASK(52, 53) +#define TLBIE_RB_AP_MASKPPC_BITMASK(56, 58) + +void helper_tlbie_isa300(CPUPPCState *env, target_ulong rb, target_ulong rs, + uint32_t flags) +{ +unsigned ric = (flags & TLBIE_F_RIC_MASK) >> TLBIE_F_RIC_SHIFT; +/* + * With the exception of the checks for invalid instruction forms, + * PRS is currently ignored, because we don't know if a given TLB entry + * is process or partition scoped. + */ +bool prs = flags & TLBIE_F_PRS; +bool r = flags & TLBIE_F_R; +bool local = flags & TLBIE_F_LOCAL; +bool effR; +unsigned is = extract64(rb, PPC_BIT_NR(53), 2), set; +unsigned ap;/* actual page size */ +target_ulong addr, pgoffs_mask; + +qemu_log_mask(CPU_LOG_MMU, +"%s: local=%d addr=" TARGET_FMT_lx " ric=%u prs=%d r=%d is=%u\n", +__func__, local, rb & TARGET_PAGE_MASK, ric, prs, r, is); + +effR = FIELD_EX64(env->msr, MSR, HV) ? r : env->spr[SPR_LPCR] & LPCR_HR; + +/* Partial TLB invalidation is supported for Radix only for now. */ +if (!effR) { +goto inval_all; +} + +/* Check for invalid instruction forms (effR=1). */ +if (unlikely(ric == TLBIE_RIC_GRP || + ((ric == TLBIE_RIC_PWC || ric == TLBIE_RIC_ALL) && + is == TLBIE_IS_VA) || + (!prs && is == TLBIE_IS_PID))) { +qemu_log_mask(LOG_GUEST_ERROR, +"%s: invalid instruction form: ric=%u prs=%d r=%d is=%u\n", +__func__, ric, prs, r, is); +goto invalid; +} + +/* We don't cache Page Walks. */ +if (ric == TLBIE_RIC_PWC) { +if (local) { +set = extract64(rb, PPC_BIT_NR(51), 12); +if (set != 0) { +qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid set: %d\n", + __func__, set); +goto invalid; +} +} +return; +} + +/* + * Invalidation by LPID or PID is not supported, so fallback + * to full TLB flush in these cases. 
+ */ +if (is != TLBIE_IS_VA) { +goto inval_all; +} + +/* + * The results of an attempt to invalidate a translation outside of + * quadrant 0 for Radix Tree translation (effR=1, RIC=0, PRS=1, IS=0, + * and EA 0:1 != 0b00) are boundedly undefined. + */ +if (unlikely(ric == TLBIE_RIC_TLB && prs && is == TLBIE_IS_VA && + (rb & R_EADDR_QUADRANT) != R_EADDR_QUADRANT0)) { +
[PATCH RESEND v2 1/2] target/ppc: Move tlbie[l] to decode tree
Also decode RIC, PRS and R operands. Signed-off-by: Leandro Lupori --- target/ppc/cpu_init.c| 4 +- target/ppc/insn32.decode | 8 ++ target/ppc/translate.c | 64 +- target/ppc/translate/storage-ctrl-impl.c.inc | 87 4 files changed, 99 insertions(+), 64 deletions(-) create mode 100644 target/ppc/translate/storage-ctrl-impl.c.inc diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index c16cb8dbe7..8d7e77f778 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -6368,7 +6368,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data) PPC_FLOAT_EXT | PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ | PPC_MEM_SYNC | PPC_MEM_EIEIO | - PPC_MEM_TLBSYNC | + PPC_MEM_TLBIE | PPC_MEM_TLBSYNC | PPC_64B | PPC_64H | PPC_64BX | PPC_ALTIVEC | PPC_SEGMENT_64B | PPC_SLBI | PPC_POPCNTB | PPC_POPCNTWD | @@ -6585,7 +6585,7 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data) PPC_FLOAT_EXT | PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ | PPC_MEM_SYNC | PPC_MEM_EIEIO | - PPC_MEM_TLBSYNC | + PPC_MEM_TLBIE | PPC_MEM_TLBSYNC | PPC_64B | PPC_64H | PPC_64BX | PPC_ALTIVEC | PPC_SEGMENT_64B | PPC_SLBI | PPC_POPCNTB | PPC_POPCNTWD | diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 6ea48d5163..2b985249b8 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -809,3 +809,11 @@ VMODSD 000100 . . . 1001011@VX VMODUD 000100 . . . 11011001011@VX VMODSQ 000100 . . . 1111011@VX VMODUQ 000100 . . . 1101011@VX + +## TLB Management Instructions + +&X_tlbierb rs ric prs:bool r:bool +@X_tlbie.. rs:5 - ric:2 prs:1 r:1 rb:5 .. . &X_tlbie + +TLBIE 01 . - .. . . . 0100110010 -@X_tlbie +TLBIEL 01 . - .. . . . 
0100010010 -@X_tlbie diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 1d6daa4608..4fcb311c2d 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -5424,64 +5424,6 @@ static void gen_tlbia(DisasContext *ctx) #endif /* defined(CONFIG_USER_ONLY) */ } -/* tlbiel */ -static void gen_tlbiel(DisasContext *ctx) -{ -#if defined(CONFIG_USER_ONLY) -GEN_PRIV; -#else -bool psr = (ctx->opcode >> 17) & 0x1; - -if (ctx->pr || (!ctx->hv && !psr && ctx->hr)) { -/* - * tlbiel is privileged except when PSR=0 and HR=1, making it - * hypervisor privileged. - */ -GEN_PRIV; -} - -gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]); -#endif /* defined(CONFIG_USER_ONLY) */ -} - -/* tlbie */ -static void gen_tlbie(DisasContext *ctx) -{ -#if defined(CONFIG_USER_ONLY) -GEN_PRIV; -#else -bool psr = (ctx->opcode >> 17) & 0x1; -TCGv_i32 t1; - -if (ctx->pr) { -/* tlbie is privileged... */ -GEN_PRIV; -} else if (!ctx->hv) { -if (!ctx->gtse || (!psr && ctx->hr)) { -/* - * ... except when GTSE=0 or when PSR=0 and HR=1, making it - * hypervisor privileged. 
- */ -GEN_PRIV; -} -} - -if (NARROW_MODE(ctx)) { -TCGv t0 = tcg_temp_new(); -tcg_gen_ext32u_tl(t0, cpu_gpr[rB(ctx->opcode)]); -gen_helper_tlbie(cpu_env, t0); -tcg_temp_free(t0); -} else { -gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]); -} -t1 = tcg_temp_new_i32(); -tcg_gen_ld_i32(t1, cpu_env, offsetof(CPUPPCState, tlb_need_flush)); -tcg_gen_ori_i32(t1, t1, TLB_NEED_GLOBAL_FLUSH); -tcg_gen_st_i32(t1, cpu_env, offsetof(CPUPPCState, tlb_need_flush)); -tcg_temp_free_i32(t1); -#endif /* defined(CONFIG_USER_ONLY) */ -} - /* tlbsync */ static void gen_tlbsync(DisasContext *ctx) { @@ -6699,6 +6641,8 @@ static bool resolve_PLS_D(DisasContext *ctx, arg_D *d, arg_PLS_D *a) #include "translate/branch-impl.c.inc" +#include "translate/storage-ctrl-impl.c.inc" + /* Handles lfdp */ static void gen_dform39(DisasContext *ctx) { @@ -6937,10 +6881,6 @@ GEN_HANDLER(tlbia, 0x1F, 0x12, 0x0B, 0x03FFFC01, PPC_MEM_TLBIA), * XXX Those instructions will need to be handled differently for * different ISA versions */ -GEN_HANDLER(tlbiel, 0x1F, 0x12, 0x08, 0x001F0001, PPC_MEM_TLBIE), -GEN_HANDLER(tlbie, 0x1F, 0x12, 0x09, 0x001F0001, PPC_MEM_TLBIE), -GEN_HANDLER_E(tlbiel, 0x1F, 0x12, 0x08, 0x0011, PPC_NONE, PPC2_ISA300), -GEN_HANDLER_E(tlbie, 0x1F, 0x12, 0x09, 0x0011, PPC_NONE, PPC2_ISA300), GEN_HANDLER(tlbsync, 0x1
[PATCH RESEND v2 0/2] ppc: Implement ISA 3.00 tlbie[l]
Resent after rebasing and fixing conflicts with master. Changes from v1: - squashed first 2 commits into 1, because adding PPC_MEM_TLBIE to P9/P10's insns_flags and moving only tlbie (and not tlbiel) to decode tree breaks PowerPC64 instruction decoder initialization. Leandro Lupori (2): target/ppc: Move tlbie[l] to decode tree target/ppc: Implement ISA 3.00 tlbie[l] target/ppc/cpu_init.c| 4 +- target/ppc/helper.h | 18 +++ target/ppc/insn32.decode | 8 + target/ppc/mmu_helper.c | 154 +++ target/ppc/translate.c | 64 +--- target/ppc/translate/storage-ctrl-impl.c.inc | 102 6 files changed, 286 insertions(+), 64 deletions(-) create mode 100644 target/ppc/translate/storage-ctrl-impl.c.inc -- 2.25.1
Re: [PATCH v2 3/3] target/ppc: Check page dir/table base alignment
Leandro Lupori writes: > Check if each page dir/table base address is properly aligned and > log a guest error if not, as real hardware behave incorrectly in > this case. I think the commit message could be clearer, something like: According to PowerISA 3.1B, Book III 6.7.6 programming note, the page directory base addresses are expected to be aligned to their size. Real hardware seems to rely on that and will access the wrong address if they are misaligned. This results in a translation failure even if the page tables seem to be properly populated. Let's make sure we capture this assumption in the code to help anyone implementing page tables. > > Signed-off-by: Leandro Lupori > --- > target/ppc/mmu-radix64.c | 15 +++ > 1 file changed, 15 insertions(+) > > diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c > index 339cf5b4d8..1e7d932893 100644 > --- a/target/ppc/mmu-radix64.c > +++ b/target/ppc/mmu-radix64.c > @@ -280,6 +280,14 @@ static int ppc_radix64_next_level(AddressSpace *as, > vaddr eaddr, > *psize -= *nls; > if (!(pde & R_PTE_LEAF)) { /* Prepare for next iteration */ > *nls = pde & R_PDE_NLS; > + > +if ((pde & R_PDE_NLB) & MAKE_64BIT_MASK(0, *nls + 3)) { > +qemu_log_mask(LOG_GUEST_ERROR, > +"%s: misaligned page dir/table base: 0x%"VADDR_PRIx > +" page dir size: 0x"TARGET_FMT_lx"\n", > +__func__, (pde & R_PDE_NLB), BIT(*nls + 3)); > +} > + > index = eaddr >> (*psize - *nls); /* Shift */ > index &= ((1UL << *nls) - 1); /* Mask */ > *pte_addr = (pde & R_PDE_NLB) + (index * sizeof(pde)); > @@ -295,6 +303,13 @@ static int ppc_radix64_walk_tree(AddressSpace *as, vaddr > eaddr, > uint64_t index, pde, rpn, mask; > int level = 0; > > +if (base_addr & MAKE_64BIT_MASK(0, nls + 3)) { > +qemu_log_mask(LOG_GUEST_ERROR, > +"%s: misaligned page dir base: 0x%"VADDR_PRIx > +" page dir size: 0x"TARGET_FMT_lx"\n", > +__func__, base_addr, BIT(nls + 3)); > +} > + > index = eaddr >> (*psize - nls);/* Shift */ > index &= ((1UL << nls) - 1); /* Mask */ > *pte_addr = 
base_addr + (index * sizeof(pde));
Re: [PATCH v2 1/3] ppc: Check partition and process table alignment
Leandro Lupori writes: > Check if partition and process tables are properly aligned, in > their size, according to PowerISA 3.1B, Book III 6.7.6 programming > note. Hardware and KVM also raise an exception in these cases. > > Signed-off-by: Leandro Lupori Reviewed-by: Fabiano Rosas
Re: [PATCH v2 2/3] target/ppc: Improve Radix xlate level validation
Leandro Lupori writes: > Check if the number and size of Radix levels are valid on > POWER9/POWER10 CPUs, according to the supported Radix Tree > Configurations described in their User Manuals. > > Signed-off-by: Leandro Lupori > --- > target/ppc/mmu-radix64.c | 51 +++- > 1 file changed, 40 insertions(+), 11 deletions(-) > > diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c > index 9a8a2e2875..339cf5b4d8 100644 > --- a/target/ppc/mmu-radix64.c > +++ b/target/ppc/mmu-radix64.c > @@ -236,17 +236,39 @@ static void ppc_radix64_set_rc(PowerPCCPU *cpu, > MMUAccessType access_type, > } > } > > +static bool ppc_radix64_is_valid_level(int level, int psize, uint64_t nls) > +{ > +/* > + * Check if this is a valid level, according to POWER9 and POWER10 > + * Processor User's Manuals, sections 4.10.4.1 and 5.10.6.1, > respectively: > + * Supported Radix Tree Configurations and Resulting Page Sizes. > + * > + * NOTE: these checks are valid for POWER9 and POWER10 CPUs only. If > + * new CPUs that support other Radix configurations are added > + * (e.g., Microwatt), then a new method should be added to > + * PowerPCCPUClass, with this function being the POWER9/POWER10 > + * implementation. > + */ Sorry, this got too specific now. I could not respond in time before you sent the v2. Let's cut the mentions to the code: Note: these checks are specific to POWER9 and POWER10 CPUs. Any future CPUs that supports a different Radix MMU configuration will need their own implementation. 
> +switch (level) { > +case 0: /* Root Page Dir */ > +return psize == 52 && nls == 13; > +case 1: > +case 2: > +return nls == 9; > +case 3: > +return nls == 9 || nls == 5; > +default: > +qemu_log_mask(LOG_GUEST_ERROR, "invalid radix level: %d\n", level); > +return false; > +} > +} > + > static int ppc_radix64_next_level(AddressSpace *as, vaddr eaddr, >uint64_t *pte_addr, uint64_t *nls, >int *psize, uint64_t *pte, int > *fault_cause) > { > uint64_t index, pde; > > -if (*nls < 5) { /* Directory maps less than 2**5 entries */ > -*fault_cause |= DSISR_R_BADCONFIG; > -return 1; > -} > - > /* Read page entry from guest address space */ > pde = ldq_phys(as, *pte_addr); > if (!(pde & R_PTE_VALID)) { /* Invalid Entry */ > @@ -270,12 +292,8 @@ static int ppc_radix64_walk_tree(AddressSpace *as, vaddr > eaddr, > hwaddr *raddr, int *psize, uint64_t *pte, > int *fault_cause, hwaddr *pte_addr) > { > -uint64_t index, pde, rpn , mask; > - > -if (nls < 5) { /* Directory maps less than 2**5 entries */ > -*fault_cause |= DSISR_R_BADCONFIG; > -return 1; > -} > +uint64_t index, pde, rpn, mask; > +int level = 0; > > index = eaddr >> (*psize - nls);/* Shift */ > index &= ((1UL << nls) - 1); /* Mask */ > @@ -283,6 +301,11 @@ static int ppc_radix64_walk_tree(AddressSpace *as, vaddr > eaddr, > do { > int ret; > > +if (!ppc_radix64_is_valid_level(level++, *psize, nls)) { > +*fault_cause |= DSISR_R_BADCONFIG; > +return 1; > +} > + > ret = ppc_radix64_next_level(as, eaddr, pte_addr, &nls, psize, &pde, > fault_cause); > if (ret) { > @@ -456,6 +479,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU > *cpu, > } > } else { > uint64_t rpn, mask; > +int level = 0; > > index = (eaddr & R_EADDR_MASK) >> (*g_page_size - nls); /* Shift */ > index &= ((1UL << nls) - 1);/* Mask */ > @@ -475,6 +499,11 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU > *cpu, > return ret; > } > > +if (!ppc_radix64_is_valid_level(level++, *g_page_size, nls)) { > +fault_cause |= DSISR_R_BADCONFIG; > +return 
1; > +} > + > ret = ppc_radix64_next_level(cs->as, eaddr & R_EADDR_MASK, > &h_raddr, > &nls, g_page_size, &pte, > &fault_cause); > if (ret) {
[PATCH v4] hw: m25p80: add tests for write protect (WP# and SRWD bit)
Signed-off-by: Iris Chen --- Adding Signed Off By tag -- sorry I missed that ! tests/qtest/aspeed_smc-test.c | 62 +++ 1 file changed, 62 insertions(+) diff --git a/tests/qtest/aspeed_smc-test.c b/tests/qtest/aspeed_smc-test.c index ec233315e6..7786addfb8 100644 --- a/tests/qtest/aspeed_smc-test.c +++ b/tests/qtest/aspeed_smc-test.c @@ -56,7 +56,9 @@ enum { BULK_ERASE = 0xc7, READ = 0x03, PP = 0x02, +WRSR = 0x1, WREN = 0x6, +SRWD = 0x80, RESET_ENABLE = 0x66, RESET_MEMORY = 0x99, EN_4BYTE_ADDR = 0xB7, @@ -390,6 +392,64 @@ static void test_read_status_reg(void) flash_reset(); } +static void test_status_reg_write_protection(void) +{ +uint8_t r; + +spi_conf(CONF_ENABLE_W0); + +/* default case: WP# is high and SRWD is low -> status register writable */ +spi_ctrl_start_user(); +writeb(ASPEED_FLASH_BASE, WREN); +/* test ability to write SRWD */ +writeb(ASPEED_FLASH_BASE, WRSR); +writeb(ASPEED_FLASH_BASE, SRWD); +writeb(ASPEED_FLASH_BASE, RDSR); +r = readb(ASPEED_FLASH_BASE); +spi_ctrl_stop_user(); +g_assert_cmphex(r & SRWD, ==, SRWD); + +/* WP# high and SRWD high -> status register writable */ +spi_ctrl_start_user(); +writeb(ASPEED_FLASH_BASE, WREN); +/* test ability to write SRWD */ +writeb(ASPEED_FLASH_BASE, WRSR); +writeb(ASPEED_FLASH_BASE, 0); +writeb(ASPEED_FLASH_BASE, RDSR); +r = readb(ASPEED_FLASH_BASE); +spi_ctrl_stop_user(); +g_assert_cmphex(r & SRWD, ==, 0); + +/* WP# low and SRWD low -> status register writable */ +qtest_set_irq_in(global_qtest, + "/machine/soc/fmc/ssi.0/child[0]", "WP#", 0, 0); +spi_ctrl_start_user(); +writeb(ASPEED_FLASH_BASE, WREN); +/* test ability to write SRWD */ +writeb(ASPEED_FLASH_BASE, WRSR); +writeb(ASPEED_FLASH_BASE, SRWD); +writeb(ASPEED_FLASH_BASE, RDSR); +r = readb(ASPEED_FLASH_BASE); +spi_ctrl_stop_user(); +g_assert_cmphex(r & SRWD, ==, SRWD); + +/* WP# low and SRWD high -> status register NOT writable */ +spi_ctrl_start_user(); +writeb(ASPEED_FLASH_BASE, WREN); +/* test ability to write SRWD */ +writeb(ASPEED_FLASH_BASE, WRSR); 
+writeb(ASPEED_FLASH_BASE, 0); +writeb(ASPEED_FLASH_BASE, RDSR); +r = readb(ASPEED_FLASH_BASE); +spi_ctrl_stop_user(); +/* write is not successful */ +g_assert_cmphex(r & SRWD, ==, SRWD); + +qtest_set_irq_in(global_qtest, + "/machine/soc/fmc/ssi.0/child[0]", "WP#", 0, 1); +flash_reset(); +} + static char tmp_path[] = "/tmp/qtest.m25p80.XX"; int main(int argc, char **argv) @@ -416,6 +476,8 @@ int main(int argc, char **argv) qtest_add_func("/ast2400/smc/read_page_mem", test_read_page_mem); qtest_add_func("/ast2400/smc/write_page_mem", test_write_page_mem); qtest_add_func("/ast2400/smc/read_status_reg", test_read_status_reg); +qtest_add_func("/ast2400/smc/status_reg_write_protection", + test_status_reg_write_protection); ret = g_test_run(); -- 2.30.2
[RFC PATCH] tests/9p: introduce declarative function calls
There are currently 3 different functions for sending a 9p 'Twalk' request. They are all doing the same thing, just in a slightly different way and with slightly different function arguments. Merge those 3 functions into a single function by using a struct for function call arguments and use designated initializers when calling this function to turn usage into a declarative approach, which is more readable and easier to maintain. Signed-off-by: Christian Schoenebeck --- Before working on actual new stuff, I looked at the current unit test code and thought it's probably a good time to make the overall test code more readable before piling up more test code soon. In this patch I am suggesting the use of named function arguments. For instance do_walk_expect_error(v9p, "non-existent", ENOENT); is probably a bit hard to tell what it is supposed to be doing without looking up the function prototype, whereas Twalk((TWalkOpt) { .client = v9p, .path = "non-existent", .expectErr = ENOENT }); should make it immediately clear (provided you have some knowledge about the 9p network protocol). I'm using this coding style of declarative function calls a lot nowadays, which makes particular sense in the context of unit test code as those are typically passing literals as function arguments as shown above very often. But also in other contexts it is beneficial as it allows various linear combinations of possible function arguments being used / omitted on function calls and still being handled with only one function implementation. Caller has a great flexibility of which function arguments to use, and is also completely free of the order of the arguments being specified. Another benefit is that you can also extend functionality later on, without breaking existing function calls. So this avoids a lot of refactoring work in the long term. With C++ you could also define specific default values for omitted function arguments. 
In C unfortunately it is just the language default initializer which usually is simply zero. Obviously with a large number of possible function arguments provided, some combinations make sense and some simply don't. In this patch for instance this is handled with assertion faults like: /* you can expect either Rwalk or Rlerror, but obviously not both */ g_assert(!opt.expectErr || !(opt.Rwalk.nwqid || opt.Rwalk.wqid)); So this would be a runtime error. In C++ you could turn the function into a constexpr and make that a compile error instead, in C there is _Static_assert(...) but as there is no constexpr, that would probably be hard to achieve. Thoughts? --- tests/qtest/virtio-9p-test.c | 79 +++- 1 file changed, 42 insertions(+), 37 deletions(-) diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c index 25305a4cf7..6a7f1f6252 100644 --- a/tests/qtest/virtio-9p-test.c +++ b/tests/qtest/virtio-9p-test.c @@ -669,50 +669,51 @@ static void do_version(QVirtio9P *v9p) g_assert_cmpmem(server_version, server_len, version, strlen(version)); } +/* options for 'Twalk' 9p request */ +typedef struct TWalkOpt { +/* 9P client being used (mandatory) */ +QVirtio9P *client; +/* path to walk to (mandatory) */ +const char *path; +/* data being received from 9p server as 'Rwalk' response (optional) */ +struct { +uint16_t *nwqid; +v9fs_qid **wqid; +} Rwalk; +/* do we expect an Rlerror response, if yes which error code? 
(optional) */ +uint32_t expectErr; +} TWalkOpt; + /* * utility function: walk to requested dir and return fid for that dir and * the QIDs of server response */ -static uint32_t do_walk_rqids(QVirtio9P *v9p, const char *path, uint16_t *nwqid, - v9fs_qid **wqid) +static uint32_t Twalk(TWalkOpt opt) { char **wnames; P9Req *req; +uint32_t err; const uint32_t fid = genfid(); -int nwnames = split(path, "/", &wnames); - -req = v9fs_twalk(v9p, 0, fid, nwnames, wnames, 0); -v9fs_req_wait_for_reply(req, NULL); -v9fs_rwalk(req, nwqid, wqid); - -split_free(&wnames); -return fid; -} +g_assert(opt.client); +g_assert(opt.path); +/* you can expect either Rwalk or Rlerror, but obviously not both */ +g_assert(!opt.expectErr || !(opt.Rwalk.nwqid || opt.Rwalk.wqid)); -/* utility function: walk to requested dir and return fid for that dir */ -static uint32_t do_walk(QVirtio9P *v9p, const char *path) -{ -return do_walk_rqids(v9p, path, NULL, NULL); -} +int nwnames = split(opt.path, "/", &wnames); -/* utility function: walk to requested dir and expect passed error response */ -static void do_walk_expect_error(QVirtio9P *v9p, const char *path, uint32_t err) -{ -char **wnames; -P9Req *req; -uint32_t _err; -const uint32_t fid = genfid(); - -int nwnames = split(path, "/", &wnames); -
Re: [PATCH v7 01/18] job.c: make job_mutex and job_lock/unlock() public
I've already acked this (honestly, because Stefan did), but still, I want to clarify: On 6/16/22 16:18, Emanuele Giuseppe Esposito wrote: job mutex will be used to protect the job struct elements and list, replacing AioContext locks. Right now use a shared lock for all jobs, in order to keep things simple. Once the AioContext lock is gone, we can introduce per-job locks. To simplify the switch from aiocontext to job lock, introduce *nop* lock/unlock functions and macros. We want to always call job_lock/unlock outside the AioContext locks, and not vice-versa, otherwise we might get a deadlock. Could you describe here why we get a deadlock? As I understand, we'll deadlock if two code paths exist simultaneously: 1. we take job mutex under aiocontext lock 2. we take aiocontext lock under job mutex If these paths exist, it's possible that one thread goes through [1] and another through [2]. If thread [1] holds job-mutex and wants to take aiocontext-lock, and at the same time thread [2] holds aiocontext-lock and wants to take job-mutex, that's a deadlock. If you say that we must avoid [1], do you have in mind that we have [2] somewhere? If so, this should be mentioned here. If not, could we just make a normal mutex, not a noop? This is not straightforward to do, and that's why we start with nop functions. Once everything is protected by job_lock/unlock, we can change the nop into an actual mutex and remove the aiocontext lock. Since job_mutex is already being used, add static real_job_{lock/unlock} for the existing usage. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Stefan Hajnoczi -- Best regards, Vladimir
Re: [PATCH v2 3/3] target/ppc: Check page dir/table base alignment
On 6/24/22 10:16, Leandro Lupori wrote: Check if each page dir/table base address is properly aligned and log a guest error if not, as real hardware behave incorrectly in this case. Signed-off-by: Leandro Lupori --- target/ppc/mmu-radix64.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index 339cf5b4d8..1e7d932893 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -280,6 +280,14 @@ static int ppc_radix64_next_level(AddressSpace *as, vaddr eaddr, *psize -= *nls; if (!(pde & R_PTE_LEAF)) { /* Prepare for next iteration */ *nls = pde & R_PDE_NLS; + +if ((pde & R_PDE_NLB) & MAKE_64BIT_MASK(0, *nls + 3)) { +qemu_log_mask(LOG_GUEST_ERROR, +"%s: misaligned page dir/table base: 0x%"VADDR_PRIx +" page dir size: 0x"TARGET_FMT_lx"\n", +__func__, (pde & R_PDE_NLB), BIT(*nls + 3)); +} + index = eaddr >> (*psize - *nls); /* Shift */ index &= ((1UL << *nls) - 1); /* Mask */ *pte_addr = (pde & R_PDE_NLB) + (index * sizeof(pde)); In your response to my question on v1, you said that it appears that the cpu ignores bits *nls+3. This isn't ignoring them -- it's including [nls+2 : nls] into pte_addr. It would be better to compute this as index = ... index &= ... *pte_addr = ... if (*pte_addr & 7) { qemu_log(...); } r~
Re: [PATCH v11 2/2] qtest/cxl: Add aarch64 virt test for CXL
On Fri, 24 Jun 2022 17:12:25 +0100 Peter Maydell wrote: > On Thu, 16 Jun 2022 at 15:20, Jonathan Cameron > wrote: > > > > Add a single complex case for aarch64 virt machine. > > > > Signed-off-by: Jonathan Cameron > > --- > > tests/qtest/cxl-test.c | 48 + > > tests/qtest/meson.build | 1 + > > 2 files changed, 40 insertions(+), 9 deletions(-) > > > > diff --git a/tests/qtest/cxl-test.c b/tests/qtest/cxl-test.c > > index 2133e973f4..1015d0e7c2 100644 > > --- a/tests/qtest/cxl-test.c > > +++ b/tests/qtest/cxl-test.c > > @@ -17,6 +17,11 @@ > >"-device pxb-cxl,id=cxl.1,bus=pcie.0,bus_nr=53 " \ > >"-M > > cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.targets.1=cxl.1,cxl-fmw.0.size=4G " > > > > +#define QEMU_VIRT_2PXB_CMD "-machine virt,cxl=on " \ > > + "-device pxb-cxl,id=cxl.0,bus=pcie.0,bus_nr=52 " \ > > + "-device pxb-cxl,id=cxl.1,bus=pcie.0,bus_nr=53 " \ > > + "-M > > cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.targets.1=cxl.1,cxl-fmw.0.size=4G " > > + > > If CXL requires booting via UEFI, what does this test case do? > It doesn't seem to be passing in a BIOS image. Not a lot beyond checking device creation is valid etc and the machine boots. There is a bios tables test that checks we pass the right tables to the BIOS image. I didn't duplicate that for ARM on the basis it's more or less identical, but perhaps that is worth adding. To do any useful functional testing will require a mass of complex OS handling after booting. That testing is definitely something I'd like to add, but the userspace tooling isn't all in place yet. Final kernel series that's needed to get to the point where you can use the non volatile memory had a new version posted yesterday. Jonathan > > thanks > -- PMM
Re: [PULL 0/3] Linux user for 7.1 patches
On 6/24/22 02:49, Laurent Vivier wrote: The following changes since commit c8b2d413761af732a0798d8df45ce968732083fe: Merge tag 'bsd-user-syscall-2022q2-pull-request' of ssh://github.com/qemu-bsd-user/qemu-bsd-user into staging (2022-06-19 13:56:13 -0700) are available in the Git repository at: https://gitlab.com/laurent_vivier/qemu.git tags/linux-user-for-7.1-pull-request for you to fetch changes up to 9a7f682c26acae5bc8bfd1f7c774070da54f1625: linux-user: Adjust child_tidptr on set_tid_address() syscall (2022-06-24 10:00:01 +0200) linux-user pull request 20220624 Applied, thanks. Please update https://wiki.qemu.org/ChangeLog/7.1 as appropriate. r~ Helge Deller (1): linux-user: Adjust child_tidptr on set_tid_address() syscall Ilya Leoshkevich (1): linux-user: Add partial support for MADV_DONTNEED Richard Henderson (1): linux-user/x86_64: Fix ELF_PLATFORM linux-user/elfload.c| 30 + linux-user/mmap.c | 64 + linux-user/syscall.c| 20 ++-- linux-user/user-internals.h | 1 + linux-user/user-mmap.h | 1 + 5 files changed, 92 insertions(+), 24 deletions(-)
[PATCH v9 4/4] module: Use bundle mechanism
Before this change, the directory of the executable was being added to resolve modules in the build tree. However, get_relocated_path() can now resolve them with the new bundle mechanism. Signed-off-by: Akihiko Odaki --- util/module.c | 1 - 1 file changed, 1 deletion(-) diff --git a/util/module.c b/util/module.c index 6bb4ad915a1..8ddb0e18f51 100644 --- a/util/module.c +++ b/util/module.c @@ -274,7 +274,6 @@ bool module_load_one(const char *prefix, const char *lib_name, bool mayfail) dirs[n_dirs++] = g_strdup_printf("%s", search_dir); } dirs[n_dirs++] = get_relocated_path(CONFIG_QEMU_MODDIR); -dirs[n_dirs++] = g_strdup(qemu_get_exec_dir()); #ifdef CONFIG_MODULE_UPGRADES version_dir = g_strcanon(g_strdup(QEMU_PKGVERSION), -- 2.32.1 (Apple Git-133)
Re: [PATCH 2/2] target/arm: Check V7VE as well as LPAE in arm_pamax
On 6/24/22 09:27, Peter Maydell wrote: +/* + * In machvirt_init, we call arm_pamax on a cpu that is not fully + * initialized, so we can't rely on the propagation done in realize. + */ +if (arm_feature(&cpu->env, ARM_FEATURE_LPAE) || +arm_feature(&cpu->env, ARM_FEATURE_V7VE)) { /* v7 with LPAE */ return 40; I guess this is expedient, so on that basis Reviewed-by: Peter Maydell but as I mentioned in the gitlab issue it's kind of bogus that the virt board is doing stuff to a non-realized CPU object. My first look suggested that the virt board wasn't even setting all of the cpu properties properly, so realization might not help. I meant to go back again and spend more time, but that hasn't happened yet. r~
[PATCH v9 2/4] cutils: Introduce bundle mechanism
Developers often run QEMU without installing. The bundle mechanism allows to look up files which should be present in installation even in such a situation. It is a general mechanism and can find any files in the installation tree. The build tree will have a new directory, qemu-bundle, to represent what files the installation tree would have for reference by the executables. Note that it abandons compatibility with Windows older than 8. The extended support for the prior version, 7 ended more than 2 years ago, and it is unlikely that someone would like to run the latest QEMU on such an old system. Signed-off-by: Akihiko Odaki Suggested-by: Paolo Bonzini --- docs/about/build-platforms.rst | 2 +- include/qemu/cutils.h | 18 +++-- include/qemu/osdep.h| 2 +- meson.build | 4 ++ scripts/symlink-install-tree.py | 37 ++ util/cutils.c | 68 +++-- util/meson.build| 1 + 7 files changed, 106 insertions(+), 26 deletions(-) create mode 100755 scripts/symlink-install-tree.py diff --git a/docs/about/build-platforms.rst b/docs/about/build-platforms.rst index 1958edb4305..ebde20f9815 100644 --- a/docs/about/build-platforms.rst +++ b/docs/about/build-platforms.rst @@ -88,7 +88,7 @@ Windows The project aims to support the two most recent versions of Windows that are still supported by the vendor. The minimum Windows API that is currently -targeted is "Windows 7", so theoretically the QEMU binaries can still be run +targeted is "Windows 8", so theoretically the QEMU binaries can still be run on older versions of Windows, too. However, such old versions of Windows are not tested anymore, so it is recommended to use one of the latest versions of Windows instead. diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h index d3e532b64c8..92c436d8c70 100644 --- a/include/qemu/cutils.h +++ b/include/qemu/cutils.h @@ -224,9 +224,21 @@ const char *qemu_get_exec_dir(void); * @dir: the directory (typically a `CONFIG_*DIR` variable) to be relocated. 
* * Returns a path for @dir that uses the directory of the running executable - * as the prefix. For example, if `bindir` is `/usr/bin` and @dir is - * `/usr/share/qemu`, the function will append `../share/qemu` to the - * directory that contains the running executable and return the result. + * as the prefix. + * + * When a directory named `qemu-bundle` exists in the directory of the running + * executable, the path to the directory will be prepended to @dir. For + * example, if the directory of the running executable is `/qemu/build` @dir + * is `/usr/share/qemu`, the result will be + * `/qemu/build/qemu-bundle/usr/share/qemu`. The directory is expected to exist + * in the build tree. + * + * Otherwise, the directory of the running executable will be used as the + * prefix and it appends the relative path from `bindir` to @dir. For example, + * if the directory of the running executable is `/opt/qemu/bin`, `bindir` is + * `/usr/bin` and @dir is `/usr/share/qemu`, the result will be + * `/opt/qemu/bin/../share/qemu`. + * * The returned string should be freed by the caller. 
*/ char *get_relocated_path(const char *dir); diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index b1c161c035a..84f8b9d0243 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -75,7 +75,7 @@ QEMU_EXTERN_C int daemon(int, int); #ifdef _WIN32 /* as defined in sdkddkver.h */ #ifndef _WIN32_WINNT -#define _WIN32_WINNT 0x0601 /* Windows 7 API (should be in sync with glib) */ +#define _WIN32_WINNT 0x0602 /* Windows 8 API (should be in sync with glib) */ #endif /* reduces the number of implicitly included headers */ #ifndef WIN32_LEAN_AND_MEAN diff --git a/meson.build b/meson.build index 9efcb175d16..c49f5ebfc37 100644 --- a/meson.build +++ b/meson.build @@ -7,6 +7,8 @@ add_test_setup('quick', exclude_suites: ['slow', 'thorough'], is_default: true) add_test_setup('slow', exclude_suites: ['thorough'], env: ['G_TEST_SLOW=1', 'SPEED=slow']) add_test_setup('thorough', env: ['G_TEST_SLOW=1', 'SPEED=thorough']) +meson.add_postconf_script('scripts/symlink-install-tree.py') + not_found = dependency('', required: false) keyval = import('keyval') ss = import('sourceset') @@ -356,10 +358,12 @@ nvmm =not_found hvf = not_found midl = not_found widl = not_found +pathcch = not_found host_dsosuf = '.so' if targetos == 'windows' midl = find_program('midl', required: false) widl = find_program('widl', required: false) + pathcch = cc.find_library('pathcch') socket = cc.find_library('ws2_32') winmm = cc.find_library('winmm') diff --git a/scripts/symlink-install-tree.py b/scripts/symlink-install-tree.py new file mode 100755 index 000..f35aa176404 --- /dev/null +++ b/scripts/symlink-install-tree.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 + +from pathlib import Path +import errno +import json +import os +import subprocess +imp
[PATCH v9 3/4] datadir: Use bundle mechanism
softmmu/datadir.c had its own implementation to find files in the build tree, but now bundle mechanism provides the unified implementation which works for datadir and the other files. Signed-off-by: Akihiko Odaki --- .travis.yml | 2 +- pc-bios/keymaps/meson.build | 21 ++--- pc-bios/meson.build | 13 +++-- scripts/oss-fuzz/build.sh | 2 +- softmmu/datadir.c | 22 +- tests/qtest/fuzz/fuzz.c | 15 --- 6 files changed, 12 insertions(+), 63 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9afc4a54b8f..4fdc9a67855 100644 --- a/.travis.yml +++ b/.travis.yml @@ -223,7 +223,7 @@ jobs: - BUILD_RC=0 && make -j${JOBS} || BUILD_RC=$? - | if [ "$BUILD_RC" -eq 0 ] ; then - mv pc-bios/s390-ccw/*.img pc-bios/ ; + mv pc-bios/s390-ccw/*.img qemu-bundle/usr/local/share/qemu ; ${TEST_CMD} ; else $(exit $BUILD_RC); diff --git a/pc-bios/keymaps/meson.build b/pc-bios/keymaps/meson.build index 44247a12b54..2837eb34f4e 100644 --- a/pc-bios/keymaps/meson.build +++ b/pc-bios/keymaps/meson.build @@ -40,9 +40,9 @@ else endif cp = find_program('cp') -t = [] -foreach km, args: keymaps - if native_qemu_keymap.found() +if native_qemu_keymap.found() + t = [] + foreach km, args: keymaps # generate with qemu-kvm t += custom_target(km, build_by_default: true, @@ -50,20 +50,11 @@ foreach km, args: keymaps command: [native_qemu_keymap, '-f', '@OUTPUT@', args.split()], install: true, install_dir: qemu_datadir / 'keymaps') - else -# copy from source tree -t += custom_target(km, - build_by_default: true, - input: km, - output: km, - command: [cp, '@INPUT@', '@OUTPUT@'], - install: true, - install_dir: qemu_datadir / 'keymaps') - endif -endforeach + endforeach -if native_qemu_keymap.found() alias_target('update-keymaps', t) +else + install_data(keymaps.keys(), install_dir: qemu_datadir / 'keymaps') endif install_data(['sl', 'sv'], install_dir: qemu_datadir / 'keymaps') diff --git a/pc-bios/meson.build b/pc-bios/meson.build index 41ba1c0ec7b..388e0db6e40 100644 --- a/pc-bios/meson.build +++ 
b/pc-bios/meson.build @@ -85,16 +85,9 @@ blobs = [ 'vof-nvram.bin', ] -ln_s = [find_program('ln', required: true), '-sf'] -foreach f : blobs - roms += custom_target(f, -build_by_default: have_system, -output: f, -input: files('meson.build'),# dummy input -install: get_option('install_blobs'), -install_dir: qemu_datadir, -command: [ ln_s, meson.project_source_root() / 'pc-bios' / f, '@OUTPUT@' ]) -endforeach +if get_option('install_blobs') + install_data(blobs, install_dir: qemu_datadir) +endif subdir('descriptors') subdir('keymaps') diff --git a/scripts/oss-fuzz/build.sh b/scripts/oss-fuzz/build.sh index 98b56e05210..16316b25662 100755 --- a/scripts/oss-fuzz/build.sh +++ b/scripts/oss-fuzz/build.sh @@ -88,7 +88,7 @@ if [ "$GITLAB_CI" != "true" ]; then fi # Copy over the datadir -cp -r ../pc-bios/ "$DEST_DIR/pc-bios" +cp -r ../pc-bios/ "$DEST_DIR/data" targets=$(./qemu-fuzz-i386 | awk '$1 ~ /\*/ {print $2}') base_copy="$DEST_DIR/qemu-fuzz-i386-target-$(echo "$targets" | head -n 1)" diff --git a/softmmu/datadir.c b/softmmu/datadir.c index 160cac999a6..697cffea932 100644 --- a/softmmu/datadir.c +++ b/softmmu/datadir.c @@ -83,26 +83,6 @@ void qemu_add_data_dir(char *path) data_dir[data_dir_idx++] = path; } -/* - * Find a likely location for support files using the location of the binary. - * When running from the build tree this will be "$bindir/pc-bios". - * Otherwise, this is CONFIG_QEMU_DATADIR (possibly relocated). - * - * The caller must use g_free() to free the returned data when it is - * no longer required. 
- */ -static char *find_datadir(void) -{ -g_autofree char *dir = NULL; - -dir = g_build_filename(qemu_get_exec_dir(), "pc-bios", NULL); -if (g_file_test(dir, G_FILE_TEST_IS_DIR)) { -return g_steal_pointer(&dir); -} - -return get_relocated_path(CONFIG_QEMU_DATADIR); -} - void qemu_add_default_firmwarepath(void) { char **dirs; @@ -116,7 +96,7 @@ void qemu_add_default_firmwarepath(void) g_strfreev(dirs); /* try to find datadir relative to the executable path */ -qemu_add_data_dir(find_datadir()); +qemu_add_data_dir(get_relocated_path(CONFIG_QEMU_DATADIR)); } void qemu_list_data_dirs(void) diff --git a/tests/qtest/fuzz/fuzz.c b/tests/qtest/fuzz/fuzz.c index 0ad4ba9e94d..2062b40d82b 100644 --- a/tests/qtest/fuzz/fuzz.c +++ b/tests/qtest/fuzz/fuzz.c @@ -174,21 +174,6 @@ int LLVMFuzzerInitialize(int *a
[PATCH v9 1/4] tests/vm: do not specify -bios option
From: Paolo Bonzini When running from the build tree, the executable is able to find the BIOS on its own; when running from the source tree, a firmware blob should already be installed and there is no guarantee that the one in the source tree works with the QEMU that is being used for the installation. Just remove the -bios option, since it is unnecessary and in fact there are other x86 VM tests that do not bother specifying it. Signed-off-by: Paolo Bonzini Reviewed-by: Daniel P. Berrangé Reviewed-by: Thomas Huth Signed-off-by: Akihiko Odaki Message-Id: <20220616083025.116902-1-pbonz...@redhat.com> --- tests/vm/fedora | 1 - tests/vm/freebsd | 1 - tests/vm/netbsd | 1 - tests/vm/openbsd | 1 - 4 files changed, 4 deletions(-) diff --git a/tests/vm/fedora b/tests/vm/fedora index 92b78d6e2c9..12eca919a08 100755 --- a/tests/vm/fedora +++ b/tests/vm/fedora @@ -79,7 +79,6 @@ class FedoraVM(basevm.BaseVM): self.exec_qemu_img("create", "-f", "qcow2", img_tmp, self.size) self.print_step("Booting installer") self.boot(img_tmp, extra_args = [ -"-bios", "pc-bios/bios-256k.bin", "-machine", "graphics=off", "-device", "VGA", "-cdrom", iso diff --git a/tests/vm/freebsd b/tests/vm/freebsd index 805db759d67..cd1fabde523 100755 --- a/tests/vm/freebsd +++ b/tests/vm/freebsd @@ -95,7 +95,6 @@ class FreeBSDVM(basevm.BaseVM): self.print_step("Booting installer") self.boot(img_tmp, extra_args = [ -"-bios", "pc-bios/bios-256k.bin", "-machine", "graphics=off", "-device", "VGA", "-cdrom", iso diff --git a/tests/vm/netbsd b/tests/vm/netbsd index 45aa9a7fda7..aa883ec23c9 100755 --- a/tests/vm/netbsd +++ b/tests/vm/netbsd @@ -86,7 +86,6 @@ class NetBSDVM(basevm.BaseVM): self.print_step("Booting installer") self.boot(img_tmp, extra_args = [ -"-bios", "pc-bios/bios-256k.bin", "-machine", "graphics=off", "-cdrom", iso ]) diff --git a/tests/vm/openbsd b/tests/vm/openbsd index 13c82542140..6f1b6f5b98a 100755 --- a/tests/vm/openbsd +++ b/tests/vm/openbsd @@ -82,7 +82,6 @@ class 
OpenBSDVM(basevm.BaseVM): self.print_step("Booting installer") self.boot(img_tmp, extra_args = [ -"-bios", "pc-bios/bios-256k.bin", "-machine", "graphics=off", "-device", "VGA", "-cdrom", iso -- 2.32.1 (Apple Git-133)
[PATCH v9 0/4] cutils: Introduce bundle mechanism
Developers often run QEMU without installing. The bundle mechanism allows looking up files that should be present in an installation even in such a situation. It is a general mechanism and can find any files located relative to the installation tree. The build tree must have a new directory, qemu-bundle, to represent what files the installation tree would have for reference by the executables. Note that this abandons compatibility with Windows older than 8 to use PathCchSkipRoot(). Extended support for the prior version, Windows 7, ended more than 2 years ago, and it is unlikely that anyone would like to run the latest QEMU on such an old system. v9: * Update _WIN32_WINNT in include/qemu/osdep.h (Thomas Huth) v8: * Pass absolute paths to get_relocated_path() (Paolo Bonzini) * Use meson introspection (Paolo Bonzini) * Drop "qga: Relocate a path emitted in the help text" as it is no longer relevant for the bundle mechanism. v7: Properly fix --firmwarepath (Daniel P. Berrangé) v6: Reuse get_relocated_path() in find_bundle() (Paolo Bonzini) v5: * Prefer qemu-bundle if it exists. (Daniel P. Berrangé) * Check install_blobs option before installing BIOSes (Paolo Bonzini) * Add common code to set up qemu-bundle to the top level meson.build (Paolo Bonzini) v4: * Add Daniel P. Berrangé to CC. Hopefully this helps merging his patch: https://mail.gnu.org/archive/html/qemu-devel/2022-06/msg02276.html * Rebased to the latest QEMU. v3: * Note that the bundle mechanism is for any files located relative to the installation tree including but not limited to datadir. (Peter Maydell) * Fix "bridge" typo (Philippe Mathieu-Daudé) v2: Rebased to the latest QEMU. 
Akihiko Odaki (3): cutils: Introduce bundle mechanism datadir: Use bundle mechanism module: Use bundle mechanism Paolo Bonzini (1): tests/vm: do not specify -bios option .travis.yml | 2 +- docs/about/build-platforms.rst | 2 +- include/qemu/cutils.h | 18 +++-- include/qemu/osdep.h| 2 +- meson.build | 4 ++ pc-bios/keymaps/meson.build | 21 +++--- pc-bios/meson.build | 13 ++- scripts/oss-fuzz/build.sh | 2 +- scripts/symlink-install-tree.py | 37 ++ softmmu/datadir.c | 22 +-- tests/qtest/fuzz/fuzz.c | 15 tests/vm/fedora | 1 - tests/vm/freebsd| 1 - tests/vm/netbsd | 1 - tests/vm/openbsd| 1 - util/cutils.c | 68 +++-- util/meson.build| 1 + util/module.c | 1 - 18 files changed, 118 insertions(+), 94 deletions(-) create mode 100755 scripts/symlink-install-tree.py -- 2.32.1 (Apple Git-133)
Re: [PATCH v7 10/18] jobs: rename static functions called with job_mutex held
Am 24/06/2022 um 17:28 schrieb Paolo Bonzini: > On 6/24/22 16:29, Kevin Wolf wrote: >> Yes, I think Vladimir is having the same difficulties with reading the >> series as I had. And I believe his suggestion would make the >> intermediate states less impossible to review. The question is how much >> work it would be and whether you're willing to do this. As I said, if >> reorganising is too hard, I'm okay with just ignoring the intermediate >> state and reviewing the series as if it were a single patch. > > I think we've tried different intermediate states for each of the > previous 6 versions, and none of them were really satisfactory. :( > Yes. v7 in this case basically means that we tried at least 4-5 times to reorganize patches. Nevertheless I could give it a try. I just hope I won't regret it :) If I don't manage, I will just give up and re-send the series with Vladimir's nitpicks. But yeah, I guess we all agree that this is the last time I reorganize this series. Feedback is always very welcome, but no more on reordering, please ;) Thank you, Emanuele
[PATCH v2 3/3] target/ppc: Check page dir/table base alignment
Check if each page dir/table base address is properly aligned and log a guest error if not, as real hardware behaves incorrectly in this case. Signed-off-by: Leandro Lupori --- target/ppc/mmu-radix64.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index 339cf5b4d8..1e7d932893 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -280,6 +280,14 @@ static int ppc_radix64_next_level(AddressSpace *as, vaddr eaddr, *psize -= *nls; if (!(pde & R_PTE_LEAF)) { /* Prepare for next iteration */ *nls = pde & R_PDE_NLS; + +if ((pde & R_PDE_NLB) & MAKE_64BIT_MASK(0, *nls + 3)) { +qemu_log_mask(LOG_GUEST_ERROR, +"%s: misaligned page dir/table base: 0x%"VADDR_PRIx +" page dir size: 0x"TARGET_FMT_lx"\n", +__func__, (pde & R_PDE_NLB), BIT(*nls + 3)); +} + index = eaddr >> (*psize - *nls); /* Shift */ index &= ((1UL << *nls) - 1); /* Mask */ *pte_addr = (pde & R_PDE_NLB) + (index * sizeof(pde)); @@ -295,6 +303,13 @@ static int ppc_radix64_walk_tree(AddressSpace *as, vaddr eaddr, uint64_t index, pde, rpn, mask; int level = 0; +if (base_addr & MAKE_64BIT_MASK(0, nls + 3)) { +qemu_log_mask(LOG_GUEST_ERROR, +"%s: misaligned page dir base: 0x%"VADDR_PRIx +" page dir size: 0x"TARGET_FMT_lx"\n", +__func__, base_addr, BIT(nls + 3)); +} + index = eaddr >> (*psize - nls);/* Shift */ index &= ((1UL << nls) - 1); /* Mask */ *pte_addr = base_addr + (index * sizeof(pde)); -- 2.25.1
[PATCH v2 2/3] target/ppc: Improve Radix xlate level validation
Check if the number and size of Radix levels are valid on POWER9/POWER10 CPUs, according to the supported Radix Tree Configurations described in their User Manuals. Signed-off-by: Leandro Lupori --- target/ppc/mmu-radix64.c | 51 +++- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index 9a8a2e2875..339cf5b4d8 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -236,17 +236,39 @@ static void ppc_radix64_set_rc(PowerPCCPU *cpu, MMUAccessType access_type, } } +static bool ppc_radix64_is_valid_level(int level, int psize, uint64_t nls) +{ +/* + * Check if this is a valid level, according to POWER9 and POWER10 + * Processor User's Manuals, sections 4.10.4.1 and 5.10.6.1, respectively: + * Supported Radix Tree Configurations and Resulting Page Sizes. + * + * NOTE: these checks are valid for POWER9 and POWER10 CPUs only. If + * new CPUs that support other Radix configurations are added + * (e.g., Microwatt), then a new method should be added to + * PowerPCCPUClass, with this function being the POWER9/POWER10 + * implementation. 
+ */ +switch (level) { +case 0: /* Root Page Dir */ +return psize == 52 && nls == 13; +case 1: +case 2: +return nls == 9; +case 3: +return nls == 9 || nls == 5; +default: +qemu_log_mask(LOG_GUEST_ERROR, "invalid radix level: %d\n", level); +return false; +} +} + static int ppc_radix64_next_level(AddressSpace *as, vaddr eaddr, uint64_t *pte_addr, uint64_t *nls, int *psize, uint64_t *pte, int *fault_cause) { uint64_t index, pde; -if (*nls < 5) { /* Directory maps less than 2**5 entries */ -*fault_cause |= DSISR_R_BADCONFIG; -return 1; -} - /* Read page entry from guest address space */ pde = ldq_phys(as, *pte_addr); if (!(pde & R_PTE_VALID)) { /* Invalid Entry */ @@ -270,12 +292,8 @@ static int ppc_radix64_walk_tree(AddressSpace *as, vaddr eaddr, hwaddr *raddr, int *psize, uint64_t *pte, int *fault_cause, hwaddr *pte_addr) { -uint64_t index, pde, rpn , mask; - -if (nls < 5) { /* Directory maps less than 2**5 entries */ -*fault_cause |= DSISR_R_BADCONFIG; -return 1; -} +uint64_t index, pde, rpn, mask; +int level = 0; index = eaddr >> (*psize - nls);/* Shift */ index &= ((1UL << nls) - 1); /* Mask */ @@ -283,6 +301,11 @@ static int ppc_radix64_walk_tree(AddressSpace *as, vaddr eaddr, do { int ret; +if (!ppc_radix64_is_valid_level(level++, *psize, nls)) { +*fault_cause |= DSISR_R_BADCONFIG; +return 1; +} + ret = ppc_radix64_next_level(as, eaddr, pte_addr, &nls, psize, &pde, fault_cause); if (ret) { @@ -456,6 +479,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, } } else { uint64_t rpn, mask; +int level = 0; index = (eaddr & R_EADDR_MASK) >> (*g_page_size - nls); /* Shift */ index &= ((1UL << nls) - 1);/* Mask */ @@ -475,6 +499,11 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, return ret; } +if (!ppc_radix64_is_valid_level(level++, *g_page_size, nls)) { +fault_cause |= DSISR_R_BADCONFIG; +return 1; +} + ret = ppc_radix64_next_level(cs->as, eaddr & R_EADDR_MASK, &h_raddr, &nls, g_page_size, &pte, &fault_cause); if (ret) { -- 2.25.1
[PATCH v2 1/3] ppc: Check partition and process table alignment
Check if partition and process tables are properly aligned, in their size, according to PowerISA 3.1B, Book III 6.7.6 programming note. Hardware and KVM also raise an exception in these cases. Signed-off-by: Leandro Lupori --- hw/ppc/spapr.c | 5 + hw/ppc/spapr_hcall.c | 9 + target/ppc/mmu-book3s-v3.c | 5 + target/ppc/mmu-radix64.c | 17 + 4 files changed, 32 insertions(+), 4 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index fd4942e881..4b1f346087 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1329,6 +1329,11 @@ static bool spapr_get_pate(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu, patb = spapr->nested_ptcr & PTCR_PATB; pats = spapr->nested_ptcr & PTCR_PATS; +/* Check if partition table is properly aligned */ +if (patb & MAKE_64BIT_MASK(0, pats + 12)) { +return false; +} + /* Calculate number of entries */ pats = 1ull << (pats + 12 - 4); if (pats <= lpid) { diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index d761a7d0c3..a8d4a6bcf0 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -920,6 +920,7 @@ static target_ulong h_register_process_table(PowerPCCPU *cpu, target_ulong page_size = args[2]; target_ulong table_size = args[3]; target_ulong update_lpcr = 0; +target_ulong table_byte_size; uint64_t cproc; if (flags & ~FLAGS_MASK) { /* Check no reserved bits are set */ @@ -927,6 +928,14 @@ static target_ulong h_register_process_table(PowerPCCPU *cpu, } if (flags & FLAG_MODIFY) { if (flags & FLAG_REGISTER) { +/* Check process table alignment */ +table_byte_size = 1ULL << (table_size + 12); +if (proc_tbl & (table_byte_size - 1)) { +qemu_log_mask(LOG_GUEST_ERROR, +"%s: process table not properly aligned: proc_tbl 0x" +TARGET_FMT_lx" proc_tbl_size 0x"TARGET_FMT_lx"\n", +__func__, proc_tbl, table_byte_size); +} if (flags & FLAG_RADIX) { /* Register new RADIX process table */ if (proc_tbl & 0xfff || proc_tbl >> 60) { return H_P2; diff --git a/target/ppc/mmu-book3s-v3.c b/target/ppc/mmu-book3s-v3.c index f4985bae78..c8f69b3df9 100644 
--- a/target/ppc/mmu-book3s-v3.c +++ b/target/ppc/mmu-book3s-v3.c @@ -28,6 +28,11 @@ bool ppc64_v3_get_pate(PowerPCCPU *cpu, target_ulong lpid, ppc_v3_pate_t *entry) uint64_t patb = cpu->env.spr[SPR_PTCR] & PTCR_PATB; uint64_t pats = cpu->env.spr[SPR_PTCR] & PTCR_PATS; +/* Check if partition table is properly aligned */ +if (patb & MAKE_64BIT_MASK(0, pats + 12)) { +return false; +} + /* Calculate number of entries */ pats = 1ull << (pats + 12 - 4); if (pats <= lpid) { diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index 21ac958e48..9a8a2e2875 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -383,7 +383,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, { CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; -uint64_t offset, size, prtbe_addr, prtbe0, base_addr, nls, index, pte; +uint64_t offset, size, prtb, prtbe_addr, prtbe0, base_addr, nls, index, pte; int fault_cause = 0, h_page_size, h_prot; hwaddr h_raddr, pte_addr; int ret; @@ -393,9 +393,18 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, __func__, access_str(access_type), eaddr, mmu_idx, pid); +prtb = (pate.dw1 & PATE1_R_PRTB); +size = 1ULL << ((pate.dw1 & PATE1_R_PRTS) + 12); +if (prtb & (size - 1)) { +/* Process Table not properly aligned */ +if (guest_visible) { +ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_R_BADCONFIG); +} +return 1; +} + /* Index Process Table by PID to Find Corresponding Process Table Entry */ offset = pid * sizeof(struct prtb_entry); -size = 1ULL << ((pate.dw1 & PATE1_R_PRTS) + 12); if (offset >= size) { /* offset exceeds size of the process table */ if (guest_visible) { @@ -403,7 +412,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, } return 1; } -prtbe_addr = (pate.dw1 & PATE1_R_PRTB) + offset; +prtbe_addr = prtb + offset; if (vhyp_flat_addressing(cpu)) { prtbe0 = ldq_phys(cs->as, prtbe_addr); @@ -568,7 +577,7 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr eaddr, return 
false; } -/* Get Process Table */ +/* Get Partition Table */ if (cpu->vhyp) { PPCVirtualHypervisorClass *vhc; vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); -- 2.25.1
[PATCH v2 0/3] ppc: Check for bad Radix configs
Changes from v1: - Use proper format defines in logs - Optimized ppc_radix64_is_valid_level() and added a comment with instructions on how to proceed when adding new Radix CPUs with different configurations - Moved calls to ppc_radix64_is_valid_level() outside of ppc_radix64_next_level(). This also avoids calling it twice for level 0, through ppc_radix64_walk_tree(). - Removed debug ifdefs from PDE/PTE alignment checks Leandro Lupori (3): ppc: Check partition and process table alignment target/ppc: Improve Radix xlate level validation target/ppc: Check page dir/table base alignment hw/ppc/spapr.c | 5 +++ hw/ppc/spapr_hcall.c | 9 + target/ppc/mmu-book3s-v3.c | 5 +++ target/ppc/mmu-radix64.c | 79 +++--- 4 files changed, 85 insertions(+), 13 deletions(-) -- 2.25.1
Re: [PATCH 12/14] aspeed: Make aspeed_board_init_flashes public
On 6/23/22 20:43, Peter Delevoryas wrote: On Jun 23, 2022, at 8:09 AM, Cédric Le Goater wrote: On 6/23/22 12:26, Peter Delevoryas wrote: Signed-off-by: Peter Delevoryas Let's start simple without flash support. We should be able to load FW blobs in each CPU address space using loader devices. Actually, I was unable to do this, perhaps because the fb OpenBMC boot sequence is a little weird. I specifically _needed_ to have a flash device which maps the firmware in at 0x2000_, because the fb OpenBMC U-Boot SPL jumps to that address to start executing from flash? I think this is also why fb OpenBMC machines can be so slow. $ ./build/qemu-system-arm -machine fby35 \ -device loader,file=fby35.mtd,addr=0,cpu-num=0 -nographic \ -d int -drive file=fby35.mtd,format=raw,if=mtd Ideally we should be booting from the flash device directly using the machine option '-M ast2600-evb,execute-in-place=true' like HW does. Instructions are fetched using SPI transfers. But the amount of code generated is tremendous. See some profiling below for a run which barely reaches DRAM training in U-Boot. C. * execute-in-place=true Each sample counts as 0.01 seconds. 
% cumulative self self total time seconds secondscalls ns/call ns/call name 100.00 0.02 0.02 164276 121.75 121.75 memory_region_init_rom_device 0.00 0.02 0.00 1610346008 0.00 0.00 tcg_code_capacity 0.00 0.02 0.00 567612621 0.00 0.00 type_register_static_array 0.00 0.02 0.00 328886191 0.00 0.00 do_common_semihosting 0.00 0.02 0.00 297215811 0.00 0.00 container_get 0.00 0.02 0.00 292670030 0.00 0.00 arm_cpu_tlb_fill 0.00 0.02 0.00 195416119 0.00 0.00 arm_cpu_register_gdb_regs_for_features 0.00 0.02 0.00 193326677 0.00 0.00 object_type_get_instance_size 0.00 0.02 0.00 182365829 0.00 0.00 tcg_op_insert_after 0.00 0.02 0.00 150668458 0.00 0.00 plugin_gen_tb_end 0.00 0.02 0.00 142171940 0.00 0.00 gen_new_label 0.00 0.02 0.00 133200628 0.00 0.00 smbios_build_type_38_table 0.00 0.02 0.00 130540338 0.00 0.00 object_dynamic_cast_assert 0.00 0.02 0.00 129223195 0.00 0.00 cpu_loop_exit_atomic 0.00 0.02 0.00 121759298 0.00 0.00 tcg_remove_ops_after 0.00 0.02 0.00 116887887 0.00 0.00 in_code_gen_buffer 0.00 0.02 0.00 111803833 0.00 0.00 tcg_emit_op 0.00 0.02 0.00 106052221 0.00 0.00 object_class_dynamic_cast_assert 0.00 0.02 0.00 99704054 0.00 0.00 __jit_debug_register_code 0.00 0.02 0.00 97812458 0.00 0.00 object_get_class 0.00 0.02 0.00 88952594 0.00 0.00 tcg_splitwx_to_rx 0.00 0.02 0.00 85790920 0.00 0.00 object_class_dynamic_cast 0.00 0.02 0.00 73780673 0.00 0.00 helper_exit_atomic 0.00 0.02 0.00 65337482 0.00 0.00 tcg_op_supported 0.00 0.02 0.00 61213619 0.00 0.00 tcg_func_start 0.00 0.02 0.00 54477684 0.00 0.00 tcg_flush_softmmu_tlb 0.00 0.02 0.00 53968980 0.00 0.00 tcg_temp_new_internal 0.00 0.02 0.00 51526008 0.00 0.00 qemu_in_vcpu_thread 0.00 0.02 0.00 40750952 0.00 0.00 pflash_cfi02_register 0.00 0.02 0.00 38039442 0.00 0.00 tcg_gen_op2 0.00 0.02 0.00 37068039 0.00 0.00 tcg_gen_op1 0.00 0.02 0.00 36473276 0.00 0.00 tcg_gen_op3 0.00 0.02 0.00 36310225 0.00 0.00 gen_gvec_uaba 0.00 0.02 0.00 30985436 0.00 0.00 tb_set_jmp_target 0.00 0.02 0.00 30291796 0.00 0.00 
tcg_constant_internal 0.00 0.02 0.00 29857950 0.00 0.00 ssi_transfer * execute-in-place=false Each sample counts as 0.01 seconds. % cumulative self self total time seconds secondscalls ns/call ns/call name 40.00 0.02 0.02 55114936.2936.29 aspeed_board_init_flashes 20.00 0.03 0.01 3937238 2.54 2.54 register_cp_regs_for_features 20.00 0.04 0.01 67409614.8314.83 gen_gvec_uaba 20.00 0.05 0.01 45746121.8621.86 finalize_target_page_bits 0.00 0.05 0.00 5364258 0.00 0.00 arm_gt_hvtimer_cb 0.00 0.05 0.00 2467532 0.00 0.00 helper_neon_narrow_sat_s8 0.00 0.05 0.00 2431860 0.00 0.00 opb_opb2fsi_address 0.00 0.05 0.00 1828453 0.00 0.00 cpsr_read 0.00 0.05 0.00 1820659 0.00 0.00 cpu_get_tb_cpu_state 0.00 0.05 0.00 1441344 0.00 0.00 arm_cpu_tlb_fill 0.00 0.05 0.00 1427177 0.00 0.00 cxl_usp_to_cstate
Re: [PATCH 0/2] target/arm: Fix issue 1078
On Sun, 19 Jun 2022 at 01:16, Richard Henderson wrote: > > Nicely summarized by the reporter, but I thought it would be > nicer to pull all of the logic into arm_pamax, rather than > leave it separated. Applied to target-arm.next, thanks. I think the hang He Zhe reports as still present is an unrelated issue (I should check the v7 LPAE spec about block descriptors I guess). -- PMM
Re: [PATCH v6 0/7] cutils: Introduce bundle mechanism
On 2022/06/16 18:18, Paolo Bonzini wrote: +def destdir_join(d1: str, d2: str) -> str: + if not d1: + return d2 + if not os.path.isabs(d2): + return os.path.join(d1, d2) + + # c:\destdir + c:\prefix must produce c:\destdir\prefix + if len(d2) > 1 and d2[1] == ':': + return d1 + d2[2:] + return d1 + d2 This is from Meson but buggy so I fixed it and opened a pull request for Meson: https://github.com/mesonbuild/meson/pull/10531 The script included in v8 has the fixed version of destdir_join. Regards, Akihiko Odaki
Re: [PATCH 2/2] target/arm: Check V7VE as well as LPAE in arm_pamax
On Sun, 19 Jun 2022 at 01:18, Richard Henderson wrote: > > In machvirt_init we create a cpu but do not fully initialize it. > Thus the propagation of V7VE to LPAE has not been done, and we > compute the wrong value for some v7 cpus, e.g. cortex-a15. > > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1078 > Signed-off-by: Richard Henderson > --- > target/arm/ptw.c | 8 +++- > 1 file changed, 7 insertions(+), 1 deletion(-) > > diff --git a/target/arm/ptw.c b/target/arm/ptw.c > index 07f7a21861..da478104f0 100644 > --- a/target/arm/ptw.c > +++ b/target/arm/ptw.c > @@ -47,7 +47,13 @@ unsigned int arm_pamax(ARMCPU *cpu) > assert(parange < ARRAY_SIZE(pamax_map)); > return pamax_map[parange]; > } > -if (arm_feature(&cpu->env, ARM_FEATURE_LPAE)) { > + > +/* > + * In machvirt_init, we call arm_pamax on a cpu that is not fully > + * initialized, so we can't rely on the propagation done in realize. > + */ > +if (arm_feature(&cpu->env, ARM_FEATURE_LPAE) || > +arm_feature(&cpu->env, ARM_FEATURE_V7VE)) { > /* v7 with LPAE */ > return 40; I guess this is expedient, so on that basis Reviewed-by: Peter Maydell but as I mentioned in the gitlab issue it's kind of bogus that the virt board is doing stuff to a non-realized CPU object. thanks -- PMM
Re: [PATCH 1/2] target/arm: Extend arm_pamax to more than aarch64
On Sun, 19 Jun 2022 at 01:16, Richard Henderson wrote: > > Move the code from hw/arm/virt.c that is supposed > to handle v7 into the one function. > > Signed-off-by: Richard Henderson > --- > hw/arm/virt.c| 10 +- > target/arm/ptw.c | 24 Reviewed-by: Peter Maydell thanks -- PMM
Re: [PATCH v11 2/2] qtest/cxl: Add aarch64 virt test for CXL
On Thu, 16 Jun 2022 at 15:20, Jonathan Cameron wrote: > > Add a single complex case for aarch64 virt machine. > > Signed-off-by: Jonathan Cameron > --- > tests/qtest/cxl-test.c | 48 + > tests/qtest/meson.build | 1 + > 2 files changed, 40 insertions(+), 9 deletions(-) > > diff --git a/tests/qtest/cxl-test.c b/tests/qtest/cxl-test.c > index 2133e973f4..1015d0e7c2 100644 > --- a/tests/qtest/cxl-test.c > +++ b/tests/qtest/cxl-test.c > @@ -17,6 +17,11 @@ >"-device pxb-cxl,id=cxl.1,bus=pcie.0,bus_nr=53 " \ >"-M > cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.targets.1=cxl.1,cxl-fmw.0.size=4G " > > +#define QEMU_VIRT_2PXB_CMD "-machine virt,cxl=on " \ > + "-device pxb-cxl,id=cxl.0,bus=pcie.0,bus_nr=52 " \ > + "-device pxb-cxl,id=cxl.1,bus=pcie.0,bus_nr=53 " \ > + "-M > cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.targets.1=cxl.1,cxl-fmw.0.size=4G " > + If CXL requires booting via UEFI, what does this test case do? It doesn't seem to be passing in a BIOS image. thanks -- PMM
Re: [PATCH] meson: Prefix each element of firmware path
Queued, thanks! Paolo On 6/24/22 17:40, Akihiko Odaki wrote: Signed-off-by: Akihiko Odaki --- configure | 23 +++ meson.build | 10 -- meson_options.txt | 2 +- scripts/meson-buildoptions.py | 7 +-- scripts/meson-buildoptions.sh | 6 +++--- softmmu/datadir.c | 8 +--- 6 files changed, 45 insertions(+), 11 deletions(-) diff --git a/configure b/configure index 4b12a8094ca..ab952e7ce3b 100755 --- a/configure +++ b/configure @@ -675,6 +675,29 @@ fi werror="" +meson_option_build_array() { + local a + local ifs + + if test "$targetos" == windows; then +ifs=\; + else +ifs=: + fi + + echo -n "[" + + while IFS="$ifs" read -ra a; do +for e in "${a[@]}"; do + echo -n '"""' + echo -n "$e" | sed 's/\\//g; s/"/\\"/g' + echo -n '"""', +done + done <<< "$1" + + echo "]" +} + . $source_path/scripts/meson-buildoptions.sh meson_options= diff --git a/meson.build b/meson.build index 0c2e11ff071..40111ce4053 100644 --- a/meson.build +++ b/meson.build @@ -1684,7 +1684,13 @@ config_host_data.set_quoted('CONFIG_PREFIX', get_option('prefix')) config_host_data.set_quoted('CONFIG_QEMU_CONFDIR', get_option('prefix') / qemu_confdir) config_host_data.set_quoted('CONFIG_QEMU_DATADIR', get_option('prefix') / qemu_datadir) config_host_data.set_quoted('CONFIG_QEMU_DESKTOPDIR', get_option('prefix') / qemu_desktopdir) -config_host_data.set_quoted('CONFIG_QEMU_FIRMWAREPATH', get_option('prefix') / get_option('qemu_firmwarepath')) + +qemu_firmwarepath = '' +foreach k : get_option('qemu_firmwarepath') + qemu_firmwarepath += '"' + get_option('prefix') / k + '", ' +endforeach +config_host_data.set('CONFIG_QEMU_FIRMWAREPATH', qemu_firmwarepath) + config_host_data.set_quoted('CONFIG_QEMU_HELPERDIR', get_option('prefix') / get_option('libexecdir')) config_host_data.set_quoted('CONFIG_QEMU_ICONDIR', get_option('prefix') / qemu_icondir) config_host_data.set_quoted('CONFIG_QEMU_LOCALEDIR', get_option('prefix') / get_option('localedir')) @@ -3622,7 +3628,7 @@ endif summary_info = {} summary_info += {'Install 
prefix':get_option('prefix')} summary_info += {'BIOS directory':qemu_datadir} -summary_info += {'firmware path': get_option('prefix') / get_option('qemu_firmwarepath')} +summary_info += {'firmware path': qemu_firmwarepath} summary_info += {'binary directory': get_option('prefix') / get_option('bindir')} summary_info += {'library directory': get_option('prefix') / get_option('libdir')} summary_info += {'module directory': qemu_moddir} diff --git a/meson_options.txt b/meson_options.txt index 0e8197386b9..8ad5cd73819 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -6,7 +6,7 @@ option('qemu_suffix', type : 'string', value: 'qemu', description: 'Suffix for QEMU data/modules/config directories (can be empty)') option('docdir', type : 'string', value : 'share/doc', description: 'Base directory for documentation installation (can be empty)') -option('qemu_firmwarepath', type : 'string', value : 'qemu-firmware', +option('qemu_firmwarepath', type : 'array', value : ['qemu-firmware'], description: 'search PATH for firmware files') option('pkgversion', type : 'string', value : '', description: 'use specified string as sub-version of the package') diff --git a/scripts/meson-buildoptions.py b/scripts/meson-buildoptions.py index e624c16b01a..3e2b4785388 100755 --- a/scripts/meson-buildoptions.py +++ b/scripts/meson-buildoptions.py @@ -156,7 +156,7 @@ def cli_metavar(opt): if opt["type"] == "string": return "VALUE" if opt["type"] == "array": -return "CHOICES" +return "CHOICES" if "choices" in opt else "VALUES" return "CHOICE" @@ -199,7 +199,10 @@ def print_parse(options): key = cli_option(opt) name = opt["name"] if require_arg(opt): -print(f'--{key}=*) quote_sh "-D{name}=$2" ;;') +if opt["type"] == "array" and not "choices" in opt: +print(f'--{key}=*) quote_sh "-D{name}=$(meson_option_build_array $2)" ;;') +else: +print(f'--{key}=*) quote_sh "-D{name}=$2" ;;') elif opt["type"] == "boolean": print(f'--enable-{key}) printf "%s" -D{name}=true ;;') print(f'--disable-{key}) 
printf "%s" -D{name}=false ;;') diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index 1fc1d2e2c36..238bab162bd 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -42,12 +42,12 @@ meson_options_help() { printf "%s\n" ' --enable-trace-backends=CHOICES' printf "%s\n" ' Set available tracing backends [log] (choices:' printf "%s\n" ' dtrace/ftrace/log/nop/simple/syslog/ust)' - printf "%s\n" ' --firm
Re: [PATCH v3 2/2] docs/system/devices/canokey: Document limitations on usb-ehci
On Fri, Jun 24, 2022 at 11:29:40PM +0800, MkfsSion wrote: > > -Another limitation is that this device is not compatible with ``qemu-xhci``, > -in that this device would hang when there are FIDO2 packets (traffic on > -interrupt endpoints). If you do not use FIDO2 then it works as intended, > -but for full functionality you should use old uhci/ehci bus and attach > canokey > -to it, for example > - > -.. parsed-literal:: > - > - |qemu_system| -device piix3-usb-uhci,id=uhci -device canokey,bus=uhci.0 Hi, the fix for the limitation on qemu-xhci has not landed yet, so the paragraph documenting that limitation should not be removed by this patch. > +Another limitation is that this device is not compatible with ``usb-ehci`` > +since we removed high-speed mode support. When a full-speed device attach > +to a high-speed port, ``usb-ehci`` would complain about speed mismatch. I think we could rephrase the limitation here. Instead of saying "we removed the high-speed mode", how about saying "this device only provides the full-speed mode"? Also, you should list the changes between the two patch versions below the --- line just after your commit message so the mailing list can track what happened. Regards, Hongren
[PULL v2 20/20] vduse-blk: Add name option
From: Xie Yongji Currently we use 'id' option as the name of VDUSE device. It's a bit confusing since we use one value for two different purposes: the ID to identify the export within QEMU (must be distinct from any other exports in the same QEMU process, but can overlap with names used by other processes), and the VDUSE name to uniquely identify it on the host (must be distinct from other VDUSE devices on the same host, but can overlap with other export types like NBD in the same process). To make it clear, this patch adds a separate 'name' option to specify the VDUSE name for the vduse-blk export instead. Signed-off-by: Xie Yongji Message-Id: <20220614051532.92-7-xieyon...@bytedance.com> Signed-off-by: Kevin Wolf --- qapi/block-export.json | 7 --- docs/tools/qemu-storage-daemon.rst | 5 +++-- block/export/vduse-blk.c | 4 ++-- storage-daemon/qemu-storage-daemon.c | 8 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/qapi/block-export.json b/qapi/block-export.json index 618a6367c9..4627bbc4e6 100644 --- a/qapi/block-export.json +++ b/qapi/block-export.json @@ -183,6 +183,7 @@ # # A vduse-blk block export. # +# @name: the name of VDUSE device (must be unique across the host). # @num-queues: the number of virtqueues. Defaults to 1. # @queue-size: the size of virtqueue. Defaults to 256. # @logical-block-size: Logical block size in bytes. Range [512, PAGE_SIZE] @@ -192,7 +193,8 @@ # Since: 7.1 ## { 'struct': 'BlockExportOptionsVduseBlk', - 'data': { '*num-queues': 'uint16', + 'data': { 'name': 'str', +'*num-queues': 'uint16', '*queue-size': 'uint16', '*logical-block-size': 'size', '*serial': 'str' } } @@ -320,8 +322,7 @@ # Describes a block export, i.e. how single node should be exported on an # external interface. 
# -# @id: A unique identifier for the block export (across the host for vduse-blk -# export type or across all export types for other types) +# @id: A unique identifier for the block export (across all export types) # # @node-name: The node name of the block node to be exported (since: 5.2) # diff --git a/docs/tools/qemu-storage-daemon.rst b/docs/tools/qemu-storage-daemon.rst index 034f2809a6..ea00149a63 100644 --- a/docs/tools/qemu-storage-daemon.rst +++ b/docs/tools/qemu-storage-daemon.rst @@ -77,7 +77,7 @@ Standard options: --export [type=]vhost-user-blk,id=,node-name=,addr.type=unix,addr.path=[,writable=on|off][,logical-block-size=][,num-queues=] --export [type=]vhost-user-blk,id=,node-name=,addr.type=fd,addr.str=[,writable=on|off][,logical-block-size=][,num-queues=] --export [type=]fuse,id=,node-name=,mountpoint=[,growable=on|off][,writable=on|off][,allow-other=on|off|auto] - --export [type=]vduse-blk,id=,node-name=[,writable=on|off][,num-queues=][,queue-size=][,logical-block-size=][,serial=] + --export [type=]vduse-blk,id=,node-name=,name=[,writable=on|off][,num-queues=][,queue-size=][,logical-block-size=][,serial=] is a block export definition. ``node-name`` is the block node that should be exported. ``writable`` determines whether or not the export allows write @@ -111,7 +111,8 @@ Standard options: ``allow-other`` to auto (the default) will try enabling this option, and on error fall back to disabling it. - The ``vduse-blk`` export type uses the ``id`` as the VDUSE device name. + The ``vduse-blk`` export type takes a ``name`` (must be unique across the host) + to create the VDUSE device. ``num-queues`` sets the number of virtqueues (the default is 1). ``queue-size`` sets the virtqueue descriptor table size (the default is 256). 
diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c index 066e088b00..f101c24c3f 100644 --- a/block/export/vduse-blk.c +++ b/block/export/vduse-blk.c @@ -300,7 +300,7 @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, features |= 1ULL << VIRTIO_BLK_F_RO; } -vblk_exp->dev = vduse_dev_create(exp->id, VIRTIO_ID_BLOCK, 0, +vblk_exp->dev = vduse_dev_create(vblk_opts->name, VIRTIO_ID_BLOCK, 0, features, num_queues, sizeof(struct virtio_blk_config), (char *)&config, &vduse_blk_ops, @@ -312,7 +312,7 @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, } vblk_exp->recon_file = g_strdup_printf("%s/vduse-blk-%s", - g_get_tmp_dir(), exp->id); + g_get_tmp_dir(), vblk_opts->name); if (vduse_set_reconnect_log_file(vblk_exp->dev, vblk_exp->recon_file)) { error_setg(errp, "failed to set reconnect log file"); ret = -EINVAL; diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c index 4e18d3fc85..b8e910f220 100644 --- a/storage-daemon/qemu-storage-daemon
[PULL v2 18/20] nbd: Drop dead code spotted by Coverity
From: Eric Blake CID 1488362 points out that the second 'rc >= 0' check is now dead code. Reported-by: Peter Maydell Fixes: 172f5f1a40(nbd: remove peppering of nbd_client_connected) Signed-off-by: Eric Blake Message-Id: <20220516210519.76135-1-ebl...@redhat.com> Reviewed-by: Peter Maydell Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block/nbd.c | 8 ++-- 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/block/nbd.c b/block/nbd.c index 6085ab1d2c..7f5f50ec46 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -521,12 +521,8 @@ static int coroutine_fn nbd_co_send_request(BlockDriverState *bs, if (qiov) { qio_channel_set_cork(s->ioc, true); rc = nbd_send_request(s->ioc, request); -if (rc >= 0) { -if (qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov, - NULL) < 0) { -rc = -EIO; -} -} else if (rc >= 0) { +if (rc >= 0 && qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov, + NULL) < 0) { rc = -EIO; } qio_channel_set_cork(s->ioc, false); -- 2.35.3
Re: [RFC v2] Adding block layer APIs resembling Linux ZoneBlockDevice ioctls.
Stefan Hajnoczi 于2022年6月24日周五 23:50写道: > > On Fri, Jun 24, 2022 at 11:14:32AM +0800, Sam Li wrote: > > Hi Stefan, > > > > Stefan Hajnoczi 于2022年6月20日周一 15:55写道: > > > > > > On Mon, Jun 20, 2022 at 11:36:11AM +0800, Sam Li wrote: > > > > > > Hi Sam, > > > Is this version 2 of "[RFC v1] Add support for zoned device"? Please > > > keep the email subject line the same (except for "v2", "v3", etc) so > > > that it's clear which patch series this new version replaces. > > > > > > > Fix some mistakes before. It can report a range of zones now. > > > > > > This looks like the description of what changed compared to v1. Please > > > put the changelog below "---" in the future. When patch emails are > > > merged by git-am(1) it keeps the text above "---" and discards the text > > > below "---". The changelog is usually no longer useful once the patches > > > are merged, so it should be located below the "---" line. > > > > > > The text above the "---" is the commit description (an explanation of > > > why this commit is necessary). In this case the commit description > > > should explain that this patch adds .bdrv_co_zone_report() and > > > .bdrv_co_zone_mgmt() to BlockDriver so that zoned block devices can be > > > supported. 
> > > > > > > > > > > Signed-off-by: Sam Li > > > > --- > > > > block/block-backend.c | 22 > > > > block/coroutines.h| 5 + > > > > block/file-posix.c| 182 ++ > > > > block/io.c| 23 > > > > include/block/block-common.h | 43 ++- > > > > include/block/block-io.h | 13 +++ > > > > include/block/block_int-common.h | 20 > > > > qemu-io-cmds.c| 118 +++ > > > > tests/qemu-iotests/tests/zoned.sh | 52 + > > > > 9 files changed, 477 insertions(+), 1 deletion(-) > > > > create mode 100644 tests/qemu-iotests/tests/zoned.sh > > > > > > > > diff --git a/block/block-backend.c b/block/block-backend.c > > > > index e0e1aff4b1..20248e4a35 100644 > > > > --- a/block/block-backend.c > > > > +++ b/block/block-backend.c > > > > @@ -104,6 +104,8 @@ typedef struct BlockBackendAIOCB { > > > > int ret; > > > > } BlockBackendAIOCB; > > > > > > > > + > > > > + > > > > > > Please avoid whitespace changes in code that is otherwise untouched by > > > your patch. Code changes can cause merge conflicts and they make it > > > harder to use git-annotate(1), so only changes that are necessary should > > > be included in a patch. > > > > > > > static const AIOCBInfo block_backend_aiocb_info = { > > > > .get_aio_context = blk_aiocb_get_aio_context, > > > > .aiocb_size = sizeof(BlockBackendAIOCB), > > > > @@ -1810,6 +1812,25 @@ int blk_flush(BlockBackend *blk) > > > > return ret; > > > > } > > > > > > > > > > Please add a documentation comment for blk_co_zone_report() that > > > explains how to use the functions and the purpose of the arguments. For > > > example, does offset have to be the first byte in a zone or can it be > > > any byte offset? What are the alignment requirements of offset and len? > > > Why is nr_zones a pointer? > > > > > > > +int blk_co_zone_report(BlockBackend *blk, int64_t offset, int64_t len, > > > > > > Functions that run in coroutine context must be labeled with > > > coroutine_fn: > > > > > > int coroutine_fn blk_co_zone_report(...) 
> > > > > > This tells humans and tools that the function can only be called from a > > > coroutine. There is a blog post about coroutines in QEMU here: > > > https://blog.vmsplice.net/2014/01/coroutines-in-qemu-basics.html > > > > > > > + int64_t *nr_zones, > > > > + struct BlockZoneDescriptor *zones) > > > > > > QEMU coding style uses typedefs when defining structs, so "struct > > > BlockZoneDescriptor *zones" should be written as "BlockZoneDescriptor > > > *zones". > > > > > > > +{ > > > > +int ret; > > > > > > This function is called from the I/O code path, please mark it with: > > > > > > IO_CODE(); > > > > > > From include/block/block-io.h: > > > > > > * I/O API functions. These functions are thread-safe, and therefore > > > * can run in any thread as long as the thread has called > > > * aio_context_acquire/release(). > > > * > > > * These functions can only call functions from I/O and Common > > > categories, > > > * but can be invoked by GS, "I/O or GS" and I/O APIs. > > > * > > > * All functions in this category must use the macro > > > * IO_CODE(); > > > * to catch when they are accidentally called by the wrong API. > > > > > > > +ret = bdrv_co_zone_report(blk->root->bs, offset, len, nr_zones, > > > > zones); > > > > > > Please add blk_inc_in_flight(blk) and blk_dec_in_flight(blk) around this > > > function call to ensure that zone report requests finish before I/O is > > > drained (see bdrv_drained_begin()). This is necessary so that it's > > > possible to wait for I/O req
[PATCH] artist: set memory region owners for buffers to the artist device
This fixes the output of "info qom-tree" so that the buffers appear as children of the artist device, rather than underneath the "unattached" container. Signed-off-by: Mark Cave-Ayland --- hw/display/artist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/display/artist.c b/hw/display/artist.c index eadaef0d46..fde050c882 100644 --- a/hw/display/artist.c +++ b/hw/display/artist.c @@ -1358,7 +1358,7 @@ static void artist_create_buffer(ARTISTState *s, const char *name, { struct vram_buffer *buf = s->vram_buffer + idx; -memory_region_init_ram(&buf->mr, NULL, name, width * height, +memory_region_init_ram(&buf->mr, OBJECT(s), name, width * height, &error_fatal); memory_region_add_subregion_overlap(&s->mem_as_root, *offset, &buf->mr, 0); -- 2.30.2
[PULL v2 10/20] libvduse: Add VDUSE (vDPA Device in Userspace) library
From: Xie Yongji VDUSE [1] is a linux framework that makes it possible to implement software-emulated vDPA devices in userspace. This adds a library as a subproject to help implementing VDUSE backends in QEMU. [1] https://www.kernel.org/doc/html/latest/userspace-api/vduse.html Signed-off-by: Xie Yongji Message-Id: <20220523084611.91-6-xieyon...@bytedance.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- meson_options.txt |2 + subprojects/libvduse/include/atomic.h |1 + subprojects/libvduse/include/compiler.h |1 + subprojects/libvduse/libvduse.h | 235 subprojects/libvduse/libvduse.c | 1150 +++ MAINTAINERS |5 + meson.build | 15 + scripts/meson-buildoptions.sh |3 + subprojects/libvduse/linux-headers/linux|1 + subprojects/libvduse/meson.build| 10 + subprojects/libvduse/standard-headers/linux |1 + 11 files changed, 1424 insertions(+) create mode 12 subprojects/libvduse/include/atomic.h create mode 12 subprojects/libvduse/include/compiler.h create mode 100644 subprojects/libvduse/libvduse.h create mode 100644 subprojects/libvduse/libvduse.c create mode 12 subprojects/libvduse/linux-headers/linux create mode 100644 subprojects/libvduse/meson.build create mode 12 subprojects/libvduse/standard-headers/linux diff --git a/meson_options.txt b/meson_options.txt index f3e2f22c1e..23a9f440f7 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -257,6 +257,8 @@ option('virtfs', type: 'feature', value: 'auto', description: 'virtio-9p support') option('virtiofsd', type: 'feature', value: 'auto', description: 'build virtiofs daemon (virtiofsd)') +option('libvduse', type: 'feature', value: 'auto', + description: 'build VDUSE Library') option('capstone', type: 'feature', value: 'auto', description: 'Whether and how to find the capstone library') diff --git a/subprojects/libvduse/include/atomic.h b/subprojects/libvduse/include/atomic.h new file mode 12 index 00..8c2be64f7b --- /dev/null +++ b/subprojects/libvduse/include/atomic.h @@ -0,0 +1 @@ 
+../../../include/qemu/atomic.h \ No newline at end of file diff --git a/subprojects/libvduse/include/compiler.h b/subprojects/libvduse/include/compiler.h new file mode 12 index 00..de7b70697c --- /dev/null +++ b/subprojects/libvduse/include/compiler.h @@ -0,0 +1 @@ +../../../include/qemu/compiler.h \ No newline at end of file diff --git a/subprojects/libvduse/libvduse.h b/subprojects/libvduse/libvduse.h new file mode 100644 index 00..6c2fe98213 --- /dev/null +++ b/subprojects/libvduse/libvduse.h @@ -0,0 +1,235 @@ +/* + * VDUSE (vDPA Device in Userspace) library + * + * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved. + * + * Author: + * Xie Yongji + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#ifndef LIBVDUSE_H +#define LIBVDUSE_H + +#include +#include + +#define VIRTQUEUE_MAX_SIZE 1024 + +/* VDUSE device structure */ +typedef struct VduseDev VduseDev; + +/* Virtqueue structure */ +typedef struct VduseVirtq VduseVirtq; + +/* Some operation of VDUSE backend */ +typedef struct VduseOps { +/* Called when virtqueue can be processed */ +void (*enable_queue)(VduseDev *dev, VduseVirtq *vq); +/* Called when virtqueue processing should be stopped */ +void (*disable_queue)(VduseDev *dev, VduseVirtq *vq); +} VduseOps; + +/* Describing elements of the I/O buffer */ +typedef struct VduseVirtqElement { +/* Descriptor table index */ +unsigned int index; +/* Number of physically-contiguous device-readable descriptors */ +unsigned int out_num; +/* Number of physically-contiguous device-writable descriptors */ +unsigned int in_num; +/* Array to store physically-contiguous device-writable descriptors */ +struct iovec *in_sg; +/* Array to store physically-contiguous device-readable descriptors */ +struct iovec *out_sg; +} VduseVirtqElement; + + +/** + * vduse_get_virtio_features: + * + * Get supported virtio features + * + * Returns: supported feature bits + 
*/ +uint64_t vduse_get_virtio_features(void); + +/** + * vduse_queue_get_dev: + * @vq: specified virtqueue + * + * Get corresponding VDUSE device from the virtqueue. + * + * Returns: a pointer to VDUSE device on success, NULL on failure. + */ +VduseDev *vduse_queue_get_dev(VduseVirtq *vq); + +/** + * vduse_queue_get_fd: + * @vq: specified virtqueue + * + * Get the kick fd for the virtqueue. + * + * Returns: file descriptor on success, -1 on failure. + */ +int vduse_queue_get_fd(VduseVirtq *vq); + +/** + * vduse_queue_pop: + * @vq: specified virtqueue + * @sz: the size of struct to return (must be >= VduseVirtqElement) + * + * Pop an elem
[PULL v2 16/20] block/gluster: correctly set max_pdiscard
From: Fabian Ebner On 64-bit platforms, assigning SIZE_MAX to the int64_t max_pdiscard results in a negative value, and the following assertion would trigger down the line (it's not the same max_pdiscard, but computed from the other one): qemu-system-x86_64: ../block/io.c:3166: bdrv_co_pdiscard: Assertion `max_pdiscard >= bs->bl.request_alignment' failed. On 32-bit platforms, it's fine to keep using SIZE_MAX. The assertion in qemu_gluster_co_pdiscard() is checking that the value of 'bytes' can safely be passed to glfs_discard_async(), which takes a size_t for the argument in question, so it is kept as is. And since max_pdiscard is still <= SIZE_MAX, relying on max_pdiscard is still fine. Fixes: 0c8022876f ("block: use int64_t instead of int in driver discard handlers") Cc: qemu-sta...@nongnu.org Signed-off-by: Fabian Ebner Message-Id: <20220520075922.43972-1-f.eb...@proxmox.com> Reviewed-by: Eric Blake Reviewed-by: Stefano Garzarella Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block/gluster.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/gluster.c b/block/gluster.c index 398976bc66..b60213ab80 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -891,7 +891,7 @@ out: static void qemu_gluster_refresh_limits(BlockDriverState *bs, Error **errp) { bs->bl.max_transfer = GLUSTER_MAX_TRANSFER; -bs->bl.max_pdiscard = SIZE_MAX; +bs->bl.max_pdiscard = MIN(SIZE_MAX, INT64_MAX); } static int qemu_gluster_reopen_prepare(BDRVReopenState *state, -- 2.35.3
Re: [PATCH v11 1/2] hw/arm/virt: Basic CXL enablement on pci_expander_bridge instances pxb-cxl
On Fri, 24 Jun 2022 16:01:42 +0100 Peter Maydell wrote: > On Fri, 24 Jun 2022 at 15:54, Jonathan Cameron > wrote: > > Just occurred to me there is another barrier to an approach that adds > > DT bindings. > > I fairly sure hw/pci-bridge/pci_expander_bridge.c (PXB) > > only works on ACPI platforms and is the only host bridge supported > > for CXL emulation in QEMU. > > Isn't it probeable like any other PCI device/bridge ? Nope - PXB is a really weird device. (I tested it quickly in case I was wrong and indeed, no sign of device on the downstream side without a suitable BIOS / ACPI) There is no driver support for it as such, rather it presents as two things. 1) An EP on the main host bridge - which is used for interrupt routing and possibly a few other things. Linux has no idea that's what it is though so attaches no driver to it. lspci shows this as Red Hat, Inc, QEMU PCIe Expander Bridge 2) A host bridge with firmware described characteristics (bus number range and similar). Host bridges as defined in ACPI are a concept rather than actual hardware and presented to the OS via firmware descriptions (ACPI DSDT stuff in this case). You could probably add a DT description via the pci-host-ecam-generic bindings, but it would be an interesting late bit of DT addition in the virt_machine_done() function. Similar to the fw_cfg and ACPI stuff done at that stage to deal with PXB devices becoming visible. So gut feeling is PXB could be made to work with DT, but doesn't today. Given that the main use case for PXB is typically NUMA description, I guess no one noticed on DT platforms. Jonathan > > -- PMM
Re: [PULL 00/14] (Mostly) build system changes for 2022-06-24
On 6/24/22 01:27, Paolo Bonzini wrote: The following changes since commit 2b049d2c8dc01de750410f8f1a4eac498c04c723: Merge tag 'pull-aspeed-20220622' of https://github.com/legoater/qemu into staging (2022-06-22 07:27:06 -0700) are available in the Git repository at: https://gitlab.com/bonzini/qemu.git tags/for-upstream for you to fetch changes up to 72da35fec9a9ba91a5b2cb9ee00843a94fa9413d: accel: kvm: Fix memory leak in find_stats_descriptors (2022-06-24 10:19:17 +0200) * fuzzing fixes * fix cross compilation CFLAGS and compiler choice * do not specify -bios option for tests/vm * miscellaneous fixes Build failure here. I have ubuntu 22.04, crossbuild-essential-arm64/jammy,jammy,now 12.9ubuntu3 all [installed] crossbuild-essential-armhf/jammy,jammy,now 12.9ubuntu3 all [installed] crossbuild-essential-i386/jammy,jammy,now 12.9ubuntu3 all [installed] crossbuild-essential-mips64el/jammy,jammy,now 12.9 all [installed] crossbuild-essential-ppc64el/jammy,jammy,now 12.9ubuntu3 all [installed] crossbuild-essential-riscv64/jammy,jammy,now 12.9ubuntu3 all [installed] crossbuild-essential-s390x/jammy,jammy,now 12.9ubuntu3 all [installed] which is properly detected during configure, Cross compilers aarch64 : aarch64-linux-gnu-gcc alpha: $(DOCKER_SCRIPT) cc --cc alpha-linux-gnu-gcc -i qemu/debian-alpha-cross -s /home/rth/qemu-publish/src -- arm : arm-linux-gnueabihf-gcc i386 : i686-linux-gnu-gcc nios2: $(DOCKER_SCRIPT) cc --cc nios2-linux-gnu-gcc -i qemu/debian-nios2-cross -s /home/rth/qemu-publish/src -- x86_64 : cc ... 
But then the i386 cross-compiler isn't used: $ cat tests/tcg/config-i386-softmmu.mak # Automatically generated by configure - do not modify TARGET_NAME=i386 BUILD_STATIC= EXTRA_CFLAGS=-m32 CC=cc CCAS=cc AR=ar AS=as LD=ld NM=nm OBJCOPY=objcopy RANLIB=ranlib STRIP=strip QEMU=/home/rth/qemu-publish/bld/qemu-system-i386 leading to failure: cc -nostdlib -ggdb -O0 -isystem /home/rth/qemu-publish/src/tests/tcg/minilib -m32 -ffreestanding /home/rth/qemu-publish/src/tests/tcg/multiarch/system/hello.c -o hello -Wl,-T/home/rth/qemu-publish/src/tests/tcg/i386/system/kernel.ld -Wl,-melf_i386 -static -nostdlib boot.o printf.o -lgcc /usr/bin/ld: skipping incompatible /usr/lib/gcc/x86_64-linux-gnu/11/libgcc.a when searching for -lgcc /usr/bin/ld: cannot find -lgcc: No such file or directory collect2: error: ld returned 1 exit status make[1]: *** [/home/rth/qemu-publish/src/tests/tcg/i386/Makefile.softmmu-target:32: hello] Error 1 r~
[PULL v2 15/20] block/rbd: report a better error when namespace does not exist
From: Stefano Garzarella If the namespace does not exist, rbd_create() fails with -ENOENT and QEMU reports a generic "error rbd create: No such file or directory": $ qemu-img create rbd:rbd/namespace/image 1M Formatting 'rbd:rbd/namespace/image', fmt=raw size=1048576 qemu-img: rbd:rbd/namespace/image: error rbd create: No such file or directory Unfortunately rados_ioctx_set_namespace() does not fail if the namespace does not exist, so let's use rbd_namespace_exists() in qemu_rbd_connect() to check if the namespace exists, reporting a more understandable error: $ qemu-img create rbd:rbd/namespace/image 1M Formatting 'rbd:rbd/namespace/image', fmt=raw size=1048576 qemu-img: rbd:rbd/namespace/image: namespace 'namespace' does not exist Reported-by: Tingting Mao Reviewed-by: Ilya Dryomov Signed-off-by: Stefano Garzarella Message-Id: <20220517071012.6120-1-sgarz...@redhat.com> Signed-off-by: Kevin Wolf --- block/rbd.c | 24 meson.build | 6 ++ 2 files changed, 30 insertions(+) diff --git a/block/rbd.c b/block/rbd.c index 6caf35cbba..f826410f40 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -831,6 +831,26 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx, error_setg_errno(errp, -r, "error opening pool %s", opts->pool); goto failed_shutdown; } + +#ifdef HAVE_RBD_NAMESPACE_EXISTS +if (opts->has_q_namespace && strlen(opts->q_namespace) > 0) { +bool exists; + +r = rbd_namespace_exists(*io_ctx, opts->q_namespace, &exists); +if (r < 0) { +error_setg_errno(errp, -r, "error checking namespace"); +goto failed_ioctx_destroy; +} + +if (!exists) { +error_setg(errp, "namespace '%s' does not exist", + opts->q_namespace); +r = -ENOENT; +goto failed_ioctx_destroy; +} +} +#endif + /* * Set the namespace after opening the io context on the pool, * if nspace == NULL or if nspace == "", it is just as we did nothing @@ -840,6 +860,10 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx, r = 0; goto out; +#ifdef HAVE_RBD_NAMESPACE_EXISTS 
+failed_ioctx_destroy: +rados_ioctx_destroy(*io_ctx); +#endif failed_shutdown: rados_shutdown(*cluster); out: diff --git a/meson.build b/meson.build index 397ca1d60a..a113078f1a 100644 --- a/meson.build +++ b/meson.build @@ -1903,6 +1903,12 @@ config_host_data.set('HAVE_GETIFADDRS', cc.has_function('getifaddrs')) config_host_data.set('HAVE_OPENPTY', cc.has_function('openpty', dependencies: util)) config_host_data.set('HAVE_STRCHRNUL', cc.has_function('strchrnul')) config_host_data.set('HAVE_SYSTEM_FUNCTION', cc.has_function('system', prefix: '#include ')) +if rbd.found() + config_host_data.set('HAVE_RBD_NAMESPACE_EXISTS', + cc.has_function('rbd_namespace_exists', + dependencies: rbd, + prefix: '#include ')) +endif if rdma.found() config_host_data.set('HAVE_IBV_ADVISE_MR', cc.has_function('ibv_advise_mr', -- 2.35.3
[PULL v2 13/20] libvduse: Add support for reconnecting
From: Xie Yongji To support reconnecting after restart or crash, VDUSE backend might need to resubmit inflight I/Os. This stores the metadata such as the index of inflight I/O's descriptors to a shm file so that VDUSE backend can restore them during reconnecting. Signed-off-by: Xie Yongji Message-Id: <20220523084611.91-9-xieyon...@bytedance.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- subprojects/libvduse/libvduse.h | 12 ++ block/export/vduse-blk.c| 19 ++- subprojects/libvduse/libvduse.c | 235 +++- 3 files changed, 260 insertions(+), 6 deletions(-) diff --git a/subprojects/libvduse/libvduse.h b/subprojects/libvduse/libvduse.h index 6c2fe98213..32f19e7b48 100644 --- a/subprojects/libvduse/libvduse.h +++ b/subprojects/libvduse/libvduse.h @@ -173,6 +173,18 @@ int vduse_dev_update_config(VduseDev *dev, uint32_t size, */ int vduse_dev_setup_queue(VduseDev *dev, int index, int max_size); +/** + * vduse_set_reconnect_log_file: + * @dev: VDUSE device + * @file: filename of reconnect log + * + * Specify the file to store log for reconnecting. It should + * be called before vduse_dev_setup_queue(). + * + * Returns: 0 on success, -errno on failure. 
+ */ +int vduse_set_reconnect_log_file(VduseDev *dev, const char *filename); + /** * vduse_dev_create_by_fd: * @fd: passed file descriptor diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c index cab1904234..251d73c841 100644 --- a/block/export/vduse-blk.c +++ b/block/export/vduse-blk.c @@ -30,6 +30,7 @@ typedef struct VduseBlkExport { VirtioBlkHandler handler; VduseDev *dev; uint16_t num_queues; +char *recon_file; unsigned int inflight; } VduseBlkExport; @@ -125,6 +126,8 @@ static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq) aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq), true, on_vduse_vq_kick, NULL, NULL, NULL, vq); +/* Make sure we don't miss any kick afer reconnecting */ +eventfd_write(vduse_queue_get_fd(vq), 1); } static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq) @@ -306,6 +309,15 @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, return -ENOMEM; } +vblk_exp->recon_file = g_strdup_printf("%s/vduse-blk-%s", + g_get_tmp_dir(), exp->id); +if (vduse_set_reconnect_log_file(vblk_exp->dev, vblk_exp->recon_file)) { +error_setg(errp, "failed to set reconnect log file"); +vduse_dev_destroy(vblk_exp->dev); +g_free(vblk_exp->recon_file); +return -EINVAL; +} + for (i = 0; i < num_queues; i++) { vduse_dev_setup_queue(vblk_exp->dev, i, queue_size); } @@ -324,11 +336,16 @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, static void vduse_blk_exp_delete(BlockExport *exp) { VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export); +int ret; blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach, vblk_exp); blk_set_dev_ops(exp->blk, NULL, NULL); -vduse_dev_destroy(vblk_exp->dev); +ret = vduse_dev_destroy(vblk_exp->dev); +if (ret != -EBUSY) { +unlink(vblk_exp->recon_file); +} +g_free(vblk_exp->recon_file); } static void vduse_blk_exp_request_shutdown(BlockExport *exp) diff --git a/subprojects/libvduse/libvduse.c 
b/subprojects/libvduse/libvduse.c index 78e1e5cf90..9a2bcec282 100644 --- a/subprojects/libvduse/libvduse.c +++ b/subprojects/libvduse/libvduse.c @@ -42,6 +42,8 @@ #define VDUSE_VQ_ALIGN 4096 #define MAX_IOVA_REGIONS 256 +#define LOG_ALIGNMENT 64 + /* Round number down to multiple */ #define ALIGN_DOWN(n, m) ((n) / (m) * (m)) @@ -52,6 +54,31 @@ #define unlikely(x) __builtin_expect(!!(x), 0) #endif +typedef struct VduseDescStateSplit { +uint8_t inflight; +uint8_t padding[5]; +uint16_t next; +uint64_t counter; +} VduseDescStateSplit; + +typedef struct VduseVirtqLogInflight { +uint64_t features; +uint16_t version; +uint16_t desc_num; +uint16_t last_batch_head; +uint16_t used_idx; +VduseDescStateSplit desc[]; +} VduseVirtqLogInflight; + +typedef struct VduseVirtqLog { +VduseVirtqLogInflight inflight; +} VduseVirtqLog; + +typedef struct VduseVirtqInflightDesc { +uint16_t index; +uint64_t counter; +} VduseVirtqInflightDesc; + typedef struct VduseRing { unsigned int num; uint64_t desc_addr; @@ -74,6 +101,10 @@ struct VduseVirtq { bool ready; int fd; VduseDev *dev; +VduseVirtqInflightDesc *resubmit_list; +uint16_t resubmit_num; +uint64_t counter; +VduseVirtqLog *log; }; typedef struct VduseIovaRegion { @@ -97,8 +128,36 @@ struct VduseDev { int fd; int ctrl_fd; void *priv; +void *log; }; +static inline size_t vduse_vq_log_size(uint16_t queue_size) +{ +
[PULL v2 07/20] block/export: Fix incorrect length passed to vu_queue_push()
From: Xie Yongji Now the req->size is set to the correct value only when handling VIRTIO_BLK_T_GET_ID request. This patch fixes it. Signed-off-by: Xie Yongji Message-Id: <20220523084611.91-3-xieyon...@bytedance.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/export/vhost-user-blk-server.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c index b2e458ade3..19c6ee51d3 100644 --- a/block/export/vhost-user-blk-server.c +++ b/block/export/vhost-user-blk-server.c @@ -60,8 +60,7 @@ static void vu_blk_req_complete(VuBlkReq *req) { VuDev *vu_dev = &req->server->vu_dev; -/* IO size with 1 extra status byte */ -vu_queue_push(vu_dev, req->vq, &req->elem, req->size + 1); +vu_queue_push(vu_dev, req->vq, &req->elem, req->size); vu_queue_notify(vu_dev, req->vq); free(req); @@ -207,6 +206,7 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) goto err; } +req->size = iov_size(in_iov, in_num); /* We always touch the last byte, so just see how big in_iov is. */ req->in = (void *)in_iov[in_num - 1].iov_base + in_iov[in_num - 1].iov_len @@ -267,7 +267,6 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) VIRTIO_BLK_ID_BYTES); snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk"); req->in->status = VIRTIO_BLK_S_OK; -req->size = elem->in_sg[0].iov_len; break; } case VIRTIO_BLK_T_DISCARD: -- 2.35.3
[PULL v2 14/20] qsd: document vduse-blk exports
From: Stefan Hajnoczi Document vduse-blk exports in qemu-storage-daemon --help and the qemu-storage-daemon(1) man page. Based-on: <20220523084611.91-1-xieyon...@bytedance.com> Cc: Xie Yongji Signed-off-by: Stefan Hajnoczi Message-Id: <20220525121947.859820-1-stefa...@redhat.com> Signed-off-by: Kevin Wolf --- docs/tools/qemu-storage-daemon.rst | 21 + storage-daemon/qemu-storage-daemon.c | 9 + 2 files changed, 30 insertions(+) diff --git a/docs/tools/qemu-storage-daemon.rst b/docs/tools/qemu-storage-daemon.rst index 8b97592663..fbeaf76954 100644 --- a/docs/tools/qemu-storage-daemon.rst +++ b/docs/tools/qemu-storage-daemon.rst @@ -77,6 +77,7 @@ Standard options: --export [type=]vhost-user-blk,id=,node-name=,addr.type=unix,addr.path=[,writable=on|off][,logical-block-size=][,num-queues=] --export [type=]vhost-user-blk,id=,node-name=,addr.type=fd,addr.str=[,writable=on|off][,logical-block-size=][,num-queues=] --export [type=]fuse,id=,node-name=,mountpoint=[,growable=on|off][,writable=on|off][,allow-other=on|off|auto] + --export [type=]vduse-blk,id=,node-name=[,writable=on|off][,num-queues=][,queue-size=][,logical-block-size=] is a block export definition. ``node-name`` is the block node that should be exported. ``writable`` determines whether or not the export allows write @@ -110,6 +111,26 @@ Standard options: ``allow-other`` to auto (the default) will try enabling this option, and on error fall back to disabling it. + The ``vduse-blk`` export type uses the ``id`` as the VDUSE device name. + ``num-queues`` sets the number of virtqueues (the default is 1). + ``queue-size`` sets the virtqueue descriptor table size (the default is 256). 
+ + The instantiated VDUSE device must then be added to the vDPA bus using the + vdpa(8) command from the iproute2 project:: + + # vdpa dev add name <id> mgmtdev vduse + + The device can be removed from the vDPA bus later as follows:: + + # vdpa dev del <id> + + For more information about attaching vDPA devices to the host with + virtio_vdpa.ko or attaching them to guests with vhost_vdpa.ko, see + https://vdpa-dev.gitlab.io/. + + For more information about VDUSE, see + https://docs.kernel.org/userspace-api/vduse.html. + .. option:: --monitor MONITORDEF is a QMP monitor definition. See the :manpage:`qemu(1)` manual page for diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c index c104817cdd..17fd3f2f5f 100644 --- a/storage-daemon/qemu-storage-daemon.c +++ b/storage-daemon/qemu-storage-daemon.c @@ -121,6 +121,15 @@ static void help(void) " vhost-user-blk device over file descriptor\n" "\n" #endif /* CONFIG_VHOST_USER_BLK_SERVER */ +#ifdef CONFIG_VDUSE_BLK_EXPORT +" --export [type=]vduse-blk,id=,node-name=\n" +" [,writable=on|off][,num-queues=]\n" +" [,queue-size=]\n" +" [,logical-block-size=]\n" +" export the specified block node as a vduse-blk\n" +" device using the id as the VDUSE device name\n" +"\n" +#endif /* CONFIG_VDUSE_BLK_EXPORT */ " --monitor [chardev=]name[,mode=control][,pretty[=on|off]]\n" " configure a QMP monitor\n" "\n" -- 2.35.3
Re: [RFC v2] Adding block layer APIs resembling Linux ZoneBlockDevice ioctls.
On Fri, Jun 24, 2022 at 11:14:32AM +0800, Sam Li wrote: > Hi Stefan, > > Stefan Hajnoczi 于2022年6月20日周一 15:55写道: > > > > On Mon, Jun 20, 2022 at 11:36:11AM +0800, Sam Li wrote: > > > > Hi Sam, > > Is this version 2 of "[RFC v1] Add support for zoned device"? Please > > keep the email subject line the same (except for "v2", "v3", etc) so > > that it's clear which patch series this new version replaces. > > > > > Fix some mistakes before. It can report a range of zones now. > > > > This looks like the description of what changed compared to v1. Please > > put the changelog below "---" in the future. When patch emails are > > merged by git-am(1) it keeps the text above "---" and discards the text > > below "---". The changelog is usually no longer useful once the patches > > are merged, so it should be located below the "---" line. > > > > The text above the "---" is the commit description (an explanation of > > why this commit is necessary). In this case the commit description > > should explain that this patch adds .bdrv_co_zone_report() and > > .bdrv_co_zone_mgmt() to BlockDriver so that zoned block devices can be > > supported. 
> > > > > > > > Signed-off-by: Sam Li > > > --- > > > block/block-backend.c | 22 > > > block/coroutines.h| 5 + > > > block/file-posix.c| 182 ++ > > > block/io.c| 23 > > > include/block/block-common.h | 43 ++- > > > include/block/block-io.h | 13 +++ > > > include/block/block_int-common.h | 20 > > > qemu-io-cmds.c| 118 +++ > > > tests/qemu-iotests/tests/zoned.sh | 52 + > > > 9 files changed, 477 insertions(+), 1 deletion(-) > > > create mode 100644 tests/qemu-iotests/tests/zoned.sh > > > > > > diff --git a/block/block-backend.c b/block/block-backend.c > > > index e0e1aff4b1..20248e4a35 100644 > > > --- a/block/block-backend.c > > > +++ b/block/block-backend.c > > > @@ -104,6 +104,8 @@ typedef struct BlockBackendAIOCB { > > > int ret; > > > } BlockBackendAIOCB; > > > > > > + > > > + > > > > Please avoid whitespace changes in code that is otherwise untouched by > > your patch. Code changes can cause merge conflicts and they make it > > harder to use git-annotate(1), so only changes that are necessary should > > be included in a patch. > > > > > static const AIOCBInfo block_backend_aiocb_info = { > > > .get_aio_context = blk_aiocb_get_aio_context, > > > .aiocb_size = sizeof(BlockBackendAIOCB), > > > @@ -1810,6 +1812,25 @@ int blk_flush(BlockBackend *blk) > > > return ret; > > > } > > > > > > > Please add a documentation comment for blk_co_zone_report() that > > explains how to use the functions and the purpose of the arguments. For > > example, does offset have to be the first byte in a zone or can it be > > any byte offset? What are the alignment requirements of offset and len? > > Why is nr_zones a pointer? > > > > > +int blk_co_zone_report(BlockBackend *blk, int64_t offset, int64_t len, > > > > Functions that run in coroutine context must be labeled with > > coroutine_fn: > > > > int coroutine_fn blk_co_zone_report(...) > > > > This tells humans and tools that the function can only be called from a > > coroutine. 
There is a blog post about coroutines in QEMU here: > > https://blog.vmsplice.net/2014/01/coroutines-in-qemu-basics.html > > > > > + int64_t *nr_zones, > > > + struct BlockZoneDescriptor *zones) > > > > QEMU coding style uses typedefs when defining structs, so "struct > > BlockZoneDescriptor *zones" should be written as "BlockZoneDescriptor > > *zones". > > > > > +{ > > > +int ret; > > > > This function is called from the I/O code path, please mark it with: > > > > IO_CODE(); > > > > From include/block/block-io.h: > > > > * I/O API functions. These functions are thread-safe, and therefore > > * can run in any thread as long as the thread has called > > * aio_context_acquire/release(). > > * > > * These functions can only call functions from I/O and Common categories, > > * but can be invoked by GS, "I/O or GS" and I/O APIs. > > * > > * All functions in this category must use the macro > > * IO_CODE(); > > * to catch when they are accidentally called by the wrong API. > > > > > +ret = bdrv_co_zone_report(blk->root->bs, offset, len, nr_zones, > > > zones); > > > > Please add blk_inc_in_flight(blk) and blk_dec_in_flight(blk) around this > > function call to ensure that zone report requests finish before I/O is > > drained (see bdrv_drained_begin()). This is necessary so that it's > > possible to wait for I/O requests, including zone report, to complete. > > > > Similar to blk_co_do_preadv() we need blk_wait_while_drained(blk), > > blk_check_byte_request(), and bdrv_inc_in_flight(bs) before calling > > bdrv_co_zone_report(). bdrv_dec_in_flight(bs) needs to be called after > > bdrv_co_zon
[PULL v2 06/20] block: Support passing NULL ops to blk_set_dev_ops()
From: Xie Yongji This supports passing NULL ops to blk_set_dev_ops() so that we can remove stale ops in some cases. Signed-off-by: Xie Yongji Reviewed-by: Stefan Hajnoczi Message-Id: <20220523084611.91-2-xieyon...@bytedance.com> Signed-off-by: Kevin Wolf --- block/block-backend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/block-backend.c b/block/block-backend.c index d4abdf8faa..f425b00793 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1058,7 +1058,7 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, blk->dev_opaque = opaque; /* Are we currently quiesced? Should we enforce this right now? */ -if (blk->quiesce_counter && ops->drained_begin) { +if (blk->quiesce_counter && ops && ops->drained_begin) { ops->drained_begin(opaque); } } -- 2.35.3
[PULL v2 11/20] vduse-blk: Implement vduse-blk export
From: Xie Yongji This implements a VDUSE block backend based on the libvduse library. We can use it to export the BDSs for both VM and container (host) usage. The new command-line syntax is: $ qemu-storage-daemon \ --blockdev file,node-name=drive0,filename=test.img \ --export vduse-blk,node-name=drive0,id=vduse-export0,writable=on After the qemu-storage-daemon has started, we need to use the "vdpa" command to attach the device to the vDPA bus: $ vdpa dev add name vduse-export0 mgmtdev vduse Also the device must be removed via the "vdpa" command before we stop the qemu-storage-daemon. Signed-off-by: Xie Yongji Reviewed-by: Stefan Hajnoczi Message-Id: <20220523084611.91-7-xieyon...@bytedance.com> Signed-off-by: Kevin Wolf --- qapi/block-export.json| 28 ++- meson_options.txt | 2 + block/export/vduse-blk.h | 20 +++ block/export/export.c | 6 + block/export/vduse-blk.c | 329 ++ MAINTAINERS | 4 +- block/export/meson.build | 5 + meson.build | 13 ++ scripts/meson-buildoptions.sh | 4 + 9 files changed, 407 insertions(+), 4 deletions(-) create mode 100644 block/export/vduse-blk.h create mode 100644 block/export/vduse-blk.c diff --git a/qapi/block-export.json b/qapi/block-export.json index 8afb1b65b3..99c34a6965 100644 --- a/qapi/block-export.json +++ b/qapi/block-export.json @@ -178,6 +178,23 @@ '*allow-other': 'FuseExportAllowOther' }, 'if': 'CONFIG_FUSE' } +## +# @BlockExportOptionsVduseBlk: +# +# A vduse-blk block export. +# +# @num-queues: the number of virtqueues. Defaults to 1. +# @queue-size: the size of virtqueue. Defaults to 256. +# @logical-block-size: Logical block size in bytes. Range [512, PAGE_SIZE] +# and must be power of 2. Defaults to 512 bytes. 
+# +# Since: 7.1 +## +{ 'struct': 'BlockExportOptionsVduseBlk', + 'data': { '*num-queues': 'uint16', +'*queue-size': 'uint16', +'*logical-block-size': 'size'} } + ## # @NbdServerAddOptions: # @@ -284,6 +301,7 @@ # @nbd: NBD export # @vhost-user-blk: vhost-user-blk export (since 5.2) # @fuse: FUSE export (since: 6.0) +# @vduse-blk: vduse-blk export (since 7.1) # # Since: 4.2 ## @@ -291,7 +309,8 @@ 'data': [ 'nbd', { 'name': 'vhost-user-blk', 'if': 'CONFIG_VHOST_USER_BLK_SERVER' }, -{ 'name': 'fuse', 'if': 'CONFIG_FUSE' } ] } +{ 'name': 'fuse', 'if': 'CONFIG_FUSE' }, +{ 'name': 'vduse-blk', 'if': 'CONFIG_VDUSE_BLK_EXPORT' } ] } ## # @BlockExportOptions: @@ -299,7 +318,8 @@ # Describes a block export, i.e. how single node should be exported on an # external interface. # -# @id: A unique identifier for the block export (across all export types) +# @id: A unique identifier for the block export (across the host for vduse-blk +# export type or across all export types for other types) # # @node-name: The node name of the block node to be exported (since: 5.2) # @@ -335,7 +355,9 @@ 'vhost-user-blk': { 'type': 'BlockExportOptionsVhostUserBlk', 'if': 'CONFIG_VHOST_USER_BLK_SERVER' }, 'fuse': { 'type': 'BlockExportOptionsFuse', -'if': 'CONFIG_FUSE' } +'if': 'CONFIG_FUSE' }, + 'vduse-blk': { 'type': 'BlockExportOptionsVduseBlk', + 'if': 'CONFIG_VDUSE_BLK_EXPORT' } } } ## diff --git a/meson_options.txt b/meson_options.txt index 23a9f440f7..97c38109b1 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -259,6 +259,8 @@ option('virtiofsd', type: 'feature', value: 'auto', description: 'build virtiofs daemon (virtiofsd)') option('libvduse', type: 'feature', value: 'auto', description: 'build VDUSE Library') +option('vduse_blk_export', type: 'feature', value: 'auto', + description: 'VDUSE block export support') option('capstone', type: 'feature', value: 'auto', description: 'Whether and how to find the capstone library') diff --git a/block/export/vduse-blk.h 
b/block/export/vduse-blk.h new file mode 100644 index 00..c4eeb1b70e --- /dev/null +++ b/block/export/vduse-blk.h @@ -0,0 +1,20 @@ +/* + * Export QEMU block device via VDUSE + * + * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved. + * + * Author: + * Xie Yongji + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#ifndef VDUSE_BLK_H +#define VDUSE_BLK_H + +#include "block/export.h" + +extern const BlockExportDriver blk_exp_vduse_blk; + +#endif /* VDUSE_BLK_H */ diff --git a/block/export/export.c b/block/export/export.c index 7253af3bc3..4744862915 100644 --- a/block/export/export.c +++ b/block/export/export.c @@ -26,6 +26,9 @@ #ifdef CONFIG_VHOST_USER_BLK_SERVER #include "vhost-user-blk-server.h" #endif +#ifdef CONFIG_VDUSE_BLK_EXPORT +#include "vduse
[PULL v2 05/20] block: simplify handling of try to merge different sized bitmaps
From: Vladimir Sementsov-Ogievskiy We have too much logic to simply check that bitmaps are of the same size. Let's just define that hbitmap_merge() and bdrv_dirty_bitmap_merge_internal() require their argument bitmaps be of same size, this simplifies things. Let's look through the callers: For backup_init_bcs_bitmap() we already assert that merge can't fail. In bdrv_reclaim_dirty_bitmap_locked() we gracefully handle the error that can't happen: successor always has same size as its parent, drop this logic. In bdrv_merge_dirty_bitmap() we already have an assertion and a separate check. Make the check explicit and improve error message. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Nikita Lapshin Reviewed-by: Kevin Wolf Message-Id: <20220517111206.23585-4-v.sementsov...@mail.ru> Signed-off-by: Kevin Wolf --- include/block/block_int-io.h | 2 +- include/qemu/hbitmap.h | 15 ++- block/backup.c | 6 ++ block/dirty-bitmap.c | 26 +++--- util/hbitmap.c | 25 +++-- 5 files changed, 23 insertions(+), 51 deletions(-) diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h index bb454200e5..ded29e7494 100644 --- a/include/block/block_int-io.h +++ b/include/block/block_int-io.h @@ -102,7 +102,7 @@ bool blk_dev_is_tray_open(BlockBackend *blk); void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes); void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); -bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest, +void bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src, HBitmap **backup, bool lock); diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h index 5bd986aa44..af4e4ab746 100644 --- a/include/qemu/hbitmap.h +++ b/include/qemu/hbitmap.h @@ -76,20 +76,9 @@ void hbitmap_truncate(HBitmap *hb, uint64_t size); * * Store result of merging @a and @b into @result. * @result is allowed to be equal to @a or @b. - * - * Return true if the merge was successful, - *false if it was not attempted. 
- */ -bool hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result); - -/** - * hbitmap_can_merge: - * - * hbitmap_can_merge(a, b) && hbitmap_can_merge(a, result) is sufficient and - * necessary for hbitmap_merge will not fail. - * + * All bitmaps must have same size. */ -bool hbitmap_can_merge(const HBitmap *a, const HBitmap *b); +void hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result); /** * hbitmap_empty: diff --git a/block/backup.c b/block/backup.c index 5cfd0b999c..b2b649e305 100644 --- a/block/backup.c +++ b/block/backup.c @@ -228,15 +228,13 @@ out: static void backup_init_bcs_bitmap(BackupBlockJob *job) { -bool ret; uint64_t estimate; BdrvDirtyBitmap *bcs_bitmap = block_copy_dirty_bitmap(job->bcs); if (job->sync_mode == MIRROR_SYNC_MODE_BITMAP) { bdrv_clear_dirty_bitmap(bcs_bitmap, NULL); -ret = bdrv_dirty_bitmap_merge_internal(bcs_bitmap, job->sync_bitmap, - NULL, true); -assert(ret); +bdrv_dirty_bitmap_merge_internal(bcs_bitmap, job->sync_bitmap, NULL, + true); } else if (job->sync_mode == MIRROR_SYNC_MODE_TOP) { /* * We can't hog the coroutine to initialize this thoroughly. 
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index da1b91166f..bf3dc0512a 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -309,10 +309,7 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap_locked(BdrvDirtyBitmap *parent, return NULL; } -if (!hbitmap_merge(parent->bitmap, successor->bitmap, parent->bitmap)) { -error_setg(errp, "Merging of parent and successor bitmap failed"); -return NULL; -} +hbitmap_merge(parent->bitmap, successor->bitmap, parent->bitmap); parent->disabled = successor->disabled; parent->busy = false; @@ -912,13 +909,15 @@ bool bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src, goto out; } -if (!hbitmap_can_merge(dest->bitmap, src->bitmap)) { -error_setg(errp, "Bitmaps are incompatible and can't be merged"); +if (bdrv_dirty_bitmap_size(src) != bdrv_dirty_bitmap_size(dest)) { +error_setg(errp, "Bitmaps are of different sizes (destination size is %" + PRId64 ", source size is %" PRId64 ") and can't be merged", + bdrv_dirty_bitmap_size(dest), bdrv_dirty_bitmap_size(src)); goto out; } -ret = bdrv_dirty_bitmap_merge_internal(dest, src, backup, false); -assert(ret); +bdrv_dirty_bitmap_merge_internal(dest, src, backup, false); +ret = true; out: bdrv_dirty_bitmaps_unlock(dest->bs); @@ -932,17 +931,16 @@ out: /** * bdrv_dirty_bit
[PULL v2 19/20] vduse-blk: Add serial option
From: Xie Yongji Add a 'serial' option to allow user to specify this value explicitly. And the default value is changed to an empty string as what we did in "hw/block/virtio-blk.c". Signed-off-by: Xie Yongji Message-Id: <20220614051532.92-6-xieyon...@bytedance.com> Signed-off-by: Kevin Wolf --- qapi/block-export.json | 4 +++- docs/tools/qemu-storage-daemon.rst | 2 +- block/export/virtio-blk-handler.h| 2 +- block/export/vduse-blk.c | 20 ++-- block/export/vhost-user-blk-server.c | 4 +++- storage-daemon/qemu-storage-daemon.c | 1 + 6 files changed, 23 insertions(+), 10 deletions(-) diff --git a/qapi/block-export.json b/qapi/block-export.json index 99c34a6965..618a6367c9 100644 --- a/qapi/block-export.json +++ b/qapi/block-export.json @@ -187,13 +187,15 @@ # @queue-size: the size of virtqueue. Defaults to 256. # @logical-block-size: Logical block size in bytes. Range [512, PAGE_SIZE] # and must be power of 2. Defaults to 512 bytes. +# @serial: the serial number of virtio block device. Defaults to empty string. 
# # Since: 7.1 ## { 'struct': 'BlockExportOptionsVduseBlk', 'data': { '*num-queues': 'uint16', '*queue-size': 'uint16', -'*logical-block-size': 'size'} } +'*logical-block-size': 'size', +'*serial': 'str' } } ## # @NbdServerAddOptions: diff --git a/docs/tools/qemu-storage-daemon.rst b/docs/tools/qemu-storage-daemon.rst index fbeaf76954..034f2809a6 100644 --- a/docs/tools/qemu-storage-daemon.rst +++ b/docs/tools/qemu-storage-daemon.rst @@ -77,7 +77,7 @@ Standard options: --export [type=]vhost-user-blk,id=,node-name=,addr.type=unix,addr.path=[,writable=on|off][,logical-block-size=][,num-queues=] --export [type=]vhost-user-blk,id=,node-name=,addr.type=fd,addr.str=[,writable=on|off][,logical-block-size=][,num-queues=] --export [type=]fuse,id=,node-name=,mountpoint=[,growable=on|off][,writable=on|off][,allow-other=on|off|auto] - --export [type=]vduse-blk,id=,node-name=[,writable=on|off][,num-queues=][,queue-size=][,logical-block-size=] + --export [type=]vduse-blk,id=,node-name=[,writable=on|off][,num-queues=][,queue-size=][,logical-block-size=][,serial=] is a block export definition. ``node-name`` is the block node that should be exported. 
``writable`` determines whether or not the export allows write diff --git a/block/export/virtio-blk-handler.h b/block/export/virtio-blk-handler.h index 1c7a5e32ad..150d44cff2 100644 --- a/block/export/virtio-blk-handler.h +++ b/block/export/virtio-blk-handler.h @@ -23,7 +23,7 @@ typedef struct { BlockBackend *blk; -const char *serial; +char *serial; uint32_t logical_block_size; bool writable; } VirtioBlkHandler; diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c index 251d73c841..066e088b00 100644 --- a/block/export/vduse-blk.c +++ b/block/export/vduse-blk.c @@ -235,7 +235,7 @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, Error *local_err = NULL; struct virtio_blk_config config = { 0 }; uint64_t features; -int i; +int i, ret; if (vblk_opts->has_num_queues) { num_queues = vblk_opts->num_queues; @@ -265,7 +265,8 @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, } vblk_exp->num_queues = num_queues; vblk_exp->handler.blk = exp->blk; -vblk_exp->handler.serial = exp->id; +vblk_exp->handler.serial = g_strdup(vblk_opts->has_serial ? 
+vblk_opts->serial : ""); vblk_exp->handler.logical_block_size = logical_block_size; vblk_exp->handler.writable = opts->writable; @@ -306,16 +307,16 @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, vblk_exp); if (!vblk_exp->dev) { error_setg(errp, "failed to create vduse device"); -return -ENOMEM; +ret = -ENOMEM; +goto err_dev; } vblk_exp->recon_file = g_strdup_printf("%s/vduse-blk-%s", g_get_tmp_dir(), exp->id); if (vduse_set_reconnect_log_file(vblk_exp->dev, vblk_exp->recon_file)) { error_setg(errp, "failed to set reconnect log file"); -vduse_dev_destroy(vblk_exp->dev); -g_free(vblk_exp->recon_file); -return -EINVAL; +ret = -EINVAL; +goto err; } for (i = 0; i < num_queues; i++) { @@ -331,6 +332,12 @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, blk_set_dev_ops(exp->blk, &vduse_block_ops, exp); return 0; +err: +vduse_dev_destroy(vblk_exp->dev); +g_free(vblk_exp->recon_file); +err_dev: +g_free(vblk_exp->handler.serial); +return ret; } static void vduse_blk_exp_delete(BlockExport *exp) @@ -346,6 +353,7 @@ static void vduse_blk_exp_delete(BlockExport *exp) unlink(vblk_exp->re
[PULL v2 03/20] block: block_dirty_bitmap_merge(): fix error path
From: Vladimir Sementsov-Ogievskiy At the end we ignore failure of bdrv_merge_dirty_bitmap() and report success. And still set errp. That's wrong. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Nikita Lapshin Reviewed-by: Kevin Wolf Message-Id: <20220517111206.23585-2-v.sementsov...@mail.ru> Signed-off-by: Kevin Wolf --- block/monitor/bitmap-qmp-cmds.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/block/monitor/bitmap-qmp-cmds.c b/block/monitor/bitmap-qmp-cmds.c index 2b677c4a2f..bd10468596 100644 --- a/block/monitor/bitmap-qmp-cmds.c +++ b/block/monitor/bitmap-qmp-cmds.c @@ -309,7 +309,10 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, } /* Merge into dst; dst is unchanged on failure. */ -bdrv_merge_dirty_bitmap(dst, anon, backup, errp); +if (!bdrv_merge_dirty_bitmap(dst, anon, backup, errp)) { +dst = NULL; +goto out; +} out: bdrv_release_dirty_bitmap(anon); -- 2.35.3
[PULL v2 09/20] linux-headers: Add vduse.h
From: Xie Yongji This adds vduse header to linux headers so that the relevant VDUSE API can be used in subsequent patches. Signed-off-by: Xie Yongji Reviewed-by: Stefan Hajnoczi Message-Id: <20220523084611.91-5-xieyon...@bytedance.com> Signed-off-by: Kevin Wolf --- linux-headers/linux/vduse.h | 306 scripts/update-linux-headers.sh | 2 +- 2 files changed, 307 insertions(+), 1 deletion(-) create mode 100644 linux-headers/linux/vduse.h diff --git a/linux-headers/linux/vduse.h b/linux-headers/linux/vduse.h new file mode 100644 index 00..d47b004ce6 --- /dev/null +++ b/linux-headers/linux/vduse.h @@ -0,0 +1,306 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _VDUSE_H_ +#define _VDUSE_H_ + +#include + +#define VDUSE_BASE 0x81 + +/* The ioctls for control device (/dev/vduse/control) */ + +#define VDUSE_API_VERSION 0 + +/* + * Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION). + * This is used for future extension. + */ +#define VDUSE_GET_API_VERSION _IOR(VDUSE_BASE, 0x00, __u64) + +/* Set the version of VDUSE API that userspace supported. */ +#define VDUSE_SET_API_VERSION _IOW(VDUSE_BASE, 0x01, __u64) + +/** + * struct vduse_dev_config - basic configuration of a VDUSE device + * @name: VDUSE device name, needs to be NUL terminated + * @vendor_id: virtio vendor id + * @device_id: virtio device id + * @features: virtio features + * @vq_num: the number of virtqueues + * @vq_align: the allocation alignment of virtqueue's metadata + * @reserved: for future use, needs to be initialized to zero + * @config_size: the size of the configuration space + * @config: the buffer of the configuration space + * + * Structure used by VDUSE_CREATE_DEV ioctl to create VDUSE device. 
+ */ +struct vduse_dev_config { +#define VDUSE_NAME_MAX 256 + char name[VDUSE_NAME_MAX]; + __u32 vendor_id; + __u32 device_id; + __u64 features; + __u32 vq_num; + __u32 vq_align; + __u32 reserved[13]; + __u32 config_size; + __u8 config[]; +}; + +/* Create a VDUSE device which is represented by a char device (/dev/vduse/$NAME) */ +#define VDUSE_CREATE_DEV _IOW(VDUSE_BASE, 0x02, struct vduse_dev_config) + +/* + * Destroy a VDUSE device. Make sure there are no more references + * to the char device (/dev/vduse/$NAME). + */ +#define VDUSE_DESTROY_DEV _IOW(VDUSE_BASE, 0x03, char[VDUSE_NAME_MAX]) + +/* The ioctls for VDUSE device (/dev/vduse/$NAME) */ + +/** + * struct vduse_iotlb_entry - entry of IOTLB to describe one IOVA region [start, last] + * @offset: the mmap offset on returned file descriptor + * @start: start of the IOVA region + * @last: last of the IOVA region + * @perm: access permission of the IOVA region + * + * Structure used by VDUSE_IOTLB_GET_FD ioctl to find an overlapped IOVA region. + */ +struct vduse_iotlb_entry { + __u64 offset; + __u64 start; + __u64 last; +#define VDUSE_ACCESS_RO 0x1 +#define VDUSE_ACCESS_WO 0x2 +#define VDUSE_ACCESS_RW 0x3 + __u8 perm; +}; + +/* + * Find the first IOVA region that overlaps with the range [start, last] + * and return the corresponding file descriptor. Return -EINVAL means the + * IOVA region doesn't exist. Caller should set start and last fields. + */ +#define VDUSE_IOTLB_GET_FD _IOWR(VDUSE_BASE, 0x10, struct vduse_iotlb_entry) + +/* + * Get the negotiated virtio features. It's a subset of the features in + * struct vduse_dev_config which can be accepted by virtio driver. It's + * only valid after FEATURES_OK status bit is set. 
+ */ +#define VDUSE_DEV_GET_FEATURES _IOR(VDUSE_BASE, 0x11, __u64) + +/** + * struct vduse_config_data - data used to update configuration space + * @offset: the offset from the beginning of configuration space + * @length: the length to write to configuration space + * @buffer: the buffer used to write from + * + * Structure used by VDUSE_DEV_SET_CONFIG ioctl to update device + * configuration space. + */ +struct vduse_config_data { + __u32 offset; + __u32 length; + __u8 buffer[]; +}; + +/* Set device configuration space */ +#define VDUSE_DEV_SET_CONFIG _IOW(VDUSE_BASE, 0x12, struct vduse_config_data) + +/* + * Inject a config interrupt. It's usually used to notify virtio driver + * that device configuration space has changed. + */ +#define VDUSE_DEV_INJECT_CONFIG_IRQ _IO(VDUSE_BASE, 0x13) + +/** + * struct vduse_vq_config - basic configuration of a virtqueue + * @index: virtqueue index + * @max_size: the max size of virtqueue + * @reserved: for future use, needs to be initialized to zero + * + * Structure used by VDUSE_VQ_SETUP ioctl to setup a virtqueue. + */ +struct vduse_vq_config { + __u32 index; + __u16 max_size; + __u16 reserved[13]; +}; + +/* + * Setup the specified virtqueue. Make sure all virtqueues have been + * configured before the device is attached to vDPA bus. + */ +#define VDUSE_VQ_SETUP _IOW(VDUSE_BASE,
[PULL v2 17/20] aio_wait_kick: add missing memory barrier
From: Emanuele Giuseppe Esposito It seems that aio_wait_kick always required a memory barrier or atomic operation in the caller, but nobody actually took care of doing it. Let's put the barrier in the function instead, and pair it with another one in AIO_WAIT_WHILE. Read aio_wait_kick() comment for further explanation. Suggested-by: Paolo Bonzini Signed-off-by: Emanuele Giuseppe Esposito Message-Id: <20220524173054.12651-1-eespo...@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- include/block/aio-wait.h | 2 ++ util/aio-wait.c | 16 +++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h index b39eefb38d..54840f8622 100644 --- a/include/block/aio-wait.h +++ b/include/block/aio-wait.h @@ -81,6 +81,8 @@ extern AioWait global_aio_wait; AioContext *ctx_ = (ctx); \ /* Increment wait_->num_waiters before evaluating cond. */ \ qatomic_inc(&wait_->num_waiters); \ +/* Paired with smp_mb in aio_wait_kick(). */ \ +smp_mb(); \ if (ctx_ && in_aio_context_home_thread(ctx_)) {\ while ((cond)) { \ aio_poll(ctx_, true); \ diff --git a/util/aio-wait.c b/util/aio-wait.c index bdb3d3af22..98c5accd29 100644 --- a/util/aio-wait.c +++ b/util/aio-wait.c @@ -35,7 +35,21 @@ static void dummy_bh_cb(void *opaque) void aio_wait_kick(void) { -/* The barrier (or an atomic op) is in the caller. */ +/* + * Paired with smp_mb in AIO_WAIT_WHILE. Here we have: + * write(condition); + * aio_wait_kick() { + * smp_mb(); + * read(num_waiters); + * } + * + * And in AIO_WAIT_WHILE: + * write(num_waiters); + * smp_mb(); + * read(condition); + */ +smp_mb(); + if (qatomic_read(&global_aio_wait.num_waiters)) { aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL); } -- 2.35.3
[PULL v2 08/20] block/export: Abstract out the logic of virtio-blk I/O process
From: Xie Yongji Abstract the common logic of virtio-blk I/O process to a function named virtio_blk_process_req(). It's needed for the following commit. Signed-off-by: Xie Yongji Message-Id: <20220523084611.91-4-xieyon...@bytedance.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/export/virtio-blk-handler.h| 37 block/export/vhost-user-blk-server.c | 259 +++ block/export/virtio-blk-handler.c| 240 + MAINTAINERS | 2 + block/export/meson.build | 2 +- 5 files changed, 301 insertions(+), 239 deletions(-) create mode 100644 block/export/virtio-blk-handler.h create mode 100644 block/export/virtio-blk-handler.c diff --git a/block/export/virtio-blk-handler.h b/block/export/virtio-blk-handler.h new file mode 100644 index 00..1c7a5e32ad --- /dev/null +++ b/block/export/virtio-blk-handler.h @@ -0,0 +1,37 @@ +/* + * Handler for virtio-blk I/O + * + * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved. + * + * Author: + * Xie Yongji + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. 
+ */ + +#ifndef VIRTIO_BLK_HANDLER_H +#define VIRTIO_BLK_HANDLER_H + +#include "sysemu/block-backend.h" + +#define VIRTIO_BLK_SECTOR_BITS 9 +#define VIRTIO_BLK_SECTOR_SIZE (1ULL << VIRTIO_BLK_SECTOR_BITS) + +#define VIRTIO_BLK_MAX_DISCARD_SECTORS 32768 +#define VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS 32768 + +typedef struct { +BlockBackend *blk; +const char *serial; +uint32_t logical_block_size; +bool writable; +} VirtioBlkHandler; + +int coroutine_fn virtio_blk_process_req(VirtioBlkHandler *handler, +struct iovec *in_iov, +struct iovec *out_iov, +unsigned int in_num, +unsigned int out_num); + +#endif /* VIRTIO_BLK_HANDLER_H */ diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c index 19c6ee51d3..c9c290cc4c 100644 --- a/block/export/vhost-user-blk-server.c +++ b/block/export/vhost-user-blk-server.c @@ -17,31 +17,15 @@ #include "vhost-user-blk-server.h" #include "qapi/error.h" #include "qom/object_interfaces.h" -#include "sysemu/block-backend.h" #include "util/block-helpers.h" - -/* - * Sector units are 512 bytes regardless of the - * virtio_blk_config->blk_size value. 
- */ -#define VIRTIO_BLK_SECTOR_BITS 9 -#define VIRTIO_BLK_SECTOR_SIZE (1ull << VIRTIO_BLK_SECTOR_BITS) +#include "virtio-blk-handler.h" enum { VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1, -VHOST_USER_BLK_MAX_DISCARD_SECTORS = 32768, -VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS = 32768, -}; -struct virtio_blk_inhdr { -unsigned char status; }; typedef struct VuBlkReq { VuVirtqElement elem; -int64_t sector_num; -size_t size; -struct virtio_blk_inhdr *in; -struct virtio_blk_outhdr out; VuServer *server; struct VuVirtq *vq; } VuBlkReq; @@ -50,247 +34,44 @@ typedef struct VuBlkReq { typedef struct { BlockExport export; VuServer vu_server; -uint32_t blk_size; +VirtioBlkHandler handler; QIOChannelSocket *sioc; struct virtio_blk_config blkcfg; -bool writable; } VuBlkExport; -static void vu_blk_req_complete(VuBlkReq *req) +static void vu_blk_req_complete(VuBlkReq *req, size_t in_len) { VuDev *vu_dev = &req->server->vu_dev; -vu_queue_push(vu_dev, req->vq, &req->elem, req->size); +vu_queue_push(vu_dev, req->vq, &req->elem, in_len); vu_queue_notify(vu_dev, req->vq); free(req); } -static bool vu_blk_sect_range_ok(VuBlkExport *vexp, uint64_t sector, - size_t size) -{ -uint64_t nb_sectors; -uint64_t total_sectors; - -if (size % VIRTIO_BLK_SECTOR_SIZE) { -return false; -} - -nb_sectors = size >> VIRTIO_BLK_SECTOR_BITS; - -QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != VIRTIO_BLK_SECTOR_SIZE); -if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) { -return false; -} -if ((sector << VIRTIO_BLK_SECTOR_BITS) % vexp->blk_size) { -return false; -} -blk_get_geometry(vexp->export.blk, &total_sectors); -if (sector > total_sectors || nb_sectors > total_sectors - sector) { -return false; -} -return true; -} - -static int coroutine_fn -vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov, -uint32_t iovcnt, uint32_t type) -{ -BlockBackend *blk = vexp->export.blk; -struct virtio_blk_discard_write_zeroes desc; -ssize_t size; -uint64_t sector; -uint32_t num_sectors; -uint32_t max_sectors; -uint32_t flags; -int 
bytes; - -/* Only one desc is currently supported */ -if (unlikely(iov_size(iov, iovcnt) > sizeof(desc))) { -return VIRTIO_BLK_S_UNSUPP; -
[PULL v2 01/20] block: drop unused bdrv_co_drain() API
From: Stefan Hajnoczi bdrv_co_drain() has not been used since commit 9a0cec664eef ("mirror: use bdrv_drained_begin/bdrv_drained_end") in 2016. Remove it so there are fewer drain scenarios to worry about. Use bdrv_drained_begin()/bdrv_drained_end() instead. They are "mixed" functions that can be called from coroutine context. Unlike bdrv_co_drain(), these functions provide control of the length of the drained section, which is usually the right thing. Signed-off-by: Stefan Hajnoczi Message-Id: <20220521122714.3837731-1-stefa...@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Alberto Faria Signed-off-by: Kevin Wolf --- include/block/block-io.h | 1 - block/io.c | 15 --- 2 files changed, 16 deletions(-) diff --git a/include/block/block-io.h b/include/block/block-io.h index 62c84f0519..053a27141a 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -270,7 +270,6 @@ void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter); cond); }) void bdrv_drain(BlockDriverState *bs); -void coroutine_fn bdrv_co_drain(BlockDriverState *bs); int generated_co_wrapper bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, diff --git a/block/io.c b/block/io.c index 789e6373d5..1e9bf09a49 100644 --- a/block/io.c +++ b/block/io.c @@ -588,21 +588,6 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0); } -/* - * Wait for pending requests to complete on a single BlockDriverState subtree, - * and suspend block driver's internal I/O until next request arrives. - * - * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState - * AioContext. - */ -void coroutine_fn bdrv_co_drain(BlockDriverState *bs) -{ -IO_OR_GS_CODE(); -assert(qemu_in_coroutine()); -bdrv_drained_begin(bs); -bdrv_drained_end(bs); -} - void bdrv_drain(BlockDriverState *bs) { IO_OR_GS_CODE(); -- 2.35.3
[PULL v2 00/20] Block layer patches
The following changes since commit 3a821c52e1a30ecd9a436f2c67cc66b5628c829f: Merge tag 'nvme-next-pull-request' of git://git.infradead.org/qemu-nvme into staging (2022-06-23 14:52:30 -0700) are available in the Git repository at: git://repo.or.cz/qemu/kevin.git tags/for-upstream for you to fetch changes up to 779d82e1d305f2a9cbd7f48cf6555ad58145e04a: vduse-blk: Add name option (2022-06-24 17:07:06 +0200) Block layer patches - Add vduse-blk export - Dirty bitmaps: Fix and improve bitmap merge - gluster: correctly set max_pdiscard - rbd: report a better error when namespace does not exist - aio_wait_kick: add missing memory barrier - Code cleanups Emanuele Giuseppe Esposito (1): aio_wait_kick: add missing memory barrier Eric Blake (1): nbd: Drop dead code spotted by Coverity Fabian Ebner (1): block/gluster: correctly set max_pdiscard Stefan Hajnoczi (3): block: drop unused bdrv_co_drain() API block: get rid of blk->guest_block_size qsd: document vduse-blk exports Stefano Garzarella (1): block/rbd: report a better error when namespace does not exist Vladimir Sementsov-Ogievskiy (3): block: block_dirty_bitmap_merge(): fix error path block: improve block_dirty_bitmap_merge(): don't allocate extra bitmap block: simplify handling of try to merge different sized bitmaps Xie Yongji (10): block: Support passing NULL ops to blk_set_dev_ops() block/export: Fix incorrect length passed to vu_queue_push() block/export: Abstract out the logic of virtio-blk I/O process linux-headers: Add vduse.h libvduse: Add VDUSE (vDPA Device in Userspace) library vduse-blk: Implement vduse-blk export vduse-blk: Add vduse-blk resize support libvduse: Add support for reconnecting vduse-blk: Add serial option vduse-blk: Add name option qapi/block-export.json | 29 +- docs/tools/qemu-storage-daemon.rst | 22 + meson_options.txt |4 + block/export/vduse-blk.h| 20 + block/export/virtio-blk-handler.h | 37 + include/block/aio-wait.h|2 + include/block/block-io.h|1 - include/block/block_int-io.h|2 +- 
include/qemu/hbitmap.h | 15 +- include/sysemu/block-backend-io.h |1 - linux-headers/linux/vduse.h | 306 ++ subprojects/libvduse/include/atomic.h |1 + subprojects/libvduse/include/compiler.h |1 + subprojects/libvduse/libvduse.h | 247 + block/backup.c |6 +- block/block-backend.c | 12 +- block/dirty-bitmap.c| 26 +- block/export/export.c |6 + block/export/vduse-blk.c| 374 block/export/vhost-user-blk-server.c| 263 + block/export/virtio-blk-handler.c | 240 + block/gluster.c |2 +- block/io.c | 15 - block/monitor/bitmap-qmp-cmds.c | 40 +- block/nbd.c |8 +- block/rbd.c | 24 + hw/block/virtio-blk.c |1 - hw/block/xen-block.c|1 - hw/ide/core.c |1 - hw/scsi/scsi-disk.c |1 - hw/scsi/scsi-generic.c |1 - storage-daemon/qemu-storage-daemon.c| 10 + subprojects/libvduse/libvduse.c | 1375 +++ util/aio-wait.c | 16 +- util/hbitmap.c | 25 +- MAINTAINERS |9 + block/export/meson.build|7 +- meson.build | 34 + scripts/meson-buildoptions.sh |7 + scripts/update-linux-headers.sh |2 +- subprojects/libvduse/linux-headers/linux|1 + subprojects/libvduse/meson.build| 10 + subprojects/libvduse/standard-headers/linux |1 + 43 files changed, 2852 insertions(+), 354 deletions(-) create mode 100644 block/export/vduse-blk.h create mode 100644 block/export/virtio-blk-handler.h create mode 100644 linux-headers/linux/vduse.h create mode 12 subprojects/libvduse/include/atomic.h create mode 12 subprojects/libvduse/include/compiler.h create mode 100644 subprojects/libvduse/libvduse.h create mode 100644 block/export/vduse-blk.c create mode 100644 block/export/virtio-blk-handler.c create mode 100644 subprojects/libvduse/libvduse.c create mode 12 subprojects/libvduse/linux-headers/linux create mode 100644 subpr
[PULL v2 12/20] vduse-blk: Add vduse-blk resize support
From: Xie Yongji To support block resize, this uses vduse_dev_update_config() to update the capacity field in the configuration space and inject a config interrupt from the block resize callback. Signed-off-by: Xie Yongji Reviewed-by: Stefan Hajnoczi Message-Id: <20220523084611.91-8-xieyon...@bytedance.com> Signed-off-by: Kevin Wolf --- block/export/vduse-blk.c | 20 1 file changed, 20 insertions(+) diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c index 04be16c133..cab1904234 100644 --- a/block/export/vduse-blk.c +++ b/block/export/vduse-blk.c @@ -204,6 +204,23 @@ static void blk_aio_detach(void *opaque) vblk_exp->export.ctx = NULL; } +static void vduse_blk_resize(void *opaque) +{ +BlockExport *exp = opaque; +VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export); +struct virtio_blk_config config; + +config.capacity = +cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS); +vduse_dev_update_config(vblk_exp->dev, sizeof(config.capacity), +offsetof(struct virtio_blk_config, capacity), +(char *)&config.capacity); +} + +static const BlockDevOps vduse_block_ops = { +.resize_cb = vduse_blk_resize, +}; + static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, Error **errp) { @@ -299,6 +316,8 @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach, vblk_exp); +blk_set_dev_ops(exp->blk, &vduse_block_ops, exp); + return 0; } @@ -308,6 +327,7 @@ static void vduse_blk_exp_delete(BlockExport *exp) blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach, vblk_exp); +blk_set_dev_ops(exp->blk, NULL, NULL); vduse_dev_destroy(vblk_exp->dev); } -- 2.35.3
[PULL v2 02/20] block: get rid of blk->guest_block_size
From: Stefan Hajnoczi Commit 1b7fd729559c ("block: rename buffer_alignment to guest_block_size") noted: At this point, the field is set by the device emulation, but completely ignored by the block layer. The last time the value of buffer_alignment/guest_block_size was actually used was before commit 339064d50639 ("block: Don't use guest sector size for qemu_blockalign()"). This value has not been used since 2013. Get rid of it. Cc: Xie Yongji Signed-off-by: Stefan Hajnoczi Message-Id: <20220518130945.2657905-1-stefa...@redhat.com> Reviewed-by: Paul Durrant Reviewed-by: Eric Blake Reviewed-by: Alberto Faria Signed-off-by: Kevin Wolf --- include/sysemu/block-backend-io.h| 1 - block/block-backend.c| 10 -- block/export/vhost-user-blk-server.c | 1 - hw/block/virtio-blk.c| 1 - hw/block/xen-block.c | 1 - hw/ide/core.c| 1 - hw/scsi/scsi-disk.c | 1 - hw/scsi/scsi-generic.c | 1 - 8 files changed, 17 deletions(-) diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h index 6517c39295..ccef514023 100644 --- a/include/sysemu/block-backend-io.h +++ b/include/sysemu/block-backend-io.h @@ -72,7 +72,6 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action, void blk_iostatus_set_err(BlockBackend *blk, int error); int blk_get_max_iov(BlockBackend *blk); int blk_get_max_hw_iov(BlockBackend *blk); -void blk_set_guest_block_size(BlockBackend *blk, int align); void blk_io_plug(BlockBackend *blk); void blk_io_unplug(BlockBackend *blk); diff --git a/block/block-backend.c b/block/block-backend.c index e0e1aff4b1..d4abdf8faa 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -56,9 +56,6 @@ struct BlockBackend { const BlockDevOps *dev_ops; void *dev_opaque; -/* the block size for which the guest device expects atomicity */ -int guest_block_size; - /* If the BDS tree is removed, some of its options are stored here (which * can be used to restore those options in the new BDS on insert) */ BlockBackendRootState root_state; @@ -998,7 +995,6 
@@ void blk_detach_dev(BlockBackend *blk, DeviceState *dev) blk->dev = NULL; blk->dev_ops = NULL; blk->dev_opaque = NULL; -blk->guest_block_size = 512; blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort); blk_unref(blk); } @@ -2100,12 +2096,6 @@ int blk_get_max_iov(BlockBackend *blk) return blk->root->bs->bl.max_iov; } -void blk_set_guest_block_size(BlockBackend *blk, int align) -{ -IO_CODE(); -blk->guest_block_size = align; -} - void *blk_try_blockalign(BlockBackend *blk, size_t size) { IO_CODE(); diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c index a129204c44..b2e458ade3 100644 --- a/block/export/vhost-user-blk-server.c +++ b/block/export/vhost-user-blk-server.c @@ -495,7 +495,6 @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, return -EINVAL; } vexp->blk_size = logical_block_size; -blk_set_guest_block_size(exp->blk, logical_block_size); if (vu_opts->has_num_queues) { num_queues = vu_opts->num_queues; diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index cd804795c6..e9ba752f6b 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1228,7 +1228,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s); blk_set_dev_ops(s->blk, &virtio_block_ops, s); -blk_set_guest_block_size(s->blk, s->conf.conf.logical_block_size); blk_iostatus_enable(s->blk); diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c index 674953f1ad..345b284d70 100644 --- a/hw/block/xen-block.c +++ b/hw/block/xen-block.c @@ -243,7 +243,6 @@ static void xen_block_realize(XenDevice *xendev, Error **errp) } blk_set_dev_ops(blk, &xen_block_dev_ops, blockdev); -blk_set_guest_block_size(blk, conf->logical_block_size); if (conf->discard_granularity == -1) { conf->discard_granularity = conf->physical_block_size; diff --git a/hw/ide/core.c b/hw/ide/core.c index c2caa54285..7cbc0a54a7 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c 
@@ -2548,7 +2548,6 @@ int ide_init_drive(IDEState *s, BlockBackend *blk, IDEDriveKind kind, s->smart_selftest_count = 0; if (kind == IDE_CD) { blk_set_dev_ops(blk, &ide_cd_block_ops, s); -blk_set_guest_block_size(blk, 2048); } else { if (!blk_is_inserted(s->blk)) { error_setg(errp, "Device needs media, but drive is empty"); diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 072686ed58..91acb5c0ce 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2419,7 +2419,6 @@ static void scsi_realize(SCSIDevice *dev, Error **errp) } else { blk_set_dev_ops(s->qdev.conf.
[PULL v2 04/20] block: improve block_dirty_bitmap_merge(): don't allocate extra bitmap
From: Vladimir Sementsov-Ogievskiy We don't need an extra bitmap. All we need is to back up the original bitmap when we do the first merge. So, drop the extra temporary bitmap and work directly with the target and the backup. Still, to keep the old semantics, where on failure the target is unchanged and the user doesn't need to restore it, we need a local_backup variable and must do the restore ourselves on the failure path. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20220517111206.23585-3-v.sementsov...@mail.ru> Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/monitor/bitmap-qmp-cmds.c | 41 + 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/block/monitor/bitmap-qmp-cmds.c b/block/monitor/bitmap-qmp-cmds.c index bd10468596..282363606f 100644 --- a/block/monitor/bitmap-qmp-cmds.c +++ b/block/monitor/bitmap-qmp-cmds.c @@ -261,8 +261,9 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, HBitmap **backup, Error **errp) { BlockDriverState *bs; -BdrvDirtyBitmap *dst, *src, *anon; +BdrvDirtyBitmap *dst, *src; BlockDirtyBitmapOrStrList *lst; +HBitmap *local_backup = NULL; GLOBAL_STATE_CODE(); @@ -271,12 +272,6 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, return NULL; } -anon = bdrv_create_dirty_bitmap(bs, bdrv_dirty_bitmap_granularity(dst), -NULL, errp); -if (!anon) { -return NULL; -} - for (lst = bms; lst; lst = lst->next) { switch (lst->value->type) { const char *name, *node; @@ -285,8 +280,7 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, src = bdrv_find_dirty_bitmap(bs, name); if (!src) { error_setg(errp, "Dirty bitmap '%s' not found", name); -dst = NULL; -goto out; +goto fail; } break; case QTYPE_QDICT: @@ -294,29 +288,36 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, name = lst->value->u.external.name; src = block_dirty_bitmap_lookup(node, name, NULL, errp); if (!src) { -dst = NULL; -goto out; +goto fail; } break; default: abort(); } -if 
(!bdrv_merge_dirty_bitmap(anon, src, NULL, errp)) { -dst = NULL; -goto out; +/* We do backup only for first merge operation */ +if (!bdrv_merge_dirty_bitmap(dst, src, + local_backup ? NULL : &local_backup, + errp)) +{ +goto fail; } } -/* Merge into dst; dst is unchanged on failure. */ -if (!bdrv_merge_dirty_bitmap(dst, anon, backup, errp)) { -dst = NULL; -goto out; +if (backup) { +*backup = local_backup; +} else { +hbitmap_free(local_backup); } - out: -bdrv_release_dirty_bitmap(anon); return dst; + +fail: +if (local_backup) { +bdrv_restore_dirty_bitmap(dst, local_backup); +} + +return NULL; } void qmp_block_dirty_bitmap_merge(const char *node, const char *target, -- 2.35.3
[PATCH] meson: Prefix each element of firmware path
Signed-off-by: Akihiko Odaki --- configure | 23 +++ meson.build | 10 -- meson_options.txt | 2 +- scripts/meson-buildoptions.py | 7 +-- scripts/meson-buildoptions.sh | 6 +++--- softmmu/datadir.c | 8 +--- 6 files changed, 45 insertions(+), 11 deletions(-) diff --git a/configure b/configure index 4b12a8094ca..ab952e7ce3b 100755 --- a/configure +++ b/configure @@ -675,6 +675,29 @@ fi werror="" +meson_option_build_array() { + local a + local ifs + + if test "$targetos" == windows; then +ifs=\; + else +ifs=: + fi + + echo -n "[" + + while IFS="$ifs" read -ra a; do +for e in "${a[@]}"; do + echo -n '"""' + echo -n "$e" | sed 's/\\//g; s/"/\\"/g' + echo -n '"""', +done + done <<< "$1" + + echo "]" +} + . $source_path/scripts/meson-buildoptions.sh meson_options= diff --git a/meson.build b/meson.build index 0c2e11ff071..40111ce4053 100644 --- a/meson.build +++ b/meson.build @@ -1684,7 +1684,13 @@ config_host_data.set_quoted('CONFIG_PREFIX', get_option('prefix')) config_host_data.set_quoted('CONFIG_QEMU_CONFDIR', get_option('prefix') / qemu_confdir) config_host_data.set_quoted('CONFIG_QEMU_DATADIR', get_option('prefix') / qemu_datadir) config_host_data.set_quoted('CONFIG_QEMU_DESKTOPDIR', get_option('prefix') / qemu_desktopdir) -config_host_data.set_quoted('CONFIG_QEMU_FIRMWAREPATH', get_option('prefix') / get_option('qemu_firmwarepath')) + +qemu_firmwarepath = '' +foreach k : get_option('qemu_firmwarepath') + qemu_firmwarepath += '"' + get_option('prefix') / k + '", ' +endforeach +config_host_data.set('CONFIG_QEMU_FIRMWAREPATH', qemu_firmwarepath) + config_host_data.set_quoted('CONFIG_QEMU_HELPERDIR', get_option('prefix') / get_option('libexecdir')) config_host_data.set_quoted('CONFIG_QEMU_ICONDIR', get_option('prefix') / qemu_icondir) config_host_data.set_quoted('CONFIG_QEMU_LOCALEDIR', get_option('prefix') / get_option('localedir')) @@ -3622,7 +3628,7 @@ endif summary_info = {} summary_info += {'Install prefix':get_option('prefix')} summary_info += {'BIOS 
directory':qemu_datadir} -summary_info += {'firmware path': get_option('prefix') / get_option('qemu_firmwarepath')} +summary_info += {'firmware path': qemu_firmwarepath} summary_info += {'binary directory': get_option('prefix') / get_option('bindir')} summary_info += {'library directory': get_option('prefix') / get_option('libdir')} summary_info += {'module directory': qemu_moddir} diff --git a/meson_options.txt b/meson_options.txt index 0e8197386b9..8ad5cd73819 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -6,7 +6,7 @@ option('qemu_suffix', type : 'string', value: 'qemu', description: 'Suffix for QEMU data/modules/config directories (can be empty)') option('docdir', type : 'string', value : 'share/doc', description: 'Base directory for documentation installation (can be empty)') -option('qemu_firmwarepath', type : 'string', value : 'qemu-firmware', +option('qemu_firmwarepath', type : 'array', value : ['qemu-firmware'], description: 'search PATH for firmware files') option('pkgversion', type : 'string', value : '', description: 'use specified string as sub-version of the package') diff --git a/scripts/meson-buildoptions.py b/scripts/meson-buildoptions.py index e624c16b01a..3e2b4785388 100755 --- a/scripts/meson-buildoptions.py +++ b/scripts/meson-buildoptions.py @@ -156,7 +156,7 @@ def cli_metavar(opt): if opt["type"] == "string": return "VALUE" if opt["type"] == "array": -return "CHOICES" +return "CHOICES" if "choices" in opt else "VALUES" return "CHOICE" @@ -199,7 +199,10 @@ def print_parse(options): key = cli_option(opt) name = opt["name"] if require_arg(opt): -print(f'--{key}=*) quote_sh "-D{name}=$2" ;;') +if opt["type"] == "array" and not "choices" in opt: +print(f'--{key}=*) quote_sh "-D{name}=$(meson_option_build_array $2)" ;;') +else: +print(f'--{key}=*) quote_sh "-D{name}=$2" ;;') elif opt["type"] == "boolean": print(f'--enable-{key}) printf "%s" -D{name}=true ;;') print(f'--disable-{key}) printf "%s" -D{name}=false ;;') diff --git 
a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index 1fc1d2e2c36..238bab162bd 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -42,12 +42,12 @@ meson_options_help() { printf "%s\n" ' --enable-trace-backends=CHOICES' printf "%s\n" ' Set available tracing backends [log] (choices:' printf "%s\n" ' dtrace/ftrace/log/nop/simple/syslog/ust)' - printf "%s\n" ' --firmwarepath=VALUE search PATH for firmware files [qemu-firmware]' + printf "%s\n" ' --firmwarepath=VALUESs
Re: [PATCH v3 22/51] target/arm: Trap AdvSIMD usage when Streaming SVE is active
On Mon, 20 Jun 2022 at 19:09, Richard Henderson wrote: > > This new behaviour is in the ARM pseudocode function > AArch64.CheckFPAdvSIMDEnabled, which applies to AArch32 > via AArch32.CheckAdvSIMDOrFPEnabled when the EL to which > the trap would be delivered is in AArch64 mode. > > Given that ARMv9 drops support for AArch32 outside EL0, > the trap EL detection ought to be trivially true, but > the pseudocode still contains a number of conditions, > and QEMU has not yet committed to dropping A32 support > for EL[12] when v9 features are present. > > Since the computation of SME_TRAP_SIMD is necessarily > different for the two modes, we might as well preserve > bits within TBFLAG_ANY and allocate separate bits within > TBFLAG_A32 and TBFLAG_A64 instead. > +# These patterns are taken from Appendix E1.1 of DDI0616 A.a, > +# Arm Architecture Reference Manual Supplement, > +# The Scalable Matrix Extension (SME), for Armv9-A So the thing that worries me about structuring this this way is that the SME supplement appendix includes this caution: # The instruction encoding tables in this section [...] will # require correction if subsequent versions of the A64 ISA # add new instructions which overlap with these encodings. My guess (based on how the H.a Arm ARM has incorporated SME) is that these tables aren't going to be included in the Arm ARM and updated going forward. Instead the behaviour will be documented based on whether (existing and new) instructions call CheckNonStreamingSVEEnabled() or CheckSVEEnabled() in their pseudocode. So I'm a bit uncertain about how awkward it's going to be in future to maintain this transliteration of the SME supplement tables into decodetree: we might find that we have to look at new instructions and kind of reverse-engineer back out any required changes to the tables here, rather than simply "write the trans_ function for the new insn, looking at the pseudocode to see which _access_check() function it should be calling"... thanks -- PMM
Re: [PATCH qemu v2] ppc: Define SETFIELD for the ppc target
Alexey, Gitlab does not like what you're doing here. Several cross-compile runners fail with errors like these (this is from cross-win64-system): ../hw/intc/pnv_xive.c: In function 'pnv_xive_block_id': 3328/builds/danielhb/qemu/target/ppc/cpu.h:45:33: error: overflow in conversion from 'long long unsigned int' to 'long int' changes value from '4222124650659840' to '0' [-Werror=overflow] 3329 45 | #define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs)) 3330 | ^~~ 3331/builds/danielhb/qemu/target/ppc/cpu.h:57:49: note: in definition of macro 'MASK_TO_LSH' 3332 57 | #define MASK_TO_LSH(m) (__builtin_ffsl(m) - 1) | ^ 3334../hw/intc/pnv_xive.c:80:15: note: in expansion of macro 'GETFIELD' 3335 80 | blk = GETFIELD(PC_TCTXT_CHIPID, cfg_val); 3336 | ^~~~ 3337../hw/intc/pnv_xive_regs.h:77:41: note: in expansion of macro 'PPC_BITMASK' 3338 77 | #define PC_TCTXT_CHIPIDPPC_BITMASK(12, 15) 3339 | ^~~ 3340../hw/intc/pnv_xive.c:80:24: note: in expansion of macro 'PC_TCTXT_CHIPID' 3341 80 | blk = GETFIELD(PC_TCTXT_CHIPID, cfg_val); 3342 |^~~ 3343/builds/danielhb/qemu/target/ppc/cpu.h:58:46: error: right shift count is negative [-Werror=shift-count-negative] 3344 58 | #define GETFIELD(m, v) (((v) & (m)) >> MASK_TO_LSH(m)) 3345 | ^~ 3346../hw/intc/pnv_xive.c:80:15: note: in expansion of macro 'GETFIELD' 3347 80 | blk = GETFIELD(PC_TCTXT_CHIPID, cfg_val); 3348 | ^~~~ ../hw/intc/pnv_xive.c: In function 'pnv_xive_vst_addr': 3350/builds/danielhb/qemu/target/ppc/cpu.h:45:33: error: overflow in conversion from 'long long unsigned int' to 'long int' changes value from '13835058055282163712' to '0' [-Werror=overflow] 3351 45 | #define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs)) 3352 | ^~~ 3353/builds/danielhb/qemu/target/ppc/cpu.h:57:49: note: in definition of macro 'MASK_TO_LSH' 3354 57 | #define MASK_TO_LSH(m) (__builtin_ffsl(m) - 1) 3355 | ^ 3356../hw/intc/pnv_xive.c:226:9: note: in expansion of macro 'GETFIELD' 3357 226 | if (GETFIELD(VSD_MODE, vsd) 
== VSD_MODE_FORWARD) { 3358 | ^~~~ 3359../hw/intc/pnv_xive_regs.h:230:33: note: in expansion of macro 'PPC_BITMASK' 3360 230 | #define VSD_MODEPPC_BITMASK(0, 1) 3361 | ^~~ 3362../hw/intc/pnv_xive.c:226:18: note: in expansion of macro 'VSD_MODE' 3363 226 | if (GETFIELD(VSD_MODE, vsd) == VSD_MODE_FORWARD) { 3364 | ^~~~ 3365/builds/danielhb/qemu/target/ppc/cpu.h:58:46: error: right shift count is negative [-Werror=shift-count-negative] 3366 58 | #define GETFIELD(m, v) (((v) & (m)) >> MASK_TO_LSH(m)) 3367 | ^~ 3368../hw/intc/pnv_xive.c:226:9: note: in expansion of macro 'GETFIELD' 3369 226 | if (GETFIELD(VSD_MODE, vsd) == VSD_MODE_FORWARD) { 3370 | ^~~~ You can see the results here: https://gitlab.com/danielhb/qemu/-/jobs/2636585317 Other failing runners include cross-armel-system and cross-mips-system, so I don't think that the error is related to Windows specifics. I guess we're missing an uint64_t cast somewhere like you did in the v2 of this patch. The skiboot macros as is will not cut it. Thanks, Daniel On 6/22/22 02:08, Alexey Kardashevskiy wrote: It keeps repeating, move it to the header. This uses __builtin_ffsl() to allow using the macros in #define. This is not using the QEMU's FIELD macros as this would require changing all such macros found in skiboot (the PPC PowerNV firmware). 
Signed-off-by: Alexey Kardashevskiy --- Changes: v2: * preserved the comment about skiboot * copied the actual macros from skiboot: https://github.com/open-power/skiboot/blob/master/include/bitutils.h#L31 --- include/hw/pci-host/pnv_phb3_regs.h | 16 target/ppc/cpu.h| 12 hw/intc/pnv_xive.c | 20 hw/intc/pnv_xive2.c | 20 hw/pci-host/pnv_phb4.c | 16 5 files changed, 12 insertions(+), 72 deletions(-) diff --git a/include/hw/pci-host/pnv_phb3_regs.h b/include/hw/pci-host/pnv_phb3_regs.h index a174ef1f7045..38f8ce9d7406 100644 --- a/include/hw/pci-host/pnv_phb3_regs.h +++ b/include/hw/pci-host/pnv_phb3_regs.h @@ -12,22 +12,6 @@ #include "qemu/host-utils.h" -/* - * QEMU version of the GETFIELD/SETFIELD macros - * - * These are commo