[PATCH v6 2/8] tcg/sparc: Split out tcg_out_movi_imm32

2022-02-07 Thread Richard Henderson
Handle 32-bit constants with a separate function, so that
tcg_out_movi_int does not need to recurse.  This slightly
rearranges the order of tests for small constants, but
produces the same output.

Signed-off-by: Richard Henderson 
---
 tcg/sparc/tcg-target.c.inc | 36 +---
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
index 82a7c684b6..576903e0d8 100644
--- a/tcg/sparc/tcg-target.c.inc
+++ b/tcg/sparc/tcg-target.c.inc
@@ -413,15 +413,30 @@ static void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, 
int32_t arg)
 tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
 }
 
+static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg)
+{
+if (check_fit_i32(arg, 13)) {
+/* A 13-bit constant sign-extended to 64-bits.  */
+tcg_out_movi_imm13(s, ret, arg);
+} else {
+/* A 32-bit constant zero-extended to 64 bits.  */
+tcg_out_sethi(s, ret, arg);
+if (arg & 0x3ff) {
+tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
+}
+}
+}
+
 static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
  tcg_target_long arg, bool in_prologue)
 {
 tcg_target_long hi, lo = (int32_t)arg;
 tcg_target_long test, lsb;
 
-/* Make sure we test 32-bit constants for imm13 properly.  */
-if (type == TCG_TYPE_I32) {
-arg = lo;
+/* A 32-bit constant, or 32-bit zero-extended to 64-bits.  */
+if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
+tcg_out_movi_imm32(s, ret, arg);
+return;
 }
 
 /* A 13-bit constant sign-extended to 64-bits.  */
@@ -439,15 +454,6 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, 
TCGReg ret,
 }
 }
 
-/* A 32-bit constant, or 32-bit zero-extended to 64-bits.  */
-if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
-tcg_out_sethi(s, ret, arg);
-if (arg & 0x3ff) {
-tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
-}
-return;
-}
-
 /* A 32-bit constant sign-extended to 64-bits.  */
 if (arg == lo) {
 tcg_out_sethi(s, ret, ~arg);
@@ -471,13 +477,13 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, 
TCGReg ret,
 /* A 64-bit constant decomposed into 2 32-bit pieces.  */
 if (check_fit_i32(lo, 13)) {
 hi = (arg - lo) >> 32;
-tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
+tcg_out_movi_imm32(s, ret, hi);
 tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
 tcg_out_arithi(s, ret, ret, lo, ARITH_ADD);
 } else {
 hi = arg >> 32;
-tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
-tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo);
+tcg_out_movi_imm32(s, ret, hi);
+tcg_out_movi_imm32(s, TCG_REG_T2, lo);
 tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
 tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR);
 }
-- 
2.25.1




Re: [PATCH 3/5] vduse-blk: implements vduse-blk export

2022-02-07 Thread Yongji Xie
On Mon, Feb 7, 2022 at 10:15 PM Stefan Hajnoczi  wrote:
>
> On Tue, Jan 25, 2022 at 09:17:58PM +0800, Xie Yongji wrote:
> > This implements a VDUSE block backend based on
> > the libvduse library. We can use it to export the BDSs
> > for both VM and container (host) usage.
> >
> > The new command-line syntax is:
> >
> > $ qemu-storage-daemon \
> > --blockdev file,node-name=drive0,filename=test.img \
> > --export vduse-blk,node-name=drive0,id=vduse-export0,writable=on
> >
> > After the qemu-storage-daemon started, we need to use
> > the "vdpa" command to attach the device to vDPA bus:
> >
> > $ vdpa dev add name vduse-export0 mgmtdev vduse
> >
> > Also the device must be removed via the "vdpa" command
> > before we stop the qemu-storage-daemon.
> >
> > Signed-off-by: Xie Yongji 
> > ---
> >  block/export/export.c |   6 +
> >  block/export/meson.build  |   5 +
> >  block/export/vduse-blk.c  | 427 ++
> >  block/export/vduse-blk.h  |  20 ++
> >  meson.build   |  13 ++
> >  meson_options.txt |   2 +
> >  qapi/block-export.json|  24 +-
> >  scripts/meson-buildoptions.sh |   4 +
> >  8 files changed, 499 insertions(+), 2 deletions(-)
> >  create mode 100644 block/export/vduse-blk.c
> >  create mode 100644 block/export/vduse-blk.h
> >
> > diff --git a/block/export/export.c b/block/export/export.c
> > index 6d3b9964c8..00dd505540 100644
> > --- a/block/export/export.c
> > +++ b/block/export/export.c
> > @@ -26,6 +26,9 @@
> >  #ifdef CONFIG_VHOST_USER_BLK_SERVER
> >  #include "vhost-user-blk-server.h"
> >  #endif
> > +#ifdef CONFIG_VDUSE_BLK_EXPORT
> > +#include "vduse-blk.h"
> > +#endif
> >
> >  static const BlockExportDriver *blk_exp_drivers[] = {
> >  &blk_exp_nbd,
> > @@ -35,6 +38,9 @@ static const BlockExportDriver *blk_exp_drivers[] = {
> >  #ifdef CONFIG_FUSE
> >  &blk_exp_fuse,
> >  #endif
> > +#ifdef CONFIG_VDUSE_BLK_EXPORT
> > +&blk_exp_vduse_blk,
> > +#endif
> >  };
> >
> >  /* Only accessed from the main thread */
> > diff --git a/block/export/meson.build b/block/export/meson.build
> > index 0a08e384c7..cf311d2b1b 100644
> > --- a/block/export/meson.build
> > +++ b/block/export/meson.build
> > @@ -5,3 +5,8 @@ if have_vhost_user_blk_server
> >  endif
> >
> >  blockdev_ss.add(when: fuse, if_true: files('fuse.c'))
> > +
> > +if have_vduse_blk_export
> > +blockdev_ss.add(files('vduse-blk.c'))
> > +blockdev_ss.add(libvduse)
> > +endif
> > diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c
> > new file mode 100644
> > index 00..5a8d289685
> > --- /dev/null
> > +++ b/block/export/vduse-blk.c
> > @@ -0,0 +1,427 @@
> > +/*
> > + * Export QEMU block device via VDUSE
> > + *
> > + * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights 
> > reserved.
> > + *   Portions of codes and concepts borrowed from vhost-user-blk-server.c, 
> > so:
> > + * Copyright (c) 2020 Red Hat, Inc.
> > + *
> > + * Author:
> > + *   Xie Yongji 
> > + *   Coiby Xu 
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or
> > + * later.  See the COPYING file in the top-level directory.
> > + */
> > +
> > +#include 
> > +
> > +#include "qemu/osdep.h"
> > +#include "qapi/error.h"
> > +#include "sysemu/block-backend.h"
> > +#include "block/export.h"
> > +#include "qemu/error-report.h"
> > +#include "util/block-helpers.h"
> > +#include "subprojects/libvduse/libvduse.h"
> > +
> > +#include "standard-headers/linux/virtio_ring.h"
> > +#include "standard-headers/linux/virtio_blk.h"
> > +
> > +#define VIRTIO_BLK_SECTOR_BITS 9
> > +#define VIRTIO_BLK_SECTOR_SIZE (1ULL << VIRTIO_BLK_SECTOR_BITS)
> > +
> > +#define VDUSE_DEFAULT_NUM_QUEUE 1
> > +#define VDUSE_DEFAULT_QUEUE_SIZE 128
>
> QEMU's virtio-blk emulation has increased this limit to 256 for better
> performance with large block size I/O patterns. I think it would be okay
> to increase it here too.
>

Sure.

> > +
> > +typedef struct VduseBlkExport {
> > +BlockExport export;
> > +VduseDev *dev;
> > +uint16_t num_queues;
> > +uint32_t blk_size;
> > +bool writable;
> > +} VduseBlkExport;
> > +
> > +struct virtio_blk_inhdr {
> > +unsigned char status;
> > +};
> > +
> > +typedef struct VduseBlkReq {
> > +VduseVirtqElement elem;
> > +int64_t sector_num;
> > +size_t in_len;
> > +struct virtio_blk_inhdr *in;
> > +struct virtio_blk_outhdr out;
> > +VduseVirtq *vq;
> > +} VduseBlkReq;
> > +
> > +static void vduse_blk_req_complete(VduseBlkReq *req)
> > +{
> > +vduse_queue_push(req->vq, &req->elem, req->in_len);
> > +vduse_queue_notify(req->vq);
> > +
> > +free(req);
> > +}
> > +
> > +static bool vduse_blk_sect_range_ok(VduseBlkExport *vblk_exp,
> > +uint64_t sector, size_t size)
> > +{
> > +uint64_t nb_sectors;
> > +uint64_t total_sectors;
> > +
> > +if (size % VIRTIO_BLK_SECTOR_SIZE) {
> > +return false;
> > 

[PATCH 2/5] linux-user: Introduce host_signal_mask

2022-02-07 Thread Richard Henderson
Do not directly access the uc_sigmask member.
This is preparation for a sparc64 fix.

Signed-off-by: Richard Henderson 
---
 linux-user/include/host/aarch64/host-signal.h  |  5 +
 linux-user/include/host/alpha/host-signal.h|  5 +
 linux-user/include/host/arm/host-signal.h  |  5 +
 linux-user/include/host/i386/host-signal.h |  5 +
 .../include/host/loongarch64/host-signal.h |  5 +
 linux-user/include/host/mips/host-signal.h |  5 +
 linux-user/include/host/ppc/host-signal.h  |  5 +
 linux-user/include/host/riscv/host-signal.h|  5 +
 linux-user/include/host/s390/host-signal.h |  5 +
 linux-user/include/host/sparc/host-signal.h|  5 +
 linux-user/include/host/x86_64/host-signal.h   |  5 +
 linux-user/signal.c| 18 --
 12 files changed, 63 insertions(+), 10 deletions(-)

diff --git a/linux-user/include/host/aarch64/host-signal.h 
b/linux-user/include/host/aarch64/host-signal.h
index 9770b36dc1..76ab078069 100644
--- a/linux-user/include/host/aarch64/host-signal.h
+++ b/linux-user/include/host/aarch64/host-signal.h
@@ -40,6 +40,11 @@ static inline void host_signal_set_pc(ucontext_t *uc, 
uintptr_t pc)
 uc->uc_mcontext.pc = pc;
 }
 
+static inline void *host_signal_mask(ucontext_t *uc)
+{
+return &uc->uc_sigmask;
+}
+
 static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc)
 {
 struct _aarch64_ctx *hdr;
diff --git a/linux-user/include/host/alpha/host-signal.h 
b/linux-user/include/host/alpha/host-signal.h
index f4c942948a..a44d670f2b 100644
--- a/linux-user/include/host/alpha/host-signal.h
+++ b/linux-user/include/host/alpha/host-signal.h
@@ -21,6 +21,11 @@ static inline void host_signal_set_pc(ucontext_t *uc, 
uintptr_t pc)
 uc->uc_mcontext.sc_pc = pc;
 }
 
+static inline void *host_signal_mask(ucontext_t *uc)
+{
+return &uc->uc_sigmask;
+}
+
 static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc)
 {
 uint32_t *pc = (uint32_t *)host_signal_pc(uc);
diff --git a/linux-user/include/host/arm/host-signal.h 
b/linux-user/include/host/arm/host-signal.h
index 6c095773c0..bbeb4ffefb 100644
--- a/linux-user/include/host/arm/host-signal.h
+++ b/linux-user/include/host/arm/host-signal.h
@@ -21,6 +21,11 @@ static inline void host_signal_set_pc(ucontext_t *uc, 
uintptr_t pc)
 uc->uc_mcontext.arm_pc = pc;
 }
 
+static inline void *host_signal_mask(ucontext_t *uc)
+{
+return &uc->uc_sigmask;
+}
+
 static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc)
 {
 /*
diff --git a/linux-user/include/host/i386/host-signal.h 
b/linux-user/include/host/i386/host-signal.h
index abe1ece5c9..fd36f06bda 100644
--- a/linux-user/include/host/i386/host-signal.h
+++ b/linux-user/include/host/i386/host-signal.h
@@ -21,6 +21,11 @@ static inline void host_signal_set_pc(ucontext_t *uc, 
uintptr_t pc)
 uc->uc_mcontext.gregs[REG_EIP] = pc;
 }
 
+static inline void *host_signal_mask(ucontext_t *uc)
+{
+return &uc->uc_sigmask;
+}
+
 static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc)
 {
 return uc->uc_mcontext.gregs[REG_TRAPNO] == 0xe
diff --git a/linux-user/include/host/loongarch64/host-signal.h 
b/linux-user/include/host/loongarch64/host-signal.h
index 7effa24251..a9dfe0c688 100644
--- a/linux-user/include/host/loongarch64/host-signal.h
+++ b/linux-user/include/host/loongarch64/host-signal.h
@@ -21,6 +21,11 @@ static inline void host_signal_set_pc(ucontext_t *uc, 
uintptr_t pc)
 uc->uc_mcontext.__pc = pc;
 }
 
+static inline void *host_signal_mask(ucontext_t *uc)
+{
+return &uc->uc_sigmask;
+}
+
 static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc)
 {
 const uint32_t *pinsn = (const uint32_t *)host_signal_pc(uc);
diff --git a/linux-user/include/host/mips/host-signal.h 
b/linux-user/include/host/mips/host-signal.h
index c666ed8c3f..ff840dd491 100644
--- a/linux-user/include/host/mips/host-signal.h
+++ b/linux-user/include/host/mips/host-signal.h
@@ -21,6 +21,11 @@ static inline void host_signal_set_pc(ucontext_t *uc, 
uintptr_t pc)
 uc->uc_mcontext.pc = pc;
 }
 
+static inline void *host_signal_mask(ucontext_t *uc)
+{
+return &uc->uc_sigmask;
+}
+
 #if defined(__misp16) || defined(__mips_micromips)
 #error "Unsupported encoding"
 #endif
diff --git a/linux-user/include/host/ppc/host-signal.h 
b/linux-user/include/host/ppc/host-signal.h
index 1d8e658ff7..730a321d98 100644
--- a/linux-user/include/host/ppc/host-signal.h
+++ b/linux-user/include/host/ppc/host-signal.h
@@ -21,6 +21,11 @@ static inline void host_signal_set_pc(ucontext_t *uc, 
uintptr_t pc)
 uc->uc_mcontext.regs->nip = pc;
 }
 
+static inline void *host_signal_mask(ucontext_t *uc)
+{
+return &uc->uc_sigmask;
+}
+
 static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc)
 {
 return uc->uc_mcontext.regs->trap != 0x400
diff --git a/linux-user/include/host/riscv/host-signal.h 
b/linux-user/include/host/riscv/ho

[PATCH v6 1/8] tcg/sparc: Use tcg_out_movi_imm13 in tcg_out_addsub2_i64

2022-02-07 Thread Richard Henderson
When BH is constant, it is constrained to 10 bits for use in MOVCC.
For the cases in which we must load the constant BH into a register,
we do not need the full logic of tcg_out_movi; we can use the simpler
function for emitting a 13 bit constant.

This eliminates the only case in which TCG_REG_T2 was passed to
tcg_out_movi, which will shortly become invalid.

Signed-off-by: Richard Henderson 
---
 tcg/sparc/tcg-target.c.inc | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
index 0c062c60eb..82a7c684b6 100644
--- a/tcg/sparc/tcg-target.c.inc
+++ b/tcg/sparc/tcg-target.c.inc
@@ -795,7 +795,7 @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, 
TCGReg rh,
 if (use_vis3_instructions && !is_sub) {
 /* Note that ADDXC doesn't accept immediates.  */
 if (bhconst && bh != 0) {
-   tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh);
+   tcg_out_movi_imm13(s, TCG_REG_T2, bh);
bh = TCG_REG_T2;
 }
 tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC);
@@ -811,9 +811,13 @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, 
TCGReg rh,
tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, rh, ah, 0);
}
 } else {
-/* Otherwise adjust BH as if there is carry into T2 ... */
+/*
+ * Otherwise adjust BH as if there is carry into T2.
+ * Note that constant BH is constrained to 10 bits for the MOVCC,
+ * so the adjustment fits 11 bits.
+ */
 if (bhconst) {
-tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh + (is_sub ? -1 : 1));
+tcg_out_movi_imm13(s, TCG_REG_T2, bh + (is_sub ? -1 : 1));
 } else {
 tcg_out_arithi(s, TCG_REG_T2, bh, 1,
is_sub ? ARITH_SUB : ARITH_ADD);
-- 
2.25.1




[PATCH 5/5] linux-user/include/host/sparc64: Fix host_sigcontext

2022-02-07 Thread Richard Henderson
Sparc64 is unique on linux in *not* passing ucontext_t as
the third argument to a SA_SIGINFO handler.  It passes the
old struct sigcontext instead.

Fixes: 8b5bd461935b ("linux-user/host/sparc: Populate host_signal.h")
Signed-off-by: Richard Henderson 
---
 linux-user/include/host/sparc64/host-signal.h | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/linux-user/include/host/sparc64/host-signal.h 
b/linux-user/include/host/sparc64/host-signal.h
index f8a8a4908d..64957c2bca 100644
--- a/linux-user/include/host/sparc64/host-signal.h
+++ b/linux-user/include/host/sparc64/host-signal.h
@@ -11,22 +11,23 @@
 #ifndef SPARC64_HOST_SIGNAL_H
 #define SPARC64_HOST_SIGNAL_H
 
-/* FIXME: the third argument to a SA_SIGINFO handler is *not* ucontext_t. */
-typedef ucontext_t host_sigcontext;
+/* The third argument to a SA_SIGINFO handler is struct sigcontext.  */
+typedef struct sigcontext host_sigcontext;
 
-static inline uintptr_t host_signal_pc(host_sigcontext *uc)
+static inline uintptr_t host_signal_pc(host_sigcontext *sc)
 {
-return uc->uc_mcontext.mc_gregs[MC_PC];
+return sc->sigc_regs.tpc;
 }
 
-static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc)
+static inline void host_signal_set_pc(host_sigcontext *sc, uintptr_t pc)
 {
-uc->uc_mcontext.mc_gregs[MC_PC] = pc;
+sc->sigc_regs.tpc = pc;
+sc->sigc_regs.tnpc = pc + 4;
 }
 
-static inline void *host_signal_mask(host_sigcontext *uc)
+static inline void *host_signal_mask(host_sigcontext *sc)
 {
-return &uc->uc_sigmask;
+return &sc->sigc_mask;
 }
 
 static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc)
-- 
2.25.1




Re: [PATCH v9 00/23] QEMU RISC-V AIA support

2022-02-07 Thread Alistair Francis
On Tue, Feb 8, 2022 at 2:16 PM Alistair Francis  wrote:
>
> On Sat, Feb 5, 2022 at 3:47 AM Anup Patel  wrote:
> >
> > From: Anup Patel 
> >
> > The advanced interrupt architecture (AIA) extends the per-HART local
> > > interrupt support. Along with this, it also adds IMSIC (MSI controller)
> > and Advanced PLIC (wired interrupt controller).
> >
> > The latest AIA draft specification can be found here:
> > https://github.com/riscv/riscv-aia/releases/download/0.2-draft.28/riscv-interrupts-028.pdf
> >
> > This series adds RISC-V AIA support in QEMU which includes emulating all
> > AIA local CSRs, APLIC, and IMSIC. Only AIA local interrupt filtering is
> > not implemented because we don't have any local interrupt greater than 12.
> >
> > To enable AIA in QEMU, use one of the following:
> > > 1) Only AIA local interrupt CSRs: Pass "x-aia=true" as CPU parameter
> >in the QEMU command-line
> > 2) Only APLIC for virt machine: Pass "aia=aplic" as machine parameter
> >in the QEMU command-line
> > 3) Both APLIC and IMSIC for virt machine: Pass "aia=aplic-imsic" as
> >machine parameter in the QEMU command-line
> > 4) Both APLIC and IMSIC with 2 guest files for virt machine: Pass
> >"aia=aplic-imsic,aia-guests=2" as machine parameter in the QEMU
> >command-line
> >
> > > To test this series, we require OpenSBI and Linux with AIA support which can
> > be found in:
> > riscv_aia_v2 branch at https://github.com/avpatel/opensbi.git
> > riscv_aia_v1 branch at https://github.com/avpatel/linux.git
> >
> > > This series can be found in the riscv_aia_v9 branch at:
> > https://github.com/avpatel/qemu.git
> >
> > Changes since v8:
> >  - Use error_setg() in riscv_imsic_realize() added by PATCH20
> >
> > Changes since v7:
> >  - Rebased on latest riscv-to-apply.next branch of Alistair's repo
> >  - Improved default priority assignment in PATCH9
> >
> > Changes since v6:
> >  - Fixed priority comparison in riscv_cpu_pending_to_irq() of PATCH9
> >  - Fixed typos in comments added by PATCH11
> >  - Added "pend = true;" for CSR_MSETEIPNUM case of rmw_xsetclreinum()
> >in PATCH15
> >  - Handle ithreshold == 0 case in riscv_aplic_idc_topi() of PATCH18
> >  - Allow setting pending bit for Level0 or Level1 interrupts in
> >riscv_aplic_set_pending() of PATCH18
> >  - Force DOMAINCFG[31:24] bits to 0x80 in riscv_aplic_read() of PATCH18
> > >  - For APLIC direct mode, set target.iprio to 1 when zero is written
> >in PATCH18
> >  - Handle eithreshold == 0 case in riscv_imsic_topei() of PATCH20
> >
> > Changes since v5:
> >  - Moved VSTOPI_NUM_SRCS define to top of the file in PATCH13
> >  - Fixed typo in PATCH16
> >
> > Changes since v4:
> >  - Changed IRQ_LOCAL_MAX to 16 in PATCH2
> >  - Fixed typo in PATCH10
> >  - Replaced TARGET_LONG_BITS with riscv_cpu_mxl_bits(env) in PATCH11
> >  - Replaced TARGET_LONG_BITS with riscv_cpu_mxl_bits(env) in PATCH14
> >  - Replaced TARGET_LONG_BITS with riscv_cpu_mxl_bits(env) in PATCH15
> >  - Replaced TARGET_LONG_BITS with xlen passed via ireg callback in PATCH20
> > >  - Restrict maximum IMSIC guest files per-HART of virt machine to 7 in
> >PATCH21.
> >  - Added separate PATCH23 to increase maximum number of allowed CPUs
> >for virt machine
> >
> > Changes since v3:
> >  - Replaced "aplic,xyz" and "imsic,xyz" DT properties with "riscv,xyz"
> >DT properties because "aplic" and "imsic" are not valid vendor names
> >required by Linux DT schema checker.
> >
> > Changes since v2:
> >  - Update PATCH4 to check and inject interrupt after V=1 when
> >transitioning from V=0 to V=1
> >
> > Changes since v1:
> >  - Revamped whole series and created more granular patches
> >  - Added HGEIE and HGEIP CSR emulation for H-extension
> >  - Added APLIC emulation
> >  - Added IMSIC emulation
> >
> > Anup Patel (23):
> >   target/riscv: Fix trap cause for RV32 HS-mode CSR access from RV64
> > HS-mode
> >   target/riscv: Implement SGEIP bit in hip and hie CSRs
> >   target/riscv: Implement hgeie and hgeip CSRs
> >   target/riscv: Improve delivery of guest external interrupts
> >   target/riscv: Allow setting CPU feature from machine/device emulation
> >   target/riscv: Add AIA cpu feature
> >   target/riscv: Add defines for AIA CSRs
> >   target/riscv: Allow AIA device emulation to set ireg rmw callback
> >   target/riscv: Implement AIA local interrupt priorities
> >   target/riscv: Implement AIA CSRs for 64 local interrupts on RV32
> >   target/riscv: Implement AIA hvictl and hviprioX CSRs
> >   target/riscv: Implement AIA interrupt filtering CSRs
> >   target/riscv: Implement AIA mtopi, stopi, and vstopi CSRs
> >   target/riscv: Implement AIA xiselect and xireg CSRs
> >   target/riscv: Implement AIA IMSIC interface CSRs
> >   hw/riscv: virt: Use AIA INTC compatible string when available
> >   target/riscv: Allow users to force enable AIA CSRs in HART
> >   hw/intc: Add RISC-V AIA APLIC device emulation
> >   hw/riscv: virt: Add optional AIA APLIC support to virt machine
> >   hw/intc: Add

Re: [PATCH 16/31] vhost: pass queue index to vhost_vq_get_addr

2022-02-07 Thread Jason Wang



在 2022/2/1 上午1:44, Eugenio Perez Martin 写道:

On Sat, Jan 29, 2022 at 9:20 AM Jason Wang  wrote:


在 2022/1/22 上午4:27, Eugenio Pérez 写道:

Doing that way allows vhost backend to know what address to return.

Signed-off-by: Eugenio Pérez 
---
   hw/virtio/vhost.c | 6 +++---
   1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 7b03efccec..64b955ba0c 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -798,9 +798,10 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
   struct vhost_virtqueue *vq,
   unsigned idx, bool enable_log)
   {
-struct vhost_vring_addr addr;
+struct vhost_vring_addr addr = {
+.index = idx,
+};
   int r;
-memset(&addr, 0, sizeof(struct vhost_vring_addr));

   if (dev->vhost_ops->vhost_vq_get_addr) {
   r = dev->vhost_ops->vhost_vq_get_addr(dev, &addr, vq);
@@ -813,7 +814,6 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
   addr.avail_user_addr = (uint64_t)(unsigned long)vq->avail;
   addr.used_user_addr = (uint64_t)(unsigned long)vq->used;
   }


I'm a bit lost in the logic above, any reason we need call
vhost_vq_get_addr() :) ?


It's the way vhost_virtqueue_set_addr works if the backend has a
vhost_vq_get_addr operation (currently, only vhost-vdpa). vhost first
asks the back end for the address and then sets it.



Right, it's because vhost-vdpa doesn't use VA but GPA. But I'm not sure 
it's worth a dedicated vhost_ops. But considering that we are introducing 
the shadow virtqueue, it should be OK for now.


(In the future, we may consider generalizing the non-vhost-vdpa-specific 
parts into VhostShadowVirtqueue; then we can get rid of this vhost_ops.)





Previously, index was not needed because all the information was in
vhost_virtqueue. However to extract queue index from vhost_virtqueue
is tricky, so I think it's easier to simply have that information at
request, something similar to get_base or get_num when asking vdpa
device. We can extract the index from vq - dev->vqs or something
similar if it's preferred.



It looks odd for the caller to pass the index, considering vhost_virtqueue 
is already passed. So I think we need to deduce it from vhost_virtqueue as 
you mentioned here.


Thanks




Thanks!


Thanks



-addr.index = idx;
   addr.log_guest_addr = vq->used_phys;
   addr.flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0;
   r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr);





Re: [PATCH] hvf: arm: Handle ID_AA64ISAR2_EL1 reads

2022-02-07 Thread Ivan Babrou
The patch addresses the current issue for me, thanks!

Is it possible to make it more future proof? I can imagine a very
similar situation arising in the future and it would be good to be
able to handle it gracefully. If it's not possible, then maybe there's
a way to output some sort of error from qemu that a user can search
for. Right now all one gets is a qemu process using 100% of CPU and
outputting nothing. None of this is required for this patch, but it
would be good to have it at some point.

Reviewed-by: Ivan Babrou 



Re: [PATCH 2/5] libvduse: Add VDUSE (vDPA Device in Userspace) library

2022-02-07 Thread Yongji Xie
On Mon, Feb 7, 2022 at 10:01 PM Stefan Hajnoczi  wrote:
>
> On Tue, Jan 25, 2022 at 09:17:57PM +0800, Xie Yongji wrote:
> > VDUSE [1] is a linux framework that makes it possible to implement
> > software-emulated vDPA devices in userspace. This adds a library
> > as a subproject to help implementing VDUSE backends in QEMU.
> >
> > [1] https://www.kernel.org/doc/html/latest/userspace-api/vduse.html
>
> This library assumes that the program is allowed to access the control
> device (/dev/vduse/control). Is that always the case or should the
> library also support access to /dev/vduse/ only (maybe even with
> file descriptor passing) so a privileged process can create/destroy
> VDUSE devices?
>

Makes sense. I will add two new APIs to support these two cases.

> I didn't review the vring code in detail.
>
> >
> > Signed-off-by: Xie Yongji 
> > ---
> >  meson.build |   15 +
> >  meson_options.txt   |2 +
> >  scripts/meson-buildoptions.sh   |3 +
> >  subprojects/libvduse/include/atomic.h   |1 +
> >  subprojects/libvduse/libvduse.c | 1025 +++
> >  subprojects/libvduse/libvduse.h |  193 
> >  subprojects/libvduse/meson.build|   10 +
> >  subprojects/libvduse/standard-headers/linux |1 +
> >  8 files changed, 1250 insertions(+)
> >  create mode 12 subprojects/libvduse/include/atomic.h
> >  create mode 100644 subprojects/libvduse/libvduse.c
> >  create mode 100644 subprojects/libvduse/libvduse.h
> >  create mode 100644 subprojects/libvduse/meson.build
> >  create mode 12 subprojects/libvduse/standard-headers/linux
> >
> > diff --git a/meson.build b/meson.build
> > index 333c61deba..864fb50ade 100644
> > --- a/meson.build
> > +++ b/meson.build
> > @@ -1305,6 +1305,21 @@ if not get_option('fuse_lseek').disabled()
> >endif
> >  endif
> >
> > +have_libvduse = (targetos == 'linux')
> > +if get_option('libvduse').enabled()
> > +if targetos != 'linux'
> > +error('libvduse requires linux')
> > +endif
> > +elif get_option('libvduse').disabled()
> > +have_libvduse = false
> > +endif
> > +
> > +libvduse = not_found
> > +if have_libvduse
> > +  libvduse_proj = subproject('libvduse')
> > +  libvduse = libvduse_proj.get_variable('libvduse_dep')
> > +endif
> > +
> >  # libbpf
> >  libbpf = dependency('libbpf', required: get_option('bpf'), method: 
> > 'pkg-config')
> >  if libbpf.found() and not cc.links('''
> > diff --git a/meson_options.txt b/meson_options.txt
> > index 921967eddb..16790d1814 100644
> > --- a/meson_options.txt
> > +++ b/meson_options.txt
> > @@ -195,6 +195,8 @@ option('virtfs', type: 'feature', value: 'auto',
> > description: 'virtio-9p support')
> >  option('virtiofsd', type: 'feature', value: 'auto',
> > description: 'build virtiofs daemon (virtiofsd)')
> > +option('libvduse', type: 'feature', value: 'auto',
> > +   description: 'build VDUSE Library')
> >
> >  option('capstone', type: 'combo', value: 'auto',
> > choices: ['disabled', 'enabled', 'auto', 'system', 'internal'],
> > diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
> > index a4af02c527..af5c75d758 100644
> > --- a/scripts/meson-buildoptions.sh
> > +++ b/scripts/meson-buildoptions.sh
> > @@ -58,6 +58,7 @@ meson_options_help() {
> >printf "%s\n" '  libssh  ssh block device support'
> >printf "%s\n" '  libudev Use libudev to enumerate host devices'
> >printf "%s\n" '  libusb  libusb support for USB passthrough'
> > +  printf "%s\n" '  libvdusebuild VDUSE Library'
> >printf "%s\n" '  libxml2 libxml2 support for Parallels image 
> > format'
> >printf "%s\n" '  linux-aio   Linux AIO support'
> >printf "%s\n" '  linux-io-uring  Linux io_uring support'
> > @@ -188,6 +189,8 @@ _meson_option_parse() {
> >  --disable-libudev) printf "%s" -Dlibudev=disabled ;;
> >  --enable-libusb) printf "%s" -Dlibusb=enabled ;;
> >  --disable-libusb) printf "%s" -Dlibusb=disabled ;;
> > +--enable-libvduse) printf "%s" -Dlibvduse=enabled ;;
> > +--disable-libvduse) printf "%s" -Dlibvduse=disabled ;;
> >  --enable-libxml2) printf "%s" -Dlibxml2=enabled ;;
> >  --disable-libxml2) printf "%s" -Dlibxml2=disabled ;;
> >  --enable-linux-aio) printf "%s" -Dlinux_aio=enabled ;;
> > diff --git a/subprojects/libvduse/include/atomic.h 
> > b/subprojects/libvduse/include/atomic.h
> > new file mode 12
> > index 00..8c2be64f7b
> > --- /dev/null
> > +++ b/subprojects/libvduse/include/atomic.h
> > @@ -0,0 +1 @@
> > +../../../include/qemu/atomic.h
> > \ No newline at end of file
> > diff --git a/subprojects/libvduse/libvduse.c 
> > b/subprojects/libvduse/libvduse.c
> > new file mode 100644
> > index 00..7671864bca
> > --- /dev/null
> > +++ b/subprojects/libvduse/libvduse.c
> > @@ -0,0 +1,1025 @@
> > +/*
> > + * VDUSE (vDPA Device in Userspace) library

Re: [PATCH v6 3/7] target/riscv: access configuration through cfg_ptr in DisasContext

2022-02-07 Thread Alistair Francis
On Tue, Feb 8, 2022 at 4:07 PM Alistair Francis  wrote:
>
> On Wed, Feb 2, 2022 at 11:26 AM Philipp Tomsich
>  wrote:
> >
> > The implementation in trans_{rvi,rvv,rvzfh}.c.inc accesses the shallow
> > copies (in DisasContext) of some of the elements available in the
> > RISCVCPUConfig structure.  This commit redirects accesses to use the
> > cfg_ptr copied into DisasContext and removes the shallow copies.
> >
> > Signed-off-by: Philipp Tomsich 
> > Reviewed-by: Alistair Francis 
> > Suggested-by: Richard Henderson 
> > Reviewed-by: Richard Henderson 
> >
> > ---
> >
> > (no changes since v3)
> >
> > Changes in v3:
> > - (new patch) test extension-availability through cfg_ptr in
> >   DisasContext, removing the fields that have been copied into
> >   DisasContext directly
> >
> >  target/riscv/insn_trans/trans_rvi.c.inc   |   2 +-
> >  target/riscv/insn_trans/trans_rvv.c.inc   | 104 +++---
> >  target/riscv/insn_trans/trans_rvzfh.c.inc |   4 +-
> >  target/riscv/translate.c  |  14 ---
> >  4 files changed, 55 insertions(+), 69 deletions(-)
> >
> > diff --git a/target/riscv/insn_trans/trans_rvi.c.inc 
> > b/target/riscv/insn_trans/trans_rvi.c.inc
> > index 3cd1b3f877..f1342f30f8 100644
> > --- a/target/riscv/insn_trans/trans_rvi.c.inc
> > +++ b/target/riscv/insn_trans/trans_rvi.c.inc
> > @@ -806,7 +806,7 @@ static bool trans_fence(DisasContext *ctx, arg_fence *a)
> >
> >  static bool trans_fence_i(DisasContext *ctx, arg_fence_i *a)
> >  {
> > -if (!ctx->ext_ifencei) {
> > +if (!ctx->cfg_ptr->ext_ifencei) {
> >  return false;
> >  }
> >
> > diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
> > b/target/riscv/insn_trans/trans_rvv.c.inc
> > index f85a9e83b4..ff09e345ad 100644
> > --- a/target/riscv/insn_trans/trans_rvv.c.inc
> > +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> > @@ -74,7 +74,7 @@ static bool require_zve32f(DisasContext *s)
> >  }
> >
> >  /* Zve32f doesn't support FP64. (Section 18.2) */
> > -return s->ext_zve32f ? s->sew <= MO_32 : true;
> > +return s->cfg_ptr->ext_zve32f ? s->sew <= MO_32 : true;
> >  }
> >
> >  static bool require_scale_zve32f(DisasContext *s)
> > @@ -85,7 +85,7 @@ static bool require_scale_zve32f(DisasContext *s)
> >  }
> >
> >  /* Zve32f doesn't support FP64. (Section 18.2) */
> > -return s->ext_zve64f ? s->sew <= MO_16 : true;
> > +return s->cfg_ptr->ext_zve64f ? s->sew <= MO_16 : true;
> >  }
> >
> >  static bool require_zve64f(DisasContext *s)
> > @@ -96,7 +96,7 @@ static bool require_zve64f(DisasContext *s)
> >  }
> >
> >  /* Zve64f doesn't support FP64. (Section 18.2) */
> > -return s->ext_zve64f ? s->sew <= MO_32 : true;
> > +return s->cfg_ptr->ext_zve64f ? s->sew <= MO_32 : true;
> >  }
> >
> >  static bool require_scale_zve64f(DisasContext *s)
> > @@ -107,7 +107,7 @@ static bool require_scale_zve64f(DisasContext *s)
> >  }
> >
> >  /* Zve64f doesn't support FP64. (Section 18.2) */
> > -return s->ext_zve64f ? s->sew <= MO_16 : true;
> > +return s->cfg_ptr->ext_zve64f ? s->sew <= MO_16 : true;
> >  }
> >
> >  /* Destination vector register group cannot overlap source mask register. 
> > */
> > @@ -174,7 +174,7 @@ static bool do_vsetvl(DisasContext *s, int rd, int rs1, 
> > TCGv s2)
> >  TCGv s1, dst;
> >
> >  if (!require_rvv(s) ||
> > -!(has_ext(s, RVV) || s->ext_zve32f || s->ext_zve64f)) {
> > +!(has_ext(s, RVV) || s->cfg_ptr->ext_zve32f || 
> > s->cfg_ptr->ext_zve64f)) {
>
> This fails checkpatch as the line is too long
>
> Can you run checkpatch on the series and re-send it?

Argh, there are too many patches depending on this!

Don't worry about resending it, I'll fixup the failures (assuming that's ok).

Alistair

>
> Alistair



Re: [PATCH v6 3/7] target/riscv: access configuration through cfg_ptr in DisasContext

2022-02-07 Thread Alistair Francis
On Wed, Feb 2, 2022 at 11:26 AM Philipp Tomsich
 wrote:
>
> The implementation in trans_{rvi,rvv,rvzfh}.c.inc accesses the shallow
> copies (in DisasContext) of some of the elements available in the
> RISCVCPUConfig structure.  This commit redirects accesses to use the
> cfg_ptr copied into DisasContext and removes the shallow copies.
>
> Signed-off-by: Philipp Tomsich 
> Reviewed-by: Alistair Francis 
> Suggested-by: Richard Henderson 
> Reviewed-by: Richard Henderson 
>
> ---
>
> (no changes since v3)
>
> Changes in v3:
> - (new patch) test extension-availability through cfg_ptr in
>   DisasContext, removing the fields that have been copied into
>   DisasContext directly
>
>  target/riscv/insn_trans/trans_rvi.c.inc   |   2 +-
>  target/riscv/insn_trans/trans_rvv.c.inc   | 104 +++---
>  target/riscv/insn_trans/trans_rvzfh.c.inc |   4 +-
>  target/riscv/translate.c  |  14 ---
>  4 files changed, 55 insertions(+), 69 deletions(-)
>
> diff --git a/target/riscv/insn_trans/trans_rvi.c.inc 
> b/target/riscv/insn_trans/trans_rvi.c.inc
> index 3cd1b3f877..f1342f30f8 100644
> --- a/target/riscv/insn_trans/trans_rvi.c.inc
> +++ b/target/riscv/insn_trans/trans_rvi.c.inc
> @@ -806,7 +806,7 @@ static bool trans_fence(DisasContext *ctx, arg_fence *a)
>
>  static bool trans_fence_i(DisasContext *ctx, arg_fence_i *a)
>  {
> -if (!ctx->ext_ifencei) {
> +if (!ctx->cfg_ptr->ext_ifencei) {
>  return false;
>  }
>
> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
> b/target/riscv/insn_trans/trans_rvv.c.inc
> index f85a9e83b4..ff09e345ad 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -74,7 +74,7 @@ static bool require_zve32f(DisasContext *s)
>  }
>
>  /* Zve32f doesn't support FP64. (Section 18.2) */
> -return s->ext_zve32f ? s->sew <= MO_32 : true;
> +return s->cfg_ptr->ext_zve32f ? s->sew <= MO_32 : true;
>  }
>
>  static bool require_scale_zve32f(DisasContext *s)
> @@ -85,7 +85,7 @@ static bool require_scale_zve32f(DisasContext *s)
>  }
>
>  /* Zve32f doesn't support FP64. (Section 18.2) */
> -return s->ext_zve64f ? s->sew <= MO_16 : true;
> +return s->cfg_ptr->ext_zve64f ? s->sew <= MO_16 : true;
>  }
>
>  static bool require_zve64f(DisasContext *s)
> @@ -96,7 +96,7 @@ static bool require_zve64f(DisasContext *s)
>  }
>
>  /* Zve64f doesn't support FP64. (Section 18.2) */
> -return s->ext_zve64f ? s->sew <= MO_32 : true;
> +return s->cfg_ptr->ext_zve64f ? s->sew <= MO_32 : true;
>  }
>
>  static bool require_scale_zve64f(DisasContext *s)
> @@ -107,7 +107,7 @@ static bool require_scale_zve64f(DisasContext *s)
>  }
>
>  /* Zve64f doesn't support FP64. (Section 18.2) */
> -return s->ext_zve64f ? s->sew <= MO_16 : true;
> +return s->cfg_ptr->ext_zve64f ? s->sew <= MO_16 : true;
>  }
>
>  /* Destination vector register group cannot overlap source mask register. */
> @@ -174,7 +174,7 @@ static bool do_vsetvl(DisasContext *s, int rd, int rs1, 
> TCGv s2)
>  TCGv s1, dst;
>
>  if (!require_rvv(s) ||
> -!(has_ext(s, RVV) || s->ext_zve32f || s->ext_zve64f)) {
> +!(has_ext(s, RVV) || s->cfg_ptr->ext_zve32f || 
> s->cfg_ptr->ext_zve64f)) {

This fails checkpatch as the line is too long

Can you run checkpatch on the series and re-send it?

Alistair



Re: [PATCH 08/11] mos6522: add "info via" HMP command for debugging

2022-02-07 Thread Philippe Mathieu-Daudé via

On 7/2/22 20:34, Peter Maydell wrote:

On Thu, 27 Jan 2022 at 21:03, Mark Cave-Ayland
 wrote:


This displays detailed information about the device registers and timers to aid
debugging problems with timers and interrupts.

Signed-off-by: Mark Cave-Ayland 
---
  hmp-commands-info.hx | 12 ++
  hw/misc/mos6522.c| 92 
  2 files changed, 104 insertions(+)



I'm not sure how keen we are on adding new device-specific
HMP info commands, but it's not my area of expertise. Markus ?


HMP is David :) IIRC it is OK as long as HMP is a QMP wrapper.



(patch below for context)

thanks
-- PMM



diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index e90f20a107..4e714e79a2 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -879,3 +879,15 @@ SRST
``info sgx``
  Show intel SGX information.
  ERST
+
+{
+.name   = "via",
+.args_type  = "",
+.params = "",
+.help   = "show guest 6522 VIA devices",
+},
+
+SRST
+  ``info via``
+Show guest 6522 VIA devices.
+ERST
diff --git a/hw/misc/mos6522.c b/hw/misc/mos6522.c
index aaae195d63..cfa6a9c44b 100644
--- a/hw/misc/mos6522.c
+++ b/hw/misc/mos6522.c
@@ -30,6 +30,8 @@
  #include "hw/misc/mos6522.h"
  #include "hw/qdev-properties.h"
  #include "migration/vmstate.h"
+#include "monitor/monitor.h"
+#include "qapi/type-helpers.h"
  #include "qemu/timer.h"
  #include "qemu/cutils.h"
  #include "qemu/log.h"
@@ -415,6 +417,95 @@ void mos6522_write(void *opaque, hwaddr addr, uint64_t 
val, unsigned size)
  }
  }

+static int qmp_x_query_via_foreach(Object *obj, void *opaque)
+{
+GString *buf = opaque;
+
+if (object_dynamic_cast(obj, TYPE_MOS6522)) {
+MOS6522State *s = MOS6522(obj);
+int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+uint16_t t1counter = get_counter(s, &s->timers[0]);
+uint16_t t2counter = get_counter(s, &s->timers[1]);
+
+g_string_append_printf(buf, "%s:\n", object_get_typename(obj));
+
+g_string_append_printf(buf, "  Registers:\n");
+g_string_append_printf(buf, "%-*s:0x%x\n", 4,
+   mos6522_reg_names[0], s->b);
+g_string_append_printf(buf, "%-*s:0x%x\n", 4,
+   mos6522_reg_names[1], s->a);
+g_string_append_printf(buf, "%-*s:0x%x\n", 4,
+   mos6522_reg_names[2], s->dirb);
+g_string_append_printf(buf, "%-*s:0x%x\n", 4,
+   mos6522_reg_names[3], s->dira);
+g_string_append_printf(buf, "%-*s:0x%x\n", 4,
+   mos6522_reg_names[4], t1counter & 0xff);
+g_string_append_printf(buf, "%-*s:0x%x\n", 4,
+   mos6522_reg_names[5], t1counter >> 8);
+g_string_append_printf(buf, "%-*s:0x%x\n", 4,
+   mos6522_reg_names[6],
+   s->timers[0].latch & 0xff);
+g_string_append_printf(buf, "%-*s:0x%x\n", 4,
+   mos6522_reg_names[7],
+   s->timers[0].latch >> 8);
+g_string_append_printf(buf, "%-*s:0x%x\n", 4,
+   mos6522_reg_names[8], t2counter & 0xff);
+g_string_append_printf(buf, "%-*s:0x%x\n", 4,
+   mos6522_reg_names[9], t2counter >> 8);
+g_string_append_printf(buf, "%-*s:0x%x\n", 4,
+   mos6522_reg_names[10], s->sr);
+g_string_append_printf(buf, "%-*s:0x%x\n", 4,
+   mos6522_reg_names[11], s->acr);
+g_string_append_printf(buf, "%-*s:0x%x\n", 4,
+   mos6522_reg_names[12], s->pcr);
+g_string_append_printf(buf, "%-*s:0x%x\n", 4,
+   mos6522_reg_names[13], s->ifr);
+g_string_append_printf(buf, "%-*s:0x%x\n", 4,
+   mos6522_reg_names[14], s->ier);
+
+g_string_append_printf(buf, "  Timers:\n");
+g_string_append_printf(buf, "Using current time now(ns)=%"PRId64
+"\n", now);
+g_string_append_printf(buf, "T1 freq(hz)=%"PRId64
+   " mode=%s"
+   " counter=0x%x"
+   " latch=0x%x\n"
+   "   load_time(ns)=%"PRId64
+   " next_irq_time(ns)=%"PRId64 "\n",
+   s->timers[0].frequency,
+   ((s->acr & T1MODE) == T1MODE_CONT) ? 
"continuous"
+  : "one-shot",
+   t1counter,
+   s->timers[0].latch,
+   s->timers[0

Re: [PATCH] MAINTAINERS: python - remove ehabkost and add bleal

2022-02-07 Thread Philippe Mathieu-Daudé via

On 8/2/22 01:05, John Snow wrote:

Eduardo Habkost has left Red Hat and has other daily responsibilities to
attend to. In order to stop spamming him on every series, remove him as
"Reviewer" for the python/ library dir and add Beraldo Leal instead.

For the "python scripts" stanza (which is separate due to level of
support), replace Eduardo as maintainer with myself.

(Thanks for all of your hard work, Eduardo!)


Thank you Eduardo, and thank you John for taking this.

Reviewed-by: Philippe Mathieu-Daudé 



Signed-off-by: John Snow 
---
  MAINTAINERS | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 9814580975..028ac0de25 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2735,13 +2735,13 @@ F: backends/cryptodev*.c
  Python library
  M: John Snow 
  M: Cleber Rosa 
-R: Eduardo Habkost 
+R: Beraldo Leal 
  S: Maintained
  F: python/
  T: git https://gitlab.com/jsnow/qemu.git python
  
  Python scripts

-M: Eduardo Habkost 
+M: John Snow 
  M: Cleber Rosa 
  S: Odd Fixes
  F: scripts/*.py





Re: target/arm: cp15.dacr migration

2022-02-07 Thread Pavel Dovgalyuk

On 07.02.2022 16:44, Peter Maydell wrote:

On Mon, 7 Feb 2022 at 12:13, Pavel Dovgalyuk  wrote:


I recently encountered a problem with cp15.dacr register.
It has _s and _ns versions. During the migration only dacr_ns is
saved/loaded.
But both of the values are used in get_phys_addr_v5 and get_phys_addr_v6
functions. Therefore VM behavior becomes incorrect after loading the
vmstate.


Yes, we don't correctly save and restore the Secure banked
registers. This is a long standing bug (eg it is the
cause of https://gitlab.com/qemu-project/qemu/-/issues/467).
Almost nobody notices this, because almost nobody both runs
Secure-world AArch32 code and also tries migration or save/restore.


We actually did it for reverse debugging of custom firmware.


I found that kvm_to_cpreg_id is responsible for disabling dacr_s
migration, because it always selects ns variant.



diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index c6a4d50e82..d3ffef3640 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -2510,11 +2510,6 @@ static inline uint32_t kvm_to_cpreg_id(uint64_t
kvmid)
   if ((kvmid & CP_REG_SIZE_MASK) == CP_REG_SIZE_U64) {
   cpregid |= (1 << 15);
   }
-
-/* KVM is always non-secure so add the NS flag on AArch32 register
- * entries.
- */
- cpregid |= 1 << CP_REG_NS_SHIFT;
   }
   return cpregid;
   }


This change is wrong, or at least incomplete -- as the comment notes,
a guest running under KVM is always NonSecure, so when KVM says "this is
DACR" (or whatever) it always means "this is the NS banked DACR".
(Though now AArch32 KVM support has been dropped we have some flexibility
to not necessarily use KVM register ID values that exactly match what
the kernel uses, if we need to do that.)


Unfortunately, I can't test anything with AArch32 KVM.


Also, kvm_to_cpreg_id() and cpreg_to_kvm_id() are supposed to be
inverses of each other -- at the moment they are not, hence
this bug, but I think your change has probably resulted in both
the S and the NS banked versions of each register being treated
as the S banked version, which will have a different set of problems.


I checked the flags coming through write_cpustate_to_list. There were 
both dacr_s and dacr_ns flags. Therefore both values were saved.




There is also the question of migration compatibility to consider
in any change in this area.



Pavel Dovgalyuk



Re: [PATCH RFC 14/15] migration: Postcopy preemption on separate channel

2022-02-07 Thread Peter Xu
On Thu, Feb 03, 2022 at 05:45:32PM +, Dr. David Alan Gilbert wrote:
> * Peter Xu (pet...@redhat.com) wrote:
> > This patch enables postcopy-preempt feature.
> > 
> > It contains two major changes to the migration logic:
> > 
> >   (1) Postcopy requests are now sent via a different socket from precopy
> >   background migration stream, so as to be isolated from very high page
> >   request delays
> > 
> >   (2) For huge page enabled hosts: when there's postcopy requests, they can 
> > now
> >   intercept a partial sending of huge host pages on src QEMU.
> > 
> > After this patch, we'll have two "channels" (or say, sockets, because it's 
> > only
> > supported on socket-based channels) for postcopy: (1) PRECOPY channel 
> > (which is
> > the default channel that transfers background pages), and (2) POSTCOPY
> > channel (which only transfers requested pages).
> > 
> > On the source QEMU, when we found a postcopy request, we'll interrupt the
> > PRECOPY channel sending process and quickly switch to the POSTCOPY channel.
> > After we serviced all the high priority postcopy pages, we'll switch back to
> > PRECOPY channel so that we'll continue to send the interrupted huge page 
> > again.
> > There's no new thread introduced.
> > 
> > On the destination QEMU, one new thread is introduced to receive page data 
> > from
> > the postcopy specific socket.
> > 
> > This patch has a side effect.  After sending postcopy pages, previously 
> > we'll
> > assume the guest will access follow up pages so we'll keep sending from 
> > there.
> > Now it's changed.  Instead of going on with a postcopy requested page, 
> > we'll go
> > back and continue sending the precopy huge page (which can be intercepted 
> > by a
> > postcopy request so the huge page can be sent partially before).
> > 
> > Whether that's a problem is debatable, because "assuming the guest will
> > continue to access the next page" doesn't really suit when huge pages are
> > used, especially if the huge page is large (e.g. 1GB pages).  So that 
> > locality
> > hint is mostly meaningless if huge pages are used.
> > 
> > If postcopy preempt is enabled, a separate channel is created for it so 
> > that it
> > can be used later for postcopy specific page requests.  On dst node, a
> > standalone thread is used to receive postcopy requested pages.  The thread 
> > is
> > created along with the ram listen thread during POSTCOPY_LISTEN phase.
> 
> I think this patch could do with being split into two; the first one that
> deals with closing/opening channels; and the second that handles the
> data on the two channels and does the preemption.

Sounds good, I'll give it a shot on the split.

> 
> Another thought is whether, if in the future we allow multifd +
> postcopy, the multifd code would change - I think it would end up closer
> to using multiple channels taking different pages on each one.

Right, so potentially the postcopy channels can be multi-threaded too itself.

We've had a quick discussion on irc, just to recap: I didn't reuse multifd
infra because IMO multifd is designed with below ideas in mind:

  (1) Every multifd thread is equal
  (2) Throughput oriented

However I found that postcopy needs something different when they're mixed up
together with multifd.

Firstly, we will have some channels sending as much as we could where latency
is not an issue (aka background pages).  However it's not suitable for page
requests, so we could also have channels that are servicing page faults from
dst.  In short, there're two types of channels/threads we want, and we may want
to treat them differently.

The current model is we only have 1 postcopy channel and 1 precopy channel, but
it should be easier if we want to make it N post + 1 pre based on this series.

So far all send() is still done in the migration thread so no new sender thread
but 1 more receiver thread only. If we want to grow that 1->N for postcopy
channels we may want to move that out too just like what we do with multifd.
Not sure whether there can be something reused around.  That's where I haven't
yet explored, but this series should already share a common piece of code on
refactoring of things like tmp huge page on dst node to be able to receive with
multiple huge pages.

This also reminded me that, instead of a new capability, should I simply expose
a parameter "postcopy-channels=N" to CLI so that we can be prepared with multi
postcopy channels?

> 
> 
> Do we need to do anything in postcopy recovery?

Yes. It's a todo (in the cover letter), if the whole thing looks sane I'll add
that together in the non-rfc series.

Thanks,

-- 
Peter Xu




Re: [PATCH v9 00/23] QEMU RISC-V AIA support

2022-02-07 Thread Alistair Francis
On Sat, Feb 5, 2022 at 3:47 AM Anup Patel  wrote:
>
> From: Anup Patel 
>
> The advanced interrupt architecture (AIA) extends the per-HART local
> interrupt support. Along with this, it also adds IMSIC (MSI controller)
> and Advanced PLIC (wired interrupt controller).
>
> The latest AIA draft specification can be found here:
> https://github.com/riscv/riscv-aia/releases/download/0.2-draft.28/riscv-interrupts-028.pdf
>
> This series adds RISC-V AIA support in QEMU which includes emulating all
> AIA local CSRs, APLIC, and IMSIC. Only AIA local interrupt filtering is
> not implemented because we don't have any local interrupt greater than 12.
>
> To enable AIA in QEMU, use one of the following:
> 1) Only AIA local interrupt CSRs: Pass "x-aia=true" as CPU parameter
>in the QEMU command-line
> 2) Only APLIC for virt machine: Pass "aia=aplic" as machine parameter
>in the QEMU command-line
> 3) Both APLIC and IMSIC for virt machine: Pass "aia=aplic-imsic" as
>machine parameter in the QEMU command-line
> 4) Both APLIC and IMSIC with 2 guest files for virt machine: Pass
>"aia=aplic-imsic,aia-guests=2" as machine parameter in the QEMU
>command-line
>
> To test series, we require OpenSBI and Linux with AIA support which can
> be found in:
> riscv_aia_v2 branch at https://github.com/avpatel/opensbi.git
> riscv_aia_v1 branch at https://github.com/avpatel/linux.git
>
> This series can be found riscv_aia_v9 branch at:
> https://github.com/avpatel/qemu.git
>
> Changes since v8:
>  - Use error_setg() in riscv_imsic_realize() added by PATCH20
>
> Changes since v7:
>  - Rebased on latest riscv-to-apply.next branch of Alistair's repo
>  - Improved default priority assignment in PATCH9
>
> Changes since v6:
>  - Fixed priority comparison in riscv_cpu_pending_to_irq() of PATCH9
>  - Fixed typos in comments added by PATCH11
>  - Added "pend = true;" for CSR_MSETEIPNUM case of rmw_xsetclreinum()
>in PATCH15
>  - Handle ithreshold == 0 case in riscv_aplic_idc_topi() of PATCH18
>  - Allow setting pending bit for Level0 or Level1 interrupts in
>riscv_aplic_set_pending() of PATCH18
>  - Force DOMAINCFG[31:24] bits to 0x80 in riscv_aplic_read() of PATCH18
>  - For APLIC direct mode, set target.iprio to 1 when zero is written
>in PATCH18
>  - Handle eithreshold == 0 case in riscv_imsic_topei() of PATCH20
>
> Changes since v5:
>  - Moved VSTOPI_NUM_SRCS define to top of the file in PATCH13
>  - Fixed typo in PATCH16
>
> Changes since v4:
>  - Changed IRQ_LOCAL_MAX to 16 in PATCH2
>  - Fixed typo in PATCH10
>  - Replaced TARGET_LONG_BITS with riscv_cpu_mxl_bits(env) in PATCH11
>  - Replaced TARGET_LONG_BITS with riscv_cpu_mxl_bits(env) in PATCH14
>  - Replaced TARGET_LONG_BITS with riscv_cpu_mxl_bits(env) in PATCH15
>  - Replaced TARGET_LONG_BITS with xlen passed via ireg callback in PATCH20
>  - Restrict maximum IMSIC guest files per-HART of virt machine to 7 in
>PATCH21.
>  - Added separate PATCH23 to increase maximum number of allowed CPUs
>for virt machine
>
> Changes since v3:
>  - Replaced "aplic,xyz" and "imsic,xyz" DT properties with "riscv,xyz"
>DT properties because "aplic" and "imsic" are not valid vendor names
>required by Linux DT schema checker.
>
> Changes since v2:
>  - Update PATCH4 to check and inject interrupt after V=1 when
>transitioning from V=0 to V=1
>
> Changes since v1:
>  - Revamped whole series and created more granular patches
>  - Added HGEIE and HGEIP CSR emulation for H-extension
>  - Added APLIC emulation
>  - Added IMSIC emulation
>
> Anup Patel (23):
>   target/riscv: Fix trap cause for RV32 HS-mode CSR access from RV64
> HS-mode
>   target/riscv: Implement SGEIP bit in hip and hie CSRs
>   target/riscv: Implement hgeie and hgeip CSRs
>   target/riscv: Improve delivery of guest external interrupts
>   target/riscv: Allow setting CPU feature from machine/device emulation
>   target/riscv: Add AIA cpu feature
>   target/riscv: Add defines for AIA CSRs
>   target/riscv: Allow AIA device emulation to set ireg rmw callback
>   target/riscv: Implement AIA local interrupt priorities
>   target/riscv: Implement AIA CSRs for 64 local interrupts on RV32
>   target/riscv: Implement AIA hvictl and hviprioX CSRs
>   target/riscv: Implement AIA interrupt filtering CSRs
>   target/riscv: Implement AIA mtopi, stopi, and vstopi CSRs
>   target/riscv: Implement AIA xiselect and xireg CSRs
>   target/riscv: Implement AIA IMSIC interface CSRs
>   hw/riscv: virt: Use AIA INTC compatible string when available
>   target/riscv: Allow users to force enable AIA CSRs in HART
>   hw/intc: Add RISC-V AIA APLIC device emulation
>   hw/riscv: virt: Add optional AIA APLIC support to virt machine
>   hw/intc: Add RISC-V AIA IMSIC device emulation
>   hw/riscv: virt: Add optional AIA IMSIC support to virt machine
>   docs/system: riscv: Document AIA options for virt machine
>   hw/riscv: virt: Increase maximum number of allowed CPUs

Thanks!

Applied to riscv-to-apply.next

Alistair

Re: [PATCH 17/31] vdpa: adapt vhost_ops callbacks to svq

2022-02-07 Thread Jason Wang



在 2022/2/1 上午2:58, Eugenio Perez Martin 写道:

On Sun, Jan 30, 2022 at 5:03 AM Jason Wang  wrote:


在 2022/1/22 上午4:27, Eugenio Pérez 写道:

First half of the buffers forwarding part, preparing vhost-vdpa
callbacks to SVQ to offer it. QEMU cannot enable it at this moment, so
this is effectively dead code at the moment, but it helps to reduce
patch size.

Signed-off-by: Eugenio Pérez 
---
   hw/virtio/vhost-shadow-virtqueue.h |   2 +-
   hw/virtio/vhost-shadow-virtqueue.c |  21 -
   hw/virtio/vhost-vdpa.c | 133 ++---
   3 files changed, 143 insertions(+), 13 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index 035207a469..39aef5ffdf 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -35,7 +35,7 @@ size_t vhost_svq_device_area_size(const VhostShadowVirtqueue 
*svq);

   void vhost_svq_stop(VhostShadowVirtqueue *svq);

-VhostShadowVirtqueue *vhost_svq_new(void);
+VhostShadowVirtqueue *vhost_svq_new(uint16_t qsize);

   void vhost_svq_free(VhostShadowVirtqueue *vq);

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index f129ec8395..7c168075d7 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -277,9 +277,17 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
   /**
* Creates vhost shadow virtqueue, and instruct vhost device to use the 
shadow
* methods and file descriptors.
+ *
+ * @qsize Shadow VirtQueue size
+ *
+ * Returns the new virtqueue or NULL.
+ *
+ * In case of error, reason is reported through error_report.
*/
-VhostShadowVirtqueue *vhost_svq_new(void)
+VhostShadowVirtqueue *vhost_svq_new(uint16_t qsize)
   {
+size_t desc_size = sizeof(vring_desc_t) * qsize;
+size_t device_size, driver_size;
   g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
   int r;

@@ -300,6 +308,15 @@ VhostShadowVirtqueue *vhost_svq_new(void)
   /* Placeholder descriptor, it should be deleted at set_kick_fd */
   event_notifier_init_fd(&svq->svq_kick, INVALID_SVQ_KICK_FD);

+svq->vring.num = qsize;


I wonder if this is the best. E.g some hardware can support up to 32K
queue size. So this will probably end up with:

1) SVQ use 32K queue size
2) hardware queue uses 256


In that case SVQ vring queue size will be 32K and guest's vring can
negotiate any number with SVQ equal or less than 32K,



Sorry for being unclear; what I meant is actually:

1) SVQ uses 32K queue size

2) guest vq uses 256

This looks like a burden that needs extra logic and may damage the 
performance.


And this can lead to another interesting situation:

1) SVQ uses 256

2) guest vq uses 1024

Where a lot more SVQ logic is needed.



including 256.
Is that what you mean?



I mean, it looks to me the logic will be much more simplified if we just 
allocate the shadow virtqueue with the size that the guest can see (guest 
vring).


Then we don't need to think if the difference of the queue size can have 
any side effects.





If with hardware queues you mean guest's vring, not sure why it is
"probably 256". I'd say that in that case with the virtio-net kernel
driver the ring size will be the same as the device export, for
example, isn't it?

The implementation should support any combination of sizes, but the
ring size exposed to the guest is never bigger than hardware one.


? Or we SVQ can stick to 256 but this will this cause trouble if we want
to add event index support?


I think we should not have any problem with event idx. If you mean
that the guest could mark more buffers available than SVQ vring's
size, that should not happen because there must be less entries in the
guest than SVQ.

But if I understood you correctly, a similar situation could happen if
a guest's contiguous buffer is scattered across many qemu's VA chunks.
Even if that would happen, the situation should be ok too: SVQ knows
the guest's avail idx and, if SVQ is full, it will continue forwarding
avail buffers when the device uses more buffers.

Does that make sense to you?



Yes.

Thanks




Re: [PATCH RFC 10/15] migration: Move static var in ram_block_from_stream() into global

2022-02-07 Thread Peter Xu
On Thu, Feb 03, 2022 at 05:48:31PM +, Dr. David Alan Gilbert wrote:
> * Peter Xu (pet...@redhat.com) wrote:
> > Static variable is very unfriendly to threading of ram_block_from_stream().
> > Move it into MigrationIncomingState.
> > 
> > Make the incoming state pointer to be passed over to 
> > ram_block_from_stream() on
> > both caller sites.
> > 
> > Signed-off-by: Peter Xu 
> 
> OK, but I'm not sure if I noticed where you changed this to be per
> channel later?

It's done in the last patch where it'll start to pass over "channel" index into
ram_block_from_stream():

static inline RAMBlock *ram_block_from_stream(MigrationIncomingState *mis,
  QEMUFile *f, int flags,
  int channel)
{
RAMBlock *block = mis->last_recv_block[channel];
...
}

I could have moved it into the new PostcopyTmpPage structure, but it'll be a
bit weird because precopy also uses this to cache the block info, hence I made
it an array.

> 
> Reviewed-by: Dr. David Alan Gilbert 

Thanks,

-- 
Peter Xu




Re: [PATCH 11/31] vhost: Add vhost_svq_valid_device_features to shadow vq

2022-02-07 Thread Jason Wang



在 2022/2/1 下午6:57, Eugenio Perez Martin 写道:

On Mon, Jan 31, 2022 at 4:49 PM Eugenio Perez Martin
 wrote:

On Sat, Jan 29, 2022 at 9:11 AM Jason Wang  wrote:


在 2022/1/22 上午4:27, Eugenio Pérez 写道:

This allows SVQ to negotiate features with the device. For the device,
SVQ is a driver. While this function needs to bypass all non-transport
features, it needs to disable the features that SVQ does not support
when forwarding buffers. This includes packed vq layout, indirect
descriptors or event idx.

Signed-off-by: Eugenio Pérez 
---
   hw/virtio/vhost-shadow-virtqueue.h |  2 ++
   hw/virtio/vhost-shadow-virtqueue.c | 44 ++
   hw/virtio/vhost-vdpa.c | 21 ++
   3 files changed, 67 insertions(+)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index c9ffa11fce..d963867a04 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -15,6 +15,8 @@

   typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;

+bool vhost_svq_valid_device_features(uint64_t *features);
+
   void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
   void vhost_svq_set_guest_call_notifier(VhostShadowVirtqueue *svq, int 
call_fd);
   const EventNotifier *vhost_svq_get_dev_kick_notifier(
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 9619c8082c..51442b3dbf 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -45,6 +45,50 @@ const EventNotifier *vhost_svq_get_dev_kick_notifier(
   return &svq->hdev_kick;
   }

+/**
+ * Validate the transport device features that SVQ can use with the device
+ *
+ * @dev_features  The device features. If success, the acknowledged features.
+ *
+ * Returns true if SVQ can go with a subset of these, false otherwise.
+ */
+bool vhost_svq_valid_device_features(uint64_t *dev_features)
+{
+bool r = true;
+
+for (uint64_t b = VIRTIO_TRANSPORT_F_START; b <= VIRTIO_TRANSPORT_F_END;
+ ++b) {
+switch (b) {
+case VIRTIO_F_NOTIFY_ON_EMPTY:
+case VIRTIO_F_ANY_LAYOUT:
+continue;
+
+case VIRTIO_F_ACCESS_PLATFORM:
+/* SVQ does not know how to translate addresses */


I may miss something but any reason that we need to disable
ACCESS_PLATFORM? I'd expect the vring helper we used for shadow
virtqueue can deal with vIOMMU perfectly.


This function is validating SVQ <-> Device communications features,
that may or may not be the same as guest <-> SVQ. These feature flags
are valid for guest <-> SVQ communication, same as with indirect
descriptors one.

Having said that, there is a point in the series where
VIRTIO_F_ACCESS_PLATFORM is actually mandatory, so I think we could
use the latter addition of x-svq cmdline parameter and delay the
feature validations where it makes more sense.


+if (*dev_features & BIT_ULL(b)) {
+clear_bit(b, dev_features);
+r = false;
+}
+break;
+
+case VIRTIO_F_VERSION_1:


I had the same question here.


For VERSION_1 it's easier to assume that guest is little endian at
some points, but we could try harder to support both endianness if
needed.


Re-thinking the SVQ feature isolation stuff for this first iteration
based on your comments.

Maybe it's easier to simply fail if the device does not *match* the
expected feature set, and add all of the "feature isolation" later.
While a lot of guest <-> SVQ communication details are already solved
for free with qemu's VirtQueue (indirect, packed, ...), we may
simplify this series in particular and add the support for it later.

For example, at this moment would be valid for the device to export
indirect descriptors feature flag, and SVQ simply forward that feature
flag offering to the guest. So the guest <-> SVQ communication could
have indirect descriptors (qemu's VirtQueue code handles it for free),
but SVQ would not acknowledge it for the device. As a side note, to
negotiate it would have been harmless actually, but it's not the case
of packed vq.

So maybe for the v2 we can simply force the device to just export the
strictly needed features and nothing else with qemu cmdline, and then
enable the feature negotiation isolation for each side of SVQ?



Yes, that's exactly my point.

Thanks




Thanks!



Thanks!


Thanks



+/* SVQ trust that guest vring is little endian */
+if (!(*dev_features & BIT_ULL(b))) {
+set_bit(b, dev_features);
+r = false;
+}
+continue;
+
+default:
+if (*dev_features & BIT_ULL(b)) {
+clear_bit(b, dev_features);
+}
+}
+}
+
+return r;
+}
+
   /* Forward guest notifications */
   static void vhost_handle_guest_kick(EventNotifier *n)
   {
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index bdb45c8808..9d

Re: [PATCH RFC 09/15] migration: Add postcopy_thread_create()

2022-02-07 Thread Peter Xu
On Thu, Feb 03, 2022 at 03:19:48PM +, Dr. David Alan Gilbert wrote:
> * Peter Xu (pet...@redhat.com) wrote:
> > Postcopy creates threads. A common pattern is we init a sem and use it to sync
> > with the thread.  Namely, we have fault_thread_sem and listen_thread_sem and
> > they're only used for this.
> > 
> > Make it a shared infrastructure so it's easier to create yet another thread.
> > 
> 
> It might be worth a note saying you now share that sem, so you can't
> start two threads in parallel.

I'll squash this into the patch:

---8<---
diff --git a/migration/migration.h b/migration/migration.h
index 845be3463c..2a311fd8d6 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -72,7 +72,10 @@ struct MigrationIncomingState {
 /* A hook to allow cleanup at the end of incoming migration */
 void *transport_data;
 void (*transport_cleanup)(void *data);
-/* Used to sync thread creations */
+/*
+ * Used to sync thread creations.  Note that we can't create threads in
+ * parallel with this sem.
+ */
 QemuSemaphore  thread_sync_sem;
 /*
  * Free at the start of the main state load, set as the main thread 
finishes
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 099d8ed478..1a3ba1db84 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -79,6 +79,10 @@ int postcopy_notify(enum PostcopyNotifyReason reason, Error 
**errp)
 &pnd);
 }
 
+/*
+ * NOTE: this routine is not thread safe, we can't call it concurrently. But it
+ * should be good enough for migration's purposes.
+ */
 void postcopy_thread_create(MigrationIncomingState *mis,
 QemuThread *thread, const char *name,
 void *(*fn)(void *), int joinable)
---8<---

> 
> Reviewed-by: Dr. David Alan Gilbert 

Thanks,

-- 
Peter Xu




Re: [PATCH RFC 07/15] migration: Introduce postcopy channels on dest node

2022-02-07 Thread Peter Xu
On Thu, Feb 03, 2022 at 03:08:39PM +, Dr. David Alan Gilbert wrote:
> * Peter Xu (pet...@redhat.com) wrote:
> > Postcopy handles huge pages in a special way that currently we can only have
> > one "channel" to transfer the page.
> > 
> > It's because when we install pages using UFFDIO_COPY, we need to have the 
> > whole
> > huge page ready, it also means we need to have a temp huge page when trying 
> > to
> > receive the whole content of the page.
> > 
> > Currently all maintenance around this tmp page is global: firstly we'll
> > allocate a temp huge page, then we maintain its status mostly within
> > ram_load_postcopy().
> > 
> > To enable multiple channels for postcopy, the first thing we need to do is 
> > to
> > prepare N temp huge pages as caching, one for each channel.
> > 
> > Meanwhile we need to maintain the tmp huge page status per-channel too.
> > 
> > To give some example, some local variables maintained in ram_load_postcopy()
> > are listed; they are responsible for maintaining temp huge page status:
> > 
> >   - all_zero: this keeps whether this huge page contains all zeros
> >   - target_pages: this counts how many target pages have been copied
> >   - host_page:this keeps the host ptr for the page to install
> > 
> > Move all these fields to be together with the temp huge pages to form a new
> > structure called PostcopyTmpPage.  Then for each (future) postcopy channel, 
> > we
> > need one structure to keep the state around.
> > 
> > For vanilla postcopy, obviously there's only one channel.  It contains both
> > precopy and postcopy pages.
> > 
> > This patch teaches the dest migration node to start realizing the possible 
> > number
> > of postcopy channels by introducing the "postcopy_channels" variable.  Its
> > value is calculated when setup postcopy on dest node (during POSTCOPY_LISTEN
> > phase).
> > 
> > Vanilla postcopy will have channels=1, but when postcopy-preempt capability 
> > is
> > enabled (in the future), we will boost it to 2 because even during partial
> > sending of a precopy huge page we still want to preempt it and start sending
> > the postcopy requested page right away (so we start to keep two temp huge
> > pages; more if we want to enable multifd).  In this patch there's a TODO 
> > marked
> > for that; so far the channels is always set to 1.
> > 
> > We need to send one "host huge page" on one channel only and we cannot split
> > them, because otherwise the data upon the same huge page can locate on more
> > than one channel so we need more complicated logic to manage.  One temp host
> > huge page for each channel will be enough for us for now.
> > 
> > Postcopy will still always use the index=0 huge page even after this patch.
> > However it prepares for the latter patches where it can start to use 
> > multiple
> > channels (which needs src intervention, because only src knows which 
> > channel we
> > should use).
> 
> Generally OK, some minor nits.
> 
> > Signed-off-by: Peter Xu 
> > ---
> >  migration/migration.h| 35 +++-
> >  migration/postcopy-ram.c | 50 +---
> >  migration/ram.c  | 43 +-
> >  3 files changed, 91 insertions(+), 37 deletions(-)
> > 
> > diff --git a/migration/migration.h b/migration/migration.h
> > index 8130b703eb..8bb2931312 100644
> > --- a/migration/migration.h
> > +++ b/migration/migration.h
> > @@ -45,6 +45,24 @@ struct PostcopyBlocktimeContext;
> >   */
> >  #define CLEAR_BITMAP_SHIFT_MAX31
> >  
> > +/* This is an abstraction of a "temp huge page" for postcopy's purpose */
> > +typedef struct {
> > +/*
> > + * This points to a temporary huge page as a buffer for UFFDIO_COPY.  
> > It's
> > + * mmap()ed and needs to be freed when cleanup.
> > + */
> > +void *tmp_huge_page;
> > +/*
> > + * This points to the host page we're going to install for this temp 
> > page.
> > + * It tells us after we've received the whole page, where we should 
> > put it.
> > + */
> > +void *host_addr;
> > +/* Number of small pages copied (in size of TARGET_PAGE_SIZE) */
> > +int target_pages;
> 
> Can we take the opportunity to convert this to an unsigned?

Sure.

> 
> > +/* Whether this page contains all zeros */
> > +bool all_zero;
> > +} PostcopyTmpPage;
> > +
> >  /* State for the incoming migration */
> >  struct MigrationIncomingState {
> >  QEMUFile *from_src_file;
> > @@ -81,7 +99,22 @@ struct MigrationIncomingState {
> >  QemuMutex rp_mutex;/* We send replies from multiple threads */
> >  /* RAMBlock of last request sent to source */
> >  RAMBlock *last_rb;
> > -void *postcopy_tmp_page;
> > +/*
> > + * Number of postcopy channels including the default precopy channel, 
> > so
> > + * vanilla postcopy will only contain one channel which contain both
> > + * precopy and postcopy streams.
> > + *
> > + * This is calcu

Re: [PATCH 09/31] vhost-vdpa: Take into account SVQ in vhost_vdpa_set_vring_call

2022-02-07 Thread Jason Wang



在 2022/1/31 下午11:34, Eugenio Perez Martin 写道:

On Sat, Jan 29, 2022 at 9:06 AM Jason Wang  wrote:


在 2022/1/22 上午4:27, Eugenio Pérez 写道:

Signed-off-by: Eugenio Pérez 
---
   hw/virtio/vhost-vdpa.c | 20 ++--
   1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 18de14f0fb..029f98feee 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -687,13 +687,29 @@ static int vhost_vdpa_set_vring_kick(struct vhost_dev 
*dev,
   }
   }

-static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
-   struct vhost_vring_file *file)
+static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
+ struct vhost_vring_file *file)
   {
   trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
   return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
   }

+static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
+ struct vhost_vring_file *file)
+{
+struct vhost_vdpa *v = dev->opaque;
+
+if (v->shadow_vqs_enabled) {
+int vdpa_idx = vhost_vdpa_get_vq_index(dev, file->index);
+VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
+
+vhost_svq_set_guest_call_notifier(svq, file->fd);


Two questions here (had similar questions for vring kick):

1) Any reason that we setup the eventfd for vhost-vdpa in
vhost_vdpa_svq_setup() not here?


I'm not sure what you mean.

The guest->SVQ call and kick fds are set here and at
vhost_vdpa_set_vring_kick. The event notifier handler of the guest ->
SVQ kick_fd is set at vhost_vdpa_set_vring_kick /
vhost_svq_set_svq_kick_fd. The guest -> SVQ call fd has no event
notifier handler since we don't poll it.

On the other hand, the connection SVQ <-> device uses the same fds
from the beginning to the end, and they will not change with, for
example, call fd masking. That's why it's setup from
vhost_vdpa_svq_setup. Delaying to vhost_vdpa_set_vring_call would make
us add way more logic there.



More logic in the general shadow vq code, but less vhost-vdpa
specific code, I think.


E.g. we can move the kick set logic from vhost_vdpa_svq_set_fds() to
here.


Thanks





2) The call could be disabled by using -1 as the fd, I don't see any
code to deal with that.


Right, I didn't take that into account. vhost-kernel takes also -1 as
kick_fd to unbind, so SVQ can be reworked to take that into account
for sure.

Thanks!


Thanks



+return 0;
+} else {
+return vhost_vdpa_set_vring_dev_call(dev, file);
+}
+}
+
   /**
* Set shadow virtqueue descriptors to the device
*





Re: [PATCH RFC 02/15] migration: Allow pss->page jump over clean pages

2022-02-07 Thread Peter Xu
On Thu, Feb 03, 2022 at 06:19:22PM +, Dr. David Alan Gilbert wrote:
> * Peter Xu (pet...@redhat.com) wrote:
> > On Wed, Jan 19, 2022 at 01:42:47PM +, Dr. David Alan Gilbert wrote:
> > > * Peter Xu (pet...@redhat.com) wrote:
> > > > Commit ba1b7c812c ("migration/ram: Optimize ram_save_host_page()") 
> > > > managed to
> > > > optimize host huge page use case by scanning the dirty bitmap when 
> > > > looking for
> > > > the next dirty small page to migrate.
> > > > 
> > > > However when updating the pss->page before returning from that 
> > > > function, we
> > > > used MIN() of these two values: (1) next dirty bit, or (2) end of 
> > > > current sent
> > > > huge page, to fix up pss->page.
> > > > 
> > > > That sounds unnecessary, because I see nowhere that requires pss->page 
> > > > to be
> > > > not going over current huge page boundary.
> > > > 
> > > > What we need here is probably MAX() instead of MIN() so that we'll start
> > > > scanning from the next dirty bit next time. Since pss->page can't be 
> > > > smaller
> > > > than hostpage_boundary (the loop guarantees it), it probably means we 
> > > > don't
> > > > need to fix it up at all.
> > > > 
> > > > Cc: Keqian Zhu 
> > > > Cc: Kunkun Jiang 
> > > > Signed-off-by: Peter Xu 
> > > 
> > > 
> > > Hmm, I think that's potentially necessary.  note that the start of
> > > ram_save_host_page stores the 'start_page' at entry.
> > > That 'start_page' goes to the ram_save_release_protection and so
> > > I think it needs to be pagesize aligned for the mmap/uffd that happens.
> > 
> > Right, that's indeed a functional change, but IMHO it's also fine.
> > 
> > When reaching ram_save_release_protection(), what we guaranteed is that 
> > below
> > page range contains no dirty bits in ramblock dirty bitmap:
> > 
> >   range0 = [start_page, pss->page)
> > 
> > Side note: inclusive on start, but not inclusive on the end side of range0
> > (that is, pss->page can be pointing to a dirty page).
> > 
> > What ram_save_release_protection() does is to unprotect the pages and let 
> > them
> > run free.  If we're sure range0 contains no dirty page, it means we have
> > already copied them over into the snapshot, so IIUC it's safe to unprotect 
> > all
> > of it (even if it's already bigger than the host page size)?
> 
> I think what's worrying me is the alignment of the address going into
> UFFDIO_WRITEPROTECT in uffd_change_protection - if it was previously
> huge page aligned and now isn't, what breaks? (Did it support
> hugepages?)

Good point..

It doesn't support huge pages yet, but we'd better keep it always page aligned
for the unprotect ioctl.

> 
> > That can be slightly less efficient for live snapshot in some extreme cases
> > (when unprotect, we'll need to walk the pgtables in the uffd ioctl()), but I
> > don't assume live snapshot to be run on a huge VM, so hopefully it's still
> > fine?  Not to mention it should make live migration a little bit faster,
> > assuming that's more frequently used..
> 
> Hmm I don't think I understand that statement.

I meant since we've scanned over those clean pages we don't need to scan it
again in the next find_dirty_block() call for precopy, per the "faster"
statement.

But to make it simple I think I'll drop this patch in the next version.

Thanks!

-- 
Peter Xu




Re: [PATCH 1/5] linux-headers: Add vduse.h

2022-02-07 Thread Yongji Xie
On Mon, Feb 7, 2022 at 9:12 PM Stefan Hajnoczi  wrote:
>
> On Tue, Jan 25, 2022 at 09:17:56PM +0800, Xie Yongji wrote:
> > diff --git a/scripts/update-linux-headers.sh 
> > b/scripts/update-linux-headers.sh
> > index fea4d6eb65..4c7846076f 100755
> > --- a/scripts/update-linux-headers.sh
> > +++ b/scripts/update-linux-headers.sh
> > @@ -198,6 +198,7 @@ for i in "$tmpdir"/include/linux/*virtio*.h \
> >   "$tmpdir/include/linux/const.h" \
> >   "$tmpdir/include/linux/kernel.h" \
> >   "$tmpdir/include/linux/vhost_types.h" \
> > + "$tmpdir/include/linux/vduse.h" \
> >   "$tmpdir/include/linux/sysinfo.h"; do
> >  cp_portable "$i" "$output/include/standard-headers/linux"
>
> VDUSE is only available on Linux hosts so it should go in linux-headers/
> instead of standard-headers/linux/:
>
>   # - linux-headers/ for files that are required for compiling for a
>   #   Linux host.  Generally we have these so we can use kernel structs
>   #   and defines that are more recent than the headers that might be
>   #   installed on the host system.  Usually this script can do simple
>   #   file copies for these headers.
>   #
>   # - include/standard-headers/ for files that are used for guest
>   #   device emulation and are required on all hosts.  For instance, we
>   #   get our definitions of the virtio structures from the Linux
>   #   kernel headers, but we need those definitions regardless of which
>   #   host OS we are building for.  This script has to be careful to
>   #   sanitize the headers to remove any use of Linux-specifics such as
>   #   types like "__u64".  This work is done in the cp_portable function.

Got it! Will fix it in v2.

Thanks,
Yongji



Re: [PATCH v8 5/5] multifd: Implement zero copy write in multifd migration (multifd-zero-copy)

2022-02-07 Thread Peter Xu
On Mon, Feb 07, 2022 at 11:49:38PM -0300, Leonardo Bras Soares Passos wrote:
> Hello Peter, thanks for reviewing!
> 
> On Mon, Feb 7, 2022 at 11:22 PM Peter Xu  wrote:
> >
> > On Tue, Feb 01, 2022 at 03:29:03AM -0300, Leonardo Bras wrote:
> > > -void multifd_send_sync_main(QEMUFile *f)
> > > +int multifd_send_sync_main(QEMUFile *f)
> > >  {
> > >  int i;
> > > +bool flush_zero_copy;
> > >
> > >  if (!migrate_use_multifd()) {
> > > -return;
> > > +return 0;
> > >  }
> > >  if (multifd_send_state->pages->num) {
> > >  if (multifd_send_pages(f) < 0) {
> > >  error_report("%s: multifd_send_pages fail", __func__);
> > > -return;
> > > +return 0;
> >
> > I've not checked how it used to do if multifd_send_pages() failed, but.. 
> > should
> > it return -1 rather than 0 when there will be a return code?
> 
> Yeah, that makes sense.
> The point here is that I was trying not to modify much of the current 
> behavior.
> 
> I mean, multifd_send_sync_main() would previously return void, so any
> other errors would not matter to the caller of this function, which
> will continue to run as if nothing happened.
> 
> Now, if it fails with flush_zero_copy, the operation needs to be aborted.

Right, so how I understand is we'll fail anyway, and this allows us to fail
(probably) sooner.

> 
> Maybe, I should make it different:
> - In any error, return -1.
> - Create/use a specific error code in the case of a failing
> flush_zero_copy, so I can test the return value for it on the caller
> function and return early.
> 
> Or alternatively, the other errors could also return early, but since
> this will change how the code currently works, I would probably need
> another patch for that change. (so it can be easily reverted if
> needed)

Yeah, should work too to add a patch before this one.

> 
> What do you think is better?

I just don't see how it could continue if e.g. multifd_send_pages() failed.

The other thing is returning zero looks weird itself when there's obviously an
error.  Normally we could allow that but better with a comment showing why.
For this case it's more natural to me if we could just fail early.

Juan?

-- 
Peter Xu




Re: [PATCH v8 5/5] multifd: Implement zero copy write in multifd migration (multifd-zero-copy)

2022-02-07 Thread Leonardo Bras Soares Passos
Hello Peter, thanks for reviewing!

On Mon, Feb 7, 2022 at 11:22 PM Peter Xu  wrote:
>
> On Tue, Feb 01, 2022 at 03:29:03AM -0300, Leonardo Bras wrote:
> > -void multifd_send_sync_main(QEMUFile *f)
> > +int multifd_send_sync_main(QEMUFile *f)
> >  {
> >  int i;
> > +bool flush_zero_copy;
> >
> >  if (!migrate_use_multifd()) {
> > -return;
> > +return 0;
> >  }
> >  if (multifd_send_state->pages->num) {
> >  if (multifd_send_pages(f) < 0) {
> >  error_report("%s: multifd_send_pages fail", __func__);
> > -return;
> > +return 0;
>
> I've not checked how it used to do if multifd_send_pages() failed, but.. 
> should
> it return -1 rather than 0 when there will be a return code?

Yeah, that makes sense.
The point here is that I was trying not to modify much of the current behavior.

I mean, multifd_send_sync_main() would previously return void, so any
other errors would not matter to the caller of this function, which
will continue to run as if nothing happened.

Now, if it fails with flush_zero_copy, the operation needs to be aborted.

Maybe, I should make it different:
- In any error, return -1.
- Create/use a specific error code in the case of a failing
flush_zero_copy, so I can test the return value for it on the caller
function and return early.

Or alternatively, the other errors could also return early, but since
this will change how the code currently works, I would probably need
another patch for that change. (so it can be easily reverted if
needed)

What do you think is better?


> >  }
> >  }
> > +
> > +/*
> > + * When using zero-copy, it's necessary to flush after each iteration 
> > to
> > + * make sure pages from earlier iterations don't end up replacing newer
> > + * pages.
> > + */
> > +flush_zero_copy = migrate_use_zero_copy_send();
> > +
> >  for (i = 0; i < migrate_multifd_channels(); i++) {
> >  MultiFDSendParams *p = &multifd_send_state->params[i];
> >
> > @@ -591,7 +600,7 @@ void multifd_send_sync_main(QEMUFile *f)
> >  if (p->quit) {
> >  error_report("%s: channel %d has already quit", __func__, i);
> >  qemu_mutex_unlock(&p->mutex);
> > -return;
> > +return 0;
>
> Same question here.

Please see above,

>
> >  }
>
> The rest looks good.  Thanks,

Thank you!

Best regards,
Leo




Re: [PATCH v5 1/1] virtio: fix the condition for iommu_platform not supported

2022-02-07 Thread Jason Wang
On Mon, Feb 7, 2022 at 7:29 PM Halil Pasic  wrote:
>
> The commit 04ceb61a40 ("virtio: Fail if iommu_platform is requested, but
> unsupported") claims to fail the device hotplug when iommu_platform
> is requested, but not supported by the (vhost) device. At first
> glance the condition for detecting that situation looks perfect, but
> because of a certain peculiarity of virtio_platform it ain't.
>
> In fact the aforementioned commit introduces a regression. It breaks
> virtio-fs support for Secure Execution, and most likely also for AMD SEV
> or any other confidential guest scenario that relies on encrypted guest
> memory.  The same also applies to any other vhost device that does not
> support _F_ACCESS_PLATFORM.
>
> The peculiarity is that iommu_platform and _F_ACCESS_PLATFORM collates
> "device can not access all of the guest RAM" and "iova != gpa, thus
> device needs to translate iova".
>
> Confidential guest technologies currently rely on the device/hypervisor
> offering _F_ACCESS_PLATFORM, so that, after the feature has been
> negotiated, the guest  grants access to the portions of memory the
> device needs to see. So for confidential guests, generally,
> _F_ACCESS_PLATFORM is about the restricted access to memory, but not
> about the addresses used being something else than guest physical
> addresses.
>
> This is the very reason for which commit f7ef7e6e3b ("vhost: correctly
> turn on VIRTIO_F_IOMMU_PLATFORM") fences _F_ACCESS_PLATFORM from the
> vhost device that does not need it, because on the vhost interface it
> only means "I/O address translation is needed".
>
> This patch takes inspiration from f7ef7e6e3b ("vhost: correctly turn on
> VIRTIO_F_IOMMU_PLATFORM"), and uses the same condition for detecting the
> situation when _F_ACCESS_PLATFORM is requested, but no I/O translation
> by the device, and thus no device capability is needed. In this
> situation claiming that the device does not support iommu_platform=on
> is counter-productive. So let us stop doing that!
>
> Signed-off-by: Halil Pasic 
> Reported-by: Jakob Naucke 
> Fixes: 04ceb61a40 ("virtio: Fail if iommu_platform is requested, but
> unsupported")
> Acked-by: Cornelia Huck 
> Reviewed-by: Daniel Henrique Barboza 
> Tested-by: Daniel Henrique Barboza 
> Cc: Kevin Wolf 
> Cc: qemu-sta...@nongnu.org

Acked-by: Jason Wang 

>
> ---
>
> v4->v5:
> * added back the return; so if somebody were to add code to the end of
>   the function we are still good
> v3->v4:
> * Fixed commit message (thanks Connie)
> * Removed counter-productive initialization (thanks Connie)
> * Added tags
> v2->v3:
> * Caught a bug: I tried to check if vdev has the feature
>ACCESS_PLATFORM after we have forced it. Moved the check
>to a better place
> v1->v2:
> * Commit message tweaks. Most notably fixed commit SHA (Michael)
>
> ---
> ---
>  hw/virtio/virtio-bus.c | 12 +++-
>  1 file changed, 7 insertions(+), 5 deletions(-)
>
> diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
> index d23db98c56..0f69d1c742 100644
> --- a/hw/virtio/virtio-bus.c
> +++ b/hw/virtio/virtio-bus.c
> @@ -48,6 +48,7 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error 
> **errp)
>  VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus);
>  VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
>  bool has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
> +bool vdev_has_iommu;
>  Error *local_err = NULL;
>
>  DPRINTF("%s: plug device.\n", qbus->name);
> @@ -69,11 +70,6 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error 
> **errp)
>  return;
>  }
>
> -if (has_iommu && !virtio_host_has_feature(vdev, 
> VIRTIO_F_IOMMU_PLATFORM)) {
> -error_setg(errp, "iommu_platform=true is not supported by the 
> device");
> -return;
> -}
> -
>  if (klass->device_plugged != NULL) {
>  klass->device_plugged(qbus->parent, &local_err);
>  }
> @@ -82,9 +78,15 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error 
> **errp)
>  return;
>  }
>
> +vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
>  if (klass->get_dma_as != NULL && has_iommu) {
>  virtio_add_feature(&vdev->host_features, VIRTIO_F_IOMMU_PLATFORM);
>  vdev->dma_as = klass->get_dma_as(qbus->parent);
> +if (!vdev_has_iommu && vdev->dma_as != &address_space_memory) {
> +error_setg(errp,
> +   "iommu_platform=true is not supported by the device");
> +return;
> +}
>  } else {
>  vdev->dma_as = &address_space_memory;
>  }
>
> base-commit: 0d564a3e32ba8494014c67cdd2ebf0fb71860dff
> --
> 2.32.0
>




Re: [PATCH 1/4] target/ppc: Remove powerpc_excp_legacy

2022-02-07 Thread David Gibson
On Mon, Feb 07, 2022 at 03:30:33PM -0300, Fabiano Rosas wrote:
> Now that all CPU families have their own separate exception
> dispatching code we can remove powerpc_excp_legacy.
> 
> Signed-off-by: Fabiano Rosas 
> ---
>  target/ppc/excp_helper.c | 477 +--
>  1 file changed, 3 insertions(+), 474 deletions(-)

Nice!

> 
> diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
> index 0050c8447f..c6646503aa 100644
> --- a/target/ppc/excp_helper.c
> +++ b/target/ppc/excp_helper.c
> @@ -163,7 +163,7 @@ static void ppc_excp_debug_sw_tlb(CPUPPCState *env, int 
> excp)
>   env->error_code);
>  }
>  
> -
> +#if defined(TARGET_PPC64)
>  static int powerpc_reset_wakeup(CPUState *cs, CPUPPCState *env, int excp,
>  target_ulong *msr)
>  {
> @@ -267,7 +267,6 @@ static void ppc_excp_apply_ail(PowerPCCPU *cpu, int 
> excp_model, int excp,
>target_ulong *new_msr,
>target_ulong *vector)
>  {
> -#if defined(TARGET_PPC64)
>  CPUPPCState *env = &cpu->env;
>  bool mmu_all_on = ((msr >> MSR_IR) & 1) && ((msr >> MSR_DR) & 1);
>  bool hv_escalation = !(msr & MSR_HVB) && (*new_msr & MSR_HVB);
> @@ -356,8 +355,8 @@ static void ppc_excp_apply_ail(PowerPCCPU *cpu, int 
> excp_model, int excp,
>  *vector |= 0xc0003000ull; /* Apply scv's AIL=3 offset */
>  }
>  }
> -#endif
>  }
> +#endif
>  
>  static void powerpc_set_excp_state(PowerPCCPU *cpu,
>target_ulong vector, target_ulong 
> msr)
> @@ -1641,476 +1640,6 @@ static inline void powerpc_excp_books(PowerPCCPU 
> *cpu, int excp)
>  }
>  #endif
>  
> -/*
> - * Note that this function should be greatly optimized when called
> - * with a constant excp, from ppc_hw_interrupt
> - */
> -static inline void powerpc_excp_legacy(PowerPCCPU *cpu, int excp)
> -{
> -CPUState *cs = CPU(cpu);
> -CPUPPCState *env = &cpu->env;
> -int excp_model = env->excp_model;
> -target_ulong msr, new_msr, vector;
> -int srr0, srr1, lev = -1;
> -
> -if (excp <= POWERPC_EXCP_NONE || excp >= POWERPC_EXCP_NB) {
> -cpu_abort(cs, "Invalid PowerPC exception %d. Aborting\n", excp);
> -}
> -
> -qemu_log_mask(CPU_LOG_INT, "Raise exception at " TARGET_FMT_lx
> -  " => %s (%d) error=%02x\n", env->nip, 
> powerpc_excp_name(excp),
> -  excp, env->error_code);
> -
> -/* new srr1 value excluding must-be-zero bits */
> -if (excp_model == POWERPC_EXCP_BOOKE) {
> -msr = env->msr;
> -} else {
> -msr = env->msr & ~0x783fULL;
> -}
> -
> -/*
> - * new interrupt handler msr preserves existing HV and ME unless
> - * explicitly overriden
> - */
> -new_msr = env->msr & (((target_ulong)1 << MSR_ME) | MSR_HVB);
> -
> -/* target registers */
> -srr0 = SPR_SRR0;
> -srr1 = SPR_SRR1;
> -
> -/*
> - * check for special resume at 0x100 from doze/nap/sleep/winkle on
> - * P7/P8/P9
> - */
> -if (env->resume_as_sreset) {
> -excp = powerpc_reset_wakeup(cs, env, excp, &msr);
> -}
> -
> -/*
> - * Hypervisor emulation assistance interrupt only exists on server
> - * arch 2.05 server or later. We also don't want to generate it if
> - * we don't have HVB in msr_mask (PAPR mode).
> - */
> -if (excp == POWERPC_EXCP_HV_EMU
> -#if defined(TARGET_PPC64)
> -&& !(mmu_is_64bit(env->mmu_model) && (env->msr_mask & MSR_HVB))
> -#endif /* defined(TARGET_PPC64) */
> -
> -) {
> -excp = POWERPC_EXCP_PROGRAM;
> -}
> -
> -#ifdef TARGET_PPC64
> -/*
> - * SPEU and VPU share the same IVOR but they exist in different
> - * processors. SPEU is e500v1/2 only and VPU is e6500 only.
> - */
> -if (excp_model == POWERPC_EXCP_BOOKE && excp == POWERPC_EXCP_VPU) {
> -excp = POWERPC_EXCP_SPEU;
> -}
> -#endif
> -
> -vector = env->excp_vectors[excp];
> -if (vector == (target_ulong)-1ULL) {
> -cpu_abort(cs, "Raised an exception without defined vector %d\n",
> -  excp);
> -}
> -
> -vector |= env->excp_prefix;
> -
> -switch (excp) {
> -case POWERPC_EXCP_CRITICAL:/* Critical input 
> */
> -switch (excp_model) {
> -case POWERPC_EXCP_40x:
> -srr0 = SPR_40x_SRR2;
> -srr1 = SPR_40x_SRR3;
> -break;
> -case POWERPC_EXCP_BOOKE:
> -srr0 = SPR_BOOKE_CSRR0;
> -srr1 = SPR_BOOKE_CSRR1;
> -break;
> -case POWERPC_EXCP_6xx:
> -break;
> -default:
> -goto excp_invalid;
> -}
> -break;
> -case POWERPC_EXCP_MCHECK:/* Machine check exception  
> */
> -if (msr_me == 0) {
> -/*
> - * Machine check exception is not enabled.  Enter
> -

Re: [PATCH v8 5/5] multifd: Implement zero copy write in multifd migration (multifd-zero-copy)

2022-02-07 Thread Peter Xu
On Tue, Feb 01, 2022 at 03:29:03AM -0300, Leonardo Bras wrote:
> -void multifd_send_sync_main(QEMUFile *f)
> +int multifd_send_sync_main(QEMUFile *f)
>  {
>  int i;
> +bool flush_zero_copy;
>  
>  if (!migrate_use_multifd()) {
> -return;
> +return 0;
>  }
>  if (multifd_send_state->pages->num) {
>  if (multifd_send_pages(f) < 0) {
>  error_report("%s: multifd_send_pages fail", __func__);
> -return;
> +return 0;

I've not checked how it used to do if multifd_send_pages() failed, but.. should
it return -1 rather than 0 when there will be a return code?

>  }
>  }
> +
> +/*
> + * When using zero-copy, it's necessary to flush after each iteration to
> + * make sure pages from earlier iterations don't end up replacing newer
> + * pages.
> + */
> +flush_zero_copy = migrate_use_zero_copy_send();
> +
>  for (i = 0; i < migrate_multifd_channels(); i++) {
>  MultiFDSendParams *p = &multifd_send_state->params[i];
>  
> @@ -591,7 +600,7 @@ void multifd_send_sync_main(QEMUFile *f)
>  if (p->quit) {
>  error_report("%s: channel %d has already quit", __func__, i);
>  qemu_mutex_unlock(&p->mutex);
> -return;
> +return 0;

Same question here.

>  }

The rest looks good.  Thanks,

-- 
Peter Xu




Re: [RFC PATCH 1/1] virtio: fix feature negotiation for ACCESS_PLATFORM

2022-02-07 Thread Halil Pasic
On Mon, 7 Feb 2022 16:46:04 -0300
Daniel Henrique Barboza  wrote:

> On 2/7/22 11:46, Halil Pasic wrote:
> > On Mon, 7 Feb 2022 08:46:34 -0300
> > Daniel Henrique Barboza  wrote:
> >   
> > I have considered this and decided against it. The reason why is
> > if that approach is taken, we can't really add more code to the
> > end of the function. An early return is good if we want to
> > abort the function with an error. My point is !has_iommu does
> > not necessarily mean we are done: after a block that handles
> > the has_iommu situation, in future, there could be a block that
> > handles something different.  
> 
> And that's fine, but the way this patch is changing it I'm not sure it's 
> better
> than what we already have. Today we have:
> 
> if (has_iommu) {

To be exact today we have :
if (klass->get_dma_as != NULL && has_iommu) {


>(... assign vdev->dma_as in some cases ...)

Today not in some cases but unconditionally. We already checked for
!!klass->get_dma_as and that is important.

Because if you rewrite current to what you have just described here,
then in this branch of the if-else you have to handle !klass->get_dma_as.

So you would have to do
if (klass->get_dma_as) {
vdev->dma_as = klass->get_dma_as();
if (cond) {
do_error();
}
} else {
vdev->dma_as = &address_space_memory;
}

> } else {
> vdev->dma_as = &address_space_memory;
> }
> 
> 
> Your patch is doing:
> 
> vdev->dma_as = &address_space_memory;
> 
> if (has_iommu) {
>(... assign vdev->dma_as in some cases ...)
> }
> 
> 
> You got rid of an 'else', but ended up adding a double "vdev->dma_as =" 
> assignment
> depending on the case (has_iommu = true and klass->get_dma_as != NULL). 

And why is that bad?

The solution I wrote is very clear about vdev->dma_as != NULL and that
vdev->dma_as conceptually defaults to &address_space_memory, and may
deviate from that only if both has_iommu and klass->get_dma_as != NULL
in which case get_dma_as() may override it to something different.

The compiler can still generate branches and stores as it pleases
as long as the behavior is the same AFAIK. 

> This is why
> I proposed the early exit.
> 
> If we're worried about adding more code in the future might as well leave the 
> existing
> if/else as is.
> 

Not really, we would end up having two extra else branches instead of
none (with 3 if-s in both cases) and 3 places where we might assign
->dma_as (although mutually exclusive) instead of just two.

For me my version is easier to read.


> 
> 
> > 
> > Would this patch work for power? Or are there valid scenarios that
> > it breaks? I'm asking, because you voiced concern regarding this before.  
> 
> 
> I'll test it when I have an opportunity and let you know.
> 
> 

Thank you!

Regards,
Halil



Re: [PATCH] hvf: arm: Handle ID_AA64ISAR2_EL1 reads

2022-02-07 Thread Cameron Esfahani
Reviewed-by: Cameron Esfahani <di...@apple.com>

Cameron

> On Feb 7, 2022, at 2:52 PM, Alexander Graf  wrote:
> 
> Recent Linux versions added support to read ID_AA64ISAR2_EL1. On M1,
> those reads trap into QEMU which handles them as faults.
> 
> However, according to the ARMv8 spec (issue D17783), reads on this
> register in older ARMv8 revisions should be RES0. So let's treat it
> as such instead.
> 
> Reported-by: Ivan Babrou 
> Signed-off-by: Alexander Graf 
> ---
> target/arm/hvf/hvf.c | 5 +
> 1 file changed, 5 insertions(+)
> 
> diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> index 92ad0d29c4..045ec69c7c 100644
> --- a/target/arm/hvf/hvf.c
> +++ b/target/arm/hvf/hvf.c
> @@ -54,6 +54,7 @@
> #define SYSREG_PMCEID1_EL0SYSREG(3, 3, 9, 12, 7)
> #define SYSREG_PMCCNTR_EL0SYSREG(3, 3, 9, 13, 0)
> #define SYSREG_PMCCFILTR_EL0  SYSREG(3, 3, 14, 15, 7)
> +#define SYSREG_ID_AA64ISAR2_EL1 SYSREG(3, 0, 0, 6, 2)
> 
> #define WFX_IS_WFE (1 << 0)
> 
> @@ -780,6 +781,10 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, 
> uint32_t rt)
> case SYSREG_OSDLR_EL1:
> /* Dummy register */
> break;
> +case SYSREG_ID_AA64ISAR2_EL1:
> +/* We do not support any of the ISAR2 features yet */
> +val = 0;
> +break;
> default:
> cpu_synchronize_state(cpu);
> trace_hvf_unhandled_sysreg_read(env->pc, reg,
> -- 
> 2.32.0 (Apple Git-132)
> 



[PATCH] MAINTAINERS: python - remove ehabkost and add bleal

2022-02-07 Thread John Snow
Eduardo Habkost has left Red Hat and has other daily responsibilities to
attend to. In order to stop spamming him on every series, remove him as
"Reviewer" for the python/ library dir and add Beraldo Leal instead.

For the "python scripts" stanza (which is separate due to level of
support), replace Eduardo as maintainer with myself.

(Thanks for all of your hard work, Eduardo!)

Signed-off-by: John Snow 
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 9814580975..028ac0de25 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2735,13 +2735,13 @@ F: backends/cryptodev*.c
 Python library
 M: John Snow 
 M: Cleber Rosa 
-R: Eduardo Habkost 
+R: Beraldo Leal 
 S: Maintained
 F: python/
 T: git https://gitlab.com/jsnow/qemu.git python
 
 Python scripts
-M: Eduardo Habkost 
+M: John Snow 
 M: Cleber Rosa 
 S: Odd Fixes
 F: scripts/*.py
-- 
2.34.1




Re: [PATCH v5 10/11] 9p: darwin: meson: Allow VirtFS on Darwin

2022-02-07 Thread Will Cohen
On Mon, Feb 7, 2022 at 6:44 PM Christian Schoenebeck 
wrote:

> On Montag, 7. Februar 2022 23:40:23 CET Will Cohen wrote:
> > From: Keno Fischer 
> >
> > To allow VirtFS on darwin, we need to check that pthread_fchdir_np is
> > available, which has only been available since macOS 10.12.
> >
> > Additionally, virtfs_proxy_helper is disabled on Darwin. This patch
> > series does not currently provide an implementation of the proxy-helper,
> > but this functionality could be implemented later on.
> >
> > Signed-off-by: Keno Fischer 
> > [Michael Roitzsch: - Rebase for NixOS]
> > Signed-off-by: Michael Roitzsch 
> > [Will Cohen: - Rebase to master]
> > Signed-off-by: Will Cohen 
> > Reviewed-by: Paolo Bonzini 
> > [Will Cohen: - Add check for pthread_fchdir_np to virtfs
> >  - Add comments to patch commit
> >  - Note that virtfs_proxy_helper does not work
> >on macOS
> >  - Adjust meson virtfs error note to specify macOS]
> > Signed-off-by: Will Cohen 
> > ---
> >  fsdev/meson.build |  1 +
> >  meson.build   | 14 ++
> >  2 files changed, 11 insertions(+), 4 deletions(-)
> >
> > diff --git a/fsdev/meson.build b/fsdev/meson.build
> > index adf57cc43e..b632b66348 100644
> > --- a/fsdev/meson.build
> > +++ b/fsdev/meson.build
> > @@ -7,6 +7,7 @@ fsdev_ss.add(when: ['CONFIG_FSDEV_9P'], if_true: files(
> >'qemu-fsdev.c',
> >  ), if_false: files('qemu-fsdev-dummy.c'))
> >  softmmu_ss.add_all(when: 'CONFIG_LINUX', if_true: fsdev_ss)
> > +softmmu_ss.add_all(when: 'CONFIG_DARWIN', if_true: fsdev_ss)
> >
> >  if have_virtfs_proxy_helper
> >executable('virtfs-proxy-helper',
> > diff --git a/meson.build b/meson.build
> > index 5f43355071..c1d13209ff 100644
> > --- a/meson.build
> > +++ b/meson.build
> > @@ -1421,17 +1421,23 @@ if not get_option('dbus_display').disabled()
> >endif
> >  endif
> >
> > -have_virtfs = (targetos == 'linux' and
> > +if targetos == 'darwin' and cc.has_function('pthread_fchdir_np')
> > +  have_virtfs = have_system
> > +else
> > +  have_virtfs = (targetos == 'linux' and
> >  have_system and
> >  libattr.found() and
> >  libcap_ng.found())
> > +endif
> >
> > -have_virtfs_proxy_helper = have_virtfs and have_tools
> > +have_virtfs_proxy_helper = targetos != 'darwin' and have_virtfs and
> > have_tools
> >
> >  if get_option('virtfs').enabled()
> >if not have_virtfs
> > -if targetos != 'linux'
> > -  error('virtio-9p (virtfs) requires Linux')
> > +if targetos != 'linux' and targetos != 'darwin'
> > +  error('virtio-9p (virtfs) requires Linux or macOS')
> > +elif targetos == 'darwin' and not
> cc.has_function('pthread_fchdir_np')
> > +  error('virtio-9p (virtfs) on Darwin requires the presence of
> pthread_fchdir_np')
>
> Maybe call this "macOS" in this error message as well?
>
> error('virtio-9p (virtfs) requires the presence of pthread_fchdir_np
> on macOS')


Agreed — shouldn’t have omitted.

>
>
> >  elif not libcap_ng.found() or not libattr.found()
> >error('virtio-9p (virtfs) requires libcap-ng-devel and
> > libattr-devel') elif not have_system
>
>
>


Re: [PATCH v5 10/11] 9p: darwin: meson: Allow VirtFS on Darwin

2022-02-07 Thread Christian Schoenebeck
On Montag, 7. Februar 2022 23:40:23 CET Will Cohen wrote:
> From: Keno Fischer 
> 
> To allow VirtFS on darwin, we need to check that pthread_fchdir_np is
> available, which has only been available since macOS 10.12.
> 
> Additionally, virtfs_proxy_helper is disabled on Darwin. This patch
> series does not currently provide an implementation of the proxy-helper,
> but this functionality could be implemented later on.
> 
> Signed-off-by: Keno Fischer 
> [Michael Roitzsch: - Rebase for NixOS]
> Signed-off-by: Michael Roitzsch 
> [Will Cohen: - Rebase to master]
> Signed-off-by: Will Cohen 
> Reviewed-by: Paolo Bonzini 
> [Will Cohen: - Add check for pthread_fchdir_np to virtfs
>  - Add comments to patch commit
>  - Note that virtfs_proxy_helper does not work
>on macOS
>  - Adjust meson virtfs error note to specify macOS]
> Signed-off-by: Will Cohen 
> ---
>  fsdev/meson.build |  1 +
>  meson.build   | 14 ++
>  2 files changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/fsdev/meson.build b/fsdev/meson.build
> index adf57cc43e..b632b66348 100644
> --- a/fsdev/meson.build
> +++ b/fsdev/meson.build
> @@ -7,6 +7,7 @@ fsdev_ss.add(when: ['CONFIG_FSDEV_9P'], if_true: files(
>'qemu-fsdev.c',
>  ), if_false: files('qemu-fsdev-dummy.c'))
>  softmmu_ss.add_all(when: 'CONFIG_LINUX', if_true: fsdev_ss)
> +softmmu_ss.add_all(when: 'CONFIG_DARWIN', if_true: fsdev_ss)
> 
>  if have_virtfs_proxy_helper
>executable('virtfs-proxy-helper',
> diff --git a/meson.build b/meson.build
> index 5f43355071..c1d13209ff 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -1421,17 +1421,23 @@ if not get_option('dbus_display').disabled()
>endif
>  endif
> 
> -have_virtfs = (targetos == 'linux' and
> +if targetos == 'darwin' and cc.has_function('pthread_fchdir_np')
> +  have_virtfs = have_system
> +else
> +  have_virtfs = (targetos == 'linux' and
>  have_system and
>  libattr.found() and
>  libcap_ng.found())
> +endif
> 
> -have_virtfs_proxy_helper = have_virtfs and have_tools
> +have_virtfs_proxy_helper = targetos != 'darwin' and have_virtfs and
> have_tools
> 
>  if get_option('virtfs').enabled()
>if not have_virtfs
> -if targetos != 'linux'
> -  error('virtio-9p (virtfs) requires Linux')
> +if targetos != 'linux' and targetos != 'darwin'
> +  error('virtio-9p (virtfs) requires Linux or macOS')
> +elif targetos == 'darwin' and not cc.has_function('pthread_fchdir_np')
> +  error('virtio-9p (virtfs) on Darwin requires the presence of 
> pthread_fchdir_np')

Maybe call this "macOS" in this error message as well?

error('virtio-9p (virtfs) requires the presence of pthread_fchdir_np on 
macOS')

>  elif not libcap_ng.found() or not libattr.found()
>error('virtio-9p (virtfs) requires libcap-ng-devel and
> libattr-devel') elif not have_system





Re: [PATCH 06/16] hw/arm/xlnx-zcu102: Don't enable PSCI conduit when booting guest in EL3

2022-02-07 Thread Alexander Graf



On 07.02.22 19:59, Philippe Mathieu-Daudé wrote:

On 7/2/22 19:13, Edgar E. Iglesias wrote:


On Mon, Feb 7, 2022 at 5:24 PM Alexander Graf wrote:



    On 07.02.22 17:06, Philippe Mathieu-Daudé wrote:
 > On 7/2/22 16:59, Alexander Graf wrote:
 >>
 >> On 07.02.22 16:52, Edgar E. Iglesias wrote:
 >
 >>> Both Versal and ZynqMP require MicroBlaze firmware to run the
 >>> reference implementations of Trusted Firmware. We never 
supported

 >>> this in upstream QEMU but we do support it with our fork (by
    running
 >>> multiple QEMU instances co-simulating).
 >>>
 >>> Having said that, we do have tons of EL3 test-cases that we 
use to

 >>> validate QEMU that run with EL3 enabled in upstream.
 >>>
 >>> So there's two user flows:
 >>> 1. Direct boots using QEMUs builtin PSCI (Most users use this
    to run
 >>> Linux, Xen, U-boot, etc)
 >>> 2. Firmware boot at EL3 without QEMUs builtin PSCI (Mostly 
used by

 >>> test-code)
 >>>
 >>> Number #2 is the one affected here and that by accident used to
    have
 >>> the builtin PSCI support enabled but now requires more power
    control
 >>> modelling to keep working.
 >>> Unless I'm missing something, the -kernel boots will continue
    to use
 >>> the builtin PSCI implementation.
 >>
 >>
 >> So nobody is using upstream QEMU to validate and prototype
 >> ATF/EL1s/EL0s code? That's a shame :). I suppose there is little
 >> value without the bitstream emulation and R cluster. Do you have
 >> plans to bring multi process emulation upstream some day to 
enable

 >> these there?
 >
 > The R cluster is already in mainstream, isn't it?


    In that case, wouldn't it make sense to build an emulation model 
of the

    PMU behavior so that normal ATF works out of the box?


    Thanks,

    Alex


Yes, that makes sense and there are several ways to implement it. To 
fully support the programmability of the PMU we'd need to model the 
MicroBlazes together with the ARM cores.


But PMU support does not really conflict with this patch series, or 
is there something I'm missing?


My understanding is Alex generically wonders about code coverage, not
about the ZynqMP in particular :)



I'm more curious what the purpose of zynqmp / versal simulation in QEMU 
is. What we're saying here is that we only care about "Linux at EL2 and 
below" plus a Xilinx validation test suite. I understand how multi-QEMU 
emulation may be difficult, but EL3 simulation with Cortex-A plus 
Cortex-R clusters and a simulated PMU sounds like it would get you a 
very long way on simulation coverage.


That said, Xilinx probably knows their user base the best, so if they 
decide that the ability to run TrustZone code is not something they 
believe their users need in QEMU, I'm definitely happy with that stance.



Alex




Re: [PATCH v2] tests/qtest: add qtests for npcm7xx sdhci

2022-02-07 Thread Patrick Venture
On Mon, Feb 7, 2022 at 9:34 AM Peter Maydell 
wrote:

> On Sun, 6 Feb 2022 at 01:41, Patrick Venture  wrote:
> >
> > From: Shengtan Mao 
> >
> > Reviewed-by: Hao Wu 
> > Reviewed-by: Chris Rauer 
> > Signed-off-by: Shengtan Mao 
> > Signed-off-by: Patrick Venture 
> > ---
> > v2:
> >  * update copyright year
> >  * check result of open
> >  * use g_free instead of free
> >  * move declarations to the top
> >  * use g_file_open_tmp
>
> Fails to compile:
>
> ../../tests/qtest/npcm7xx_sdhci-test.c:121:32: error: use of
> undeclared identifier 'NPCM7XX_REG_SIZE'
> uint64_t end_addr = addr + NPCM7XX_REG_SIZE;
>^
>

Thanks. I must have only compiled at a part-way point while tweaking it.
I'll see if it compiles for me, and then figure out why it does when it
doesn't, or if it doesn't, then obviously fix it.  Either way, will fix in
v3, thanks.


>
>
> -- PMM
>


[PATCH] hvf: arm: Handle ID_AA64ISAR2_EL1 reads

2022-02-07 Thread Alexander Graf
Recent Linux versions added support to read ID_AA64ISAR2_EL1. On M1,
those reads trap into QEMU which handles them as faults.

However, according to the ARMv8 spec (issue D17783), reads on this
register in older ARMv8 revisions should be RES0. So let's treat it
as such instead.

Reported-by: Ivan Babrou 
Signed-off-by: Alexander Graf 
---
 target/arm/hvf/hvf.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 92ad0d29c4..045ec69c7c 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -54,6 +54,7 @@
 #define SYSREG_PMCEID1_EL0    SYSREG(3, 3, 9, 12, 7)
 #define SYSREG_PMCCNTR_EL0    SYSREG(3, 3, 9, 13, 0)
 #define SYSREG_PMCCFILTR_EL0  SYSREG(3, 3, 14, 15, 7)
+#define SYSREG_ID_AA64ISAR2_EL1 SYSREG(3, 0, 0, 6, 2)
 
 #define WFX_IS_WFE (1 << 0)
 
@@ -780,6 +781,10 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt)
 case SYSREG_OSDLR_EL1:
 /* Dummy register */
 break;
+case SYSREG_ID_AA64ISAR2_EL1:
+/* We do not support any of the ISAR2 features yet */
+val = 0;
+break;
 default:
 cpu_synchronize_state(cpu);
 trace_hvf_unhandled_sysreg_read(env->pc, reg,
-- 
2.32.0 (Apple Git-132)




[PATCH v5 10/11] 9p: darwin: meson: Allow VirtFS on Darwin

2022-02-07 Thread Will Cohen
From: Keno Fischer 

To allow VirtFS on darwin, we need to check that pthread_fchdir_np is
available, which has only been available since macOS 10.12.

Additionally, virtfs_proxy_helper is disabled on Darwin. This patch
series does not currently provide an implementation of the proxy-helper,
but this functionality could be implemented later on.

Signed-off-by: Keno Fischer 
[Michael Roitzsch: - Rebase for NixOS]
Signed-off-by: Michael Roitzsch 
[Will Cohen: - Rebase to master]
Signed-off-by: Will Cohen 
Reviewed-by: Paolo Bonzini 
[Will Cohen: - Add check for pthread_fchdir_np to virtfs
 - Add comments to patch commit
 - Note that virtfs_proxy_helper does not work
   on macOS
 - Adjust meson virtfs error note to specify macOS]
Signed-off-by: Will Cohen 
---
 fsdev/meson.build |  1 +
 meson.build   | 14 ++
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/fsdev/meson.build b/fsdev/meson.build
index adf57cc43e..b632b66348 100644
--- a/fsdev/meson.build
+++ b/fsdev/meson.build
@@ -7,6 +7,7 @@ fsdev_ss.add(when: ['CONFIG_FSDEV_9P'], if_true: files(
   'qemu-fsdev.c',
 ), if_false: files('qemu-fsdev-dummy.c'))
 softmmu_ss.add_all(when: 'CONFIG_LINUX', if_true: fsdev_ss)
+softmmu_ss.add_all(when: 'CONFIG_DARWIN', if_true: fsdev_ss)
 
 if have_virtfs_proxy_helper
   executable('virtfs-proxy-helper',
diff --git a/meson.build b/meson.build
index 5f43355071..c1d13209ff 100644
--- a/meson.build
+++ b/meson.build
@@ -1421,17 +1421,23 @@ if not get_option('dbus_display').disabled()
   endif
 endif
 
-have_virtfs = (targetos == 'linux' and
+if targetos == 'darwin' and cc.has_function('pthread_fchdir_np')
+  have_virtfs = have_system
+else
+  have_virtfs = (targetos == 'linux' and
 have_system and
 libattr.found() and
 libcap_ng.found())
+endif
 
-have_virtfs_proxy_helper = have_virtfs and have_tools
+have_virtfs_proxy_helper = targetos != 'darwin' and have_virtfs and have_tools
 
 if get_option('virtfs').enabled()
   if not have_virtfs
-if targetos != 'linux'
-  error('virtio-9p (virtfs) requires Linux')
+if targetos != 'linux' and targetos != 'darwin'
+  error('virtio-9p (virtfs) requires Linux or macOS')
+elif targetos == 'darwin' and not cc.has_function('pthread_fchdir_np')
+  error('virtio-9p (virtfs) on Darwin requires the presence of pthread_fchdir_np')
 elif not libcap_ng.found() or not libattr.found()
   error('virtio-9p (virtfs) requires libcap-ng-devel and libattr-devel')
 elif not have_system
-- 
2.32.0 (Apple Git-132)




Re: [PATCH v5 09/11] 9p: darwin: Implement compatibility for mknodat

2022-02-07 Thread Christian Schoenebeck
On Montag, 7. Februar 2022 23:40:22 CET Will Cohen wrote:
> From: Keno Fischer 
> 
> Darwin does not support mknodat. However, to avoid race conditions
> with later setting the permissions, we must avoid using mknod on
> the full path instead. We could try to fchdir, but that would cause
> problems if multiple threads try to call mknodat at the same time.
> However, luckily there is a solution: Darwin includes a function
> that sets the cwd for the current thread only.
> This should suffice to use mknod safely.
> 
> This function (pthread_fchdir_np) is protected by a check in
> meson in a patch later in this series.
> 
> Signed-off-by: Keno Fischer 
> Signed-off-by: Michael Roitzsch 
> [Will Cohen: - Adjust coding style
>  - Replace clang references with gcc
>  - Note radar filed with Apple for missing syscall
>  - Replace direct syscall with pthread_fchdir_np and
>adjust patch notes accordingly
>  - Move qemu_mknodat from 9p-util to osdep and os-posix]
> Signed-off-by: Will Cohen 
> ---

Like already mentioned by me moments ago on previous v4 (just echoing) ...

>  hw/9pfs/9p-local.c   |  4 ++--
>  include/qemu/osdep.h | 10 ++
>  os-posix.c   | 34 ++
>  3 files changed, 46 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
> index a0d08e5216..d42ce6d8b8 100644
> --- a/hw/9pfs/9p-local.c
> +++ b/hw/9pfs/9p-local.c
> @@ -682,7 +682,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath
> *dir_path,
> 
>  if (fs_ctx->export_flags & V9FS_SM_MAPPED ||
>  fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
> -err = mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0);
> +err = qemu_mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0);
>  if (err == -1) {
>  goto out;
>  }
> @@ -697,7 +697,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath
> *dir_path, }
>  } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH ||
> fs_ctx->export_flags & V9FS_SM_NONE) {
> -err = mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev);
> +err = qemu_mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev);
>  if (err == -1) {
>  goto out;
>  }
> diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
> index d1660d67fa..f3a8367ece 100644
> --- a/include/qemu/osdep.h
> +++ b/include/qemu/osdep.h
> @@ -810,3 +810,13 @@ static inline int
> platform_does_not_support_system(const char *command) #endif
> 
>  #endif
> +
> +/*
> + * As long as mknodat is not available on macOS, this workaround
> + * using pthread_fchdir_np is needed. qemu_mknodat is defined in
> + * os-posix.c
> + */
> +#ifdef CONFIG_DARWIN
> +int pthread_fchdir_np(int fd);
> +#endif

I would make that:

#ifdef CONFIG_DARWIN
int pthread_fchdir_np(int fd) API_AVAILABLE(macosx(10.12));
#endif

here and ...

> +int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev);
> diff --git a/os-posix.c b/os-posix.c
> index ae6c9f2a5e..95c1607065 100644
> --- a/os-posix.c
> +++ b/os-posix.c
> @@ -24,6 +24,7 @@
>   */
> 
>  #include "qemu/osdep.h"
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -332,3 +333,36 @@ int os_mlock(void)
>  return -ENOSYS;
>  #endif
>  }
> +
> +/*
> + * As long as mknodat is not available on macOS, this workaround
> + * using pthread_fchdir_np is needed.
> + *
> + * Radar filed with Apple for implementing mknodat:
> + * rdar://FB9862426 (https://openradar.appspot.com/FB9862426)
> + */
> +#ifdef CONFIG_DARWIN
> +
> +int pthread_fchdir_np(int fd) API_AVAILABLE(macosx(10.12));

... drop the duplicate declaration of pthread_fchdir_np() here.

> +
> +int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev)
> +{
> +int preserved_errno, err;
> +if (pthread_fchdir_np(dirfd) < 0) {
> +return -1;
> +}
> +err = mknod(filename, mode, dev);
> +preserved_errno = errno;
> +/* Stop using the thread-local cwd */
> +pthread_fchdir_np(-1);
> +if (err < 0) {
> +errno = preserved_errno;
> +}
> +return err;
> +}
> +#else
> +int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev)
> +{
> +return mknodat(dirfd, filename, mode, dev);
> +}
> +#endif





[PATCH v5 08/11] 9p: darwin: Compatibility for f/l*xattr

2022-02-07 Thread Will Cohen
From: Keno Fischer 

On darwin `fgetxattr` takes two extra optional arguments,
and the l* variants are not defined (in favor of an extra
flag to the regular variants).

Signed-off-by: Keno Fischer 
[Michael Roitzsch: - Rebase for NixOS]
Signed-off-by: Michael Roitzsch 
Signed-off-by: Will Cohen 
---
 hw/9pfs/9p-local.c | 12 
 hw/9pfs/9p-util.h  | 17 +
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index f3272f0b43..a0d08e5216 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -790,16 +790,20 @@ static int local_fstat(FsContext *fs_ctx, int fid_type,
 mode_t tmp_mode;
 dev_t tmp_dev;
 
-if (fgetxattr(fd, "user.virtfs.uid", &tmp_uid, sizeof(uid_t)) > 0) {
+if (qemu_fgetxattr(fd, "user.virtfs.uid",
+   &tmp_uid, sizeof(uid_t)) > 0) {
 stbuf->st_uid = le32_to_cpu(tmp_uid);
 }
-if (fgetxattr(fd, "user.virtfs.gid", &tmp_gid, sizeof(gid_t)) > 0) {
+if (qemu_fgetxattr(fd, "user.virtfs.gid",
+   &tmp_gid, sizeof(gid_t)) > 0) {
 stbuf->st_gid = le32_to_cpu(tmp_gid);
 }
-if (fgetxattr(fd, "user.virtfs.mode", &tmp_mode, sizeof(mode_t)) > 0) {
+if (qemu_fgetxattr(fd, "user.virtfs.mode",
+   &tmp_mode, sizeof(mode_t)) > 0) {
 stbuf->st_mode = le32_to_cpu(tmp_mode);
 }
-if (fgetxattr(fd, "user.virtfs.rdev", &tmp_dev, sizeof(dev_t)) > 0) {
+if (qemu_fgetxattr(fd, "user.virtfs.rdev",
+   &tmp_dev, sizeof(dev_t)) > 0) {
 stbuf->st_rdev = le64_to_cpu(tmp_dev);
 }
 } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h
index 0e445b5d52..82399702b9 100644
--- a/hw/9pfs/9p-util.h
+++ b/hw/9pfs/9p-util.h
@@ -19,6 +19,23 @@
 #define O_PATH_9P_UTIL 0
 #endif
 
+#ifdef CONFIG_DARWIN
+#define qemu_fgetxattr(...) fgetxattr(__VA_ARGS__, 0, 0)
+#define qemu_lgetxattr(...) getxattr(__VA_ARGS__, 0, XATTR_NOFOLLOW)
+#define qemu_llistxattr(...) listxattr(__VA_ARGS__, XATTR_NOFOLLOW)
+#define qemu_lremovexattr(...) removexattr(__VA_ARGS__, XATTR_NOFOLLOW)
+static inline int qemu_lsetxattr(const char *path, const char *name,
+ const void *value, size_t size, int flags) {
+return setxattr(path, name, value, size, 0, flags | XATTR_NOFOLLOW);
+}
+#else
+#define qemu_fgetxattr fgetxattr
+#define qemu_lgetxattr lgetxattr
+#define qemu_llistxattr llistxattr
+#define qemu_lremovexattr lremovexattr
+#define qemu_lsetxattr lsetxattr
+#endif
+
 static inline void close_preserve_errno(int fd)
 {
 int serrno = errno;
-- 
2.32.0 (Apple Git-132)




Re: [PATCH v4 09/11] 9p: darwin: Implement compatibility for mknodat

2022-02-07 Thread Will Cohen
On Mon, Feb 7, 2022 at 5:48 PM Christian Schoenebeck 
wrote:

> On Montag, 7. Februar 2022 22:07:34 CET Will Cohen wrote:
> > On Mon, Feb 7, 2022 at 9:21 AM Christian Schoenebeck
> > 
> > wrote:
> > > On Montag, 7. Februar 2022 11:57:25 CET Dr. David Alan Gilbert wrote:
> > > > * Greg Kurz (gr...@kaod.org) wrote:
> > > > > On Mon, 7 Feb 2022 11:30:18 +0100
> > > > >
> > > > > Philippe Mathieu-Daudé  wrote:
> > > > > > On 7/2/22 09:47, Greg Kurz wrote:
> > > > > > > On Sun, 6 Feb 2022 20:10:23 -0500
> > > > > > >
> > > > > > > Will Cohen  wrote:
> > > > > > >> This patch set currently places it in 9p-util only because 9p
> is
> > >
> > > the
> > >
> > > > > > >> only
> > > > > > >> place where this issue seems to have come up so far and we
> were
> > >
> > > wary
> > >
> > > > > > >> of
> > > > > > >> editing files too far afield, but I have no attachment to its
> > > > > > >> specific
> > > > > > >> location!
> > > > > > >
> > > > > > > Inline comments are preferred on qemu-devel. Please don't top
> post
> > >
> > > !
> > >
> > > > > > > This complicates the review a lot.
> > > > > > >
> > > > > > > This is indeed a good candidate for osdep. This being said,
> unless
> > > > > > > there's
> > > > > > > some other user in the QEMU code base, it is acceptable to
> leave
> > > > > > > it
> > > > > > > under
> > > > > > > 9pfs.
> > > > > >
> > > > > > virtiofsd could eventually use it.
> > > > >
> > > > > Indeed but virtiofsd is for linux hosts only AFAICT and I'm not
> aware
> > >
> > > of
> > >
> > > > > any work to support any other host OS.
> > > > >
> > > > > Cc'ing virtio-fs people for inputs on this topic.
> > > >
> > > > Indeed, there's a lot of Linux specific code in the virtiofsd - I
> know
> > > > people are interested in other platforms, but I'm not sure that's the
> > > > right starting point.
> > > >
> > > > Dave
> > >
> > > Agreeing with Greg here: i.e. I would have placed this into osdep, but
> I
> > > would
> > > not insist on it either.
> > >
> > > Best regards,
> > > Christian Schoenebeck
> >
> > This makes sense. A revised version of this patch, moving qemu_mknodat
> from
> > 9p-util to osdep and os-posix, is attached below. I'd appreciate any
> > feedback from those looped in here, so that the context isn't lost before
> > resubmitting as a v5 patch, especially since this is starting to touch
> > files outside of 9p.
> >
> > From c9713c87163da7c96b5357d0d85ac318ae3d3051 Mon Sep 17 00:00:00 2001
> > From: Keno Fischer 
> > Date: Sat, 16 Jun 2018 20:56:55 -0400
> > Subject: [PATCH] 9p: darwin: Implement compatibility for mknodat
> >
> > Darwin does not support mknodat. However, to avoid race conditions
> > with later setting the permissions, we must avoid using mknod on
> > the full path instead. We could try to fchdir, but that would cause
> > problems if multiple threads try to call mknodat at the same time.
> > However, luckily there is a solution: Darwin includes a function
> > that sets the cwd for the current thread only.
> > This should suffice to use mknod safely.
> >
> > This function (pthread_fchdir_np) is protected by a check in
> > meson in a patch later in this series.
> >
> > Signed-off-by: Keno Fischer 
> > Signed-off-by: Michael Roitzsch 
> > [Will Cohen: - Adjust coding style
> >  - Replace clang references with gcc
> >  - Note radar filed with Apple for missing syscall
> >  - Replace direct syscall with pthread_fchdir_np and
> >adjust patch notes accordingly
> >  - Move qemu_mknodat from 9p-util to osdep and os-posix]
> > Signed-off-by: Will Cohen 
> > ---
> >  hw/9pfs/9p-local.c   |  4 ++--
> >  include/qemu/osdep.h | 10 ++
> >  os-posix.c   | 34 ++
> >  3 files changed, 46 insertions(+), 2 deletions(-)
> >
> > diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
> > index a0d08e5216..d42ce6d8b8 100644
> > --- a/hw/9pfs/9p-local.c
> > +++ b/hw/9pfs/9p-local.c
> > @@ -682,7 +682,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath
> > *dir_path,
> >
> >  if (fs_ctx->export_flags & V9FS_SM_MAPPED ||
> >  fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
> > -err = mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0);
> > +err = qemu_mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0);
> >  if (err == -1) {
> >  goto out;
> >  }
> > @@ -697,7 +697,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath
> > *dir_path,
> >  }
> >  } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH ||
> > fs_ctx->export_flags & V9FS_SM_NONE) {
> > -err = mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev);
> > +err = qemu_mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev);
> >  if (err == -1) {
> >  goto out;
> >  }
> > diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
> > index d1660d67fa..f3a8367ece 100644
> > --- a/include/qemu/osdep.h
> > +++ b/inclu

Re: [PATCH v4 09/11] 9p: darwin: Implement compatibility for mknodat

2022-02-07 Thread Christian Schoenebeck
On Montag, 7. Februar 2022 22:07:34 CET Will Cohen wrote:
> On Mon, Feb 7, 2022 at 9:21 AM Christian Schoenebeck
> 
> wrote:
> > On Montag, 7. Februar 2022 11:57:25 CET Dr. David Alan Gilbert wrote:
> > > * Greg Kurz (gr...@kaod.org) wrote:
> > > > On Mon, 7 Feb 2022 11:30:18 +0100
> > > > 
> > > > Philippe Mathieu-Daudé  wrote:
> > > > > On 7/2/22 09:47, Greg Kurz wrote:
> > > > > > On Sun, 6 Feb 2022 20:10:23 -0500
> > > > > > 
> > > > > > Will Cohen  wrote:
> > > > > >> This patch set currently places it in 9p-util only because 9p is
> > 
> > the
> > 
> > > > > >> only
> > > > > >> place where this issue seems to have come up so far and we were
> > 
> > wary
> > 
> > > > > >> of
> > > > > >> editing files too far afield, but I have no attachment to its
> > > > > >> specific
> > > > > >> location!
> > > > > > 
> > > > > > Inline comments are preferred on qemu-devel. Please don't top post
> > 
> > !
> > 
> > > > > > This complicates the review a lot.
> > > > > > 
> > > > > > This is indeed a good candidate for osdep. This being said, unless
> > > > > > there's
> > > > > > some other user in the QEMU code base, it is acceptable to leave
> > > > > > it
> > > > > > under
> > > > > > 9pfs.
> > > > > 
> > > > > virtiofsd could eventually use it.
> > > > 
> > > > Indeed but virtiofsd is for linux hosts only AFAICT and I'm not aware
> > 
> > of
> > 
> > > > any work to support any other host OS.
> > > > 
> > > > Cc'ing virtio-fs people for inputs on this topic.
> > > 
> > > Indeed, there's a lot of Linux specific code in the virtiofsd - I know
> > > people are interested in other platforms, but I'm not sure that's the
> > > right starting point.
> > > 
> > > Dave
> > 
> > Agreeing with Greg here: i.e. I would have placed this into osdep, but I
> > would
> > not insist on it either.
> > 
> > Best regards,
> > Christian Schoenebeck
> 
> This makes sense. A revised version of this patch, moving qemu_mknodat from
> 9p-util to osdep and os-posix, is attached below. I'd appreciate any
> feedback from those looped in here, so that the context isn't lost before
> resubmitting as a v5 patch, especially since this is starting to touch
> files outside of 9p.
> 
> From c9713c87163da7c96b5357d0d85ac318ae3d3051 Mon Sep 17 00:00:00 2001
> From: Keno Fischer 
> Date: Sat, 16 Jun 2018 20:56:55 -0400
> Subject: [PATCH] 9p: darwin: Implement compatibility for mknodat
> 
> Darwin does not support mknodat. However, to avoid race conditions
> with later setting the permissions, we must avoid using mknod on
> the full path instead. We could try to fchdir, but that would cause
> problems if multiple threads try to call mknodat at the same time.
> However, luckily there is a solution: Darwin includes a function
> that sets the cwd for the current thread only.
> This should suffice to use mknod safely.
> 
> This function (pthread_fchdir_np) is protected by a check in
> meson in a patch later in this series.
> 
> Signed-off-by: Keno Fischer 
> Signed-off-by: Michael Roitzsch 
> [Will Cohen: - Adjust coding style
>  - Replace clang references with gcc
>  - Note radar filed with Apple for missing syscall
>  - Replace direct syscall with pthread_fchdir_np and
>adjust patch notes accordingly
>  - Move qemu_mknodat from 9p-util to osdep and os-posix]
> Signed-off-by: Will Cohen 
> ---
>  hw/9pfs/9p-local.c   |  4 ++--
>  include/qemu/osdep.h | 10 ++
>  os-posix.c   | 34 ++
>  3 files changed, 46 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
> index a0d08e5216..d42ce6d8b8 100644
> --- a/hw/9pfs/9p-local.c
> +++ b/hw/9pfs/9p-local.c
> @@ -682,7 +682,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath
> *dir_path,
> 
>  if (fs_ctx->export_flags & V9FS_SM_MAPPED ||
>  fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
> -err = mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0);
> +err = qemu_mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0);
>  if (err == -1) {
>  goto out;
>  }
> @@ -697,7 +697,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath
> *dir_path,
>  }
>  } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH ||
> fs_ctx->export_flags & V9FS_SM_NONE) {
> -err = mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev);
> +err = qemu_mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev);
>  if (err == -1) {
>  goto out;
>  }
> diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
> index d1660d67fa..f3a8367ece 100644
> --- a/include/qemu/osdep.h
> +++ b/include/qemu/osdep.h
> @@ -810,3 +810,13 @@ static inline int
> platform_does_not_support_system(const char *command)
>  #endif
> 
>  #endif
> +
> +/*
> + * As long as mknodat is not available on macOS, this workaround
> + * using pthread_fchdir_np is needed. qemu_mknodat is defined in
> + * os-posix.c
> + */
> +#

[PATCH v5 06/11] 9p: darwin: Move XATTR_SIZE_MAX->P9_XATTR_SIZE_MAX

2022-02-07 Thread Will Cohen
From: Keno Fischer 

Signed-off-by: Keno Fischer 
Signed-off-by: Michael Roitzsch 

Because XATTR_SIZE_MAX is not defined on Darwin,
create a cross-platform P9_XATTR_SIZE_MAX instead.

[Will Cohen: - Adjust coding style
 - Lower XATTR_SIZE_MAX to 64k
 - Add explanatory context related to XATTR_SIZE_MAX]
[Fabian Franz: - Move XATTR_SIZE_MAX reference from 9p.c to
 P9_XATTR_SIZE_MAX in 9p.h]
Signed-off-by: Will Cohen 
Signed-off-by: Fabian Franz 
---
 hw/9pfs/9p.c |  2 +-
 hw/9pfs/9p.h | 11 +++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 14e84c3bcf..7405352c37 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -3949,7 +3949,7 @@ static void coroutine_fn v9fs_xattrcreate(void *opaque)
 rflags |= XATTR_REPLACE;
 }
 
-if (size > XATTR_SIZE_MAX) {
+if (size > P9_XATTR_SIZE_MAX) {
 err = -E2BIG;
 goto out_nofid;
 }
diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
index 1567b67841..6a1856b4dc 100644
--- a/hw/9pfs/9p.h
+++ b/hw/9pfs/9p.h
@@ -479,4 +479,15 @@ struct V9fsTransport {
 void(*push_and_notify)(V9fsPDU *pdu);
 };
 
+/*
+ * Darwin doesn't seem to define a maximum xattr size in its user
+ * space header, so manually configure it across platforms as 64k.
+ *
+ * Having no limit at all can lead to QEMU crashing during large g_malloc()
+ * calls. Because QEMU does not currently support macOS guests, the below
+ * preliminary solution only works due to its being a reflection of the limit of
+ * Linux guests.
+ */
+#define P9_XATTR_SIZE_MAX 65536
+
 #endif
-- 
2.32.0 (Apple Git-132)




[PATCH v5 04/11] 9p: darwin: Handle struct dirent differences

2022-02-07 Thread Will Cohen
From: Keno Fischer 

On darwin d_seekoff exists, but is optional and does not seem to
be commonly used by file systems. Use `telldir` instead to obtain
the seek offset and inject it into d_seekoff, and create a
qemu_dirent_off helper that reads the appropriate field on each platform.

Signed-off-by: Keno Fischer 
[Michael Roitzsch: - Rebase for NixOS]
Signed-off-by: Michael Roitzsch 
[Will Cohen: - Adjust to pass testing
 - Ensure that d_seekoff is filled using telldir
   on darwin, and create qemu_dirent_off helper
   to decide which to access]
[Fabian Franz: - Add telldir error handling for darwin]
Signed-off-by: Fabian Franz 
[Will Cohen: - Ensure that telldir error handling uses
   signed int
 - Cleanup of telldir error handling
 - Remove superfluous error handling for
   qemu_dirent_off
 - Adjust formatting
 - Use qemu_dirent_off in codir.c]
Signed-off-by: Will Cohen 
---
 hw/9pfs/9p-local.c |  9 +
 hw/9pfs/9p-proxy.c | 16 +++-
 hw/9pfs/9p-synth.c |  4 
 hw/9pfs/9p-util.h  | 16 
 hw/9pfs/9p.c   |  7 +--
 hw/9pfs/codir.c|  4 +++-
 6 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index 1a5e3eed73..f3272f0b43 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -562,6 +562,15 @@ again:
 if (!entry) {
 return NULL;
 }
+#ifdef CONFIG_DARWIN
+int off;
+off = telldir(fs->dir.stream);
+/* If telldir fails, fail the entire readdir call */
+if (off < 0) {
+return NULL;
+}
+entry->d_seekoff = off;
+#endif
 
 if (ctx->export_flags & V9FS_SM_MAPPED) {
 entry->d_type = DT_UNKNOWN;
diff --git a/hw/9pfs/9p-proxy.c b/hw/9pfs/9p-proxy.c
index b1664080d8..8b4b5cf7dc 100644
--- a/hw/9pfs/9p-proxy.c
+++ b/hw/9pfs/9p-proxy.c
@@ -706,7 +706,21 @@ static off_t proxy_telldir(FsContext *ctx, 
V9fsFidOpenState *fs)
 
 static struct dirent *proxy_readdir(FsContext *ctx, V9fsFidOpenState *fs)
 {
-return readdir(fs->dir.stream);
+struct dirent *entry;
+entry = readdir(fs->dir.stream);
+#ifdef CONFIG_DARWIN
+if (!entry) {
+return NULL;
+}
+int td;
+td = telldir(fs->dir.stream);
+/* If telldir fails, fail the entire readdir call */
+if (td < 0) {
+return NULL;
+}
+entry->d_seekoff = td;
+#endif
+return entry;
 }
 
 static void proxy_seekdir(FsContext *ctx, V9fsFidOpenState *fs, off_t off)
diff --git a/hw/9pfs/9p-synth.c b/hw/9pfs/9p-synth.c
index 4a4a776d06..e264a03eef 100644
--- a/hw/9pfs/9p-synth.c
+++ b/hw/9pfs/9p-synth.c
@@ -222,7 +222,11 @@ static void synth_direntry(V9fsSynthNode *node,
 {
 strcpy(entry->d_name, node->name);
 entry->d_ino = node->attr->inode;
+#ifdef CONFIG_DARWIN
+entry->d_seekoff = off + 1;
+#else
 entry->d_off = off + 1;
+#endif
 }
 
 static struct dirent *synth_get_dentry(V9fsSynthNode *dir,
diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h
index 546f46dc7d..d41f37f085 100644
--- a/hw/9pfs/9p-util.h
+++ b/hw/9pfs/9p-util.h
@@ -79,3 +79,19 @@ ssize_t fremovexattrat_nofollow(int dirfd, const char 
*filename,
 const char *name);
 
 #endif
+
+
+/**
+ * Darwin has d_seekoff, which appears to function similarly to d_off.
+ * However, it does not appear to be supported on all file systems,
+ * so ensure it is manually injected earlier and call here when
+ * needed.
+ */
+inline off_t qemu_dirent_off(struct dirent *dent)
+{
+#ifdef CONFIG_DARWIN
+return dent->d_seekoff;
+#else
+return dent->d_off;
+#endif
+}
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 1563d7b7c6..caf3b240fe 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -27,6 +27,7 @@
 #include "virtio-9p.h"
 #include "fsdev/qemu-fsdev.h"
 #include "9p-xattr.h"
+#include "9p-util.h"
 #include "coth.h"
 #include "trace.h"
 #include "migration/blocker.h"
@@ -2281,7 +2282,7 @@ static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU 
*pdu,
 count += len;
 v9fs_stat_free(&v9stat);
 v9fs_path_free(&path);
-saved_dir_pos = dent->d_off;
+saved_dir_pos = qemu_dirent_off(dent);
 }
 
 v9fs_readdir_unlock(&fidp->fs.dir);
@@ -2420,6 +2421,7 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, 
V9fsFidState *fidp,
 V9fsString name;
 int len, err = 0;
 int32_t count = 0;
+off_t off;
 struct dirent *dent;
 struct stat *st;
 struct V9fsDirEnt *entries = NULL;
@@ -2480,12 +2482,13 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, 
V9fsFidState *fidp,
 qid.version = 0;
 }
 
+off = qemu_dirent_off(dent);
 v9fs_string_init(&name);
 v9fs_string_sprintf(&name, "%s", dent->d_name);
 
 /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
 len = pdu_marshal(pdu, 11 + count, "Qqbs",
-  &qid, dent->d_off,
+

[PATCH v5 02/11] 9p: Rename 9p-util -> 9p-util-linux

2022-02-07 Thread Will Cohen
From: Keno Fischer 

The current file only has the Linux versions of these functions.
Rename the file accordingly and update the Makefile to only build
it on Linux. A Darwin version of these will follow later in the
series.

Signed-off-by: Keno Fischer 
[Michael Roitzsch: - Rebase for NixOS]
Signed-off-by: Michael Roitzsch 
Signed-off-by: Will Cohen 
Reviewed-by: Greg Kurz 
Reviewed-by: Philippe Mathieu-Daudé 
---
 hw/9pfs/{9p-util.c => 9p-util-linux.c} | 2 +-
 hw/9pfs/meson.build| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
 rename hw/9pfs/{9p-util.c => 9p-util-linux.c} (97%)

diff --git a/hw/9pfs/9p-util.c b/hw/9pfs/9p-util-linux.c
similarity index 97%
rename from hw/9pfs/9p-util.c
rename to hw/9pfs/9p-util-linux.c
index 3221d9b498..398614a5d0 100644
--- a/hw/9pfs/9p-util.c
+++ b/hw/9pfs/9p-util-linux.c
@@ -1,5 +1,5 @@
 /*
- * 9p utilities
+ * 9p utilities (Linux Implementation)
  *
  * Copyright IBM, Corp. 2017
  *
diff --git a/hw/9pfs/meson.build b/hw/9pfs/meson.build
index 99be5d9119..1b28e70040 100644
--- a/hw/9pfs/meson.build
+++ b/hw/9pfs/meson.build
@@ -4,7 +4,6 @@ fs_ss.add(files(
   '9p-posix-acl.c',
   '9p-proxy.c',
   '9p-synth.c',
-  '9p-util.c',
   '9p-xattr-user.c',
   '9p-xattr.c',
   '9p.c',
@@ -14,6 +13,7 @@ fs_ss.add(files(
   'coth.c',
   'coxattr.c',
 ))
+fs_ss.add(when: 'CONFIG_LINUX', if_true: files('9p-util-linux.c'))
 fs_ss.add(when: 'CONFIG_XEN', if_true: files('xen-9p-backend.c'))
 softmmu_ss.add_all(when: 'CONFIG_FSDEV_9P', if_true: fs_ss)
 
-- 
2.32.0 (Apple Git-132)




[PATCH v5 07/11] 9p: darwin: *xattr_nofollow implementations

2022-02-07 Thread Will Cohen
From: Keno Fischer 

This implements the darwin equivalent of the functions that were
moved to 9p-util(-linux) earlier in this series in the new
9p-util-darwin file.

Signed-off-by: Keno Fischer 
[Michael Roitzsch: - Rebase for NixOS]
Signed-off-by: Michael Roitzsch 
Signed-off-by: Will Cohen 
---
 hw/9pfs/9p-util-darwin.c | 64 
 hw/9pfs/meson.build  |  1 +
 2 files changed, 65 insertions(+)
 create mode 100644 hw/9pfs/9p-util-darwin.c

diff --git a/hw/9pfs/9p-util-darwin.c b/hw/9pfs/9p-util-darwin.c
new file mode 100644
index 0000000000..cdb4c9e24c
--- /dev/null
+++ b/hw/9pfs/9p-util-darwin.c
@@ -0,0 +1,64 @@
+/*
+ * 9p utilities (Darwin Implementation)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/xattr.h"
+#include "9p-util.h"
+
+ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name,
+ void *value, size_t size)
+{
+int ret;
+int fd = openat_file(dirfd, filename,
+ O_RDONLY | O_PATH_9P_UTIL | O_NOFOLLOW, 0);
+if (fd == -1) {
+return -1;
+}
+ret = fgetxattr(fd, name, value, size, 0, 0);
+close_preserve_errno(fd);
+return ret;
+}
+
+ssize_t flistxattrat_nofollow(int dirfd, const char *filename,
+  char *list, size_t size)
+{
+int ret;
+int fd = openat_file(dirfd, filename,
+ O_RDONLY | O_PATH_9P_UTIL | O_NOFOLLOW, 0);
+if (fd == -1) {
+return -1;
+}
+ret = flistxattr(fd, list, size, 0);
+close_preserve_errno(fd);
+return ret;
+}
+
+ssize_t fremovexattrat_nofollow(int dirfd, const char *filename,
+const char *name)
+{
+int ret;
+int fd = openat_file(dirfd, filename, O_PATH_9P_UTIL | O_NOFOLLOW, 0);
+if (fd == -1) {
+return -1;
+}
+ret = fremovexattr(fd, name, 0);
+close_preserve_errno(fd);
+return ret;
+}
+
+int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name,
+ void *value, size_t size, int flags)
+{
+int ret;
+int fd = openat_file(dirfd, filename, O_PATH_9P_UTIL | O_NOFOLLOW, 0);
+if (fd == -1) {
+return -1;
+}
+ret = fsetxattr(fd, name, value, size, 0, flags);
+close_preserve_errno(fd);
+return ret;
+}
diff --git a/hw/9pfs/meson.build b/hw/9pfs/meson.build
index 1b28e70040..12443b6ad5 100644
--- a/hw/9pfs/meson.build
+++ b/hw/9pfs/meson.build
@@ -14,6 +14,7 @@ fs_ss.add(files(
   'coxattr.c',
 ))
 fs_ss.add(when: 'CONFIG_LINUX', if_true: files('9p-util-linux.c'))
+fs_ss.add(when: 'CONFIG_DARWIN', if_true: files('9p-util-darwin.c'))
 fs_ss.add(when: 'CONFIG_XEN', if_true: files('xen-9p-backend.c'))
 softmmu_ss.add_all(when: 'CONFIG_FSDEV_9P', if_true: fs_ss)
 
-- 
2.32.0 (Apple Git-132)




[PATCH v5 05/11] 9p: darwin: Ignore O_{NOATIME, DIRECT}

2022-02-07 Thread Will Cohen
From: Keno Fischer 

Darwin doesn't have either of these flags. Darwin does have
F_NOCACHE, which is similar to O_DIRECT, but has different
enough semantics that other projects don't generally map
them automatically. In any case, we don't support O_DIRECT
on Linux at the moment either.

Signed-off-by: Keno Fischer 
[Michael Roitzsch: - Rebase for NixOS]
Signed-off-by: Michael Roitzsch 
[Will Cohen: - Adjust coding style]
Signed-off-by: Will Cohen 
---
 hw/9pfs/9p-util.h |  2 ++
 hw/9pfs/9p.c  | 13 -
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h
index d41f37f085..0e445b5d52 100644
--- a/hw/9pfs/9p-util.h
+++ b/hw/9pfs/9p-util.h
@@ -41,6 +41,7 @@ again:
 fd = openat(dirfd, name, flags | O_NOFOLLOW | O_NOCTTY | O_NONBLOCK,
 mode);
 if (fd == -1) {
+#ifndef CONFIG_DARWIN
 if (errno == EPERM && (flags & O_NOATIME)) {
 /*
  * The client passed O_NOATIME but we lack permissions to honor it.
@@ -53,6 +54,7 @@ again:
 flags &= ~O_NOATIME;
 goto again;
 }
+#endif
 return -1;
 }
 
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index caf3b240fe..14e84c3bcf 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -138,11 +138,20 @@ static int dotl_to_open_flags(int flags)
 { P9_DOTL_NONBLOCK, O_NONBLOCK } ,
 { P9_DOTL_DSYNC, O_DSYNC },
 { P9_DOTL_FASYNC, FASYNC },
+#ifndef CONFIG_DARWIN
+{ P9_DOTL_NOATIME, O_NOATIME },
+/*
+ *  On Darwin, we could map to F_NOCACHE, which is
+ *  similar, but doesn't quite have the same
+ *  semantics. However, we don't support O_DIRECT
+ *  even on linux at the moment, so we just ignore
+ *  it here.
+ */
 { P9_DOTL_DIRECT, O_DIRECT },
+#endif
 { P9_DOTL_LARGEFILE, O_LARGEFILE },
 { P9_DOTL_DIRECTORY, O_DIRECTORY },
 { P9_DOTL_NOFOLLOW, O_NOFOLLOW },
-{ P9_DOTL_NOATIME, O_NOATIME },
 { P9_DOTL_SYNC, O_SYNC },
 };
 
@@ -171,10 +180,12 @@ static int get_dotl_openflags(V9fsState *s, int oflags)
  */
 flags = dotl_to_open_flags(oflags);
 flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
+#ifndef CONFIG_DARWIN
 /*
  * Ignore direct disk access hint until the server supports it.
  */
 flags &= ~O_DIRECT;
+#endif
 return flags;
 }
 
-- 
2.32.0 (Apple Git-132)




[PATCH v5 01/11] 9p: linux: Fix a couple Linux assumptions

2022-02-07 Thread Will Cohen
From: Keno Fischer 

 - Guard Linux only headers.
 - Add qemu/statfs.h header to abstract over which
   headers are needed for struct statfs
 - Define `ENOATTR` only if not already defined
   (it's defined in system headers on Darwin).

Signed-off-by: Keno Fischer 
[Michael Roitzsch: - Rebase for NixOS]
Signed-off-by: Michael Roitzsch 

While it might at first appear that fsdev/virtfs-proxy-helper.c would
need similar adjustment for darwin as file-op-9p here, a later patch in
this series disables virtfs-proxy-helper for non-Linux. Allowing
virtfs-proxy-helper on darwin could potentially be an additional
optimization later.

[Will Cohen: - Fix headers for Alpine
 - Integrate statfs.h back into file-op-9p.h
 - Remove superfluous header guards from file-opt-9p
 - Add note about virtfs-proxy-helper being disabled
   on non-Linux for this patch series]
Signed-off-by: Will Cohen 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Greg Kurz 
---
 fsdev/file-op-9p.h   | 9 -
 hw/9pfs/9p-local.c   | 2 ++
 hw/9pfs/9p.c | 4 
 include/qemu/xattr.h | 4 +++-
 4 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h
index 8fd89f0447..4997677460 100644
--- a/fsdev/file-op-9p.h
+++ b/fsdev/file-op-9p.h
@@ -16,10 +16,17 @@
 
 #include 
 #include 
-#include 
 #include "qemu-fsdev-throttle.h"
 #include "p9array.h"
 
+#ifdef CONFIG_LINUX
+# include 
+#endif
+#ifdef CONFIG_DARWIN
+# include 
+# include 
+#endif
+
 #define SM_LOCAL_MODE_BITS0600
 #define SM_LOCAL_DIR_MODE_BITS0700
 
diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index 210d9e7705..1a5e3eed73 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -32,10 +32,12 @@
 #include "qemu/error-report.h"
 #include "qemu/option.h"
 #include 
+#ifdef CONFIG_LINUX
 #include 
 #ifdef CONFIG_LINUX_MAGIC_H
 #include 
 #endif
+#endif
 #include 
 
 #ifndef XFS_SUPER_MAGIC
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 15b3f4d385..9c63e14b28 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -32,7 +32,11 @@
 #include "migration/blocker.h"
 #include "qemu/xxhash.h"
 #include 
+#ifdef CONFIG_LINUX
 #include 
+#else
+#include 
+#endif
 
 int open_fd_hw;
 int total_open_fd;
diff --git a/include/qemu/xattr.h b/include/qemu/xattr.h
index a83fe8e749..f1d0f7be74 100644
--- a/include/qemu/xattr.h
+++ b/include/qemu/xattr.h
@@ -22,7 +22,9 @@
 #ifdef CONFIG_LIBATTR
 #  include 
 #else
-#  define ENOATTR ENODATA
+#  if !defined(ENOATTR)
+#define ENOATTR ENODATA
+#  endif
 #  include 
 #endif
 
-- 
2.32.0 (Apple Git-132)




[PATCH v5 11/11] 9p: darwin: Adjust assumption on virtio-9p-test

2022-02-07 Thread Will Cohen
The previous test depended on the assumption that P9_DOTL_AT_REMOVEDIR
and AT_REMOVEDIR have the same value.

While this is true on Linux, it is not true everywhere, and leads to an
incorrect test failure on unlink_at, noticed when adding 9p to darwin:

Received response 7 (RLERROR) instead of 77 (RUNLINKAT)
Rlerror has errno 22 (Invalid argument)
**

ERROR:../tests/qtest/virtio-9p-test.c:305:v9fs_req_recv: assertion
failed (hdr.id == id): (7 == 77) Bail out!

ERROR:../tests/qtest/virtio-9p-test.c:305:v9fs_req_recv: assertion
failed (hdr.id == id): (7 == 77)

Signed-off-by: Fabian Franz 
[Will Cohen: - Add explanation of patch and description
   of pre-patch test failure]
Signed-off-by: Will Cohen 
Acked-by: Thomas Huth 
---
 tests/qtest/virtio-9p-test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
index 41fed41de1..6bcf89f0f8 100644
--- a/tests/qtest/virtio-9p-test.c
+++ b/tests/qtest/virtio-9p-test.c
@@ -1270,7 +1270,7 @@ static void fs_unlinkat_dir(void *obj, void *data, 
QGuestAllocator *t_alloc)
 /* ... and is actually a directory */
 g_assert((st.st_mode & S_IFMT) == S_IFDIR);
 
-do_unlinkat(v9p, "/", "02", AT_REMOVEDIR);
+do_unlinkat(v9p, "/", "02", P9_DOTL_AT_REMOVEDIR);
 /* directory should be gone now */
 g_assert(stat(new_dir, &st) != 0);
 
-- 
2.32.0 (Apple Git-132)




[PATCH v5 03/11] 9p: darwin: Handle struct stat(fs) differences

2022-02-07 Thread Will Cohen
From: Keno Fischer 

Signed-off-by: Keno Fischer 
Signed-off-by: Michael Roitzsch 
[Will Cohen: - Note lack of f_namelen and f_frsize on Darwin
 - Ensure that tv_sec and tv_nsec are both
   initialized for Darwin and non-Darwin]
Signed-off-by: Will Cohen 
---
 hw/9pfs/9p-proxy.c | 22 --
 hw/9pfs/9p-synth.c |  2 ++
 hw/9pfs/9p.c   | 16 ++--
 3 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/hw/9pfs/9p-proxy.c b/hw/9pfs/9p-proxy.c
index 09bd9f1464..b1664080d8 100644
--- a/hw/9pfs/9p-proxy.c
+++ b/hw/9pfs/9p-proxy.c
@@ -123,10 +123,16 @@ static void prstatfs_to_statfs(struct statfs *stfs, 
ProxyStatFS *prstfs)
 stfs->f_bavail = prstfs->f_bavail;
 stfs->f_files = prstfs->f_files;
 stfs->f_ffree = prstfs->f_ffree;
+#ifdef CONFIG_DARWIN
+/* f_namelen and f_frsize do not exist on Darwin */
+stfs->f_fsid.val[0] = prstfs->f_fsid[0] & 0xFFFFFFFFU;
+stfs->f_fsid.val[1] = prstfs->f_fsid[1] >> 32 & 0xFFFFFFFFU;
+#else
 stfs->f_fsid.__val[0] = prstfs->f_fsid[0] & 0xFFFFFFFFU;
 stfs->f_fsid.__val[1] = prstfs->f_fsid[1] >> 32 & 0xFFFFFFFFU;
 stfs->f_namelen = prstfs->f_namelen;
 stfs->f_frsize = prstfs->f_frsize;
+#endif
 }
 
 /* Converts proxy_stat structure to VFS stat structure */
@@ -143,12 +149,24 @@ static void prstat_to_stat(struct stat *stbuf, ProxyStat 
*prstat)
stbuf->st_size = prstat->st_size;
stbuf->st_blksize = prstat->st_blksize;
stbuf->st_blocks = prstat->st_blocks;
+   stbuf->st_atime = prstat->st_atim_sec;
+   stbuf->st_mtime = prstat->st_mtim_sec;
+   stbuf->st_ctime = prstat->st_ctim_sec;
+#ifdef CONFIG_DARWIN
+   stbuf->st_atimespec.tv_sec = prstat->st_atim_sec;
+   stbuf->st_mtimespec.tv_sec = prstat->st_mtim_sec;
+   stbuf->st_ctimespec.tv_sec = prstat->st_ctim_sec;
+   stbuf->st_atimespec.tv_nsec = prstat->st_atim_nsec;
+   stbuf->st_mtimespec.tv_nsec = prstat->st_mtim_nsec;
+   stbuf->st_ctimespec.tv_nsec = prstat->st_ctim_nsec;
+#else
stbuf->st_atim.tv_sec = prstat->st_atim_sec;
+   stbuf->st_mtim.tv_sec = prstat->st_mtim_sec;
+   stbuf->st_ctim.tv_sec = prstat->st_ctim_sec;
stbuf->st_atim.tv_nsec = prstat->st_atim_nsec;
-   stbuf->st_mtime = prstat->st_mtim_sec;
stbuf->st_mtim.tv_nsec = prstat->st_mtim_nsec;
-   stbuf->st_ctime = prstat->st_ctim_sec;
stbuf->st_ctim.tv_nsec = prstat->st_ctim_nsec;
+#endif
 }
 
 /*
diff --git a/hw/9pfs/9p-synth.c b/hw/9pfs/9p-synth.c
index b38088e066..4a4a776d06 100644
--- a/hw/9pfs/9p-synth.c
+++ b/hw/9pfs/9p-synth.c
@@ -427,7 +427,9 @@ static int synth_statfs(FsContext *s, V9fsPath *fs_path,
 stbuf->f_bsize = 512;
 stbuf->f_blocks = 0;
 stbuf->f_files = synth_node_count;
+#ifndef CONFIG_DARWIN
 stbuf->f_namelen = NAME_MAX;
+#endif
 return 0;
 }
 
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 9c63e14b28..1563d7b7c6 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -1313,11 +1313,17 @@ static int stat_to_v9stat_dotl(V9fsPDU *pdu, const 
struct stat *stbuf,
 v9lstat->st_blksize = stat_to_iounit(pdu, stbuf);
 v9lstat->st_blocks = stbuf->st_blocks;
 v9lstat->st_atime_sec = stbuf->st_atime;
-v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
 v9lstat->st_mtime_sec = stbuf->st_mtime;
-v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
 v9lstat->st_ctime_sec = stbuf->st_ctime;
+#ifdef CONFIG_DARWIN
+v9lstat->st_atime_nsec = stbuf->st_atimespec.tv_nsec;
+v9lstat->st_mtime_nsec = stbuf->st_mtimespec.tv_nsec;
+v9lstat->st_ctime_nsec = stbuf->st_ctimespec.tv_nsec;
+#else
+v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
+v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
 v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec;
+#endif
 /* Currently we only support BASIC fields in stat */
 v9lstat->st_result_mask = P9_STATS_BASIC;
 
@@ -3519,9 +3525,15 @@ static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, 
struct statfs *stbuf)
 f_bavail = stbuf->f_bavail / bsize_factor;
 f_files  = stbuf->f_files;
 f_ffree  = stbuf->f_ffree;
+#ifdef CONFIG_DARWIN
+fsid_val = (unsigned int)stbuf->f_fsid.val[0] |
+   (unsigned long long)stbuf->f_fsid.val[1] << 32;
+f_namelen = NAME_MAX;
+#else
 fsid_val = (unsigned int) stbuf->f_fsid.__val[0] |
(unsigned long long)stbuf->f_fsid.__val[1] << 32;
 f_namelen = stbuf->f_namelen;
+#endif
 
 return pdu_marshal(pdu, offset, "ddqqd",
f_type, f_bsize, f_blocks, f_bfree,
-- 
2.32.0 (Apple Git-132)




[PATCH v5 09/11] 9p: darwin: Implement compatibility for mknodat

2022-02-07 Thread Will Cohen
From: Keno Fischer 

Darwin does not support mknodat. However, to avoid race conditions
with later setting the permissions, we must avoid using mknod on
the full path instead. We could try to fchdir, but that would cause
problems if multiple threads try to call mknodat at the same time.
However, luckily there is a solution: Darwin includes a function
that sets the cwd for the current thread only.
This should suffice to use mknod safely.

This function (pthread_fchdir_np) is protected by a check in
meson in a patch later in this series.

Signed-off-by: Keno Fischer 
Signed-off-by: Michael Roitzsch 
[Will Cohen: - Adjust coding style
 - Replace clang references with gcc
 - Note radar filed with Apple for missing syscall
 - Replace direct syscall with pthread_fchdir_np and
   adjust patch notes accordingly
 - Move qemu_mknodat from 9p-util to osdep and os-posix]
Signed-off-by: Will Cohen 
---
 hw/9pfs/9p-local.c   |  4 ++--
 include/qemu/osdep.h | 10 ++
 os-posix.c   | 34 ++
 3 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index a0d08e5216..d42ce6d8b8 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -682,7 +682,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath 
*dir_path,
 
 if (fs_ctx->export_flags & V9FS_SM_MAPPED ||
 fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-err = mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0);
+err = qemu_mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0);
 if (err == -1) {
 goto out;
 }
@@ -697,7 +697,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath 
*dir_path,
 }
 } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH ||
fs_ctx->export_flags & V9FS_SM_NONE) {
-err = mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev);
+err = qemu_mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev);
 if (err == -1) {
 goto out;
 }
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index d1660d67fa..f3a8367ece 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -810,3 +810,13 @@ static inline int platform_does_not_support_system(const 
char *command)
 #endif
 
 #endif
+
+/*
+ * As long as mknodat is not available on macOS, this workaround
+ * using pthread_fchdir_np is needed. qemu_mknodat is defined in
+ * os-posix.c
+ */
+#ifdef CONFIG_DARWIN
+int pthread_fchdir_np(int fd);
+#endif
+int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev);
diff --git a/os-posix.c b/os-posix.c
index ae6c9f2a5e..95c1607065 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -24,6 +24,7 @@
  */
 
 #include "qemu/osdep.h"
+#include 
 #include 
 #include 
 #include 
@@ -332,3 +333,36 @@ int os_mlock(void)
 return -ENOSYS;
 #endif
 }
+
+/*
+ * As long as mknodat is not available on macOS, this workaround
+ * using pthread_fchdir_np is needed.
+ *
+ * Radar filed with Apple for implementing mknodat:
+ * rdar://FB9862426 (https://openradar.appspot.com/FB9862426)
+ */
+#ifdef CONFIG_DARWIN
+
+int pthread_fchdir_np(int fd) API_AVAILABLE(macosx(10.12));
+
+int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev)
+{
+int preserved_errno, err;
+if (pthread_fchdir_np(dirfd) < 0) {
+return -1;
+}
+err = mknod(filename, mode, dev);
+preserved_errno = errno;
+/* Stop using the thread-local cwd */
+pthread_fchdir_np(-1);
+if (err < 0) {
+errno = preserved_errno;
+}
+return err;
+}
+#else
+int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev)
+{
+return mknodat(dirfd, filename, mode, dev);
+}
+#endif
-- 
2.32.0 (Apple Git-132)




[PATCH v5 00/11] 9p: Add support for darwin

2022-02-07 Thread Will Cohen
This is a followup to 
https://lists.gnu.org/archive/html/qemu-devel/2022-02/msg01223.html,
adding 9p server support for Darwin.

Since v4, the following changes have been made to the following patches:

Patch 4/11: 9p: darwin: Handle struct dirent differences
- Cleanup of telldir error handling
- Remove superfluous error handling for qemu_dirent_off
- Removal of superfluous whitespace
- Adjust codir.c to use qemu_dirent_off instead of duplicating the logic

Patch 9/11: 9p: darwin: Implement compatibility for mknodat
- Move qemu_mknodat from 9p-util to osdep and os-posix

Patch 10/11: 9p: darwin: meson: Allow VirtFS on Darwin
- Add comments to patch commit
- Note that virtfs_proxy_helper does not work on Darwin and adjust
- Adjust meson virtfs error note to specify macOS

Keno Fischer (10):
  9p: linux: Fix a couple Linux assumptions
  9p: Rename 9p-util -> 9p-util-linux
  9p: darwin: Handle struct stat(fs) differences
  9p: darwin: Handle struct dirent differences
  9p: darwin: Ignore O_{NOATIME, DIRECT}
  9p: darwin: Move XATTR_SIZE_MAX->P9_XATTR_SIZE_MAX
  9p: darwin: *xattr_nofollow implementations
  9p: darwin: Compatibility for f/l*xattr
  9p: darwin: Implement compatibility for mknodat
  9p: darwin: meson: Allow VirtFS on Darwin

Will Cohen (1):
  9p: darwin: Adjust assumption on virtio-9p-test

 fsdev/file-op-9p.h |  9 +++-
 fsdev/meson.build  |  1 +
 hw/9pfs/9p-local.c | 27 ---
 hw/9pfs/9p-proxy.c | 38 +--
 hw/9pfs/9p-synth.c |  6 +++
 hw/9pfs/9p-util-darwin.c   | 64 ++
 hw/9pfs/{9p-util.c => 9p-util-linux.c} |  2 +-
 hw/9pfs/9p-util.h  | 35 ++
 hw/9pfs/9p.c   | 42 ++---
 hw/9pfs/9p.h   | 11 +
 hw/9pfs/codir.c|  4 +-
 hw/9pfs/meson.build|  3 +-
 include/qemu/osdep.h   | 10 
 include/qemu/xattr.h   |  4 +-
 meson.build| 14 --
 os-posix.c | 34 ++
 tests/qtest/virtio-9p-test.c   |  2 +-
 17 files changed, 281 insertions(+), 25 deletions(-)
 create mode 100644 hw/9pfs/9p-util-darwin.c
 rename hw/9pfs/{9p-util.c => 9p-util-linux.c} (97%)

-- 
2.32.0 (Apple Git-132)




Re: [PATCH v4 09/11] 9p: darwin: Implement compatibility for mknodat

2022-02-07 Thread Will Cohen
On Mon, Feb 7, 2022 at 4:07 PM Will Cohen  wrote:

> On Mon, Feb 7, 2022 at 9:21 AM Christian Schoenebeck <
> qemu_...@crudebyte.com> wrote:
>
>> On Montag, 7. Februar 2022 11:57:25 CET Dr. David Alan Gilbert wrote:
>> > * Greg Kurz (gr...@kaod.org) wrote:
>> > > On Mon, 7 Feb 2022 11:30:18 +0100
>> > >
>> > > Philippe Mathieu-Daudé  wrote:
>> > > > On 7/2/22 09:47, Greg Kurz wrote:
>> > > > > On Sun, 6 Feb 2022 20:10:23 -0500
>> > > > >
>> > > > > Will Cohen  wrote:
>> > > > >> This patch set currently places it in 9p-util only because 9p is
>> the
>> > > > >> only
>> > > > >> place where this issue seems to have come up so far and we were
>> wary
>> > > > >> of
>> > > > >> editing files too far afield, but I have no attachment to its
>> > > > >> specific
>> > > > >> location!
>> > > > >
>> > > > > Inline comments are preferred on qemu-devel. Please don't top
>> post !
>> > > > > This complicates the review a lot.
>> > > > >
>> > > > > This is indeed a good candidate for osdep. This being said, unless
>> > > > > there's
>> > > > > some other user in the QEMU code base, it is acceptable to leave
>> it
>> > > > > under
>> > > > > 9pfs.
>> > > >
>> > > > virtiofsd could eventually use it.
>> > >
>> > > Indeed but virtiofsd is for linux hosts only AFAICT and I'm not aware
>> of
>> > > any work to support any other host OS.
>> > >
>> > > Cc'ing virtio-fs people for inputs on this topic.
>> >
>> > Indeed, there's a lot of Linux specific code in the virtiofsd - I know
>> > people are interested in other platforms, but I'm not sure that's the
>> > right starting point.
>> >
>> > Dave
>>
>> Agreeing with Greg here: i.e. I would have placed this into osdep, but I
>> would
>> not insist on it either.
>>
>> Best regards,
>> Christian Schoenebeck
>>
>>
> This makes sense. A revised version of this patch, moving qemu_mknodat
> from 9p-util to osdep and os-posix, is attached below. I'd appreciate any
> feedback from those looped in here, so that the context isn't lost before
> resubmitting as a v5 patch, especially since this is starting to touch
> files outside of 9p.
>
> From c9713c87163da7c96b5357d0d85ac318ae3d3051 Mon Sep 17 00:00:00 2001
> From: Keno Fischer 
> Date: Sat, 16 Jun 2018 20:56:55 -0400
> Subject: [PATCH] 9p: darwin: Implement compatibility for mknodat
>
> Darwin does not support mknodat. However, to avoid race conditions
> with later setting the permissions, we must avoid using mknod on
> the full path instead. We could try to fchdir, but that would cause
> problems if multiple threads try to call mknodat at the same time.
> However, luckily there is a solution: Darwin includes a function
> that sets the cwd for the current thread only.
> This should suffice to use mknod safely.
>
> This function (pthread_fchdir_np) is protected by a check in
> meson in a patch later in this series.
>
> Signed-off-by: Keno Fischer 
> Signed-off-by: Michael Roitzsch 
> [Will Cohen: - Adjust coding style
>  - Replace clang references with gcc
>  - Note radar filed with Apple for missing syscall
>  - Replace direct syscall with pthread_fchdir_np and
>adjust patch notes accordingly
>  - Move qemu_mknodat from 9p-util to osdep and os-posix]
> Signed-off-by: Will Cohen 
> ---
>  hw/9pfs/9p-local.c   |  4 ++--
>  include/qemu/osdep.h | 10 ++
>  os-posix.c   | 34 ++
>  3 files changed, 46 insertions(+), 2 deletions(-)
>
> diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
> index a0d08e5216..d42ce6d8b8 100644
> --- a/hw/9pfs/9p-local.c
> +++ b/hw/9pfs/9p-local.c
> @@ -682,7 +682,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath
> *dir_path,
>
>  if (fs_ctx->export_flags & V9FS_SM_MAPPED ||
>  fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
> -err = mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0);
> +err = qemu_mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0);
>  if (err == -1) {
>  goto out;
>  }
> @@ -697,7 +697,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath
> *dir_path,
>  }
>  } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH ||
> fs_ctx->export_flags & V9FS_SM_NONE) {
> -err = mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev);
> +err = qemu_mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev);
>  if (err == -1) {
>  goto out;
>  }
> diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
> index d1660d67fa..f3a8367ece 100644
> --- a/include/qemu/osdep.h
> +++ b/include/qemu/osdep.h
> @@ -810,3 +810,13 @@ static inline int
> platform_does_not_support_system(const char *command)
>  #endif
>
>  #endif
> +
> +/*
> + * As long as mknodat is not available on macOS, this workaround
> + * using pthread_fchdir_np is needed. qemu_mknodat is defined in
> + * os-posix.c
> + */
> +#ifdef CONFIG_DARWIN
> +int pthread_fchdir_np(int fd);
> +#endif
> +int qemu_mknodat(int dirfd, 

[PATCH 1/5] ui/gtk: new param monitor to specify target monitor for launching QEMU

2022-02-07 Thread Dongwon Kim
Introduce a new integer parameter to specify the monitor where the
QEMU window is placed upon launch.

Monitor can be any number between 0 and (total number of monitors - 1).

It can be used together with full-screen=on, which will make the QEMU
window full-screened on the targeted monitor.

v2: fixed typos and updated commit subject and msg
(Philippe Mathieu-Daudé)

changed param name to monitor, removed unnecessary condition check
on the parameter
(Paolo Bonzini)

Cc: Philippe Mathieu-Daudé 
Cc: Klaus Kiwi 
Cc: Paolo Bonzini 
Cc: Gerd Hoffmann 
Cc: Vivek Kasireddy 
Cc: sweeaun 
Cc: Khairul Anuar Romli 
Signed-off-by: Dongwon Kim 
---
 qapi/ui.json| 6 +-
 qemu-options.hx | 2 +-
 ui/gtk.c| 8 
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/qapi/ui.json b/qapi/ui.json
index d7567ac866..c2c677bb1c 100644
--- a/qapi/ui.json
+++ b/qapi/ui.json
@@ -1099,13 +1099,17 @@
 #   assuming the guest will resize the display to match
 #   the window size then.  Otherwise it defaults to "off".
 #   Since 3.1
+# @monitor: Indicate monitor where QEMU window is launched. monitor
+#   could be any number from 0 to (total num of monitors - 1).
+#   since 7.0
 #
 # Since: 2.12
 #
 ##
 { 'struct'  : 'DisplayGTK',
   'data': { '*grab-on-hover' : 'bool',
-'*zoom-to-fit'   : 'bool'  } }
+'*zoom-to-fit'   : 'bool',
+'*monitor'   : 'uint32' } }
 
 ##
 # @DisplayEGLHeadless:
diff --git a/qemu-options.hx b/qemu-options.hx
index 7749f59300..6d062b8aa1 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1852,7 +1852,7 @@ DEF("display", HAS_ARG, QEMU_OPTION_display,
 #endif
 #if defined(CONFIG_GTK)
 "-display gtk[,full-screen=on|off][,gl=on|off][,grab-on-hover=on|off]\n"
-"[,show-cursor=on|off][,window-close=on|off]\n"
+"
[,monitor=][,show-cursor=on|off][,window-close=on|off]\n"
 #endif
 #if defined(CONFIG_VNC)
 "-display vnc=[,]\n"
diff --git a/ui/gtk.c b/ui/gtk.c
index d2892ea6b4..7abe1a69d8 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -2314,6 +2314,14 @@ static void gtk_display_init(DisplayState *ds, 
DisplayOptions *opts)
  vc && vc->type == GD_VC_VTE);
 #endif
 
+if (opts->u.gtk.has_monitor &&
+opts->u.gtk.monitor < gdk_display_get_n_monitors(window_display)) {
+GdkRectangle mon_dest;
+gdk_monitor_get_geometry(
+gdk_display_get_monitor(window_display, opts->u.gtk.monitor),
+&mon_dest);
+gtk_window_move(GTK_WINDOW(s->window), mon_dest.x, mon_dest.y);
+}
 if (opts->has_full_screen &&
 opts->full_screen) {
 gtk_menu_item_activate(GTK_MENU_ITEM(s->full_screen_item));
-- 
2.30.2




Re: [PATCH v2 0/6] qtests/libqos: Allow PCI tests to be run with virt-machine

2022-02-07 Thread Eric Auger
Hi Michael,

On 2/5/22 2:43 AM, Michael S. Tsirkin wrote:
> On Tue, Jan 18, 2022 at 09:38:27PM +0100, Eric Auger wrote:
>> Up to now the virt-machine node only contains a virtio-mmio
>> driver node but no driver that eventually produces any pci-bus
>> interface.
>>
>> Hence, PCI libqos tests cannot be run with aarch64 binary.
>>
>> This series brings the pieces needed to be able to run PCI tests
>> with the aarch64 binary: a generic-pcihost driver node gets
>> instantiated by the machine. This later contains a pci-bus-generic
>> driver which produces a pci-bus interface. Then all tests
>> consuming the pci-bus interface can be run with the libqos arm
>> virt machine.
>>
>> One of the first goals was to be able to run the virtio-iommu-pci
>> tests, as the virtio-iommu was initially targeting ARM and it
>> was awkward to run the tests with the pc machine. This is now
>> possible.
>>
>> Only the tests doing hotplug cannot be run yet as hotplug is
>> not possible on the root bus. This will be dealt with separately
>> by adding a root port to the object tree.
>>
>> Best Regards
>>
>> Eric
>>
>> This series can be found at:
>> https://github.com/eauger/qemu/tree/libqos-pci-arm-v2
>>
>> History
>
> I dropped this due to make check failures. Pls make sure
> to make and test all targets. Thanks!
Sorry for the inconvenience.

qos-test is now failing on aarch64, although it was definitely passing when
I submitted the patches (i.e. with the above branch).
I bisected, and d9afe24c29a0 ("hw/arm/virt: Disable highmem devices that
don't fit in the PA range"), recently applied upstream, changed the way the
test behaves and is now causing
/aarch64/virt/generic-pcihost/pci-bus-generic/pci-bus/megasas/megasas-tests/dcmd/pd-get-info/fuzz
to fail.

This is due to the fact GPA is limited to 32b and the device seems to
require high ECAM. I will fix that.

Thanks

Eric
>
>> v1 -> v2:
>> - copyright updated to 2022
>> - QPCIBusARM renamed into QGenericPCIBus
>> - QGenericPCIHost declarations and definitions moved in the same
>>   place as the generic pci implementation
>> - rename pci-arm.c/h in generic-pcihost.c/h and remove any ref to
>>   ARM there
>> - remove qos_node_produces_opts, qpci_new_arm, qpci_free_arm
>> - ecam_alloc_ptr now is a field of QGenericPCIBus and not QPCIBus
>> - new libqos_init to create generic-pcihost driver that contains
>>   pci-bus-generic
>> - QGenericPCIHost moved in the same place as the generic pci
>>   bindings
>> - collected Thomas A-b/R-b
>>
>>
>> Eric Auger (6):
>>   tests/qtest/vhost-user-test.c: Use vhostforce=on
>>   tests/qtest/libqos/pci: Introduce pio_limit
>>   tests/qtest/libqos: Skip hotplug tests if pci root bus is not
>> hotpluggable
>>   tests/qtest/vhost-user-blk-test: Setup MSIx to avoid error on aarch64
>>   tests/qtest/vhost-user-blk-test: Factorize vq setup code
>>   tests/qtest/libqos: Add generic pci host bridge in arm-virt machine
>>
>>  tests/qtest/e1000e-test.c |   6 +
>>  tests/qtest/libqos/arm-virt-machine.c |  18 +-
>>  tests/qtest/libqos/generic-pcihost.c  | 231 ++
>>  tests/qtest/libqos/generic-pcihost.h  |  54 ++
>>  tests/qtest/libqos/meson.build|   1 +
>>  tests/qtest/libqos/pci-pc.c   |   1 +
>>  tests/qtest/libqos/pci-spapr.c|   1 +
>>  tests/qtest/libqos/pci.c  |  78 +
>>  tests/qtest/libqos/pci.h  |   6 +-
>>  tests/qtest/vhost-user-blk-test.c |  39 -
>>  tests/qtest/vhost-user-test.c |   2 +-
>>  tests/qtest/virtio-blk-test.c |   5 +
>>  tests/qtest/virtio-net-test.c |   5 +
>>  tests/qtest/virtio-rng-test.c |   5 +
>>  14 files changed, 408 insertions(+), 44 deletions(-)
>>  create mode 100644 tests/qtest/libqos/generic-pcihost.c
>>  create mode 100644 tests/qtest/libqos/generic-pcihost.h
>>
>> -- 
>> 2.26.3




Re: [PATCH v5 1/6] tcg/sparc: Add scratch argument to tcg_out_movi_int

2022-02-07 Thread Richard Henderson

On 2/8/22 04:59, Peter Maydell wrote:

On Sun, 6 Feb 2022 at 10:31, Richard Henderson
 wrote:


This will allow us to control exactly what scratch register is
used for loading the constant.  Also, fix a theoretical problem
in recursing through tcg_out_movi, which may provide a different
value for in_prologue.

Signed-off-by: Richard Henderson 
---
  tcg/sparc/tcg-target.c.inc | 21 +
  1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
index 0c062c60eb..8c3671f56a 100644
--- a/tcg/sparc/tcg-target.c.inc
+++ b/tcg/sparc/tcg-target.c.inc
@@ -414,7 +414,8 @@ static void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, 
int32_t arg)
  }

  static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
- tcg_target_long arg, bool in_prologue)
+ tcg_target_long arg, bool in_prologue,
+ TCGReg scratch)
  {
  tcg_target_long hi, lo = (int32_t)arg;
  tcg_target_long test, lsb;
@@ -471,22 +472,25 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, 
TCGReg ret,
  /* A 64-bit constant decomposed into 2 32-bit pieces.  */
  if (check_fit_i32(lo, 13)) {
  hi = (arg - lo) >> 32;
-tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
+tcg_out_movi_int(s, TCG_TYPE_I32, ret, hi, in_prologue, scratch);
  tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
  tcg_out_arithi(s, ret, ret, lo, ARITH_ADD);
  } else {
+tcg_debug_assert(scratch != TCG_REG_G0);
  hi = arg >> 32;
-tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
-tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo);
+tcg_out_movi_int(s, TCG_TYPE_I32, ret, hi, in_prologue, scratch);
+tcg_out_movi_int(s, TCG_TYPE_I32, scratch, lo, in_prologue, 
TCG_REG_G0);
  tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
-tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR);
+tcg_out_arith(s, ret, ret, scratch, ARITH_OR);
  }
  }

  static void tcg_out_movi(TCGContext *s, TCGType type,
   TCGReg ret, tcg_target_long arg)
  {
-tcg_out_movi_int(s, type, ret, arg, false);
+/* When outputting to T2, we have no scratch available. */
+TCGReg scratch = ret != TCG_REG_T2 ? TCG_REG_T2 : TCG_REG_G0;


Why won't using G0 trip the assertion above that scratch != TCG_REG_G0 ?


It would only do so for full 64-bit constants, where the scratch is needed.  For the case 
in which we use T2 explicitly, the value is constrained to J (a 13-bit constant).


I suppose I could have also changed addsub2 to use tcg_out_movi_imm13 
directly...


r~



Re: [PATCH v5 0/9] virtiofsd: Add support for file security context at file creation

2022-02-07 Thread Daniel Walsh

On 2/7/22 16:19, Vivek Goyal wrote:

On Mon, Feb 07, 2022 at 01:05:16PM +, Daniel P. Berrangé wrote:

On Wed, Feb 02, 2022 at 02:39:26PM -0500, Vivek Goyal wrote:

Hi,

This is V5 of the patches. I posted V4 here.

https://listman.redhat.com/archives/virtio-fs/2022-January/msg00041.html

These will allow us to support SELinux with virtiofs. This will send
SELinux context at file creation to server and server can set it on
file.

I've not entirely figured it out from the code, so easier for me
to ask...

How is the SELinux label stored on the host side? Is it stored
directly in the security.* xattr namespace,

[ CC Dan Walsh ]

I just tried to test the mode where I don't do xattr remapping and try
to set /proc/pid/attr/fscreate with the context I want to set. It will
set security.selinux xattr on host.

But write to /proc/pid/attr/fscreate fails if host does not recognize
the label sent by guest. I am running virtiofsd with unconfined_t but
it still fails because guest is trying to create a file with
"test_filesystem_filetranscon_t" and host does not recognize this
label. Seeing following in audit logs.

type=SELINUX_ERR msg=audit(1644268262.666:8111): op=fscreate 
invalid_context="unconfined_u:object_r:test_filesystem_filetranscon_t:s0"

So if we don't remap xattrs and the host has SELinux enabled, then it will
probably only work in very limited circumstances where host and guest policies
don't conflict. I guess it's like running a Fedora 34 guest on a Fedora 34 host.
I suspect that this will see very limited use, though I have put the
code in for the sake of completeness.

Thanks
Vivek


or is it subject to
xattr remapping that virtiofsd already supports?

Storing directly means virtiofsd has to run in an essentially
unconfined context (to let it make arbitrary changes to security.*
xattrs without being blocked by SELinux) and carries the risk that
guest-initiated changes can open holes in the host confinement if
the exported FS is generally visible to processes on the host.


Using remapping lets virtiofsd be strictly isolated by SELinux
policy on the host, and ensures that guest context changes
can't open up holes in the host.

Both are valid use cases, so I'd ultimately expect us to want
to support both, but my preference for a "default" behaviour
would be remapping.

Regards,
Daniel
--
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

I had a bug today, where someone tried to run a --privileged container 
with RHEL9 which attempted to place a label onto a volume from the RHEL8 
host and got a MAC_ADMIN avc and the label was denied.


Even unconfined domains are not allowed to place non-understood labels 
on disk.  The problem is when the processes on RHEL8 would look at the 
labeled file, they would just see it as unlabeled_t.






[PATCH] Python: discourage direct setup.py install

2022-02-07 Thread John Snow
When invoking setup.py directly, the default behavior for 'install' is
to run the bdist_egg installation hook, which is ... actually deprecated
by setuptools. It doesn't seem to work quite right anymore.

By contrast, 'pip install' will invoke the bdist_wheel hook
instead. This leads to differences in behavior for the two approaches. I
advocate using pip in the documentation in this directory, but the
'setup.py' which has been used for quite a long time in the Python world
may deceptively appear to work at first glance.

Add an error message that will save a bit of time and frustration
that points the user towards using the supported installation
invocation.

Reported-by: Daniel P. Berrangé 
Signed-off-by: John Snow 
---
 python/setup.py | 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/python/setup.py b/python/setup.py
index 2014f81b75..c5bc45919a 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -5,9 +5,26 @@
 """
 
 import setuptools
+from setuptools.command import bdist_egg
+import sys
 import pkg_resources
 
 
+class bdist_egg_guard(bdist_egg.bdist_egg):
+"""
+Protect against bdist_egg from being executed
+
+This prevents calling 'setup.py install' directly, as the 'install'
+CLI option will invoke the deprecated bdist_egg hook. "pip install"
+calls the more modern bdist_wheel hook, which is what we want.
+"""
+def run(self):
+sys.exit(
+'Installation directly via setup.py is not supported.\n'
+'Please use `pip install .` instead.'
+)
+
+
 def main():
 """
 QEMU tooling installer
@@ -16,7 +33,7 @@ def main():
 # 
https://medium.com/@daveshawley/safely-using-setup-cfg-for-metadata-1babbe54c108
 pkg_resources.require('setuptools>=39.2')
 
-setuptools.setup()
+setuptools.setup(cmdclass={'bdist_egg': bdist_egg_guard})
 
 
 if __name__ == '__main__':
-- 
2.34.1




Re: [PATCH v5 0/9] virtiofsd: Add support for file security context at file creation

2022-02-07 Thread Vivek Goyal
On Mon, Feb 07, 2022 at 01:05:16PM +, Daniel P. Berrangé wrote:
> On Wed, Feb 02, 2022 at 02:39:26PM -0500, Vivek Goyal wrote:
> > Hi,
> > 
> > This is V5 of the patches. I posted V4 here.
> > 
> > https://listman.redhat.com/archives/virtio-fs/2022-January/msg00041.html
> > 
> > These will allow us to support SELinux with virtiofs. This will send
> > SELinux context at file creation to server and server can set it on
> > file.
> 
> I've not entirely figured it out from the code, so easier for me
> to ask...
> 
> How is the SELinux label stored on the host side? Is it stored
> directly in the security.* xattr namespace,

[ CC Dan Walsh ]

I just tried to test the mode where I don't do xattr remapping and try
to set /proc/pid/attr/fscreate with the context I want to set. It will
set security.selinux xattr on host.

But write to /proc/pid/attr/fscreate fails if host does not recognize
the label sent by guest. I am running virtiofsd with unconfined_t but
it still fails because guest is trying to create a file with
"test_filesystem_filetranscon_t" and host does not recognize this
label. Seeing following in audit logs.

type=SELINUX_ERR msg=audit(1644268262.666:8111): op=fscreate 
invalid_context="unconfined_u:object_r:test_filesystem_filetranscon_t:s0"

So if we don't remap xattrs and the host has SELinux enabled, then it will
probably only work in very limited circumstances where host and guest policies
don't conflict. I guess it's like running a Fedora 34 guest on a Fedora 34 host.
I suspect that this will see very limited use, though I have put the
code in for the sake of completeness.

Thanks
Vivek

> or is it subject to
> xattr remapping that virtiofsd already supports?
> 
> Storing directly means virtiofsd has to run in an essentially
> unconfined context (to let it make arbitrary changes to security.*
> xattrs without being blocked by SELinux) and carries the risk that
> guest-initiated changes can open holes in the host confinement if
> the exported FS is generally visible to processes on the host.
> 
> 
> Using remapping lets virtiofsd be strictly isolated by SELinux
> policy on the host, and ensures that guest context changes
> can't open up holes in the host.
> 
> Both are valid use cases, so I'd ultimately expect us to want
> to support both, but my preference for a "default" behaviour
> would be remapping.
> 
> Regards,
> Daniel
> -- 
> |: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|
> 




Re: [PATCH v4 09/11] 9p: darwin: Implement compatibility for mknodat

2022-02-07 Thread Will Cohen
On Mon, Feb 7, 2022 at 9:21 AM Christian Schoenebeck 
wrote:

> On Montag, 7. Februar 2022 11:57:25 CET Dr. David Alan Gilbert wrote:
> > * Greg Kurz (gr...@kaod.org) wrote:
> > > On Mon, 7 Feb 2022 11:30:18 +0100
> > >
> > > Philippe Mathieu-Daudé  wrote:
> > > > On 7/2/22 09:47, Greg Kurz wrote:
> > > > > On Sun, 6 Feb 2022 20:10:23 -0500
> > > > >
> > > > > Will Cohen  wrote:
> > > > >> This patch set currently places it in 9p-util only because 9p is
> the
> > > > >> only
> > > > >> place where this issue seems to have come up so far and we were
> wary
> > > > >> of
> > > > >> editing files too far afield, but I have no attachment to its
> > > > >> specific
> > > > >> location!
> > > > >
> > > > > Inline comments are preferred on qemu-devel. Please don't top post
> !
> > > > > This complicates the review a lot.
> > > > >
> > > > > This is indeed a good candidate for osdep. This being said, unless
> > > > > there's
> > > > > some other user in the QEMU code base, it is acceptable to leave it
> > > > > under
> > > > > 9pfs.
> > > >
> > > > virtiofsd could eventually use it.
> > >
> > > Indeed but virtiofsd is for linux hosts only AFAICT and I'm not aware
> of
> > > any work to support any other host OS.
> > >
> > > Cc'ing virtio-fs people for inputs on this topic.
> >
> > Indeed, there's a lot of Linux specific code in the virtiofsd - I know
> > people are interested in other platforms, but I'm not sure that's the
> > right starting point.
> >
> > Dave
>
> Agreeing with Greg here: i.e. I would have placed this into osdep, but I
> would
> not insist on it either.
>
> Best regards,
> Christian Schoenebeck
>
>
This makes sense. A revised version of this patch, moving qemu_mknodat from
9p-util to osdep and os-posix, is attached below. I'd appreciate any
feedback from those looped in here, so that the context isn't lost before
resubmitting as a v5 patch, especially since this is starting to touch
files outside of 9p.

>From c9713c87163da7c96b5357d0d85ac318ae3d3051 Mon Sep 17 00:00:00 2001
From: Keno Fischer 
Date: Sat, 16 Jun 2018 20:56:55 -0400
Subject: [PATCH] 9p: darwin: Implement compatibility for mknodat

Darwin does not support mknodat. However, to avoid race conditions
with later setting the permissions, we must avoid using mknod on
the full path instead. We could try to fchdir, but that would cause
problems if multiple threads try to call mknodat at the same time.
However, luckily there is a solution: Darwin includes a function
that sets the cwd for the current thread only.
This should suffice to use mknod safely.

This function (pthread_fchdir_np) is protected by a check in
meson in a patch later in this series.

Signed-off-by: Keno Fischer 
Signed-off-by: Michael Roitzsch 
[Will Cohen: - Adjust coding style
 - Replace clang references with gcc
 - Note radar filed with Apple for missing syscall
 - Replace direct syscall with pthread_fchdir_np and
   adjust patch notes accordingly
 - Move qemu_mknodat from 9p-util to osdep and os-posix]
Signed-off-by: Will Cohen 
---
 hw/9pfs/9p-local.c   |  4 ++--
 include/qemu/osdep.h | 10 ++
 os-posix.c   | 34 ++
 3 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index a0d08e5216..d42ce6d8b8 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -682,7 +682,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath
*dir_path,

 if (fs_ctx->export_flags & V9FS_SM_MAPPED ||
 fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
-err = mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0);
+err = qemu_mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0);
 if (err == -1) {
 goto out;
 }
@@ -697,7 +697,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath
*dir_path,
 }
 } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH ||
fs_ctx->export_flags & V9FS_SM_NONE) {
-err = mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev);
+err = qemu_mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev);
 if (err == -1) {
 goto out;
 }
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index d1660d67fa..f3a8367ece 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -810,3 +810,13 @@ static inline int
platform_does_not_support_system(const char *command)
 #endif

 #endif
+
+/*
+ * As long as mknodat is not available on macOS, this workaround
+ * using pthread_fchdir_np is needed. qemu_mknodat is defined in
+ * os-posix.c
+ */
+#ifdef CONFIG_DARWIN
+int pthread_fchdir_np(int fd);
+#endif
+int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev);
diff --git a/os-posix.c b/os-posix.c
index ae6c9f2a5e..95c1607065 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -24,6 +24,7 @@
  */

 #include "qemu/osdep.h"
+#include 
 #include 
 #include 
 #include 
@@ -332,3 +333,36 @@ int os_mlock(void)

Re: [PATCH v3 0/2] python: a few improvements to qmp-shell

2022-02-07 Thread John Snow
On Fri, Jan 28, 2022 at 11:12 AM Daniel P. Berrangé  wrote:
>
> This makes the qmp-shell program a little more pleasant to use when you
> are just trying to spawn a throw-away QEMU process to query some info
> from.
>
> First it introduces a 'qmp-shell-wrap' command that takes a QEMU command
> line instead of QMP socket, and spawns QEMU automatically, so its life
> is tied to that of the shell.
>
> Second it adds ability to log QMP commands/responses to a file that can
> be queried with 'jq' to extract information. This is good for commands
> which return huge JSON docs.
>
> In v3:
>
>  - Add qmp-shell-wrap to setup.cfg entry points
>
> In v2:
>
>  - Unlink unix socket path on exit
>  - Fix default command name
>  - Deal with flake8/pylint warnings
>
> Daniel P. Berrangé (2):
>   python: introduce qmp-shell-wrap convenience tool
>   python: support recording QMP session to a file
>
>  python/qemu/aqmp/qmp_shell.py | 88 ---
>  python/setup.cfg  |  4 ++
>  scripts/qmp/qmp-shell-wrap| 11 +
>  3 files changed, 96 insertions(+), 7 deletions(-)
>  create mode 100755 scripts/qmp/qmp-shell-wrap
>
> --
> 2.34.1
>
>

Great, thanks! I rebased patch 1/2 myself as a courtesy and have staged these.

--js

(fwiw: using pip, it seems like the wrapper script works just fine. it
appears as though using 'python3 setup.py install' does indeed cause
issues here. I have a patch I'll send soon that discourages the direct
setup.py invocation to avoid frustration in the future.)




Re: [PATCH v8 1/5] QIOChannel: Add flags on io_writev and introduce io_flush callback

2022-02-07 Thread Leonardo Bras Soares Passos
Hello Peter,

On Mon, Feb 7, 2022 at 9:50 AM Peter Xu  wrote:
>
> On Tue, Feb 01, 2022 at 03:28:59AM -0300, Leonardo Bras wrote:
> > Add flags to io_writev and introduce io_flush as optional callback to
> > QIOChannelClass, allowing the implementation of zero copy writes by
> > subclasses.
> >
> > How to use them:
> > - Write data using 
> > qio_channel_writev*(...,QIO_CHANNEL_WRITE_FLAG_ZERO_COPY),
> > - Wait write completion with qio_channel_flush().
> >
> > Notes:
> > As some zero copy write implementations work asynchronously, it's
> > recommended to keep the write buffer untouched until the return of
> > qio_channel_flush(), to avoid the risk of sending an updated buffer
> > instead of the buffer state during write.
> >
> > As io_flush callback is optional, if a subclass does not implement it, then:
> > - io_flush will return 0 without changing anything.
> >
> > Also, some functions like qio_channel_writev_full_all() were adapted to
> > receive a flag parameter. That allows shared code between zero copy and
> > non-zero copy writev, and also an easier implementation on new flags.
> >
> > Signed-off-by: Leonardo Bras 
>
> With Dan's comment addressed on removing the redundant assertion:
>
> Reviewed-by: Peter Xu 
>

Thank you for reviewing!

I think I am now missing reviewing only on patch 5/5 before sending
the next version.
Could you and/or Daniel help me with that? Just to check if I am
missing anything?

Best regards,
Leo




[PATCH RFCv2 2/4] i386/pc: relocate 4g start to 1T where applicable

2022-02-07 Thread Joao Martins
It is assumed that the whole GPA space is available to be DMA
addressable, within a given address space limit, except for a
tiny region before the 4G. Since Linux v5.4, VFIO validates
whether the selected GPA is indeed valid i.e. not reserved by
IOMMU on behalf of some specific devices or platform-defined
restrictions, and thus failing the ioctl(VFIO_DMA_MAP) with
 -EINVAL.

AMD systems with an IOMMU are examples of such platforms and
particularly may only have these ranges as allowed:

0000000000000000 - 00000000fedfffff (0      .. 3.982G)
00000000fef00000 - 000000fcffffffff (3.983G .. 1011.9G)
0000010000000000 - ffffffffffffffff (1Tb    .. 16Pb[*])

We already account for the 4G hole, albeit if the guest is big
enough we will fail to allocate a guest with  >1010G due to the
~12G hole at the 1Tb boundary, reserved for HyperTransport (HT).

[*] there is another reserved region unrelated to HT that exists
at the 256T boundary in Fam 17h according to Errata #1286,
documented also in "Open-Source Register Reference for AMD Family
17h Processors (PUB)"

When creating the region above 4G, take into account that on AMD
platforms the HyperTransport range is reserved and hence it
cannot be used either as GPAs. On those cases rather than
establishing the start of ram-above-4g to be 4G, relocate instead
to 1Tb. See AMD IOMMU spec, section 2.1.2 "IOMMU Logical
Topology", for more information on the underlying restriction of
IOVAs.

After accounting for the 1Tb hole on AMD hosts, mtree should
look like:

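0000000000000000-000000007fffffff (prio 0, i/o):
    alias ram-below-4g @pc.ram 0000000000000000-000000007fffffff
0000010000000000-000001ff7fffffff (prio 0, i/o):
    alias ram-above-4g @pc.ram 0000000080000000-000000ffffffffff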

If the relocation is done, we also add the HT range to e820
as reserved.

Suggested-by: Igor Mammedov 
Signed-off-by: Joao Martins 
---
 hw/i386/pc.c  | 66 +++
 target/i386/cpu.h |  4 +++
 2 files changed, 70 insertions(+)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 7de0e87f4a3f..b060aedd38f3 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -802,6 +802,65 @@ void xen_load_linux(PCMachineState *pcms)
 #define PC_ROM_ALIGN   0x800
 #define PC_ROM_SIZE(PC_ROM_MAX - PC_ROM_MIN_VGA)
 
+/*
+ * AMD systems with an IOMMU have an additional hole close to the
+ * 1Tb, which are special GPAs that cannot be DMA mapped. Depending
+ * on kernel version, VFIO may or may not let you DMA map those ranges.
+ * Starting Linux v5.4 we validate it, and can't create guests on AMD machines
+ * with certain memory sizes. It's also wrong to use those IOVA ranges
+ * in detriment of leading to IOMMU INVALID_DEVICE_REQUEST or worse.
+ * The ranges reserved for Hyper-Transport are:
+ *
+ * FD_0000_0000h - FF_FFFF_FFFFh
+ *
+ * The ranges represent the following:
+ *
+ * Base Address   Top Address  Use
+ *
+ * FD_0000_0000h FD_F7FF_FFFFh Reserved interrupt address space
+ * FD_F800_0000h FD_F8FF_FFFFh Interrupt/EOI IntCtl
+ * FD_F900_0000h FD_F90F_FFFFh Legacy PIC IACK
+ * FD_F910_0000h FD_F91F_FFFFh System Management
+ * FD_F920_0000h FD_FAFF_FFFFh Reserved Page Tables
+ * FD_FB00_0000h FD_FBFF_FFFFh Address Translation
+ * FD_FC00_0000h FD_FDFF_FFFFh I/O Space
+ * FD_FE00_0000h FD_FFFF_FFFFh Configuration
+ * FE_0000_0000h FE_1FFF_FFFFh Extended Configuration/Device Messages
+ * FE_2000_0000h FF_FFFF_FFFFh Reserved
+ *
+ * See AMD IOMMU spec, section 2.1.2 "IOMMU Logical Topology",
+ * Table 3: Special Address Controls (GPA) for more information.
+ */
+#define AMD_HT_START 0xfd00000000UL
+#define AMD_HT_END   0xffffffffffUL
+#define AMD_ABOVE_1TB_START  (AMD_HT_END + 1)
+#define AMD_HT_SIZE  (AMD_ABOVE_1TB_START - AMD_HT_START)
+
+static void relocate_4g(MachineState *machine, PCMachineState *pcms)
+{
+PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
+X86MachineState *x86ms = X86_MACHINE(pcms);
+ram_addr_t device_mem_size = 0;
+uint32_t eax, vendor[3];
+
+host_cpuid(0x0, 0, &eax, &vendor[0], &vendor[2], &vendor[1]);
+if (!IS_AMD_VENDOR(vendor)) {
+return;
+}
+
+if (pcmc->has_reserved_memory &&
+   (machine->ram_size < machine->maxram_size)) {
+device_mem_size = machine->maxram_size - machine->ram_size;
+}
+
+if ((x86ms->above_4g_mem_start + x86ms->above_4g_mem_size +
+ device_mem_size) < AMD_HT_START) {
+return;
+}
+
+x86ms->above_4g_mem_start = AMD_ABOVE_1TB_START;
+}
+
 void pc_memory_init(PCMachineState *pcms,
 MemoryRegion *system_memory,
 MemoryRegion *rom_memory,
@@ -821,6 +880,8 @@ void pc_memory_init(PCMachineState *pcms,
 
 linux_boot = (machine->kernel_filename != NULL);
 
+relocate_4g(machine, pcms);
+
 /*
  * Split single memory region and use aliases to address portions of it,
  * done for backwards compatibility with older qemus.
@@ -83

[PATCH RFCv2 0/4] i386/pc: Fix creation of >= 1010G guests on AMD systems with IOMMU

2022-02-07 Thread Joao Martins
RFC[0] -> RFCv2:

* At Igor's suggestion in one of the patches I reworked the series entirely;
more or less as he was thinking, it is far simpler to relocate the
ram-above-4g to be at 1TiB where applicable. The changeset is 3x simpler,
and less intrusive. (patch 1 & 2)
* Check phys-bits is big enough prior to relocating (new patch 3)
* Remove the machine property, and it's only internal and set by new machine
version (Igor, patch 4).
* Clarify whether it's GPA or HPA as a more clear meaning (Igor, patch 2)
* Add IOMMU SDM in the commit message (Igor, patch 2)

Note: It still makes me a tiny bit uncomfortable to just remove memory from
the [4G - 1010G] range, but that concern is a little baseless. It's definitely
a lot better to maintain this set given its simplicity. As for the long-term
ideas proposed here, perhaps Igor's pc-dimm based model idea, or equivalently
Alex's suggestion of an option to control reserved address ranges, could enable
adjusting the 1Tb hole to be closer to bare metal.

The one downside of this approach is CMOS losing its meaning for the above-4G
RAM blocks, but it was mentioned in the RFC discussion that CMOS is only useful
for very old SeaBIOS. If so, either I leave it as is, or perhaps folks prefer
that I just set the RAM above 4G in CMOS to 0.

[0] 
https://lore.kernel.org/qemu-devel/20210622154905.30858-1-joao.m.mart...@oracle.com/

---

This series lets Qemu properly spawn i386 guests with >= 1010G with VFIO,
particularly when running on AMD systems with an IOMMU.

Since Linux v5.4, VFIO validates whether the IOVA in the DMA_MAP ioctl is valid
and returns -EINVAL if it is not. On x86, Intel hosts aren't particularly
affected by this extra validation. But AMD systems with an IOMMU have a hole at
the 1TB boundary which is *reserved* for HyperTransport I/O addresses, located
here: FD_0000_0000h - FF_FFFF_FFFFh. See the IOMMU manual [1], specifically
section '2.1.2 IOMMU Logical Topology', Table 3, for what those addresses mean.

VFIO DMA_MAP calls in this IOVA address range fail this check and hence
return -EINVAL, consequently failing the creation of guests bigger than 1010G.
Example of the failure:

qemu-system-x86_64: -device vfio-pci,host=:41:10.1,bootindex=-1: 
VFIO_MAP_DMA: -22
qemu-system-x86_64: -device vfio-pci,host=:41:10.1,bootindex=-1: vfio 
:41:10.1: 
failed to setup container for group 258: memory listener initialization 
failed:
Region pc.ram: vfio_dma_map(0x55ba53e7a9d0, 0x1, 
0xff3000, 0x7ed243e0) = -22 (Invalid argument)

Prior to v5.4, we could map to these IOVAs, *but* that's still not the right
thing to do and could trigger certain IOMMU events (e.g. INVALID_DEVICE_REQUEST),
or spurious guest VF failures from the resultant IOMMU target abort (see Errata
1155 [2]), as documented in the links below.

This small series tries to address that by dealing with this AMD-specific 1Tb 
hole,
but rather than dealing like the 4G hole, it instead relocates RAM above 4G
to be above the 1T if the maximum RAM range crosses the HT reserved range.
It is organized as following:

patch 1: Introduce a @above_4g_mem_start which defaults to 4 GiB as starting
address of the 4G boundary

patch 2: Change @above_4g_mem_start to 1TiB if we are on AMD and the max
possible address crosses the HT region.

patch 3: Warns user if phys-bits is too low

patch 4: Ensure valid IOVAs only on new machine types, but not older
ones (<= v6.2.0)

The 'consequence' of this approach is that we may need more than the default
phys-bits e.g. a guest with >1010G, will have most of its RAM after the 1TB
address, consequently needing 41 phys-bits as opposed to the default of 40
(TCG_PHYS_BITS). Today there's already a precedent to depend on the user to
pick the right value of phys-bits (regardless of this series), so we warn in
case phys-bits aren't enough.

Additionally, the reserved region is added to E820 if the relocation is done.

Alternative options considered (RFCv1):

a) Dealing with the 1T hole like the 4G hole -- which also represents what
hardware closely does.

Thanks,
Joao

[1] https://www.amd.com/system/files/TechDocs/48882_IOMMU.pdf
[2] https://developer.amd.com/wp-content/resources/56323-PUB_0.78.pdf

Joao Martins (4):
  hw/i386: add 4g boundary start to X86MachineState
  i386/pc: relocate 4g start to 1T where applicable
  i386/pc: warn if phys-bits is too low
  i386/pc: Restrict AMD-only enforcing of valid IOVAs to new machine
type

 hw/i386/acpi-build.c  |  2 +-
 hw/i386/pc.c  | 87 +--
 hw/i386/pc_piix.c |  2 +
 hw/i386/pc_q35.c  |  2 +
 hw/i386/sgx.c |  2 +-
 hw/i386/x86.c |  1 +
 include/hw/i386/pc.h  |  1 +
 include/hw/i386/x86.h |  3 ++
 target/i386/cpu.h |  4 ++
 9 files changed, 98 insertions(+), 6 deletions(-)

-- 
2.17.2




[PATCH RFCv2 4/4] i386/pc: Restrict AMD-only enforcing of valid IOVAs to new machine type

2022-02-07 Thread Joao Martins
The added enforcing is only relevant in the case of AMD where the
range right before the 1TB is restricted and cannot be DMA mapped
by the kernel consequently leading to IOMMU INVALID_DEVICE_REQUEST
or possibly other kinds of IOMMU events in the AMD IOMMU.

Although, there's a case where it may make sense to disable the
IOVA relocation/validation when migrating from a
non-valid-IOVA-aware qemu to one that supports it.

Relocating RAM regions to after the 1Tb hole has consequences for
guest ABI because we are changing the memory mapping, so make
sure that only new machine types enforce it, but not older ones.

Signed-off-by: Joao Martins 
---
 hw/i386/pc.c | 5 +
 hw/i386/pc_piix.c| 2 ++
 hw/i386/pc_q35.c | 2 ++
 include/hw/i386/pc.h | 1 +
 4 files changed, 10 insertions(+)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index f8712eb8427e..e62d446b28c7 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -844,6 +844,10 @@ static void relocate_4g(MachineState *machine, 
PCMachineState *pcms)
 uint32_t eax, vendor[3];
 hwaddr maxphysaddr;
 
+if (!pcmc->enforce_valid_iova) {
+return;
+}
+
 host_cpuid(0x0, 0, &eax, &vendor[0], &vendor[2], &vendor[1]);
 if (!IS_AMD_VENDOR(vendor)) {
 return;
@@ -1787,6 +1791,7 @@ static void pc_machine_class_init(ObjectClass *oc, void 
*data)
 pcmc->has_reserved_memory = true;
 pcmc->kvmclock_enabled = true;
 pcmc->enforce_aligned_dimm = true;
+pcmc->enforce_valid_iova = true;
 /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported
  * to be used at the moment, 32K should be enough for a while.  */
 pcmc->acpi_data_size = 0x20000 + 0x8000;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index d9b344248dac..ccf8b6d9895f 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -429,9 +429,11 @@ DEFINE_I440FX_MACHINE(v7_0, "pc-i440fx-7.0", NULL,
 
 static void pc_i440fx_6_2_machine_options(MachineClass *m)
 {
+PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
 pc_i440fx_7_0_machine_options(m);
 m->alias = NULL;
 m->is_default = false;
+pcmc->enforce_valid_iova = false;
 compat_props_add(m->compat_props, hw_compat_6_2, hw_compat_6_2_len);
 compat_props_add(m->compat_props, pc_compat_6_2, pc_compat_6_2_len);
 }
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 1780f79bc127..1022abf4953d 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -373,8 +373,10 @@ DEFINE_Q35_MACHINE(v7_0, "pc-q35-7.0", NULL,
 
 static void pc_q35_6_2_machine_options(MachineClass *m)
 {
+PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
 pc_q35_7_0_machine_options(m);
 m->alias = NULL;
+pcmc->enforce_valid_iova = false;
 compat_props_add(m->compat_props, hw_compat_6_2, hw_compat_6_2_len);
 compat_props_add(m->compat_props, pc_compat_6_2, pc_compat_6_2_len);
 }
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 9c9f4ac74810..10dba9767861 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -117,6 +117,7 @@ struct PCMachineClass {
 bool has_reserved_memory;
 bool enforce_aligned_dimm;
 bool broken_reserved_end;
+bool enforce_valid_iova;
 
 /* generate legacy CPU hotplug AML */
 bool legacy_cpu_hotplug;
-- 
2.17.2




[PATCH RFCv2 3/4] i386/pc: warn if phys-bits is too low

2022-02-07 Thread Joao Martins
Default phys-bits on Qemu is TCG_PHYS_BITS (40) which is enough
to address 1Tb (0xff ffff ffff). On AMD platforms, if a
ram-above-4g relocation happens and the CPU wasn't configured
with a big enough phys-bits, warn the user. There isn't a
catastrophic failure exactly, the guest will still boot, but
most likely won't be able to use more than ~4G of RAM.

Signed-off-by: Joao Martins 
---
 hw/i386/pc.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index b060aedd38f3..f8712eb8427e 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -842,6 +842,7 @@ static void relocate_4g(MachineState *machine, 
PCMachineState *pcms)
 X86MachineState *x86ms = X86_MACHINE(pcms);
 ram_addr_t device_mem_size = 0;
 uint32_t eax, vendor[3];
+hwaddr maxphysaddr;
 
 host_cpuid(0x0, 0, &eax, &vendor[0], &vendor[2], &vendor[1]);
 if (!IS_AMD_VENDOR(vendor)) {
@@ -858,6 +859,12 @@ static void relocate_4g(MachineState *machine, 
PCMachineState *pcms)
 return;
 }
 
+maxphysaddr = ((hwaddr)1 << X86_CPU(first_cpu)->phys_bits) - 1;
+if (maxphysaddr < AMD_ABOVE_1TB_START)
+warn_report("Relocated RAM above 4G to start at %lu "
+"phys-bits too low (%u)",
+AMD_ABOVE_1TB_START, X86_CPU(first_cpu)->phys_bits);
+
 x86ms->above_4g_mem_start = AMD_ABOVE_1TB_START;
 }
 
-- 
2.17.2




[PATCH RFCv2 1/4] hw/i386: add 4g boundary start to X86MachineState

2022-02-07 Thread Joao Martins
Rather than hardcoding the 4G boundary everywhere, introduce a
X86MachineState property @above_4g_mem_start and use it
accordingly.

This is in preparation for relocating ram-above-4g to
dynamically start at 1T on AMD platforms.

Signed-off-by: Joao Martins 
---
 hw/i386/acpi-build.c  | 2 +-
 hw/i386/pc.c  | 9 +
 hw/i386/sgx.c | 2 +-
 hw/i386/x86.c | 1 +
 include/hw/i386/x86.h | 3 +++
 5 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index ebd47aa26fd8..4bf54ccdab91 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2063,7 +2063,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, 
MachineState *machine)
 build_srat_memory(table_data, mem_base, mem_len, i - 1,
   MEM_AFFINITY_ENABLED);
 }
-mem_base = 1ULL << 32;
+mem_base = x86ms->above_4g_mem_start;
 mem_len = next_base - x86ms->below_4g_mem_size;
 next_base = mem_base + mem_len;
 }
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index c8696ac01e85..7de0e87f4a3f 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -837,9 +837,10 @@ void pc_memory_init(PCMachineState *pcms,
  machine->ram,
  x86ms->below_4g_mem_size,
  x86ms->above_4g_mem_size);
-memory_region_add_subregion(system_memory, 0x100000000ULL,
+memory_region_add_subregion(system_memory, x86ms->above_4g_mem_start,
 ram_above_4g);
-e820_add_entry(0x100000000ULL, x86ms->above_4g_mem_size, E820_RAM);
+e820_add_entry(x86ms->above_4g_mem_start, x86ms->above_4g_mem_size,
+   E820_RAM);
 }
 
 if (pcms->sgx_epc.size != 0) {
@@ -880,7 +881,7 @@ void pc_memory_init(PCMachineState *pcms,
 machine->device_memory->base = 
sgx_epc_above_4g_end(&pcms->sgx_epc);
 } else {
 machine->device_memory->base =
-0x100000000ULL + x86ms->above_4g_mem_size;
+x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
 }
 
 machine->device_memory->base =
@@ -972,7 +973,7 @@ uint64_t pc_pci_hole64_start(void)
 } else if (pcms->sgx_epc.size != 0) {
 hole64_start = sgx_epc_above_4g_end(&pcms->sgx_epc);
 } else {
-hole64_start = 0x100000000ULL + x86ms->above_4g_mem_size;
+hole64_start = x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
 }
 
 return ROUND_UP(hole64_start, 1 * GiB);
diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c
index a2b318dd9387..164ee1ddb8de 100644
--- a/hw/i386/sgx.c
+++ b/hw/i386/sgx.c
@@ -295,7 +295,7 @@ void pc_machine_init_sgx_epc(PCMachineState *pcms)
 return;
 }
 
-sgx_epc->base = 0x100000000ULL + x86ms->above_4g_mem_size;
+sgx_epc->base = x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
 
 memory_region_init(&sgx_epc->mr, OBJECT(pcms), "sgx-epc", UINT64_MAX);
 memory_region_add_subregion(get_system_memory(), sgx_epc->base,
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index b84840a1bb99..912e96718ee8 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -1319,6 +1319,7 @@ static void x86_machine_initfn(Object *obj)
 x86ms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
 x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
 x86ms->bus_lock_ratelimit = 0;
+x86ms->above_4g_mem_start = 0x100000000ULL;
 }
 
 static void x86_machine_class_init(ObjectClass *oc, void *data)
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index a145a303703f..2de7ec046b75 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -58,6 +58,9 @@ struct X86MachineState {
 /* RAM information (sizes, addresses, configuration): */
 ram_addr_t below_4g_mem_size, above_4g_mem_size;
 
+/* RAM information when there's a hole in 1Tb */
+ram_addr_t above_4g_mem_start;
+
 /* CPU and apic information: */
 bool apic_xrupt_override;
 unsigned pci_irq_mask;
-- 
2.17.2




Re: [RFC PATCH 1/1] virtio: fix feature negotiation for ACCESS_PLATFORM

2022-02-07 Thread Daniel Henrique Barboza




On 2/7/22 11:46, Halil Pasic wrote:

On Mon, 7 Feb 2022 08:46:34 -0300
Daniel Henrique Barboza  wrote:


On 2/3/22 13:45, Halil Pasic wrote:

Unlike most virtio features ACCESS_PLATFORM is considered mandatory, i.e.
the driver must accept it if offered by the device. The virtio
specification says that the driver SHOULD accept the ACCESS_PLATFORM
feature if offered, and that the device MAY fail to operate if
ACCESS_PLATFORM was offered but not negotiated.

While a SHOULD ain't exactly a MUST, we are certainly allowed to fail
the device when the driver fences ACCESS_PLATFORM. With commit



I believe a link to the virtio specification where this is being mentioned would
be good to have in the commit message.


I can add that if Michael agrees, and if the patch is deemed worthy.




2943b53f68 ("virtio: force VIRTIO_F_IOMMU_PLATFORM") we already made the
decision to do so whenever the get_dma_as() callback is implemented (by
the bus), which in practice means for the entirety of virtio-pci.

That means, if the device needs to translate I/O addresses, then
ACCESS_PLATFORM is mandatory. The aforementioned commit tells us
in the commit message that this is for security reasons.

If ACCESS_PLATFORM is offered not we want the device to utilize an


I think you meant "If ACCESS_PLATFORM is offered".


I'm missing a "because", i.e. s/not/not because/




IOMMU and do address translation, but because the device does not have
access to the entire guest RAM, and needs the driver to grant access
to the bits it needs access to (e.g. confidential guest support), we
still require the guest to have the corresponding logic and to accept
ACCESS_PLATFORM. If the driver does not accept ACCESS_PLATFORM, then
things are bound to go wrong, and we may see failures much less graceful
than failing the device because the driver didn't negotiate
ACCESS_PLATFORM.

So let us make ACCESS_PLATFORM mandatory for the driver regardless
of whether the get_dma_as() callback is implemented or not.

Signed-off-by: Halil Pasic 
Fixes: 2943b53f68 ("virtio: force VIRTIO_F_IOMMU_PLATFORM")

---
This patch is based on:
https://www.mail-archive.com/qemu-devel@nongnu.org/msg866199.html

During the review of "virtio: fix the condition for iommu_platform not
supported" Daniel raised the question why do we "force IOMMU_PLATFORM"
iff has_iommu && !!klass->get_dma_as. My answer to that was, that
this logic ain't right.

While at it I used the opportunity to re-organize the code a little
and provide an explanatory comment.
---
   hw/virtio/virtio-bus.c | 17 ++---
   1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
index fbf0dd14b8..359430eb1c 100644
--- a/hw/virtio/virtio-bus.c
+++ b/hw/virtio/virtio-bus.c
@@ -78,16 +78,19 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error 
**errp)
   return;
   }
   
-vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);

-if (klass->get_dma_as != NULL && has_iommu) {
+vdev->dma_as = &address_space_memory;


At this point you can also do:

 if (!has_iommu) {
 return;
 }

and the rest of the code will have one less indentation level.


I have considered this and decided against it. The reason why is
if that approach is taken, we can't really add more code to the
end of the function. An early return is good if we want to
abort the function with an error. My point is !has_iommu does
not necessarily mean we are done: after a block that handles
the has_iommu situation, in future, there could be a block that
handles something different.


And that's fine, but the way this patch is changing it I'm not sure it's better
than what we already have. Today we have:

if (has_iommu) {
  (... assign vdev->dma_as in some cases ...)
} else {
   vdev->dma_as = &address_space_memory;
}


Your patch is doing:

vdev->dma_as = &address_space_memory;

if (has_iommu) {
  (... assign vdev->dma_as in some cases ...)
}


You got rid of an 'else', but ended up adding a double "vdev->dma_as ="
assignment depending on the case (has_iommu = true and klass->get_dma_as != NULL).
This is why I proposed the early exit.

If we're worried about adding more code in the future, we might as well leave
the existing if/else as is.
   





Would this patch work for power? Or are there valid scenarios that
it breaks? I'm asking, because you voiced concern regarding this before.



I'll test it when I have an opportunity and let you know.


Thanks,


Daniel



Thanks for your feedback!

Halil




Re: [PATCH 08/11] mos6522: add "info via" HMP command for debugging

2022-02-07 Thread Peter Maydell
On Thu, 27 Jan 2022 at 21:03, Mark Cave-Ayland
 wrote:
>
> This displays detailed information about the device registers and timers to 
> aid
> debugging problems with timers and interrupts.
>
> Signed-off-by: Mark Cave-Ayland 
> ---
>  hmp-commands-info.hx | 12 ++
>  hw/misc/mos6522.c| 92 
>  2 files changed, 104 insertions(+)


I'm not sure how keen we are on adding new device-specific
HMP info commands, but it's not my area of expertise. Markus ?

(patch below for context)

thanks
-- PMM

>
> diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
> index e90f20a107..4e714e79a2 100644
> --- a/hmp-commands-info.hx
> +++ b/hmp-commands-info.hx
> @@ -879,3 +879,15 @@ SRST
>``info sgx``
>  Show intel SGX information.
>  ERST
> +
> +{
> +.name   = "via",
> +.args_type  = "",
> +.params = "",
> +.help   = "show guest 6522 VIA devices",
> +},
> +
> +SRST
> +  ``info via``
> +Show guest 6522 VIA devices.
> +ERST
> diff --git a/hw/misc/mos6522.c b/hw/misc/mos6522.c
> index aaae195d63..cfa6a9c44b 100644
> --- a/hw/misc/mos6522.c
> +++ b/hw/misc/mos6522.c
> @@ -30,6 +30,8 @@
>  #include "hw/misc/mos6522.h"
>  #include "hw/qdev-properties.h"
>  #include "migration/vmstate.h"
> +#include "monitor/monitor.h"
> +#include "qapi/type-helpers.h"
>  #include "qemu/timer.h"
>  #include "qemu/cutils.h"
>  #include "qemu/log.h"
> @@ -415,6 +417,95 @@ void mos6522_write(void *opaque, hwaddr addr, uint64_t 
> val, unsigned size)
>  }
>  }
>
> +static int qmp_x_query_via_foreach(Object *obj, void *opaque)
> +{
> +GString *buf = opaque;
> +
> +if (object_dynamic_cast(obj, TYPE_MOS6522)) {
> +MOS6522State *s = MOS6522(obj);
> +int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
> +uint16_t t1counter = get_counter(s, &s->timers[0]);
> +uint16_t t2counter = get_counter(s, &s->timers[1]);
> +
> +g_string_append_printf(buf, "%s:\n", object_get_typename(obj));
> +
> +g_string_append_printf(buf, "  Registers:\n");
> +g_string_append_printf(buf, "%-*s:0x%x\n", 4,
> +   mos6522_reg_names[0], s->b);
> +g_string_append_printf(buf, "%-*s:0x%x\n", 4,
> +   mos6522_reg_names[1], s->a);
> +g_string_append_printf(buf, "%-*s:0x%x\n", 4,
> +   mos6522_reg_names[2], s->dirb);
> +g_string_append_printf(buf, "%-*s:0x%x\n", 4,
> +   mos6522_reg_names[3], s->dira);
> +g_string_append_printf(buf, "%-*s:0x%x\n", 4,
> +   mos6522_reg_names[4], t1counter & 0xff);
> +g_string_append_printf(buf, "%-*s:0x%x\n", 4,
> +   mos6522_reg_names[5], t1counter >> 8);
> +g_string_append_printf(buf, "%-*s:0x%x\n", 4,
> +   mos6522_reg_names[6],
> +   s->timers[0].latch & 0xff);
> +g_string_append_printf(buf, "%-*s:0x%x\n", 4,
> +   mos6522_reg_names[7],
> +   s->timers[0].latch >> 8);
> +g_string_append_printf(buf, "%-*s:0x%x\n", 4,
> +   mos6522_reg_names[8], t2counter & 0xff);
> +g_string_append_printf(buf, "%-*s:0x%x\n", 4,
> +   mos6522_reg_names[9], t2counter >> 8);
> +g_string_append_printf(buf, "%-*s:0x%x\n", 4,
> +   mos6522_reg_names[10], s->sr);
> +g_string_append_printf(buf, "%-*s:0x%x\n", 4,
> +   mos6522_reg_names[11], s->acr);
> +g_string_append_printf(buf, "%-*s:0x%x\n", 4,
> +   mos6522_reg_names[12], s->pcr);
> +g_string_append_printf(buf, "%-*s:0x%x\n", 4,
> +   mos6522_reg_names[13], s->ifr);
> +g_string_append_printf(buf, "%-*s:0x%x\n", 4,
> +   mos6522_reg_names[14], s->ier);
> +
> +g_string_append_printf(buf, "  Timers:\n");
> +g_string_append_printf(buf, "Using current time now(ns)=%"PRId64
> +"\n", now);
> +g_string_append_printf(buf, "T1 freq(hz)=%"PRId64
> +   " mode=%s"
> +   " counter=0x%x"
> +   " latch=0x%x\n"
> +   "   load_time(ns)=%"PRId64
> +   " next_irq_time(ns)=%"PRId64 "\n",
> +   s->timers[0].frequency,
> +   ((s->acr & T1MODE) == T1MODE_CONT) ? 
> "continuous"
> +  : 
> "one-shot",
> +   t1counter,
>

Re: [PATCH 07/11] mos6522: add register names to register read/write trace events

2022-02-07 Thread Peter Maydell
On Thu, 27 Jan 2022 at 21:11, Mark Cave-Ayland
 wrote:
>
> This helps to follow how the guest is programming the mos6522 when debugging.
>
> Signed-off-by: Mark Cave-Ayland 
> ---
>  hw/misc/mos6522.c| 10 --
>  hw/misc/trace-events |  4 ++--
>  2 files changed, 10 insertions(+), 4 deletions(-)

Reviewed-by: Peter Maydell 

thanks
-- PMM



Re: [PATCH 06/11] mos6522: use device_class_set_parent_reset() to propagate reset to parent

2022-02-07 Thread Peter Maydell
On Thu, 27 Jan 2022 at 21:04, Mark Cave-Ayland
 wrote:
>
> Switch from using a legacy approach to the more formal approach for 
> propagating
> device reset to the parent.
>
> Signed-off-by: Mark Cave-Ayland 
> ---
>  hw/misc/mac_via.c| 7 +--
>  hw/misc/macio/cuda.c | 3 ++-
>  hw/misc/macio/pmu.c  | 3 ++-
>  hw/misc/mos6522.c| 1 -
>  4 files changed, 9 insertions(+), 5 deletions(-)
>

Reviewed-by: Peter Maydell 

thanks
-- PMM



Re: [PATCH 04/11] mos6522: switch over to use qdev gpios for IRQs

2022-02-07 Thread Peter Maydell
On Thu, 27 Jan 2022 at 21:01, Mark Cave-Ayland
 wrote:
>
> For historical reasons each mos6522 instance implements its own setting and
> update of the IFR flag bits using methods exposed by MOS6522DeviceClass. As
> of today this is no longer required, and it is now possible to implement
> the mos6522 IRQs as standard qdev gpios.
>
> Switch over to use qdev gpios for the mos6522 device and update all instances
> accordingly.
>
> Signed-off-by: Mark Cave-Ayland 
> ---
>  hw/misc/mac_via.c | 56 +++
>  hw/misc/macio/cuda.c  |  5 ++--
>  hw/misc/macio/pmu.c   |  4 +--
>  hw/misc/mos6522.c | 15 +++
>  include/hw/misc/mac_via.h |  6 +
>  include/hw/misc/mos6522.h |  2 ++
>  6 files changed, 32 insertions(+), 56 deletions(-)


> -static void via2_nubus_irq_request(void *opaque, int irq, int level)
> +static void via2_nubus_irq_request(void *opaque, int n, int level)
>  {
>  MOS6522Q800VIA2State *v2s = opaque;
>  MOS6522State *s = MOS6522(v2s);
> -MOS6522DeviceClass *mdc = MOS6522_GET_CLASS(s);
> +qemu_irq irq = qdev_get_gpio_in(DEVICE(s), VIA2_IRQ_NUBUS_BIT);
>
>  if (level) {
>  /* Port A nubus IRQ inputs are active LOW */
> -s->a &= ~(1 << irq);
> -s->ifr |= 1 << VIA2_IRQ_NUBUS_BIT;
> +s->a &= ~(1 << n);
>  } else {
> -s->a |= (1 << irq);
> -s->ifr &= ~(1 << VIA2_IRQ_NUBUS_BIT);
> +s->a |= (1 << n);
>  }
>
> -mdc->update_irq(s);
> +qemu_set_irq(irq, level);
>  }

It feels a bit inconsistent here that we're still reaching into
the MOS6522State to set s->a, but I guess this is still
better than what we had before.

> -#define VIA1_IRQ_NB 8
> -
>  #define VIA1_IRQ_ONE_SECOND (1 << VIA1_IRQ_ONE_SECOND_BIT)
>  #define VIA1_IRQ_60HZ   (1 << VIA1_IRQ_60HZ_BIT)
>  #define VIA1_IRQ_ADB_READY  (1 << VIA1_IRQ_ADB_READY_BIT)
> @@ -42,7 +40,7 @@ struct MOS6522Q800VIA1State {
>
>  MemoryRegion via_mem;
>
> -qemu_irq irqs[VIA1_IRQ_NB];
> +qemu_irq irqs[VIA_NUM_INTS];

This irqs[] array appears to be entirely unused. You could
delete it as a separate patch before this one.

>  qemu_irq auxmode_irq;
>  uint8_t last_b;
>
> @@ -85,8 +83,6 @@ struct MOS6522Q800VIA1State {
>  #define VIA2_IRQ_SCSI_BIT   CB2_INT_BIT
>  #define VIA2_IRQ_ASC_BITCB1_INT_BIT
>
> -#define VIA2_IRQ_NB 8
> -
>  #define VIA2_IRQ_SCSI_DATA  (1 << VIA2_IRQ_SCSI_DATA_BIT)
>  #define VIA2_IRQ_NUBUS  (1 << VIA2_IRQ_NUBUS_BIT)
>  #define VIA2_IRQ_UNUSED (1 << VIA2_IRQ_SCSI_BIT)
> diff --git a/include/hw/misc/mos6522.h b/include/hw/misc/mos6522.h
> index 12abd8b8d2..ced8a670bf 100644
> --- a/include/hw/misc/mos6522.h
> +++ b/include/hw/misc/mos6522.h
> @@ -57,6 +57,8 @@
>  #define T2_INT (1 << T2_INT_BIT)
>  #define T1_INT (1 << T1_INT_BIT)
>
> +#define VIA_NUM_INTS   5

Were we not using 5,6,7 previously ?

Anyway,
Reviewed-by: Peter Maydell 

thanks
-- PMM



Re: [PATCH 05/11] mos6522: remove update_irq() and set_sr_int() methods from MOS6522DeviceClass

2022-02-07 Thread Peter Maydell
On Thu, 27 Jan 2022 at 21:03, Mark Cave-Ayland
 wrote:
>
> Now that the mos6522 IRQs are managed using standard qdev gpios these methods
> are no longer required.
>
> Signed-off-by: Mark Cave-Ayland 

Reviewed-by: Peter Maydell 

thanks
-- PMM



Re: [PATCH v4 02/12] mm/memfd: Introduce MFD_INACCESSIBLE flag

2022-02-07 Thread Vlastimil Babka
On 1/18/22 14:21, Chao Peng wrote:
> Introduce a new memfd_create() flag indicating the content of the
> created memfd is inaccessible from userspace. It does this by force
> setting F_SEAL_INACCESSIBLE seal when the file is created. It also set
> F_SEAL_SEAL to prevent future sealing, which means, it can not coexist
> with MFD_ALLOW_SEALING.
> 
> The pages backed by such memfd will be used as guest private memory in
> confidential computing environments such as Intel TDX/AMD SEV. Since
> page migration/swapping is not yet supported for such usages so these
> pages are currently marked as UNMOVABLE and UNEVICTABLE which makes
> them behave like long-term pinned pages.

Shouldn't the amount of such memory allocations be restricted? E.g. similar
to secretmem_mmap() doing mlock_future_check().

> Signed-off-by: Chao Peng 
> ---
>  include/uapi/linux/memfd.h |  1 +
>  mm/memfd.c | 20 +++-
>  2 files changed, 20 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h
> index 7a8a26751c23..48750474b904 100644
> --- a/include/uapi/linux/memfd.h
> +++ b/include/uapi/linux/memfd.h
> @@ -8,6 +8,7 @@
>  #define MFD_CLOEXEC  0x0001U
>  #define MFD_ALLOW_SEALING0x0002U
>  #define MFD_HUGETLB  0x0004U
> +#define MFD_INACCESSIBLE 0x0008U
>  
>  /*
>   * Huge page size encoding when MFD_HUGETLB is specified, and a huge page
> diff --git a/mm/memfd.c b/mm/memfd.c
> index 9f80f162791a..26998d96dc11 100644
> --- a/mm/memfd.c
> +++ b/mm/memfd.c
> @@ -245,16 +245,19 @@ long memfd_fcntl(struct file *file, unsigned int cmd, 
> unsigned long arg)
>  #define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
>  #define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
>  
> -#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB)
> +#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB | \
> +MFD_INACCESSIBLE)
>  
>  SYSCALL_DEFINE2(memfd_create,
>   const char __user *, uname,
>   unsigned int, flags)
>  {
> + struct address_space *mapping;
>   unsigned int *file_seals;
>   struct file *file;
>   int fd, error;
>   char *name;
> + gfp_t gfp;
>   long len;
>  
>   if (!(flags & MFD_HUGETLB)) {
> @@ -267,6 +270,10 @@ SYSCALL_DEFINE2(memfd_create,
>   return -EINVAL;
>   }
>  
> + /* Disallow sealing when MFD_INACCESSIBLE is set. */
> + if (flags & MFD_INACCESSIBLE && flags & MFD_ALLOW_SEALING)
> + return -EINVAL;
> +
>   /* length includes terminating zero */
>   len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
>   if (len <= 0)
> @@ -315,6 +322,17 @@ SYSCALL_DEFINE2(memfd_create,
>   *file_seals &= ~F_SEAL_SEAL;
>   }
>  
> + if (flags & MFD_INACCESSIBLE) {
> + mapping = file_inode(file)->i_mapping;
> + gfp = mapping_gfp_mask(mapping);
> + gfp &= ~__GFP_MOVABLE;
> + mapping_set_gfp_mask(mapping, gfp);
> + mapping_set_unevictable(mapping);
> +
> + file_seals = memfd_file_seals_ptr(file);
> + *file_seals &= F_SEAL_SEAL | F_SEAL_INACCESSIBLE;
> + }
> +
>   fd_install(fd, file);
>   kfree(name);
>   return fd;




Re: [PATCH 06/16] hw/arm/xlnx-zcu102: Don't enable PSCI conduit when booting guest in EL3

2022-02-07 Thread Philippe Mathieu-Daudé via

On 7/2/22 19:13, Edgar E. Iglesias wrote:


On Mon, Feb 7, 2022 at 5:24 PM Alexander Graf wrote:



On 07.02.22 17:06, Philippe Mathieu-Daudé wrote:
 > On 7/2/22 16:59, Alexander Graf wrote:
 >>
 >> On 07.02.22 16:52, Edgar E. Iglesias wrote:
 >
 >>> Both Versal and ZynqMP require MicroBlaze firmware to run the
 >>> reference implementations of Trusted Firmware. We never supported
 >>> this in upstream QEMU but we do support it with our fork (by running
 >>> multiple QEMU instances co-simulating).
 >>>
 >>> Having said that, we do have tons of EL3 test-cases that we use to
 >>> validate QEMU that run with EL3 enabled in upstream.
 >>>
 >>> So there's two user flows:
 >>> 1. Direct boots using QEMU's builtin PSCI (Most users use this to run
 >>> Linux, Xen, U-boot, etc)
 >>> 2. Firmware boot at EL3 without QEMU's builtin PSCI (Mostly used by
 >>> test-code)
 >>>
 >>> Number #2 is the one affected here and that by accident used to have
 >>> the builtin PSCI support enabled but now requires more power control
 >>> modelling to keep working.
 >>> Unless I'm missing something, the -kernel boots will continue to use
 >>> the builtin PSCI implementation.
 >>
 >>
 >> So nobody is using upstream QEMU to validate and prototype
 >> ATF/EL1s/EL0s code? That's a shame :). I suppose there is little
 >> value without the bitstream emulation and R cluster. Do you have
 >> plans to bring multi process emulation upstream some day to enable
 >> these there?
 >
 > The R cluster is already in mainstream, isn't it?


In that case, wouldn't it make sense to build an emulation model of the
PMU behavior so that normal ATF works out of the box?


Thanks,

Alex


Yes, that makes sense and there are several ways to implement it. To 
fully support the programmability of the PMU we'd need to model the 
MicroBlazes together with the ARM cores.


But PMU support does not really conflict with this patch series, or is 
there something I'm missing?


My understanding is Alex generically wonders about code coverage, not
about the ZynqMP in particular :)



[PATCH 0/4] target/ppc: powerpc_excp improvements (9/9)

2022-02-07 Thread Fabiano Rosas
This is the last part of this series of changes to the exceptions
code.

First two patches remove the powerpc_excp_legacy function which is not
needed anymore and move some of the common code from the individual
powerpc_excp_* functions into powerpc_excp.

Third patch makes the sanity check against msr_mask generic to check
all MSR bits.

Last patch removes excp_model from the AIL code for BookS. We now have
only two instances left of excp_model being used as an identifier for
specific CPUs.

== Next steps ==

I'll work on the next steps which include some cleanups to cpu_init
and hopefully moving into separate files for each CPU family.

I'm also thinking about some changes to the POWERPC_FAMILY macro to
remove the _FAMILY part, since this code has been used for a long time
to create single CPUs instead of a whole family. I think the
separation we have now with the exception models better represents the
concept of family. So I would rather call the macro POWERPC_CPU and
add (if needed) a new field 'family' to the class. With that and
having one family per file, we would be able to remove the excp_model
enum altogether by adding a pointer to powerpc_excp like we have for
init_proc.

I'll put all of that in an RFC so we can discuss.

Thanks

Fabiano Rosas (4):
  target/ppc: Remove powerpc_excp_legacy
  target/ppc: powerpc_excp: Move common code to the caller function
  target/ppc: Assert if MSR bits differ from msr_mask during exceptions
  target/ppc: books: Remove excp_model argument from ppc_excp_apply_ail

 target/ppc/excp_helper.c | 623 ++-
 1 file changed, 31 insertions(+), 592 deletions(-)

-- 
2.34.1




Re: [PATCH v4] hw/sensor: Add lsm303dlhc magnetometer device

2022-02-07 Thread Peter Maydell
On Sun, 30 Jan 2022 at 09:53, Kevin Townsend  wrote:
>
> This commit adds emulation of the magnetometer on the LSM303DLHC.
> It allows the magnetometer's X, Y and Z outputs to be set via the
> mag-x, mag-y and mag-z properties, as well as the 12-bit
> temperature output via the temperature property. Sensor can be
> enabled with 'CONFIG_LSM303DLHC_MAG=y'.
>
> Signed-off-by: Kevin Townsend 



Applied to target-arm.next, thanks.

-- PMM



[PATCH 1/4] target/ppc: Remove powerpc_excp_legacy

2022-02-07 Thread Fabiano Rosas
Now that all CPU families have their own separate exception
dispatching code we can remove powerpc_excp_legacy.

Signed-off-by: Fabiano Rosas 
---
 target/ppc/excp_helper.c | 477 +--
 1 file changed, 3 insertions(+), 474 deletions(-)

diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 0050c8447f..c6646503aa 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -163,7 +163,7 @@ static void ppc_excp_debug_sw_tlb(CPUPPCState *env, int 
excp)
  env->error_code);
 }
 
-
+#if defined(TARGET_PPC64)
 static int powerpc_reset_wakeup(CPUState *cs, CPUPPCState *env, int excp,
 target_ulong *msr)
 {
@@ -267,7 +267,6 @@ static void ppc_excp_apply_ail(PowerPCCPU *cpu, int 
excp_model, int excp,
   target_ulong *new_msr,
   target_ulong *vector)
 {
-#if defined(TARGET_PPC64)
 CPUPPCState *env = &cpu->env;
 bool mmu_all_on = ((msr >> MSR_IR) & 1) && ((msr >> MSR_DR) & 1);
 bool hv_escalation = !(msr & MSR_HVB) && (*new_msr & MSR_HVB);
@@ -356,8 +355,8 @@ static void ppc_excp_apply_ail(PowerPCCPU *cpu, int 
excp_model, int excp,
 *vector |= 0xc0003000ull; /* Apply scv's AIL=3 offset */
 }
 }
-#endif
 }
+#endif
 
 static void powerpc_set_excp_state(PowerPCCPU *cpu,
   target_ulong vector, target_ulong 
msr)
@@ -1641,476 +1640,6 @@ static inline void powerpc_excp_books(PowerPCCPU *cpu, 
int excp)
 }
 #endif
 
-/*
- * Note that this function should be greatly optimized when called
- * with a constant excp, from ppc_hw_interrupt
- */
-static inline void powerpc_excp_legacy(PowerPCCPU *cpu, int excp)
-{
-CPUState *cs = CPU(cpu);
-CPUPPCState *env = &cpu->env;
-int excp_model = env->excp_model;
-target_ulong msr, new_msr, vector;
-int srr0, srr1, lev = -1;
-
-if (excp <= POWERPC_EXCP_NONE || excp >= POWERPC_EXCP_NB) {
-cpu_abort(cs, "Invalid PowerPC exception %d. Aborting\n", excp);
-}
-
-qemu_log_mask(CPU_LOG_INT, "Raise exception at " TARGET_FMT_lx
-  " => %s (%d) error=%02x\n", env->nip, 
powerpc_excp_name(excp),
-  excp, env->error_code);
-
-/* new srr1 value excluding must-be-zero bits */
-if (excp_model == POWERPC_EXCP_BOOKE) {
-msr = env->msr;
-} else {
-msr = env->msr & ~0x783fULL;
-}
-
-/*
- * new interrupt handler msr preserves existing HV and ME unless
- * explicitly overriden
- */
-new_msr = env->msr & (((target_ulong)1 << MSR_ME) | MSR_HVB);
-
-/* target registers */
-srr0 = SPR_SRR0;
-srr1 = SPR_SRR1;
-
-/*
- * check for special resume at 0x100 from doze/nap/sleep/winkle on
- * P7/P8/P9
- */
-if (env->resume_as_sreset) {
-excp = powerpc_reset_wakeup(cs, env, excp, &msr);
-}
-
-/*
- * Hypervisor emulation assistance interrupt only exists on server
- * arch 2.05 server or later. We also don't want to generate it if
- * we don't have HVB in msr_mask (PAPR mode).
- */
-if (excp == POWERPC_EXCP_HV_EMU
-#if defined(TARGET_PPC64)
-&& !(mmu_is_64bit(env->mmu_model) && (env->msr_mask & MSR_HVB))
-#endif /* defined(TARGET_PPC64) */
-
-) {
-excp = POWERPC_EXCP_PROGRAM;
-}
-
-#ifdef TARGET_PPC64
-/*
- * SPEU and VPU share the same IVOR but they exist in different
- * processors. SPEU is e500v1/2 only and VPU is e6500 only.
- */
-if (excp_model == POWERPC_EXCP_BOOKE && excp == POWERPC_EXCP_VPU) {
-excp = POWERPC_EXCP_SPEU;
-}
-#endif
-
-vector = env->excp_vectors[excp];
-if (vector == (target_ulong)-1ULL) {
-cpu_abort(cs, "Raised an exception without defined vector %d\n",
-  excp);
-}
-
-vector |= env->excp_prefix;
-
-switch (excp) {
-case POWERPC_EXCP_CRITICAL:/* Critical input */
-switch (excp_model) {
-case POWERPC_EXCP_40x:
-srr0 = SPR_40x_SRR2;
-srr1 = SPR_40x_SRR3;
-break;
-case POWERPC_EXCP_BOOKE:
-srr0 = SPR_BOOKE_CSRR0;
-srr1 = SPR_BOOKE_CSRR1;
-break;
-case POWERPC_EXCP_6xx:
-break;
-default:
-goto excp_invalid;
-}
-break;
-case POWERPC_EXCP_MCHECK:/* Machine check exception  */
-if (msr_me == 0) {
-/*
- * Machine check exception is not enabled.  Enter
- * checkstop state.
- */
-fprintf(stderr, "Machine check while not allowed. "
-"Entering checkstop state\n");
-if (qemu_log_separate()) {
-qemu_log("Machine check while not allowed. "
-"Entering checkstop state\n");
-}
-cs->halted = 1;
-

[PATCH 2/4] target/ppc: powerpc_excp: Move common code to the caller function

2022-02-07 Thread Fabiano Rosas
Make the cpu-specific powerpc_excp_* functions a bit simpler by moving
the bounds check and logging to powerpc_excp.

Signed-off-by: Fabiano Rosas 
---
 target/ppc/excp_helper.c | 57 +++-
 1 file changed, 9 insertions(+), 48 deletions(-)

diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index c6646503aa..206314aaa2 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -396,14 +396,6 @@ static void powerpc_excp_40x(PowerPCCPU *cpu, int excp)
 target_ulong msr, new_msr, vector;
 int srr0, srr1;
 
-if (excp <= POWERPC_EXCP_NONE || excp >= POWERPC_EXCP_NB) {
-cpu_abort(cs, "Invalid PowerPC exception %d. Aborting\n", excp);
-}
-
-qemu_log_mask(CPU_LOG_INT, "Raise exception at " TARGET_FMT_lx
-  " => %s (%d) error=%02x\n", env->nip, 
powerpc_excp_name(excp),
-  excp, env->error_code);
-
 /* new srr1 value excluding must-be-zero bits */
 msr = env->msr & ~0x783fULL;
 
@@ -554,14 +546,6 @@ static void powerpc_excp_6xx(PowerPCCPU *cpu, int excp)
 CPUPPCState *env = &cpu->env;
 target_ulong msr, new_msr, vector;
 
-if (excp <= POWERPC_EXCP_NONE || excp >= POWERPC_EXCP_NB) {
-cpu_abort(cs, "Invalid PowerPC exception %d. Aborting\n", excp);
-}
-
-qemu_log_mask(CPU_LOG_INT, "Raise exception at " TARGET_FMT_lx
-  " => %s (%d) error=%02x\n", env->nip, 
powerpc_excp_name(excp),
-  excp, env->error_code);
-
 /* new srr1 value excluding must-be-zero bits */
 msr = env->msr & ~0x783fULL;
 
@@ -746,14 +730,6 @@ static void powerpc_excp_7xx(PowerPCCPU *cpu, int excp)
 CPUPPCState *env = &cpu->env;
 target_ulong msr, new_msr, vector;
 
-if (excp <= POWERPC_EXCP_NONE || excp >= POWERPC_EXCP_NB) {
-cpu_abort(cs, "Invalid PowerPC exception %d. Aborting\n", excp);
-}
-
-qemu_log_mask(CPU_LOG_INT, "Raise exception at " TARGET_FMT_lx
-  " => %s (%d) error=%02x\n", env->nip, 
powerpc_excp_name(excp),
-  excp, env->error_code);
-
 /* new srr1 value excluding must-be-zero bits */
 msr = env->msr & ~0x783fULL;
 
@@ -926,14 +902,6 @@ static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp)
 CPUPPCState *env = &cpu->env;
 target_ulong msr, new_msr, vector;
 
-if (excp <= POWERPC_EXCP_NONE || excp >= POWERPC_EXCP_NB) {
-cpu_abort(cs, "Invalid PowerPC exception %d. Aborting\n", excp);
-}
-
-qemu_log_mask(CPU_LOG_INT, "Raise exception at " TARGET_FMT_lx
-  " => %s (%d) error=%02x\n", env->nip, 
powerpc_excp_name(excp),
-  excp, env->error_code);
-
 /* new srr1 value excluding must-be-zero bits */
 msr = env->msr & ~0x783fULL;
 
@@ -1121,14 +1089,6 @@ static void powerpc_excp_booke(PowerPCCPU *cpu, int excp)
 target_ulong msr, new_msr, vector;
 int srr0, srr1;
 
-if (excp <= POWERPC_EXCP_NONE || excp >= POWERPC_EXCP_NB) {
-cpu_abort(cs, "Invalid PowerPC exception %d. Aborting\n", excp);
-}
-
-qemu_log_mask(CPU_LOG_INT, "Raise exception at " TARGET_FMT_lx
-  " => %s (%d) error=%02x\n", env->nip, 
powerpc_excp_name(excp),
-  excp, env->error_code);
-
 msr = env->msr;
 
 /*
@@ -1348,14 +1308,6 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
 target_ulong msr, new_msr, vector;
 int srr0, srr1, lev = -1;
 
-if (excp <= POWERPC_EXCP_NONE || excp >= POWERPC_EXCP_NB) {
-cpu_abort(cs, "Invalid PowerPC exception %d. Aborting\n", excp);
-}
-
-qemu_log_mask(CPU_LOG_INT, "Raise exception at " TARGET_FMT_lx
-  " => %s (%d) error=%02x\n", env->nip, 
powerpc_excp_name(excp),
-  excp, env->error_code);
-
 /* new srr1 value excluding must-be-zero bits */
 msr = env->msr & ~0x783fULL;
 
@@ -1642,8 +1594,17 @@ static inline void powerpc_excp_books(PowerPCCPU *cpu, 
int excp)
 
 static void powerpc_excp(PowerPCCPU *cpu, int excp)
 {
+CPUState *cs = CPU(cpu);
 CPUPPCState *env = &cpu->env;
 
+if (excp <= POWERPC_EXCP_NONE || excp >= POWERPC_EXCP_NB) {
+cpu_abort(cs, "Invalid PowerPC exception %d. Aborting\n", excp);
+}
+
+qemu_log_mask(CPU_LOG_INT, "Raise exception at " TARGET_FMT_lx
+  " => %s (%d) error=%02x\n", env->nip, 
powerpc_excp_name(excp),
+  excp, env->error_code);
+
 switch (env->excp_model) {
 case POWERPC_EXCP_40x:
 powerpc_excp_40x(cpu, excp);
-- 
2.34.1




[PATCH 3/4] target/ppc: Assert if MSR bits differ from msr_mask during exceptions

2022-02-07 Thread Fabiano Rosas
We currently abort QEMU during the dispatch of an interrupt if we try
to set MSR_HV without having MSR_HVB in the msr_mask. I think we
should verify this for all MSR bits. There is no reason to ever have a
MSR bit set if the corresponding bit is not set in that CPU's
msr_mask.

Note that this is not about the emulated code setting reserved
bits. We clear the new_msr when starting to dispatch an exception, so
if we end up with bits not present in the msr_mask that is a QEMU
programming error.

I kept the HSRR verification for BookS because it is the only CPU
family that has HSRRs.

Signed-off-by: Fabiano Rosas 
---
 target/ppc/excp_helper.c | 64 
 1 file changed, 6 insertions(+), 58 deletions(-)

diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 206314aaa2..861b7fc24d 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -364,6 +364,8 @@ static void powerpc_set_excp_state(PowerPCCPU *cpu,
 CPUState *cs = CPU(cpu);
 CPUPPCState *env = &cpu->env;
 
+assert((msr & env->msr_mask) == msr);
+
 /*
  * We don't use hreg_store_msr here as already have treated any
  * special case that could occur. Just store MSR and update hflags
@@ -372,7 +374,7 @@ static void powerpc_set_excp_state(PowerPCCPU *cpu,
  * will prevent setting of the HV bit which some exceptions might need
  * to do.
  */
-env->msr = msr & env->msr_mask;
+env->msr = msr;
 hreg_compute_hflags(env);
 env->nip = vector;
 /* Reset exception state */
@@ -519,18 +521,6 @@ static void powerpc_excp_40x(PowerPCCPU *cpu, int excp)
 break;
 }
 
-/* Sanity check */
-if (!(env->msr_mask & MSR_HVB)) {
-if (new_msr & MSR_HVB) {
-cpu_abort(cs, "Trying to deliver HV exception (MSR) %d with "
-  "no HV support\n", excp);
-}
-if (srr0 == SPR_HSRR0) {
-cpu_abort(cs, "Trying to deliver HV exception (HSRR) %d with "
-  "no HV support\n", excp);
-}
-}
-
 /* Save PC */
 env->spr[srr0] = env->nip;
 
@@ -699,14 +689,6 @@ static void powerpc_excp_6xx(PowerPCCPU *cpu, int excp)
 break;
 }
 
-/* Sanity check */
-if (!(env->msr_mask & MSR_HVB)) {
-if (new_msr & MSR_HVB) {
-cpu_abort(cs, "Trying to deliver HV exception (MSR) %d with "
-  "no HV support\n", excp);
-}
-}
-
 /*
  * Sort out endianness of interrupt, this differs depending on the
  * CPU, the HV mode, etc...
@@ -871,14 +853,6 @@ static void powerpc_excp_7xx(PowerPCCPU *cpu, int excp)
 break;
 }
 
-/* Sanity check */
-if (!(env->msr_mask & MSR_HVB)) {
-if (new_msr & MSR_HVB) {
-cpu_abort(cs, "Trying to deliver HV exception (MSR) %d with "
-  "no HV support\n", excp);
-}
-}
-
 /*
  * Sort out endianness of interrupt, this differs depending on the
  * CPU, the HV mode, etc...
@@ -1057,14 +1031,6 @@ static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp)
 break;
 }
 
-/* Sanity check */
-if (!(env->msr_mask & MSR_HVB)) {
-if (new_msr & MSR_HVB) {
-cpu_abort(cs, "Trying to deliver HV exception (MSR) %d with "
-  "no HV support\n", excp);
-}
-}
-
 /*
  * Sort out endianness of interrupt, this differs depending on the
  * CPU, the HV mode, etc...
@@ -1269,18 +1235,6 @@ static void powerpc_excp_booke(PowerPCCPU *cpu, int excp)
 break;
 }
 
-/* Sanity check */
-if (!(env->msr_mask & MSR_HVB)) {
-if (new_msr & MSR_HVB) {
-cpu_abort(cs, "Trying to deliver HV exception (MSR) %d with "
-  "no HV support\n", excp);
-}
-if (srr0 == SPR_HSRR0) {
-cpu_abort(cs, "Trying to deliver HV exception (HSRR) %d with "
-  "no HV support\n", excp);
-}
-}
-
 #if defined(TARGET_PPC64)
 if (env->spr[SPR_BOOKE_EPCR] & EPCR_ICM) {
 /* Cat.64-bit: EPCR.ICM is copied to MSR.CM */
@@ -1551,15 +1505,9 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
 }
 
 /* Sanity check */
-if (!(env->msr_mask & MSR_HVB)) {
-if (new_msr & MSR_HVB) {
-cpu_abort(cs, "Trying to deliver HV exception (MSR) %d with "
-  "no HV support\n", excp);
-}
-if (srr0 == SPR_HSRR0) {
-cpu_abort(cs, "Trying to deliver HV exception (HSRR) %d with "
-  "no HV support\n", excp);
-}
+if (!(env->msr_mask & MSR_HVB) && srr0 == SPR_HSRR0) {
+cpu_abort(cs, "Trying to deliver HV exception (HSRR) %d with "
+  "no HV support\n", excp);
 }
 
 /*
-- 
2.34.1




Re: [PATCH v5 6/6] tcg/sparc: Support unaligned access for user-only

2022-02-07 Thread Peter Maydell
On Sun, 6 Feb 2022 at 10:31, Richard Henderson
 wrote:
>
> This is kinda sorta the opposite of the other tcg hosts, where
> we get (normal) alignment checks for free with host SIGBUS and
> need to add code to support unaligned accesses.
>
> This inline code expansion is somewhat large, but it takes quite
> a few instructions to make a function call to a helper anyway.
>
> Signed-off-by: Richard Henderson 
> ---

Reviewed-by: Peter Maydell 

thanks
-- PMM



[PATCH 4/4] target/ppc: books: Remove excp_model argument from ppc_excp_apply_ail

2022-02-07 Thread Fabiano Rosas
We don't really need to check for exception model while applying
AIL. We can check the lpcr_mask for the presence of
LPCR_AIL/LPCR_HAIL.

This removes one more instance of passing the exception model ID
around.

Signed-off-by: Fabiano Rosas 
---
 target/ppc/excp_helper.c | 25 +
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 861b7fc24d..116398f36a 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -262,11 +262,10 @@ static int powerpc_reset_wakeup(CPUState *cs, CPUPPCState 
*env, int excp,
  * | a | h  | 11  | 1   | 1   | h   |
  * ++
  */
-static void ppc_excp_apply_ail(PowerPCCPU *cpu, int excp_model, int excp,
-  target_ulong msr,
-  target_ulong *new_msr,
-  target_ulong *vector)
+static void ppc_excp_apply_ail(PowerPCCPU *cpu, int excp, target_ulong msr,
+   target_ulong *new_msr, target_ulong *vector)
 {
+PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
 CPUPPCState *env = &cpu->env;
 bool mmu_all_on = ((msr >> MSR_IR) & 1) && ((msr >> MSR_DR) & 1);
 bool hv_escalation = !(msr & MSR_HVB) && (*new_msr & MSR_HVB);
@@ -279,8 +278,13 @@ static void ppc_excp_apply_ail(PowerPCCPU *cpu, int 
excp_model, int excp,
 return;
 }
 
-if (excp_model == POWERPC_EXCP_POWER8 ||
-excp_model == POWERPC_EXCP_POWER9) {
+if (!(pcc->lpcr_mask & LPCR_AIL)) {
+/* This CPU does not have AIL */
+return;
+}
+
+/* P8 & P9 */
+if (!(pcc->lpcr_mask & LPCR_HAIL)) {
 if (!mmu_all_on) {
 /* AIL only works if MSR[IR] and MSR[DR] are both enabled. */
 return;
@@ -303,7 +307,8 @@ static void ppc_excp_apply_ail(PowerPCCPU *cpu, int 
excp_model, int excp,
 return;
 }
 
-} else if (excp_model == POWERPC_EXCP_POWER10) {
+/* P10 and up */
+} else {
 if (!mmu_all_on && !hv_escalation) {
 /*
  * AIL works for HV interrupts even with guest MSR[IR/DR] disabled.
@@ -328,9 +333,6 @@ static void ppc_excp_apply_ail(PowerPCCPU *cpu, int 
excp_model, int excp,
 /* AIL=1 and AIL=2 are reserved, treat them like AIL=0 */
 return;
 }
-} else {
-/* Other processors do not support AIL */
-return;
 }
 
 /*
@@ -1258,7 +1260,6 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
 {
 CPUState *cs = CPU(cpu);
 CPUPPCState *env = &cpu->env;
-int excp_model = env->excp_model;
 target_ulong msr, new_msr, vector;
 int srr0, srr1, lev = -1;
 
@@ -1529,7 +1530,7 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
 }
 
 /* This can update new_msr and vector if AIL applies */
-ppc_excp_apply_ail(cpu, excp_model, excp, msr, &new_msr, &vector);
+ppc_excp_apply_ail(cpu, excp, msr, &new_msr, &vector);
 
 powerpc_set_excp_state(cpu, vector, new_msr);
 }
-- 
2.34.1




Re: [PATCH v6 00/33] block layer: split block APIs in global state and I/O

2022-02-07 Thread Kevin Wolf
Am 21.01.2022 um 18:05 hat Emanuele Giuseppe Esposito geschrieben:
> Each function in the GS API will have an assertion, checking
> that it is always running under BQL.
> I/O functions are instead thread safe (or so should be), meaning
> that they *can* run under BQL, but also in an iothread in another
> AioContext. Therefore they do not provide any assertion, and
> need to be audited manually to verify the correctness.

I wonder if we could actually do something to catch at least some kinds
of bugs. The first conclusion from thinking about it is that we probably
shouldn't open-code assert(qemu_in_main_thread()) everywhere, but have a
macro or inline function for each category to be called in each function.

So an IO_CODE() macro could increase a counter in the coroutine object
(that is decreased again at the end of the function with g_auto), and
then GLOBAL_STATE_CODE() could not only assert that we're holding the
BQL, but also that the counter is still 0, i.e. it is not (indirectly)
called by an I/O function.

We may want to enable this only in debug builds, but maybe still worth a
thought anyway?

Kevin




Re: [PATCH 00/13] hw/intc/arm_gicv3_its: more cleanups, bugfixes

2022-02-07 Thread Peter Maydell
On Tue, 1 Feb 2022 at 19:32, Peter Maydell  wrote:
>
> This is another set of patches to the ITS emulation; mostly
> cleanups, but there are two bug fixes.

Applied to target-arm.next, thanks.

-- PMM



Re: [PULL 0/2] VFIO fixes 2022-02-03

2022-02-07 Thread Alex Williamson
On Mon, 7 Feb 2022 09:54:59 -0700
Alex Williamson  wrote:

> On Mon, 7 Feb 2022 17:08:01 +0100
> Philippe Mathieu-Daudé  wrote:
> 
> > On 7/2/22 16:50, Alex Williamson wrote:  
> > > On Sat, 5 Feb 2022 10:49:35 +
> > > Peter Maydell  wrote:
> >   
> > >> Hi; this has a format-string issue that means it doesn't build
> > >> on 32-bit systems:
> > >>
> > >> https://gitlab.com/qemu-project/qemu/-/jobs/2057116569
> > >>
> > >> ../hw/vfio/common.c: In function 'vfio_listener_region_add':
> > >> ../hw/vfio/common.c:893:26: error: format '%llx' expects argument of
> > >> type 'long long unsigned int', but argument 6 has type 'intptr_t' {aka
> > >> 'int'} [-Werror=format=]
> > >> error_report("%s received unaligned region %s iova=0x%"PRIx64
> > >> ^~
> > >> ../hw/vfio/common.c:899:26:
> > >> qemu_real_host_page_mask);
> > >> 
> > >>
> > >> For intptr_t you want PRIxPTR.
> > > 
> > > Darn.  Well, let me use this opportunity to ask, how are folks doing
> > > 32-bit cross builds on Fedora?  I used to keep an i686 PAE VM for this
> > > purpose, but I was eventually no longer able to maintain the build
> > > dependencies.  Looks like this failed on a mipsel cross build, but I
> > > don't see such a cross compiler in Fedora.  I do mingw32/64 cross
> > > builds, but they leave a lot to be desired for code coverage.  Thanks,
> > 
> > You can use docker images:
> > https://wiki.qemu.org/Testing/DockerBuild  
> 
> Hmm, not ideal...
> 
> Clean git clone, HEAD 55ef0b702bc2 ("Merge remote-tracking branch 
> 'remotes/lvivier-gitlab/tags/linux-user-for-7.0-pull-request' into staging")
> 
> $ make docker-test-quick@debian-mips64el-cross J=16

Accidentally selected the mips64el, but tests failing seems to be
common.  I can reproduce the build issue with either the mipsel or
fedora-i386-cross, so I'll include some flavor of the test-build in my
build script.  Thanks,

Alex




Re: [PATCH v2] tests/qtest: add qtests for npcm7xx sdhci

2022-02-07 Thread Peter Maydell
On Sun, 6 Feb 2022 at 01:41, Patrick Venture  wrote:
>
> From: Shengtan Mao 
>
> Reviewed-by: Hao Wu 
> Reviewed-by: Chris Rauer 
> Signed-off-by: Shengtan Mao 
> Signed-off-by: Patrick Venture 
> ---
> v2:
>  * update copyright year
>  * check result of open
>  * use g_free instead of free
>  * move declarations to the top
>  * use g_file_open_tmp

Fails to compile:

../../tests/qtest/npcm7xx_sdhci-test.c:121:32: error: use of
undeclared identifier 'NPCM7XX_REG_SIZE'
uint64_t end_addr = addr + NPCM7XX_REG_SIZE;
   ^


-- PMM



Re: [PATCH v6 31/33] include/qemu/job.h: introduce job->pre_run() and use it in amend

2022-02-07 Thread Kevin Wolf
Am 21.01.2022 um 18:05 hat Emanuele Giuseppe Esposito geschrieben:
> Introduce .pre_run() job callback. This cb will run in job_start,
> before the coroutine is created and runs run() in the job aiocontext.
> 
> Therefore, .pre_run() always runs in the main loop.
> We can use this function together with clean() cb to replace
> bdrv_child_refresh_perms in block_crypto_amend_options_generic_luks(),
> since that function can also be called from an iothread via
> .bdrv_co_amend().

How is this different from having the same code in the function that
creates the job, i.e. qmp_x_blockdev_amend()?

Almost all block jobs have some setup code in the function that creates
the job instead of doing everything in .run(), precisely because they
know this code runs in the main thread.

Is amend really so different from the other block jobs in this respect
that it needs a different solution?

> In addition, doing so we check for permissions in all bdrv
> in amend, not only crypto.
> 
> .pre_run() and .clean() take care of calling bdrv_amend_pre_run()
> and bdrv_amend_clean() respectively, to set up driver-specific flags
> and allow the crypto driver to temporarily provide the WRITE
> perm to qcrypto_block_amend_options().
> 
> .pre_run() is not yet invoked by job_start, but .clean() is.
> This is not a problem, since it will just be a redundant check
> and crypto will have the update->keys flag == false anyways.
> 
> Signed-off-by: Emanuele Giuseppe Esposito 

I find the way how you split the patches a bit confusing because the
patches aren't self-contained, but always refer to what the code will do
in the future, because after the patch it's dead code that isn't even
theoretically called until the final patch comes in.

Can we restructure this a bit? First a patch that adds a new JobDriver
callback (if really needed) along with the actual calls for it and
everything else that needs to be touched in the generic job
infrastructure. Second, new BlockDriver callbacks with all of the
plumbing code. Third, the amend job changes with a patch that doesn't
touch anything but block/amend.c and potentially block/crypto.c (the
latter could also be another separate patch).

This change with three or four patches could also be a candidate to be
split out into a separate smaller series.

Kevin




Re: [PATCH v5 16/43] tests/acpi: Add update DSDT.viot

2022-02-07 Thread Jonathan Cameron via
On Mon, 7 Feb 2022 16:10:14 +0100
Igor Mammedov  wrote:

> On Fri, 4 Feb 2022 09:01:31 -0500
> "Michael S. Tsirkin"  wrote:
> 
> > On Wed, Feb 02, 2022 at 02:10:10PM +, Jonathan Cameron wrote:  
> > > From: Jonathan Cameron 
> > > 
> > > The consolidation of DSDT AML generation for PCI host bridges
> > > led to some minor ordering changes and the addition of _ADR
> > > with a default of 0 for those cases that didn't already have it.
> > > Only DSDT.viot test is affected.
> > > 
> > > Changes all similar to:
> > > 
> > > Scope (\_SB)
> > >  {
> > >Device (PC30)
> > >{
> > > -Name (_UID, 0x30)  // _UID: Unique ID
> > >  Name (_BBN, 0x30)  // _BBN: BIOS Bus Number
> > >  Name (_HID, EisaId ("PNP0A08") /* PCI Express Bus */)  // _HID: 
> > > Hardware ID
> > >  Name (_CID, EisaId ("PNP0A03") /* PCI Bus */)  // _CID: 
> > > Compatible ID
> > > +Name (_ADR, Zero)  // _ADR: Address
> > > +Name (_UID, 0x30)  // _UID: Unique ID
> > >  Method (_OSC, 4, NotSerialized)  // _OSC: Operating System 
> > > Capabilities
> > > 
> > > Signed-off-by: Jonathan Cameron 
> > 
> > A bit worried about _ADR here.  It's probably fine as it should be
> > unused but in the past some changes like that confused windows guests
> > where they would lose e.g. a static ip config since from their
> > POV device address changed.  
> 
> Spec[1] doesn't mention _ADR in context of host bridge(s) at all,
> for all I know it shouldn't be there. QEMU inherited it from
> SeaBIOS where it is dated to 2008 (as part of large blob adding ACPI for PCI).
> 
> Instead of spreading undefined field to other places,
> I'd prefer removing it from root host bridge.
> But as Michael said it should be very well tested with various guest
> OSes.
> 
> Jonathan,
> Can you compare nic naming (as guest sees it) with current master
> and without _ADR on root host bridge?
> One way to test it could be
>   1. start QEMU(master) configure static IP addr on an interface,
>  and shutdown guest
>   2. start QEMU(-_ADR) with guest image from step 1 and see if
>  interface is still there with IP address it was configured.
> 
> test matrix should be something like that:
>  PCI(pc machine),PCI-E (q35 machine)/
>Windows 2012-whatever latest Windows, some contemporary linux,
>ancient linux (pre 'stable' interface naming) (something like
>RHEL6 or any other distro from that era)

Hi Igor,

Potentially long term I can run those tests, but short term I'd like
to separate this tidy up from introducing the CXL support.

The tidy up / deduplication is rather less useful than when
first introduced now we've decided to only implement CXL support
for PXBs for the short term. Earlier versions included
the main host bridge on x86 which made this change more helpful.

Thanks for the info on what it would require and
I will hopefully get to this once the CXL emulation is in
place (or someone else will beat me to it!)  Not going to be
terribly near the top of my todo list though I'm afraid.

Result for v6 will be that patches 14-16 are dropped and a few changes
to later patches as a result.

Thanks,

Jonathan



> 
> 1) PCI_Firmware_v3.2_01-26-2015_ts_clean_Firmware_Final
> 
> > Igor, what do you think?
> >  
> > > ---
> > >  tests/data/acpi/q35/DSDT.viot   | Bin 9398 -> 9416 bytes
> > >  tests/qtest/bios-tables-test-allowed-diff.h |   1 -
> > >  2 files changed, 1 deletion(-)
> > > 
> > > diff --git a/tests/data/acpi/q35/DSDT.viot b/tests/data/acpi/q35/DSDT.viot
> > > index 
> > > 1c3b4da5cbe81ecab5e1ef50d383b561c5e0f55f..207ac5b9ae4c3a4bc0094c2242d1a1b08771b784
> > >  100644
> > > GIT binary patch
> > > delta 139
> > > zcmdnydBT&+CD > > z$Z0Y1qbM*kn0!E9nwKNq(Itq1BR > > F0sxp4B{u*7
> > > 
> > > delta 143
> > > zcmX@%xy_TyCDWlVjy%CeC%7
> > > z+^Kj^(SX5#0jQdxl0g7Ptr1kM!sPw((lEse3<_8k8$uNeOjb|?Dc; > > 
> > > diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
> > > b/tests/qtest/bios-tables-test-allowed-diff.h
> > > index 08a8095432..dfb8523c8b 100644
> > > --- a/tests/qtest/bios-tables-test-allowed-diff.h
> > > +++ b/tests/qtest/bios-tables-test-allowed-diff.h
> > > @@ -1,2 +1 @@
> > >  /* List of comma-separated changed AML files to ignore */
> > > -"tests/data/acpi/q35/DSDT.viot",
> > > -- 
> > > 2.32.0
> >   
> 




Re: [PATCH 06/16] hw/arm/xlnx-zcu102: Don't enable PSCI conduit when booting guest in EL3

2022-02-07 Thread Edgar E. Iglesias
On Mon, Feb 7, 2022 at 5:24 PM Alexander Graf  wrote:

>
> On 07.02.22 17:06, Philippe Mathieu-Daudé wrote:
> > On 7/2/22 16:59, Alexander Graf wrote:
> >>
> >> On 07.02.22 16:52, Edgar E. Iglesias wrote:
> >
> >>> Both Versal and ZynqMP require MicroBlaze firmware to run the
> >>> reference implementations of Trusted Firmware. We never supported
> >>> this in upstream QEMU but we do support it with our fork (by running
> >>> multiple QEMU instances co-simulating).
> >>>
> >>> Having said that, we do have tons of EL3 test-cases that we use to
> >>> validate QEMU that run with EL3 enabled in upstream.
> >>>
> >>> So there's two user flows:
> >>> 1. Direct boots using QEMUs builtin PSCI (Most users use this to run
> >>> Linux, Xen, U-boot, etc)
> >>> 2. Firmware boot at EL3 without QEMUs builtin PSCI (Mostly used by
> >>> test-code)
> >>>
> >>> Number #2 is the one affected here and that by accident used to have
> >>> the builtin PSCI support enabled but now requires more power control
> >>> modelling to keep working.
> >>> Unless I'm missing something, the -kernel boots will continue to use
> >>> the builtin PSCI implementation.
> >>
> >>
> >> So nobody is using upstream QEMU to validate and prototype
> >> ATF/EL1s/EL0s code? That's a shame :). I suppose there is little
> >> value without the bitstream emulation and R cluster. Do you have
> >> plans to bring multi process emulation upstream some day to enable
> >> these there?
> >
> > The R cluster is already in mainstream, isn't it?
>
>
> In that case, wouldn't it make sense to build an emulation model of the
> PMU behavior so that normal ATF works out of the box?
>
>
> Thanks,
>
> Alex
>

Yes, that makes sense and there are several ways to implement it. To fully
support the programmability of the PMU we'd need to model the MicroBlazes
together with the ARM cores.

But PMU support does not really conflict with this patch series, or is
there something I'm missing?

Cheers,
Edgar


Re: Adding a 'qemu.qmp' repository to gitlab.com/qemu-project/

2022-02-07 Thread Daniel P . Berrangé
On Mon, Feb 07, 2022 at 04:54:25PM +, Alex Bennée wrote:
> 
> John Snow  writes:
> 
> > Hi Peter:
> >
> > I am working my way towards splitting the QMP library out of the
> > qemu.git source tree. I'd like to ask for permission to:
> >
> > (1) Create a "qemu.qmp" repository under the qemu-project umbrella on
> > GitLab
> 
> No objection for my part except maybe a better name? QemuPythonQmp? I
> guess the header text can make it clear.

I think it definitely ought to have the word 'python' in the
repo name, because there's talk of having bindings for other
languages and we want a reliable way to distinguish what repo
and project we're dealing with, if the remaining part of the
name is not very distinctive.  'python-qemu.qmp.git' seems
reasonable enough.

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v6 22/33] block/coroutines: I/O API

2022-02-07 Thread Kevin Wolf
Am 21.01.2022 um 18:05 hat Emanuele Giuseppe Esposito geschrieben:
> block coroutines functions run in different aiocontext, and are
> not protected by the BQL. Therefore they are I/O.
> 
> Signed-off-by: Emanuele Giuseppe Esposito 
> Reviewed-by: Paolo Bonzini 
> Reviewed-by: Stefan Hajnoczi 
> ---
>  block/coroutines.h | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/block/coroutines.h b/block/coroutines.h
> index c8c14a29c8..c61abd271a 100644
> --- a/block/coroutines.h
> +++ b/block/coroutines.h
> @@ -29,6 +29,12 @@
>  
>  /* For blk_bs() in generated block/block-gen.c */
>  #include "sysemu/block-backend.h"
> +/*
> + * I/O API functions. These functions are thread-safe.
> + *
> + * See include/block/block-io.h for more information about
> + * the I/O API.
> + */

Please keep an empty line between #include and the rest.

As discussed in patch 2, this file will need some rearrangement.
Generally, the coroutine_fn is indeed I/O, but the generated_co_wrapper
is mixed "I/O or GS" and requires the BQL or a specific iothread.

Kevin




Re: [PATCH v5 1/6] tcg/sparc: Add scratch argument to tcg_out_movi_int

2022-02-07 Thread Peter Maydell
On Sun, 6 Feb 2022 at 10:31, Richard Henderson
 wrote:
>
> This will allow us to control exactly what scratch register is
> used for loading the constant.  Also, fix a theoretical problem
> in recursing through tcg_out_movi, which may provide a different
> value for in_prologue.
>
> Signed-off-by: Richard Henderson 
> ---
>  tcg/sparc/tcg-target.c.inc | 21 +
>  1 file changed, 13 insertions(+), 8 deletions(-)
>
> diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
> index 0c062c60eb..8c3671f56a 100644
> --- a/tcg/sparc/tcg-target.c.inc
> +++ b/tcg/sparc/tcg-target.c.inc
> @@ -414,7 +414,8 @@ static void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, 
> int32_t arg)
>  }
>
>  static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
> - tcg_target_long arg, bool in_prologue)
> + tcg_target_long arg, bool in_prologue,
> + TCGReg scratch)
>  {
>  tcg_target_long hi, lo = (int32_t)arg;
>  tcg_target_long test, lsb;
> @@ -471,22 +472,25 @@ static void tcg_out_movi_int(TCGContext *s, TCGType 
> type, TCGReg ret,
>  /* A 64-bit constant decomposed into 2 32-bit pieces.  */
>  if (check_fit_i32(lo, 13)) {
>  hi = (arg - lo) >> 32;
> -tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
> +tcg_out_movi_int(s, TCG_TYPE_I32, ret, hi, in_prologue, scratch);
>  tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
>  tcg_out_arithi(s, ret, ret, lo, ARITH_ADD);
>  } else {
> +tcg_debug_assert(scratch != TCG_REG_G0);
>  hi = arg >> 32;
> -tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
> -tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo);
> +tcg_out_movi_int(s, TCG_TYPE_I32, ret, hi, in_prologue, scratch);
> +tcg_out_movi_int(s, TCG_TYPE_I32, scratch, lo, in_prologue, 
> TCG_REG_G0);
>  tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
> -tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR);
> +tcg_out_arith(s, ret, ret, scratch, ARITH_OR);
>  }
>  }
>
>  static void tcg_out_movi(TCGContext *s, TCGType type,
>   TCGReg ret, tcg_target_long arg)
>  {
> -tcg_out_movi_int(s, type, ret, arg, false);
> +/* When outputting to T2, we have no scratch available. */
> +TCGReg scratch = ret != TCG_REG_T2 ? TCG_REG_T2 : TCG_REG_G0;

Why won't using G0 trip the assertion above that scratch != TCG_REG_G0 ?

> +tcg_out_movi_int(s, type, ret, arg, false, scratch);
>  }
>

-- PMM



Re: Adding a 'qemu.qmp' repository to gitlab.com/qemu-project/

2022-02-07 Thread Alex Bennée


John Snow  writes:

> Hi Peter:
>
> I am working my way towards splitting the QMP library out of the
> qemu.git source tree. I'd like to ask for permission to:
>
> (1) Create a "qemu.qmp" repository under the qemu-project umbrella on
> GitLab

No objection for my part except maybe a better name? QemuPythonQmp? I
guess the header text can make it clear.

> (2) Add Cleber Rosa and myself as maintainers of this repository. (In
> discussion, Dan Berrange suggested a third maintainer for redundancy,
> but nobody from outside of RH has yet volunteered. The offer stands,
> but I have to press on in the meantime.)
>
> The initial patches that set up the new repository are not yet
> finalized and are still under review/development (on the qemu-devel
> list, as normal), but I wanted to reach out and directly ask if you
> have any objections to this plan so I can adjust the trajectory of my
> work if necessary.
>
> In short, the plan is to publish the QMP library as its own
> mini-project published to the Python package repository, and take
> patches via GitLab merge requests.
>
> Thanks,
> --js


-- 
Alex Bennée



Re: [PATCH v6 09/33] block: introduce assert_bdrv_graph_writable

2022-02-07 Thread Kevin Wolf
Am 21.01.2022 um 18:05 hat Emanuele Giuseppe Esposito geschrieben:
> We want to be sure that the functions that write the child and
> parent list of a bs are under BQL and drain.
> 
> BQL prevents concurrent writes from the GS API, while
> drains protect from I/O.
> 
> TODO: drains are missing in some functions using this assert.
> Therefore a proper assertion will fail. Because adding drains
> requires additional discussions, they will be added in future
> series.
> 
> Signed-off-by: Emanuele Giuseppe Esposito 

> diff --git a/block/io.c b/block/io.c
> index cb095deeec..3be08cad29 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -734,6 +734,17 @@ void bdrv_drain_all(void)
>  bdrv_drain_all_end();
>  }
>  
> +void assert_bdrv_graph_writable(BlockDriverState *bs)
> +{
> +/*
> + * TODO: this function is incomplete. Because the users of this
> + * assert lack the necessary drains, check only for BQL.
> + * Once the necessary drains are added,
> + * assert also for qatomic_read(&bs->quiesce_counter) > 0
> + */
> +assert(qemu_in_main_thread());
> +}

This looks like a trivial function that could easily be static inline.

Kevin




Re: Adding a 'qemu.qmp' repository to gitlab.com/qemu-project/

2022-02-07 Thread John Snow
On Mon, Feb 7, 2022 at 11:56 AM Alex Bennée  wrote:
>
>
> John Snow  writes:
>
> > Hi Peter:
> >
> > I am working my way towards splitting the QMP library out of the
> > qemu.git source tree. I'd like to ask for permission to:
> >
> > (1) Create a "qemu.qmp" repository under the qemu-project umbrella on
> > GitLab
>
> No objection for my part except maybe a better name? QemuPythonQmp? I
> guess the header text can make it clear.
>

At present, the package is named "qemu.qmp", which is the import name in Python.
("qemu" is the namespace, "qmp" is the package, "import qemu.qmp" is
how you import it.)

I figured I'd just name the repository the exact same thing, but I
realize that may introduce some ambiguity at the QEMU project gitlab
namespace level. "py-qemu.qmp" or similar would also probably be fine.
I think I want to avoid straying too far away from the import and
package names, if I can.

--js




[PATCH 05/14] block: merge bdrv_delete and bdrv_close

2022-02-07 Thread Vladimir Sementsov-Ogievskiy
bdrv_delete() is the only caller of bdrv_close(). Let's merge them to
simplify further commits.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block.c | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

diff --git a/block.c b/block.c
index 82fbf81a3c..71a5aec24c 100644
--- a/block.c
+++ b/block.c
@@ -4785,12 +4785,19 @@ static void bdrv_reopen_abort(BDRVReopenState *reopen_state)
 }
 
 
-static void bdrv_close(BlockDriverState *bs)
+static void bdrv_delete(BlockDriverState *bs)
 {
 BdrvAioNotifier *ban, *ban_next;
 BdrvChild *child, *next;
 
 assert(!bs->refcnt);
+assert(bdrv_op_blocker_is_empty(bs));
+
+/* remove from list, if necessary */
+if (bs->node_name[0] != '\0') {
+QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
+}
+QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
 
 bdrv_drained_begin(bs); /* complete I/O */
 bdrv_flush(bs);
@@ -4844,6 +4851,8 @@ static void bdrv_close(BlockDriverState *bs)
 if (bs->quiesce_counter) {
 bdrv_drain_all_end_quiesce(bs);
 }
+
+g_free(bs);
 }
 
 void bdrv_close_all(void)
@@ -5164,22 +5173,6 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
 return ret;
 }
 
-static void bdrv_delete(BlockDriverState *bs)
-{
-assert(bdrv_op_blocker_is_empty(bs));
-assert(!bs->refcnt);
-
-/* remove from list, if necessary */
-if (bs->node_name[0] != '\0') {
-QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
-}
-QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
-
-bdrv_close(bs);
-
-g_free(bs);
-}
-
 
 /*
  * Replace @bs by newly created block node.
-- 
2.31.1




Re: [RFC PATCH] arm: force flag recalculation when messing with DAIF

2022-02-07 Thread Peter Maydell
On Wed, 2 Feb 2022 at 12:24, Alex Bennée  wrote:
>
> The recently introduced debug tests in kvm-unit-tests exposed an error
> in our handling of singlestep caused by stale hflags. This is caught by
> --enable-debug-tcg when running the tests.
>
> Signed-off-by: Alex Bennée 
> Cc: Richard Henderson 
> Cc: Andrew Jones 
> ---
>  target/arm/helper-a64.c | 2 ++
>  1 file changed, 2 insertions(+)



Applied to target-arm.next, thanks.

-- PMM



Re: [PATCH v3 1/1] util: adjust coroutine pool size to virtio block queue

2022-02-07 Thread Stefan Hajnoczi
On Fri, Jan 28, 2022 at 05:36:16PM +0900, Hiroki Narukawa wrote:
> Coroutine pool size was 64 from long ago, and the basis was organized in the 
> commit message in c740ad92.
> 
> At that time, virtio-blk queue-size and num-queue were not configurable, and 
> equivalent values were 128 and 1.
> 
> Coroutine pool size 64 was fine then.
> 
> Later queue-size and num-queue became configurable, and default values were 
> increased.
> 
> Coroutine pool with size 64 exhausts frequently with random disk IO in new 
> size, and slows down.
> 
> This commit adjusts coroutine pool size adaptively with new values.
> 
> This commit adds 64 by default, but now coroutine is not only for block 
> devices,
> 
> and is not too much of a burden compared with the new default
> 
> pool size of 128 * vCPUs.
> 
> Signed-off-by: Hiroki Narukawa 
> ---
>  hw/block/virtio-blk.c|  5 +
>  include/qemu/coroutine.h | 10 ++
>  util/qemu-coroutine.c| 20 
>  3 files changed, 31 insertions(+), 4 deletions(-)

Thanks, applied to my block tree:
https://gitlab.com/stefanha/qemu/commits/block

Stefan


signature.asc
Description: PGP signature


[PATCH 10/14] qcow2: qcow2_inactivate(): don't call qcow2_mark_clean() when RO

2022-02-07 Thread Vladimir Sementsov-Ogievskiy
qcow2_inactivate() prints errors on different failures, let's not
exclude qcow2_mark_clean() call.

Still, if the image is read-only, there is no reason to report a failed
write, and no reason to even attempt the write. A write failure is possible
when we open a dirty image for checking in read-only mode. Let's not do it.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/qcow2.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 04994df240..ccfcd0db05 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2699,8 +2699,12 @@ static int qcow2_inactivate(BlockDriverState *bs)
 local_err = NULL;
 }
 
-if (result == 0) {
-qcow2_mark_clean(bs);
+if (result == 0 && bdrv_is_writable(bs)) {
+ret = qcow2_mark_clean(bs);
+if (ret < 0) {
+error_report("Failed to mark qcow2 node '%s' clean",
+ bdrv_get_device_or_node_name(bs));
+}
 }
 
 return result;
-- 
2.31.1




Re: [PATCH v6 05/33] include/sysemu/block-backend: split header into I/O and global state (GS) API

2022-02-07 Thread Kevin Wolf
Am 21.01.2022 um 18:05 hat Emanuele Giuseppe Esposito geschrieben:
> Similarly to the previous patches, split block-backend.h
> in block-backend-io.h and block-backend-global-state.h
> 
> In addition, remove "block/block.h" include as it seems
> it is not necessary anymore, together with "qemu/iov.h"
> 
> block-backend-common.h contains the structures shared between
> the two headers, and the functions that can't be categorized as
> I/O or global state.
> 
> Assertions are added in the next patch.
> 
> Signed-off-by: Emanuele Giuseppe Esposito 

The same "GS or I/O" category is needed for drain and other polling
functions here (mainly the synchronous I/O functions like blk_pread()
etc.).

Most of these functions just wrap the bdrv_*() counterpart, so I'm not
reviewing them in detail now. I don't expect surprises as long as both
interfaces are consistent.

> diff --git a/block/block-backend.c b/block/block-backend.c
> index 23e727199b..6f91dcc85d 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -79,6 +79,7 @@ struct BlockBackend {
>  bool allow_aio_context_change;
>  bool allow_write_beyond_eof;
>  
> +/* Protected by BQL lock */
>  NotifierList remove_bs_notifiers, insert_bs_notifiers;
>  QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;

This should be just "BQL" (like in patch 2) because the L in "BQL"
already means "lock", so "BQL lock" would be the "Big QEMU lock lock".
(More instances in this patch.)

Kevin




  1   2   3   >