[PATCH 4/6] hw/block/nvme: support allocated namespace type

2021-02-05 Thread Minwoo Im
>From NVMe spec 1.4b "6.1.5. NSID and Namespace Relationships" defines
valid namespace types:

- Unallocated: Does not exist in the NVMe subsystem
- Allocated: Exists in the NVMe subsystem
- Inactive: Not attached to the controller
- Active: Attached to the controller

This patch adds support for the allocated, but not attached, namespace type:

!nvme_ns(n, nsid) && nvme_subsys_ns(n->subsys, nsid)

nvme_ns() returns the attached namespace instance of the given controller,
and nvme_subsys_ns() returns the allocated namespace instance in the
subsystem.

Signed-off-by: Minwoo Im 
---
 hw/block/nvme-subsys.h | 13 +
 hw/block/nvme.c| 63 +++---
 2 files changed, 60 insertions(+), 16 deletions(-)

diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
index 8a0732b22316..14627f9ccb41 100644
--- a/hw/block/nvme-subsys.h
+++ b/hw/block/nvme-subsys.h
@@ -30,4 +30,17 @@ typedef struct NvmeSubsystem {
 int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
 int nvme_subsys_register_ns(NvmeNamespace *ns, Error **errp);
 
+/*
+ * Return allocated namespace of the specified nsid in the subsystem.
+ */
+static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
+uint32_t nsid)
+{
+if (!subsys) {
+return NULL;
+}
+
+return subsys->namespaces[nsid];
+}
+
 #endif /* NVME_SUBSYS_H */
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index a1e930f7c8e4..d1761a82731f 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -3124,7 +3124,7 @@ static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, 
NvmeRequest *req)
 return NVME_INVALID_FIELD | NVME_DNR;
 }
 
-static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req)
+static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active)
 {
 NvmeNamespace *ns;
 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
@@ -3138,7 +3138,14 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, 
NvmeRequest *req)
 
 ns = nvme_ns(n, nsid);
 if (unlikely(!ns)) {
-return nvme_rpt_empty_id_struct(n, req);
+if (!active) {
+ns = nvme_subsys_ns(n->subsys, nsid);
+if (!ns) {
+return nvme_rpt_empty_id_struct(n, req);
+}
+} else {
+return nvme_rpt_empty_id_struct(n, req);
+}
 }
 
 if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) {
@@ -3149,7 +3156,8 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest 
*req)
 return NVME_INVALID_CMD_SET | NVME_DNR;
 }
 
-static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req)
+static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req,
+bool active)
 {
 NvmeNamespace *ns;
 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
@@ -3163,7 +3171,14 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, 
NvmeRequest *req)
 
 ns = nvme_ns(n, nsid);
 if (unlikely(!ns)) {
-return nvme_rpt_empty_id_struct(n, req);
+if (!active) {
+ns = nvme_subsys_ns(n->subsys, nsid);
+if (!ns) {
+return nvme_rpt_empty_id_struct(n, req);
+}
+} else {
+return nvme_rpt_empty_id_struct(n, req);
+}
 }
 
 if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) {
@@ -3176,7 +3191,8 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, 
NvmeRequest *req)
 return NVME_INVALID_FIELD | NVME_DNR;
 }
 
-static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req)
+static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req,
+bool active)
 {
 NvmeNamespace *ns;
 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
@@ -3201,7 +3217,14 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, 
NvmeRequest *req)
 for (i = 1; i <= n->num_namespaces; i++) {
 ns = nvme_ns(n, i);
 if (!ns) {
-continue;
+if (!active) {
+ns = nvme_subsys_ns(n->subsys, i);
+if (!ns) {
+continue;
+}
+} else {
+continue;
+}
 }
 if (ns->params.nsid <= min_nsid) {
 continue;
@@ -3215,7 +3238,8 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, 
NvmeRequest *req)
 return nvme_dma(n, list, data_len, DMA_DIRECTION_FROM_DEVICE, req);
 }
 
-static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req)
+static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req,
+bool active)
 {
 NvmeNamespace *ns;
 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
@@ -3241,7 +3265,14 @@ static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, 
NvmeRequest *req)
 for (i = 1; i <= n->num_namespaces; i++) {
 ns = nvme_ns(n, i);
 if (!ns) {
-continue;
+if (!active) {
+ns = nvme_subsys_ns(n->subsys, i);
+if (!ns) {
+continue;
+

[PATCH 5/6] hw/block/nvme: refactor nvme_select_ns_iocs

2021-02-05 Thread Minwoo Im
This patch has no functional changes.  It just refactors
nvme_select_ns_iocs() to iterate over the attached namespaces of the
controller and make it invoke __nvme_select_ns_iocs().

Signed-off-by: Minwoo Im 
---
 hw/block/nvme.c | 36 +---
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index d1761a82731f..697368a6ae0c 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -3896,6 +3896,25 @@ static void nvme_ctrl_shutdown(NvmeCtrl *n)
 }
 }
 
+static void __nvme_select_ns_iocs(NvmeCtrl *n, NvmeNamespace *ns)
+{
+ns->iocs = nvme_cse_iocs_none;
+switch (ns->csi) {
+case NVME_CSI_NVM:
+if (NVME_CC_CSS(n->bar.cc) != NVME_CC_CSS_ADMIN_ONLY) {
+ns->iocs = nvme_cse_iocs_nvm;
+}
+break;
+case NVME_CSI_ZONED:
+if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_CSI) {
+ns->iocs = nvme_cse_iocs_zoned;
+} else if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_NVM) {
+ns->iocs = nvme_cse_iocs_nvm;
+}
+break;
+}
+}
+
 static void nvme_select_ns_iocs(NvmeCtrl *n)
 {
 NvmeNamespace *ns;
@@ -3906,21 +3925,8 @@ static void nvme_select_ns_iocs(NvmeCtrl *n)
 if (!ns) {
 continue;
 }
-ns->iocs = nvme_cse_iocs_none;
-switch (ns->csi) {
-case NVME_CSI_NVM:
-if (NVME_CC_CSS(n->bar.cc) != NVME_CC_CSS_ADMIN_ONLY) {
-ns->iocs = nvme_cse_iocs_nvm;
-}
-break;
-case NVME_CSI_ZONED:
-if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_CSI) {
-ns->iocs = nvme_cse_iocs_zoned;
-} else if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_NVM) {
-ns->iocs = nvme_cse_iocs_nvm;
-}
-break;
-}
+
+__nvme_select_ns_iocs(n, ns);
 }
 }
 
-- 
2.17.1




[PATCH 3/6] hw/block/nvme: fix allocated namespace list to 256

2021-02-05 Thread Minwoo Im
Expand the allocated namespace list (subsys->namespaces) to have 256
entries, a value larger than NVME_MAX_NAMESPACES, which sizes the
attached namespace list in a controller.

The allocated namespace list should be at least as large as the attached
namespace list.

n->num_namespaces = NVME_MAX_NAMESPACES;

The above line sets the NN field via id->nn, so the subsystem should
also prepare at least this number of namespace list entries.

Signed-off-by: Minwoo Im 
---
 hw/block/nvme-subsys.h | 2 +-
 hw/block/nvme.h| 6 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
index 574774390c4c..8a0732b22316 100644
--- a/hw/block/nvme-subsys.h
+++ b/hw/block/nvme-subsys.h
@@ -14,7 +14,7 @@
 OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
 
 #define NVME_SUBSYS_MAX_CTRLS   32
-#define NVME_SUBSYS_MAX_NAMESPACES  32
+#define NVME_SUBSYS_MAX_NAMESPACES  256
 
 typedef struct NvmeCtrl NvmeCtrl;
 typedef struct NvmeNamespace NvmeNamespace;
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index bde0ed7c2679..1c7796b20996 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -10,6 +10,12 @@
 #define NVME_DEFAULT_ZONE_SIZE   (128 * MiB)
 #define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB)
 
+/*
+ * Subsystem namespace list for allocated namespaces should be larger than
+ * attached namespace list in a controller.
+ */
+QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_SUBSYS_MAX_NAMESPACES);
+
 typedef struct NvmeParams {
 char *serial;
 uint32_t num_queues; /* deprecated since 5.1 */
-- 
2.17.1




[PATCH 1/6] hw/block/nvme: support namespace detach

2021-02-05 Thread Minwoo Im
Given that we now have the nvme-subsys device supported, we can manage
namespaces that are allocated, but not attached: detached.  This patch
introduces a parameter for the nvme-ns device named 'detached'.  This
parameter indicates whether the given namespace device is detached from
an entire NVMe subsystem ('subsys' given case, shared namespace) or a
controller ('bus' given case, private namespace).

- Allocated namespace

  1) Shared ns in the subsystem 'subsys0':

 -device nvme-ns,id=ns1,drive=blknvme0,nsid=1,subsys=subsys0,detached=true

  2) Private ns for the controller 'nvme0' of the subsystem 'subsys0':

 -device nvme-subsys,id=subsys0
 -device nvme,serial=foo,id=nvme0,subsys=subsys0
 -device nvme-ns,id=ns1,drive=blknvme0,nsid=1,bus=nvme0,detached=true

  3) (Invalid case) Controller 'nvme0' has no subsystem to manage ns:

 -device nvme,serial=foo,id=nvme0
 -device nvme-ns,id=ns1,drive=blknvme0,nsid=1,bus=nvme0,detached=true

Signed-off-by: Minwoo Im 
---
 hw/block/nvme-ns.c |  1 +
 hw/block/nvme-ns.h |  1 +
 hw/block/nvme-subsys.h |  1 +
 hw/block/nvme.c| 41 +++--
 hw/block/nvme.h| 22 ++
 5 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index c3b513b0fc78..cdcb81319fb5 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -393,6 +393,7 @@ static Property nvme_ns_props[] = {
 DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf),
 DEFINE_PROP_LINK("subsys", NvmeNamespace, subsys, TYPE_NVME_SUBSYS,
  NvmeSubsystem *),
+DEFINE_PROP_BOOL("detached", NvmeNamespace, params.detached, false),
 DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
 DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid),
 DEFINE_PROP_UINT16("mssrl", NvmeNamespace, params.mssrl, 128),
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index 7af6884862b5..b0c00e115d81 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -26,6 +26,7 @@ typedef struct NvmeZone {
 } NvmeZone;
 
 typedef struct NvmeNamespaceParams {
+bool detached;
 uint32_t nsid;
 QemuUUID uuid;
 
diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
index ccf6a71398d3..890d118117dc 100644
--- a/hw/block/nvme-subsys.h
+++ b/hw/block/nvme-subsys.h
@@ -23,6 +23,7 @@ typedef struct NvmeSubsystem {
 uint8_t subnqn[256];
 
 NvmeCtrl*ctrls[NVME_SUBSYS_MAX_CTRLS];
+/* Allocated namespaces for this subsystem */
 NvmeNamespace *namespaces[NVME_SUBSYS_MAX_NAMESPACES];
 } NvmeSubsystem;
 
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 6b84e34843f5..a1e930f7c8e4 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -23,7 +23,7 @@
  *  max_ioqpairs=, \
  *  aerl=, aer_max_queued=, \
  *  mdts=,zoned.append_size_limit=, \
- *  subsys= \
+ *  subsys=,detached=
  *  -device nvme-ns,drive=,bus=,nsid=,\
  *  zoned=, \
  *  subsys=
@@ -78,6 +78,13 @@
  *   controllers in the subsystem. Otherwise, `bus` must be given to attach
  *   this namespace to a specified single controller as a non-shared namespace.
  *
+ * - `detached`
+ *   Not to attach the namespace device to controllers in the NVMe subsystem
+ *   during boot-up. If not given, namespaces are all attahced to all
+ *   controllers in the subsystem by default.
+ *   It's mutual exclusive with 'bus' parameter. It's only valid in case
+ *   `subsys` is provided.
+ *
  * Setting `zoned` to true selects Zoned Command Set at the namespace.
  * In this case, the following namespace properties are available to configure
  * zoned operation:
@@ -4521,6 +4528,20 @@ static void nvme_init_state(NvmeCtrl *n)
 n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
 }
 
+static int nvme_attach_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
+{
+if (nvme_ns_is_attached(n, ns)) {
+error_setg(errp,
+   "namespace %d is already attached to controller %d",
+   nvme_nsid(ns), n->cntlid);
+return -1;
+}
+
+nvme_ns_attach(n, ns);
+
+return 0;
+}
+
 int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
 {
 uint32_t nsid = nvme_nsid(ns);
@@ -4552,7 +4573,23 @@ int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace 
*ns, Error **errp)
 
 trace_pci_nvme_register_namespace(nsid);
 
-n->namespaces[nsid - 1] = ns;
+/*
+ * If subsys is not given, namespae is always attached to the controller
+ * because there's no subsystem to manage namespace allocation.
+ */
+if (!n->subsys) {
+if (ns->params.detached) {
+error_setg(errp,
+   "detached needs nvme-subsys specified nvme or nvme-ns");
+return -1;
+}
+
+return nvme_attach_namespace(n, ns, errp);
+} else {
+if (!ns->params.detached) {
+return nvme_attach_name

[PATCH 6/6] hw/block/nvme: support namespace attachment command

2021-02-05 Thread Minwoo Im
This patch supports the Namespace Attachment command for the pre-defined
nvme-ns device nodes.  Of course, attaching/detaching a namespace should
only be supported in case 'subsys' is given.  This is because if we
detach a namespace from a controller, somebody needs to manage the
detached, but allocated, namespace in the NVMe subsystem.

Signed-off-by: Minwoo Im 
---
 hw/block/nvme-subsys.h | 10 +++
 hw/block/nvme.c| 59 ++
 hw/block/nvme.h|  5 
 hw/block/trace-events  |  2 ++
 include/block/nvme.h   |  5 
 5 files changed, 81 insertions(+)

diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
index 14627f9ccb41..ef4bec928eae 100644
--- a/hw/block/nvme-subsys.h
+++ b/hw/block/nvme-subsys.h
@@ -30,6 +30,16 @@ typedef struct NvmeSubsystem {
 int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
 int nvme_subsys_register_ns(NvmeNamespace *ns, Error **errp);
 
+static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
+uint32_t cntlid)
+{
+if (!subsys) {
+return NULL;
+}
+
+return subsys->ctrls[cntlid];
+}
+
 /*
  * Return allocated namespace of the specified nsid in the subsystem.
  */
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 697368a6ae0c..769436722c7e 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -183,6 +183,7 @@ static const uint32_t nvme_cse_acs[256] = {
 [NVME_ADM_CMD_SET_FEATURES] = NVME_CMD_EFF_CSUPP,
 [NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP,
 [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP,
+[NVME_ADM_CMD_NS_ATTACHMENT]= NVME_CMD_EFF_CSUPP,
 };
 
 static const uint32_t nvme_cse_iocs_none[256];
@@ -3766,6 +3767,62 @@ static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req)
 return NVME_NO_COMPLETE;
 }
 
+static void __nvme_select_ns_iocs(NvmeCtrl *n, NvmeNamespace *ns);
+static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
+{
+NvmeNamespace *ns;
+NvmeCtrl *ctrl;
+uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {};
+uint32_t nsid = le32_to_cpu(req->cmd.nsid);
+uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
+bool attach = !(dw10 & 0xf);
+uint16_t *nr_ids = &list[0];
+uint16_t *ids = &list[1];
+uint16_t ret;
+int i;
+
+trace_pci_nvme_ns_attachment(nvme_cid(req), dw10 & 0xf);
+
+ns = nvme_subsys_ns(n->subsys, nsid);
+if (!ns) {
+return NVME_INVALID_FIELD | NVME_DNR;
+}
+
+ret = nvme_dma(n, (uint8_t *)list, 4096,
+   DMA_DIRECTION_TO_DEVICE, req);
+if (ret) {
+return ret;
+}
+
+if (!*nr_ids) {
+return NVME_NS_CTRL_LIST_INVALID | NVME_DNR;
+}
+
+for (i = 0; i < *nr_ids; i++) {
+ctrl = nvme_subsys_ctrl(n->subsys, ids[i]);
+if (!ctrl) {
+return NVME_NS_CTRL_LIST_INVALID | NVME_DNR;
+}
+
+if (attach) {
+if (nvme_ns_is_attached(ctrl, ns)) {
+return NVME_NS_ALREADY_ATTACHED | NVME_DNR;
+}
+
+nvme_ns_attach(n, ns);
+__nvme_select_ns_iocs(n, ns);
+} else {
+if (!nvme_ns_is_attached(ctrl, ns)) {
+return NVME_NS_NOT_ATTACHED | NVME_DNR;
+}
+
+nvme_ns_detach(n, ns);
+}
+}
+
+return NVME_SUCCESS;
+}
+
 static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
 {
 trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode,
@@ -3797,6 +3854,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest 
*req)
 return nvme_get_feature(n, req);
 case NVME_ADM_CMD_ASYNC_EV_REQ:
 return nvme_aer(n, req);
+case NVME_ADM_CMD_NS_ATTACHMENT:
+return nvme_ns_attachment(n, req);
 default:
 assert(false);
 }
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 1c7796b20996..5a1ab857d166 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -222,6 +222,11 @@ static inline void nvme_ns_attach(NvmeCtrl *n, 
NvmeNamespace *ns)
 n->namespaces[nvme_nsid(ns) - 1] = ns;
 }
 
+static inline void nvme_ns_detach(NvmeCtrl *n, NvmeNamespace *ns)
+{
+n->namespaces[nvme_nsid(ns) - 1] = NULL;
+}
+
 static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
 {
 NvmeSQueue *sq = req->sq;
diff --git a/hw/block/trace-events b/hw/block/trace-events
index b6e972d733a6..bf67fe7873d2 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -80,6 +80,8 @@ pci_nvme_aer(uint16_t cid) "cid %"PRIu16""
 pci_nvme_aer_aerl_exceeded(void) "aerl exceeded"
 pci_nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 
0x%"PRIx8""
 pci_nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 
0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
+pci_nvme_ns_attachment(uint16_t cid, uint8_t sel) "cid %"PRIu16", 
sel=0x%"PRIx8""
+pci_nvme_ns_attachment_attach(uint16_t cntlid, uint32_t nsid) 
"cntlid=0x%"PRIx16", nsid=0x%"PRIx32""
 pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 
0x%"PRIx8

[PATCH 2/6] hw/block/nvme: fix namespaces array to 1-based

2021-02-05 Thread Minwoo Im
The subsys->namespaces array used to be sized to NVME_SUBSYS_MAX_NAMESPACES.
But subsys->namespaces is being accessed with a 1-based namespace ID,
which means the very first array entry will always be empty (NULL).

Signed-off-by: Minwoo Im 
---
 hw/block/nvme-subsys.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
index 890d118117dc..574774390c4c 100644
--- a/hw/block/nvme-subsys.h
+++ b/hw/block/nvme-subsys.h
@@ -24,7 +24,7 @@ typedef struct NvmeSubsystem {
 
 NvmeCtrl*ctrls[NVME_SUBSYS_MAX_CTRLS];
 /* Allocated namespaces for this subsystem */
-NvmeNamespace *namespaces[NVME_SUBSYS_MAX_NAMESPACES];
+NvmeNamespace *namespaces[NVME_SUBSYS_MAX_NAMESPACES + 1];
 } NvmeSubsystem;
 
 int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
-- 
2.17.1




[PATCH 0/6] hw/block/nvme: support namespace attachment

2021-02-05 Thread Minwoo Im
Hello,

This series supports namespace attachment: attach and detach.  It means
that this series also introduces a scheme for allocated namespaces which
are detached, but allocated in an NVMe subsystem.  Given that we now have
the nvme-subsys device to specify an NVMe subsystem, it can manage
detached namespaces from controllers in the subsystem itself.

Tested:

  -device nvme-subsys,id=subsys0 \  
  
  -device nvme,serial=foo,id=nvme0,subsys=subsys0 \ 
  
  -device nvme-ns,id=ns1,drive=drv0,nsid=1,subsys=subsys0,zoned=false \ 
 
  -device nvme-ns,id=ns2,drive=drv1,nsid=2,subsys=subsys0,zoned=true \  
 
  -device 
nvme-ns,id=ns3,drive=drv2,nsid=3,subsys=subsys0,detached=true,zoned=false \
  -device 
nvme-ns,id=ns4,drive=drv3,nsid=4,subsys=subsys0,detached=true,zoned=true \ 

  root@vm:~# nvme list
  Node  SN   Model  
  Namespace Usage  Format   FW Rev
  -  
 - -- 
 
  /dev/nvme0n1  foo  QEMU NVMe Ctrl 
  1 268.44  MB / 268.44  MB512   B +  0 B   1.0
  /dev/nvme0n2  foo  QEMU NVMe Ctrl 
  2 268.44  MB / 268.44  MB512   B +  0 B   1.0

  root@vm:~# nvme attach-ns /dev/nvme0 --namespace-id=3 --controllers=0
  attach-ns: Success, nsid:3
  root@vm:~# nvme attach-ns /dev/nvme0 --namespace-id=4 --controllers=0
  attach-ns: Success, nsid:4
  root@vm:~# echo 1 > /sys/class/nvme/nvme0/rescan_controller

  root@vm:~# nvme list
  Node  SN   Model  
  Namespace Usage  Format   FW Rev  
  -  
 - -- 
 
  /dev/nvme0n1  foo  QEMU NVMe Ctrl 
  1 268.44  MB / 268.44  MB512   B +  0 B   1.0 
  /dev/nvme0n2  foo  QEMU NVMe Ctrl 
  2 268.44  MB / 268.44  MB512   B +  0 B   1.0 
  /dev/nvme0n3  foo  QEMU NVMe Ctrl 
  3 268.44  MB / 268.44  MB512   B +  0 B   1.0 
  /dev/nvme0n4  foo  QEMU NVMe Ctrl 
  4 268.44  MB / 268.44  MB512   B +  0 B   1.0 

  root@vm:~# nvme detach-ns /dev/nvme0 --namespace-id=3 --controllers=0
  detach-ns: Success, nsid:3
  root@vm:~# nvme detach-ns /dev/nvme0 --namespace-id=4 --controllers=0
  detach-ns: Success, nsid:4
  root@vm:~# echo 1 > /sys/class/nvme/nvme0/rescan_controller

  root@vm:~# nvme list
  Node  SN   Model  
  Namespace Usage  Format   FW Rev  
  -  
 - -- 
 
  /dev/nvme0n1  foo  QEMU NVMe Ctrl 
  1 268.44  MB / 268.44  MB512   B +  0 B   1.0 
  /dev/nvme0n2  foo  QEMU NVMe Ctrl 
  2 268.44  MB / 268.44  MB512   B +  0 B   1.0 

Thanks,

Minwoo Im (6):
  hw/block/nvme: support namespace detach
  hw/block/nvme: fix namespaces array to 1-based
  hw/block/nvme: fix allocated namespace list to 256
  hw/block/nvme: support allocated namespace type
  hw/block/nvme: refactor nvme_select_ns_iocs
  hw/block/nvme: support namespace attachment command

 hw/block/nvme-ns.c |   1 +
 hw/block/nvme-ns.h |   1 +
 hw/block/nvme-subsys.h |  28 +-
 hw/block/nvme.c| 199 ++---
 hw/block/nvme.h|  33 +++
 hw/block/trace-events  |   2 +
 include/block/nvme.h   |   5 ++
 7 files changed, 234 insertions(+), 35 deletions(-)

-- 
2.17.1




Re: [PULL v3 00/27] Block patches

2021-02-05 Thread Peter Maydell
On Fri, 5 Feb 2021 at 16:45, Stefan Hajnoczi  wrote:
>
> The following changes since commit e2c5093c993ef646e4e28f7aa78429853bcc06ac:
>
>   iotests: 30: drop from auto group (and effectively from make check) 
> (2021-02-05 15:16:13 +)
>
> are available in the Git repository at:
>
>   https://gitlab.com/stefanha/qemu.git tags/block-pull-request
>
> for you to fetch changes up to b07011f375bda3319cf72eee7cb18d310078387b:
>
>   docs: fix Parallels Image "dirty bitmap" section (2021-02-05 16:36:36 +)
>
> 
> Pull request
>
> v3:
>  * Replace {0} array initialization with {} to make clang happy [Peter]
>
> 


Fails 'make check' on s390x host:

socket_accept failed: Resource temporarily unavailable
**
ERROR:../../tests/qtest/libqtest.c:308:qtest_init_without_qmp_handshake:
assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
../../tests/qtest/libqtest.c:181: kill_qemu() detected QEMU death from
signal 6 (Aborted) (core dumped)
socket_accept failed: Resource temporarily unavailable
**
ERROR:../../tests/qtest/libqtest.c:308:qtest_init_without_qmp_handshake:
assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
../../tests/qtest/libqtest.c:181: kill_qemu() detected QEMU death from
signal 6 (Aborted) (core dumped)
ERROR qtest-s390x/pxe-test - Bail out!
ERROR:../../tests/qtest/libqtest.c:308:qtest_init_without_qmp_handshake:
assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
ERROR qtest-s390x/test-netfilter - Bail out!
ERROR:../../tests/qtest/libqtest.c:308:qtest_init_without_qmp_handshake:
assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
Makefile.mtest:3121: recipe for target 'run-test-388' failed
make: *** [run-test-388] Error 1
make: *** Waiting for unfinished jobs
Makefile.mtest:3113: recipe for target 'run-test-387' failed

thanks
-- PMM



Re: [PATCH v2] qemu-nbd: Use SOMAXCONN for socket listen() backlog

2021-02-05 Thread Richard W.M. Jones
On Fri, Feb 05, 2021 at 12:57:05PM -0600, Eric Blake wrote:
> Our default of a backlog of 1 connection is rather puny, particularly
> for scenarios where we expect multiple listeners to connect (such as
> qemu-nbd -e X).  This is especially important for Unix sockets, as a
> definite benefit to clients: at least on Linux, a client trying to
> connect to a Unix socket with a backlog gets an EAGAIN failure with no
> way to poll() for when the backlog is no longer present short of
> sleeping an arbitrary amount of time before retrying.
> 
> See https://bugzilla.redhat.com/1925045 for a demonstration of where
> our low backlog prevents libnbd from connecting as many parallel
> clients as it wants.
> 
> Reported-by: Richard W.M. Jones 
> Signed-off-by: Eric Blake 
> ---
> 
> v2: target the correct API used by qemu-nbd, rather than an unrelated
> legacy wrapper [Dan]
> 
>  qemu-nbd.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/qemu-nbd.c b/qemu-nbd.c
> index 608c63e82a25..cd20ee73be19 100644
> --- a/qemu-nbd.c
> +++ b/qemu-nbd.c
> @@ -965,7 +965,8 @@ int main(int argc, char **argv)
>  server = qio_net_listener_new();
>  if (socket_activation == 0) {
>  saddr = nbd_build_socket_address(sockpath, bindto, port);
> -if (qio_net_listener_open_sync(server, saddr, 1, &local_err) < 0) {
> +if (qio_net_listener_open_sync(server, saddr, SOMAXCONN,
> +   &local_err) < 0) {
>  object_unref(OBJECT(server));
>  error_report_err(local_err);
>  exit(EXIT_FAILURE);

This one works:

Tested-by: Richard W.M. Jones 

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
virt-top is 'top' for virtual machines.  Tiny program with many
powerful monitoring features, net stats, disk stats, logging, etc.
http://people.redhat.com/~rjones/virt-top




Re: [PATCH 2/4] hw/block/fdc: Remove the check_media_rate property

2021-02-05 Thread John Snow

On 2/5/21 1:37 AM, Thomas Huth wrote:

On 05/02/2021 01.40, John Snow wrote:

On 2/3/21 12:18 PM, Thomas Huth wrote:

This was only required for the pc-1.0 and earlier machine types.
Now that these have been removed, we can also drop the corresponding
code from the FDC device.

Signed-off-by: Thomas Huth 
---
  hw/block/fdc.c | 17 ++---
  tests/qemu-iotests/172.out | 35 ---
  2 files changed, 2 insertions(+), 50 deletions(-)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 292ea87805..198940e737 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -874,7 +874,6 @@ struct FDCtrl {
  FloppyDriveType type;
  } qdev_for_drives[MAX_FD];
  int reset_sensei;
-    uint32_t check_media_rate;


I am a bit of a dunce when it comes to the compatibility properties... 
does this mess with the migration format?


I guess it doesn't, since it's not in the VMSTATE declaration.

H, alright.


I think that should be fine, yes.


  FloppyDriveType fallback; /* type=auto failure fallback */
  /* Timers state */
  uint8_t timer0;
@@ -1021,18 +1020,10 @@ static const VMStateDescription 
vmstate_fdrive_media_changed = {

  }
  };
-static bool fdrive_media_rate_needed(void *opaque)
-{
-    FDrive *drive = opaque;
-
-    return drive->fdctrl->check_media_rate;
-}
-
  static const VMStateDescription vmstate_fdrive_media_rate = {
  .name = "fdrive/media_rate",
  .version_id = 1,
  .minimum_version_id = 1,
-    .needed = fdrive_media_rate_needed,
  .fields = (VMStateField[]) {
  VMSTATE_UINT8(media_rate, FDrive),
  VMSTATE_END_OF_LIST()
@@ -1689,8 +1680,7 @@ static void fdctrl_start_transfer(FDCtrl 
*fdctrl, int direction)

  /* Check the data rate. If the programmed data rate does not match
   * the currently inserted medium, the operation has to fail. */
-    if (fdctrl->check_media_rate &&
-    (fdctrl->dsr & FD_DSR_DRATEMASK) != cur_drv->media_rate) {
+    if ((fdctrl->dsr & FD_DSR_DRATEMASK) != cur_drv->media_rate) {
  FLOPPY_DPRINTF("data rate mismatch (fdc=%d, media=%d)\n",
 fdctrl->dsr & FD_DSR_DRATEMASK, 
cur_drv->media_rate);

  fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_MA, 0x00);
@@ -2489,8 +2479,7 @@ static void fdctrl_result_timer(void *opaque)
  cur_drv->sect = (cur_drv->sect % cur_drv->last_sect) + 1;
  }
  /* READ_ID can't automatically succeed! */
-    if (fdctrl->check_media_rate &&
-    (fdctrl->dsr & FD_DSR_DRATEMASK) != cur_drv->media_rate) {
+    if ((fdctrl->dsr & FD_DSR_DRATEMASK) != cur_drv->media_rate) {
  FLOPPY_DPRINTF("read id rate mismatch (fdc=%d, media=%d)\n",
 fdctrl->dsr & FD_DSR_DRATEMASK, 
cur_drv->media_rate);

  fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_MA, 0x00);
@@ -2895,8 +2884,6 @@ static Property isa_fdc_properties[] = {
  DEFINE_PROP_UINT32("dma", FDCtrlISABus, dma, 2),
  DEFINE_PROP_DRIVE("driveA", FDCtrlISABus, 
state.qdev_for_drives[0].blk),
  DEFINE_PROP_DRIVE("driveB", FDCtrlISABus, 
state.qdev_for_drives[1].blk),
-    DEFINE_PROP_BIT("check_media_rate", FDCtrlISABus, 
state.check_media_rate,

-    0, true),


Could you theoretically set this via QOM commands in QMP, and claim 
that this is a break in behavior?


Though, it's ENTIRELY undocumented, so ... it's probably fine, I 
think. Probably. (Please soothe my troubled mind.)


A user actually could mess with this property even on the command line, 
e.g. by using:


  qemu-system-x86_64 -global isa-fdc.check_media_rate=false

... but, as you said, it's completely undocumented, the property is 
really just there for the internal use of machine type compatibility. 
We've done such clean-ups in the past already, see e.g. 
c6026998eef382d7ad76 or 2a4dbaf1c0db2453ab78f, so I think this should be 
fine. But if you disagree, I could replace this by a patch that adds 
this property to the list of deprecated features instead, so we could at 
least remove it after it has been deprecated for two releases?




I don't think it's necessary, personally -- just wanted to make sure I 
knew the exact stakes here.


Reviewed-by: John Snow 
Acked-by: John Snow 




Re: [PATCH v2] qemu-nbd: Use SOMAXCONN for socket listen() backlog

2021-02-05 Thread Nir Soffer
On Fri, Feb 5, 2021 at 8:57 PM Eric Blake  wrote:
>
> Our default of a backlog of 1 connection is rather puny, particularly
> for scenarios where we expect multiple listeners to connect (such as
> qemu-nbd -e X).  This is especially important for Unix sockets, as a
> definite benefit to clients: at least on Linux, a client trying to
> connect to a Unix socket with a backlog gets an EAGAIN failure with no
> way to poll() for when the backlog is no longer present short of
> sleeping an arbitrary amount of time before retrying.
>
> See https://bugzilla.redhat.com/1925045 for a demonstration of where
> our low backlog prevents libnbd from connecting as many parallel
> clients as it wants.
>
> Reported-by: Richard W.M. Jones 
> Signed-off-by: Eric Blake 
> ---
>
> v2: target the correct API used by qemu-nbd, rather than an unrelated
> legacy wrapper [Dan]
>
>  qemu-nbd.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/qemu-nbd.c b/qemu-nbd.c
> index 608c63e82a25..cd20ee73be19 100644
> --- a/qemu-nbd.c
> +++ b/qemu-nbd.c
> @@ -965,7 +965,8 @@ int main(int argc, char **argv)
>  server = qio_net_listener_new();
>  if (socket_activation == 0) {
>  saddr = nbd_build_socket_address(sockpath, bindto, port);
> -if (qio_net_listener_open_sync(server, saddr, 1, &local_err) < 0) {
> +if (qio_net_listener_open_sync(server, saddr, SOMAXCONN,

Shouldn't we use value based on --shared=N?

Using maximum value makes sense for generic server expecting to handle many
connections from different clients. qemu-nbd is typically used by one
client, and we
need to make it possible to connect a known number of connections quickly.

> +   &local_err) < 0) {
>  object_unref(OBJECT(server));
>  error_report_err(local_err);
>  exit(EXIT_FAILURE);
> --
> 2.30.0
>
>




Re: [PATCH] sockets: Use SOMAXCONN for Unix socket listen()

2021-02-05 Thread Eric Blake
On 2/5/21 3:55 AM, Daniel P. Berrangé wrote:

>> +++ b/util/qemu-sockets.c
>> @@ -1059,7 +1059,7 @@ int unix_listen(const char *str, Error **errp)
>>
>>  saddr = g_new0(UnixSocketAddress, 1);
>>  saddr->path = g_strdup(str);
>> -sock = unix_listen_saddr(saddr, 1, errp);
>> +sock = unix_listen_saddr(saddr, SOMAXCONN, errp);
>>  qapi_free_UnixSocketAddress(saddr);
>>  return sock;
>>  }
> 
> This method is a legacy back compat function, only used by the QEMU
> guest agent, so this can't explain the NBD problems, which use the
> QIONetListener class.
> 
> IOW, the problem is in the qemu-nbd.c / blockdev-nbd.c code I believe

D'oh. Serves me right for trying to guess the spot using just a grep on
listen() rather than running under gdb with a breakpoint to find the
actual backtrace.  v2 posted with a slightly changed subject line, and
this time tested to actually work.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [PATCH v11 00/13] hw/block/nvme: Support Namespace Types and Zoned Namespace Command Set

2021-02-05 Thread Keith Busch
On Sat, Feb 06, 2021 at 01:48:29AM +0900, Minwoo Im wrote:
> Not sure if this is okay just give ctrl->tagset for the head
> request_queue, but this patch works fine as far.

Huh, that's probably not supposed to work: bio-based drivers should
never use tagsets.

Since this is getting a little more complicated, let's take it to the
kernel mailing lists. Meanwhile, I'll work on a proposal for there.
 
> ---
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index 282b7a4ea9a9..22febc7baa36 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -375,7 +375,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct 
> nvme_ns_head *head)
> if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath)
> return 0;
>  
> -   q = blk_alloc_queue(ctrl->numa_node);
> +   q = blk_mq_init_queue(ctrl->tagset);
> if (!q)
> goto out;
> blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
> @@ -677,6 +677,8 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct 
> nvme_id_ns *id)
> if (blk_queue_stable_writes(ns->queue) && ns->head->disk)
> blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES,
>ns->head->disk->queue);
> +   if (blk_queue_is_zoned(ns->queue))
> +   blk_revalidate_disk_zones(ns->head->disk, NULL);
>  }
>  
>  void nvme_mpath_remove_disk(struct nvme_ns_head *head)



[PATCH v2] qemu-nbd: Use SOMAXCONN for socket listen() backlog

2021-02-05 Thread Eric Blake
Our default of a backlog of 1 connection is rather puny, particularly
for scenarios where we expect multiple listeners to connect (such as
qemu-nbd -e X).  This is especially important for Unix sockets, as a
definite benefit to clients: at least on Linux, a client trying to
connect to a Unix socket with a backlog gets an EAGAIN failure with no
way to poll() for when the backlog is no longer present short of
sleeping an arbitrary amount of time before retrying.

See https://bugzilla.redhat.com/1925045 for a demonstration of where
our low backlog prevents libnbd from connecting as many parallel
clients as it wants.

Reported-by: Richard W.M. Jones 
Signed-off-by: Eric Blake 
---

v2: target the correct API used by qemu-nbd, rather than an unrelated
legacy wrapper [Dan]

 qemu-nbd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/qemu-nbd.c b/qemu-nbd.c
index 608c63e82a25..cd20ee73be19 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -965,7 +965,8 @@ int main(int argc, char **argv)
 server = qio_net_listener_new();
 if (socket_activation == 0) {
 saddr = nbd_build_socket_address(sockpath, bindto, port);
-if (qio_net_listener_open_sync(server, saddr, 1, &local_err) < 0) {
+if (qio_net_listener_open_sync(server, saddr, SOMAXCONN,
+   &local_err) < 0) {
 object_unref(OBJECT(server));
 error_report_err(local_err);
 exit(EXIT_FAILURE);
-- 
2.30.0




Re: [PATCH v2 30/36] block: bdrv_reopen_multiple: refresh permissions on updated graph

2021-02-05 Thread Kevin Wolf
Am 27.11.2020 um 15:45 hat Vladimir Sementsov-Ogievskiy geschrieben:
> Move bdrv_reopen_multiple to new paradigm of permission update:
> first update graph relations, then do refresh the permissions.
> 
> We have to modify reopen process in file-posix driver: with new scheme
> we don't have prepared permissions in raw_reopen_prepare(), so we
> should reconfigure fd in raw_check_perm(). Still this seems more native
> and simple anyway.

Hm... The diffstat shows that it is simpler because it needs less code.

But relying on the permission change callbacks for getting a new file
descriptor that changes more than just permissions doesn't feel
completely right either. Can we even expect the permission callbacks to
be called when the permissions aren't changed?

But then, reopen and permission updates were already a bit entangled
before. If we can guarantee that the permission functions will always be
called, even if the permissions don't change, I guess it's okay.

> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  include/block/block.h |   2 +-
>  block.c   | 183 +++---
>  block/file-posix.c|  84 +--
>  3 files changed, 70 insertions(+), 199 deletions(-)
> 
> diff --git a/include/block/block.h b/include/block/block.h
> index 0f21ef313f..82271d9ccd 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -195,7 +195,7 @@ typedef struct BDRVReopenState {
>  BlockdevDetectZeroesOptions detect_zeroes;
>  bool backing_missing;
>  bool replace_backing_bs;  /* new_backing_bs is ignored if this is false 
> */
> -BlockDriverState *new_backing_bs; /* If NULL then detach the current bs 
> */
> +BlockDriverState *old_backing_bs; /* keep pointer for permissions update 
> */
>  uint64_t perm, shared_perm;

perm and shared_perm are unused now and can be removed.

>  QDict *options;
>  QDict *explicit_options;
> diff --git a/block.c b/block.c
> index 617cba9547..474e624152 100644
> --- a/block.c
> +++ b/block.c
> @@ -103,8 +103,9 @@ static int bdrv_attach_child_common(BlockDriverState 
> *child_bs,
>  GSList **tran, Error **errp);
>  static void bdrv_remove_backing(BlockDriverState *bs, GSList **tran);
>  
> -static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, 
> BlockReopenQueue
> -   *queue, Error **errp);
> +static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
> +   BlockReopenQueue *queue,
> +   GSList **set_backings_tran, Error **errp);
>  static void bdrv_reopen_commit(BDRVReopenState *reopen_state);
>  static void bdrv_reopen_abort(BDRVReopenState *reopen_state);
>  
> @@ -2403,6 +2404,7 @@ static void bdrv_list_abort_perm_update(GSList *list)
>  }
>  }
>  
> +__attribute__((unused))
>  static void bdrv_abort_perm_update(BlockDriverState *bs)
>  {
>  g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs);
> @@ -2498,6 +2500,7 @@ char *bdrv_perm_names(uint64_t perm)
>   *
>   * Needs to be followed by a call to either bdrv_set_perm() or
>   * bdrv_abort_perm_update(). */
> +__attribute__((unused))
>  static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q,
>uint64_t new_used_perm,
>uint64_t new_shared_perm,
> @@ -4100,10 +4103,6 @@ static BlockReopenQueue 
> *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
>  bs_entry->state.explicit_options = explicit_options;
>  bs_entry->state.flags = flags;
>  
> -/* This needs to be overwritten in bdrv_reopen_prepare() */
> -bs_entry->state.perm = UINT64_MAX;
> -bs_entry->state.shared_perm = 0;
> -
>  /*
>   * If keep_old_opts is false then it means that unspecified
>   * options must be reset to their original value. We don't allow
> @@ -4186,40 +4185,37 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue 
> *bs_queue,
>   */
>  int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
>  {
> -int ret = -1;
> +int ret = 0;

I would prefer to leave this right before the 'goto cleanup'.

Not sure if I fully understand all consequences yet, but overall, apart
from my concerns about file-posix and the potential AioContext locking
problems, this looks like a nice simplification of the process.

Come to think of it, the AioContext handling is probably wrong already
before your series. reopen_commit for one node could move the whole tree
to a different context and then the later nodes would all be processed
while holding the wrong lock.

Kevin




[PATCH v2 7/8] tests/fp/fp-test: Replace the word 'blacklist'

2021-02-05 Thread Philippe Mathieu-Daudé
Follow the inclusive terminology from the "Conscious Language in your
Open Source Projects" guidelines [*] and replace the word "blacklist"
appropriately.

[*] https://github.com/conscious-lang/conscious-lang-docs/blob/main/faq.md

Acked-by: Alex Bennée 
Reviewed-by: Daniel P. Berrangé 
Signed-off-by: Philippe Mathieu-Daudé 
---
 tests/fp/fp-test.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/fp/fp-test.c b/tests/fp/fp-test.c
index 06ffebd6db1..5a4cad8c8b2 100644
--- a/tests/fp/fp-test.c
+++ b/tests/fp/fp-test.c
@@ -123,7 +123,7 @@ static void not_implemented(void)
 fprintf(stderr, "Not implemented.\n");
 }
 
-static bool blacklisted(unsigned op, int rmode)
+static bool is_allowed(unsigned op, int rmode)
 {
 /* odd has not been implemented for any 80-bit ops */
 if (rmode == softfloat_round_odd) {
@@ -161,10 +161,10 @@ static bool blacklisted(unsigned op, int rmode)
 case F32_TO_EXTF80:
 case F64_TO_EXTF80:
 case F128_TO_EXTF80:
-return true;
+return false;
 }
 }
-return false;
+return true;
 }
 
 static void do_testfloat(int op, int rmode, bool exact)
@@ -194,7 +194,7 @@ static void do_testfloat(int op, int rmode, bool exact)
 verCases_writeFunctionName(stderr);
 fputs("\n", stderr);
 
-if (blacklisted(op, rmode)) {
+if (!is_allowed(op, rmode)) {
 not_implemented();
 return;
 }
-- 
2.26.2




[PATCH v2 6/8] qemu-options: Replace the word 'blacklist'

2021-02-05 Thread Philippe Mathieu-Daudé
Follow the inclusive terminology from the "Conscious Language in your
Open Source Projects" guidelines [*] and replace the word "blacklist"
appropriately.

[*] https://github.com/conscious-lang/conscious-lang-docs/blob/main/faq.md

Signed-off-by: Philippe Mathieu-Daudé 
---
v2: Reword (danpb)
---
 qemu-options.hx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/qemu-options.hx b/qemu-options.hx
index c09c4646e28..5f86cd2fbbf 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4274,12 +4274,12 @@ DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
 "use 'obsolete' to allow obsolete system calls that are 
provided\n" \
 "by the kernel, but typically no longer used by 
modern\n" \
 "C library implementations.\n" \
-"use 'elevateprivileges' to allow or deny QEMU process to 
elevate\n" \
-"its privileges by blacklisting all set*uid|gid system 
calls.\n" \
+"use 'elevateprivileges' to allow or deny the QEMU process 
ability\n" \
+"to elevate privileges using set*uid|gid system 
calls.\n" \
 "The value 'children' will deny set*uid|gid system 
calls for\n" \
 "main QEMU process but will allow forks and execves to 
run unprivileged\n" \
 "use 'spawn' to avoid QEMU to spawn new threads or 
processes by\n" \
-" blacklisting *fork and execve\n" \
+" blocking *fork and execve\n" \
 "use 'resourcecontrol' to disable process affinity and 
schedular priority\n",
 QEMU_ARCH_ALL)
 SRST
-- 
2.26.2




[PATCH v2 2/8] tools/virtiofsd: Replace the word 'whitelist'

2021-02-05 Thread Philippe Mathieu-Daudé
Follow the inclusive terminology from the "Conscious Language in your
Open Source Projects" guidelines [*] and replace the words "whitelist"
appropriately.

[*] https://github.com/conscious-lang/conscious-lang-docs/blob/main/faq.md

Reviewed-by: Dr. David Alan Gilbert 
Reviewed-by: Daniel P. Berrangé 
Signed-off-by: Philippe Mathieu-Daudé 
---
 tools/virtiofsd/passthrough_ll.c  |  6 +++---
 tools/virtiofsd/passthrough_seccomp.c | 12 ++--
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 147b59338a1..5f3afe85579 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -3204,7 +3204,7 @@ static void setup_mounts(const char *source)
 }
 
 /*
- * Only keep whitelisted capabilities that are needed for file system operation
+ * Only keep capabilities in allowlist that are needed for file system 
operation
  * The (possibly NULL) modcaps_in string passed in is free'd before exit.
  */
 static void setup_capabilities(char *modcaps_in)
@@ -3214,8 +3214,8 @@ static void setup_capabilities(char *modcaps_in)
 capng_restore_state(&cap.saved);
 
 /*
- * Whitelist file system-related capabilities that are needed for a file
- * server to act like root.  Drop everything else like networking and
+ * Add to allowlist file system-related capabilities that are needed for a
+ * file server to act like root.  Drop everything else like networking and
  * sysadmin capabilities.
  *
  * Exclusions:
diff --git a/tools/virtiofsd/passthrough_seccomp.c 
b/tools/virtiofsd/passthrough_seccomp.c
index ea852e2e33b..62441cfcdb9 100644
--- a/tools/virtiofsd/passthrough_seccomp.c
+++ b/tools/virtiofsd/passthrough_seccomp.c
@@ -21,7 +21,7 @@
 #endif
 #endif
 
-static const int syscall_whitelist[] = {
+static const int syscall_allowlist[] = {
 /* TODO ireg sem*() syscalls */
 SCMP_SYS(brk),
 SCMP_SYS(capget), /* For CAP_FSETID */
@@ -117,12 +117,12 @@ static const int syscall_whitelist[] = {
 };
 
 /* Syscalls used when --syslog is enabled */
-static const int syscall_whitelist_syslog[] = {
+static const int syscall_allowlist_syslog[] = {
 SCMP_SYS(send),
 SCMP_SYS(sendto),
 };
 
-static void add_whitelist(scmp_filter_ctx ctx, const int syscalls[], size_t 
len)
+static void add_allowlist(scmp_filter_ctx ctx, const int syscalls[], size_t 
len)
 {
 size_t i;
 
@@ -153,10 +153,10 @@ void setup_seccomp(bool enable_syslog)
 exit(1);
 }
 
-add_whitelist(ctx, syscall_whitelist, G_N_ELEMENTS(syscall_whitelist));
+add_allowlist(ctx, syscall_allowlist, G_N_ELEMENTS(syscall_allowlist));
 if (enable_syslog) {
-add_whitelist(ctx, syscall_whitelist_syslog,
-  G_N_ELEMENTS(syscall_whitelist_syslog));
+add_allowlist(ctx, syscall_allowlist_syslog,
+  G_N_ELEMENTS(syscall_allowlist_syslog));
 }
 
 /* libvhost-user calls this for post-copy migration, we don't need it */
-- 
2.26.2




Re: [PATCH 2/9] tests/qtest: Restrict xlnx-can-test to TCG builds

2021-02-05 Thread Philippe Mathieu-Daudé
On 2/5/21 5:57 PM, Peter Maydell wrote:
> On Fri, 5 Feb 2021 at 14:43, Philippe Mathieu-Daudé  wrote:
>>
>> The Xilinx CAN controller test uses the ZCU102 board which is
>> based on a ZynqMP SoC. In the default configuration - used by this
>> test - this SoC creates 2 Cortex R5F cores. Such cores are not
>> v8A architecture, thus cannot be run under KVM. Therefore restrict
>> this test to TCG.
>>
>> Signed-off-by: Philippe Mathieu-Daudé 
>> ---
>> Cc: Alistair Francis 
>> Cc: "Edgar E. Iglesias" 
>> Cc: Vikram Garhwal 
>> ---
>>  tests/qtest/meson.build | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
>> index c83bc211b6a..d8ebd5bf98e 100644
>> --- a/tests/qtest/meson.build
>> +++ b/tests/qtest/meson.build
>> @@ -159,10 +159,10 @@
>>(cpu != 'arm' ? ['bios-tables-test'] : []) +  
>> \
>>(config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? 
>> ['tpm-tis-device-test'] : []) +\
>>(config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? 
>> ['tpm-tis-device-swtpm-test'] : []) +  \
>> +  (config_all.has_key('CONFIG_TCG') ? ['xlnx-can-test'] : []) +  \
>>['arm-cpu-features',
>> 'numa-test',
>> 'boot-serial-test',
>> -   'xlnx-can-test',
>> 'migration-test']
> 
> The implementation in hw/net/can/meson.build is conditioned on
> CONFIG_XLNX_ZYNQMP -- does it work to use that here too?

Yes. Thanks, clever idea :)



Re: [PATCH v4 5/5] qapi: More complex uses of QAPI_LIST_APPEND

2021-02-05 Thread Eric Blake
On 1/26/21 3:31 AM, Markus Armbruster wrote:
> Eric Blake  writes:
> 
>> These cases require a bit more thought to review; in each case, the
>> code was appending to a list, but not with a FOOList **tail variable.
>>
>> Signed-off-by: Eric Blake 
>> Reviewed-by: Vladimir Sementsov-Ogievskiy 
>>
>> ---
>> fix qmp_guest_network_get_interfaces [Vladimir]
> 
> Fails tests/test-qga.  I should've double-checked earlier.

And me, too.  Looks like the culprit is:

>>
>> -address_list = &info->value->ip_addresses;
>> -
>> -while (*address_list && (*address_list)->next) {
>> -address_list = &(*address_list)->next;
>> -}
>> -
>> -if (!*address_list) {
>> -*address_list = address_item;
>> -} else {
>> -(*address_list)->next = address_item;
>> +address_tail = &info->ip_addresses;
>> +while (!*address_tail) {
>> +address_tail = &(*address_tail)->next;
>>  }
>> +QAPI_LIST_APPEND(address_tail, address_item);

right here; the condition 'while (!*address_tail)' should instead be
'while (*address_tail)'.  Will submit v5 now that I've identified the bug.

> I'd like to drop just this part, and merge the rest.  You can then
> respin just this part as a follow-up patch.  Okay?
> 
> I can't test qemu-ga under Windows.  Fingers crossed...
> 
> [...]
> 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




[PATCH v2 8/8] hw/vfio/pci-quirks: Replace the word 'blacklist'

2021-02-05 Thread Philippe Mathieu-Daudé
Follow the inclusive terminology from the "Conscious Language in your
Open Source Projects" guidelines [*] and replace the word "blacklist"
appropriately.

[*] https://github.com/conscious-lang/conscious-lang-docs/blob/main/faq.md

Reviewed-by: Alex Williamson 
Acked-by: Alex Williamson 
Reviewed-by: Daniel P. Berrangé 
Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/vfio/pci.h|  2 +-
 hw/vfio/pci-quirks.c | 14 +++---
 hw/vfio/pci.c|  4 ++--
 hw/vfio/trace-events |  2 +-
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 1574ef983f8..64777516d16 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -197,7 +197,7 @@ void vfio_pci_write_config(PCIDevice *pdev,
 uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size);
 void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size);
 
-bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev);
+bool vfio_opt_rom_in_denylist(VFIOPCIDevice *vdev);
 void vfio_vga_quirk_setup(VFIOPCIDevice *vdev);
 void vfio_vga_quirk_exit(VFIOPCIDevice *vdev);
 void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev);
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index fc8d63c8504..81c3e30df77 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -43,19 +43,19 @@
 static const struct {
 uint32_t vendor;
 uint32_t device;
-} romblacklist[] = {
+} rom_denylist[] = {
 { 0x14e4, 0x168e }, /* Broadcom BCM 57810 */
 };
 
-bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev)
+bool vfio_opt_rom_in_denylist(VFIOPCIDevice *vdev)
 {
 int i;
 
-for (i = 0 ; i < ARRAY_SIZE(romblacklist); i++) {
-if (vfio_pci_is(vdev, romblacklist[i].vendor, romblacklist[i].device)) 
{
-trace_vfio_quirk_rom_blacklisted(vdev->vbasedev.name,
- romblacklist[i].vendor,
- romblacklist[i].device);
+for (i = 0 ; i < ARRAY_SIZE(rom_denylist); i++) {
+if (vfio_pci_is(vdev, rom_denylist[i].vendor, rom_denylist[i].device)) 
{
+trace_vfio_quirk_rom_in_denylist(vdev->vbasedev.name,
+ rom_denylist[i].vendor,
+ rom_denylist[i].device);
 return true;
 }
 }
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index f74be782091..759a3b1abf4 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -900,7 +900,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
 
 if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
 /* Since pci handles romfile, just print a message and return */
-if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) {
+if (vfio_opt_rom_in_denylist(vdev) && vdev->pdev.romfile) {
 warn_report("Device at %s is known to cause system instability"
 " issues during option rom execution",
 vdev->vbasedev.name);
@@ -927,7 +927,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
 return;
 }
 
-if (vfio_blacklist_opt_rom(vdev)) {
+if (vfio_opt_rom_in_denylist(vdev)) {
 if (dev->opts && qemu_opt_get(dev->opts, "rombar")) {
 warn_report("Device at %s is known to cause system instability"
 " issues during option rom execution",
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index c0e75f24b76..079f53acf28 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -49,7 +49,7 @@ vfio_pci_emulated_sub_vendor_id(const char *name, uint16_t 
val) "%s 0x%04x"
 vfio_pci_emulated_sub_device_id(const char *name, uint16_t val) "%s 0x%04x"
 
 # pci-quirks.c
-vfio_quirk_rom_blacklisted(const char *name, uint16_t vid, uint16_t did) "%s 
%04x:%04x"
+vfio_quirk_rom_in_denylist(const char *name, uint16_t vid, uint16_t did) "%s 
%04x:%04x"
 vfio_quirk_generic_window_address_write(const char *name, const char * 
region_name, uint64_t data) "%s %s 0x%"PRIx64
 vfio_quirk_generic_window_data_read(const char *name, const char * 
region_name, uint64_t data) "%s %s 0x%"PRIx64
 vfio_quirk_generic_window_data_write(const char *name, const char * 
region_name, uint64_t data) "%s %s 0x%"PRIx64
-- 
2.26.2




[PATCH v2 4/8] scripts/device-crash-test: Replace the word 'whitelist'

2021-02-05 Thread Philippe Mathieu-Daudé
Follow the inclusive terminology from the "Conscious Language in your
Open Source Projects" guidelines [*] and replace the word "whitelist"
appropriately.

[*] https://github.com/conscious-lang/conscious-lang-docs/blob/main/faq.md

Reviewed-by: Daniel P. Berrangé 
Signed-off-by: Philippe Mathieu-Daudé 
---
 scripts/device-crash-test | 30 +++---
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/scripts/device-crash-test b/scripts/device-crash-test
index 04118669ba7..6812de42f8c 100755
--- a/scripts/device-crash-test
+++ b/scripts/device-crash-test
@@ -41,18 +41,18 @@ logger = logging.getLogger('device-crash-test')
 dbg = logger.debug
 
 
-# Purposes of the following whitelist:
+# Purposes of the following allowlist:
 # * Avoiding verbose log messages when we find known non-fatal
 #   (exitcode=1) errors
 # * Avoiding fatal errors when we find known crashes
 # * Skipping machines/devices that are known not to work out of
 #   the box, when running in --quick mode
 #
-# Keeping the whitelist updated is desirable, but not required,
+# Keeping the allowlist updated is desirable, but not required,
 # because unexpected cases where QEMU exits with exitcode=1 will
 # just trigger a INFO message.
 
-# Valid whitelist entry keys:
+# Valid allowlist entry keys:
 # * accel: regexp, full match only
 # * machine: regexp, full match only
 # * device: regexp, full match only
@@ -62,7 +62,7 @@ dbg = logger.debug
 # * expected: if True, QEMU is expected to always fail every time
 #   when testing the corresponding test case
 # * loglevel: log level of log output when there's a match.
-ERROR_WHITELIST = [
+ERROR_ALLOWLIST = [
 # Machines that won't work out of the box:
 # MACHINE | ERROR MESSAGE
 {'machine':'niagara', 'expected':True},   # Unable to load a firmware 
for -M niagara
@@ -187,9 +187,9 @@ ERROR_WHITELIST = [
 
 
 def whitelistTestCaseMatch(wl, t):
-"""Check if a test case specification can match a whitelist entry
+"""Check if a test case specification can match a allowlist entry
 
-This only checks if a whitelist entry is a candidate match
+This only checks if a allowlist entry is a candidate match
 for a given test case, it won't check if the test case
 results/output match the entry.  See whitelistResultMatch().
 """
@@ -206,16 +206,16 @@ def whitelistTestCaseMatch(wl, t):
 
 def whitelistCandidates(t):
 """Generate the list of candidates that can match a test case"""
-for i, wl in enumerate(ERROR_WHITELIST):
+for i, wl in enumerate(ERROR_ALLOWLIST):
 if whitelistTestCaseMatch(wl, t):
 yield (i, wl)
 
 
 def findExpectedResult(t):
-"""Check if there's an expected=True whitelist entry for a test case
+"""Check if there's an expected=True allowlist entry for a test case
 
 Returns (i, wl) tuple, where i is the index in
-ERROR_WHITELIST and wl is the whitelist entry itself.
+ERROR_ALLOWLIST and wl is the allowlist entry itself.
 """
 for i, wl in whitelistCandidates(t):
 if wl.get('expected'):
@@ -223,7 +223,7 @@ def findExpectedResult(t):
 
 
 def whitelistResultMatch(wl, r):
-"""Check if test case results/output match a whitelist entry
+"""Check if test case results/output match a allowlist entry
 
 It is valid to call this function only if
 whitelistTestCaseMatch() is True for the entry (e.g. on
@@ -237,10 +237,10 @@ def whitelistResultMatch(wl, r):
 
 
 def checkResultWhitelist(r):
-"""Look up whitelist entry for a given test case result
+"""Look up allowlist entry for a given test case result
 
 Returns (i, wl) tuple, where i is the index in
-ERROR_WHITELIST and wl is the whitelist entry itself.
+ERROR_ALLOWLIST and wl is the allowlist entry itself.
 """
 for i, wl in whitelistCandidates(r['testcase']):
 if whitelistResultMatch(wl, r):
@@ -544,7 +544,7 @@ def main():
 
 if f:
 i, wl = checkResultWhitelist(f)
-dbg("testcase: %r, whitelist match: %r", t, wl)
+dbg("testcase: %r, allowlist match: %r", t, wl)
 wl_stats.setdefault(i, []).append(f)
 level = wl.get('loglevel', logging.DEBUG)
 logFailure(f, level)
@@ -561,9 +561,9 @@ def main():
 
 if args.debug:
 stats = sorted([(len(wl_stats.get(i, [])), wl) for i, wl in
- enumerate(ERROR_WHITELIST)], key=lambda x: x[0])
+ enumerate(ERROR_ALLOWLIST)], key=lambda x: x[0])
 for count, wl in stats:
-dbg("whitelist entry stats: %d: %r", count, wl)
+dbg("allowlist entry stats: %d: %r", count, wl)
 
 if fatal_failures:
 for f in fatal_failures:
-- 
2.26.2




[PATCH v2 5/8] seccomp: Replace the word 'blacklist'

2021-02-05 Thread Philippe Mathieu-Daudé
Follow the inclusive terminology from the "Conscious Language in your
Open Source Projects" guidelines [*] and replace the word "blacklist"
appropriately.

[*] https://github.com/conscious-lang/conscious-lang-docs/blob/main/faq.md

Reviewed-by: Daniel P. Berrangé 
Acked-by: Eduardo Otubo 
Signed-off-by: Philippe Mathieu-Daudé 
---
 softmmu/qemu-seccomp.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/softmmu/qemu-seccomp.c b/softmmu/qemu-seccomp.c
index 377ef6937ca..4c684bc9e71 100644
--- a/softmmu/qemu-seccomp.c
+++ b/softmmu/qemu-seccomp.c
@@ -45,8 +45,8 @@ const struct scmp_arg_cmp sched_setscheduler_arg[] = {
 { .arg = 1, .op = SCMP_CMP_NE, .datum_a = SCHED_IDLE }
 };
 
-static const struct QemuSeccompSyscall blacklist[] = {
-/* default set of syscalls to blacklist */
+static const struct QemuSeccompSyscall denylist[] = {
+/* default set of syscalls to denylist */
 { SCMP_SYS(reboot), QEMU_SECCOMP_SET_DEFAULT },
 { SCMP_SYS(swapon), QEMU_SECCOMP_SET_DEFAULT },
 { SCMP_SYS(swapoff),QEMU_SECCOMP_SET_DEFAULT },
@@ -175,18 +175,18 @@ static int seccomp_start(uint32_t seccomp_opts, Error 
**errp)
 goto seccomp_return;
 }
 
-for (i = 0; i < ARRAY_SIZE(blacklist); i++) {
+for (i = 0; i < ARRAY_SIZE(denylist); i++) {
 uint32_t action;
-if (!(seccomp_opts & blacklist[i].set)) {
+if (!(seccomp_opts & denylist[i].set)) {
 continue;
 }
 
-action = qemu_seccomp_get_action(blacklist[i].set);
-rc = seccomp_rule_add_array(ctx, action, blacklist[i].num,
-blacklist[i].narg, blacklist[i].arg_cmp);
+action = qemu_seccomp_get_action(denylist[i].set);
+rc = seccomp_rule_add_array(ctx, action, denylist[i].num,
+denylist[i].narg, denylist[i].arg_cmp);
 if (rc < 0) {
 error_setg_errno(errp, -rc,
- "failed to add seccomp blacklist rules");
+ "failed to add seccomp denylist rules");
 goto seccomp_return;
 }
 }
-- 
2.26.2




[PATCH v2 0/8] misc: Replace the words 'blacklist/whitelist'

2021-02-05 Thread Philippe Mathieu-Daudé
Follow the inclusive terminology from the "Conscious Language in your
Open Source Projects" guidelines [*] and replace the words "blacklist"
and "whitelist" appropriately.

Since v1:
- dropped qemu-guest-agent patches
- addressed review comments
- added R-b tags

Missing review: PATCH #6 "qemu-options: Replace the word 'blacklist'"

Series expected to go via the qemu-trivial@ tree.

[*] https://github.com/conscious-lang/conscious-lang-docs/blob/main/faq.md

Philippe Mathieu-Daudé (8):
  ui: Replace the word 'whitelist'
  tools/virtiofsd: Replace the word 'whitelist'
  scripts/tracetool: Replace the word 'whitelist'
  scripts/device-crash-test: Replace the word 'whitelist'
  seccomp: Replace the word 'blacklist'
  qemu-options: Replace the word 'blacklist'
  tests/fp/fp-test: Replace the word 'blacklist'
  hw/vfio/pci-quirks: Replace the word 'blacklist'

 hw/vfio/pci.h |  2 +-
 hw/vfio/pci-quirks.c  | 14 ++---
 hw/vfio/pci.c |  4 ++--
 softmmu/qemu-seccomp.c| 16 +++---
 tests/fp/fp-test.c|  8 +++
 tools/virtiofsd/passthrough_ll.c  |  6 +++---
 tools/virtiofsd/passthrough_seccomp.c | 12 +--
 ui/console.c  |  2 +-
 ui/vnc-auth-sasl.c|  4 ++--
 hw/vfio/trace-events  |  2 +-
 qemu-options.hx   |  6 +++---
 scripts/device-crash-test | 30 +--
 scripts/tracetool/__init__.py |  2 +-
 13 files changed, 54 insertions(+), 54 deletions(-)

-- 
2.26.2





[PATCH v2 1/8] ui: Replace the word 'whitelist'

2021-02-05 Thread Philippe Mathieu-Daudé
Follow the inclusive terminology from the "Conscious Language in your
Open Source Projects" guidelines [*] and replace the words "whitelist"
appropriately.

[*] https://github.com/conscious-lang/conscious-lang-docs/blob/main/faq.md

Reviewed-by: Gerd Hoffmann 
Signed-off-by: Philippe Mathieu-Daudé 
---
v2: Do not use acronyms (danpb)
---
 ui/console.c   | 2 +-
 ui/vnc-auth-sasl.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ui/console.c b/ui/console.c
index c5d11bc7017..5a8311ced20 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -1708,7 +1708,7 @@ bool dpy_gfx_check_format(QemuConsole *con,
 return false;
 }
 } else {
-/* default is to whitelist native 32 bpp only */
+/* default is to allow native 32 bpp only */
 if (format != qemu_default_pixman_format(32, true)) {
 return false;
 }
diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c
index f67111a3662..df7dc08e9fc 100644
--- a/ui/vnc-auth-sasl.c
+++ b/ui/vnc-auth-sasl.c
@@ -288,7 +288,7 @@ static int protocol_client_auth_sasl_step(VncState *vs, 
uint8_t *data, size_t le
 goto authreject;
 }
 
-/* Check username whitelist ACL */
+/* Check the username access control list */
 if (vnc_auth_sasl_check_access(vs) < 0) {
 goto authreject;
 }
@@ -409,7 +409,7 @@ static int protocol_client_auth_sasl_start(VncState *vs, 
uint8_t *data, size_t l
 goto authreject;
 }
 
-/* Check username whitelist ACL */
+/* Check the username access control list */
 if (vnc_auth_sasl_check_access(vs) < 0) {
 goto authreject;
 }
-- 
2.26.2




[PATCH v2 3/8] scripts/tracetool: Replace the word 'whitelist'

2021-02-05 Thread Philippe Mathieu-Daudé
Follow the inclusive terminology from the "Conscious Language in your
Open Source Projects" guidelines [*] and replace the words "whitelist"
appropriately.

[*] https://github.com/conscious-lang/conscious-lang-docs/blob/main/faq.md

Reviewed-by: Daniel P. Berrangé 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Philippe Mathieu-Daudé 
---
 scripts/tracetool/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/tracetool/__init__.py b/scripts/tracetool/__init__.py
index 96b1cd69a52..5bc94d95cfc 100644
--- a/scripts/tracetool/__init__.py
+++ b/scripts/tracetool/__init__.py
@@ -100,7 +100,7 @@ def validate_type(name):
 if bit == "const":
 continue
 if bit not in ALLOWED_TYPES:
-raise ValueError("Argument type '%s' is not in whitelist. "
+raise ValueError("Argument type '%s' is not allowed. "
  "Only standard C types and fixed size integer "
  "types should be used. struct, union, and "
  "other complex pointer types should be "
-- 
2.26.2




Re: [PATCH 2/9] tests/qtest: Restrict xlnx-can-test to TCG builds

2021-02-05 Thread Peter Maydell
On Fri, 5 Feb 2021 at 14:43, Philippe Mathieu-Daudé  wrote:
>
> The Xilinx CAN controller test uses the ZCU102 board which is
> based on a ZynqMP SoC. In the default configuration - used by this
> test - this SoC creates 2 Cortex R5F cores. Such cores are not
> v8A architecture, thus cannot be run under KVM. Therefore restrict
> this test to TCG.
>
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
> Cc: Alistair Francis 
> Cc: "Edgar E. Iglesias" 
> Cc: Vikram Garhwal 
> ---
>  tests/qtest/meson.build | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
> index c83bc211b6a..d8ebd5bf98e 100644
> --- a/tests/qtest/meson.build
> +++ b/tests/qtest/meson.build
> @@ -159,10 +159,10 @@
>(cpu != 'arm' ? ['bios-tables-test'] : []) +   
>\
>(config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? 
> ['tpm-tis-device-test'] : []) +\
>(config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? 
> ['tpm-tis-device-swtpm-test'] : []) +  \
> +  (config_all.has_key('CONFIG_TCG') ? ['xlnx-can-test'] : []) +  \
>['arm-cpu-features',
> 'numa-test',
> 'boot-serial-test',
> -   'xlnx-can-test',
> 'migration-test']

The implementation in hw/net/can/meson.build is conditioned on
CONFIG_XLNX_ZYNQMP -- does it work to use that here too?

thanks
-- PMM



[PULL v3 26/27] multi-process: perform device reset in the remote process

2021-02-05 Thread Stefan Hajnoczi
From: Elena Ufimtseva 

Perform device reset in the remote process when QEMU performs
device reset. This is required to reset the internal state
(like registers, etc...) of emulated devices

Signed-off-by: Elena Ufimtseva 
Signed-off-by: John G Johnson 
Signed-off-by: Jagannathan Raman 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
7cb220a51f565dc0817bd76e2f540e89c2d2b850.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 include/hw/remote/mpqemu-link.h |  1 +
 hw/remote/message.c | 22 ++
 hw/remote/proxy.c   | 19 +++
 3 files changed, 42 insertions(+)

diff --git a/include/hw/remote/mpqemu-link.h b/include/hw/remote/mpqemu-link.h
index 71d206f00e..4ec0915885 100644
--- a/include/hw/remote/mpqemu-link.h
+++ b/include/hw/remote/mpqemu-link.h
@@ -40,6 +40,7 @@ typedef enum {
 MPQEMU_CMD_BAR_WRITE,
 MPQEMU_CMD_BAR_READ,
 MPQEMU_CMD_SET_IRQFD,
+MPQEMU_CMD_DEVICE_RESET,
 MPQEMU_CMD_MAX,
 } MPQemuCmd;
 
diff --git a/hw/remote/message.c b/hw/remote/message.c
index adab040ca1..11d729845c 100644
--- a/hw/remote/message.c
+++ b/hw/remote/message.c
@@ -19,6 +19,7 @@
 #include "exec/memattrs.h"
 #include "hw/remote/memory.h"
 #include "hw/remote/iohub.h"
+#include "sysemu/reset.h"
 
 static void process_config_write(QIOChannel *ioc, PCIDevice *dev,
  MPQemuMsg *msg, Error **errp);
@@ -26,6 +27,8 @@ static void process_config_read(QIOChannel *ioc, PCIDevice 
*dev,
 MPQemuMsg *msg, Error **errp);
 static void process_bar_write(QIOChannel *ioc, MPQemuMsg *msg, Error **errp);
 static void process_bar_read(QIOChannel *ioc, MPQemuMsg *msg, Error **errp);
+static void process_device_reset_msg(QIOChannel *ioc, PCIDevice *dev,
+ Error **errp);
 
 void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
 {
@@ -69,6 +72,9 @@ void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
 case MPQEMU_CMD_SET_IRQFD:
 process_set_irqfd_msg(pci_dev, &msg);
 break;
+case MPQEMU_CMD_DEVICE_RESET:
+process_device_reset_msg(com->ioc, pci_dev, &local_err);
+break;
 default:
 error_setg(&local_err,
"Unknown command (%d) received for device %s"
@@ -206,3 +212,19 @@ fail:
   getpid());
 }
 }
+
+static void process_device_reset_msg(QIOChannel *ioc, PCIDevice *dev,
+ Error **errp)
+{
+DeviceClass *dc = DEVICE_GET_CLASS(dev);
+DeviceState *s = DEVICE(dev);
+MPQemuMsg ret = { 0 };
+
+if (dc->reset) {
+dc->reset(s);
+}
+
+ret.cmd = MPQEMU_CMD_RET;
+
+mpqemu_msg_send(&ret, ioc, errp);
+}
diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c
index a082709881..4fa4be079d 100644
--- a/hw/remote/proxy.c
+++ b/hw/remote/proxy.c
@@ -26,6 +26,7 @@
 #include "util/event_notifier-posix.c"
 
 static void probe_pci_info(PCIDevice *dev, Error **errp);
+static void proxy_device_reset(DeviceState *dev);
 
 static void proxy_intx_update(PCIDevice *pci_dev)
 {
@@ -202,6 +203,8 @@ static void pci_proxy_dev_class_init(ObjectClass *klass, 
void *data)
 k->config_read = pci_proxy_read_config;
 k->config_write = pci_proxy_write_config;
 
+dc->reset = proxy_device_reset;
+
 device_class_set_props(dc, proxy_properties);
 }
 
@@ -358,3 +361,19 @@ static void probe_pci_info(PCIDevice *dev, Error **errp)
 }
 }
 }
+
+static void proxy_device_reset(DeviceState *dev)
+{
+PCIProxyDev *pdev = PCI_PROXY_DEV(dev);
+MPQemuMsg msg = { 0 };
+Error *local_err = NULL;
+
+msg.cmd = MPQEMU_CMD_DEVICE_RESET;
+msg.size = 0;
+
+mpqemu_msg_send_and_await_reply(&msg, pdev, &local_err);
+if (local_err) {
+error_report_err(local_err);
+}
+
+}
-- 
2.29.2



Re: [PULL v2 00/27] Block patches

2021-02-05 Thread Daniel P . Berrangé
On Fri, Feb 05, 2021 at 05:52:59PM +0100, Thomas Huth wrote:
> On 05/02/2021 17.23, Peter Maydell wrote:
> > On Fri, 5 Feb 2021 at 16:21, Stefan Hajnoczi  wrote:
> > > Thanks, I update the patch in question.
> > > 
> > > It looks like the GitLab CI doesn't include a clang version that
> > > produces this error because the pipeline passed for me:
> > > https://gitlab.com/stefanha/qemu/-/pipelines/251524779
> > > 
> > > Is there something clang-specific you want to check in the CI? Maybe
> > > clang 3.4, the oldest version supported according to ./configure?
> > 
> > Would probably be nice I guess. My ad-hoc builds use clang 6,
> > which is what tripped up here.
> 
> We should maybe discuss first whether we can bump the minimum version of
> Clang that we would like to support. I once picked Clang 3.4 since that was
> available in EPEL for RHEL7, but I think there were newer versions of Clang
> available in RHEL7 via other repos later, so 3.4 is likely really just way
> too old now...
> 
> According to https://developers.redhat.com/HW/ClangLLVM-RHEL-7 there was at
> least Clang 7.0 available on RHEL7. Debian stable seems to have at least
> 7.0, too, according to repology.org. Ubuntu 18.04 seems to have version 6,
> but later ones are available via updates? Anyway, I think we could at least
> bump the minimum version to 6.0 nowadays...

Per our support matrix, this is the last dev cycle where we need to
care about RHEL-7, as RHEL-7 will be past the 2 year cutoff in
the QEMU 6.1 cycle.

Furthermore given that CLang was only ever an EPEL package, not a
core part of the distro, I think we are justified in just ignoring
RHEL-7 already for purpose of choosing CLang min version.

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[PULL v3 24/27] multi-process: create IOHUB object to handle irq

2021-02-05 Thread Stefan Hajnoczi
From: Jagannathan Raman 

IOHUB object is added to manage PCI IRQs. It uses the KVM_IRQFD
ioctl to create an irqfd to inject PCI interrupts into the guest.
IOHUB object forwards the irqfd to the remote process. Remote process
uses this fd to directly send interrupts to the guest, bypassing QEMU.

Signed-off-by: John G Johnson 
Signed-off-by: Jagannathan Raman 
Signed-off-by: Elena Ufimtseva 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
51d5c3d54e28a68b002e3875c59599c9f5a424a1.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS |   2 +
 include/hw/pci/pci_ids.h|   3 +
 include/hw/remote/iohub.h   |  42 +++
 include/hw/remote/machine.h |   2 +
 include/hw/remote/mpqemu-link.h |   1 +
 include/hw/remote/proxy.h   |   4 ++
 hw/remote/iohub.c   | 119 
 hw/remote/machine.c |  10 +++
 hw/remote/message.c |   4 ++
 hw/remote/mpqemu-link.c |   5 ++
 hw/remote/proxy.c   |  56 +++
 hw/remote/meson.build   |   1 +
 12 files changed, 249 insertions(+)
 create mode 100644 include/hw/remote/iohub.h
 create mode 100644 hw/remote/iohub.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 3b0ea950fc..58da5d6e66 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3215,6 +3215,8 @@ F: hw/remote/proxy.c
 F: include/hw/remote/proxy.h
 F: hw/remote/proxy-memory-listener.c
 F: include/hw/remote/proxy-memory-listener.h
+F: hw/remote/iohub.c
+F: include/hw/remote/iohub.h
 
 Build and test automation
 -
diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
index 11f8ab7149..bd0c17dc78 100644
--- a/include/hw/pci/pci_ids.h
+++ b/include/hw/pci/pci_ids.h
@@ -192,6 +192,9 @@
 #define PCI_DEVICE_ID_SUN_SIMBA  0x5000
 #define PCI_DEVICE_ID_SUN_SABRE  0xa000
 
+#define PCI_VENDOR_ID_ORACLE 0x108e
+#define PCI_DEVICE_ID_REMOTE_IOHUB   0xb000
+
 #define PCI_VENDOR_ID_CMD0x1095
 #define PCI_DEVICE_ID_CMD_6460x0646
 
diff --git a/include/hw/remote/iohub.h b/include/hw/remote/iohub.h
new file mode 100644
index 00..0bf98e0d78
--- /dev/null
+++ b/include/hw/remote/iohub.h
@@ -0,0 +1,42 @@
+/*
+ * IO Hub for remote device
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef REMOTE_IOHUB_H
+#define REMOTE_IOHUB_H
+
+#include "hw/pci/pci.h"
+#include "qemu/event_notifier.h"
+#include "qemu/thread-posix.h"
+#include "hw/remote/mpqemu-link.h"
+
+#define REMOTE_IOHUB_NB_PIRQSPCI_DEVFN_MAX
+
+typedef struct ResampleToken {
+void *iohub;
+int pirq;
+} ResampleToken;
+
+typedef struct RemoteIOHubState {
+PCIDevice d;
+EventNotifier irqfds[REMOTE_IOHUB_NB_PIRQS];
+EventNotifier resamplefds[REMOTE_IOHUB_NB_PIRQS];
+unsigned int irq_level[REMOTE_IOHUB_NB_PIRQS];
+ResampleToken token[REMOTE_IOHUB_NB_PIRQS];
+QemuMutex irq_level_lock[REMOTE_IOHUB_NB_PIRQS];
+} RemoteIOHubState;
+
+int remote_iohub_map_irq(PCIDevice *pci_dev, int intx);
+void remote_iohub_set_irq(void *opaque, int pirq, int level);
+void process_set_irqfd_msg(PCIDevice *pci_dev, MPQemuMsg *msg);
+
+void remote_iohub_init(RemoteIOHubState *iohub);
+void remote_iohub_finalize(RemoteIOHubState *iohub);
+
+#endif
diff --git a/include/hw/remote/machine.h b/include/hw/remote/machine.h
index b92b2ce705..2a2a33c4b2 100644
--- a/include/hw/remote/machine.h
+++ b/include/hw/remote/machine.h
@@ -15,11 +15,13 @@
 #include "hw/boards.h"
 #include "hw/pci-host/remote.h"
 #include "io/channel.h"
+#include "hw/remote/iohub.h"
 
 struct RemoteMachineState {
 MachineState parent_obj;
 
 RemotePCIHost *host;
+RemoteIOHubState iohub;
 };
 
 /* Used to pass to co-routine device and ioc. */
diff --git a/include/hw/remote/mpqemu-link.h b/include/hw/remote/mpqemu-link.h
index 6303e62b17..71d206f00e 100644
--- a/include/hw/remote/mpqemu-link.h
+++ b/include/hw/remote/mpqemu-link.h
@@ -39,6 +39,7 @@ typedef enum {
 MPQEMU_CMD_PCI_CFGREAD,
 MPQEMU_CMD_BAR_WRITE,
 MPQEMU_CMD_BAR_READ,
+MPQEMU_CMD_SET_IRQFD,
 MPQEMU_CMD_MAX,
 } MPQemuCmd;
 
diff --git a/include/hw/remote/proxy.h b/include/hw/remote/proxy.h
index 12888b4f90..741def71f1 100644
--- a/include/hw/remote/proxy.h
+++ b/include/hw/remote/proxy.h
@@ -12,6 +12,7 @@
 #include "hw/pci/pci.h"
 #include "io/channel.h"
 #include "hw/remote/proxy-memory-listener.h"
+#include "qemu/event_notifier.h"
 
 #define TYPE_PCI_PROXY_DEV "x-pci-proxy-dev"
 OBJECT_DECLARE_SIMPLE_TYPE(PCIProxyDev, PCI_PROXY_DEV)
@@ -38,6 +39,9 @@ struct PCIProxyDev {
 QIOChannel *ioc;
 Error *migration_blocker;
 ProxyMemoryListener proxy_listener;
+int virq;
+EventNotifier intr;
+EventNotifier resample;
 ProxyMemoryRegion region[PCI_NUM_REGIONS];
 };
 
diff --git a/hw/remote/i

[PULL v3 23/27] multi-process: Synchronize remote memory

2021-02-05 Thread Stefan Hajnoczi
From: Jagannathan Raman 

Add ProxyMemoryListener object which is used to keep the view of the RAM
in sync between QEMU and remote process.
A MemoryListener is registered for system-memory AddressSpace. The
listener sends SYNC_SYSMEM message to the remote process when memory
listener commits the changes to memory, the remote process receives
the message and processes it in the handler for SYNC_SYSMEM message.

Signed-off-by: Jagannathan Raman 
Signed-off-by: John G Johnson 
Signed-off-by: Elena Ufimtseva 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
04fe4e6a9ca90d4f11ab6f59be7652f5b086a071.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS   |   2 +
 include/hw/remote/proxy-memory-listener.h |  28 +++
 include/hw/remote/proxy.h |   2 +
 hw/remote/message.c   |   4 +
 hw/remote/proxy-memory-listener.c | 227 ++
 hw/remote/proxy.c |   6 +
 hw/remote/meson.build |   1 +
 7 files changed, 270 insertions(+)
 create mode 100644 include/hw/remote/proxy-memory-listener.h
 create mode 100644 hw/remote/proxy-memory-listener.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 51a8859357..3b0ea950fc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3213,6 +3213,8 @@ F: include/hw/remote/memory.h
 F: hw/remote/memory.c
 F: hw/remote/proxy.c
 F: include/hw/remote/proxy.h
+F: hw/remote/proxy-memory-listener.c
+F: include/hw/remote/proxy-memory-listener.h
 
 Build and test automation
 -
diff --git a/include/hw/remote/proxy-memory-listener.h 
b/include/hw/remote/proxy-memory-listener.h
new file mode 100644
index 00..c4f3efb928
--- /dev/null
+++ b/include/hw/remote/proxy-memory-listener.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef PROXY_MEMORY_LISTENER_H
+#define PROXY_MEMORY_LISTENER_H
+
+#include "exec/memory.h"
+#include "io/channel.h"
+
+typedef struct ProxyMemoryListener {
+MemoryListener listener;
+
+int n_mr_sections;
+MemoryRegionSection *mr_sections;
+
+QIOChannel *ioc;
+} ProxyMemoryListener;
+
+void proxy_memory_listener_configure(ProxyMemoryListener *proxy_listener,
+ QIOChannel *ioc);
+void proxy_memory_listener_deconfigure(ProxyMemoryListener *proxy_listener);
+
+#endif
diff --git a/include/hw/remote/proxy.h b/include/hw/remote/proxy.h
index ea7fa4fb3c..12888b4f90 100644
--- a/include/hw/remote/proxy.h
+++ b/include/hw/remote/proxy.h
@@ -11,6 +11,7 @@
 
 #include "hw/pci/pci.h"
 #include "io/channel.h"
+#include "hw/remote/proxy-memory-listener.h"
 
 #define TYPE_PCI_PROXY_DEV "x-pci-proxy-dev"
 OBJECT_DECLARE_SIMPLE_TYPE(PCIProxyDev, PCI_PROXY_DEV)
@@ -36,6 +37,7 @@ struct PCIProxyDev {
 QemuMutex io_mutex;
 QIOChannel *ioc;
 Error *migration_blocker;
+ProxyMemoryListener proxy_listener;
 ProxyMemoryRegion region[PCI_NUM_REGIONS];
 };
 
diff --git a/hw/remote/message.c b/hw/remote/message.c
index f2e84457e0..25341d8ad2 100644
--- a/hw/remote/message.c
+++ b/hw/remote/message.c
@@ -17,6 +17,7 @@
 #include "sysemu/runstate.h"
 #include "hw/pci/pci.h"
 #include "exec/memattrs.h"
+#include "hw/remote/memory.h"
 
 static void process_config_write(QIOChannel *ioc, PCIDevice *dev,
  MPQemuMsg *msg, Error **errp);
@@ -61,6 +62,9 @@ void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
 case MPQEMU_CMD_BAR_READ:
 process_bar_read(com->ioc, &msg, &local_err);
 break;
+case MPQEMU_CMD_SYNC_SYSMEM:
+remote_sysmem_reconfig(&msg, &local_err);
+break;
 default:
 error_setg(&local_err,
"Unknown command (%d) received for device %s"
diff --git a/hw/remote/proxy-memory-listener.c 
b/hw/remote/proxy-memory-listener.c
new file mode 100644
index 00..af1fa6f5aa
--- /dev/null
+++ b/hw/remote/proxy-memory-listener.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "qemu/compiler.h"
+#include "qemu/int128.h"
+#include "qemu/range.h"
+#include "exec/memory.h"
+#include "exec/cpu-common.h"
+#include "cpu.h"
+#include "exec/ram_addr.h"
+#include "exec/address-spaces.h"
+#include "qapi/error.h"
+#include "hw/remote/mpqemu-link.h"
+#include "hw/remote/proxy-memory-listener.h"
+
+/*
+ * TODO: get_fd_from_hostaddr(), proxy_mrs_can_merge() and
+ * proxy_memory_listener_commit() defined below perform tasks similar to the
+ * functions defined in vhost-user.c. These functions are good candidates
+ * for refactoring.
+

Re: [PATCH 2/9] tests/qtest: Restrict xlnx-can-test to TCG builds

2021-02-05 Thread Alistair Francis
On Fri, Feb 5, 2021 at 6:45 AM Philippe Mathieu-Daudé  wrote:
>
> The Xilinx CAN controller test uses the ZCU102 board which is
> based on a ZynqMP SoC. In the default configuration - used by this
> test - this SoC creates 2 Cortex R5F cores. Such cores are not
> v8A architecture, thus can not be run under KVM. Therefore restrict
> this test to TCG.
>
> Signed-off-by: Philippe Mathieu-Daudé 

Reviewed-by: Alistair Francis 

Alistair

> ---
> Cc: Alistair Francis 
> Cc: "Edgar E. Iglesias" 
> Cc: Vikram Garhwal 
> ---
>  tests/qtest/meson.build | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
> index c83bc211b6a..d8ebd5bf98e 100644
> --- a/tests/qtest/meson.build
> +++ b/tests/qtest/meson.build
> @@ -159,10 +159,10 @@
>(cpu != 'arm' ? ['bios-tables-test'] : []) +   
>\
>(config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? 
> ['tpm-tis-device-test'] : []) +\
>(config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? 
> ['tpm-tis-device-swtpm-test'] : []) +  \
> +  (config_all.has_key('CONFIG_TCG') ? ['xlnx-can-test'] : []) +  \
>['arm-cpu-features',
> 'numa-test',
> 'boot-serial-test',
> -   'xlnx-can-test',
> 'migration-test']
>
>  qtests_s390x = \
> --
> 2.26.2
>
>



[PULL v3 18/27] multi-process: setup memory manager for remote device

2021-02-05 Thread Stefan Hajnoczi
From: Jagannathan Raman 

SyncSysMemMsg message format is defined. It is used to send
file descriptors of the RAM regions to remote device.
RAM on the remote device is configured with a set of file descriptors.
Old RAM regions are deleted and new regions, each with an fd, is
added to the RAM.

Signed-off-by: Jagannathan Raman 
Signed-off-by: John G Johnson 
Signed-off-by: Elena Ufimtseva 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
7d2d1831d812e85f681e7a8ab99e032cf4704689.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS |  2 +
 include/hw/remote/memory.h  | 19 ++
 include/hw/remote/mpqemu-link.h | 10 +
 hw/remote/memory.c  | 65 +
 hw/remote/mpqemu-link.c | 11 ++
 hw/remote/meson.build   |  2 +
 6 files changed, 109 insertions(+)
 create mode 100644 include/hw/remote/memory.h
 create mode 100644 hw/remote/memory.c

diff --git a/MAINTAINERS b/MAINTAINERS
index e37fc4b226..88732e51a2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3209,6 +3209,8 @@ F: hw/remote/mpqemu-link.c
 F: include/hw/remote/mpqemu-link.h
 F: hw/remote/message.c
 F: hw/remote/remote-obj.c
+F: include/hw/remote/memory.h
+F: hw/remote/memory.c
 
 Build and test automation
 -
diff --git a/include/hw/remote/memory.h b/include/hw/remote/memory.h
new file mode 100644
index 00..bc2e30945f
--- /dev/null
+++ b/include/hw/remote/memory.h
@@ -0,0 +1,19 @@
+/*
+ * Memory manager for remote device
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef REMOTE_MEMORY_H
+#define REMOTE_MEMORY_H
+
+#include "exec/hwaddr.h"
+#include "hw/remote/mpqemu-link.h"
+
+void remote_sysmem_reconfig(MPQemuMsg *msg, Error **errp);
+
+#endif
diff --git a/include/hw/remote/mpqemu-link.h b/include/hw/remote/mpqemu-link.h
index cac699cb42..6ee5bc5751 100644
--- a/include/hw/remote/mpqemu-link.h
+++ b/include/hw/remote/mpqemu-link.h
@@ -14,6 +14,7 @@
 #include "qom/object.h"
 #include "qemu/thread.h"
 #include "io/channel.h"
+#include "exec/hwaddr.h"
 
 #define REMOTE_MAX_FDS 8
 
@@ -30,9 +31,16 @@
  *
  */
 typedef enum {
+MPQEMU_CMD_SYNC_SYSMEM,
 MPQEMU_CMD_MAX,
 } MPQemuCmd;
 
+typedef struct {
+hwaddr gpas[REMOTE_MAX_FDS];
+uint64_t sizes[REMOTE_MAX_FDS];
+off_t offsets[REMOTE_MAX_FDS];
+} SyncSysmemMsg;
+
 /**
  * MPQemuMsg:
  * @cmd: The remote command
@@ -43,12 +51,14 @@ typedef enum {
  * MPQemuMsg Format of the message sent to the remote device from QEMU.
  *
  */
+
 typedef struct {
 int cmd;
 size_t size;
 
 union {
 uint64_t u64;
+SyncSysmemMsg sync_sysmem;
 } data;
 
 int fds[REMOTE_MAX_FDS];
diff --git a/hw/remote/memory.c b/hw/remote/memory.c
new file mode 100644
index 00..32085b1e05
--- /dev/null
+++ b/hw/remote/memory.c
@@ -0,0 +1,65 @@
+/*
+ * Memory manager for remote device
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "hw/remote/memory.h"
+#include "exec/address-spaces.h"
+#include "exec/ram_addr.h"
+#include "qapi/error.h"
+
+static void remote_sysmem_reset(void)
+{
+MemoryRegion *sysmem, *subregion, *next;
+
+sysmem = get_system_memory();
+
+QTAILQ_FOREACH_SAFE(subregion, &sysmem->subregions, subregions_link, next) 
{
+if (subregion->ram) {
+memory_region_del_subregion(sysmem, subregion);
+object_unparent(OBJECT(subregion));
+}
+}
+}
+
+void remote_sysmem_reconfig(MPQemuMsg *msg, Error **errp)
+{
+ERRP_GUARD();
+SyncSysmemMsg *sysmem_info = &msg->data.sync_sysmem;
+MemoryRegion *sysmem, *subregion;
+static unsigned int suffix;
+int region;
+
+sysmem = get_system_memory();
+
+remote_sysmem_reset();
+
+for (region = 0; region < msg->num_fds; region++) {
+g_autofree char *name;
+subregion = g_new(MemoryRegion, 1);
+name = g_strdup_printf("remote-mem-%u", suffix++);
+memory_region_init_ram_from_fd(subregion, NULL,
+   name, sysmem_info->sizes[region],
+   true, msg->fds[region],
+   sysmem_info->offsets[region],
+   errp);
+
+if (*errp) {
+g_free(subregion);
+remote_sysmem_reset();
+return;
+}
+
+memory_region_add_subregion(sysmem, sysmem_info->gpas[region],
+subregion);
+
+}
+}
diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c
index 0d1899fd94..4ee1128285 100644
--- a/hw/remote/

Re: [PULL v2 00/27] Block patches

2021-02-05 Thread Thomas Huth

On 05/02/2021 17.23, Peter Maydell wrote:

On Fri, 5 Feb 2021 at 16:21, Stefan Hajnoczi  wrote:

Thanks, I update the patch in question.

It looks like the GitLab CI doesn't include a clang version that
produces this error because the pipeline passed for me:
https://gitlab.com/stefanha/qemu/-/pipelines/251524779

Is there something clang-specific you want to check in the CI? Maybe
clang 3.4, the oldest version supported according to ./configure?


Would probably be nice I guess. My ad-hoc builds use clang 6,
which is what tripped up here.


We should maybe discuss first whether we can bump the minimum version of 
Clang that we would like to support. I once picked Clang 3.4 since that was 
available in EPEL for RHEL7, but I think there were newer versions of Clang 
available in RHEL7 via other repos later, so 3.4 is likely really just way 
too old now...


According to https://developers.redhat.com/HW/ClangLLVM-RHEL-7 there was at 
least Clang 7.0 available on RHEL7. Debian stable seems to have at least 
7.0, too, according to repology.org. Ubuntu 18.04 seems to have version 6, 
but later ones are available via updates? Anyway, I think we could at least 
bump the minimum version to 6.0 nowadays...


 Thomas




[PULL v3 27/27] docs: fix Parallels Image "dirty bitmap" section

2021-02-05 Thread Stefan Hajnoczi
From: "Denis V. Lunev" 

Original specification says that the l1 table size is 64 * l1_size, which
is obviously wrong. The size of the l1 entry is 64 _bits_, not bytes.
Thus 64 is to be replaced with 8, as the specification speaks about bytes.

There is also a minor tweak: the field name is renamed from l1 to l1_table,
which matches with the later text.

Signed-off-by: Denis V. Lunev 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-id: 20210128171313.2210947-1-...@openvz.org
CC: Stefan Hajnoczi 
CC: Vladimir Sementsov-Ogievskiy 

[Replace the original commit message "docs: fix mistake in dirty bitmap
feature description" as suggested by Eric Blake.
--Stefan]

Signed-off-by: Stefan Hajnoczi 
---
 docs/interop/parallels.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/interop/parallels.txt b/docs/interop/parallels.txt
index e9271eba5d..f15bf35bd1 100644
--- a/docs/interop/parallels.txt
+++ b/docs/interop/parallels.txt
@@ -208,7 +208,7 @@ of its data area are:
   28 - 31:l1_size
   The number of entries in the L1 table of the bitmap.
 
-  variable:   l1 (64 * l1_size bytes)
+  variable:   l1_table (8 * l1_size bytes)
   L1 offset table (in bytes)
 
 A dirty bitmap is stored using a one-level structure for the mapping to host
-- 
2.29.2



[PULL v3 25/27] multi-process: Retrieve PCI info from remote process

2021-02-05 Thread Stefan Hajnoczi
From: Jagannathan Raman 

Retrieve PCI configuration info about the remote device and
configure the Proxy PCI object based on the returned information

Signed-off-by: Elena Ufimtseva 
Signed-off-by: John G Johnson 
Signed-off-by: Jagannathan Raman 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
85ee367bbb993aa23699b44cfedd83b4ea6d5221.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 hw/remote/proxy.c | 84 +++
 1 file changed, 84 insertions(+)

diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c
index 555b3103f4..a082709881 100644
--- a/hw/remote/proxy.c
+++ b/hw/remote/proxy.c
@@ -25,6 +25,8 @@
 #include "sysemu/kvm.h"
 #include "util/event_notifier-posix.c"
 
+static void probe_pci_info(PCIDevice *dev, Error **errp);
+
 static void proxy_intx_update(PCIDevice *pci_dev)
 {
 PCIProxyDev *dev = PCI_PROXY_DEV(pci_dev);
@@ -77,6 +79,7 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error 
**errp)
 {
 ERRP_GUARD();
 PCIProxyDev *dev = PCI_PROXY_DEV(device);
+uint8_t *pci_conf = device->config;
 int fd;
 
 if (!dev->fd) {
@@ -106,9 +109,14 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error 
**errp)
 qemu_mutex_init(&dev->io_mutex);
 qio_channel_set_blocking(dev->ioc, true, NULL);
 
+pci_conf[PCI_LATENCY_TIMER] = 0xff;
+pci_conf[PCI_INTERRUPT_PIN] = 0x01;
+
 proxy_memory_listener_configure(&dev->proxy_listener, dev->ioc);
 
 setup_irqfd(dev);
+
+probe_pci_info(PCI_DEVICE(dev), errp);
 }
 
 static void pci_proxy_dev_exit(PCIDevice *pdev)
@@ -274,3 +282,79 @@ const MemoryRegionOps proxy_mr_ops = {
 .max_access_size = 8,
 },
 };
+
+static void probe_pci_info(PCIDevice *dev, Error **errp)
+{
+PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
+uint32_t orig_val, new_val, base_class, val;
+PCIProxyDev *pdev = PCI_PROXY_DEV(dev);
+DeviceClass *dc = DEVICE_CLASS(pc);
+uint8_t type;
+int i, size;
+
+config_op_send(pdev, PCI_VENDOR_ID, &val, 2, MPQEMU_CMD_PCI_CFGREAD);
+pc->vendor_id = (uint16_t)val;
+
+config_op_send(pdev, PCI_DEVICE_ID, &val, 2, MPQEMU_CMD_PCI_CFGREAD);
+pc->device_id = (uint16_t)val;
+
+config_op_send(pdev, PCI_CLASS_DEVICE, &val, 2, MPQEMU_CMD_PCI_CFGREAD);
+pc->class_id = (uint16_t)val;
+
+config_op_send(pdev, PCI_SUBSYSTEM_ID, &val, 2, MPQEMU_CMD_PCI_CFGREAD);
+pc->subsystem_id = (uint16_t)val;
+
+base_class = pc->class_id >> 4;
+switch (base_class) {
+case PCI_BASE_CLASS_BRIDGE:
+set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+break;
+case PCI_BASE_CLASS_STORAGE:
+set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+break;
+case PCI_BASE_CLASS_NETWORK:
+set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
+break;
+case PCI_BASE_CLASS_INPUT:
+set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
+break;
+case PCI_BASE_CLASS_DISPLAY:
+set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
+break;
+case PCI_BASE_CLASS_PROCESSOR:
+set_bit(DEVICE_CATEGORY_CPU, dc->categories);
+break;
+default:
+set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+break;
+}
+
+for (i = 0; i < PCI_NUM_REGIONS; i++) {
+config_op_send(pdev, PCI_BASE_ADDRESS_0 + (4 * i), &orig_val, 4,
+   MPQEMU_CMD_PCI_CFGREAD);
+new_val = 0x;
+config_op_send(pdev, PCI_BASE_ADDRESS_0 + (4 * i), &new_val, 4,
+   MPQEMU_CMD_PCI_CFGWRITE);
+config_op_send(pdev, PCI_BASE_ADDRESS_0 + (4 * i), &new_val, 4,
+   MPQEMU_CMD_PCI_CFGREAD);
+size = (~(new_val & 0xFFF0)) + 1;
+config_op_send(pdev, PCI_BASE_ADDRESS_0 + (4 * i), &orig_val, 4,
+   MPQEMU_CMD_PCI_CFGWRITE);
+type = (new_val & 0x1) ?
+   PCI_BASE_ADDRESS_SPACE_IO : PCI_BASE_ADDRESS_SPACE_MEMORY;
+
+if (size) {
+g_autofree char *name;
+pdev->region[i].dev = pdev;
+pdev->region[i].present = true;
+if (type == PCI_BASE_ADDRESS_SPACE_MEMORY) {
+pdev->region[i].memory = true;
+}
+name = g_strdup_printf("bar-region-%d", i);
+memory_region_init_io(&pdev->region[i].mr, OBJECT(pdev),
+  &proxy_mr_ops, &pdev->region[i],
+  name, size);
+pci_register_bar(dev, i, type, &pdev->region[i].mr);
+}
+}
+}
-- 
2.29.2



[PULL v3 20/27] multi-process: add proxy communication functions

2021-02-05 Thread Stefan Hajnoczi
From: Elena Ufimtseva 

Signed-off-by: Elena Ufimtseva 
Signed-off-by: Jagannathan Raman 
Signed-off-by: John G Johnson 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
d54edb4176361eed86b903e8f27058363b6c83b3.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 include/hw/remote/mpqemu-link.h |  4 
 hw/remote/mpqemu-link.c | 34 +
 2 files changed, 38 insertions(+)

diff --git a/include/hw/remote/mpqemu-link.h b/include/hw/remote/mpqemu-link.h
index 6ee5bc5751..1b35d408f8 100644
--- a/include/hw/remote/mpqemu-link.h
+++ b/include/hw/remote/mpqemu-link.h
@@ -15,6 +15,8 @@
 #include "qemu/thread.h"
 #include "io/channel.h"
 #include "exec/hwaddr.h"
+#include "io/channel-socket.h"
+#include "hw/remote/proxy.h"
 
 #define REMOTE_MAX_FDS 8
 
@@ -68,6 +70,8 @@ typedef struct {
 bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp);
 bool mpqemu_msg_recv(MPQemuMsg *msg, QIOChannel *ioc, Error **errp);
 
+uint64_t mpqemu_msg_send_and_await_reply(MPQemuMsg *msg, PCIProxyDev *pdev,
+ Error **errp);
 bool mpqemu_msg_valid(MPQemuMsg *msg);
 
 #endif
diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c
index 4ee1128285..f5e9e01923 100644
--- a/hw/remote/mpqemu-link.c
+++ b/hw/remote/mpqemu-link.c
@@ -182,6 +182,40 @@ fail:
 return ret;
 }
 
+/*
+ * Send msg and wait for a reply with command code RET_MSG.
+ * Returns the message received of size u64 or UINT64_MAX
+ * on error.
+ * Called from VCPU thread in non-coroutine context.
+ * Used by the Proxy object to communicate to remote processes.
+ */
+uint64_t mpqemu_msg_send_and_await_reply(MPQemuMsg *msg, PCIProxyDev *pdev,
+ Error **errp)
+{
+ERRP_GUARD();
+MPQemuMsg msg_reply = {0};
+uint64_t ret = UINT64_MAX;
+
+assert(!qemu_in_coroutine());
+
+QEMU_LOCK_GUARD(&pdev->io_mutex);
+if (!mpqemu_msg_send(msg, pdev->ioc, errp)) {
+return ret;
+}
+
+if (!mpqemu_msg_recv(&msg_reply, pdev->ioc, errp)) {
+return ret;
+}
+
+if (!mpqemu_msg_valid(&msg_reply)) {
+error_setg(errp, "ERROR: Invalid reply received for command %d",
+ msg->cmd);
+return ret;
+}
+
+return msg_reply.data.u64;
+}
+
 bool mpqemu_msg_valid(MPQemuMsg *msg)
 {
 if (msg->cmd >= MPQEMU_CMD_MAX && msg->cmd < 0) {
-- 
2.29.2



[PULL v3 19/27] multi-process: introduce proxy object

2021-02-05 Thread Stefan Hajnoczi
From: Elena Ufimtseva 

Defines a PCI Device proxy object as a child of TYPE_PCI_DEVICE.

Signed-off-by: Elena Ufimtseva 
Signed-off-by: Jagannathan Raman 
Signed-off-by: John G Johnson 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
b5186ebfedf8e557044d09a768846c59230ad3a7.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS   |  2 +
 include/hw/remote/proxy.h | 33 +
 hw/remote/proxy.c | 99 +++
 hw/remote/meson.build |  1 +
 4 files changed, 135 insertions(+)
 create mode 100644 include/hw/remote/proxy.h
 create mode 100644 hw/remote/proxy.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 88732e51a2..51a8859357 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3211,6 +3211,8 @@ F: hw/remote/message.c
 F: hw/remote/remote-obj.c
 F: include/hw/remote/memory.h
 F: hw/remote/memory.c
+F: hw/remote/proxy.c
+F: include/hw/remote/proxy.h
 
 Build and test automation
 -
diff --git a/include/hw/remote/proxy.h b/include/hw/remote/proxy.h
new file mode 100644
index 00..faa9c4d580
--- /dev/null
+++ b/include/hw/remote/proxy.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef PROXY_H
+#define PROXY_H
+
+#include "hw/pci/pci.h"
+#include "io/channel.h"
+
+#define TYPE_PCI_PROXY_DEV "x-pci-proxy-dev"
+OBJECT_DECLARE_SIMPLE_TYPE(PCIProxyDev, PCI_PROXY_DEV)
+
+struct PCIProxyDev {
+PCIDevice parent_dev;
+char *fd;
+
+/*
+ * Mutex used to protect the QIOChannel fd from
+ * the concurrent access by the VCPUs since proxy
+ * blocks while awaiting for the replies from the
+ * process remote.
+ */
+QemuMutex io_mutex;
+QIOChannel *ioc;
+Error *migration_blocker;
+};
+
+#endif /* PROXY_H */
diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c
new file mode 100644
index 00..cd5b071ab4
--- /dev/null
+++ b/hw/remote/proxy.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "hw/remote/proxy.h"
+#include "hw/pci/pci.h"
+#include "qapi/error.h"
+#include "io/channel-util.h"
+#include "hw/qdev-properties.h"
+#include "monitor/monitor.h"
+#include "migration/blocker.h"
+#include "qemu/sockets.h"
+
+static void pci_proxy_dev_realize(PCIDevice *device, Error **errp)
+{
+ERRP_GUARD();
+PCIProxyDev *dev = PCI_PROXY_DEV(device);
+int fd;
+
+if (!dev->fd) {
+error_setg(errp, "fd parameter not specified for %s",
+   DEVICE(device)->id);
+return;
+}
+
+fd = monitor_fd_param(monitor_cur(), dev->fd, errp);
+if (fd == -1) {
+error_prepend(errp, "proxy: unable to parse fd %s: ", dev->fd);
+return;
+}
+
+if (!fd_is_socket(fd)) {
+error_setg(errp, "proxy: fd %d is not a socket", fd);
+close(fd);
+return;
+}
+
+dev->ioc = qio_channel_new_fd(fd, errp);
+
+error_setg(&dev->migration_blocker, "%s does not support migration",
+   TYPE_PCI_PROXY_DEV);
+migrate_add_blocker(dev->migration_blocker, errp);
+
+qemu_mutex_init(&dev->io_mutex);
+qio_channel_set_blocking(dev->ioc, true, NULL);
+}
+
+static void pci_proxy_dev_exit(PCIDevice *pdev)
+{
+PCIProxyDev *dev = PCI_PROXY_DEV(pdev);
+
+if (dev->ioc) {
+qio_channel_close(dev->ioc, NULL);
+}
+
+migrate_del_blocker(dev->migration_blocker);
+
+error_free(dev->migration_blocker);
+}
+
+static Property proxy_properties[] = {
+DEFINE_PROP_STRING("fd", PCIProxyDev, fd),
+DEFINE_PROP_END_OF_LIST(),
+};
+
+static void pci_proxy_dev_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+
+k->realize = pci_proxy_dev_realize;
+k->exit = pci_proxy_dev_exit;
+device_class_set_props(dc, proxy_properties);
+}
+
+static const TypeInfo pci_proxy_dev_type_info = {
+.name  = TYPE_PCI_PROXY_DEV,
+.parent= TYPE_PCI_DEVICE,
+.instance_size = sizeof(PCIProxyDev),
+.class_init= pci_proxy_dev_class_init,
+.interfaces = (InterfaceInfo[]) {
+{ INTERFACE_CONVENTIONAL_PCI_DEVICE },
+{ },
+},
+};
+
+static void pci_proxy_dev_register_types(void)
+{
+type_register_static(&pci_proxy_dev_type_info);
+}
+
+type_init(pci_proxy_dev_register_types)
diff --git a/hw/remote/meson.build b/hw/remote/meson.build
index 64da16c1de..569cd20edf 100644
--- a/hw/remote/meson.build
+++ b/hw/remote/meson.build
@@ -4,6 +4,7 @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: 
files('machine.c'))
 remote_ss.add(when: 'CONFIG_MULTIPROCES

Re: [PATCH v11 00/13] hw/block/nvme: Support Namespace Types and Zoned Namespace Command Set

2021-02-05 Thread Minwoo Im
On 21-02-06 01:43:18, Minwoo Im wrote:
> On 21-02-05 08:22:52, Keith Busch wrote:
> > On Sat, Feb 06, 2021 at 01:07:57AM +0900, Minwoo Im wrote:
> > > If multipath is enabled, the namespace head and hidden namespace will be
> > > created.  In this case, /sys/block/nvme0n1/queue/nr_zones are not
> > > returning proper value for the namespace itself.  By the way, the hidden
> > > namespace /sys/block/nvme0c0n1/queue/nr_zones are returning properly.
> > > 
> > > Is it okay for sysfs of the head namespace node (nvme0n1) not to manage
> > > the request queue attributes like nr_zones?
> > 
> > This should fix it. Untested, as my dev machine is in need of repair,
> > but if someone can confirm this is successful, I can send it to the
> > kernel list.
> > 
> > ---
> > diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> > index 65bd6efa5e1c..eb18949bb999 100644
> > --- a/drivers/nvme/host/multipath.c
> > +++ b/drivers/nvme/host/multipath.c
> > @@ -677,6 +677,8 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct 
> > nvme_id_ns *id)
> > if (blk_queue_stable_writes(ns->queue) && ns->head->disk)
> > blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES,
> >ns->head->disk->queue);
> > +   if (blk_queue_is_zoned(ns->queue))
> > +   blk_revalidate_disk_zones(ns->head->disk, NULL);
> >  }
> >  
> >  void nvme_mpath_remove_disk(struct nvme_ns_head *head)
> > --
> 
> Thanks Keith,
> 
> Just for sharing testing result based on this kernel quickly:
> 
> In blk_revalidate_disk_zones(), 
> 
>   488 int blk_revalidate_disk_zones(struct gendisk *disk,
>   489   void (*update_driver_data)(struct 
> gendisk *disk))
>   490 {
>   491 struct request_queue *q = disk->queue;
>   492 struct blk_revalidate_zone_args args = {
>   493 .disk   = disk,
>   494 };
>   495 unsigned int noio_flag;
>   496 int ret;
>   497
>   498 if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
>   499 return -EIO;
>   500 if (WARN_ON_ONCE(!queue_is_mq(q)))
>   501 return -EIO;
>    
> 
> (q->mq_ops == NULL) in this case, so that the q->nr_zones are not
> getting set.

Not sure if it is okay to just give ctrl->tagset for the head
request_queue, but this patch works fine so far.

---
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 282b7a4ea9a9..22febc7baa36 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -375,7 +375,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct 
nvme_ns_head *head)
if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath)
return 0;
 
-   q = blk_alloc_queue(ctrl->numa_node);
+   q = blk_mq_init_queue(ctrl->tagset);
if (!q)
goto out;
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
@@ -677,6 +677,8 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct 
nvme_id_ns *id)
if (blk_queue_stable_writes(ns->queue) && ns->head->disk)
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES,
   ns->head->disk->queue);
+   if (blk_queue_is_zoned(ns->queue))
+   blk_revalidate_disk_zones(ns->head->disk, NULL);
 }
 
 void nvme_mpath_remove_disk(struct nvme_ns_head *head)



[PULL v3 17/27] multi-process: Associate fd of a PCIDevice with its object

2021-02-05 Thread Stefan Hajnoczi
From: Jagannathan Raman 

Associate the file descriptor for a PCIDevice in remote process with
DeviceState object.

Signed-off-by: Elena Ufimtseva 
Signed-off-by: John G Johnson 
Signed-off-by: Jagannathan Raman 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
f405a2ed5d7518b87bea7c59cfdf334d67e5ee51.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS|   1 +
 hw/remote/remote-obj.c | 203 +
 hw/remote/meson.build  |   1 +
 3 files changed, 205 insertions(+)
 create mode 100644 hw/remote/remote-obj.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 771513bc34..e37fc4b226 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3208,6 +3208,7 @@ F: include/hw/remote/machine.h
 F: hw/remote/mpqemu-link.c
 F: include/hw/remote/mpqemu-link.h
 F: hw/remote/message.c
+F: hw/remote/remote-obj.c
 
 Build and test automation
 -
diff --git a/hw/remote/remote-obj.c b/hw/remote/remote-obj.c
new file mode 100644
index 00..4f21254219
--- /dev/null
+++ b/hw/remote/remote-obj.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright © 2020, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL-v2, version 2 or later.
+ *
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "qemu/error-report.h"
+#include "qemu/notify.h"
+#include "qom/object_interfaces.h"
+#include "hw/qdev-core.h"
+#include "io/channel.h"
+#include "hw/qdev-core.h"
+#include "hw/remote/machine.h"
+#include "io/channel-util.h"
+#include "qapi/error.h"
+#include "sysemu/sysemu.h"
+#include "hw/pci/pci.h"
+#include "qemu/sockets.h"
+#include "monitor/monitor.h"
+
+#define TYPE_REMOTE_OBJECT "x-remote-object"
+OBJECT_DECLARE_TYPE(RemoteObject, RemoteObjectClass, REMOTE_OBJECT)
+
+struct RemoteObjectClass {
+ObjectClass parent_class;
+
+unsigned int nr_devs;
+unsigned int max_devs;
+};
+
+struct RemoteObject {
+/* private */
+Object parent;
+
+Notifier machine_done;
+
+int32_t fd;
+char *devid;
+
+QIOChannel *ioc;
+
+DeviceState *dev;
+DeviceListener listener;
+};
+
+static void remote_object_set_fd(Object *obj, const char *str, Error **errp)
+{
+RemoteObject *o = REMOTE_OBJECT(obj);
+int fd = -1;
+
+fd = monitor_fd_param(monitor_cur(), str, errp);
+if (fd == -1) {
+error_prepend(errp, "Could not parse remote object fd %s:", str);
+return;
+}
+
+if (!fd_is_socket(fd)) {
+error_setg(errp, "File descriptor '%s' is not a socket", str);
+close(fd);
+return;
+}
+
+o->fd = fd;
+}
+
+static void remote_object_set_devid(Object *obj, const char *str, Error **errp)
+{
+RemoteObject *o = REMOTE_OBJECT(obj);
+
+g_free(o->devid);
+
+o->devid = g_strdup(str);
+}
+
+static void remote_object_unrealize_listener(DeviceListener *listener,
+ DeviceState *dev)
+{
+RemoteObject *o = container_of(listener, RemoteObject, listener);
+
+if (o->dev == dev) {
+object_unref(OBJECT(o));
+}
+}
+
+static void remote_object_machine_done(Notifier *notifier, void *data)
+{
+RemoteObject *o = container_of(notifier, RemoteObject, machine_done);
+DeviceState *dev = NULL;
+QIOChannel *ioc = NULL;
+Coroutine *co = NULL;
+RemoteCommDev *comdev = NULL;
+Error *err = NULL;
+
+dev = qdev_find_recursive(sysbus_get_default(), o->devid);
+if (!dev || !object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+error_report("%s is not a PCI device", o->devid);
+return;
+}
+
+ioc = qio_channel_new_fd(o->fd, &err);
+if (!ioc) {
+error_report_err(err);
+return;
+}
+qio_channel_set_blocking(ioc, false, NULL);
+
+o->dev = dev;
+
+o->listener.unrealize = remote_object_unrealize_listener;
+device_listener_register(&o->listener);
+
+/* co-routine should free this. */
+comdev = g_new0(RemoteCommDev, 1);
+*comdev = (RemoteCommDev) {
+.ioc = ioc,
+.dev = PCI_DEVICE(dev),
+};
+
+co = qemu_coroutine_create(mpqemu_remote_msg_loop_co, comdev);
+qemu_coroutine_enter(co);
+}
+
+static void remote_object_init(Object *obj)
+{
+RemoteObjectClass *k = REMOTE_OBJECT_GET_CLASS(obj);
+RemoteObject *o = REMOTE_OBJECT(obj);
+
+if (k->nr_devs >= k->max_devs) {
+error_report("Reached maximum number of devices: %u", k->max_devs);
+return;
+}
+
+o->ioc = NULL;
+o->fd = -1;
+o->devid = NULL;
+
+k->nr_devs++;
+
+o->machine_done.notify = remote_object_machine_done;
+qemu_add_machine_init_done_notifier(&o->machine_done);
+}
+
+static void remote_object_finalize(Object *obj)
+{
+RemoteObjectClass *k = REMOTE_OBJECT_GET_CLASS(obj);
+RemoteObject *o = REMOTE_OBJECT(obj);
+
+device_listener_unregister(&o->listener);
+
+if (o->ioc) {
+qio_channel_shutdown(o->i

[PULL v3 16/27] multi-process: Initialize message handler in remote device

2021-02-05 Thread Stefan Hajnoczi
From: Jagannathan Raman 

Initializes the message handler function in the remote process. It is
called whenever there's an event pending on QIOChannel that registers
this function.

Signed-off-by: Elena Ufimtseva 
Signed-off-by: John G Johnson 
Signed-off-by: Jagannathan Raman 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
99d38d8b93753a6409ac2340e858858cda59ab1b.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS |  1 +
 include/hw/remote/machine.h |  9 ++
 hw/remote/message.c | 57 +
 hw/remote/meson.build   |  1 +
 4 files changed, 68 insertions(+)
 create mode 100644 hw/remote/message.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 97137f617b..771513bc34 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3207,6 +3207,7 @@ F: hw/remote/machine.c
 F: include/hw/remote/machine.h
 F: hw/remote/mpqemu-link.c
 F: include/hw/remote/mpqemu-link.h
+F: hw/remote/message.c
 
 Build and test automation
 -
diff --git a/include/hw/remote/machine.h b/include/hw/remote/machine.h
index bdfbca40b9..b92b2ce705 100644
--- a/include/hw/remote/machine.h
+++ b/include/hw/remote/machine.h
@@ -14,6 +14,7 @@
 #include "qom/object.h"
 #include "hw/boards.h"
 #include "hw/pci-host/remote.h"
+#include "io/channel.h"
 
 struct RemoteMachineState {
 MachineState parent_obj;
@@ -21,7 +22,15 @@ struct RemoteMachineState {
 RemotePCIHost *host;
 };
 
+/* Used to pass to co-routine device and ioc. */
+typedef struct RemoteCommDev {
+PCIDevice *dev;
+QIOChannel *ioc;
+} RemoteCommDev;
+
 #define TYPE_REMOTE_MACHINE "x-remote-machine"
 OBJECT_DECLARE_SIMPLE_TYPE(RemoteMachineState, REMOTE_MACHINE)
 
+void coroutine_fn mpqemu_remote_msg_loop_co(void *data);
+
 #endif
diff --git a/hw/remote/message.c b/hw/remote/message.c
new file mode 100644
index 00..36e2d4fb0c
--- /dev/null
+++ b/hw/remote/message.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright © 2020, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL-v2, version 2 or later.
+ *
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "hw/remote/machine.h"
+#include "io/channel.h"
+#include "hw/remote/mpqemu-link.h"
+#include "qapi/error.h"
+#include "sysemu/runstate.h"
+
+void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
+{
+g_autofree RemoteCommDev *com = (RemoteCommDev *)data;
+PCIDevice *pci_dev = NULL;
+Error *local_err = NULL;
+
+assert(com->ioc);
+
+pci_dev = com->dev;
+for (; !local_err;) {
+MPQemuMsg msg = {0};
+
+if (!mpqemu_msg_recv(&msg, com->ioc, &local_err)) {
+break;
+}
+
+if (!mpqemu_msg_valid(&msg)) {
+error_setg(&local_err, "Received invalid message from proxy"
+   "in remote process pid="FMT_pid"",
+   getpid());
+break;
+}
+
+switch (msg.cmd) {
+default:
+error_setg(&local_err,
+   "Unknown command (%d) received for device %s"
+   " (pid="FMT_pid")",
+   msg.cmd, DEVICE(pci_dev)->id, getpid());
+}
+}
+
+if (local_err) {
+error_report_err(local_err);
+qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR);
+} else {
+qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+}
+}
diff --git a/hw/remote/meson.build b/hw/remote/meson.build
index a2b2fc0e59..9f5c57f35a 100644
--- a/hw/remote/meson.build
+++ b/hw/remote/meson.build
@@ -2,5 +2,6 @@ remote_ss = ss.source_set()
 
 remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('machine.c'))
 remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('mpqemu-link.c'))
+remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('message.c'))
 
 softmmu_ss.add_all(when: 'CONFIG_MULTIPROCESS', if_true: remote_ss)
-- 
2.29.2



[PULL v3 22/27] multi-process: PCI BAR read/write handling for proxy & remote endpoints

2021-02-05 Thread Stefan Hajnoczi
From: Jagannathan Raman 

Proxy device object implements handler for PCI BAR writes and reads.
The handler uses BAR_WRITE/BAR_READ message to communicate to the
remote process with the BAR address and value to be written/read.
The remote process implements handler for BAR_WRITE/BAR_READ
message.

Signed-off-by: Jagannathan Raman 
Signed-off-by: Elena Ufimtseva 
Signed-off-by: John G Johnson 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
a8b76714a9688be5552c4c92d089bc9e8a4707ff.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 include/hw/remote/mpqemu-link.h | 10 
 include/hw/remote/proxy.h   |  9 
 hw/remote/message.c | 83 +
 hw/remote/mpqemu-link.c |  6 +++
 hw/remote/proxy.c   | 60 
 5 files changed, 168 insertions(+)

diff --git a/include/hw/remote/mpqemu-link.h b/include/hw/remote/mpqemu-link.h
index 7bc0bddb5a..6303e62b17 100644
--- a/include/hw/remote/mpqemu-link.h
+++ b/include/hw/remote/mpqemu-link.h
@@ -37,6 +37,8 @@ typedef enum {
 MPQEMU_CMD_RET,
 MPQEMU_CMD_PCI_CFGWRITE,
 MPQEMU_CMD_PCI_CFGREAD,
+MPQEMU_CMD_BAR_WRITE,
+MPQEMU_CMD_BAR_READ,
 MPQEMU_CMD_MAX,
 } MPQemuCmd;
 
@@ -52,6 +54,13 @@ typedef struct {
 int len;
 } PciConfDataMsg;
 
+typedef struct {
+hwaddr addr;
+uint64_t val;
+unsigned size;
+bool memory;
+} BarAccessMsg;
+
 /**
  * MPQemuMsg:
  * @cmd: The remote command
@@ -71,6 +80,7 @@ typedef struct {
 uint64_t u64;
 PciConfDataMsg pci_conf_data;
 SyncSysmemMsg sync_sysmem;
+BarAccessMsg bar_access;
 } data;
 
 int fds[REMOTE_MAX_FDS];
diff --git a/include/hw/remote/proxy.h b/include/hw/remote/proxy.h
index faa9c4d580..ea7fa4fb3c 100644
--- a/include/hw/remote/proxy.h
+++ b/include/hw/remote/proxy.h
@@ -15,6 +15,14 @@
 #define TYPE_PCI_PROXY_DEV "x-pci-proxy-dev"
 OBJECT_DECLARE_SIMPLE_TYPE(PCIProxyDev, PCI_PROXY_DEV)
 
+typedef struct ProxyMemoryRegion {
+PCIProxyDev *dev;
+MemoryRegion mr;
+bool memory;
+bool present;
+uint8_t type;
+} ProxyMemoryRegion;
+
 struct PCIProxyDev {
 PCIDevice parent_dev;
 char *fd;
@@ -28,6 +36,7 @@ struct PCIProxyDev {
 QemuMutex io_mutex;
 QIOChannel *ioc;
 Error *migration_blocker;
+ProxyMemoryRegion region[PCI_NUM_REGIONS];
 };
 
 #endif /* PROXY_H */
diff --git a/hw/remote/message.c b/hw/remote/message.c
index 636bd161bd..f2e84457e0 100644
--- a/hw/remote/message.c
+++ b/hw/remote/message.c
@@ -16,11 +16,14 @@
 #include "qapi/error.h"
 #include "sysemu/runstate.h"
 #include "hw/pci/pci.h"
+#include "exec/memattrs.h"
 
 static void process_config_write(QIOChannel *ioc, PCIDevice *dev,
  MPQemuMsg *msg, Error **errp);
 static void process_config_read(QIOChannel *ioc, PCIDevice *dev,
 MPQemuMsg *msg, Error **errp);
+static void process_bar_write(QIOChannel *ioc, MPQemuMsg *msg, Error **errp);
+static void process_bar_read(QIOChannel *ioc, MPQemuMsg *msg, Error **errp);
 
 void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
 {
@@ -52,6 +55,12 @@ void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
 case MPQEMU_CMD_PCI_CFGREAD:
 process_config_read(com->ioc, pci_dev, &msg, &local_err);
 break;
+case MPQEMU_CMD_BAR_WRITE:
+process_bar_write(com->ioc, &msg, &local_err);
+break;
+case MPQEMU_CMD_BAR_READ:
+process_bar_read(com->ioc, &msg, &local_err);
+break;
 default:
 error_setg(&local_err,
"Unknown command (%d) received for device %s"
@@ -115,3 +124,77 @@ static void process_config_read(QIOChannel *ioc, PCIDevice 
*dev,
   getpid());
 }
 }
+
+static void process_bar_write(QIOChannel *ioc, MPQemuMsg *msg, Error **errp)
+{
+ERRP_GUARD();
+BarAccessMsg *bar_access = &msg->data.bar_access;
+AddressSpace *as =
+bar_access->memory ? &address_space_memory : &address_space_io;
+MPQemuMsg ret = { 0 };
+MemTxResult res;
+uint64_t val;
+
+if (!is_power_of_2(bar_access->size) ||
+   (bar_access->size > sizeof(uint64_t))) {
+ret.data.u64 = UINT64_MAX;
+goto fail;
+}
+
+val = cpu_to_le64(bar_access->val);
+
+res = address_space_rw(as, bar_access->addr, MEMTXATTRS_UNSPECIFIED,
+   (void *)&val, bar_access->size, true);
+
+if (res != MEMTX_OK) {
+error_setg(errp, "Bad address %"PRIx64" for mem write, pid "FMT_pid".",
+   bar_access->addr, getpid());
+ret.data.u64 = -1;
+}
+
+fail:
+ret.cmd = MPQEMU_CMD_RET;
+ret.size = sizeof(ret.data.u64);
+
+if (!mpqemu_msg_send(&ret, ioc, NULL)) {
+error_prepend(errp, "Error returning code to proxy, pid "FMT_pid": ",
+  getpid());
+}
+}
+
+static void process_bar_read(QIOChannel *ioc

[PULL v3 14/27] io: add qio_channel_readv_full_all_eof & qio_channel_readv_full_all helpers

2021-02-05 Thread Stefan Hajnoczi
From: Elena Ufimtseva 

Adds qio_channel_readv_full_all_eof() and qio_channel_readv_full_all()
to read both data and FDs. Refactors existing code to use these helpers.

Signed-off-by: Elena Ufimtseva 
Signed-off-by: John G Johnson 
Signed-off-by: Jagannathan Raman 
Acked-by: Daniel P. Berrangé 
Message-id: 
b059c4cc0fb741e794d644c144cc21372cad877d.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 include/io/channel.h |  53 +++
 io/channel.c | 101 ++-
 2 files changed, 134 insertions(+), 20 deletions(-)

diff --git a/include/io/channel.h b/include/io/channel.h
index 19e76fc32f..88988979f8 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -777,6 +777,59 @@ void qio_channel_set_aio_fd_handler(QIOChannel *ioc,
 IOHandler *io_write,
 void *opaque);
 
+/**
+ * qio_channel_readv_full_all_eof:
+ * @ioc: the channel object
+ * @iov: the array of memory regions to read data to
+ * @niov: the length of the @iov array
+ * @fds: an array of file handles to read
+ * @nfds: number of file handles in @fds
+ * @errp: pointer to a NULL-initialized error object
+ *
+ *
+ * Performs same function as qio_channel_readv_all_eof.
+ * Additionally, attempts to read file descriptors shared
+ * over the channel. The function will wait for all
+ * requested data to be read, yielding from the current
+ * coroutine if required. data refers to both file
+ * descriptors and the iovs.
+ *
+ * Returns: 1 if all bytes were read, 0 if end-of-file
+ *  occurs without data, or -1 on error
+ */
+
+int qio_channel_readv_full_all_eof(QIOChannel *ioc,
+   const struct iovec *iov,
+   size_t niov,
+   int **fds, size_t *nfds,
+   Error **errp);
+
+/**
+ * qio_channel_readv_full_all:
+ * @ioc: the channel object
+ * @iov: the array of memory regions to read data to
+ * @niov: the length of the @iov array
+ * @fds: an array of file handles to read
+ * @nfds: number of file handles in @fds
+ * @errp: pointer to a NULL-initialized error object
+ *
+ *
+ * Performs same function as qio_channel_readv_all.
+ * Additionally, attempts to read file descriptors shared
+ * over the channel. The function will wait for all
+ * requested data to be read, yielding from the current
+ * coroutine if required. data refers to both file
+ * descriptors and the iovs.
+ *
+ * Returns: 0 if all bytes were read, or -1 on error
+ */
+
+int qio_channel_readv_full_all(QIOChannel *ioc,
+   const struct iovec *iov,
+   size_t niov,
+   int **fds, size_t *nfds,
+   Error **errp);
+
 /**
  * qio_channel_writev_full_all:
  * @ioc: the channel object
diff --git a/io/channel.c b/io/channel.c
index 0d4b8b5160..4555021b62 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -91,20 +91,48 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
   const struct iovec *iov,
   size_t niov,
   Error **errp)
+{
+return qio_channel_readv_full_all_eof(ioc, iov, niov, NULL, NULL, errp);
+}
+
+int qio_channel_readv_all(QIOChannel *ioc,
+  const struct iovec *iov,
+  size_t niov,
+  Error **errp)
+{
+return qio_channel_readv_full_all(ioc, iov, niov, NULL, NULL, errp);
+}
+
+int qio_channel_readv_full_all_eof(QIOChannel *ioc,
+   const struct iovec *iov,
+   size_t niov,
+   int **fds, size_t *nfds,
+   Error **errp)
 {
 int ret = -1;
 struct iovec *local_iov = g_new(struct iovec, niov);
 struct iovec *local_iov_head = local_iov;
 unsigned int nlocal_iov = niov;
+int **local_fds = fds;
+size_t *local_nfds = nfds;
 bool partial = false;
 
+if (nfds) {
+*nfds = 0;
+}
+
+if (fds) {
+*fds = NULL;
+}
+
 nlocal_iov = iov_copy(local_iov, nlocal_iov,
   iov, niov,
   0, iov_size(iov, niov));
 
-while (nlocal_iov > 0) {
+while ((nlocal_iov > 0) || local_fds) {
 ssize_t len;
-len = qio_channel_readv(ioc, local_iov, nlocal_iov, errp);
+len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds,
+ local_nfds, errp);
 if (len == QIO_CHANNEL_ERR_BLOCK) {
 if (qemu_in_coroutine()) {
 qio_channel_yield(ioc, G_IO_IN);
@@ -112,20 +140,50 @@ int qio_channel_readv_all_eof(QIOChannel *ioc,
 qio_channel_wait(ioc, G_IO_IN);
 }
 continue;
-} else if (len < 

[PULL v3 21/27] multi-process: Forward PCI config space accesses to the remote process

2021-02-05 Thread Stefan Hajnoczi
From: Elena Ufimtseva 

The Proxy Object sends the PCI config space accesses as messages
to the remote process over the communication channel

Signed-off-by: Elena Ufimtseva 
Signed-off-by: Jagannathan Raman 
Signed-off-by: John G Johnson 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
d3c94f4618813234655356c60e6f0d0362ff42d6.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 include/hw/remote/mpqemu-link.h | 10 ++
 hw/remote/message.c | 60 +
 hw/remote/mpqemu-link.c |  8 -
 hw/remote/proxy.c   | 55 ++
 4 files changed, 132 insertions(+), 1 deletion(-)

diff --git a/include/hw/remote/mpqemu-link.h b/include/hw/remote/mpqemu-link.h
index 1b35d408f8..7bc0bddb5a 100644
--- a/include/hw/remote/mpqemu-link.h
+++ b/include/hw/remote/mpqemu-link.h
@@ -34,6 +34,9 @@
  */
 typedef enum {
 MPQEMU_CMD_SYNC_SYSMEM,
+MPQEMU_CMD_RET,
+MPQEMU_CMD_PCI_CFGWRITE,
+MPQEMU_CMD_PCI_CFGREAD,
 MPQEMU_CMD_MAX,
 } MPQemuCmd;
 
@@ -43,6 +46,12 @@ typedef struct {
 off_t offsets[REMOTE_MAX_FDS];
 } SyncSysmemMsg;
 
+typedef struct {
+uint32_t addr;
+uint32_t val;
+int len;
+} PciConfDataMsg;
+
 /**
  * MPQemuMsg:
  * @cmd: The remote command
@@ -60,6 +69,7 @@ typedef struct {
 
 union {
 uint64_t u64;
+PciConfDataMsg pci_conf_data;
 SyncSysmemMsg sync_sysmem;
 } data;
 
diff --git a/hw/remote/message.c b/hw/remote/message.c
index 36e2d4fb0c..636bd161bd 100644
--- a/hw/remote/message.c
+++ b/hw/remote/message.c
@@ -15,6 +15,12 @@
 #include "hw/remote/mpqemu-link.h"
 #include "qapi/error.h"
 #include "sysemu/runstate.h"
+#include "hw/pci/pci.h"
+
+static void process_config_write(QIOChannel *ioc, PCIDevice *dev,
+ MPQemuMsg *msg, Error **errp);
+static void process_config_read(QIOChannel *ioc, PCIDevice *dev,
+MPQemuMsg *msg, Error **errp);
 
 void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
 {
@@ -40,6 +46,12 @@ void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
 }
 
 switch (msg.cmd) {
+case MPQEMU_CMD_PCI_CFGWRITE:
+process_config_write(com->ioc, pci_dev, &msg, &local_err);
+break;
+case MPQEMU_CMD_PCI_CFGREAD:
+process_config_read(com->ioc, pci_dev, &msg, &local_err);
+break;
 default:
 error_setg(&local_err,
"Unknown command (%d) received for device %s"
@@ -55,3 +67,51 @@ void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
 qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
 }
 }
+
+static void process_config_write(QIOChannel *ioc, PCIDevice *dev,
+ MPQemuMsg *msg, Error **errp)
+{
+ERRP_GUARD();
+PciConfDataMsg *conf = (PciConfDataMsg *)&msg->data.pci_conf_data;
+MPQemuMsg ret = { 0 };
+
+if ((conf->addr + sizeof(conf->val)) > pci_config_size(dev)) {
+error_setg(errp, "Bad address for PCI config write, pid "FMT_pid".",
+   getpid());
+ret.data.u64 = UINT64_MAX;
+} else {
+pci_default_write_config(dev, conf->addr, conf->val, conf->len);
+}
+
+ret.cmd = MPQEMU_CMD_RET;
+ret.size = sizeof(ret.data.u64);
+
+if (!mpqemu_msg_send(&ret, ioc, NULL)) {
+error_prepend(errp, "Error returning code to proxy, pid "FMT_pid": ",
+  getpid());
+}
+}
+
+static void process_config_read(QIOChannel *ioc, PCIDevice *dev,
+MPQemuMsg *msg, Error **errp)
+{
+ERRP_GUARD();
+PciConfDataMsg *conf = (PciConfDataMsg *)&msg->data.pci_conf_data;
+MPQemuMsg ret = { 0 };
+
+if ((conf->addr + sizeof(conf->val)) > pci_config_size(dev)) {
+error_setg(errp, "Bad address for PCI config read, pid "FMT_pid".",
+   getpid());
+ret.data.u64 = UINT64_MAX;
+} else {
+ret.data.u64 = pci_default_read_config(dev, conf->addr, conf->len);
+}
+
+ret.cmd = MPQEMU_CMD_RET;
+ret.size = sizeof(ret.data.u64);
+
+if (!mpqemu_msg_send(&ret, ioc, NULL)) {
+error_prepend(errp, "Error returning code to proxy, pid "FMT_pid": ",
+  getpid());
+}
+}
diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c
index f5e9e01923..b45f325686 100644
--- a/hw/remote/mpqemu-link.c
+++ b/hw/remote/mpqemu-link.c
@@ -207,7 +207,7 @@ uint64_t mpqemu_msg_send_and_await_reply(MPQemuMsg *msg, 
PCIProxyDev *pdev,
 return ret;
 }
 
-if (!mpqemu_msg_valid(&msg_reply)) {
+if (!mpqemu_msg_valid(&msg_reply) || msg_reply.cmd != MPQEMU_CMD_RET) {
 error_setg(errp, "ERROR: Invalid reply received for command %d",
  msg->cmd);
 return ret;
@@ -242,6 +242,12 @@ bool mpqemu_msg_valid(MPQemuMsg *msg)
 return false;
 }
 break;
+case MPQEMU_CMD_PCI_C

[PULL v3 12/27] multi-process: setup a machine object for remote device process

2021-02-05 Thread Stefan Hajnoczi
From: Jagannathan Raman 

x-remote-machine object sets up various subsystems of the remote
device process. Instantiate PCI host bridge object and initialize RAM, IO &
PCI memory regions.

Signed-off-by: John G Johnson 
Signed-off-by: Jagannathan Raman 
Signed-off-by: Elena Ufimtseva 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
c537f38d17f90453ca610c6b70cf3480274e0ba1.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS  |  2 ++
 include/hw/pci-host/remote.h |  1 +
 include/hw/remote/machine.h  | 27 ++
 hw/remote/machine.c  | 70 
 hw/meson.build   |  1 +
 hw/remote/meson.build|  5 +++
 6 files changed, 106 insertions(+)
 create mode 100644 include/hw/remote/machine.h
 create mode 100644 hw/remote/machine.c
 create mode 100644 hw/remote/meson.build

diff --git a/MAINTAINERS b/MAINTAINERS
index 45e777bc55..45979452ed 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3203,6 +3203,8 @@ F: docs/devel/multi-process.rst
 F: docs/system/multi-process.rst
 F: hw/pci-host/remote.c
 F: include/hw/pci-host/remote.h
+F: hw/remote/machine.c
+F: include/hw/remote/machine.h
 
 Build and test automation
 -
diff --git a/include/hw/pci-host/remote.h b/include/hw/pci-host/remote.h
index 06b8a83a4b..3dcf6aa51d 100644
--- a/include/hw/pci-host/remote.h
+++ b/include/hw/pci-host/remote.h
@@ -24,6 +24,7 @@ struct RemotePCIHost {
 
 MemoryRegion *mr_pci_mem;
 MemoryRegion *mr_sys_io;
+MemoryRegion *mr_sys_mem;
 };
 
 #endif
diff --git a/include/hw/remote/machine.h b/include/hw/remote/machine.h
new file mode 100644
index 00..bdfbca40b9
--- /dev/null
+++ b/include/hw/remote/machine.h
@@ -0,0 +1,27 @@
+/*
+ * Remote machine configuration
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef REMOTE_MACHINE_H
+#define REMOTE_MACHINE_H
+
+#include "qom/object.h"
+#include "hw/boards.h"
+#include "hw/pci-host/remote.h"
+
+struct RemoteMachineState {
+MachineState parent_obj;
+
+RemotePCIHost *host;
+};
+
+#define TYPE_REMOTE_MACHINE "x-remote-machine"
+OBJECT_DECLARE_SIMPLE_TYPE(RemoteMachineState, REMOTE_MACHINE)
+
+#endif
diff --git a/hw/remote/machine.c b/hw/remote/machine.c
new file mode 100644
index 00..9519a6c0a4
--- /dev/null
+++ b/hw/remote/machine.c
@@ -0,0 +1,70 @@
+/*
+ * Machine for remote device
+ *
+ *  This machine type is used by the remote device process in multi-process
+ *  QEMU. QEMU device models depend on parent busses, interrupt controllers,
+ *  memory regions, etc. The remote machine type offers this environment so
+ *  that QEMU device models can be used as remote devices.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "hw/remote/machine.h"
+#include "exec/address-spaces.h"
+#include "exec/memory.h"
+#include "qapi/error.h"
+
+static void remote_machine_init(MachineState *machine)
+{
+MemoryRegion *system_memory, *system_io, *pci_memory;
+RemoteMachineState *s = REMOTE_MACHINE(machine);
+RemotePCIHost *rem_host;
+
+system_memory = get_system_memory();
+system_io = get_system_io();
+
+pci_memory = g_new(MemoryRegion, 1);
+memory_region_init(pci_memory, NULL, "pci", UINT64_MAX);
+
+rem_host = REMOTE_PCIHOST(qdev_new(TYPE_REMOTE_PCIHOST));
+
+rem_host->mr_pci_mem = pci_memory;
+rem_host->mr_sys_mem = system_memory;
+rem_host->mr_sys_io = system_io;
+
+s->host = rem_host;
+
+object_property_add_child(OBJECT(s), "remote-pcihost", OBJECT(rem_host));
+memory_region_add_subregion_overlap(system_memory, 0x0, pci_memory, -1);
+
+qdev_realize(DEVICE(rem_host), sysbus_get_default(), &error_fatal);
+}
+
+static void remote_machine_class_init(ObjectClass *oc, void *data)
+{
+MachineClass *mc = MACHINE_CLASS(oc);
+
+mc->init = remote_machine_init;
+mc->desc = "Experimental remote machine";
+}
+
+static const TypeInfo remote_machine = {
+.name = TYPE_REMOTE_MACHINE,
+.parent = TYPE_MACHINE,
+.instance_size = sizeof(RemoteMachineState),
+.class_init = remote_machine_class_init,
+};
+
+static void remote_machine_register_types(void)
+{
+type_register_static(&remote_machine);
+}
+
+type_init(remote_machine_register_types);
diff --git a/hw/meson.build b/hw/meson.build
index 010de7219c..e615d72d4d 100644
--- a/hw/meson.build
+++ b/hw/meson.build
@@ -56,6 +56,7 @@ subdir('moxie')
 subdir('nios2')
 subdir('openrisc')
 subdir('ppc')
+subdir('remote')
 subdir('riscv')
 subdir('rx')
 subdir('s390x')
diff --git a/hw/remote/meson.build b/hw/remote/meson.build
new file mode 100644
index 00

[PULL v3 10/27] multi-process: Add config option for multi-process QEMU

2021-02-05 Thread Stefan Hajnoczi
From: Jagannathan Raman 

Add configuration options to enable or disable multiprocess QEMU code

Signed-off-by: John G Johnson 
Signed-off-by: Jagannathan Raman 
Signed-off-by: Elena Ufimtseva 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
6cc37253e35418ebd7b675a31a3df6e3c7a12dc1.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 configure | 10 ++
 meson.build   |  4 +++-
 Kconfig.host  |  4 
 hw/Kconfig|  1 +
 hw/remote/Kconfig |  3 +++
 5 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 hw/remote/Kconfig

diff --git a/configure b/configure
index e85d6baf8f..a73869860b 100755
--- a/configure
+++ b/configure
@@ -463,6 +463,7 @@ skip_meson=no
 gettext="auto"
 fuse="auto"
 fuse_lseek="auto"
+multiprocess="no"
 
 malloc_trim="auto"
 
@@ -797,6 +798,7 @@ Linux)
   linux="yes"
   linux_user="yes"
   vhost_user=${default_feature:-yes}
+  multiprocess=${default_feature:-yes}
 ;;
 esac
 
@@ -1556,6 +1558,10 @@ for opt do
   ;;
   --disable-fuse-lseek) fuse_lseek="disabled"
   ;;
+  --enable-multiprocess) multiprocess="yes"
+  ;;
+  --disable-multiprocess) multiprocess="no"
+  ;;
   *)
   echo "ERROR: unknown option $opt"
   echo "Try '$0 --help' for more information"
@@ -1908,6 +1914,7 @@ disabled with --disable-FEATURE, default is enabled if 
available
   libdaxctl   libdaxctl support
   fuseFUSE block device export
   fuse-lseek  SEEK_HOLE/SEEK_DATA support for FUSE exports
+  multiprocessMultiprocess QEMU support
 
 NOTE: The object files are built at the place where configure is launched
 EOF
@@ -6082,6 +6089,9 @@ fi
 if test "$have_mlockall" = "yes" ; then
   echo "HAVE_MLOCKALL=y" >> $config_host_mak
 fi
+if test "$multiprocess" = "yes" ; then
+  echo "CONFIG_MULTIPROCESS_ALLOWED=y" >> $config_host_mak
+fi
 if test "$fuzzing" = "yes" ; then
   # If LIB_FUZZING_ENGINE is set, assume we are running on OSS-Fuzz, and the
   # needed CFLAGS have already been provided
diff --git a/meson.build b/meson.build
index 2d8b433ff0..7a7283a97d 100644
--- a/meson.build
+++ b/meson.build
@@ -1210,7 +1210,8 @@ host_kconfig = \
   ('CONFIG_VHOST_KERNEL' in config_host ? ['CONFIG_VHOST_KERNEL=y'] : []) + \
   (have_virtfs ? ['CONFIG_VIRTFS=y'] : []) + \
   ('CONFIG_LINUX' in config_host ? ['CONFIG_LINUX=y'] : []) + \
-  ('CONFIG_PVRDMA' in config_host ? ['CONFIG_PVRDMA=y'] : [])
+  ('CONFIG_PVRDMA' in config_host ? ['CONFIG_PVRDMA=y'] : []) + \
+  ('CONFIG_MULTIPROCESS_ALLOWED' in config_host ? 
['CONFIG_MULTIPROCESS_ALLOWED=y'] : [])
 
 ignored = [ 'TARGET_XML_FILES', 'TARGET_ABI_DIR', 'TARGET_ARCH' ]
 
@@ -2626,6 +2627,7 @@ summary_info += {'libpmem support':   
config_host.has_key('CONFIG_LIBPMEM')}
 summary_info += {'libdaxctl support': config_host.has_key('CONFIG_LIBDAXCTL')}
 summary_info += {'libudev':   libudev.found()}
 summary_info += {'FUSE lseek':fuse_lseek.found()}
+summary_info += {'Multiprocess QEMU': 
config_host.has_key('CONFIG_MULTIPROCESS_ALLOWED')}
 summary(summary_info, bool_yn: true, section: 'Dependencies')
 
 if not supported_cpus.contains(cpu)
diff --git a/Kconfig.host b/Kconfig.host
index a9a55a9c31..24255ef441 100644
--- a/Kconfig.host
+++ b/Kconfig.host
@@ -37,3 +37,7 @@ config VIRTFS
 
 config PVRDMA
 bool
+
+config MULTIPROCESS_ALLOWED
+bool
+imply MULTIPROCESS
diff --git a/hw/Kconfig b/hw/Kconfig
index d4cec9e476..8ea26479c4 100644
--- a/hw/Kconfig
+++ b/hw/Kconfig
@@ -27,6 +27,7 @@ source pci-host/Kconfig
 source pcmcia/Kconfig
 source pci/Kconfig
 source rdma/Kconfig
+source remote/Kconfig
 source rtc/Kconfig
 source scsi/Kconfig
 source sd/Kconfig
diff --git a/hw/remote/Kconfig b/hw/remote/Kconfig
new file mode 100644
index 00..54844467a0
--- /dev/null
+++ b/hw/remote/Kconfig
@@ -0,0 +1,3 @@
+config MULTIPROCESS
+bool
+depends on PCI && KVM
-- 
2.29.2



[PULL v3 15/27] multi-process: define MPQemuMsg format and transmission functions

2021-02-05 Thread Stefan Hajnoczi
From: Elena Ufimtseva 

Defines MPQemuMsg, which is the message that is sent to the remote
process. This message is sent over QIOChannel and is used to
command the remote process to perform various tasks.
Define transmission functions used by proxy and by remote.

Signed-off-by: Jagannathan Raman 
Signed-off-by: John G Johnson 
Signed-off-by: Elena Ufimtseva 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
56ca8bcf95195b2b195b08f6b9565b6d7410bce5.1611938319.git.jag.ra...@oracle.com

[Replace struct iovec send[2] = {0} with {} to make clang happy as
suggested by Peter Maydell .
--Stefan]

Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS |   2 +
 meson.build |   1 +
 hw/remote/trace.h   |   1 +
 include/hw/remote/mpqemu-link.h |  63 ++
 include/sysemu/iothread.h   |   6 +
 hw/remote/mpqemu-link.c | 205 
 iothread.c  |   6 +
 hw/remote/meson.build   |   1 +
 hw/remote/trace-events  |   4 +
 9 files changed, 289 insertions(+)
 create mode 100644 hw/remote/trace.h
 create mode 100644 include/hw/remote/mpqemu-link.h
 create mode 100644 hw/remote/mpqemu-link.c
 create mode 100644 hw/remote/trace-events

diff --git a/MAINTAINERS b/MAINTAINERS
index 45979452ed..97137f617b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3205,6 +3205,8 @@ F: hw/pci-host/remote.c
 F: include/hw/pci-host/remote.h
 F: hw/remote/machine.c
 F: include/hw/remote/machine.h
+F: hw/remote/mpqemu-link.c
+F: include/hw/remote/mpqemu-link.h
 
 Build and test automation
 -
diff --git a/meson.build b/meson.build
index 7a7283a97d..43215c74e3 100644
--- a/meson.build
+++ b/meson.build
@@ -1800,6 +1800,7 @@ if have_system
 'net',
 'softmmu',
 'ui',
+'hw/remote',
   ]
 endif
 trace_events_subdirs += [
diff --git a/hw/remote/trace.h b/hw/remote/trace.h
new file mode 100644
index 00..5d5e3ac720
--- /dev/null
+++ b/hw/remote/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-hw_remote.h"
diff --git a/include/hw/remote/mpqemu-link.h b/include/hw/remote/mpqemu-link.h
new file mode 100644
index 00..cac699cb42
--- /dev/null
+++ b/include/hw/remote/mpqemu-link.h
@@ -0,0 +1,63 @@
+/*
+ * Communication channel between QEMU and remote device process
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef MPQEMU_LINK_H
+#define MPQEMU_LINK_H
+
+#include "qom/object.h"
+#include "qemu/thread.h"
+#include "io/channel.h"
+
+#define REMOTE_MAX_FDS 8
+
+#define MPQEMU_MSG_HDR_SIZE offsetof(MPQemuMsg, data.u64)
+
+/**
+ * MPQemuCmd:
+ *
+ * MPQemuCmd enum type to specify the command to be executed on the remote
+ * device.
+ *
+ * This uses a private protocol between QEMU and the remote process. vfio-user
+ * protocol would supersede this in the future.
+ *
+ */
+typedef enum {
+MPQEMU_CMD_MAX,
+} MPQemuCmd;
+
+/**
+ * MPQemuMsg:
+ * @cmd: The remote command
+ * @size: Size of the data to be shared
+ * @data: Structured data
+ * @fds: File descriptors to be shared with remote device
+ *
+ * MPQemuMsg Format of the message sent to the remote device from QEMU.
+ *
+ */
+typedef struct {
+int cmd;
+size_t size;
+
+union {
+uint64_t u64;
+} data;
+
+int fds[REMOTE_MAX_FDS];
+int num_fds;
+} MPQemuMsg;
+
+bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp);
+bool mpqemu_msg_recv(MPQemuMsg *msg, QIOChannel *ioc, Error **errp);
+
+bool mpqemu_msg_valid(MPQemuMsg *msg);
+
+#endif
diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
index 0c5284dbbc..f177142f16 100644
--- a/include/sysemu/iothread.h
+++ b/include/sysemu/iothread.h
@@ -57,4 +57,10 @@ IOThread *iothread_create(const char *id, Error **errp);
 void iothread_stop(IOThread *iothread);
 void iothread_destroy(IOThread *iothread);
 
+/*
+ * Returns true if executing within IOThread context,
+ * false otherwise.
+ */
+bool qemu_in_iothread(void);
+
 #endif /* IOTHREAD_H */
diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c
new file mode 100644
index 00..0d1899fd94
--- /dev/null
+++ b/hw/remote/mpqemu-link.c
@@ -0,0 +1,205 @@
+/*
+ * Communication channel between QEMU and remote device process
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "qemu/module.h"
+#include "hw/remote/mpqemu-link.h"
+#include "qapi/error.h"
+#include "qemu/iov.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "io/channel.h"
+#include "sysemu/iothread.h"
+#include "trace.h"
+
+/*
+ * Send message over the ioc QIOChannel.
+ * This function is safe to call from:
+ * - main loop in co-r

[PULL v3 13/27] io: add qio_channel_writev_full_all helper

2021-02-05 Thread Stefan Hajnoczi
From: Elena Ufimtseva 

Adds qio_channel_writev_full_all() to transmit both data and FDs.
Refactors existing code to use this helper.

Signed-off-by: Elena Ufimtseva 
Signed-off-by: John G Johnson 
Signed-off-by: Jagannathan Raman 
Reviewed-by: Stefan Hajnoczi 
Acked-by: Daniel P. Berrangé 
Message-id: 
480fbf1fe4152495d60596c9b665124549b426a5.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 include/io/channel.h | 25 +
 io/channel.c | 15 ++-
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/include/io/channel.h b/include/io/channel.h
index ab9ea77959..19e76fc32f 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -777,4 +777,29 @@ void qio_channel_set_aio_fd_handler(QIOChannel *ioc,
 IOHandler *io_write,
 void *opaque);
 
+/**
+ * qio_channel_writev_full_all:
+ * @ioc: the channel object
+ * @iov: the array of memory regions to write data from
+ * @niov: the length of the @iov array
+ * @fds: an array of file handles to send
+ * @nfds: number of file handles in @fds
+ * @errp: pointer to a NULL-initialized error object
+ *
+ *
+ * Behaves like qio_channel_writev_full but will attempt
+ * to send all data passed (file handles and memory regions).
+ * The function will wait for all requested data
+ * to be written, yielding from the current coroutine
+ * if required.
+ *
+ * Returns: 0 if all bytes were written, or -1 on error
+ */
+
+int qio_channel_writev_full_all(QIOChannel *ioc,
+const struct iovec *iov,
+size_t niov,
+int *fds, size_t nfds,
+Error **errp);
+
 #endif /* QIO_CHANNEL_H */
diff --git a/io/channel.c b/io/channel.c
index 93d449dee2..0d4b8b5160 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -156,6 +156,15 @@ int qio_channel_writev_all(QIOChannel *ioc,
const struct iovec *iov,
size_t niov,
Error **errp)
+{
+return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, errp);
+}
+
+int qio_channel_writev_full_all(QIOChannel *ioc,
+const struct iovec *iov,
+size_t niov,
+int *fds, size_t nfds,
+Error **errp)
 {
 int ret = -1;
 struct iovec *local_iov = g_new(struct iovec, niov);
@@ -168,7 +177,8 @@ int qio_channel_writev_all(QIOChannel *ioc,
 
 while (nlocal_iov > 0) {
 ssize_t len;
-len = qio_channel_writev(ioc, local_iov, nlocal_iov, errp);
+len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, nfds,
+  errp);
 if (len == QIO_CHANNEL_ERR_BLOCK) {
 if (qemu_in_coroutine()) {
 qio_channel_yield(ioc, G_IO_OUT);
@@ -182,6 +192,9 @@ int qio_channel_writev_all(QIOChannel *ioc,
 }
 
 iov_discard_front(&local_iov, &nlocal_iov, len);
+
+fds = NULL;
+nfds = 0;
 }
 
 ret = 0;
-- 
2.29.2



[PULL v3 09/27] memory: alloc RAM from file at offset

2021-02-05 Thread Stefan Hajnoczi
From: Jagannathan Raman 

Allow RAM MemoryRegion to be created from an offset in a file, instead
of allocating at offset of 0 by default. This is needed to synchronize
RAM between QEMU & remote process.

Signed-off-by: Jagannathan Raman 
Signed-off-by: John G Johnson 
Signed-off-by: Elena Ufimtseva 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
609996697ad8617e3b01df38accc5c208c24d74e.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 include/exec/memory.h |  2 ++
 include/exec/ram_addr.h   |  4 ++--
 include/qemu/mmap-alloc.h |  4 +++-
 backends/hostmem-memfd.c  |  2 +-
 hw/misc/ivshmem.c |  3 ++-
 softmmu/memory.c  |  3 ++-
 softmmu/physmem.c | 12 +++-
 util/mmap-alloc.c |  8 +---
 util/oslib-posix.c|  2 +-
 9 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index c6ce74fb79..0f66fcc6c2 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -992,6 +992,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
  * @size: size of the region.
  * @share: %true if memory must be mmaped with the MAP_SHARED flag
  * @fd: the fd to mmap.
+ * @offset: offset within the file referenced by fd
  * @errp: pointer to Error*, to store an error if it happens.
  *
  * Note that this function does not do anything to cause the data in the
@@ -1003,6 +1004,7 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr,
 uint64_t size,
 bool share,
 int fd,
+ram_addr_t offset,
 Error **errp);
 #endif
 
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 40b16609ab..3cb9791df3 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -121,8 +121,8 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, 
MemoryRegion *mr,
uint32_t ram_flags, const char *mem_path,
bool readonly, Error **errp);
 RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
- uint32_t ram_flags, int fd, bool readonly,
- Error **errp);
+ uint32_t ram_flags, int fd, off_t offset,
+ bool readonly, Error **errp);
 
 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
   MemoryRegion *mr, Error **errp);
diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
index 8b7a5c70f3..456ff87df1 100644
--- a/include/qemu/mmap-alloc.h
+++ b/include/qemu/mmap-alloc.h
@@ -17,6 +17,7 @@ size_t qemu_mempath_getpagesize(const char *mem_path);
  *  @readonly: true for a read-only mapping, false for read/write.
  *  @shared: map has RAM_SHARED flag.
  *  @is_pmem: map has RAM_PMEM flag.
+ *  @map_offset: map starts at offset of map_offset from the start of fd
  *
  * Return:
  *  On success, return a pointer to the mapped area.
@@ -27,7 +28,8 @@ void *qemu_ram_mmap(int fd,
 size_t align,
 bool readonly,
 bool shared,
-bool is_pmem);
+bool is_pmem,
+off_t map_offset);
 
 void qemu_ram_munmap(int fd, void *ptr, size_t size);
 
diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c
index e5626d4330..69b0ae30bb 100644
--- a/backends/hostmem-memfd.c
+++ b/backends/hostmem-memfd.c
@@ -55,7 +55,7 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error 
**errp)
 name = host_memory_backend_get_name(backend);
 memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend),
name, backend->size,
-   backend->share, fd, errp);
+   backend->share, fd, 0, errp);
 g_free(name);
 }
 
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index 0505b52c98..603e992a7f 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -495,7 +495,8 @@ static void process_msg_shmem(IVShmemState *s, int fd, 
Error **errp)
 
 /* mmap the region and map into the BAR2 */
 memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s),
-   "ivshmem.bar2", size, true, fd, &local_err);
+   "ivshmem.bar2", size, true, fd, 0,
+   &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
 return;
diff --git a/softmmu/memory.c b/softmmu/memory.c
index 676c298b60..b4f48c0c29 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -1612,6 +1612,7 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr,
 uint64_t size,
 bool share,
 int fd,

[PULL v3 00/27] Block patches

2021-02-05 Thread Stefan Hajnoczi
The following changes since commit e2c5093c993ef646e4e28f7aa78429853bcc06ac:

  iotests: 30: drop from auto group (and effectively from make check) 
(2021-02-05 15:16:13 +)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to b07011f375bda3319cf72eee7cb18d310078387b:

  docs: fix Parallels Image "dirty bitmap" section (2021-02-05 16:36:36 +)


Pull request

v3:
 * Replace {0} array initialization with {} to make clang happy [Peter]



Denis V. Lunev (1):
  docs: fix Parallels Image "dirty bitmap" section

Elena Ufimtseva (8):
  multi-process: add configure and usage information
  io: add qio_channel_writev_full_all helper
  io: add qio_channel_readv_full_all_eof & qio_channel_readv_full_all
helpers
  multi-process: define MPQemuMsg format and transmission functions
  multi-process: introduce proxy object
  multi-process: add proxy communication functions
  multi-process: Forward PCI config space accesses to the remote process
  multi-process: perform device reset in the remote process

Jagannathan Raman (11):
  memory: alloc RAM from file at offset
  multi-process: Add config option for multi-process QEMU
  multi-process: setup PCI host bridge for remote device
  multi-process: setup a machine object for remote device process
  multi-process: Initialize message handler in remote device
  multi-process: Associate fd of a PCIDevice with its object
  multi-process: setup memory manager for remote device
  multi-process: PCI BAR read/write handling for proxy & remote
endpoints
  multi-process: Synchronize remote memory
  multi-process: create IOHUB object to handle irq
  multi-process: Retrieve PCI info from remote process

John G Johnson (1):
  multi-process: add the concept description to
docs/devel/qemu-multiprocess

Stefan Hajnoczi (6):
  .github: point Repo Lockdown bot to GitLab repo
  gitmodules: use GitLab repos instead of qemu.org
  gitlab-ci: remove redundant GitLab repo URL command
  docs: update README to use GitLab repo URLs
  pc-bios: update mirror URLs to GitLab
  get_maintainer: update repo URL to GitLab

 MAINTAINERS   |  24 +
 README.rst|   4 +-
 docs/devel/index.rst  |   1 +
 docs/devel/multi-process.rst  | 966 ++
 docs/system/index.rst |   1 +
 docs/system/multi-process.rst |  64 ++
 docs/interop/parallels.txt|   2 +-
 configure |  10 +
 meson.build   |   5 +-
 hw/remote/trace.h |   1 +
 include/exec/memory.h |   2 +
 include/exec/ram_addr.h   |   4 +-
 include/hw/pci-host/remote.h  |  30 +
 include/hw/pci/pci_ids.h  |   3 +
 include/hw/remote/iohub.h |  42 +
 include/hw/remote/machine.h   |  38 +
 include/hw/remote/memory.h|  19 +
 include/hw/remote/mpqemu-link.h   |  99 +++
 include/hw/remote/proxy-memory-listener.h |  28 +
 include/hw/remote/proxy.h |  48 ++
 include/io/channel.h  |  78 ++
 include/qemu/mmap-alloc.h |   4 +-
 include/sysemu/iothread.h |   6 +
 backends/hostmem-memfd.c  |   2 +-
 hw/misc/ivshmem.c |   3 +-
 hw/pci-host/remote.c  |  75 ++
 hw/remote/iohub.c | 119 +++
 hw/remote/machine.c   |  80 ++
 hw/remote/memory.c|  65 ++
 hw/remote/message.c   | 230 ++
 hw/remote/mpqemu-link.c   | 267 ++
 hw/remote/proxy-memory-listener.c | 227 +
 hw/remote/proxy.c | 379 +
 hw/remote/remote-obj.c| 203 +
 io/channel.c  | 116 ++-
 iothread.c|   6 +
 softmmu/memory.c  |   3 +-
 softmmu/physmem.c |  12 +-
 util/mmap-alloc.c |   8 +-
 util/oslib-posix.c|   2 +-
 .github/lockdown.yml  |   8 +-
 .gitlab-ci.yml|   1 -
 .gitmodules   |  44 +-
 Kconfig.host  |   4 +
 hw/Kconfig|   1 +
 hw/meson.build|   1 +
 hw/pci-host/Kconfig   |   3 +
 hw/pci-host/meson.build   |   1 +
 hw/remote/Kconfig |   4 +
 hw/remote/meson.build |  13 +
 hw/remote/trace-events|   4 +
 pc-bios/README   

[PULL v3 08/27] multi-process: add configure and usage information

2021-02-05 Thread Stefan Hajnoczi
From: Elena Ufimtseva 

Adds documentation explaining the command-line arguments needed
to use multi-process.

Signed-off-by: Elena Ufimtseva 
Signed-off-by: Jagannathan Raman 
Signed-off-by: John G Johnson 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
49f757a84e5dd6fae14b22544897d1124c5fdbad.1611938319.git.jag.ra...@oracle.com

[Move orphan docs/multi-process.rst document into docs/system/ and add
it to index.rst to prevent Sphinx "document isn't included in any
toctree" error.
--Stefan]

Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS   |  1 +
 docs/system/index.rst |  1 +
 docs/system/multi-process.rst | 64 +++
 3 files changed, 66 insertions(+)
 create mode 100644 docs/system/multi-process.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index 2ff1ead4ab..9d2fe7f8db 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3200,6 +3200,7 @@ M: Jagannathan Raman 
 M: John G Johnson 
 S: Maintained
 F: docs/devel/multi-process.rst
+F: docs/system/multi-process.rst
 
 Build and test automation
 -
diff --git a/docs/system/index.rst b/docs/system/index.rst
index d40f72c92b..625b494372 100644
--- a/docs/system/index.rst
+++ b/docs/system/index.rst
@@ -34,6 +34,7 @@ Contents:
pr-manager
targets
security
+   multi-process
deprecated
removed-features
build-platforms
diff --git a/docs/system/multi-process.rst b/docs/system/multi-process.rst
new file mode 100644
index 00..46bb0cafc2
--- /dev/null
+++ b/docs/system/multi-process.rst
@@ -0,0 +1,64 @@
+Multi-process QEMU
+==
+
+This document describes how to configure and use multi-process qemu.
+For the design document refer to docs/devel/multi-process.rst.
+
+1) Configuration
+
+
+Multi-process QEMU is enabled by default for targets that enable KVM.
+
+
+2) Usage
+
+
+Multi-process QEMU requires an orchestrator to launch.
+
+Following is a description of command-line used to launch mpqemu.
+
+* Orchestrator:
+
+  - The Orchestrator creates a unix socketpair
+
+  - It launches the remote process and passes one of the
+sockets to it via command-line.
+
+  - It then launches QEMU and specifies the other socket as an option
+to the Proxy device object
+
+* Remote Process:
+
+  - QEMU can enter remote process mode by using the "x-remote" machine
+    option.
+
+  - The orchestrator creates a "remote-object" with details about
+the device and the file descriptor for the device
+
+  - The remaining options are no different from how one launches QEMU with
+devices.
+
+  - Example command-line for the remote process is as follows:
+
+  /usr/bin/qemu-system-x86_64\
+  -machine x-remote  \
+  -device lsi53c895a,id=lsi0 \
+  -drive id=drive_image2,file=/build/ol7-nvme-test-1.qcow2   \
+  -device scsi-hd,id=drive2,drive=drive_image2,bus=lsi0.0,scsi-id=0  \
+  -object x-remote-object,id=robj1,devid=lsi1,fd=4,
+
+* QEMU:
+
+  - Since parts of the RAM are shared between QEMU & remote process, a
+memory-backend-memfd is required to facilitate this, as follows:
+
+-object memory-backend-memfd,id=mem,size=2G
+
+  - A "x-pci-proxy-dev" device is created for each of the PCI devices emulated
+in the remote process. A "socket" sub-option specifies the other end of
+unix channel created by orchestrator. The "id" sub-option must be specified
+and should be the same as the "id" specified for the remote PCI device
+
+  - Example commandline for QEMU is as follows:
+
+  -device x-pci-proxy-dev,id=lsi0,socket=3
-- 
2.29.2



[PULL v3 06/27] get_maintainer: update repo URL to GitLab

2021-02-05 Thread Stefan Hajnoczi
qemu.org is running out of bandwidth and the QEMU project is moving
towards a gating CI on GitLab. Use the GitLab repos instead of qemu.org
(they will become mirrors).

Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Wainer dos Santos Moschetta 
Reviewed-by: Thomas Huth 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 2021015017.156802-7-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 scripts/get_maintainer.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 271f5ff42a..e5499b94b4 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -1377,7 +1377,7 @@ sub vcs_exists {
warn("$P: No supported VCS found.  Add --nogit to options?\n");
warn("Using a git repository produces better results.\n");
warn("Try latest git repository using:\n");
-   warn("git clone https://git.qemu.org/git/qemu.git\n";);
+   warn("git clone https://gitlab.com/qemu-project/qemu.git\n";);
$printed_novcs = 1;
 }
 return 0;
-- 
2.29.2



[PULL v3 01/27] .github: point Repo Lockdown bot to GitLab repo

2021-02-05 Thread Stefan Hajnoczi
Use the GitLab repo URL as the main repo location in order to reduce
load on qemu.org.

Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Wainer dos Santos Moschetta 
Reviewed-by: Thomas Huth 
Message-id: 2021015017.156802-2-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 .github/lockdown.yml | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/lockdown.yml b/.github/lockdown.yml
index 9acc393f1c..07fc2f31ee 100644
--- a/.github/lockdown.yml
+++ b/.github/lockdown.yml
@@ -10,8 +10,8 @@ issues:
   comment: |
 Thank you for your interest in the QEMU project.
 
-This repository is a read-only mirror of the project's master
-repostories hosted on https://git.qemu.org/git/qemu.git.
+This repository is a read-only mirror of the project's repositories hosted
+at https://gitlab.com/qemu-project/qemu.git.
 The project does not process issues filed on GitHub.
 
 The project issues are tracked on Launchpad:
@@ -24,8 +24,8 @@ pulls:
   comment: |
 Thank you for your interest in the QEMU project.
 
-This repository is a read-only mirror of the project's master
-repostories hosted on https://git.qemu.org/git/qemu.git.
+This repository is a read-only mirror of the project's repositories hosted
+on https://gitlab.com/qemu-project/qemu.git.
 The project does not process merge requests filed on GitHub.
 
 QEMU welcomes contributions of code (either fixing bugs or adding new
-- 
2.29.2



[PULL v3 11/27] multi-process: setup PCI host bridge for remote device

2021-02-05 Thread Stefan Hajnoczi
From: Jagannathan Raman 

PCI host bridge is setup for the remote device process. It is
implemented using remote-pcihost object. It is an extension of the PCI
host bridge setup by QEMU.
Remote-pcihost configures a PCI bus which could be used by the remote
PCI device to latch on to.

Signed-off-by: Jagannathan Raman 
Signed-off-by: John G Johnson 
Signed-off-by: Elena Ufimtseva 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
0871ba857abb2eafacde07e7fe66a3f12415bfb2.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS  |  2 +
 include/hw/pci-host/remote.h | 29 ++
 hw/pci-host/remote.c | 75 
 hw/pci-host/Kconfig  |  3 ++
 hw/pci-host/meson.build  |  1 +
 hw/remote/Kconfig|  1 +
 6 files changed, 111 insertions(+)
 create mode 100644 include/hw/pci-host/remote.h
 create mode 100644 hw/pci-host/remote.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 9d2fe7f8db..45e777bc55 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3201,6 +3201,8 @@ M: John G Johnson 
 S: Maintained
 F: docs/devel/multi-process.rst
 F: docs/system/multi-process.rst
+F: hw/pci-host/remote.c
+F: include/hw/pci-host/remote.h
 
 Build and test automation
 -
diff --git a/include/hw/pci-host/remote.h b/include/hw/pci-host/remote.h
new file mode 100644
index 00..06b8a83a4b
--- /dev/null
+++ b/include/hw/pci-host/remote.h
@@ -0,0 +1,29 @@
+/*
+ * PCI Host for remote device
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef REMOTE_PCIHOST_H
+#define REMOTE_PCIHOST_H
+
+#include "exec/memory.h"
+#include "hw/pci/pcie_host.h"
+
+#define TYPE_REMOTE_PCIHOST "remote-pcihost"
+OBJECT_DECLARE_SIMPLE_TYPE(RemotePCIHost, REMOTE_PCIHOST)
+
+struct RemotePCIHost {
+/*< private >*/
+PCIExpressHost parent_obj;
+/*< public >*/
+
+MemoryRegion *mr_pci_mem;
+MemoryRegion *mr_sys_io;
+};
+
+#endif
diff --git a/hw/pci-host/remote.c b/hw/pci-host/remote.c
new file mode 100644
index 00..eee45444ef
--- /dev/null
+++ b/hw/pci-host/remote.c
@@ -0,0 +1,75 @@
+/*
+ * Remote PCI host device
+ *
+ * Unlike PCI host devices that model physical hardware, the purpose
+ * of this PCI host is to host multi-process QEMU devices.
+ *
+ * Multi-process QEMU extends the PCI host of a QEMU machine into a
+ * remote process. Any PCI device attached to the remote process is
+ * visible in the QEMU guest. This allows existing QEMU device models
+ * to be reused in the remote process.
+ *
+ * This PCI host is purely a container for PCI devices. It's fake in the
+ * sense that the guest never sees this PCI host and has no way of
+ * accessing it. Its job is just to provide the environment that QEMU
+ * PCI device models need when running in a remote process.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "hw/pci/pci.h"
+#include "hw/pci/pci_host.h"
+#include "hw/pci/pcie_host.h"
+#include "hw/qdev-properties.h"
+#include "hw/pci-host/remote.h"
+#include "exec/memory.h"
+
+static const char *remote_pcihost_root_bus_path(PCIHostState *host_bridge,
+PCIBus *rootbus)
+{
+return ":00";
+}
+
+static void remote_pcihost_realize(DeviceState *dev, Error **errp)
+{
+PCIHostState *pci = PCI_HOST_BRIDGE(dev);
+RemotePCIHost *s = REMOTE_PCIHOST(dev);
+
+pci->bus = pci_root_bus_new(DEVICE(s), "remote-pci",
+s->mr_pci_mem, s->mr_sys_io,
+0, TYPE_PCIE_BUS);
+}
+
+static void remote_pcihost_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
+
+hc->root_bus_path = remote_pcihost_root_bus_path;
+dc->realize = remote_pcihost_realize;
+
+dc->user_creatable = false;
+set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+dc->fw_name = "pci";
+}
+
+static const TypeInfo remote_pcihost_info = {
+.name = TYPE_REMOTE_PCIHOST,
+.parent = TYPE_PCIE_HOST_BRIDGE,
+.instance_size = sizeof(RemotePCIHost),
+.class_init = remote_pcihost_class_init,
+};
+
+static void remote_pcihost_register(void)
+{
+type_register_static(&remote_pcihost_info);
+}
+
+type_init(remote_pcihost_register)
diff --git a/hw/pci-host/Kconfig b/hw/pci-host/Kconfig
index eb03f0489d..8b8c763c28 100644
--- a/hw/pci-host/Kconfig
+++ b/hw/pci-host/Kconfig
@@ -65,3 +65,6 @@ config PCI_POWERNV
 select PCI_EXPRESS
 select MSI_NONBROKEN
 select PCIE_PORT
+
+config REMOTE_PCIHOST
+bool
diff --git a/hw/pci-host/meson.build b

[PULL v3 07/27] multi-process: add the concept description to docs/devel/qemu-multiprocess

2021-02-05 Thread Stefan Hajnoczi
From: John G Johnson 

Signed-off-by: John G Johnson 
Signed-off-by: Elena Ufimtseva 
Signed-off-by: Jagannathan Raman 
Reviewed-by: Stefan Hajnoczi 
Message-id: 
02a68adef99f5df6a380bf8fd7b90948777e411c.1611938319.git.jag.ra...@oracle.com
Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS  |   7 +
 docs/devel/index.rst |   1 +
 docs/devel/multi-process.rst | 966 +++
 3 files changed, 974 insertions(+)
 create mode 100644 docs/devel/multi-process.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index 00626941f1..2ff1ead4ab 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3194,6 +3194,13 @@ S: Maintained
 F: hw/semihosting/
 F: include/hw/semihosting/
 
+Multi-process QEMU
+M: Elena Ufimtseva 
+M: Jagannathan Raman 
+M: John G Johnson 
+S: Maintained
+F: docs/devel/multi-process.rst
+
 Build and test automation
 -
 Build and test automation
diff --git a/docs/devel/index.rst b/docs/devel/index.rst
index 98a7016a9b..22854e334d 100644
--- a/docs/devel/index.rst
+++ b/docs/devel/index.rst
@@ -37,3 +37,4 @@ Contents:
clocks
qom
block-coroutine-wrapper
+   multi-process
diff --git a/docs/devel/multi-process.rst b/docs/devel/multi-process.rst
new file mode 100644
index 00..69699329d6
--- /dev/null
+++ b/docs/devel/multi-process.rst
@@ -0,0 +1,966 @@
+This is the design document for multi-process QEMU. It does not
+necessarily reflect the status of the current implementation, which
+may lack features or be considerably different from what is described
+in this document. This document is still useful as a description of
+the goals and general direction of this feature.
+
+Please refer to the following wiki for latest details:
+https://wiki.qemu.org/Features/MultiProcessQEMU
+
+Multi-process QEMU
+==================
+
+QEMU is often used as the hypervisor for virtual machines running in the
+Oracle cloud. Since one of the advantages of cloud computing is the
+ability to run many VMs from different tenants in the same cloud
+infrastructure, a guest that compromised its hypervisor could
+potentially use the hypervisor's access privileges to access data it is
+not authorized for.
+
+QEMU can be susceptible to security attacks because it is a large,
+monolithic program that provides many features to the VMs it services.
+Many of these features can be configured out of QEMU, but even a reduced
+configuration QEMU has a large amount of code a guest can potentially
+attack. Separating QEMU reduces the attack surface by aiding to
+limit each component in the system to only access the resources that
+it needs to perform its job.
+
+QEMU services
+-------------
+
+QEMU can be broadly described as providing three main services. One is a
+VM control point, where VMs can be created, migrated, re-configured, and
+destroyed. A second is to emulate the CPU instructions within the VM,
+often accelerated by HW virtualization features such as Intel's VT
+extensions. Finally, it provides IO services to the VM by emulating HW
+IO devices, such as disk and network devices.
+
+A multi-process QEMU
+--------------------
+
+A multi-process QEMU involves separating QEMU services into separate
+host processes. Each of these processes can be given only the privileges
+it needs to provide its service, e.g., a disk service could be given
+access only to the disk images it provides, and not be allowed to
+access other files, or any network devices. An attacker who compromised
+this service would not be able to use this exploit to access files or
+devices beyond what the disk service was given access to.
+
+A QEMU control process would remain, but in multi-process mode, will
+have no direct interfaces to the VM. During VM execution, it would still
+provide the user interface to hot-plug devices or live migrate the VM.
+
+A first step in creating a multi-process QEMU is to separate IO services
+from the main QEMU program, which would continue to provide CPU
+emulation. i.e., the control process would also be the CPU emulation
+process. In a later phase, CPU emulation could be separated from the
+control process.
+
+Separating IO services
+----------------------
+
+Separating IO services into individual host processes is a good place to
+begin for a couple of reasons. One is the sheer number of IO devices QEMU
+can emulate provides a large surface of interfaces which could potentially
+be exploited, and, indeed, have been a source of exploits in the past.
+Another is the modular nature of QEMU device emulation code provides
+interface points where the QEMU functions that perform device emulation
+can be separated from the QEMU functions that manage the emulation of
+guest CPU instructions. The devices emulated in the separate process are
+referred to as remote devices.
+
+QEMU device emulation
+~~~~~~~~~~~~~~~~~~~~~
+
+QEMU uses an object oriented SW architecture for device emulation code.
+Configured objects are all compiled into the QEMU binary, then objects

Re: [PATCH v11 00/13] hw/block/nvme: Support Namespace Types and Zoned Namespace Command Set

2021-02-05 Thread Minwoo Im
On 21-02-05 08:22:52, Keith Busch wrote:
> On Sat, Feb 06, 2021 at 01:07:57AM +0900, Minwoo Im wrote:
> > If multipath is enabled, the namespace head and hidden namespace will be
> > created.  In this case, /sys/block/nvme0n1/queue/nr_zones are not
> > returning proper value for the namespace itself.  By the way, the hidden
> > namespace /sys/block/nvme0c0n1/queue/nr_zones are returning properly.
> > 
> > Is it okay for sysfs of the head namespace node (nvme0n1) not to manage
> > the request queue attributes like nr_zones?
> 
> This should fix it. Untested, as my dev machine is in need of repair,
> but if someone can confirm this is successful, I can send it to the
> kernel list.
> 
> ---
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index 65bd6efa5e1c..eb18949bb999 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -677,6 +677,8 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct 
> nvme_id_ns *id)
>   if (blk_queue_stable_writes(ns->queue) && ns->head->disk)
>   blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES,
>  ns->head->disk->queue);
> + if (blk_queue_is_zoned(ns->queue))
> + blk_revalidate_disk_zones(ns->head->disk, NULL);
>  }
>  
>  void nvme_mpath_remove_disk(struct nvme_ns_head *head)
> --

Thanks Keith,

Just for sharing testing result based on this kernel quickly:

In blk_revalidate_disk_zones(), 

488 int blk_revalidate_disk_zones(struct gendisk *disk,
489   void (*update_driver_data)(struct 
gendisk *disk))
490 {
491 struct request_queue *q = disk->queue;
492 struct blk_revalidate_zone_args args = {
493 .disk   = disk,
494 };
495 unsigned int noio_flag;
496 int ret;
497
498 if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
499 return -EIO;
500 if (WARN_ON_ONCE(!queue_is_mq(q)))
501 return -EIO;
 

(q->mq_ops == NULL) in this case, so that the q->nr_zones are not
getting set.



[PULL v3 04/27] docs: update README to use GitLab repo URLs

2021-02-05 Thread Stefan Hajnoczi
qemu.org is running out of bandwidth and the QEMU project is moving
towards a gating CI on GitLab. Use the GitLab repos instead of qemu.org
(they will become mirrors).

Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Wainer dos Santos Moschetta 
Reviewed-by: Thomas Huth 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 2021015017.156802-5-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 README.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.rst b/README.rst
index 58b9f2dc15..ce39d89077 100644
--- a/README.rst
+++ b/README.rst
@@ -60,7 +60,7 @@ The QEMU source code is maintained under the GIT version 
control system.
 
 .. code-block:: shell
 
-   git clone https://git.qemu.org/git/qemu.git
+   git clone https://gitlab.com/qemu-project/qemu.git
 
 When submitting patches, one common approach is to use 'git
 format-patch' and/or 'git send-email' to format & send the mail to the
@@ -78,7 +78,7 @@ The QEMU website is also maintained under source control.
 
 .. code-block:: shell
 
-  git clone https://git.qemu.org/git/qemu-web.git
+  git clone https://gitlab.com/qemu-project/qemu-web.git
 
 * ``_
 
-- 
2.29.2



[PULL v3 03/27] gitlab-ci: remove redundant GitLab repo URL command

2021-02-05 Thread Stefan Hajnoczi
It is no longer necessary to point .gitmodules at GitLab repos when
running in GitLab CI since they are now used all the time.

Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Wainer dos Santos Moschetta 
Reviewed-by: Thomas Huth 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 2021015017.156802-4-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 .gitlab-ci.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 7c0db64710..28a83afb91 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -18,7 +18,6 @@ include:
   image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest
   before_script:
 - JOBS=$(expr $(nproc) + 1)
-- sed -i s,git.qemu.org/git,gitlab.com/qemu-project, .gitmodules
   script:
 - mkdir build
 - cd build
-- 
2.29.2



[PULL v3 05/27] pc-bios: update mirror URLs to GitLab

2021-02-05 Thread Stefan Hajnoczi
qemu.org is running out of bandwidth and the QEMU project is moving
towards a gating CI on GitLab. Use the GitLab repos instead of qemu.org
(they will become mirrors).

Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Wainer dos Santos Moschetta 
Reviewed-by: Thomas Huth 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 2021015017.156802-6-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 pc-bios/README | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pc-bios/README b/pc-bios/README
index 33f9754ad3..db7129ef64 100644
--- a/pc-bios/README
+++ b/pc-bios/README
@@ -20,7 +20,7 @@
   legacy x86 software to communicate with an attached serial console as
   if a video card were attached.  The master sources reside in a subversion
   repository at http://sgabios.googlecode.com/svn/trunk.  A git mirror is
-  available at https://git.qemu.org/git/sgabios.git.
+  available at https://gitlab.com/qemu-project/sgabios.git.
 
 - The PXE roms come from the iPXE project. Built with BANNER_TIME 0.
   Sources available at http://ipxe.org.  Vendor:Device ID -> ROM mapping:
@@ -37,7 +37,7 @@
 
 - The u-boot binary for e500 comes from the upstream denx u-boot project where
   it was compiled using the qemu-ppce500 target.
-  A git mirror is available at: https://git.qemu.org/git/u-boot.git
+  A git mirror is available at: https://gitlab.com/qemu-project/u-boot.git
   The hash used to compile the current version is: 2072e72
 
 - Skiboot (https://github.com/open-power/skiboot/) is an OPAL
-- 
2.29.2



[PULL v3 02/27] gitmodules: use GitLab repos instead of qemu.org

2021-02-05 Thread Stefan Hajnoczi
qemu.org is running out of bandwidth and the QEMU project is moving
towards a gating CI on GitLab. Use the GitLab repos instead of qemu.org
(they will become mirrors).

Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Wainer dos Santos Moschetta 
Reviewed-by: Thomas Huth 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 2021015017.156802-3-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 .gitmodules | 44 ++--
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index 2bdeeacef8..08b1b48a09 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,66 +1,66 @@
 [submodule "roms/seabios"]
path = roms/seabios
-   url = https://git.qemu.org/git/seabios.git/
+   url = https://gitlab.com/qemu-project/seabios.git/
 [submodule "roms/SLOF"]
path = roms/SLOF
-   url = https://git.qemu.org/git/SLOF.git
+   url = https://gitlab.com/qemu-project/SLOF.git
 [submodule "roms/ipxe"]
path = roms/ipxe
-   url = https://git.qemu.org/git/ipxe.git
+   url = https://gitlab.com/qemu-project/ipxe.git
 [submodule "roms/openbios"]
path = roms/openbios
-   url = https://git.qemu.org/git/openbios.git
+   url = https://gitlab.com/qemu-project/openbios.git
 [submodule "roms/qemu-palcode"]
path = roms/qemu-palcode
-   url = https://git.qemu.org/git/qemu-palcode.git
+   url = https://gitlab.com/qemu-project/qemu-palcode.git
 [submodule "roms/sgabios"]
path = roms/sgabios
-   url = https://git.qemu.org/git/sgabios.git
+   url = https://gitlab.com/qemu-project/sgabios.git
 [submodule "dtc"]
path = dtc
-   url = https://git.qemu.org/git/dtc.git
+   url = https://gitlab.com/qemu-project/dtc.git
 [submodule "roms/u-boot"]
path = roms/u-boot
-   url = https://git.qemu.org/git/u-boot.git
+   url = https://gitlab.com/qemu-project/u-boot.git
 [submodule "roms/skiboot"]
path = roms/skiboot
-   url = https://git.qemu.org/git/skiboot.git
+   url = https://gitlab.com/qemu-project/skiboot.git
 [submodule "roms/QemuMacDrivers"]
path = roms/QemuMacDrivers
-   url = https://git.qemu.org/git/QemuMacDrivers.git
+   url = https://gitlab.com/qemu-project/QemuMacDrivers.git
 [submodule "ui/keycodemapdb"]
path = ui/keycodemapdb
-   url = https://git.qemu.org/git/keycodemapdb.git
+   url = https://gitlab.com/qemu-project/keycodemapdb.git
 [submodule "capstone"]
path = capstone
-   url = https://git.qemu.org/git/capstone.git
+   url = https://gitlab.com/qemu-project/capstone.git
 [submodule "roms/seabios-hppa"]
path = roms/seabios-hppa
-   url = https://git.qemu.org/git/seabios-hppa.git
+   url = https://gitlab.com/qemu-project/seabios-hppa.git
 [submodule "roms/u-boot-sam460ex"]
path = roms/u-boot-sam460ex
-   url = https://git.qemu.org/git/u-boot-sam460ex.git
+   url = https://gitlab.com/qemu-project/u-boot-sam460ex.git
 [submodule "tests/fp/berkeley-testfloat-3"]
path = tests/fp/berkeley-testfloat-3
-   url = https://git.qemu.org/git/berkeley-testfloat-3.git
+   url = https://gitlab.com/qemu-project/berkeley-testfloat-3.git
 [submodule "tests/fp/berkeley-softfloat-3"]
path = tests/fp/berkeley-softfloat-3
-   url = https://git.qemu.org/git/berkeley-softfloat-3.git
+   url = https://gitlab.com/qemu-project/berkeley-softfloat-3.git
 [submodule "roms/edk2"]
path = roms/edk2
-   url = https://git.qemu.org/git/edk2.git
+   url = https://gitlab.com/qemu-project/edk2.git
 [submodule "slirp"]
path = slirp
-   url = https://git.qemu.org/git/libslirp.git
+   url = https://gitlab.com/qemu-project/libslirp.git
 [submodule "roms/opensbi"]
path = roms/opensbi
-   url =   https://git.qemu.org/git/opensbi.git
+   url =   https://gitlab.com/qemu-project/opensbi.git
 [submodule "roms/qboot"]
path = roms/qboot
-   url = https://git.qemu.org/git/qboot.git
+   url = https://gitlab.com/qemu-project/qboot.git
 [submodule "meson"]
path = meson
-   url = https://git.qemu.org/git/meson.git
+   url = https://gitlab.com/qemu-project/meson.git
 [submodule "roms/vbootrom"]
path = roms/vbootrom
-   url = https://git.qemu.org/git/vbootrom.git
+   url = https://gitlab.com/qemu-project/vbootrom.git
-- 
2.29.2



[PATCH v2 08/10] iotests/264: add mirror-cancel test-case

2021-02-05 Thread Vladimir Sementsov-Ogievskiy
Check that cancel doesn't wait for 10s of nbd reconnect timeout.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Eric Blake 
---
 tests/qemu-iotests/264 | 38 ++
 tests/qemu-iotests/264.out |  4 ++--
 2 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/tests/qemu-iotests/264 b/tests/qemu-iotests/264
index 6feeaa4056..347e53add5 100755
--- a/tests/qemu-iotests/264
+++ b/tests/qemu-iotests/264
@@ -27,25 +27,26 @@ from iotests import qemu_img_create, file_path, 
qemu_nbd_popen
 
 disk_a, disk_b, nbd_sock = file_path('disk_a', 'disk_b', 'nbd-sock')
 nbd_uri = 'nbd+unix:///?socket=' + nbd_sock
-size = 5 * 1024 * 1024
 wait_limit = 3.0
 wait_step = 0.2
 
 
 class TestNbdReconnect(iotests.QMPTestCase):
-def setUp(self):
-qemu_img_create('-f', iotests.imgfmt, disk_a, str(size))
-qemu_img_create('-f', iotests.imgfmt, disk_b, str(size))
+def init_vm(self, disk_size):
+qemu_img_create('-f', iotests.imgfmt, disk_a, str(disk_size))
+qemu_img_create('-f', iotests.imgfmt, disk_b, str(disk_size))
 self.vm = iotests.VM().add_drive(disk_a)
 self.vm.launch()
-self.vm.hmp_qemu_io('drive0', 'write 0 {}'.format(size))
+self.vm.hmp_qemu_io('drive0', 'write 0 {}'.format(disk_size))
 
 def tearDown(self):
 self.vm.shutdown()
 os.remove(disk_a)
 os.remove(disk_b)
 
-def test(self):
+def start_job(self, job):
+"""Start job with nbd target and kill the server"""
+assert job in ('blockdev-backup', 'blockdev-mirror')
 with qemu_nbd_popen('-k', nbd_sock, '-f', iotests.imgfmt, disk_b):
 result = self.vm.qmp('blockdev-add',
  **{'node_name': 'backup0',
@@ -55,7 +56,7 @@ class TestNbdReconnect(iotests.QMPTestCase):
 'path': nbd_sock},
  'reconnect-delay': 10}})
 self.assert_qmp(result, 'return', {})
-result = self.vm.qmp('blockdev-backup', device='drive0',
+result = self.vm.qmp(job, device='drive0',
  sync='full', target='backup0',
  speed=(1 * 1024 * 1024))
 self.assert_qmp(result, 'return', {})
@@ -73,7 +74,8 @@ class TestNbdReconnect(iotests.QMPTestCase):
 
 jobs = self.vm.qmp('query-block-jobs')['return']
 # Check that job is still in progress
-self.assertTrue(jobs and jobs[0]['offset'] < jobs[0]['len'])
+self.assertTrue(jobs)
+self.assertTrue(jobs[0]['offset'] < jobs[0]['len'])
 
 result = self.vm.qmp('block-job-set-speed', device='drive0', speed=0)
 self.assert_qmp(result, 'return', {})
@@ -81,12 +83,32 @@ class TestNbdReconnect(iotests.QMPTestCase):
 # Emulate server down time for 1 second
 time.sleep(1)
 
+def test_backup(self):
+size = 5 * 1024 * 1024
+self.init_vm(size)
+self.start_job('blockdev-backup')
+
 with qemu_nbd_popen('-k', nbd_sock, '-f', iotests.imgfmt, disk_b):
 e = self.vm.event_wait('BLOCK_JOB_COMPLETED')
 self.assertEqual(e['data']['offset'], size)
 result = self.vm.qmp('blockdev-del', node_name='backup0')
 self.assert_qmp(result, 'return', {})
 
+def test_mirror_cancel(self):
+# Mirror speed limit doesn't work well enough, it seems that mirror
+# will run many parallel requests anyway. MAX_IN_FLIGHT is 16 and
+# MAX_IO_BYTES is 1M in mirror.c, so let's use 20M disk.
+self.init_vm(20 * 1024 * 1024)
+self.start_job('blockdev-mirror')
+
+result = self.vm.qmp('block-job-cancel', device='drive0')
+self.assert_qmp(result, 'return', {})
+
+start_t = time.time()
+self.vm.event_wait('BLOCK_JOB_CANCELLED')
+delta_t = time.time() - start_t
+self.assertTrue(delta_t < 2.0)
+
 
 if __name__ == '__main__':
 iotests.main(supported_fmts=['qcow2'])
diff --git a/tests/qemu-iotests/264.out b/tests/qemu-iotests/264.out
index ae1213e6f8..fbc63e62f8 100644
--- a/tests/qemu-iotests/264.out
+++ b/tests/qemu-iotests/264.out
@@ -1,5 +1,5 @@
-.
+..
 --
-Ran 1 tests
+Ran 2 tests
 
 OK
-- 
2.29.2




[PATCH v2 05/10] block/mirror: implement .cancel job handler

2021-02-05 Thread Vladimir Sementsov-Ogievskiy
Cancel in-flight io on target to not waste the time.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Eric Blake 
---
 block/mirror.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/block/mirror.c b/block/mirror.c
index 8e1ad6eceb..9faffe4707 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1179,6 +1179,14 @@ static bool mirror_drained_poll(BlockJob *job)
 return !!s->in_flight;
 }
 
+static void mirror_cancel(Job *job)
+{
+MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
+BlockDriverState *target = blk_bs(s->target);
+
+bdrv_cancel_in_flight(target);
+}
+
 static const BlockJobDriver mirror_job_driver = {
 .job_driver = {
 .instance_size  = sizeof(MirrorBlockJob),
@@ -1190,6 +1198,7 @@ static const BlockJobDriver mirror_job_driver = {
 .abort  = mirror_abort,
 .pause  = mirror_pause,
 .complete   = mirror_complete,
+.cancel = mirror_cancel,
 },
 .drained_poll   = mirror_drained_poll,
 };
-- 
2.29.2




[PATCH v2 04/10] job: add .cancel handler for the driver

2021-02-05 Thread Vladimir Sementsov-Ogievskiy
To be used in mirror in the following commit to cancel in-flight io on
target to not waste the time.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/qemu/job.h | 5 +
 job.c  | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/include/qemu/job.h b/include/qemu/job.h
index 32aabb1c60..efc6fa7544 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -251,6 +251,11 @@ struct JobDriver {
  */
 void (*clean)(Job *job);
 
+/**
+ * If the callback is not NULL, it will be invoked in job_cancel_async
+ */
+void (*cancel)(Job *job);
+
 
 /** Called when the job is freed */
 void (*free)(Job *job);
diff --git a/job.c b/job.c
index 3aaaebafe2..289edee143 100644
--- a/job.c
+++ b/job.c
@@ -715,6 +715,9 @@ static int job_finalize_single(Job *job)
 
 static void job_cancel_async(Job *job, bool force)
 {
+if (job->driver->cancel) {
+job->driver->cancel(job);
+}
 if (job->user_paused) {
 /* Do not call job_enter here, the caller will handle it.  */
 if (job->driver->user_resume) {
-- 
2.29.2




[PATCH v2 10/10] iotests/264: add backup-cancel test-case

2021-02-05 Thread Vladimir Sementsov-Ogievskiy
Check that cancel doesn't wait for 10s of nbd reconnect timeout.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Eric Blake 
---
 tests/qemu-iotests/264 | 21 ++---
 tests/qemu-iotests/264.out |  4 ++--
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/tests/qemu-iotests/264 b/tests/qemu-iotests/264
index 347e53add5..4f96825a22 100755
--- a/tests/qemu-iotests/264
+++ b/tests/qemu-iotests/264
@@ -94,13 +94,7 @@ class TestNbdReconnect(iotests.QMPTestCase):
 result = self.vm.qmp('blockdev-del', node_name='backup0')
 self.assert_qmp(result, 'return', {})
 
-def test_mirror_cancel(self):
-# Mirror speed limit doesn't work well enough, it seems that mirror
-# will run many parallel requests anyway. MAX_IN_FLIGHT is 16 and
-# MAX_IO_BYTES is 1M in mirror.c, so let's use 20M disk.
-self.init_vm(20 * 1024 * 1024)
-self.start_job('blockdev-mirror')
-
+def cancel_job(self):
 result = self.vm.qmp('block-job-cancel', device='drive0')
 self.assert_qmp(result, 'return', {})
 
@@ -109,6 +103,19 @@ class TestNbdReconnect(iotests.QMPTestCase):
 delta_t = time.time() - start_t
 self.assertTrue(delta_t < 2.0)
 
+def test_mirror_cancel(self):
+# Mirror speed limit doesn't work well enough, it seems that mirror
+# will run many parallel requests anyway. MAX_IN_FLIGHT is 16 and
+# MAX_IO_BYTES is 1M in mirror.c, so let's use 20M disk.
+self.init_vm(20 * 1024 * 1024)
+self.start_job('blockdev-mirror')
+self.cancel_job()
+
+def test_backup_cancel(self):
+self.init_vm(5 * 1024 * 1024)
+self.start_job('blockdev-backup')
+self.cancel_job()
+
 
 if __name__ == '__main__':
 iotests.main(supported_fmts=['qcow2'])
diff --git a/tests/qemu-iotests/264.out b/tests/qemu-iotests/264.out
index fbc63e62f8..8d7e996700 100644
--- a/tests/qemu-iotests/264.out
+++ b/tests/qemu-iotests/264.out
@@ -1,5 +1,5 @@
-..
+...
 --
-Ran 2 tests
+Ran 3 tests
 
 OK
-- 
2.29.2




[PATCH v2 02/10] block/nbd: implement .bdrv_cancel_in_flight

2021-02-05 Thread Vladimir Sementsov-Ogievskiy
Just stop waiting for connection in existing requests.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Eric Blake 
---
 block/nbd.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/block/nbd.c b/block/nbd.c
index b3cbbeb4b0..c26dc5a54f 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -2458,6 +2458,18 @@ static const char *const nbd_strong_runtime_opts[] = {
 NULL
 };
 
+static void nbd_cancel_in_flight(BlockDriverState *bs)
+{
+BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
+
+reconnect_delay_timer_del(s);
+
+if (s->state == NBD_CLIENT_CONNECTING_WAIT) {
+s->state = NBD_CLIENT_CONNECTING_NOWAIT;
+qemu_co_queue_restart_all(&s->free_sema);
+}
+}
+
 static BlockDriver bdrv_nbd = {
 .format_name= "nbd",
 .protocol_name  = "nbd",
@@ -2484,6 +2496,7 @@ static BlockDriver bdrv_nbd = {
 .bdrv_co_block_status   = nbd_client_co_block_status,
 .bdrv_dirname   = nbd_dirname,
 .strong_runtime_opts= nbd_strong_runtime_opts,
+.bdrv_cancel_in_flight  = nbd_cancel_in_flight,
 };
 
 static BlockDriver bdrv_nbd_tcp = {
@@ -2512,6 +2525,7 @@ static BlockDriver bdrv_nbd_tcp = {
 .bdrv_co_block_status   = nbd_client_co_block_status,
 .bdrv_dirname   = nbd_dirname,
 .strong_runtime_opts= nbd_strong_runtime_opts,
+.bdrv_cancel_in_flight  = nbd_cancel_in_flight,
 };
 
 static BlockDriver bdrv_nbd_unix = {
@@ -2540,6 +2554,7 @@ static BlockDriver bdrv_nbd_unix = {
 .bdrv_co_block_status   = nbd_client_co_block_status,
 .bdrv_dirname   = nbd_dirname,
 .strong_runtime_opts= nbd_strong_runtime_opts,
+.bdrv_cancel_in_flight  = nbd_cancel_in_flight,
 };
 
 static void bdrv_nbd_init(void)
-- 
2.29.2




[PATCH v2 09/10] block/backup: implement .cancel job handler

2021-02-05 Thread Vladimir Sementsov-Ogievskiy
Cancel in-flight io on target to not waste the time.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Eric Blake 
---
 block/backup.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/block/backup.c b/block/backup.c
index cc525d5544..94e6dcd72e 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -35,6 +35,7 @@ typedef struct BackupBlockJob {
 BlockJob common;
 BlockDriverState *backup_top;
 BlockDriverState *source_bs;
+BlockDriverState *target_bs;
 
 BdrvDirtyBitmap *sync_bitmap;
 
@@ -329,6 +330,13 @@ static void coroutine_fn backup_set_speed(BlockJob *job, 
int64_t speed)
 }
 }
 
+static void backup_cancel(Job *job)
+{
+BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
+
+bdrv_cancel_in_flight(s->target_bs);
+}
+
 static const BlockJobDriver backup_job_driver = {
 .job_driver = {
 .instance_size  = sizeof(BackupBlockJob),
@@ -340,6 +348,7 @@ static const BlockJobDriver backup_job_driver = {
 .abort  = backup_abort,
 .clean  = backup_clean,
 .pause  = backup_pause,
+.cancel = backup_cancel,
 },
 .set_speed = backup_set_speed,
 };
@@ -528,6 +537,7 @@ BlockJob *backup_job_create(const char *job_id, 
BlockDriverState *bs,
 
 job->backup_top = backup_top;
 job->source_bs = bs;
+job->target_bs = target;
 job->on_source_error = on_source_error;
 job->on_target_error = on_target_error;
 job->sync_mode = sync_mode;
-- 
2.29.2




[PATCH v2 07/10] iotests.py: qemu_nbd_popen: remove pid file after use

2021-02-05 Thread Vladimir Sementsov-Ogievskiy
To not interfere with other qemu_nbd_popen() calls in same test.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Eric Blake 
---
 tests/qemu-iotests/iotests.py | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 00be68eca3..4e758308f2 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -296,7 +296,9 @@ def qemu_nbd_list_log(*args: str) -> str:
 @contextmanager
 def qemu_nbd_popen(*args):
 '''Context manager running qemu-nbd within the context'''
-pid_file = file_path("pid")
+pid_file = file_path("qemu_nbd_popen-nbd-pid-file")
+
+assert not os.path.exists(pid_file)
 
 cmd = list(qemu_nbd_args)
 cmd.extend(('--persistent', '--pid-file', pid_file))
@@ -314,6 +316,8 @@ def qemu_nbd_popen(*args):
 time.sleep(0.01)
 yield
 finally:
+if os.path.exists(pid_file):
+os.remove(pid_file)
 log('Kill NBD server')
 p.kill()
 p.wait()
-- 
2.29.2




[PATCH v2 01/10] block: add new BlockDriver handler: bdrv_cancel_in_flight

2021-02-05 Thread Vladimir Sementsov-Ogievskiy
It will be used to stop retrying NBD requests on mirror cancel.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Eric Blake 
---
 include/block/block.h |  3 +++
 include/block/block_int.h |  9 +
 block/io.c| 11 +++
 3 files changed, 23 insertions(+)

diff --git a/include/block/block.h b/include/block/block.h
index 0a9f2c187c..2f2698074e 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -849,4 +849,7 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t 
src_offset,
 BdrvChild *dst, int64_t dst_offset,
 int64_t bytes, BdrvRequestFlags read_flags,
 BdrvRequestFlags write_flags);
+
+void bdrv_cancel_in_flight(BlockDriverState *bs);
+
 #endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 22a2789d35..88e4111939 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -352,6 +352,15 @@ struct BlockDriver {
 bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
 int64_t *map, BlockDriverState **file);
 
+/*
+ * This informs the driver that we are no longer interested in the result
+ * of in-flight requests, so don't waste the time if possible.
+ *
+ * One example usage is to avoid waiting for an nbd target node reconnect
+ * timeout during job-cancel.
+ */
+void (*bdrv_cancel_in_flight)(BlockDriverState *bs);
+
 /*
  * Invalidate any cached meta-data.
  */
diff --git a/block/io.c b/block/io.c
index b0435ed670..ca2dca3007 100644
--- a/block/io.c
+++ b/block/io.c
@@ -3460,3 +3460,14 @@ out:
 
 return ret;
 }
+
+void bdrv_cancel_in_flight(BlockDriverState *bs)
+{
+if (!bs || !bs->drv) {
+return;
+}
+
+if (bs->drv->bdrv_cancel_in_flight) {
+bs->drv->bdrv_cancel_in_flight(bs);
+}
+}
-- 
2.29.2




[PATCH v2 06/10] iotests/264: move to python unittest

2021-02-05 Thread Vladimir Sementsov-Ogievskiy
We are going to add more test cases, so use the library supporting test
cases.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Eric Blake 
---
 tests/qemu-iotests/264 | 93 ++
 tests/qemu-iotests/264.out | 20 ++--
 2 files changed, 58 insertions(+), 55 deletions(-)

diff --git a/tests/qemu-iotests/264 b/tests/qemu-iotests/264
index e725cefd47..6feeaa4056 100755
--- a/tests/qemu-iotests/264
+++ b/tests/qemu-iotests/264
@@ -20,13 +20,10 @@
 #
 
 import time
+import os
 
 import iotests
-from iotests import qemu_img_create, file_path, qemu_nbd_popen, log
-
-iotests.script_initialize(
-supported_fmts=['qcow2'],
-)
+from iotests import qemu_img_create, file_path, qemu_nbd_popen
 
 disk_a, disk_b, nbd_sock = file_path('disk_a', 'disk_b', 'nbd-sock')
 nbd_uri = 'nbd+unix:///?socket=' + nbd_sock
@@ -34,46 +31,62 @@ size = 5 * 1024 * 1024
 wait_limit = 3.0
 wait_step = 0.2
 
-qemu_img_create('-f', iotests.imgfmt, disk_a, str(size))
-qemu_img_create('-f', iotests.imgfmt, disk_b, str(size))
 
-with qemu_nbd_popen('-k', nbd_sock, '-f', iotests.imgfmt, disk_b):
-vm = iotests.VM().add_drive(disk_a)
-vm.launch()
-vm.hmp_qemu_io('drive0', 'write 0 {}'.format(size))
+class TestNbdReconnect(iotests.QMPTestCase):
+def setUp(self):
+qemu_img_create('-f', iotests.imgfmt, disk_a, str(size))
+qemu_img_create('-f', iotests.imgfmt, disk_b, str(size))
+self.vm = iotests.VM().add_drive(disk_a)
+self.vm.launch()
+self.vm.hmp_qemu_io('drive0', 'write 0 {}'.format(size))
+
+def tearDown(self):
+self.vm.shutdown()
+os.remove(disk_a)
+os.remove(disk_b)
+
+def test(self):
+with qemu_nbd_popen('-k', nbd_sock, '-f', iotests.imgfmt, disk_b):
+result = self.vm.qmp('blockdev-add',
+ **{'node_name': 'backup0',
+'driver': 'raw',
+'file': {'driver': 'nbd',
+ 'server': {'type': 'unix',
+'path': nbd_sock},
+ 'reconnect-delay': 10}})
+self.assert_qmp(result, 'return', {})
+result = self.vm.qmp('blockdev-backup', device='drive0',
+ sync='full', target='backup0',
+ speed=(1 * 1024 * 1024))
+self.assert_qmp(result, 'return', {})
+
+# Wait for some progress
+t = 0.0
+while t < wait_limit:
+jobs = self.vm.qmp('query-block-jobs')['return']
+if jobs and jobs[0]['offset'] > 0:
+break
+time.sleep(wait_step)
+t += wait_step
 
-vm.qmp_log('blockdev-add', filters=[iotests.filter_qmp_testfiles],
-   **{'node_name': 'backup0',
-  'driver': 'raw',
-  'file': {'driver': 'nbd',
-   'server': {'type': 'unix', 'path': nbd_sock},
-   'reconnect-delay': 10}})
-vm.qmp_log('blockdev-backup', device='drive0', sync='full',
-   target='backup0', speed=(1 * 1024 * 1024))
+self.assertTrue(jobs and jobs[0]['offset'] > 0)  # job started
 
-# Wait for some progress
-t = 0.0
-while t < wait_limit:
-jobs = vm.qmp('query-block-jobs')['return']
-if jobs and jobs[0]['offset'] > 0:
-break
-time.sleep(wait_step)
-t += wait_step
+jobs = self.vm.qmp('query-block-jobs')['return']
+# Check that job is still in progress
+self.assertTrue(jobs and jobs[0]['offset'] < jobs[0]['len'])
 
-if jobs and jobs[0]['offset'] > 0:
-log('Backup job is started')
+result = self.vm.qmp('block-job-set-speed', device='drive0', speed=0)
+self.assert_qmp(result, 'return', {})
 
-jobs = vm.qmp('query-block-jobs')['return']
-if jobs and jobs[0]['offset'] < jobs[0]['len']:
-log('Backup job is still in progress')
+# Emulate server down time for 1 second
+time.sleep(1)
 
-vm.qmp_log('block-job-set-speed', device='drive0', speed=0)
+with qemu_nbd_popen('-k', nbd_sock, '-f', iotests.imgfmt, disk_b):
+e = self.vm.event_wait('BLOCK_JOB_COMPLETED')
+self.assertEqual(e['data']['offset'], size)
+result = self.vm.qmp('blockdev-del', node_name='backup0')
+self.assert_qmp(result, 'return', {})
 
-# Emulate server down time for 1 second
-time.sleep(1)
 
-with qemu_nbd_popen('-k', nbd_sock, '-f', iotests.imgfmt, disk_b):
-e = vm.event_wait('BLOCK_JOB_COMPLETED')
-log('Backup completed: {}'.format(e['data']['offset']))
-vm.qmp_log('blockdev-del', node_name='backup0')
-vm.shutdown()
+if __name__ == '__main__':
+iotests.main(supported_fmts=['qcow2'])
diff --git a/tests/qemu-iotests/264.ou

[PATCH v2 03/10] block/raw-format: implement .bdrv_cancel_in_flight handler

2021-02-05 Thread Vladimir Sementsov-Ogievskiy
We are going to cancel in-flight requests on mirror nbd target on job
cancel. Still nbd is often used not directly but as raw-format child.
So, add pass-through handler here.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Eric Blake 
---
 block/raw-format.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/block/raw-format.c b/block/raw-format.c
index 42ec50802b..7717578ed6 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -575,6 +575,11 @@ static const char *const raw_strong_runtime_opts[] = {
 NULL
 };
 
+static void raw_cancel_in_flight(BlockDriverState *bs)
+{
+bdrv_cancel_in_flight(bs->file->bs);
+}
+
 BlockDriver bdrv_raw = {
 .format_name  = "raw",
 .instance_size= sizeof(BDRVRawState),
@@ -608,6 +613,7 @@ BlockDriver bdrv_raw = {
 .bdrv_has_zero_init   = &raw_has_zero_init,
 .strong_runtime_opts  = raw_strong_runtime_opts,
 .mutable_opts = mutable_opts,
+.bdrv_cancel_in_flight = raw_cancel_in_flight,
 };
 
 static void bdrv_raw_init(void)
-- 
2.29.2




[PATCH v2 00/10] mirror: cancel nbd reconnect

2021-02-05 Thread Vladimir Sementsov-Ogievskiy
Hi all!

The problem

Assume we have mirror job with nbd target node with enabled reconnect.
Connection failed. So, all current requests to nbd node are waiting for
nbd driver to reconnect. And they will wait for reconnect-delay time
specified in nbd blockdev options. This timeout may be long enough, for
example, we in Virtuozzo use 300 seconds by default.

So, if at this moment user tries to cancel the job, job will wait for
its in-flight requests to finish up to 300 seconds. From the user point
of view, cancelling the job takes a long time. Bad.

Solution

Let's just cancel "waiting for reconnect in in-flight request coroutines"
on mirror (and backup) cancel. Welcome the series below.

v2: wording, rebase on master, add Eric's r-bs, update test-output in
last commit

Vladimir Sementsov-Ogievskiy (10):
  block: add new BlockDriver handler: bdrv_cancel_in_flight
  block/nbd: implement .bdrv_cancel_in_flight
  block/raw-format: implement .bdrv_cancel_in_flight handler
  job: add .cancel handler for the driver
  block/mirror: implement .cancel job handler
  iotests/264: move to python unittest
  iotests.py: qemu_nbd_popen: remove pid file after use
  iotests/264: add mirror-cancel test-case
  block/backup: implement .cancel job handler
  iotests/264: add backup-cancel test-case

 include/block/block.h |   3 +
 include/block/block_int.h |   9 +++
 include/qemu/job.h|   5 ++
 block/backup.c|  10 +++
 block/io.c|  11 +++
 block/mirror.c|   9 +++
 block/nbd.c   |  15 
 block/raw-format.c|   6 ++
 job.c |   3 +
 tests/qemu-iotests/264| 140 ++
 tests/qemu-iotests/264.out|  20 ++---
 tests/qemu-iotests/iotests.py |   6 +-
 12 files changed, 172 insertions(+), 65 deletions(-)

-- 
2.29.2




Re: [PATCH v2 29/36] blockdev: qmp_x_blockdev_reopen: acquire all contexts

2021-02-05 Thread Kevin Wolf
Am 05.02.2021 um 17:16 hat Vladimir Sementsov-Ogievskiy geschrieben:
> 05.02.2021 19:01, Kevin Wolf wrote:
> > Am 27.11.2020 um 15:45 hat Vladimir Sementsov-Ogievskiy geschrieben:
> > > During reopen we may add backing bs from other aio context, which may
> > > lead to changing original context of top bs.
> > > 
> > > We are going to move graph modification to prepare stage. So, it will
> > > be possible that bdrv_flush() in bdrv_reopen_prepare called on bs in
> > > non-original aio context, which we didn't acquire, which leads to crash.
> > > 
> > > More correct would be to acquire all aio contexts we are going to work
> > > with. And the simplest way is to just acquire all of them. It may be
> > > optimized later if needed.
> > > 
> > > Signed-off-by: Vladimir Sementsov-Ogievskiy 
> > 
> > I'm afraid it's not as easy. Holding the lock of more than one
> > AioContext is always a bit risky with respect to deadlocks.
> > 
> > For example, changing the AioContext of a node with
> > bdrv_set_aio_context_ignore() has explicit rules that are now violated:
> > 
> >   * The caller must own the AioContext lock for the old AioContext of bs, 
> > but it
> >   * must not own the AioContext lock for new_context (unless new_context is 
> > the
> >   * same as the current context of bs).
> > 
> > Draining while holding all AioContext locks is suspicious, too. I think
> > I have seen deadlocks before, which is why bdrv_drain_all_*() are
> > careful to only ever lock a single AioContext at a time.
> 
> That's not good :\ Hmm, probably we just should flush everything
> before all graph modifications.

Would that have to be a separate phase before prepare then?

I suppose the same problem exists with drv->bdrv_reopen_prepare, which
might be called in a different state (both graph structure and
AioContext) than before. I'll have to see the patch first that reorders
things, but this callback has always had the problem that sometimes it
wants the old state and sometimes it wants the new state...

Kevin




Re: [PATCH v2 28/36] block: add bdrv_set_backing_noperm() transaction action

2021-02-05 Thread Kevin Wolf
Am 05.02.2021 um 17:06 hat Vladimir Sementsov-Ogievskiy geschrieben:
> 05.02.2021 17:00, Kevin Wolf wrote:
> > Am 27.11.2020 um 15:45 hat Vladimir Sementsov-Ogievskiy geschrieben:
> > > Split out no-perm part of bdrv_set_backing_hd() as a separate
> > > transaction action. Note that in case of an existing BdrvChild we reuse it,
> > > not recreate it, just to do fewer actions.
> > > 
> > > Signed-off-by: Vladimir Sementsov-Ogievskiy 
> > > ---
> > >   block.c | 111 +---
> > >   1 file changed, 89 insertions(+), 22 deletions(-)
> > > 
> > > diff --git a/block.c b/block.c
> > > index 54fb6d24bd..617cba9547 100644
> > > --- a/block.c
> > > +++ b/block.c
> > > @@ -101,6 +101,7 @@ static int bdrv_attach_child_common(BlockDriverState 
> > > *child_bs,
> > >   uint64_t perm, uint64_t shared_perm,
> > >   void *opaque, BdrvChild **child,
> > >   GSList **tran, Error **errp);
> > > +static void bdrv_remove_backing(BlockDriverState *bs, GSList **tran);
> > >   static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, 
> > > BlockReopenQueue
> > >  *queue, Error **errp);
> > > @@ -3194,45 +3195,111 @@ static BdrvChildRole 
> > > bdrv_backing_role(BlockDriverState *bs)
> > >   }
> > >   }
> > > +typedef struct BdrvSetBackingNoPermState {
> > > +BlockDriverState *bs;
> > > +BlockDriverState *backing_bs;
> > > +BlockDriverState *old_inherits_from;
> > > +GSList *attach_tran;
> > > +} BdrvSetBackingNoPermState;
> > 
> > Why do we need the nested attach_tran instead of just including these
> > actions in the outer transaction?
> > 
> > > +static void bdrv_set_backing_noperm_abort(void *opaque)
> > > +{
> > > +BdrvSetBackingNoPermState *s = opaque;
> > > +
> > > +if (s->backing_bs) {
> > > +s->backing_bs->inherits_from = s->old_inherits_from;
> > > +}
> > > +
> > > +tran_abort(s->attach_tran);
> > > +
> > > +bdrv_refresh_limits(s->bs, NULL);
> > > +if (s->old_inherits_from) {
> > > +bdrv_refresh_limits(s->old_inherits_from, NULL);
> > > +}
> > 
> > How is bs->inherits_from related to limits? I don't see a
> > bdrv_refresh_limits() call in bdrv_set_backing_noperm() that this would
> > undo.
> > 
> > > +}
> > > +
> > > +static void bdrv_set_backing_noperm_commit(void *opaque)
> > > +{
> > > +BdrvSetBackingNoPermState *s = opaque;
> > > +
> > > +tran_commit(s->attach_tran);
> > > +}
> > > +
> > > +static TransactionActionDrv bdrv_set_backing_noperm_drv = {
> > > +.abort = bdrv_set_backing_noperm_abort,
> > > +.commit = bdrv_set_backing_noperm_commit,
> > > +.clean = g_free,
> > > +};
> > > +
> > >   /*
> > >* Sets the bs->backing link of a BDS. A new reference is created; 
> > > callers
> > >* which don't need their own reference any more must call bdrv_unref().
> > >*/
> > > -void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState 
> > > *backing_hd,
> > > - Error **errp)
> > > +static int bdrv_set_backing_noperm(BlockDriverState *bs,
> > > +   BlockDriverState *backing_bs,
> > > +   GSList **tran, Error **errp)
> > >   {
> > > -bool update_inherits_from = bdrv_chain_contains(bs, backing_hd) &&
> > > -bdrv_inherits_from_recursive(backing_hd, bs);
> > > +int ret = 0;
> > > +bool update_inherits_from = bdrv_chain_contains(bs, backing_bs) &&
> > > +bdrv_inherits_from_recursive(backing_bs, bs);
> > > +GSList *attach_tran = NULL;
> > > +BdrvSetBackingNoPermState *s;
> > >   if (bdrv_is_backing_chain_frozen(bs, child_bs(bs->backing), errp)) {
> > > -return;
> > > +return -EPERM;
> > >   }
> > > -if (backing_hd) {
> > > -bdrv_ref(backing_hd);
> > > +if (bs->backing && backing_bs) {
> > > +bdrv_replace_child_safe(bs->backing, backing_bs, tran);
> > > +} else if (bs->backing && !backing_bs) {
> > > +bdrv_remove_backing(bs, tran);
> > > +} else if (backing_bs) {
> > > +assert(!bs->backing);
> > > +ret = bdrv_attach_child_noperm(bs, backing_bs, "backing",
> > > +   &child_of_bds, 
> > > bdrv_backing_role(bs),
> > > +   &bs->backing, &attach_tran, errp);
> > > +if (ret < 0) {
> > > +tran_abort(attach_tran);
> > 
> > This looks wrong to me, we'll call tran_abort() a second time through
> > bdrv_set_backing_noperm_abort() when the outer transaction aborts.
> > 
> > I also notice that the other two if branches do just add things to the
> > outer 'tran', it's just this branch that gets a nested one.
> > 
> > > +return ret;
> > > +}
> > >   }
> > > -if (bs->backing) {
> > > -/* Cannot be frozen, we checked that above */
> > > -   

Re: [PATCH v2 28/36] block: add bdrv_set_backing_noperm() transaction action

2021-02-05 Thread Kevin Wolf
Am 27.11.2020 um 15:45 hat Vladimir Sementsov-Ogievskiy geschrieben:
> Split out no-perm part of bdrv_set_backing_hd() as a separate
> transaction action. Note that in case of an existing BdrvChild we reuse it,
> not recreate it, just to do fewer actions.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 

>  /*
>   * Sets the bs->backing link of a BDS. A new reference is created; callers
>   * which don't need their own reference any more must call bdrv_unref().
>   */
> -void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
> - Error **errp)
> +static int bdrv_set_backing_noperm(BlockDriverState *bs,
> +   BlockDriverState *backing_bs,
> +   GSList **tran, Error **errp)
>  {
> -bool update_inherits_from = bdrv_chain_contains(bs, backing_hd) &&
> -bdrv_inherits_from_recursive(backing_hd, bs);
> +int ret = 0;
> +bool update_inherits_from = bdrv_chain_contains(bs, backing_bs) &&
> +bdrv_inherits_from_recursive(backing_bs, bs);
> +GSList *attach_tran = NULL;
> +BdrvSetBackingNoPermState *s;
>  
>  if (bdrv_is_backing_chain_frozen(bs, child_bs(bs->backing), errp)) {
> -return;
> +return -EPERM;
>  }
>  
> -if (backing_hd) {
> -bdrv_ref(backing_hd);
> +if (bs->backing && backing_bs) {
> +bdrv_replace_child_safe(bs->backing, backing_bs, tran);

The old code with separate bdrv_unref_child() and then
bdrv_attach_child() tried to make the AioContexts of bs and backing_bs
compatible by moving one of the nodes if necessary.

bdrv_replace_child_safe() doesn't seem to do that, but it only asserts
that both nodes are already in the same context.

I see that iotest 245 doesn't crash, which I think it should if this
were broken, but where does the switch happen now?

Kevin




Re: [PULL v2 00/27] Block patches

2021-02-05 Thread Peter Maydell
On Fri, 5 Feb 2021 at 16:21, Stefan Hajnoczi  wrote:
> Thanks, I updated the patch in question.
>
> It looks like the GitLab CI doesn't include a clang version that
> produces this error because the pipeline passed for me:
> https://gitlab.com/stefanha/qemu/-/pipelines/251524779
>
> Is there something clang-specific you want to check in the CI? Maybe
> clang 3.4, the oldest version supported according to ./configure?

Would probably be nice I guess. My ad-hoc builds use clang 6,
which is what tripped up here.

thanks
-- PMM



Re: [PATCH v11 00/13] hw/block/nvme: Support Namespace Types and Zoned Namespace Command Set

2021-02-05 Thread Keith Busch
On Sat, Feb 06, 2021 at 01:07:57AM +0900, Minwoo Im wrote:
> If multipath is enabled, the namespace head and hidden namespace will be
> created.  In this case, /sys/block/nvme0n1/queue/nr_zones are not
> returning proper value for the namespace itself.  By the way, the hidden
> namespace /sys/block/nvme0c0n1/queue/nr_zones are returning properly.
> 
> Is it okay for sysfs of the head namespace node (nvme0n1) not to manage
> the request queue attributes like nr_zones?

This should fix it. Untested, as my dev machine is in need of repair,
but if someone can confirm this is successful, I can send it to the
kernel list.

---
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 65bd6efa5e1c..eb18949bb999 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -677,6 +677,8 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct 
nvme_id_ns *id)
if (blk_queue_stable_writes(ns->queue) && ns->head->disk)
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES,
   ns->head->disk->queue);
+   if (blk_queue_is_zoned(ns->queue))
+   blk_revalidate_disk_zones(ns->head->disk, NULL);
 }
 
 void nvme_mpath_remove_disk(struct nvme_ns_head *head)
--



Re: [PULL v2 00/27] Block patches

2021-02-05 Thread Stefan Hajnoczi
On Thu, Feb 04, 2021 at 05:35:31PM +, Peter Maydell wrote:
> On Thu, 4 Feb 2021 at 15:43, Stefan Hajnoczi  wrote:
> >
> > The following changes since commit db754f8ccaf2f073c9aed46a4389e9c0c2080399:
> >
> >   Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-tcg-20210202' 
> > into staging (2021-02-03 19:35:57 +)
> >
> > are available in the Git repository at:
> >
> >   https://gitlab.com/stefanha/qemu.git tags/block-pull-request
> >
> > for you to fetch changes up to abe42229db7b87caa11b3c02835ebf9d384e0bd4:
> >
> >   docs: fix Parallels Image "dirty bitmap" section (2021-02-04 15:17:10 
> > +)
> >
> > 
> > Pull request
> >
> > v2:
> >  * Rebase to resolve memory_region_init_ram_from_file() conflict due to the 
> > new
> >offset argument that was added in qemu.git/master in the meantime [Peter]
> >
> > 
> 
> Fails to compile, clang:
> 
> ../../hw/remote/mpqemu-link.c:40:29: error: suggest braces around
> initialization of subobject [-Werror,-Wmissing-braces]
> struct iovec send[2] = {0};
> ^
> {}
> 
> 
> Don't use {0}, use {} -- the former may be the C standard thing,
> but the latter is the one all our supported compilers accept
> without complaint. (cf eg commit 039d4e3df0).

Thanks, I updated the patch in question.

It looks like the GitLab CI doesn't include a clang version that
produces this error because the pipeline passed for me:
https://gitlab.com/stefanha/qemu/-/pipelines/251524779

Is there something clang-specific you want to check in the CI? Maybe
clang 3.4, the oldest version supported according to ./configure?

Stefan


signature.asc
Description: PGP signature


Re: [PULL v2 00/27] Block patches

2021-02-05 Thread Stefan Hajnoczi
On Thu, Feb 04, 2021 at 10:49:26AM -0800, elena wrote:
> On Thu, Feb 04, 2021 at 05:35:31PM +, Peter Maydell wrote:
> > On Thu, 4 Feb 2021 at 15:43, Stefan Hajnoczi  wrote:
> > >
> > > The following changes since commit 
> > > db754f8ccaf2f073c9aed46a4389e9c0c2080399:
> > >
> > >   Merge remote-tracking branch 
> > > 'remotes/rth-gitlab/tags/pull-tcg-20210202' into staging (2021-02-03 
> > > 19:35:57 +)
> > >
> > > are available in the Git repository at:
> > >
> > >   https://gitlab.com/stefanha/qemu.git tags/block-pull-request
> > >
> > > for you to fetch changes up to abe42229db7b87caa11b3c02835ebf9d384e0bd4:
> > >
> > >   docs: fix Parallels Image "dirty bitmap" section (2021-02-04 15:17:10 
> > > +)
> > >
> > > 
> > > Pull request
> > >
> > > v2:
> > >  * Rebase to resolve memory_region_init_ram_from_file() conflict due to 
> > > the new
> > >offset argument that was added in qemu.git/master in the meantime 
> > > [Peter]
> > >
> > > 
> > 
> > Fails to compile, clang:
> > 
> > ../../hw/remote/mpqemu-link.c:40:29: error: suggest braces around
> > initialization of subobject [-Werror,-Wmissing-braces]
> > struct iovec send[2] = {0};
> > ^
> > {}
> 
> Stefan, should we make changes for the patch?

I'll send another revision of this pull request since it's a trivial
fix.

Thanks,
Stefan


signature.asc
Description: PGP signature


Re: [PATCH v2 29/36] blockdev: qmp_x_blockdev_reopen: acquire all contexts

2021-02-05 Thread Vladimir Sementsov-Ogievskiy

05.02.2021 19:01, Kevin Wolf wrote:

Am 27.11.2020 um 15:45 hat Vladimir Sementsov-Ogievskiy geschrieben:

During reopen we may add backing bs from other aio context, which may
lead to changing original context of top bs.

We are going to move graph modification to prepare stage. So, it will
be possible that bdrv_flush() in bdrv_reopen_prepare called on bs in
non-original aio context, which we didn't acquire, which leads to crash.

More correct would be to acquire all aio contexts we are going to work
with. And the simplest way is to just acquire all of them. It may be
optimized later if needed.

Signed-off-by: Vladimir Sementsov-Ogievskiy 


I'm afraid it's not as easy. Holding the lock of more than one
AioContext is always a bit risky with respect to deadlocks.

For example, changing the AioContext of a node with
bdrv_set_aio_context_ignore() has explicit rules that are now violated:

  * The caller must own the AioContext lock for the old AioContext of bs, but it
  * must not own the AioContext lock for new_context (unless new_context is the
  * same as the current context of bs).

Draining while holding all AioContext locks is suspicious, too. I think
I have seen deadlocks before, which is why bdrv_drain_all_*() are
careful to only ever lock a single AioContext at a time.

Kevin



That's not good :\ Hmm, probably we just should flush everything before all 
graph modifications.

--
Best regards,
Vladimir



Re: [PATCH v11 00/13] hw/block/nvme: Support Namespace Types and Zoned Namespace Command Set

2021-02-05 Thread Keith Busch
On Sat, Feb 06, 2021 at 01:07:57AM +0900, Minwoo Im wrote:
> On 21-02-05 08:02:10, Keith Busch wrote:
> > On Fri, Feb 05, 2021 at 09:33:54PM +0900, Minwoo Im wrote:
> > > On 21-02-05 12:42:30, Klaus Jensen wrote:
> > > > On Feb  5 12:25, i...@dantalion.nl wrote:
> > > > > On 05-02-2021 11:39, Klaus Jensen wrote:
> > > > > > This is a good way to report it ;)
> > > > > > It is super helpful and super appreciated! Thanks!
> > > > > 
> > > > > Good to know :)
> > > > > 
> > > > > > I cant reproduce that. Can you share your qemu configuration, kernel
> > > > > > version?
> > > > > 
> > > > > I create the image and launch QEMU with:
> > > > > qemu-img create -f raw znsssd.img 16777216
> > > > > 
> > > > > qemu-system-x86_64 -name qemuzns -m 4G -cpu Haswell -smp 2 -hda \
> > > > > ./arch-qemu.qcow2 -net user,hostfwd=tcp::-:22,\
> > > > > hostfwd=tcp::-:2000 -net nic \
> > > > > -drive file=./znsssd.img,id=mynvme,format=raw,if=none \
> > > > > -device nvme-subsys,id=subsys0 \
> > > > > -device nvme,serial=baz,id=nvme2,zoned.append_size_limit=131072,\
> > > > > subsys=subsys0 \
> > > > > -device nvme-ns,id=ns2,drive=mynvme,nsid=2,logical_block_size=4096,\
> > > > > physical_block_size=4096,zoned=true,zoned.zone_size=131072,\
> > > > > zoned.zone_capacity=131072,zoned.max_open=0,zoned.max_active=0,bus=nvme2
> > > > > 
> > > > > This should create 128 zones as 16777216 / 131072 = 128. My qemu 
> > > > > version
> > > > > is on d79d797b0dd02c33dc9428123c18ae97127e967b of nvme-next.
> > > > > 
> > > > > I don't actually think the subsys is needed when you use bus=, that is
> > > > > just something left over from trying to identify why the nvme device 
> > > > > was
> > > > > not initializing.
> > > > > 
> > > > > I use an Arch qcow image with kernel version 5.10.12
> > > > 
> > > > Thanks - I can reproduce it now.
> > > > 
> > > > Happens only when the subsystem is involved. Looks like a kernel issue
> > > > to me since the zones are definitely there when using nvme-cli.
> > > 
> > > Yes, it looks like it happens when CONFIG_NVME_MULTIPATH=y and subsys is
> > > given for namespace sharing.  In that case, the actual hidden namespace
> > > for nvme0n1 might be nvme0c0n1.
> > > 
> > > lrwxrwxrwx 1 root root 0 Feb  5 12:30 /sys/block/nvme0c0n1 -> 
> > > ../devices/pci:00/:00:06.0/nvme/nvme0/nvme0c0n1/
> > > lrwxrwxrwx 1 root root 0 Feb  5 12:30 /sys/block/nvme0n1 -> 
> > > ../devices/virtual/nvme-subsystem/nvme-subsys0/nvme0n1/   
> > > 
> > > cat /sys/block/nvme0c0n1/queue/nr_zones returns proper value.
> > > 
> > > > 
> > > > Stuff also seems to be initialized in the kernel since blkzone report
> > > > works.
> > > > 
> > > > Keith, this might be some fun for you :) ?
> > > 
> > > I also really want to ask about the policy of head namespace policy
> > > in kernel. :)
> > 
> > What's the question? It looks like I'm missing some part of the context.
> 
> If multipath is enabled, the namespace head and hidden namespace will be
> created.  In this case, /sys/block/nvme0n1/queue/nr_zones are not
> returning proper value for the namespace itself.  By the way, the hidden
> namespace /sys/block/nvme0c0n1/queue/nr_zones are returning properly.
> 
> Is it okay for sysfs of the head namespace node (nvme0n1) not to manage
> the request queue attributes like nr_zones?

Gotcha.

The q->nr_zones is not a stacking limit, so the virtual device that's
made visible is not inheriting the path device that contains this
setting. I'll see about getting a kernel fix proposed.



Re: [PATCH v11 00/13] hw/block/nvme: Support Namespace Types and Zoned Namespace Command Set

2021-02-05 Thread Minwoo Im
On 21-02-05 08:02:10, Keith Busch wrote:
> On Fri, Feb 05, 2021 at 09:33:54PM +0900, Minwoo Im wrote:
> > On 21-02-05 12:42:30, Klaus Jensen wrote:
> > > On Feb  5 12:25, i...@dantalion.nl wrote:
> > > > On 05-02-2021 11:39, Klaus Jensen wrote:
> > > > > This is a good way to report it ;)
> > > > > It is super helpful and super appreciated! Thanks!
> > > > 
> > > > Good to know :)
> > > > 
> > > > > I cant reproduce that. Can you share your qemu configuration, kernel
> > > > > version?
> > > > 
> > > > I create the image and launch QEMU with:
> > > > qemu-img create -f raw znsssd.img 16777216
> > > > 
> > > > qemu-system-x86_64 -name qemuzns -m 4G -cpu Haswell -smp 2 -hda \
> > > > ./arch-qemu.qcow2 -net user,hostfwd=tcp::-:22,\
> > > > hostfwd=tcp::-:2000 -net nic \
> > > > -drive file=./znsssd.img,id=mynvme,format=raw,if=none \
> > > > -device nvme-subsys,id=subsys0 \
> > > > -device nvme,serial=baz,id=nvme2,zoned.append_size_limit=131072,\
> > > > subsys=subsys0 \
> > > > -device nvme-ns,id=ns2,drive=mynvme,nsid=2,logical_block_size=4096,\
> > > > physical_block_size=4096,zoned=true,zoned.zone_size=131072,\
> > > > zoned.zone_capacity=131072,zoned.max_open=0,zoned.max_active=0,bus=nvme2
> > > > 
> > > > This should create 128 zones as 16777216 / 131072 = 128. My qemu version
> > > > is on d79d797b0dd02c33dc9428123c18ae97127e967b of nvme-next.
> > > > 
> > > > I don't actually think the subsys is needed when you use bus=, that is
> > > > just something left over from trying to identify why the nvme device was
> > > > not initializing.
> > > > 
> > > > I use an Arch qcow image with kernel version 5.10.12
> > > 
> > > Thanks - I can reproduce it now.
> > > 
> > > Happens only when the subsystem is involved. Looks like a kernel issue
> > > to me since the zones are definitely there when using nvme-cli.
> > 
> > Yes, it looks like it happens when CONFIG_NVME_MULTIPATH=y and subsys is
> > given for namespace sharing.  In that case, the actual hidden namespace
> > for nvme0n1 might be nvme0c0n1.
> > 
> > lrwxrwxrwx 1 root root 0 Feb  5 12:30 /sys/block/nvme0c0n1 -> 
> > ../devices/pci:00/:00:06.0/nvme/nvme0/nvme0c0n1/
> > lrwxrwxrwx 1 root root 0 Feb  5 12:30 /sys/block/nvme0n1 -> 
> > ../devices/virtual/nvme-subsystem/nvme-subsys0/nvme0n1/   
> > 
> > cat /sys/block/nvme0c0n1/queue/nr_zones returns proper value.
> > 
> > > 
> > > Stuff also seems to be initialized in the kernel since blkzone report
> > > works.
> > > 
> > > Keith, this might be some fun for you :) ?
> > 
> > I also really want to ask about the policy of head namespace policy
> > in kernel. :)
> 
> What's the question? It looks like I'm missing some part of the context.

If multipath is enabled, the namespace head and hidden namespace will be
created.  In this case, /sys/block/nvme0n1/queue/nr_zones are not
returning proper value for the namespace itself.  By the way, the hidden
namespace /sys/block/nvme0c0n1/queue/nr_zones are returning properly.

Is it okay for sysfs of the head namespace node (nvme0n1) not to manage
the request queue attributes like nr_zones?



Re: [PATCH v2 28/36] block: add bdrv_set_backing_noperm() transaction action

2021-02-05 Thread Vladimir Sementsov-Ogievskiy

05.02.2021 17:00, Kevin Wolf wrote:

Am 27.11.2020 um 15:45 hat Vladimir Sementsov-Ogievskiy geschrieben:

Split out no-perm part of bdrv_set_backing_hd() as a separate
transaction action. Note that in case of an existing BdrvChild we reuse it,
not recreate it, just to do fewer actions.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  block.c | 111 +---
  1 file changed, 89 insertions(+), 22 deletions(-)

diff --git a/block.c b/block.c
index 54fb6d24bd..617cba9547 100644
--- a/block.c
+++ b/block.c
@@ -101,6 +101,7 @@ static int bdrv_attach_child_common(BlockDriverState 
*child_bs,
  uint64_t perm, uint64_t shared_perm,
  void *opaque, BdrvChild **child,
  GSList **tran, Error **errp);
+static void bdrv_remove_backing(BlockDriverState *bs, GSList **tran);
  
  static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue

 *queue, Error **errp);
@@ -3194,45 +3195,111 @@ static BdrvChildRole 
bdrv_backing_role(BlockDriverState *bs)
  }
  }
  
+typedef struct BdrvSetBackingNoPermState {

+BlockDriverState *bs;
+BlockDriverState *backing_bs;
+BlockDriverState *old_inherits_from;
+GSList *attach_tran;
+} BdrvSetBackingNoPermState;


Why do we need the nested attach_tran instead of just including these
actions in the outer transaction?


+static void bdrv_set_backing_noperm_abort(void *opaque)
+{
+BdrvSetBackingNoPermState *s = opaque;
+
+if (s->backing_bs) {
+s->backing_bs->inherits_from = s->old_inherits_from;
+}
+
+tran_abort(s->attach_tran);
+
+bdrv_refresh_limits(s->bs, NULL);
+if (s->old_inherits_from) {
+bdrv_refresh_limits(s->old_inherits_from, NULL);
+}


How is bs->inherits_from related to limits? I don't see a
bdrv_refresh_limits() call in bdrv_set_backing_noperm() that this would
undo.


+}
+
+static void bdrv_set_backing_noperm_commit(void *opaque)
+{
+BdrvSetBackingNoPermState *s = opaque;
+
+tran_commit(s->attach_tran);
+}
+
+static TransactionActionDrv bdrv_set_backing_noperm_drv = {
+.abort = bdrv_set_backing_noperm_abort,
+.commit = bdrv_set_backing_noperm_commit,
+.clean = g_free,
+};
+
  /*
   * Sets the bs->backing link of a BDS. A new reference is created; callers
   * which don't need their own reference any more must call bdrv_unref().
   */
-void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
- Error **errp)
+static int bdrv_set_backing_noperm(BlockDriverState *bs,
+   BlockDriverState *backing_bs,
+   GSList **tran, Error **errp)
  {
-bool update_inherits_from = bdrv_chain_contains(bs, backing_hd) &&
-bdrv_inherits_from_recursive(backing_hd, bs);
+int ret = 0;
+bool update_inherits_from = bdrv_chain_contains(bs, backing_bs) &&
+bdrv_inherits_from_recursive(backing_bs, bs);
+GSList *attach_tran = NULL;
+BdrvSetBackingNoPermState *s;
  
  if (bdrv_is_backing_chain_frozen(bs, child_bs(bs->backing), errp)) {

-return;
+return -EPERM;
  }
  
-if (backing_hd) {

-bdrv_ref(backing_hd);
+if (bs->backing && backing_bs) {
+bdrv_replace_child_safe(bs->backing, backing_bs, tran);
+} else if (bs->backing && !backing_bs) {
+bdrv_remove_backing(bs, tran);
+} else if (backing_bs) {
+assert(!bs->backing);
+ret = bdrv_attach_child_noperm(bs, backing_bs, "backing",
+   &child_of_bds, bdrv_backing_role(bs),
+   &bs->backing, &attach_tran, errp);
+if (ret < 0) {
+tran_abort(attach_tran);


This looks wrong to me, we'll call tran_abort() a second time through
bdrv_set_backing_noperm_abort() when the outer transaction aborts.

I also notice that the other two if branches do just add things to the
outer 'tran', it's just this branch that gets a nested one.


+return ret;
+}
  }
  
-if (bs->backing) {

-/* Cannot be frozen, we checked that above */
-bdrv_unref_child(bs, bs->backing);
-bs->backing = NULL;
-}
+s = g_new(BdrvSetBackingNoPermState, 1);
+*s = (BdrvSetBackingNoPermState) {
+.bs = bs,
+.backing_bs = backing_bs,
+.old_inherits_from = backing_bs ? backing_bs->inherits_from : NULL,
+};
+tran_prepend(tran, &bdrv_set_backing_noperm_drv, s);
  
-if (!backing_hd) {

-goto out;
+/*
+ * If backing_bs was already part of bs's backing chain, and
+ * inherits_from pointed recursively to bs then let's update it to
+ * point directly to bs (else it will become NULL).


Setting it to NULL was previously done by bdrv_unref_child().

bdrv_replace_child_safe() and bdrv_remove_backing() don't seem t

Re: [PATCH 1/9] tests/qtest/arm-cpu-features: Remove Cortex-A15 check

2021-02-05 Thread Philippe Mathieu-Daudé
On 2/5/21 4:33 PM, Andrew Jones wrote:
> On Fri, Feb 05, 2021 at 04:15:45PM +0100, Philippe Mathieu-Daudé wrote:
>> Hi Drew,
>>
>> On 2/5/21 3:59 PM, Andrew Jones wrote:
>>> On Fri, Feb 05, 2021 at 03:43:37PM +0100, Philippe Mathieu-Daudé wrote:
 Support for ARMv7 has been dropped in commit 82bf7ae84ce
 ("target/arm: Remove KVM support for 32-bit Arm hosts"),
 no need to check for Cortex A15 host cpu anymore.

 Signed-off-by: Philippe Mathieu-Daudé 
 ---
  tests/qtest/arm-cpu-features.c | 4 
  1 file changed, 4 deletions(-)

 diff --git a/tests/qtest/arm-cpu-features.c 
 b/tests/qtest/arm-cpu-features.c
 index 8252b85bb85..c59c3cb002b 100644
 --- a/tests/qtest/arm-cpu-features.c
 +++ b/tests/qtest/arm-cpu-features.c
 @@ -515,10 +515,6 @@ static void test_query_cpu_model_expansion_kvm(const 
 void *data)
  QDict *resp;
  char *error;
  
 -assert_error(qts, "cortex-a15",
 -"We cannot guarantee the CPU type 'cortex-a15' works "
 -"with KVM on this host", NULL);
 -
>>>
>>> This isn't testing anything regarding 32-bit KVM host support. It's
>>> testing that an error is returned when a given cpu type that can't
>>> be known to work with KVM is used. We know that the cortex-a15 can't
>>> be known to work. If we were to use a 64-bit cpu type here then there's
>>> a chance that it would work, failing the test that an error be returned.
>>
>> This was my first understanding, but then why does it fail?
>>
>> PASS 1 qtest-aarch64/arm-cpu-features /aarch64/arm/query-cpu-model-expansion
>> **
>> ERROR:../../tests/qtest/arm-cpu-features.c:543:test_query_cpu_model_expansion_kvm:
>> assertion failed: (g_str_equal(_error, "We cannot guarantee the CPU type
>> 'cortex-a15' works " "with KVM on this host"))
>> ERROR qtest-aarch64/arm-cpu-features - Bail out!
>> ERROR:../../tests/qtest/arm-cpu-features.c:543:test_query_cpu_model_expansion_kvm:
>> assertion failed: (g_str_equal(_error, "We cannot guarantee the CPU type
>> 'cortex-a15' works " "with KVM on this host"))
>> make: *** [Makefile.mtest:905: run-test-111] Error 1
>>
>> FWIW when tracing (cavium thunderX1 host, dmesg reports 0x431f0a11):
>> kvm_vcpu_ioctl cpu_index 0, type 0x4020aeae, arg 0x9b7f9b18
> 
> Hmm... I don't know. It works for me
> 
> $ QTEST_QEMU_BINARY=./qemu-system-aarch64 ./tests/qtest/arm-cpu-features
> /aarch64/arm/query-cpu-model-expansion: OK
> /aarch64/arm/kvm/query-cpu-model-expansion: OK
> /aarch64/arm/kvm/query-cpu-model-expansion/sve-off: OK
> /aarch64/arm/max/query-cpu-model-expansion/sve-max-vq-8: OK
> /aarch64/arm/max/query-cpu-model-expansion/sve-off: OK

Thanks, that helped.

I ran my tests including the "Restrict v7A TCG cpus to TCG accel"
patch which removes the A15 in KVM-only build:
https://lists.gnu.org/archive/html/qemu-devel/2021-01/msg08051.html
 So when TCG is disabled,

So I get:

{ "execute": "query-cpu-model-expansion", 'arguments': { 'type': 'full',
'model': { 'name': 'cortex-a15' }}}
{
"error": {
"class": "GenericError",
"desc": "The CPU type 'cortex-a15' is not a recognized ARM CPU type"
}
}

which fails the g_str_equal().

BTW is there some easy way to dump QMP traffic on stdio?

> 
> $ lscpu
> Architecture:aarch64
> Byte Order:  Little Endian
> CPU(s):  48
> On-line CPU(s) list: 0-47
> Thread(s) per core:  1
> Core(s) per cluster: 16
> Socket(s):   -
> Cluster(s):  3
> NUMA node(s):1
> Vendor ID:   Cavium
> Model:   1
> Model name:  ThunderX 88XX
> Stepping:0x1
> BogoMIPS:200.00
> NUMA node0 CPU(s):   0-47
> Flags:   fp asimd evtstrm aes pmull sha1 sha2 crc32 cpuid
> 
>>
>>>
  assert_has_feature_enabled(qts, "host", "aarch64");
  
  /* Enabling and disabling pmu should always work. */
 -- 
 2.26.2


>>>
>>> This file could use a cleanup patch regarding the dropping of 32-bit KVM
>>> support though. At least the comment in main(), "For now we only run KVM
>>> specific tests..." could be reworded. It was written that way when we
>>> planned to try testing on 32-bit KVM too eventually, but we never did,
>>> and now we'll never need to.
>>>
>>> Thanks,
>>> drew
>>>
>>>
>>
> 
> 



Re: [PATCH v11 00/13] hw/block/nvme: Support Namespace Types and Zoned Namespace Command Set

2021-02-05 Thread Keith Busch
On Fri, Feb 05, 2021 at 09:33:54PM +0900, Minwoo Im wrote:
> On 21-02-05 12:42:30, Klaus Jensen wrote:
> > On Feb  5 12:25, i...@dantalion.nl wrote:
> > > On 05-02-2021 11:39, Klaus Jensen wrote:
> > > > This is a good way to report it ;)
> > > > It is super helpful and super appreciated! Thanks!
> > > 
> > > Good to know :)
> > > 
> > > > I cant reproduce that. Can you share your qemu configuration, kernel
> > > > version?
> > > 
> > > I create the image and launch QEMU with:
> > > qemu-img create -f raw znsssd.img 16777216
> > > 
> > > qemu-system-x86_64 -name qemuzns -m 4G -cpu Haswell -smp 2 -hda \
> > > ./arch-qemu.qcow2 -net user,hostfwd=tcp::-:22,\
> > > hostfwd=tcp::-:2000 -net nic \
> > > -drive file=./znsssd.img,id=mynvme,format=raw,if=none \
> > > -device nvme-subsys,id=subsys0 \
> > > -device nvme,serial=baz,id=nvme2,zoned.append_size_limit=131072,\
> > > subsys=subsys0 \
> > > -device nvme-ns,id=ns2,drive=mynvme,nsid=2,logical_block_size=4096,\
> > > physical_block_size=4096,zoned=true,zoned.zone_size=131072,\
> > > zoned.zone_capacity=131072,zoned.max_open=0,zoned.max_active=0,bus=nvme2
> > > 
> > > This should create 128 zones as 16777216 / 131072 = 128. My qemu version
> > > is on d79d797b0dd02c33dc9428123c18ae97127e967b of nvme-next.
> > > 
> > > I don't actually think the subsys is needed when you use bus=, that is
> > > just something left over from trying to identify why the nvme device was
> > > not initializing.
> > > 
> > > I use an Arch qcow image with kernel version 5.10.12
> > 
> > Thanks - I can reproduce it now.
> > 
> > Happens only when the subsystem is involved. Looks like a kernel issue
> > to me since the zones are definitely there when using nvme-cli.
> 
> Yes, it looks like it happens when CONFIG_NVME_MULTIPATH=y and subsys is
> given for namespace sharing.  In that case, the actual hidden namespace
> for nvme0n1 might be nvme0c0n1.
> 
> lrwxrwxrwx 1 root root 0 Feb  5 12:30 /sys/block/nvme0c0n1 -> 
> ../devices/pci:00/:00:06.0/nvme/nvme0/nvme0c0n1/
> lrwxrwxrwx 1 root root 0 Feb  5 12:30 /sys/block/nvme0n1 -> 
> ../devices/virtual/nvme-subsystem/nvme-subsys0/nvme0n1/   
> 
> cat /sys/block/nvme0c0n1/queue/nr_zones returns proper value.
> 
> > 
> > Stuff also seems to be initialized in the kernel since blkzone report
> > works.
> > 
> > Keith, this might be some fun for you :) ?
> 
> I also really want to ask about the policy of head namespace policy
> in kernel. :)

What's the question? It looks like I'm missing some part of the context.



Re: [PATCH v2 29/36] blockdev: qmp_x_blockdev_reopen: acquire all contexts

2021-02-05 Thread Kevin Wolf
Am 27.11.2020 um 15:45 hat Vladimir Sementsov-Ogievskiy geschrieben:
> During reopen we may add backing bs from other aio context, which may
> lead to changing original context of top bs.
> 
> We are going to move graph modification to prepare stage. So, it will
> be possible that bdrv_flush() in bdrv_reopen_prepare called on bs in
> non-original aio context, which we didn't aquire which leads to crash.
> 
> More correct would be to acquire all aio context we are going to work
> with. And the simplest ways is to just acquire all of them. It may be
> optimized later if needed.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 

I'm afraid it's not as easy. Holding the lock of more than one
AioContext is always a bit risky with respect to deadlocks.

For example, changing the AioContext of a node with
bdrv_set_aio_context_ignore() has explicit rules that are now violated:

 * The caller must own the AioContext lock for the old AioContext of bs, but it
 * must not own the AioContext lock for new_context (unless new_context is the
 * same as the current context of bs).

Draining while holding all AioContext locks is suspicious, too. I think
I have seen deadlocks before, which is why bdrv_drain_all_*() are
careful to only ever lock a single AioContext at a time.

Kevin




Re: [RFC PATCH 9/9] tests/qtest/arm-cpu-features: Restrict TCG-only tests

2021-02-05 Thread Claudio Fontana
On 2/5/21 4:30 PM, Philippe Mathieu-Daudé wrote:
> On 2/5/21 4:20 PM, Claudio Fontana wrote:
>> On 2/5/21 3:43 PM, Philippe Mathieu-Daudé wrote:
>>> Some tests explicitly request the TCG accelerator. As these
>>> tests will obviously fails if TCG is not present, disable
>>> them in such case.
>>>
>>> Signed-off-by: Philippe Mathieu-Daudé 
>>> ---
>>> Cc: Roman Bolshakov 
>>> Cc: Claudio Fontana 
>>>
>>> RFC because of the TODO.
>>>
>>> Roman posted a series to have a QMP command to query enabled
>>> accelerators.
>>> ---
>>>  tests/qtest/arm-cpu-features.c | 33 +
>>>  1 file changed, 29 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c
>>> index c59c3cb002b..c6e86282b66 100644
>>> --- a/tests/qtest/arm-cpu-features.c
>>> +++ b/tests/qtest/arm-cpu-features.c
>>> @@ -20,7 +20,7 @@
>>>   */
>>>  #define SVE_MAX_VQ 16
>>>  
>>> -#define MACHINE "-machine virt,gic-version=max -accel tcg "
>>> +#define MACHINE_TCG "-machine virt,gic-version=max -accel tcg "
>>>  #define MACHINE_KVM "-machine virt,gic-version=max -accel kvm -accel tcg "
>>>  #define QUERY_HEAD  "{ 'execute': 'query-cpu-model-expansion', " \
>>>  "  'arguments': { 'type': 'full', "
>>> @@ -41,6 +41,16 @@ static bool kvm_enabled(QTestState *qts)
>>>  return enabled;
>>>  }
>>>  
>>> +static bool tcg_enabled(QTestState *qts)
>>> +{
>>> +/* TODO: Implement QMP query-accel? */
>>> +#ifdef CONFIG_TCG
>>> +return true;
>>> +#else
>>> +return false;
>>> +#endif /* CONFIG_TCG */
>>
>>
>> I would not use the same name as the existing tcg_enabled(), which has 
>> different semantics, even in test code;
>>
>> what you mean here is tcg_available() right?
> 
> No, I meant static tcg_enabled as the kvm_enabled() earlier method:

Aha, so it's the other way around, we are actually testing if the TCG 
accelerator is currently selected in QEMU,
and the patch implements it using CONFIG_TCG as a placeholder for it, since we 
do not have query-accel yet, got it.

> 
> static bool kvm_enabled(QTestState *qts)
> {
> QDict *resp, *qdict;
> bool enabled;
> 
> resp = qtest_qmp(qts, "{ 'execute': 'query-kvm' }");
> g_assert(qdict_haskey(resp, "return"));
> qdict = qdict_get_qdict(resp, "return");
> g_assert(qdict_haskey(qdict, "enabled"));
> enabled = qdict_get_bool(qdict, "enabled");
> qobject_unref(resp);
> 
> return enabled;
> }
> 
> This should be moved to something generic to QTest IMO,
> and we need some runtime qtest_is_accelerator_enabled().
> 

Agreed,

thanks,

Claudio



Re: [RFC PATCH 0/2] Allow changing bs->file on reopen

2021-02-05 Thread Kevin Wolf
Am 05.02.2021 um 13:47 hat Alberto Garcia geschrieben:
> On Thu 21 Jan 2021 11:52:17 AM CET, Kevin Wolf wrote:
> >> Hmm, still, removing a filter which want to unshare WRITE even when
> >> doesn't have any parents will be a problem anyway, so we'll need a
> >> new command to drop filter with a logic like in bdrv_drop_filter in
> >> my series.
> >> 
> >> Or, we can introduce multiple reopen.. So that x-blockdev-reopen will
> >> take a list of BlockdevOptions, and do all modifications in one
> >> transaction. Than we'll be able to drop filter by transactional
> >> update of top node child and removing filter child link.
> >
> > Internally, we already have reopen queues anyway, so it would make
> > sense to me to expose them externally and take a list of
> > BlockdevOptions.  This way we should be able to do even complex
> > changes of the graph where adding some edges requires the removal of
> > other edges in a single atomic operation.
> 
> So you mean changing the signature to something like this?
> 
> { 'command': 'x-blockdev-reopen',
>   'data': { 'options': ['BlockdevOptions'] } }
> 
> It should be easy to make that change, I can have a look.

Yes, this is what I had in mind.

Kevin




Re: [PATCH 1/9] tests/qtest/arm-cpu-features: Remove Cortex-A15 check

2021-02-05 Thread Andrew Jones
On Fri, Feb 05, 2021 at 04:15:45PM +0100, Philippe Mathieu-Daudé wrote:
> Hi Drew,
> 
> On 2/5/21 3:59 PM, Andrew Jones wrote:
> > On Fri, Feb 05, 2021 at 03:43:37PM +0100, Philippe Mathieu-Daudé wrote:
> >> Support for ARMv7 has been dropped in commit 82bf7ae84ce
> >> ("target/arm: Remove KVM support for 32-bit Arm hosts"),
> >> no need to check for Cortex A15 host cpu anymore.
> >>
> >> Signed-off-by: Philippe Mathieu-Daudé 
> >> ---
> >>  tests/qtest/arm-cpu-features.c | 4 
> >>  1 file changed, 4 deletions(-)
> >>
> >> diff --git a/tests/qtest/arm-cpu-features.c 
> >> b/tests/qtest/arm-cpu-features.c
> >> index 8252b85bb85..c59c3cb002b 100644
> >> --- a/tests/qtest/arm-cpu-features.c
> >> +++ b/tests/qtest/arm-cpu-features.c
> >> @@ -515,10 +515,6 @@ static void test_query_cpu_model_expansion_kvm(const 
> >> void *data)
> >>  QDict *resp;
> >>  char *error;
> >>  
> >> -assert_error(qts, "cortex-a15",
> >> -"We cannot guarantee the CPU type 'cortex-a15' works "
> >> -"with KVM on this host", NULL);
> >> -
> > 
> > This isn't testing anything regarding 32-bit KVM host support. It's
> > testing that an error is returned when a given cpu type that can't
> > be known to work with KVM is used. We know that the cortex-a15 can't
> > be known to work. If we were to use a 64-bit cpu type here then there's
> > a chance that it would work, failing the test that an error be returned.
> 
> This was my first understanding, but then why does it fail?
> 
> PASS 1 qtest-aarch64/arm-cpu-features /aarch64/arm/query-cpu-model-expansion
> **
> ERROR:../../tests/qtest/arm-cpu-features.c:543:test_query_cpu_model_expansion_kvm:
> assertion failed: (g_str_equal(_error, "We cannot guarantee the CPU type
> 'cortex-a15' works " "with KVM on this host"))
> ERROR qtest-aarch64/arm-cpu-features - Bail out!
> ERROR:../../tests/qtest/arm-cpu-features.c:543:test_query_cpu_model_expansion_kvm:
> assertion failed: (g_str_equal(_error, "We cannot guarantee the CPU type
> 'cortex-a15' works " "with KVM on this host"))
> make: *** [Makefile.mtest:905: run-test-111] Error 1
> 
> FWIW when tracing (cavium thunderX1 host, dmesg reports 0x431f0a11):
> kvm_vcpu_ioctl cpu_index 0, type 0x4020aeae, arg 0x9b7f9b18

Hmm... I don't know. It works for me

$ QTEST_QEMU_BINARY=./qemu-system-aarch64 ./tests/qtest/arm-cpu-features
/aarch64/arm/query-cpu-model-expansion: OK
/aarch64/arm/kvm/query-cpu-model-expansion: OK
/aarch64/arm/kvm/query-cpu-model-expansion/sve-off: OK
/aarch64/arm/max/query-cpu-model-expansion/sve-max-vq-8: OK
/aarch64/arm/max/query-cpu-model-expansion/sve-off: OK

$ lscpu
Architecture:aarch64
Byte Order:  Little Endian
CPU(s):  48
On-line CPU(s) list: 0-47
Thread(s) per core:  1
Core(s) per cluster: 16
Socket(s):   -
Cluster(s):  3
NUMA node(s):1
Vendor ID:   Cavium
Model:   1
Model name:  ThunderX 88XX
Stepping:0x1
BogoMIPS:200.00
NUMA node0 CPU(s):   0-47
Flags:   fp asimd evtstrm aes pmull sha1 sha2 crc32 cpuid

> 
> > 
> >>  assert_has_feature_enabled(qts, "host", "aarch64");
> >>  
> >>  /* Enabling and disabling pmu should always work. */
> >> -- 
> >> 2.26.2
> >>
> >>
> > 
> > This file could use a cleanup patch regarding the dropping of 32-bit KVM
> > support though. At least the comment in main(), "For now we only run KVM
> > specific tests..." could be reworded. It was written that way when we
> > planned to try testing on 32-bit KVM too eventually, but we never did,
> > and now we'll never need to.
> > 
> > Thanks,
> > drew
> > 
> > 
> 




Re: [PATCH 0/9] hw/arm/virt: Improve CPU help and fix testing under KVM

2021-02-05 Thread Philippe Mathieu-Daudé
On 2/5/21 3:43 PM, Philippe Mathieu-Daudé wrote:
> Yet again bugfixes and cleanup patches noticed while
> rebasing my "Support disabling TCG on ARM (part 2)" series.
> 
> Sending them independently as they aren't directly dependent
> of it so don't have to be delayed by other unanswered questions.

Proven wrong 45min later, not trivial and not ready yet =)

> Please review,
> 
> Phil.



Re: [RFC PATCH 9/9] tests/qtest/arm-cpu-features: Restrict TCG-only tests

2021-02-05 Thread Philippe Mathieu-Daudé
On 2/5/21 4:20 PM, Claudio Fontana wrote:
> On 2/5/21 3:43 PM, Philippe Mathieu-Daudé wrote:
>> Some tests explicitly request the TCG accelerator. As these
>> tests will obviously fails if TCG is not present, disable
>> them in such case.
>>
>> Signed-off-by: Philippe Mathieu-Daudé 
>> ---
>> Cc: Roman Bolshakov 
>> Cc: Claudio Fontana 
>>
>> RFC because of the TODO.
>>
>> Roman posted a series to have a QMP command to query enabled
>> accelerators.
>> ---
>>  tests/qtest/arm-cpu-features.c | 33 +
>>  1 file changed, 29 insertions(+), 4 deletions(-)
>>
>> diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c
>> index c59c3cb002b..c6e86282b66 100644
>> --- a/tests/qtest/arm-cpu-features.c
>> +++ b/tests/qtest/arm-cpu-features.c
>> @@ -20,7 +20,7 @@
>>   */
>>  #define SVE_MAX_VQ 16
>>  
>> -#define MACHINE "-machine virt,gic-version=max -accel tcg "
>> +#define MACHINE_TCG "-machine virt,gic-version=max -accel tcg "
>>  #define MACHINE_KVM "-machine virt,gic-version=max -accel kvm -accel tcg "
>>  #define QUERY_HEAD  "{ 'execute': 'query-cpu-model-expansion', " \
>>  "  'arguments': { 'type': 'full', "
>> @@ -41,6 +41,16 @@ static bool kvm_enabled(QTestState *qts)
>>  return enabled;
>>  }
>>  
>> +static bool tcg_enabled(QTestState *qts)
>> +{
>> +/* TODO: Implement QMP query-accel? */
>> +#ifdef CONFIG_TCG
>> +return true;
>> +#else
>> +return false;
>> +#endif /* CONFIG_TCG */
> 
> 
> I would not use the same name as the existing tcg_enabled(), which has 
> different semantics, even in test code;
> 
> what you mean here is tcg_available() right?

No, I meant a static tcg_enabled(), like the earlier kvm_enabled() method:

static bool kvm_enabled(QTestState *qts)
{
QDict *resp, *qdict;
bool enabled;

resp = qtest_qmp(qts, "{ 'execute': 'query-kvm' }");
g_assert(qdict_haskey(resp, "return"));
qdict = qdict_get_qdict(resp, "return");
g_assert(qdict_haskey(qdict, "enabled"));
enabled = qdict_get_bool(qdict, "enabled");
qobject_unref(resp);

return enabled;
}

This should be moved to something generic to QTest IMO,
and we need some runtime qtest_is_accelerator_enabled().



Re: [RFC PATCH 9/9] tests/qtest/arm-cpu-features: Restrict TCG-only tests

2021-02-05 Thread Andrew Jones
On Fri, Feb 05, 2021 at 03:43:45PM +0100, Philippe Mathieu-Daudé wrote:
> Some tests explicitly request the TCG accelerator. As these
> tests will obviously fails if TCG is not present, disable
> them in such case.
> 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
> Cc: Roman Bolshakov 
> Cc: Claudio Fontana 
> 
> RFC because of the TODO.
> 
> Roman posted a series to have a QMP command to query enabled
> accelerators.
> ---
>  tests/qtest/arm-cpu-features.c | 33 +
>  1 file changed, 29 insertions(+), 4 deletions(-)
> 
> diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c
> index c59c3cb002b..c6e86282b66 100644
> --- a/tests/qtest/arm-cpu-features.c
> +++ b/tests/qtest/arm-cpu-features.c
> @@ -20,7 +20,7 @@
>   */
>  #define SVE_MAX_VQ 16
>  
> -#define MACHINE "-machine virt,gic-version=max -accel tcg "
> +#define MACHINE_TCG "-machine virt,gic-version=max -accel tcg "
>  #define MACHINE_KVM "-machine virt,gic-version=max -accel kvm -accel tcg "

Should probably also drop the TCG fallback from MACHINE_KVM when
TCG is not present and then find another way to confirm KVM is
present in the kvm tests prior to calling qtest_init().

>  #define QUERY_HEAD  "{ 'execute': 'query-cpu-model-expansion', " \
>  "  'arguments': { 'type': 'full', "
> @@ -41,6 +41,16 @@ static bool kvm_enabled(QTestState *qts)
>  return enabled;
>  }
>  
> +static bool tcg_enabled(QTestState *qts)
> +{
> +/* TODO: Implement QMP query-accel? */
> +#ifdef CONFIG_TCG
> +return true;
> +#else
> +return false;
> +#endif /* CONFIG_TCG */
> +}
> +
>  static QDict *do_query_no_props(QTestState *qts, const char *cpu_type)
>  {
>  return qtest_qmp(qts, QUERY_HEAD "'model': { 'name': %s }"
> @@ -352,7 +362,12 @@ static void sve_tests_sve_max_vq_8(const void *data)
>  {
>  QTestState *qts;
>  
> -qts = qtest_init(MACHINE "-cpu max,sve-max-vq=8");
> +qts = qtest_init(MACHINE_TCG "-cpu max,sve-max-vq=8");

Won't this fail when TCG isn't present? If so, then the test will
either have already aborted or at least qts can't be passed to
tcg_enabled().

> +
> +if (!tcg_enabled(qts)) {
> +qtest_quit(qts);
> +return;
> +}
>  
>  assert_sve_vls(qts, "max", BIT_ULL(8) - 1, NULL);
>  
> @@ -387,7 +402,12 @@ static void sve_tests_sve_off(const void *data)
>  {
>  QTestState *qts;
>  
> -qts = qtest_init(MACHINE "-cpu max,sve=off");
> +qts = qtest_init(MACHINE_TCG "-cpu max,sve=off");
> +
> +if (!tcg_enabled(qts)) {
> +qtest_quit(qts);
> +return;
> +}
>  
>  /* SVE is off, so the map should be empty. */
>  assert_sve_vls(qts, "max", 0, NULL);
> @@ -443,7 +463,12 @@ static void test_query_cpu_model_expansion(const void 
> *data)
>  {
>  QTestState *qts;
>  
> -qts = qtest_init(MACHINE "-cpu max");
> +qts = qtest_init(MACHINE_TCG "-cpu max");
> +
> +if (!tcg_enabled(qts)) {
> +qtest_quit(qts);
> +return;
> +}
>  
>  /* Test common query-cpu-model-expansion input validation */
>  assert_type_full(qts);
> -- 
> 2.26.2
>

Thanks,
drew 




Re: [PATCH 6/9] hw/arm/virt: Display list of valid CPUs for the Virt machine

2021-02-05 Thread Philippe Mathieu-Daudé
On 2/5/21 4:12 PM, Andrew Jones wrote:
> On Fri, Feb 05, 2021 at 03:43:42PM +0100, Philippe Mathieu-Daudé wrote:
>> The Virt machine is restricted to a subset of the CPU provided
>> by QEMU. Instead of having the user run '--cpu help' and try
>> each CPUs until finding a match, display the list from start:
>>
>>   $ qemu-system-aarch64 -M virt -cpu cortex-a8
>>   qemu-system-aarch64: mach-virt: CPU type cortex-a8 not supported
>>   qemu-system-aarch64: mach-virt: Please select one of the following CPU 
>> types:  cortex-a7, cortex-a15, cortex-a53, cortex-a57, cortex-a72, host, max
>>
>> Signed-off-by: Philippe Mathieu-Daudé 
>> ---
>>  hw/arm/virt.c | 11 +++
>>  1 file changed, 11 insertions(+)
>>
>> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
>> index 7802d3a66e8..6ffe091804f 100644
>> --- a/hw/arm/virt.c
>> +++ b/hw/arm/virt.c
>> @@ -1830,9 +1830,20 @@ static void machvirt_init(MachineState *machine)
>>  
>>  if (!cpu_type_valid(machine->cpu_type)) {
>>  int len = strlen(machine->cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX);
>> +g_autoptr(GString) s = g_string_new(NULL);
>>  
>>  error_report("mach-virt: CPU type %.*s not supported",
>>   len, machine->cpu_type);
>> +
>> +for (n = 0; n < ARRAY_SIZE(valid_cpus); n++) {
>> +len = strlen(valid_cpus[n]) - strlen(ARM_CPU_TYPE_SUFFIX);
>> +g_string_append_printf(s, " %.*s", len, valid_cpus[n]);
>> +if (n + 1 < ARRAY_SIZE(valid_cpus)) {
>> +g_string_append_c(s, ',');
>> +}
>> +}
>> +error_report("mach-virt: Please select one of the following CPU 
>> types: %s",
>> + g_string_free(s, FALSE));
>>  exit(1);
>>  }
>>  
>> -- 
>> 2.26.2
>>
> 
> It'd be nice if './qemu-system-aarch64 -M virt -cpu \?' would only output
> the CPUs that the virt machine type supports. Then this error message
> could suggest running that in order to get the list.

+1 very nice =) But not how the command line options processing
works. Maybe later, after John Snow's command-line rework is merged?



Re: [PATCH 4/9] tests/qtest/cdrom-test: Only allow the Virt machine under KVM

2021-02-05 Thread Philippe Mathieu-Daudé
On 2/5/21 4:08 PM, Andrew Jones wrote:
> On Fri, Feb 05, 2021 at 03:43:40PM +0100, Philippe Mathieu-Daudé wrote:
>> Only the Virt and Versal machines are supported under KVM.
>> Restrict the other ones to TCG.
>>
>> Signed-off-by: Philippe Mathieu-Daudé 
>> ---
>>  tests/qtest/cdrom-test.c | 5 -
>>  1 file changed, 4 insertions(+), 1 deletion(-)
>>
>> diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c
>> index 5af944a5fb7..ac02f2bb4f1 100644
>> --- a/tests/qtest/cdrom-test.c
>> +++ b/tests/qtest/cdrom-test.c
>> @@ -222,9 +222,12 @@ int main(int argc, char **argv)
>>  add_cdrom_param_tests(mips64machines);
>>  } else if (g_str_equal(arch, "arm") || g_str_equal(arch, "aarch64")) {
>>  const char *armmachines[] = {
>> +#ifdef CONFIG_TCG
>>  "realview-eb", "realview-eb-mpcore", "realview-pb-a8",
>>  "realview-pbx-a9", "versatileab", "versatilepb", "vexpress-a15",
>> -"vexpress-a9", "virt", NULL
>> +"vexpress-a9",
>> +#endif /* CONFIG_TCG */
>> +"virt", NULL
>>  };
>>  add_cdrom_param_tests(armmachines);
>>  } else {
>> -- 
>> 2.26.2
>>
> 
> Don't we need to use a runtime check for this? I'd guess we can
> build a QEMU that supports both KVM and TCG and then attempt to
> run this test with KVM, which would still try all these other
> machine types.

Yes, I followed commit c51a5a23d87 fix ("qtest: unbreak non-TCG
builds in bios-tables-test").
We need that QMP 'query-accelerators' command then.



Re: [PATCH 8/9] hw/arm/virt: Restrict 32-bit CPUs to TCG

2021-02-05 Thread Andrew Jones
On Fri, Feb 05, 2021 at 03:43:44PM +0100, Philippe Mathieu-Daudé wrote:
> Support for ARMv7 has been dropped in commit 82bf7ae84ce
> ("target/arm: Remove KVM support for 32-bit Arm hosts").
> Restrict the 32-bit CPUs to --enable-tcg builds.
> 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  hw/arm/virt.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index f5e4a6ec914..ab6300650f9 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -197,8 +197,10 @@ static const int a15irqmap[] = {
>  };
>  
>  static const char *valid_cpus[] = {
> +#ifdef CONFIG_TCG
>  ARM_CPU_TYPE_NAME("cortex-a7"),
>  ARM_CPU_TYPE_NAME("cortex-a15"),
> +#endif /* CONFIG_TCG */
>  #ifdef TARGET_AARCH64
>  ARM_CPU_TYPE_NAME("cortex-a53"),
>  ARM_CPU_TYPE_NAME("cortex-a57"),
> -- 
> 2.26.2
>

So this filters the cpus out of KVM only builds, which seems
reasonable to do. Of course, if the build is for both KVM and
TCG, then the cpus won't be filtered out and we'll have to rely
on the runtime checks to error out if one were to try a 32-bit
cpu with KVM. But that's fine too, so

Reviewed-by: Andrew Jones 

Thanks,
drew




Re: [PATCH] iotests: 30: drop from auto group (and effectively from make check)

2021-02-05 Thread Peter Maydell
On Fri, 5 Feb 2021 at 14:48, Eric Blake  wrote:
>
> On 2/5/21 5:10 AM, Vladimir Sementsov-Ogievskiy wrote:
> > and trying to reproduce it on top of
> > "block: update graph permissions update" I had 634 successful
> > iterations
> > and then the following crash (which looks much better):
>
> This part of the commit message is odd - if we check it in to git as
> written, you're pointing to a future commit, while still stating that it
> is not a perfect commit.  But maybe by the time that commit gets in
> we'll have figured out this last crash and corrected it as well.
> Sticking to just the first two logs is fine by me.
>
>
> >
> > So it seems reasonable to drop test from auto group at least until we
> > merge "block: update graph permissions update"
> >
> > Signed-off-by: Vladimir Sementsov-Ogievskiy 
> > ---
> >
> > Note: be free to shorten commit message if needed :)
>
> Indeed.  But as to the patch itself, I agree, and may Peter wants to
> apply it directly to master instead of waiting for it to come through on
> of the block maintainers?
>
> Reviewed-by: Eric Blake 

Thanks; I have applied this to master, after trimming the
part of the commit message that refers to as-yet-unapplied
patch series.

-- PMM



Re: [RFC PATCH 9/9] tests/qtest/arm-cpu-features: Restrict TCG-only tests

2021-02-05 Thread Claudio Fontana
On 2/5/21 3:43 PM, Philippe Mathieu-Daudé wrote:
> Some tests explicitly request the TCG accelerator. As these
> tests will obviously fails if TCG is not present, disable
> them in such case.
> 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
> Cc: Roman Bolshakov 
> Cc: Claudio Fontana 
> 
> RFC because of the TODO.
> 
> Roman posted a series to have a QMP command to query enabled
> accelerators.
> ---
>  tests/qtest/arm-cpu-features.c | 33 +
>  1 file changed, 29 insertions(+), 4 deletions(-)
> 
> diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c
> index c59c3cb002b..c6e86282b66 100644
> --- a/tests/qtest/arm-cpu-features.c
> +++ b/tests/qtest/arm-cpu-features.c
> @@ -20,7 +20,7 @@
>   */
>  #define SVE_MAX_VQ 16
>  
> -#define MACHINE "-machine virt,gic-version=max -accel tcg "
> +#define MACHINE_TCG "-machine virt,gic-version=max -accel tcg "
>  #define MACHINE_KVM "-machine virt,gic-version=max -accel kvm -accel tcg "
>  #define QUERY_HEAD  "{ 'execute': 'query-cpu-model-expansion', " \
>  "  'arguments': { 'type': 'full', "
> @@ -41,6 +41,16 @@ static bool kvm_enabled(QTestState *qts)
>  return enabled;
>  }
>  
> +static bool tcg_enabled(QTestState *qts)
> +{
> +/* TODO: Implement QMP query-accel? */
> +#ifdef CONFIG_TCG
> +return true;
> +#else
> +return false;
> +#endif /* CONFIG_TCG */


I would not use the same name as the existing tcg_enabled(), which has 
different semantics, even in test code;

what you mean here is tcg_available() right?



> +}
> +
>  static QDict *do_query_no_props(QTestState *qts, const char *cpu_type)
>  {
>  return qtest_qmp(qts, QUERY_HEAD "'model': { 'name': %s }"
> @@ -352,7 +362,12 @@ static void sve_tests_sve_max_vq_8(const void *data)
>  {
>  QTestState *qts;
>  
> -qts = qtest_init(MACHINE "-cpu max,sve-max-vq=8");
> +qts = qtest_init(MACHINE_TCG "-cpu max,sve-max-vq=8");
> +
> +if (!tcg_enabled(qts)) {
> +qtest_quit(qts);
> +return;
> +}
>  
>  assert_sve_vls(qts, "max", BIT_ULL(8) - 1, NULL);
>  
> @@ -387,7 +402,12 @@ static void sve_tests_sve_off(const void *data)
>  {
>  QTestState *qts;
>  
> -qts = qtest_init(MACHINE "-cpu max,sve=off");
> +qts = qtest_init(MACHINE_TCG "-cpu max,sve=off");
> +
> +if (!tcg_enabled(qts)) {
> +qtest_quit(qts);
> +return;
> +}
>  
>  /* SVE is off, so the map should be empty. */
>  assert_sve_vls(qts, "max", 0, NULL);
> @@ -443,7 +463,12 @@ static void test_query_cpu_model_expansion(const void 
> *data)
>  {
>  QTestState *qts;
>  
> -qts = qtest_init(MACHINE "-cpu max");
> +qts = qtest_init(MACHINE_TCG "-cpu max");
> +
> +if (!tcg_enabled(qts)) {
> +qtest_quit(qts);
> +return;
> +}
>  
>  /* Test common query-cpu-model-expansion input validation */
>  assert_type_full(qts);
> 




Re: [PATCH 4/9] tests/qtest/cdrom-test: Only allow the Virt machine under KVM

2021-02-05 Thread Peter Maydell
On Fri, 5 Feb 2021 at 15:08, Andrew Jones  wrote:
>
> On Fri, Feb 05, 2021 at 03:43:40PM +0100, Philippe Mathieu-Daudé wrote:
> > Only the Virt and Versal machines are supported under KVM.
> > Restrict the other ones to TCG.
> >
> > Signed-off-by: Philippe Mathieu-Daudé 
> > ---
> >  tests/qtest/cdrom-test.c | 5 -
> >  1 file changed, 4 insertions(+), 1 deletion(-)
> >
> > diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c
> > index 5af944a5fb7..ac02f2bb4f1 100644
> > --- a/tests/qtest/cdrom-test.c
> > +++ b/tests/qtest/cdrom-test.c
> > @@ -222,9 +222,12 @@ int main(int argc, char **argv)
> >  add_cdrom_param_tests(mips64machines);
> >  } else if (g_str_equal(arch, "arm") || g_str_equal(arch, "aarch64")) {
> >  const char *armmachines[] = {
> > +#ifdef CONFIG_TCG
> >  "realview-eb", "realview-eb-mpcore", "realview-pb-a8",
> >  "realview-pbx-a9", "versatileab", "versatilepb", 
> > "vexpress-a15",
> > -"vexpress-a9", "virt", NULL
> > +"vexpress-a9",
> > +#endif /* CONFIG_TCG */
> > +"virt", NULL
> >  };
> >  add_cdrom_param_tests(armmachines);
> >  } else {
> > --
> > 2.26.2
> >
>
> Don't we need to use a runtime check for this? I'd guess we can
> build a QEMU that supports both KVM and TCG and then attempt to
> run this test with KVM, which would still try all these other
> machine types.

More generally, it would be nice to avoid hardcoding into the
tests what accelerators particular machines work with, because
then if we move a machine into or out of the "TCG-only" list
we now have multiple places to update. Ideally we should
be able to just change the main meson.build files and have
everything else cope.

-- PMM



Re: [PATCH 1/9] tests/qtest/arm-cpu-features: Remove Cortex-A15 check

2021-02-05 Thread Philippe Mathieu-Daudé
Hi Drew,

On 2/5/21 3:59 PM, Andrew Jones wrote:
> On Fri, Feb 05, 2021 at 03:43:37PM +0100, Philippe Mathieu-Daudé wrote:
>> Support for ARMv7 has been dropped in commit 82bf7ae84ce
>> ("target/arm: Remove KVM support for 32-bit Arm hosts"),
>> no need to check for Cortex A15 host cpu anymore.
>>
>> Signed-off-by: Philippe Mathieu-Daudé 
>> ---
>>  tests/qtest/arm-cpu-features.c | 4 
>>  1 file changed, 4 deletions(-)
>>
>> diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c
>> index 8252b85bb85..c59c3cb002b 100644
>> --- a/tests/qtest/arm-cpu-features.c
>> +++ b/tests/qtest/arm-cpu-features.c
>> @@ -515,10 +515,6 @@ static void test_query_cpu_model_expansion_kvm(const 
>> void *data)
>>  QDict *resp;
>>  char *error;
>>  
>> -assert_error(qts, "cortex-a15",
>> -"We cannot guarantee the CPU type 'cortex-a15' works "
>> -"with KVM on this host", NULL);
>> -
> 
> This isn't testing anything regarding 32-bit KVM host support. It's
> testing that an error is returned when a given cpu type that can't
> be known to work with KVM is used. We know that the cortex-a15 can't
> be known to work. If we were to use a 64-bit cpu type here then there's
> a chance that it would work, failing the test that an error be returned.

This was my first understanding, but then why does it fail?

PASS 1 qtest-aarch64/arm-cpu-features /aarch64/arm/query-cpu-model-expansion
**
ERROR:../../tests/qtest/arm-cpu-features.c:543:test_query_cpu_model_expansion_kvm:
assertion failed: (g_str_equal(_error, "We cannot guarantee the CPU type
'cortex-a15' works " "with KVM on this host"))
ERROR qtest-aarch64/arm-cpu-features - Bail out!
ERROR:../../tests/qtest/arm-cpu-features.c:543:test_query_cpu_model_expansion_kvm:
assertion failed: (g_str_equal(_error, "We cannot guarantee the CPU type
'cortex-a15' works " "with KVM on this host"))
make: *** [Makefile.mtest:905: run-test-111] Error 1

FWIW when tracing (cavium thunderX1 host, dmesg reports 0x431f0a11):
kvm_vcpu_ioctl cpu_index 0, type 0x4020aeae, arg 0x9b7f9b18

> 
>>  assert_has_feature_enabled(qts, "host", "aarch64");
>>  
>>  /* Enabling and disabling pmu should always work. */
>> -- 
>> 2.26.2
>>
>>
> 
> This file could use a cleanup patch regarding the dropping of 32-bit KVM
> support though. At least the comment in main(), "For now we only run KVM
> specific tests..." could be reworded. It was written that way when we
> planned to try testing on 32-bit KVM too eventually, but we never did,
> and now we'll never need to.
> 
> Thanks,
> drew
> 
> 



Re: [PATCH 7/9] hw/arm/virt: Do not include 64-bit CPUs in 32-bit build

2021-02-05 Thread Andrew Jones
On Fri, Feb 05, 2021 at 03:43:43PM +0100, Philippe Mathieu-Daudé wrote:
> Similarly to commit 210f47840dd, remove 64-bit CPUs (which have
> never been available on 32-bit build, see commit d14d42f19bf),
> to fix:
> 
>   $ make check-qtest-arm
>   ...
>   Running test qtest-arm/device-introspect-test
>   missing object type 'cortex-a53-arm-cpu'
>   Broken pipe
>   ../tests/qtest/libqtest.c:181: kill_qemu() detected QEMU death from signal 6 (Aborted) (core dumped)
>   ERROR qtest-arm/device-introspect-test - too few tests run (expected 6, got 5)
> 
> Reviewed-by: Alistair Francis 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  hw/arm/virt.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 6ffe091804f..f5e4a6ec914 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -199,9 +199,11 @@ static const int a15irqmap[] = {
>  static const char *valid_cpus[] = {
>  ARM_CPU_TYPE_NAME("cortex-a7"),
>  ARM_CPU_TYPE_NAME("cortex-a15"),
> +#ifdef TARGET_AARCH64
>  ARM_CPU_TYPE_NAME("cortex-a53"),
>  ARM_CPU_TYPE_NAME("cortex-a57"),
>  ARM_CPU_TYPE_NAME("cortex-a72"),
> +#endif /* TARGET_AARCH64 */
>  ARM_CPU_TYPE_NAME("host"),
>  ARM_CPU_TYPE_NAME("max"),
>  };
> -- 
> 2.26.2
>

Reviewed-by: Andrew Jones 




Re: [PATCH 6/9] hw/arm/virt: Display list of valid CPUs for the Virt machine

2021-02-05 Thread Andrew Jones
On Fri, Feb 05, 2021 at 03:43:42PM +0100, Philippe Mathieu-Daudé wrote:
> The Virt machine is restricted to a subset of the CPU provided
> by QEMU. Instead of having the user run '--cpu help' and try
> each CPUs until finding a match, display the list from start:
> 
>   $ qemu-system-aarch64 -M virt -cpu cortex-a8
>   qemu-system-aarch64: mach-virt: CPU type cortex-a8 not supported
>   qemu-system-aarch64: mach-virt: Please select one of the following CPU types:  cortex-a7, cortex-a15, cortex-a53, cortex-a57, cortex-a72, host, max
> 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  hw/arm/virt.c | 11 +++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 7802d3a66e8..6ffe091804f 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -1830,9 +1830,20 @@ static void machvirt_init(MachineState *machine)
>  
>  if (!cpu_type_valid(machine->cpu_type)) {
>  int len = strlen(machine->cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX);
> +g_autoptr(GString) s = g_string_new(NULL);
>  
>  error_report("mach-virt: CPU type %.*s not supported",
>   len, machine->cpu_type);
> +
> +for (n = 0; n < ARRAY_SIZE(valid_cpus); n++) {
> +len = strlen(valid_cpus[n]) - strlen(ARM_CPU_TYPE_SUFFIX);
> +g_string_append_printf(s, " %.*s", len, valid_cpus[n]);
> +if (n + 1 < ARRAY_SIZE(valid_cpus)) {
> +g_string_append_c(s, ',');
> +}
> +}
> +error_report("mach-virt: Please select one of the following CPU types: %s",
> + g_string_free(s, FALSE));
>  exit(1);
>  }
>  
> -- 
> 2.26.2
>

It'd be nice if './qemu-system-aarch64 -M virt -cpu \?' would only output
the CPUs that the virt machine type supports. Then this error message
could suggest running that in order to get the list.

Thanks,
drew




  1   2   >