[PULL 23/56] hw/block/nvme: merge implicitly/explicitly opened processing masks

2021-02-08 Thread Klaus Jensen
From: Klaus Jensen 

Implicitly and explicitly opened zones are always bulk processed
together, so merge the two processing masks.

Signed-off-by: Klaus Jensen 
Tested-by: Dmitry Fomichev 
Reviewed-by: Dmitry Fomichev 
---
 hw/block/nvme.c | 27 +++
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 551878338e5d..a7245a7e05a1 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1740,11 +1740,10 @@ typedef uint16_t (*op_handler_t)(NvmeNamespace *, 
NvmeZone *,
 
 enum NvmeZoneProcessingMask {
 NVME_PROC_CURRENT_ZONE= 0,
-NVME_PROC_IMP_OPEN_ZONES  = 1 << 0,
-NVME_PROC_EXP_OPEN_ZONES  = 1 << 1,
-NVME_PROC_CLOSED_ZONES= 1 << 2,
-NVME_PROC_READ_ONLY_ZONES = 1 << 3,
-NVME_PROC_FULL_ZONES  = 1 << 4,
+NVME_PROC_OPENED_ZONES= 1 << 0,
+NVME_PROC_CLOSED_ZONES= 1 << 1,
+NVME_PROC_READ_ONLY_ZONES = 1 << 2,
+NVME_PROC_FULL_ZONES  = 1 << 3,
 };
 
 static uint16_t nvme_open_zone(NvmeNamespace *ns, NvmeZone *zone,
@@ -1885,10 +1884,8 @@ static uint16_t nvme_bulk_proc_zone(NvmeNamespace *ns, 
NvmeZone *zone,
 
 switch (zs) {
 case NVME_ZONE_STATE_IMPLICITLY_OPEN:
-proc_zone = proc_mask & NVME_PROC_IMP_OPEN_ZONES;
-break;
 case NVME_ZONE_STATE_EXPLICITLY_OPEN:
-proc_zone = proc_mask & NVME_PROC_EXP_OPEN_ZONES;
+proc_zone = proc_mask & NVME_PROC_OPENED_ZONES;
 break;
 case NVME_ZONE_STATE_CLOSED:
 proc_zone = proc_mask & NVME_PROC_CLOSED_ZONES;
@@ -1929,15 +1926,14 @@ static uint16_t nvme_do_zone_op(NvmeNamespace *ns, 
NvmeZone *zone,
 }
 }
 }
-if (proc_mask & NVME_PROC_IMP_OPEN_ZONES) {
+if (proc_mask & NVME_PROC_OPENED_ZONES) {
 QTAILQ_FOREACH_SAFE(zone, >imp_open_zones, entry, next) {
 status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr);
 if (status != NVME_SUCCESS) {
 goto out;
 }
 }
-}
-if (proc_mask & NVME_PROC_EXP_OPEN_ZONES) {
+
 QTAILQ_FOREACH_SAFE(zone, >exp_open_zones, entry, next) {
 status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr);
 if (status != NVME_SUCCESS) {
@@ -2012,7 +2008,7 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, 
NvmeRequest *req)
 
 case NVME_ZONE_ACTION_CLOSE:
 if (all) {
-proc_mask = NVME_PROC_IMP_OPEN_ZONES | NVME_PROC_EXP_OPEN_ZONES;
+proc_mask = NVME_PROC_OPENED_ZONES;
 }
 trace_pci_nvme_close_zone(slba, zone_idx, all);
 status = nvme_do_zone_op(ns, zone, proc_mask, nvme_close_zone);
@@ -2020,8 +2016,7 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, 
NvmeRequest *req)
 
 case NVME_ZONE_ACTION_FINISH:
 if (all) {
-proc_mask = NVME_PROC_IMP_OPEN_ZONES | NVME_PROC_EXP_OPEN_ZONES |
-NVME_PROC_CLOSED_ZONES;
+proc_mask = NVME_PROC_OPENED_ZONES | NVME_PROC_CLOSED_ZONES;
 }
 trace_pci_nvme_finish_zone(slba, zone_idx, all);
 status = nvme_do_zone_op(ns, zone, proc_mask, nvme_finish_zone);
@@ -2029,8 +2024,8 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, 
NvmeRequest *req)
 
 case NVME_ZONE_ACTION_RESET:
 if (all) {
-proc_mask = NVME_PROC_IMP_OPEN_ZONES | NVME_PROC_EXP_OPEN_ZONES |
-NVME_PROC_CLOSED_ZONES | NVME_PROC_FULL_ZONES;
+proc_mask = NVME_PROC_OPENED_ZONES | NVME_PROC_CLOSED_ZONES |
+NVME_PROC_FULL_ZONES;
 }
 trace_pci_nvme_reset_zone(slba, zone_idx, all);
 status = nvme_do_zone_op(ns, zone, proc_mask, nvme_reset_zone);
-- 
2.30.0




[PULL 14/56] hw/block/nvme: Support allocated CNS command variants

2021-02-08 Thread Klaus Jensen
From: Niklas Cassel 

Many CNS commands have "allocated" command variants. These include
a namespace as long as it is allocated; that is, a namespace is
included regardless of whether it is active (attached) or not.

While these commands are optional (they are mandatory for controllers
supporting the namespace attachment command), our QEMU implementation
is more complete by actually providing support for these CNS values.

However, since our QEMU model currently does not support the namespace
attachment command, these new allocated CNS commands will return the
same result as the active CNS command variants.

The reason for not hooking up this command completely is because the
NVMe specification requires the namespace management command to be
supported if the namespace attachment command is supported.

Signed-off-by: Niklas Cassel 
Signed-off-by: Dmitry Fomichev 
Reviewed-by: Keith Busch 
Signed-off-by: Klaus Jensen 
---
 include/block/nvme.h | 20 
 hw/block/nvme.c  |  8 
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/include/block/nvme.h b/include/block/nvme.h
index 19347cf69e52..adb5806365a3 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -813,14 +813,18 @@ typedef struct QEMU_PACKED NvmePSD {
 #define NVME_IDENTIFY_DATA_SIZE 4096
 
 enum NvmeIdCns {
-NVME_ID_CNS_NS= 0x00,
-NVME_ID_CNS_CTRL  = 0x01,
-NVME_ID_CNS_NS_ACTIVE_LIST= 0x02,
-NVME_ID_CNS_NS_DESCR_LIST = 0x03,
-NVME_ID_CNS_CS_NS = 0x05,
-NVME_ID_CNS_CS_CTRL   = 0x06,
-NVME_ID_CNS_CS_NS_ACTIVE_LIST = 0x07,
-NVME_ID_CNS_IO_COMMAND_SET= 0x1c,
+NVME_ID_CNS_NS= 0x00,
+NVME_ID_CNS_CTRL  = 0x01,
+NVME_ID_CNS_NS_ACTIVE_LIST= 0x02,
+NVME_ID_CNS_NS_DESCR_LIST = 0x03,
+NVME_ID_CNS_CS_NS = 0x05,
+NVME_ID_CNS_CS_CTRL   = 0x06,
+NVME_ID_CNS_CS_NS_ACTIVE_LIST = 0x07,
+NVME_ID_CNS_NS_PRESENT_LIST   = 0x10,
+NVME_ID_CNS_NS_PRESENT= 0x11,
+NVME_ID_CNS_CS_NS_PRESENT_LIST= 0x1a,
+NVME_ID_CNS_CS_NS_PRESENT = 0x1b,
+NVME_ID_CNS_IO_COMMAND_SET= 0x1c,
 };
 
 typedef struct QEMU_PACKED NvmeIdCtrl {
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 27679c8be816..f1cc66d381a1 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -2008,16 +2008,24 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest 
*req)
 
 switch (le32_to_cpu(c->cns)) {
 case NVME_ID_CNS_NS:
+ /* fall through */
+case NVME_ID_CNS_NS_PRESENT:
 return nvme_identify_ns(n, req);
 case NVME_ID_CNS_CS_NS:
+ /* fall through */
+case NVME_ID_CNS_CS_NS_PRESENT:
 return nvme_identify_ns_csi(n, req);
 case NVME_ID_CNS_CTRL:
 return nvme_identify_ctrl(n, req);
 case NVME_ID_CNS_CS_CTRL:
 return nvme_identify_ctrl_csi(n, req);
 case NVME_ID_CNS_NS_ACTIVE_LIST:
+ /* fall through */
+case NVME_ID_CNS_NS_PRESENT_LIST:
 return nvme_identify_nslist(n, req);
 case NVME_ID_CNS_CS_NS_ACTIVE_LIST:
+ /* fall through */
+case NVME_ID_CNS_CS_NS_PRESENT_LIST:
 return nvme_identify_nslist_csi(n, req);
 case NVME_ID_CNS_NS_DESCR_LIST:
 return nvme_identify_ns_descr_list(n, req);
-- 
2.30.0




[PULL 12/56] hw/block/nvme: Add Commands Supported and Effects log

2021-02-08 Thread Klaus Jensen
From: Dmitry Fomichev 

This log page becomes necessary to implement to allow checking for
Zone Append command support in Zoned Namespace Command Set.

This commit adds the code to report this log page for NVM Command
Set only. The parts that are specific to zoned operation will be
added later in the series.

All incoming admin and i/o commands are now only processed if their
corresponding support bits are set in this log. This provides an
easy way to control what commands to support and what not to
depending on set CC.CSS.

Signed-off-by: Dmitry Fomichev 
Reviewed-by: Niklas Cassel 
Reviewed-by: Keith Busch 
Signed-off-by: Klaus Jensen 
---
 hw/block/nvme-ns.h|   1 +
 include/block/nvme.h  |  19 
 hw/block/nvme.c   | 102 ++
 hw/block/trace-events |   1 +
 4 files changed, 114 insertions(+), 9 deletions(-)

diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index aeca810fc7a8..bdeaf1c0de84 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -30,6 +30,7 @@ typedef struct NvmeNamespace {
 int32_t  bootindex;
 int64_t  size;
 NvmeIdNs id_ns;
+const uint32_t *iocs;
 
 NvmeNamespaceParams params;
 
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 11ac1c2b7dfb..397f7ca3b5cb 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -752,10 +752,27 @@ enum NvmeSmartWarn {
 NVME_SMART_FAILED_VOLATILE_MEDIA  = 1 << 4,
 };
 
+typedef struct NvmeEffectsLog {
+uint32_tacs[256];
+uint32_tiocs[256];
+uint8_t resv[2048];
+} NvmeEffectsLog;
+
+enum {
+NVME_CMD_EFF_CSUPP  = 1 << 0,
+NVME_CMD_EFF_LBCC   = 1 << 1,
+NVME_CMD_EFF_NCC= 1 << 2,
+NVME_CMD_EFF_NIC= 1 << 3,
+NVME_CMD_EFF_CCC= 1 << 4,
+NVME_CMD_EFF_CSE_MASK   = 3 << 16,
+NVME_CMD_EFF_UUID_SEL   = 1 << 19,
+};
+
 enum NvmeLogIdentifier {
 NVME_LOG_ERROR_INFO = 0x01,
 NVME_LOG_SMART_INFO = 0x02,
 NVME_LOG_FW_SLOT_INFO   = 0x03,
+NVME_LOG_CMD_EFFECTS= 0x05,
 };
 
 typedef struct QEMU_PACKED NvmePSD {
@@ -868,6 +885,7 @@ enum NvmeIdCtrlFrmw {
 
 enum NvmeIdCtrlLpa {
 NVME_LPA_NS_SMART = 1 << 0,
+NVME_LPA_CSE  = 1 << 1,
 NVME_LPA_EXTENDED = 1 << 2,
 };
 
@@ -1076,6 +1094,7 @@ static inline void _nvme_check_size(void)
 QEMU_BUILD_BUG_ON(sizeof(NvmeErrorLog) != 64);
 QEMU_BUILD_BUG_ON(sizeof(NvmeFwSlotInfoLog) != 512);
 QEMU_BUILD_BUG_ON(sizeof(NvmeSmartLog) != 512);
+QEMU_BUILD_BUG_ON(sizeof(NvmeEffectsLog) != 4096);
 QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrl) != 4096);
 QEMU_BUILD_BUG_ON(sizeof(NvmeIdNs) != 4096);
 QEMU_BUILD_BUG_ON(sizeof(NvmeSglDescriptor) != 16);
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 4d1ca8c466c5..05e799623c41 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -112,6 +112,30 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
 [NVME_TIMESTAMP]= NVME_FEAT_CAP_CHANGE,
 };
 
+static const uint32_t nvme_cse_acs[256] = {
+[NVME_ADM_CMD_DELETE_SQ]= NVME_CMD_EFF_CSUPP,
+[NVME_ADM_CMD_CREATE_SQ]= NVME_CMD_EFF_CSUPP,
+[NVME_ADM_CMD_GET_LOG_PAGE] = NVME_CMD_EFF_CSUPP,
+[NVME_ADM_CMD_DELETE_CQ]= NVME_CMD_EFF_CSUPP,
+[NVME_ADM_CMD_CREATE_CQ]= NVME_CMD_EFF_CSUPP,
+[NVME_ADM_CMD_IDENTIFY] = NVME_CMD_EFF_CSUPP,
+[NVME_ADM_CMD_ABORT]= NVME_CMD_EFF_CSUPP,
+[NVME_ADM_CMD_SET_FEATURES] = NVME_CMD_EFF_CSUPP,
+[NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP,
+[NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP,
+};
+
+static const uint32_t nvme_cse_iocs_none[256];
+
+static const uint32_t nvme_cse_iocs_nvm[256] = {
+[NVME_CMD_FLUSH]= NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
+[NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
+[NVME_CMD_WRITE]= NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
+[NVME_CMD_READ] = NVME_CMD_EFF_CSUPP,
+[NVME_CMD_DSM]  = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
+[NVME_CMD_COMPARE]  = NVME_CMD_EFF_CSUPP,
+};
+
 static void nvme_process_sq(void *opaque);
 
 static uint16_t nvme_cid(NvmeRequest *req)
@@ -1306,10 +1330,6 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest 
*req)
 trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req),
   req->cmd.opcode, nvme_io_opc_str(req->cmd.opcode));
 
-if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_ADMIN_ONLY) {
-return NVME_INVALID_OPCODE | NVME_DNR;
-}
-
 if (!nvme_nsid_valid(n, nsid)) {
 return NVME_INVALID_NSID | NVME_DNR;
 }
@@ -1319,6 +1339,11 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest 
*req)
 return NVME_INVALID_FIELD | NVME_DNR;
 }
 
+if (!(req->ns->iocs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) {
+trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
+return 

[PULL 07/56] hw/block/nvme: fix bad clearing of CAP

2021-02-08 Thread Klaus Jensen
From: Klaus Jensen 

Commit 37712e00b1f0 ("hw/block/nvme: factor out pmr setup") changed the
control flow such that the CAP register is erroneously cleared after
nvme_init_pmr() has configured it. Since the entire NvmeCtrl structure
is zero-filled initially, there is no need for the explicit clearing, so
just remove it.

Fixes: 37712e00b1f0 ("hw/block/nvme: factor out pmr setup")
Signed-off-by: Klaus Jensen 
Reviewed-by: Keith Busch 
Reviewed-by: Minwoo Im 
---
 hw/block/nvme.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index b9313fdc4762..de52487aaf06 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -3038,7 +3038,6 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->psd[0].enlat = cpu_to_le32(0x10);
 id->psd[0].exlat = cpu_to_le32(0x4);
 
-n->bar.cap = 0;
 NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
 NVME_CAP_SET_CQR(n->bar.cap, 1);
 NVME_CAP_SET_TO(n->bar.cap, 0xf);
-- 
2.30.0




[PULL 15/56] nvme: Make ZNS-related definitions

2021-02-08 Thread Klaus Jensen
From: Dmitry Fomichev 

Define values and structures that are needed to support Zoned
Namespace Command Set (NVMe TP 4053).

Signed-off-by: Dmitry Fomichev 
Acked-by: Stefan Hajnoczi 
Reviewed-by: Keith Busch 
Signed-off-by: Klaus Jensen 
---
 include/block/nvme.h | 114 ++-
 1 file changed, 113 insertions(+), 1 deletion(-)

diff --git a/include/block/nvme.h b/include/block/nvme.h
index adb5806365a3..9494246f1f59 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -489,6 +489,9 @@ enum NvmeIoCommands {
 NVME_CMD_COMPARE= 0x05,
 NVME_CMD_WRITE_ZEROES   = 0x08,
 NVME_CMD_DSM= 0x09,
+NVME_CMD_ZONE_MGMT_SEND = 0x79,
+NVME_CMD_ZONE_MGMT_RECV = 0x7a,
+NVME_CMD_ZONE_APPEND= 0x7d,
 };
 
 typedef struct QEMU_PACKED NvmeDeleteQ {
@@ -654,9 +657,13 @@ typedef struct QEMU_PACKED NvmeAerResult {
 uint8_t resv;
 } NvmeAerResult;
 
+typedef struct QEMU_PACKED NvmeZonedResult {
+uint64_t slba;
+} NvmeZonedResult;
+
 typedef struct QEMU_PACKED NvmeCqe {
 uint32_tresult;
-uint32_trsvd;
+uint32_tdw1;
 uint16_tsq_head;
 uint16_tsq_id;
 uint16_tcid;
@@ -685,6 +692,7 @@ enum NvmeStatusCodes {
 NVME_INVALID_USE_OF_CMB = 0x0012,
 NVME_INVALID_PRP_OFFSET = 0x0013,
 NVME_CMD_SET_CMB_REJECTED   = 0x002b,
+NVME_INVALID_CMD_SET= 0x002c,
 NVME_LBA_RANGE  = 0x0080,
 NVME_CAP_EXCEEDED   = 0x0081,
 NVME_NS_NOT_READY   = 0x0082,
@@ -709,6 +717,14 @@ enum NvmeStatusCodes {
 NVME_CONFLICTING_ATTRS  = 0x0180,
 NVME_INVALID_PROT_INFO  = 0x0181,
 NVME_WRITE_TO_RO= 0x0182,
+NVME_ZONE_BOUNDARY_ERROR= 0x01b8,
+NVME_ZONE_FULL  = 0x01b9,
+NVME_ZONE_READ_ONLY = 0x01ba,
+NVME_ZONE_OFFLINE   = 0x01bb,
+NVME_ZONE_INVALID_WRITE = 0x01bc,
+NVME_ZONE_TOO_MANY_ACTIVE   = 0x01bd,
+NVME_ZONE_TOO_MANY_OPEN = 0x01be,
+NVME_ZONE_INVAL_TRANSITION  = 0x01bf,
 NVME_WRITE_FAULT= 0x0280,
 NVME_UNRECOVERED_READ   = 0x0281,
 NVME_E2E_GUARD_ERROR= 0x0282,
@@ -894,6 +910,11 @@ typedef struct QEMU_PACKED NvmeIdCtrl {
 uint8_t vs[1024];
 } NvmeIdCtrl;
 
+typedef struct NvmeIdCtrlZoned {
+uint8_t zasl;
+uint8_t rsvd1[4095];
+} NvmeIdCtrlZoned;
+
 enum NvmeIdCtrlOacs {
 NVME_OACS_SECURITY  = 1 << 0,
 NVME_OACS_FORMAT= 1 << 1,
@@ -1022,6 +1043,12 @@ typedef struct QEMU_PACKED NvmeLBAF {
 uint8_t rp;
 } NvmeLBAF;
 
+typedef struct QEMU_PACKED NvmeLBAFE {
+uint64_tzsze;
+uint8_t zdes;
+uint8_t rsvd9[7];
+} NvmeLBAFE;
+
 #define NVME_NSID_BROADCAST 0x
 
 typedef struct QEMU_PACKED NvmeIdNs {
@@ -1081,10 +1108,24 @@ enum NvmeNsIdentifierType {
 
 enum NvmeCsi {
 NVME_CSI_NVM= 0x00,
+NVME_CSI_ZONED  = 0x02,
 };
 
 #define NVME_SET_CSI(vec, csi) (vec |= (uint8_t)(1 << (csi)))
 
+typedef struct QEMU_PACKED NvmeIdNsZoned {
+uint16_tzoc;
+uint16_tozcs;
+uint32_tmar;
+uint32_tmor;
+uint32_trrl;
+uint32_tfrl;
+uint8_t rsvd20[2796];
+NvmeLBAFE   lbafe[16];
+uint8_t rsvd3072[768];
+uint8_t vs[256];
+} NvmeIdNsZoned;
+
 /*Deallocate Logical Block Features*/
 #define NVME_ID_NS_DLFEAT_GUARD_CRC(dlfeat)   ((dlfeat) & 0x10)
 #define NVME_ID_NS_DLFEAT_WRITE_ZEROES(dlfeat)((dlfeat) & 0x08)
@@ -1117,10 +1158,76 @@ enum NvmeIdNsDps {
 DPS_FIRST_EIGHT = 8,
 };
 
+enum NvmeZoneAttr {
+NVME_ZA_FINISHED_BY_CTLR = 1 << 0,
+NVME_ZA_FINISH_RECOMMENDED   = 1 << 1,
+NVME_ZA_RESET_RECOMMENDED= 1 << 2,
+NVME_ZA_ZD_EXT_VALID = 1 << 7,
+};
+
+typedef struct QEMU_PACKED NvmeZoneReportHeader {
+uint64_tnr_zones;
+uint8_t rsvd[56];
+} NvmeZoneReportHeader;
+
+enum NvmeZoneReceiveAction {
+NVME_ZONE_REPORT = 0,
+NVME_ZONE_REPORT_EXTENDED= 1,
+};
+
+enum NvmeZoneReportType {
+NVME_ZONE_REPORT_ALL = 0,
+NVME_ZONE_REPORT_EMPTY   = 1,
+NVME_ZONE_REPORT_IMPLICITLY_OPEN = 2,
+NVME_ZONE_REPORT_EXPLICITLY_OPEN = 3,
+NVME_ZONE_REPORT_CLOSED  = 4,
+NVME_ZONE_REPORT_FULL= 5,
+NVME_ZONE_REPORT_READ_ONLY   = 6,
+NVME_ZONE_REPORT_OFFLINE = 7,
+};
+
+enum NvmeZoneType {
+NVME_ZONE_TYPE_RESERVED  = 0x00,
+NVME_ZONE_TYPE_SEQ_WRITE = 0x02,
+};
+
+enum NvmeZoneSendAction {
+NVME_ZONE_ACTION_RSD = 0x00,
+NVME_ZONE_ACTION_CLOSE   = 0x01,
+NVME_ZONE_ACTION_FINISH  = 0x02,
+NVME_ZONE_ACTION_OPEN= 0x03,
+NVME_ZONE_ACTION_RESET   = 0x04,
+NVME_ZONE_ACTION_OFFLINE = 0x05,
+NVME_ZONE_ACTION_SET_ZD_EXT  = 0x10,
+};
+
+typedef struct QEMU_PACKED NvmeZoneDescr {
+uint8_t  

[PULL 11/56] hw/block/nvme: Combine nvme_write_zeroes() and nvme_write()

2021-02-08 Thread Klaus Jensen
From: Dmitry Fomichev 

Move write processing to nvme_do_write() that now handles both WRITE
and WRITE ZEROES. Both nvme_write() and nvme_write_zeroes() become
inline helper functions.

Signed-off-by: Dmitry Fomichev 
Reviewed-by: Niklas Cassel 
Reviewed-by: Keith Busch 
Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c   | 78 ---
 hw/block/trace-events |  1 -
 2 files changed, 36 insertions(+), 43 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 905fd1ba93f5..4d1ca8c466c5 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1231,32 +1231,7 @@ invalid:
 return status | NVME_DNR;
 }
 
-static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
-{
-NvmeRwCmd *rw = (NvmeRwCmd *)>cmd;
-NvmeNamespace *ns = req->ns;
-uint64_t slba = le64_to_cpu(rw->slba);
-uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
-uint64_t offset = nvme_l2b(ns, slba);
-uint32_t count = nvme_l2b(ns, nlb);
-uint16_t status;
-
-trace_pci_nvme_write_zeroes(nvme_cid(req), nvme_nsid(ns), slba, nlb);
-
-status = nvme_check_bounds(ns, slba, nlb);
-if (status) {
-trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
-return status;
-}
-
-block_acct_start(blk_get_stats(req->ns->blkconf.blk), >acct, 0,
- BLOCK_ACCT_WRITE);
-req->aiocb = blk_aio_pwrite_zeroes(req->ns->blkconf.blk, offset, count,
-   BDRV_REQ_MAY_UNMAP, nvme_rw_cb, req);
-return NVME_NO_COMPLETE;
-}
-
-static uint16_t nvme_write(NvmeCtrl *n, NvmeRequest *req)
+static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool wrz)
 {
 NvmeRwCmd *rw = (NvmeRwCmd *)>cmd;
 NvmeNamespace *ns = req->ns;
@@ -1270,10 +1245,12 @@ static uint16_t nvme_write(NvmeCtrl *n, NvmeRequest 
*req)
 trace_pci_nvme_write(nvme_cid(req), nvme_io_opc_str(rw->opcode),
  nvme_nsid(ns), nlb, data_size, slba);
 
-status = nvme_check_mdts(n, data_size);
-if (status) {
-trace_pci_nvme_err_mdts(nvme_cid(req), data_size);
-goto invalid;
+if (!wrz) {
+status = nvme_check_mdts(n, data_size);
+if (status) {
+trace_pci_nvme_err_mdts(nvme_cid(req), data_size);
+goto invalid;
+}
 }
 
 status = nvme_check_bounds(ns, slba, nlb);
@@ -1282,21 +1259,28 @@ static uint16_t nvme_write(NvmeCtrl *n, NvmeRequest 
*req)
 goto invalid;
 }
 
-status = nvme_map_dptr(n, data_size, req);
-if (status) {
-goto invalid;
-}
-
 data_offset = nvme_l2b(ns, slba);
 
-block_acct_start(blk_get_stats(blk), >acct, data_size,
- BLOCK_ACCT_WRITE);
-if (req->qsg.sg) {
-req->aiocb = dma_blk_write(blk, >qsg, data_offset,
-   BDRV_SECTOR_SIZE, nvme_rw_cb, req);
+if (!wrz) {
+status = nvme_map_dptr(n, data_size, req);
+if (status) {
+goto invalid;
+}
+
+block_acct_start(blk_get_stats(blk), >acct, data_size,
+ BLOCK_ACCT_WRITE);
+if (req->qsg.sg) {
+req->aiocb = dma_blk_write(blk, >qsg, data_offset,
+   BDRV_SECTOR_SIZE, nvme_rw_cb, req);
+} else {
+req->aiocb = blk_aio_pwritev(blk, data_offset, >iov, 0,
+ nvme_rw_cb, req);
+}
 } else {
-req->aiocb = blk_aio_pwritev(blk, data_offset, >iov, 0,
- nvme_rw_cb, req);
+block_acct_start(blk_get_stats(blk), >acct, 0, BLOCK_ACCT_WRITE);
+req->aiocb = blk_aio_pwrite_zeroes(blk, data_offset, data_size,
+   BDRV_REQ_MAY_UNMAP, nvme_rw_cb,
+   req);
 }
 return NVME_NO_COMPLETE;
 
@@ -1305,6 +1289,16 @@ invalid:
 return status | NVME_DNR;
 }
 
+static inline uint16_t nvme_write(NvmeCtrl *n, NvmeRequest *req)
+{
+return nvme_do_write(n, req, false);
+}
+
+static inline uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
+{
+return nvme_do_write(n, req, true);
+}
+
 static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
 {
 uint32_t nsid = le32_to_cpu(req->cmd.nsid);
diff --git a/hw/block/trace-events b/hw/block/trace-events
index ec1b43220eff..60262b03c901 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -43,7 +43,6 @@ pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t 
opcode, const char *opna
 pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, 
uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 
0x%"PRIx64""
 pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, 
uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb 
%"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
 pci_nvme_rw_cb(uint16_t cid, const char *blkname) 

[PULL 05/56] hw/block/nvme: add the dataset management command

2021-02-08 Thread Klaus Jensen
From: Klaus Jensen 

Add support for the Dataset Management command and the Deallocate
attribute. Deallocation results in discards being sent to the underlying
block device. Whether or not the blocks are actually deallocated is
affected by the same factors as Write Zeroes (see previous commit).

 format | discard | dsm (512B)  dsm (4KiB)  dsm (64KiB)
 ------------------------------------------------------
  qcow2   ignore    n           n           n
  qcow2   unmap     n           n           y
  raw     ignore    n           n           n
  raw     unmap     n           y           y

Again, a raw format and 4KiB LBAs are preferable.

In order to set the Namespace Preferred Deallocate Granularity and
Alignment fields (NPDG and NPDA), choose a sane minimum discard
granularity of 4KiB. If we are using a passthru device supporting
discard at a 512B granularity, user should set the discard_granularity
property explicitly. NPDG and NPDA will also account for the
cluster_size of the block driver if required (i.e. for QCOW2).

See NVM Express 1.3d, Section 6.7 ("Dataset Management command").

Signed-off-by: Klaus Jensen 
Reviewed-by: Keith Busch 
---
 hw/block/nvme.h|  2 +
 hw/block/nvme-ns.c | 30 --
 hw/block/nvme.c| 98 +-
 3 files changed, 125 insertions(+), 5 deletions(-)

diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index e080a2318a50..574333caa3f9 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -28,6 +28,7 @@ typedef struct NvmeRequest {
 struct NvmeNamespace*ns;
 BlockAIOCB  *aiocb;
 uint16_tstatus;
+void*opaque;
 NvmeCqe cqe;
 NvmeCmd cmd;
 BlockAcctCookie acct;
@@ -60,6 +61,7 @@ static inline const char *nvme_io_opc_str(uint8_t opc)
 case NVME_CMD_WRITE:return "NVME_NVM_CMD_WRITE";
 case NVME_CMD_READ: return "NVME_NVM_CMD_READ";
 case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES";
+case NVME_CMD_DSM:  return "NVME_NVM_CMD_DSM";
 default:return "NVME_NVM_CMD_UNKNOWN";
 }
 }
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index 53ded460348e..37f95951a6b8 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -28,10 +28,14 @@
 #include "nvme.h"
 #include "nvme-ns.h"
 
-static void nvme_ns_init(NvmeNamespace *ns)
+#define MIN_DISCARD_GRANULARITY (4 * KiB)
+
+static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
 {
+BlockDriverInfo bdi;
 NvmeIdNs *id_ns = >id_ns;
 int lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
+int npdg;
 
 ns->id_ns.dlfeat = 0x9;
 
@@ -43,8 +47,19 @@ static void nvme_ns_init(NvmeNamespace *ns)
 id_ns->ncap = id_ns->nsze;
 id_ns->nuse = id_ns->ncap;
 
-/* support DULBE */
-id_ns->nsfeat |= 0x4;
+/* support DULBE and I/O optimization fields */
+id_ns->nsfeat |= (0x4 | 0x10);
+
+npdg = ns->blkconf.discard_granularity / ns->blkconf.logical_block_size;
+
+if (bdrv_get_info(blk_bs(ns->blkconf.blk), ) >= 0 &&
+bdi.cluster_size > ns->blkconf.discard_granularity) {
+npdg = bdi.cluster_size / ns->blkconf.logical_block_size;
+}
+
+id_ns->npda = id_ns->npdg = npdg - 1;
+
+return 0;
 }
 
 static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
@@ -60,6 +75,11 @@ static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, 
Error **errp)
 return -1;
 }
 
+if (ns->blkconf.discard_granularity == -1) {
+ns->blkconf.discard_granularity =
+MAX(ns->blkconf.logical_block_size, MIN_DISCARD_GRANULARITY);
+}
+
 ns->size = blk_getlength(ns->blkconf.blk);
 if (ns->size < 0) {
 error_setg_errno(errp, -ns->size, "could not get blockdev size");
@@ -93,7 +113,9 @@ int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error 
**errp)
 return -1;
 }
 
-nvme_ns_init(ns);
+if (nvme_ns_init(ns, errp)) {
+return -1;
+}
 
 if (nvme_register_namespace(n, ns, errp)) {
 return -1;
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 6e6bdb338ad7..f019d43788ac 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -242,6 +242,7 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
 static void nvme_req_clear(NvmeRequest *req)
 {
 req->ns = NULL;
+req->opaque = NULL;
 memset(>cqe, 0x0, sizeof(req->cqe));
 req->status = NVME_SUCCESS;
 }
@@ -978,6 +979,99 @@ static void nvme_rw_cb(void *opaque, int ret)
 nvme_enqueue_req_completion(nvme_cq(req), req);
 }
 
+static void nvme_aio_discard_cb(void *opaque, int ret)
+{
+NvmeRequest *req = opaque;
+uintptr_t *discards = (uintptr_t *)>opaque;
+
+trace_pci_nvme_aio_discard_cb(nvme_cid(req));
+
+if (ret) {
+nvme_aio_err(req, ret);
+}
+
+(*discards)--;
+
+if (*discards) {
+return;
+}
+
+

[PULL 09/56] hw/block/nvme: Generate namespace UUIDs

2021-02-08 Thread Klaus Jensen
From: Dmitry Fomichev 

In NVMe 1.4, a namespace must report an ID descriptor of UUID type
if it doesn't support EUI64 or NGUID. Add a new namespace property,
"uuid", that provides the user the option to either specify the UUID
explicitly or have a UUID generated automatically every time a
namespace is initialized.

Suggested-by: Klaus Jensen 
Signed-off-by: Dmitry Fomichev 
Reviewed-by: Keith Busch 
Reviewed-by: Niklas Cassel 
Signed-off-by: Klaus Jensen 
---
 hw/block/nvme-ns.h | 1 +
 hw/block/nvme-ns.c | 1 +
 hw/block/nvme.c| 9 +
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index ed3d7e65d597..aeca810fc7a8 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -21,6 +21,7 @@
 
 typedef struct NvmeNamespaceParams {
 uint32_t nsid;
+QemuUUID uuid;
 } NvmeNamespaceParams;
 
 typedef struct NvmeNamespace {
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index a0de53e71878..f6d752b71467 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -151,6 +151,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
 static Property nvme_ns_props[] = {
 DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf),
 DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
+DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index f54c5c6ea44d..7b243a56efdf 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1765,6 +1765,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, 
NvmeRequest *req)
 
 static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
 {
+NvmeNamespace *ns;
 NvmeIdentify *c = (NvmeIdentify *)>cmd;
 uint32_t nsid = le32_to_cpu(c->nsid);
 uint8_t list[NVME_IDENTIFY_DATA_SIZE];
@@ -1784,7 +1785,8 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, 
NvmeRequest *req)
 return NVME_INVALID_NSID | NVME_DNR;
 }
 
-if (unlikely(!nvme_ns(n, nsid))) {
+ns = nvme_ns(n, nsid);
+if (unlikely(!ns)) {
 return NVME_INVALID_FIELD | NVME_DNR;
 }
 
@@ -1793,12 +1795,11 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl 
*n, NvmeRequest *req)
 /*
  * Because the NGUID and EUI64 fields are 0 in the Identify Namespace data
  * structure, a Namespace UUID (nidt = 0x3) must be reported in the
- * Namespace Identification Descriptor. Add a very basic Namespace UUID
- * here.
+ * Namespace Identification Descriptor. Add the namespace UUID here.
  */
 ns_descrs->uuid.hdr.nidt = NVME_NIDT_UUID;
 ns_descrs->uuid.hdr.nidl = NVME_NIDT_UUID_LEN;
-stl_be_p(_descrs->uuid.v, nsid);
+memcpy(_descrs->uuid.v, ns->params.uuid.data, NVME_NIDT_UUID_LEN);
 
 return nvme_dma(n, list, NVME_IDENTIFY_DATA_SIZE,
 DMA_DIRECTION_FROM_DEVICE, req);
-- 
2.30.0




[PULL 10/56] hw/block/nvme: Separate read and write handlers

2021-02-08 Thread Klaus Jensen
From: Dmitry Fomichev 

The majority of code in nvme_rw() is becoming read- or write-specific.
Move these parts to two separate handlers, nvme_read() and nvme_write()
to make the code more readable and to remove multiple is_write checks
that have been present in the i/o path.

This is a refactoring patch, no change in functionality.

Signed-off-by: Dmitry Fomichev 
Reviewed-by: Niklas Cassel 
Reviewed-by: Keith Busch 
Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c   | 107 --
 hw/block/trace-events |   3 +-
 2 files changed, 74 insertions(+), 36 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 7b243a56efdf..905fd1ba93f5 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1176,6 +1176,61 @@ static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
 return NVME_NO_COMPLETE;
 }
 
+static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
+{
+NvmeRwCmd *rw = (NvmeRwCmd *)>cmd;
+NvmeNamespace *ns = req->ns;
+uint64_t slba = le64_to_cpu(rw->slba);
+uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
+uint64_t data_size = nvme_l2b(ns, nlb);
+uint64_t data_offset;
+BlockBackend *blk = ns->blkconf.blk;
+uint16_t status;
+
+trace_pci_nvme_read(nvme_cid(req), nvme_nsid(ns), nlb, data_size, slba);
+
+status = nvme_check_mdts(n, data_size);
+if (status) {
+trace_pci_nvme_err_mdts(nvme_cid(req), data_size);
+goto invalid;
+}
+
+status = nvme_check_bounds(ns, slba, nlb);
+if (status) {
+trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
+goto invalid;
+}
+
+status = nvme_map_dptr(n, data_size, req);
+if (status) {
+goto invalid;
+}
+
+if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
+status = nvme_check_dulbe(ns, slba, nlb);
+if (status) {
+goto invalid;
+}
+}
+
+data_offset = nvme_l2b(ns, slba);
+
+block_acct_start(blk_get_stats(blk), >acct, data_size,
+ BLOCK_ACCT_READ);
+if (req->qsg.sg) {
+req->aiocb = dma_blk_read(blk, >qsg, data_offset,
+  BDRV_SECTOR_SIZE, nvme_rw_cb, req);
+} else {
+req->aiocb = blk_aio_preadv(blk, data_offset, >iov, 0,
+nvme_rw_cb, req);
+}
+return NVME_NO_COMPLETE;
+
+invalid:
+block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_READ);
+return status | NVME_DNR;
+}
+
 static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
 {
 NvmeRwCmd *rw = (NvmeRwCmd *)>cmd;
@@ -1201,22 +1256,19 @@ static uint16_t nvme_write_zeroes(NvmeCtrl *n, 
NvmeRequest *req)
 return NVME_NO_COMPLETE;
 }
 
-static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
+static uint16_t nvme_write(NvmeCtrl *n, NvmeRequest *req)
 {
 NvmeRwCmd *rw = (NvmeRwCmd *)>cmd;
 NvmeNamespace *ns = req->ns;
-uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
 uint64_t slba = le64_to_cpu(rw->slba);
-
+uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
 uint64_t data_size = nvme_l2b(ns, nlb);
-uint64_t data_offset = nvme_l2b(ns, slba);
-enum BlockAcctType acct = req->cmd.opcode == NVME_CMD_WRITE ?
-BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
+uint64_t data_offset;
 BlockBackend *blk = ns->blkconf.blk;
 uint16_t status;
 
-trace_pci_nvme_rw(nvme_cid(req), nvme_io_opc_str(rw->opcode),
-  nvme_nsid(ns), nlb, data_size, slba);
+trace_pci_nvme_write(nvme_cid(req), nvme_io_opc_str(rw->opcode),
+ nvme_nsid(ns), nlb, data_size, slba);
 
 status = nvme_check_mdts(n, data_size);
 if (status) {
@@ -1230,43 +1282,27 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
 goto invalid;
 }
 
-if (acct == BLOCK_ACCT_READ) {
-if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
-status = nvme_check_dulbe(ns, slba, nlb);
-if (status) {
-goto invalid;
-}
-}
-}
-
 status = nvme_map_dptr(n, data_size, req);
 if (status) {
 goto invalid;
 }
 
-block_acct_start(blk_get_stats(blk), >acct, data_size, acct);
+data_offset = nvme_l2b(ns, slba);
+
+block_acct_start(blk_get_stats(blk), >acct, data_size,
+ BLOCK_ACCT_WRITE);
 if (req->qsg.sg) {
-if (acct == BLOCK_ACCT_WRITE) {
-req->aiocb = dma_blk_write(blk, >qsg, data_offset,
-   BDRV_SECTOR_SIZE, nvme_rw_cb, req);
-} else {
-req->aiocb = dma_blk_read(blk, >qsg, data_offset,
-  BDRV_SECTOR_SIZE, nvme_rw_cb, req);
-}
+req->aiocb = dma_blk_write(blk, >qsg, data_offset,
+   BDRV_SECTOR_SIZE, nvme_rw_cb, req);
 } else {
-if (acct == BLOCK_ACCT_WRITE) {
-req->aiocb = blk_aio_pwritev(blk, data_offset, >iov, 0,
-

[PULL 03/56] hw/block/nvme: add dulbe support

2021-02-08 Thread Klaus Jensen
From: Klaus Jensen 

Add support for reporting the Deallocated or Unwritten Logical Block
Error (DULBE).

Rely on the block status flags reported by the block layer and consider
any block with the BDRV_BLOCK_ZERO flag to be deallocated.

Multiple factors affect when a Write Zeroes command results in
deallocation of blocks.

  * the underlying file system block size
  * the blockdev format
  * the 'discard' and 'logical_block_size' parameters

 format | discard | wz (512B)  wz (4KiB)  wz (64KiB)
 -----------------------------------------------------
  qcow2    ignore   n          n          y
  qcow2    unmap    n          n          y
  raw      ignore   n          y          y
  raw      unmap    n          y          y

So, this works best with an image in raw format and 4KiB LBAs, since
holes can then be punched on a per-block basis (this assumes a file
system with a 4KiB block size, YMMV). A qcow2 image uses a cluster size
of 64KiB by default and blocks will only be marked deallocated if a full
cluster is zeroed or discarded. However, this *is* consistent with the
spec since Write Zeroes "should" deallocate the block if the Deallocate
attribute is set and "may" deallocate if the Deallocate attribute is not
set. Thus, we always try to deallocate (the BDRV_REQ_MAY_UNMAP flag is
always set).

Signed-off-by: Klaus Jensen 
Reviewed-by: Keith Busch 
---
 hw/block/nvme-ns.h|  4 ++
 include/block/nvme.h  |  5 +++
 hw/block/nvme-ns.c|  8 ++--
 hw/block/nvme.c   | 91 ++-
 hw/block/trace-events |  4 ++
 5 files changed, 107 insertions(+), 5 deletions(-)

diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index 83734f4606e1..44bf6271b744 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -31,6 +31,10 @@ typedef struct NvmeNamespace {
 NvmeIdNs id_ns;
 
 NvmeNamespaceParams params;
+
+struct {
+uint32_t err_rec;
+} features;
 } NvmeNamespace;
 
 static inline uint32_t nvme_nsid(NvmeNamespace *ns)
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 3e02d9ca9843..b663d11e60c1 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -693,6 +693,7 @@ enum NvmeStatusCodes {
 NVME_E2E_REF_ERROR  = 0x0284,
 NVME_CMP_FAILURE= 0x0285,
 NVME_ACCESS_DENIED  = 0x0286,
+NVME_DULB   = 0x0287,
 NVME_MORE   = 0x2000,
 NVME_DNR= 0x4000,
 NVME_NO_COMPLETE= 0x,
@@ -909,6 +910,9 @@ enum NvmeIdCtrlLpa {
 #define NVME_AEC_NS_ATTR(aec)   ((aec >> 8) & 0x1)
 #define NVME_AEC_FW_ACTIVATION(aec) ((aec >> 9) & 0x1)
 
+#define NVME_ERR_REC_TLER(err_rec)  (err_rec & 0x)
+#define NVME_ERR_REC_DULBE(err_rec) (err_rec & 0x1)
+
 enum NvmeFeatureIds {
 NVME_ARBITRATION= 0x1,
 NVME_POWER_MANAGEMENT   = 0x2,
@@ -1029,6 +1033,7 @@ enum NvmeNsIdentifierType {
 
 
 #define NVME_ID_NS_NSFEAT_THIN(nsfeat)  ((nsfeat & 0x1))
+#define NVME_ID_NS_NSFEAT_DULBE(nsfeat) ((nsfeat >> 2) & 0x1)
 #define NVME_ID_NS_FLBAS_EXTENDED(flbas)((flbas >> 4) & 0x1)
 #define NVME_ID_NS_FLBAS_INDEX(flbas)   ((flbas & 0xf))
 #define NVME_ID_NS_MC_SEPARATE(mc)  ((mc >> 1) & 0x1)
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index 2670787d2630..53ded460348e 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -33,9 +33,7 @@ static void nvme_ns_init(NvmeNamespace *ns)
 NvmeIdNs *id_ns = >id_ns;
 int lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
 
-if (blk_get_flags(ns->blkconf.blk) & BDRV_O_UNMAP) {
-ns->id_ns.dlfeat = 0x9;
-}
+ns->id_ns.dlfeat = 0x9;
 
 id_ns->lbaf[lba_index].ds = 31 - clz32(ns->blkconf.logical_block_size);
 
@@ -44,6 +42,9 @@ static void nvme_ns_init(NvmeNamespace *ns)
 /* no thin provisioning */
 id_ns->ncap = id_ns->nsze;
 id_ns->nuse = id_ns->ncap;
+
+/* support DULBE */
+id_ns->nsfeat |= 0x4;
 }
 
 static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
@@ -93,6 +94,7 @@ int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error 
**errp)
 }
 
 nvme_ns_init(ns);
+
 if (nvme_register_namespace(n, ns, errp)) {
 return -1;
 }
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index a245ff8ceb2c..6e6bdb338ad7 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -105,6 +105,7 @@ static const bool nvme_feature_support[NVME_FID_MAX] = {
 
 static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
 [NVME_TEMPERATURE_THRESHOLD]= NVME_FEAT_CAP_CHANGE,
+[NVME_ERROR_RECOVERY]   = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS,
 [NVME_VOLATILE_WRITE_CACHE] = NVME_FEAT_CAP_CHANGE,
 [NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE,
 [NVME_ASYNCHRONOUS_EVENT_CONF]  = NVME_FEAT_CAP_CHANGE,
@@ -878,6 +879,49 @@ static inline uint16_t nvme_check_bounds(NvmeNamespace 
*ns, uint64_t slba,
 return NVME_SUCCESS;
 }
 

[PULL 04/56] nvme: add namespace I/O optimization fields to shared header

2021-02-08 Thread Klaus Jensen
From: Klaus Jensen 

This adds the NPWG, NPWA, NPDG, NPDA and NOWS family of fields to the
shared nvme.h header for use by later patches.

Signed-off-by: Klaus Jensen 
Cc: Stefan Hajnoczi 
Cc: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Minwoo Im 
---
 include/block/nvme.h | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/block/nvme.h b/include/block/nvme.h
index b663d11e60c1..11ac1c2b7dfb 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -996,7 +996,12 @@ typedef struct QEMU_PACKED NvmeIdNs {
 uint16_tnabspf;
 uint16_tnoiob;
 uint8_t nvmcap[16];
-uint8_t rsvd64[40];
+uint16_tnpwg;
+uint16_tnpwa;
+uint16_tnpdg;
+uint16_tnpda;
+uint16_tnows;
+uint8_t rsvd74[30];
 uint8_t nguid[16];
 uint64_teui64;
 NvmeLBAFlbaf[16];
-- 
2.30.0




[PULL 06/56] hw/block/nvme: add compare command

2021-02-08 Thread Klaus Jensen
From: Gollu Appalanaidu 

Add the Compare command.

This implementation uses a bounce buffer to read in the data from
storage and then compare with the host supplied buffer.

Signed-off-by: Gollu Appalanaidu 
[k.jensen: rebased]
Signed-off-by: Klaus Jensen 
Reviewed-by: Minwoo Im 
Reviewed-by: Keith Busch 
---
 hw/block/nvme.c   | 101 +-
 hw/block/trace-events |   2 +
 2 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index f019d43788ac..b9313fdc4762 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -999,6 +999,51 @@ static void nvme_aio_discard_cb(void *opaque, int ret)
 nvme_enqueue_req_completion(nvme_cq(req), req);
 }
 
+struct nvme_compare_ctx {
+QEMUIOVector iov;
+uint8_t *bounce;
+size_t len;
+};
+
+static void nvme_compare_cb(void *opaque, int ret)
+{
+NvmeRequest *req = opaque;
+NvmeNamespace *ns = req->ns;
+struct nvme_compare_ctx *ctx = req->opaque;
+g_autofree uint8_t *buf = NULL;
+uint16_t status;
+
+trace_pci_nvme_compare_cb(nvme_cid(req));
+
+if (!ret) {
+block_acct_done(blk_get_stats(ns->blkconf.blk), >acct);
+} else {
+block_acct_failed(blk_get_stats(ns->blkconf.blk), >acct);
+nvme_aio_err(req, ret);
+goto out;
+}
+
+buf = g_malloc(ctx->len);
+
+status = nvme_dma(nvme_ctrl(req), buf, ctx->len, DMA_DIRECTION_TO_DEVICE,
+  req);
+if (status) {
+req->status = status;
+goto out;
+}
+
+if (memcmp(buf, ctx->bounce, ctx->len)) {
+req->status = NVME_CMP_FAILURE;
+}
+
+out:
+qemu_iovec_destroy(>iov);
+g_free(ctx->bounce);
+g_free(ctx);
+
+nvme_enqueue_req_completion(nvme_cq(req), req);
+}
+
 static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req)
 {
 NvmeNamespace *ns = req->ns;
@@ -1072,6 +1117,57 @@ static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req)
 return status;
 }
 
+static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
+{
+NvmeRwCmd *rw = (NvmeRwCmd *)>cmd;
+NvmeNamespace *ns = req->ns;
+BlockBackend *blk = ns->blkconf.blk;
+uint64_t slba = le64_to_cpu(rw->slba);
+uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
+size_t len = nvme_l2b(ns, nlb);
+int64_t offset = nvme_l2b(ns, slba);
+uint8_t *bounce = NULL;
+struct nvme_compare_ctx *ctx = NULL;
+uint16_t status;
+
+trace_pci_nvme_compare(nvme_cid(req), nvme_nsid(ns), slba, nlb);
+
+status = nvme_check_mdts(n, len);
+if (status) {
+trace_pci_nvme_err_mdts(nvme_cid(req), len);
+return status;
+}
+
+status = nvme_check_bounds(ns, slba, nlb);
+if (status) {
+trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
+return status;
+}
+
+if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
+status = nvme_check_dulbe(ns, slba, nlb);
+if (status) {
+return status;
+}
+}
+
+bounce = g_malloc(len);
+
+ctx = g_new(struct nvme_compare_ctx, 1);
+ctx->bounce = bounce;
+ctx->len = len;
+
+req->opaque = ctx;
+
+qemu_iovec_init(>iov, 1);
+qemu_iovec_add(>iov, bounce, len);
+
+block_acct_start(blk_get_stats(blk), >acct, len, BLOCK_ACCT_READ);
+blk_aio_preadv(blk, offset, >iov, 0, nvme_compare_cb, req);
+
+return NVME_NO_COMPLETE;
+}
+
 static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
 {
 block_acct_start(blk_get_stats(req->ns->blkconf.blk), >acct, 0,
@@ -1201,6 +1297,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
 case NVME_CMD_WRITE:
 case NVME_CMD_READ:
 return nvme_rw(n, req);
+case NVME_CMD_COMPARE:
+return nvme_compare(n, req);
 case NVME_CMD_DSM:
 return nvme_dsm(n, req);
 default:
@@ -2925,7 +3023,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->cqes = (0x4 << 4) | 0x4;
 id->nn = cpu_to_le32(n->num_namespaces);
 id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP |
-   NVME_ONCS_FEATURES | NVME_ONCS_DSM);
+   NVME_ONCS_FEATURES | NVME_ONCS_DSM |
+   NVME_ONCS_COMPARE);
 
 id->vwc = 0x1;
 id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN |
diff --git a/hw/block/trace-events b/hw/block/trace-events
index 1ffe0b3f76b5..68a4c8ed35e0 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -46,6 +46,8 @@ pci_nvme_write_zeroes(uint16_t cid, uint32_t nsid, uint64_t 
slba, uint32_t nlb)
 pci_nvme_block_status(int64_t offset, int64_t bytes, int64_t pnum, int ret, 
bool zeroed) "offset %"PRId64" bytes %"PRId64" pnum %"PRId64" ret 0x%x zeroed 
%d"
 pci_nvme_dsm(uint16_t cid, uint32_t nsid, uint32_t nr, uint32_t attr) "cid 
%"PRIu16" nsid %"PRIu32" nr %"PRIu32" attr 0x%"PRIx32""
 pci_nvme_dsm_deallocate(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t 
nlb) "cid %"PRIu16" nsid %"PRIu32" slba 

[PULL 08/56] hw/block/nvme: Process controller reset and shutdown differently

2021-02-08 Thread Klaus Jensen
From: Dmitry Fomichev 

Controller reset and subsystem shutdown are handled very much the same
in the current code, but some of the steps should be different in these
two cases.

Introduce two new functions, nvme_ctrl_reset() and nvme_ctrl_shutdown(),
to separate some portions of the code from nvme_clear_ctrl(). The steps
that are made different between reset and shutdown are that BAR.CC is not
reset to zero upon the shutdown and namespace data is flushed to
backing storage as a part of shutdown handling, but not upon reset.

Suggested-by: Klaus Jensen 
Signed-off-by: Dmitry Fomichev 
Reviewed-by: Keith Busch 
Signed-off-by: Klaus Jensen 
---
 hw/block/nvme-ns.h |  2 +-
 hw/block/nvme-ns.c |  2 +-
 hw/block/nvme.c| 24 ++--
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index 44bf6271b744..ed3d7e65d597 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -73,6 +73,6 @@ typedef struct NvmeCtrl NvmeCtrl;
 
 int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp);
 void nvme_ns_drain(NvmeNamespace *ns);
-void nvme_ns_flush(NvmeNamespace *ns);
+void nvme_ns_shutdown(NvmeNamespace *ns);
 
 #endif /* NVME_NS_H */
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index 37f95951a6b8..a0de53e71878 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -129,7 +129,7 @@ void nvme_ns_drain(NvmeNamespace *ns)
 blk_drain(ns->blkconf.blk);
 }
 
-void nvme_ns_flush(NvmeNamespace *ns)
+void nvme_ns_shutdown(NvmeNamespace *ns)
 {
 blk_flush(ns->blkconf.blk);
 }
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index de52487aaf06..f54c5c6ea44d 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -2295,6 +2295,20 @@ static void nvme_clear_ctrl(NvmeCtrl *n)
 n->aer_queued = 0;
 n->outstanding_aers = 0;
 n->qs_created = false;
+}
+
+static void nvme_ctrl_reset(NvmeCtrl *n)
+{
+nvme_clear_ctrl(n);
+n->bar.cc = 0;
+}
+
+static void nvme_ctrl_shutdown(NvmeCtrl *n)
+{
+NvmeNamespace *ns;
+int i;
+
+nvme_clear_ctrl(n);
 
 for (i = 1; i <= n->num_namespaces; i++) {
 ns = nvme_ns(n, i);
@@ -2302,10 +2316,8 @@ static void nvme_clear_ctrl(NvmeCtrl *n)
 continue;
 }
 
-nvme_ns_flush(ns);
+nvme_ns_shutdown(ns);
 }
-
-n->bar.cc = 0;
 }
 
 static int nvme_start_ctrl(NvmeCtrl *n)
@@ -2472,12 +2484,12 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
 }
 } else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) {
 trace_pci_nvme_mmio_stopped();
-nvme_clear_ctrl(n);
+nvme_ctrl_reset(n);
 n->bar.csts &= ~NVME_CSTS_READY;
 }
 if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) {
 trace_pci_nvme_mmio_shutdown_set();
-nvme_clear_ctrl(n);
+nvme_ctrl_shutdown(n);
 n->bar.cc = data;
 n->bar.csts |= NVME_CSTS_SHST_COMPLETE;
 } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) {
@@ -3088,7 +3100,7 @@ static void nvme_exit(PCIDevice *pci_dev)
 {
 NvmeCtrl *n = NVME(pci_dev);
 
-nvme_clear_ctrl(n);
+nvme_ctrl_shutdown(n);
 g_free(n->cq);
 g_free(n->sq);
 g_free(n->aer_reqs);
-- 
2.30.0




[PULL 02/56] hw/block/nvme: pull aio error handling

2021-02-08 Thread Klaus Jensen
From: Klaus Jensen 

Add a new function, nvme_aio_err, to handle errors resulting from AIOs
and use this from the callbacks.

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c | 61 +
 1 file changed, 36 insertions(+), 25 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index b0816d19eff4..a245ff8ceb2c 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -878,6 +878,41 @@ static inline uint16_t nvme_check_bounds(NvmeNamespace 
*ns, uint64_t slba,
 return NVME_SUCCESS;
 }
 
+static void nvme_aio_err(NvmeRequest *req, int ret)
+{
+uint16_t status = NVME_SUCCESS;
+Error *local_err = NULL;
+
+switch (req->cmd.opcode) {
+case NVME_CMD_READ:
+status = NVME_UNRECOVERED_READ;
+break;
+case NVME_CMD_FLUSH:
+case NVME_CMD_WRITE:
+case NVME_CMD_WRITE_ZEROES:
+status = NVME_WRITE_FAULT;
+break;
+default:
+status = NVME_INTERNAL_DEV_ERROR;
+break;
+}
+
+trace_pci_nvme_err_aio(nvme_cid(req), strerror(ret), status);
+
+error_setg_errno(_err, -ret, "aio failed");
+error_report_err(local_err);
+
+/*
+ * Set the command status code to the first encountered error but allow a
+ * subsequent Internal Device Error to trump it.
+ */
+if (req->status && status != NVME_INTERNAL_DEV_ERROR) {
+return;
+}
+
+req->status = status;
+}
+
 static void nvme_rw_cb(void *opaque, int ret)
 {
 NvmeRequest *req = opaque;
@@ -887,37 +922,13 @@ static void nvme_rw_cb(void *opaque, int ret)
 BlockAcctCookie *acct = >acct;
 BlockAcctStats *stats = blk_get_stats(blk);
 
-Error *local_err = NULL;
-
 trace_pci_nvme_rw_cb(nvme_cid(req), blk_name(blk));
 
 if (!ret) {
 block_acct_done(stats, acct);
 } else {
-uint16_t status;
-
 block_acct_failed(stats, acct);
-
-switch (req->cmd.opcode) {
-case NVME_CMD_READ:
-status = NVME_UNRECOVERED_READ;
-break;
-case NVME_CMD_FLUSH:
-case NVME_CMD_WRITE:
-case NVME_CMD_WRITE_ZEROES:
-status = NVME_WRITE_FAULT;
-break;
-default:
-status = NVME_INTERNAL_DEV_ERROR;
-break;
-}
-
-trace_pci_nvme_err_aio(nvme_cid(req), strerror(ret), status);
-
-error_setg_errno(_err, -ret, "aio failed");
-error_report_err(local_err);
-
-req->status = status;
+nvme_aio_err(req, ret);
 }
 
 nvme_enqueue_req_completion(nvme_cq(req), req);
-- 
2.30.0




[PULL 00/56] emulated nvme patches

2021-02-08 Thread Klaus Jensen
From: Klaus Jensen 

The following changes since commit 4f799257b323e1238a900fd0c71c2057863e0308:

  Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2021-02-08' into 
staging (2021-02-08 16:12:21 +0000)

are available in the Git repository at:

  git://git.infradead.org/qemu-nvme.git tags/nvme-next-pull-request

for you to fetch changes up to 3e22762edc74be3e1ecafc361351a9640d114978:

  hw/block/nvme: refactor the logic for zone write checks (2021-02-08 21:15:54 
+0100)


Emulated NVMe device updates

  * deallocate or unwritten logical block error feature (me)
  * dataset management command (me)
  * compare command (Gollu Appalanaidu)
  * namespace types (Niklas Cassel)
  * zoned namespaces (Dmitry Fomichev)
  * smart critical warning toggle (Zhenwei Pi)
  * allow cmb and pmr to coexist (Andrzej Jakowski, me)
  * pmr rds/wds support (Naveen Nagar)
  * cmb v1.4 logic (Padmakar Kalghatgi)

And a lot of smaller fixes from Gollu Appalanaidu, Minwoo Im and me.



Andrzej Jakowski (1):
  hw/block/nvme: indicate CMB support through controller capabilities
register

Dmitry Fomichev (11):
  hw/block/nvme: Process controller reset and shutdown differently
  hw/block/nvme: Generate namespace UUIDs
  hw/block/nvme: Separate read and write handlers
  hw/block/nvme: Combine nvme_write_zeroes() and nvme_write()
  hw/block/nvme: Add Commands Supported and Effects log
  nvme: Make ZNS-related definitions
  hw/block/nvme: Support Zoned Namespace Command Set
  hw/block/nvme: Introduce max active and open zone limits
  hw/block/nvme: Support Zone Descriptor Extensions
  hw/block/nvme: Document zoned parameters in usage text
  hw/block/nvme: Correct error status for unaligned ZA

Gollu Appalanaidu (4):
  hw/block/nvme: add compare command
  hw/block/nvme: fix set feature for error recovery
  hw/block/nvme: fix set feature save field check
  hw/block/nvme: align with existing style

Klaus Jensen (26):
  hw/block/nvme: remove superfluous NvmeCtrl parameter
  hw/block/nvme: pull aio error handling
  hw/block/nvme: add dulbe support
  nvme: add namespace I/O optimization fields to shared header
  hw/block/nvme: add the dataset management command
  hw/block/nvme: fix bad clearing of CAP
  hw/block/nvme: fix for non-msix machines
  hw/block/nvme: conditionally enable DULBE for zoned namespaces
  hw/block/nvme: fix shutdown/reset logic
  hw/block/nvme: merge implicitly/explicitly opened processing masks
  hw/block/nvme: enum style fix
  hw/block/nvme: zero out zones on reset
  hw/block/nvme: add missing string representations for commands
  hw/block/nvme: remove unnecessary check for append
  hw/block/nvme: fix zone write finalize
  hw/block/nvme: add size to mmio read/write trace events
  hw/block/nvme: fix 64 bit register hi/lo split writes
  hw/block/nvme: move msix table and pba to BAR 0
  hw/block/nvme: allow cmb and pmr to coexist
  hw/block/nvme: rename PMR/CMB shift/mask fields
  hw/block/nvme: remove redundant zeroing of PMR registers
  hw/block/nvme: disable PMR at boot up
  hw/block/nvme: bump to v1.4
  hw/block/nvme: lift cmb restrictions
  hw/block/nvme: fix zone boundary check for append
  hw/block/nvme: refactor the logic for zone write checks

Minwoo Im (7):
  hw/block/nvme: remove unused argument in nvme_ns_init_zoned
  hw/block/nvme: open code for volatile write cache
  hw/block/nvme: remove unused argument in nvme_ns_init_blk
  hw/block/nvme: split setup and register for namespace
  hw/block/nvme: remove unused argument in nvme_ns_setup
  hw/block/nvme: error if drive less than a zone size
  hw/block/nvme: fix wrong parameter name 'cross_read'

Naveen Nagar (1):
  hw/block/nvme: add PMR RDS/WDS support

Niklas Cassel (2):
  hw/block/nvme: Add support for Namespace Types
  hw/block/nvme: Support allocated CNS command variants

Padmakar Kalghatgi (1):
  hw/block/nvme: move cmb logic to v1.4

Zhenwei Pi (3):
  nvme: introduce bit 5 for critical warning
  hw/block/nvme: add smart_critical_warning property
  hw/block/nvme: trigger async event during injecting smart warning

 hw/block/nvme-ns.h|  112 +-
 hw/block/nvme.h   |   31 +-
 include/block/nvme.h  |  340 +-
 hw/block/nvme-ns.c|  290 -
 hw/block/nvme.c   | 2333 -
 hw/block/trace-events |   50 +-
 6 files changed, 2823 insertions(+), 333 deletions(-)

-- 
2.30.0




[PULL 01/56] hw/block/nvme: remove superfluous NvmeCtrl parameter

2021-02-08 Thread Klaus Jensen
From: Klaus Jensen 

nvme_check_bounds has no use of the NvmeCtrl parameter; remove it.

Signed-off-by: Klaus Jensen 
Reviewed-by: Minwoo Im 
---
 hw/block/nvme.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 27d2c72716eb..b0816d19eff4 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -866,8 +866,8 @@ static inline uint16_t nvme_check_mdts(NvmeCtrl *n, size_t 
len)
 return NVME_SUCCESS;
 }
 
-static inline uint16_t nvme_check_bounds(NvmeCtrl *n, NvmeNamespace *ns,
- uint64_t slba, uint32_t nlb)
+static inline uint16_t nvme_check_bounds(NvmeNamespace *ns, uint64_t slba,
+ uint32_t nlb)
 {
 uint64_t nsze = le64_to_cpu(ns->id_ns.nsze);
 
@@ -943,7 +943,7 @@ static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest 
*req)
 
 trace_pci_nvme_write_zeroes(nvme_cid(req), nvme_nsid(ns), slba, nlb);
 
-status = nvme_check_bounds(n, ns, slba, nlb);
+status = nvme_check_bounds(ns, slba, nlb);
 if (status) {
 trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
 return status;
@@ -979,7 +979,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
 goto invalid;
 }
 
-status = nvme_check_bounds(n, ns, slba, nlb);
+status = nvme_check_bounds(ns, slba, nlb);
 if (status) {
 trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
 goto invalid;
-- 
2.30.0




Re: [RFC PATCH v2 0/4] Allow changing bs->file on reopen

2021-02-08 Thread Vladimir Sementsov-Ogievskiy

08.02.2021 21:44, Alberto Garcia wrote:

Hi,

this series allows changing bs->file using x-blockdev-reopen. Read
here for more details:

https://lists.gnu.org/archive/html/qemu-block/2021-01/msg00437.html

Version 2 of the series introduces a very significant change:
x-blockdev-reopen now receives a list of BlockdevOptions instead of
just one, so it is possible to reopen multiple block devices using a
single transaction.

This is still an RFC, I haven't updated the documentation and the
structure of the patches will probably change in the future, but I'd
like to know your opinion about the approach.

These patches apply on top of Vladimir's branch:

git: https://src.openvz.org/scm/~vsementsov/qemu.git
tag: up-block-topologic-perm-v2


Patchew understands "Based-on: MESSAGE_ID" tag, so, you can add:

Based-on: <20201127144522.29991-1-vsement...@virtuozzo.com>



Regards,

Berto

Alberto Garcia (4):
   block: Allow changing bs->file on reopen
   iotests: Update 245 to support replacing files with x-blockdev-reopen
   block: Support multiple reopening with x-blockdev-reopen
   iotests: Test reopening multiple devices at the same time

  qapi/block-core.json   |   2 +-
  include/block/block.h  |   2 +
  block.c|  81 +--
  blockdev.c |  85 +---
  tests/qemu-iotests/155 |   9 ++-
  tests/qemu-iotests/165 |   4 +-
  tests/qemu-iotests/245 | 128 -
  tests/qemu-iotests/245.out |   4 +-
  tests/qemu-iotests/248 |   2 +-
  tests/qemu-iotests/248.out |   2 +-
  tests/qemu-iotests/298 |   4 +-
  11 files changed, 254 insertions(+), 69 deletions(-)




--
Best regards,
Vladimir



Re: [PATCH 0/3] fix build failures from incorrectly skipped container build jobs

2021-02-08 Thread Thomas Huth

On 09/02/2021 07.01, Stefan Weil wrote:

Am 08.02.21 um 19:12 schrieb Daniel P. Berrangé:


On Mon, Feb 08, 2021 at 07:08:39PM +0100, Philippe Mathieu-Daudé wrote:

On 2/8/21 6:22 PM, Daniel P. Berrangé wrote:

On Mon, Feb 08, 2021 at 04:33:36PM +0000, Daniel P. Berrangé wrote:

This series fixes a problem with our gitlab CI rules that cause
container builds to be skipped. See the commit description in the
first patch for the details on this problem.

The overall result of this series though is a small increase in overall
pipeline time.

Previously

  - When container jobs are skipped: approx 1hr 5 mins
  - When container jobs are run, cached by docker: approx 1hr 15 mins
  - When container jobs are run, not cached by docker: approx 1hr 30 mins

With this series applied the first scenario no longer exists, so
all pipelines are either 1hr 15 or 1hr 30 depending on whether the
container phase is skipped.

I mean to say the biggest problem I see is the cross-win64-system
job. This consumes 1 hour 5 minutes all on its own. It is at least
15 minutes longer than every other job AFAICT. So no matter how
well we parallelize stuff, 1 hr 5 is a hard lower limit on pipeline
duration right now.

We might want to consider how to split the win64 job or cut down
what it does in some way ?

When the win64 build was added (on Debian), it was mostly to cover
the WHPX. Later we moved mingw jobs to Fedora. I just checked and
WHPX is no more built, and nobody complained, so it is not relevant
anymore.

I don't mind much what you do with the Gitlab win64 job, as this config
is better covered on Cirrus. I'd like to keep the win32 job because it
has been useful to catch 32-bit issues.

I'm not suggesting we remove it. Most developers won't setup Cirrus CI,
so will only run GitLab CI jobs.  IME it is good to have both win32
and win64 coverage because things do break differently on them - especially
if you use bad printf format strings that are not independent of host
word size



I would not say that something is not relevant just because nobody 
complains. Nobody would miss any CI if everything were always fine. So 
people would miss WHPX CI as soon as there are changes (which happen 
infrequently) and something breaks. WHPX should be covered by the w64 build, 
and as many as possible other features where I currently see a "NO" in the 
configure output as well.


Yes, I agree, we should add WHPX there again. Could somebody please check 
whether the headers are already available in the latest Fedora? Then we 
might simply switch the container to use the latest version of Fedora instead.


Otherwise we should install the headers manually there, like we did in 
commit d3dd34a1e5e134 for the now-removed Debian container.


Nevertheless I don't think that each CI job must run frequently. Each run 
not only costs time, but also energy, and contributes to our climate change.


I think that for the win32 and win64 jobs it would be sufficient to run them 
weekly, maybe even alternating if that is possible.


Maybe it would be sufficient to run those jobs only on tags (so that they 
get checked for pull requests) and on the master and staging branch?


 Thomas




Re: [PATCH 2/3] gitlab: add fine grained job deps for all build jobs

2021-02-08 Thread Thomas Huth

On 08/02/2021 17.33, Daniel P. Berrangé wrote:

This allows the build jobs to start running as soon as their respective
container image is ready, instead of waiting for all container builds
to finish.

Signed-off-by: Daniel P. Berrangé 
---
  .gitlab-ci.yml | 58 ++
  1 file changed, 58 insertions(+)


Please also adjust .gitlab-ci.d/crossbuilds.yml

 Thanks,
  Thomas




Re: [PATCH 1/3] gitlab: always build container images

2021-02-08 Thread Thomas Huth

On 08/02/2021 17.33, Daniel P. Berrangé wrote:
[...]

For example, consider pushing 5 commits, one of which contains a
dockerfile change. This will trigger a CI pipeline for the
containers. Now consider you do some more work on the branch and push 3
further commits, so you now have a branch of 8 commits. For the second
push GitLab will only look at the 3 most recent commits, the other 5
were already present. Thus GitLab will not realize that the branch has
dockerfile changes that need to trigger the container build.

This can cause real world problems:

  - Push 5 commits to branch "foo", including a dockerfile change

 => rebuilds the container images with content from "foo"
 => build jobs runs against containers from "foo"

  - Refresh your master branch with latest upstream master

 => rebuilds the container images with content from "master"
 => build jobs runs against containers from "master"

  - Push 3 more commits to branch "foo", with no dockerfile change

 => no container rebuild triggers
 => build jobs runs against containers from "master"

The "changes" conditional in gitlab is OK, *provided* your build
jobs are not relying on any external state from previous builds.

This is NOT the case in QEMU, because we are building container
images and these are cached. This is a scenario in which the
"changes" conditional is not usable.

The only other way to avoid this problem would be to use the git
branch name as the container image tag, instead of always using
"latest".
I'm basically fine with your patch, but let me ask one more thing: Won't we 
still have the problem if the user pushes to different branches 
simultaneously? E.g. the user pushes to "foo" with changes to dockerfiles, 
containers start to get rebuilt, then pushes to master without waiting for 
the previous CI to finish, then the containers get rebuilt from the "master" 
job without the local changes to the dockerfiles. Then in the "foo" CI 
pipelines the following jobs might run with the containers that have been 
built by the "master" job...


So if we really want to get it bulletproof, do we have to use the git branch 
name as the container image tag?


 Thomas




Re: [PULL v3 00/27] Block patches

2021-02-08 Thread Thomas Huth

On 08/02/2021 21.21, Stefan Hajnoczi wrote:

On Mon, Feb 08, 2021 at 11:02:57AM +0100, Philippe Mathieu-Daudé wrote:

On 2/8/21 10:27 AM, Stefan Hajnoczi wrote:

On Sat, Feb 06, 2021 at 05:03:20PM +0000, Peter Maydell wrote:

On Fri, 5 Feb 2021 at 22:53, Peter Maydell  wrote:


On Fri, 5 Feb 2021 at 16:45, Stefan Hajnoczi  wrote:


The following changes since commit e2c5093c993ef646e4e28f7aa78429853bcc06ac:

   iotests: 30: drop from auto group (and effectively from make check) 
(2021-02-05 15:16:13 +0000)

are available in the Git repository at:

   https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to b07011f375bda3319cf72eee7cb18d310078387b:

   docs: fix Parallels Image "dirty bitmap" section (2021-02-05 16:36:36 +0000)


Pull request

v3:
  * Replace {0} array initialization with {} to make clang happy [Peter]





Fails 'make check' on s390x host:


I gave this a rerun to check it was reproducible (it is) and realised
I missed what looks like an important line in the log. As usual,
trying to disentangle which lines of a parallel make check correspond
to the failure is pretty tricky, but the lines
  Type 'remote-pcihost' is missing its parent 'pcie-host-bridge'

are probably the proximate causes of the assertion failures.

MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))}
QTEST_QEMU_IMG=./qemu-img
G_TEST_DBUS_DAEMON=/home/ubuntu/qemu/tests/dbus-vmstate-daemon.sh
QTEST_QEMU_BINARY=./qemu-system-rx tests/qtest/qos-test --tap -k
PASS 45 qtest-rx/qmp-cmd-test /rx/qmp/query-memory-size-summary
SKIP
MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))}
QTEST_QEMU_IMG=./qemu-img
G_TEST_DBUS_DAEMON=/home/ubuntu/qemu/tests/dbus-vmstate-daemon.sh
QTEST_QEMU_BINARY=./qemu-system-s390x tests/qtest/pxe-test --tap -k
PASS 46 qtest-rx/qmp-cmd-test /rx/qmp/query-memory-devices
Type 'remote-pcihost' is missing its parent 'pcie-host-bridge'
PASS 47 qtest-rx/qmp-cmd-test /rx/qmp/query-replay
PASS 48 qtest-rx/qmp-cmd-test /rx/qmp/query-yank
PASS 49 qtest-rx/qmp-cmd-test /rx/qmp/query-name
PASS 50 qtest-rx/qmp-cmd-test /rx/qmp/query-iothreads
PASS 51 qtest-rx/qmp-cmd-test /rx/qmp/query-fdsets
PASS 52 qtest-rx/qmp-cmd-test /rx/qmp/query-command-line-options
PASS 53 qtest-rx/qmp-cmd-test /rx/qmp/query-acpi-ospm-status
PASS 54 qtest-rx/qmp-cmd-test /rx/qmp/object-add-failure-modes
MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))}
QTEST_QEMU_IMG=./qemu-img
G_TEST_DBUS_DAEMON=/home/ubuntu/qemu/tests/dbus-vmstate-daemon.sh
QTEST_QEMU_BINARY=./qemu-system-s390x tests/qtest/test-netfilter --tap
-k
Type 'remote-pcihost' is missing its parent 'pcie-host-bridge'
socket_accept failed: Resource temporarily unavailable
socket_accept failed: Resource temporarily unavailable
**
ERROR:../../tests/qtest/libqtest.c:308:qtest_init_without_qmp_handshake:
assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
**
ERROR:../../tests/qtest/libqtest.c:308:qtest_init_without_qmp_handshake:
assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
../../tests/qtest/libqtest.c:181: kill_qemu() detected QEMU death from
signal 6 (Aborted) (core dumped)
../../tests/qtest/libqtest.c:181: kill_qemu() detected QEMU death from
signal 6 (Aborted) (core dumped)
ERROR qtest-s390x/pxe-test - Bail out!
ERROR:../../tests/qtest/libqtest.c:308:qtest_init_without_qmp_handshake:
assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
ERROR qtest-s390x/test-netfilter - Bail out!
ERROR:../../tests/qtest/libqtest.c:308:qtest_init_without_qmp_handshake:
assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
Makefile.mtest:3113: recipe for target 'run-test-387' failed
make: *** [run-test-387] Error 1
make: *** Waiting for unfinished jobs
Makefile.mtest:3121: recipe for target 'run-test-388' failed


Hi Elena and Jag,
Please take a look at this QOM failure. I guess remote-pcihost is being
built but pcie-host-bridge is missing from the s390x-softmmu target.


Fix suggested here:
https://www.mail-archive.com/qemu-block@nongnu.org/msg80536.html

But beside the fix what would be better is to restrict this feature
where it makes sense (we are having a hard time building/testing all
features, better enable new ones where they are used).

Would it be enough to enable this feature on X86 hosts/targets for
mainstream CI?


Trying to check if I understand correctly:

Instead of writing configure/meson rules that enable the feature
whenever the dependencies are satisfied (KVM and PCI), explicitly enable
it on x86 targets only. The rationale is that it's not being used and
hasn't been tested on non-x86 targets, so it's not really supported
there yet.


I haven't looked very close at the patches, but if I got that right, the 
problem is that this feature depends on the availability of a certain PCI-e 
device. So the easiest solution is maybe to add a "depends on PCI_EXPRESS" 
in the "config MULTIPROCESS", 

Re: [PATCH 0/3] fix build failures from incorrectly skipped container build jobs

2021-02-08 Thread Stefan Weil

Am 08.02.21 um 19:12 schrieb Daniel P. Berrangé:


On Mon, Feb 08, 2021 at 07:08:39PM +0100, Philippe Mathieu-Daudé wrote:

On 2/8/21 6:22 PM, Daniel P. Berrangé wrote:

On Mon, Feb 08, 2021 at 04:33:36PM +, Daniel P. Berrangé wrote:

This series fixes a problem with our gitlab CI rules that cause
container builds to be skipped. See the commit description in the
first patch for the details on this problem.

The overall result of this series though is a small increase in overall
pipeline time.

Previously

  - When container jobs are skipped: approx 1hr 5 mins
  - When container jobs are run, cached by docker: approx 1hr 15 mins
  - When container jobs are run, not cached by docker: approx 1hr 30 mins

With this series applied the first scenario no longer exists, so
all pipelines are either 1hr 15 or 1hr 30 depending on whether the
container phase is skipped.

I mean to say the biggest problem I see is the cross-win64-system
job. This consumes 1 hour 5 minutes all on its own. It is at least
15 minutes longer than every other job AFAICT. So no matter how
well we parallelize stuff, 1 hr 5 is a hard lower limit on pipeline
duration right now.

We might want to consider how to split the win64 job or cut down
what it does in some way ?

When the win64 build was added (on Debian), it was mostly to cover
the WHPX. Later we moved mingw jobs to Fedora. I just checked and
WHPX is no more built, and nobody complained, so it is not relevant
anymore.

I don't mind much what you do with the Gitlab win64 job, as this config
is better covered on Cirrus. I'd like to keep the win32 job because it
has been useful to catch 32-bit issues.

I'm not suggesting we remove it. Most developers won't setup Cirrus CI,
so will only run GitLab CI jobs.  IME it is good to have both win32
and win64 coverage because things do break differently on them - especially
if you use bad printf format strings that are not independent of host
word size



I would not say that something is not relevant just because nobody 
complains. Nobody would miss any CI if everything were always fine. So 
people would miss WHPX CI as soon as there are changes (which happen 
infrequently) and something breaks. WHPX should be covered by the w64 
build, and as many as possible other features where I currently see a 
"NO" in the configure output as well.


Nevertheless I don't think that each CI job must run frequently. Each 
run not only costs time, but also energy, and contributes to our climate 
change.


I think that for the win32 and win64 jobs it would be sufficient to run 
them weekly, maybe even alternating if that is possible.


Stefan






Re: [PATCH 3/5] target/ppc: Drop use of gdb_get_float64() and ldfq_p()

2021-02-08 Thread David Gibson
On Mon, Feb 08, 2021 at 11:34:26AM +, Peter Maydell wrote:
> We used to make a distinction between 'float64'/'float32' types and
> the 'uint64_t'/'uint32_t' types, requiring special conversion
> operations to go between them.  We've now dropped this distinction as
> unnecessary, and the 'float*' types remain primarily for
> documentation purposes when used in places like the function
> prototypes of TCG helper functions.
> 
> This means that there's no need for a special gdb_get_float64()
> function to write a float64 value to the GDB protocol buffer; we can
> just use gdb_get_reg64().
> 
> Similarly, for reading a value out of the GDB buffer into a float64
> we can use ldq_p() and need not use ldfq_p().
> 
> Signed-off-by: Peter Maydell 

Acked-by: David Gibson 

> ---
>  target/ppc/gdbstub.c| 8 
>  target/ppc/translate_init.c.inc | 4 ++--
>  2 files changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/target/ppc/gdbstub.c b/target/ppc/gdbstub.c
> index 01459dd31d2..c28319fb974 100644
> --- a/target/ppc/gdbstub.c
> +++ b/target/ppc/gdbstub.c
> @@ -130,7 +130,7 @@ int ppc_cpu_gdb_read_register(CPUState *cs, GByteArray 
> *buf, int n)
>  gdb_get_regl(buf, env->gpr[n]);
>  } else if (n < 64) {
>  /* fprs */
> -gdb_get_float64(buf, *cpu_fpr_ptr(env, n - 32));
> +gdb_get_reg64(buf, *cpu_fpr_ptr(env, n - 32));
>  } else {
>  switch (n) {
>  case 64:
> @@ -184,7 +184,7 @@ int ppc_cpu_gdb_read_register_apple(CPUState *cs, 
> GByteArray *buf, int n)
>  gdb_get_reg64(buf, env->gpr[n]);
>  } else if (n < 64) {
>  /* fprs */
> -gdb_get_float64(buf, *cpu_fpr_ptr(env, n - 32));
> +gdb_get_reg64(buf, *cpu_fpr_ptr(env, n - 32));
>  } else if (n < 96) {
>  /* Altivec */
>  gdb_get_reg64(buf, n - 64);
> @@ -241,7 +241,7 @@ int ppc_cpu_gdb_write_register(CPUState *cs, uint8_t 
> *mem_buf, int n)
>  env->gpr[n] = ldtul_p(mem_buf);
>  } else if (n < 64) {
>  /* fprs */
> -*cpu_fpr_ptr(env, n - 32) = ldfq_p(mem_buf);
> +*cpu_fpr_ptr(env, n - 32) = ldq_p(mem_buf);
>  } else {
>  switch (n) {
>  case 64:
> @@ -291,7 +291,7 @@ int ppc_cpu_gdb_write_register_apple(CPUState *cs, 
> uint8_t *mem_buf, int n)
>  env->gpr[n] = ldq_p(mem_buf);
>  } else if (n < 64) {
>  /* fprs */
> -*cpu_fpr_ptr(env, n - 32) = ldfq_p(mem_buf);
> +*cpu_fpr_ptr(env, n - 32) = ldq_p(mem_buf);
>  } else {
>  switch (n) {
>  case 64 + 32:
> diff --git a/target/ppc/translate_init.c.inc b/target/ppc/translate_init.c.inc
> index 9867d0a6e4a..7bd111d905e 100644
> --- a/target/ppc/translate_init.c.inc
> +++ b/target/ppc/translate_init.c.inc
> @@ -9907,7 +9907,7 @@ static int gdb_get_float_reg(CPUPPCState *env, 
> GByteArray *buf, int n)
>  {
>  uint8_t *mem_buf;
>  if (n < 32) {
> -gdb_get_float64(buf, *cpu_fpr_ptr(env, n));
> +gdb_get_reg64(buf, *cpu_fpr_ptr(env, n));
>  mem_buf = gdb_get_reg_ptr(buf, 8);
>  ppc_maybe_bswap_register(env, mem_buf, 8);
>  return 8;
> @@ -9925,7 +9925,7 @@ static int gdb_set_float_reg(CPUPPCState *env, uint8_t 
> *mem_buf, int n)
>  {
>  if (n < 32) {
>  ppc_maybe_bswap_register(env, mem_buf, 8);
> -*cpu_fpr_ptr(env, n) = ldfq_p(mem_buf);
> +*cpu_fpr_ptr(env, n) = ldq_p(mem_buf);
>  return 8;
>  }
>  if (n == 32) {

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


[PATCH v2] char: don't fail when client is not connected

2021-02-08 Thread Pavel Dovgalyuk
This patch checks that ioc is not null before
using it in tcp socket tcp_chr_add_watch function.

The failure occurs in replay mode of the execution,
when monitor and serial port are tcp servers,
and there are no clients connected to them:

-monitor tcp:127.0.0.1:8081,server,nowait
-serial tcp:127.0.0.1:8082,server,nowait


Signed-off-by: Pavel Dovgalyuk 
Reviewed-by: Marc-André Lureau 
---
 chardev/char-socket.c |3 +++
 1 file changed, 3 insertions(+)

diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index 213a4c8dd0..cef1d9438f 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -385,6 +385,9 @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t 
len)
 static GSource *tcp_chr_add_watch(Chardev *chr, GIOCondition cond)
 {
 SocketChardev *s = SOCKET_CHARDEV(chr);
+if (!s->ioc) {
+return NULL;
+}
 return qio_channel_create_watch(s->ioc, cond);
 }
 




Re: [PATCH v2] hw/net: fsl_etsec: Reverse the RCTRL.RSF logic

2021-02-08 Thread David Gibson
On Tue, Feb 09, 2021 at 09:22:41AM +0800, Bin Meng wrote:
> From: Bin Meng 
> 
> Per MPC8548ERM [1] chapter 14.5.3.4.1:
> 
> When RCTRL.RSF is 1, frames less than 64 bytes are accepted upon
> a DA match. But currently QEMU does the opposite.
> 
> When RCTRL.RSF is 0, short frames are silently dropped, however
> we cannot drop such frames in QEMU as of today, due to both slirp
> and tap networking do not pad short frames (e.g.: an ARP packet)
> to the minimum frame size of 60 bytes.
> 
> If eTSEC is programmed to reject short frames, ARP requests will be
> dropped, preventing the guest from becoming visible on the network.
> 
> The same issue was reported on e1000 and vmxnet3 before, see:
> 
> commit 78aeb23eded2 ("e1000: Pad short frames to minimum size (60 bytes)")
> commit 40a87c6c9b11 ("vmxnet3: Pad short frames to minimum size (60 bytes)")
> 
> Ideally this should be fixed on the slirp/tap networking side to
> pad short frames to the minimum frame length, but I am not sure
> whether that's doable.
> 
> This commit reverses the RCTRL.RSF testing logic to match the spec.
> The log message is updated to mention the reject short frames
> functionality is unimplemented.
> 
> [1] https://www.nxp.com/docs/en/reference-manual/MPC8548ERM.pdf
> 
> Fixes: eb1e7c3e5146 ("Add Enhanced Three-Speed Ethernet Controller (eTSEC)")
> Signed-off-by: Bin Meng 

Applied to ppc-for-6.0.

Thanks for the excellent commit message with the links to the relevant
documentation.

> 
> ---
> 
> Changes in v2:
> - rewrite the commit message and reverse the RCTRL.RSF test logic
> 
>  hw/net/fsl_etsec/rings.c | 13 ++---
>  1 file changed, 10 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/net/fsl_etsec/rings.c b/hw/net/fsl_etsec/rings.c
> index 121415a..f89aa7f 100644
> --- a/hw/net/fsl_etsec/rings.c
> +++ b/hw/net/fsl_etsec/rings.c
> @@ -502,10 +502,17 @@ ssize_t etsec_rx_ring_write(eTSEC *etsec, const uint8_t 
> *buf, size_t size)
>  return -1;
>  }
>  
> -if ((etsec->regs[RCTRL].value & RCTRL_RSF) && (size < 60)) {
> +/*
> + * Both slirp and tap networking do not pad short frames
> + * (e.g.: an ARP packet) to the minimum frame size of 60 bytes.
> + *
> + * If eTSEC is programmed to reject short frames, ARP requests
> + * will be dropped, preventing the guest from becoming visible
> + * on the network.
> + */
> +if (!(etsec->regs[RCTRL].value & RCTRL_RSF) && (size < 60)) {
>  /* CRC is not in the packet yet, so short frame is below 60 bytes */
> -RING_DEBUG("%s: Drop short frame\n", __func__);
> -return -1;
> +RING_DEBUG("%s: Drop short frame not implemented\n", __func__);
>  }
>  
>  rx_init_frame(etsec, buf, size);

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


[Bug 1580586] Re: Qemu WinXP SP3 second loadvm freezes Guest OS

2021-02-08 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1580586

Title:
  Qemu WinXP SP3 second loadvm freezes Guest OS

Status in QEMU:
  Expired

Bug description:
  Using Qemu-system-i386 to run WinXP SP3 with the following command
  line:

  qemu-system-i386 -hda qcow2/windowsxp_32bits_dd.qcow2 -m 1024  -net
  user,smb=/shared -vga std -net nic,model=rtl8139 -rtc
  base=localtime,clock=vm -s -snapshot

  savevm works fine, and the first loadvm to the snapshot works
  properly, but the next ones will all freeze the guest OS.

  First I thought it was due to the clock but adding the rtc options did
  not fix it.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1580586/+subscriptions



[Bug 1597138] Re: Deadlock on Windows 10 pop-up

2021-02-08 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1597138

Title:
  Deadlock on Windows 10 pop-up

Status in QEMU:
  Expired

Bug description:
  I was able to install and can log in but whenever a pop-up is attempted the 
VM appears to deadlock.
  I can still kill -9 the process and recover but the VM and the QEmu console 
both hang with no error output.

  At first I thought it was UAC but renaming a file causes a pop-up and that 
also deadlocks.
  I rebuilt QEmu 2.6.0 with debug info and did a thread back-trace once the 
deadlock occurs.
  See the attachment for the trace.

  I am attempting to setup GPU pass-thru with a GTX 970 but this
  deadlock occurs with -vga std (and no GPU pass-thru) as well.

  (I cannot install or start Windows 7 but I am told this is a known
  bug.)

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1597138/+subscriptions



[Bug 1598612] Re: Windows for Workgroups 3.11 installer crashes with a general protection fault

2021-02-08 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1598612

Title:
  Windows for Workgroups 3.11 installer crashes with a general
  protection fault

Status in QEMU:
  Expired

Bug description:
  I used only disk images from here:
  
http://ia801606.us.archive.org/zipview.php?zip=/22/items/IBM_PC_Compatibles_TOSEC_2012_04_23/IBM_PC_Compatibles_TOSEC_2012_04_23.zip

  When I try to install Windows for Workgroups 3.11 on either PC DOS
  2000 or MS-DOS 6.22, the installer crashes after entering the
  graphical part with two dialogs containing:

  Application Error
  WINSETUP caused a General Protection Fault in module 
0EDF:7011WINSETUP will close.

  Application Error
  WINSETUP caused a General Protection Fault in module USER.EXE at 0001:40B6.

  And then:
  Standard Mode: Bad Fault in MS-DOS Extender.
  Fault: 000D Stack Dump:   0070
  Raw fault frame: EC= IP=5EF7 CS=037F FL=3087 SP=FFEE SS=02DF

  This happens both with and without KVM. I tested with QEMU from Ubuntu
  14.04 and 16.04 and recent GIT
  (ef8757f1fe8095a256ee617e4dbac69d3b33ae94).

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1598612/+subscriptions



[Bug 1598029] Re: failed to boot a customized kernel if emulating Broadwell/Skylake

2021-02-08 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1598029

Title:
  failed to boot a customized kernel if emulating Broadwell/Skylake

Status in QEMU:
  Expired

Bug description:
  Hardware: X86-64, Intel(R) Core(TM) i7-6500U( Skylake)
  OS: Linux Mint 18
  Host Kernel: 4.5.7 + PaX/Grsecurity
  Qemu: QEMU emulator version 2.5.0 (Debian 1:2.5+dfsg-5ubuntu10.2)

  [Reproduction Steps]
  1, Install a Debian 8 in the guest
  2, Install a customized kernel( using same config to Debian 8)
  3, Reboot:
  qemu-system-x86_64 -hda debian8-test.img -boot d  -m 2048 -enable-kvm -usb 
-usbdevice tablet -net nic -net tap,ifname=tap0,script=no -cpu Broadwell -smp 2

  or

  qemu-system-x86_64 -hda debian8-test.img -boot d  -m 2048 -enable-kvm
  -usb -usbdevice tablet -net nic -net tap,ifname=tap0,script=no -cpu
  host -smp 2

  [Actual Result]  
  kernel panic or can't login in the system

  [Workaround]
  qemu-system-x86_64 -hda debian8-test.img -boot d  -m 2048 -enable-kvm -usb 
-usbdevice tablet -net nic -net tap,ifname=tap0,script=no -cpu Haswell -smp 2

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1598029/+subscriptions



[Bug 1877384] Re: 9pfs file create with mapped-xattr can fail on overlayfs

2021-02-08 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1877384

Title:
  9pfs file create with mapped-xattr can fail on overlayfs

Status in QEMU:
  Expired

Bug description:
  QEMU Version: 3.1.0 as packaged in debian buster, but the code appears to do 
the same in master.
  qemu command-line: qemu-system-x86_64 -m 1G -nographic -nic 
"user,model=virtio-net-pci,tftp=$(pwd),net=10.0.2.0/24,host=10.0.2.2" -fsdev 
local,id=fs,path=$thisdir/..,security_model=mapped-xattr -device 
virtio-9p-pci,fsdev=fs,mount_tag=fs -drive 
"file=$rootdisk,if=virtio,format=raw" -kernel "$kernel" -initrd "$initrd" 
-append "$append"

  
  I'm using CI that runs in a Docker container and runs a qemu VM with code and 
results shared via virtio 9p.
  The 9p fsdev is configured with security_model=mapped-xattr
  When the test code attempts to create a log file in an existing directory, 
open with O_CREAT fails with -ENOENT.

  The relevant strace excerpt is:

  28791 openat(11, ".", O_RDONLY|O_NOFOLLOW|O_PATH|O_DIRECTORY) = 20
  28791 openat(20, "src", O_RDONLY|O_NOCTTY|O_NONBLOCK|O_NOFOLLOW|O_DIRECTORY) 
= 21
  28791 fcntl(21, F_SETFL, O_RDONLY|O_DIRECTORY) = 0
  28791 close(20) = 0
  28791 openat(21, "client.log", 
O_WRONLY|O_CREAT|O_NOCTTY|O_NONBLOCK|O_NOFOLLOW, 0600) = 20
  28791 fcntl(20, F_SETFL, O_WRONLY|O_CREAT|O_NONBLOCK|O_NOFOLLOW) = 0
  28791 lsetxattr("/proc/self/fd/21/client.log", "user.virtfs.uid", "\0\0\0", 
4, 0) = -1 ENOENT (No such file or directory)

  My hypothesis for what's going wrong is since the Docker container's
  overlayfs copies-up on writes, when it opens the file it's created a
  new version of the `src` directory containing a `client.log`, but this
  new src directory isn't accessible by file descriptor 20 and the
  lsetxattr call is instead attempting to set attributes on the path in
  the old `src` directory.

  Looking at the code, a fix would be to change `hw/9pfs/9p-local.c` and
  change `local_open2` to instead of calling `local_set_xattrat` to set
  the xattrs by directory file descriptor and file name, to have a
  version of local_set_xattrat` which uses `fsetxattr` to set the virtfs
  attributes instead of the `fsetxattrat_nofollow` helper.

  This reliably happened for me in CI, but I don't have access to the CI
  host or the time to strip the test down to make a minimal test case,
  and had difficulty reproducing the error on other machines.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1877384/+subscriptions



Re: [PATCH RFC] pvrdma: wean code off pvrdma_ring.h kernel header

2021-02-08 Thread Jason Wang



On 2021/2/9 上午1:28, Paolo Bonzini wrote:

On 08/02/21 18:17, Cornelia Huck wrote:

On Fri, 29 Jan 2021 16:27:19 +0100
Cornelia Huck  wrote:


On Fri, 22 Jan 2021 19:00:29 +0100
Cornelia Huck  wrote:


The pvrdma code relies on the pvrdma_ring.h kernel header for some
basic ring buffer handling. The content of that header isn't very
exciting, but contains some (q)atomic_*() invocations that (a)
cause manual massaging when doing a headers update, and (b) are
an indication that we probably should not be importing that header
at all.

Let's reimplement the ring buffer handling directly in the pvrdma
code instead. This arguably also improves readability of the code.

Importing the header can now be dropped.

Signed-off-by: Cornelia Huck 
---

Compile-tested only, needs both testing and more eyeballs :)


Friendly ping :)

Suggestions for a test setup to do some sanity checks (that does not
require special hardware) also welcome.


Can I interest anyone in this? I'd be happy doing sanity tests myself,
but I have a hard time figuring out even where to start...


Reviewed-by: Paolo Bonzini 

Jason, Michael, are you going to pick this up?

Paolo



I will queue this.

Thanks








---
  hw/rdma/vmw/pvrdma.h  |   5 +-
  hw/rdma/vmw/pvrdma_cmd.c  |   6 +-
  hw/rdma/vmw/pvrdma_dev_ring.c |  41 ---
  hw/rdma/vmw/pvrdma_dev_ring.h |   9 +-
  hw/rdma/vmw/pvrdma_main.c |   4 +-
  .../infiniband/hw/vmw_pvrdma/pvrdma_ring.h    | 114 
--

  scripts/update-linux-headers.sh   |   3 +-
  7 files changed, 38 insertions(+), 144 deletions(-)
  delete mode 100644 
include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h 



diff --git a/hw/rdma/vmw/pvrdma.h b/hw/rdma/vmw/pvrdma.h
index 1d36a76f1e3b..d08965d3e2d5 100644
--- a/hw/rdma/vmw/pvrdma.h
+++ b/hw/rdma/vmw/pvrdma.h
@@ -26,7 +26,6 @@
  #include "../rdma_backend_defs.h"
  #include "../rdma_rm_defs.h"
  -#include 
"standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h"
  #include 
"standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h"

  #include "pvrdma_dev_ring.h"
  #include "qom/object.h"
@@ -64,10 +63,10 @@ typedef struct DSRInfo {
  union pvrdma_cmd_req *req;
  union pvrdma_cmd_resp *rsp;
  -    struct pvrdma_ring *async_ring_state;
+    PvrdmaRingState *async_ring_state;
  PvrdmaRing async;
  -    struct pvrdma_ring *cq_ring_state;
+    PvrdmaRingState *cq_ring_state;
  PvrdmaRing cq;
  } DSRInfo;
  diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c
index 692125ac2681..f59879e2574e 100644
--- a/hw/rdma/vmw/pvrdma_cmd.c
+++ b/hw/rdma/vmw/pvrdma_cmd.c
@@ -262,7 +262,7 @@ static int create_cq_ring(PCIDevice *pci_dev , 
PvrdmaRing **ring,

  r = g_malloc(sizeof(*r));
  *ring = r;
  -    r->ring_state = (struct pvrdma_ring *)
+    r->ring_state = (PvrdmaRingState *)
  rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE);
    if (!r->ring_state) {
@@ -398,7 +398,7 @@ static int create_qp_rings(PCIDevice *pci_dev, 
uint64_t pdir_dma,

  *rings = sr;
    /* Create send ring */
-    sr->ring_state = (struct pvrdma_ring *)
+    sr->ring_state = (PvrdmaRingState *)
  rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE);
  if (!sr->ring_state) {
  rdma_error_report("Failed to map to QP ring state");
@@ -639,7 +639,7 @@ static int create_srq_ring(PCIDevice *pci_dev, 
PvrdmaRing **ring,

  r = g_malloc(sizeof(*r));
  *ring = r;
  -    r->ring_state = (struct pvrdma_ring *)
+    r->ring_state = (PvrdmaRingState *)
  rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE);
  if (!r->ring_state) {
  rdma_error_report("Failed to map tp SRQ ring state");
diff --git a/hw/rdma/vmw/pvrdma_dev_ring.c 
b/hw/rdma/vmw/pvrdma_dev_ring.c

index f0bcde74b06a..074ac59b84db 100644
--- a/hw/rdma/vmw/pvrdma_dev_ring.c
+++ b/hw/rdma/vmw/pvrdma_dev_ring.c
@@ -22,11 +22,10 @@
  #include "trace.h"
    #include "../rdma_utils.h"
-#include 
"standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h"

  #include "pvrdma_dev_ring.h"
    int pvrdma_ring_init(PvrdmaRing *ring, const char *name, 
PCIDevice *dev,
- struct pvrdma_ring *ring_state, uint32_t 
max_elems,

+ PvrdmaRingState *ring_state, uint32_t max_elems,
   size_t elem_sz, dma_addr_t *tbl, uint32_t 
npages)

  {
  int i;
@@ -73,48 +72,54 @@ out:
    void *pvrdma_ring_next_elem_read(PvrdmaRing *ring)
  {
-    int e;
-    unsigned int idx = 0, offset;
+    unsigned int idx, offset;
+    const uint32_t tail = qatomic_read(>ring_state->prod_tail);
+    const uint32_t head = qatomic_read(>ring_state->cons_head);
  -    e = pvrdma_idx_ring_has_data(ring->ring_state, 
ring->max_elems, );

-    if (e <= 0) {
+    if (tail & ~((ring->max_elems << 1) - 1) ||
+    head & ~((ring->max_elems << 1) - 1) ||
+    tail 

Re: [PATCH 3/3] virtio-net: graceful fallback to vhost=off for tap netdev

2021-02-08 Thread Jason Wang



On 2021/2/9 上午3:59, Yuri Benditovich wrote:

On Mon, Feb 8, 2021 at 6:11 AM Jason Wang  wrote:


On 2021/2/5 上午4:29, Yuri Benditovich wrote:

Currently virtio-net silently clears features if they are
not supported by respective vhost. This may create migration
problems in future if vhost features on the source and destination
are different. Implement graceful fallback to no-vhost mode
when some acked features contradict with vhost. The decision is
taken on set_features call and the vhost will be disabled
till next reset (or migration).
Such fallback is currently enabled only for TAP netdev.

Signed-off-by: Yuri Benditovich 
---
   hw/net/virtio-net.c | 58 ++---
   1 file changed, 50 insertions(+), 8 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 5150f295e8..b353060e63 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -515,6 +515,15 @@ static RxFilterInfo 
*virtio_net_query_rxfilter(NetClientState *nc)
   return info;
   }

+static void virtio_net_allow_vhost(VirtIONet *n, bool allow)
+{
+int i;
+for (i = 0; i < n->max_queues; i++) {
+NetClientState *nc = qemu_get_subqueue(n->nic, i)->peer;
+nc->vhost_net_disabled = !allow;
+}
+}
+
   static void virtio_net_reset(VirtIODevice *vdev)
   {
   VirtIONet *n = VIRTIO_NET(vdev);
@@ -552,6 +561,7 @@ static void virtio_net_reset(VirtIODevice *vdev)
   assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
   }
   }
+virtio_net_allow_vhost(n, true);
   }

   static void peer_test_vnet_hdr(VirtIONet *n)
@@ -689,6 +699,15 @@ static void virtio_net_set_queues(VirtIONet *n)
   }
   }

+static bool can_disable_vhost(VirtIONet *n)
+{
+NetClientState *peer = qemu_get_queue(n->nic)->peer;
+if (!get_vhost_net(peer)) {
+return false;
+}
+return !peer || peer->info->type == NET_CLIENT_DRIVER_TAP;
+}
+
   static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

   static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t 
features,
@@ -725,14 +744,14 @@ static uint64_t virtio_net_get_features(VirtIODevice 
*vdev, uint64_t features,
   return features;
   }

-virtio_clear_feature(, VIRTIO_NET_F_RSS);
-virtio_clear_feature(, VIRTIO_NET_F_HASH_REPORT);
-features = vhost_net_get_features(get_vhost_net(nc->peer), features);
-vdev->backend_features = features;
+vdev->backend_features = vhost_net_get_features(get_vhost_net(nc->peer), 
features);

-if (n->mtu_bypass_backend &&
-(n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
-features |= (1ULL << VIRTIO_NET_F_MTU);
+if (!can_disable_vhost(n)) {
+features = vdev->backend_features;
+if (n->mtu_bypass_backend &&
+(n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
+features |= (1ULL << VIRTIO_NET_F_MTU);
+}
   }

   return features;
@@ -872,10 +891,25 @@ static void failover_add_primary(VirtIONet *n, Error 
**errp)
   error_propagate(errp, err);
   }

+static bool check_vhost_features(VirtIONet *n, uint64_t features)
+{
+NetClientState *nc = qemu_get_queue(n->nic);
+uint64_t filtered;
+if (n->rss_data.redirect) {
+return false;
+}
+filtered = vhost_net_get_features(get_vhost_net(nc->peer), features);
+if (filtered != features) {
+return false;
+}
+return true;
+}
+
   static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
   {
   VirtIONet *n = VIRTIO_NET(vdev);
   Error *err = NULL;
+bool disable_vhost = false;
   int i;

   if (n->mtu_bypass_backend &&
@@ -894,13 +928,21 @@ static void virtio_net_set_features(VirtIODevice *vdev, 
uint64_t features)
 VIRTIO_F_VERSION_1),
  virtio_has_feature(features,
 VIRTIO_NET_F_HASH_REPORT));
-
   n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
   virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
   n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
   virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
   n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

+if (can_disable_vhost(n)) {
+disable_vhost = !check_vhost_features(n, features);
+}
+if (disable_vhost) {
+warn_report("Some of requested features aren't supported by vhost, "
+"vhost is turned off till next reset");
+virtio_net_allow_vhost(n, false);
+}


This looks more complicated than I expected. See
virtio_net_vhost_status() we had a fallback there:

  r = vhost_net_start(vdev, n->nic->ncs, queues);
  if (r < 0) {
  error_report("unable to start vhost net: %d: "
   "falling back on userspace 

Re: [PATCH 3/3] virtio-net: graceful fallback to vhost=off for tap netdev

2021-02-08 Thread Jason Wang



On 2021/2/9 上午3:46, Yuri Benditovich wrote:

On Mon, Feb 8, 2021 at 5:15 AM Jason Wang  wrote:


On 2021/2/5 下午9:38, Michael S. Tsirkin wrote:

On Thu, Feb 04, 2021 at 10:29:15PM +0200, Yuri Benditovich wrote:

Currently virtio-net silently clears features if they are
not supported by respective vhost. This may create migration
problems in future if vhost features on the source and destination
are different. Implement graceful fallback to no-vhost mode
when some acked features contradict with vhost. The decision is
taken on set_features call and the vhost will be disabled
till next reset (or migration).
Such fallback is currently enabled only for TAP netdev.

Signed-off-by: Yuri Benditovich

Sounds good, but I don't think we should do this if
vhostforce=on is set.


If we do this, does it mean we won't maintain migration compatibility
when vhostforce is on?

AFAIU, the 'vhostforce=on' should mean the vhost can't be disabled (if
I'm not mistaken this is typically used for vhost-user).
So we can view this case as similar to vhost-vdpa and vhost-user.



Right, but since it was used by libvirt, it turns out to be a 
compatibility breaker.


Thanks





Thanks



Also, let's document this behaviour with the vhost option so people
are not surprised.






Re: [PATCH] vhost: Unbreak SMMU and virtio-iommu on dev-iotlb support

2021-02-08 Thread Jason Wang



On 2021/2/9 上午2:37, Peter Xu wrote:

On Mon, Feb 08, 2021 at 11:21:23AM +0800, Jason Wang wrote:

[...]


I'm not sure I remember it right, but we seem to have similar discussion
previously on "what if the user didn't specify ats=on" - I think at that time
the conclusion was that we ignore the failure since that's not a valid
configuration for qemu.


Yes, but I think I was wrong at that time.

I can't say you're wrong - I actually still agree with you that at least
there's a priority of things we'd do, and this one is not extremely important
if that's not a major use case (say, if you will 100% always suggest an user to
use ats=on for a viommu enabled vhost).



Right, but it depends on e.g. how libvirt uses that. As far as I know,
they do enable ATS. But it would still be an issue if libvirt wants to
support vIOMMUs other than Intel.






The other issue I'm worried about is (I think I mentioned it somewhere, but just to
double confirm): I'd like to make sure SMMU and virtio-iommu are the only IOMMU
platforms that will use vhost.


For upstream, it won't be easy :)

Sorry I definitely didn't make myself clear... :)

To be explicit, does ppc use vhost kernel too?



I think the answer is yes.



  Since I know at least ppc has
its own translation unit and its iommu notifier in qemu, so I'm unsure whether
the same patch would break ppc too, because vhost could also ignore all UNMAP
sent by the ppc vIOMMU.



If this is true, we probably need to fix that.







Otherwise IIUC we need to fix those vIOMMUs too.


Right, last time I checked the AMD IOMMU emulation, it simply triggered device IOTLB
invalidation during IOTLB invalidation, which looks wrong.

I did quickly grep IOMMU_NOTIFIER_UNMAP in amd_iommu.c and saw nothing. It
seems amd iommu is not ready for any kind of IOMMU notifiers yet.

Thanks,



Right.

Thanks









Re: [PATCH v2 2/2] hw/ssi: xilinx_spips: Implement basic QSPI DMA support

2021-02-08 Thread Bin Meng
Hi Edgar,

On Mon, Feb 8, 2021 at 11:17 PM Edgar E. Iglesias
 wrote:
>
>
>
> On Mon, Feb 8, 2021 at 3:45 PM Bin Meng  wrote:
>>
>> Hi Edgar,
>>
>> On Mon, Feb 8, 2021 at 10:34 PM Edgar E. Iglesias
>>  wrote:
>> >
>> >
>> >
>> > On Mon, 8 Feb 2021, 15:10 Bin Meng,  wrote:
>> >>
>> >> Hi Edgar,
>> >>
>> >> On Mon, Feb 8, 2021 at 8:44 PM Edgar E. Iglesias
>> >>  wrote:
>> >> >
>> >> > On Mon, Feb 08, 2021 at 01:25:24PM +0800, Bin Meng wrote:
>> >> > > From: Xuzhou Cheng 
>> >> > >
>> >> > > ZynqMP QSPI supports SPI transfer using DMA mode, but currently this
>> >> > > is unimplemented. When QSPI is programmed to use DMA mode, QEMU will
>> >> > > crash. This is observed when testing VxWorks 7.
>> >> > >
>> >> > > Add a basic implementation of QSPI DMA functionality.
>> >> > >
>> >> > > Signed-off-by: Xuzhou Cheng 
>> >> > > Signed-off-by: Bin Meng 
>> >> >
>> >> > + Francisco
>> >> >
>> >> > Hi,
>> >> >
>> >> > Like Peter commented on the previous version, the DMA unit is actually
>> >> > separate.
>> >>
>> >> Is it really separate? In the Xilinx ZynqMP datasheet, it's an
>> >> integrated DMA unit dedicated for QSPI usage. IIUC, other modules on
>> >> the ZynqMP SoC cannot use it to do any DMA transfer. To me this is no
>> >> different from a DMA engine in an Ethernet controller.
>> >
>> >
>> > Yes, it's a separate module.
>> >
>> >>
>> >> > This module is better modelled by pushing data through the Stream 
>> >> > framework
>> >> > into the DMA. The DMA model is not upstream but can be found here:
>> >> > https://github.com/Xilinx/qemu/blob/master/hw/dma/csu_stream_dma.c
>> >> >
>> >>
>> >> What's the benefit of modeling it using the stream framework?
>> >
>> >
>> >
>> > Because it matches real hw and this particular dma exists in various 
>> > instances, not only in qspi. We don't want duplicate implementations of 
>> > the same dma.
>> >
>>
>> Would you please share more details, like what other peripherals are
>> using this same DMA model?
>>
>
> It's used by the Crypto blocks (SHA, AES) and by the bitstream programming 
> blocks on the ZynqMP.
> In Versal there's the same plus some additional uses of this DMA...

Sigh, it's not obvious from the ZynqMP datasheet. Indeed the crypto
blocks seem to be using the same IP that QSPI uses for its DMA mode.
With that additional information, I agree modeling the DMA as a
separate model makes sense.

Will investigate the Xilinx fork, and report back.

Regards,
Bin



Re: [PATCH v1 1/1] MAINTAINERS: Add a SiFIve machine section

2021-02-08 Thread Palmer Dabbelt

"SiFive", not "SiFIve", in the subject.

On Mon, 08 Feb 2021 18:11:27 PST (-0800), Alistair Francis wrote:

Signed-off-by: Alistair Francis 
Acked-by: Bin Meng 
---
 MAINTAINERS | 9 +
 1 file changed, 9 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 8d8b0bf966..c347d49bd2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1359,6 +1359,15 @@ F: include/hw/misc/mchp_pfsoc_dmc.h
 F: include/hw/misc/mchp_pfsoc_ioscb.h
 F: include/hw/misc/mchp_pfsoc_sysreg.h

+SiFive Machines
+M: Alistair Francis 
+M: Bin Meng 
+M: Palmer Dabbelt 
+L: qemu-ri...@nongnu.org
+S: Supported
+F: hw/*/*sifive*.c
+F: include/hw/*/*sifive*.h
+
 RX Machines
 ---
 rx-gdbsim


Aside from that

Reviewed-by: Palmer Dabbelt 
Acked-by: Palmer Dabbelt 

Thanks!



Re: [PATCH v1 1/1] MAINTAINERS: Add a SiFIve machine section

2021-02-08 Thread Bin Meng
On Tue, Feb 9, 2021 at 10:11 AM Alistair Francis
 wrote:

nits: SiFIve => SiFive in the title

>
> Signed-off-by: Alistair Francis 
> Acked-by: Bin Meng 
> ---
>  MAINTAINERS | 9 +
>  1 file changed, 9 insertions(+)
>

Regards,
Bin



[PATCH v1 1/1] MAINTAINERS: Add a SiFIve machine section

2021-02-08 Thread Alistair Francis
Signed-off-by: Alistair Francis 
Acked-by: Bin Meng 
---
 MAINTAINERS | 9 +
 1 file changed, 9 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 8d8b0bf966..c347d49bd2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1359,6 +1359,15 @@ F: include/hw/misc/mchp_pfsoc_dmc.h
 F: include/hw/misc/mchp_pfsoc_ioscb.h
 F: include/hw/misc/mchp_pfsoc_sysreg.h
 
+SiFive Machines
+M: Alistair Francis 
+M: Bin Meng 
+M: Palmer Dabbelt 
+L: qemu-ri...@nongnu.org
+S: Supported
+F: hw/*/*sifive*.c
+F: include/hw/*/*sifive*.h
+
 RX Machines
 ---
 rx-gdbsim
-- 
2.30.0




Booting nuvoton qemu machine with -kernel

2021-02-08 Thread Joel Stanley
Hello nuvoton qemu people,

I was attempting to use the npcm750-evb machine to boot test some
Linux kernel changes, by running the kernel directly without a
filesystem (and therefore not running the bootrom or u-boot). I do
this with the Aspeed machines quite often, and it's really useful.

The machine appeared to hang; attaching gdb showed it jumping around
various addresses but nothing that matched up with the symbols in my
vmlinux.

To reproduce grab these two files:

wget https://builds.tuxbuild.com/1oDtoGMge0cGKE1uoNPpnSSnqwi/zImage
wget 
https://builds.tuxbuild.com/1oDuLU3E0keoRISk6R06PYBeW2G/dtbs/nuvoton-npcm750-evb.dtb

My command line looked like this:

 qemu-system-arm -M npcm750-evb  -nographic -kernel zImage -dtb
nuvoton-npcm750-evb.dtb

Is this something you've tested? Is there something I'm missing?

Cheers,

Joel



[PATCH v3 3/3] tests/qtests: Add npcm7xx emc model test

2021-02-08 Thread Doug Evans via
Reviewed-by: Hao Wu 
Reviewed-by: Avi Fishman 
Reviewed-by: Peter Maydell 
Signed-off-by: Doug Evans 
---

Differences from v2:
- remove use of C99 mixed decls/statements

 tests/qtest/meson.build|   1 +
 tests/qtest/npcm7xx_emc-test.c | 812 +
 2 files changed, 813 insertions(+)
 create mode 100644 tests/qtest/npcm7xx_emc-test.c

diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index c83bc211b6..f7c369f3d5 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -136,6 +136,7 @@ qtests_sparc64 = \
 
 qtests_npcm7xx = \
   ['npcm7xx_adc-test',
+   'npcm7xx_emc-test',
'npcm7xx_gpio-test',
'npcm7xx_pwm-test',
'npcm7xx_rng-test',
diff --git a/tests/qtest/npcm7xx_emc-test.c b/tests/qtest/npcm7xx_emc-test.c
new file mode 100644
index 00..95712dc3b5
--- /dev/null
+++ b/tests/qtest/npcm7xx_emc-test.c
@@ -0,0 +1,812 @@
+/*
+ * QTests for Nuvoton NPCM7xx EMC Modules.
+ *
+ * Copyright 2020 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "libqos/libqos.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qnum.h"
+#include "qemu/bitops.h"
+#include "qemu/iov.h"
+
+/* Name of the emc device. */
+#define TYPE_NPCM7XX_EMC "npcm7xx-emc"
+
+/* Timeout for various operations, in seconds. */
+#define TIMEOUT_SECONDS 10
+
+/* Address in memory of the descriptor. */
+#define DESC_ADDR (1 << 20) /* 1 MiB */
+
+/* Address in memory of the data packet. */
+#define DATA_ADDR (DESC_ADDR + 4096)
+
+#define CRC_LENGTH 4
+
+#define NUM_TX_DESCRIPTORS 3
+#define NUM_RX_DESCRIPTORS 2
+
+/* Size of tx,rx test buffers. */
+#define TX_DATA_LEN 64
+#define RX_DATA_LEN 64
+
+#define TX_STEP_COUNT 1
+#define RX_STEP_COUNT 1
+
+/* 32-bit register indices. */
+typedef enum NPCM7xxPWMRegister {
+/* Control registers. */
+REG_CAMCMR,
+REG_CAMEN,
+
+/* There are 16 CAMn[ML] registers. */
+REG_CAMM_BASE,
+REG_CAML_BASE,
+
+REG_TXDLSA = 0x22,
+REG_RXDLSA,
+REG_MCMDR,
+REG_MIID,
+REG_MIIDA,
+REG_FFTCR,
+REG_TSDR,
+REG_RSDR,
+REG_DMARFC,
+REG_MIEN,
+
+/* Status registers. */
+REG_MISTA,
+REG_MGSTA,
+REG_MPCNT,
+REG_MRPC,
+REG_MRPCC,
+REG_MREPC,
+REG_DMARFS,
+REG_CTXDSA,
+REG_CTXBSA,
+REG_CRXDSA,
+REG_CRXBSA,
+
+NPCM7XX_NUM_EMC_REGS,
+} NPCM7xxPWMRegister;
+
+enum { NUM_CAMML_REGS = 16 };
+
+/* REG_CAMCMR fields */
+/* Enable CAM Compare */
+#define REG_CAMCMR_ECMP (1 << 4)
+/* Accept Unicast Packet */
+#define REG_CAMCMR_AUP (1 << 0)
+
+/* REG_MCMDR fields */
+/* Software Reset */
+#define REG_MCMDR_SWR (1 << 24)
+/* Frame Transmission On */
+#define REG_MCMDR_TXON (1 << 8)
+/* Accept Long Packet */
+#define REG_MCMDR_ALP (1 << 1)
+/* Frame Reception On */
+#define REG_MCMDR_RXON (1 << 0)
+
+/* REG_MIEN fields */
+/* Enable Transmit Completion Interrupt */
+#define REG_MIEN_ENTXCP (1 << 18)
+/* Enable Transmit Interrupt */
+#define REG_MIEN_ENTXINTR (1 << 16)
+/* Enable Receive Good Interrupt */
+#define REG_MIEN_ENRXGD (1 << 4)
+/* Enable Receive Interrupt */
+#define REG_MIEN_ENRXINTR (1 << 0)
+
+/* REG_MISTA fields */
+/* Transmit Bus Error Interrupt */
+#define REG_MISTA_TXBERR (1 << 24)
+/* Transmit Descriptor Unavailable Interrupt */
+#define REG_MISTA_TDU (1 << 23)
+/* Transmit Completion Interrupt */
+#define REG_MISTA_TXCP (1 << 18)
+/* Transmit Interrupt */
+#define REG_MISTA_TXINTR (1 << 16)
+/* Receive Bus Error Interrupt */
+#define REG_MISTA_RXBERR (1 << 11)
+/* Receive Descriptor Unavailable Interrupt */
+#define REG_MISTA_RDU (1 << 10)
+/* DMA Early Notification Interrupt */
+#define REG_MISTA_DENI (1 << 9)
+/* Maximum Frame Length Interrupt */
+#define REG_MISTA_DFOI (1 << 8)
+/* Receive Good Interrupt */
+#define REG_MISTA_RXGD (1 << 4)
+/* Packet Too Long Interrupt */
+#define REG_MISTA_PTLE (1 << 3)
+/* Receive Interrupt */
+#define REG_MISTA_RXINTR (1 << 0)
+
+typedef struct NPCM7xxEMCTxDesc NPCM7xxEMCTxDesc;
+typedef struct NPCM7xxEMCRxDesc NPCM7xxEMCRxDesc;
+
+struct NPCM7xxEMCTxDesc {
+uint32_t flags;
+uint32_t txbsa;
+uint32_t status_and_length;
+uint32_t ntxdsa;
+};
+
+struct NPCM7xxEMCRxDesc {
+uint32_t status_and_length;
+uint32_t rxbsa;
+uint32_t reserved;
+uint32_t nrxdsa;
+};
+
+/* NPCM7xxEMCTxDesc.flags values */
+/* Owner: 0 = cpu, 1 = emc */
+#define TX_DESC_FLAG_OWNER_MASK (1 << 31)
+/* Transmit interrupt enable */
+#define TX_DESC_FLAG_INTEN (1 

[PATCH v3 2/3] hw/arm: Add npcm7xx emc model

2021-02-08 Thread Doug Evans via
This is a 10/100 ethernet device that has several features.
Only the ones needed by the Linux driver have been implemented.
See npcm7xx_emc.c for a list of unimplemented features.

Reviewed-by: Hao Wu 
Reviewed-by: Avi Fishman 
Reviewed-by: Peter Maydell 
Signed-off-by: Doug Evans 
---

Differences from v2:
- none, patch ok as is

 docs/system/arm/nuvoton.rst |  3 ++-
 hw/arm/npcm7xx.c| 50 +++--
 include/hw/arm/npcm7xx.h|  2 ++
 3 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/docs/system/arm/nuvoton.rst b/docs/system/arm/nuvoton.rst
index a1786342e2..c6e9a4c17e 100644
--- a/docs/system/arm/nuvoton.rst
+++ b/docs/system/arm/nuvoton.rst
@@ -43,6 +43,7 @@ Supported devices
  * GPIO controller
  * Analog to Digital Converter (ADC)
  * Pulse Width Modulation (PWM)
+ * Ethernet controller (EMC)
 
 Missing devices
 ---
@@ -56,7 +57,7 @@ Missing devices
* Shared memory (SHM)
* eSPI slave interface
 
- * Ethernet controllers (GMAC and EMC)
+ * Ethernet controller (GMAC)
  * USB device (USBD)
  * SMBus controller (SMBF)
  * Peripheral SPI controller (PSPI)
diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c
index 72040d4079..94b79ff4c0 100644
--- a/hw/arm/npcm7xx.c
+++ b/hw/arm/npcm7xx.c
@@ -82,6 +82,8 @@ enum NPCM7xxInterrupt {
 NPCM7XX_UART1_IRQ,
 NPCM7XX_UART2_IRQ,
 NPCM7XX_UART3_IRQ,
+NPCM7XX_EMC1RX_IRQ  = 15,
+NPCM7XX_EMC1TX_IRQ,
 NPCM7XX_TIMER0_IRQ  = 32,   /* Timer Module 0 */
 NPCM7XX_TIMER1_IRQ,
 NPCM7XX_TIMER2_IRQ,
@@ -104,6 +106,8 @@ enum NPCM7xxInterrupt {
 NPCM7XX_OHCI_IRQ= 62,
 NPCM7XX_PWM0_IRQ= 93,   /* PWM module 0 */
 NPCM7XX_PWM1_IRQ,   /* PWM module 1 */
+NPCM7XX_EMC2RX_IRQ  = 114,
+NPCM7XX_EMC2TX_IRQ,
 NPCM7XX_GPIO0_IRQ   = 116,
 NPCM7XX_GPIO1_IRQ,
 NPCM7XX_GPIO2_IRQ,
@@ -152,6 +156,12 @@ static const hwaddr npcm7xx_pwm_addr[] = {
 0xf0104000,
 };
 
+/* Register base address for each EMC Module */
+static const hwaddr npcm7xx_emc_addr[] = {
+0xf0825000,
+0xf0826000,
+};
+
 static const struct {
 hwaddr regs_addr;
 uint32_t unconnected_pins;
@@ -365,6 +375,10 @@ static void npcm7xx_init(Object *obj)
 for (i = 0; i < ARRAY_SIZE(s->pwm); i++) {
 object_initialize_child(obj, "pwm[*]", &s->pwm[i], TYPE_NPCM7XX_PWM);
 }
+
+for (i = 0; i < ARRAY_SIZE(s->emc); i++) {
+object_initialize_child(obj, "emc[*]", &s->emc[i], TYPE_NPCM7XX_EMC);
+}
 }
 
 static void npcm7xx_realize(DeviceState *dev, Error **errp)
@@ -537,6 +551,40 @@ static void npcm7xx_realize(DeviceState *dev, Error **errp)
 sysbus_connect_irq(sbd, i, npcm7xx_irq(s, NPCM7XX_PWM0_IRQ + i));
 }
 
+/*
+ * EMC Modules. Cannot fail.
+ * The mapping of the device to its netdev backend works as follows:
+ * emc[i] = nd_table[i]
+ * This works around the inability to specify the netdev property for the
+ * emc device: it's not pluggable and thus the -device option can't be
+ * used.
+ */
+QEMU_BUILD_BUG_ON(ARRAY_SIZE(npcm7xx_emc_addr) != ARRAY_SIZE(s->emc));
+QEMU_BUILD_BUG_ON(ARRAY_SIZE(s->emc) != 2);
+for (i = 0; i < ARRAY_SIZE(s->emc); i++) {
+s->emc[i].emc_num = i;
+SysBusDevice *sbd = SYS_BUS_DEVICE(&s->emc[i]);
+if (nd_table[i].used) {
+qemu_check_nic_model(&nd_table[i], TYPE_NPCM7XX_EMC);
+qdev_set_nic_properties(DEVICE(sbd), &nd_table[i]);
+}
+/*
+ * The device exists regardless of whether it's connected to a QEMU
+ * netdev backend. So always instantiate it even if there is no
+ * backend.
+ */
+sysbus_realize(sbd, &error_abort);
+sysbus_mmio_map(sbd, 0, npcm7xx_emc_addr[i]);
+int tx_irq = i == 0 ? NPCM7XX_EMC1TX_IRQ : NPCM7XX_EMC2TX_IRQ;
+int rx_irq = i == 0 ? NPCM7XX_EMC1RX_IRQ : NPCM7XX_EMC2RX_IRQ;
+/*
+ * N.B. The values for the second argument sysbus_connect_irq are
+ * chosen to match the registration order in npcm7xx_emc_realize.
+ */
+sysbus_connect_irq(sbd, 0, npcm7xx_irq(s, tx_irq));
+sysbus_connect_irq(sbd, 1, npcm7xx_irq(s, rx_irq));
+}
+
 /*
  * Flash Interface Unit (FIU). Can fail if incorrect number of chip selects
  * specified, but this is a programming error.
@@ -621,8 +669,6 @@ static void npcm7xx_realize(DeviceState *dev, Error **errp)
 create_unimplemented_device("npcm7xx.vcd",  0xf081,  64 * KiB);
 create_unimplemented_device("npcm7xx.ece",  0xf082,   8 * KiB);
 create_unimplemented_device("npcm7xx.vdma", 0xf0822000,   8 * KiB);
-create_unimplemented_device("npcm7xx.emc1", 0xf0825000,   4 * KiB);
-create_unimplemented_device("npcm7xx.emc2", 0xf0826000,   4 * KiB);
 create_unimplemented_device("npcm7xx.usbd[0]",  0xf083,   4 * KiB);
 

[PATCH v3 1/3] hw/net: Add npcm7xx emc model

2021-02-08 Thread Doug Evans via
This is a 10/100 ethernet device that has several features.
Only the ones needed by the Linux driver have been implemented.
See npcm7xx_emc.c for a list of unimplemented features.

Reviewed-by: Hao Wu 
Reviewed-by: Avi Fishman 
Signed-off-by: Doug Evans 
---

Differences from v2:
- move call to qemu_set_irq
- remove use of C99 mixed decls/statements
- add use of g_autofree

 hw/net/meson.build   |   1 +
 hw/net/npcm7xx_emc.c | 857 +++
 hw/net/trace-events  |  17 +
 include/hw/net/npcm7xx_emc.h | 286 
 4 files changed, 1161 insertions(+)
 create mode 100644 hw/net/npcm7xx_emc.c
 create mode 100644 include/hw/net/npcm7xx_emc.h

diff --git a/hw/net/meson.build b/hw/net/meson.build
index 4a7051b54a..af0749c42b 100644
--- a/hw/net/meson.build
+++ b/hw/net/meson.build
@@ -35,6 +35,7 @@ softmmu_ss.add(when: 'CONFIG_I82596_COMMON', if_true: 
files('i82596.c'))
 softmmu_ss.add(when: 'CONFIG_SUNHME', if_true: files('sunhme.c'))
 softmmu_ss.add(when: 'CONFIG_FTGMAC100', if_true: files('ftgmac100.c'))
 softmmu_ss.add(when: 'CONFIG_SUNGEM', if_true: files('sungem.c'))
+softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_emc.c'))
 
 softmmu_ss.add(when: 'CONFIG_ETRAXFS', if_true: files('etraxfs_eth.c'))
 softmmu_ss.add(when: 'CONFIG_COLDFIRE', if_true: files('mcf_fec.c'))
diff --git a/hw/net/npcm7xx_emc.c b/hw/net/npcm7xx_emc.c
new file mode 100644
index 00..714a742ba7
--- /dev/null
+++ b/hw/net/npcm7xx_emc.c
@@ -0,0 +1,857 @@
+/*
+ * Nuvoton NPCM7xx EMC Module
+ *
+ * Copyright 2020 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * Unsupported/unimplemented features:
+ * - MCMDR.FDUP (full duplex) is ignored, half duplex is not supported
+ * - Only CAM0 is supported, CAM[1-15] are not
+ *   - writes to CAMEN.[1-15] are ignored, these bits always read as zeroes
+ * - MII is not implemented, MIIDA.BUSY and MIID always return zero
+ * - MCMDR.LBK is not implemented
+ * - MCMDR.{OPMOD,ENSQE,AEP,ARP} are not supported
+ * - H/W FIFOs are not supported, MCMDR.FFTCR is ignored
+ * - MGSTA.SQE is not supported
+ * - pause and control frames are not implemented
+ * - MGSTA.CCNT is not supported
+ * - MPCNT, DMARFS are not implemented
+ */
+
+#include "qemu/osdep.h"
+
+/* For crc32 */
+#include <zlib.h>
+
+#include "qemu-common.h"
+#include "hw/irq.h"
+#include "hw/qdev-clock.h"
+#include "hw/qdev-properties.h"
+#include "hw/net/npcm7xx_emc.h"
+#include "net/eth.h"
+#include "migration/vmstate.h"
+#include "qemu/bitops.h"
+#include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "qemu/units.h"
+#include "sysemu/dma.h"
+#include "trace.h"
+
+#define CRC_LENGTH 4
+
+/*
+ * The maximum size of a (layer 2) ethernet frame as defined by 802.3.
+ * 1518 = 6(dest macaddr) + 6(src macaddr) + 2(proto) + 4(crc) + 1500(payload)
+ * This does not include an additional 4 for the vlan field (802.1q).
+ */
+#define MAX_ETH_FRAME_SIZE 1518
+
+static const char *emc_reg_name(int regno)
+{
+#define REG(name) case REG_ ## name: return #name;
+switch (regno) {
+REG(CAMCMR)
+REG(CAMEN)
+REG(TXDLSA)
+REG(RXDLSA)
+REG(MCMDR)
+REG(MIID)
+REG(MIIDA)
+REG(FFTCR)
+REG(TSDR)
+REG(RSDR)
+REG(DMARFC)
+REG(MIEN)
+REG(MISTA)
+REG(MGSTA)
+REG(MPCNT)
+REG(MRPC)
+REG(MRPCC)
+REG(MREPC)
+REG(DMARFS)
+REG(CTXDSA)
+REG(CTXBSA)
+REG(CRXDSA)
+REG(CRXBSA)
+case REG_CAMM_BASE + 0: return "CAM0M";
+case REG_CAML_BASE + 0: return "CAM0L";
+case REG_CAMM_BASE + 2 ... REG_CAMML_LAST:
+/* Only CAM0 is supported, fold the others into something simple. */
+if (regno & 1) {
+return "CAML";
+} else {
+return "CAMM";
+}
+default: return "UNKNOWN";
+}
+#undef REG
+}
+
+static void emc_reset(NPCM7xxEMCState *emc)
+{
+trace_npcm7xx_emc_reset(emc->emc_num);
+
+memset(&emc->regs[0], 0, sizeof(emc->regs));
+
+/* These regs have non-zero reset values. */
+emc->regs[REG_TXDLSA] = 0xfffc;
+emc->regs[REG_RXDLSA] = 0xfffc;
+emc->regs[REG_MIIDA] = 0x0090;
+emc->regs[REG_FFTCR] = 0x0101;
+emc->regs[REG_DMARFC] = 0x0800;
+emc->regs[REG_MPCNT] = 0x7fff;
+
+emc->tx_active = false;
+emc->rx_active = false;
+}
+
+static void npcm7xx_emc_reset(DeviceState *dev)
+{
+NPCM7xxEMCState *emc = NPCM7XX_EMC(dev);
+emc_reset(emc);
+}
+
+static void emc_soft_reset(NPCM7xxEMCState *emc)
+{
+/*
+  

[PATCH v3 0/3] Add npcm7xx emc model

2021-02-08 Thread Doug Evans via
This is a 10/100 ethernet device that has several features.
Only the ones needed by the Linux driver have been implemented.
See npcm7xx_emc.c for a list of unimplemented features.

Doug Evans (3):
  hw/net: Add npcm7xx emc model
  hw/arm: Add npcm7xx emc model
  tests/qtests: Add npcm7xx emc model test

 docs/system/arm/nuvoton.rst|   3 +-
 hw/arm/npcm7xx.c   |  50 +-
 hw/net/meson.build |   1 +
 hw/net/npcm7xx_emc.c   | 857 +
 hw/net/trace-events|  17 +
 include/hw/arm/npcm7xx.h   |   2 +
 include/hw/net/npcm7xx_emc.h   | 286 +++
 tests/qtest/meson.build|   1 +
 tests/qtest/npcm7xx_emc-test.c | 812 +++
 9 files changed, 2026 insertions(+), 3 deletions(-)
 create mode 100644 hw/net/npcm7xx_emc.c
 create mode 100644 include/hw/net/npcm7xx_emc.h
 create mode 100644 tests/qtest/npcm7xx_emc-test.c

-- 
2.30.0.478.g8a0d178c01-goog

Differences from v2:

1/3 hw/net: Add npcm7xx emc model

- move call to qemu_set_irq
- remove use of C99 mixed decls/statements
- add use of g_autofree

2/3 hw/arm: Add npcm7xx emc model

- none, patch ok as is

3/3 tests/qtests: Add npcm7xx emc model test

- remove use of C99 mixed decls/statements



Re: [PATCH v3 3/9] hw/ssi: Add SiFive SPI controller support

2021-02-08 Thread Palmer Dabbelt

On Mon, 08 Feb 2021 17:44:17 PST (-0800), alistai...@gmail.com wrote:

On Mon, Jan 25, 2021 at 11:34 PM Philippe Mathieu-Daudé  wrote:


On 1/26/21 7:00 AM, Bin Meng wrote:
> From: Bin Meng 
>
> This adds the SiFive SPI controller model for the FU540 SoC.
> The direct memory-mapped SPI flash mode is unsupported.
>
> Signed-off-by: Bin Meng 
>
> ---
>
> Changes in v3:
> - Simplify flush txfifo logic
>
> Changes in v2:
> - Log guest error when trying to write reserved registers
> - Log guest error when trying to access out-of-bounds registers
> - log guest error when writing to reserved bits for chip select
>   registers and watermark registers
> - Log unimplemented warning when trying to write direct-map flash
>   interface registers
> - Add test tx fifo full logic in sifive_spi_read(), hence remove
>   setting the tx fifo full flag in sifive_spi_write().
> - Populate register with their default value
>
>  include/hw/ssi/sifive_spi.h |  47 +
>  hw/ssi/sifive_spi.c | 358 
>  hw/ssi/Kconfig  |   4 +
>  hw/ssi/meson.build  |   1 +
>  4 files changed, 410 insertions(+)
>  create mode 100644 include/hw/ssi/sifive_spi.h
>  create mode 100644 hw/ssi/sifive_spi.c

Missing MAINTAINERS entry (if there are no other comments on
this series, maybe the maintainer can directly add one).


Yep, I'm adding this section to the RISC-V machines:

diff --git a/MAINTAINERS b/MAINTAINERS
index 8d8b0bf966..c347d49bd2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1359,6 +1359,15 @@ F: include/hw/misc/mchp_pfsoc_dmc.h
F: include/hw/misc/mchp_pfsoc_ioscb.h
F: include/hw/misc/mchp_pfsoc_sysreg.h

+SiFive Machines
+M: Alistair Francis 
+M: Bin Meng 
+M: Palmer Dabbelt 
+L: qemu-ri...@nongnu.org
+S: Supported
+F: hw/*/*sifive*.c
+F: include/hw/*/*sifive*.h
+
RX Machines
---
rx-gdbsim


Can I get an Ack from you Bin that you are ok with that?

@Palmer Dabbelt let me know if you would prefer something else.


Sorry, I hadn't seen this go by.

Acked-by: Palmer Dabbelt 

Thanks!



Alistair







Re: [PATCH v2 6/7] goldfish_rtc: re-arm the alarm after migration

2021-02-08 Thread Alistair Francis
On Sat, Feb 6, 2021 at 6:46 AM Laurent Vivier  wrote:
>
> Le 26/01/2021 à 00:44, Alistair Francis a écrit :
> > On Sat, Jan 23, 2021 at 7:06 AM Laurent Vivier  wrote:
> >>
> >> Is there someone to merge this?
> >
> > Do you mean just this patch or the whole series?
>
> Sorry, I missed your mail.
>
> I mean only this patch.

Ok, I have applied it.

Thanks!

Applied to riscv-to-apply.next

Alistair

>
> Thanks,
> Laurent
>



Re: [PATCH v3 3/9] hw/ssi: Add SiFive SPI controller support

2021-02-08 Thread Bin Meng
On Tue, Feb 9, 2021 at 9:44 AM Alistair Francis  wrote:
>
> On Mon, Jan 25, 2021 at 11:34 PM Philippe Mathieu-Daudé  
> wrote:
> >
> > On 1/26/21 7:00 AM, Bin Meng wrote:
> > > From: Bin Meng 
> > >
> > > This adds the SiFive SPI controller model for the FU540 SoC.
> > > The direct memory-mapped SPI flash mode is unsupported.
> > >
> > > Signed-off-by: Bin Meng 
> > >
> > > ---
> > >
> > > Changes in v3:
> > > - Simplify flush txfifo logic
> > >
> > > Changes in v2:
> > > - Log guest error when trying to write reserved registers
> > > - Log guest error when trying to access out-of-bounds registers
> > > - log guest error when writing to reserved bits for chip select
> > >   registers and watermark registers
> > > - Log unimplemented warning when trying to write direct-map flash
> > >   interface registers
> > > - Add test tx fifo full logic in sifive_spi_read(), hence remove
> > >   setting the tx fifo full flag in sifive_spi_write().
> > > - Populate register with their default value
> > >
> > >  include/hw/ssi/sifive_spi.h |  47 +
> > >  hw/ssi/sifive_spi.c | 358 
> > >  hw/ssi/Kconfig  |   4 +
> > >  hw/ssi/meson.build  |   1 +
> > >  4 files changed, 410 insertions(+)
> > >  create mode 100644 include/hw/ssi/sifive_spi.h
> > >  create mode 100644 hw/ssi/sifive_spi.c
> >
> > Missing MAINTAINERS entry (if there are no other comments on
> > this series, maybe the maintainer can directly add one).
>
> Yep, I'm adding this section to the RISC-V machines:
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 8d8b0bf966..c347d49bd2 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1359,6 +1359,15 @@ F: include/hw/misc/mchp_pfsoc_dmc.h
> F: include/hw/misc/mchp_pfsoc_ioscb.h
> F: include/hw/misc/mchp_pfsoc_sysreg.h
>
> +SiFive Machines
> +M: Alistair Francis 
> +M: Bin Meng 
> +M: Palmer Dabbelt 
> +L: qemu-ri...@nongnu.org
> +S: Supported
> +F: hw/*/*sifive*.c
> +F: include/hw/*/*sifive*.h
> +
> RX Machines
> ---
> rx-gdbsim
>
>
> Can I get an Ack from you Bin that you are ok with that?
>

Acked-by: Bin Meng 

> @Palmer Dabbelt let me know if you would prefer something else.
>

Regards,
Bin



Re: [PATCH v3 3/9] hw/ssi: Add SiFive SPI controller support

2021-02-08 Thread Alistair Francis
On Mon, Jan 25, 2021 at 11:34 PM Philippe Mathieu-Daudé  wrote:
>
> On 1/26/21 7:00 AM, Bin Meng wrote:
> > From: Bin Meng 
> >
> > This adds the SiFive SPI controller model for the FU540 SoC.
> > The direct memory-mapped SPI flash mode is unsupported.
> >
> > Signed-off-by: Bin Meng 
> >
> > ---
> >
> > Changes in v3:
> > - Simplify flush txfifo logic
> >
> > Changes in v2:
> > - Log guest error when trying to write reserved registers
> > - Log guest error when trying to access out-of-bounds registers
> > - log guest error when writing to reserved bits for chip select
> >   registers and watermark registers
> > - Log unimplemented warning when trying to write direct-map flash
> >   interface registers
> > - Add test tx fifo full logic in sifive_spi_read(), hence remove
> >   setting the tx fifo full flag in sifive_spi_write().
> > - Populate register with their default value
> >
> >  include/hw/ssi/sifive_spi.h |  47 +
> >  hw/ssi/sifive_spi.c | 358 
> >  hw/ssi/Kconfig  |   4 +
> >  hw/ssi/meson.build  |   1 +
> >  4 files changed, 410 insertions(+)
> >  create mode 100644 include/hw/ssi/sifive_spi.h
> >  create mode 100644 hw/ssi/sifive_spi.c
>
> Missing MAINTAINERS entry (if there are no other comments on
> this series, maybe the maintainer can directly add one).

Yep, I'm adding this section to the RISC-V machines:

diff --git a/MAINTAINERS b/MAINTAINERS
index 8d8b0bf966..c347d49bd2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1359,6 +1359,15 @@ F: include/hw/misc/mchp_pfsoc_dmc.h
F: include/hw/misc/mchp_pfsoc_ioscb.h
F: include/hw/misc/mchp_pfsoc_sysreg.h

+SiFive Machines
+M: Alistair Francis 
+M: Bin Meng 
+M: Palmer Dabbelt 
+L: qemu-ri...@nongnu.org
+S: Supported
+F: hw/*/*sifive*.c
+F: include/hw/*/*sifive*.h
+
RX Machines
---
rx-gdbsim


Can I get an Ack from you Bin that you are ok with that?

@Palmer Dabbelt let me know if you would prefer something else.

Alistair

>



Re: [PATCH v3 3/9] hw/ssi: Add SiFive SPI controller support

2021-02-08 Thread Alistair Francis
On Mon, Jan 25, 2021 at 10:03 PM Bin Meng  wrote:
>
> From: Bin Meng 
>
> This adds the SiFive SPI controller model for the FU540 SoC.
> The direct memory-mapped SPI flash mode is unsupported.
>
> Signed-off-by: Bin Meng 

Reviewed-by: Alistair Francis 

Alistair

>
> ---
>
> Changes in v3:
> - Simplify flush txfifo logic
>
> Changes in v2:
> - Log guest error when trying to write reserved registers
> - Log guest error when trying to access out-of-bounds registers
> - log guest error when writing to reserved bits for chip select
>   registers and watermark registers
> - Log unimplemented warning when trying to write direct-map flash
>   interface registers
> - Add test tx fifo full logic in sifive_spi_read(), hence remove
>   setting the tx fifo full flag in sifive_spi_write().
> - Populate register with their default value
>
>  include/hw/ssi/sifive_spi.h |  47 +
>  hw/ssi/sifive_spi.c | 358 
>  hw/ssi/Kconfig  |   4 +
>  hw/ssi/meson.build  |   1 +
>  4 files changed, 410 insertions(+)
>  create mode 100644 include/hw/ssi/sifive_spi.h
>  create mode 100644 hw/ssi/sifive_spi.c
>
> diff --git a/include/hw/ssi/sifive_spi.h b/include/hw/ssi/sifive_spi.h
> new file mode 100644
> index 00..47d0d6a47c
> --- /dev/null
> +++ b/include/hw/ssi/sifive_spi.h
> @@ -0,0 +1,47 @@
> +/*
> + * QEMU model of the SiFive SPI Controller
> + *
> + * Copyright (c) 2021 Wind River Systems, Inc.
> + *
> + * Author:
> + *   Bin Meng 
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2 or later, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#ifndef HW_SIFIVE_SPI_H
> +#define HW_SIFIVE_SPI_H
> +
> +#define SIFIVE_SPI_REG_NUM  (0x78 / 4)
> +
> +#define TYPE_SIFIVE_SPI "sifive.spi"
> +#define SIFIVE_SPI(obj) OBJECT_CHECK(SiFiveSPIState, (obj), TYPE_SIFIVE_SPI)
> +
> +typedef struct SiFiveSPIState {
> +SysBusDevice parent_obj;
> +
> +MemoryRegion mmio;
> +qemu_irq irq;
> +
> +uint32_t num_cs;
> +qemu_irq *cs_lines;
> +
> +SSIBus *spi;
> +
> +Fifo8 tx_fifo;
> +Fifo8 rx_fifo;
> +
> +uint32_t regs[SIFIVE_SPI_REG_NUM];
> +} SiFiveSPIState;
> +
> +#endif /* HW_SIFIVE_SPI_H */
> diff --git a/hw/ssi/sifive_spi.c b/hw/ssi/sifive_spi.c
> new file mode 100644
> index 00..0c9ebca3c8
> --- /dev/null
> +++ b/hw/ssi/sifive_spi.c
> @@ -0,0 +1,358 @@
> +/*
> + * QEMU model of the SiFive SPI Controller
> + *
> + * Copyright (c) 2021 Wind River Systems, Inc.
> + *
> + * Author:
> + *   Bin Meng 
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2 or later, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "hw/irq.h"
> +#include "hw/qdev-properties.h"
> +#include "hw/sysbus.h"
> +#include "hw/ssi/ssi.h"
> +#include "sysemu/sysemu.h"
> +#include "qemu/fifo8.h"
> +#include "qemu/log.h"
> +#include "qemu/module.h"
> +#include "hw/ssi/sifive_spi.h"
> +
> +#define R_SCKDIV    (0x00 / 4)
> +#define R_SCKMODE   (0x04 / 4)
> +#define R_CSID      (0x10 / 4)
> +#define R_CSDEF     (0x14 / 4)
> +#define R_CSMODE    (0x18 / 4)
> +#define R_DELAY0    (0x28 / 4)
> +#define R_DELAY1    (0x2C / 4)
> +#define R_FMT       (0x40 / 4)
> +#define R_TXDATA    (0x48 / 4)
> +#define R_RXDATA    (0x4C / 4)
> +#define R_TXMARK    (0x50 / 4)
> +#define R_RXMARK    (0x54 / 4)
> +#define R_FCTRL     (0x60 / 4)
> +#define R_FFMT      (0x64 / 4)
> +#define R_IE        (0x70 / 4)
> +#define R_IP        (0x74 / 4)
> +
> +#define FMT_DIR (1 << 3)
> +
> +#define TXDATA_FULL (1 << 31)
> +#define RXDATA_EMPTY (1 << 31)
> +
> +#define IE_TXWM (1 << 0)
> +#define IE_RXWM (1 << 1)
> +
> +#define IP_TXWM (1 << 0)
> +#define IP_RXWM (1 << 1)
> +
> +#define FIFO_CAPACITY   8
> +
> +static void sifive_spi_txfifo_reset(SiFiveSPIState *s)
> +{
> +

Re: KVM Guest

2021-02-08 Thread Huacai Chen
I think it can be removed.

Huacai

On Tue, Feb 9, 2021 at 12:40 AM Jiaxun Yang  wrote:
>
>
>
> On Mon, Feb 8, 2021, at 3:39 AM, Thomas Bogendoerfer wrote:
> > On Wed, Feb 03, 2021 at 08:52:34PM +0800, Jiaxun Yang wrote:
> > >
> > >
> > > On Wed, Feb 3, 2021, at 8:34 PM, Thomas Bogendoerfer wrote:
> > > > Hi,
> > > >
> > > > Does kvm guest kernel still work ? I'm playing with current mips-next
> > > > and starting a kvm guest kernel inside an emulated malta system also
> > > > running a mips-next kernel. The kvm guest kernel starts, but hangs
> > > > in calibrate_delay (at least that's what info registers in qemu monitor
> > > > suggests). Any ideas ?
> > >
> > > The current KVM guest kernel is actually a Trap-and-emul guest kernel.
> > > VZ based KVM uses the same binary with the host one, so does TCG.
> > >
> > > TE KVM is currently unmaintained. I'll try to get a malta and do some test.
> >
> > hmm, so it looks broken, is unmaintained, how about removing it ?
>
> Probably. I got remote access of a CoreLV malta but no luck to boot kernel as 
> well.
>
> + Huacai as KVM/MIPS Maintainer.
> + Philippe as QEMU/MIPS Maintainer.
> + qemu-devel for wider audience.
>
> If nobody intended to maintain it probably it's time to remove it.
>
> >
> > Thomas.
> >
> > --
> > Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
> > good idea.[ RFC1925, 2.3 ]
> >
>
> --
> - Jiaxun



Re: [PATCH v2 1/3] hw/net: Add npcm7xx emc model

2021-02-08 Thread Doug Evans
On Mon, Feb 8, 2021 at 9:17 AM Peter Maydell 
wrote:

> On Tue, 2 Feb 2021 at 23:29, Doug Evans  wrote:
> >
> > This is a 10/100 ethernet device that has several features.
> > Only the ones needed by the Linux driver have been implemented.
> > See npcm7xx_emc.c for a list of unimplemented features.
> >
> > Reviewed-by: Hao Wu 
> > Reviewed-by: Avi Fishman 
> > Signed-off-by: Doug Evans 
> > ---
> >  hw/net/meson.build   |   1 +
> >  hw/net/npcm7xx_emc.c | 852 +++
> >  hw/net/trace-events  |  17 +
> >  include/hw/net/npcm7xx_emc.h | 286 
> >  4 files changed, 1156 insertions(+)
> >  create mode 100644 hw/net/npcm7xx_emc.c
> >  create mode 100644 include/hw/net/npcm7xx_emc.h
>
> > +static void emc_reset(NPCM7xxEMCState *emc)
> > +{
> > +trace_npcm7xx_emc_reset(emc->emc_num);
> > +
> > +memset(&emc->regs[0], 0, sizeof(emc->regs));
> > +
> > +/* These regs have non-zero reset values. */
> > +emc->regs[REG_TXDLSA] = 0xfffc;
> > +emc->regs[REG_RXDLSA] = 0xfffc;
> > +emc->regs[REG_MIIDA] = 0x0090;
> > +emc->regs[REG_FFTCR] = 0x0101;
> > +emc->regs[REG_DMARFC] = 0x0800;
> > +emc->regs[REG_MPCNT] = 0x7fff;
> > +
> > +emc->tx_active = false;
> > +emc->rx_active = false;
> > +
> > +qemu_set_irq(emc->tx_irq, 0);
> > +qemu_set_irq(emc->rx_irq, 0);
> > +}
> > +
> > +static void npcm7xx_emc_reset(DeviceState *dev)
> > +{
> > +NPCM7xxEMCState *emc = NPCM7XX_EMC(dev);
> > +emc_reset(emc);
> > +}
>
> You can't call qemu_set_irq() from a DeviceState::reset method.
> Usually it's OK just not to try to set the outbound IRQs and
> to assume that the device you're connected to has reset to the
> state where its inbound IRQ line is not asserted. If you really
> need to set the irq line then you need to switch to 3-phase
> reset (some of the other npcm7xx devices do this). But I
> suspect that just moving the qemu_set_irq() calls to
> emc_soft_reset() would be enough.
>

Ah. Fixed in v3.

> Don't put local variable declarations in the middle of functions,
> please. Coding style says they should be at the start of a
> block (so, here, the start of the function). It looks like you've
> got middle-of-function declarations in several places in other
> functions too, so could you fix them all up please?
>

Fixed in v3.
Maybe now's a good time though to revisit this rule.
QEMU uses C99, and mixed decls/statements is an easy improvement to the
coding standards.
I'm guessing this is an uncontroversial request. Is there just inertia
behind not making the change thus far?


> Optional, but you might consider using g_autofree for
> malloced_buf, which would let the compiler deal with
> g_free()ing it for you on all the function exit paths.
>

Done in v3.

Thanks.


[PATCH v2] hw/net: fsl_etsec: Reverse the RCTRL.RSF logic

2021-02-08 Thread Bin Meng
From: Bin Meng 

Per MPC8548ERM [1] chapter 14.5.3.4.1:

When RCTRL.RSF is 1, frames less than 64 bytes are accepted upon
a DA match. But currently QEMU does the opposite.

When RCTRL.RSF is 0, short frames are silently dropped. However,
we cannot drop such frames in QEMU as of today, because neither slirp
nor tap networking pads short frames (e.g. an ARP packet) to the
minimum frame size of 60 bytes.

If eTSEC is programmed to reject short frames, ARP requests will be
dropped, preventing the guest from becoming visible on the network.

The same issue was reported on e1000 and vmxnet3 before, see:

commit 78aeb23eded2 ("e1000: Pad short frames to minimum size (60 bytes)")
commit 40a87c6c9b11 ("vmxnet3: Pad short frames to minimum size (60 bytes)")

Ideally this should be fixed on the slirp/tap networking side to
pad short frames to the minimum frame length, but I am not sure
whether that's doable.

This commit reverses the RCTRL.RSF testing logic to match the spec.
The log message is updated to mention the reject short frames
functionality is unimplemented.

[1] https://www.nxp.com/docs/en/reference-manual/MPC8548ERM.pdf

Fixes: eb1e7c3e5146 ("Add Enhanced Three-Speed Ethernet Controller (eTSEC)")
Signed-off-by: Bin Meng 

---

Changes in v2:
- rewrite the commit message and reverse the RCTRL.RSF test logic

 hw/net/fsl_etsec/rings.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/hw/net/fsl_etsec/rings.c b/hw/net/fsl_etsec/rings.c
index 121415a..f89aa7f 100644
--- a/hw/net/fsl_etsec/rings.c
+++ b/hw/net/fsl_etsec/rings.c
@@ -502,10 +502,17 @@ ssize_t etsec_rx_ring_write(eTSEC *etsec, const uint8_t 
*buf, size_t size)
 return -1;
 }
 
-if ((etsec->regs[RCTRL].value & RCTRL_RSF) && (size < 60)) {
+/*
+ * Both slirp and tap networking do not pad short frames
+ * (e.g.: an ARP packet) to the minimum frame size of 60 bytes.
+ *
+ * If eTSEC is programmed to reject short frames, ARP requests
+ * will be dropped, preventing the guest from becoming visible
+ * on the network.
+ */
+if (!(etsec->regs[RCTRL].value & RCTRL_RSF) && (size < 60)) {
 /* CRC is not in the packet yet, so short frame is below 60 bytes */
-RING_DEBUG("%s: Drop short frame\n", __func__);
-return -1;
+RING_DEBUG("%s: Drop short frame not implemented\n", __func__);
 }
 
 rx_init_frame(etsec, buf, size);
-- 
2.7.4




Re: [RFC 0/1] vhost-vdmabuf: Add virtio based Dmabuf device

2021-02-08 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/20210208233225.2084469-1-vivek.kasire...@intel.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20210208233225.2084469-1-vivek.kasire...@intel.com
Subject: [RFC 0/1] vhost-vdmabuf: Add virtio based Dmabuf device

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 * [new tag] patchew/20210208233225.2084469-1-vivek.kasire...@intel.com 
-> patchew/20210208233225.2084469-1-vivek.kasire...@intel.com
Switched to a new branch 'test'
1835e33 vhost-vdmabuf: Add virtio based Dmabuf device

=== OUTPUT BEGIN ===
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#106: 
new file mode 100644

ERROR: do not initialise statics to 0 or NULL
#155: FILE: hw/virtio/vhost-vdmabuf.c:45:
+static bool have_event = false;

ERROR: code indent should never use tabs
#287: FILE: hw/virtio/vhost-vdmabuf.c:177:
+^Ireturn;$

ERROR: code indent should never use tabs
#288: FILE: hw/virtio/vhost-vdmabuf.c:178:
+^I}$

ERROR: code indent should never use tabs
#397: FILE: hw/virtio/vhost-vdmabuf.c:287:
+^Ireturn NULL;$

ERROR: code indent should never use tabs
#415: FILE: hw/virtio/vhost-vdmabuf.c:305:
+^I^I^I^I^IVDMABUFDisplay *dpy,$

ERROR: braces {} are necessary for all arms of this statement
#425: FILE: hw/virtio/vhost-vdmabuf.c:315:
+if (ioctl(fd, VIRTIO_VDMABUF_IOCTL_RELEASE, ))
[...]

ERROR: braces {} are necessary for all arms of this statement
#456: FILE: hw/virtio/vhost-vdmabuf.c:346:
+if (!have_event)
[...]

ERROR: trailing whitespace
#531: FILE: hw/virtio/vhost-vdmabuf.c:421:
+$

ERROR: code indent should never use tabs
#705: FILE: include/hw/virtio/vhost-vdmabuf.h:31:
+^I__u64 id;$

ERROR: code indent should never use tabs
#706: FILE: include/hw/virtio/vhost-vdmabuf.h:32:
+^I/* 8B long Random number */$

ERROR: code indent should never use tabs
#707: FILE: include/hw/virtio/vhost-vdmabuf.h:33:
+^Iint rng_key[2];$

ERROR: code indent should never use tabs
#711: FILE: include/hw/virtio/vhost-vdmabuf.h:37:
+^I/* buf_id of new buf */$

ERROR: code indent should never use tabs
#712: FILE: include/hw/virtio/vhost-vdmabuf.h:38:
+^Ivirtio_vdmabuf_buf_id_t buf_id;$

ERROR: code indent should never use tabs
#713: FILE: include/hw/virtio/vhost-vdmabuf.h:39:
+^I/* size of private data */$

ERROR: code indent should never use tabs
#714: FILE: include/hw/virtio/vhost-vdmabuf.h:40:
+^Iint size;$

ERROR: code indent should never use tabs
#718: FILE: include/hw/virtio/vhost-vdmabuf.h:44:
+^Istruct virtio_vdmabuf_e_hdr hdr;$

ERROR: code indent should never use tabs
#719: FILE: include/hw/virtio/vhost-vdmabuf.h:45:
+^I/* ptr to private data */$

ERROR: code indent should never use tabs
#720: FILE: include/hw/virtio/vhost-vdmabuf.h:46:
+^Ivoid *data;$

ERROR: code indent should never use tabs
#728: FILE: include/hw/virtio/vhost-vdmabuf.h:54:
+^I/* IN parameters */$

ERROR: code indent should never use tabs
#729: FILE: include/hw/virtio/vhost-vdmabuf.h:55:
+^I/* vdmabuf id to be imported */$

ERROR: code indent should never use tabs
#730: FILE: include/hw/virtio/vhost-vdmabuf.h:56:
+^Ivirtio_vdmabuf_buf_id_t buf_id;$

ERROR: code indent should never use tabs
#731: FILE: include/hw/virtio/vhost-vdmabuf.h:57:
+^I/* flags */$

ERROR: code indent should never use tabs
#732: FILE: include/hw/virtio/vhost-vdmabuf.h:58:
+^Iint flags;$

ERROR: code indent should never use tabs
#733: FILE: include/hw/virtio/vhost-vdmabuf.h:59:
+^I/* OUT parameters */$

ERROR: code indent should never use tabs
#734: FILE: include/hw/virtio/vhost-vdmabuf.h:60:
+^I/* exported dma buf fd */$

ERROR: code indent should never use tabs
#735: FILE: include/hw/virtio/vhost-vdmabuf.h:61:
+^Iint fd;$

ERROR: code indent should never use tabs
#741: FILE: include/hw/virtio/vhost-vdmabuf.h:67:
+^I/* IN parameters */$

ERROR: code indent should never use tabs
#742: FILE: include/hw/virtio/vhost-vdmabuf.h:68:
+^I/* DMA buf fd to be exported */$

ERROR: code indent should never use tabs
#743: FILE: include/hw/virtio/vhost-vdmabuf.h:69:
+^Iint fd;$

ERROR: code indent should never use tabs
#744: FILE: include/hw/virtio/vhost-vdmabuf.h:70:
+^I/* exported dma buf id */$

ERROR: code indent should never use tabs
#745: FILE: include/hw/virtio/vhost-vdmabuf.h:71:
+^Ivirtio_vdmabuf_buf_id_t buf_id;$

ERROR: code indent should never use tabs
#746: FILE: include/hw/virtio/vhost-vdmabuf.h:72:
+^Iint sz_priv;$

ERROR: code indent should never use tabs
#747: FILE: include/hw/virtio/vhost-vdmabuf.h:73:
+^Ichar *priv;$

total: 33 errors, 1 warnings, 718 lines checked

Commit 1835e330d677 (vhost-vdmabuf: Add virtio based Dmabuf 

Re: [Bug 1914849] Re: mprotect fails after MacOS 11.2 on arm mac

2021-02-08 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/161280769492.2878.8851519112088854609.mal...@chaenomeles.canonical.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 
161280769492.2878.8851519112088854609.mal...@chaenomeles.canonical.com
Subject: [Bug 1914849] Re: mprotect fails after MacOS 11.2 on arm mac

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 * [new tag] 
patchew/161280769492.2878.8851519112088854609.mal...@chaenomeles.canonical.com 
-> 
patchew/161280769492.2878.8851519112088854609.mal...@chaenomeles.canonical.com
 - [tag update]  patchew/20210129005845.416272-1-wuhao...@google.com -> 
patchew/20210129005845.416272-1-wuhao...@google.com
 - [tag update]  
patchew/20210208024625.271018-1-richard.hender...@linaro.org -> 
patchew/20210208024625.271018-1-richard.hender...@linaro.org
 - [tag update]  patchew/20210208163339.1159514-1-berra...@redhat.com -> 
patchew/20210208163339.1159514-1-berra...@redhat.com
Switched to a new branch 'test'
f2c73d0 mprotect fails after MacOS 11.2 on arm mac

=== OUTPUT BEGIN ===
ERROR: Missing Signed-off-by: line(s)

total: 1 errors, 0 warnings, 9 lines checked

Commit f2c73d03916c (mprotect fails after MacOS 11.2 on arm mac) has style 
problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/161280769492.2878.8851519112088854609.mal...@chaenomeles.canonical.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [RFC PATCH v2 5/6] accel/tcg: Refactor debugging tlb_assert_iotlb_entry_for_ptr_present()

2021-02-08 Thread Richard Henderson
On 2/8/21 5:52 AM, Philippe Mathieu-Daudé wrote:
> On 2/8/21 9:42 AM, Alex Bennée wrote:
>>
>> Philippe Mathieu-Daudé  writes:
>>
>>> Refactor debug code as tlb_assert_iotlb_entry_for_ptr_present() helper.
>>>
>>> Signed-off-by: Philippe Mathieu-Daudé 
>>> ---
>>> What this code does is out of my league, but refactoring it allow
>>> keeping tlb_addr_write() local to accel/tcg/cputlb.c in the next
>>> patch.
>>
>> The assertion that the table entry is current is just a simple
>> housekeeping one. The details of how the MTE implementation uses
>> (abuses?) the iotlb entries requires a closer reading of the code.
>>
>>> ---
>>>  include/exec/exec-all.h |  9 +
>>>  accel/tcg/cputlb.c  | 14 ++
>>>  target/arm/mte_helper.c | 11 ++-
>>>  target/arm/sve_helper.c | 10 ++
>>>  4 files changed, 27 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
>>> index f933c74c446..c5e8e355b7f 100644
>>> --- a/include/exec/exec-all.h
>>> +++ b/include/exec/exec-all.h
>>> @@ -296,6 +296,15 @@ void tlb_set_page_with_attrs(CPUState *cpu, 
>>> target_ulong vaddr,
>>>  void tlb_set_page(CPUState *cpu, target_ulong vaddr,
>>>hwaddr paddr, int prot,
>>>int mmu_idx, target_ulong size);
>>> +
>>> +/*
>>> + * Find the iotlbentry for ptr.  This *must* be present in the TLB
>>> + * because we just found the mapping.
>>> + */
>>> +void tlb_assert_iotlb_entry_for_ptr_present(CPUArchState *env, int 
>>> ptr_mmu_idx,
>>> +uint64_t ptr,
>>> +MMUAccessType ptr_access,
>>> +uintptr_t index);
>>
>> Probably worth making this an empty inline for the non CONFIG_DEBUG_TCG
>> case so we can eliminate the call to an empty function.
> 
> But then we can't make tlb_addr_write() static (next patch) and
> we still have to include "tcg/tcg.h" for the TCG_OVERSIZED_GUEST
> definition...

Certainly you can, though it's not especially pretty:

#ifdef CONFIG_DEBUG_TCG
void tlb_assert_iotlb_entry_for_ptr_present
  (CPUArchState *env, int ptr_mmu_idx,
   uint64_t ptr, MMUAccessType ptr_access,
   uintptr_t index);
#else
static inline void
tlb_assert_iotlb_entry_for_ptr_present
  (CPUArchState *env, int ptr_mmu_idx,
   uint64_t ptr, MMUAccessType ptr_access,
   uintptr_t index)
{ }
#endif


r~



Re: [PATCH v2 13/15] tcg/arm: Implement TCG_TARGET_HAS_shv_vec

2021-02-08 Thread Richard Henderson
On 2/8/21 12:50 PM, Peter Maydell wrote:
> On Mon, 8 Feb 2021 at 03:28, Richard Henderson
>  wrote:
>>
>> The three vector shift by vector operations are all implemented via
>> expansion.  Therefore do not actually set TCG_TARGET_HAS_shv_vec,
>> as none of shlv_vec, shrv_vec, sarv_vec may actually appear in the
>> instruction stream, and therefore also do not appear in tcg_target_op_def.
>>
>> Signed-off-by: Richard Henderson 
>> ---
>>  tcg/arm/tcg-target.opc.h |  3 ++
>>  tcg/arm/tcg-target.c.inc | 61 +++-
>>  2 files changed, 63 insertions(+), 1 deletion(-)
> 
>> +switch (opc) {
>> +case INDEX_op_shlv_vec:
>> +/*
>> + * Merely propagate shlv_vec to arm_ushl_vec.
>> + * In this way we don't set TCG_TARGET_HAS_shv_vec
>> + * because everything is done via expansion.
>> + */
>> +v2 = temp_tcgv_vec(arg_temp(a2));
>> +vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(v0),
>> +  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
>> +break;
> 
> tcg/aarch64 seems to set TCG_TARGET_HAS_shv_vec and
> only do the right-shifts via expand_op. Is there a difference
> between the two that means Neon has to do it this way, or is it
> just a "works either way" thing?

It's a works either way thing.


r~

> 
>> +
> 
> Reviewed-by: Peter Maydell 
> 
> thanks
> -- PMM
> 




[PATCH v2 6/9] hw/i386: declare ACPI mother board resource for MMCONFIG region

2021-02-08 Thread isaku . yamahata
From: Isaku Yamahata 

Declare PNP0C01 device to reserve MMCONFIG region to conform to the
spec better and play nice with guest BIOSes/OSes.

According to PCI Firmware Specification, MMCONFIG region must be
reserved by declaring a motherboard resource. It's optional to reserve
the region in the memory map by Int 15 E820h or EFIGetMemoryMap.
If the guest BIOS neither reserves the region in the memory map nor
declares it as a motherboard resource, the Linux guest gives up on
using MMCFG.

TDVF [0] [1] doesn't reserve the MMCONFIG region in the memory map.
On the other hand, OVMF reserves it in the memory map without declaring
a motherboard resource. With the memory map reservation, a Linux guest
uses the MMCONFIG region. However, this doesn't comply with the PCI
Firmware Specification.

[0] TDX: Intel Trust Domain Extension

https://software.intel.com/content/www/us/en/develop/articles/intel-trust-domain-extensions.html
[1] TDX Virtual Firmware
https://github.com/tianocore/edk2-staging/tree/TDVF

The change to DSDT is as follows.
@@ -68,32 +68,90 @@

 If ((CDW3 != Local0))
 {
 CDW1 |= 0x10
 }

 CDW3 = Local0
 }
 Else
 {
 CDW1 |= 0x04
 }

 Return (Arg3)
 }
 }
+
+Device (DRAC)
+{
+Name (_HID, "PNP0C01" /* System Board */)  // _HID: Hardware ID
+OperationRegion (DRR0, PCI_Config, 0x60, 0x08)
+Field (DRR0, DWordAcc, NoLock, Preserve)
+{
+PEBL,   32,
+PEBH,   32
+}
+
+Name (RBUF, ResourceTemplate ()
+{
+QWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, 
NonCacheable, ReadWrite,
+0x, // Granularity
+0x, // Range Minimum
+0x, // Range Maximum
+0x, // Translation Offset
+0x, // Length
+,, _Y00, AddressRangeMemory, TypeStatic)
+})
+Method (_CRS, 0, Serialized)  // _CRS: Current Resource Settings
+{
+CreateDWordField (RBUF, \_SB.DRAC._Y00._MIN, MINL)  // _MIN: 
Minimum Base Address
+CreateDWordField (RBUF, 0x12, MINH)
+CreateDWordField (RBUF, \_SB.DRAC._Y00._MAX, MAXL)  // _MAX: 
Maximum Base Address
+CreateDWordField (RBUF, 0x1A, MAXH)
+CreateQWordField (RBUF, \_SB.DRAC._Y00._LEN, _LEN)  // _LEN: 
Length
+Local0 = PEBL /* \_SB_.DRAC.PEBL */
+Local1 = (Local0 & One)
+Local2 = (Local0 & 0x06)
+Local3 = (Local0 & 0xFFF8)
+Local4 = PEBH /* \_SB_.DRAC.PEBH */
+If ((Local1 == One))
+{
+MINL = Local3
+MINH = Local4
+MAXL = Local3
+MAXH = Local4
+If ((Local2 == Zero))
+{
+_LEN = 0x1000
+}
+
+If ((Local2 == 0x02))
+{
+_LEN = 0x0800
+}
+
+If ((Local2 == 0x04))
+{
+_LEN = 0x0400
+}
+}
+
+Return (RBUF) /* \_SB_.DRAC.RBUF */
+}
+}
 }

 Scope (_SB)
 {
 Device (HPET)
 {
 Name (_HID, EisaId ("PNP0103") /* HPET System Timer */)  // _HID: 
Hardware ID
 Name (_UID, Zero)  // _UID: Unique ID
 OperationRegion (HPTM, SystemMemory, 0xFED0, 0x0400)
 Field (HPTM, DWordAcc, Lock, Preserve)
 {
 VEND,   32,
 PRD,32
 }

 Method (_STA, 0, NotSerialized)  // _STA: Status

Signed-off-by: Isaku Yamahata 
Acked-by: Jiewen Yao 
---
 hw/i386/acpi-build.c | 172 +++
 1 file changed, 172 insertions(+)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index c2f11d95d8..bcb1f65c1d 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1066,6 +1066,177 @@ static void build_q35_pci0_int(Aml *table)
 aml_append(table, sb_scope);
 }
 
+static Aml *build_q35_dram_controller(void)
+{
+/*
+ * DSDT is created with revision 1 which means 32bit integer.
+ * When the method of _CRS is called to determine MMCONFIG region,
+ * only port io is allowed to access PCI configuration space.
+ * It means qword access isn't allowed.
+ *
+ * Device(DRAC)
+ * {
+ * Name(_HID, EisaId("PNP0C01"))
+ * OperationRegion(DRR0, PCI_Config, 0x0060, 0x8)
+ *  

[PATCH v2 5/9] acpi: add test case for smm unsupported -machine smm=off

2021-02-08 Thread isaku . yamahata
From: Isaku Yamahata 

Signed-off-by: Isaku Yamahata 
---
 tests/qtest/bios-tables-test.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c
index 669202fc95..096d15db68 100644
--- a/tests/qtest/bios-tables-test.c
+++ b/tests/qtest/bios-tables-test.c
@@ -969,6 +969,17 @@ static void test_acpi_q35_tcg_numamem(void)
 free_test_data(&data);
 }
 
+static void test_acpi_q35_tcg_nosmm(void)
+{
+test_data data;
+
+memset(&data, 0, sizeof(data));
+data.machine = MACHINE_Q35;
+data.variant = ".nosmm";
+test_acpi_one("-machine smm=off", &data);
+free_test_data(&data);
+}
+
 static void test_acpi_piix4_tcg_numamem(void)
 {
 test_data data;
@@ -1325,6 +1336,7 @@ int main(int argc, char *argv[])
 qtest_add_func("acpi/q35/memhp", test_acpi_q35_tcg_memhp);
 qtest_add_func("acpi/piix4/numamem", test_acpi_piix4_tcg_numamem);
 qtest_add_func("acpi/q35/numamem", test_acpi_q35_tcg_numamem);
+qtest_add_func("acpi/q35/nosmm", test_acpi_q35_tcg_nosmm);
 qtest_add_func("acpi/piix4/dimmpxm", test_acpi_piix4_tcg_dimm_pxm);
 qtest_add_func("acpi/q35/dimmpxm", test_acpi_q35_tcg_dimm_pxm);
 qtest_add_func("acpi/piix4/acpihmat", test_acpi_piix4_tcg_acpi_hmat);
-- 
2.17.1




[PATCH v2 4/9] acpi: set fadt.smi_cmd to zero when SMM is not supported

2021-02-08 Thread isaku . yamahata
From: Isaku Yamahata 

From table 5.9 SMI_CMD of ACPI spec
> This field is reserved and must be zero on system
> that does not support System Management mode.

When smm is not enabled, set it to zero to conform to the spec.
When -machine smm=off is passed, the change to FACP is as follows.

@@ -1,46 +1,46 @@
 /*
  * Intel ACPI Component Architecture
  * AML/ASL+ Disassembler version 20180105 (64-bit version)
  * Copyright (c) 2000 - 2018 Intel Corporation
  *
- * Disassembly of tests/data/acpi/q35/FACP, Fri Feb  5 16:57:04 2021
+ * Disassembly of /tmp/aml-1OQYX0, Fri Feb  5 16:57:04 2021
  *
  * ACPI Data Table [FACP]
  *
  * Format: [HexOffset DecimalOffset ByteLength]  FieldName : FieldValue
  */

 [000h    4]Signature : "FACP"[Fixed ACPI 
Description Table (FADT)]
 [004h 0004   4] Table Length : 00F4
 [008h 0008   1] Revision : 03
-[009h 0009   1] Checksum : 1F
+[009h 0009   1] Checksum : D6
 [00Ah 0010   6]   Oem ID : "BOCHS "
 [010h 0016   8] Oem Table ID : "BXPCFACP"
 [018h 0024   4] Oem Revision : 0001
 [01Ch 0028   4]  Asl Compiler ID : "BXPC"
 [020h 0032   4]Asl Compiler Revision : 0001

 [024h 0036   4] FACS Address : 
 [028h 0040   4] DSDT Address : 
 [02Ch 0044   1]Model : 01
 [02Dh 0045   1]   PM Profile : 00 [Unspecified]
 [02Eh 0046   2]SCI Interrupt : 0009
-[030h 0048   4] SMI Command Port : 00B2
-[034h 0052   1]ACPI Enable Value : 02
-[035h 0053   1]   ACPI Disable Value : 03
+[030h 0048   4] SMI Command Port : 
+[034h 0052   1]ACPI Enable Value : 00
+[035h 0053   1]   ACPI Disable Value : 00
 [036h 0054   1]   S4BIOS Command : 00
 [037h 0055   1]  P-State Control : 00
 [038h 0056   4] PM1A Event Block Address : 0600
 [03Ch 0060   4] PM1B Event Block Address : 
 [040h 0064   4]   PM1A Control Block Address : 0604
 [044h 0068   4]   PM1B Control Block Address : 
 [048h 0072   4]PM2 Control Block Address : 
 [04Ch 0076   4]   PM Timer Block Address : 0608
 [050h 0080   4]   GPE0 Block Address : 0620
 [054h 0084   4]   GPE1 Block Address : 
 [058h 0088   1]   PM1 Event Block Length : 04
 [059h 0089   1] PM1 Control Block Length : 02
 [05Ah 0090   1] PM2 Control Block Length : 00
 [05Bh 0091   1]PM Timer Block Length : 04
 [05Ch 0092   1]GPE0 Block Length : 10
 [05Dh 0093   1]GPE1 Block Length : 00

Signed-off-by: Isaku Yamahata 
---
 hw/i386/acpi-build.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index f56d699c7f..c2f11d95d8 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -139,6 +139,8 @@ const struct AcpiGenericAddress x86_nvdimm_acpi_dsmio = {
 static void init_common_fadt_data(MachineState *ms, Object *o,
   AcpiFadtData *data)
 {
+X86MachineState *x86ms = X86_MACHINE(ms);
+bool smm_enabled = x86_machine_is_smm_enabled(x86ms);
 uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL);
 AmlAddressSpace as = AML_AS_SYSTEM_IO;
 AcpiFadtData fadt = {
@@ -159,12 +161,16 @@ static void init_common_fadt_data(MachineState *ms, 
Object *o,
 .rtc_century = RTC_CENTURY,
 .plvl2_lat = 0xfff /* C2 state not supported */,
 .plvl3_lat = 0xfff /* C3 state not supported */,
-.smi_cmd = ACPI_PORT_SMI_CMD,
+.smi_cmd = smm_enabled ? ACPI_PORT_SMI_CMD : 0,
 .sci_int = object_property_get_uint(o, ACPI_PM_PROP_SCI_INT, NULL),
 .acpi_enable_cmd =
-object_property_get_uint(o, ACPI_PM_PROP_ACPI_ENABLE_CMD, NULL),
+smm_enabled ?
+object_property_get_uint(o, ACPI_PM_PROP_ACPI_ENABLE_CMD, NULL) :
+0,
 .acpi_disable_cmd =
-object_property_get_uint(o, ACPI_PM_PROP_ACPI_DISABLE_CMD, NULL),
+smm_enabled ?
+object_property_get_uint(o, ACPI_PM_PROP_ACPI_DISABLE_CMD, NULL) :
+0,
 .pm1a_evt = { .space_id = as, .bit_width = 4 * 8, .address = io },
 .pm1a_cnt = { .space_id = as, .bit_width = 2 * 8,
   .address = io + 0x04 },
-- 
2.17.1




[PATCH v2 2/9] qtest: update tests/qtest/bios-tables-test-allowed-diff.h

2021-02-08 Thread isaku . yamahata
From: Isaku Yamahata 

The following tests will modify ACPI tables.
Prepare qtests to allow ACPI table changes,
and add new tables for the new tests:
- tests/data/acpi/q35/DSDT.nosmm
- tests/data/acpi/q35/FACP.nosmm
- tests/data/acpi/q35/DSDT.nohpet

Signed-off-by: Isaku Yamahata 
---
 tests/data/acpi/q35/DSDT.nohpet |  0
 tests/data/acpi/q35/DSDT.nosmm  |  0
 tests/data/acpi/q35/FACP.nosmm  |  0
 tests/qtest/bios-tables-test-allowed-diff.h | 13 +
 4 files changed, 13 insertions(+)
 create mode 100644 tests/data/acpi/q35/DSDT.nohpet
 create mode 100644 tests/data/acpi/q35/DSDT.nosmm
 create mode 100644 tests/data/acpi/q35/FACP.nosmm

diff --git a/tests/data/acpi/q35/DSDT.nohpet b/tests/data/acpi/q35/DSDT.nohpet
new file mode 100644
index 00..e69de29bb2
diff --git a/tests/data/acpi/q35/DSDT.nosmm b/tests/data/acpi/q35/DSDT.nosmm
new file mode 100644
index 00..e69de29bb2
diff --git a/tests/data/acpi/q35/FACP.nosmm b/tests/data/acpi/q35/FACP.nosmm
new file mode 100644
index 00..e69de29bb2
diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
b/tests/qtest/bios-tables-test-allowed-diff.h
index dfb8523c8b..b79ac495c2 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1 +1,14 @@
 /* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/q35/DSDT",
+"tests/data/acpi/q35/DSDT.tis",
+"tests/data/acpi/q35/DSDT.bridge",
+"tests/data/acpi/q35/DSDT.ipmibt",
+"tests/data/acpi/q35/DSDT.cphp",
+"tests/data/acpi/q35/DSDT.memhp",
+"tests/data/acpi/q35/DSDT.numamem",
+"tests/data/acpi/q35/DSDT.dimmpxm",
+"tests/data/acpi/q35/DSDT.acpihmat",
+"tests/data/acpi/q35/DSDT.mmio64",
+"tests/data/acpi/q35/DSDT.nosmm",
+"tests/data/acpi/q35/FACP.nosmm",
+"tests/data/acpi/q35/DSDT.nohpet",
-- 
2.17.1




Re: [PATCH v4 11/14] qapi/introspect.py: add type hint annotations

2021-02-08 Thread John Snow

On 2/8/21 4:39 PM, John Snow wrote:


I'm once again terminally confused about when to use _lower_case and
when to use CamelCase for such variables.



That's my fault for not using them consistently.

Generally:

TitleCase: Classes, Real Type Names :tm:
lowercase: instance names (and certain built-in types like str/bool/int)
UPPERCASE: "Constants". This is an extremely loose idea in Python.

I use the "_" prefix for any of the above categories to indicate 
something not intended to be used outside of the current scope. These 
types won't be accessible outside the module by default.


TypeVars I use "T", "U", "V", etc unless I bind them to another type; 
then I use e.g. NodeT instead.


When it comes to things like type aliases, I believe I instinctively 
used lowercase because I am not creating a new Real Type and wanted some 
visual distinction from a real class name. (aliases created in this way 
cannot be used with isinstance and hold no significance to mypy.)


That's why I used _stub, _scalar, _nonscalar, and _value for those types 
there. Then I disregarded my own convention and used TreeValue; perhaps 
that ought to be tree_value for consistency as it's not a Real Type :tm:


...but then we have the SchemaInfo type aliases, which I named using the 
same type name as they use in QAPI to help paint the association (and 
pick up 'git grep' searchers.)


Not fantastically consistent, sorry. Feel free to express a preference, 
I clearly don't have a universally applied one.


(Current leaning: rename TreeValue to tree_value, but leave everything 
else as it is.)


Addendum: pylint wants any non-underscored type alias to be treated like 
a class name, as CamelCase.


I guess it just exempts underscore prefixed things. So, it does have to 
stay "TreeValue".


--js




Re: [PATCH 1/1] acpi: Implement ACPI ERST support for guests

2021-02-08 Thread Eric Devolder
Paolo,
Thanks for the feedback. I've posted v2 with changes based on Igor's feedback.
I've also included a qtest per your feedback.
Eric


From: Paolo Bonzini 
Sent: Tuesday, November 3, 2020 3:16 PM
To: Eric DeVolder ; qemu-devel@nongnu.org 

Cc: m...@redhat.com ; imamm...@redhat.com 
; marcel.apfelb...@gmail.com ; 
r...@twiddle.net ; ehabk...@redhat.com ; 
konrad.w...@oracle.com ; boris.ostrov...@oracle.com 

Subject: Re: [PATCH 1/1] acpi: Implement ACPI ERST support for guests

On 26/10/20 21:19, Eric DeVolder wrote:
> This changeset introduces support for the ACPI Error Record
> Serialization Table, ERST.
>
> ERST is defined in [1], and the error records are defined
> according to [2].
>
> This changeset generates the ACPI ERST table, which OSPM
> follows to program the associated ERST device. The ERST
> device occupies 8KiB of address space, with the first 8 bytes
> containing registers, and the remainder being an exchange
> buffer for reading/writing error records.
>
> The ERST device contains two registers, ACTION and VALUE
> registers, following closely the ERST operations.
>
> The ERST device only examines the record for the signature 'CPER',
> its record identifier, and bounds-checks the length against the size
> of the CPER record header; otherwise all other record fields
> and payload are ignored.
>
> There are two options for this device:
>   -global acpi-erst.size=X
>   -global acpi-erst.filename=Y
> The size X, if not specified, defaults to 64KiB, and must be within
> 64KiB and 1MiB.
> The filename Y, if not specified, defaults to "acpi-erst.backing".
> The ERST backing storage is not mapped into the guest address space,
> just the 8KiB programming area is mapped into the guest.
>
> This has been utilized as a backend for the Linux pstore feature.
>
> [1] ACPI 6.3 Specification, 18.3 Error Serialization
>  https://uefi.org/sites/default/files/resources/ACPI_6_3_final_Jan30.pdf
>
> [2] UEFI 2.8 Specification, Appendix N Common Platform Error Record
>  https://uefi.org/sites/default/files/resources/UEFI_Spec_2_8_final.pdf

In addition to what Igor said, this really needs a test for the device
(using qtest).  It doesn't have to be super fine-grained, but at least a
smoke test that the action and value registers do what they mean for a
sample error recording operation.

Paolo



Re: [PATCH 0/1] acpi: Implement ACPI ERST support for guests

2021-02-08 Thread Eric Devolder
Igor,
Thanks for the feedback. Please see EJD: inline below.
I've posted v2 of this patch based on your feedback.
eric


From: Igor Mammedov 
Sent: Tuesday, November 3, 2020 8:57 AM
To: Eric DeVolder 
Cc: qemu-devel@nongnu.org ; m...@redhat.com 
; marcel.apfelb...@gmail.com ; 
pbonz...@redhat.com ; r...@twiddle.net ; 
ehabk...@redhat.com ; konrad.w...@oracle.com 
; boris.ostrov...@oracle.com 
; ler...@redhat.com 
Subject: Re: [PATCH 0/1] acpi: Implement ACPI ERST support for guests

On Mon, 26 Oct 2020 16:19:32 -0400
Eric DeVolder  wrote:

> This changeset introduces support for the ACPI Error Record
> Serialization Table, ERST.
>
> The change to hw/acpi/meson.build simply adds in the new .c file
> for compilation.
>
> The change to hw/i386/acpi-build.c calls out the building of the
> ERST table (and also creates the associated device).
>
> The new file hw/acpi/erst.c contains the building of the ERST
> table, as well as the simple device for exchanging error records.
>
> The new file include/hw/acpi/erst.h contains associated definitions
> and declarations for ERST.
>
> The primary description of this changeset is in the patch commit
> message.
>
> NOTES: When reviewing, I would especially appreciate feedback
> on the following topics:
>
> - The hope is to have ERST always present if ACPI is enabled, however,
>   I have found it difficult to devise a method for passing the base
>   address that does not require the workaround at the bottom of
>   build_erst(). The issues I encountered are:
>   - desire to keep this in common ACPI code
>   - the device requires a qdev_new(), this needs to happen early,
> thus the workaround in build_erst()
>   - the base address is machine/arch specific (eg ARM vs x86)
>   I've not found a nice way to thread this needle, so what I've settled
>   on is to simply lump ERST on to the CONFIG_ACPI (rather than a
>   separate CONFIG_ACPI_ERST), and the workaround at the bottom of
>   build_erst(). I suspect there is a better way for a built-in/
>   always present device. This does not support "-device acpi-erst,...".
>
> - I found a base address that "worked", but would like an address
>   that would be known to be available, and then to document/reserve
>   it for ERST. This takes into account that the base address can be
>   different for x86 vs ARM.
>
> - I've run this through checkpatch, and all issues addressed except
>   for the long lines in build_erst(). For readability I left the long
>   lines, but will change if asked.
>
> - What else do I need to provide?

For now, I have just several generic comments:

1. that's quite a lot of code to maintain, why not use existing UEFI vars
   as pstore storage instead?
   Not sure ancient ACPI table is a way to go, with NVDIMMs around
   it is probably possible to use pstore's ram backend or make it work
   with nvdimms directly. The only benefit of ERST is that it should
   just work without extra configuration, but then UEFI backend
   would probably also just work.

EJD: UEFI is not available in all virtual machine types. While perhaps 
ancient, ACPI ERST has been around for a long time, and most bare metal (x86_64) 
machines implement this in BIOS.
EJD: My exposure to NVDIMM is limited, but it seems utilizing it as a storage 
backend to pstore would be quite difficult.

2. patch is too big to review, please split it up in smaller chunks.

EJD: Done.

3. Use of packed structures is discouraged in new ACPI code,
   see build_ghes_v2() as an example for building ACPI tables.

EJD: Done. Thanks for the pointer.

4. Maybe instead of SYSBUS device, implement it as a PCI device and
   use its BAR/control registers for pstore storage and control interface.
   It could save you headache of picking address where to map it +
   it would take care of migration part automatically, as firmware
   would do it for you and then QEMU could pickup firmware programmed
   address and put it into ERST table.

EJD: Thanks for the idea. For now I've left it as a SYSBUS device; we can 
revisit as needed.

5. instead of dealing with file for storage directly, reuse hostmem backend
   to provide it for your device. ex: pc-dimm. i.e. split device
   on frontend and backend

EJD: I had looked into that prior to posting v1. The entire ERST storage is not 
memory mapped, just an exchange buffer. So the hostmem backend is not suitable 
for this purpose.


> Signed-off-by: Eric DeVolder 
>
> ---
>  hw/acpi/erst.c | 909 
> +
>  hw/acpi/meson.build|   1 +
>  hw/i386/acpi-build.c   |   4 +
>  include/hw/acpi/erst.h |  97 ++
>  4 files changed, 1011 insertions(+)
>  create mode 100644 hw/acpi/erst.c
>  create mode 100644 include/hw/acpi/erst.h
>



[PATCH v2 3/7] ACPI ERST: support for ACPI ERST feature

2021-02-08 Thread Eric DeVolder
This change implements the support for the ACPI ERST feature[1,2].

The size of the ACPI ERST storage is declared via the QEMU
global parameter acpi-erst.size. The size can range from 64KiB
to 64MiB. The default is 64KiB.

The location of the ACPI ERST storage backing file is declared
via the QEMU global parameter acpi-erst.filename. The default
is acpi-erst.backing.

[1] "Advanced Configuration and Power Interface Specification",
version 6.2, May 2017.
https://www.uefi.org/sites/default/files/resources/ACPI_6_2.pdf

[2] "Unified Extensible Firmware Interface Specification",
version 2.8, March 2019.
https://uefi.org/sites/default/files/resources/UEFI_Spec_2_8_final.pdf

Signed-off-by: Eric DeVolder 
---
 hw/acpi/erst.c | 952 +
 1 file changed, 952 insertions(+)
 create mode 100644 hw/acpi/erst.c

diff --git a/hw/acpi/erst.c b/hw/acpi/erst.c
new file mode 100644
index 000..3a342f9
--- /dev/null
+++ b/hw/acpi/erst.c
@@ -0,0 +1,952 @@
+/*
+ * ACPI Error Record Serialization Table, ERST, Implementation
+ *
+ * Copyright (c) 2020 Oracle and/or its affiliates.
+ *
+ * See ACPI specification,
+ * "ACPI Platform Error Interfaces" : "Error Serialization"
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include 
+#include 
+#include 
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/sysbus.h"
+#include "qom/object_interfaces.h"
+#include "qemu/error-report.h"
+#include "migration/vmstate.h"
+#include "hw/qdev-properties.h"
+#include "hw/acpi/acpi.h"
+#include "hw/acpi/acpi-defs.h"
+#include "hw/acpi/aml-build.h"
+#include "hw/acpi/bios-linker-loader.h"
+#include "exec/address-spaces.h"
+#include "hw/acpi/erst.h"
+
+#ifdef _ERST_DEBUG
+#define erst_debug(fmt, ...) \
+do { fprintf(stderr, fmt, ## __VA_ARGS__); fflush(stderr); } while (0)
+#else
+#define erst_debug(fmt, ...) do { } while (0)
+#endif
+
+/* See UEFI spec, Appendix N Common Platform Error Record */
+/* UEFI CPER allows for an OSPM book keeping area in the record */
+#define UEFI_CPER_RECORD_MIN_SIZE 128U
+#define UEFI_CPER_SIZE_OFFSET 20U
+#define UEFI_CPER_RECORD_ID_OFFSET 96U
+#define IS_UEFI_CPER_RECORD(ptr) \
+(((ptr)[0] == 'C') && \
+ ((ptr)[1] == 'P') && \
+ ((ptr)[2] == 'E') && \
+ ((ptr)[3] == 'R'))
+#define THE_UEFI_CPER_RECORD_ID(ptr) \
+(*(uint64_t *)(&(ptr)[UEFI_CPER_RECORD_ID_OFFSET]))
+
+#define ERST_INVALID_RECORD_ID (~0UL)
+#define ERST_EXECUTE_OPERATION_MAGIC 0x9CUL
+#define ERST_CSR_ACTION (0UL << 3) /* action (cmd) */
+#define ERST_CSR_VALUE  (1UL << 3) /* argument/value (data) */
+
+/*
+ * As ERST_IOMEM_SIZE is used to map the ERST into the guest,
+ * it should/must be an integer multiple of PAGE_SIZE.
+ * NOTE that any change to this value will make any pre-
+ * existing backing files, not of the same ERST_IOMEM_SIZE,
+ * unusable to the guest.
+ */
+#define ERST_IOMEM_SIZE (2UL * 4096)
+
+/*
+ * This implementation is an ACTION (cmd) and VALUE (data)
+ * interface consisting of just two 64-bit registers.
+ */
+#define ERST_REG_LEN (2UL * sizeof(uint64_t))
+
+/*
+ * The space not utilized by the register interface is the
+ * buffer for exchanging ERST record contents.
+ */
+#define ERST_RECORD_SIZE (ERST_IOMEM_SIZE - ERST_REG_LEN)
+
+/*
+ * Mode to be used for backing file
+ */
+#define ERST_BACKING_FILE_MODE 0644 /* S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH */
+
+#define ACPIERST(obj) \
+OBJECT_CHECK(ERSTDeviceState, (obj), TYPE_ACPI_ERST)
+#define ACPIERST_CLASS(oc) \
+OBJECT_CLASS_CHECK(ERSTDeviceStateClass, (oc), TYPE_ACPI_ERST)
+#define ACPIERST_GET_CLASS(obj) \
+OBJECT_GET_CLASS(ERSTDeviceStateClass, (obj), TYPE_ACPI_ERST)
+
+static hwaddr erst_base;
+
+typedef struct {
+SysBusDevice parent_obj;
+
+MemoryRegion iomem;
+uint32_t prop_size;
+char *prop_filename;
+hwaddr base;
+
+uint8_t operation;
+uint8_t busy_status;
+uint8_t command_status;
+uint32_t record_offset;
+uint32_t record_count;
+uint64_t reg_action;
+uint64_t reg_value;
+uint64_t record_identifier;
+
+unsigned next_record_index;
+uint8_t record[ERST_RECORD_SIZE]; /* read/written directly by guest */
+uint8_t tmp_record[ERST_RECORD_SIZE]; /* intermediate manipulation buffer 
*/
+uint8_t *nvram; /* persistent storage, of length prop_size */
+
+} ERSTDeviceState;
+
+static void 

Re: [PATCH RESEND] hw/net: fsl_etsec: Do not reject short frames

2021-02-08 Thread Bin Meng
Cc'ing libSLiRP

Hi Peter,

On Tue, Feb 9, 2021 at 12:09 AM Peter Maydell  wrote:
>
> On Mon, 8 Feb 2021 at 14:53, Bin Meng  wrote:
> >
> > From: Bin Meng 
> >
> > As of today both slirp and tap networking do not pad short frames
> > (e.g.: an ARP packet) to the minimum frame size of 60 bytes.
> >
> > If eTSEC is programmed to reject short frames, ARP requests will be
> > dropped, preventing the guest from becoming visible on the network.
> >
> > The same issue was reported on e1000 and vmxenet3 before, see:
> >
> > commit 78aeb23eded2 ("e1000: Pad short frames to minimum size (60 bytes)")
> > commit 40a87c6c9b11 ("vmxnet3: Pad short frames to minimum size (60 bytes)")
>
> How a short frame should be handled is ethernet device specific:
> what is correct for one device model doesn't necessarily apply
> to another.

I dug into the history of the above 2 commits and they are the same
issue caused by slirp and tap networking, and worked around in the
ethernet controller models.

>
> > Ideally this should be fixed on the slirp/tap networking side to
> > pad short frames to the minimum frame length, but I am not sure
> > whether that's doable.
>
> It would be useful to investigate further exactly where these
> short frames are coming from. If one guest is sending out short
> frames, or we are doing tap networking and get a genuine short
> frame from some external host then we should pass them to the
> guest as short frames; if QEMU itself is generating frames (eg
> from the 'fake' hosts in usermode networking) then it should be
> generating valid frames, not bogus ones, and we should fix whatever
> bit of code that is.

From what I can tell, it's the QEMU networking code that generates such
short frames.

However, it looks like no one has ever attempted to fix that in the QEMU
networking, instead the ethernet controller models are patched in the
*receive* path, which is to pad such short frames to 60 bytes in e1000
and vmxnet3.

>
> > This commit changes the code to ignore the RCTRL_RSF setting and
> > still allow receiving the short frame. The log message is updated
> > to mention the reject short frames functionality is unimplemented.
> >
> > Signed-off-by: Bin Meng 
> > ---
> >
> > RESEND using correct email address
> >
> >  hw/net/fsl_etsec/rings.c | 11 +--
> >  1 file changed, 9 insertions(+), 2 deletions(-)
> >
> > diff --git a/hw/net/fsl_etsec/rings.c b/hw/net/fsl_etsec/rings.c
> > index 121415a..503b4d3 100644
> > --- a/hw/net/fsl_etsec/rings.c
> > +++ b/hw/net/fsl_etsec/rings.c
> > @@ -502,10 +502,17 @@ ssize_t etsec_rx_ring_write(eTSEC *etsec, const 
> > uint8_t *buf, size_t size)
> >  return -1;
> >  }
> >
> > +/*
> > + * Both slirp and tap networking do not pad short frames
> > + * (e.g.: an ARP packet) to the minimum frame size of 60 bytes.
> > + *
> > + * If eTSEC is programmed to reject short frames, ARP requests
> > + * will be dropped, preventing the guest from becoming visible
> > + * on the network.
> > + */
> >  if ((etsec->regs[RCTRL].value & RCTRL_RSF) && (size < 60)) {
> >  /* CRC is not in the packet yet, so short frame is below 60 bytes 
> > */
> > -RING_DEBUG("%s: Drop short frame\n", __func__);
> > -return -1;
> > +RING_DEBUG("%s: Drop short frame not implemented\n", __func__);
> >  }
>
> This doesn't look right. If the guest programs the device to
> reject frames less than 60 bytes and then expects to receive a
> frame that's less than 60 bytes, that's a guest bug. If QEMU
> itself is generating packets to send and they're short that sounds
> like a bug elsewhere in QEMU.
>
> But I think the actual problem here is much simpler:
> the datasheet says
> # RSF: Receive short frame mode. When set, enables the reception of
> # frames shorter than 64 bytes. [...]
> #0 Ethernet frames less than 64B in length are silently dropped
> #1 Frames less than 64B are accepted upon a DA match
> (https://www.nxp.com/docs/en/reference-manual/MPC8548ERM.pdf chapter 14)
>
> whereas the QEMU code is doing the reverse: dropping short
> packets if the bit is 1.

Yes, that's correct. I will revise my commit message in v2.

>
> If you fix this bug by reversing the sense of the test on the
> RSF bit, does it make your guest happier ?

Yes.

Regards,
Bin



[RFC 1/1] vhost-vdmabuf: Add virtio based Dmabuf device

2021-02-08 Thread Vivek Kasireddy
This patch provides the implementation of the virtio dmabuf device
that is used to share a dmabuf created in the Guest with the Host.
Once the vhost vdmabuf kernel driver on the Host alerts Qemu about
a new dmabuf from the Guest, it is first imported and then converted
into a texture using EGL and eventually displayed on the screen.

Cc: Gerd Hoffmann 
Signed-off-by: Vivek Kasireddy 
Signed-off-by: Dongwon Kim 
---
 configure   |   8 +
 hw/virtio/meson.build   |   1 +
 hw/virtio/vhost-backend.c   |  10 +
 hw/virtio/vhost-vdmabuf.c   | 526 
 include/hw/pci/pci.h|   1 +
 include/hw/virtio/vhost-backend.h   |   2 +
 include/hw/virtio/vhost-vdmabuf.h   |  76 +++
 include/standard-headers/linux/virtio_ids.h |   1 +
 include/ui/console.h|   1 +
 linux-headers/linux/vhost.h |   3 +
 meson.build |   1 +
 ui/console.c|   7 +
 12 files changed, 637 insertions(+)
 create mode 100644 hw/virtio/vhost-vdmabuf.c
 create mode 100644 include/hw/virtio/vhost-vdmabuf.h

diff --git a/configure b/configure
index a34f91171d..90a35317bf 100755
--- a/configure
+++ b/configure
@@ -345,6 +345,7 @@ vhost_net="$default_feature"
 vhost_crypto="$default_feature"
 vhost_scsi="$default_feature"
 vhost_vsock="$default_feature"
+vhost_vdmabuf="$default_feature"
 vhost_user="no"
 vhost_user_blk_server="auto"
 vhost_user_fs="$default_feature"
@@ -1263,6 +1264,10 @@ for opt do
   ;;
   --enable-vhost-vsock) vhost_vsock="yes"
   ;;
+  --disable-vhost-vdmabuf) vhost_vdmabuf="no"
+  ;;
+  --enable-vhost-vdmabuf) vhost_vdmabuf="yes"
+  ;;
   --disable-vhost-user-blk-server) vhost_user_blk_server="disabled"
   ;;
   --enable-vhost-user-blk-server) vhost_user_blk_server="enabled"
@@ -5766,6 +5771,9 @@ if test "$vhost_vsock" = "yes" ; then
 echo "CONFIG_VHOST_USER_VSOCK=y" >> $config_host_mak
   fi
 fi
+if test "$vhost_vdmabuf" = "yes" ; then
+  echo "CONFIG_VHOST_VDMABUF=y" >> $config_host_mak
+fi
 if test "$vhost_kernel" = "yes" ; then
   echo "CONFIG_VHOST_KERNEL=y" >> $config_host_mak
 fi
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
index fbff9bc9d4..f2f5408fda 100644
--- a/hw/virtio/meson.build
+++ b/hw/virtio/meson.build
@@ -21,6 +21,7 @@ virtio_ss.add(when: 'CONFIG_VHOST_USER_FS', if_true: 
files('vhost-user-fs.c'))
 virtio_ss.add(when: ['CONFIG_VHOST_USER_FS', 'CONFIG_VIRTIO_PCI'], if_true: 
files('vhost-user-fs-pci.c'))
 virtio_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem.c'))
 virtio_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock.c', 
'vhost-vsock-common.c'))
+virtio_ss.add(when: 'CONFIG_VHOST_VDMABUF', if_true: files('vhost-vdmabuf.c'))
 virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: 
files('vhost-user-vsock.c', 'vhost-vsock-common.c'))
 virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c'))
 virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c'))
diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
index 31b33bde37..39a64e101d 100644
--- a/hw/virtio/vhost-backend.c
+++ b/hw/virtio/vhost-backend.c
@@ -214,6 +214,13 @@ static int vhost_kernel_vsock_set_running(struct vhost_dev 
*dev, int start)
 }
 #endif /* CONFIG_VHOST_VSOCK */
 
+#ifdef CONFIG_VHOST_VDMABUF
+static int vhost_kernel_vdmabuf_set_running(struct vhost_dev *dev, int start)
+{
+return vhost_kernel_call(dev, VHOST_VDMABUF_SET_RUNNING, );
+}
+#endif /* CONFIG_VHOST_VDMABUF */
+
 static void vhost_kernel_iotlb_read(void *opaque)
 {
 struct vhost_dev *dev = opaque;
@@ -321,6 +328,9 @@ static const VhostOps kernel_ops = {
 .vhost_vsock_set_guest_cid = vhost_kernel_vsock_set_guest_cid,
 .vhost_vsock_set_running = vhost_kernel_vsock_set_running,
 #endif /* CONFIG_VHOST_VSOCK */
+#ifdef CONFIG_VHOST_VDMABUF
+.vhost_vdmabuf_set_running = vhost_kernel_vdmabuf_set_running,
+#endif /* CONFIG_VHOST_VDMABUF */
 .vhost_set_iotlb_callback = vhost_kernel_set_iotlb_callback,
 .vhost_send_device_iotlb_msg = vhost_kernel_send_device_iotlb_msg,
 };
diff --git a/hw/virtio/vhost-vdmabuf.c b/hw/virtio/vhost-vdmabuf.c
new file mode 100644
index 00..06890e6b2a
--- /dev/null
+++ b/hw/virtio/vhost-vdmabuf.c
@@ -0,0 +1,526 @@
+/*
+ * Implementation of Virtio based Dmabuf device -- mostly inspired by
+ * vfio/display.c and vhost-vsock.c.
+ *
+ * Copyright 2021 Intel Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include 
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/typedefs.h"
+#include "monitor/monitor.h"
+#include "virtio-pci.h"
+#include "qemu/module.h"
+#include "qemu/uuid.h"
+#include "sysemu/sysemu.h"
+#include "ui/console.h"

[PATCH v2 2/7] ACPI ERST: header file for erst

2021-02-08 Thread Eric DeVolder
This change introduces the definitions for ACPI ERST support.

Signed-off-by: Eric DeVolder 
---
 include/hw/acpi/erst.h | 77 ++
 1 file changed, 77 insertions(+)
 create mode 100644 include/hw/acpi/erst.h

diff --git a/include/hw/acpi/erst.h b/include/hw/acpi/erst.h
new file mode 100644
index 000..be9b3fa
--- /dev/null
+++ b/include/hw/acpi/erst.h
@@ -0,0 +1,77 @@
+/*
+ * ACPI Error Record Serialization Table, ERST, Implementation
+ *
+ * Copyright (c) 2020 Oracle and/or its affiliates.
+ *
+ * See ACPI specification, "ACPI Platform Error Interfaces"
+ *  "Error Serialization"
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+#ifndef HW_ACPI_ERST_H
+#define HW_ACPI_ERST_H
+
+void build_erst(GArray *table_data, BIOSLinker *linker, hwaddr base);
+
+#define TYPE_ACPI_ERST "acpi-erst"
+
+#define ACPI_ERST_ACTION_BEGIN_WRITE_OPERATION 0x0
+#define ACPI_ERST_ACTION_BEGIN_READ_OPERATION  0x1
+#define ACPI_ERST_ACTION_BEGIN_CLEAR_OPERATION 0x2
+#define ACPI_ERST_ACTION_END_OPERATION 0x3
+#define ACPI_ERST_ACTION_SET_RECORD_OFFSET 0x4
+#define ACPI_ERST_ACTION_EXECUTE_OPERATION 0x5
+#define ACPI_ERST_ACTION_CHECK_BUSY_STATUS 0x6
+#define ACPI_ERST_ACTION_GET_COMMAND_STATUS0x7
+#define ACPI_ERST_ACTION_GET_RECORD_IDENTIFIER 0x8
+#define ACPI_ERST_ACTION_SET_RECORD_IDENTIFIER 0x9
+#define ACPI_ERST_ACTION_GET_RECORD_COUNT  0xA
+#define ACPI_ERST_ACTION_BEGIN_DUMMY_WRITE_OPERATION   0xB
+#define ACPI_ERST_ACTION_RESERVED  0xC
+#define ACPI_ERST_ACTION_GET_ERROR_LOG_ADDRESS_RANGE   0xD
+#define ACPI_ERST_ACTION_GET_ERROR_LOG_ADDRESS_LENGTH  0xE
+#define ACPI_ERST_ACTION_GET_ERROR_LOG_ADDRESS_RANGE_ATTRIBUTES 0xF
+#define ACPI_ERST_ACTION_GET_EXECUTE_OPERATION_TIMINGS 0x10
+#define ACPI_ERST_MAX_ACTIONS \
+(ACPI_ERST_ACTION_GET_EXECUTE_OPERATION_TIMINGS + 1)
+
+#define ACPI_ERST_STATUS_SUCCESS0x00
+#define ACPI_ERST_STATUS_NOT_ENOUGH_SPACE   0x01
+#define ACPI_ERST_STATUS_HARDWARE_NOT_AVAILABLE 0x02
+#define ACPI_ERST_STATUS_FAILED 0x03
+#define ACPI_ERST_STATUS_RECORD_STORE_EMPTY 0x04
+#define ACPI_ERST_STATUS_RECORD_NOT_FOUND   0x05
+
+#define ACPI_ERST_INST_READ_REGISTER 0x00
+#define ACPI_ERST_INST_READ_REGISTER_VALUE   0x01
+#define ACPI_ERST_INST_WRITE_REGISTER0x02
+#define ACPI_ERST_INST_WRITE_REGISTER_VALUE  0x03
+#define ACPI_ERST_INST_NOOP  0x04
+#define ACPI_ERST_INST_LOAD_VAR1 0x05
+#define ACPI_ERST_INST_LOAD_VAR2 0x06
+#define ACPI_ERST_INST_STORE_VAR10x07
+#define ACPI_ERST_INST_ADD   0x08
+#define ACPI_ERST_INST_SUBTRACT  0x09
+#define ACPI_ERST_INST_ADD_VALUE 0x0A
+#define ACPI_ERST_INST_SUBTRACT_VALUE0x0B
+#define ACPI_ERST_INST_STALL 0x0C
+#define ACPI_ERST_INST_STALL_WHILE_TRUE  0x0D
+#define ACPI_ERST_INST_SKIP_NEXT_INSTRUCTION_IF_TRUE 0x0E
+#define ACPI_ERST_INST_GOTO  0x0F
+#define ACPI_ERST_INST_SET_SRC_ADDRESS_BASE  0x10
+#define ACPI_ERST_INST_SET_DST_ADDRESS_BASE  0x11
+#define ACPI_ERST_INST_MOVE_DATA 0x12
+
+#endif
+
-- 
1.8.3.1




[PATCH 2/4] accel/tcg: Create io_recompile_replay_branch hook

2021-02-08 Thread Richard Henderson
Create a hook in which to split out the mips and
sh4 ifdefs from cpu_io_recompile.

Signed-off-by: Richard Henderson 
---
 include/hw/core/tcg-cpu-ops.h | 10 ++
 accel/tcg/translate-all.c | 17 +
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h
index ac3bb051f2..ddf334411f 100644
--- a/include/hw/core/tcg-cpu-ops.h
+++ b/include/hw/core/tcg-cpu-ops.h
@@ -88,6 +88,16 @@ struct TCGCPUOps {
  */
 bool (*debug_check_watchpoint)(CPUState *cpu, CPUWatchpoint *wp);
 
+/**
+ * @io_recompile_replay_branch: Callback for cpu_io_recompile.
+ *
+ * The cpu has been stopped, and cpu_restore_state_from_tb has been
+ * called.  If the faulting instruction is in a delay slot, and the
+ * target architecture requires re-execution of the branch, then
+ * adjust the cpu state as required and return true.
+ */
+bool (*io_recompile_replay_branch)(CPUState *cpu,
+   const TranslationBlock *tb);
 #endif /* CONFIG_SOFTMMU */
 #endif /* NEED_CPU_H */
 
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 81d4c83f22..6eb37883bd 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -60,6 +60,7 @@
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
 #include "qapi/error.h"
+#include "hw/core/tcg-cpu-ops.h"
 #include "internal.h"
 
 /* #define DEBUG_TB_INVALIDATE */
@@ -2420,6 +2421,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
 CPUArchState *env = cpu->env_ptr;
 #endif
 TranslationBlock *tb;
+CPUClass *cc;
 uint32_t n;
 
 tb = tcg_tb_lookup(retaddr);
@@ -2429,11 +2431,18 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
 }
 cpu_restore_state_from_tb(cpu, tb, retaddr, true);
 
-/* On MIPS and SH, delay slot instructions can only be restarted if
-   they were already the first instruction in the TB.  If this is not
-   the first instruction in a TB then re-execute the preceding
-   branch.  */
+/*
+ * Some guests must re-execute the branch when re-executing a delay
+ * slot instruction.  When this is the case, adjust icount and N
+ * to account for the re-execution of the branch.
+ */
 n = 1;
+cc = CPU_GET_CLASS(cpu);
+if (cc->tcg_ops->io_recompile_replay_branch &&
+cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
+cpu_neg(cpu)->icount_decr.u16.low++;
+n = 2;
+}
 #if defined(TARGET_MIPS)
 if ((env->hflags & MIPS_HFLAG_BMASK) != 0
 && env->active_tc.PC != tb->pc) {
-- 
2.25.1




[RFC 0/1] vhost-vdmabuf: Add virtio based Dmabuf device

2021-02-08 Thread Vivek Kasireddy
The Virtio based Dmabuf (Vdmabuf) device provides a way to transfer a
dmabuf created in the Guest to the Host. This patch is still a WIP but
is still posted to the ML to provide additional details and context for
the discussion associated with the vhost-vdmabuf and virtio-vdmabuf
kernel drivers here:

https://lists.linuxfoundation.org/pipermail/virtualization/2021-February/052233.html

Vivek Kasireddy (1):
  vhost-vdmabuf: Add virtio based Dmabuf device

 configure   |   8 +
 hw/virtio/meson.build   |   1 +
 hw/virtio/vhost-backend.c   |  10 +
 hw/virtio/vhost-vdmabuf.c   | 526 
 include/hw/pci/pci.h|   1 +
 include/hw/virtio/vhost-backend.h   |   2 +
 include/hw/virtio/vhost-vdmabuf.h   |  76 +++
 include/standard-headers/linux/virtio_ids.h |   1 +
 include/ui/console.h|   1 +
 linux-headers/linux/vhost.h |   3 +
 meson.build |   1 +
 ui/console.c|   7 +
 12 files changed, 637 insertions(+)
 create mode 100644 hw/virtio/vhost-vdmabuf.c
 create mode 100644 include/hw/virtio/vhost-vdmabuf.h

-- 
2.26.2




Re: [PATCH v5 09/15] qapi/introspect.py: create a typed 'Annotated' data strutcure

2021-02-08 Thread John Snow

On 2/8/21 9:36 AM, Markus Armbruster wrote:

John Snow  writes:


Presently, we use a tuple to attach a dict containing annotations
(comments and compile-time conditionals) to a tree node. This is
undesirable because dicts are difficult to strongly type; promoting it
to a real class allows us to name the values and types of the
annotations we are expecting.

In terms of typing, the Annotated type serves as a generic container
where the annotated node's type is preserved, allowing for greater
specificity than we'd be able to provide without a generic.

Signed-off-by: John Snow 
---
  scripts/qapi/introspect.py | 77 ++
  1 file changed, 44 insertions(+), 33 deletions(-)

diff --git a/scripts/qapi/introspect.py b/scripts/qapi/introspect.py
index 8e019b4a26a..b9427aba449 100644
--- a/scripts/qapi/introspect.py
+++ b/scripts/qapi/introspect.py
@@ -13,8 +13,12 @@
  from typing import (
  Any,
  Dict,
+Generic,
+Iterable,
  List,
  Optional,
+Tuple,
+TypeVar,
  Union,
  )
  
@@ -51,15 +55,25 @@

  _scalar = Union[str, bool, None]
  _nonscalar = Union[Dict[str, _stub], List[_stub]]
  _value = Union[_scalar, _nonscalar]
-# TreeValue = TODO, in a forthcoming commit.
+TreeValue = Union[_value, 'Annotated[_value]']
  
  
-def _make_tree(obj, ifcond, comment=None):

-extra = {
-'if': ifcond,
-'comment': comment
-}
-return (obj, extra)
+_NodeT = TypeVar('_NodeT', bound=_value)
+
+
+class Annotated(Generic[_NodeT]):


My gut feeling is "generic type is overkill for this purpose".  Let's go
with it anyway, because

1. It's not wrong.



A famous phrase in Computer Science.


2. I don't have enough experience with Python type hints for reliable
gut feelings.



You are exactly correct that the power it offers us here isn't strictly 
necessary. An argument might be that removing it makes the types easier 
to read, but I think at a certain level of involvement with mypy that it 
isn't feasible to escape understanding Generics, and we are at that level.



3. I plan to overhaul the C generation part relatively soon (after your
work has landed, don't worry), and I can try to make it simpler then.



Yeah. The generation and typing can likely improve substantially at that 
point in time. Hopefully the type hints help guide a design that's nice 
to type and nice to read.



+"""
+Annotated generally contains a SchemaInfo-like type (as a dict),
+But it also used to wrap comments/ifconds around scalar leaf values,
+for the benefit of features and enums.
+"""
+# TODO: Remove after Python 3.7 adds @dataclass:
+# pylint: disable=too-few-public-methods
+def __init__(self, value: _NodeT, ifcond: Iterable[str],
+ comment: Optional[str] = None):
+self.value = value
+self.comment: Optional[str] = comment
+self.ifcond: Tuple[str, ...] = tuple(ifcond)
  
  
  def _tree_to_qlit(obj, level=0, dict_value=False):

@@ -67,24 +81,20 @@ def _tree_to_qlit(obj, level=0, dict_value=False):
  def indent(level):
  return level * 4 * ' '
  
-if isinstance(obj, tuple):

-ifobj, extra = obj
-ifcond = extra.get('if')
-comment = extra.get('comment')
-
+if isinstance(obj, Annotated):
  # NB: _tree_to_qlit is called recursively on the values of a key:value
  # pair; those values can't be decorated with comments or conditionals.
  msg = "dict values cannot have attached comments or if-conditionals."
  assert not dict_value, msg
  
  ret = ''

-if comment:
-ret += indent(level) + '/* %s */\n' % comment
-if ifcond:
-ret += gen_if(ifcond)
-ret += _tree_to_qlit(ifobj, level)
-if ifcond:
-ret += '\n' + gen_endif(ifcond)
+if obj.comment:
+ret += indent(level) + '/* %s */\n' % obj.comment
+if obj.ifcond:
+ret += gen_if(obj.ifcond)
+ret += _tree_to_qlit(obj.value, level)
+if obj.ifcond:
+ret += '\n' + gen_endif(obj.ifcond)
  return ret
  
  ret = ''

@@ -201,7 +211,7 @@ def _use_type(self, typ):
  
  @staticmethod

  def _gen_features(features):
-return [_make_tree(f.name, f.ifcond) for f in features]
+return [Annotated(f.name, f.ifcond) for f in features]
  
  def _gen_tree(self, name, mtype, obj, ifcond, features):

  comment: Optional[str] = None
@@ -215,7 +225,7 @@ def _gen_tree(self, name, mtype, obj, ifcond, features):
  obj['meta-type'] = mtype
  if features:
  obj['features'] = self._gen_features(features)
-self._trees.append(_make_tree(obj, ifcond, comment))
+self._trees.append(Annotated(obj, ifcond, comment))
  
  def _gen_member(self, member):

  obj = {'name': member.name, 'type': self._use_type(member.type)}
@@ -223,7 +233,7 @@ def _gen_member(self, member):
 

[PATCH 1/4] exec: Move TranslationBlock typedef to qemu/typedefs.h

2021-02-08 Thread Richard Henderson
This also means we don't need an extra declaration of
the structure in hw/core/cpu.h.

Signed-off-by: Richard Henderson 
---
 include/exec/tb-context.h | 1 -
 include/hw/core/cpu.h | 4 +---
 include/hw/core/tcg-cpu-ops.h | 3 +--
 include/qemu/typedefs.h   | 1 +
 target/arm/internals.h| 3 +--
 target/cris/translate.c   | 2 +-
 target/lm32/translate.c   | 2 +-
 target/moxie/translate.c  | 2 +-
 target/unicore32/translate.c  | 2 +-
 9 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
index ec4c13b455..cc33979113 100644
--- a/include/exec/tb-context.h
+++ b/include/exec/tb-context.h
@@ -26,7 +26,6 @@
 #define CODE_GEN_HTABLE_BITS 15
 #define CODE_GEN_HTABLE_SIZE (1 << CODE_GEN_HTABLE_BITS)
 
-typedef struct TranslationBlock TranslationBlock;
 typedef struct TBContext TBContext;
 
 struct TBContext {
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 38d813c389..c005d3dc2d 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -74,8 +74,6 @@ typedef enum MMUAccessType {
 
 typedef struct CPUWatchpoint CPUWatchpoint;
 
-struct TranslationBlock;
-
 /* see tcg-cpu-ops.h */
 struct TCGCPUOps;
 
@@ -375,7 +373,7 @@ struct CPUState {
 IcountDecr *icount_decr_ptr;
 
 /* Accessed in parallel; all accesses must be atomic */
-struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
+TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
 
 struct GDBRegisterState *gdb_regs;
 int gdb_num_regs;
diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h
index ccc97d1894..ac3bb051f2 100644
--- a/include/hw/core/tcg-cpu-ops.h
+++ b/include/hw/core/tcg-cpu-ops.h
@@ -30,8 +30,7 @@ struct TCGCPUOps {
  * If more state needs to be restored, the target must implement a
  * function to restore all the state, and register it here.
  */
-void (*synchronize_from_tb)(CPUState *cpu,
-const struct TranslationBlock *tb);
+void (*synchronize_from_tb)(CPUState *cpu, const TranslationBlock *tb);
 /** @cpu_exec_enter: Callback for cpu_exec preparation */
 void (*cpu_exec_enter)(CPUState *cpu);
 /** @cpu_exec_exit: Callback for cpu_exec cleanup */
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index dc39b05c30..ee60eb3de4 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -120,6 +120,7 @@ typedef struct ReservedRegion ReservedRegion;
 typedef struct SavedIOTLB SavedIOTLB;
 typedef struct SHPCDevice SHPCDevice;
 typedef struct SSIBus SSIBus;
+typedef struct TranslationBlock TranslationBlock;
 typedef struct VirtIODevice VirtIODevice;
 typedef struct Visitor Visitor;
 typedef struct VMChangeStateEntry VMChangeStateEntry;
diff --git a/target/arm/internals.h b/target/arm/internals.h
index 448982dd2f..7d26ce0c9d 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -172,8 +172,7 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu);
 void arm_translate_init(void);
 
 #ifdef CONFIG_TCG
-void arm_cpu_synchronize_from_tb(CPUState *cs,
- const struct TranslationBlock *tb);
+void arm_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb);
 #endif /* CONFIG_TCG */
 
 
diff --git a/target/cris/translate.c b/target/cris/translate.c
index c893f877ab..65c168c0c7 100644
--- a/target/cris/translate.c
+++ b/target/cris/translate.c
@@ -132,7 +132,7 @@ typedef struct DisasContext {
 
 int delayed_branch;
 
-struct TranslationBlock *tb;
+TranslationBlock *tb;
 int singlestep_enabled;
 } DisasContext;
 
diff --git a/target/lm32/translate.c b/target/lm32/translate.c
index 030b232d66..20c70d03f1 100644
--- a/target/lm32/translate.c
+++ b/target/lm32/translate.c
@@ -93,7 +93,7 @@ typedef struct DisasContext {
 unsigned int tb_flags, synced_flags; /* tb dependent flags.  */
 int is_jmp;
 
-struct TranslationBlock *tb;
+TranslationBlock *tb;
 int singlestep_enabled;
 
 uint32_t features;
diff --git a/target/moxie/translate.c b/target/moxie/translate.c
index d5fb27dfb8..24a742b25e 100644
--- a/target/moxie/translate.c
+++ b/target/moxie/translate.c
@@ -36,7 +36,7 @@
 
 /* This is the state at translation time.  */
 typedef struct DisasContext {
-struct TranslationBlock *tb;
+TranslationBlock *tb;
 target_ulong pc, saved_pc;
 uint32_t opcode;
 uint32_t fp_status;
diff --git a/target/unicore32/translate.c b/target/unicore32/translate.c
index 962f9877a0..370709c9ea 100644
--- a/target/unicore32/translate.c
+++ b/target/unicore32/translate.c
@@ -34,7 +34,7 @@ typedef struct DisasContext {
 int condjmp;
 /* The label that will be jumped to when the instruction is skipped.  */
 TCGLabel *condlabel;
-struct TranslationBlock *tb;
+TranslationBlock *tb;
 int singlestep_enabled;
 #ifndef CONFIG_USER_ONLY
 int user;
-- 
2.25.1




[PATCH 3/4] target/mips: Create mips_io_recompile_replay_branch

2021-02-08 Thread Richard Henderson
Move the code from accel/tcg/translate-all.c to target/mips/cpu.c.

Signed-off-by: Richard Henderson 
---
Cc: Philippe Mathieu-Daudé 
Cc: Aurelien Jarno 
Cc: Jiaxun Yang 
---
 accel/tcg/translate-all.c | 12 ++--
 target/mips/cpu.c | 18 ++
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 6eb37883bd..470657b02a 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -2417,7 +2417,7 @@ void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
  */
 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
 {
-#if defined(TARGET_MIPS) || defined(TARGET_SH4)
+#if defined(TARGET_SH4)
 CPUArchState *env = cpu->env_ptr;
 #endif
 TranslationBlock *tb;
@@ -2443,15 +2443,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
 cpu_neg(cpu)->icount_decr.u16.low++;
 n = 2;
 }
-#if defined(TARGET_MIPS)
-if ((env->hflags & MIPS_HFLAG_BMASK) != 0
-&& env->active_tc.PC != tb->pc) {
-env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 2 : 4);
-cpu_neg(cpu)->icount_decr.u16.low++;
-env->hflags &= ~MIPS_HFLAG_BMASK;
-n = 2;
-}
-#elif defined(TARGET_SH4)
+#if defined(TARGET_SH4)
 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
 && env->pc != tb->pc) {
 env->pc -= 2;
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
index ad163ead62..bf70c77295 100644
--- a/target/mips/cpu.c
+++ b/target/mips/cpu.c
@@ -268,6 +268,23 @@ static void mips_cpu_synchronize_from_tb(CPUState *cs,
 env->hflags &= ~MIPS_HFLAG_BMASK;
 env->hflags |= tb->flags & MIPS_HFLAG_BMASK;
 }
+
+# ifndef CONFIG_USER_ONLY
+static bool mips_io_recompile_replay_branch(CPUState *cs,
+const TranslationBlock *tb)
+{
+MIPSCPU *cpu = MIPS_CPU(cs);
+CPUMIPSState *env = &cpu->env;
+
+if ((env->hflags & MIPS_HFLAG_BMASK) != 0
+&& env->active_tc.PC != tb->pc) {
+env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 2 : 4);
+env->hflags &= ~MIPS_HFLAG_BMASK;
+return true;
+}
+return false;
+}
+# endif /* !CONFIG_USER_ONLY */
 #endif /* CONFIG_TCG */
 
 static bool mips_cpu_has_work(CPUState *cs)
@@ -679,6 +696,7 @@ static struct TCGCPUOps mips_tcg_ops = {
 .do_interrupt = mips_cpu_do_interrupt,
 .do_transaction_failed = mips_cpu_do_transaction_failed,
 .do_unaligned_access = mips_cpu_do_unaligned_access,
+.io_recompile_replay_branch = mips_io_recompile_replay_branch,
 #endif /* !CONFIG_USER_ONLY */
 };
 #endif /* CONFIG_TCG */
-- 
2.25.1




Re: [PATCH v2 15/15] tcg/arm: Implement TCG_TARGET_HAS_rotv_vec

2021-02-08 Thread Peter Maydell
On Mon, 8 Feb 2021 at 04:03, Richard Henderson
 wrote:
>
> Implement via expansion, so don't actually set TCG_TARGET_HAS_rotv_vec.
>
> Signed-off-by: Richard Henderson 
> ---
>  tcg/arm/tcg-target.c.inc | 35 ++-
>  1 file changed, 34 insertions(+), 1 deletion(-)
>

Reviewed-by: Peter Maydell 

thanks
-- PMM



[PATCH 0/4] accel/tcg: Create io_recompile_replay_branch hook

2021-02-08 Thread Richard Henderson
I noticed this today while Alex and I were discussing cpu_io_recompile.
This cleanup seems much easier now that Claudio has split out TCGCPUOps.

I see that mips has a ReplayKernel test, but sh4 does not, so this
probably has non-zero testing.


r~


Richard Henderson (4):
  exec: Move TranslationBlock typedef to qemu/typedefs.h
  accel/tcg: Create io_recompile_replay_branch hook
  target/mips: Create mips_io_recompile_replay_branch
  target/sh4: Create superh_io_recompile_replay_branch

 include/exec/tb-context.h |  1 -
 include/hw/core/cpu.h |  4 +---
 include/hw/core/tcg-cpu-ops.h | 13 +++--
 include/qemu/typedefs.h   |  1 +
 target/arm/internals.h|  3 +--
 accel/tcg/translate-all.c | 31 ++-
 target/cris/translate.c   |  2 +-
 target/lm32/translate.c   |  2 +-
 target/mips/cpu.c | 18 ++
 target/moxie/translate.c  |  2 +-
 target/sh4/cpu.c  | 18 ++
 target/unicore32/translate.c  |  2 +-
 12 files changed, 64 insertions(+), 33 deletions(-)

-- 
2.25.1




Re: [PATCH v4 02/10] iotests/297: Rewrite in Python and extend reach

2021-02-08 Thread Willian Rampazzo




On 1/18/21 7:09 AM, Max Reitz wrote:

On 15.01.21 20:27, Willian Rampazzo wrote:

On Fri, Jan 15, 2021 at 2:43 PM Max Reitz  wrote:


Instead of checking iotests.py only, check all Python files in the
qemu-iotests/ directory.  Of course, most of them do not pass, so there
is an extensive skip list for now.  (The only files that do pass are
209, 254, 283, and iotests.py.)

(Alternatively, we could have the opposite, i.e. an explicit list of
files that we do want to check, but I think it is better to check files
by default.)

Unless started in debug mode (./check -d), the output has no information
on which files are tested, so we will not have a problem e.g. with
backports, where some files may be missing when compared to upstream.

Besides the technical rewrite, some more things are changed:

- For the pylint invocation, PYTHONPATH is adjusted.  This mirrors
   setting MYPYPATH for mypy.

- Also, MYPYPATH is now derived from PYTHONPATH, so that we include
   paths set by the environment.  Maybe at some point we want to let the
   check script add '../../python/' to PYTHONPATH so that iotests.py 
does

   not need to do that.

- Passing --notes=FIXME,XXX to pylint suppresses warnings for TODO
   comments.  TODO is fine, we do not need 297 to complain about such
   comments.

- The "Success" line from mypy's output is suppressed, because (A) it
   does not add useful information, and (B) it would leak information
   about the files having been tested to the reference output, which we
   decidedly do not want.

Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Max Reitz 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
  tests/qemu-iotests/297 | 110 +
  tests/qemu-iotests/297.out |   5 +-
  2 files changed, 90 insertions(+), 25 deletions(-)

diff --git a/tests/qemu-iotests/297 b/tests/qemu-iotests/297
index 5c5420712b..fa9e2cac78 100755
--- a/tests/qemu-iotests/297
+++ b/tests/qemu-iotests/297
@@ -1,4 +1,4 @@
-#!/usr/bin/env bash
+#!/usr/bin/env python3
  #
  # Copyright (C) 2020 Red Hat, Inc.
  #
@@ -15,30 +15,96 @@
  # You should have received a copy of the GNU General Public License
  # along with this program.  If not, see 
.


-seq=$(basename $0)
-echo "QA output created by $seq"
+import os
+import re
+import shutil
+import subprocess
+import sys

-status=1   # failure is the default!
+import iotests

-# get standard environment
-. ./common.rc

-if ! type -p "pylint-3" > /dev/null; then
-    _notrun "pylint-3 not found"
-fi
-if ! type -p "mypy" > /dev/null; then
-    _notrun "mypy not found"
-fi
+# TODO: Empty this list!
+SKIP_FILES = (
+    '030', '040', '041', '044', '045', '055', '056', '057', '065', 
'093',
+    '096', '118', '124', '129', '132', '136', '139', '147', '148', 
'149',
+    '151', '152', '155', '163', '165', '169', '194', '196', '199', 
'202',
+    '203', '205', '206', '207', '208', '210', '211', '212', '213', 
'216',
+    '218', '219', '222', '224', '228', '234', '235', '236', '237', 
'238',
+    '240', '242', '245', '246', '248', '255', '256', '257', '258', 
'260',
+    '262', '264', '266', '274', '277', '280', '281', '295', '296', 
'298',

+    '299', '300', '302', '303', '304', '307',
+    'nbd-fault-injector.py', 'qcow2.py', 'qcow2_format.py', 'qed.py'
+)

-pylint-3 --score=n iotests.py

-MYPYPATH=../../python/ mypy --warn-unused-configs 
--disallow-subclassing-any \

-    --disallow-any-generics --disallow-incomplete-defs \
-    --disallow-untyped-decorators --no-implicit-optional \
-    --warn-redundant-casts --warn-unused-ignores \
-    --no-implicit-reexport iotests.py
+def is_python_file(filename):
+    if not os.path.isfile(filename):
+    return False

-# success, all done
-echo "*** done"
-rm -f $seq.full
-status=0
+    if filename.endswith('.py'):
+    return True
+
+    with open(filename) as f:
+    try:
+    first_line = f.readline()
+    return re.match('^#!.*python', first_line) is not None
+    except UnicodeDecodeError:  # Ignore binary files
+    return False
+
+
+def run_linters():
+    files = [filename for filename in (set(os.listdir('.')) - 
set(SKIP_FILES))

+ if is_python_file(filename)]
+
+    iotests.logger.debug('Files to be checked:')
+    iotests.logger.debug(', '.join(sorted(files)))
+
+    print('=== pylint ===')
+    sys.stdout.flush()
+
+    # Todo notes are fine, but fixme's or xxx's should probably just be
+    # fixed (in tests, at least)
+    env = os.environ.copy()
+    try:
+    env['PYTHONPATH'] += ':../../python/'


Do you have any objection to using os.path.dirname and os.path.join
here? This would make the code more pythonic.


Intuitively, I felt a bit uneasy about os.path.join here, because it 
would make it look like this was platform-independent, when it is not: 
The colon as a PATH separator is probably more platform-dependent than 
the slashes.


So turns out there is os.pathsep, which yields a 

Re: [PULL v3 00/27] Block patches

2021-02-08 Thread Stefan Hajnoczi
On Mon, Feb 08, 2021 at 11:02:57AM +0100, Philippe Mathieu-Daudé wrote:
> On 2/8/21 10:27 AM, Stefan Hajnoczi wrote:
> > On Sat, Feb 06, 2021 at 05:03:20PM +, Peter Maydell wrote:
> >> On Fri, 5 Feb 2021 at 22:53, Peter Maydell  
> >> wrote:
> >>>
> >>> On Fri, 5 Feb 2021 at 16:45, Stefan Hajnoczi  wrote:
> 
>  The following changes since commit 
>  e2c5093c993ef646e4e28f7aa78429853bcc06ac:
> 
>    iotests: 30: drop from auto group (and effectively from make check) 
>  (2021-02-05 15:16:13 +)
> 
>  are available in the Git repository at:
> 
>    https://gitlab.com/stefanha/qemu.git tags/block-pull-request
> 
>  for you to fetch changes up to b07011f375bda3319cf72eee7cb18d310078387b:
> 
>    docs: fix Parallels Image "dirty bitmap" section (2021-02-05 16:36:36 
>  +)
> 
>  
>  Pull request
> 
>  v3:
>   * Replace {0} array initialization with {} to make clang happy [Peter]
> 
>  
> >>>
> >>>
> >>> Fails 'make check' on s390x host:
> >>
> >> I gave this a rerun to check it was reproducible (it is) and realised
> >> I missed what looks like an important line in the log. As usual,
> >> trying to disentangle which lines of a parallel make check correspond
> >> to the failure is pretty tricky, but the lines
> >>  Type 'remote-pcihost' is missing its parent 'pcie-host-bridge'
> >>
> >> are probably the proximate causes of the assertion failures.
> >>
> >> MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))}
> >> QTEST_QEMU_IMG=./qemu-img
> >> G_TEST_DBUS_DAEMON=/home/ubuntu/qemu/tests/dbus-vmstate-daemon.sh
> >> QTEST_QEMU_BINARY=./qemu-system-rx tests/qtest/qos-test --tap -k
> >> PASS 45 qtest-rx/qmp-cmd-test /rx/qmp/query-memory-size-summary
> >> SKIP
> >> MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))}
> >> QTEST_QEMU_IMG=./qemu-img
> >> G_TEST_DBUS_DAEMON=/home/ubuntu/qemu/tests/dbus-vmstate-daemon.sh
> >> QTEST_QEMU_BINARY=./qemu-system-s390x tests/qtest/pxe-test --tap -k
> >> PASS 46 qtest-rx/qmp-cmd-test /rx/qmp/query-memory-devices
> >> Type 'remote-pcihost' is missing its parent 'pcie-host-bridge'
> >> PASS 47 qtest-rx/qmp-cmd-test /rx/qmp/query-replay
> >> PASS 48 qtest-rx/qmp-cmd-test /rx/qmp/query-yank
> >> PASS 49 qtest-rx/qmp-cmd-test /rx/qmp/query-name
> >> PASS 50 qtest-rx/qmp-cmd-test /rx/qmp/query-iothreads
> >> PASS 51 qtest-rx/qmp-cmd-test /rx/qmp/query-fdsets
> >> PASS 52 qtest-rx/qmp-cmd-test /rx/qmp/query-command-line-options
> >> PASS 53 qtest-rx/qmp-cmd-test /rx/qmp/query-acpi-ospm-status
> >> PASS 54 qtest-rx/qmp-cmd-test /rx/qmp/object-add-failure-modes
> >> MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))}
> >> QTEST_QEMU_IMG=./qemu-img
> >> G_TEST_DBUS_DAEMON=/home/ubuntu/qemu/tests/dbus-vmstate-daemon.sh
> >> QTEST_QEMU_BINARY=./qemu-system-s390x tests/qtest/test-netfilter --tap
> >> -k
> >> Type 'remote-pcihost' is missing its parent 'pcie-host-bridge'
> >> socket_accept failed: Resource temporarily unavailable
> >> socket_accept failed: Resource temporarily unavailable
> >> **
> >> ERROR:../../tests/qtest/libqtest.c:308:qtest_init_without_qmp_handshake:
> >> assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
> >> **
> >> ERROR:../../tests/qtest/libqtest.c:308:qtest_init_without_qmp_handshake:
> >> assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
> >> ../../tests/qtest/libqtest.c:181: kill_qemu() detected QEMU death from
> >> signal 6 (Aborted) (core dumped)
> >> ../../tests/qtest/libqtest.c:181: kill_qemu() detected QEMU death from
> >> signal 6 (Aborted) (core dumped)
> >> ERROR qtest-s390x/pxe-test - Bail out!
> >> ERROR:../../tests/qtest/libqtest.c:308:qtest_init_without_qmp_handshake:
> >> assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
> >> ERROR qtest-s390x/test-netfilter - Bail out!
> >> ERROR:../../tests/qtest/libqtest.c:308:qtest_init_without_qmp_handshake:
> >> assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
> >> Makefile.mtest:3113: recipe for target 'run-test-387' failed
> >> make: *** [run-test-387] Error 1
> >> make: *** Waiting for unfinished jobs
> >> Makefile.mtest:3121: recipe for target 'run-test-388' failed
> > 
> > Hi Elena and Jag,
> > Please take a look at this QOM failure. I guess remote-pcihost is being
> > built but pcie-host-bridge is missing from the s390x-softmmu target.
> 
> Fix suggested here:
> https://www.mail-archive.com/qemu-block@nongnu.org/msg80536.html
> 
> But besides the fix, what would be better is to restrict this feature
> where it makes sense (we are having a hard time building/testing all
> features, better to enable new ones where they are used).
> 
> Would it be enough to enable this feature on X86 hosts/targets for
> mainstream CI?

Trying to check if I understand correctly:

Instead of writing configure/meson rules that enable the feature
whenever the dependencies 

Re: [PATCH v5] qga: Utilize QAPI_LIST_APPEND in qmp_guest_network_get_interfaces

2021-02-08 Thread Michael Roth
On Mon, Feb 08, 2021 at 02:23:59PM +0100, Markus Armbruster wrote:
> Eric Blake  writes:
> 
> > I found another spot that can benefit from using our macros instead of
> > open-coding qapi list creation.
> >
> > Signed-off-by: Eric Blake 
> > ---
> 
> Reviewed-by: Markus Armbruster 
> 
> Mike, would you like me to stick this into a QAPI pull request?
> 

Hi Markus,

If you have one planned soon please feel free. Otherwise I'll send a pull
next week along with a couple other patches.

Thanks,

Mike



Re: [PATCH v2 63/93] tcg/tci: Use ffi for calls

2021-02-08 Thread Richard Henderson
On 2/8/21 11:04 AM, Stefan Weil wrote:
> 
> Am 08.02.21 um 18:39 schrieb Richard Henderson:
>> On 2/8/21 5:07 AM, Stefan Weil wrote:
>>> Richard, this commit is also the one which breaks qemu-system-i386 on 
>>> sparc64
>>> for me:
>> You'll have to give me more details than that, because qemu-system-i386 works
>> for me on a niagara5 w/ debian sid.
> 
> 
> I am testing on a similar Debian system (debian-ports unstable), but with a
> Niagara3 cpu:
> 
> Linux gcc102.fsffrance.org 5.10.0-3-sparc64-smp #1 SMP Debian 5.10.12-1
> (2021-01-30) sparc64 GNU/Linux
> 
> gcc (Debian 10.2.1-6) 10.2.1 20210110
> 
> $ cat /proc/cpuinfo
> cpu        : UltraSparc T3 (Niagara3)
> fpu        : UltraSparc T3 integrated FPU
> pmu        : niagara3
> prom        : OBP 4.34.6.c 2017/03/22 13:55
> type        : sun4v
> ncpus probed    : 256
> ncpus active    : 256
> D$ parity tl1    : 0
> I$ parity tl1    : 0
> cpucaps        :
> flush,stbar,swap,muldiv,v9,blkinit,n2,mul32,div32,v8plus,popc,vis,vis2,ASIBlkInit,fmaf,vis3,hpc
> 

Ok, I've reproduced something on a T3 (gcc102.fsffrance.org).
Running the same code side-by-side vs the T5, I get different results.

I'll see if I can track down the difference, since they're both running the
same base os.


r~



Re: [PATCH] hw/sd/sdhci: Do not modify BlockSizeRegister if transaction in progress

2021-02-08 Thread Mauro Matteo Cascella
On Mon, Feb 8, 2021 at 8:35 PM Philippe Mathieu-Daudé  wrote:
>
> Per the "SD Host Controller Simplified Specification Version 2.00"
> spec. 'Table 2-4 : Block Size Register':
>
>   Transfer Block Size [...] can be accessed only if no
>   transaction is executing (i.e., after a transaction has stopped).
>   Read operations during transfers may return an invalid value,
>   and write operations shall be ignored.
>
> Transactions will update 'data_count', so do not modify 'blksize'
> and 'blkcnt' when 'data_count' is used. This fixes:
>
> $ cat << EOF | qemu-system-x86_64 -qtest stdio -monitor none \
>-nographic -serial none -M pc-q35-5.0 \
>-device sdhci-pci,sd-spec-version=3 \
>-device sd-card,drive=mydrive \
>-drive if=sd,index=0,file=null-co://,format=raw,id=mydrive
>   outl 0xcf8 0x80001810
>   outl 0xcfc 0xe1068000
>   outl 0xcf8 0x80001814
>   outl 0xcf8 0x80001804
>   outw 0xcfc 0x7
>   outl 0xcf8 0x8000fa20
>   write 0xe106802c 0x1 0x0f
>   write 0xe1068004 0xc 0x2801d10101fbff28a384
>   write 0xe106800c 0x1f 
> 0x9dacbbcad9e8f7061524334251606f7e8d9cabbac9d8e7f60514233241505f
>   write 0xe1068003 0x28 
> 0x80d000251480d000252280d000253080d000253e80d000254c80d000255a80d000256880d0002576
>   write 0xe1068003 0x1 0xfe
>   EOF
>   =
>   ==2686219==ERROR: AddressSanitizer: heap-buffer-overflow on address 
> 0x6153bb00 at pc 0x55ab469f456c bp 0x7ffee71be330 sp 0x7ffee71bdae0
>   WRITE of size 4 at 0x6153bb00 thread T0
>   #0 0x55ab469f456b in __asan_memcpy (qemu-system-i386+0x1cea56b)
>   #1 0x55ab483dc396 in stl_he_p include/qemu/bswap.h:353:5
>   #2 0x55ab483af5e4 in stn_he_p include/qemu/bswap.h:546:1
>   #3 0x55ab483aeb4b in flatview_read_continue softmmu/physmem.c:2839:13
>   #4 0x55ab483b0705 in flatview_read softmmu/physmem.c:2877:12
>   #5 0x55ab483b028e in address_space_read_full softmmu/physmem.c:2890:18
>   #6 0x55ab483b1294 in address_space_rw softmmu/physmem.c:2918:16
>   #7 0x55ab479374a2 in dma_memory_rw_relaxed include/sysemu/dma.h:88:12
>   #8 0x55ab47936f50 in dma_memory_rw include/sysemu/dma.h:127:12
>   #9 0x55ab4793665f in dma_memory_read include/sysemu/dma.h:145:12
>   #10 0x55ab4792f176 in sdhci_sdma_transfer_multi_blocks 
> hw/sd/sdhci.c:639:13
>   #11 0x55ab4793dc9d in sdhci_write hw/sd/sdhci.c:1129:17
>   #12 0x55ab483f8db8 in memory_region_write_accessor 
> softmmu/memory.c:491:5
>   #13 0x55ab483f868a in access_with_adjusted_size softmmu/memory.c:552:18
>   #14 0x55ab483f6da5 in memory_region_dispatch_write 
> softmmu/memory.c:1501:16
>   #15 0x55ab483c3b11 in flatview_write_continue softmmu/physmem.c:2774:23
>   #16 0x55ab483b0eb6 in flatview_write softmmu/physmem.c:2814:14
>   #17 0x55ab483b0a3e in address_space_write softmmu/physmem.c:2906:18
>   #18 0x55ab48465c56 in qtest_process_command softmmu/qtest.c:654:9
>
>   0x6153bb00 is located 0 bytes to the right of 512-byte region 
> [0x6153b900,0x6153bb00)
>   allocated by thread T0 here:
>   #0 0x55ab469f58a7 in calloc (qemu-system-i386+0x1ceb8a7)
>   #1 0x7f21d678f9b0 in g_malloc0 (/lib64/libglib-2.0.so.0+0x589b0)
>   #2 0x55ab479530ed in sdhci_pci_realize hw/sd/sdhci-pci.c:36:5
>   #3 0x55ab476f102a in pci_qdev_realize hw/pci/pci.c:2108:9
>   #4 0x55ab48baaad2 in device_set_realized hw/core/qdev.c:761:13
>
>   SUMMARY: AddressSanitizer: heap-buffer-overflow 
> (qemu-system-i386+0x1cea56b) in __asan_memcpy
>   Shadow bytes around the buggy address:
> 0x0c2a7710: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
> 0x0c2a7720: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> 0x0c2a7730: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> 0x0c2a7740: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> 0x0c2a7750: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
>   =>0x0c2a7760:[fa]fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
> 0x0c2a7770: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
> 0x0c2a7780: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
> 0x0c2a7790: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
> 0x0c2a77a0: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
> 0x0c2a77b0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
>   Shadow byte legend (one shadow byte represents 8 application bytes):
> Addressable:   00
> Heap left redzone:   fa
> Freed heap region:   fd
>   ==2686219==ABORTING
>
> Fixes: CVE-2020-17380
> Fixes: CVE-2020-25085
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
> Cc: Mauro Matteo Cascella 
> Cc: Alexander Bulekov 
> Cc: Alistair Francis 
> Cc: Prasad J Pandit 
> Cc: Bandan Das 
>
> RFC because missing Reported-by tags, launchpad/bugzilla links and
> qtest reproducer. Sending for review meanwhile.
> ---
>  hw/sd/sdhci.c | 6 ++
>  1 file changed, 6 

[PATCH v3 0/2] viritofsd: Add support for FUSE_HANDLE_KILLPRIV_V2

2021-02-08 Thread Vivek Goyal
Hi,

This is V3 of the patches. V2 had been posted here.

https://listman.redhat.com/archives/virtio-fs/2020-November/msg00084.html

These patches add support for FUSE_HANDLE_KILLPRIV_V2. This is basically
a feature the file server can opt in to. With xattr enabled, this improves
write performance manyfold. I ran the following fio commands while virtiofsd
was running with options "-o cache=auto" and "-o xattr".

fio libaio random writes

fio --ioengine=libaio --direct=1 --gtod_reduce=1 --name=test 
--filename=/mnt/virtiofs/random_read_write.fio --bs=4k --iodepth=64 --size=4G 
--readwrite=randwrite

Without patches:
 WRITE: bw=84.6MiB/s (88.7MB/s), 84.6MiB/s-84.6MiB/s (88.7MB/s-88.7MB/s), 
io=4096MiB (4295MB), run=48436-48436msec

With patches:
  WRITE: bw=273MiB/s (286MB/s), 273MiB/s-273MiB/s (286MB/s-286MB/s), io=4096MiB 
(4295MB), run=15003-15003msec

So with these patches, for this particular workload, there is an
improvement of more than 200%.

fio psync random writes
---
Even single queue depth writes with ioengine=psync show a significant
improvement. Performance jumps from roughly 35MiB/s to 70MiB/s.

fio --ioengine=psync --name=test --filename=/mnt/virtiofs/random_read_write.fio 
--bs=4k --size=4G --readwrite=randwrite

Without patches:
  WRITE: bw=35.0MiB/s (36.7MB/s), 35.0MiB/s-35.0MiB/s (36.7MB/s-36.7MB/s), 
io=4096MiB (4295MB), run=116932-116932msec

With patches:
 WRITE: bw=69.9MiB/s (73.3MB/s), 69.9MiB/s-69.9MiB/s (73.3MB/s-73.3MB/s), 
io=4096MiB (4295MB), run=58596-58596msec

In V3 I rebased these patches onto the latest upstream. There were a few
conflicts due to recent security fixes from Stefan; I resolved these
conflicts.

Thanks
Vivek

Vivek Goyal (2):
  virtiofsd: Save error code early at the failure callsite
  viriofsd: Add support for FUSE_HANDLE_KILLPRIV_V2

 tools/virtiofsd/fuse_common.h| 15 ++
 tools/virtiofsd/fuse_lowlevel.c  | 11 +++-
 tools/virtiofsd/fuse_lowlevel.h  |  1 +
 tools/virtiofsd/passthrough_ll.c | 93 
 4 files changed, 108 insertions(+), 12 deletions(-)

-- 
2.25.4




Re: Emulating sd card with hifive_u risc-v machine

2021-02-08 Thread Alistair Francis
On Mon, Feb 8, 2021 at 12:00 PM Pascal Scholz
 wrote:
>
> Hi all,
>
> I'm hoping that I addressed the right mailing lists.

Hello Pascal,

Yep, this is the right place :)

>
> I'm working a bit with qemu's risc-v emulation. My current goal is to
> simulate a complete boot process for the SiFive Unleashed Board (SU 540
> SoC)[1]. I've created the corresponding OpenSBI and U-Boot images, being
> the -bios and the -kernel images. It's possible for me to boot up to the
> U-Boot prompt. From this prompt I now want to boot a system image
> located on an emulated sd card.
>
> However I now fail to get a working sd card within qemu for the device
> sifive_u. For example i tried the following command:
>
> qemu-system-riscv64 -M sifive_u -m 8G -serial stdio -bios
> build/platform/sifive/fu540/firmware/fw_jump.bin -kernel
> ../../u-boot/u-boot.bin -device sdhci-pci -device sd-card,drive=sdX
> -drive id=sdX,if=none,format=raw,file=path/to/image.elf
>
> This results in Qemu telling me: -device sdhci-pci: No 'PCI' bus found
> for device 'sdhci-pci'.
>
> Using the machine "virt" the command above works.
>
> The thing i tried was:
>
> qemu-system-riscv64 -M sifive_u -m 8G -serial stdio -bios
> opensbi/build/platform/sifive/fu540/firmware/fw_jump.bin -kernel
> ../u-boot/u-boot.bin -sd path/to/image.elf
>
> Resulting in: machine type does not support if=sd,bus=0,unit=0
>
>
> Even if the machine gets stuck at some point when booting, Qemu has no
> problem starting the VM if I use "-M virt" instead of "-M sifive_u". At
> this point i think, that the machine "sifive_u" doesn't support sd
> cards? Is this guess right or is there anything left I can try? After

Correct. There is no SD card support for the SiFive U in QEMU.

You actually just reminded me though that someone has sent patches to
add support and I need to review them.

You can find the patches here:
https://patchew.org/QEMU/20210126060007.12904-1-bmeng...@gmail.com/
which should work when applied to QEMU.

If you do get a chance to test the patches it would be great if you
can let me know what works/doesn't work for you.

Alistair

> all I'm a bit confused, because there seems to be a block device "sd0"
> when I try to start the machine "sifive_u" without any additional
> device/drive arguments. I would really appreciate if someone would tell
> me, what I'm doing wrong.
>
> Thanks in advance and stay healthy!
>
> Best regards
>
> Pascal
>
>
> [1] https://www.sifive.com/boards/hifive-unleashed
>
>



Re: [PATCH v2 12/15] tcg/arm: Implement TCG_TARGET_HAS_bitsel_vec

2021-02-08 Thread Peter Maydell
On Mon, 8 Feb 2021 at 04:02, Richard Henderson
 wrote:
>
> NEON has 3 instructions implementing this 4 argument operation,
> with each insn overlapping a different logical input onto the
> destination register.
>
> Signed-off-by: Richard Henderson 
> @@ -2899,6 +2904,18 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode 
> opc,
>  }
>  return;
>
> +case INDEX_op_bitsel_vec:
> +a3 = args[3];
> +if (a0 == a3) {
> +tcg_out_vreg3(s, INSN_VBIT, q, 0, a0, a2, a1);
> +} else if (a0 == a2) {
> +tcg_out_vreg3(s, INSN_VBIF, q, 0, a0, a3, a1);
> +} else {
> +tcg_out_mov(s, type, a0, a1);

Side note: aarch64 tcg guards this tcg_out_mov with "if (a0 != a1)",
which if I understand correctly is superfluous and could be removed.

> +tcg_out_vreg3(s, INSN_VBSL, q, 0, a0, a2, a3);
> +}
> +return;
> +

Reviewed-by: Peter Maydell 

thanks
-- PMM



[PATCH v3 1/2] virtiofsd: Save error code early at the failure callsite

2021-02-08 Thread Vivek Goyal
Change error code handling slightly in lo_setattr(). Right now we seem
to jump to out_err and assume that "errno" is valid and use that to
send reply.

But if the caller has to do some other operations before jumping to out_err,
then it does the dance of first saving errno to saverr and then restoring
errno before jumping to out_err. This makes it more confusing.

I am about to make more changes where the caller will have to do some
work after an error before jumping to out_err. I found it easier to
change the convention a bit. That is, the caller saves the error in "saverr"
before jumping to out_err. And out_err uses "saverr" to send the error
back and does not rely on "errno" having the actual error.

v3: Resolved conflicts in lo_setattr() due to lo_inode_open() changes.

Signed-off-by: Vivek Goyal 
Reviewed-by: Dr. David Alan Gilbert 
---
 tools/virtiofsd/passthrough_ll.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 147b59338a..5dd6a4a4a6 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -698,6 +698,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, 
struct stat *attr,
 res = fchmodat(lo->proc_self_fd, procname, attr->st_mode, 0);
 }
 if (res == -1) {
+saverr = errno;
 goto out_err;
 }
 }
@@ -707,6 +708,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, 
struct stat *attr,
 
 res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
 if (res == -1) {
+saverr = errno;
 goto out_err;
 }
 }
@@ -718,16 +720,15 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, 
struct stat *attr,
 } else {
 truncfd = lo_inode_open(lo, inode, O_RDWR);
 if (truncfd < 0) {
-errno = -truncfd;
+saverr = -truncfd;
 goto out_err;
 }
 }
 
 res = ftruncate(truncfd, attr->st_size);
+saverr = res == -1 ? errno : 0;
 if (!fi) {
-saverr = errno;
 close(truncfd);
-errno = saverr;
 }
 if (res == -1) {
 goto out_err;
@@ -760,6 +761,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, 
struct stat *attr,
 res = utimensat(lo->proc_self_fd, procname, tv, 0);
 }
 if (res == -1) {
+saverr = errno;
 goto out_err;
 }
 }
@@ -768,7 +770,6 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, 
struct stat *attr,
 return lo_getattr(req, ino, fi);
 
 out_err:
-saverr = errno;
 lo_inode_put(lo, );
 fuse_reply_err(req, saverr);
 }
-- 
2.25.4




Re: [PATCH v2 11/15] tcg/arm: Implement TCG_TARGET_HAS_minmax_vec

2021-02-08 Thread Peter Maydell
On Mon, 8 Feb 2021 at 03:41, Richard Henderson
 wrote:
>
> This is minimum and maximu, signed and unsigned.

"maximum"

Otherwise

Reviewed-by: Peter Maydell 

thanks
-- PMM



Re: [PATCH v2 12/15] tcg/arm: Implement TCG_TARGET_HAS_bitsel_vec

2021-02-08 Thread Richard Henderson
On 2/8/21 11:55 AM, Peter Maydell wrote:
> On Mon, 8 Feb 2021 at 04:02, Richard Henderson
>  wrote:
>>
>> NEON has 3 instructions implementing this 4 argument operation,
>> with each insn overlapping a different logical input onto the
>> destination register.
>>
>> Signed-off-by: Richard Henderson 
>> @@ -2899,6 +2904,18 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode 
>> opc,
>>  }
>>  return;
>>
>> +case INDEX_op_bitsel_vec:
>> +a3 = args[3];
>> +if (a0 == a3) {
>> +tcg_out_vreg3(s, INSN_VBIT, q, 0, a0, a2, a1);
>> +} else if (a0 == a2) {
>> +tcg_out_vreg3(s, INSN_VBIF, q, 0, a0, a3, a1);
>> +} else {
>> +tcg_out_mov(s, type, a0, a1);
> 
> Side note: aarch64 tcg guards this tcg_out_mov with "if (a0 != a1)",
> which if I understand correctly is superfluous and could be removed.

Yep, tcg_out_mov already does that test.


r~



Re: [PATCH v2 09/15] tcg/arm: Implement TCG_TARGET_HAS_mul_vec

2021-02-08 Thread Peter Maydell
On Mon, 8 Feb 2021 at 03:35, Richard Henderson
 wrote:
>
> Signed-off-by: Richard Henderson 
> ---
>  tcg/arm/tcg-target.h | 2 +-
>  tcg/arm/tcg-target.c.inc | 6 ++
>  2 files changed, 7 insertions(+), 1 deletion(-)
>

Reviewed-by: Peter Maydell 

thanks
-- PMM



[PATCH v3 2/2] virtiofsd: Add support for FUSE_HANDLE_KILLPRIV_V2

2021-02-08 Thread Vivek Goyal
This patch adds basic support for FUSE_HANDLE_KILLPRIV_V2. virtiofsd
can enable/disable this by specifying option "-o killpriv_v2/no_killpriv_v2".
By default this is enabled as long as the client supports it.

Enabling this option helps with performance in write path. Without this
option, currently every write is first preceded by a getxattr() operation
to find out if security.capability is set. (Write is supposed to clear
security.capability). With this option enabled, server is signing up for
clearing security.capability on every WRITE and also clearing suid/sgid
subject to certain rules. This gets rid of extra getxattr() call for every
WRITE and improves performance. This is true when virtiofsd is run with
option -o xattr.

What does enabling FUSE_HANDLE_KILLPRIV_V2 mean for a file server implementation?
It needs to adhere to the following rules. Thanks to Miklos for this summary.

- clear "security.capability" on write, truncate and chown unconditionally
- clear suid/sgid in case of following. Note, sgid is cleared only if
  group executable bit is set.
o setattr has FATTR_SIZE and FATTR_KILL_SUIDGID set.
o setattr has FATTR_UID or FATTR_GID
o open has O_TRUNC and FUSE_OPEN_KILL_SUIDGID
o create has O_TRUNC and FUSE_OPEN_KILL_SUIDGID flag set.
o write has FUSE_WRITE_KILL_SUIDGID

From the Linux VFS client perspective, here are the requirements.

- caps are always cleared on chown/write/truncate
- suid is always cleared on chown, while for truncate/write it is cleared
  only if caller does not have CAP_FSETID.
- sgid is always cleared on chown, while for truncate/write it is cleared
  only if caller does not have CAP_FSETID as well as file has group execute
  permission.

The virtiofsd implementation has not changed much to adhere to the above rules,
the reason being that the current assumption is that we are running on Linux
and on top of filesystems like ext4/xfs which already follow the above rules.
On write, truncate and chown, security.capability is cleared. And virtiofsd
drops CAP_FSETID if need be and that will lead to clearing of suid/sgid.

But if virtiofsd is running on top of a filesystem which breaks above assumptions,
then it will have to take extra actions to emulate above. That's a TODO
for later when need arises.

Note: create normally is supposed to be called only when file does not
  exist. So generally there should not be any question of clearing
  setuid/setgid. But it is possible that after client checks that
  file is not present, some other client creates file on server
  and this race can trigger sending FUSE_CREATE. In that case, if
  O_TRUNC is set, we should clear suid/sgid if FUSE_OPEN_KILL_SUIDGID
  is also set.

v3:
  - Resolved conflicts due to lo_inode_open() changes.
  - Moved capability code in lo_do_open() so that both lo_open() and
lo_create() can benefit from common code.
  - Dropped changes to kernel headers as these are part of qemu already.

Signed-off-by: Vivek Goyal 
Acked-by: Stefan Hajnoczi 
Reviewed-by: Dr. David Alan Gilbert 
---
 tools/virtiofsd/fuse_common.h| 15 ++
 tools/virtiofsd/fuse_lowlevel.c  | 11 -
 tools/virtiofsd/fuse_lowlevel.h  |  1 +
 tools/virtiofsd/passthrough_ll.c | 84 +---
 4 files changed, 103 insertions(+), 8 deletions(-)

diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h
index a090040bb2..fa9671872e 100644
--- a/tools/virtiofsd/fuse_common.h
+++ b/tools/virtiofsd/fuse_common.h
@@ -357,6 +357,21 @@ struct fuse_file_info {
  */
 #define FUSE_CAP_SUBMOUNTS (1 << 27)
 
+/**
+ * Indicates that the filesystem is responsible for clearing
+ * security.capability xattr and clearing setuid and setgid bits. Following
+ * are the rules.
+ * - clear "security.capability" on write, truncate and chown unconditionally
+ * - clear suid/sgid if following is true. Note, sgid is cleared only if
+ *   group executable bit is set.
+ *o setattr has FATTR_SIZE and FATTR_KILL_SUIDGID set.
+ *o setattr has FATTR_UID or FATTR_GID
+ *o open has O_TRUNC and FUSE_OPEN_KILL_SUIDGID
+ *o create has O_TRUNC and FUSE_OPEN_KILL_SUIDGID flag set.
+ *o write has FUSE_WRITE_KILL_SUIDGID
+ */
+#define FUSE_CAP_HANDLE_KILLPRIV_V2 (1 << 28)
+
 /**
  * Ioctl flags
  *
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index e94b71110b..f78692ef66 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -855,7 +855,7 @@ static void do_setattr(fuse_req_t req, fuse_ino_t nodeid,
   FUSE_SET_ATTR_GID | FUSE_SET_ATTR_SIZE |
   FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME |
   FUSE_SET_ATTR_ATIME_NOW | FUSE_SET_ATTR_MTIME_NOW |
-  FUSE_SET_ATTR_CTIME;
+  FUSE_SET_ATTR_CTIME | FUSE_SET_ATTR_KILL_SUIDGID;
 
 req->se->op.setattr(req, nodeid, , arg->valid, fi);
 } else {
@@ -1069,6 +1069,7 @@ static void do_create(fuse_req_t 

Re: [PATCH v2 02/15] tcg/arm: Add host vector framework

2021-02-08 Thread Richard Henderson
On 2/8/21 11:30 AM, Peter Maydell wrote:
> On Mon, 8 Feb 2021 at 18:58, Richard Henderson
>  wrote:
>>
>> On 2/8/21 10:28 AM, Peter Maydell wrote:
>>> On Mon, 8 Feb 2021 at 17:53, Peter Maydell  wrote:
 The AAPCS says that q4-q7 are preserved across calls.
>>>
>>> Speaking of which, doesn't that mean we also need to
>>> save and restore q4-q7 in tcg_target_qemu_prologue()
>>> if we might be generating neon insns? (It doesn't look like
>>> aarch64's prologue does this, which seems like a bug.)
>>
>> I just put them on the reserved list so that they don't get used.
>>
>>> tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
>> ...
>>> tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
> 
> I'm confused. That's not the reserved list, it's the call-clobber
> list

Oops.  It's actually done by not adding them to tcg_target_reg_alloc_order.

/* V8 - V15 are call-saved, and skipped.  */

Which works as well, I suppose.  I dunno which makes more sense.


r~



Re: [PATCH v2 08/15] tcg/arm: Implement TCG_TARGET_HAS_shi_vec

2021-02-08 Thread Peter Maydell
On Mon, 8 Feb 2021 at 03:57, Richard Henderson
 wrote:
>
> This consists of the three immediate shifts: shli, shri, sari.
>
> Signed-off-by: Richard Henderson 
> ---
>  tcg/arm/tcg-target.h |  2 +-
>  tcg/arm/tcg-target.c.inc | 27 +++
>  2 files changed, 28 insertions(+), 1 deletion(-)

Reviewed-by: Peter Maydell 

thanks
-- PMM



[PATCH v2 8/9] acpi: add test case for -no-hpet

2021-02-08 Thread isaku . yamahata
From: Isaku Yamahata 

Signed-off-by: Isaku Yamahata 
---
 tests/qtest/bios-tables-test.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c
index 096d15db68..72c8765baf 100644
--- a/tests/qtest/bios-tables-test.c
+++ b/tests/qtest/bios-tables-test.c
@@ -980,6 +980,17 @@ static void test_acpi_q35_tcg_nosmm(void)
 free_test_data();
 }
 
+static void test_acpi_q35_tcg_nohpet(void)
+{
+test_data data;
+
+memset(, 0, sizeof(data));
+data.machine = MACHINE_Q35;
+data.variant = ".nohpet";
+test_acpi_one(" -no-hpet", );
+free_test_data();
+}
+
 static void test_acpi_piix4_tcg_numamem(void)
 {
 test_data data;
@@ -1337,6 +1348,7 @@ int main(int argc, char *argv[])
 qtest_add_func("acpi/piix4/numamem", test_acpi_piix4_tcg_numamem);
 qtest_add_func("acpi/q35/numamem", test_acpi_q35_tcg_numamem);
 qtest_add_func("acpi/q35/nosmm", test_acpi_q35_tcg_nosmm);
+qtest_add_func("acpi/q35/nohpet", test_acpi_q35_tcg_nohpet);
 qtest_add_func("acpi/piix4/dimmpxm", test_acpi_piix4_tcg_dimm_pxm);
 qtest_add_func("acpi/q35/dimmpxm", test_acpi_q35_tcg_dimm_pxm);
 qtest_add_func("acpi/piix4/acpihmat", test_acpi_piix4_tcg_acpi_hmat);
-- 
2.17.1




[PATCH v2 7/9] i386: acpi: Don't build HPET ACPI entry if HPET is disabled

2021-02-08 Thread isaku . yamahata
From: Sean Christopherson 

Omit HPET AML if the HPET is disabled, QEMU is not emulating it and the
guest may get confused by seeing HPET in the ACPI tables without a
"physical" device present.

The change of DSDT when -no-hpet is as follows.

@@ -141,47 +141,6 @@ DefinitionBlock ("", "DSDT", 1, "BOCHS "
 }
 }

-Scope (_SB)
-{
-Device (HPET)
-{
-Name (_HID, EisaId ("PNP0103") /* HPET System Timer */)  // _HID: 
Hardware ID
-Name (_UID, Zero)  // _UID: Unique ID
-OperationRegion (HPTM, SystemMemory, 0xFED0, 0x0400)
-Field (HPTM, DWordAcc, Lock, Preserve)
-{
-VEND,   32,
-PRD,32
-}
-
-Method (_STA, 0, NotSerialized)  // _STA: Status
-{
-Local0 = VEND /* \_SB_.HPET.VEND */
-Local1 = PRD /* \_SB_.HPET.PRD_ */
-Local0 >>= 0x10
-If (((Local0 == Zero) || (Local0 == 0x)))
-{
-Return (Zero)
-}
-
-If (((Local1 == Zero) || (Local1 > 0x05F5E100)))
-{
-Return (Zero)
-}
-
-Return (0x0F)
-}
-
-Name (_CRS, ResourceTemplate ()  // _CRS: Current Resource Settings
-{
-Memory32Fixed (ReadOnly,
-0xFED0, // Address Base
-0x0400, // Address Length
-)
-})
-}
-}
-
 Scope (_SB.PCI0)
 {
 Device (ISA)

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Sean Christopherson 
---
 hw/i386/acpi-build.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index bcb1f65c1d..73ec0b6d32 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1405,7 +1405,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
 aml_append(sb_scope, dev);
 aml_append(dsdt, sb_scope);
 
-build_hpet_aml(dsdt);
+if (misc->has_hpet) {
+build_hpet_aml(dsdt);
+}
 build_piix4_isa_bridge(dsdt);
 build_isa_devices_aml(dsdt);
 if (pm->pcihp_bridge_en || pm->pcihp_root_en) {
@@ -1450,7 +1452,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
 
 aml_append(dsdt, sb_scope);
 
-build_hpet_aml(dsdt);
+if (misc->has_hpet) {
+build_hpet_aml(dsdt);
+}
 build_q35_isa_bridge(dsdt);
 build_isa_devices_aml(dsdt);
 build_q35_pci0_int(dsdt);
-- 
2.17.1




[PATCH v2 3/9] acpi/core: always set SCI_EN when SMM isn't supported

2021-02-08 Thread isaku . yamahata
From: Isaku Yamahata 

If SMM is not supported, ACPI fixed hardware doesn't support
legacy mode; it is an ACPI-only platform, where SCI_EN in the PM1_CNT
register is always set.
The bit tells the OS whether the platform is in legacy mode (SCI_EN
cleared) or ACPI mode (SCI_EN set).

ACPI spec 4.8.10.1 PM1 Event Grouping
PM1 Enable Registers
> For ACPI-only platforms (where SCI_EN is always set)

Signed-off-by: Isaku Yamahata 
---
 hw/acpi/core.c | 11 ++-
 hw/acpi/ich9.c |  2 +-
 hw/acpi/piix4.c|  3 ++-
 hw/isa/vt82c686.c  |  2 +-
 include/hw/acpi/acpi.h |  4 +++-
 5 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/hw/acpi/core.c b/hw/acpi/core.c
index 7170bff657..1e004d0078 100644
--- a/hw/acpi/core.c
+++ b/hw/acpi/core.c
@@ -579,6 +579,10 @@ void acpi_pm1_cnt_update(ACPIREGS *ar,
  bool sci_enable, bool sci_disable)
 {
 /* ACPI specs 3.0, 4.7.2.5 */
+if (ar->pm1.cnt.acpi_only) {
+return;
+}
+
 if (sci_enable) {
 ar->pm1.cnt.cnt |= ACPI_BITMASK_SCI_ENABLE;
 } else if (sci_disable) {
@@ -608,11 +612,13 @@ static const MemoryRegionOps acpi_pm_cnt_ops = {
 };
 
 void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent,
-   bool disable_s3, bool disable_s4, uint8_t s4_val)
+   bool disable_s3, bool disable_s4, uint8_t s4_val,
+   bool acpi_only)
 {
 FWCfgState *fw_cfg;
 
 ar->pm1.cnt.s4_val = s4_val;
+ar->pm1.cnt.acpi_only = acpi_only;
 ar->wakeup.notify = acpi_notify_wakeup;
 qemu_register_wakeup_notifier(>wakeup);
 
@@ -638,6 +644,9 @@ void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent,
 void acpi_pm1_cnt_reset(ACPIREGS *ar)
 {
 ar->pm1.cnt.cnt = 0;
+if (ar->pm1.cnt.acpi_only) {
+ar->pm1.cnt.cnt |= ACPI_BITMASK_SCI_ENABLE;
+}
 }
 
 /* ACPI GPE */
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index 5ff4e01c36..1a34d7f621 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -282,7 +282,7 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm,
 acpi_pm_tmr_init(>acpi_regs, ich9_pm_update_sci_fn, >io);
 acpi_pm1_evt_init(>acpi_regs, ich9_pm_update_sci_fn, >io);
 acpi_pm1_cnt_init(>acpi_regs, >io, pm->disable_s3, pm->disable_s4,
-  pm->s4_val);
+  pm->s4_val, !smm_enabled);
 
 acpi_gpe_init(>acpi_regs, ICH9_PMIO_GPE0_LEN);
 memory_region_init_io(>io_gpe, OBJECT(lpc_pci), _gpe_ops, pm,
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 669be5bbf6..0cddf91de5 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -496,7 +496,8 @@ static void piix4_pm_realize(PCIDevice *dev, Error **errp)
 
 acpi_pm_tmr_init(>ar, pm_tmr_timer, >io);
 acpi_pm1_evt_init(>ar, pm_tmr_timer, >io);
-acpi_pm1_cnt_init(>ar, >io, s->disable_s3, s->disable_s4, s->s4_val);
+acpi_pm1_cnt_init(>ar, >io, s->disable_s3, s->disable_s4, s->s4_val,
+  !s->smm_enabled);
 acpi_gpe_init(>ar, GPE_LEN);
 
 s->powerdown_notifier.notify = piix4_pm_powerdown_req;
diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c
index a6f5a0843d..071b64b497 100644
--- a/hw/isa/vt82c686.c
+++ b/hw/isa/vt82c686.c
@@ -240,7 +240,7 @@ static void vt82c686b_pm_realize(PCIDevice *dev, Error 
**errp)
 
 acpi_pm_tmr_init(>ar, pm_tmr_timer, >io);
 acpi_pm1_evt_init(>ar, pm_tmr_timer, >io);
-acpi_pm1_cnt_init(>ar, >io, false, false, 2);
+acpi_pm1_cnt_init(>ar, >io, false, false, 2, false);
 }
 
 static Property via_pm_properties[] = {
diff --git a/include/hw/acpi/acpi.h b/include/hw/acpi/acpi.h
index 22b0b65bb2..9e8a76f2e2 100644
--- a/include/hw/acpi/acpi.h
+++ b/include/hw/acpi/acpi.h
@@ -128,6 +128,7 @@ struct ACPIPM1CNT {
 MemoryRegion io;
 uint16_t cnt;
 uint8_t s4_val;
+bool acpi_only;
 };
 
 struct ACPIGPE {
@@ -163,7 +164,8 @@ void acpi_pm1_evt_init(ACPIREGS *ar, acpi_update_sci_fn 
update_sci,
 
 /* PM1a_CNT: piix and ich9 don't implement PM1b CNT. */
 void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent,
-   bool disable_s3, bool disable_s4, uint8_t s4_val);
+   bool disable_s3, bool disable_s4, uint8_t s4_val,
+   bool acpi_only);
 void acpi_pm1_cnt_update(ACPIREGS *ar,
  bool sci_enable, bool sci_disable);
 void acpi_pm1_cnt_reset(ACPIREGS *ar);
-- 
2.17.1




[PATCH v2 1/9] checkpatch: don't emit warning on newly created acpi data files

2021-02-08 Thread isaku . yamahata
From: Isaku Yamahata 

Newly created ACPI data files (tests/data/acpi/) cause a false positive
warning.
If the file name is an ACPI expected file, don't emit the warning.

Fixes: e625ba2a41 ("checkpatch: fix acpi check with multiple file name")
Signed-off-by: Isaku Yamahata 
---
 scripts/checkpatch.pl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index e47ad878d8..40c9cc7def 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1530,7 +1530,9 @@ sub process {
($line =~ /^(?:new|deleted) file mode\s*\d+\s*$/ ||
 $line =~ /^rename (?:from|to) [\w\/\.\-]+\s*$/ ||
 ($line =~ /\{\s*([\w\/\.\-]*)\s*\=\>\s*([\w\/\.\-]*)\s*\}/ 
&&
- (defined($1) || defined($2) {
+ (defined($1) || defined($2 &&
+  !(($realfile ne '') &&
+($realfile eq $acpi_testexpected))) {
$reported_maintainer_file = 1;
WARN("added, moved or deleted file(s), does MAINTAINERS 
need updating?\n" . $herecurr);
}
-- 
2.17.1




Re: [PULL v3 00/27] Block patches

2021-02-08 Thread Alex Bennée


Jag Raman  writes:

>> On Feb 8, 2021, at 5:02 AM, Philippe Mathieu-Daudé  wrote:
>> 
>> On 2/8/21 10:27 AM, Stefan Hajnoczi wrote:
>>> On Sat, Feb 06, 2021 at 05:03:20PM +, Peter Maydell wrote:
 On Fri, 5 Feb 2021 at 22:53, Peter Maydell  
 wrote:
> 
> On Fri, 5 Feb 2021 at 16:45, Stefan Hajnoczi  wrote:

>>> 
>>> Hi Elena and Jag,
>>> Please take a look at this QOM failure. I guess remote-pcihost is being
>>> built but pcie-host-bridge is missing from the s390x-softmmu target.
>
> Hi All,
>
> Thank you for letting us know about this build issue! We are working on 
> resolving it.
>
> We would like to ensure that the next version we send doesn’t cause too many
> problems like this. Should the Travis/GitLab CI have caught this problem?
>
> We ran the following tests before sending the patches out for review:
> - “make docker”

On its own it won't do much. You could go with:

  make docker-test-build@debian-s390-cross

You can't run make check for most cross builds but you can at least run
the builds themselves:

  make docker-test-build

should run the test build on all capable images. docker-all-tests should
be all possible tests.

> - Travis CI (will use GitLab going forward)
> - Tests in the “tests/acceptance” folder.
>
> Is there any other tests we could run before sending the next
> revision?

Manually running: make check-block 

>
> Thank you very much!


-- 
Alex Bennée



Re: [PATCH 1/2] hw/block/nvme: use locally assigned QEMU IEEE OUI

2021-02-08 Thread Klaus Jensen
On Feb  8 19:56, Philippe Mathieu-Daudé wrote:
> On 2/8/21 3:10 PM, Klaus Jensen wrote:
> > From: Gollu Appalanaidu 
> > 
> > Commit 6eb7a071292a ("hw/block/nvme: change controller pci id") changed
> > the controller to use a Red Hat assigned PCI Device and Vendor ID, but
> > did not change the IEEE OUI away from the Intel IEEE OUI.
> > 
> > Fix that and use the locally assigned QEMU IEEE OUI instead.
> > 
> > Signed-off-by: Gollu Appalanaidu 
> > Signed-off-by: Klaus Jensen 
> > ---
> >  hw/block/nvme.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index c2f0c88fbf39..547a3073ef1b 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -4686,8 +4686,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
> > *pci_dev)
> >  
> >  id->rab = 6;
> >  id->ieee[0] = 0x00;
> > -id->ieee[1] = 0x02;
> > -id->ieee[2] = 0xb3;
> > +id->ieee[1] = 0x54;
> > +id->ieee[2] = 0x52;
> 
> Shouldn't this be conditional on 'use-intel-id'?
> 

It definitely should! Thanks!

> >  id->mdts = n->params.mdts;
> >  id->ver = cpu_to_le32(NVME_SPEC_VER);
> >  id->oacs = cpu_to_le16(0);
> > 
> 


signature.asc
Description: PGP signature


[PATCH v2 0/9] ACPI related fixes

2021-02-08 Thread isaku . yamahata
From: Isaku Yamahata 

Miscellaneous bug fixes related to ACPI to play nice with guest BIOSes/OSes
by conforming to ACPI spec better.

Changes from v1:
- fixed style issue with fixes to checkpatch.pl
- fixed make check breakage
- added ACPI table tests
- update comment message to include acpi table diff

Isaku Yamahata (8):
  checkpatch: don't emit warning on newly created acpi data files
  qtest: update tests/qtest/bios-tables-test-allowed-diff.h
  acpi/core: always set SCI_EN when SMM isn't supported
  acpi: set fadt.smi_cmd to zero when SMM is not supported
  acpi: add test case for smm unsupported -machine smm=off
  hw/i386: declare ACPI mother board resource for MMCONFIG region
  acpi: add test case for -no-hpet
  qtest/acpi/bios-tables-test: update acpi tables

Sean Christopherson (1):
  i386: acpi: Don't build HPET ACPI entry if HPET is disabled

 hw/acpi/core.c|  11 +-
 hw/acpi/ich9.c|   2 +-
 hw/acpi/piix4.c   |   3 +-
 hw/i386/acpi-build.c  | 192 +-
 hw/isa/vt82c686.c |   2 +-
 include/hw/acpi/acpi.h|   4 +-
 scripts/checkpatch.pl |   4 +-
 tests/data/acpi/q35/DSDT  | Bin 7801 -> 8083 bytes
 tests/data/acpi/q35/DSDT.acpihmat | Bin 9126 -> 9408 bytes
 tests/data/acpi/q35/DSDT.bridge   | Bin 7819 -> 8101 bytes
 tests/data/acpi/q35/DSDT.cphp | Bin 8265 -> 8547 bytes
 tests/data/acpi/q35/DSDT.dimmpxm  | Bin 9455 -> 9737 bytes
 tests/data/acpi/q35/DSDT.ipmibt   | Bin 7876 -> 8158 bytes
 tests/data/acpi/q35/DSDT.memhp| Bin 9160 -> 9442 bytes
 tests/data/acpi/q35/DSDT.mmio64   | Bin 8932 -> 9214 bytes
 tests/data/acpi/q35/DSDT.nohpet   | Bin 0 -> 7941 bytes
 tests/data/acpi/q35/DSDT.nosmm| Bin 0 -> 8083 bytes
 tests/data/acpi/q35/DSDT.numamem  | Bin 7807 -> 8089 bytes
 tests/data/acpi/q35/DSDT.tis  | Bin 8407 -> 8689 bytes
 tests/data/acpi/q35/FACP.nosmm| Bin 0 -> 244 bytes
 tests/qtest/bios-tables-test.c|  24 
 21 files changed, 231 insertions(+), 11 deletions(-)
 create mode 100644 tests/data/acpi/q35/DSDT.nohpet
 create mode 100644 tests/data/acpi/q35/DSDT.nosmm
 create mode 100644 tests/data/acpi/q35/FACP.nosmm

-- 
2.17.1




[RFC PATCH v2 4/4] iotests: Test reopening multiple devices at the same time

2021-02-08 Thread Alberto Garcia
Signed-off-by: Alberto Garcia 
---
 tests/qemu-iotests/245 | 40 ++
 tests/qemu-iotests/245.out |  4 ++--
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245
index 850c9f070b..d18dbbe638 100755
--- a/tests/qemu-iotests/245
+++ b/tests/qemu-iotests/245
@@ -574,6 +574,46 @@ class TestBlockdevReopen(iotests.QMPTestCase):
 self.reopen(opts, {'file': 'hd1-file'})
 self.run_qemu_io("hd", "read  -P 0xa1 0 10k")
 
+def test_swap_files(self):
+opts0 = hd_opts(0)
+opts2 = hd_opts(2)
+
+# Add hd0 and hd2 (none of them with backing files)
+result = self.vm.qmp('blockdev-add', conv_keys = False, **opts0)
+self.assert_qmp(result, 'return', {})
+result = self.vm.qmp('blockdev-add', conv_keys = False, **opts2)
+self.assert_qmp(result, 'return', {})
+
+# Write different data to both block devices
+self.run_qemu_io("hd0", "write -P 0xa0 0 1k")
+self.run_qemu_io("hd2", "write -P 0xa2 0 1k")
+
+# Check that the data reads correctly
+self.run_qemu_io("hd0", "read  -P 0xa0 0 1k")
+self.run_qemu_io("hd2", "read  -P 0xa2 0 1k")
+
+# It's not possible to make a block device use an image that
+# is already being used by the other device.
+self.reopen(opts0, {'file': 'hd2-file'},
+"Conflicts with use by hd0 as 'file', which does not allow 
'write, resize' on hd2-file")
+self.reopen(opts2, {'file': 'hd0-file'},
+"Conflicts with use by hd2 as 'file', which does not allow 
'write, resize' on hd0-file")
+
+# But we can swap the images if we reopen both devices at the
+# same time
+opts0['file'] = 'hd2-file'
+opts2['file'] = 'hd0-file'
+self.reopenMultiple([opts0, opts2])
+self.run_qemu_io("hd0", "read  -P 0xa2 0 1k")
+self.run_qemu_io("hd2", "read  -P 0xa0 0 1k")
+
+# And we can of course come back to the original state
+opts0['file'] = 'hd0-file'
+opts2['file'] = 'hd2-file'
+self.reopenMultiple([opts0, opts2])
+self.run_qemu_io("hd0", "read  -P 0xa0 0 1k")
+self.run_qemu_io("hd2", "read  -P 0xa2 0 1k")
+
 def test_insert_throttle_filter(self):
 hd0_opts = hd_opts(0)
 result = self.vm.qmp('blockdev-add', conv_keys = False, **hd0_opts)
diff --git a/tests/qemu-iotests/245.out b/tests/qemu-iotests/245.out
index 537a2b5b63..1f9debbd61 100644
--- a/tests/qemu-iotests/245.out
+++ b/tests/qemu-iotests/245.out
@@ -10,8 +10,8 @@
 {"return": {}}
 {"data": {"id": "stream0", "type": "stream"}, "event": "BLOCK_JOB_PENDING", 
"timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
 {"data": {"device": "stream0", "len": 3145728, "offset": 3145728, "speed": 0, 
"type": "stream"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": 
{"microseconds": "USECS", "seconds": "SECS"}}
-...
+
 --
-Ran 23 tests
+Ran 24 tests
 
 OK
-- 
2.20.1




[PATCH v2 0/7] acpi: Error Record Serialization Table, ERST, support for QEMU

2021-02-08 Thread Eric DeVolder
This patchset introduces support for the ACPI Error Record
Serialization Table, ERST.

Linux uses the persistent storage filesystem, pstore, to record
information (eg. dmesg tail) upon panics and shutdowns.  Pstore is
independent of, and runs before, kdump.  In certain scenarios (ie.
hosts/guests with root filesystems on NFS/iSCSI where networking
software and/or hardware fails), pstore may contain the only
information available for post-mortem debugging.

Two common storage backends for the pstore filesystem are ACPI ERST
and UEFI. Most BIOS implement ACPI ERST; however, ACPI ERST is not
currently supported in QEMU, and UEFI is not utilized in all guests.
By implementing ACPI ERST within QEMU, ACPI ERST becomes a
viable pstore storage backend for virtual machines (as it is now for
bare metal machines).

Enabling support for ACPI ERST facilitates a consistent method to
capture kernel panic information in a wide range of guests: from
resource-constrained microvms to very large guests, and in
particular, in direct-boot environments (which would lack UEFI
run-time services).

Note that Microsoft Windows also utilizes the ACPI ERST for certain
crash information, if available.

The ACPI ERST persistent storage is contained within a single backing
file, with a default size of 64KiB. The size and filename of the
backing file can be obtained from QEMU parameters.

The ACPI specification[1], in Chapter "ACPI Platform Error Interfaces
(APEI)", and specifically subsection "Error Serialization", outlines
a method for storing error records into persistent storage.

[1] "Advanced Configuration and Power Interface Specification",
version 6.2, May 2017.
https://www.uefi.org/sites/default/files/resources/ACPI_6_2.pdf

[2] "Unified Extensible Firmware Interface Specification",
version 2.8, March 2019.
https://uefi.org/sites/default/files/resources/UEFI_Spec_2_8_final.pdf

Suggested-by: Konrad Wilk 
Signed-off-by: Eric DeVolder 

---
v2: 8feb2021
 - Added qtest/smoke test per Paolo Bonzini
 - Split patch into smaller chunks, per Igor Mammedov
 - Did away with use of ACPI packed structures, per Igor Mammedov

v1: 26oct2020
 - initial post

---
Eric DeVolder (7):
  ACPI ERST: bios-tables-test.c steps 1 and 2
  ACPI ERST: header file for erst
  ACPI ERST: support for ACPI ERST feature
  ACPI ERST: build step for ACPI ERST
  ACPI ERST: support ERST for x86 guest
  ACPI ERST: qtest for ERST
  ACPI ERST: bios-tables-test.c step 5

 hw/acpi/erst.c   | 952 +++
 hw/acpi/meson.build  |   1 +
 hw/i386/acpi-build.c |   4 +
 include/hw/acpi/erst.h   |  77 
 tests/data/acpi/microvm/ERST |   0
 tests/data/acpi/pc/ERST  | Bin 0 -> 976 bytes
 tests/data/acpi/q35/ERST | Bin 0 -> 976 bytes
 tests/qtest/erst-test.c  | 106 +
 tests/qtest/meson.build  |   2 +
 9 files changed, 1142 insertions(+)
 create mode 100644 hw/acpi/erst.c
 create mode 100644 include/hw/acpi/erst.h
 create mode 100644 tests/data/acpi/microvm/ERST
 create mode 100644 tests/data/acpi/pc/ERST
 create mode 100644 tests/data/acpi/q35/ERST
 create mode 100644 tests/qtest/erst-test.c

-- 
1.8.3.1




[RFC PATCH v2 3/4] block: Support multiple reopening with x-blockdev-reopen

2021-02-08 Thread Alberto Garcia
Signed-off-by: Alberto Garcia 
---
 qapi/block-core.json   |  2 +-
 include/block/block.h  |  1 +
 block.c| 16 +--
 blockdev.c | 85 +-
 tests/qemu-iotests/155 |  9 ++--
 tests/qemu-iotests/165 |  4 +-
 tests/qemu-iotests/245 | 27 +++-
 tests/qemu-iotests/248 |  2 +-
 tests/qemu-iotests/248.out |  2 +-
 tests/qemu-iotests/298 |  4 +-
 10 files changed, 89 insertions(+), 63 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index c0e7c23331..b9fcf20a81 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4177,7 +4177,7 @@
 # Since: 4.0
 ##
 { 'command': 'x-blockdev-reopen',
-  'data': 'BlockdevOptions', 'boxed': true }
+  'data': { 'options': ['BlockdevOptions'] } }
 
 ##
 # @blockdev-del:
diff --git a/include/block/block.h b/include/block/block.h
index 6dd687a69e..fe4a220da9 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -372,6 +372,7 @@ BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, 
const char *node_name,
 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
 BlockDriverState *bs, QDict *options,
 bool keep_old_opts);
+void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue);
 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
 int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
   Error **errp);
diff --git a/block.c b/block.c
index 19b62da4af..b4fef2308f 100644
--- a/block.c
+++ b/block.c
@@ -3933,6 +3933,17 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue 
*bs_queue,
NULL, 0, keep_old_opts);
 }
 
+void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue)
+{
+if (bs_queue) {
+BlockReopenQueueEntry *bs_entry, *next;
+QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
+g_free(bs_entry);
+}
+g_free(bs_queue);
+}
+}
+
 /*
  * Reopen multiple BlockDriverStates atomically & transactionally.
  *
@@ -4024,10 +4035,7 @@ abort:
 }
 
 cleanup:
-QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
-g_free(bs_entry);
-}
-g_free(bs_queue);
+bdrv_reopen_queue_free(bs_queue);
 
 return ret;
 }
diff --git a/blockdev.c b/blockdev.c
index 098a05709d..6b688c0f73 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3528,38 +3528,16 @@ fail:
 visit_free(v);
 }
 
-void qmp_x_blockdev_reopen(BlockdevOptions *options, Error **errp)
+void qmp_x_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
 {
-BlockDriverState *bs;
-QObject *obj;
-Visitor *v = qobject_output_visitor_new();
-BlockReopenQueue *queue;
-QDict *qdict;
-
-/* Check for the selected node name */
-if (!options->has_node_name) {
-error_setg(errp, "Node name not specified");
-goto fail;
-}
-
-bs = bdrv_find_node(options->node_name);
-if (!bs) {
-error_setg(errp, "Cannot find node named '%s'", options->node_name);
-goto fail;
-}
-
-/* Put all options in a QDict and flatten it */
-visit_type_BlockdevOptions(v, NULL, , _abort);
-visit_complete(v, );
-qdict = qobject_to(QDict, obj);
-
-qdict_flatten(qdict);
-
-/* Perform the reopen operation */
+BlockReopenQueue *queue = NULL;
+GSList *aio_ctxs = NULL;
+GSList *visitors = NULL;
+GSList *drained = NULL;
 BdrvNextIterator it;
-GSList *aio_ctxs = NULL, *ctx;
 BlockDriverState *it_bs;
 
+/* Acquire all AIO contexts */
 for (it_bs = bdrv_first(); it_bs; it_bs = bdrv_next()) {
 AioContext *aio_context = bdrv_get_aio_context(it_bs);
 
@@ -3569,19 +3547,50 @@ void qmp_x_blockdev_reopen(BlockdevOptions *options, 
Error **errp)
 }
 }
 
-bdrv_subtree_drained_begin(bs);
-queue = bdrv_reopen_queue(NULL, bs, qdict, false);
+/* Add each one of the BDS that we want to reopen to the queue */
+for (; reopen_list != NULL; reopen_list = reopen_list->next) {
+BlockdevOptions *options = reopen_list->value;
+QDict *qdict;
+Visitor *v;
+BlockDriverState *bs;
+QObject *obj;
+
+/* Check for the selected node name */
+if (!options->has_node_name) {
+error_setg(errp, "Node name not specified");
+goto fail;
+}
+
+bs = bdrv_find_node(options->node_name);
+if (!bs) {
+error_setg(errp, "Cannot find node named '%s'", 
options->node_name);
+goto fail;
+}
+
+v = qobject_output_visitor_new();
+visitors = g_slist_prepend(visitors, v);
+
+/* Put all options in a QDict and flatten it */
+visit_type_BlockdevOptions(v, NULL, &options, &error_abort);
+visit_complete(v, &obj);
+qdict = qobject_to(QDict, obj);
+
+qdict_flatten(qdict);
+
+bdrv_subtree_drained_begin(bs);
+ 

[PATCH v2 6/7] ACPI ERST: qtest for ERST

2021-02-08 Thread Eric DeVolder
This change provides a qtest that locates and then does a simple
interrogation of the ERST feature within the guest.

Signed-off-by: Eric DeVolder 
---
 tests/qtest/erst-test.c | 106 
 tests/qtest/meson.build |   2 +
 2 files changed, 108 insertions(+)
 create mode 100644 tests/qtest/erst-test.c

diff --git a/tests/qtest/erst-test.c b/tests/qtest/erst-test.c
new file mode 100644
index 000..1030e83
--- /dev/null
+++ b/tests/qtest/erst-test.c
@@ -0,0 +1,106 @@
+/*
+ * QTest testcase for ACPI ERST
+ *
+ * Copyright (c) 2021 Oracle
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/bitmap.h"
+#include "qemu/uuid.h"
+#include "hw/acpi/acpi-defs.h"
+#include "boot-sector.h"
+#include "acpi-utils.h"
+#include "libqos/libqtest.h"
+#include "qapi/qmp/qdict.h"
+
+#define RSDP_ADDR_INVALID 0x100000 /* RSDP must be below this address */
+
+static uint64_t acpi_find_erst(QTestState *qts)
+{
+uint32_t rsdp_offset;
+uint8_t rsdp_table[36 /* ACPI 2.0+ RSDP size */];
+uint32_t rsdt_len, table_length;
+uint8_t *rsdt, *ent;
+uint64_t base = 0;
+
+/* Wait for guest firmware to finish and start the payload. */
+boot_sector_test(qts);
+
+/* Tables should be initialized now. */
+rsdp_offset = acpi_find_rsdp_address(qts);
+
+g_assert_cmphex(rsdp_offset, <, RSDP_ADDR_INVALID);
+
+acpi_fetch_rsdp_table(qts, rsdp_offset, rsdp_table);
+acpi_fetch_table(qts, &rsdt, &rsdt_len, &rsdp_table[16 /* RsdtAddress */],
+ 4, "RSDT", true);
+
+ACPI_FOREACH_RSDT_ENTRY(rsdt, rsdt_len, ent, 4 /* Entry size */) {
+uint8_t *table_aml;
+acpi_fetch_table(qts, &table_aml, &table_length, ent, 4, NULL, true);
+if (!memcmp(table_aml + 16 /* OEM Table ID */, "BXPCERST", 8)) {
+/*
+ * Picking up ERST base address from the Register Region
+ * specified as part of the first Serialization Instruction
+ * Action (which is a Begin Write Operation).
+ */
+memcpy(&base, &table_aml[56], 8);
+g_free(table_aml);
+break;
+}
+g_free(table_aml);
+}
+g_free(rsdt);
+return base;
+}
+
+static char disk[] = "tests/erst-test-disk-XXXXXX";
+
+#define ERST_CMD()  \
+"-accel kvm -accel tcg "\
+"-drive id=hd0,if=none,file=%s,format=raw " \
+"-device ide-hd,drive=hd0 ", disk
+
+static void erst_get_error_log_address_range(void)
+{
+QTestState *qts;
+uint64_t log_address_range = 0;
+
+qts = qtest_initf(ERST_CMD());
+
+uint64_t base = acpi_find_erst(qts);
+g_assert(base != 0);
+
+/* Issue GET_ERROR_LOG_ADDRESS_RANGE command */
+qtest_writel(qts, base + 0, 0xD);
+/* Read GET_ERROR_LOG_ADDRESS_RANGE result */
+log_address_range = qtest_readq(qts, base + 8);\
+
+/* Check addr_range is offset of base */
+g_assert((base + 16) == log_address_range);
+
+qtest_quit(qts);
+}
+
+int main(int argc, char **argv)
+{
+int ret;
+
+ret = boot_sector_init(disk);
+if (ret) {
+return ret;
+}
+
+g_test_init(&argc, &argv, NULL);
+
+qtest_add_func("/erst/get-error-log-address-range",
+   erst_get_error_log_address_range);
+
+ret = g_test_run();
+boot_sector_cleanup(disk);
+
+return ret;
+}
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index 6a67c53..8409892 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -46,6 +46,7 @@ qtests_i386 = \
   (config_all_devices.has_key('CONFIG_TPM_TIS_ISA') ? ['tpm-tis-test'] : []) + 
 \
   (config_all_devices.has_key('CONFIG_TPM_TIS_ISA') ? ['tpm-tis-swtpm-test'] : 
[]) +\
   (config_all_devices.has_key('CONFIG_RTL8139_PCI') ? ['rtl8139-test'] : []) + 
 \
+  (config_all_devices.has_key('CONFIG_ACPI') ? ['erst-test'] : []) +   
 \
   qtests_pci + 
 \
   ['fdc-test',
'ide-test',
@@ -208,6 +209,7 @@ qtests = {
   'bios-tables-test': [io, 'boot-sector.c', 'acpi-utils.c', 'tpm-emu.c'],
   'cdrom-test': files('boot-sector.c'),
   'dbus-vmstate-test': files('migration-helpers.c') + dbus_vmstate1,
+  'erst-test': files('erst-test.c', 'boot-sector.c', 'acpi-utils.c'),
   'ivshmem-test': [rt, '../../contrib/ivshmem-server/ivshmem-server.c'],
   'migration-test': files('migration-helpers.c'),
   'pxe-test': files('boot-sector.c'),
-- 
1.8.3.1




[RFC PATCH v2 0/4] Allow changing bs->file on reopen

2021-02-08 Thread Alberto Garcia
Hi,

this series allows changing bs->file using x-blockdev-reopen. Read
here for more details:

   https://lists.gnu.org/archive/html/qemu-block/2021-01/msg00437.html

Version 2 of the series introduces a very significant change:
x-blockdev-reopen now receives a list of BlockdevOptions instead of
just one, so it is possible to reopen multiple block devices using a
single transaction.

This is still an RFC, I haven't updated the documentation and the
structure of the patches will probably change in the future, but I'd
like to know your opinion about the approach.

These patches apply on top of Vladimir's branch:

git: https://src.openvz.org/scm/~vsementsov/qemu.git
tag: up-block-topologic-perm-v2

Regards,

Berto

Alberto Garcia (4):
  block: Allow changing bs->file on reopen
  iotests: Update 245 to support replacing files with x-blockdev-reopen
  block: Support multiple reopening with x-blockdev-reopen
  iotests: Test reopening multiple devices at the same time

 qapi/block-core.json   |   2 +-
 include/block/block.h  |   2 +
 block.c|  81 +--
 blockdev.c |  85 +---
 tests/qemu-iotests/155 |   9 ++-
 tests/qemu-iotests/165 |   4 +-
 tests/qemu-iotests/245 | 128 -
 tests/qemu-iotests/245.out |   4 +-
 tests/qemu-iotests/248 |   2 +-
 tests/qemu-iotests/248.out |   2 +-
 tests/qemu-iotests/298 |   4 +-
 11 files changed, 254 insertions(+), 69 deletions(-)

-- 
2.20.1




  1   2   3   4   5   >