date:20210719

Re: Failing iotest 206

2021-07-19 Thread Eric Blake

On Mon, Jul 19, 2021 at 10:06:01AM +0200, Thomas Huth wrote:
>  Hi,
> 
> iotest 206 fails for me with:
> 

> --- 206.out
> +++ 206.out.bad
> @@ -99,55 +99,19 @@
> 
>  {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options":
> {"driver": "qcow2", "encrypt": {"cipher-alg": "twofish-128", "cipher-mode":
> "ctr", "format": "luks", "hash-alg": "sha1", "iter-time": 10, "ivgen-alg":
> "plain64", "ivgen-hash-alg": "md5", "key-secret": "keysec0"}, "file":
> {"driver": "file", "filename": "TEST_DIR/PID-t.qcow2"}, "size": 33554432}}}
>  {"return": {}}
> +Job failed: Unsupported cipher algorithm twofish-128 with ctr mode
>  {"execute": "job-dismiss", "arguments": {"id": "job0"}}
>  {"return": {}}

> 
> Looks like it is missing a check for the availability of the corresponding
> crypto stuff? Does anybody got a clue how to fix this?

What system is this on? Which crypto library versions are installed?
I suspect this is related to Dan's effort to speed up crypto by
favoring gnutls over nettle, where the switch in favored libraries
failed to account for whether twofish-128 is supported?

https://lists.gnu.org/archive/html/qemu-devel/2021-07/msg03886.html

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [PATCH v5 2/5] hw/nvme: use symbolic names for registers

2021-07-19 Thread Keith Busch

On Tue, Jul 20, 2021 at 12:46:44AM +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Add the NvmeBarRegs enum and use these instead of explicit register
> offsets.

Thanks, this is a very nice cleanup. For a suggested follow-up companion
patch, we should add "ASSERT_OFFSET()" checks for each register to
enforce correct positioning of the BAR offsets at build time.

Reviewed-by: Keith Busch 


> Signed-off-by: Klaus Jensen 
> Reviewed-by: Gollu Appalanaidu 
> Reviewed-by: Philippe Mathieu-Daudé 
> ---
>  include/block/nvme.h | 29 -
>  hw/nvme/ctrl.c   | 44 ++--
>  2 files changed, 50 insertions(+), 23 deletions(-)
> 
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 84053b68b987..77aae0117494 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -9,7 +9,7 @@ typedef struct QEMU_PACKED NvmeBar {
>  uint32_tcc;
>  uint8_t rsvd24[4];
>  uint32_tcsts;
> -uint32_tnssrc;
> +uint32_tnssr;
>  uint32_taqa;
>  uint64_tasq;
>  uint64_tacq;
> @@ -31,6 +31,33 @@ typedef struct QEMU_PACKED NvmeBar {
>  uint8_t css[484];
>  } NvmeBar;
>  
> +enum NvmeBarRegs {
> +NVME_REG_CAP = offsetof(NvmeBar, cap),
> +NVME_REG_VS  = offsetof(NvmeBar, vs),
> +NVME_REG_INTMS   = offsetof(NvmeBar, intms),
> +NVME_REG_INTMC   = offsetof(NvmeBar, intmc),
> +NVME_REG_CC  = offsetof(NvmeBar, cc),
> +NVME_REG_CSTS= offsetof(NvmeBar, csts),
> +NVME_REG_NSSR= offsetof(NvmeBar, nssr),
> +NVME_REG_AQA = offsetof(NvmeBar, aqa),
> +NVME_REG_ASQ = offsetof(NvmeBar, asq),
> +NVME_REG_ACQ = offsetof(NvmeBar, acq),
> +NVME_REG_CMBLOC  = offsetof(NvmeBar, cmbloc),
> +NVME_REG_CMBSZ   = offsetof(NvmeBar, cmbsz),
> +NVME_REG_BPINFO  = offsetof(NvmeBar, bpinfo),
> +NVME_REG_BPRSEL  = offsetof(NvmeBar, bprsel),
> +NVME_REG_BPMBL   = offsetof(NvmeBar, bpmbl),
> +NVME_REG_CMBMSC  = offsetof(NvmeBar, cmbmsc),
> +NVME_REG_CMBSTS  = offsetof(NvmeBar, cmbsts),
> +NVME_REG_PMRCAP  = offsetof(NvmeBar, pmrcap),
> +NVME_REG_PMRCTL  = offsetof(NvmeBar, pmrctl),
> +NVME_REG_PMRSTS  = offsetof(NvmeBar, pmrsts),
> +NVME_REG_PMREBS  = offsetof(NvmeBar, pmrebs),
> +NVME_REG_PMRSWTP = offsetof(NvmeBar, pmrswtp),
> +NVME_REG_PMRMSCL = offsetof(NvmeBar, pmrmscl),
> +NVME_REG_PMRMSCU = offsetof(NvmeBar, pmrmscu),
> +};
> +
>  enum NvmeCapShift {
>  CAP_MQES_SHIFT = 0,
>  CAP_CQR_SHIFT  = 16,
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index 28299c6f3764..8c305315f41c 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -5740,7 +5740,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
> uint64_t data,
>  }
>  
>  switch (offset) {
> -case 0xc:   /* INTMS */
> +case NVME_REG_INTMS:
>  if (unlikely(msix_enabled(&(n->parent_obj {
>  NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix,
> "undefined access to interrupt mask set"
> @@ -5752,7 +5752,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
> uint64_t data,
>  trace_pci_nvme_mmio_intm_set(data & 0x, n->bar.intmc);
>  nvme_irq_check(n);
>  break;
> -case 0x10:  /* INTMC */
> +case NVME_REG_INTMC:
>  if (unlikely(msix_enabled(&(n->parent_obj {
>  NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix,
> "undefined access to interrupt mask clr"
> @@ -5764,7 +5764,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
> uint64_t data,
>  trace_pci_nvme_mmio_intm_clr(data & 0x, n->bar.intmc);
>  nvme_irq_check(n);
>  break;
> -case 0x14:  /* CC */
> +case NVME_REG_CC:
>  trace_pci_nvme_mmio_cfg(data & 0x);
>  /* Windows first sends data, then sends enable bit */
>  if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) &&
> @@ -5798,7 +5798,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
> uint64_t data,
>  n->bar.cc = data;
>  }
>  break;
> -case 0x1c:  /* CSTS */
> +case NVME_REG_CSTS:
>  if (data & (1 << 4)) {
>  NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ssreset_w1c_unsupported,
> "attempted to W1C CSTS.NSSRO"
> @@ -5809,7 +5809,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
> uint64_t data,
> " of controller status");
>  }
>  break;
> -case 0x20:  /* NSSR */
> +case NVME_REG_NSSR:
>  if (data == 0x4e564d65) {
>  trace_pci_nvme_ub_mmiowr_ssreset_unsupported();
>  } else {
> @@ -5817,38 +5817,38 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr 
> offset, uint64_t data,
>  return;
>  }
>  break;
> -case 0x24:  /* AQA */
> +case NVME_REG_AQA:
>

[PATCH v5 4/5] hw/nvme: fix mmio read

2021-07-19 Thread Klaus Jensen

From: Klaus Jensen 

The new PMR test unearthed a long-standing issue with MMIO reads on
big-endian hosts.

Fix this by unconditionally storing all controller registers in little
endian.

Cc: Gollu Appalanaidu 
Reported-by: Peter Maydell 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 290 +++--
 1 file changed, 162 insertions(+), 128 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 0449cc4dee9b..43dfaeac9f54 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -439,10 +439,12 @@ static uint8_t nvme_sq_empty(NvmeSQueue *sq)
 
 static void nvme_irq_check(NvmeCtrl *n)
 {
+uint32_t intms = ldl_le_p(>bar.intms);
+
 if (msix_enabled(&(n->parent_obj))) {
 return;
 }
-if (~n->bar.intms & n->irq_status) {
+if (~intms & n->irq_status) {
 pci_irq_assert(>parent_obj);
 } else {
 pci_irq_deassert(>parent_obj);
@@ -1289,7 +1291,7 @@ static void nvme_post_cqes(void *opaque)
 if (ret) {
 trace_pci_nvme_err_addr_write(addr);
 trace_pci_nvme_err_cfs();
-n->bar.csts = NVME_CSTS_FAILED;
+stl_le_p(>bar.csts, NVME_CSTS_FAILED);
 break;
 }
 QTAILQ_REMOVE(>req_list, req, entry);
@@ -4022,7 +4024,7 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest 
*req)
 trace_pci_nvme_err_invalid_create_sq_sqid(sqid);
 return NVME_INVALID_QID | NVME_DNR;
 }
-if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
+if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(>bar.cap {
 trace_pci_nvme_err_invalid_create_sq_size(qsize);
 return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
 }
@@ -4208,7 +4210,7 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t 
csi, uint32_t buf_len,
 return NVME_INVALID_FIELD | NVME_DNR;
 }
 
-switch (NVME_CC_CSS(n->bar.cc)) {
+switch (NVME_CC_CSS(ldl_le_p(>bar.cc))) {
 case NVME_CC_CSS_NVM:
 src_iocs = nvme_cse_iocs_nvm;
 /* fall through */
@@ -4370,7 +4372,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest 
*req)
 trace_pci_nvme_err_invalid_create_cq_cqid(cqid);
 return NVME_INVALID_QID | NVME_DNR;
 }
-if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
+if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(>bar.cap {
 trace_pci_nvme_err_invalid_create_cq_size(qsize);
 return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
 }
@@ -5163,17 +5165,19 @@ static void nvme_update_dmrsl(NvmeCtrl *n)
 
 static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns)
 {
+uint32_t cc = ldl_le_p(>bar.cc);
+
 ns->iocs = nvme_cse_iocs_none;
 switch (ns->csi) {
 case NVME_CSI_NVM:
-if (NVME_CC_CSS(n->bar.cc) != NVME_CC_CSS_ADMIN_ONLY) {
+if (NVME_CC_CSS(cc) != NVME_CC_CSS_ADMIN_ONLY) {
 ns->iocs = nvme_cse_iocs_nvm;
 }
 break;
 case NVME_CSI_ZONED:
-if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_CSI) {
+if (NVME_CC_CSS(cc) == NVME_CC_CSS_CSI) {
 ns->iocs = nvme_cse_iocs_zoned;
-} else if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_NVM) {
+} else if (NVME_CC_CSS(cc) == NVME_CC_CSS_NVM) {
 ns->iocs = nvme_cse_iocs_nvm;
 }
 break;
@@ -5510,7 +5514,7 @@ static void nvme_process_sq(void *opaque)
 if (nvme_addr_read(n, addr, (void *), sizeof(cmd))) {
 trace_pci_nvme_err_addr_read(addr);
 trace_pci_nvme_err_cfs();
-n->bar.csts = NVME_CSTS_FAILED;
+stl_le_p(>bar.csts, NVME_CSTS_FAILED);
 break;
 }
 nvme_inc_sq_head(sq);
@@ -5565,8 +5569,6 @@ static void nvme_ctrl_reset(NvmeCtrl *n)
 n->aer_queued = 0;
 n->outstanding_aers = 0;
 n->qs_created = false;
-
-n->bar.cc = 0;
 }
 
 static void nvme_ctrl_shutdown(NvmeCtrl *n)
@@ -5605,7 +5607,12 @@ static void nvme_select_iocs(NvmeCtrl *n)
 
 static int nvme_start_ctrl(NvmeCtrl *n)
 {
-uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12;
+uint64_t cap = ldq_le_p(>bar.cap);
+uint32_t cc = ldl_le_p(>bar.cc);
+uint32_t aqa = ldl_le_p(>bar.aqa);
+uint64_t asq = ldq_le_p(>bar.asq);
+uint64_t acq = ldq_le_p(>bar.acq);
+uint32_t page_bits = NVME_CC_MPS(cc) + 12;
 uint32_t page_size = 1 << page_bits;
 
 if (unlikely(n->cq[0])) {
@@ -5616,73 +5623,72 @@ static int nvme_start_ctrl(NvmeCtrl *n)
 trace_pci_nvme_err_startfail_sq();
 return -1;
 }
-if (unlikely(!n->bar.asq)) {
+if (unlikely(!asq)) {
 trace_pci_nvme_err_startfail_nbarasq();
 return -1;
 }
-if (unlikely(!n->bar.acq)) {
+if (unlikely(!acq)) {
 trace_pci_nvme_err_startfail_nbaracq();
 return -1;
 }
-if (unlikely(n->bar.asq & (page_size - 1))) {
-trace_pci_nvme_err_startfail_asq_misaligned(n->bar.asq);
+if (unlikely(asq & (page_size - 1))) {
+

[PATCH v5 5/5] tests/qtest/nvme-test: add mmio read test

2021-07-19 Thread Klaus Jensen

From: Klaus Jensen 

Add a regression test for mmio read on big-endian hosts.

Signed-off-by: Klaus Jensen 
Reviewed-by: Gollu Appalanaidu 
---
 tests/qtest/nvme-test.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/tests/qtest/nvme-test.c b/tests/qtest/nvme-test.c
index 47e757d7e2af..f8bafb5d70fb 100644
--- a/tests/qtest/nvme-test.c
+++ b/tests/qtest/nvme-test.c
@@ -67,6 +67,30 @@ static void nvmetest_oob_cmb_test(void *obj, void *data, 
QGuestAllocator *alloc)
 g_assert_cmpint(qpci_io_readl(pdev, bar, cmb_bar_size - 1), !=, 
0x44332211);
 }
 
+static void nvmetest_reg_read_test(void *obj, void *data, QGuestAllocator 
*alloc)
+{
+QNvme *nvme = obj;
+QPCIDevice *pdev = >dev;
+QPCIBar bar;
+uint32_t cap_lo, cap_hi;
+uint64_t cap;
+
+qpci_device_enable(pdev);
+bar = qpci_iomap(pdev, 0, NULL);
+
+cap_lo = qpci_io_readl(pdev, bar, 0x0);
+g_assert_cmpint(NVME_CAP_MQES(cap_lo), ==, 0x7ff);
+
+cap_hi = qpci_io_readl(pdev, bar, 0x4);
+g_assert_cmpint(NVME_CAP_MPSMAX((uint64_t)cap_hi << 32), ==, 0x4);
+
+cap = qpci_io_readq(pdev, bar, 0x0);
+g_assert_cmpint(NVME_CAP_MQES(cap), ==, 0x7ff);
+g_assert_cmpint(NVME_CAP_MPSMAX(cap), ==, 0x4);
+
+qpci_iounmap(pdev, bar);
+}
+
 static void nvmetest_pmr_reg_test(void *obj, void *data, QGuestAllocator 
*alloc)
 {
 QNvme *nvme = obj;
@@ -142,6 +166,8 @@ static void nvme_register_nodes(void)
  &(QOSGraphTestOptions) {
 .edge.extra_device_opts = "pmrdev=pmr0"
 });
+
+qos_add_test("reg-read", "nvme", nvmetest_reg_read_test, NULL);
 }
 
 libqos_init(nvme_register_nodes);
-- 
2.32.0

[PATCH v5 3/5] hw/nvme: fix out-of-bounds reads

2021-07-19 Thread Klaus Jensen

From: Klaus Jensen 

Peter noticed that mmio access may read into the NvmeParams member in
the NvmeCtrl struct.

Fix the bounds check.

Reported-by: Peter Maydell 
Signed-off-by: Klaus Jensen 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Peter Maydell 
---
 hw/nvme/ctrl.c | 27 +++
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 8c305315f41c..0449cc4dee9b 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -5968,23 +5968,26 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr 
addr, unsigned size)
 /* should RAZ, fall through for now */
 }
 
-if (addr < sizeof(n->bar)) {
-/*
- * When PMRWBM bit 1 is set then read from
- * from PMRSTS should ensure prior writes
- * made it to persistent media
- */
-if (addr == NVME_REG_PMRSTS &&
-(NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) {
-memory_region_msync(>pmr.dev->mr, 0, n->pmr.dev->size);
-}
-memcpy(, ptr + addr, size);
-} else {
+if (addr > sizeof(n->bar) - size) {
 NVME_GUEST_ERR(pci_nvme_ub_mmiord_invalid_ofs,
"MMIO read beyond last register,"
" offset=0x%"PRIx64", returning 0", addr);
+
+return 0;
 }
 
+/*
+ * When PMRWBM bit 1 is set then read from
+ * from PMRSTS should ensure prior writes
+ * made it to persistent media
+ */
+if (addr == NVME_REG_PMRSTS &&
+(NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) {
+memory_region_msync(>pmr.dev->mr, 0, n->pmr.dev->size);
+}
+
+memcpy(, ptr + addr, size);
+
 return val;
 }
 
-- 
2.32.0

[PATCH v5 2/5] hw/nvme: use symbolic names for registers

2021-07-19 Thread Klaus Jensen

From: Klaus Jensen 

Add the NvmeBarRegs enum and use these instead of explicit register
offsets.

Signed-off-by: Klaus Jensen 
Reviewed-by: Gollu Appalanaidu 
Reviewed-by: Philippe Mathieu-Daudé 
---
 include/block/nvme.h | 29 -
 hw/nvme/ctrl.c   | 44 ++--
 2 files changed, 50 insertions(+), 23 deletions(-)

diff --git a/include/block/nvme.h b/include/block/nvme.h
index 84053b68b987..77aae0117494 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -9,7 +9,7 @@ typedef struct QEMU_PACKED NvmeBar {
 uint32_tcc;
 uint8_t rsvd24[4];
 uint32_tcsts;
-uint32_tnssrc;
+uint32_tnssr;
 uint32_taqa;
 uint64_tasq;
 uint64_tacq;
@@ -31,6 +31,33 @@ typedef struct QEMU_PACKED NvmeBar {
 uint8_t css[484];
 } NvmeBar;
 
+enum NvmeBarRegs {
+NVME_REG_CAP = offsetof(NvmeBar, cap),
+NVME_REG_VS  = offsetof(NvmeBar, vs),
+NVME_REG_INTMS   = offsetof(NvmeBar, intms),
+NVME_REG_INTMC   = offsetof(NvmeBar, intmc),
+NVME_REG_CC  = offsetof(NvmeBar, cc),
+NVME_REG_CSTS= offsetof(NvmeBar, csts),
+NVME_REG_NSSR= offsetof(NvmeBar, nssr),
+NVME_REG_AQA = offsetof(NvmeBar, aqa),
+NVME_REG_ASQ = offsetof(NvmeBar, asq),
+NVME_REG_ACQ = offsetof(NvmeBar, acq),
+NVME_REG_CMBLOC  = offsetof(NvmeBar, cmbloc),
+NVME_REG_CMBSZ   = offsetof(NvmeBar, cmbsz),
+NVME_REG_BPINFO  = offsetof(NvmeBar, bpinfo),
+NVME_REG_BPRSEL  = offsetof(NvmeBar, bprsel),
+NVME_REG_BPMBL   = offsetof(NvmeBar, bpmbl),
+NVME_REG_CMBMSC  = offsetof(NvmeBar, cmbmsc),
+NVME_REG_CMBSTS  = offsetof(NvmeBar, cmbsts),
+NVME_REG_PMRCAP  = offsetof(NvmeBar, pmrcap),
+NVME_REG_PMRCTL  = offsetof(NvmeBar, pmrctl),
+NVME_REG_PMRSTS  = offsetof(NvmeBar, pmrsts),
+NVME_REG_PMREBS  = offsetof(NvmeBar, pmrebs),
+NVME_REG_PMRSWTP = offsetof(NvmeBar, pmrswtp),
+NVME_REG_PMRMSCL = offsetof(NvmeBar, pmrmscl),
+NVME_REG_PMRMSCU = offsetof(NvmeBar, pmrmscu),
+};
+
 enum NvmeCapShift {
 CAP_MQES_SHIFT = 0,
 CAP_CQR_SHIFT  = 16,
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 28299c6f3764..8c305315f41c 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -5740,7 +5740,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
 }
 
 switch (offset) {
-case 0xc:   /* INTMS */
+case NVME_REG_INTMS:
 if (unlikely(msix_enabled(&(n->parent_obj {
 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix,
"undefined access to interrupt mask set"
@@ -5752,7 +5752,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
 trace_pci_nvme_mmio_intm_set(data & 0x, n->bar.intmc);
 nvme_irq_check(n);
 break;
-case 0x10:  /* INTMC */
+case NVME_REG_INTMC:
 if (unlikely(msix_enabled(&(n->parent_obj {
 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix,
"undefined access to interrupt mask clr"
@@ -5764,7 +5764,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
 trace_pci_nvme_mmio_intm_clr(data & 0x, n->bar.intmc);
 nvme_irq_check(n);
 break;
-case 0x14:  /* CC */
+case NVME_REG_CC:
 trace_pci_nvme_mmio_cfg(data & 0x);
 /* Windows first sends data, then sends enable bit */
 if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) &&
@@ -5798,7 +5798,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
 n->bar.cc = data;
 }
 break;
-case 0x1c:  /* CSTS */
+case NVME_REG_CSTS:
 if (data & (1 << 4)) {
 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ssreset_w1c_unsupported,
"attempted to W1C CSTS.NSSRO"
@@ -5809,7 +5809,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
" of controller status");
 }
 break;
-case 0x20:  /* NSSR */
+case NVME_REG_NSSR:
 if (data == 0x4e564d65) {
 trace_pci_nvme_ub_mmiowr_ssreset_unsupported();
 } else {
@@ -5817,38 +5817,38 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
 return;
 }
 break;
-case 0x24:  /* AQA */
+case NVME_REG_AQA:
 n->bar.aqa = data & 0x;
 trace_pci_nvme_mmio_aqattr(data & 0x);
 break;
-case 0x28:  /* ASQ */
+case NVME_REG_ASQ:
 n->bar.asq = size == 8 ? data :
 (n->bar.asq & ~0xULL) | (data & 0x);
 trace_pci_nvme_mmio_asqaddr(data);
 break;
-case 0x2c:  /* ASQ hi */
+case NVME_REG_ASQ + 4:
 n->bar.asq = (n->bar.asq & 0x) | (data << 32);
 trace_pci_nvme_mmio_asqaddr_hi(data, n->bar.asq);
 break;
-

[PATCH v5 1/5] hw/nvme: split pmrmsc register into upper and lower

2021-07-19 Thread Klaus Jensen

From: Klaus Jensen 

The specification uses a set of 32 bit PMRMSCL and PMRMSCU registers to
make up the 64 bit logical PMRMSC register.

Make it so.

Signed-off-by: Klaus Jensen 
---
 include/block/nvme.h | 31 ---
 hw/nvme/ctrl.c   |  9 +
 2 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/include/block/nvme.h b/include/block/nvme.h
index 527105fafc0b..84053b68b987 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -26,7 +26,8 @@ typedef struct QEMU_PACKED NvmeBar {
 uint32_tpmrsts;
 uint32_tpmrebs;
 uint32_tpmrswtp;
-uint64_tpmrmsc;
+uint32_tpmrmscl;
+uint32_tpmrmscu;
 uint8_t css[484];
 } NvmeBar;
 
@@ -475,25 +476,25 @@ enum NvmePmrswtpMask {
 #define NVME_PMRSWTP_SET_PMRSWTV(pmrswtp, val)   \
 (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTV_MASK) << 
PMRSWTP_PMRSWTV_SHIFT)
 
-enum NvmePmrmscShift {
-PMRMSC_CMSE_SHIFT   = 1,
-PMRMSC_CBA_SHIFT= 12,
+enum NvmePmrmsclShift {
+PMRMSCL_CMSE_SHIFT   = 1,
+PMRMSCL_CBA_SHIFT= 12,
 };
 
-enum NvmePmrmscMask {
-PMRMSC_CMSE_MASK   = 0x1,
-PMRMSC_CBA_MASK= 0xf,
+enum NvmePmrmsclMask {
+PMRMSCL_CMSE_MASK   = 0x1,
+PMRMSCL_CBA_MASK= 0xf,
 };
 
-#define NVME_PMRMSC_CMSE(pmrmsc)\
-((pmrmsc >> PMRMSC_CMSE_SHIFT)   & PMRMSC_CMSE_MASK)
-#define NVME_PMRMSC_CBA(pmrmsc) \
-((pmrmsc >> PMRMSC_CBA_SHIFT)   & PMRMSC_CBA_MASK)
+#define NVME_PMRMSCL_CMSE(pmrmscl)\
+((pmrmscl >> PMRMSCL_CMSE_SHIFT)   & PMRMSCL_CMSE_MASK)
+#define NVME_PMRMSCL_CBA(pmrmscl) \
+((pmrmscl >> PMRMSCL_CBA_SHIFT)   & PMRMSCL_CBA_MASK)
 
-#define NVME_PMRMSC_SET_CMSE(pmrmsc, val)   \
-(pmrmsc |= (uint64_t)(val & PMRMSC_CMSE_MASK) << PMRMSC_CMSE_SHIFT)
-#define NVME_PMRMSC_SET_CBA(pmrmsc, val)   \
-(pmrmsc |= (uint64_t)(val & PMRMSC_CBA_MASK) << PMRMSC_CBA_SHIFT)
+#define NVME_PMRMSCL_SET_CMSE(pmrmscl, val)   \
+(pmrmscl |= (uint32_t)(val & PMRMSCL_CMSE_MASK) << PMRMSCL_CMSE_SHIFT)
+#define NVME_PMRMSCL_SET_CBA(pmrmscl, val)   \
+(pmrmscl |= (uint32_t)(val & PMRMSCL_CBA_MASK) << PMRMSCL_CBA_SHIFT)
 
 enum NvmeSglDescriptorType {
 NVME_SGL_DESCR_TYPE_DATA_BLOCK  = 0x0,
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 2f0524e12a36..28299c6f3764 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -5916,11 +5916,12 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
 return;
 }
 
-n->bar.pmrmsc = (n->bar.pmrmsc & ~0x) | (data & 0x);
+n->bar.pmrmscl = data & 0x;
 n->pmr.cmse = false;
 
-if (NVME_PMRMSC_CMSE(n->bar.pmrmsc)) {
-hwaddr cba = NVME_PMRMSC_CBA(n->bar.pmrmsc) << PMRMSC_CBA_SHIFT;
+if (NVME_PMRMSCL_CMSE(n->bar.pmrmscl)) {
+hwaddr cba = n->bar.pmrmscu |
+(NVME_PMRMSCL_CBA(n->bar.pmrmscl) << PMRMSCL_CBA_SHIFT);
 if (cba + int128_get64(n->pmr.dev->mr.size) < cba) {
 NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 1);
 return;
@@ -5936,7 +5937,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
 return;
 }
 
-n->bar.pmrmsc = (n->bar.pmrmsc & 0x) | (data << 32);
+n->bar.pmrmscu = data & 0x;
 return;
 default:
 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_invalid,
-- 
2.32.0

[PATCH v5 0/5] hw/nvme: fix mmio read

2021-07-19 Thread Klaus Jensen

From: Klaus Jensen 

Fix mmio read issues on big-endian hosts. The core issue is that values
in the BAR is not stored in little endian as required.

Fix that and add a regression test for this. This required a bit of
cleanup, so it blew up into a series.

v5:
  * "hw/nvme: fix mmio read"
- Hurried the changes a bit. Fixed.

v4:
  * "hw/nvme: split pmrmsc register into upper and lower"
- Fix missing left-shift (Peter)

  * "hw/nvme: fix mmio read"
- Remove unnecessary masking (Peter)
- Keep existing behaviour and do not zero the register fields doing
  initialization (Peter)

v3:

  * "hw/nvme: use symbolic names for registers"
Use offsetof(NvmeBar, reg) instead of explicit offsets (Philippe)

  * "hw/nvme: fix mmio read"
Use the st/ld API instead of cpu_to_X (Philippe)

Klaus Jensen (5):
  hw/nvme: split pmrmsc register into upper and lower
  hw/nvme: use symbolic names for registers
  hw/nvme: fix out-of-bounds reads
  hw/nvme: fix mmio read
  tests/qtest/nvme-test: add mmio read test

 include/block/nvme.h|  60 +--
 hw/nvme/ctrl.c  | 352 ++--
 tests/qtest/nvme-test.c |  26 +++
 3 files changed, 265 insertions(+), 173 deletions(-)

-- 
2.32.0

[PATCH v4 5/5] tests/qtest/nvme-test: add mmio read test

2021-07-19 Thread Klaus Jensen

From: Klaus Jensen 

Add a regression test for mmio read on big-endian hosts.

Signed-off-by: Klaus Jensen 
Reviewed-by: Gollu Appalanaidu 
---
 tests/qtest/nvme-test.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/tests/qtest/nvme-test.c b/tests/qtest/nvme-test.c
index 47e757d7e2af..f8bafb5d70fb 100644
--- a/tests/qtest/nvme-test.c
+++ b/tests/qtest/nvme-test.c
@@ -67,6 +67,30 @@ static void nvmetest_oob_cmb_test(void *obj, void *data, 
QGuestAllocator *alloc)
 g_assert_cmpint(qpci_io_readl(pdev, bar, cmb_bar_size - 1), !=, 
0x44332211);
 }
 
+static void nvmetest_reg_read_test(void *obj, void *data, QGuestAllocator 
*alloc)
+{
+QNvme *nvme = obj;
+QPCIDevice *pdev = >dev;
+QPCIBar bar;
+uint32_t cap_lo, cap_hi;
+uint64_t cap;
+
+qpci_device_enable(pdev);
+bar = qpci_iomap(pdev, 0, NULL);
+
+cap_lo = qpci_io_readl(pdev, bar, 0x0);
+g_assert_cmpint(NVME_CAP_MQES(cap_lo), ==, 0x7ff);
+
+cap_hi = qpci_io_readl(pdev, bar, 0x4);
+g_assert_cmpint(NVME_CAP_MPSMAX((uint64_t)cap_hi << 32), ==, 0x4);
+
+cap = qpci_io_readq(pdev, bar, 0x0);
+g_assert_cmpint(NVME_CAP_MQES(cap), ==, 0x7ff);
+g_assert_cmpint(NVME_CAP_MPSMAX(cap), ==, 0x4);
+
+qpci_iounmap(pdev, bar);
+}
+
 static void nvmetest_pmr_reg_test(void *obj, void *data, QGuestAllocator 
*alloc)
 {
 QNvme *nvme = obj;
@@ -142,6 +166,8 @@ static void nvme_register_nodes(void)
  &(QOSGraphTestOptions) {
 .edge.extra_device_opts = "pmrdev=pmr0"
 });
+
+qos_add_test("reg-read", "nvme", nvmetest_reg_read_test, NULL);
 }
 
 libqos_init(nvme_register_nodes);
-- 
2.32.0

Re: [PATCH v4 4/5] hw/nvme: fix mmio read

2021-07-19 Thread Klaus Jensen

On Jul 19 22:07, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> The new PMR test unearthed a long-standing issue with MMIO reads on
> big-endian hosts.
> 
> Fix this by unconditionally storing all controller registers in little
> endian.
> 
> Cc: Gollu Appalanaidu 
> Reported-by: Peter Maydell 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/nvme/ctrl.c | 290 +++--
>  1 file changed, 162 insertions(+), 128 deletions(-)
> 
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index 0449cc4dee9b..76721e31c6b1 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -439,10 +439,12 @@ static uint8_t nvme_sq_empty(NvmeSQueue *sq)
>  
>  static void nvme_irq_check(NvmeCtrl *n)
>  {
> +uint32_t intms = ldl_le_p(>bar.intms);
> +
>  if (msix_enabled(&(n->parent_obj))) {
>  return;
>  }
> -if (~n->bar.intms & n->irq_status) {
> +if (~intms & n->irq_status) {
>  pci_irq_assert(>parent_obj);
>  } else {
>  pci_irq_deassert(>parent_obj);
> @@ -1289,7 +1291,7 @@ static void nvme_post_cqes(void *opaque)
>  if (ret) {
>  trace_pci_nvme_err_addr_write(addr);
>  trace_pci_nvme_err_cfs();
> -n->bar.csts = NVME_CSTS_FAILED;
> +stl_le_p(>bar.csts, NVME_CSTS_FAILED);
>  break;
>  }
>  QTAILQ_REMOVE(>req_list, req, entry);
> @@ -4022,7 +4024,7 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest 
> *req)
>  trace_pci_nvme_err_invalid_create_sq_sqid(sqid);
>  return NVME_INVALID_QID | NVME_DNR;
>  }
> -if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
> +if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(>bar.cap {
>  trace_pci_nvme_err_invalid_create_sq_size(qsize);
>  return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
>  }
> @@ -4208,7 +4210,7 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t 
> csi, uint32_t buf_len,
>  return NVME_INVALID_FIELD | NVME_DNR;
>  }
>  
> -switch (NVME_CC_CSS(n->bar.cc)) {
> +switch (NVME_CC_CSS(ldl_le_p(>bar.cc))) {
>  case NVME_CC_CSS_NVM:
>  src_iocs = nvme_cse_iocs_nvm;
>  /* fall through */
> @@ -4370,7 +4372,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest 
> *req)
>  trace_pci_nvme_err_invalid_create_cq_cqid(cqid);
>  return NVME_INVALID_QID | NVME_DNR;
>  }
> -if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
> +if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(>bar.cap {
>  trace_pci_nvme_err_invalid_create_cq_size(qsize);
>  return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
>  }
> @@ -5163,17 +5165,19 @@ static void nvme_update_dmrsl(NvmeCtrl *n)
>  
>  static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns)
>  {
> +uint32_t cc = ldl_le_p(>bar.cc);
> +
>  ns->iocs = nvme_cse_iocs_none;
>  switch (ns->csi) {
>  case NVME_CSI_NVM:
> -if (NVME_CC_CSS(n->bar.cc) != NVME_CC_CSS_ADMIN_ONLY) {
> +if (NVME_CC_CSS(cc) != NVME_CC_CSS_ADMIN_ONLY) {
>  ns->iocs = nvme_cse_iocs_nvm;
>  }
>  break;
>  case NVME_CSI_ZONED:
> -if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_CSI) {
> +if (NVME_CC_CSS(cc) == NVME_CC_CSS_CSI) {
>  ns->iocs = nvme_cse_iocs_zoned;
> -} else if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_NVM) {
> +} else if (NVME_CC_CSS(cc) == NVME_CC_CSS_NVM) {
>  ns->iocs = nvme_cse_iocs_nvm;
>  }
>  break;
> @@ -5510,7 +5514,7 @@ static void nvme_process_sq(void *opaque)
>  if (nvme_addr_read(n, addr, (void *), sizeof(cmd))) {
>  trace_pci_nvme_err_addr_read(addr);
>  trace_pci_nvme_err_cfs();
> -n->bar.csts = NVME_CSTS_FAILED;
> +stl_le_p(>bar.csts, NVME_CSTS_FAILED);
>  break;
>  }
>  nvme_inc_sq_head(sq);
> @@ -5565,8 +5569,6 @@ static void nvme_ctrl_reset(NvmeCtrl *n)
>  n->aer_queued = 0;
>  n->outstanding_aers = 0;
>  n->qs_created = false;
> -
> -n->bar.cc = 0;
>  }
>  
>  static void nvme_ctrl_shutdown(NvmeCtrl *n)
> @@ -5605,7 +5607,12 @@ static void nvme_select_iocs(NvmeCtrl *n)
>  
>  static int nvme_start_ctrl(NvmeCtrl *n)
>  {
> -uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12;
> +uint64_t cap = ldq_le_p(>bar.cap);
> +uint32_t cc = ldl_le_p(>bar.cc);
> +uint32_t aqa = ldl_le_p(>bar.aqa);
> +uint64_t asq = ldq_le_p(>bar.asq);
> +uint64_t acq = ldq_le_p(>bar.acq);
> +uint32_t page_bits = NVME_CC_MPS(cc) + 12;
>  uint32_t page_size = 1 << page_bits;
>  
>  if (unlikely(n->cq[0])) {
> @@ -5616,73 +5623,72 @@ static int nvme_start_ctrl(NvmeCtrl *n)
>  trace_pci_nvme_err_startfail_sq();
>  return -1;
>  }
> -if (unlikely(!n->bar.asq)) {
> +if (unlikely(!asq)) {
>  trace_pci_nvme_err_startfail_nbarasq();
>  return -1;
>  }
> -

[PATCH v4 2/5] hw/nvme: use symbolic names for registers

2021-07-19 Thread Klaus Jensen

From: Klaus Jensen 

Add the NvmeBarRegs enum and use these instead of explicit register
offsets.

Signed-off-by: Klaus Jensen 
Reviewed-by: Gollu Appalanaidu 
Reviewed-by: Philippe Mathieu-Daudé 
---
 include/block/nvme.h | 29 -
 hw/nvme/ctrl.c   | 44 ++--
 2 files changed, 50 insertions(+), 23 deletions(-)

diff --git a/include/block/nvme.h b/include/block/nvme.h
index 84053b68b987..77aae0117494 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -9,7 +9,7 @@ typedef struct QEMU_PACKED NvmeBar {
 uint32_tcc;
 uint8_t rsvd24[4];
 uint32_tcsts;
-uint32_tnssrc;
+uint32_tnssr;
 uint32_taqa;
 uint64_tasq;
 uint64_tacq;
@@ -31,6 +31,33 @@ typedef struct QEMU_PACKED NvmeBar {
 uint8_t css[484];
 } NvmeBar;
 
+enum NvmeBarRegs {
+NVME_REG_CAP = offsetof(NvmeBar, cap),
+NVME_REG_VS  = offsetof(NvmeBar, vs),
+NVME_REG_INTMS   = offsetof(NvmeBar, intms),
+NVME_REG_INTMC   = offsetof(NvmeBar, intmc),
+NVME_REG_CC  = offsetof(NvmeBar, cc),
+NVME_REG_CSTS= offsetof(NvmeBar, csts),
+NVME_REG_NSSR= offsetof(NvmeBar, nssr),
+NVME_REG_AQA = offsetof(NvmeBar, aqa),
+NVME_REG_ASQ = offsetof(NvmeBar, asq),
+NVME_REG_ACQ = offsetof(NvmeBar, acq),
+NVME_REG_CMBLOC  = offsetof(NvmeBar, cmbloc),
+NVME_REG_CMBSZ   = offsetof(NvmeBar, cmbsz),
+NVME_REG_BPINFO  = offsetof(NvmeBar, bpinfo),
+NVME_REG_BPRSEL  = offsetof(NvmeBar, bprsel),
+NVME_REG_BPMBL   = offsetof(NvmeBar, bpmbl),
+NVME_REG_CMBMSC  = offsetof(NvmeBar, cmbmsc),
+NVME_REG_CMBSTS  = offsetof(NvmeBar, cmbsts),
+NVME_REG_PMRCAP  = offsetof(NvmeBar, pmrcap),
+NVME_REG_PMRCTL  = offsetof(NvmeBar, pmrctl),
+NVME_REG_PMRSTS  = offsetof(NvmeBar, pmrsts),
+NVME_REG_PMREBS  = offsetof(NvmeBar, pmrebs),
+NVME_REG_PMRSWTP = offsetof(NvmeBar, pmrswtp),
+NVME_REG_PMRMSCL = offsetof(NvmeBar, pmrmscl),
+NVME_REG_PMRMSCU = offsetof(NvmeBar, pmrmscu),
+};
+
 enum NvmeCapShift {
 CAP_MQES_SHIFT = 0,
 CAP_CQR_SHIFT  = 16,
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 28299c6f3764..8c305315f41c 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -5740,7 +5740,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
 }
 
 switch (offset) {
-case 0xc:   /* INTMS */
+case NVME_REG_INTMS:
 if (unlikely(msix_enabled(&(n->parent_obj {
 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix,
"undefined access to interrupt mask set"
@@ -5752,7 +5752,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
 trace_pci_nvme_mmio_intm_set(data & 0x, n->bar.intmc);
 nvme_irq_check(n);
 break;
-case 0x10:  /* INTMC */
+case NVME_REG_INTMC:
 if (unlikely(msix_enabled(&(n->parent_obj {
 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix,
"undefined access to interrupt mask clr"
@@ -5764,7 +5764,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
 trace_pci_nvme_mmio_intm_clr(data & 0x, n->bar.intmc);
 nvme_irq_check(n);
 break;
-case 0x14:  /* CC */
+case NVME_REG_CC:
 trace_pci_nvme_mmio_cfg(data & 0x);
 /* Windows first sends data, then sends enable bit */
 if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) &&
@@ -5798,7 +5798,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
 n->bar.cc = data;
 }
 break;
-case 0x1c:  /* CSTS */
+case NVME_REG_CSTS:
 if (data & (1 << 4)) {
 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ssreset_w1c_unsupported,
"attempted to W1C CSTS.NSSRO"
@@ -5809,7 +5809,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
" of controller status");
 }
 break;
-case 0x20:  /* NSSR */
+case NVME_REG_NSSR:
 if (data == 0x4e564d65) {
 trace_pci_nvme_ub_mmiowr_ssreset_unsupported();
 } else {
@@ -5817,38 +5817,38 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
 return;
 }
 break;
-case 0x24:  /* AQA */
+case NVME_REG_AQA:
 n->bar.aqa = data & 0x;
 trace_pci_nvme_mmio_aqattr(data & 0x);
 break;
-case 0x28:  /* ASQ */
+case NVME_REG_ASQ:
 n->bar.asq = size == 8 ? data :
 (n->bar.asq & ~0xULL) | (data & 0x);
 trace_pci_nvme_mmio_asqaddr(data);
 break;
-case 0x2c:  /* ASQ hi */
+case NVME_REG_ASQ + 4:
 n->bar.asq = (n->bar.asq & 0x) | (data << 32);
 trace_pci_nvme_mmio_asqaddr_hi(data, n->bar.asq);
 break;
-

[PATCH v4 4/5] hw/nvme: fix mmio read

2021-07-19 Thread Klaus Jensen

From: Klaus Jensen 

The new PMR test unearthed a long-standing issue with MMIO reads on
big-endian hosts.

Fix this by unconditionally storing all controller registers in little
endian.

Cc: Gollu Appalanaidu 
Reported-by: Peter Maydell 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 290 +++--
 1 file changed, 162 insertions(+), 128 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 0449cc4dee9b..76721e31c6b1 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -439,10 +439,12 @@ static uint8_t nvme_sq_empty(NvmeSQueue *sq)
 
 static void nvme_irq_check(NvmeCtrl *n)
 {
+uint32_t intms = ldl_le_p(>bar.intms);
+
 if (msix_enabled(&(n->parent_obj))) {
 return;
 }
-if (~n->bar.intms & n->irq_status) {
+if (~intms & n->irq_status) {
 pci_irq_assert(>parent_obj);
 } else {
 pci_irq_deassert(>parent_obj);
@@ -1289,7 +1291,7 @@ static void nvme_post_cqes(void *opaque)
 if (ret) {
 trace_pci_nvme_err_addr_write(addr);
 trace_pci_nvme_err_cfs();
-n->bar.csts = NVME_CSTS_FAILED;
+stl_le_p(>bar.csts, NVME_CSTS_FAILED);
 break;
 }
 QTAILQ_REMOVE(>req_list, req, entry);
@@ -4022,7 +4024,7 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest 
*req)
 trace_pci_nvme_err_invalid_create_sq_sqid(sqid);
 return NVME_INVALID_QID | NVME_DNR;
 }
-if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
+if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(>bar.cap {
 trace_pci_nvme_err_invalid_create_sq_size(qsize);
 return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
 }
@@ -4208,7 +4210,7 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t 
csi, uint32_t buf_len,
 return NVME_INVALID_FIELD | NVME_DNR;
 }
 
-switch (NVME_CC_CSS(n->bar.cc)) {
+switch (NVME_CC_CSS(ldl_le_p(>bar.cc))) {
 case NVME_CC_CSS_NVM:
 src_iocs = nvme_cse_iocs_nvm;
 /* fall through */
@@ -4370,7 +4372,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest 
*req)
 trace_pci_nvme_err_invalid_create_cq_cqid(cqid);
 return NVME_INVALID_QID | NVME_DNR;
 }
-if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
+if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(>bar.cap {
 trace_pci_nvme_err_invalid_create_cq_size(qsize);
 return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
 }
@@ -5163,17 +5165,19 @@ static void nvme_update_dmrsl(NvmeCtrl *n)
 
 static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns)
 {
+uint32_t cc = ldl_le_p(>bar.cc);
+
 ns->iocs = nvme_cse_iocs_none;
 switch (ns->csi) {
 case NVME_CSI_NVM:
-if (NVME_CC_CSS(n->bar.cc) != NVME_CC_CSS_ADMIN_ONLY) {
+if (NVME_CC_CSS(cc) != NVME_CC_CSS_ADMIN_ONLY) {
 ns->iocs = nvme_cse_iocs_nvm;
 }
 break;
 case NVME_CSI_ZONED:
-if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_CSI) {
+if (NVME_CC_CSS(cc) == NVME_CC_CSS_CSI) {
 ns->iocs = nvme_cse_iocs_zoned;
-} else if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_NVM) {
+} else if (NVME_CC_CSS(cc) == NVME_CC_CSS_NVM) {
 ns->iocs = nvme_cse_iocs_nvm;
 }
 break;
@@ -5510,7 +5514,7 @@ static void nvme_process_sq(void *opaque)
 if (nvme_addr_read(n, addr, (void *), sizeof(cmd))) {
 trace_pci_nvme_err_addr_read(addr);
 trace_pci_nvme_err_cfs();
-n->bar.csts = NVME_CSTS_FAILED;
+stl_le_p(>bar.csts, NVME_CSTS_FAILED);
 break;
 }
 nvme_inc_sq_head(sq);
@@ -5565,8 +5569,6 @@ static void nvme_ctrl_reset(NvmeCtrl *n)
 n->aer_queued = 0;
 n->outstanding_aers = 0;
 n->qs_created = false;
-
-n->bar.cc = 0;
 }
 
 static void nvme_ctrl_shutdown(NvmeCtrl *n)
@@ -5605,7 +5607,12 @@ static void nvme_select_iocs(NvmeCtrl *n)
 
 static int nvme_start_ctrl(NvmeCtrl *n)
 {
-uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12;
+uint64_t cap = ldq_le_p(>bar.cap);
+uint32_t cc = ldl_le_p(>bar.cc);
+uint32_t aqa = ldl_le_p(>bar.aqa);
+uint64_t asq = ldq_le_p(>bar.asq);
+uint64_t acq = ldq_le_p(>bar.acq);
+uint32_t page_bits = NVME_CC_MPS(cc) + 12;
 uint32_t page_size = 1 << page_bits;
 
 if (unlikely(n->cq[0])) {
@@ -5616,73 +5623,72 @@ static int nvme_start_ctrl(NvmeCtrl *n)
 trace_pci_nvme_err_startfail_sq();
 return -1;
 }
-if (unlikely(!n->bar.asq)) {
+if (unlikely(!asq)) {
 trace_pci_nvme_err_startfail_nbarasq();
 return -1;
 }
-if (unlikely(!n->bar.acq)) {
+if (unlikely(!acq)) {
 trace_pci_nvme_err_startfail_nbaracq();
 return -1;
 }
-if (unlikely(n->bar.asq & (page_size - 1))) {
-trace_pci_nvme_err_startfail_asq_misaligned(n->bar.asq);
+if (unlikely(asq & (page_size - 1))) {
+

[PATCH v4 3/5] hw/nvme: fix out-of-bounds reads

2021-07-19 Thread Klaus Jensen

From: Klaus Jensen 

Peter noticed that mmio access may read into the NvmeParams member in
the NvmeCtrl struct.

Fix the bounds check.

Reported-by: Peter Maydell 
Signed-off-by: Klaus Jensen 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Peter Maydell 
---
 hw/nvme/ctrl.c | 27 +++
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 8c305315f41c..0449cc4dee9b 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -5968,23 +5968,26 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr 
addr, unsigned size)
 /* should RAZ, fall through for now */
 }
 
-if (addr < sizeof(n->bar)) {
-/*
- * When PMRWBM bit 1 is set then read from
- * from PMRSTS should ensure prior writes
- * made it to persistent media
- */
-if (addr == NVME_REG_PMRSTS &&
-(NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) {
-memory_region_msync(>pmr.dev->mr, 0, n->pmr.dev->size);
-}
-memcpy(, ptr + addr, size);
-} else {
+if (addr > sizeof(n->bar) - size) {
 NVME_GUEST_ERR(pci_nvme_ub_mmiord_invalid_ofs,
"MMIO read beyond last register,"
" offset=0x%"PRIx64", returning 0", addr);
+
+return 0;
 }
 
+/*
+ * When PMRWBM bit 1 is set then read from
+ * from PMRSTS should ensure prior writes
+ * made it to persistent media
+ */
+if (addr == NVME_REG_PMRSTS &&
+(NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) {
+memory_region_msync(>pmr.dev->mr, 0, n->pmr.dev->size);
+}
+
+memcpy(, ptr + addr, size);
+
 return val;
 }
 
-- 
2.32.0

[PATCH v4 0/5] hw/nvme: fix mmio read

2021-07-19 Thread Klaus Jensen

From: Klaus Jensen 

Fix mmio read issues on big-endian hosts. The core issue is that values
in the BAR is not stored in little endian as required.

Fix that and add a regression test for this. This required a bit of
cleanup, so it blew up into a series.

v4:
  * "hw/nvme: split pmrmsc register into upper and lower"
- Fix missing left-shift (Peter)

  * "hw/nvme: fix mmio read"
- Remove unnecessary masking (Peter)
- Keep existing behaviour and do not zero the register fields doing
  initialization (Peter)

v3:

  * "hw/nvme: use symbolic names for registers"
Use offsetof(NvmeBar, reg) instead of explicit offsets (Philippe)

  * "hw/nvme: fix mmio read"
Use the st/ld API instead of cpu_to_X (Philippe)

Klaus Jensen (5):
  hw/nvme: split pmrmsc register into upper and lower
  hw/nvme: use symbolic names for registers
  hw/nvme: fix out-of-bounds reads
  hw/nvme: fix mmio read
  tests/qtest/nvme-test: add mmio read test

 include/block/nvme.h|  60 +--
 hw/nvme/ctrl.c  | 352 ++--
 tests/qtest/nvme-test.c |  26 +++
 3 files changed, 265 insertions(+), 173 deletions(-)

-- 
2.32.0

[PATCH v4 1/5] hw/nvme: split pmrmsc register into upper and lower

2021-07-19 Thread Klaus Jensen

From: Klaus Jensen 

The specification uses a set of 32 bit PMRMSCL and PMRMSCU registers to
make up the 64 bit logical PMRMSC register.

Make it so.

Signed-off-by: Klaus Jensen 
---
 include/block/nvme.h | 31 ---
 hw/nvme/ctrl.c   |  9 +
 2 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/include/block/nvme.h b/include/block/nvme.h
index 527105fafc0b..84053b68b987 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -26,7 +26,8 @@ typedef struct QEMU_PACKED NvmeBar {
 uint32_tpmrsts;
 uint32_tpmrebs;
 uint32_tpmrswtp;
-uint64_tpmrmsc;
+uint32_tpmrmscl;
+uint32_tpmrmscu;
 uint8_t css[484];
 } NvmeBar;
 
@@ -475,25 +476,25 @@ enum NvmePmrswtpMask {
 #define NVME_PMRSWTP_SET_PMRSWTV(pmrswtp, val)   \
 (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTV_MASK) << 
PMRSWTP_PMRSWTV_SHIFT)
 
-enum NvmePmrmscShift {
-PMRMSC_CMSE_SHIFT   = 1,
-PMRMSC_CBA_SHIFT= 12,
+enum NvmePmrmsclShift {
+PMRMSCL_CMSE_SHIFT   = 1,
+PMRMSCL_CBA_SHIFT= 12,
 };
 
-enum NvmePmrmscMask {
-PMRMSC_CMSE_MASK   = 0x1,
-PMRMSC_CBA_MASK= 0xf,
+enum NvmePmrmsclMask {
+PMRMSCL_CMSE_MASK   = 0x1,
+PMRMSCL_CBA_MASK= 0xf,
 };
 
-#define NVME_PMRMSC_CMSE(pmrmsc)\
-((pmrmsc >> PMRMSC_CMSE_SHIFT)   & PMRMSC_CMSE_MASK)
-#define NVME_PMRMSC_CBA(pmrmsc) \
-((pmrmsc >> PMRMSC_CBA_SHIFT)   & PMRMSC_CBA_MASK)
+#define NVME_PMRMSCL_CMSE(pmrmscl)\
+((pmrmscl >> PMRMSCL_CMSE_SHIFT)   & PMRMSCL_CMSE_MASK)
+#define NVME_PMRMSCL_CBA(pmrmscl) \
+((pmrmscl >> PMRMSCL_CBA_SHIFT)   & PMRMSCL_CBA_MASK)
 
-#define NVME_PMRMSC_SET_CMSE(pmrmsc, val)   \
-(pmrmsc |= (uint64_t)(val & PMRMSC_CMSE_MASK) << PMRMSC_CMSE_SHIFT)
-#define NVME_PMRMSC_SET_CBA(pmrmsc, val)   \
-(pmrmsc |= (uint64_t)(val & PMRMSC_CBA_MASK) << PMRMSC_CBA_SHIFT)
+#define NVME_PMRMSCL_SET_CMSE(pmrmscl, val)   \
+(pmrmscl |= (uint32_t)(val & PMRMSCL_CMSE_MASK) << PMRMSCL_CMSE_SHIFT)
+#define NVME_PMRMSCL_SET_CBA(pmrmscl, val)   \
+(pmrmscl |= (uint32_t)(val & PMRMSCL_CBA_MASK) << PMRMSCL_CBA_SHIFT)
 
 enum NvmeSglDescriptorType {
 NVME_SGL_DESCR_TYPE_DATA_BLOCK  = 0x0,
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 2f0524e12a36..28299c6f3764 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -5916,11 +5916,12 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
 return;
 }
 
-n->bar.pmrmsc = (n->bar.pmrmsc & ~0x) | (data & 0x);
+n->bar.pmrmscl = data & 0x;
 n->pmr.cmse = false;
 
-if (NVME_PMRMSC_CMSE(n->bar.pmrmsc)) {
-hwaddr cba = NVME_PMRMSC_CBA(n->bar.pmrmsc) << PMRMSC_CBA_SHIFT;
+if (NVME_PMRMSCL_CMSE(n->bar.pmrmscl)) {
+hwaddr cba = n->bar.pmrmscu |
+(NVME_PMRMSCL_CBA(n->bar.pmrmscl) << PMRMSCL_CBA_SHIFT);
 if (cba + int128_get64(n->pmr.dev->mr.size) < cba) {
 NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 1);
 return;
@@ -5936,7 +5937,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
uint64_t data,
 return;
 }
 
-n->bar.pmrmsc = (n->bar.pmrmsc & 0x) | (data << 32);
+n->bar.pmrmscu = data & 0x;
 return;
 default:
 NVME_GUEST_ERR(pci_nvme_ub_mmiowr_invalid,
-- 
2.32.0

[PULL 5/6] block/blkdebug: remove new_state field and instead use a local variable

2021-07-19 Thread Max Reitz

From: Emanuele Giuseppe Esposito 

There seems to be no benefit in using a field. Replace it with a local
variable, and move the state update before the yields.

The state update has do be done before the yields because now using
a local variable does not allow the new updated state to be visible
by the other yields.

Signed-off-by: Emanuele Giuseppe Esposito 
Message-Id: <20210614082931.24925-6-eespo...@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Max Reitz 
---
 block/blkdebug.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/block/blkdebug.c b/block/blkdebug.c
index dd82131d1e..b47c3fd97c 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -40,7 +40,6 @@
 
 typedef struct BDRVBlkdebugState {
 int state;
-int new_state;
 uint64_t align;
 uint64_t max_transfer;
 uint64_t opt_write_zero;
@@ -792,7 +791,7 @@ static void suspend_request(BlockDriverState *bs, 
BlkdebugRule *rule)
 }
 
 static void process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
- int *action_count)
+ int *action_count, int *new_state)
 {
 BDRVBlkdebugState *s = bs->opaque;
 
@@ -812,7 +811,7 @@ static void process_rule(BlockDriverState *bs, struct 
BlkdebugRule *rule,
 break;
 
 case ACTION_SET_STATE:
-s->new_state = rule->options.set_state.new_state;
+*new_state = rule->options.set_state.new_state;
 break;
 
 case ACTION_SUSPEND:
@@ -825,21 +824,21 @@ static void blkdebug_debug_event(BlockDriverState *bs, 
BlkdebugEvent event)
 {
 BDRVBlkdebugState *s = bs->opaque;
 struct BlkdebugRule *rule, *next;
+int new_state;
 int actions_count[ACTION__MAX] = { 0 };
 
 assert((int)event >= 0 && event < BLKDBG__MAX);
 
-s->new_state = s->state;
+new_state = s->state;
 QLIST_FOREACH_SAFE(rule, >rules[event], next, next) {
-process_rule(bs, rule, actions_count);
+process_rule(bs, rule, actions_count, _state);
 }
+s->state = new_state;
 
 while (actions_count[ACTION_SUSPEND] > 0) {
 qemu_coroutine_yield();
 actions_count[ACTION_SUSPEND]--;
 }
-
-s->state = s->new_state;
 }
 
 static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
-- 
2.31.1

[PULL 6/6] blkdebug: protect rules and suspended_reqs with a lock

2021-07-19 Thread Max Reitz

From: Emanuele Giuseppe Esposito 

First, categorize the structure fields to identify what needs
to be protected and what doesn't.

We essentially need to protect only .state, and the 3 lists in
BDRVBlkdebugState.

Then, add the lock and mark the functions accordingly.

Co-developed-by: Paolo Bonzini 
Signed-off-by: Emanuele Giuseppe Esposito 
Message-Id: <20210614082931.24925-7-eespo...@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Max Reitz 
---
 block/blkdebug.c | 49 ++--
 1 file changed, 39 insertions(+), 10 deletions(-)

diff --git a/block/blkdebug.c b/block/blkdebug.c
index b47c3fd97c..8b67554bec 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -38,24 +38,27 @@
 #include "qapi/qobject-input-visitor.h"
 #include "sysemu/qtest.h"
 
+/* All APIs are thread-safe */
+
 typedef struct BDRVBlkdebugState {
-int state;
+/* IN: initialized in blkdebug_open() and never changed */
 uint64_t align;
 uint64_t max_transfer;
 uint64_t opt_write_zero;
 uint64_t max_write_zero;
 uint64_t opt_discard;
 uint64_t max_discard;
-
+char *config_file; /* For blkdebug_refresh_filename() */
+/* initialized in blkdebug_parse_perms() */
 uint64_t take_child_perms;
 uint64_t unshare_child_perms;
 
-/* For blkdebug_refresh_filename() */
-char *config_file;
-
+/* State. Protected by lock */
+int state;
 QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX];
 QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
 QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs;
+QemuMutex lock;
 } BDRVBlkdebugState;
 
 typedef struct BlkdebugAIOCB {
@@ -64,8 +67,11 @@ typedef struct BlkdebugAIOCB {
 } BlkdebugAIOCB;
 
 typedef struct BlkdebugSuspendedReq {
+/* IN: initialized in suspend_request() */
 Coroutine *co;
 char *tag;
+
+/* List entry protected BDRVBlkdebugState's lock */
 QLIST_ENTRY(BlkdebugSuspendedReq) next;
 } BlkdebugSuspendedReq;
 
@@ -77,6 +83,7 @@ enum {
 };
 
 typedef struct BlkdebugRule {
+/* IN: initialized in add_rule() or blkdebug_debug_breakpoint() */
 BlkdebugEvent event;
 int action;
 int state;
@@ -95,6 +102,8 @@ typedef struct BlkdebugRule {
 char *tag;
 } suspend;
 } options;
+
+/* List entries protected BDRVBlkdebugState's lock */
 QLIST_ENTRY(BlkdebugRule) next;
 QSIMPLEQ_ENTRY(BlkdebugRule) active_next;
 } BlkdebugRule;
@@ -244,11 +253,14 @@ static int add_rule(void *opaque, QemuOpts *opts, Error 
**errp)
 };
 
 /* Add the rule */
+qemu_mutex_lock(>lock);
 QLIST_INSERT_HEAD(>rules[event], rule, next);
+qemu_mutex_unlock(>lock);
 
 return 0;
 }
 
+/* Called with lock held or from .bdrv_close */
 static void remove_rule(BlkdebugRule *rule)
 {
 switch (rule->action) {
@@ -467,6 +479,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict 
*options, int flags,
 int ret;
 uint64_t align;
 
+qemu_mutex_init(>lock);
 opts = qemu_opts_create(_opts, NULL, 0, _abort);
 if (!qemu_opts_absorb_qdict(opts, options, errp)) {
 ret = -EINVAL;
@@ -567,6 +580,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict 
*options, int flags,
 ret = 0;
 out:
 if (ret < 0) {
+qemu_mutex_destroy(>lock);
 g_free(s->config_file);
 }
 qemu_opts_del(opts);
@@ -581,6 +595,7 @@ static int rule_check(BlockDriverState *bs, uint64_t 
offset, uint64_t bytes,
 int error;
 bool immediately;
 
+qemu_mutex_lock(>lock);
 QSIMPLEQ_FOREACH(rule, >active_rules, active_next) {
 uint64_t inject_offset = rule->options.inject.offset;
 
@@ -594,6 +609,7 @@ static int rule_check(BlockDriverState *bs, uint64_t 
offset, uint64_t bytes,
 }
 
 if (!rule || !rule->options.inject.error) {
+qemu_mutex_unlock(>lock);
 return 0;
 }
 
@@ -605,6 +621,7 @@ static int rule_check(BlockDriverState *bs, uint64_t 
offset, uint64_t bytes,
 remove_rule(rule);
 }
 
+qemu_mutex_unlock(>lock);
 if (!immediately) {
 aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
 qemu_coroutine_yield();
@@ -770,8 +787,10 @@ static void blkdebug_close(BlockDriverState *bs)
 }
 
 g_free(s->config_file);
+qemu_mutex_destroy(>lock);
 }
 
+/* Called with lock held.  */
 static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
 {
 BDRVBlkdebugState *s = bs->opaque;
@@ -790,6 +809,7 @@ static void suspend_request(BlockDriverState *bs, 
BlkdebugRule *rule)
 }
 }
 
+/* Called with lock held.  */
 static void process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
  int *action_count, int *new_state)
 {
@@ -829,11 +849,13 @@ static void blkdebug_debug_event(BlockDriverState *bs, 
BlkdebugEvent event)
 
 assert((int)event >= 0 && event < BLKDBG__MAX);
 
-new_state = s->state;
-QLIST_FOREACH_SAFE(rule, >rules[event], next, next) {
-

[PULL 4/6] blkdebug: do not suspend in the middle of QLIST_FOREACH_SAFE

2021-07-19 Thread Max Reitz

From: Emanuele Giuseppe Esposito 

That would be unsafe in case a rule other than the current one
is removed while the coroutine has yielded.
Keep FOREACH_SAFE because suspend_request deletes the current rule.

After this patch, *all* matching rules are deleted before suspending
the coroutine, rather than just one.
This doesn't affect the existing testcases.

Use actions_count to see how many yield to issue.

Co-developed-by: Paolo Bonzini 
Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20210614082931.24925-5-eespo...@redhat.com>
Signed-off-by: Max Reitz 
---
 block/blkdebug.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/block/blkdebug.c b/block/blkdebug.c
index 6bdeb2c7b3..dd82131d1e 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -789,7 +789,6 @@ static void suspend_request(BlockDriverState *bs, 
BlkdebugRule *rule)
 if (!qtest_enabled()) {
 printf("blkdebug: Suspended request '%s'\n", r->tag);
 }
-qemu_coroutine_yield();
 }
 
 static void process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
@@ -834,6 +833,12 @@ static void blkdebug_debug_event(BlockDriverState *bs, 
BlkdebugEvent event)
 QLIST_FOREACH_SAFE(rule, >rules[event], next, next) {
 process_rule(bs, rule, actions_count);
 }
+
+while (actions_count[ACTION_SUSPEND] > 0) {
+qemu_coroutine_yield();
+actions_count[ACTION_SUSPEND]--;
+}
+
 s->state = s->new_state;
 }
 
-- 
2.31.1

[PULL 2/6] blkdebug: move post-resume handling to resume_req_by_tag

2021-07-19 Thread Max Reitz

From: Emanuele Giuseppe Esposito 

We want to move qemu_coroutine_yield() after the loop on rules,
because QLIST_FOREACH_SAFE is wrong if the rule list is modified
while the coroutine has yielded.  Therefore move the suspended
request to the heap and clean it up from the remove side.
All that is left is for blkdebug_debug_event to handle the
yielding.

Co-developed-by: Paolo Bonzini 
Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20210614082931.24925-3-eespo...@redhat.com>
Signed-off-by: Max Reitz 
---
 block/blkdebug.c | 31 ++-
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/block/blkdebug.c b/block/blkdebug.c
index 5ccbfcab42..e8fdf7b056 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -775,25 +775,20 @@ static void blkdebug_close(BlockDriverState *bs)
 static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
 {
 BDRVBlkdebugState *s = bs->opaque;
-BlkdebugSuspendedReq r;
+BlkdebugSuspendedReq *r;
 
-r = (BlkdebugSuspendedReq) {
-.co = qemu_coroutine_self(),
-.tag= g_strdup(rule->options.suspend.tag),
-};
+r = g_new(BlkdebugSuspendedReq, 1);
+
+r->co = qemu_coroutine_self();
+r->tag= g_strdup(rule->options.suspend.tag);
 
 remove_rule(rule);
-QLIST_INSERT_HEAD(>suspended_reqs, , next);
+QLIST_INSERT_HEAD(>suspended_reqs, r, next);
 
 if (!qtest_enabled()) {
-printf("blkdebug: Suspended request '%s'\n", r.tag);
+printf("blkdebug: Suspended request '%s'\n", r->tag);
 }
 qemu_coroutine_yield();
-if (!qtest_enabled()) {
-printf("blkdebug: Resuming request '%s'\n", r.tag);
-}
-
-g_free(r.tag);
 }
 
 static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
@@ -880,8 +875,18 @@ retry:
  */
 QLIST_FOREACH(r, >suspended_reqs, next) {
 if (!strcmp(r->tag, tag)) {
+Coroutine *co = r->co;
+
+if (!qtest_enabled()) {
+printf("blkdebug: Resuming request '%s'\n", r->tag);
+}
+
 QLIST_REMOVE(r, next);
-qemu_coroutine_enter(r->co);
+g_free(r->tag);
+g_free(r);
+
+qemu_coroutine_enter(co);
+
 if (all) {
 goto retry;
 }
-- 
2.31.1

[PULL 0/6] Block patches for 6.1-rc0

2021-07-19 Thread Max Reitz

The following changes since commit 7457b407edd6e8555e4b46488aab2f13959fccf8:

  Merge remote-tracking branch 
'remotes/thuth-gitlab/tags/pull-request-2021-07-19' into staging (2021-07-19 
11:34:08 +0100)

are available in the Git repository at:

  https://github.com/XanClic/qemu.git tags/pull-block-2021-07-19

for you to fetch changes up to 36109bff171ba0811fa4c723cecdf6c3561fa318:

  blkdebug: protect rules and suspended_reqs with a lock (2021-07-19 17:38:38 
+0200)


Block patches for 6.1-rc0:
- Make blkdebug's suspend/resume handling robust (and thread-safe)


Emanuele Giuseppe Esposito (6):
  blkdebug: refactor removal of a suspended request
  blkdebug: move post-resume handling to resume_req_by_tag
  blkdebug: track all actions
  blkdebug: do not suspend in the middle of QLIST_FOREACH_SAFE
  block/blkdebug: remove new_state field and instead use a local
variable
  blkdebug: protect rules and suspended_reqs with a lock

 block/blkdebug.c | 136 ---
 1 file changed, 92 insertions(+), 44 deletions(-)

-- 
2.31.1

[PULL 3/6] blkdebug: track all actions

2021-07-19 Thread Max Reitz

From: Emanuele Giuseppe Esposito 

Add a counter for each action that a rule can trigger.
This is mainly used to keep track of how many coroutine_yield()
we need to perform after processing all rules in the list.

Co-developed-by: Paolo Bonzini 
Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20210614082931.24925-4-eespo...@redhat.com>
Signed-off-by: Max Reitz 
---
 block/blkdebug.c | 17 -
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/block/blkdebug.c b/block/blkdebug.c
index e8fdf7b056..6bdeb2c7b3 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -74,6 +74,7 @@ enum {
 ACTION_INJECT_ERROR,
 ACTION_SET_STATE,
 ACTION_SUSPEND,
+ACTION__MAX,
 };
 
 typedef struct BlkdebugRule {
@@ -791,22 +792,22 @@ static void suspend_request(BlockDriverState *bs, 
BlkdebugRule *rule)
 qemu_coroutine_yield();
 }
 
-static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
-bool injected)
+static void process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
+ int *action_count)
 {
 BDRVBlkdebugState *s = bs->opaque;
 
 /* Only process rules for the current state */
 if (rule->state && rule->state != s->state) {
-return injected;
+return;
 }
 
 /* Take the action */
+action_count[rule->action]++;
 switch (rule->action) {
 case ACTION_INJECT_ERROR:
-if (!injected) {
+if (action_count[ACTION_INJECT_ERROR] == 1) {
 QSIMPLEQ_INIT(>active_rules);
-injected = true;
 }
 QSIMPLEQ_INSERT_HEAD(>active_rules, rule, active_next);
 break;
@@ -819,21 +820,19 @@ static bool process_rule(BlockDriverState *bs, struct 
BlkdebugRule *rule,
 suspend_request(bs, rule);
 break;
 }
-return injected;
 }
 
 static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event)
 {
 BDRVBlkdebugState *s = bs->opaque;
 struct BlkdebugRule *rule, *next;
-bool injected;
+int actions_count[ACTION__MAX] = { 0 };
 
 assert((int)event >= 0 && event < BLKDBG__MAX);
 
-injected = false;
 s->new_state = s->state;
 QLIST_FOREACH_SAFE(rule, >rules[event], next, next) {
-injected = process_rule(bs, rule, injected);
+process_rule(bs, rule, actions_count);
 }
 s->state = s->new_state;
 }
-- 
2.31.1

[PULL 1/6] blkdebug: refactor removal of a suspended request

2021-07-19 Thread Max Reitz

From: Emanuele Giuseppe Esposito 

Extract to a separate function.  Do not rely on FOREACH_SAFE, which is
only "safe" if the *current* node is removed---not if another node is
removed.  Instead, just walk the entire list from the beginning when
asked to resume all suspended requests with a given tag.

Co-developed-by: Paolo Bonzini 
Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Eric Blake 
Message-Id: <20210614082931.24925-2-eespo...@redhat.com>
Signed-off-by: Max Reitz 
---
 block/blkdebug.c | 33 ++---
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/block/blkdebug.c b/block/blkdebug.c
index 2c0b9b0ee8..5ccbfcab42 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -793,7 +793,6 @@ static void suspend_request(BlockDriverState *bs, 
BlkdebugRule *rule)
 printf("blkdebug: Resuming request '%s'\n", r.tag);
 }
 
-QLIST_REMOVE(, next);
 g_free(r.tag);
 }
 
@@ -869,25 +868,40 @@ static int blkdebug_debug_breakpoint(BlockDriverState 
*bs, const char *event,
 return 0;
 }
 
-static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
+static int resume_req_by_tag(BDRVBlkdebugState *s, const char *tag, bool all)
 {
-BDRVBlkdebugState *s = bs->opaque;
-BlkdebugSuspendedReq *r, *next;
+BlkdebugSuspendedReq *r;
 
-QLIST_FOREACH_SAFE(r, >suspended_reqs, next, next) {
+retry:
+/*
+ * No need for _SAFE, since a different coroutine can remove another node
+ * (not the current one) in this list, and when the current one is removed
+ * the iteration starts back from beginning anyways.
+ */
+QLIST_FOREACH(r, >suspended_reqs, next) {
 if (!strcmp(r->tag, tag)) {
+QLIST_REMOVE(r, next);
 qemu_coroutine_enter(r->co);
+if (all) {
+goto retry;
+}
 return 0;
 }
 }
 return -ENOENT;
 }
 
+static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
+{
+BDRVBlkdebugState *s = bs->opaque;
+
+return resume_req_by_tag(s, tag, false);
+}
+
 static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
 const char *tag)
 {
 BDRVBlkdebugState *s = bs->opaque;
-BlkdebugSuspendedReq *r, *r_next;
 BlkdebugRule *rule, *next;
 int i, ret = -ENOENT;
 
@@ -900,11 +914,8 @@ static int 
blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
 }
 }
 }
-QLIST_FOREACH_SAFE(r, >suspended_reqs, next, r_next) {
-if (!strcmp(r->tag, tag)) {
-qemu_coroutine_enter(r->co);
-ret = 0;
-}
+if (resume_req_by_tag(s, tag, true) == 0) {
+ret = 0;
 }
 return ret;
 }
-- 
2.31.1

Re: [RFC PATCH nvme-cli 2/2] nvme-cli/plugins/mi:add support

2021-07-19 Thread Mohit Kapoor


On Fri, Jul 09, 2021 at 09:04:30AM -0700, Keith Busch wrote:

+int hal_init()
+{
+int retval = -1;
+switch (GetSidebandInterface()) {
+case qemu_nvme_mi:
+retval = qemu_mi_init();
+break;
+default:
+break;
+}
+return retval;
+}
+
+int hal_open()
+{
+int retval = -1;
+switch (GetSidebandInterface()) {
+case qemu_nvme_mi:
+retval = qemu_mi_open();
+break;
+default:
+break;
+}
+return retval;
+}
+
+int hal_close()
+{
+int retval = -1;
+switch (GetSidebandInterface()) {
+case qemu_nvme_mi:
+retval = qemu_mi_close();
+break;
+default:
+break;
+}
+return retval;
+}
+
+int hal_i2c_write(uint8_t *data_out, uint16_t num_bytes)
+{
+int retval = -1;
+switch (GetSidebandInterface()) {
+case qemu_nvme_mi:
+retval = qemu_mi_write(data_out, num_bytes);
+break;
+default:
+break;
+}
+return retval;
+}
+
+int hal_i2c_read(uint8_t *data_in, uint16_t num_bytes)
+{
+uint32_t retval = -1;
+switch (GetSidebandInterface()) {
+case qemu_nvme_mi:
+retval = qemu_mi_read(data_in, num_bytes);
+break;
+default:
+break;
+}
+return retval;
+}


I'm really not a fan of having non-standard interfaces. If you were
going to do that, though, you should create a struct of function
pointers so that you don't need these repetitive "switch (...)"
statements.

But if we're going to have OOB MI support in toolign, they should all
use the same standard defined interface.



Thanks for your comments.
We are planning to incorporate smbus/i2c interface in nvme-cli & qemu
and the patch will be shared in a few days.
While sharing the above patch, we will take care of the hardware
interface layer based on function pointer implementation as well.

Re: [RFC PATCH 0/6] job: replace AioContext lock with job_mutex

2021-07-19 Thread Kevin Wolf

Am 19.07.2021 um 11:29 hat Stefan Hajnoczi geschrieben:
> On Fri, Jul 16, 2021 at 05:23:50PM +0200, Kevin Wolf wrote:
> > Am 13.07.2021 um 15:10 hat Stefan Hajnoczi geschrieben:
> > > AIO_WAIT_WHILE() requires that AioContext is acquired according to its
> > > documentation, but I'm not sure that's true anymore. Thread-safe/atomic
> > > primitives are used by AIO_WAIT_WHILE(), so as long as the condition
> > > being waited for is thread-safe too it should work without the
> > > AioContext lock.
> > 
> > Polling something in a different AioContext from the main thread still
> > temporarily drops the lock, which crashes if it isn't locked. I'm not
> > sure if the documentation claims that the lock is needed in more cases,
> > I guess you could interpret it either way.
> 
> I'm claiming that the lock doesn't need to be dropped in that case
> anymore - as long as the condition we're polling is thread-safe. :)
> 
> Have I missed something that still need locking?

I'm not sure if AIO_WAIT_WHILE() actually ever needed the locking. I
think it's more a convenience thing since the callers would already hold
the lock, so dropping it temporarily in AIO_WAIT_WHILE() means that the
callers don't have to duplicate the temporary unlock everywhere.

> We could temporarily introduce an AIO_WAIT_WHILE_UNLOCKED() macro so
> that callers can be converted individually.

Yes, this makes sense to me.

Kevin


signature.asc
Description: PGP signature

Re: [PATCH 02/14] iotests.py: qemu_img*("create"): support IMGOPTS='compression_type=zstd'

2021-07-19 Thread Vladimir Sementsov-Ogievskiy


19.07.2021 15:58, Vladimir Sementsov-Ogievskiy wrote:




Could also be done with something like

imgopts = os.environ.get('IMGOPTS')


imgopts is a string after it. So you don't need to join it?


opts = optstr2dict(','.join(([imgopts] if imgopts else []) + parsed.o))


Build a string to be than parsed looks strange IMHO..


Oh, but that's exactly what I should do anyway to cover several -o options. Now 
I see that what you write is correct.





if parsed.f != 'qcow2' or (opts.get('compat') in ['v2', '0.10']):
 opts.pop('compression_type', None)

(Never tested, of course)

Because optstr2dict() prioritizes later options over earlier ones. (Which is 
good, because that’s also qemu-img’s behavior.)



Ok, I'll think about this all when prepare v2, and we'll see how it goes 


This way you also drop compression_type if test specify it. I think we 
shouldn't touch test specified options. Let it clearly fail instead.

We only want to ignore compression_type in IMGOPTS when create non-qcow2 image. 
I think I'll drop checking for compat: the only user for this check ic 065 and 
it's simpler to explicitly set compression_type in it even for compat=0.10 
cases.

--
Best regards,
Vladimir

Re: [PATCH 02/14] iotests.py: qemu_img*("create"): support IMGOPTS='compression_type=zstd'

2021-07-19 Thread Vladimir Sementsov-Ogievskiy


16.07.2021 13:34, Vladimir Sementsov-Ogievskiy wrote:

16.07.2021 13:07, Max Reitz wrote:

On 05.07.21 11:15, Vladimir Sementsov-Ogievskiy wrote:

Adding support of IMGOPTS (like in bash tests) allows user to pass a
lot of different options. Still, some may require additional logic.

Now we want compression_type option, so add some smart logic around it:
ignore compression_type=zstd in IMGOPTS, if test want qcow2 in
compatibility mode. As well, ignore compression_type for non-qcow2
formats.

Note that we may instead add support only to qemu_img_create(), but
that works bad:

1. We'll have to update a lot of tests to use qemu_img_create instead
    of qemu_img('create'). (still, we may want do it anyway, but no
    reason to create a dependancy between task of supporting IMGOPTS and
    updating a lot of tests)

2. Some tests use qemu_img_pipe('create', ..) - even more work on
    updating


I feel compelled to again say that we had a series that did exactly that.  But 
of course, now that so much time has passed, overhauling it would require quite 
a bit of work.


3. Even if we update all tests to go through qemu_img_create, we'll
    need a way to avoid creating new tests using qemu_img*('create') -
    add assertions.. That doesn't seem good.


That almost sounds like you remember my series, because:

https://lists.nongnu.org/archive/html/qemu-block/2019-10/msg00135.html

;)


:) No, I don't remember it.




So, let's add support of IMGOPTS to most generic
qemu_img_pipe_and_status().

Signed-off-by: Vladimir Sementsov-Ogievskiy
---
  tests/qemu-iotests/iotests.py | 48 ++-
  1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 0d99dd841f..80f0cb4f42 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -16,6 +16,7 @@
  # along with this program.  If not, see.
  #
+import argparse
  import atexit
  import bz2
  from collections import OrderedDict
@@ -41,6 +42,19 @@
  from qemu.machine import qtest
  from qemu.qmp import QMPMessage
+
+def optstr2dict(opts: str) -> Dict[str, str]:
+    if not opts:
+    return {}
+
+    return {arr[0]: arr[1] for arr in
+    (opt.split('=', 1) for opt in opts.strip().split(','))}
+
+
+def dict2optstr(opts: Dict[str, str]) -> str:
+    return ','.join(f'{k}={v}' for k, v in opts.items())
+
+
  # Use this logger for logging messages directly from the iotests module
  logger = logging.getLogger('qemu.iotests')
  logger.addHandler(logging.NullHandler())
@@ -57,6 +71,8 @@
  if os.environ.get('QEMU_IMG_OPTIONS'):
  qemu_img_args += os.environ['QEMU_IMG_OPTIONS'].strip().split(' ')
+imgopts = optstr2dict(os.environ.get('IMGOPTS', ''))
+
  qemu_io_args = [os.environ.get('QEMU_IO_PROG', 'qemu-io')]
  if os.environ.get('QEMU_IO_OPTIONS'):
  qemu_io_args += os.environ['QEMU_IO_OPTIONS'].strip().split(' ')
@@ -121,11 +137,41 @@ def qemu_tool_pipe_and_status(tool: str, args: 
Sequence[str],
 {-subp.returncode}: {cmd}\n')
  return (output, subp.returncode)
+def qemu_img_create_prepare_args(args: List[str]) -> List[str]:
+    if not args or args[0] != 'create':
+    return list(args)
+    args = args[1:]
+
+    p = argparse.ArgumentParser(allow_abbrev=False)
+    # -o option may be specified several times
+    p.add_argument('-o', action='append', default=[])
+    p.add_argument('-f')
+    parsed, remaining = p.parse_known_args(args)
+
+    opts = optstr2dict(','.join(parsed.o))
+
+    compat = 'compat' in opts and opts['compat'][0] == '0'


I suppose `opts['compat'][0] == '0'` is supposed to check for compat=0.10?

If so, then why not just check `opts['compat'] == '0.10'` to be clearer?  I 
don’t think we allow any other compat=0* values, and I have no reason to 
believe we ever will.

Also, I think compat=v2 is valid, too.  (And I think calling this variable “v2” 
would also make more sense than “compat”.)


Good for me, will do.




+    for k, v in imgopts.items():
+    if k in opts:
+    continue
+    if k == 'compression_type' and (compat or parsed.f != 'qcow2'):
+    continue
+    opts[k] = v


Could also be done with something like

imgopts = os.environ.get('IMGOPTS')


imgopts is a string after it. So you don't need to join it?


opts = optstr2dict(','.join(([imgopts] if imgopts else []) + parsed.o))


Build a string to be than parsed looks strange IMHO..


Oh, but that's exactly what I should do anyway to cover several -o options. Now 
I see that what you write is correct.





if parsed.f != 'qcow2' or (opts.get('compat') in ['v2', '0.10']):
 opts.pop('compression_type', None)

(Never tested, of course)

Because optstr2dict() prioritizes later options over earlier ones. (Which is 
good, because that’s also qemu-img’s behavior.)



Ok, I'll think about this all when prepare v2, and we'll see how it goes




+
+

Re: [PATCH 3/3] linux-aio: limit the batch size using `aio-max-batch` parameter

2021-07-19 Thread Stefano Garzarella


On Tue, Jul 13, 2021 at 03:58:04PM +0100, Stefan Hajnoczi wrote:

On Wed, Jul 07, 2021 at 05:00:19PM +0200, Stefano Garzarella wrote:

@@ -371,7 +375,7 @@ static int laio_do_submit(int fd, struct qemu_laiocb 
*laiocb, off_t offset,
 s->io_q.in_queue++;
 if (!s->io_q.blocked &&
 (!s->io_q.plugged ||
- s->io_q.in_flight + s->io_q.in_queue >= MAX_EVENTS)) {
+ s->io_q.in_queue >= max_batch)) {


Is it safe to drop the MAX_EVENTS case?


I think it is safe since in ioq_submit() we have this check while 
dequeueing:


QSIMPLEQ_FOREACH(aiocb, >io_q.pending, next) {
iocbs[len++] = >iocb;
if (s->io_q.in_flight + len >= MAX_EVENTS) {
break;
}
}

But in term of performance, I think is better what you're suggesting, 
because if we have fewer slots available than `max_batch`, here we were 
delaying the call to io_submit().




Perhaps the following can be used:

 int64_t max_batch = s->aio_context->aio_max_batch ?: DEFAULT_MAX_BATCH;
 max_batch = MIN_NON_ZERO(MAX_EVENTS - s->io_q.in_flight + s->io_q.in_queue, 
max_batch);



Since we will compare `in_queue` with `max_batch`, should we remove it 
from this expression?


I mean:

  int64_t max_batch = s->aio_context->aio_max_batch ?: DEFAULT_MAX_BATCH;
  max_batch = MIN_NON_ZERO(MAX_EVENTS - s->io_q.in_flight, max_batch);

then as it is in this patch:

  s->io_q.in_queue++;
  if (!s->io_q.blocked &&
  (!s->io_q.plugged ||
   s->io_q.in_queue >= max_batch)) {
  ioq_submit(s);
  }


Here we'll only need to check against max_batch but it takes into
account MAX_EVENT and in_flight.


Thanks,
Stefano

Re: [PATCH 1/1] block: Do not poll in bdrv_set_aio_context_ignore() when acquiring new_context

2021-07-19 Thread Kevin Wolf

Am 12.07.2021 um 07:38 hat Zhiyong Ye geschrieben:
> When bdrv_set_aio_context_ignore() is called in the main loop to change
> the AioContext onto the IO thread, the bdrv_drain_invoke_entry() never
> gets to run and the IO thread hangs at co_schedule_bh_cb().
> 
> This is because the AioContext is occupied by the main thread after
> being attached to the IO thread, and the main thread poll in
> bdrv_drained_end() waiting for the IO request to be drained, but the IO
> thread cannot acquire the AioContext, which leads to deadlock.

This shouldn't be the case:

 * The caller must own the AioContext lock for the old AioContext of bs, but it
 * must not own the AioContext lock for new_context (unless new_context is the
 * same as the current context of bs).

Then bdrv_set_aio_context_ignore() switches the AioContext of bs, and
then calls bdrv_drained_end() while holding only the lock for the new
context. AIO_WAIT_WHILE() will temporarily drop that lock, so aio_poll()
should run without holding any AioContext locks.

If I'm not missing anything, the scenario you're seeing means that the
caller already held a lock for the new AioContext, so that it's locked
twice while AIO_WAIT_WHILE() drops the lock only once. This would be a
bug in the caller because the documentation I quoted explicitly forbids
holding the AioContext lock for the new context.

> Just like below:
> 
> <-->
> [Switching to thread 1 (Thread 0x7fd810bbef40 (LWP 533312))]
> (gdb) bt
> ...
> 3  0x5601f6ea93aa in fdmon_poll_wait at ../util/fdmon-poll.c:80
> 4  0x5601f6e81a1c in aio_poll at ../util/aio-posix.c:607
> 5  0x5601f6dcde87 in bdrv_drained_end at ../block/io.c:496
> 6  0x5601f6d798cd in bdrv_set_aio_context_ignore at ../block.c:6502
> 7  0x5601f6d7996c in bdrv_set_aio_context_ignore at ../block.c:6472
> 8  0x5601f6d79cb8 in bdrv_child_try_set_aio_context at ../block.c:6587
> 9  0x5601f6da86f2 in blk_do_set_aio_context at 
> ../block/block-backend.c:2026
> 10 0x5601f6daa96d in blk_set_aio_context at ../block/block-backend.c:2047
> 11 0x5601f6c71883 in virtio_scsi_hotplug at ../hw/scsi/virtio-scsi.c:831

Which version of QEMU are you using? In current git master, line 831 is
something entirely different.

Are you using something before commit c7040ff6? Because this is a commit
that fixed a virtio-scsi bug where it would hold the wrong lock before
calling blk_set_aio_context().

> 
> [Switching to thread 4 (Thread 0x7fd8092e7700 (LWP 533315))]
> (gdb) bt
> ...
> 4  0x5601f6eab6a8 in qemu_mutex_lock_impl at 
> ../util/qemu-thread-posix.c:79
> 5  0x5601f6e7ce88 in co_schedule_bh_cb at ../util/async.c:489
> 6  0x5601f6e7c404 in aio_bh_poll at ../util/async.c:164
> 7  0x5601f6e81a46 in aio_poll at ../util/aio-posix.c:659
> 8  0x5601f6d5ccf3 in iothread_run at ../iothread.c:73
> 9  0x5601f6eab512 in qemu_thread_start at ../util/qemu-thread-posix.c:521
> 10 0x7fd80d7b84a4 in start_thread at pthread_create.c:456
> 11 0x7fd80d4fad0f in clone () at 
> ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
> (gdb) f 4
> 4  0x5601f6eab6a8 in qemu_mutex_lock_impl at 
> ../util/qemu-thread-posix.c:79
> (gdb) p *mutex
> $2 = {lock = {__data = {__lock = 2, __count = 1, __owner = 533312, __nusers = 
> 1,
>   __kind = 1, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next 
> = 0x0}},
>   __size = "\002\000\000\000\001\000\000\000@#\b\000\001\000\000\000\001",
>   '\000' , __align = 4294967298}, initialized = true}
> <-->
> 
> Therefore, we should never poll anywhere in
> bdrv_set_aio_context_ignore() when acquiring the new context. In fact,
> commit e037c09c has also already elaborated on why we can't poll at
> bdrv_do_drained_end().
> 
> Signed-off-by: Zhiyong Ye 
> ---
>  block.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/block.c b/block.c
> index be083f389e..ebbea72d64 100644
> --- a/block.c
> +++ b/block.c
> @@ -6846,6 +6846,7 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs,
>  GSList *parents_to_process = NULL;
>  GSList *entry;
>  BdrvChild *child, *parent;
> +int drained_end_counter = 0;
>  
>  g_assert(qemu_get_current_aio_context() == qemu_get_aio_context());
>  
> @@ -6907,7 +6908,7 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs,
>  aio_context_release(old_context);
>  }
>  
> -bdrv_drained_end(bs);
> +bdrv_drained_end_no_poll(bs, _end_counter);
>  
>  if (qemu_get_aio_context() != old_context) {
>  aio_context_acquire(old_context);
> @@ -6915,6 +6916,7 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs,
>  if (qemu_get_aio_context() != new_context) {
>  aio_context_release(new_context);
>  }
> +BDRV_POLL_WHILE(bs, qatomic_read(_end_counter) > 0);

This would be wrong because bs is already in the new context, but you
wouldn't hold the lock for it. AIO_WAIT_WHILE() would try to drop the
lock for a context that isn't even locked,

Re: [PATCH 2/3] iothread: add aio-max-batch parameter

2021-07-19 Thread Stefano Garzarella


On Tue, Jul 13, 2021 at 03:51:15PM +0100, Stefan Hajnoczi wrote:

On Wed, Jul 07, 2021 at 05:00:18PM +0200, Stefano Garzarella wrote:

diff --git a/qapi/misc.json b/qapi/misc.json
index 156f98203e..f64bb69f74 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -86,6 +86,9 @@
 # @poll-shrink: how many ns will be removed from polling time, 0 means that
 #   it's not configured (since 2.9)
 #
+# @aio-max-batch: maximum number of requests in a bacth for the AIO engine,


s/bacth/batch/


+# 0 means that the engine will use its default (since 6.1)
+#
 # Since: 2.0
 ##
 { 'struct': 'IOThreadInfo',
@@ -93,7 +96,8 @@
'thread-id': 'int',
'poll-max-ns': 'int',
'poll-grow': 'int',
-   'poll-shrink': 'int' } }
+   'poll-shrink': 'int',
+   'aio-max-batch': 'int' } }

 ##
 # @query-iothreads:
diff --git a/qapi/qom.json b/qapi/qom.json
index 652be317b8..23fd586614 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -516,12 +516,17 @@
 #   algorithm detects it is spending too long polling without
 #   encountering events. 0 selects a default behaviour (default: 0)
 #
+# @aio-max-batch: maximum number of requests in a bacth for the AIO engine,


s/bacth/batch/


Fixed in v2 also in include/block/aio.h :-)

Thanks,
Stefano

Re: [PATCH v3 4/5] hw/nvme: fix mmio read

2021-07-19 Thread Peter Maydell

On Wed, 14 Jul 2021 at 07:01, Klaus Jensen  wrote:
>
> From: Klaus Jensen 
>
> The new PMR test unearthed a long-standing issue with MMIO reads on
> big-endian hosts.
>
> Fix this by unconditionally storing all controller registers in little
> endian.
>
> Cc: Gollu Appalanaidu 
> Reported-by: Peter Maydell 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/nvme/ctrl.c | 300 -
>  1 file changed, 173 insertions(+), 127 deletions(-)

> @@ -5708,22 +5712,38 @@ static int nvme_start_ctrl(NvmeCtrl *n)
>
>  static void nvme_cmb_enable_regs(NvmeCtrl *n)
>  {
> -NVME_CMBLOC_SET_CDPCILS(n->bar.cmbloc, 1);
> -NVME_CMBLOC_SET_CDPMLS(n->bar.cmbloc, 1);
> -NVME_CMBLOC_SET_BIR(n->bar.cmbloc, NVME_CMB_BIR);
> +uint32_t cmbloc = 0, cmbsz = 0;
>
> -NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
> -NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
> -NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 1);
> -NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
> -NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
> -NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */
> -NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb);
> +NVME_CMBLOC_SET_CDPCILS(cmbloc, 1);
> +NVME_CMBLOC_SET_CDPMLS(cmbloc, 1);
> +NVME_CMBLOC_SET_BIR(cmbloc, NVME_CMB_BIR);
> +stl_le_p(>bar.cmbloc, cmbloc);
> +
> +NVME_CMBSZ_SET_SQS(cmbsz, 1);
> +NVME_CMBSZ_SET_CQS(cmbsz, 0);
> +NVME_CMBSZ_SET_LISTS(cmbsz, 1);
> +NVME_CMBSZ_SET_RDS(cmbsz, 1);
> +NVME_CMBSZ_SET_WDS(cmbsz, 1);
> +NVME_CMBSZ_SET_SZU(cmbsz, 2); /* MBs */
> +NVME_CMBSZ_SET_SZ(cmbsz, n->params.cmb_size_mb);
> +stl_le_p(>bar.cmbsz, cmbsz);

This used to only set the listed fields and left the
rest of the registers untouched. Now it zeroes the other
fields in the registers. If this is an intentional change it
should be separate from this patch, I think.

> @@ -5747,9 +5767,10 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
> uint64_t data,
> " when MSI-X is enabled");
>  /* should be ignored, fall through for now */
>  }
> -n->bar.intms |= data & 0x;
> +intms |= data & 0x;

intms is a uint32_t so the & here is unnecessary; you can just
say "intms |= data;".

> +stl_le_p(>bar.intms, intms);
>  n->bar.intmc = n->bar.intms;
> -trace_pci_nvme_mmio_intm_set(data & 0x, n->bar.intmc);
> +trace_pci_nvme_mmio_intm_set(data & 0x, intms);
>  nvme_irq_check(n);
>  break;
>  case NVME_REG_INTMC:
> @@ -5759,44 +5780,55 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr 
> offset, uint64_t data,
> " when MSI-X is enabled");
>  /* should be ignored, fall through for now */
>  }
> -n->bar.intms &= ~(data & 0x);
> +intms &= ~(data & 0x);

Similarly here just "intms &= ~data;" is enough.

> +stl_le_p(>bar.intms, intms);
>  n->bar.intmc = n->bar.intms;
> -trace_pci_nvme_mmio_intm_clr(data & 0x, n->bar.intmc);
> +trace_pci_nvme_mmio_intm_clr(data & 0x, intms);
>  nvme_irq_check(n);
>  break;

> @@ -5818,26 +5850,30 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr 
> offset, uint64_t data,
>  }
>  break;
>  case NVME_REG_AQA:
> -n->bar.aqa = data & 0x;
> +stl_le_p(>bar.aqa, data & 0x);

You don't need to mask here, stl_le_p() takes a uint32_t argument and
will only write 4 bytes.

>  trace_pci_nvme_mmio_aqattr(data & 0x);
>  break;
>  case NVME_REG_ASQ:
> -n->bar.asq = size == 8 ? data :
> -(n->bar.asq & ~0xULL) | (data & 0x);
> +asq = size == 8 ? data :
> +(asq & ~0xULL) | (data & 0x);
> +stq_le_p(>bar.asq, asq);

You could save doing the manual assembly of the 64-byte value and just write
the data you have:

   stn_le_p(>bar.asq, size, data);

>  trace_pci_nvme_mmio_asqaddr(data);
>  break;
>  case NVME_REG_ASQ + 4:
> -n->bar.asq = (n->bar.asq & 0x) | (data << 32);
> -trace_pci_nvme_mmio_asqaddr_hi(data, n->bar.asq);
> +asq = (asq & 0x) | (data << 32);
> +stq_le_p(>bar.asq, asq);
> +trace_pci_nvme_mmio_asqaddr_hi(data, asq);

Similarly here
   stn_le_p(>bar.asq + 4, size, data);
   trace_pci_nvme_mmio_asqaddr_hi(data, ldq_le_p(>bar.asq));

(and then you don't need 'asq' as a local variable in this function.)

>  break;
>  case NVME_REG_ACQ:
>  trace_pci_nvme_mmio_acqaddr(data);
> -n->bar.acq = size == 8 ? data :
> -(n->bar.acq & ~0xULL) | (data & 0x);
> +acq = size == 8 ? data :
> +(acq & ~0xULL) | (data & 0x);
> +stq_le_p(>bar.acq, acq);
>  break;
>  case NVME_REG_ACQ + 4:
> -

Re: [PATCH v3 1/5] hw/nvme: split pmrmsc register into upper and lower

2021-07-19 Thread Klaus Jensen

On Jul 19 10:13, Peter Maydell wrote:
> On Wed, 14 Jul 2021 at 07:01, Klaus Jensen  wrote:
> >
> > From: Klaus Jensen 
> >
> > The specification uses a set of 32 bit PMRMSCL and PMRMSCU registers to
> > make up the 64 bit logical PMRMSC register.
> >
> > Make it so.
> >
> > Signed-off-by: Klaus Jensen 
> > ---
> >  include/block/nvme.h | 31 ---
> >  hw/nvme/ctrl.c   |  9 +
> >  2 files changed, 21 insertions(+), 19 deletions(-)
> 
> > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> > index 2f0524e12a36..28299c6f3764 100644
> > --- a/hw/nvme/ctrl.c
> > +++ b/hw/nvme/ctrl.c
> > @@ -5916,11 +5916,12 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr 
> > offset, uint64_t data,
> >  return;
> >  }
> >
> > -n->bar.pmrmsc = (n->bar.pmrmsc & ~0x) | (data & 
> > 0x);
> > +n->bar.pmrmscl = data & 0x;
> >  n->pmr.cmse = false;
> >
> > -if (NVME_PMRMSC_CMSE(n->bar.pmrmsc)) {
> > -hwaddr cba = NVME_PMRMSC_CBA(n->bar.pmrmsc) << 
> > PMRMSC_CBA_SHIFT;
> > +if (NVME_PMRMSCL_CMSE(n->bar.pmrmscl)) {
> > +hwaddr cba = n->bar.pmrmscu |
> > +(NVME_PMRMSCL_CBA(n->bar.pmrmscl) << PMRMSCL_CBA_SHIFT);
> 
> Don't we need to shift n->bar.pmrmscu left by 32 before we OR it in
> with the pmrmscl part?
> 

We do indeed - thanks for catching this!

> >  if (cba + int128_get64(n->pmr.dev->mr.size) < cba) {
> >  NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 1);
> >  return;
> > @@ -5936,7 +5937,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr 
> > offset, uint64_t data,
> >  return;
> >  }
> >
> > -n->bar.pmrmsc = (n->bar.pmrmsc & 0x) | (data << 32);
> > +n->bar.pmrmscu = data & 0x;
> >  return;
> 
> Not an issue with this patch, but why don't we need to do the
> "update cba" stuff for a write to the U register that we do for
> a write to the L register? Does the spec mandate that guests
> do the writes in the order H then L and only the L change makes
> it take effect?
> 

Yeah, bit 1 in PMRMSCL is the "Controller Memory Space Enable" bit
(CMSE) and we only enable the address space when that bit is set. So the
driver will typically write the high register first (if needed) and then
write the lower register with or without CMSE set.


signature.asc
Description: PGP signature

Re: [RFC PATCH 0/6] job: replace AioContext lock with job_mutex

2021-07-19 Thread Stefan Hajnoczi

On Fri, Jul 16, 2021 at 05:23:50PM +0200, Kevin Wolf wrote:
> Am 13.07.2021 um 15:10 hat Stefan Hajnoczi geschrieben:
> > AIO_WAIT_WHILE() requires that AioContext is acquired according to its
> > documentation, but I'm not sure that's true anymore. Thread-safe/atomic
> > primitives are used by AIO_WAIT_WHILE(), so as long as the condition
> > being waited for is thread-safe too it should work without the
> > AioContext lock.
> 
> Polling something in a different AioContext from the main thread still
> temporarily drops the lock, which crashes if it isn't locked. I'm not
> sure if the documentation claims that the lock is needed in more cases,
> I guess you could interpret it either way.

I'm claiming that the lock doesn't need to be dropped in that case
anymore - as long as the condition we're polling is thread-safe. :)

Have I missed something that still need locking?

We could temporarily introduce an AIO_WAIT_WHILE_UNLOCKED() macro so
that callers can be converted individually.

Stefan


signature.asc
Description: PGP signature

Re: [PATCH v3 3/5] hw/nvme: fix out-of-bounds reads

2021-07-19 Thread Peter Maydell

On Wed, 14 Jul 2021 at 07:01, Klaus Jensen  wrote:
>
> From: Klaus Jensen 
>
> Peter noticed that mmio access may read into the NvmeParams member in
> the NvmeCtrl struct.
>
> Fix the bounds check.
>
> Reported-by: Peter Maydell 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/nvme/ctrl.c | 27 +++
>  1 file changed, 15 insertions(+), 12 deletions(-)

Reviewed-by: Peter Maydell 

thanks
-- PMM

Re: [PATCH v3 1/5] hw/nvme: split pmrmsc register into upper and lower

2021-07-19 Thread Peter Maydell

On Wed, 14 Jul 2021 at 07:01, Klaus Jensen  wrote:
>
> From: Klaus Jensen 
>
> The specification uses a set of 32 bit PMRMSCL and PMRMSCU registers to
> make up the 64 bit logical PMRMSC register.
>
> Make it so.
>
> Signed-off-by: Klaus Jensen 
> ---
>  include/block/nvme.h | 31 ---
>  hw/nvme/ctrl.c   |  9 +
>  2 files changed, 21 insertions(+), 19 deletions(-)

> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index 2f0524e12a36..28299c6f3764 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -5916,11 +5916,12 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr 
> offset, uint64_t data,
>  return;
>  }
>
> -n->bar.pmrmsc = (n->bar.pmrmsc & ~0x) | (data & 0x);
> +n->bar.pmrmscl = data & 0x;
>  n->pmr.cmse = false;
>
> -if (NVME_PMRMSC_CMSE(n->bar.pmrmsc)) {
> -hwaddr cba = NVME_PMRMSC_CBA(n->bar.pmrmsc) << PMRMSC_CBA_SHIFT;
> +if (NVME_PMRMSCL_CMSE(n->bar.pmrmscl)) {
> +hwaddr cba = n->bar.pmrmscu |
> +(NVME_PMRMSCL_CBA(n->bar.pmrmscl) << PMRMSCL_CBA_SHIFT);

Don't we need to shift n->bar.pmrmscu left by 32 before we OR it in
with the pmrmscl part?

>  if (cba + int128_get64(n->pmr.dev->mr.size) < cba) {
>  NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 1);
>  return;
> @@ -5936,7 +5937,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, 
> uint64_t data,
>  return;
>  }
>
> -n->bar.pmrmsc = (n->bar.pmrmsc & 0x) | (data << 32);
> +n->bar.pmrmscu = data & 0x;
>  return;

Not an issue with this patch, but why don't we need to do the
"update cba" stuff for a write to the U register that we do for
a write to the L register? Does the spec mandate that guests
do the writes in the order H then L and only the L change makes
it take effect?

thanks
-- PMM

Re: [PATCH v5 3/5] block/nbd: refactor nbd_recv_coroutines_wake_all()

2021-07-19 Thread Vladimir Sementsov-Ogievskiy


17.07.2021 00:25, Eric Blake wrote:

On Wed, Jul 14, 2021 at 07:59:14PM +0300, Vladimir Sementsov-Ogievskiy wrote:

Split out nbd_recv_coroutine_wake(), as it will be used in separate.


s/in separate/separately/


Also add a possibility to wake only first found sleeping coroutine.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  block/nbd.c | 24 
  1 file changed, 16 insertions(+), 8 deletions(-)




+
+static void nbd_recv_coroutines_wake_all(BDRVNBDState *s, bool only_one)


Without reading docs (including the parameter name), I would have guessed:

wake_all(s, true) - wakes all
wake_all(s, false) - wakes one

but your code does:

wake_all(s, true) - wakes one
wake_all(s, false) - wakes all

Maybe that means this function and/or its parameter is now misnamed.
Having the _all in the name with true to NOT be all is what threw me.
Would the following be any better:

nbd_recv_coroutines_wake(BDRVNBDState *s, bool all)

where

wake(s, true) - wakes all
wake(s, false) - wakes one

and where your helper function needs to be renamed, and callers
updated to match those semantics?



No objections, will do)

Actually in a previous commit message I promise to rename the function, but 
actually don't do it.


  {
  int i;
  
  for (i = 0; i < MAX_NBD_REQUESTS; i++) {

-NBDClientRequest *req = >requests[i];
-
-if (req->coroutine && req->receiving) {
-req->receiving = false;
-aio_co_wake(req->coroutine);
+if (nbd_recv_coroutine_wake(>requests[i]) && only_one) {
+return;


But while I'm not sold on the naming, the change in logic (to be able
to wake any one waiter but not the whole list) looks useful for future
patches.




--
Best regards,
Vladimir

Re: [PATCH v3 0/5] hw/nvme: fix mmio read

2021-07-19 Thread Stefan Hajnoczi

On Mon, Jul 19, 2021 at 08:43:33AM +0200, Klaus Jensen wrote:
> On Jul 14 08:01, Klaus Jensen wrote:
> > From: Klaus Jensen 
> > 
> > Fix mmio read issues on big-endian hosts. The core issue is that values
> > in the BAR is not stored in little endian as required.
> > 
> > Fix that and add a regression test for this. This required a bit of
> > cleanup, so it blew up into a series.
> > 
> > v2:
> > 
> >   * "hw/nvme: use symbolic names for registers"
> > Use offsetof(NvmeBar, reg) instead of explicit offsets (Philippe)
> > 
> >   * "hw/nvme: fix mmio read"
> > Use the st/ld API instead of cpu_to_X (Philippe)
> > 
> > Klaus Jensen (5):
> >   hw/nvme: split pmrmsc register into upper and lower
> >   hw/nvme: use symbolic names for registers
> >   hw/nvme: fix out-of-bounds reads
> >   hw/nvme: fix mmio read
> >   tests/qtest/nvme-test: add mmio read test
> > 
> >  include/block/nvme.h|  60 +--
> >  hw/nvme/ctrl.c  | 362 +++-
> >  tests/qtest/nvme-test.c |  26 +++
> >  3 files changed, 276 insertions(+), 172 deletions(-)
> > 
> 
> Oi,
> 
> A review on patch 3 and 4 would be appreciated so this has a chance of
> reaching Peter for -rc0 :)

I have reviewed Patch 3. Unfortunately I don't have time to review the
rest right now but maybe you can ask a specific person on the CC list to
review other patches. A Reply-All to multiple people might not receive
any attention :).

Stefan


signature.asc
Description: PGP signature

Re: [PATCH v3 3/5] hw/nvme: fix out-of-bounds reads

2021-07-19 Thread Stefan Hajnoczi

On Wed, Jul 14, 2021 at 08:01:23AM +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Peter noticed that mmio access may read into the NvmeParams member in
> the NvmeCtrl struct.
> 
> Fix the bounds check.
> 
> Reported-by: Peter Maydell 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/nvme/ctrl.c | 27 +++
>  1 file changed, 15 insertions(+), 12 deletions(-)

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature

Failing iotest 206

2021-07-19 Thread Thomas Huth


 Hi,

iotest 206 fails for me with:

$ ./check -qcow2 206
QEMU  -- ".../tests/qemu-iotests/../../qemu-system-x86_64" 
-nodefaults -display none -accel qtest

QEMU_IMG  -- ".../tests/qemu-iotests/../../qemu-img"
QEMU_IO   -- ".../tests/qemu-iotests/../../qemu-io" --cache writeback 
--aio threads -f qcow2

QEMU_NBD  -- ".../tests/qemu-iotests/../../qemu-nbd"
IMGFMT-- qcow2
IMGPROTO  -- file
PLATFORM  -- Linux/x86_64 thuth.remote.csb 4.18.0-305.3.1.el8_4.x86_64
TEST_DIR  -- .../tests/qemu-iotests/scratch
SOCK_DIR  -- /tmp/tmpx4hiqpkd
SOCKET_SCM_HELPER -- .../tests/qemu-iotests/socket_scm_helper

206   fail   [10:00:50] [10:00:54]   3.4s   (last: 6.2s)  output 
mismatch (see 206.out.bad)

--- 206.out
+++ 206.out.bad
@@ -99,55 +99,19 @@

 {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": 
{"driver": "qcow2", "encrypt": {"cipher-alg": "twofish-128", "cipher-mode": 
"ctr", "format": "luks", "hash-alg": "sha1", "iter-time": 10, "ivgen-alg": 
"plain64", "ivgen-hash-alg": "md5", "key-secret": "keysec0"}, "file": 
{"driver": "file", "filename": "TEST_DIR/PID-t.qcow2"}, "size": 33554432}}}

 {"return": {}}
+Job failed: Unsupported cipher algorithm twofish-128 with ctr mode
 {"execute": "job-dismiss", "arguments": {"id": "job0"}}
 {"return": {}}

 image: TEST_IMG
 file format: IMGFMT
 virtual size: 32 MiB (33554432 bytes)
-encrypted: yes
 cluster_size: 65536
 Format specific information:
 compat: 1.1
 compression type: zlib
 lazy refcounts: false
 refcount bits: 16
-encrypt:
-ivgen alg: plain64
-hash alg: sha1
-cipher alg: twofish-128
-uuid: ----
-format: luks
-cipher mode: ctr
-slots:
-[0]:
-active: true
-iters: XXX
-key offset: 4096
-stripes: 4000
-[1]:
-active: false
-key offset: 69632
-[2]:
-active: false
-key offset: 135168
-[3]:
-active: false
-key offset: 200704
-[4]:
-active: false
-key offset: 266240
-[5]:
-active: false
-key offset: 331776
-[6]:
-active: false
-key offset: 397312
-[7]:
-active: false
-key offset: 462848
-payload offset: 528384
-master key iters: XXX
 corrupt: false
 extended l2: false

Looks like it is missing a check for the availability of the corresponding 
crypto stuff? Does anybody got a clue how to fix this?


 Thomas

Re: [PATCH v3 0/5] hw/nvme: fix mmio read

2021-07-19 Thread Klaus Jensen

On Jul 14 08:01, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Fix mmio read issues on big-endian hosts. The core issue is that values
> in the BAR is not stored in little endian as required.
> 
> Fix that and add a regression test for this. This required a bit of
> cleanup, so it blew up into a series.
> 
> v2:
> 
>   * "hw/nvme: use symbolic names for registers"
> Use offsetof(NvmeBar, reg) instead of explicit offsets (Philippe)
> 
>   * "hw/nvme: fix mmio read"
> Use the st/ld API instead of cpu_to_X (Philippe)
> 
> Klaus Jensen (5):
>   hw/nvme: split pmrmsc register into upper and lower
>   hw/nvme: use symbolic names for registers
>   hw/nvme: fix out-of-bounds reads
>   hw/nvme: fix mmio read
>   tests/qtest/nvme-test: add mmio read test
> 
>  include/block/nvme.h|  60 +--
>  hw/nvme/ctrl.c  | 362 +++-
>  tests/qtest/nvme-test.c |  26 +++
>  3 files changed, 276 insertions(+), 172 deletions(-)
> 

Oi,

A review on patch 3 and 4 would be appreciated so this has a chance of
reaching Peter for -rc0 :)


signature.asc
Description: PGP signature

39 matches

Mail list logo