Re: [PATCH v3 5/5] tests/qtest/nvme-test: add mmio read test
On Wed, Jul 14, 2021 at 08:01:25AM +0200, Klaus Jensen wrote: From: Klaus Jensen Add a regression test for mmio read on big-endian hosts. Signed-off-by: Klaus Jensen --- tests/qtest/nvme-test.c | 26 ++ 1 file changed, 26 insertions(+) diff --git a/tests/qtest/nvme-test.c b/tests/qtest/nvme-test.c index 47e757d7e2af..f8bafb5d70fb 100644 --- a/tests/qtest/nvme-test.c +++ b/tests/qtest/nvme-test.c @@ -67,6 +67,30 @@ static void nvmetest_oob_cmb_test(void *obj, void *data, QGuestAllocator *alloc) g_assert_cmpint(qpci_io_readl(pdev, bar, cmb_bar_size - 1), !=, 0x44332211); } +static void nvmetest_reg_read_test(void *obj, void *data, QGuestAllocator *alloc) +{ +QNvme *nvme = obj; +QPCIDevice *pdev = >dev; +QPCIBar bar; +uint32_t cap_lo, cap_hi; +uint64_t cap; + +qpci_device_enable(pdev); +bar = qpci_iomap(pdev, 0, NULL); + +cap_lo = qpci_io_readl(pdev, bar, 0x0); +g_assert_cmpint(NVME_CAP_MQES(cap_lo), ==, 0x7ff); + +cap_hi = qpci_io_readl(pdev, bar, 0x4); +g_assert_cmpint(NVME_CAP_MPSMAX((uint64_t)cap_hi << 32), ==, 0x4); + +cap = qpci_io_readq(pdev, bar, 0x0); +g_assert_cmpint(NVME_CAP_MQES(cap), ==, 0x7ff); +g_assert_cmpint(NVME_CAP_MPSMAX(cap), ==, 0x4); + +qpci_iounmap(pdev, bar); +} + static void nvmetest_pmr_reg_test(void *obj, void *data, QGuestAllocator *alloc) { QNvme *nvme = obj; @@ -142,6 +166,8 @@ static void nvme_register_nodes(void) &(QOSGraphTestOptions) { .edge.extra_device_opts = "pmrdev=pmr0" }); + +qos_add_test("reg-read", "nvme", nvmetest_reg_read_test, NULL); } libqos_init(nvme_register_nodes); -- 2.32.0 Reviewed-by: Gollu Appalanaidu
Re: [PATCH v3 2/5] hw/nvme: use symbolic names for registers
| (data << 32); trace_pci_nvme_mmio_asqaddr_hi(data, n->bar.asq); break; -case 0x30: /* ACQ */ +case NVME_REG_ACQ: trace_pci_nvme_mmio_acqaddr(data); n->bar.acq = size == 8 ? data : (n->bar.acq & ~0xULL) | (data & 0x); break; -case 0x34: /* ACQ hi */ +case NVME_REG_ACQ + 4: n->bar.acq = (n->bar.acq & 0x) | (data << 32); trace_pci_nvme_mmio_acqaddr_hi(data, n->bar.acq); break; -case 0x38: /* CMBLOC */ +case NVME_REG_CMBLOC: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbloc_reserved, "invalid write to reserved CMBLOC" " when CMBSZ is zero, ignored"); return; -case 0x3C: /* CMBSZ */ +case NVME_REG_CMBSZ: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbsz_readonly, "invalid write to read only CMBSZ, ignored"); return; -case 0x50: /* CMBMSC */ +case NVME_REG_CMBMSC: if (!NVME_CAP_CMBS(n->bar.cap)) { return; } @@ -5876,15 +5876,15 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, } return; -case 0x54: /* CMBMSC hi */ +case NVME_REG_CMBMSC + 4: n->bar.cmbmsc = (n->bar.cmbmsc & 0x) | (data << 32); return; -case 0xe00: /* PMRCAP */ +case NVME_REG_PMRCAP: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrcap_readonly, "invalid write to PMRCAP register, ignored"); return; -case 0xe04: /* PMRCTL */ +case NVME_REG_PMRCTL: if (!NVME_CAP_PMRS(n->bar.cap)) { return; } @@ -5899,19 +5899,19 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, n->pmr.cmse = false; } return; -case 0xe08: /* PMRSTS */ +case NVME_REG_PMRSTS: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrsts_readonly, "invalid write to PMRSTS register, ignored"); return; -case 0xe0C: /* PMREBS */ +case NVME_REG_PMREBS: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrebs_readonly, "invalid write to PMREBS register, ignored"); return; -case 0xe10: /* PMRSWTP */ +case NVME_REG_PMRSWTP: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrswtp_readonly, "invalid write to PMRSWTP register, ignored"); return; -case 0xe14: /* PMRMSCL */ +case NVME_REG_PMRMSCL: if (!NVME_CAP_PMRS(n->bar.cap)) { return; } @@ -5932,7 +5932,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, } return; -case 0xe18: /* PMRMSCU */ +case NVME_REG_PMRMSCU: if (!NVME_CAP_PMRS(n->bar.cap)) { return; } @@ -5974,7 +5974,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) * from PMRSTS should ensure prior writes * made it to persistent media */ -if (addr == 0xe08 && +if (addr == NVME_REG_PMRSTS && (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) { memory_region_msync(>pmr.dev->mr, 0, n->pmr.dev->size); } -- 2.32.0 LGTM. Reviewed-by : Gollu Appalanaidu
Re: [PATCH v3 1/5] hw/nvme: split pmrmsc register into upper and lower
On Wed, Jul 14, 2021 at 08:01:21AM +0200, Klaus Jensen wrote: From: Klaus Jensen The specification uses a set of 32 bit PMRMSCL and PMRMSCU registers to make up the 64 bit logical PMRMSC register. Make it so. Signed-off-by: Klaus Jensen --- include/block/nvme.h | 31 --- hw/nvme/ctrl.c | 9 + 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/include/block/nvme.h b/include/block/nvme.h index 527105fafc0b..84053b68b987 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -26,7 +26,8 @@ typedef struct QEMU_PACKED NvmeBar { uint32_tpmrsts; uint32_tpmrebs; uint32_tpmrswtp; -uint64_tpmrmsc; +uint32_tpmrmscl; +uint32_tpmrmscu; uint8_t css[484]; } NvmeBar; @@ -475,25 +476,25 @@ enum NvmePmrswtpMask { #define NVME_PMRSWTP_SET_PMRSWTV(pmrswtp, val) \ (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTV_MASK) << PMRSWTP_PMRSWTV_SHIFT) -enum NvmePmrmscShift { -PMRMSC_CMSE_SHIFT = 1, -PMRMSC_CBA_SHIFT= 12, +enum NvmePmrmsclShift { +PMRMSCL_CMSE_SHIFT = 1, +PMRMSCL_CBA_SHIFT= 12, }; -enum NvmePmrmscMask { -PMRMSC_CMSE_MASK = 0x1, -PMRMSC_CBA_MASK= 0xf, +enum NvmePmrmsclMask { +PMRMSCL_CMSE_MASK = 0x1, +PMRMSCL_CBA_MASK= 0xf, }; -#define NVME_PMRMSC_CMSE(pmrmsc)\ -((pmrmsc >> PMRMSC_CMSE_SHIFT) & PMRMSC_CMSE_MASK) -#define NVME_PMRMSC_CBA(pmrmsc) \ -((pmrmsc >> PMRMSC_CBA_SHIFT) & PMRMSC_CBA_MASK) +#define NVME_PMRMSCL_CMSE(pmrmscl)\ +((pmrmscl >> PMRMSCL_CMSE_SHIFT) & PMRMSCL_CMSE_MASK) +#define NVME_PMRMSCL_CBA(pmrmscl) \ +((pmrmscl >> PMRMSCL_CBA_SHIFT) & PMRMSCL_CBA_MASK) -#define NVME_PMRMSC_SET_CMSE(pmrmsc, val) \ -(pmrmsc |= (uint64_t)(val & PMRMSC_CMSE_MASK) << PMRMSC_CMSE_SHIFT) -#define NVME_PMRMSC_SET_CBA(pmrmsc, val) \ -(pmrmsc |= (uint64_t)(val & PMRMSC_CBA_MASK) << PMRMSC_CBA_SHIFT) +#define NVME_PMRMSCL_SET_CMSE(pmrmscl, val) \ +(pmrmscl |= (uint32_t)(val & PMRMSCL_CMSE_MASK) << PMRMSCL_CMSE_SHIFT) +#define NVME_PMRMSCL_SET_CBA(pmrmscl, val) \ +(pmrmscl |= (uint32_t)(val & PMRMSCL_CBA_MASK) << PMRMSCL_CBA_SHIFT) enum NvmeSglDescriptorType { NVME_SGL_DESCR_TYPE_DATA_BLOCK = 0x0, diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 2f0524e12a36..28299c6f3764 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -5916,11 +5916,12 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, return; } -n->bar.pmrmsc = (n->bar.pmrmsc & ~0x) | (data & 0x); +n->bar.pmrmscl = data & 0x; n->pmr.cmse = false; -if (NVME_PMRMSC_CMSE(n->bar.pmrmsc)) { -hwaddr cba = NVME_PMRMSC_CBA(n->bar.pmrmsc) << PMRMSC_CBA_SHIFT; +if (NVME_PMRMSCL_CMSE(n->bar.pmrmscl)) { +hwaddr cba = n->bar.pmrmscu | +(NVME_PMRMSCL_CBA(n->bar.pmrmscl) << PMRMSCL_CBA_SHIFT); if (cba + int128_get64(n->pmr.dev->mr.size) < cba) { NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 1); return; @@ -5936,7 +5937,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, return; } -n->bar.pmrmsc = (n->bar.pmrmsc & 0x) | (data << 32); + n->bar.pmrmscu = data & 0x; return; default: NVME_GUEST_ERR(pci_nvme_ub_mmiowr_invalid, -- 2.32.0 Changes LGTM. Reviewed-by: Gollu Appalanaidu
Re: [PATCH] hw/nvme: fix mmio read
On Tue, Jul 13, 2021 at 07:43:59AM +0200, Klaus Jensen wrote: From: Klaus Jensen The new PMR test unearthed a long-standing issue with MMIO reads on big-endian hosts. Fix by using the ldn_he_p helper instead of memcpy. Cc: Gollu Appalanaidu Reported-by: Peter Maydell Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 2f0524e12a36..dd81c3b19c7e 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -5951,7 +5951,6 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) { NvmeCtrl *n = (NvmeCtrl *)opaque; uint8_t *ptr = (uint8_t *)>bar; -uint64_t val = 0; trace_pci_nvme_mmio_read(addr, size); @@ -5977,14 +5976,15 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) { memory_region_msync(>pmr.dev->mr, 0, n->pmr.dev->size); } -memcpy(, ptr + addr, size); -} else { -NVME_GUEST_ERR(pci_nvme_ub_mmiord_invalid_ofs, - "MMIO read beyond last register," - " offset=0x%"PRIx64", returning 0", addr); + +return ldn_he_p(ptr + addr, size); } -return val; +NVME_GUEST_ERR(pci_nvme_ub_mmiord_invalid_ofs, + "MMIO read beyond last register," + " offset=0x%"PRIx64", returning 0", addr); + +return 0; } static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) -- 2.32.0 LGTM. Reviewed-by: Gollu Appalanaidu
[PATCH] tests/qtest/nvme-test: add persistent memory region test
This will test the PMR functionality. Signed-off-by: Gollu Appalanaidu --- tests/qtest/nvme-test.c | 78 - 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/tests/qtest/nvme-test.c b/tests/qtest/nvme-test.c index d32c953a38..6d557be6ca 100644 --- a/tests/qtest/nvme-test.c +++ b/tests/qtest/nvme-test.c @@ -13,6 +13,7 @@ #include "libqos/libqtest.h" #include "libqos/qgraph.h" #include "libqos/pci.h" +#include "include/block/nvme.h" typedef struct QNvme QNvme; @@ -21,6 +22,9 @@ struct QNvme { QPCIDevice dev; }; +static char *t_path; +#define TEST_IMAGE_SIZE (2 * 1024 * 1024) + static void *nvme_get_driver(void *obj, const char *interface) { QNvme *nvme = obj; @@ -66,12 +70,77 @@ static void nvmetest_oob_cmb_test(void *obj, void *data, QGuestAllocator *alloc) g_assert_cmpint(qpci_io_readl(pdev, bar, cmb_bar_size - 1), !=, 0x44332211); } +static void nvmetest_pmr_reg_test(void *obj, void *data, QGuestAllocator *alloc) +{ +QNvme *nvme = obj; +QPCIDevice *pdev = >dev; +QPCIBar pmr_bar, nvme_bar; +uint32_t pmrcap, pmrsts; + +qpci_device_enable(pdev); +pmr_bar = qpci_iomap(pdev, 4, NULL); + +/* Without Enabling PMRCTL check bar enablemet */ +qpci_io_writel(pdev, pmr_bar, 0, 0xccbbaa99); +g_assert_cmpint(qpci_io_readb(pdev, pmr_bar, 0), !=, 0x99); +g_assert_cmpint(qpci_io_readw(pdev, pmr_bar, 0), !=, 0xaa99); + +/* Map NVMe Bar Register to Enable the Mem Region */ +nvme_bar = qpci_iomap(pdev, 0, NULL); + +pmrcap = qpci_io_readl(pdev, nvme_bar, 0xe00); +g_assert_cmpint(NVME_PMRCAP_RDS(pmrcap), ==, 0x1); +g_assert_cmpint(NVME_PMRCAP_WDS(pmrcap), ==, 0x1); +g_assert_cmpint(NVME_PMRCAP_BIR(pmrcap), ==, 0x4); +g_assert_cmpint(NVME_PMRCAP_PMRWBM(pmrcap), ==, 0x2); +g_assert_cmpint(NVME_PMRCAP_CMSS(pmrcap), ==, 0x1); + +/* Enable PMRCTRL */ +qpci_io_writel(pdev, nvme_bar, 0xe04, 0x1); + +qpci_io_writel(pdev, pmr_bar, 0, 0x44332211); +g_assert_cmpint(qpci_io_readb(pdev, pmr_bar, 0), ==, 0x11); +g_assert_cmpint(qpci_io_readw(pdev, pmr_bar, 0), ==, 0x2211); +g_assert_cmpint(qpci_io_readl(pdev, pmr_bar, 0), ==, 0x44332211); + +pmrsts = qpci_io_readl(pdev, nvme_bar, 0xe08); +g_assert_cmpint(NVME_PMRSTS_NRDY(pmrsts), ==, 0x0); + +/* Disable PMRCTRL */ +qpci_io_writel(pdev, nvme_bar, 0xe04, 0x0); + +qpci_io_writel(pdev, pmr_bar, 0, 0x88776655); +g_assert_cmpint(qpci_io_readb(pdev, pmr_bar, 0), !=, 0x55); +g_assert_cmpint(qpci_io_readw(pdev, pmr_bar, 0), !=, 0x6655); +g_assert_cmpint(qpci_io_readl(pdev, pmr_bar, 0), !=, 0x88776655); + +pmrsts = qpci_io_readl(pdev, nvme_bar, 0xe08); +g_assert_cmpint(NVME_PMRSTS_NRDY(pmrsts), ==, 0x1); + +qpci_iounmap(pdev, nvme_bar); +qpci_iounmap(pdev, pmr_bar); +} + static void nvme_register_nodes(void) { +int fd, ret; +t_path = g_strdup("/tmp/qtest.XX"); + +/* Create a temporary raw image*/ +fd = mkstemp(t_path); +g_assert(fd >= 0); +ret = ftruncate(fd, TEST_IMAGE_SIZE); +g_assert(ret == 0); +close(fd); + +char *pmr_cmd_line = g_strdup_printf("-object memory-backend-file,id=pmr0," + "share=on,mem-path=%s,size=8", t_path); + QOSGraphEdgeOptions opts = { .extra_device_opts = "addr=04.0,drive=drv0,serial=foo", .before_cmd_line = "-drive id=drv0,if=none,file=null-co://," - "file.read-zeroes=on,format=raw", + "file.read-zeroes=on,format=raw ", + pmr_cmd_line, }; add_qpci_address(, &(QPCIAddress) { .devfn = QPCI_DEVFN(4, 0) }); @@ -83,6 +152,13 @@ static void nvme_register_nodes(void) qos_add_test("oob-cmb-access", "nvme", nvmetest_oob_cmb_test, &(QOSGraphTestOptions) { .edge.extra_device_opts = "cmb_size_mb=2" }); + +qos_add_test("pmr-test-access", "nvme", nvmetest_pmr_reg_test, + &(QOSGraphTestOptions) { +.edge.extra_device_opts = "pmrdev=pmr0" +}); + +unlink(t_path); } libqos_init(nvme_register_nodes); -- 2.17.1
[PATCH v3 1/2] hw/nvme: fix endianess conversion and add controller list
Add the controller identifiers list CNS 0x13, available list of ctrls in NVM Subsystem that may or may not be attached to namespaces. In Identify Ctrl List of the CNS 0x12 and 0x13 no endian conversion for the nsid field. These two CNS values shows affect when there exists a Subsystem. Added condition if there is no Subsystem return invalid field in command. Signed-off-by: Gollu Appalanaidu -v3: Added condition if there is no Subsystem return invalid field in command -v2: Fix the review comments from Klaus and squashed 2nd commit into 1st commit --- hw/nvme/ctrl.c | 26 ++ hw/nvme/trace-events | 2 +- include/block/nvme.h | 1 + 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 2e7498a73e..d513b022c4 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4251,9 +4251,11 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active) return NVME_INVALID_CMD_SET | NVME_DNR; } -static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) +static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req, +bool attached) { NvmeIdentify *c = (NvmeIdentify *)>cmd; +uint32_t nsid = le32_to_cpu(c->nsid); uint16_t min_id = le16_to_cpu(c->ctrlid); uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {}; uint16_t *ids = [1]; @@ -4261,15 +4263,21 @@ static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) NvmeCtrl *ctrl; int cntlid, nr_ids = 0; -trace_pci_nvme_identify_ns_attached_list(min_id); +trace_pci_nvme_identify_ctrl_list(c->cns, min_id); -if (c->nsid == NVME_NSID_BROADCAST) { +if (!n->subsys) { return NVME_INVALID_FIELD | NVME_DNR; } -ns = nvme_subsys_ns(n->subsys, c->nsid); -if (!ns) { -return NVME_INVALID_FIELD | NVME_DNR; +if (attached) { +if (nsid == NVME_NSID_BROADCAST) { +return NVME_INVALID_FIELD | NVME_DNR; +} + +ns = nvme_subsys_ns(n->subsys, nsid); +if (!ns) { +return NVME_INVALID_FIELD | NVME_DNR; +} } for (cntlid = min_id; cntlid < ARRAY_SIZE(n->subsys->ctrls); cntlid++) { @@ -4278,7 +4286,7 @@ static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) continue; } -if (!nvme_ns(ctrl, c->nsid)) { +if (attached && !nvme_ns(ctrl, nsid)) { continue; } @@ -4493,7 +4501,9 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req) case NVME_ID_CNS_NS_PRESENT: return nvme_identify_ns(n, req, false); case NVME_ID_CNS_NS_ATTACHED_CTRL_LIST: -return nvme_identify_ns_attached_list(n, req); +return nvme_identify_ctrl_list(n, req, true); +case NVME_ID_CNS_CTRL_LIST: +return nvme_identify_ctrl_list(n, req, false); case NVME_ID_CNS_CS_NS: return nvme_identify_ns_csi(n, req, true); case NVME_ID_CNS_CS_NS_PRESENT: diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index ea33d0ccc3..4976eb9bec 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -55,7 +55,7 @@ pci_nvme_identify(uint16_t cid, uint8_t cns, uint16_t ctrlid, uint8_t csi) "cid pci_nvme_identify_ctrl(void) "identify controller" pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8"" pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32"" -pci_nvme_identify_ns_attached_list(uint16_t cntid) "cntid=%"PRIu16"" +pci_nvme_identify_ctrl_list(uint8_t cns, uint16_t cntid) "cns 0x%"PRIx8" cntid %"PRIu16"" pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8"" pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32"" pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8"" diff --git a/include/block/nvme.h b/include/block/nvme.h index 0ff9ce17a9..188ab460df 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -980,6 +980,7 @@ enum NvmeIdCns { NVME_ID_CNS_NS_PRESENT_LIST = 0x10, NVME_ID_CNS_NS_PRESENT= 0x11, NVME_ID_CNS_NS_ATTACHED_CTRL_LIST = 0x12, +NVME_ID_CNS_CTRL_LIST = 0x13, NVME_ID_CNS_CS_NS_PRESENT_LIST= 0x1a, NVME_ID_CNS_CS_NS_PRESENT = 0x1b, NVME_ID_CNS_IO_COMMAND_SET= 0x1c, -- 2.17.1
[PATCH v3 2/2] hw/nvme: documentation fix
In the documentation of the '-detached' param "be" and "not" has been used side by side, fix that. Signed-off-by: Gollu Appalanaidu --- hw/nvme/ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index d513b022c4..21883e4b3c 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -114,7 +114,7 @@ * This parameter is only valid together with the `subsys` parameter. If left * at the default value (`false/off`), the namespace will be attached to all * controllers in the NVMe subsystem at boot-up. If set to `true/on`, the - * namespace will be be available in the subsystem not not attached to any + * namespace will be available in the subsystem but not attached to any * controllers. * * Setting `zoned` to true selects Zoned Command Set at the namespace. -- 2.17.1
Re: [PATCH v2 1/2] hw/nvme: fix endianess conversion and add controller list
On Wed, Jun 09, 2021 at 10:22:49PM +0200, Klaus Jensen wrote: On Jun 1 20:32, Gollu Appalanaidu wrote: Add the controller identifiers list CNS 0x13, available list of ctrls in NVM Subsystem that may or may not be attached to namespaces. In Identify Ctrl List of the CNS 0x12 and 0x13 no endian conversion for the nsid field. Signed-off-by: Gollu Appalanaidu -v2: Fix the review comments from Klaus and squashed 2nd commit into 1st commit --- hw/nvme/ctrl.c | 26 -- hw/nvme/trace-events | 2 +- include/block/nvme.h | 1 + 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 2e7498a73e..813a72c655 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4251,9 +4251,11 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active) return NVME_INVALID_CMD_SET | NVME_DNR; } -static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) +static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req, +bool attached) { NvmeIdentify *c = (NvmeIdentify *)>cmd; +uint32_t nsid = le32_to_cpu(c->nsid); uint16_t min_id = le16_to_cpu(c->ctrlid); uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {}; uint16_t *ids = [1]; @@ -4261,15 +4263,17 @@ static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) NvmeCtrl *ctrl; int cntlid, nr_ids = 0; -trace_pci_nvme_identify_ns_attached_list(min_id); +trace_pci_nvme_identify_ctrl_list(c->cns, min_id); -if (c->nsid == NVME_NSID_BROADCAST) { -return NVME_INVALID_FIELD | NVME_DNR; -} +if (attached) { +if (nsid == NVME_NSID_BROADCAST) { +return NVME_INVALID_FIELD | NVME_DNR; +} -ns = nvme_subsys_ns(n->subsys, c->nsid); -if (!ns) { -return NVME_INVALID_FIELD | NVME_DNR; +ns = nvme_subsys_ns(n->subsys, nsid); +if (!ns) { +return NVME_INVALID_FIELD | NVME_DNR; +} } for (cntlid = min_id; cntlid < ARRAY_SIZE(n->subsys->ctrls); cntlid++) { Assume that `attached` is false and `n->subsys` is NULL. KABM :) This scenario has been tested but executed without any issue, since here ARRAY_SIZE calculating size as per the "NVME_MAX_CONTROLLERS" defined. These two CNS values shows affect when there exists a Subsystem. will add check condition if there is no Subsystem will return invalid field in command. if (!n->subsys) { return NVME_INVALID_FIELD | NVME_DNR; }
[PATCH v2 2/2] tests/qtest/nvme-test: add boot partition read test
Add a test case for reading an NVMe Boot Partition without enabling the controller. Signed-off-by: Gollu Appalanaidu --- tests/qtest/nvme-test.c | 118 +++- 1 file changed, 117 insertions(+), 1 deletion(-) diff --git a/tests/qtest/nvme-test.c b/tests/qtest/nvme-test.c index d32c953a38..39d2e26f76 100644 --- a/tests/qtest/nvme-test.c +++ b/tests/qtest/nvme-test.c @@ -13,6 +13,19 @@ #include "libqos/libqtest.h" #include "libqos/qgraph.h" #include "libqos/pci.h" +#include "libqos/pci-pc.h" +#include "libqos/malloc-pc.h" +#include "libqos/malloc.h" +#include "libqos/libqos.h" +#include "include/block/nvme.h" +#include "include/hw/pci/pci.h" + +#define NVME_BPINFO_BPSZ_UNITS (128 * KiB) +#define NVME_BRS_BPSZ_UNITS (4 * KiB) +#define NVME_BRS_READ_MAX_TIME 100 +#define TEST_IMAGE_SIZE (2 * 128 * KiB) + +static char *t_path; typedef struct QNvme QNvme; @@ -44,6 +57,13 @@ static void *nvme_create(void *pci_bus, QGuestAllocator *alloc, void *addr) return >obj; } +static void drive_destroy(void *path) +{ +unlink(path); +g_free(path); +qos_invalidate_command_line(); +} + /* This used to cause a NULL pointer dereference. */ static void nvmetest_oob_cmb_test(void *obj, void *data, QGuestAllocator *alloc) { @@ -66,12 +86,100 @@ static void nvmetest_oob_cmb_test(void *obj, void *data, QGuestAllocator *alloc) g_assert_cmpint(qpci_io_readl(pdev, bar, cmb_bar_size - 1), !=, 0x44332211); } +static void nvmetest_bp_read_test(void *obj, void *data, QGuestAllocator *alloc) +{ +uint16_t test_size = 32; +size_t bp_test_len = test_size * NVME_BRS_BPSZ_UNITS; +uint8_t *read_buf = g_malloc(bp_test_len); +uint8_t *cmp_buf = g_malloc(bp_test_len); +QNvme *nvme = obj; +QPCIDevice *pdev = >dev; +QPCIBar nvme_bar; +uint8_t brs = 0; +uint64_t sleep_time = 0; +uintptr_t guest_buf; +uint64_t buf_addr; + +memset(cmp_buf, 0x42, bp_test_len); + +guest_buf = guest_alloc(alloc, bp_test_len); +buf_addr = cpu_to_le64(guest_buf); + +qpci_device_enable(pdev); +nvme_bar = qpci_iomap(pdev, 0, NULL); + +/* BPINFO */ +uint32_t bpinfo = qpci_io_readl(pdev, nvme_bar, 0x40); +uint16_t single_bp_size = bpinfo & BPINFO_BPSZ_MASK; +uint8_t active_bpid = bpinfo >> BPINFO_ABPID_SHIFT; +uint8_t read_select = (bpinfo >> BPINFO_BRS_SHIFT) & BPINFO_BRS_MASK; + +g_assert_cmpint(single_bp_size, ==, 0x1); +g_assert_cmpint(active_bpid, ==, 0); +g_assert_cmpint(read_select, ==, NVME_BPINFO_BRS_NOREAD); + +/* BPMBL */ +uint64_t bpmbl = buf_addr; +uint32_t bpmbl_low = bpmbl & 0x; +uint32_t bpmbl_hi = (bpmbl >> 32) & 0x; +qpci_io_writel(pdev, nvme_bar, 0x48, bpmbl_low); +qpci_io_writel(pdev, nvme_bar, 0x4c, bpmbl_hi); + +/* BPRSEL */ +qpci_io_writel(pdev, nvme_bar, 0x44, 32); + +while (true) { +usleep(1000); +sleep_time += 1000; +brs = qpci_io_readb(pdev, nvme_bar, 0x43) & BPINFO_BRS_MASK; +if (brs == NVME_BPINFO_BRS_SUCCESS || brs == NVME_BPINFO_BRS_ERROR || +sleep_time == NVME_BRS_READ_MAX_TIME) { +break; +} +} +g_assert_cmpint(brs, ==, NVME_BPINFO_BRS_SUCCESS); + +qtest_memread(pdev->bus->qts, guest_buf, read_buf, bp_test_len); +g_assert_cmpint(memcmp(cmp_buf, read_buf, bp_test_len), ==, 0); + +g_free(cmp_buf); +g_free(read_buf); +g_test_queue_destroy(drive_destroy, t_path); +} + static void nvme_register_nodes(void) { +int fd; +FILE *fh; +uint16_t bpsz = 2; +size_t bp_len = NVME_BPINFO_BPSZ_UNITS * bpsz; +size_t ret; +uint8_t *pattern = g_malloc(bp_len); + +t_path = g_strdup("/tmp/qtest.XX"); + +/* Create a temporary raw image */ +fd = mkstemp(t_path); +g_assert_cmpint(fd, >=, 0); +ret = ftruncate(fd, TEST_IMAGE_SIZE); +g_assert_cmpint(ret, ==, 0); +close(fd); + +memset(pattern, 0x42, bp_len); + +fh = fopen(t_path, "w+"); +ret = fwrite(pattern, NVME_BPINFO_BPSZ_UNITS, bpsz, fh); +g_assert_cmpint(ret, ==, bpsz); +fclose(fh); + +char *bp_cmd_line = g_strdup_printf("-drive id=bp0,file=%s,if=none," +"format=raw", t_path); + QOSGraphEdgeOptions opts = { .extra_device_opts = "addr=04.0,drive=drv0,serial=foo", .before_cmd_line = "-drive id=drv0,if=none,file=null-co://," - "file.read-zeroes=on,format=raw", + "file.read-zeroes=on,format=raw ", + bp_cmd_line, }; add_qpci_address(, &(QPCIAddress) { .devfn = QPCI_DEVFN(4, 0) }); @@ -83,6 +191,14 @@ static void nvme_register_nodes
[PATCH v2 1/2] hw/nvme: fix endianess conversion and add controller list
Add the controller identifiers list CNS 0x13, available list of ctrls in NVM Subsystem that may or may not be attached to namespaces. In Identify Ctrl List of the CNS 0x12 and 0x13 no endian conversion for the nsid field. Signed-off-by: Gollu Appalanaidu -v2: Fix the review comments from Klaus and squashed 2nd commit into 1st commit --- hw/nvme/ctrl.c | 26 -- hw/nvme/trace-events | 2 +- include/block/nvme.h | 1 + 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 2e7498a73e..813a72c655 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4251,9 +4251,11 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active) return NVME_INVALID_CMD_SET | NVME_DNR; } -static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) +static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req, +bool attached) { NvmeIdentify *c = (NvmeIdentify *)>cmd; +uint32_t nsid = le32_to_cpu(c->nsid); uint16_t min_id = le16_to_cpu(c->ctrlid); uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {}; uint16_t *ids = [1]; @@ -4261,15 +4263,17 @@ static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) NvmeCtrl *ctrl; int cntlid, nr_ids = 0; -trace_pci_nvme_identify_ns_attached_list(min_id); +trace_pci_nvme_identify_ctrl_list(c->cns, min_id); -if (c->nsid == NVME_NSID_BROADCAST) { -return NVME_INVALID_FIELD | NVME_DNR; -} +if (attached) { +if (nsid == NVME_NSID_BROADCAST) { +return NVME_INVALID_FIELD | NVME_DNR; +} -ns = nvme_subsys_ns(n->subsys, c->nsid); -if (!ns) { -return NVME_INVALID_FIELD | NVME_DNR; +ns = nvme_subsys_ns(n->subsys, nsid); +if (!ns) { +return NVME_INVALID_FIELD | NVME_DNR; +} } for (cntlid = min_id; cntlid < ARRAY_SIZE(n->subsys->ctrls); cntlid++) { @@ -4278,7 +4282,7 @@ static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) continue; } -if (!nvme_ns(ctrl, c->nsid)) { +if (attached && !nvme_ns(ctrl, nsid)) { continue; } @@ -4493,7 +4497,9 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req) case NVME_ID_CNS_NS_PRESENT: return nvme_identify_ns(n, req, false); case NVME_ID_CNS_NS_ATTACHED_CTRL_LIST: -return nvme_identify_ns_attached_list(n, req); +return nvme_identify_ctrl_list(n, req, true); +case NVME_ID_CNS_CTRL_LIST: +return nvme_identify_ctrl_list(n, req, false); case NVME_ID_CNS_CS_NS: return nvme_identify_ns_csi(n, req, true); case NVME_ID_CNS_CS_NS_PRESENT: diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index ea33d0ccc3..4976eb9bec 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -55,7 +55,7 @@ pci_nvme_identify(uint16_t cid, uint8_t cns, uint16_t ctrlid, uint8_t csi) "cid pci_nvme_identify_ctrl(void) "identify controller" pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8"" pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32"" -pci_nvme_identify_ns_attached_list(uint16_t cntid) "cntid=%"PRIu16"" +pci_nvme_identify_ctrl_list(uint8_t cns, uint16_t cntid) "cns 0x%"PRIx8" cntid %"PRIu16"" pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8"" pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32"" pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8"" diff --git a/include/block/nvme.h b/include/block/nvme.h index 0ff9ce17a9..188ab460df 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -980,6 +980,7 @@ enum NvmeIdCns { NVME_ID_CNS_NS_PRESENT_LIST = 0x10, NVME_ID_CNS_NS_PRESENT= 0x11, NVME_ID_CNS_NS_ATTACHED_CTRL_LIST = 0x12, +NVME_ID_CNS_CTRL_LIST = 0x13, NVME_ID_CNS_CS_NS_PRESENT_LIST= 0x1a, NVME_ID_CNS_CS_NS_PRESENT = 0x1b, NVME_ID_CNS_IO_COMMAND_SET= 0x1c, -- 2.17.1
[PATCH v2 0/2] add boot partitions support and read test case
-v2: Boot partitions write and read offset corrections This series adds the boot partition feature as well test case for reading boot partition area. Gollu Appalanaidu (2): hw/nvme: add support for boot partiotions tests/qtest/nvme-test: add boot partition read test hw/nvme/ctrl.c | 207 hw/nvme/nvme.h | 3 + hw/nvme/trace-events| 7 ++ include/block/nvme.h| 75 ++- tests/qtest/nvme-test.c | 118 ++- 5 files changed, 408 insertions(+), 2 deletions(-) -- 2.17.1
[PATCH v2 1/2] hw/nvme: add support for boot partiotions
NVMe Boot Partitions provides an area that may be read by the host without initializing queues or even enabling the controller. This allows various platform initialization code to be stored on the NVMe device instead of some separete medium. This patch adds the read support for such an area, as well as support for updating the boot partition contents from the host through the FW Download and Commit commands. Signed-off-by: Gollu Appalanaidu --- hw/nvme/ctrl.c | 207 +++ hw/nvme/nvme.h | 3 + hw/nvme/trace-events | 7 ++ include/block/nvme.h | 75 +++- 4 files changed, 291 insertions(+), 1 deletion(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 0bcaf7192f..ab72091802 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -100,6 +100,12 @@ * the minimum memory page size (CAP.MPSMIN). The default value is 0 (i.e. * defaulting to the value of `mdts`). * + * - `bootpart` + * NVMe Boot Partitions provides an area that may be read by the host without + * initializing queues or even enabling the controller. This 'bootpart' block + * device stores platform initialization code. Its size shall be in 256 KiB + * units. + * * nvme namespace device parameters * * - `shared` @@ -215,6 +221,8 @@ static const uint32_t nvme_cse_acs[256] = { [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_NS_ATTACHMENT]= NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC, [NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, +[NVME_ADM_CMD_DOWNLOAD_FW] = NVME_CMD_EFF_CSUPP, +[NVME_ADM_CMD_COMMIT_FW]= NVME_CMD_EFF_CSUPP, }; static const uint32_t nvme_cse_iocs_none[256]; @@ -5145,6 +5153,112 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) return req->status; } +struct nvme_bp_read_ctx { +NvmeCtrl *n; +QEMUSGList qsg; +}; + +static void nvme_bp_read_cb(void *opaque, int ret) +{ +struct nvme_bp_read_ctx *ctx = opaque; +NvmeCtrl *n = ctx->n; + +trace_pci_nvme_bp_read_cb(); + +if (ret) { +NVME_BPINFO_CLEAR_BRS(n->bar.bpinfo); +NVME_BPINFO_SET_BRS(n->bar.bpinfo, NVME_BPINFO_BRS_ERROR); +goto free; +} + +NVME_BPINFO_CLEAR_BRS(n->bar.bpinfo); +NVME_BPINFO_SET_BRS(n->bar.bpinfo, NVME_BPINFO_BRS_SUCCESS); + +free: +if (ctx->qsg.sg) { +qemu_sglist_destroy(>qsg); +} + +g_free(ctx); +} + +static void nvme_fw_commit_cb(void *opaque, int ret) +{ +NvmeRequest *req = opaque; + +trace_pci_nvme_fw_commit_cb(nvme_cid(req)); + +if (ret) { +nvme_aio_err(req, ret); +} + +nvme_enqueue_req_completion(nvme_cq(req), req); +} + +static uint16_t nvme_fw_commit(NvmeCtrl *n, NvmeRequest *req) +{ +uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); +uint8_t fwug = n->id_ctrl.fwug; +uint8_t fs = dw10 & 0x7; +uint8_t ca = (dw10 >> 3) & 0x7; +uint8_t bpid = dw10 >> 31; +int64_t offset = 0; + +trace_pci_nvme_fw_commit(nvme_cid(req), dw10, fwug, fs, ca, +bpid); + +if (fs || ca == NVME_FW_CA_REPLACE) { +return NVME_INVALID_FW_SLOT | NVME_DNR; +} +/* + * current firmware commit command only support boot partions + * related commit actions + */ +if (ca < NVME_FW_CA_REPLACE_BP) { +return NVME_FW_ACTIVATE_PROHIBITED | NVME_DNR; +} + +if (ca == NVME_FW_CA_ACTIVATE_BP) { +NVME_BPINFO_CLEAR_ABPID(n->bar.bpinfo); +NVME_BPINFO_SET_ABPID(n->bar.bpinfo, bpid); +return NVME_SUCCESS; +} + +if (bpid) { +offset = n->bp_size; +} + +nvme_sg_init(n, >sg, false); +qemu_iovec_add(>sg.iov, n->bp_data, n->bp_size); + +req->aiocb = blk_aio_pwritev(n->blk_bp, offset, >sg.iov, 0, + nvme_fw_commit_cb, req); + +return NVME_NO_COMPLETE; +} + +static uint16_t nvme_fw_download(NvmeCtrl *n, NvmeRequest *req) +{ +uint32_t numd = le32_to_cpu(req->cmd.cdw10); +uint32_t offset = le32_to_cpu(req->cmd.cdw11); +size_t len = 0; +uint16_t status = NVME_SUCCESS; + +trace_pci_nvme_fw_download(nvme_cid(req), numd, offset, n->id_ctrl.fwug); + +len = (numd + 1) << 2; +offset <<= 2; + +if (len + offset > n->bp_size) { +trace_pci_nvme_fw_download_invalid_bp_size(offset, len, n->bp_size); +return NVME_INVALID_FIELD | NVME_DNR; +} + +status = nvme_h2c(n, n->bp_data + offset, len, req); + +return status; +} + static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) { trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode, @@ -5181,6 +5295,10 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) return nvme_get_feature(n, req); case NVME_ADM_CMD_ASYNC_EV_REQ
[PATCH v2 2/2] hw/nvme: documentation fix
In the documentation of the '-detached' param "be" and "not" has been used side by side, fix that. Signed-off-by: Gollu Appalanaidu --- hw/nvme/ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 813a72c655..a3df26d0ce 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -114,7 +114,7 @@ * This parameter is only valid together with the `subsys` parameter. If left * at the default value (`false/off`), the namespace will be attached to all * controllers in the NVMe subsystem at boot-up. If set to `true/on`, the - * namespace will be be available in the subsystem not not attached to any + * namespace will be available in the subsystem not attached to any * controllers. * * Setting `zoned` to true selects Zoned Command Set at the namespace. -- 2.17.1
Re: [PATCH] hw/nvme/ctrl: fix functions style
On Fri, May 21, 2021 at 09:13:51AM +0200, Klaus Jensen wrote: On May 21 11:38, Gollu Appalanaidu wrote: Identify command related functions style fix. Signed-off-by: Gollu Appalanaidu --- hw/nvme/ctrl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 0bcaf7192f..40a7efcea9 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4291,7 +4291,7 @@ static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) } static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, -bool active) + bool active) { NvmeNamespace *ns; NvmeIdentify *c = (NvmeIdentify *)>cmd; @@ -4326,7 +4326,7 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, } static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req, -bool active) + bool active) { NvmeNamespace *ns; NvmeIdentify *c = (NvmeIdentify *)>cmd; @@ -4373,7 +4373,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req, } static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req, -bool active) + bool active) { NvmeNamespace *ns; NvmeIdentify *c = (NvmeIdentify *)>cmd; -- 2.17.1 Thanks, applied to nvme-next. Please just use 'hw/nvme:' in the commit title, we don't need to specify the sub-subsystem ;) Sure Klaus, Thanks :)
[PATCH 2/2] tests/qtest/nvme-test: add boot partition read test
Add a test case for reading an NVMe Boot Partition without enabling the controller. Signed-off-by: Gollu Appalanaidu --- tests/qtest/nvme-test.c | 118 +++- 1 file changed, 117 insertions(+), 1 deletion(-) diff --git a/tests/qtest/nvme-test.c b/tests/qtest/nvme-test.c index d32c953a38..8409adac04 100644 --- a/tests/qtest/nvme-test.c +++ b/tests/qtest/nvme-test.c @@ -13,6 +13,19 @@ #include "libqos/libqtest.h" #include "libqos/qgraph.h" #include "libqos/pci.h" +#include "libqos/pci-pc.h" +#include "libqos/malloc-pc.h" +#include "libqos/malloc.h" +#include "libqos/libqos.h" +#include "include/block/nvme.h" +#include "include/hw/pci/pci.h" + +#define NVME_BPINFO_BPSZ_UNITS (128 * KiB) +#define NVME_BRS_BPSZ_UNITS (4 * KiB) +#define NVME_BRS_READ_MAX_TIME 100 +#define TEST_IMAGE_SIZE (2 * 128 * KiB) + +static char *t_path; typedef struct QNvme QNvme; @@ -44,6 +57,13 @@ static void *nvme_create(void *pci_bus, QGuestAllocator *alloc, void *addr) return >obj; } +static void drive_destroy(void *path) +{ +unlink(path); +g_free(path); +qos_invalidate_command_line(); +} + /* This used to cause a NULL pointer dereference. */ static void nvmetest_oob_cmb_test(void *obj, void *data, QGuestAllocator *alloc) { @@ -66,12 +86,100 @@ static void nvmetest_oob_cmb_test(void *obj, void *data, QGuestAllocator *alloc) g_assert_cmpint(qpci_io_readl(pdev, bar, cmb_bar_size - 1), !=, 0x44332211); } +static void nvmetest_bp_read_test(void *obj, void *data, QGuestAllocator *alloc) +{ +uint16_t test_size = 32; +size_t bp_test_len = test_size * NVME_BRS_BPSZ_UNITS; +uint8_t *read_buf = g_malloc(bp_test_len); +uint8_t *cmp_buf = g_malloc(bp_test_len); +QNvme *nvme = obj; +QPCIDevice *pdev = >dev; +QPCIBar nvme_bar; +uint8_t brs = 0; +uint64_t sleep_time = 0; +uintptr_t guest_buf; +uint64_t buf_addr; + +memset(cmp_buf, 0x42, bp_test_len); + +guest_buf = guest_alloc(alloc, bp_test_len); +buf_addr = cpu_to_le64(guest_buf); + +qpci_device_enable(pdev); +nvme_bar = qpci_iomap(pdev, 0, NULL); + +/* BPINFO */ +uint32_t bpinfo = qpci_io_readl(pdev, nvme_bar, 0x40); +uint16_t single_bp_size = bpinfo & BPINFO_BPSZ_MASK; +uint8_t active_bpid = bpinfo >> BPINFO_ABPID_SHIFT; +uint8_t read_select = (bpinfo >> BPINFO_BRS_SHIFT) & BPINFO_BRS_MASK; + +g_assert_cmpint(single_bp_size, ==, 0x1); +g_assert_cmpint(active_bpid, ==, 0x1); +g_assert_cmpint(read_select, ==, NVME_BPINFO_BRS_NOREAD); + +/* BPMBL */ +uint64_t bpmbl = buf_addr; +uint32_t bpmbl_low = bpmbl & 0x; +uint32_t bpmbl_hi = (bpmbl >> 32) & 0x; +qpci_io_writel(pdev, nvme_bar, 0x48, bpmbl_low); +qpci_io_writel(pdev, nvme_bar, 0x4B, bpmbl_hi); + +/* BPRSEL */ +qpci_io_writel(pdev, nvme_bar, 0x44, 32); + +while (true) { +usleep(1000); +sleep_time += 1000; +brs = qpci_io_readb(pdev, nvme_bar, 0x43) & BPINFO_BRS_MASK; +if (brs == NVME_BPINFO_BRS_SUCCESS || brs == NVME_BPINFO_BRS_ERROR || +sleep_time == NVME_BRS_READ_MAX_TIME) { +break; +} +} +g_assert_cmpint(brs, ==, NVME_BPINFO_BRS_SUCCESS); + +qtest_memread(pdev->bus->qts, guest_buf, read_buf, bp_test_len); +g_assert_cmpint(memcmp(cmp_buf, read_buf, bp_test_len), ==, 0); + +g_free(cmp_buf); +g_free(read_buf); +g_test_queue_destroy(drive_destroy, t_path); +} + static void nvme_register_nodes(void) { +int fd; +FILE *fh; +uint16_t bpsz = 2; +size_t bp_len = NVME_BPINFO_BPSZ_UNITS * bpsz; +size_t ret; +uint8_t *pattern = g_malloc(bp_len); + +t_path = g_strdup("/tmp/qtest.XX"); + +/* Create a temporary raw image */ +fd = mkstemp(t_path); +g_assert_cmpint(fd, >=, 0); +ret = ftruncate(fd, TEST_IMAGE_SIZE); +g_assert_cmpint(ret, ==, 0); +close(fd); + +memset(pattern, 0x42, bp_len); + +fh = fopen(t_path, "w+"); +ret = fwrite(pattern, NVME_BPINFO_BPSZ_UNITS, bpsz, fh); +g_assert_cmpint(ret, ==, bpsz); +fclose(fh); + +char *bp_cmd_line = g_strdup_printf("-drive id=bp0,file=%s,if=none," +"format=raw", t_path); + QOSGraphEdgeOptions opts = { .extra_device_opts = "addr=04.0,drive=drv0,serial=foo", .before_cmd_line = "-drive id=drv0,if=none,file=null-co://," - "file.read-zeroes=on,format=raw", + "file.read-zeroes=on,format=raw ", + bp_cmd_line, }; add_qpci_address(, &(QPCIAddress) { .devfn = QPCI_DEVFN(4, 0) }); @@ -83,6 +191,14 @@ static void nvme_register_nodes
[PATCH 1/2] hw/nvme: add support for boot partiotions
NVMe Boot Partitions provides an area that may be read by the host without initializing queues or even enabling the controller. This allows various platform initialization code to be stored on the NVMe device instead of some separete medium. This patch adds the read support for such an area, as well as support for updating the boot partition contents from the host through the FW Download and Commit commands. Signed-off-by: Gollu Appalanaidu --- hw/nvme/ctrl.c | 200 +++ hw/nvme/nvme.h | 3 + hw/nvme/trace-events | 7 ++ include/block/nvme.h | 75 +++- 4 files changed, 284 insertions(+), 1 deletion(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 0bcaf7192f..9314ca90e6 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -169,6 +169,7 @@ #define NVME_TEMPERATURE_CRITICAL 0x175 #define NVME_NUM_FW_SLOTS 1 #define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB) +#define NVME_NUM_OF_BPIDS 2 #define NVME_GUEST_ERR(trace, fmt, ...) \ do { \ @@ -215,6 +216,8 @@ static const uint32_t nvme_cse_acs[256] = { [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_NS_ATTACHMENT]= NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC, [NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, +[NVME_ADM_CMD_DOWNLOAD_FW] = NVME_CMD_EFF_CSUPP, +[NVME_ADM_CMD_COMMIT_FW]= NVME_CMD_EFF_CSUPP, }; static const uint32_t nvme_cse_iocs_none[256]; @@ -5145,6 +5148,111 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) return req->status; } +struct nvme_bp_read_ctx { +NvmeCtrl *n; +QEMUSGList qsg; +}; + +static void nvme_bp_read_cb(void *opaque, int ret) +{ +struct nvme_bp_read_ctx *ctx = opaque; +NvmeCtrl *n = ctx->n; + +trace_pci_nvme_bp_read_cb(); + +if (ret) { +NVME_BPINFO_CLEAR_BRS(n->bar.bpinfo); +NVME_BPINFO_SET_BRS(n->bar.bpinfo, NVME_BPINFO_BRS_ERROR); +goto free; +} + +NVME_BPINFO_CLEAR_BRS(n->bar.bpinfo); +NVME_BPINFO_SET_BRS(n->bar.bpinfo, NVME_BPINFO_BRS_SUCCESS); + +free: +if (ctx->qsg.sg) { +qemu_sglist_destroy(>qsg); +} + +g_free(ctx); +} + +static void nvme_fw_commit_cb(void *opaque, int ret) +{ +NvmeRequest *req = opaque; + +trace_pci_nvme_fw_commit_cb(nvme_cid(req)); + +if (ret) { +nvme_aio_err(req, ret); +} + +nvme_enqueue_req_completion(nvme_cq(req), req); +} + +static uint16_t nvme_fw_commit(NvmeCtrl *n, NvmeRequest *req) +{ +uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); +uint8_t fwug = n->id_ctrl.fwug; +uint8_t fs = dw10 & 0x7; +uint8_t ca = (dw10 >> 3) & 0x7; +uint8_t bpid = dw10 >> 31; +int offset; + +trace_pci_nvme_fw_commit(nvme_cid(req), dw10, fwug, fs, ca, +bpid); + +if (fs || ca == NVME_FW_CA_REPLACE) { +return NVME_INVALID_FW_SLOT | NVME_DNR; +} +/* + * current firmware commit command only support boot partions + * related commit actions + */ +if (ca < NVME_FW_CA_REPLACE_BP) { +return NVME_FW_ACTIVATE_PROHIBITED | NVME_DNR; +} + +if (ca == NVME_FW_CA_ACTIVATE_BP) { +NVME_BPINFO_CLEAR_ABPID(n->bar.bpinfo); +NVME_BPINFO_SET_ABPID(n->bar.bpinfo, bpid); +return NVME_SUCCESS; +} + +offset = bpid * (n->bp_size / NVME_NUM_OF_BPIDS); + +nvme_sg_init(n, >sg, false); +qemu_iovec_add(>sg.iov, n->bp_data + offset, + (n->bp_size / NVME_NUM_OF_BPIDS)); + +req->aiocb = blk_aio_pwritev(n->blk_bp, offset, >sg.iov, 0, +nvme_fw_commit_cb, req); + +return NVME_NO_COMPLETE; +} + +static uint16_t nvme_fw_download(NvmeCtrl *n, NvmeRequest *req) +{ +uint32_t numd = le32_to_cpu(req->cmd.cdw10); +uint32_t offset = le32_to_cpu(req->cmd.cdw11); +size_t len = 0; +uint16_t status = NVME_SUCCESS; + +trace_pci_nvme_fw_download(nvme_cid(req), numd, offset, n->id_ctrl.fwug); + +len = (numd + 1) << 2; + +if (offset >= n->bp_size || len > n->bp_size || +len + offset > n->bp_size) { +trace_pci_nvme_fw_download_invalid_bp_size(offset, len, n->bp_size); +return NVME_INVALID_FIELD | NVME_DNR; +} + +status = nvme_h2c(n, n->bp_data + offset, len, req); + +return status; +} + static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) { trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode, @@ -5181,6 +5289,10 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) return nvme_get_feature(n, req); case NVME_ADM_CMD_ASYNC_EV_REQ: return nvme_aer(n, req); +case NVME_ADM_CMD_COMMIT_FW: +return nvme_fw_commit(n, req); +case NVME_ADM_CMD_DOWNLOAD_FW: +return nvme_fw_download(n, req); case NVME_ADM_CMD_NS_ATTACHMENT: ret
[PATCH 0/2] add boot partitions support and read test case
This series adds the boot partition feature as well test case for reading boot partition area. Gollu Appalanaidu (2): hw/nvme: add support for boot partiotions tests/qtest/nvme-test: add boot partition read test hw/nvme/ctrl.c | 200 hw/nvme/nvme.h | 3 + hw/nvme/trace-events| 7 ++ include/block/nvme.h| 75 ++- tests/qtest/nvme-test.c | 118 +++- 5 files changed, 401 insertions(+), 2 deletions(-) -- 2.17.1
[PATCH] hw/nvme/ctrl: fix functions style
Identify command related functions style fix. Signed-off-by: Gollu Appalanaidu --- hw/nvme/ctrl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 0bcaf7192f..40a7efcea9 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4291,7 +4291,7 @@ static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) } static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, -bool active) + bool active) { NvmeNamespace *ns; NvmeIdentify *c = (NvmeIdentify *)>cmd; @@ -4326,7 +4326,7 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, } static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req, -bool active) + bool active) { NvmeNamespace *ns; NvmeIdentify *c = (NvmeIdentify *)>cmd; @@ -4373,7 +4373,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req, } static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req, -bool active) + bool active) { NvmeNamespace *ns; NvmeIdentify *c = (NvmeIdentify *)>cmd; -- 2.17.1
[PATCH 0/3] adding ctrl list (cns 0x13) support and random fixes
This series will add the Identify Controller List (CNS 0x13) support and NSID endian conversion fixes for CNS 0x12 and CNS 0x13. Documentation fix for the '-detached' parameter. Gollu Appalanaidu (3): hw/nvme/ctrl: add controller list cns 0x13 hw/nvme/ctrl: fix endian conversion for nsid in ctrl list hw/nvme/ctrl: documentation fix hw/nvme/ctrl.c | 28 +--- hw/nvme/trace-events | 2 +- include/block/nvme.h | 1 + 3 files changed, 19 insertions(+), 12 deletions(-) -- 2.17.1
[PATCH 3/3] hw/nvme/ctrl: documentation fix
In the documentation of the '-detached' param "be" and "not" has been used side by side, fix that. Signed-off-by: Gollu Appalanaidu --- hw/nvme/ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 813a72c655..a3df26d0ce 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -114,7 +114,7 @@ * This parameter is only valid together with the `subsys` parameter. If left * at the default value (`false/off`), the namespace will be attached to all * controllers in the NVMe subsystem at boot-up. If set to `true/on`, the - * namespace will be be available in the subsystem not not attached to any + * namespace will be available in the subsystem not attached to any * controllers. * * Setting `zoned` to true selects Zoned Command Set at the namespace. -- 2.17.1
[PATCH 2/3] hw/nvme/ctrl: fix endian conversion for nsid in ctrl list
In Identify Ctrl List of the CNS 0x12 and 0x13 no endian conversion for the nsid field. Signed-off-by: Gollu Appalanaidu --- hw/nvme/ctrl.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index d08a3350e2..813a72c655 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4255,6 +4255,7 @@ static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req, bool attached) { NvmeIdentify *c = (NvmeIdentify *)>cmd; +uint32_t nsid = le32_to_cpu(c->nsid); uint16_t min_id = le16_to_cpu(c->ctrlid); uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {}; uint16_t *ids = [1]; @@ -4265,11 +4266,11 @@ static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req, trace_pci_nvme_identify_ctrl_list(c->cns, min_id); if (attached) { -if (c->nsid == NVME_NSID_BROADCAST) { +if (nsid == NVME_NSID_BROADCAST) { return NVME_INVALID_FIELD | NVME_DNR; } -ns = nvme_subsys_ns(n->subsys, c->nsid); +ns = nvme_subsys_ns(n->subsys, nsid); if (!ns) { return NVME_INVALID_FIELD | NVME_DNR; } @@ -4281,7 +4282,7 @@ static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req, continue; } -if (attached && !nvme_ns(ctrl, c->nsid)) { +if (attached && !nvme_ns(ctrl, nsid)) { continue; } -- 2.17.1
[PATCH 1/3] hw/nvme/ctrl: add controller list cns 0x13
Add the controller identifiers list available in NVM Subsystem that may or may not be attached to namespaces. Signed-off-by: Gollu Appalanaidu --- hw/nvme/ctrl.c | 25 +++-- hw/nvme/trace-events | 2 +- include/block/nvme.h | 1 + 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 2e7498a73e..d08a3350e2 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4251,7 +4251,8 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active) return NVME_INVALID_CMD_SET | NVME_DNR; } -static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) +static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req, +bool attached) { NvmeIdentify *c = (NvmeIdentify *)>cmd; uint16_t min_id = le16_to_cpu(c->ctrlid); @@ -4261,15 +4262,17 @@ static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) NvmeCtrl *ctrl; int cntlid, nr_ids = 0; -trace_pci_nvme_identify_ns_attached_list(min_id); +trace_pci_nvme_identify_ctrl_list(c->cns, min_id); -if (c->nsid == NVME_NSID_BROADCAST) { -return NVME_INVALID_FIELD | NVME_DNR; -} +if (attached) { +if (c->nsid == NVME_NSID_BROADCAST) { +return NVME_INVALID_FIELD | NVME_DNR; +} -ns = nvme_subsys_ns(n->subsys, c->nsid); -if (!ns) { -return NVME_INVALID_FIELD | NVME_DNR; +ns = nvme_subsys_ns(n->subsys, c->nsid); +if (!ns) { +return NVME_INVALID_FIELD | NVME_DNR; +} } for (cntlid = min_id; cntlid < ARRAY_SIZE(n->subsys->ctrls); cntlid++) { @@ -4278,7 +4281,7 @@ static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) continue; } -if (!nvme_ns(ctrl, c->nsid)) { +if (attached && !nvme_ns(ctrl, c->nsid)) { continue; } @@ -4493,7 +4496,9 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req) case NVME_ID_CNS_NS_PRESENT: return nvme_identify_ns(n, req, false); case NVME_ID_CNS_NS_ATTACHED_CTRL_LIST: -return nvme_identify_ns_attached_list(n, req); +return nvme_identify_ctrl_list(n, req, true); +case NVME_ID_CNS_CTRL_LIST: +return nvme_identify_ctrl_list(n, req, false); case NVME_ID_CNS_CS_NS: return nvme_identify_ns_csi(n, req, true); case NVME_ID_CNS_CS_NS_PRESENT: diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index ea33d0ccc3..7ba3714671 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -55,7 +55,7 @@ pci_nvme_identify(uint16_t cid, uint8_t cns, uint16_t ctrlid, uint8_t csi) "cid pci_nvme_identify_ctrl(void) "identify controller" pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8"" pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32"" -pci_nvme_identify_ns_attached_list(uint16_t cntid) "cntid=%"PRIu16"" +pci_nvme_identify_ctrl_list(uint8_t cns, uint16_t cntid) "cns 0x%"PRIx8" cntid=%"PRIu16"" pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8"" pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32"" pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8"" diff --git a/include/block/nvme.h b/include/block/nvme.h index 0ff9ce17a9..188ab460df 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -980,6 +980,7 @@ enum NvmeIdCns { NVME_ID_CNS_NS_PRESENT_LIST = 0x10, NVME_ID_CNS_NS_PRESENT= 0x11, NVME_ID_CNS_NS_ATTACHED_CTRL_LIST = 0x12, +NVME_ID_CNS_CTRL_LIST = 0x13, NVME_ID_CNS_CS_NS_PRESENT_LIST= 0x1a, NVME_ID_CNS_CS_NS_PRESENT = 0x1b, NVME_ID_CNS_IO_COMMAND_SET= 0x1c, -- 2.17.1
[PATCH v2] hw/nvme/ctrl: fix csi field for cns 0x00 and 0x11
As per the TP 4056d Namespace types CNS 0x00 and CNS 0x11 CSI field shouldn't use but it is being used for these two Identify command CNS values, fix that. Remove 'nvme_csi_has_nvm_support()' helper as suggested by Klaus we can safely assume NVM command set support for all namespaces. Suggested-by: Klaus Jensen Signed-off-by: Gollu Appalanaidu --- -v2: add sugggestions from Klaus We can Remove 'nvme_csi_has_nvm_support()' helper, we can assume NVM command set support for all namespaces. hw/nvme/ctrl.c | 14 ++ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 2e7498a73e..7fcd699235 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4178,16 +4178,6 @@ static uint16_t nvme_rpt_empty_id_struct(NvmeCtrl *n, NvmeRequest *req) return nvme_c2h(n, id, sizeof(id), req); } -static inline bool nvme_csi_has_nvm_support(NvmeNamespace *ns) -{ -switch (ns->csi) { -case NVME_CSI_NVM: -case NVME_CSI_ZONED: -return true; -} -return false; -} - static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeRequest *req) { trace_pci_nvme_identify_ctrl(); @@ -4244,7 +4234,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active) } } -if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) { +if (active || ns->csi == NVME_CSI_NVM) { return nvme_c2h(n, (uint8_t *)>id_ns, sizeof(NvmeIdNs), req); } @@ -4315,7 +4305,7 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, } } -if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) { +if (c->csi == NVME_CSI_NVM) { return nvme_rpt_empty_id_struct(n, req); } else if (c->csi == NVME_CSI_ZONED && ns->csi == NVME_CSI_ZONED) { return nvme_c2h(n, (uint8_t *)ns->id_ns_zoned, sizeof(NvmeIdNsZoned), -- 2.17.1
Re: [PATCH] hw/block/nvme: fix csi field for cns 0x00 and 0x11
On Mon, Apr 26, 2021 at 01:03:04PM +0200, Klaus Jensen wrote: On Apr 26 13:16, Gollu Appalanaidu wrote: As per the TP 4056d Namespace types CNS 0x00 and CNS 0x11 CSI field shouldn't use but it is being used for these two Identify command CNS values, fix that. Signed-off-by: Gollu Appalanaidu --- hw/nvme/ctrl.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 2e7498a73e..1657b1d04a 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4244,11 +4244,16 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active) } } -if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) { -return nvme_c2h(n, (uint8_t *)>id_ns, sizeof(NvmeIdNs), req); +if (active && nvme_csi_has_nvm_support(ns)) { +goto out; +} else if (!active && ns->csi == NVME_CSI_NVM) { +goto out; +} else { +return NVME_INVALID_CMD_SET | NVME_DNR; } -return NVME_INVALID_CMD_SET | NVME_DNR; +out: +return nvme_c2h(n, (uint8_t *)>id_ns, sizeof(NvmeIdNs), req); } static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) -- 2.17.1 Looking closer at this, since we only support the NVM and Zoned command sets, we can get rid of the `nvme_csi_has_nvm_support()` helper and just assume NVM command set support for all namespaces. The way different command sets are handled doesn't scale anyway, so we might as well simplify the logic a bit. Something like this (compile-tested only) patch maybe? diff --git i/hw/nvme/ctrl.c w/hw/nvme/ctrl.c index 2e7498a73e70..7fcd6992358d 100644 --- i/hw/nvme/ctrl.c +++ w/hw/nvme/ctrl.c @@ -4178,16 +4178,6 @@ static uint16_t nvme_rpt_empty_id_struct(NvmeCtrl *n, NvmeRequest *req) return nvme_c2h(n, id, sizeof(id), req); } -static inline bool nvme_csi_has_nvm_support(NvmeNamespace *ns) -{ -switch (ns->csi) { -case NVME_CSI_NVM: -case NVME_CSI_ZONED: -return true; -} -return false; -} - static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeRequest *req) { trace_pci_nvme_identify_ctrl(); @@ -4244,7 +4234,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active) } } -if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) { +if (active || ns->csi == NVME_CSI_NVM) { return nvme_c2h(n, (uint8_t *)>id_ns, sizeof(NvmeIdNs), req); } @@ -4315,7 +4305,7 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, } } -if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) { +if (c->csi == NVME_CSI_NVM) { return nvme_rpt_empty_id_struct(n, req); } else if (c->csi == NVME_CSI_ZONED && ns->csi == NVME_CSI_ZONED) { return nvme_c2h(n, (uint8_t *)ns->id_ns_zoned, sizeof(NvmeIdNsZoned), Sure, will make changes and submit v2
[PATCH] hw/block/nvme: fix csi field for cns 0x00 and 0x11
As per the TP 4056d Namespace types CNS 0x00 and CNS 0x11 CSI field shouldn't use but it is being used for these two Identify command CNS values, fix that. Signed-off-by: Gollu Appalanaidu --- hw/nvme/ctrl.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 2e7498a73e..1657b1d04a 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4244,11 +4244,16 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active) } } -if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) { -return nvme_c2h(n, (uint8_t *)>id_ns, sizeof(NvmeIdNs), req); +if (active && nvme_csi_has_nvm_support(ns)) { +goto out; +} else if (!active && ns->csi == NVME_CSI_NVM) { +goto out; +} else { +return NVME_INVALID_CMD_SET | NVME_DNR; } -return NVME_INVALID_CMD_SET | NVME_DNR; +out: +return nvme_c2h(n, (uint8_t *)>id_ns, sizeof(NvmeIdNs), req); } static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) -- 2.17.1
[PATCH 3/3] hw/block/nvme: add id ns metadata cap (mc) enum
Add Idnetify Namespace Metadata Capablities (MC) enum. Signed-off-by: Gollu Appalanaidu --- hw/block/nvme-ns.c | 2 +- include/block/nvme.h | 5 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c index 9065a7ae99..db75302136 100644 --- a/hw/block/nvme-ns.c +++ b/hw/block/nvme-ns.c @@ -85,7 +85,7 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) ds = 31 - clz32(ns->blkconf.logical_block_size); ms = ns->params.ms; -id_ns->mc = 0x3; +id_ns->mc = NVME_ID_NS_MC_EXTENDED | NVME_ID_NS_MC_SEPARATE; if (ms && ns->params.mset) { id_ns->flbas |= NVME_ID_NS_FLBAS_EXTENDEND; diff --git a/include/block/nvme.h b/include/block/nvme.h index 1d61030756..a3b610ba86 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -1344,6 +1344,11 @@ enum NvmeIdNsFlbas { NVME_ID_NS_FLBAS_EXTENDEND = 1 << 4, }; +enum NvmeIdNsMc { +NVME_ID_NS_MC_EXTENDED = 1 << 0, +NVME_ID_NS_MC_SEPARATE = 1 << 1, +}; + #define NVME_ID_NS_DPS_TYPE(dps) (dps & NVME_ID_NS_DPS_TYPE_MASK) typedef struct NvmeDifTuple { -- 2.17.1
[PATCH 2/3] hw/block/nvme: add id ns flbas enum
Add the Identify Namespace FLBAS related enums and remove NVME_ID_NS_FLBAS_EXTENDEND macro its being used in only one place and converted into enum. Signed-off-by: Gollu Appalanaidu --- hw/block/nvme-ns.c | 2 +- hw/block/nvme-ns.h | 2 +- include/block/nvme.h | 5 - 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c index ae56142fcd..9065a7ae99 100644 --- a/hw/block/nvme-ns.c +++ b/hw/block/nvme-ns.c @@ -88,7 +88,7 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) id_ns->mc = 0x3; if (ms && ns->params.mset) { -id_ns->flbas |= 0x10; +id_ns->flbas |= NVME_ID_NS_FLBAS_EXTENDEND; } id_ns->dpc = 0x1f; diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h index fb0a41f912..5aa36cd1d2 100644 --- a/hw/block/nvme-ns.h +++ b/hw/block/nvme-ns.h @@ -134,7 +134,7 @@ static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba) static inline bool nvme_ns_ext(NvmeNamespace *ns) { -return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas); +return ns->id_ns.flbas & NVME_ID_NS_FLBAS_EXTENDEND; } /* calculate the number of LBAs that the namespace can accomodate */ diff --git a/include/block/nvme.h b/include/block/nvme.h index 4ac926fbc6..1d61030756 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -1321,7 +1321,6 @@ typedef struct QEMU_PACKED NvmeIdNsZoned { #define NVME_ID_NS_NSFEAT_THIN(nsfeat) ((nsfeat & 0x1)) #define NVME_ID_NS_NSFEAT_DULBE(nsfeat) ((nsfeat >> 2) & 0x1) -#define NVME_ID_NS_FLBAS_EXTENDED(flbas)((flbas >> 4) & 0x1) #define NVME_ID_NS_FLBAS_INDEX(flbas) ((flbas & 0xf)) #define NVME_ID_NS_MC_SEPARATE(mc) ((mc >> 1) & 0x1) #define NVME_ID_NS_MC_EXTENDED(mc) ((mc & 0x1)) @@ -1341,6 +1340,10 @@ enum NvmeIdNsDps { NVME_ID_NS_DPS_FIRST_EIGHT = 8, }; +enum NvmeIdNsFlbas { +NVME_ID_NS_FLBAS_EXTENDEND = 1 << 4, +}; + #define NVME_ID_NS_DPS_TYPE(dps) (dps & NVME_ID_NS_DPS_TYPE_MASK) typedef struct NvmeDifTuple { -- 2.17.1
[PATCH 1/3] hw/block/nvme: fix lbaf formats initialization
Currently LBAF formats are being intialized based on metadata size if and only if nvme-ns "ms" parameter is non-zero value. Since FormatNVM command being supported device parameter "ms" may not be the criteria to initialize the supported LBAFs. And make LBAF array as read-only. Reviewed-by: Klaus Jensen Signed-off-by: Gollu Appalanaidu --- hw/block/nvme-ns.c | 51 -- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c index 7bb618f182..ae56142fcd 100644 --- a/hw/block/nvme-ns.c +++ b/hw/block/nvme-ns.c @@ -85,39 +85,32 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) ds = 31 - clz32(ns->blkconf.logical_block_size); ms = ns->params.ms; -if (ns->params.ms) { -id_ns->mc = 0x3; +id_ns->mc = 0x3; -if (ns->params.mset) { -id_ns->flbas |= 0x10; -} - -id_ns->dpc = 0x1f; -id_ns->dps = ((ns->params.pil & 0x1) << 3) | ns->params.pi; - -NvmeLBAF lbaf[16] = { -[0] = { .ds = 9 }, -[1] = { .ds = 9, .ms = 8 }, -[2] = { .ds = 9, .ms = 16 }, -[3] = { .ds = 9, .ms = 64 }, -[4] = { .ds = 12 }, -[5] = { .ds = 12, .ms = 8 }, -[6] = { .ds = 12, .ms = 16 }, -[7] = { .ds = 12, .ms = 64 }, -}; - -memcpy(_ns->lbaf, , sizeof(lbaf)); -id_ns->nlbaf = 7; -} else { -NvmeLBAF lbaf[16] = { -[0] = { .ds = 9 }, -[1] = { .ds = 12 }, -}; +if (ms && ns->params.mset) { +id_ns->flbas |= 0x10; +} -memcpy(_ns->lbaf, , sizeof(lbaf)); -id_ns->nlbaf = 1; +id_ns->dpc = 0x1f; +id_ns->dps = ns->params.pi; +if (ns->params.pi && ns->params.pil) { +id_ns->dps |= NVME_ID_NS_DPS_FIRST_EIGHT; } +static const NvmeLBAF lbaf[16] = { +[0] = { .ds = 9 }, +[1] = { .ds = 9, .ms = 8 }, +[2] = { .ds = 9, .ms = 16 }, +[3] = { .ds = 9, .ms = 64 }, +[4] = { .ds = 12 }, +[5] = { .ds = 12, .ms = 8 }, +[6] = { .ds = 12, .ms = 16 }, +[7] = { .ds = 12, .ms = 64 }, +}; + +memcpy(_ns->lbaf, , sizeof(lbaf)); +id_ns->nlbaf = 7; + for (i = 0; i <= id_ns->nlbaf; i++) { NvmeLBAF *lbaf = _ns->lbaf[i]; if (lbaf->ds == ds) { -- 2.17.1
[PATCH 0/3] hw/block/nvme: fixes lbaf formats initialization and adds idns enums
This series fixes the LBAF Formats intialization and Adds the Identify Namespace Parameters enums to make to more readable. Gollu Appalanaidu (3): hw/block/nvme: fix lbaf formats initialization hw/block/nvme: add id ns flbas enum hw/block/nvme: add id ns metadata cap (mc) enum hw/block/nvme-ns.c | 51 +++- hw/block/nvme-ns.h | 2 +- include/block/nvme.h | 10 - 3 files changed, 32 insertions(+), 31 deletions(-) -- 2.17.1
Re: [PATCH v3] hw/block/nvme: fix lbaf formats initialization
On Tue, Apr 20, 2021 at 09:47:00PM +0200, Klaus Jensen wrote: On Apr 16 17:29, Gollu Appalanaidu wrote: Currently LBAF formats are being intialized based on metadata size if and only if nvme-ns "ms" parameter is non-zero value. Since FormatNVM command being supported device parameter "ms" may not be the criteria to initialize the supported LBAFs. Signed-off-by: Gollu Appalanaidu --- -v3: Remove "mset" constraint check if ms < 8, "mset" can be set even when ms < 8 and non-zero. -v2: Addressing review comments (Klaus) Change the current "pi" and "ms" constraint check such that it will throw the error if ms < 8 and if namespace protection info, location and metadata settings are set. Splitting this from compare fix patch series. hw/block/nvme-ns.c | 58 -- 1 file changed, 25 insertions(+), 33 deletions(-) diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c index 7bb618f182..594b0003cf 100644 --- a/hw/block/nvme-ns.c +++ b/hw/block/nvme-ns.c @@ -85,38 +85,28 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) ds = 31 - clz32(ns->blkconf.logical_block_size); ms = ns->params.ms; -if (ns->params.ms) { -id_ns->mc = 0x3; +id_ns->mc = 0x3; -if (ns->params.mset) { -id_ns->flbas |= 0x10; -} +if (ms && ns->params.mset) { +id_ns->flbas |= 0x10; +} -id_ns->dpc = 0x1f; -id_ns->dps = ((ns->params.pil & 0x1) << 3) | ns->params.pi; - -NvmeLBAF lbaf[16] = { -[0] = { .ds = 9 }, -[1] = { .ds = 9, .ms = 8 }, -[2] = { .ds = 9, .ms = 16 }, -[3] = { .ds = 9, .ms = 64 }, -[4] = { .ds = 12 }, -[5] = { .ds = 12, .ms = 8 }, -[6] = { .ds = 12, .ms = 16 }, -[7] = { .ds = 12, .ms = 64 }, -}; - -memcpy(_ns->lbaf, , sizeof(lbaf)); -id_ns->nlbaf = 7; -} else { -NvmeLBAF lbaf[16] = { -[0] = { .ds = 9 }, -[1] = { .ds = 12 }, -}; +id_ns->dpc = 0x1f; +id_ns->dps = ((ns->params.pil & 0x1) << 3) | ns->params.pi; -memcpy(_ns->lbaf, , sizeof(lbaf)); -id_ns->nlbaf = 1; -} +NvmeLBAF lbaf[16] = { +[0] = { .ds = 9 }, +[1] = { .ds = 9, .ms = 8 }, +[2] = { .ds = 9, .ms = 16 }, +[3] = { .ds = 9, .ms = 64 }, +[4] = { .ds = 12 }, +[5] = { .ds = 12, .ms = 8 }, +[6] = { .ds = 12, .ms = 16 }, +[7] = { .ds = 12, .ms = 64 }, +}; + +memcpy(_ns->lbaf, , sizeof(lbaf)); +id_ns->nlbaf = 7; for (i = 0; i <= id_ns->nlbaf; i++) { NvmeLBAF *lbaf = _ns->lbaf[i]; This part LGTM. @@ -395,10 +385,12 @@ static int nvme_ns_check_constraints(NvmeCtrl *n, NvmeNamespace *ns, return -1; } -if (ns->params.pi && ns->params.ms < 8) { -error_setg(errp, "at least 8 bytes of metadata required to enable " - "protection information"); -return -1; +if (ns->params.ms < 8) { +if (ns->params.pi || ns->params.pil) { +error_setg(errp, "at least 8 bytes of metadata required to enable " +"protection information, protection information location"); +return -1; +} } If you do this additional check, then you should maybe also check that pil is only set if pi is. But if pi is not enabled, then the value of pil is irrelevant (even though it ends up in FLBAS). In other words, if you want to validate all possible parameter configurations, then we have a lot more checking to do! Currently, the approach taken by the parameter validation code is to error out on *invalid* configurations that causes invariants to not hold, and I'd prefer that we stick with that to keep the check logic as simple as possible. So, (without this unnecessary check): Sure, will remove this check and send v4 Reviewed-by: Klaus Jensen
[PATCH] hw/block/nvme: function formatting fix
nvme_map_addr_pmr function arguments not aligned, fix that. Signed-off-by: Gollu Appalanaidu --- hw/block/nvme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 624a1431d0..63ceeb47bd 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -574,7 +574,7 @@ static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr, } static uint16_t nvme_map_addr_pmr(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr, -size_t len) + size_t len) { if (!len) { return NVME_SUCCESS; -- 2.17.1
[PATCH] hw/block/nvme: fix io-command set profile feature
Currently IO Command Set Profile feaure is supported, but feature support flag not set and this feature is changable add support for that. Remove filling default value of feature in CQE CDW0 with zero, since it fallbacks to default case and it is zero initialized, if feature default value not set explicitly. Signed-off-by: Gollu Appalanaidu --- hw/block/nvme.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 624a1431d0..b5d2c29fc4 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -185,6 +185,7 @@ static const bool nvme_feature_support[NVME_FID_MAX] = { [NVME_WRITE_ATOMICITY] = true, [NVME_ASYNCHRONOUS_EVENT_CONF] = true, [NVME_TIMESTAMP]= true, +[NVME_COMMAND_SET_PROFILE] = true, }; static const uint32_t nvme_feature_cap[NVME_FID_MAX] = { @@ -194,6 +195,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = { [NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE, [NVME_ASYNCHRONOUS_EVENT_CONF] = NVME_FEAT_CAP_CHANGE, [NVME_TIMESTAMP]= NVME_FEAT_CAP_CHANGE, +[NVME_COMMAND_SET_PROFILE] = NVME_FEAT_CAP_CHANGE, }; static const uint32_t nvme_cse_acs[256] = { @@ -4707,9 +4709,6 @@ defaults: result |= NVME_INTVC_NOCOALESCING; } break; -case NVME_COMMAND_SET_PROFILE: -result = 0; -break; default: result = nvme_feature_default[fid]; break; -- 2.17.1
[PATCH v3] hw/block/nvme: fix lbaf formats initialization
Currently LBAF formats are being intialized based on metadata size if and only if nvme-ns "ms" parameter is non-zero value. Since FormatNVM command being supported device parameter "ms" may not be the criteria to initialize the supported LBAFs. Signed-off-by: Gollu Appalanaidu --- -v3: Remove "mset" constraint check if ms < 8, "mset" can be set even when ms < 8 and non-zero. -v2: Addressing review comments (Klaus) Change the current "pi" and "ms" constraint check such that it will throw the error if ms < 8 and if namespace protection info, location and metadata settings are set. Splitting this from compare fix patch series. hw/block/nvme-ns.c | 58 -- 1 file changed, 25 insertions(+), 33 deletions(-) diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c index 7bb618f182..594b0003cf 100644 --- a/hw/block/nvme-ns.c +++ b/hw/block/nvme-ns.c @@ -85,38 +85,28 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) ds = 31 - clz32(ns->blkconf.logical_block_size); ms = ns->params.ms; -if (ns->params.ms) { -id_ns->mc = 0x3; +id_ns->mc = 0x3; -if (ns->params.mset) { -id_ns->flbas |= 0x10; -} +if (ms && ns->params.mset) { +id_ns->flbas |= 0x10; +} -id_ns->dpc = 0x1f; -id_ns->dps = ((ns->params.pil & 0x1) << 3) | ns->params.pi; - -NvmeLBAF lbaf[16] = { -[0] = { .ds = 9 }, -[1] = { .ds = 9, .ms = 8 }, -[2] = { .ds = 9, .ms = 16 }, -[3] = { .ds = 9, .ms = 64 }, -[4] = { .ds = 12 }, -[5] = { .ds = 12, .ms = 8 }, -[6] = { .ds = 12, .ms = 16 }, -[7] = { .ds = 12, .ms = 64 }, -}; - -memcpy(_ns->lbaf, , sizeof(lbaf)); -id_ns->nlbaf = 7; -} else { -NvmeLBAF lbaf[16] = { -[0] = { .ds = 9 }, -[1] = { .ds = 12 }, -}; +id_ns->dpc = 0x1f; +id_ns->dps = ((ns->params.pil & 0x1) << 3) | ns->params.pi; -memcpy(_ns->lbaf, , sizeof(lbaf)); -id_ns->nlbaf = 1; -} +NvmeLBAF lbaf[16] = { +[0] = { .ds = 9 }, +[1] = { .ds = 9, .ms = 8 }, +[2] = { .ds = 9, .ms = 16 }, +[3] = { .ds = 9, .ms = 64 }, +[4] = { .ds = 12 }, +[5] = { .ds = 12, .ms = 8 }, +[6] = { .ds = 12, .ms = 16 }, +[7] = { .ds = 12, .ms = 64 }, +}; + +memcpy(_ns->lbaf, , sizeof(lbaf)); +id_ns->nlbaf = 7; for (i = 0; i <= id_ns->nlbaf; i++) { NvmeLBAF *lbaf = _ns->lbaf[i]; @@ -395,10 +385,12 @@ static int nvme_ns_check_constraints(NvmeCtrl *n, NvmeNamespace *ns, return -1; } -if (ns->params.pi && ns->params.ms < 8) { -error_setg(errp, "at least 8 bytes of metadata required to enable " - "protection information"); -return -1; +if (ns->params.ms < 8) { +if (ns->params.pi || ns->params.pil) { +error_setg(errp, "at least 8 bytes of metadata required to enable " +"protection information, protection information location"); +return -1; +} } if (ns->params.nsid > NVME_MAX_NAMESPACES) { -- 2.17.1
[PATCH v2] hw/block/nvme: fix lbaf formats initialization
Currently LBAF formats are being intialized based on metadata size if and only if nvme-ns "ms" parameter is non-zero value. Since FormatNVM command being supported device parameter "ms" may not be the criteria to initialize the supported LBAFs. Signed-off-by: Gollu Appalanaidu --- -v2: Addressing review comments (Klaus) Change the current "pi" and "ms" constraint check such that it will throw the error if ms < 8 and if namespace protection info, location and metadata settings are set. Splitting this from compare fix patch series. hw/block/nvme-ns.c | 59 -- 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c index 7bb618f182..160873a8d4 100644 --- a/hw/block/nvme-ns.c +++ b/hw/block/nvme-ns.c @@ -85,38 +85,28 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) ds = 31 - clz32(ns->blkconf.logical_block_size); ms = ns->params.ms; -if (ns->params.ms) { -id_ns->mc = 0x3; +id_ns->mc = 0x3; -if (ns->params.mset) { -id_ns->flbas |= 0x10; -} +if (ns->params.mset) { +id_ns->flbas |= 0x10; +} -id_ns->dpc = 0x1f; -id_ns->dps = ((ns->params.pil & 0x1) << 3) | ns->params.pi; - -NvmeLBAF lbaf[16] = { -[0] = { .ds = 9 }, -[1] = { .ds = 9, .ms = 8 }, -[2] = { .ds = 9, .ms = 16 }, -[3] = { .ds = 9, .ms = 64 }, -[4] = { .ds = 12 }, -[5] = { .ds = 12, .ms = 8 }, -[6] = { .ds = 12, .ms = 16 }, -[7] = { .ds = 12, .ms = 64 }, -}; - -memcpy(_ns->lbaf, , sizeof(lbaf)); -id_ns->nlbaf = 7; -} else { -NvmeLBAF lbaf[16] = { -[0] = { .ds = 9 }, -[1] = { .ds = 12 }, -}; +id_ns->dpc = 0x1f; +id_ns->dps = ((ns->params.pil & 0x1) << 3) | ns->params.pi; -memcpy(_ns->lbaf, , sizeof(lbaf)); -id_ns->nlbaf = 1; -} +NvmeLBAF lbaf[16] = { +[0] = { .ds = 9 }, +[1] = { .ds = 9, .ms = 8 }, +[2] = { .ds = 9, .ms = 16 }, +[3] = { .ds = 9, .ms = 64 }, +[4] = { .ds = 12 }, +[5] = { .ds = 12, .ms = 8 }, +[6] = { .ds = 12, .ms = 16 }, +[7] = { .ds = 12, .ms = 64 }, +}; + +memcpy(_ns->lbaf, , sizeof(lbaf)); +id_ns->nlbaf = 7; for (i = 0; i <= id_ns->nlbaf; i++) { NvmeLBAF *lbaf = _ns->lbaf[i]; @@ -395,10 +385,13 @@ static int nvme_ns_check_constraints(NvmeCtrl *n, NvmeNamespace *ns, return -1; } -if (ns->params.pi && ns->params.ms < 8) { -error_setg(errp, "at least 8 bytes of metadata required to enable " - "protection information"); -return -1; +if (ns->params.ms < 8) { +if (ns->params.pi || ns->params.pil || ns->params.mset) { +error_setg(errp, "at least 8 bytes of metadata required to enable " +"protection information, protection information location and " +"metadata settings"); +return -1; +} } if (ns->params.nsid > NVME_MAX_NAMESPACES) { -- 2.17.1
Re: [PATCH 2/2] hw/block/nvme: fix lbaf formats initialization
On Fri, Apr 16, 2021 at 10:48:16AM +0200, Klaus Jensen wrote: On Apr 16 12:52, Gollu Appalanaidu wrote: Currently LBAF formats are being intialized based on metadata size if and only if nvme-ns "ms" parameter is non-zero value. Since FormatNVM command being supported device parameter "ms" may not be the criteria to initialize the supported LBAFs. Signed-off-by: Gollu Appalanaidu --- hw/block/nvme-ns.c | 48 ++ 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c index 7bb618f182..573dbb5a9d 100644 --- a/hw/block/nvme-ns.c +++ b/hw/block/nvme-ns.c @@ -85,38 +85,28 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) ds = 31 - clz32(ns->blkconf.logical_block_size); ms = ns->params.ms; -if (ns->params.ms) { -id_ns->mc = 0x3; +id_ns->mc = 0x3; -if (ns->params.mset) { -id_ns->flbas |= 0x10; -} +if (ms && ns->params.mset) { +id_ns->flbas |= 0x10; +} -id_ns->dpc = 0x1f; -id_ns->dps = ((ns->params.pil & 0x1) << 3) | ns->params.pi; - -NvmeLBAF lbaf[16] = { -[0] = { .ds = 9 }, -[1] = { .ds = 9, .ms = 8 }, -[2] = { .ds = 9, .ms = 16 }, -[3] = { .ds = 9, .ms = 64 }, -[4] = { .ds = 12 }, -[5] = { .ds = 12, .ms = 8 }, -[6] = { .ds = 12, .ms = 16 }, -[7] = { .ds = 12, .ms = 64 }, -}; - -memcpy(_ns->lbaf, , sizeof(lbaf)); -id_ns->nlbaf = 7; -} else { -NvmeLBAF lbaf[16] = { -[0] = { .ds = 9 }, -[1] = { .ds = 12 }, -}; +id_ns->dpc = 0x1f; +id_ns->dps = ((ns->params.pil & 0x1) << 3) | ns->params.pi; While nvme_ns_check_constraints() will error out if pi is set and the metadata bytes are insufficient, I don't think this should set bit 3 unless both metadata and pi is enabled. It's not against the spec, but it's just slightly weird. okay, will modify current "ms" and "pi" constraint check like this: if (ns->params.ms < 8) { if (ns->params.pi || ns->params.pil || ns->params.mset) { error_setg(errp, "at least 8 bytes of metadata required to enable " "protection information, protection information location and " "metadata settings"); return -1; } } and "mset" is set will set directly without checing "ms" in nvme_ns_init() -memcpy(_ns->lbaf, , sizeof(lbaf)); -id_ns->nlbaf = 1; -} +NvmeLBAF lbaf[16] = { +[0] = { .ds = 9 }, +[1] = { .ds = 9, .ms = 8 }, +[2] = { .ds = 9, .ms = 16 }, +[3] = { .ds = 9, .ms = 64 }, +[4] = { .ds = 12 }, +[5] = { .ds = 12, .ms = 8 }, +[6] = { .ds = 12, .ms = 16 }, +[7] = { .ds = 12, .ms = 64 }, +}; + +memcpy(_ns->lbaf, , sizeof(lbaf)); +id_ns->nlbaf = 7; for (i = 0; i <= id_ns->nlbaf; i++) { NvmeLBAF *lbaf = _ns->lbaf[i]; -- 2.17.1
[PATCH 1/2] hw/block/nvme: consider metadata read aio return value in compare
Currently in compare command metadata aio read blk_aio_preadv return value ignored, consider it and complete the block accounting. Signed-off-by: Gollu Appalanaidu --- hw/block/nvme.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 624a1431d0..c2727540f1 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -2369,10 +2369,19 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) uint32_t reftag = le32_to_cpu(rw->reftag); struct nvme_compare_ctx *ctx = req->opaque; g_autofree uint8_t *buf = NULL; +BlockBackend *blk = ns->blkconf.blk; +BlockAcctCookie *acct = >acct; +BlockAcctStats *stats = blk_get_stats(blk); uint16_t status = NVME_SUCCESS; trace_pci_nvme_compare_mdata_cb(nvme_cid(req)); +if (ret) { +block_acct_failed(stats, acct); +nvme_aio_err(req, ret); +goto out; +} + buf = g_malloc(ctx->mdata.iov.size); status = nvme_bounce_mdata(n, buf, ctx->mdata.iov.size, @@ -2421,6 +2430,8 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) goto out; } +block_acct_done(stats, acct); + out: qemu_iovec_destroy(>data.iov); g_free(ctx->data.bounce); -- 2.17.1
[PATCH 2/2] hw/block/nvme: fix lbaf formats initialization
Currently LBAF formats are being intialized based on metadata size if and only if nvme-ns "ms" parameter is non-zero value. Since FormatNVM command being supported device parameter "ms" may not be the criteria to initialize the supported LBAFs. Signed-off-by: Gollu Appalanaidu --- hw/block/nvme-ns.c | 48 ++ 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c index 7bb618f182..573dbb5a9d 100644 --- a/hw/block/nvme-ns.c +++ b/hw/block/nvme-ns.c @@ -85,38 +85,28 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) ds = 31 - clz32(ns->blkconf.logical_block_size); ms = ns->params.ms; -if (ns->params.ms) { -id_ns->mc = 0x3; +id_ns->mc = 0x3; -if (ns->params.mset) { -id_ns->flbas |= 0x10; -} +if (ms && ns->params.mset) { +id_ns->flbas |= 0x10; +} -id_ns->dpc = 0x1f; -id_ns->dps = ((ns->params.pil & 0x1) << 3) | ns->params.pi; - -NvmeLBAF lbaf[16] = { -[0] = { .ds = 9 }, -[1] = { .ds = 9, .ms = 8 }, -[2] = { .ds = 9, .ms = 16 }, -[3] = { .ds = 9, .ms = 64 }, -[4] = { .ds = 12 }, -[5] = { .ds = 12, .ms = 8 }, -[6] = { .ds = 12, .ms = 16 }, -[7] = { .ds = 12, .ms = 64 }, -}; - -memcpy(_ns->lbaf, , sizeof(lbaf)); -id_ns->nlbaf = 7; -} else { -NvmeLBAF lbaf[16] = { -[0] = { .ds = 9 }, -[1] = { .ds = 12 }, -}; +id_ns->dpc = 0x1f; +id_ns->dps = ((ns->params.pil & 0x1) << 3) | ns->params.pi; -memcpy(_ns->lbaf, , sizeof(lbaf)); -id_ns->nlbaf = 1; -} +NvmeLBAF lbaf[16] = { +[0] = { .ds = 9 }, +[1] = { .ds = 9, .ms = 8 }, +[2] = { .ds = 9, .ms = 16 }, +[3] = { .ds = 9, .ms = 64 }, +[4] = { .ds = 12 }, +[5] = { .ds = 12, .ms = 8 }, +[6] = { .ds = 12, .ms = 16 }, +[7] = { .ds = 12, .ms = 64 }, +}; + +memcpy(_ns->lbaf, , sizeof(lbaf)); +id_ns->nlbaf = 7; for (i = 0; i <= id_ns->nlbaf; i++) { NvmeLBAF *lbaf = _ns->lbaf[i]; -- 2.17.1
[PATCH v3] hw/block/nvme: align with existing style
Use lower case hexadecimal format for the constants and in comments use the same format as used in Spec. ("h") Signed-off-by: Gollu Appalanaidu --- -v3: Add Suggestions (Philippe) Describe the NVMe subsystem style in nvme.c header -v2: Address review comments (Klaus) use lower case hexa format for the code and in comments use the same format as used in Spec. ("h") hw/block/nvme-ns.c | 2 +- hw/block/nvme.c | 46 +--- include/block/nvme.h | 10 +- 3 files changed, 32 insertions(+), 26 deletions(-) diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c index 7bb618f182..a0895614d9 100644 --- a/hw/block/nvme-ns.c +++ b/hw/block/nvme-ns.c @@ -303,7 +303,7 @@ static void nvme_ns_init_zoned(NvmeNamespace *ns) id_ns_z = g_malloc0(sizeof(NvmeIdNsZoned)); -/* MAR/MOR are zeroes-based, 0x means no limit */ +/* MAR/MOR are zeroes-based, Fh means no limit */ id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1); id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1); id_ns_z->zoc = 0; diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 624a1431d0..cbe7f33daa 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -134,6 +134,12 @@ * Setting this property to true enables Read Across Zone Boundaries. */ +/** + * While QEMU coding style prefers lowercase hexadecimal in constants, + * the NVMe subsystem use the format from the NVMe specifications in + * the comments: no '0x' prefix, uppercase, 'h' hexadecimal suffix + */ + #include "qemu/osdep.h" #include "qemu/units.h" #include "qemu/error-report.h" @@ -3607,18 +3613,18 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) /* * In the base NVM command set, Flush may apply to all namespaces - * (indicated by NSID being set to 0x). But if that feature is used + * (indicated by NSID being set to h). But if that feature is used * along with TP 4056 (Namespace Types), it may be pretty screwed up. * - * If NSID is indeed set to 0x, we simply cannot associate the + * If NSID is indeed set to h, we simply cannot associate the * opcode with a specific command since we cannot determine a unique I/O * command set. Opcode 0x0 could have any other meaning than something * equivalent to flushing and say it DOES have completely different - * semantics in some other command set - does an NSID of 0x then + * semantics in some other command set - does an NSID of h then * mean "for all namespaces, apply whatever command set specific command * that uses the 0x0 opcode?" Or does it mean "for all namespaces, apply * whatever command that uses the 0x0 opcode if, and only if, it allows - * NSID to be 0x"? + * NSID to be h"? * * Anyway (and luckily), for now, we do not care about this since the * device only supports namespace types that includes the NVM Flush command @@ -3934,7 +3940,7 @@ static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, NVME_CHANGED_NSID_SIZE) { /* * If more than 1024 namespaces, the first entry in the log page should - * be set to 0x and the others to 0 as spec. + * be set to h and the others to 0 as spec. */ if (i == ARRAY_SIZE(nslist)) { memset(nslist, 0x0, sizeof(nslist)); @@ -4332,7 +4338,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req, trace_pci_nvme_identify_nslist(min_nsid); /* - * Both 0x (NVME_NSID_BROADCAST) and 0xfffe are invalid values + * Both h (NVME_NSID_BROADCAST) and Eh are invalid values * since the Active Namespace ID List should return namespaces with ids * *higher* than the NSID specified in the command. This is also specified * in the spec (NVM Express v1.3d, Section 5.15.4). @@ -4379,7 +4385,7 @@ static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req, trace_pci_nvme_identify_nslist_csi(min_nsid, c->csi); /* - * Same as in nvme_identify_nslist(), 0x/0xfffe are invalid. + * Same as in nvme_identify_nslist(), h/Eh are invalid. */ if (min_nsid >= NVME_NSID_BROADCAST - 1) { return NVME_INVALID_NSID | NVME_DNR; @@ -4595,7 +4601,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req) /* * The Reservation Notification Mask and Reservation Persistence * features require a status code of Invalid Field in Command when - * NSID is 0x. Since the device does not support those + * NSID is h. Since the device does not support those * f
Re: [PATCH v2] hw/block/nvme: align with existing style
On Thu, Apr 15, 2021 at 07:18:50PM +0200, Klaus Jensen wrote: On Apr 15 15:13, Philippe Mathieu-Daudé wrote: On 4/15/21 2:00 PM, Gollu Appalanaidu wrote: Make uniform hexadecimal numbers format. Signed-off-by: Gollu Appalanaidu --- -v2: Address review comments (Klaus) use lower case hexa format for the code and in comments use the same format as used in Spec. ("h") ^ This comment is relevant to the commit message. Also it would be nice if the subsystem could describe somewhere what is its style. Not sure where... The file header is probably the simplest place. Something like: "While QEMU coding style prefers lowercase hexadecimal in constants, the NVMe subsystem use the format from the NVMe specifications in the comments: no '0x' prefix, uppercase, 'h' hexadecimal suffix." +1 for that suggestion. Sure, will add it and send v3. hw/block/nvme-ns.c | 2 +- hw/block/nvme.c | 40 include/block/nvme.h | 10 +- 3 files changed, 26 insertions(+), 26 deletions(-)
[PATCH v2] hw/block/nvme: align with existing style
Make uniform hexadecimal numbers format. Signed-off-by: Gollu Appalanaidu --- -v2: Address review comments (Klaus) use lower case hexa format for the code and in comments use the same format as used in Spec. ("h") hw/block/nvme-ns.c | 2 +- hw/block/nvme.c | 40 include/block/nvme.h | 10 +- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c index 7bb618f182..a0895614d9 100644 --- a/hw/block/nvme-ns.c +++ b/hw/block/nvme-ns.c @@ -303,7 +303,7 @@ static void nvme_ns_init_zoned(NvmeNamespace *ns) id_ns_z = g_malloc0(sizeof(NvmeIdNsZoned)); -/* MAR/MOR are zeroes-based, 0x means no limit */ +/* MAR/MOR are zeroes-based, Fh means no limit */ id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1); id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1); id_ns_z->zoc = 0; diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 624a1431d0..f50e25c501 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -3607,18 +3607,18 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) /* * In the base NVM command set, Flush may apply to all namespaces - * (indicated by NSID being set to 0x). But if that feature is used + * (indicated by NSID being set to h). But if that feature is used * along with TP 4056 (Namespace Types), it may be pretty screwed up. * - * If NSID is indeed set to 0x, we simply cannot associate the + * If NSID is indeed set to h, we simply cannot associate the * opcode with a specific command since we cannot determine a unique I/O * command set. Opcode 0x0 could have any other meaning than something * equivalent to flushing and say it DOES have completely different - * semantics in some other command set - does an NSID of 0x then + * semantics in some other command set - does an NSID of h then * mean "for all namespaces, apply whatever command set specific command * that uses the 0x0 opcode?" Or does it mean "for all namespaces, apply * whatever command that uses the 0x0 opcode if, and only if, it allows - * NSID to be 0x"? + * NSID to be h"? * * Anyway (and luckily), for now, we do not care about this since the * device only supports namespace types that includes the NVM Flush command @@ -3934,7 +3934,7 @@ static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, NVME_CHANGED_NSID_SIZE) { /* * If more than 1024 namespaces, the first entry in the log page should - * be set to 0x and the others to 0 as spec. + * be set to h and the others to 0 as spec. */ if (i == ARRAY_SIZE(nslist)) { memset(nslist, 0x0, sizeof(nslist)); @@ -4332,7 +4332,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req, trace_pci_nvme_identify_nslist(min_nsid); /* - * Both 0x (NVME_NSID_BROADCAST) and 0xfffe are invalid values + * Both h (NVME_NSID_BROADCAST) and Eh are invalid values * since the Active Namespace ID List should return namespaces with ids * *higher* than the NSID specified in the command. This is also specified * in the spec (NVM Express v1.3d, Section 5.15.4). @@ -4379,7 +4379,7 @@ static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req, trace_pci_nvme_identify_nslist_csi(min_nsid, c->csi); /* - * Same as in nvme_identify_nslist(), 0x/0xfffe are invalid. + * Same as in nvme_identify_nslist(), h/Eh are invalid. */ if (min_nsid >= NVME_NSID_BROADCAST - 1) { return NVME_INVALID_NSID | NVME_DNR; @@ -4595,7 +4595,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req) /* * The Reservation Notification Mask and Reservation Persistence * features require a status code of Invalid Field in Command when - * NSID is 0x. Since the device does not support those + * NSID is h. Since the device does not support those * features we can always return Invalid Namespace or Format as we * should do for all other features. */ @@ -4854,8 +4854,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) return NVME_INVALID_FIELD | NVME_DNR; } -trace_pci_nvme_setfeat_numq((dw11 & 0x) + 1, -((dw11 >> 16) & 0x) + 1, +trace_pci_nvme_setfeat_numq((dw11 & 0x) + 1, +((dw11 >> 16) & 0x) + 1, n->params.m
[PATCH] hw/block/nvme: remove redundant invalid_lba_range trace
Currently pci_nvme_err_invalid_lba_range tace being called indvidually at each function, add this in nvme_check_bounds and remove redundant usage of it. Signed-off-by: Gollu Appalanaidu --- hw/block/nvme.c | 9 + 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 6842b01ab5..c67d3315a1 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1424,6 +1424,7 @@ static inline uint16_t nvme_check_bounds(NvmeNamespace *ns, uint64_t slba, uint64_t nsze = le64_to_cpu(ns->id_ns.nsze); if (unlikely(UINT64_MAX - slba < nlb || slba + nlb > nsze)) { +trace_pci_nvme_err_invalid_lba_range(slba, nlb, nsze); return NVME_LBA_RANGE | NVME_DNR; } @@ -2266,7 +2267,6 @@ static void nvme_copy_in_complete(NvmeRequest *req) status = nvme_check_bounds(ns, sdlba, ctx->nlb); if (status) { -trace_pci_nvme_err_invalid_lba_range(sdlba, ctx->nlb, ns->id_ns.nsze); goto invalid; } @@ -2528,8 +2528,6 @@ static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req) uint32_t nlb = le32_to_cpu(range[i].nlb); if (nvme_check_bounds(ns, slba, nlb)) { -trace_pci_nvme_err_invalid_lba_range(slba, nlb, - ns->id_ns.nsze); continue; } @@ -2602,7 +2600,6 @@ static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req) status = nvme_check_bounds(ns, slba, nlb); if (status) { -trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); return status; } @@ -2687,7 +2684,6 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) status = nvme_check_bounds(ns, slba, _nlb); if (status) { -trace_pci_nvme_err_invalid_lba_range(slba, _nlb, ns->id_ns.nsze); goto out; } @@ -2816,7 +2812,6 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req) status = nvme_check_bounds(ns, slba, nlb); if (status) { -trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); return status; } @@ -2935,7 +2930,6 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) status = nvme_check_bounds(ns, slba, nlb); if (status) { -trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); goto invalid; } @@ -3015,7 +3009,6 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, status = nvme_check_bounds(ns, slba, nlb); if (status) { -trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); goto invalid; } -- 2.17.1
Re: [PATCH v3] hw/block/nvme: add device self test command support
On Mon, Apr 12, 2021 at 01:57:49PM +0530, Gollu Appalanaidu wrote: On Sat, Apr 10, 2021 at 12:35:20AM +0900, Keith Busch wrote: On Wed, Mar 31, 2021 at 02:54:27PM +0530, Gollu Appalanaidu wrote: This is to add support for Device Self Test Command (DST) and DST Log Page. Refer NVM Express specification 1.4b section 5.8 ("Device Self-test command") Please don't write change logs that just say what you did. I can read the code to see that. Explain why this is useful because this frankly looks like another useless feature. We don't need to implement every optional spec feature here. There should be a real value proposition. Hi Keith, It was useful to us to be able to test the feature against qemu - and we wanted to contribute the code, but we understand that features should be more "complete" for upstreaming. New features for SPDK (and nvme-cli) are use-cases for optional features like this, where one might not have physical device available and also users who is going to develop their in house host test tool this would be useful, since we are providing the functional behaviour as per the NVMe protocol. Hi Keith, It was useful to us to be able to test the feature against qemu - and we wanted to contribute the code, but we understand that features should be more "complete" for upstreaming. New features for SPDK (and nvme-cli) are use-cases for optional features like this, where one might not have physical device available and also users who is going to develop their in house host test tool this would be useful, since we are providing the functional behaviour as per the NVMe protocol.
Re: [PATCH 2/2] hw/block/nvme: drain namespaces on sq deletion
On Thu, Apr 08, 2021 at 09:37:09PM +0200, Klaus Jensen wrote: From: Klaus Jensen For most commands, when issuing an AIO, the BlockAIOCB is stored in the NvmeRequest aiocb pointer when the AIO is issued. The main use of this is cancelling AIOs when deleting submission queues (it is currently not used for Abort). However, some commands like Dataset Management Zone Management Send (zone reset) may involve more than one AIO and here the AIOs are issued without saving a reference to the BlockAIOCB. This is a problem since nvme_del_sq() will attempt to cancel outstanding AIOs, potentially with an invalid BlockAIOCB since the aiocb pointer is not NULL'ed when the request structure is recycled. Fix this by 1. making sure the aiocb pointer is NULL'ed when requests are recycled 2. only attempt to cancel the AIO if the aiocb is non-NULL 3. if any AIOs could not be cancelled, drain all aio as a last resort. Fixes: dc04d25e2f3f ("hw/block/nvme: add support for the format nvm command") Fixes: c94973288cd9 ("hw/block/nvme: add broadcast nsid support flush command") Fixes: e4e430b3d6ba ("hw/block/nvme: add simple copy command") Fixes: 5f5dc4c6a942 ("hw/block/nvme: zero out zones on reset") Fixes: 2605257a26b8 ("hw/block/nvme: add the dataset management command") Cc: Gollu Appalanaidu Cc: Minwoo Im Signed-off-by: Klaus Jensen --- hw/block/nvme.c | 23 +-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 94bc373260be..3c4297e38a52 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -470,6 +470,7 @@ static void nvme_req_clear(NvmeRequest *req) { req->ns = NULL; req->opaque = NULL; +req->aiocb = NULL; memset(>cqe, 0x0, sizeof(req->cqe)); req->status = NVME_SUCCESS; } @@ -3681,6 +3682,7 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req) NvmeSQueue *sq; NvmeCQueue *cq; uint16_t qid = le16_to_cpu(c->qid); +int nsid; if (unlikely(!qid || nvme_check_sqid(n, qid))) { trace_pci_nvme_err_invalid_del_sq(qid); @@ -3692,9 +3694,26 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req) sq = n->sq[qid]; while (!QTAILQ_EMPTY(>out_req_list)) { r = QTAILQ_FIRST(>out_req_list); -assert(r->aiocb); -blk_aio_cancel(r->aiocb); +if (r->aiocb) { +blk_aio_cancel(r->aiocb); +} } + +/* + * Drain all namespaces if there are still outstanding requests that we + * could not cancel explicitly. + */ +if (!QTAILQ_EMPTY(>out_req_list)) { +for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) { +NvmeNamespace *ns = nvme_ns(n, nsid); +if (ns) { +nvme_ns_drain(ns); +} +} +} + +assert(QTAILQ_EMPTY(>out_req_list)); + if (!nvme_check_cqid(n, sq->cqid)) { cq = n->cq[sq->cqid]; QTAILQ_REMOVE(>sq_list, sq, entry); -- LTM. Reviewed-by: Gollu Appalanaidu 2.31.1
Re: [PATCH 1/2] hw/block/nvme: store aiocb in compare
On Thu, Apr 08, 2021 at 09:37:08PM +0200, Klaus Jensen wrote: From: Klaus Jensen nvme_compare() fails to store the aiocb from the blk_aio_preadv() call. Fix this. Fixes: 0a384f923f51 ("hw/block/nvme: add compare command") Cc: Gollu Appalanaidu Signed-off-by: Klaus Jensen --- hw/block/nvme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 6b1f056a0ebc..94bc373260be 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -2837,7 +2837,8 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req) block_acct_start(blk_get_stats(blk), >acct, data_len, BLOCK_ACCT_READ); -blk_aio_preadv(blk, offset, >data.iov, 0, nvme_compare_data_cb, req); +req->aiocb = blk_aio_preadv(blk, offset, >data.iov, 0, +nvme_compare_data_cb, req); return NVME_NO_COMPLETE; } -- Reviewed-by: Gollu Appalanaidu 2.31.1
[PATCH] hw/block/nvme: slba equal to nsze is out of bounds if nlb is 1-based
NSZE is the total size of the namespace in logical blocks. So the max addressable logical block is NLB minus 1. So your starting logical block is equal to NSZE it is a out of range. Signed-off-by: Gollu Appalanaidu --- hw/block/nvme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 953ec64729..be9edb1158 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -2527,7 +2527,7 @@ static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req) uint64_t slba = le64_to_cpu(range[i].slba); uint32_t nlb = le32_to_cpu(range[i].nlb); -if (nvme_check_bounds(ns, slba, nlb)) { +if (nvme_check_bounds(ns, slba, nlb) || slba == ns->id_ns.nsze) { trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); continue; -- 2.17.1
Re: [PATCH for-6.0 0/7] hw/block/nvme: misc fixes
On Wed, Mar 24, 2021 at 09:09:00PM +0100, Klaus Jensen wrote: From: Klaus Jensen Various fixes for 6.0. Klaus Jensen (7): hw/block/nvme: fix pi constraint check hw/block/nvme: fix missing string representation for ns attachment hw/block/nvme: fix the nsid 'invalid' value hw/block/nvme: fix controller namespaces array indexing hw/block/nvme: fix warning about legacy namespace configuration hw/block/nvme: update dmsrl limit on namespace detachment hw/block/nvme: fix handling of private namespaces hw/block/nvme-ns.h | 12 ++-- hw/block/nvme-subsys.h | 7 +-- hw/block/nvme.h| 45 ++ include/block/nvme.h | 1 + hw/block/nvme-ns.c | 76 hw/block/nvme-subsys.c | 28 - hw/block/nvme.c| 131 - hw/block/trace-events | 1 - 8 files changed, 129 insertions(+), 172 deletions(-) -- Reviewed-by: Gollu Appalanaidu 2.31.0
Re: [PATCH for-6.0 7/7] hw/block/nvme: fix handling of private namespaces
w0(NvmeRequest *, n->params.aerl + 1); } -static int nvme_attach_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) -{ -if (nvme_ns_is_attached(n, ns)) { -error_setg(errp, - "namespace %d is already attached to controller %d", - nvme_nsid(ns), n->cntlid); -return -1; -} - -nvme_ns_attach(n, ns); - -return 0; -} - -int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) -{ -uint32_t nsid = nvme_nsid(ns); - -if (nsid > NVME_MAX_NAMESPACES) { -error_setg(errp, "invalid namespace id (must be between 0 and %d)", - NVME_MAX_NAMESPACES); -return -1; -} - -if (!nsid) { -for (int i = 1; i <= n->num_namespaces; i++) { -if (!nvme_ns(n, i)) { -nsid = ns->params.nsid = i; -break; -} -} - -if (!nsid) { -error_setg(errp, "no free namespace id"); -return -1; -} -} else { -if (n->namespaces[nsid]) { -error_setg(errp, "namespace id '%d' is already in use", nsid); -return -1; -} -} - -trace_pci_nvme_register_namespace(nsid); - -/* - * If subsys is not given, namespae is always attached to the controller - * because there's no subsystem to manage namespace allocation. - */ -if (!n->subsys) { -if (ns->params.detached) { -error_setg(errp, - "detached needs nvme-subsys specified nvme or nvme-ns"); -return -1; -} - -return nvme_attach_namespace(n, ns, errp); -} else { -if (!ns->params.detached) { -return nvme_attach_namespace(n, ns, errp); -} -} - -n->dmrsl = MIN_NON_ZERO(n->dmrsl, -BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); - -return 0; -} - static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) { uint64_t cmb_size = n->params.cmb_size_mb * MiB; @@ -6187,6 +6133,18 @@ static int nvme_init_subsys(NvmeCtrl *n, Error **errp) return 0; } +void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns) +{ +uint32_t nsid = ns->params.nsid; +assert(nsid && nsid <= NVME_MAX_NAMESPACES); + +n->namespaces[nsid] = ns; +ns->attached++; + +n->dmrsl = MIN_NON_ZERO(n->dmrsl, +BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); +} + static void nvme_realize(PCIDevice *pci_dev, Error **errp) { NvmeCtrl *n = NVME(pci_dev); @@ -6218,13 +6176,11 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) ns = >namespace; ns->params.nsid = 1; -if (nvme_ns_setup(ns, errp)) { +if (nvme_ns_setup(n, ns, errp)) { return; } -if (nvme_register_namespace(n, ns, errp)) { -return; -} +nvme_attach_ns(n, ns); } } diff --git a/hw/block/trace-events b/hw/block/trace-events index 22da06986d72..fa12e3a67a75 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -51,7 +51,6 @@ hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int t # nvme.c # nvme traces for successful events -pci_nvme_register_namespace(uint32_t nsid) "nsid %"PRIu32"" pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" pci_nvme_irq_pin(void) "pulsing IRQ pin" pci_nvme_irq_masked(void) "IRQ is masked" -- Changes looks good to me. Reviewed-by: Gollu Appalanaidu 2.31.0
[PATCH v3] hw/block/nvme: add device self test command support
This is to add support for Device Self Test Command (DST) and DST Log Page. Refer NVM Express specification 1.4b section 5.8 ("Device Self-test command") Signed-off-by: Gollu Appalanaidu --- changes: -v3: removed unwanted patch file added -v2: addressed style fixes in hw/block/nvme.h hw/block/nvme.c | 118 +- hw/block/nvme.h | 13 + hw/block/trace-events | 1 + include/block/nvme.h | 49 ++ 4 files changed, 180 insertions(+), 1 deletion(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 6842b01ab5..3c2186b170 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -214,6 +214,7 @@ static const uint32_t nvme_cse_acs[256] = { [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_NS_ATTACHMENT]= NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC, [NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, +[NVME_ADM_CMD_DST] = NVME_CMD_EFF_CSUPP, }; static const uint32_t nvme_cse_iocs_none[256]; @@ -3980,6 +3981,34 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, return nvme_c2h(n, ((uint8_t *)) + off, trans_len, req); } +static uint16_t nvme_dst_info(NvmeCtrl *n, uint32_t buf_len, uint64_t off, + NvmeRequest *req) +{ +NvmeDstLogPage dst_log = {}; +NvmeDst *dst; +NvmeDstEntry *traverser; +uint32_t trans_len; +uint8_t entry_index = 0; +dst = >dst; + +if (off >= sizeof(dst_log)) { +return NVME_INVALID_FIELD | NVME_DNR; +} + +dst_log.current_dsto = dst->current_dsto; +dst_log.current_dstc = dst->current_dstc; + +QTAILQ_FOREACH(traverser, >dst_list, entry) { +memcpy(_log.dst_result[entry_index], +>dst_entry, sizeof(NvmeSelfTestResult)); +entry_index++; +} + +trans_len = MIN(sizeof(dst_log) - off, buf_len); + +return nvme_c2h(n, ((uint8_t *)_log) + off, trans_len, req); +} + static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) { NvmeCmd *cmd = >cmd; @@ -4027,6 +4056,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) return nvme_changed_nslist(n, rae, len, off, req); case NVME_LOG_CMD_EFFECTS: return nvme_cmd_effects(n, csi, len, off, req); +case NVME_LOG_DEV_SELF_TEST: +return nvme_dst_info(n, len, off, req); default: trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid); return NVME_INVALID_FIELD | NVME_DNR; @@ -5069,6 +5100,73 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) return req->status; } +static void nvme_dst_create_entry(NvmeCtrl *n, uint32_t nsid, +uint8_t stc) +{ +NvmeDstEntry *cur_entry; +time_t current_ms; + +cur_entry = QTAILQ_LAST(>dst.dst_list); +QTAILQ_REMOVE(>dst.dst_list, cur_entry, entry); +memset(cur_entry, 0x0, sizeof(NvmeDstEntry)); + +cur_entry->dst_entry.dst_status = stc << 4; + +if ((n->temperature >= n->features.temp_thresh_hi) || +(n->temperature <= n->features.temp_thresh_low)) { +cur_entry->dst_entry.dst_status |= NVME_DST_WITH_FAILED_SEG; +cur_entry->dst_entry.segment_number = NVME_SMART_CHECK; +} + +current_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); +cur_entry->dst_entry.poh = cpu_to_le64current_ms - +n->starttime_ms) / 1000) / 60) / 60); +cur_entry->dst_entry.nsid = nsid; + +QTAILQ_INSERT_HEAD(>dst.dst_list, cur_entry, entry); +} + +static uint16_t nvme_dst_processing(NvmeCtrl *n, uint32_t nsid, +uint8_t stc) +{ +/* + * n->dst.current_dsto will be always 0x0 or NO DST OPERATION, + * since no background device self test operation takes place. + */ +assert(n->dst.current_dsto == NVME_DST_NO_OPERATION); + +if (stc == NVME_ABORT_DSTO) { +goto out; +} +if (stc == NVME_SHORT_DSTO || stc == NVME_EXTENDED_DSTO) { +nvme_dst_create_entry(n, nsid, stc); +} + +out: +n->dst.current_dstc = NVME_DST_OPERATION_COMPLETED; +return NVME_SUCCESS; +} + +static uint16_t nvme_dst(NvmeCtrl *n, NvmeRequest *req) +{ +uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); +uint32_t nsid = le32_to_cpu(req->cmd.nsid); +uint8_t stc = dw10 & 0xf; + +trace_pci_nvme_dst(nvme_cid(req), nsid, stc); + +if (!nvme_nsid_valid(n, nsid) && nsid != 0) { +return NVME_INVALID_NSID | NVME_DNR; +} + +if (nsid != NVME_NSID_BROADCAST && nsid != 0 && +!nvme_ns(n, nsid)) { +return NVME_INVALID_FIELD | NVME_DNR; +} + +return nvme_dst_processing(n, nsid, stc); +} + static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) { trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode, @@ -510
[PATCH v2] hw/block/nvme: add device self test command support
This is to add support for Device Self Test Command (DST) and DST Log Page. Refer NVM Express specification 1.4b section 5.8 ("Device Self-test command") Signed-off-by: Gollu Appalanaidu --- changes: -v2: addressed style fixes in hw/block/nvme.h hw/block/nvme.c | 118 +- hw/block/nvme.h | 13 + hw/block/trace-events | 1 + include/block/nvme.h | 49 +++ ...add-device-self-test-command-support.patch | 335 ++ 5 files changed, 515 insertions(+), 1 deletion(-) create mode 100644 outgoing/0001-hw-block-nvme-add-device-self-test-command-support.patch diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 6842b01ab5..3c2186b170 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -214,6 +214,7 @@ static const uint32_t nvme_cse_acs[256] = { [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_NS_ATTACHMENT]= NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC, [NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, +[NVME_ADM_CMD_DST] = NVME_CMD_EFF_CSUPP, }; static const uint32_t nvme_cse_iocs_none[256]; @@ -3980,6 +3981,34 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, return nvme_c2h(n, ((uint8_t *)) + off, trans_len, req); } +static uint16_t nvme_dst_info(NvmeCtrl *n, uint32_t buf_len, uint64_t off, + NvmeRequest *req) +{ +NvmeDstLogPage dst_log = {}; +NvmeDst *dst; +NvmeDstEntry *traverser; +uint32_t trans_len; +uint8_t entry_index = 0; +dst = >dst; + +if (off >= sizeof(dst_log)) { +return NVME_INVALID_FIELD | NVME_DNR; +} + +dst_log.current_dsto = dst->current_dsto; +dst_log.current_dstc = dst->current_dstc; + +QTAILQ_FOREACH(traverser, >dst_list, entry) { +memcpy(_log.dst_result[entry_index], +>dst_entry, sizeof(NvmeSelfTestResult)); +entry_index++; +} + +trans_len = MIN(sizeof(dst_log) - off, buf_len); + +return nvme_c2h(n, ((uint8_t *)_log) + off, trans_len, req); +} + static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) { NvmeCmd *cmd = >cmd; @@ -4027,6 +4056,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) return nvme_changed_nslist(n, rae, len, off, req); case NVME_LOG_CMD_EFFECTS: return nvme_cmd_effects(n, csi, len, off, req); +case NVME_LOG_DEV_SELF_TEST: +return nvme_dst_info(n, len, off, req); default: trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid); return NVME_INVALID_FIELD | NVME_DNR; @@ -5069,6 +5100,73 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) return req->status; } +static void nvme_dst_create_entry(NvmeCtrl *n, uint32_t nsid, +uint8_t stc) +{ +NvmeDstEntry *cur_entry; +time_t current_ms; + +cur_entry = QTAILQ_LAST(>dst.dst_list); +QTAILQ_REMOVE(>dst.dst_list, cur_entry, entry); +memset(cur_entry, 0x0, sizeof(NvmeDstEntry)); + +cur_entry->dst_entry.dst_status = stc << 4; + +if ((n->temperature >= n->features.temp_thresh_hi) || +(n->temperature <= n->features.temp_thresh_low)) { +cur_entry->dst_entry.dst_status |= NVME_DST_WITH_FAILED_SEG; +cur_entry->dst_entry.segment_number = NVME_SMART_CHECK; +} + +current_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); +cur_entry->dst_entry.poh = cpu_to_le64current_ms - +n->starttime_ms) / 1000) / 60) / 60); +cur_entry->dst_entry.nsid = nsid; + +QTAILQ_INSERT_HEAD(>dst.dst_list, cur_entry, entry); +} + +static uint16_t nvme_dst_processing(NvmeCtrl *n, uint32_t nsid, +uint8_t stc) +{ +/* + * n->dst.current_dsto will be always 0x0 or NO DST OPERATION, + * since no background device self test operation takes place. + */ +assert(n->dst.current_dsto == NVME_DST_NO_OPERATION); + +if (stc == NVME_ABORT_DSTO) { +goto out; +} +if (stc == NVME_SHORT_DSTO || stc == NVME_EXTENDED_DSTO) { +nvme_dst_create_entry(n, nsid, stc); +} + +out: +n->dst.current_dstc = NVME_DST_OPERATION_COMPLETED; +return NVME_SUCCESS; +} + +static uint16_t nvme_dst(NvmeCtrl *n, NvmeRequest *req) +{ +uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); +uint32_t nsid = le32_to_cpu(req->cmd.nsid); +uint8_t stc = dw10 & 0xf; + +trace_pci_nvme_dst(nvme_cid(req), nsid, stc); + +if (!nvme_nsid_valid(n, nsid) && nsid != 0) { +return NVME_INVALID_NSID | NVME_DNR; +} + +if (nsid != NVME_NSID_BROADCAST && nsid != 0 && +!nvme_ns(n, nsid)) { +return NVME_INVALID_FIELD | NVME_DNR; +} + +return nvme_dst_processing(n, nsid, stc); +} + s
[PATCH] hw/block/nvme: add device self test command support
This is to add support for Device Self Test Command (DST) and DST Log Page. Refer NVM Express specification 1.4b section 5.8 ("Device Self-test command") Signed-off-by: Gollu Appalanaidu --- hw/block/nvme.c | 118 +- hw/block/nvme.h | 13 + hw/block/trace-events | 1 + include/block/nvme.h | 49 ++ 4 files changed, 180 insertions(+), 1 deletion(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 6842b01ab5..3c2186b170 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -214,6 +214,7 @@ static const uint32_t nvme_cse_acs[256] = { [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_NS_ATTACHMENT]= NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC, [NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, +[NVME_ADM_CMD_DST] = NVME_CMD_EFF_CSUPP, }; static const uint32_t nvme_cse_iocs_none[256]; @@ -3980,6 +3981,34 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, return nvme_c2h(n, ((uint8_t *)) + off, trans_len, req); } +static uint16_t nvme_dst_info(NvmeCtrl *n, uint32_t buf_len, uint64_t off, + NvmeRequest *req) +{ +NvmeDstLogPage dst_log = {}; +NvmeDst *dst; +NvmeDstEntry *traverser; +uint32_t trans_len; +uint8_t entry_index = 0; +dst = >dst; + +if (off >= sizeof(dst_log)) { +return NVME_INVALID_FIELD | NVME_DNR; +} + +dst_log.current_dsto = dst->current_dsto; +dst_log.current_dstc = dst->current_dstc; + +QTAILQ_FOREACH(traverser, >dst_list, entry) { +memcpy(_log.dst_result[entry_index], +>dst_entry, sizeof(NvmeSelfTestResult)); +entry_index++; +} + +trans_len = MIN(sizeof(dst_log) - off, buf_len); + +return nvme_c2h(n, ((uint8_t *)_log) + off, trans_len, req); +} + static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) { NvmeCmd *cmd = >cmd; @@ -4027,6 +4056,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) return nvme_changed_nslist(n, rae, len, off, req); case NVME_LOG_CMD_EFFECTS: return nvme_cmd_effects(n, csi, len, off, req); +case NVME_LOG_DEV_SELF_TEST: +return nvme_dst_info(n, len, off, req); default: trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid); return NVME_INVALID_FIELD | NVME_DNR; @@ -5069,6 +5100,73 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) return req->status; } +static void nvme_dst_create_entry(NvmeCtrl *n, uint32_t nsid, +uint8_t stc) +{ +NvmeDstEntry *cur_entry; +time_t current_ms; + +cur_entry = QTAILQ_LAST(>dst.dst_list); +QTAILQ_REMOVE(>dst.dst_list, cur_entry, entry); +memset(cur_entry, 0x0, sizeof(NvmeDstEntry)); + +cur_entry->dst_entry.dst_status = stc << 4; + +if ((n->temperature >= n->features.temp_thresh_hi) || +(n->temperature <= n->features.temp_thresh_low)) { +cur_entry->dst_entry.dst_status |= NVME_DST_WITH_FAILED_SEG; +cur_entry->dst_entry.segment_number = NVME_SMART_CHECK; +} + +current_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); +cur_entry->dst_entry.poh = cpu_to_le64current_ms - +n->starttime_ms) / 1000) / 60) / 60); +cur_entry->dst_entry.nsid = nsid; + +QTAILQ_INSERT_HEAD(>dst.dst_list, cur_entry, entry); +} + +static uint16_t nvme_dst_processing(NvmeCtrl *n, uint32_t nsid, +uint8_t stc) +{ +/* + * n->dst.current_dsto will be always 0x0 or NO DST OPERATION, + * since no background device self test operation takes place. + */ +assert(n->dst.current_dsto == NVME_DST_NO_OPERATION); + +if (stc == NVME_ABORT_DSTO) { +goto out; +} +if (stc == NVME_SHORT_DSTO || stc == NVME_EXTENDED_DSTO) { +nvme_dst_create_entry(n, nsid, stc); +} + +out: +n->dst.current_dstc = NVME_DST_OPERATION_COMPLETED; +return NVME_SUCCESS; +} + +static uint16_t nvme_dst(NvmeCtrl *n, NvmeRequest *req) +{ +uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); +uint32_t nsid = le32_to_cpu(req->cmd.nsid); +uint8_t stc = dw10 & 0xf; + +trace_pci_nvme_dst(nvme_cid(req), nsid, stc); + +if (!nvme_nsid_valid(n, nsid) && nsid != 0) { +return NVME_INVALID_NSID | NVME_DNR; +} + +if (nsid != NVME_NSID_BROADCAST && nsid != 0 && +!nvme_ns(n, nsid)) { +return NVME_INVALID_FIELD | NVME_DNR; +} + +return nvme_dst_processing(n, nsid, stc); +} + static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) { trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode, @@ -5109,6 +5207,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) return nvme_ns_attachment(n, r
Re: [PATCH for-6.0 5/7] hw/block/nvme: fix warning about legacy namespace configuration
On Wed, Mar 24, 2021 at 09:09:05PM +0100, Klaus Jensen wrote: From: Klaus Jensen Remove the unused BlockConf from the controller structure and fix the constraint checking to actually check the right BlockConf and issue the warning. Signed-off-by: Klaus Jensen --- hw/block/nvme.h | 1 - hw/block/nvme.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/hw/block/nvme.h b/hw/block/nvme.h index c610ab30dc5c..1570f65989a7 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -166,7 +166,6 @@ typedef struct NvmeCtrl { NvmeBar bar; NvmeParams params; NvmeBus bus; -BlockConfconf; uint16_tcntlid; boolqs_created; diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 7a7e793c6c26..403c8381a498 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -5807,7 +5807,7 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp) params->max_ioqpairs = params->num_queues - 1; } -if (n->conf.blk) { +if (n->namespace.blkconf.blk) { warn_report("drive property is deprecated; " "please use an nvme-ns device instead"); } -- 2.31.0 Reviewed-by: Gollu Appalanaidu
Re: [PATCH for-6.0 6/7] hw/block/nvme: update dmsrl limit on namespace detachment
On Wed, Mar 24, 2021 at 09:09:06PM +0100, Klaus Jensen wrote: From: Klaus Jensen The Non-MDTS DMSRL limit must be recomputed when namespaces are detached. Fixes: 645ce1a70cb6 ("hw/block/nvme: support namespace attachment command") Signed-off-by: Klaus Jensen --- hw/block/nvme.c | 17 + 1 file changed, 17 insertions(+) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 403c8381a498..e84e43b2692d 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -4876,6 +4876,21 @@ static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req) return NVME_NO_COMPLETE; } +static void __nvme_update_dmrsl(NvmeCtrl *n) +{ +int nsid; + +for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) { +NvmeNamespace *ns = nvme_ns(n, nsid); +if (!ns) { +continue; +} + +n->dmrsl = MIN_NON_ZERO(n->dmrsl, +BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); +} +} + Looks good to me! static void __nvme_select_ns_iocs(NvmeCtrl *n, NvmeNamespace *ns); static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) { @@ -4925,6 +4940,8 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) } nvme_ns_detach(ctrl, ns); + +__nvme_update_dmrsl(ctrl); } /* -- 2.31.0 Reviwed-by: Gollu Appalanaidu
Re: [PATCH for-6.0 1/7] hw/block/nvme: fix pi constraint check
On Tue, Mar 30, 2021 at 09:24:59AM +0200, Klaus Jensen wrote: On Mar 29 19:52, Gollu Appalanaidu wrote: On Wed, Mar 24, 2021 at 09:09:01PM +0100, Klaus Jensen wrote: > From: Klaus Jensen > > Protection Information can only be enabled if there is at least 8 bytes > of metadata. > > Signed-off-by: Klaus Jensen > --- > hw/block/nvme-ns.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c > index 7f8d139a8663..ca04ee1bacfb 100644 > --- a/hw/block/nvme-ns.c > +++ b/hw/block/nvme-ns.c > @@ -394,7 +394,7 @@ static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) > return -1; > } > > -if (ns->params.pi && !ns->params.ms) { > +if (ns->params.pi && ns->params.ms < 8) { and also it is good check that "metadata size" is power of 2 or not? While I don't expect a lot of real-world devices having metadata sizes that are not power of twos, there is no requirement in the spec for that. And the implementation here also does not require it :) Reviewed-by: Gollu Appalanaidu
Re: [PATCH for-6.0 2/7] hw/block/nvme: fix missing string representation for ns attachment
On Wed, Mar 24, 2021 at 09:09:02PM +0100, Klaus Jensen wrote: From: Klaus Jensen Add the missing nvme_adm_opc_str entry for the Namespace Attachment command. Signed-off-by: Klaus Jensen --- hw/block/nvme.h | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 5b0031b11db2..9edc86d79e98 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -86,6 +86,7 @@ static inline const char *nvme_adm_opc_str(uint8_t opc) case NVME_ADM_CMD_SET_FEATURES: return "NVME_ADM_CMD_SET_FEATURES"; case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES"; case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ"; +case NVME_ADM_CMD_NS_ATTACHMENT:return "NVME_ADM_CMD_NS_ATTACHMENT"; case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM"; default:return "NVME_ADM_CMD_UNKNOWN"; } -- Reviewed-by: Gollu Appalanaidu 2.31.0
Re: [PATCH for-6.0 1/7] hw/block/nvme: fix pi constraint check
On Wed, Mar 24, 2021 at 09:09:01PM +0100, Klaus Jensen wrote: From: Klaus Jensen Protection Information can only be enabled if there is at least 8 bytes of metadata. Signed-off-by: Klaus Jensen --- hw/block/nvme-ns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c index 7f8d139a8663..ca04ee1bacfb 100644 --- a/hw/block/nvme-ns.c +++ b/hw/block/nvme-ns.c @@ -394,7 +394,7 @@ static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) return -1; } -if (ns->params.pi && !ns->params.ms) { +if (ns->params.pi && ns->params.ms < 8) { and also it is good check that "metadata size" is power of 2 or not? error_setg(errp, "at least 8 bytes of metadata required to enable " "protection information"); return -1; -- 2.31.0
Re: [PATCH v2 2/2] hw/block/nvme: fix ref counting in nvme_format_ns
On Mon, Mar 22, 2021 at 01:09:44PM +0100, Klaus Jensen wrote: From: Klaus Jensen Max noticed that since blk_aio_pwrite_zeroes() may invoke the callback before returning, the callbacks will never see *count == 0 and thus never free the count variable or decrement num_formats causing a CQE to never be posted. Coverity (CID 1451082) also picked up on the fact that count would not be free'ed if the namespace was of zero size. Fix both of these issues by explicitly checking *count and finalize for the given namespace if --(*count) is zero. Enqueing a CQE if there are no AIOs outstanding after this case is already handled by nvme_format() by inspecting *num_formats. Reported-by: Max Reitz Reported-by: Coverity (CID 1451082) Fixes: dc04d25e2f3f ("hw/block/nvme: add support for the format nvm command") Signed-off-by: Klaus Jensen --- hw/block/nvme.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 6842b01ab58b..c54ec3c9523c 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -5009,9 +5009,15 @@ static uint16_t nvme_format_ns(NvmeCtrl *n, NvmeNamespace *ns, uint8_t lbaf, } -(*count)--; +if (--(*count)) { +return NVME_NO_COMPLETE; +} -return NVME_NO_COMPLETE; +g_free(count); +ns->status = 0x0; +(*num_formats)--; + +return NVME_SUCCESS; } Reviewed-by: Gollu Appalanaidu static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) -- 2.31.0
Re: [PATCH v2 1/2] hw/block/nvme: fix resource leak in nvme_dif_rw
On Mon, Mar 22, 2021 at 01:09:43PM +0100, Klaus Jensen wrote: From: Klaus Jensen If nvme_map_dptr() fails, nvme_dif_rw() will leak the bounce context. Fix this by using the same error handling as everywhere else in the function. Reported-by: Coverity (CID 1451080) Fixes: 146f720c5563 ("hw/block/nvme: end-to-end data protection") Signed-off-by: Klaus Jensen --- hw/block/nvme-dif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/block/nvme-dif.c b/hw/block/nvme-dif.c index 2038d724bda5..e6f04faafb5f 100644 --- a/hw/block/nvme-dif.c +++ b/hw/block/nvme-dif.c @@ -432,7 +432,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) status = nvme_map_dptr(n, >sg, mapped_len, >cmd); if (status) { -return status; +goto err; Looks good to me. } ctx->data.bounce = g_malloc(len); -- Reviewed-by: Gollu Appalanaidu 2.31.0
[PATCH 2/2] hw/block/nvme: align reserved fields declarations
Align the Reserved fields declaration in NvmeBar Signed-off-by: Gollu Appalanaidu --- include/block/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/block/nvme.h b/include/block/nvme.h index fc65cfcb01..e5bd00bb85 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -7,7 +7,7 @@ typedef struct QEMU_PACKED NvmeBar { uint32_tintms; uint32_tintmc; uint32_tcc; -uint32_trsvd1; +uint8_t rsvd24[4]; uint32_tcsts; uint32_tnssrc; uint32_taqa; -- 2.17.1
[PATCH 1/2] hw/block/nvme: align with existing style
Make uniform hexadecimal numbers format. Signed-off-by: Gollu Appalanaidu --- hw/block/nvme.c | 30 +++--- include/block/nvme.h | 10 +- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index d439e44db8..21e85374bf 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -2728,18 +2728,18 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) /* * In the base NVM command set, Flush may apply to all namespaces - * (indicated by NSID being set to 0x). But if that feature is used + * (indicated by NSID being set to 0x). But if that feature is used * along with TP 4056 (Namespace Types), it may be pretty screwed up. * - * If NSID is indeed set to 0x, we simply cannot associate the + * If NSID is indeed set to 0x, we simply cannot associate the * opcode with a specific command since we cannot determine a unique I/O * command set. Opcode 0x0 could have any other meaning than something * equivalent to flushing and say it DOES have completely different - * semantics in some other command set - does an NSID of 0x then + * semantics in some other command set - does an NSID of 0x then * mean "for all namespaces, apply whatever command set specific command * that uses the 0x0 opcode?" Or does it mean "for all namespaces, apply * whatever command that uses the 0x0 opcode if, and only if, it allows - * NSID to be 0x"? + * NSID to be 0x"? * * Anyway (and luckily), for now, we do not care about this since the * device only supports namespace types that includes the NVM Flush command @@ -3948,8 +3948,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) return NVME_INVALID_FIELD | NVME_DNR; } -trace_pci_nvme_setfeat_numq((dw11 & 0x) + 1, -((dw11 >> 16) & 0x) + 1, +trace_pci_nvme_setfeat_numq((dw11 & 0x) + 1, +((dw11 >> 16) & 0x) + 1, n->params.max_ioqpairs, n->params.max_ioqpairs); req->cqe.result = cpu_to_le32((n->params.max_ioqpairs - 1) | @@ -4436,7 +4436,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, } break; case 0x20: /* NSSR */ -if (data == 0x4E564D65) { +if (data == 0x4e564d65) { trace_pci_nvme_ub_mmiowr_ssreset_unsupported(); } else { /* The spec says that writes of other values have no effect */ @@ -4506,11 +4506,11 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, n->bar.cmbmsc = (n->bar.cmbmsc & 0x) | (data << 32); return; -case 0xE00: /* PMRCAP */ +case 0xe00: /* PMRCAP */ NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrcap_readonly, "invalid write to PMRCAP register, ignored"); return; -case 0xE04: /* PMRCTL */ +case 0xe04: /* PMRCTL */ n->bar.pmrctl = data; if (NVME_PMRCTL_EN(data)) { memory_region_set_enabled(>pmr.dev->mr, true); @@ -4521,19 +4521,19 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, n->pmr.cmse = false; } return; -case 0xE08: /* PMRSTS */ +case 0xe08: /* PMRSTS */ NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrsts_readonly, "invalid write to PMRSTS register, ignored"); return; -case 0xE0C: /* PMREBS */ +case 0xe0C: /* PMREBS */ NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrebs_readonly, "invalid write to PMREBS register, ignored"); return; -case 0xE10: /* PMRSWTP */ +case 0xe10: /* PMRSWTP */ NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrswtp_readonly, "invalid write to PMRSWTP register, ignored"); return; -case 0xE14: /* PMRMSCL */ +case 0xe14: /* PMRMSCL */ if (!NVME_CAP_PMRS(n->bar.cap)) { return; } @@ -4553,7 +4553,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, } return; -case 0xE18: /* PMRMSCU */ +case 0xe18: /* PMRMSCU */ if (!NVME_CAP_PMRS(n->bar.cap)) { return; } @@ -4595,7 +4595,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) * from PMRSTS should ensure prior writes * made it to persistent media */ -if (addr == 0xE08 && +if (addr == 0xe08 && (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) { memory_region_msy