Add the new Namespace Identification Descriptor List (CNS 03h) and track creation of queues to enable the controller to return Command Sequence Error if Set Features is called for Number of Queues after any queues have been created.
Signed-off-by: Klaus Birkelund Jensen <klaus.jen...@cnexlabs.com> --- hw/block/nvme.c | 84 ++++++++++++++++++++++++++++++++++++------- hw/block/nvme.h | 1 + hw/block/trace-events | 4 ++- include/block/nvme.h | 30 +++++++++++++--- 4 files changed, 102 insertions(+), 17 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 8259dd7c1d6c..8ad95fdfa261 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -9,20 +9,22 @@ */ /** - * Reference Specs: http://www.nvmexpress.org, 1.2, 1.1, 1.0e + * Reference Specs: http://www.nvmexpress.org, 1.3d, 1.2, 1.1, 1.0e * * http://www.nvmexpress.org/resources/ */ /** * Usage: add options: - * -drive file=<file>,if=none,id=<drive_id> - * -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \ - * cmb_size_mb=<cmb_size_mb[optional]>, \ - * num_queues=<N[optional]> + * -drive file=<file>,if=none,id=<drive_id> + * -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]> * - * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at - * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. + * Advanced optional options: + * + * num_queues=<uint32> : Maximum number of IO Queues. + * Default: 64 + * cmb_size_mb=<uint32> : Size of Controller Memory Buffer in MBs. + * Default: 0 (disabled) */ #include "qemu/osdep.h" @@ -43,6 +45,7 @@ #define NVME_ELPE 3 #define NVME_AERL 3 #define NVME_OP_ABORTED 0xff + #define NVME_GUEST_ERR(trace, fmt, ...) \ do { \ (trace_##trace)(__VA_ARGS__); \ @@ -316,6 +319,8 @@ static void nvme_post_cqes(void *opaque) static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req) { assert(cq->cqid == req->sq->cqid); + + trace_nvme_enqueue_req_completion(req->cqe.cid, cq->cqid); QTAILQ_REMOVE(&req->sq->out_req_list, req, entry); QTAILQ_INSERT_TAIL(&cq->req_list, req, entry); timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); @@ -534,6 +539,7 @@ static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n) if (sq->sqid) { g_free(sq); } + n->qs_created--; } static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd) @@ -600,6 +606,7 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, cq = n->cq[cqid]; QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry); n->sq[sqid] = sq; + n->qs_created++; } static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd) @@ -649,6 +656,7 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n) if (cq->cqid) { g_free(cq); } + n->qs_created--; } static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd) @@ -689,6 +697,7 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, msix_vector_use(&n->parent_obj, cq->vector); n->cq[cqid] = cq; cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq); + n->qs_created++; } static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) @@ -762,7 +771,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c) prp1, prp2); } -static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c) +static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeIdentify *c) { static const int data_len = 4 * KiB; uint32_t min_nsid = le32_to_cpu(c->nsid); @@ -772,7 +781,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c) uint16_t ret; int i, j = 0; - trace_nvme_identify_nslist(min_nsid); + trace_nvme_identify_ns_list(min_nsid); list = g_malloc0(data_len); for (i = 0; i < n->num_namespaces; i++) { @@ -789,6 +798,47 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c) return ret; } +static uint16_t nvme_identify_ns_descriptor_list(NvmeCtrl *n, NvmeCmd *c) +{ + static const int data_len = 4 * KiB; + + /* + * The device model does not have anywhere to store a persistent UUID, so + * conjure up something that is reproducible. We generate an UUID of the + * form "00000000-0000-0000-0000-<nsid>", where nsid is similar to, say, + * 000000000001. + */ + struct ns_descr { + uint8_t nidt; + uint8_t nidl; + uint8_t rsvd[14]; + uint32_t nid; + }; + + uint32_t nsid = le32_to_cpu(c->nsid); + uint64_t prp1 = le64_to_cpu(c->prp1); + uint64_t prp2 = le64_to_cpu(c->prp2); + + struct ns_descr *list; + uint16_t ret; + + trace_nvme_identify_ns_descriptor_list(nsid); + + if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { + trace_nvme_err_invalid_ns(nsid, n->num_namespaces); + return NVME_INVALID_NSID | NVME_DNR; + } + + list = g_malloc0(data_len); + list->nidt = 0x3; + list->nidl = 0x10; + list->nid = cpu_to_be32(nsid); + + ret = nvme_dma_read_prp(n, (uint8_t *) list, data_len, prp1, prp2); + g_free(list); + return ret; +} + static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) { NvmeIdentify *c = (NvmeIdentify *)cmd; @@ -799,7 +849,9 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) case 0x01: return nvme_identify_ctrl(n, c); case 0x02: - return nvme_identify_nslist(n, c); + return nvme_identify_ns_list(n, c); + case 0x03: + return nvme_identify_ns_descriptor_list(n, cmd); default: trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); return NVME_INVALID_FIELD | NVME_DNR; @@ -951,6 +1003,14 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) blk_set_enable_write_cache(n->conf.blk, dw11 & 1); break; case NVME_NUMBER_OF_QUEUES: + if (n->qs_created > 2) { + return NVME_CMD_SEQ_ERROR | NVME_DNR; + } + + if ((dw11 & 0xffff) == 0xffff || ((dw11 >> 16) & 0xffff) == 0xffff) { + return NVME_INVALID_FIELD | NVME_DNR; + } + trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1, ((dw11 >> 16) & 0xFFFF) + 1, n->params.num_queues - 1, @@ -1798,7 +1858,7 @@ static void nvme_init_ctrl(NvmeCtrl *n) id->ieee[0] = 0x00; id->ieee[1] = 0x02; id->ieee[2] = 0xb3; - id->ver = cpu_to_le32(0x00010201); + id->ver = cpu_to_le32(0x00010300); id->oacs = cpu_to_le16(0); id->acl = 3; id->aerl = NVME_AERL; @@ -1829,7 +1889,7 @@ static void nvme_init_ctrl(NvmeCtrl *n) NVME_CAP_SET_CSS(n->bar.cap, 1); NVME_CAP_SET_MPSMAX(n->bar.cap, 4); - n->bar.vs = 0x00010201; + n->bar.vs = 0x00010300; n->bar.intmc = n->bar.intms = 0; } diff --git a/hw/block/nvme.h b/hw/block/nvme.h index ed3fa3faa718..a502a3dbbbfd 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -95,6 +95,7 @@ typedef struct NvmeCtrl { uint64_t irq_status; uint64_t host_timestamp; /* Timestamp sent by the host */ uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ + uint32_t qs_created; QEMUTimer *aer_timer; uint8_t aer_mask; uint8_t aer_mask_queued; diff --git a/hw/block/trace-events b/hw/block/trace-events index 17485bb0375b..66f6c2c07d20 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -40,7 +40,8 @@ nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" nvme_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16"" nvme_identify_ctrl(void) "identify controller" nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16"" -nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16"" +nvme_identify_ns_list(uint16_t ns) "identify namespace list, nsid=%"PRIu16"" +nvme_identify_ns_descriptor_list(uint16_t ns) "identify namespace descriptor list, nsid=%"PRIu16"" nvme_getfeat(uint32_t fid) "fid 0x%"PRIx32"" nvme_setfeat(uint32_t fid, uint32_t val) "fid 0x%"PRIx32" val 0x%"PRIx32"" nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s" @@ -54,6 +55,7 @@ nvme_aer(uint16_t cid) "cid %"PRIu16"" nvme_aer_aerl_exceeded(void) "aerl exceeded" nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8"" nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" +nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid) "cid %"PRIu16" cqid %"PRIu16"" nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs" nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" diff --git a/include/block/nvme.h b/include/block/nvme.h index d24b1f28e0fc..30f1d8b00fc5 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -550,7 +550,9 @@ typedef struct NvmeIdCtrl { uint32_t rtd3e; uint32_t oaes; uint32_t ctratt; - uint8_t rsvd255[156]; + uint8_t rsvd111[12]; + uint8_t fguid[16]; + uint8_t rsvd255[128]; uint16_t oacs; uint8_t acl; uint8_t aerl; @@ -568,9 +570,15 @@ typedef struct NvmeIdCtrl { uint8_t tnvmcap[16]; uint8_t unvmcap[16]; uint32_t rpmbs; - uint8_t rsvd319[4]; + uint16_t edstt; + uint8_t dsto; + uint8_t fwug; uint16_t kas; - uint8_t rsvd511[190]; + uint16_t hctma; + uint16_t mntmt; + uint16_t mxtmt; + uint32_t sanicap; + uint8_t rsvd511[180]; uint8_t sqes; uint8_t cqes; uint16_t maxcmd; @@ -678,7 +686,21 @@ typedef struct NvmeIdNs { uint8_t mc; uint8_t dpc; uint8_t dps; - uint8_t res30[98]; + uint8_t nmic; + uint8_t rescap; + uint8_t fpi; + uint8_t dlfeat; + uint16_t nawun; + uint16_t nawupf; + uint16_t nacwu; + uint16_t nabsn; + uint16_t nabo; + uint16_t nabspf; + uint16_t noiob; + uint8_t nvmcap[16]; + uint8_t resv103[40]; + uint8_t nguid[16]; + uint64_t eui64; NvmeLBAF lbaf[16]; uint8_t res192[192]; uint8_t vs[3712]; -- 2.20.1