On Fri, Oct 30, 2020 at 11:32:38AM +0900, Dmitry Fomichev wrote: > The emulation code has been changed to advertise NVM Command Set when > "zoned" device property is not set (default) and Zoned Namespace > Command Set otherwise. > > Define values and structures that are needed to support Zoned > Namespace Command Set (NVMe TP 4053) in PCI NVMe controller emulator. > Define trace events where needed in newly introduced code. > > In order to improve scalability, all open, closed and full zones > are organized in separate linked lists. Consequently, almost all > zone operations don't require scanning of the entire zone array > (which potentially can be quite large) - it is only necessary to > enumerate one or more zone lists. > > Handlers for three new NVMe commands introduced in Zoned Namespace > Command Set specification are added, namely for Zone Management > Receive, Zone Management Send and Zone Append. > > Device initialization code has been extended to create a proper > configuration for zoned operation using device properties. > > Read/Write command handler is modified to only allow writes at the > write pointer if the namespace is zoned. For Zone Append command, > writes implicitly happen at the write pointer and the starting write > pointer value is returned as the result of the command. Write Zeroes > handler is modified to add zoned checks that are identical to those > done as a part of Write flow. > > Subsequent commits in this series add ZDE support and checks for > active and open zone limits. 
> > Signed-off-by: Niklas Cassel <niklas.cas...@wdc.com> > Signed-off-by: Hans Holmberg <hans.holmb...@wdc.com> > Signed-off-by: Ajay Joshi <ajay.jo...@wdc.com> > Signed-off-by: Chaitanya Kulkarni <chaitanya.kulka...@wdc.com> > Signed-off-by: Matias Bjorling <matias.bjorl...@wdc.com> > Signed-off-by: Aravind Ramesh <aravind.ram...@wdc.com> > Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawas...@wdc.com> > Signed-off-by: Adam Manzanares <adam.manzana...@wdc.com> > Signed-off-by: Dmitry Fomichev <dmitry.fomic...@wdc.com> > Reviewed-by: Niklas Cassel <niklas.cas...@wdc.com>
(snip) > +static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req) > +{ > + NvmeCmd *cmd = (NvmeCmd *)&req->cmd; > + NvmeNamespace *ns = req->ns; > + /* cdw12 is zero-based number of dwords to return. Convert to bytes */ > + uint32_t len = (le32_to_cpu(cmd->cdw12) + 1) << 2; > + uint32_t dw13 = le32_to_cpu(cmd->cdw13); > + uint32_t zone_idx, zra, zrasf, partial; > + uint64_t max_zones, nr_zones = 0; > + uint16_t ret; > + uint64_t slba; > + NvmeZoneDescr *z; > + NvmeZone *zs; > + NvmeZoneReportHeader *header; > + void *buf, *buf_p; > + size_t zone_entry_sz; > + > + req->status = NVME_SUCCESS; > + > + ret = nvme_get_mgmt_zone_slba_idx(ns, cmd, &slba, &zone_idx); > + if (ret) { > + return ret; > + } > + > + if (len < sizeof(NvmeZoneReportHeader)) { > + return NVME_INVALID_FIELD | NVME_DNR; > + } Just like nvme_read() and nvme_write(), nvme_zone_mgmt_recv() has to do something like: + ret = nvme_check_mdts(n, len); + if (ret) { + trace_pci_nvme_err_mdts(nvme_cid(req), len); + return ret; + } + to check that len does not exceed MDTS. 
Kind regards, Niklas > + > + zra = dw13 & 0xff; > + if (!(zra == NVME_ZONE_REPORT || zra == NVME_ZONE_REPORT_EXTENDED)) { > + return NVME_INVALID_FIELD | NVME_DNR; > + } > + > + if (zra == NVME_ZONE_REPORT_EXTENDED) { > + return NVME_INVALID_FIELD | NVME_DNR; > + } > + > + zrasf = (dw13 >> 8) & 0xff; > + if (zrasf > NVME_ZONE_REPORT_OFFLINE) { > + return NVME_INVALID_FIELD | NVME_DNR; > + } > + > + partial = (dw13 >> 16) & 0x01; > + > + zone_entry_sz = sizeof(NvmeZoneDescr); > + > + max_zones = (len - sizeof(NvmeZoneReportHeader)) / zone_entry_sz; > + buf = g_malloc0(len); > + > + header = (NvmeZoneReportHeader *)buf; > + buf_p = buf + sizeof(NvmeZoneReportHeader); > + > + while (zone_idx < ns->num_zones && nr_zones < max_zones) { > + zs = &ns->zone_array[zone_idx]; > + > + if (!nvme_zone_matches_filter(zrasf, zs)) { > + zone_idx++; > + continue; > + } > + > + z = (NvmeZoneDescr *)buf_p; > + buf_p += sizeof(NvmeZoneDescr); > + nr_zones++; > + > + z->zt = zs->d.zt; > + z->zs = zs->d.zs; > + z->zcap = cpu_to_le64(zs->d.zcap); > + z->zslba = cpu_to_le64(zs->d.zslba); > + z->za = zs->d.za; > + > + if (nvme_wp_is_valid(zs)) { > + z->wp = cpu_to_le64(zs->d.wp); > + } else { > + z->wp = cpu_to_le64(~0ULL); > + } > + > + zone_idx++; > + } > + > + if (!partial) { > + for (; zone_idx < ns->num_zones; zone_idx++) { > + zs = &ns->zone_array[zone_idx]; > + if (nvme_zone_matches_filter(zrasf, zs)) { > + nr_zones++; > + } > + } > + } > + header->nr_zones = cpu_to_le64(nr_zones); > + > + ret = nvme_dma(n, (uint8_t *)buf, len, DMA_DIRECTION_FROM_DEVICE, req); > + > + g_free(buf); > + > + return ret; > +}