RE: [PATCH v4 1/2] hw/nvme: Support for Namespaces Management from guest OS - create-ns
Hi Keith, Thank you for the review and I agree with you, the issues will be addressed. Regards, Michael -Original Message- From: Keith Busch Sent: Wednesday, December 28, 2022 12:10 PM To: Jonathan Derrick Cc: qemu-de...@nongnu.org; Michael Kropaczek (CW) ; qemu-block@nongnu.org; Klaus Jensen ; Kevin Wolf ; Hanna Reitz Subject: Re: [PATCH v4 1/2] hw/nvme: Support for Namespaces Management from guest OS - create-ns [You don't often get email from kbu...@kernel.org. Learn why this is important at https://aka.ms/LearnAboutSenderIdentification ] Caution: External Email On Wed, Dec 28, 2022 at 01:41:40PM -0600, Jonathan Derrick wrote: > +static uint16_t nvme_ns_mgmt(NvmeCtrl *n, NvmeRequest *req) { > +NvmeCtrl *n_p = NULL; /* primary controller */ > +NvmeIdCtrl *id = &n->id_ctrl; > +NvmeNamespace *ns; > +NvmeIdNsMgmt id_ns = {}; > +uint8_t flags = req->cmd.flags; > +uint32_t nsid = le32_to_cpu(req->cmd.nsid); > +uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); > +uint32_t dw11 = le32_to_cpu(req->cmd.cdw11); > +uint8_t sel = dw10 & 0xf; > +uint8_t csi = (dw11 >> 24) & 0xf; > +uint16_t i; > +uint16_t ret; > +Error *local_err = NULL; > + > +trace_pci_nvme_ns_mgmt(nvme_cid(req), nsid, sel, csi, > + NVME_CMD_FLAGS_PSDT(flags)); > + > +if (!(le16_to_cpu(id->oacs) & NVME_OACS_NS_MGMT)) { > +return NVME_NS_ATTACH_MGMT_NOTSPRD | NVME_DNR; > +} > + > +if (n->cntlid && !n->subsys) { > +error_setg(&local_err, "Secondary controller without subsystem"); > +return NVME_NS_ATTACH_MGMT_NOTSPRD | NVME_DNR; Leaking local_err. Any time you call error_setg(), the error needs to be reported or freed at some point. > +} > + > +n_p = n->subsys->ctrls[0]; > + > +switch (sel) { > +case NVME_NS_MANAGEMENT_CREATE: > +switch (csi) { > +case NVME_CSI_NVM: The following case is sufficiently large enough that the implementation should be its own function. > +if (nsid) { > +return NVME_INVALID_FIELD | NVME_DNR; > +} > + > +ret = nvme_h2c(n, (uint8_t *)&id_ns, sizeof(id_ns), req); > +if (ret) { > +return ret; > +} > + > +uint64_t nsze = le64_to_cpu(id_ns.nsze); > +uint64_t ncap = le64_to_cpu(id_ns.ncap); Please don't mix declarations with code; declare these local variables at the top of the scope. > + > +if (ncap > nsze) { > +return NVME_INVALID_FIELD | NVME_DNR; > +} else if (ncap != nsze) { > +return NVME_THIN_PROVISION_NOTSPRD | NVME_DNR; > +} > + > +nvme_validate_flbas(id_ns.flbas, &local_err); > +if (local_err) { > +error_report_err(local_err); > +return NVME_INVALID_FORMAT | NVME_DNR; > +} > + > +for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { > +if (nvme_ns(n_p, (uint32_t)i) || nvme_subsys_ns(n_p->subsys, > (uint32_t)i)) { > +continue; > +} > +break; > +} > + > + > +if (i > le32_to_cpu(n_p->id_ctrl.nn) || i > > NVME_MAX_NAMESPACES) { > + return NVME_NS_IDNTIFIER_UNAVAIL | NVME_DNR; > +} > +nsid = i; > + > +/* create ns here */ > +ns = nvme_ns_mgmt_create(n, nsid, &id_ns, &local_err); > +if (!ns || local_err) { > +if (local_err) { > +error_report_err(local_err); > +} > +return NVME_INVALID_FIELD | NVME_DNR; > +} > + > +if (nvme_cfg_update(n, ns->size, NVME_NS_ALLOC_CHK)) { > +/* place for delete-ns */ > +error_setg(&local_err, "Insufficient capacity, an orphaned > ns[%"PRIu32"] will be left behind", nsid); > +return NVME_NS_INSUFFICIENT_CAPAC | NVME_DNR; Leaked local_err. > +} > +(void)nvme_cfg_update(n, ns->size, NVME_NS_ALLOC); > +if (nvme_cfg_save(n)) { > +(void)nvme_cfg_update(n, ns->size, NVME_NS_DEALLOC); > +/* place for delete-ns */ > +error_setg(&local_err, "Cannot save conf file, an orphaned > ns[%"PRIu32"] will be left behind", nsid); > +return NVME_INVALID_FIELD | NVME_DNR; Another leaked local_err. > +} > +req->cqe.result = cpu_to_le32(nsid); > +break; > +case NVME_CSI_ZONED: > +/* fall through for now */ > +default: > +return NVME_INVALID_FIELD | NVME_DNR; > + } > +break; > +default: > +return NVME_INVALID_FIELD | NVME_DNR; > +} > + > +return NVME_SUCCESS; > +}
Re: [PATCH v4 1/2] hw/nvme: Support for Namespaces Management from guest OS - create-ns
On Wed, Dec 28, 2022 at 01:41:40PM -0600, Jonathan Derrick wrote: > +static uint16_t nvme_ns_mgmt(NvmeCtrl *n, NvmeRequest *req) > +{ > +NvmeCtrl *n_p = NULL; /* primary controller */ > +NvmeIdCtrl *id = &n->id_ctrl; > +NvmeNamespace *ns; > +NvmeIdNsMgmt id_ns = {}; > +uint8_t flags = req->cmd.flags; > +uint32_t nsid = le32_to_cpu(req->cmd.nsid); > +uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); > +uint32_t dw11 = le32_to_cpu(req->cmd.cdw11); > +uint8_t sel = dw10 & 0xf; > +uint8_t csi = (dw11 >> 24) & 0xf; > +uint16_t i; > +uint16_t ret; > +Error *local_err = NULL; > + > +trace_pci_nvme_ns_mgmt(nvme_cid(req), nsid, sel, csi, > NVME_CMD_FLAGS_PSDT(flags)); > + > +if (!(le16_to_cpu(id->oacs) & NVME_OACS_NS_MGMT)) { > +return NVME_NS_ATTACH_MGMT_NOTSPRD | NVME_DNR; > +} > + > +if (n->cntlid && !n->subsys) { > +error_setg(&local_err, "Secondary controller without subsystem"); > +return NVME_NS_ATTACH_MGMT_NOTSPRD | NVME_DNR; Leaking local_err. Any time you call error_setg(), the error needs to be reported or freed at some point. > +} > + > +n_p = n->subsys->ctrls[0]; > + > +switch (sel) { > +case NVME_NS_MANAGEMENT_CREATE: > +switch (csi) { > +case NVME_CSI_NVM: The following case is sufficiently large enough that the implementation should be its own function. > +if (nsid) { > +return NVME_INVALID_FIELD | NVME_DNR; > +} > + > +ret = nvme_h2c(n, (uint8_t *)&id_ns, sizeof(id_ns), req); > +if (ret) { > +return ret; > +} > + > +uint64_t nsze = le64_to_cpu(id_ns.nsze); > +uint64_t ncap = le64_to_cpu(id_ns.ncap); Please don't mix declarations with code; declare these local variables at the top of the scope. > + > +if (ncap > nsze) { > +return NVME_INVALID_FIELD | NVME_DNR; > +} else if (ncap != nsze) { > +return NVME_THIN_PROVISION_NOTSPRD | NVME_DNR; > +} > + > +nvme_validate_flbas(id_ns.flbas, &local_err); > +if (local_err) { > +error_report_err(local_err); > +return NVME_INVALID_FORMAT | NVME_DNR; > +} > + > +for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { > +if (nvme_ns(n_p, (uint32_t)i) || nvme_subsys_ns(n_p->subsys, > (uint32_t)i)) { > +continue; > +} > +break; > +} > + > + > +if (i > le32_to_cpu(n_p->id_ctrl.nn) || i > > NVME_MAX_NAMESPACES) { > + return NVME_NS_IDNTIFIER_UNAVAIL | NVME_DNR; > +} > +nsid = i; > + > +/* create ns here */ > +ns = nvme_ns_mgmt_create(n, nsid, &id_ns, &local_err); > +if (!ns || local_err) { > +if (local_err) { > +error_report_err(local_err); > +} > +return NVME_INVALID_FIELD | NVME_DNR; > +} > + > +if (nvme_cfg_update(n, ns->size, NVME_NS_ALLOC_CHK)) { > +/* place for delete-ns */ > +error_setg(&local_err, "Insufficient capacity, an orphaned > ns[%"PRIu32"] will be left behind", nsid); > +return NVME_NS_INSUFFICIENT_CAPAC | NVME_DNR; Leaked local_err. > +} > +(void)nvme_cfg_update(n, ns->size, NVME_NS_ALLOC); > +if (nvme_cfg_save(n)) { > +(void)nvme_cfg_update(n, ns->size, NVME_NS_DEALLOC); > +/* place for delete-ns */ > +error_setg(&local_err, "Cannot save conf file, an orphaned > ns[%"PRIu32"] will be left behind", nsid); > +return NVME_INVALID_FIELD | NVME_DNR; Another leaked local_err. > +} > +req->cqe.result = cpu_to_le32(nsid); > +break; > +case NVME_CSI_ZONED: > +/* fall through for now */ > +default: > +return NVME_INVALID_FIELD | NVME_DNR; > + } > +break; > +default: > +return NVME_INVALID_FIELD | NVME_DNR; > +} > + > +return NVME_SUCCESS; > +}
[PATCH v4 1/2] hw/nvme: Support for Namespaces Management from guest OS - create-ns
From: Michael Kropaczek Added support for NVMEe NameSpaces Mangement allowing the guest OS to create namespaces by issuing nvme create-ns command. It is an extension to currently implemented Qemu nvme virtual device. Virtual devices representing namespaces will be created and/or deleted during Qemu's running session, at anytime. Signed-off-by: Michael Kropaczek --- docs/system/devices/nvme.rst | 59 ++- hw/nvme/cfg_key_checker.c| 51 ++ hw/nvme/ctrl-cfg.c | 224 ++ hw/nvme/ctrl.c | 244 - hw/nvme/meson.build | 2 +- hw/nvme/ns-backend.c | 283 + hw/nvme/ns.c | 293 ++- hw/nvme/nvme.h | 31 +++- hw/nvme/subsys.c | 11 +- hw/nvme/trace-events | 2 + include/block/nvme.h | 30 include/hw/nvme/ctrl-cfg.h | 24 +++ include/hw/nvme/ns-cfg.h | 28 include/hw/nvme/nvme-cfg.h | 188 ++ qemu-img-cmds.hx | 6 + qemu-img.c | 132 16 files changed, 1554 insertions(+), 54 deletions(-) create mode 100644 hw/nvme/cfg_key_checker.c create mode 100644 hw/nvme/ctrl-cfg.c create mode 100644 hw/nvme/ns-backend.c create mode 100644 include/hw/nvme/ctrl-cfg.h create mode 100644 include/hw/nvme/ns-cfg.h create mode 100644 include/hw/nvme/nvme-cfg.h diff --git a/docs/system/devices/nvme.rst b/docs/system/devices/nvme.rst index 30f841ef62..6b3bee5e5d 100644 --- a/docs/system/devices/nvme.rst +++ b/docs/system/devices/nvme.rst @@ -92,6 +92,63 @@ There are a number of parameters available: attach the namespace to a specific ``nvme`` device (identified by an ``id`` parameter on the controller device). +Additional Namespaces managed by guest OS Namespaces Management +- + +.. code-block:: console + + -device nvme,id=nvme-ctrl,serial=1234,subsys=nvme-subsys,auto-ns-path=path + +Parameters: + +``auto-ns-path=`` + If specified indicates a support for dynamic management of nvme namespaces + by means of nvme create-ns command. This path points + to the storage area for backend images must exist. Additionally it requires + that parameter `ns-subsys` must be specified whereas parameter `drive` + must not. The legacy namespace backend is disabled, instead, a pair of + files 'nvme__ns_.cfg' and 'nvme__ns_.img' + will refer to respective namespaces. The create-ns, attach-ns + and detach-ns commands, issued at the guest side, will make changes to + those files accordingly. + For each namespace exists an image file in raw format and a config file + containing namespace parameters and state of the attachment allowing QEMU + to configure namespaces accordingly during start up. If for instance an + image file has a size of 0 bytes this will be interpreted as non existent + namespace. Issuing create-ns command will change the status in the config + files and and will re-size the image file accordingly so the image file + will be associated with the respective namespace. The main config file + nvme__ctrl.cfg keeps the track of allocated capacity to the + namespaces within the nvme controller. + As it is the case of a typical hard drive, backend images together with + config files need to be created. For this reason the qemu-img tool has + been extended by adding createns command. + + qemu-img createns {-S -C } + [-N ] {} + + Parameters: + -S and -C and are mandatory, `-S` must match `serial` parameter + and must match `auto-ns-path` parameter of "-device nvme,..." + specification. + -N is optional, if specified it will set a limit to the number of potential + namespaces and will reduce the number of backend images and config files + accordingly. As a default, a set of images of 0 bytes size and default + config files for 256 namespaces will be created, a total of 513 files. + +Please note that ``nvme-ns`` device is not required to support of dynamic +namespaces management feature. It is not prohibited to assign a such device to +``nvme`` device specified to support dynamic namespace management if one has +an use case to do so, however, it will only coexist and be out of the scope of +Namespaces Management. NsIds will be consistently managed, creation (create-ns) +of a namespace will not allocate the NsId already being taken. If ``nvme-ns`` +device conflicts with previously created one by create-ns (the same NsId), +it will break QEMU's start up. +More than one of NVMe controllers associated with NVMe subsystem are supported. +This feature requires that parameters ``serial=`` and ``subsys=`` of additional +controllers must match those of the primary controller and ``auto-ns-path=`` +must not be specified. + NVM Subsystems -- @@ -320,4 +377,4 @@ controller are: .. code-block::
[PATCH v4 2/2] hw/nvme: Support for Namespaces Management from guest OS - delete-ns
From: Michael Kropaczek Added support for NVMEe NameSpaces Mangement allowing the guest OS to delete namespaces by issuing nvme delete-ns command. It is an extension to currently implemented Qemu nvme virtual device. Virtual devices representing namespaces will be created and/or deleted during Qemu's running session, at anytime. Signed-off-by: Michael Kropaczek --- docs/system/devices/nvme.rst | 9 ++-- hw/nvme/ctrl.c | 81 +--- hw/nvme/ns-backend.c | 5 +++ hw/nvme/ns.c | 72 hw/nvme/nvme.h | 3 +- hw/nvme/trace-events | 1 + include/block/nvme.h | 1 + 7 files changed, 161 insertions(+), 11 deletions(-) diff --git a/docs/system/devices/nvme.rst b/docs/system/devices/nvme.rst index 6b3bee5e5d..f19072f1bc 100644 --- a/docs/system/devices/nvme.rst +++ b/docs/system/devices/nvme.rst @@ -103,12 +103,12 @@ Parameters: ``auto-ns-path=`` If specified indicates a support for dynamic management of nvme namespaces - by means of nvme create-ns command. This path points + by means of nvme create-ns and nvme delete-ns commands. This path points to the storage area for backend images must exist. Additionally it requires that parameter `ns-subsys` must be specified whereas parameter `drive` must not. The legacy namespace backend is disabled, instead, a pair of files 'nvme__ns_.cfg' and 'nvme__ns_.img' - will refer to respective namespaces. The create-ns, attach-ns + will refer to respective namespaces. The create-ns, delete-ns, attach-ns and detach-ns commands, issued at the guest side, will make changes to those files accordingly. For each namespace exists an image file in raw format and a config file @@ -140,8 +140,9 @@ Please note that ``nvme-ns`` device is not required to support of dynamic namespaces management feature. It is not prohibited to assign a such device to ``nvme`` device specified to support dynamic namespace management if one has an use case to do so, however, it will only coexist and be out of the scope of -Namespaces Management. NsIds will be consistently managed, creation (create-ns) -of a namespace will not allocate the NsId already being taken. If ``nvme-ns`` +Namespaces Management. Deletion (delete-ns) will render an error for this +namespace. NsIds will be consistently managed, creation (create-ns) of +a namespace will not allocate the NsId already being taken. If ``nvme-ns`` device conflicts with previously created one by create-ns (the same NsId), it will break QEMU's start up. More than one of NVMe controllers associated with NVMe subsystem are supported. diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 6155c2013e..6b93911f01 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -144,12 +144,12 @@ * * - `auto-ns-path` * If specified indicates a support for dynamic management of nvme namespaces - * by means of nvme create-ns command. This path pointing + * by means of nvme create-ns and nvme delete-ns commands. This path pointing * to a storage area for backend images must exist. Additionally it requires * that parameter `ns-subsys` must be specified whereas parameter `drive` * must not. The legacy namespace backend is disabled, instead, a pair of * files 'nvme__ns_.cfg' and 'nvme__ns_.img' - * will refer to respective namespaces. The create-ns, attach-ns + * will refer to respective namespaces. The create-ns, delete-ns, attach-ns * and detach-ns commands, issued at the guest side, will make changes to * those files accordingly. * For each namespace exists an image file in raw format and a config file @@ -5660,6 +5660,23 @@ static NvmeNamespace *nvme_ns_mgmt_create(NvmeCtrl *n, uint32_t nsid, NvmeIdNsMg return ns; } +static void nvme_ns_mgmt_delete(NvmeCtrl *n, uint32_t nsid, Error **errp) +{ +Error *local_err = NULL; + +if (!n->params.ns_directory) { +error_setg(&local_err, "delete-ns not supported if 'auto-ns-path' is not specified"); +} else if (n->namespace.blkconf.blk) { +error_setg(&local_err, "delete-ns not supported if 'drive' is specified"); +} else { +nvme_ns_delete(n, nsid, &local_err); +} + +if (local_err) { +error_propagate(errp, local_err); +} +} + static uint16_t nvme_ns_mgmt(NvmeCtrl *n, NvmeRequest *req) { NvmeCtrl *n_p = NULL; /* primary controller */ @@ -5673,6 +5690,9 @@ static uint16_t nvme_ns_mgmt(NvmeCtrl *n, NvmeRequest *req) uint8_t sel = dw10 & 0xf; uint8_t csi = (dw11 >> 24) & 0xf; uint16_t i; +uint16_t first = nsid; +uint16_t last = nsid;; +uint64_t image_size; uint16_t ret; Error *local_err = NULL; @@ -5739,16 +5759,15 @@ static uint16_t nvme_ns_mgmt(NvmeCtrl *n, NvmeRequest *req) return NVME_INVALID_FIELD | NVME_DNR; } +/* ns->size is the real image size after creation */
[PATCH v4 0/2] hw/nvme: Support for Namespaces Management from guest OS
From: Michael Kropaczek Description: Currently namespaces could be configured as follows: 1. Legacy Namespace - just one namespace within Nvme controller's where the back-end was specified for nvme device by -drive parameter pointing directly to the image file. 2. Additional Namespaces - specified by nvme-ns devices each having its own back-end. To have multiple namespaces each needed to be specified at Qemu's command line being associated with the most recently defined nvme-bus from nvme device. If a such additional namespace should be attached and/or detached by the guest OS, nvme controller has to be linked with another device nvme-subsys. All that have a static nature, all need to be specified at the Qemu's command line, all specified virtual nvme entities will be processed during Qemu's start-up then created and provided to the guest OS. To have a support for nvme create-ns and delete-ns commands with specified parameters a different approach is needed. Virtual devices representing namespaces need to be created and/or deleted during Qemu's running session, at anytime. The back-end image sizes for a namespace must accommodate the payload size and size of metadata resulted from specified parameters. The total capacity of the nvme controller altogether with un-allocated capacity needs to be taken into account and updated according to nvme create-ns and delete-ns commands respectively. Here is the approach: The nvme device will get new parameter: - auto-ns-path, which specifies the path to the storage area where back-end image and necessary config files located stored. The virtual devices representing namespaces will be created dynamically during the Qemu running session following issuance of nvme create-ns and delete-ns commands from the guest OS. QOM classes and instances will be created utilizing existing configuration scheme used during Qemu's start-up. Back-end image files will be neither created nor deleted during Qemu's startup or running session. Instead a set of back-end image files and relevant config will be created by qemu-img tool with createns sub-command prior to Qemu's session. Required parameters are: -S serial number which must match serial parameter of qemu-system-xx -device nvme command line specification, -C total capacity, and optional -N that will set a maximal limit on number of allowed namespaces (default 256) which will be followed by path name pointing to storage location corresponding to auto-ns-path of qemu-system-xx -device nvme parameter. Those created back-end image files will be pre-loaded during Qemu's start-up and then during running Qemu's session will be associated or disassociated with QOM namespaces virtual instances, as a result of issuing nvme create-ns or delete-ns commands. The associated back-end image file for relevant namespace will be re-sized as follows: delete-ns command will truncate image file to the size of 0, whereas create-ns command will re-size the image file to the size provided by nvme create-ns command parameters. Truncating/re-sizing is a result of blk_truncate() API which utilizes co-routines and should not block Qemu main thread while scheduling AIO operations. It is assured that all settings will retain over Qemu's start-ups and shutdowns. The implementation makes it possible to combine the existing "Additional Namespace" implementation with the new "Managed Namespaces". Those will coexist with obvious restrictions, like both will share the same NsIds space, "static" namespaces cannot be deleted or if its NsId specified at Qemu's command line will conflicts with previously created one by nvme create-ns (and retained), this will lead to an abort of Qemu at its start up. More than one of NVMe controllers associated with NVMe subsystem are supported. This feature requires that parameters serial= and subsys= of additional controllers must match those of the primary controller and auto-ns-path= must not be specified. Michael Kropaczek (2): hw/nvme: Support for Namespaces Management from guest OS - create-ns hw/nvme: Support for Namespaces Management from guest OS - delete-ns docs/system/devices/nvme.rst | 60 +- hw/nvme/cfg_key_checker.c| 51 + hw/nvme/ctrl-cfg.c | 224 + hw/nvme/ctrl.c | 313 +- hw/nvme/meson.build | 2 +- hw/nvme/ns-backend.c | 288 +++ hw/nvme/ns.c | 365 +++ hw/nvme/nvme.h | 32 ++- hw/nvme/subsys.c | 11 +- hw/nvme/trace-events | 3 + include/block/nvme.h | 31 +++ include/hw/nvme/ctrl-cfg.h | 24 +++ include/hw/nvme/ns-cfg.h | 28 +++ include/hw/nvme/nvme-cfg.h | 188 ++ qemu-img-cmds.hx | 6 + qemu-img.c | 132 + 16 files changed, 1704 insertions(+), 54 deletions(-) create mode 100644 hw/nvme/