Re: [RFC PATCH v3] hw/nvme:Adding Support for namespace management

2021-11-24 Thread Lukasz Maniak
On Tue, Nov 23, 2021 at 11:11:37AM +0100, Lukasz Maniak wrote:
> On Wed, Nov 10, 2021 at 04:56:29PM +0530, Naveen wrote:
> > From: Naveen Nagar 
> > 
> > This patch supports namespace management : create and delete operations
> > This patch has been tested with the following command and size of image
> > file for unallocated namespaces is taken as 0GB. ns_create will look into
> > the list of unallocated namespaces and it will initialize the same and 
> > return the nsid of the same. A new mandatory field has been added called
> > tnvmcap and we have ensured that the total capacity of namespace created
> > does not exceed tnvmcap
> > 
> > -device nvme-subsys,id=subsys0,tnvmcap=8
> > -device nvme,serial=foo,id=nvme0,subsys=subsys0
> > -device nvme,serial=bar,id=nvme1,subsys=subsys0
> > 
> > -drive id=ns1,file=ns1.img,if=none
> > -device nvme-ns,drive=ns1,bus=nvme0,nsid=1,zoned=false,shared=true
> > -drive id=ns2,file=ns2.img,if=none
> > -device nvme-ns,drive=ns2,bus=nvme0,nsid=2,zoned=false,shared=true
> > -drive id=ns3,file=ns3.img,if=none
> > -device nvme-ns,drive=ns3,bus=nvme0,nsid=3,zoned=false,shared=true
> > -drive id=ns4,file=ns4.img,if=none
> > -device nvme-ns,drive=ns4,bus=nvme0,nsid=4,zoned=false,shared=true
> > 
> > Please review and suggest if any changes are required.
> > 
> > Signed-off-by: Naveen Nagar 
> > 
> > Since v2:
> > -Lukasz Maniak found a bug in namespace attachment and proposed 
> >  solution is added
> > 
> 
> Hi Naveen,
> 
> The current implementation is inconsistent and thus has a bug related to
> unvmcap support.
> 
> Namespaces are pre-allocated after boot, and the initial namespace size
> is the size of the associated blockdev. If the blockdevs are non-zero
> sized then the first deletion of the namespaces associated with them
> will increment unvmcap by their size. This will make unvmcap greater
> than tnvmcap.
> 
> While the easiest way would be to prohibit the use of non-zero sized
> blockdev with namespace management, doing so would limit the
> functionality of the namespaces itself, which we would like to avoid.
> 
> This fix below addresses issues related to unvmcap and non-zero block
> devices. The unvmcap value will be properly updated on both the first
> and subsequent controllers added to the subsystem regardless of the
> order in which nvme-ns is defined on the command line before or after
> the controller definition. Additionally, if the block device size of any
> namespace causes the unvmcap to be exceeded, an error will be returned
> at the namespace definition point.
> 
> The fix is based on v3 based on v6.1.0, as v3 does not apply to master.
> 
> ---
>  hw/nvme/ctrl.c |  7 +++
>  hw/nvme/ns.c   | 23 ---
>  2 files changed, 27 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index 63ea2fcb14..dc0ad4155b 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -6594,6 +6594,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
> *pci_dev)
>  NvmeIdCtrl *id = &n->id_ctrl;
>  uint8_t *pci_conf = pci_dev->config;
>  uint64_t cap = ldq_le_p(&n->bar.cap);
> +int i;
>  
>  id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
>  id->ssvid = cpu_to_le16(pci_get_word(pci_conf + 
> PCI_SUBSYSTEM_VENDOR_ID));
> @@ -6672,6 +6673,12 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
> *pci_dev)
>  id->cmic |= NVME_CMIC_MULTI_CTRL;
>  id->tnvmcap = n->subsys->params.tnvmcap * GiB;
>  id->unvmcap = n->subsys->params.tnvmcap * GiB;
> +
> +for (i = 0; i < ARRAY_SIZE(n->subsys->namespaces); i++) {
> +if (n->subsys->namespaces[i]) {
> +id->unvmcap -= le64_to_cpu(n->subsys->namespaces[i]->size);
> +}
> +}
>  }
>  
>  NVME_CAP_SET_MQES(cap, 0x7ff);
> diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
> index f62a695132..c87d7f5bd6 100644
> --- a/hw/nvme/ns.c
> +++ b/hw/nvme/ns.c
> @@ -140,9 +140,12 @@ lbaf_found:
>  return 0;
>  }
>  
> -static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp)
> +static int nvme_ns_init_blk(NvmeNamespace *ns, NvmeSubsystem *subsys,
> +Error **errp)
>  {
>  bool read_only;
> +NvmeCtrl *ctrl;
> +int i;
>  
>  if (!blkconf_blocksizes(&ns->blkconf, errp)) {
>  return -1;
> @@ -164,6 +167,21 @@ static int nvme_ns_init_blk(NvmeNamespace *ns, Error 
> **errp)
>  return -1;
>  }
>  
> +if (subsys) {
> +for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) {
> +ctrl = nvme_subsys_ctrl(subsys, i);
> +
> +if (ctrl) {
> +if (ctrl->id_ctrl.unvmcap < le64_to_cpu(ns->size)) {
> +error_setg(errp, "blockdev size %ld exceeds subsystem's "
> + "unallocated capacity", ns->size);
> +} else {
> +ctrl->id_ctrl.unvmcap -= le64_to_cpu(ns->size);
> +}
> +}
> +}
> +}

Re: [RFC PATCH v3] hw/nvme:Adding Support for namespace management

2021-11-23 Thread Lukasz Maniak
On Wed, Nov 10, 2021 at 04:56:29PM +0530, Naveen wrote:
> From: Naveen Nagar 
> 
> This patch supports namespace management : create and delete operations
> This patch has been tested with the following command and size of image
> file for unallocated namespaces is taken as 0GB. ns_create will look into
> the list of unallocated namespaces and it will initialize the same and 
> return the nsid of the same. A new mandatory field has been added called
> tnvmcap and we have ensured that the total capacity of namespace created
> does not exceed tnvmcap
> 
> -device nvme-subsys,id=subsys0,tnvmcap=8
> -device nvme,serial=foo,id=nvme0,subsys=subsys0
> -device nvme,serial=bar,id=nvme1,subsys=subsys0
> 
> -drive id=ns1,file=ns1.img,if=none
> -device nvme-ns,drive=ns1,bus=nvme0,nsid=1,zoned=false,shared=true
> -drive id=ns2,file=ns2.img,if=none
> -device nvme-ns,drive=ns2,bus=nvme0,nsid=2,zoned=false,shared=true
> -drive id=ns3,file=ns3.img,if=none
> -device nvme-ns,drive=ns3,bus=nvme0,nsid=3,zoned=false,shared=true
> -drive id=ns4,file=ns4.img,if=none
> -device nvme-ns,drive=ns4,bus=nvme0,nsid=4,zoned=false,shared=true
> 
> Please review and suggest if any changes are required.
> 
> Signed-off-by: Naveen Nagar 
> 
> Since v2:
> -Lukasz Maniak found a bug in namespace attachment and proposed 
>  solution is added
> 

Hi Naveen,

The current implementation is inconsistent and thus has a bug related to
unvmcap support.

Namespaces are pre-allocated after boot, and the initial namespace size
is the size of the associated blockdev. If the blockdevs are non-zero
sized then the first deletion of the namespaces associated with them
will increment unvmcap by their size. This will make unvmcap greater
than tnvmcap.

While the easiest way would be to prohibit the use of non-zero sized
blockdev with namespace management, doing so would limit the
functionality of the namespaces itself, which we would like to avoid.

This fix below addresses issues related to unvmcap and non-zero block
devices. The unvmcap value will be properly updated on both the first
and subsequent controllers added to the subsystem regardless of the
order in which nvme-ns is defined on the command line before or after
the controller definition. Additionally, if the block device size of any
namespace causes the unvmcap to be exceeded, an error will be returned
at the namespace definition point.

The fix is based on v3 based on v6.1.0, as v3 does not apply to master.

---
 hw/nvme/ctrl.c |  7 +++
 hw/nvme/ns.c   | 23 ---
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 63ea2fcb14..dc0ad4155b 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -6594,6 +6594,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 NvmeIdCtrl *id = &n->id_ctrl;
 uint8_t *pci_conf = pci_dev->config;
 uint64_t cap = ldq_le_p(&n->bar.cap);
+int i;
 
 id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
 id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
@@ -6672,6 +6673,12 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->cmic |= NVME_CMIC_MULTI_CTRL;
 id->tnvmcap = n->subsys->params.tnvmcap * GiB;
 id->unvmcap = n->subsys->params.tnvmcap * GiB;
+
+for (i = 0; i < ARRAY_SIZE(n->subsys->namespaces); i++) {
+if (n->subsys->namespaces[i]) {
+id->unvmcap -= le64_to_cpu(n->subsys->namespaces[i]->size);
+}
+}
 }
 
 NVME_CAP_SET_MQES(cap, 0x7ff);
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index f62a695132..c87d7f5bd6 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -140,9 +140,12 @@ lbaf_found:
 return 0;
 }
 
-static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp)
+static int nvme_ns_init_blk(NvmeNamespace *ns, NvmeSubsystem *subsys,
+Error **errp)
 {
 bool read_only;
+NvmeCtrl *ctrl;
+int i;
 
 if (!blkconf_blocksizes(&ns->blkconf, errp)) {
 return -1;
@@ -164,6 +167,21 @@ static int nvme_ns_init_blk(NvmeNamespace *ns, Error 
**errp)
 return -1;
 }
 
+if (subsys) {
+for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) {
+ctrl = nvme_subsys_ctrl(subsys, i);
+
+if (ctrl) {
+if (ctrl->id_ctrl.unvmcap < le64_to_cpu(ns->size)) {
+error_setg(errp, "blockdev size %ld exceeds subsystem's "
+ "unallocated capacity", ns->size);
+} else {
+ctrl->id_ctrl.unvmcap -= le64_to_cpu(ns->size);
+}
+}
+}
+}
+
 return 0;
 }
 
@@ -480,7 +498,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
 }
 }
 
-if (nvme_ns_init_blk(ns, errp)) {
+if (nvme_ns_init_blk(ns, subsys, errp)) {
 return;
 }
 
@@ -527,7 +545,6 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
 
 

[RFC PATCH v3] hw/nvme:Adding Support for namespace management

2021-11-10 Thread Naveen
From: Naveen Nagar 

This patch supports namespace management : create and delete operations
This patch has been tested with the following command and size of image
file for unallocated namespaces is taken as 0GB. ns_create will look into
the list of unallocated namespaces and it will initialize the same and 
return the nsid of the same. A new mandatory field has been added called
tnvmcap and we have ensured that the total capacity of namespace created
does not exceed tnvmcap

-device nvme-subsys,id=subsys0,tnvmcap=8
-device nvme,serial=foo,id=nvme0,subsys=subsys0
-device nvme,serial=bar,id=nvme1,subsys=subsys0

-drive id=ns1,file=ns1.img,if=none
-device nvme-ns,drive=ns1,bus=nvme0,nsid=1,zoned=false,shared=true
-drive id=ns2,file=ns2.img,if=none
-device nvme-ns,drive=ns2,bus=nvme0,nsid=2,zoned=false,shared=true
-drive id=ns3,file=ns3.img,if=none
-device nvme-ns,drive=ns3,bus=nvme0,nsid=3,zoned=false,shared=true
-drive id=ns4,file=ns4.img,if=none
-device nvme-ns,drive=ns4,bus=nvme0,nsid=4,zoned=false,shared=true

Please review and suggest if any changes are required.

Signed-off-by: Naveen Nagar 

Since v2:
-Lukasz Maniak found a bug in namespace attachment and proposed 
 solution is added

---
 hw/nvme/ctrl.c   | 241 ---
 hw/nvme/ns.c |  61 ++-
 hw/nvme/nvme.h   |   7 +-
 hw/nvme/subsys.c |   3 +
 include/block/nvme.h |  18 +++-
 5 files changed, 288 insertions(+), 42 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 6baf9e0420..63ea2fcb14 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -219,6 +219,7 @@ static const uint32_t nvme_cse_acs[256] = {
 [NVME_ADM_CMD_SET_FEATURES] = NVME_CMD_EFF_CSUPP,
 [NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP,
 [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP,
+[NVME_ADM_CMD_NS_MANAGEMENT]= NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC,
 [NVME_ADM_CMD_NS_ATTACHMENT]= NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC,
 [NVME_ADM_CMD_FORMAT_NVM]   = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
 };
@@ -4450,11 +4451,19 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, 
NvmeRequest *req, bool active)
 NvmeNamespace *ns;
 NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
 uint32_t nsid = le32_to_cpu(c->nsid);
+NvmeIdNs *id_ns = NULL;
+uint16_t ret;
 
 trace_pci_nvme_identify_ns(nsid);
 
-if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) {
+if (!nvme_nsid_valid(n, nsid)) {
 return NVME_INVALID_NSID | NVME_DNR;
+} else if (nsid == NVME_NSID_BROADCAST) {
+id_ns = g_new0(NvmeIdNs, 1);
+nvme_ns_identify_common(id_ns);
+ret = nvme_c2h(n, (uint8_t *)id_ns, sizeof(NvmeIdNs), req);
+g_free(id_ns);
+return ret;
 }
 
 ns = nvme_ns(n, nsid);
@@ -5184,9 +5193,204 @@ static void nvme_select_iocs_ns(NvmeCtrl *n, 
NvmeNamespace *ns)
 }
 }
 
+static int nvme_blk_truncate(BlockBackend *blk, size_t len, Error **errp)
+{
+int ret;
+uint64_t perm, shared_perm;
+
+blk_get_perm(blk, &perm, &shared_perm);
+
+ret = blk_set_perm(blk, perm | BLK_PERM_RESIZE, shared_perm, errp);
+if (ret < 0) {
+return ret;
+}
+
+ret = blk_truncate(blk, len, false, PREALLOC_MODE_OFF, 0, errp);
+if (ret < 0) {
+return ret;
+}
+
+ret = blk_set_perm(blk, perm, shared_perm, errp);
+if (ret < 0) {
+return ret;
+}
+
+return 0;
+}
+
+static uint32_t nvme_allocate_nsid(NvmeCtrl *n)
+{
+uint32_t nsid = 0;
+for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+if (nvme_ns(n, i) || nvme_subsys_ns(n->subsys, i)) {
+continue;
+}
+
+nsid = i;
+return nsid;
+}
+return nsid;
+}
+
+static uint16_t nvme_namespace_create(NvmeCtrl *n, NvmeRequest *req)
+{
+uint32_t ret;
+NvmeIdNs id_ns_host;
+NvmeSubsystem *subsys = n->subsys;
+Error *err = NULL;
+uint8_t flbas_host;
+uint64_t ns_size;
+int lba_index;
+NvmeNamespace *ns;
+NvmeCtrl *ctrl;
+NvmeIdNs *id_ns;
+
+ret = nvme_h2c(n, (uint8_t *)&id_ns_host, sizeof(id_ns_host), req);
+if (ret) {
+return ret;
+}
+
+if (id_ns_host.ncap < id_ns_host.nsze) {
+return NVME_THIN_PROVISION_NO_SUPP | NVME_DNR;
+} else if (id_ns_host.ncap > id_ns_host.nsze) {
+return NVME_INVALID_FIELD | NVME_DNR;
+}
+
+if (!id_ns_host.nsze) {
+return NVME_INVALID_FIELD | NVME_DNR;
+}
+
+if (QSLIST_EMPTY(&subsys->unallocated_namespaces)) {
+return NVME_NS_ID_UNAVAILABLE;
+}
+
+ns = QSLIST_FIRST(&subsys->unallocated_namespaces);
+id_ns = &ns->id_ns;
+flbas_host = (id_ns_host.flbas) & (0xF);
+
+if (flbas_host > id_ns->nlbaf) {
+return NVME_INVALID_FORMAT | NVME_DNR;
+}
+
+ret = nvme_ns_setup(ns, &err);
+if (ret) {
+return ret;
+}
+
+id_ns->flbas = id_ns_host.flbas;
+id_ns->dps = id_ns_host.dps;
+id_ns->nmic = id_ns_host.nmic;
+
+lba_index =