Re: [PATCH v2 13/20] nvme: refactor prp mapping

2019-11-25 Thread Beata Michalska
On Wed, 20 Nov 2019 at 09:39, Klaus Birkelund  wrote:
>
> On Tue, Nov 12, 2019 at 03:23:43PM +, Beata Michalska wrote:
> > Hi Klaus,
> >
> > On Tue, 15 Oct 2019 at 11:57, Klaus Jensen  wrote:
> > >
> > > Instead of handling both QSGs and IOVs in multiple places, simply use
> > > QSGs everywhere by assuming that the request does not involve the
> > > controller memory buffer (CMB). If the request is found to involve the
> > > CMB, convert the QSG to an IOV and issue the I/O. The QSG is converted
> > > to an IOV by the dma helpers anyway, so the CMB path is not unfairly
> > > affected by this simplifying change.
> > >
> >
> > Out of curiosity, in how many cases the SG list will have to
> > be converted to IOV ? Does that justify creating the SG list in vain ?
> >
>
> You got me wondering. Only using QSGs does not really remove much
> complexity, so I readded the direct use of IOVs for the CMB path. There
> is no harm in that.
>
> > > +static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, uint64_t prp1,
> > > +uint64_t prp2, uint32_t len, NvmeRequest *req)
> > >  {
> > >  hwaddr trans_len = n->page_size - (prp1 % n->page_size);
> > >  trans_len = MIN(len, trans_len);
> > >  int num_prps = (len >> n->page_bits) + 1;
> > > +uint16_t status = NVME_SUCCESS;
> > > +bool prp_list_in_cmb = false;
> > > +
> > > +trace_nvme_map_prp(req->cid, req->cmd.opcode, trans_len, len, prp1, 
> > > prp2,
> > > +num_prps);
> > >
> > >  if (unlikely(!prp1)) {
> > >  trace_nvme_err_invalid_prp();
> > >  return NVME_INVALID_FIELD | NVME_DNR;
> > > -} else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
> > > -   prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) 
> > > {
> > > -qsg->nsg = 0;
> > > -qemu_iovec_init(iov, num_prps);
> > > -qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], 
> > > trans_len);
> > > -} else {
> > > -pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
> > > -qemu_sglist_add(qsg, prp1, trans_len);
> > >  }
> > > +
> > > +if (nvme_addr_is_cmb(n, prp1)) {
> > > +req->is_cmb = true;
> > > +}
> > > +
> > This seems to be used here and within read/write functions which are calling
> > this one. Maybe there is a nicer way to track that instead of passing
> > the request
> > from multiple places ?
> >
>
> Hmm. Whether or not the command reads/writes from the CMB is really only
> something you can determine by looking at the PRPs (which is done in
> nvme_map_prp), so I think this is the right way to track it. Or do you
> have something else in mind?
>

I think what I mean is that it seems that this variable is being used only in
functions that call map_prp directly, but in order to set that variable within
the req structure this needs to be passed along from the top level of command
submission instead of maybe having an additional param to map_prp to set it to be
CMB command or not. If that makes sense.

BR
Beata
> > > +pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
> > > +qemu_sglist_add(qsg, prp1, trans_len);
> > > +
> > >  len -= trans_len;
> > >  if (len) {
> > >  if (unlikely(!prp2)) {
> > >  trace_nvme_err_invalid_prp2_missing();
> > > +status = NVME_INVALID_FIELD | NVME_DNR;
> > >  goto unmap;
> > >  }
> > > +
> > >  if (len > n->page_size) {
> > >  uint64_t prp_list[n->max_prp_ents];
> > >  uint32_t nents, prp_trans;
> > >  int i = 0;
> > >
> > > +if (nvme_addr_is_cmb(n, prp2)) {
> > > +prp_list_in_cmb = true;
> > > +}
> > > +
> > >  nents = (len + n->page_size - 1) >> n->page_bits;
> > >  prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
> > > -nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
> > > +nvme_addr_read(n, prp2, (void *) prp_list, prp_trans);
> > >  while (len != 0) {
> > > +bool addr_is_cmb;
> > >  uint64_t prp_ent = le64_to_cpu(prp_list[i]);
> > >
> > >  if (i == n->max_prp_ents - 1 && len > n->page_size) {
> > >  if (unlikely(!prp_ent || prp_ent & (n->page_size - 
> > > 1))) {
> > >  trace_nvme_err_invalid_prplist_ent(prp_ent);
> > > +status = NVME_INVALID_FIELD | NVME_DNR;
> > > +goto unmap;
> > > +}
> > > +
> > > +addr_is_cmb = nvme_addr_is_cmb(n, prp_ent);
> > > +if ((prp_list_in_cmb && !addr_is_cmb) ||
> > > +(!prp_list_in_cmb && addr_is_cmb)) {
> >
> > Minor: Same condition (based on different vars) is being used in
> > multiple places. Might be worth to move it outside and just pass in
> > the needed values.
> >
>
> I'm really not sure what I was smoking when writing those conditions.

Re: [PATCH v2 13/20] nvme: refactor prp mapping

2019-11-20 Thread Klaus Birkelund
On Tue, Nov 12, 2019 at 03:23:43PM +, Beata Michalska wrote:
> Hi Klaus,
> 
> On Tue, 15 Oct 2019 at 11:57, Klaus Jensen  wrote:
> >
> > Instead of handling both QSGs and IOVs in multiple places, simply use
> > QSGs everywhere by assuming that the request does not involve the
> > controller memory buffer (CMB). If the request is found to involve the
> > CMB, convert the QSG to an IOV and issue the I/O. The QSG is converted
> > to an IOV by the dma helpers anyway, so the CMB path is not unfairly
> > affected by this simplifying change.
> >
> 
> Out of curiosity, in how many cases the SG list will have to
> be converted to IOV ? Does that justify creating the SG list in vain ?
> 

You got me wondering. Only using QSGs does not really remove much
complexity, so I readded the direct use of IOVs for the CMB path. There
is no harm in that.

> > +static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, uint64_t prp1,
> > +uint64_t prp2, uint32_t len, NvmeRequest *req)
> >  {
> >  hwaddr trans_len = n->page_size - (prp1 % n->page_size);
> >  trans_len = MIN(len, trans_len);
> >  int num_prps = (len >> n->page_bits) + 1;
> > +uint16_t status = NVME_SUCCESS;
> > +bool prp_list_in_cmb = false;
> > +
> > +trace_nvme_map_prp(req->cid, req->cmd.opcode, trans_len, len, prp1, 
> > prp2,
> > +num_prps);
> >
> >  if (unlikely(!prp1)) {
> >  trace_nvme_err_invalid_prp();
> >  return NVME_INVALID_FIELD | NVME_DNR;
> > -} else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
> > -   prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
> > -qsg->nsg = 0;
> > -qemu_iovec_init(iov, num_prps);
> > -qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], 
> > trans_len);
> > -} else {
> > -pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
> > -qemu_sglist_add(qsg, prp1, trans_len);
> >  }
> > +
> > +if (nvme_addr_is_cmb(n, prp1)) {
> > +req->is_cmb = true;
> > +}
> > +
> This seems to be used here and within read/write functions which are calling
> this one. Maybe there is a nicer way to track that instead of passing
> the request
> from multiple places ?
> 

Hmm. Whether or not the command reads/writes from the CMB is really only
something you can determine by looking at the PRPs (which is done in
nvme_map_prp), so I think this is the right way to track it. Or do you
have something else in mind?

> > +pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
> > +qemu_sglist_add(qsg, prp1, trans_len);
> > +
> >  len -= trans_len;
> >  if (len) {
> >  if (unlikely(!prp2)) {
> >  trace_nvme_err_invalid_prp2_missing();
> > +status = NVME_INVALID_FIELD | NVME_DNR;
> >  goto unmap;
> >  }
> > +
> >  if (len > n->page_size) {
> >  uint64_t prp_list[n->max_prp_ents];
> >  uint32_t nents, prp_trans;
> >  int i = 0;
> >
> > +if (nvme_addr_is_cmb(n, prp2)) {
> > +prp_list_in_cmb = true;
> > +}
> > +
> >  nents = (len + n->page_size - 1) >> n->page_bits;
> >  prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
> > -nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
> > +nvme_addr_read(n, prp2, (void *) prp_list, prp_trans);
> >  while (len != 0) {
> > +bool addr_is_cmb;
> >  uint64_t prp_ent = le64_to_cpu(prp_list[i]);
> >
> >  if (i == n->max_prp_ents - 1 && len > n->page_size) {
> >  if (unlikely(!prp_ent || prp_ent & (n->page_size - 
> > 1))) {
> >  trace_nvme_err_invalid_prplist_ent(prp_ent);
> > +status = NVME_INVALID_FIELD | NVME_DNR;
> > +goto unmap;
> > +}
> > +
> > +addr_is_cmb = nvme_addr_is_cmb(n, prp_ent);
> > +if ((prp_list_in_cmb && !addr_is_cmb) ||
> > +(!prp_list_in_cmb && addr_is_cmb)) {
> 
> Minor: Same condition (based on different vars) is being used in
> multiple places. Might be worth to move it outside and just pass in
> the needed values.
> 

I'm really not sure what I was smoking when writing those conditions.
It's just `var != nvme_addr_is_cmb(n, prp_ent)`. I fixed that. No need
to pull it out I think.

> >  static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
> > -   uint64_t prp1, uint64_t prp2)
> > +uint64_t prp1, uint64_t prp2, NvmeRequest *req)
> >  {
> >  QEMUSGList qsg;
> > -QEMUIOVector iov;
> >  uint16_t status = NVME_SUCCESS;
> >
> > -if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
> > -return NVME_INVALID_FIELD | NVME_DNR;
> > +status = nvme_map_prp(n, &qsg, prp1, prp2, len, req);
> > +if (status) {
> > +return status;
>

Re: [PATCH v2 13/20] nvme: refactor prp mapping

2019-11-12 Thread Beata Michalska
Hi Klaus,

On Tue, 15 Oct 2019 at 11:57, Klaus Jensen  wrote:
>
> Instead of handling both QSGs and IOVs in multiple places, simply use
> QSGs everywhere by assuming that the request does not involve the
> controller memory buffer (CMB). If the request is found to involve the
> CMB, convert the QSG to an IOV and issue the I/O. The QSG is converted
> to an IOV by the dma helpers anyway, so the CMB path is not unfairly
> affected by this simplifying change.
>

Out of curiosity, in how many cases the SG list will have to
be converted to IOV ? Does that justify creating the SG list in vain ?

> As a side-effect, this patch also allows PRPs to be located in the CMB.
> The logic ensures that if some of the PRP is in the CMB, all of it must
> be located there, as per the specification.
>
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c   | 255 --
>  hw/block/nvme.h   |   4 +-
>  hw/block/trace-events |   1 +
>  include/block/nvme.h  |   1 +
>  4 files changed, 174 insertions(+), 87 deletions(-)
>
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 1e2320b38b14..cbc0b6a660b6 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -179,138 +179,200 @@ static void nvme_set_error_page(NvmeCtrl *n, uint16_t 
> sqid, uint16_t cid,
>  n->elp_index = (n->elp_index + 1) % n->params.elpe;
>  }
>
> -static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t 
> prp1,
> - uint64_t prp2, uint32_t len, NvmeCtrl *n)
> +static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, uint64_t prp1,
> +uint64_t prp2, uint32_t len, NvmeRequest *req)
>  {
>  hwaddr trans_len = n->page_size - (prp1 % n->page_size);
>  trans_len = MIN(len, trans_len);
>  int num_prps = (len >> n->page_bits) + 1;
> +uint16_t status = NVME_SUCCESS;
> +bool prp_list_in_cmb = false;
> +
> +trace_nvme_map_prp(req->cid, req->cmd.opcode, trans_len, len, prp1, prp2,
> +num_prps);
>
>  if (unlikely(!prp1)) {
>  trace_nvme_err_invalid_prp();
>  return NVME_INVALID_FIELD | NVME_DNR;
> -} else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
> -   prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
> -qsg->nsg = 0;
> -qemu_iovec_init(iov, num_prps);
> -qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], 
> trans_len);
> -} else {
> -pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
> -qemu_sglist_add(qsg, prp1, trans_len);
>  }
> +
> +if (nvme_addr_is_cmb(n, prp1)) {
> +req->is_cmb = true;
> +}
> +
This seems to be used here and within read/write functions which are calling
this one. Maybe there is a nicer way to track that instead of passing
the request
from multiple places ?

> +pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
> +qemu_sglist_add(qsg, prp1, trans_len);
> +
>  len -= trans_len;
>  if (len) {
>  if (unlikely(!prp2)) {
>  trace_nvme_err_invalid_prp2_missing();
> +status = NVME_INVALID_FIELD | NVME_DNR;
>  goto unmap;
>  }
> +
>  if (len > n->page_size) {
>  uint64_t prp_list[n->max_prp_ents];
>  uint32_t nents, prp_trans;
>  int i = 0;
>
> +if (nvme_addr_is_cmb(n, prp2)) {
> +prp_list_in_cmb = true;
> +}
> +
>  nents = (len + n->page_size - 1) >> n->page_bits;
>  prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
> -nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
> +nvme_addr_read(n, prp2, (void *) prp_list, prp_trans);
>  while (len != 0) {
> +bool addr_is_cmb;
>  uint64_t prp_ent = le64_to_cpu(prp_list[i]);
>
>  if (i == n->max_prp_ents - 1 && len > n->page_size) {
>  if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
>  trace_nvme_err_invalid_prplist_ent(prp_ent);
> +status = NVME_INVALID_FIELD | NVME_DNR;
> +goto unmap;
> +}
> +
> +addr_is_cmb = nvme_addr_is_cmb(n, prp_ent);
> +if ((prp_list_in_cmb && !addr_is_cmb) ||
> +(!prp_list_in_cmb && addr_is_cmb)) {

Minor: Same condition (based on different vars) is being used in
multiple places. Might be worth to move it outside and just pass in
the needed values.

> +status = NVME_INVALID_USE_OF_CMB | NVME_DNR;
>  goto unmap;
>  }
>
>  i = 0;
>  nents = (len + n->page_size - 1) >> n->page_bits;
>  prp_trans = MIN(n->max_prp_ents, nents) * 
> sizeof(uint64_t);
> -nvme_addr_read(n, prp_ent, (void *)prp_list,
> -prp_trans);
> +  

[PATCH v2 13/20] nvme: refactor prp mapping

2019-10-15 Thread Klaus Jensen
Instead of handling both QSGs and IOVs in multiple places, simply use
QSGs everywhere by assuming that the request does not involve the
controller memory buffer (CMB). If the request is found to involve the
CMB, convert the QSG to an IOV and issue the I/O. The QSG is converted
to an IOV by the dma helpers anyway, so the CMB path is not unfairly
affected by this simplifying change.

As a side-effect, this patch also allows PRPs to be located in the CMB.
The logic ensures that if some of the PRP is in the CMB, all of it must
be located there, as per the specification.

Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c   | 255 --
 hw/block/nvme.h   |   4 +-
 hw/block/trace-events |   1 +
 include/block/nvme.h  |   1 +
 4 files changed, 174 insertions(+), 87 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 1e2320b38b14..cbc0b6a660b6 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -179,138 +179,200 @@ static void nvme_set_error_page(NvmeCtrl *n, uint16_t 
sqid, uint16_t cid,
 n->elp_index = (n->elp_index + 1) % n->params.elpe;
 }
 
-static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
- uint64_t prp2, uint32_t len, NvmeCtrl *n)
+static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, uint64_t prp1,
+uint64_t prp2, uint32_t len, NvmeRequest *req)
 {
 hwaddr trans_len = n->page_size - (prp1 % n->page_size);
 trans_len = MIN(len, trans_len);
 int num_prps = (len >> n->page_bits) + 1;
+uint16_t status = NVME_SUCCESS;
+bool prp_list_in_cmb = false;
+
+trace_nvme_map_prp(req->cid, req->cmd.opcode, trans_len, len, prp1, prp2,
+num_prps);
 
 if (unlikely(!prp1)) {
 trace_nvme_err_invalid_prp();
 return NVME_INVALID_FIELD | NVME_DNR;
-} else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
-   prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
-qsg->nsg = 0;
-qemu_iovec_init(iov, num_prps);
-qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], 
trans_len);
-} else {
-pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
-qemu_sglist_add(qsg, prp1, trans_len);
 }
+
+if (nvme_addr_is_cmb(n, prp1)) {
+req->is_cmb = true;
+}
+
+pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
+qemu_sglist_add(qsg, prp1, trans_len);
+
 len -= trans_len;
 if (len) {
 if (unlikely(!prp2)) {
 trace_nvme_err_invalid_prp2_missing();
+status = NVME_INVALID_FIELD | NVME_DNR;
 goto unmap;
 }
+
 if (len > n->page_size) {
 uint64_t prp_list[n->max_prp_ents];
 uint32_t nents, prp_trans;
 int i = 0;
 
+if (nvme_addr_is_cmb(n, prp2)) {
+prp_list_in_cmb = true;
+}
+
 nents = (len + n->page_size - 1) >> n->page_bits;
 prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
-nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
+nvme_addr_read(n, prp2, (void *) prp_list, prp_trans);
 while (len != 0) {
+bool addr_is_cmb;
 uint64_t prp_ent = le64_to_cpu(prp_list[i]);
 
 if (i == n->max_prp_ents - 1 && len > n->page_size) {
 if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
 trace_nvme_err_invalid_prplist_ent(prp_ent);
+status = NVME_INVALID_FIELD | NVME_DNR;
+goto unmap;
+}
+
+addr_is_cmb = nvme_addr_is_cmb(n, prp_ent);
+if ((prp_list_in_cmb && !addr_is_cmb) ||
+(!prp_list_in_cmb && addr_is_cmb)) {
+status = NVME_INVALID_USE_OF_CMB | NVME_DNR;
 goto unmap;
 }
 
 i = 0;
 nents = (len + n->page_size - 1) >> n->page_bits;
 prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
-nvme_addr_read(n, prp_ent, (void *)prp_list,
-prp_trans);
+nvme_addr_read(n, prp_ent, (void *) prp_list, prp_trans);
 prp_ent = le64_to_cpu(prp_list[i]);
 }
 
 if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
 trace_nvme_err_invalid_prplist_ent(prp_ent);
+status = NVME_INVALID_FIELD | NVME_DNR;
 goto unmap;
 }
 
-trans_len = MIN(len, n->page_size);
-if (qsg->nsg){
-qemu_sglist_add(qsg, prp_ent, trans_len);
-} else {
-qemu_iovec_add(iov, (void *)&n->cmbuf[prp_ent - 
n->ctrl_mem.addr], trans_len);
+addr_is_cmb = nvme_addr_is_cmb(n, prp_ent)