Re: [PATCH v5 4/5] hw/nvme: fix mmio read

2021-07-20 Thread Peter Maydell
On Mon, 19 Jul 2021 at 23:47, Klaus Jensen  wrote:
>
> From: Klaus Jensen 
>
> The new PMR test unearthed a long-standing issue with MMIO reads on
> big-endian hosts.
>
> Fix this by unconditionally storing all controller registers in little
> endian.
>
> Cc: Gollu Appalanaidu 
> Reported-by: Peter Maydell 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/nvme/ctrl.c | 290 +++--
>  1 file changed, 162 insertions(+), 128 deletions(-)

Reviewed-by: Peter Maydell 

thanks
-- PMM



Re: [PATCH v5 4/5] hw/nvme: fix mmio read

2021-07-20 Thread Philippe Mathieu-Daudé
On 7/20/21 3:33 PM, Peter Maydell wrote:
> On Tue, 20 Jul 2021 at 13:58, Philippe Mathieu-Daudé  wrote:
>>
>> On 7/20/21 12:46 AM, Klaus Jensen wrote:
>>> From: Klaus Jensen 
>>>
>>> The new PMR test unearthed a long-standing issue with MMIO reads on
>>> big-endian hosts.
>>>
>>> Fix this by unconditionally storing all controller registers in little
>>> endian.
> 
>> My brain overflowed at this point, too many changes to track :/
>>
>> Would it make sense to split it? (per big function body maybe?)
> 
> I reviewed (a previous version of) the patch by:
>  (1) check that after the patch is applied there aren't any
>  direct accesses to n->bar.anything which don't go via a
>  ld*/st* (ie it didn't miss anything)

OK

>  (2) check that the accesses which use ldq/stq are consistently
>  doing so for those fields and that they are uint64_t

OK

>  (3) read through the patch and check all the changes are ok
>  (either using inline calls to the accessors, or fishing
>  the value out at the start of the function)

OK, perfect then :)

> It is a big patch, but I'm not sure splitting it apart helps much.

Since it is already reviewed, splitting is not needed.

Thanks a lot!

Phil.




Re: [PATCH v5 4/5] hw/nvme: fix mmio read

2021-07-20 Thread Peter Maydell
On Tue, 20 Jul 2021 at 13:58, Philippe Mathieu-Daudé  wrote:
>
> On 7/20/21 12:46 AM, Klaus Jensen wrote:
> > From: Klaus Jensen 
> >
> > The new PMR test unearthed a long-standing issue with MMIO reads on
> > big-endian hosts.
> >
> > Fix this by unconditionally storing all controller registers in little
> > endian.

> My brain overflowed at this point, too many changes to track :/
>
> Would it make sense to split it? (per big function body maybe?)

I reviewed (a previous version of) the patch by:
 (1) check that after the patch is applied there aren't any
 direct accesses to n->bar.anything which don't go via a
 ld*/st* (ie it didn't miss anything)
 (2) check that the accesses which use ldq/stq are consistently
 doing so for those fields and that they are uint64_t
 (3) read through the patch and check all the changes are ok
 (either using inline calls to the accessors, or fishing
 the value out at the start of the function)

It is a big patch, but I'm not sure splitting it apart helps much.
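
For anyone reading along without the tree handy, here is a minimal sketch of
the two styles that point (3) refers to. It only uses macros and accessors
that already appear in the patch; the helper functions themselves are
hypothetical, not something in hw/nvme/ctrl.c:

    /* style 1: inline accessor call at the point of use */
    static bool nvme_css_is_admin_only(NvmeCtrl *n)     /* hypothetical */
    {
        return NVME_CC_CSS(ldl_le_p(&n->bar.cc)) == NVME_CC_CSS_ADMIN_ONLY;
    }

    /* style 2: fish the value out once at the start of the function */
    static bool nvme_qsize_valid(NvmeCtrl *n, uint32_t qsize)   /* hypothetical */
    {
        uint64_t cap = ldq_le_p(&n->bar.cap);   /* 64-bit register, hence ldq */

        return qsize && qsize <= NVME_CAP_MQES(cap);
    }

Either way the bytes in n->bar stay little-endian; only the local temporary
(or the expression result) is in host byte order.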

thanks
-- PMM



Re: [PATCH v5 4/5] hw/nvme: fix mmio read

2021-07-20 Thread Philippe Mathieu-Daudé
On 7/20/21 12:46 AM, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> The new PMR test unearthed a long-standing issue with MMIO reads on
> big-endian hosts.
> 
> Fix this by unconditionally storing all controller registers in little
> endian.
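
The pattern, as far as this diff shows, is that n->bar now always holds the
little-endian byte layout the spec defines, and host code converts on every
access with the ld*_le_p()/st*_le_p() helpers from "qemu/bswap.h". A minimal
sketch of the store/load pair (the store mirrors the csts hunks below; the
load-back line is only for illustration):

    /* store the guest-visible, little-endian encoding of the register */
    stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);

    /* convert back to host byte order only when host code needs the value,
     * so the result is the same on big- and little-endian hosts */
    uint32_t csts = ldl_le_p(&n->bar.csts);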
> 
> Cc: Gollu Appalanaidu 
> Reported-by: Peter Maydell 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/nvme/ctrl.c | 290 +++--
>  1 file changed, 162 insertions(+), 128 deletions(-)
> 
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index 0449cc4dee9b..43dfaeac9f54 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -439,10 +439,12 @@ static uint8_t nvme_sq_empty(NvmeSQueue *sq)
>  
>  static void nvme_irq_check(NvmeCtrl *n)
>  {
> +uint32_t intms = ldl_le_p(&n->bar.intms);
> +
>  if (msix_enabled(&(n->parent_obj))) {
>  return;
>  }
> -if (~n->bar.intms & n->irq_status) {

Why not use an inline call like the rest of this file?

   if (~ldl_le_p(&n->bar.intms) & n->irq_status) {

Anyway, not an issue.

> +if (~intms & n->irq_status) {
>  pci_irq_assert(&n->parent_obj);
>  } else {
>  pci_irq_deassert(&n->parent_obj);
> @@ -1289,7 +1291,7 @@ static void nvme_post_cqes(void *opaque)
>  if (ret) {
>  trace_pci_nvme_err_addr_write(addr);
>  trace_pci_nvme_err_cfs();
> -n->bar.csts = NVME_CSTS_FAILED;
> +stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
>  break;
>  }
>  QTAILQ_REMOVE(&cq->req_list, req, entry);
> @@ -4022,7 +4024,7 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest *req)
>  trace_pci_nvme_err_invalid_create_sq_sqid(sqid);
>  return NVME_INVALID_QID | NVME_DNR;
>  }
> -if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
> +if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) {
>  trace_pci_nvme_err_invalid_create_sq_size(qsize);
>  return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
>  }
> @@ -4208,7 +4210,7 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
>  return NVME_INVALID_FIELD | NVME_DNR;
>  }
>  
> -switch (NVME_CC_CSS(n->bar.cc)) {
> +switch (NVME_CC_CSS(ldl_le_p(&n->bar.cc))) {
>  case NVME_CC_CSS_NVM:
>  src_iocs = nvme_cse_iocs_nvm;
>  /* fall through */
> @@ -4370,7 +4372,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
>  trace_pci_nvme_err_invalid_create_cq_cqid(cqid);
>  return NVME_INVALID_QID | NVME_DNR;
>  }
> -if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
> +if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) {
>  trace_pci_nvme_err_invalid_create_cq_size(qsize);
>  return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
>  }
> @@ -5163,17 +5165,19 @@ static void nvme_update_dmrsl(NvmeCtrl *n)
>  
>  static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns)
>  {
> +uint32_t cc = ldl_le_p(&n->bar.cc);

This one is understandable.

>  ns->iocs = nvme_cse_iocs_none;
>  switch (ns->csi) {
>  case NVME_CSI_NVM:
> -if (NVME_CC_CSS(n->bar.cc) != NVME_CC_CSS_ADMIN_ONLY) {
> +if (NVME_CC_CSS(cc) != NVME_CC_CSS_ADMIN_ONLY) {
>  ns->iocs = nvme_cse_iocs_nvm;
>  }
>  break;
>  case NVME_CSI_ZONED:
> -if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_CSI) {
> +if (NVME_CC_CSS(cc) == NVME_CC_CSS_CSI) {
>  ns->iocs = nvme_cse_iocs_zoned;
> -} else if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_NVM) {
> +} else if (NVME_CC_CSS(cc) == NVME_CC_CSS_NVM) {
>  ns->iocs = nvme_cse_iocs_nvm;
>  }
>  break;
> @@ -5510,7 +5514,7 @@ static void nvme_process_sq(void *opaque)
>  if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) {
>  trace_pci_nvme_err_addr_read(addr);
>  trace_pci_nvme_err_cfs();
> -n->bar.csts = NVME_CSTS_FAILED;
> +stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
>  break;
>  }
>  nvme_inc_sq_head(sq);
> @@ -5565,8 +5569,6 @@ static void nvme_ctrl_reset(NvmeCtrl *n)
>  n->aer_queued = 0;
>  n->outstanding_aers = 0;
>  n->qs_created = false;
> -
> -n->bar.cc = 0;

This change is not documented in the commit message; is it related to the fix?

>  }
>  
>  static void nvme_ctrl_shutdown(NvmeCtrl *n)
> @@ -5605,7 +5607,12 @@ static void nvme_select_iocs(NvmeCtrl *n)
>  
>  static int nvme_start_ctrl(NvmeCtrl *n)
>  {
> -uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12;
> +uint64_t cap = ldq_le_p(&n->bar.cap);
> +uint32_t cc = ldl_le_p(&n->bar.cc);
> +uint32_t aqa = ldl_le_p(&n->bar.aqa);
> +uint64_t asq = ldq_le_p(&n->bar.asq);
> +uint64_t acq = ldq_le_p(&n->bar.acq);
> +uint32_t page_bits = NVME_CC_MPS(cc) + 12;
>  uint32_t page_size = 1 << page_bits;

My brain overflowed at this point, too many changes to track :/

Would it make sense to split it? (per big function body maybe?)