On 7/20/21 12:46 AM, Klaus Jensen wrote:
> From: Klaus Jensen <k.jen...@samsung.com>
>
> The new PMR test unearthed a long-standing issue with MMIO reads on
> big-endian hosts.
>
> Fix this by unconditionally storing all controller registers in little
> endian.
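For context: the MMIO read path memcpy()s raw bytes out of the register
file, so the in-memory layout has to be the little-endian one the guest
expects, regardless of host byte order. A stand-alone sketch of the
failure mode (the demo_* helpers are local stand-ins, not QEMU's
ldl_le_p()/stl_le_p() implementations):

  #include <stdint.h>
  #include <string.h>
  #include <stdio.h>

  /* Read 4 bytes stored little endian, on any host. */
  static uint32_t demo_ldl_le_p(const void *p)
  {
      const uint8_t *b = p;
      return (uint32_t)b[0] | (uint32_t)b[1] << 8 |
             (uint32_t)b[2] << 16 | (uint32_t)b[3] << 24;
  }

  /* Store 4 bytes little endian, on any host. */
  static void demo_stl_le_p(void *p, uint32_t v)
  {
      uint8_t *b = p;
      b[0] = v; b[1] = v >> 8; b[2] = v >> 16; b[3] = v >> 24;
  }

  int main(void)
  {
      uint32_t reg = 0x12345678;    /* host-endian store: layout varies */
      uint8_t raw[4];

      memcpy(raw, &reg, sizeof(raw));        /* what the MMIO read does */
      printf("host-endian store read back: %08x\n", demo_ldl_le_p(raw));
      /* prints 78563412 on big-endian hosts: analogous to the bug */

      demo_stl_le_p(&reg, 0x12345678);       /* LE store: layout fixed */
      memcpy(raw, &reg, sizeof(raw));
      printf("little-endian store read back: %08x\n", demo_ldl_le_p(raw));
      /* prints 12345678 everywhere */
      return 0;
  }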
>
> Cc: Gollu Appalanaidu <anaidu.go...@samsung.com>
> Reported-by: Peter Maydell <peter.mayd...@linaro.org>
> Signed-off-by: Klaus Jensen <k.jen...@samsung.com>
> ---
>  hw/nvme/ctrl.c | 290 +++++++++++++++++++++++++++----------------------
>  1 file changed, 162 insertions(+), 128 deletions(-)
>
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index 0449cc4dee9b..43dfaeac9f54 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -439,10 +439,12 @@ static uint8_t nvme_sq_empty(NvmeSQueue *sq)
>
>  static void nvme_irq_check(NvmeCtrl *n)
>  {
> +    uint32_t intms = ldl_le_p(&n->bar.intms);
> +
>      if (msix_enabled(&(n->parent_obj))) {
>          return;
>      }
> -    if (~n->bar.intms & n->irq_status) {

Why not use an inline call like the rest of this file?

           if (~ldl_le_p(&n->bar.intms) & n->irq_status) {

Anyway, not an issue.

> +    if (~intms & n->irq_status) {
>          pci_irq_assert(&n->parent_obj);
>      } else {
>          pci_irq_deassert(&n->parent_obj);
> @@ -1289,7 +1291,7 @@ static void nvme_post_cqes(void *opaque)
>          if (ret) {
>              trace_pci_nvme_err_addr_write(addr);
>              trace_pci_nvme_err_cfs();
> -            n->bar.csts = NVME_CSTS_FAILED;
> +            stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
>              break;
>          }
>          QTAILQ_REMOVE(&cq->req_list, req, entry);
> @@ -4022,7 +4024,7 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest *req)
>          trace_pci_nvme_err_invalid_create_sq_sqid(sqid);
>          return NVME_INVALID_QID | NVME_DNR;
>      }
> -    if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
> +    if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) {
>          trace_pci_nvme_err_invalid_create_sq_size(qsize);
>          return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
>      }
> @@ -4208,7 +4210,7 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
>          return NVME_INVALID_FIELD | NVME_DNR;
>      }
>
> -    switch (NVME_CC_CSS(n->bar.cc)) {
> +    switch (NVME_CC_CSS(ldl_le_p(&n->bar.cc))) {
>      case NVME_CC_CSS_NVM:
>          src_iocs = nvme_cse_iocs_nvm;
>          /* fall through */
> @@ -4370,7 +4372,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
>          trace_pci_nvme_err_invalid_create_cq_cqid(cqid);
>          return NVME_INVALID_QID | NVME_DNR;
>      }
> -    if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
> +    if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) {
>          trace_pci_nvme_err_invalid_create_cq_size(qsize);
>          return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
>      }
> @@ -5163,17 +5165,19 @@ static void nvme_update_dmrsl(NvmeCtrl *n)
>
>  static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns)
>  {
> +    uint32_t cc = ldl_le_p(&n->bar.cc);

This one is understandable.
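Hoisting the load into a local is the right call here, since the same
register is consulted several times below. Schematically (stand-alone
sketch; DemoBar, demo_ldl_le_p() and the DEMO_CC_CSS() field layout are
placeholders, not the real QEMU definitions):

  #include <stdint.h>

  typedef struct { uint32_t cc; } DemoBar;

  /* Local stand-in: read 4 little-endian bytes on any host. */
  static uint32_t demo_ldl_le_p(const void *p)
  {
      const uint8_t *b = p;
      return (uint32_t)b[0] | (uint32_t)b[1] << 8 |
             (uint32_t)b[2] << 16 | (uint32_t)b[3] << 24;
  }

  #define DEMO_CC_CSS(cc) (((cc) >> 4) & 0x7)   /* hypothetical layout */

  /* Inline load: fine when the register is read exactly once. */
  static int css_once(DemoBar *bar)
  {
      return DEMO_CC_CSS(demo_ldl_le_p(&bar->cc));
  }

  /* Hoisted local: one load, several uses, less noise per use. */
  static int css_many(DemoBar *bar)
  {
      uint32_t cc = demo_ldl_le_p(&bar->cc);
      /* several uses of one snapshot, as in nvme_select_iocs_ns() */
      return DEMO_CC_CSS(cc) == 0 || DEMO_CC_CSS(cc) == 6;
  }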
>      ns->iocs = nvme_cse_iocs_none;
>      switch (ns->csi) {
>      case NVME_CSI_NVM:
> -        if (NVME_CC_CSS(n->bar.cc) != NVME_CC_CSS_ADMIN_ONLY) {
> +        if (NVME_CC_CSS(cc) != NVME_CC_CSS_ADMIN_ONLY) {
>              ns->iocs = nvme_cse_iocs_nvm;
>          }
>          break;
>      case NVME_CSI_ZONED:
> -        if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_CSI) {
> +        if (NVME_CC_CSS(cc) == NVME_CC_CSS_CSI) {
>              ns->iocs = nvme_cse_iocs_zoned;
> -        } else if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_NVM) {
> +        } else if (NVME_CC_CSS(cc) == NVME_CC_CSS_NVM) {
>              ns->iocs = nvme_cse_iocs_nvm;
>          }
>          break;
> @@ -5510,7 +5514,7 @@ static void nvme_process_sq(void *opaque)
>          if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) {
>              trace_pci_nvme_err_addr_read(addr);
>              trace_pci_nvme_err_cfs();
> -            n->bar.csts = NVME_CSTS_FAILED;
> +            stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
>              break;
>          }
>          nvme_inc_sq_head(sq);
> @@ -5565,8 +5569,6 @@ static void nvme_ctrl_reset(NvmeCtrl *n)
>      n->aer_queued = 0;
>      n->outstanding_aers = 0;
>      n->qs_created = false;
> -
> -    n->bar.cc = 0;

This change is not documented; is it related to the fix?

>  }
>
>  static void nvme_ctrl_shutdown(NvmeCtrl *n)
> @@ -5605,7 +5607,12 @@ static void nvme_select_iocs(NvmeCtrl *n)
>
>  static int nvme_start_ctrl(NvmeCtrl *n)
>  {
> -    uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12;
> +    uint64_t cap = ldq_le_p(&n->bar.cap);
> +    uint32_t cc = ldl_le_p(&n->bar.cc);
> +    uint32_t aqa = ldl_le_p(&n->bar.aqa);
> +    uint64_t asq = ldq_le_p(&n->bar.asq);
> +    uint64_t acq = ldq_le_p(&n->bar.acq);
> +    uint32_t page_bits = NVME_CC_MPS(cc) + 12;
>      uint32_t page_size = 1 << page_bits;

My brain overflowed at this point, too many changes to track :/
Would it make sense to split it? (per big function body maybe?)

Note, using so many manual endian accesses seems fragile. Maybe we
could find a way to write a pair of macros taking 'n' + bar
'fieldname' as input, doing the correctly sized swap via
sizeof_field()?

Something like (untested):

  #define ld_bar(fieldname) \
          ldn_le_p(&n->bar.fieldname, \
                   sizeof_field(NvmeBar, fieldname))

  #define st_bar(fieldname, val) \
          stn_le_p(&n->bar.fieldname, \
                   sizeof_field(NvmeBar, fieldname), val)

Using as:

  uint64_t cap = ld_bar(cap);

Or if you prefer:

  #define BAR_LD(bar, fieldname) \
          ldn_le_p(&(bar)->fieldname, \
                   sizeof_field(NvmeBar, fieldname))

as:

  uint64_t cap = BAR_LD(&n->bar, cap);

Regards,

Phil.
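P.S.: A quick stand-alone sanity check of the macro idea, with local
stand-ins for ldn_le_p()/stn_le_p() and sizeof_field() (same signatures
as the QEMU helpers, but not their implementations; still untested
against the tree):

  #include <stdint.h>

  #define sizeof_field(type, field) sizeof(((type *)0)->field)

  /* Stand-in: load 'sz' little-endian bytes on any host. */
  static uint64_t ldn_le_p(const void *p, int sz)
  {
      const uint8_t *b = p;
      uint64_t v = 0;
      for (int i = 0; i < sz; i++) {
          v |= (uint64_t)b[i] << (8 * i);
      }
      return v;
  }

  /* Stand-in: store 'sz' little-endian bytes on any host. */
  static void stn_le_p(void *p, int sz, uint64_t v)
  {
      uint8_t *b = p;
      for (int i = 0; i < sz; i++) {
          b[i] = v >> (8 * i);
      }
  }

  /* Just enough of the register file to exercise both widths. */
  typedef struct NvmeBar { uint64_t cap; uint32_t cc; } NvmeBar;
  typedef struct NvmeCtrl { NvmeBar bar; } NvmeCtrl;

  #define ld_bar(fieldname) \
          ldn_le_p(&n->bar.fieldname, sizeof_field(NvmeBar, fieldname))
  #define st_bar(fieldname, val) \
          stn_le_p(&n->bar.fieldname, sizeof_field(NvmeBar, fieldname), val)

  int main(void)
  {
      NvmeCtrl ctrl = { 0 }, *n = &ctrl;  /* 'n' is captured by the macros */

      st_bar(cap, 0x20002028ULL);         /* arbitrary 64-bit test value */
      st_bar(cc, 0x00460061);             /* arbitrary 32-bit test value */

      /* Each load picks its width from sizeof_field(). */
      return !(ld_bar(cap) == 0x20002028ULL && ld_bar(cc) == 0x00460061);
  }

One caveat: ld_bar()/st_bar() silently capture the local 'n', which
tends to be frowned upon; the BAR_LD() form avoids that.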