On Tue, 2020-03-03 at 19:04 +0100, Frederic Barrat wrote:
> 
> Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> > From: Alastair D'Silva <alast...@d-silva.org>
> > 
> > This patch adds IOCTLs to allow userspace to request & fetch dumps
> > of the internal controller state.
> > 
> > This is useful during debugging or when a fatal error on the
> > controller
> > has occurred.
> > 
> > Signed-off-by: Alastair D'Silva <alast...@d-silva.org>
> > ---
> >   arch/powerpc/platforms/powernv/pmem/ocxl.c | 132
> > +++++++++++++++++++++
> >   include/uapi/nvdimm/ocxl-pmem.h            |  15 +++
> >   2 files changed, 147 insertions(+)
> > 
> > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > index 2b64504f9129..2cabafe1fc58 100644
> > --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > @@ -640,6 +640,124 @@ static int ioctl_error_log(struct ocxlpmem
> > *ocxlpmem,
> >     return 0;
> >   }
> >   
> > +static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem,
> > +           struct ioctl_ocxl_pmem_controller_dump_data __user
> > *uarg)
> > +{
> > +   struct ioctl_ocxl_pmem_controller_dump_data args;
> > +   u16 i;
> > +   u64 val;
> > +   int rc;
> > +
> > +   if (copy_from_user(&args, uarg, sizeof(args)))
> > +           return -EFAULT;
> > +
> > +   if (args.buf_size % 8)
> > +           return -EINVAL;
> > +
> > +   if (args.buf_size > ocxlpmem->admin_command.data_size)
> > +           return -EINVAL;
> > +
> > +   mutex_lock(&ocxlpmem->admin_command.lock);
> > +
> > +   rc = admin_command_request(ocxlpmem,
> > ADMIN_COMMAND_CONTROLLER_DUMP);
> > +   if (rc)
> > +           goto out;
> > +
> > +   val = ((u64)args.offset) << 32;
> > +   val |= args.buf_size;
> > +   rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > +                                 ocxlpmem-
> > >admin_command.request_offset + 0x08,
> > +                                 OCXL_LITTLE_ENDIAN, val);
> > +   if (rc)
> > +           goto out;
> > +
> > +   rc = admin_command_execute(ocxlpmem);
> > +   if (rc)
> > +           goto out;
> > +
> > +   rc = admin_command_complete_timeout(ocxlpmem,
> > +                                       ADMIN_COMMAND_CONTROLLER_DU
> > MP);
> > +   if (rc < 0) {
> > +           dev_warn(&ocxlpmem->dev, "Controller dump timed
> > out\n");
> > +           goto out;
> > +   }
> > +
> > +   rc = admin_response(ocxlpmem);
> > +   if (rc < 0)
> > +           goto out;
> > +   if (rc != STATUS_SUCCESS) {
> > +           warn_status(ocxlpmem,
> > +                       "Unexpected status from retrieve error
> > log",
> > +                       rc);
> > +           goto out;
> > +   }
> 
> 
> It would help if there were a comment indicating how the 3 ioctls are
> used. My understanding is that userland:
> - issues a first ioctl requesting the controller to prepare a state dump
> - then issues one or more ioctls to fetch the data. The number of calls
>   required to get the full state depends on the size of the buffer
>   passed by the user
> - issues a last ioctl to tell the controller that we're done, presumably
>   to let it free some resources.
> 

Ok, will add it to the blurb.
> 
> > +
> > +   for (i = 0; i < args.buf_size; i += 8) {
> > +           u64 val;
> > +
> > +           rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > +                                        ocxlpmem-
> > >admin_command.data_offset + i,
> > +                                        OCXL_HOST_ENDIAN, &val);
> > +           if (rc)
> > +                   goto out;
> > +
> > +           if (copy_to_user(&args.buf[i], &val, sizeof(u64))) {
> > +                   rc = -EFAULT;
> > +                   goto out;
> > +           }
> > +   }
> > +
> > +   if (copy_to_user(uarg, &args, sizeof(args))) {
> > +           rc = -EFAULT;
> > +           goto out;
> > +   }
> > +
> > +   rc = admin_response_handled(ocxlpmem);
> > +   if (rc)
> > +           goto out;
> > +
> > +out:
> > +   mutex_unlock(&ocxlpmem->admin_command.lock);
> > +   return rc;
> > +}
> > +
> > +int request_controller_dump(struct ocxlpmem *ocxlpmem)
> > +{
> > +   int rc;
> > +   u64 busy = 1;
> > +
> > +   rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_CHIC,
> > +                               OCXL_LITTLE_ENDIAN,
> > +                               GLOBAL_MMIO_CHI_CDA);
> > +
> 
> rc is not checked here.

Whoops

> 
> 
> > +
> > +   rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_HCI,
> > +                               OCXL_LITTLE_ENDIAN,
> > +                               GLOBAL_MMIO_HCI_CONTROLLER_DUMP);
> > +   if (rc)
> > +           return rc;
> > +
> > +   while (busy) {
> > +           rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > +                                        GLOBAL_MMIO_HCI,
> > +                                        OCXL_LITTLE_ENDIAN,
> > &busy);
> > +           if (rc)
> > +                   return rc;
> > +
> > +           busy &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP;
> 
> Setting 'busy' doesn't hurt, but it's not really useful, is it?
> 
> We should add some kind of timeout so that if the controller hits an 
> issue, we don't spin in kernel space endlessly.
> 
> 

Here we are polling the controller dump bit of the HCI register until
the controller clears it - that line is masking off the bits we don't
care about.

I'll talk to the firmware team about adding a timeout for that to the
spec so we know how long to wait before giving up.

> 
> > +           cond_resched();
> > +   }
> > +
> > +   return 0;
> > +}

> > +
> > +static int ioctl_controller_dump_complete(struct ocxlpmem
> > *ocxlpmem)
> > +{
> > +   return ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_HCI,
> > +                               OCXL_LITTLE_ENDIAN,
> > +                               GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COL
> > LECTED);
> > +}
> > +
> >   static long file_ioctl(struct file *file, unsigned int cmd,
> > unsigned long args)
> >   {
> >     struct ocxlpmem *ocxlpmem = file->private_data;
> > @@ -650,7 +768,21 @@ static long file_ioctl(struct file *file,
> > unsigned int cmd, unsigned long args)
> >             rc = ioctl_error_log(ocxlpmem,
> >                                  (struct ioctl_ocxl_pmem_error_log
> > __user *)args);
> >             break;
> > +
> > +   case IOCTL_OCXL_PMEM_CONTROLLER_DUMP:
> > +           rc = request_controller_dump(ocxlpmem);
> > +           break;
> > +
> > +   case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA:
> > +           rc = ioctl_controller_dump_data(ocxlpmem,
> > +                                           (struct
> > ioctl_ocxl_pmem_controller_dump_data __user *)args);
> > +           break;
> > +
> > +   case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE:
> > +           rc = ioctl_controller_dump_complete(ocxlpmem);
> > +           break;
> >     }
> > +
> >     return rc;
> >   }
> >   
> > diff --git a/include/uapi/nvdimm/ocxl-pmem.h
> > b/include/uapi/nvdimm/ocxl-pmem.h
> > index b10f8ac0c20f..d4d8512d03f7 100644
> > --- a/include/uapi/nvdimm/ocxl-pmem.h
> > +++ b/include/uapi/nvdimm/ocxl-pmem.h
> > @@ -38,9 +38,24 @@ struct ioctl_ocxl_pmem_error_log {
> >     __u8 *buf; /* pointer to output buffer */
> >   };
> >   
> > +struct ioctl_ocxl_pmem_controller_dump_data {
> > +   __u8 *buf; /* pointer to output buffer */
> 
> We only support 64-bit user apps on powerpc, but using a pointer type
> in a kernel ABI is unusual. We should use a known-size type like __u64.
> (This also applies to the buf pointer in struct
> ioctl_ocxl_pmem_error_log from the previous patch.)
> 
> The rest of the structure will also be padded by the compiler, which
> we should avoid.
> 
>     Fred
> 

Ok, I'll coerce the pointers into a __u64.

> 
> 
> > +   __u16 buf_size; /* in/out, buffer size provided/required.
> > +                    * If required is greater than provided, the
> > buffer
> > +                    * will be truncated to the amount provided. If
> > its
> > +                    * less, then only the required bytes will be
> > populated.
> > +                    * If it is 0, then there is no more dump data
> > available.
> > +                    */
> > +   __u32 offset; /* in, Offset within the dump */
> > +   __u64 reserved[8];
> > +};
> > +
> >   /* ioctl numbers */
> >   #define OCXL_PMEM_MAGIC 0x5C
> >   /* SCM devices */
> >   #define IOCTL_OCXL_PMEM_ERROR_LOG                 _IOWR(OCXL_PMEM
> > _MAGIC, 0x01, struct ioctl_ocxl_pmem_error_log)
> > +#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP                    _IO(OCX
> > L_PMEM_MAGIC, 0x02)
> > +#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA               _IOWR(O
> > CXL_PMEM_MAGIC, 0x03, struct ioctl_ocxl_pmem_controller_dump_data)
> > +#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE   _IO(OCXL_PMEM_M
> > AGIC, 0x04)
> >   
> >   #endif /* _UAPI_OCXL_SCM_H */
> > 
-- 
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819

Reply via email to