Re: [PATCH v3 18/27] powerpc/powernv/pmem: Add controller dump IOCTLs
On Wed, 2020-03-04 at 17:53 +1100, Andrew Donnellan wrote: > On 21/2/20 2:27 pm, Alastair D'Silva wrote: > > +static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem, > > + struct ioctl_ocxl_pmem_controller_dump_data __user > > *uarg) > > +{ > > + struct ioctl_ocxl_pmem_controller_dump_data args; > > + u16 i; > > + u64 val; > > + int rc; > > + > > + if (copy_from_user(&args, uarg, sizeof(args))) > > + return -EFAULT; > > + > > + if (args.buf_size % 8) > > + return -EINVAL; > > + > > + if (args.buf_size > ocxlpmem->admin_command.data_size) > > + return -EINVAL; > > + > > + mutex_lock(&ocxlpmem->admin_command.lock); > > + > > + rc = admin_command_request(ocxlpmem, > > ADMIN_COMMAND_CONTROLLER_DUMP); > > + if (rc) > > + goto out; > > + > > + val = ((u64)args.offset) << 32; > > + val |= args.buf_size; > > + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, > > + ocxlpmem- > > >admin_command.request_offset + 0x08, > > + OCXL_LITTLE_ENDIAN, val); > > + if (rc) > > + goto out; > > + > > + rc = admin_command_execute(ocxlpmem); > > + if (rc) > > + goto out; > > + > > + rc = admin_command_complete_timeout(ocxlpmem, > > + ADMIN_COMMAND_CONTROLLER_DU > > MP); > > + if (rc < 0) { > > + dev_warn(&ocxlpmem->dev, "Controller dump timed > > out\n"); > > + goto out; > > + } > > + > > + rc = admin_response(ocxlpmem); > > + if (rc < 0) > > + goto out; > > + if (rc != STATUS_SUCCESS) { > > + warn_status(ocxlpmem, > > + "Unexpected status from retrieve error > > log", > > Controller dump > Ok > > + rc); > > + goto out; > > + } > > + > > + for (i = 0; i < args.buf_size; i += 8) { > > + u64 val; > > + > > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, > > +ocxlpmem- > > >admin_command.data_offset + i, > > +OCXL_HOST_ENDIAN, &val); > > Is a controller dump something where we want to do endian swapping? > No, we just have raw binary data that we want to pass through. OCXL_HOST_ENDIAN does no swapping. 
> Any reason we're not doing the usual check of the data identifier, > additional data length, etc.? > I'll add that. > > + if (rc) > > + goto out; > > + > > + if (copy_to_user(&args.buf[i], &val, sizeof(u64))) { > > + rc = -EFAULT; > > + goto out; > > + } > > + } > > + > > + if (copy_to_user(uarg, &args, sizeof(args))) { > > + rc = -EFAULT; > > + goto out; > > + } > > + > > + rc = admin_response_handled(ocxlpmem); > > + if (rc) > > + goto out; > > + > > +out: > > + mutex_unlock(&ocxlpmem->admin_command.lock); > > + return rc; > > +} > > + > > +int request_controller_dump(struct ocxlpmem *ocxlpmem) > > +{ > > + int rc; > > + u64 busy = 1; > > + > > + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, > > GLOBAL_MMIO_CHIC, > > + OCXL_LITTLE_ENDIAN, > > + GLOBAL_MMIO_CHI_CDA); > > This return code is ignored. > > > + > > + > > + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, > > GLOBAL_MMIO_HCI, > > + OCXL_LITTLE_ENDIAN, > > + GLOBAL_MMIO_HCI_CONTROLLER_DUMP); > > + if (rc) > > + return rc; > > + > > + while (busy) { > > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, > > +GLOBAL_MMIO_HCI, > > +OCXL_LITTLE_ENDIAN, > > &busy); > > + if (rc) > > + return rc; > > + > > + busy &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP; > > + cond_resched(); > > + } > > + > > + return 0; > > +} > > -- Alastair D'Silva Open Source Developer Linux Technology Centre, IBM Australia mob: 0423 762 819
Re: [PATCH v3 18/27] powerpc/powernv/pmem: Add controller dump IOCTLs
On Tue, 2020-03-03 at 19:04 +0100, Frederic Barrat wrote: > > Le 21/02/2020 à 04:27, Alastair D'Silva a écrit : > > From: Alastair D'Silva > > > > This patch adds IOCTLs to allow userspace to request & fetch dumps > > of the internal controller state. > > > > This is useful during debugging or when a fatal error on the > > controller > > has occurred. > > > > Signed-off-by: Alastair D'Silva > > --- > > arch/powerpc/platforms/powernv/pmem/ocxl.c | 132 > > + > > include/uapi/nvdimm/ocxl-pmem.h| 15 +++ > > 2 files changed, 147 insertions(+) > > > > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c > > b/arch/powerpc/platforms/powernv/pmem/ocxl.c > > index 2b64504f9129..2cabafe1fc58 100644 > > --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c > > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c > > @@ -640,6 +640,124 @@ static int ioctl_error_log(struct ocxlpmem > > *ocxlpmem, > > return 0; > > } > > > > +static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem, > > + struct ioctl_ocxl_pmem_controller_dump_data __user > > *uarg) > > +{ > > + struct ioctl_ocxl_pmem_controller_dump_data args; > > + u16 i; > > + u64 val; > > + int rc; > > + > > + if (copy_from_user(&args, uarg, sizeof(args))) > > + return -EFAULT; > > + > > + if (args.buf_size % 8) > > + return -EINVAL; > > + > > + if (args.buf_size > ocxlpmem->admin_command.data_size) > > + return -EINVAL; > > + > > + mutex_lock(&ocxlpmem->admin_command.lock); > > + > > + rc = admin_command_request(ocxlpmem, > > ADMIN_COMMAND_CONTROLLER_DUMP); > > + if (rc) > > + goto out; > > + > > + val = ((u64)args.offset) << 32; > > + val |= args.buf_size; > > + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, > > + ocxlpmem- > > >admin_command.request_offset + 0x08, > > + OCXL_LITTLE_ENDIAN, val); > > + if (rc) > > + goto out; > > + > > + rc = admin_command_execute(ocxlpmem); > > + if (rc) > > + goto out; > > + > > + rc = admin_command_complete_timeout(ocxlpmem, > > + ADMIN_COMMAND_CONTROLLER_DU > > MP); > > + if (rc < 
0) { > > + dev_warn(&ocxlpmem->dev, "Controller dump timed > > out\n"); > > + goto out; > > + } > > + > > + rc = admin_response(ocxlpmem); > > + if (rc < 0) > > + goto out; > > + if (rc != STATUS_SUCCESS) { > > + warn_status(ocxlpmem, > > + "Unexpected status from retrieve error > > log", > > + rc); > > + goto out; > > + } > > > It would help if there was a comment indicating how the 3 ioctls are > used. My understanding is that the userland is: > - requesting the controller to prepare a state dump > - then one or more ioctls to fetch the data. The number of calls > required to get the full state really depends on the size of the > buffer > passed by user > - a last ioctl to tell the controller that we're done, presumably to > let > it free some resources. > Ok, will add it to the blurb. > > > + > > + for (i = 0; i < args.buf_size; i += 8) { > > + u64 val; > > + > > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, > > +ocxlpmem- > > >admin_command.data_offset + i, > > +OCXL_HOST_ENDIAN, &val); > > + if (rc) > > + goto out; > > + > > + if (copy_to_user(&args.buf[i], &val, sizeof(u64))) { > > + rc = -EFAULT; > > + goto out; > > + } > > + } > > + > > + if (copy_to_user(uarg, &args, sizeof(args))) { > > + rc = -EFAULT; > > + goto out; > > + } > > + > > + rc = admin_response_handled(ocxlpmem); > > + if (rc) > > + goto out; > > + > > +out: > > + mutex_unlock(&ocxlpmem->admin_command.lock); > > + return rc; > > +} > > + > > +int request_controller_dump(struct ocxlpmem *ocxlpmem) > > +{ > > + int rc; > > + u64 busy = 1; > > + > > + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, > > GLOBAL_MMIO_CHIC, > > + OCXL_LITTLE_ENDIAN, > > + GLOBAL_MMIO_CHI_CDA); > > + > > rc is not checked here. 
Whoops > > > > + > > + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, > > GLOBAL_MMIO_HCI, > > + OCXL_LITTLE_ENDIAN, > > + GLOBAL_MMIO_HCI_CONTROLLER_DUMP); > > + if (rc) > > + return rc; > > + > > + while (busy) { > > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, > > +GLOBAL_MMIO_HCI, > > +OCXL_LITTLE_ENDIAN, > > &busy); > > + if (rc) > > + return rc; > > + > > + busy &= GLOBAL_MMIO_HCI_CONTROLLER_D
Re: [PATCH v3 18/27] powerpc/powernv/pmem: Add controller dump IOCTLs
On 21/2/20 2:27 pm, Alastair D'Silva wrote: +static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem, + struct ioctl_ocxl_pmem_controller_dump_data __user *uarg) +{ + struct ioctl_ocxl_pmem_controller_dump_data args; + u16 i; + u64 val; + int rc; + + if (copy_from_user(&args, uarg, sizeof(args))) + return -EFAULT; + + if (args.buf_size % 8) + return -EINVAL; + + if (args.buf_size > ocxlpmem->admin_command.data_size) + return -EINVAL; + + mutex_lock(&ocxlpmem->admin_command.lock); + + rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_CONTROLLER_DUMP); + if (rc) + goto out; + + val = ((u64)args.offset) << 32; + val |= args.buf_size; + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, + ocxlpmem->admin_command.request_offset + 0x08, + OCXL_LITTLE_ENDIAN, val); + if (rc) + goto out; + + rc = admin_command_execute(ocxlpmem); + if (rc) + goto out; + + rc = admin_command_complete_timeout(ocxlpmem, + ADMIN_COMMAND_CONTROLLER_DUMP); + if (rc < 0) { + dev_warn(&ocxlpmem->dev, "Controller dump timed out\n"); + goto out; + } + + rc = admin_response(ocxlpmem); + if (rc < 0) + goto out; + if (rc != STATUS_SUCCESS) { + warn_status(ocxlpmem, + "Unexpected status from retrieve error log", Controller dump + rc); + goto out; + } + + for (i = 0; i < args.buf_size; i += 8) { + u64 val; + + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, + ocxlpmem->admin_command.data_offset + i, +OCXL_HOST_ENDIAN, &val); Is a controller dump something where we want to do endian swapping? Any reason we're not doing the usual check of the data identifier, additional data length etc? 
+ if (rc) + goto out; + + if (copy_to_user(&args.buf[i], &val, sizeof(u64))) { + rc = -EFAULT; + goto out; + } + } + + if (copy_to_user(uarg, &args, sizeof(args))) { + rc = -EFAULT; + goto out; + } + + rc = admin_response_handled(ocxlpmem); + if (rc) + goto out; + +out: + mutex_unlock(&ocxlpmem->admin_command.lock); + return rc; +} + +int request_controller_dump(struct ocxlpmem *ocxlpmem) +{ + int rc; + u64 busy = 1; + + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CHIC, + OCXL_LITTLE_ENDIAN, + GLOBAL_MMIO_CHI_CDA); This return code is ignored + + + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI, + OCXL_LITTLE_ENDIAN, + GLOBAL_MMIO_HCI_CONTROLLER_DUMP); + if (rc) + return rc; + + while (busy) { + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, +GLOBAL_MMIO_HCI, +OCXL_LITTLE_ENDIAN, &busy); + if (rc) + return rc; + + busy &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP; + cond_resched(); + } + + return 0; +} -- Andrew Donnellan OzLabs, ADL Canberra a...@linux.ibm.com IBM Australia Limited
Re: [PATCH v3 18/27] powerpc/powernv/pmem: Add controller dump IOCTLs
Le 21/02/2020 à 04:27, Alastair D'Silva a écrit : From: Alastair D'Silva This patch adds IOCTLs to allow userspace to request & fetch dumps of the internal controller state. This is useful during debugging or when a fatal error on the controller has occurred. Signed-off-by: Alastair D'Silva --- arch/powerpc/platforms/powernv/pmem/ocxl.c | 132 + include/uapi/nvdimm/ocxl-pmem.h| 15 +++ 2 files changed, 147 insertions(+) diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c index 2b64504f9129..2cabafe1fc58 100644 --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c @@ -640,6 +640,124 @@ static int ioctl_error_log(struct ocxlpmem *ocxlpmem, return 0; } +static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem, + struct ioctl_ocxl_pmem_controller_dump_data __user *uarg) +{ + struct ioctl_ocxl_pmem_controller_dump_data args; + u16 i; + u64 val; + int rc; + + if (copy_from_user(&args, uarg, sizeof(args))) + return -EFAULT; + + if (args.buf_size % 8) + return -EINVAL; + + if (args.buf_size > ocxlpmem->admin_command.data_size) + return -EINVAL; + + mutex_lock(&ocxlpmem->admin_command.lock); + + rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_CONTROLLER_DUMP); + if (rc) + goto out; + + val = ((u64)args.offset) << 32; + val |= args.buf_size; + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, + ocxlpmem->admin_command.request_offset + 0x08, + OCXL_LITTLE_ENDIAN, val); + if (rc) + goto out; + + rc = admin_command_execute(ocxlpmem); + if (rc) + goto out; + + rc = admin_command_complete_timeout(ocxlpmem, + ADMIN_COMMAND_CONTROLLER_DUMP); + if (rc < 0) { + dev_warn(&ocxlpmem->dev, "Controller dump timed out\n"); + goto out; + } + + rc = admin_response(ocxlpmem); + if (rc < 0) + goto out; + if (rc != STATUS_SUCCESS) { + warn_status(ocxlpmem, + "Unexpected status from retrieve error log", + rc); + goto out; + } It would help if there was a comment indicating how the 3 ioctls are 
used. My understanding is that the userland is: - requesting the controller to prepare a state dump - then one or more ioctls to fetch the data. The number of calls required to get the full state really depends on the size of the buffer passed by user - a last ioctl to tell the controller that we're done, presumably to let it free some resources. + + for (i = 0; i < args.buf_size; i += 8) { + u64 val; + + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, + ocxlpmem->admin_command.data_offset + i, +OCXL_HOST_ENDIAN, &val); + if (rc) + goto out; + + if (copy_to_user(&args.buf[i], &val, sizeof(u64))) { + rc = -EFAULT; + goto out; + } + } + + if (copy_to_user(uarg, &args, sizeof(args))) { + rc = -EFAULT; + goto out; + } + + rc = admin_response_handled(ocxlpmem); + if (rc) + goto out; + +out: + mutex_unlock(&ocxlpmem->admin_command.lock); + return rc; +} + +int request_controller_dump(struct ocxlpmem *ocxlpmem) +{ + int rc; + u64 busy = 1; + + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CHIC, + OCXL_LITTLE_ENDIAN, + GLOBAL_MMIO_CHI_CDA); + rc is not checked here. + + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI, + OCXL_LITTLE_ENDIAN, + GLOBAL_MMIO_HCI_CONTROLLER_DUMP); + if (rc) + return rc; + + while (busy) { + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, +GLOBAL_MMIO_HCI, +OCXL_LITTLE_ENDIAN, &busy); + if (rc) + return rc; + + busy &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP; Setting 'busy' doesn't hurt, but it's not really useful, is it? We should add some kind of timeout so that if the controller hits an issue, we don't spin in kernel space endlessly. + cond_resched(); + } + + return 0; +} + +static int ioctl_controller_dump_complete(struct ocxlpmem *ocxlpmem) +{ + return ocxl_global_mmio_set64