Re: [PATCH v3 17/27] powerpc/powernv/pmem: Implement the Read Error Log command
+ if (rc) + goto out; + + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, +ocxlpmem- admin_command.data_offset + 0x28, +OCXL_HOST_ENDIAN, >wwid[1]); + if (rc) + goto out; + + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, +ocxlpmem- admin_command.data_offset + 0x30, +OCXL_HOST_ENDIAN, (u64 *)log- fw_revision); + if (rc) + goto out; + log->fw_revision[8] = '\0'; + + buf_length = (user_buf_length < log->buf_size) ? +user_buf_length : log->buf_size; + for (i = 0; i < buf_length + 0x48; i += 8) { + u64 val; + + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, +ocxlpmem- admin_command.data_offset + i, +OCXL_HOST_ENDIAN, ); + if (rc) + goto out; + + if (buf_is_user) { + if (copy_to_user(>buf[i], , sizeof(u64))) { + rc = -EFAULT; + goto out; + } + } else + log->buf[i] = val; + } I think it could be a bit simplified by keeping the handling of the user buffer out of this function. Always call it with a kernel buffer. And have only one copy_to_user() call on the ioctl() path. You'd need to allocate a kernel buf on the ioctl path, but you're already doing it on the probe() path, so it should be doable to share code. Hmm, the problem then is that on the IOCTL side, I'll have to save, modify, then restore the buf member of struct ioctl_ocxl_pmem_error_log, which would be uglier. buf is just an output buffer. All you'd need to do is allocate a kernel buf, like it's already done for the "probe" case in dump_error_log(). And add a global copy_to_user() of the buf at the end of the ioctl path, instead of having multiple smaller copy_to_user() in the loop here. copy_to_user() is a bit expensive so it's usually better to regroup them. I think it's easy here and makes sense since that function is also trying to handle both kernel and user space buffers. But we're not in a critical path, and after this patch, there are others copying out mmio content to user buffers and those don't have a kernel buffer to handle, so the copy_to_user() in a loop makes things easier. 
So I guess the conclusion is whatever you think is the easiest... + + rc = admin_response_handled(ocxlpmem); + if (rc) + goto out; + +out: + mutex_unlock(>admin_command.lock); + return rc; + +} + +static int ioctl_error_log(struct ocxlpmem *ocxlpmem, + struct ioctl_ocxl_pmem_error_log __user *uarg) +{ + struct ioctl_ocxl_pmem_error_log args; + int rc; + + if (copy_from_user(, uarg, sizeof(args))) + return -EFAULT; + + rc = read_error_log(ocxlpmem, , true); + if (rc) + return rc; + + if (copy_to_user(uarg, , sizeof(args))) + return -EFAULT; + + return 0; +} + +static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args) +{ + struct ocxlpmem *ocxlpmem = file->private_data; + int rc = -EINVAL; + + switch (cmd) { + case IOCTL_OCXL_PMEM_ERROR_LOG: + rc = ioctl_error_log(ocxlpmem, +(struct ioctl_ocxl_pmem_error_log __user *)args); + break; + } + return rc; +} + static const struct file_operations fops = { .owner = THIS_MODULE, .open = file_open, .release= file_release, + .unlocked_ioctl = file_ioctl, + .compat_ioctl = file_ioctl, }; /** @@ -527,6 +736,60 @@ static int read_device_metadata(struct ocxlpmem *ocxlpmem) return 0; } +static const char *decode_error_log_type(u8 error_log_type) +{ + switch (error_log_type) { + case 0x00: + return "general"; + case 0x01: + return "predictive failure"; + case 0x02: + return "thermal warning"; + case 0x03: + return "data loss"; + case 0x04: + return "health & performance"; + default: + return "unknown"; + } +} + +static void dump_error_log(struct ocxlpmem *ocxlpmem) +{ + struct ioctl_ocxl_pmem_error_log log; + u32 buf_size; + u8 *buf; + int rc; + + if (ocxlpmem->admin_command.data_size == 0) + return; + + buf_size = ocxlpmem->admin_command.data_size - 0x48; + buf = kzalloc(buf_size, GFP_KERNEL); + if (!buf) + return; + + log.buf = buf; + log.buf_size = buf_size; + + rc =
Re: [PATCH v3 17/27] powerpc/powernv/pmem: Implement the Read Error Log command
On Tue, 2020-03-03 at 11:36 +0100, Frederic Barrat wrote: > > Le 21/02/2020 à 04:27, Alastair D'Silva a écrit : > > From: Alastair D'Silva > > > > The read error log command extracts information from the > > controller's > > internal error log. > > > > This patch exposes this information in 2 ways: > > - During probe, if an error occurs & a log is available, print it > > to the > >console > > - After probe, make the error log available to userspace via an > > IOCTL. > >Userspace is notified of pending error logs in a later patch > >("powerpc/powernv/pmem: Forward events to userspace") > > > > Signed-off-by: Alastair D'Silva > > --- > > arch/powerpc/platforms/powernv/pmem/ocxl.c| 269 > > ++ > > .../platforms/powernv/pmem/ocxl_internal.h| 1 + > > include/uapi/nvdimm/ocxl-pmem.h | 46 +++ > > 3 files changed, 316 insertions(+) > > create mode 100644 include/uapi/nvdimm/ocxl-pmem.h > > > > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c > > b/arch/powerpc/platforms/powernv/pmem/ocxl.c > > index 63109a870d2c..2b64504f9129 100644 > > --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c > > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c > > @@ -447,10 +447,219 @@ static int file_release(struct inode *inode, > > struct file *file) > > return 0; > > } > > > > +/** > > + * error_log_header_parse() - Parse the first 64 bits of the error > > log command response > > + * @ocxlpmem: the device metadata > > + * @length: out, returns the number of bytes in the response > > (excluding the 64 bit header) > > + */ > > +static int error_log_header_parse(struct ocxlpmem *ocxlpmem, u16 > > *length) > > +{ > > + int rc; > > + u64 val; > > + > > Empty line in the middle of declarations > Ok > > > + u16 data_identifier; > > + u32 data_length; > > + > > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, > > +ocxlpmem- > > >admin_command.data_offset, > > +OCXL_LITTLE_ENDIAN, ); > > + if (rc) > > + return rc; > > + > > + data_identifier = val >> 48; > > + data_length = val & 0x; > > + > 
> + if (data_identifier != 0x454C) { // 'EL' > > + dev_err(>dev, > > + "Bad data identifier for error log data, > > expected 'EL', got '%2s' (%#x), data_length=%u\n", > > + (char *)_identifier, > > + (unsigned int)data_identifier, data_length); > > + return -EINVAL; > > + } > > + > > + *length = data_length; > > + return 0; > > +} > > + > > +static int error_log_offset_0x08(struct ocxlpmem *ocxlpmem, > > +u32 *log_identifier, u32 > > *program_ref_code) > > +{ > > + int rc; > > + u64 val; > > + > > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, > > +ocxlpmem- > > >admin_command.data_offset + 0x08, > > +OCXL_LITTLE_ENDIAN, ); > > + if (rc) > > + return rc; > > + > > + *log_identifier = val >> 32; > > + *program_ref_code = val & 0x; > > + > > + return 0; > > +} > > + > > +static int read_error_log(struct ocxlpmem *ocxlpmem, > > + struct ioctl_ocxl_pmem_error_log *log, bool > > buf_is_user) > > +{ > > + u64 val; > > + u16 user_buf_length; > > + u16 buf_length; > > + u16 i; > > + int rc; > > + > > + if (log->buf_size % 8) > > + return -EINVAL; > > + > > + rc = ocxlpmem_chi(ocxlpmem, ); > > + if (rc) > > + goto out; > > > "out" will unlock a mutex not yet taken. > Thanks, that should have been a return. 
> > > > + > > + if (!(val & GLOBAL_MMIO_CHI_ELA)) > > + return -EAGAIN; > > + > > + user_buf_length = log->buf_size; > > + > > + mutex_lock(>admin_command.lock); > > + > > + rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_ERRLOG); > > + if (rc) > > + goto out; > > + > > + rc = admin_command_execute(ocxlpmem); > > + if (rc) > > + goto out; > > + > > + rc = admin_command_complete_timeout(ocxlpmem, > > ADMIN_COMMAND_ERRLOG); > > + if (rc < 0) { > > + dev_warn(>dev, "Read error log timed out\n"); > > + goto out; > > + } > > + > > + rc = admin_response(ocxlpmem); > > + if (rc < 0) > > + goto out; > > + if (rc != STATUS_SUCCESS) { > > + warn_status(ocxlpmem, "Unexpected status from retrieve > > error log", rc); > > + goto out; > > + } > > + > > + > > + rc = error_log_header_parse(ocxlpmem, >buf_size); > > + if (rc) > > + goto out; > > + // log->buf_size now contains the returned buffer size, not the > > user size > > + > > + rc = error_log_offset_0x08(ocxlpmem, >log_identifier, > > + >program_reference_code); > > + if (rc) > > + goto out; > > > Offset 0x08 gets a preferential treatment compared to 0x10 below and > it's not clear why. > I
Re: [PATCH v3 17/27] powerpc/powernv/pmem: Implement the Read Error Log command
On 21/2/20 2:27 pm, Alastair D'Silva wrote: From: Alastair D'Silva The read error log command extracts information from the controller's internal error log. This patch exposes this information in 2 ways: - During probe, if an error occurs & a log is available, print it to the console - After probe, make the error log available to userspace via an IOCTL. Userspace is notified of pending error logs in a later patch ("powerpc/powernv/pmem: Forward events to userspace") Signed-off-by: Alastair D'Silva A few minor style checks at https://openpower.xyz/job/snowpatch/job/snowpatch-linux-checkpatch/11787//artifact/linux/checkpatch.log We should also add some documentation for the user interfaces we're adding (same applies for all the remaining patches in this series that add more interfaces). --- arch/powerpc/platforms/powernv/pmem/ocxl.c| 269 ++ .../platforms/powernv/pmem/ocxl_internal.h| 1 + include/uapi/nvdimm/ocxl-pmem.h | 46 +++ 3 files changed, 316 insertions(+) create mode 100644 include/uapi/nvdimm/ocxl-pmem.h diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c index 63109a870d2c..2b64504f9129 100644 --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c @@ -447,10 +447,219 @@ static int file_release(struct inode *inode, struct file *file) return 0; } +/** + * error_log_header_parse() - Parse the first 64 bits of the error log command response + * @ocxlpmem: the device metadata + * @length: out, returns the number of bytes in the response (excluding the 64 bit header) + */ +static int error_log_header_parse(struct ocxlpmem *ocxlpmem, u16 *length) +{ + int rc; + u64 val; + + u16 data_identifier; + u32 data_length; + + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, +ocxlpmem->admin_command.data_offset, +OCXL_LITTLE_ENDIAN, ); + if (rc) + return rc; + + data_identifier = val >> 48; + data_length = val & 0x; + + if (data_identifier != 0x454C) { // 'EL' + dev_err(>dev, + "Bad 
data identifier for error log data, expected 'EL', got '%2s' (%#x), data_length=%u\n", + (char *)_identifier, + (unsigned int)data_identifier, data_length); + return -EINVAL; This should be something other than EINVAL I think + } + + *length = data_length; + return 0; +} + +static int error_log_offset_0x08(struct ocxlpmem *ocxlpmem, +u32 *log_identifier, u32 *program_ref_code) +{ + int rc; + u64 val; + + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, +ocxlpmem->admin_command.data_offset + 0x08, +OCXL_LITTLE_ENDIAN, ); + if (rc) + return rc; + + *log_identifier = val >> 32; + *program_ref_code = val & 0x; + + return 0; +} + +static int read_error_log(struct ocxlpmem *ocxlpmem, + struct ioctl_ocxl_pmem_error_log *log, bool buf_is_user) +{ + u64 val; + u16 user_buf_length; + u16 buf_length; + u16 i; + int rc; + + if (log->buf_size % 8) + return -EINVAL; + + rc = ocxlpmem_chi(ocxlpmem, ); + if (rc) + goto out; + + if (!(val & GLOBAL_MMIO_CHI_ELA)) + return -EAGAIN; + + user_buf_length = log->buf_size; + + mutex_lock(>admin_command.lock); + + rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_ERRLOG); + if (rc) + goto out; + + rc = admin_command_execute(ocxlpmem); + if (rc) + goto out; + + rc = admin_command_complete_timeout(ocxlpmem, ADMIN_COMMAND_ERRLOG); + if (rc < 0) { + dev_warn(>dev, "Read error log timed out\n"); + goto out; + } + + rc = admin_response(ocxlpmem); + if (rc < 0) + goto out; + if (rc != STATUS_SUCCESS) { + warn_status(ocxlpmem, "Unexpected status from retrieve error log", rc); + goto out; + } + + + rc = error_log_header_parse(ocxlpmem, >buf_size); + if (rc) + goto out; + // log->buf_size now contains the returned buffer size, not the user size In the event that the log is truncated to fit the user buffer, we return the full log size, I assume this is intentional to signal it's truncated as per the nd stuff? 
+ + rc = error_log_offset_0x08(ocxlpmem, >log_identifier, + >program_reference_code); + if (rc) + goto out; + + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, +
Re: [PATCH v3 17/27] powerpc/powernv/pmem: Implement the Read Error Log command
Le 21/02/2020 à 04:27, Alastair D'Silva a écrit : From: Alastair D'Silva The read error log command extracts information from the controller's internal error log. This patch exposes this information in 2 ways: - During probe, if an error occurs & a log is available, print it to the console - After probe, make the error log available to userspace via an IOCTL. Userspace is notified of pending error logs in a later patch ("powerpc/powernv/pmem: Forward events to userspace") Signed-off-by: Alastair D'Silva --- arch/powerpc/platforms/powernv/pmem/ocxl.c| 269 ++ .../platforms/powernv/pmem/ocxl_internal.h| 1 + include/uapi/nvdimm/ocxl-pmem.h | 46 +++ 3 files changed, 316 insertions(+) create mode 100644 include/uapi/nvdimm/ocxl-pmem.h diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c index 63109a870d2c..2b64504f9129 100644 --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c @@ -447,10 +447,219 @@ static int file_release(struct inode *inode, struct file *file) return 0; } +/** + * error_log_header_parse() - Parse the first 64 bits of the error log command response + * @ocxlpmem: the device metadata + * @length: out, returns the number of bytes in the response (excluding the 64 bit header) + */ +static int error_log_header_parse(struct ocxlpmem *ocxlpmem, u16 *length) +{ + int rc; + u64 val; + Empty line in the middle of declarations + u16 data_identifier; + u32 data_length; + + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, +ocxlpmem->admin_command.data_offset, +OCXL_LITTLE_ENDIAN, ); + if (rc) + return rc; + + data_identifier = val >> 48; + data_length = val & 0x; + + if (data_identifier != 0x454C) { // 'EL' + dev_err(>dev, + "Bad data identifier for error log data, expected 'EL', got '%2s' (%#x), data_length=%u\n", + (char *)_identifier, + (unsigned int)data_identifier, data_length); + return -EINVAL; + } + + *length = data_length; + return 0; +} + +static int 
error_log_offset_0x08(struct ocxlpmem *ocxlpmem, +u32 *log_identifier, u32 *program_ref_code) +{ + int rc; + u64 val; + + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, +ocxlpmem->admin_command.data_offset + 0x08, +OCXL_LITTLE_ENDIAN, ); + if (rc) + return rc; + + *log_identifier = val >> 32; + *program_ref_code = val & 0x; + + return 0; +} + +static int read_error_log(struct ocxlpmem *ocxlpmem, + struct ioctl_ocxl_pmem_error_log *log, bool buf_is_user) +{ + u64 val; + u16 user_buf_length; + u16 buf_length; + u16 i; + int rc; + + if (log->buf_size % 8) + return -EINVAL; + + rc = ocxlpmem_chi(ocxlpmem, ); + if (rc) + goto out; "out" will unlock a mutex not yet taken. + + if (!(val & GLOBAL_MMIO_CHI_ELA)) + return -EAGAIN; + + user_buf_length = log->buf_size; + + mutex_lock(>admin_command.lock); + + rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_ERRLOG); + if (rc) + goto out; + + rc = admin_command_execute(ocxlpmem); + if (rc) + goto out; + + rc = admin_command_complete_timeout(ocxlpmem, ADMIN_COMMAND_ERRLOG); + if (rc < 0) { + dev_warn(>dev, "Read error log timed out\n"); + goto out; + } + + rc = admin_response(ocxlpmem); + if (rc < 0) + goto out; + if (rc != STATUS_SUCCESS) { + warn_status(ocxlpmem, "Unexpected status from retrieve error log", rc); + goto out; + } + + + rc = error_log_header_parse(ocxlpmem, >buf_size); + if (rc) + goto out; + // log->buf_size now contains the returned buffer size, not the user size + + rc = error_log_offset_0x08(ocxlpmem, >log_identifier, + >program_reference_code); + if (rc) + goto out; Offset 0x08 gets a preferential treatment compared to 0x10 below and it's not clear why. I would create a subfunction which parses all the fields linearly. + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, +ocxlpmem->admin_command.data_offset + 0x10, +OCXL_LITTLE_ENDIAN, ); + if (rc) + goto out; + + log->error_log_type = val >> 56; + log->action_flags = (log->error_log_type == OCXL_PMEM_ERROR_LOG_TYPE_GENERAL)
[PATCH v3 17/27] powerpc/powernv/pmem: Implement the Read Error Log command
From: Alastair D'Silva The read error log command extracts information from the controller's internal error log. This patch exposes this information in 2 ways: - During probe, if an error occurs & a log is available, print it to the console - After probe, make the error log available to userspace via an IOCTL. Userspace is notified of pending error logs in a later patch ("powerpc/powernv/pmem: Forward events to userspace") Signed-off-by: Alastair D'Silva --- arch/powerpc/platforms/powernv/pmem/ocxl.c| 269 ++ .../platforms/powernv/pmem/ocxl_internal.h| 1 + include/uapi/nvdimm/ocxl-pmem.h | 46 +++ 3 files changed, 316 insertions(+) create mode 100644 include/uapi/nvdimm/ocxl-pmem.h diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c index 63109a870d2c..2b64504f9129 100644 --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c @@ -447,10 +447,219 @@ static int file_release(struct inode *inode, struct file *file) return 0; } +/** + * error_log_header_parse() - Parse the first 64 bits of the error log command response + * @ocxlpmem: the device metadata + * @length: out, returns the number of bytes in the response (excluding the 64 bit header) + */ +static int error_log_header_parse(struct ocxlpmem *ocxlpmem, u16 *length) +{ + int rc; + u64 val; + + u16 data_identifier; + u32 data_length; + + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, +ocxlpmem->admin_command.data_offset, +OCXL_LITTLE_ENDIAN, ); + if (rc) + return rc; + + data_identifier = val >> 48; + data_length = val & 0x; + + if (data_identifier != 0x454C) { // 'EL' + dev_err(>dev, + "Bad data identifier for error log data, expected 'EL', got '%2s' (%#x), data_length=%u\n", + (char *)_identifier, + (unsigned int)data_identifier, data_length); + return -EINVAL; + } + + *length = data_length; + return 0; +} + +static int error_log_offset_0x08(struct ocxlpmem *ocxlpmem, +u32 *log_identifier, u32 *program_ref_code) +{ + int 
rc; + u64 val; + + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, +ocxlpmem->admin_command.data_offset + 0x08, +OCXL_LITTLE_ENDIAN, ); + if (rc) + return rc; + + *log_identifier = val >> 32; + *program_ref_code = val & 0x; + + return 0; +} + +static int read_error_log(struct ocxlpmem *ocxlpmem, + struct ioctl_ocxl_pmem_error_log *log, bool buf_is_user) +{ + u64 val; + u16 user_buf_length; + u16 buf_length; + u16 i; + int rc; + + if (log->buf_size % 8) + return -EINVAL; + + rc = ocxlpmem_chi(ocxlpmem, ); + if (rc) + goto out; + + if (!(val & GLOBAL_MMIO_CHI_ELA)) + return -EAGAIN; + + user_buf_length = log->buf_size; + + mutex_lock(>admin_command.lock); + + rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_ERRLOG); + if (rc) + goto out; + + rc = admin_command_execute(ocxlpmem); + if (rc) + goto out; + + rc = admin_command_complete_timeout(ocxlpmem, ADMIN_COMMAND_ERRLOG); + if (rc < 0) { + dev_warn(>dev, "Read error log timed out\n"); + goto out; + } + + rc = admin_response(ocxlpmem); + if (rc < 0) + goto out; + if (rc != STATUS_SUCCESS) { + warn_status(ocxlpmem, "Unexpected status from retrieve error log", rc); + goto out; + } + + + rc = error_log_header_parse(ocxlpmem, >buf_size); + if (rc) + goto out; + // log->buf_size now contains the returned buffer size, not the user size + + rc = error_log_offset_0x08(ocxlpmem, >log_identifier, + >program_reference_code); + if (rc) + goto out; + + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, +ocxlpmem->admin_command.data_offset + 0x10, +OCXL_LITTLE_ENDIAN, ); + if (rc) + goto out; + + log->error_log_type = val >> 56; + log->action_flags = (log->error_log_type == OCXL_PMEM_ERROR_LOG_TYPE_GENERAL) ? + (val >> 32) & 0xFF : 0; + log->power_on_seconds = val & 0x; + + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, +ocxlpmem->admin_command.data_offset + 0x18, +OCXL_LITTLE_ENDIAN, >timestamp); +