RE: [PATCH v2] drm/amdkfd: Provide SMI events watch
[AMD Official Use Only - Internal Distribution Only] So are you saying you'll make the event descriptions text rather than binary? If you switch to a text format, I wouldn't use a binary header. Rather I'd make it a text format completely. You could use one line per event, that makes it easy to use something like fgets to read a line (event) at a time in user mode. Each line could still start with an event identifier, but it would be text rather than a binary. And you don’t need the size if you define "\n" as delimiter between events. Regards, Felix -Original Message- From: Lin, Amber Sent: Friday, April 3, 2020 11:38 To: Kuehling, Felix ; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH v2] drm/amdkfd: Provide SMI events watch Further thinking about it, I'll use struct kfd_smi_msg_header. Instead of using struct kfd_smi_msg_vmfault, it's a description about the event. This way we make it generic to all events. On 2020-04-03 9:38 a.m., Amber Lin wrote: > Thanks Felix. I'll make changes accordingly but please pay attention > to my last reply inline. > > On 2020-04-02 7:51 p.m., Felix Kuehling wrote: >> On 2020-04-02 4:46 p.m., Amber Lin wrote: >>> When the compute is malfunctioning or performance drops, the system >>> admin will use SMI (System Management Interface) tool to >>> monitor/diagnostic what went wrong. This patch provides an event >>> watch interface for the user space to register events they are >>> interested. After the event is registered, the user can use >>> annoymous file descriptor's poll function with wait-time specified >>> to wait for the event to happen. Once the event happens, the user >>> can use read() to retrieve information related to the event. >>> >>> VM fault event is done in this patch. 
>>> >>> v2: - remove UNREGISTER and add event ENABLE/DISABLE >>> - correct kfifo usage >>> - move event message API to kfd_ioctl.h >>> >>> Signed-off-by: Amber Lin >>> --- >>> drivers/gpu/drm/amd/amdkfd/Makefile | 3 +- >>> drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 2 + >>> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 30 >>> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 1 + >>> drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 2 + >>> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 12 ++ >>> drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 177 >>> +++ >>> drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h | 31 >>> include/uapi/linux/kfd_ioctl.h | 30 +++- >>> 9 files changed, 286 insertions(+), 2 deletions(-) >>> create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c >>> create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h >>> >>> diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile >>> b/drivers/gpu/drm/amd/amdkfd/Makefile >>> index 6147462..cc98b4a 100644 >>> --- a/drivers/gpu/drm/amd/amdkfd/Makefile >>> +++ b/drivers/gpu/drm/amd/amdkfd/Makefile >>> @@ -53,7 +53,8 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ >>> $(AMDKFD_PATH)/kfd_int_process_v9.o \ >>> $(AMDKFD_PATH)/kfd_dbgdev.o \ >>> $(AMDKFD_PATH)/kfd_dbgmgr.o \ >>> - $(AMDKFD_PATH)/kfd_crat.o >>> + $(AMDKFD_PATH)/kfd_crat.o \ >>> + $(AMDKFD_PATH)/kfd_smi_events.o >>> ifneq ($(CONFIG_AMD_IOMMU_V2),) >>> AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o diff --git >>> a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c >>> b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c >>> index 9f59ba9..24b4717 100644 >>> --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c >>> +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c >>> @@ -24,6 +24,7 @@ >>> #include "kfd_events.h" >>> #include "cik_int.h" >>> #include "amdgpu_amdkfd.h" >>> +#include "kfd_smi_events.h" >>> static bool cik_event_interrupt_isr(struct kfd_dev *dev, >>> const uint32_t *ih_ring_entry, @@ -107,6 >>> +108,7 @@ static void cik_event_interrupt_wq(struct 
kfd_dev *dev, >>> ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) { >>> struct kfd_vm_fault_info info; >>> + kfd_smi_event_update_vmfault(dev, pasid); >>> kfd_process_vm_fault(dev->dqm, pasid); >>> memset(&info, 0, sizeof(info)); diff --git >>> a/drivers/gpu/drm/amd/amdkfd/k
Re: [PATCH v2] drm/amdkfd: Provide SMI events watch
Further thinking about it, I'll use struct kfd_smi_msg_header. Instead of using struct kfd_smi_msg_vmfault, it's a description about the event. This way we make it generic to all events. On 2020-04-03 9:38 a.m., Amber Lin wrote: Thanks Felix. I'll make changes accordingly but please pay attention to my last reply inline. On 2020-04-02 7:51 p.m., Felix Kuehling wrote: On 2020-04-02 4:46 p.m., Amber Lin wrote: When the compute is malfunctioning or performance drops, the system admin will use SMI (System Management Interface) tool to monitor/diagnostic what went wrong. This patch provides an event watch interface for the user space to register events they are interested. After the event is registered, the user can use annoymous file descriptor's poll function with wait-time specified to wait for the event to happen. Once the event happens, the user can use read() to retrieve information related to the event. VM fault event is done in this patch. v2: - remove UNREGISTER and add event ENABLE/DISABLE - correct kfifo usage - move event message API to kfd_ioctl.h Signed-off-by: Amber Lin --- drivers/gpu/drm/amd/amdkfd/Makefile | 3 +- drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 2 + drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 30 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 2 + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 12 ++ drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 177 +++ drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h | 31 include/uapi/linux/kfd_ioctl.h | 30 +++- 9 files changed, 286 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 6147462..cc98b4a 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -53,7 +53,8 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ 
$(AMDKFD_PATH)/kfd_int_process_v9.o \ $(AMDKFD_PATH)/kfd_dbgdev.o \ $(AMDKFD_PATH)/kfd_dbgmgr.o \ - $(AMDKFD_PATH)/kfd_crat.o + $(AMDKFD_PATH)/kfd_crat.o \ + $(AMDKFD_PATH)/kfd_smi_events.o ifneq ($(CONFIG_AMD_IOMMU_V2),) AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 9f59ba9..24b4717 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -24,6 +24,7 @@ #include "kfd_events.h" #include "cik_int.h" #include "amdgpu_amdkfd.h" +#include "kfd_smi_events.h" static bool cik_event_interrupt_isr(struct kfd_dev *dev, const uint32_t *ih_ring_entry, @@ -107,6 +108,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) { struct kfd_vm_fault_info info; + kfd_smi_event_update_vmfault(dev, pasid); kfd_process_vm_fault(dev->dqm, pasid); memset(, 0, sizeof(info)); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f8fa03a..591ac28 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -39,6 +39,7 @@ #include "kfd_device_queue_manager.h" #include "kfd_dbgmgr.h" #include "amdgpu_amdkfd.h" +#include "kfd_smi_events.h" static long kfd_ioctl(struct file *, unsigned int, unsigned long); static int kfd_open(struct inode *, struct file *); @@ -1243,6 +1244,32 @@ static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p, return ret; } +/* Handle requests for watching SMI events */ +static int kfd_ioctl_smi_events(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_smi_events_args *args = data; + struct kfd_dev *dev; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) + return -EINVAL; + + switch (args->op) { + case KFD_SMI_EVENTS_REGISTER: + /* register the device */ + return kfd_smi_event_register(dev, >data); + 
case KFD_SMI_EVENTS_ENABLE: + /* subscribe events to the device */ + return kfd_smi_event_enable(dev, args->events); + case KFD_SMI_EVENTS_DISABLE: + /* unsubscribe events */ + return kfd_smi_event_disable(dev, args->events); + } + + return -EINVAL; +} + bool kfd_dev_is_large_bar(struct kfd_dev *dev) { struct kfd_local_mem_info mem_info; @@ -1827,6 +1854,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS, kfd_ioctl_alloc_queue_gws, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS, +
Re: [PATCH v2] drm/amdkfd: Provide SMI events watch
Thanks Felix. I'll make changes accordingly but please pay attention to my last reply inline. On 2020-04-02 7:51 p.m., Felix Kuehling wrote: On 2020-04-02 4:46 p.m., Amber Lin wrote: When the compute is malfunctioning or performance drops, the system admin will use SMI (System Management Interface) tool to monitor/diagnostic what went wrong. This patch provides an event watch interface for the user space to register events they are interested. After the event is registered, the user can use annoymous file descriptor's poll function with wait-time specified to wait for the event to happen. Once the event happens, the user can use read() to retrieve information related to the event. VM fault event is done in this patch. v2: - remove UNREGISTER and add event ENABLE/DISABLE - correct kfifo usage - move event message API to kfd_ioctl.h Signed-off-by: Amber Lin --- drivers/gpu/drm/amd/amdkfd/Makefile | 3 +- drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 2 + drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 30 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 2 + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 12 ++ drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 177 +++ drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h | 31 include/uapi/linux/kfd_ioctl.h | 30 +++- 9 files changed, 286 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 6147462..cc98b4a 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -53,7 +53,8 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_int_process_v9.o \ $(AMDKFD_PATH)/kfd_dbgdev.o \ $(AMDKFD_PATH)/kfd_dbgmgr.o \ - $(AMDKFD_PATH)/kfd_crat.o + $(AMDKFD_PATH)/kfd_crat.o \ + $(AMDKFD_PATH)/kfd_smi_events.o ifneq ($(CONFIG_AMD_IOMMU_V2),) AMDKFD_FILES += 
$(AMDKFD_PATH)/kfd_iommu.o diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 9f59ba9..24b4717 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -24,6 +24,7 @@ #include "kfd_events.h" #include "cik_int.h" #include "amdgpu_amdkfd.h" +#include "kfd_smi_events.h" static bool cik_event_interrupt_isr(struct kfd_dev *dev, const uint32_t *ih_ring_entry, @@ -107,6 +108,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) { struct kfd_vm_fault_info info; + kfd_smi_event_update_vmfault(dev, pasid); kfd_process_vm_fault(dev->dqm, pasid); memset(, 0, sizeof(info)); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f8fa03a..591ac28 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -39,6 +39,7 @@ #include "kfd_device_queue_manager.h" #include "kfd_dbgmgr.h" #include "amdgpu_amdkfd.h" +#include "kfd_smi_events.h" static long kfd_ioctl(struct file *, unsigned int, unsigned long); static int kfd_open(struct inode *, struct file *); @@ -1243,6 +1244,32 @@ static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p, return ret; } +/* Handle requests for watching SMI events */ +static int kfd_ioctl_smi_events(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_smi_events_args *args = data; + struct kfd_dev *dev; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) + return -EINVAL; + + switch (args->op) { + case KFD_SMI_EVENTS_REGISTER: + /* register the device */ + return kfd_smi_event_register(dev, >data); + case KFD_SMI_EVENTS_ENABLE: + /* subscribe events to the device */ + return kfd_smi_event_enable(dev, args->events); + case KFD_SMI_EVENTS_DISABLE: + /* unsubscribe events */ + return kfd_smi_event_disable(dev, args->events); + } + + return 
-EINVAL; +} + bool kfd_dev_is_large_bar(struct kfd_dev *dev) { struct kfd_local_mem_info mem_info; @@ -1827,6 +1854,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS, kfd_ioctl_alloc_queue_gws, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS, + kfd_ioctl_smi_events, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 0491ab2..6ac6f31 100644 ---
Re: [PATCH v2] drm/amdkfd: Provide SMI events watch
On 2020-04-02 4:46 p.m., Amber Lin wrote: When the compute is malfunctioning or performance drops, the system admin will use SMI (System Management Interface) tool to monitor/diagnostic what went wrong. This patch provides an event watch interface for the user space to register events they are interested. After the event is registered, the user can use annoymous file descriptor's poll function with wait-time specified to wait for the event to happen. Once the event happens, the user can use read() to retrieve information related to the event. VM fault event is done in this patch. v2: - remove UNREGISTER and add event ENABLE/DISABLE - correct kfifo usage - move event message API to kfd_ioctl.h Signed-off-by: Amber Lin --- drivers/gpu/drm/amd/amdkfd/Makefile | 3 +- drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 2 + drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 30 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 2 + drivers/gpu/drm/amd/amdkfd/kfd_priv.h| 12 ++ drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 177 +++ drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h | 31 include/uapi/linux/kfd_ioctl.h | 30 +++- 9 files changed, 286 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 6147462..cc98b4a 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -53,7 +53,8 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_int_process_v9.o \ $(AMDKFD_PATH)/kfd_dbgdev.o \ $(AMDKFD_PATH)/kfd_dbgmgr.o \ - $(AMDKFD_PATH)/kfd_crat.o + $(AMDKFD_PATH)/kfd_crat.o \ + $(AMDKFD_PATH)/kfd_smi_events.o ifneq ($(CONFIG_AMD_IOMMU_V2),) AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 
9f59ba9..24b4717 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -24,6 +24,7 @@ #include "kfd_events.h" #include "cik_int.h" #include "amdgpu_amdkfd.h" +#include "kfd_smi_events.h" static bool cik_event_interrupt_isr(struct kfd_dev *dev, const uint32_t *ih_ring_entry, @@ -107,6 +108,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) { struct kfd_vm_fault_info info; + kfd_smi_event_update_vmfault(dev, pasid); kfd_process_vm_fault(dev->dqm, pasid); memset(, 0, sizeof(info)); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f8fa03a..591ac28 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -39,6 +39,7 @@ #include "kfd_device_queue_manager.h" #include "kfd_dbgmgr.h" #include "amdgpu_amdkfd.h" +#include "kfd_smi_events.h" static long kfd_ioctl(struct file *, unsigned int, unsigned long); static int kfd_open(struct inode *, struct file *); @@ -1243,6 +1244,32 @@ static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p, return ret; } +/* Handle requests for watching SMI events */ +static int kfd_ioctl_smi_events(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_smi_events_args *args = data; + struct kfd_dev *dev; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) + return -EINVAL; + + switch (args->op) { + case KFD_SMI_EVENTS_REGISTER: + /* register the device */ + return kfd_smi_event_register(dev, >data); + case KFD_SMI_EVENTS_ENABLE: + /* subscribe events to the device */ + return kfd_smi_event_enable(dev, args->events); + case KFD_SMI_EVENTS_DISABLE: + /* unsubscribe events */ + return kfd_smi_event_disable(dev, args->events); + } + + return -EINVAL; +} + bool kfd_dev_is_large_bar(struct kfd_dev *dev) { struct kfd_local_mem_info mem_info; @@ -1827,6 +1854,9 @@ static const struct 
amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS, kfd_ioctl_alloc_queue_gws, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS, + kfd_ioctl_smi_events, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index
[PATCH v2] drm/amdkfd: Provide SMI events watch
When the compute is malfunctioning or performance drops, the system admin will use SMI (System Management Interface) tool to monitor/diagnose what went wrong. This patch provides an event watch interface for the user space to register events they are interested in. After the event is registered, the user can use an anonymous file descriptor's poll function with wait-time specified to wait for the event to happen. Once the event happens, the user can use read() to retrieve information related to the event. VM fault event is done in this patch. v2: - remove UNREGISTER and add event ENABLE/DISABLE - correct kfifo usage - move event message API to kfd_ioctl.h Signed-off-by: Amber Lin --- drivers/gpu/drm/amd/amdkfd/Makefile | 3 +- drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 2 + drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 30 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 2 + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 12 ++ drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 177 +++ drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h | 31 include/uapi/linux/kfd_ioctl.h | 30 +++- 9 files changed, 286 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 6147462..cc98b4a 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -53,7 +53,8 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_int_process_v9.o \ $(AMDKFD_PATH)/kfd_dbgdev.o \ $(AMDKFD_PATH)/kfd_dbgmgr.o \ - $(AMDKFD_PATH)/kfd_crat.o + $(AMDKFD_PATH)/kfd_crat.o \ + $(AMDKFD_PATH)/kfd_smi_events.o ifneq ($(CONFIG_AMD_IOMMU_V2),) AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 9f59ba9..24b4717 100644 ---
a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -24,6 +24,7 @@ #include "kfd_events.h" #include "cik_int.h" #include "amdgpu_amdkfd.h" +#include "kfd_smi_events.h" static bool cik_event_interrupt_isr(struct kfd_dev *dev, const uint32_t *ih_ring_entry, @@ -107,6 +108,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) { struct kfd_vm_fault_info info; + kfd_smi_event_update_vmfault(dev, pasid); kfd_process_vm_fault(dev->dqm, pasid); memset(&info, 0, sizeof(info)); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f8fa03a..591ac28 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -39,6 +39,7 @@ #include "kfd_device_queue_manager.h" #include "kfd_dbgmgr.h" #include "amdgpu_amdkfd.h" +#include "kfd_smi_events.h" static long kfd_ioctl(struct file *, unsigned int, unsigned long); static int kfd_open(struct inode *, struct file *); @@ -1243,6 +1244,32 @@ static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p, return ret; } +/* Handle requests for watching SMI events */ +static int kfd_ioctl_smi_events(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_smi_events_args *args = data; + struct kfd_dev *dev; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) + return -EINVAL; + + switch (args->op) { + case KFD_SMI_EVENTS_REGISTER: + /* register the device */ + return kfd_smi_event_register(dev, &args->data); + case KFD_SMI_EVENTS_ENABLE: + /* subscribe events to the device */ + return kfd_smi_event_enable(dev, args->events); + case KFD_SMI_EVENTS_DISABLE: + /* unsubscribe events */ + return kfd_smi_event_disable(dev, args->events); + } + + return -EINVAL; +} + bool kfd_dev_is_large_bar(struct kfd_dev *dev) { struct kfd_local_mem_info mem_info; @@ -1827,6 +1854,9 @@ static const struct amdkfd_ioctl_desc
amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS, kfd_ioctl_alloc_queue_gws, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS, + kfd_ioctl_smi_events, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 0491ab2..6ac6f31 100644 ---