From: Shashank Sharma <shashank.sha...@amd.com>

This patch adds a work function that sends a GPU reset
uevent and some contextual information, like the PID and
some status flags. This work should be scheduled during
a GPU reset.

The userspace can do some recovery and post-processing work
based on this event and information.

V2: Addressed review comments from Christian
- Changed the name of the work to gpu_reset_event_work
- Added a structure to accommodate some additional information
  (like a PID and some flags)
- Do not add new structure in amdgpu.h

Cc: Alexander Deucher <alexander.deuc...@amd.com>
Cc: Christian Koenig <christian.koe...@amd.com>
Cc: Amaranath Somalapuram <amaranath.somalapu...@amd.com>
Signed-off-by: Shashank Sharma <shashank.sha...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 13 +++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d8b854fcbffa..6a97c585bdfd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -61,6 +61,7 @@
 #include <drm/drm_gem.h>
 #include <drm/drm_ioctl.h>
 #include <drm/gpu_scheduler.h>
+#include <drm/drm_sysfs.h>
 
 #include <kgd_kfd_interface.h>
 #include "dm_pp_interface.h"
@@ -813,6 +814,7 @@ struct amd_powerplay {
 #define AMDGPU_RESET_MAGIC_NUM 64
 #define AMDGPU_MAX_DF_PERFMONS 4
 #define AMDGPU_PRODUCT_NAME_LEN 64
+
 struct amdgpu_device {
        struct device                   *dev;
        struct pci_dev                  *pdev;
@@ -1063,6 +1065,7 @@ struct amdgpu_device {
 
        int asic_reset_res;
        struct work_struct              xgmi_reset_work;
+       struct work_struct              gpu_reset_event_work;
        struct list_head                reset_list;
 
        long                            gfx_timeout;
@@ -1097,6 +1100,7 @@ struct amdgpu_device {
        pci_channel_state_t             pci_channel_state;
 
        struct amdgpu_reset_control     *reset_cntl;
+       struct drm_reset_event          reset_event_info;
        uint32_t                        
ip_versions[MAX_HWIP][HWIP_MAX_INSTANCE];
 
        bool                            ram_is_direct_mapped;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index ed077de426d9..1aef07fd0dff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -73,6 +73,7 @@
 #include <linux/pm_runtime.h>
 
 #include <drm/drm_drv.h>
+#include <drm/drm_sysfs.h>
 
 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
@@ -3277,6 +3278,17 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
        return amdgpu_device_asic_has_dc_support(adev->asic_type);
 }
 
+static void amdgpu_device_reset_event_func(struct work_struct *__work)
+{
+       struct amdgpu_device *adev = container_of(__work, struct amdgpu_device,
+                                                 gpu_reset_event_work);
+       /*
+        * Work handler for adev->gpu_reset_event_work: a GPU reset has
+        * happened, so inform userspace and pass adev->reset_event_info.
+        */
+       drm_sysfs_reset_event(&adev->ddev, &adev->reset_event_info);
+}
+
 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
 {
        struct amdgpu_device *adev =
@@ -3525,6 +3537,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
                          amdgpu_device_delay_enable_gfx_off);
 
        INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
+       INIT_WORK(&adev->gpu_reset_event_work, amdgpu_device_reset_event_func);
 
        adev->gfx.gfx_off_req_count = 1;
        adev->pm.ac_power = power_supply_is_system_supplied() > 0;
-- 
2.32.0

Reply via email to