Re: [PATCH] drm/amdgpu: SW part of MES event log enablement

2023-11-23 Thread Felix Kuehling



On 2023-11-23 16:29, Felix Kuehling wrote:

On 2023-11-23 14:48, shaoyunl wrote:
This is the generic SW part, prepare the event log buffer and dump it 
through debugfs


Signed-off-by: shaoyunl 


Reviewed-by: Felix Kuehling 


Sorry, I just realized a potential problem, see inline.






---
  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h |  2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 61 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h |  5 ++
  4 files changed, 70 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c

index a53f436fa9f1..8b2cbeae99ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -2140,6 +2140,8 @@ int amdgpu_debugfs_init(struct amdgpu_device 
*adev)

  amdgpu_debugfs_firmware_init(adev);
  amdgpu_ta_if_debugfs_init(adev);
  +    amdgpu_debugfs_mes_event_log_init(adev);


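This debugfs file always gets created, even if the GPU isn't using MES. 
But the log buffer only gets allocated in amdgpu_mes_init, i.e. on GPUs 
that have MES; on a GPU without MES adev->mes.event_log_cpu_addr is never 
set. I think reading the log in debugfs on such a GPU would cause a kernel 
oops, because amdgpu_debugfs_mes_event_log_show passes that pointer 
straight to seq_hex_dump. You either need to add a check for that in 
amdgpu_debugfs_mes_event_log_show, or skip the debugfs file creation in 
amdgpu_debugfs_mes_event_log_init if the GPU doesn't use MES.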


Regards,
  Felix



+
  #if defined(CONFIG_DRM_AMD_DC)
  if (adev->dc_enabled)
  dtn_debugfs_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h

index 371a6f0deb29..0425432d8659 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
@@ -32,3 +32,5 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev);
  void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
  void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
  void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev);
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c

index 45280fb0e00c..b4ba556dc733 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -97,6 +97,26 @@ static int amdgpu_mes_doorbell_init(struct 
amdgpu_device *adev)

  return 0;
  }
  +static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
+{
+    int r;
+
+    r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+    AMDGPU_GEM_DOMAIN_GTT,
+    &adev->mes.event_log_gpu_obj,
+    &adev->mes.event_log_gpu_addr,
+    &adev->mes.event_log_cpu_addr);
+    if (r) {
+    dev_warn(adev->dev, "failed to create MES event log buffer (%d)", r);

+    return r;
+    }
+
+    memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE);
+
+    return  0;
+
+}
+
  static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
  {
  bitmap_free(adev->mes.doorbell_bitmap);
@@ -181,8 +201,14 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
  if (r)
  goto error;
  +    r = amdgpu_mes_event_log_init(adev);
+    if (r)
+    goto error_doorbell;
+
  return 0;
  +error_doorbell:
+    amdgpu_mes_doorbell_free(adev);
  error:
  amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
  amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
@@ -198,6 +224,10 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
    void amdgpu_mes_fini(struct amdgpu_device *adev)
  {
+    amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
+  &adev->mes.event_log_gpu_addr,
+  &adev->mes.event_log_cpu_addr);
+
  amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
  amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
  amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
@@ -1483,3 +1513,34 @@ int amdgpu_mes_init_microcode(struct 
amdgpu_device *adev, int pipe)

  amdgpu_ucode_release(&adev->mes.fw[pipe]);
  return r;
  }
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, 
void *unused)

+{
+    struct amdgpu_device *adev = m->private;
+    uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
+
+    seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
+ mem, PAGE_SIZE, false);
+
+    return 0;
+}
+
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
+
+#endif
+
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
+{
+
+#if defined(CONFIG_DEBUG_FS)
+    struct drm_minor *minor = adev_to_drm(adev)->primary;
+    struct dentry *root = minor->debugfs_root;
+
+    debugfs_create_file("amdgpu_mes_event_log", 0444, root,
+    adev, &amdgpu_debugfs_mes_event_log_fops);
+
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h

index a27b424ffe00..894b9b133000 100644
--- 

Re: [PATCH] drm/amdgpu: SW part of MES event log enablement

2023-11-23 Thread Felix Kuehling

On 2023-11-23 14:48, shaoyunl wrote:

This is the generic SW part, prepare the event log buffer and dump it through 
debugfs

Signed-off-by: shaoyunl 


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h |  2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 61 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h |  5 ++
  4 files changed, 70 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index a53f436fa9f1..8b2cbeae99ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -2140,6 +2140,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
amdgpu_debugfs_firmware_init(adev);
amdgpu_ta_if_debugfs_init(adev);
  
+	amdgpu_debugfs_mes_event_log_init(adev);

+
  #if defined(CONFIG_DRM_AMD_DC)
if (adev->dc_enabled)
dtn_debugfs_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
index 371a6f0deb29..0425432d8659 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
@@ -32,3 +32,5 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev);
  void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
  void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
  void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev);
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 45280fb0e00c..b4ba556dc733 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -97,6 +97,26 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device 
*adev)
return 0;
  }
  
+static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)

+{
+   int r;
+
+   r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+   AMDGPU_GEM_DOMAIN_GTT,
+   &adev->mes.event_log_gpu_obj,
+   &adev->mes.event_log_gpu_addr,
+   &adev->mes.event_log_cpu_addr);
+   if (r) {
+   dev_warn(adev->dev, "failed to create MES event log buffer (%d)", r);
+   return r;
+   }
+
+   memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE);
+
+   return  0;
+
+}
+
  static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
  {
bitmap_free(adev->mes.doorbell_bitmap);
@@ -181,8 +201,14 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
if (r)
goto error;
  
+	r = amdgpu_mes_event_log_init(adev);

+   if (r)
+   goto error_doorbell;
+
return 0;
  
+error_doorbell:

+   amdgpu_mes_doorbell_free(adev);
  error:
amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
@@ -198,6 +224,10 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
  
  void amdgpu_mes_fini(struct amdgpu_device *adev)

  {
+   amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
+ &adev->mes.event_log_gpu_addr,
+ &adev->mes.event_log_cpu_addr);
+
amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
@@ -1483,3 +1513,34 @@ int amdgpu_mes_init_microcode(struct amdgpu_device 
*adev, int pipe)
amdgpu_ucode_release(&adev->mes.fw[pipe]);
return r;
  }
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
+{
+   struct amdgpu_device *adev = m->private;
+   uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
+
+   seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
+mem, PAGE_SIZE, false);
+
+   return 0;
+}
+
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
+
+#endif
+
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
+{
+
+#if defined(CONFIG_DEBUG_FS)
+   struct drm_minor *minor = adev_to_drm(adev)->primary;
+   struct dentry *root = minor->debugfs_root;
+
+   debugfs_create_file("amdgpu_mes_event_log", 0444, root,
+   adev, &amdgpu_debugfs_mes_event_log_fops);
+
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index a27b424ffe00..894b9b133000 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -133,6 +133,11 @@ struct amdgpu_mes {
uint32_t num_mes_dbs;
unsigned long   *doorbell_bitmap;
  
+	/* MES event log buffer */

+   struct amdgpu_bo

[PATCH] drm/amdgpu: SW part of MES event log enablement

2023-11-23 Thread shaoyunl
This is the generic SW part, prepare the event log buffer and dump it through 
debugfs

Signed-off-by: shaoyunl 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 61 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h |  5 ++
 4 files changed, 70 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index a53f436fa9f1..8b2cbeae99ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -2140,6 +2140,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
amdgpu_debugfs_firmware_init(adev);
amdgpu_ta_if_debugfs_init(adev);
 
+   amdgpu_debugfs_mes_event_log_init(adev);
+
 #if defined(CONFIG_DRM_AMD_DC)
if (adev->dc_enabled)
dtn_debugfs_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
index 371a6f0deb29..0425432d8659 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
@@ -32,3 +32,5 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev);
 void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
 void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
 void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev);
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 45280fb0e00c..b4ba556dc733 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -97,6 +97,26 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device 
*adev)
return 0;
 }
 
+static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
+{
+   int r;
+
+   r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+   AMDGPU_GEM_DOMAIN_GTT,
+   &adev->mes.event_log_gpu_obj,
+   &adev->mes.event_log_gpu_addr,
+   &adev->mes.event_log_cpu_addr);
+   if (r) {
+   dev_warn(adev->dev, "failed to create MES event log buffer (%d)", r);
+   return r;
+   }
+
+   memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE);
+
+   return  0;
+
+}
+
 static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
 {
bitmap_free(adev->mes.doorbell_bitmap);
@@ -181,8 +201,14 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
if (r)
goto error;
 
+   r = amdgpu_mes_event_log_init(adev);
+   if (r)
+   goto error_doorbell;
+
return 0;
 
+error_doorbell:
+   amdgpu_mes_doorbell_free(adev);
 error:
amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
@@ -198,6 +224,10 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
 
 void amdgpu_mes_fini(struct amdgpu_device *adev)
 {
+   amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
+ &adev->mes.event_log_gpu_addr,
+ &adev->mes.event_log_cpu_addr);
+
amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
@@ -1483,3 +1513,34 @@ int amdgpu_mes_init_microcode(struct amdgpu_device 
*adev, int pipe)
amdgpu_ucode_release(&adev->mes.fw[pipe]);
return r;
 }
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
+{
+   struct amdgpu_device *adev = m->private;
+   uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
+
+   seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
+mem, PAGE_SIZE, false);
+
+   return 0;
+}
+
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
+
+#endif
+
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
+{
+
+#if defined(CONFIG_DEBUG_FS)
+   struct drm_minor *minor = adev_to_drm(adev)->primary;
+   struct dentry *root = minor->debugfs_root;
+
+   debugfs_create_file("amdgpu_mes_event_log", 0444, root,
+   adev, &amdgpu_debugfs_mes_event_log_fops);
+
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index a27b424ffe00..894b9b133000 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -133,6 +133,11 @@ struct amdgpu_mes {
uint32_t num_mes_dbs;
unsigned long   *doorbell_bitmap;
 
+   /* MES event log buffer */
+   struct amdgpu_bo *event_log_gpu_obj;
+   uint64_t event_log_gpu_addr;
+   void  

Re: [PATCH] drm/amdgpu: SW part of MES event log enablement

2023-11-23 Thread Felix Kuehling



On 2023-11-23 14:12, shaoyunl wrote:

This is the generic SW part, prepare the event log buffer and dump it through 
debugfs

Signed-off-by: shaoyunl 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h |  2 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 61 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h |  5 ++
  4 files changed, 70 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index a53f436fa9f1..8b2cbeae99ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -2140,6 +2140,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
amdgpu_debugfs_firmware_init(adev);
amdgpu_ta_if_debugfs_init(adev);
  
+	amdgpu_debugfs_mes_event_log_init(adev);

+
  #if defined(CONFIG_DRM_AMD_DC)
if (adev->dc_enabled)
dtn_debugfs_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
index 371a6f0deb29..0425432d8659 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
@@ -32,3 +32,5 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev);
  void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
  void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
  void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev);
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 45280fb0e00c..b7af24d7db0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -97,6 +97,26 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device 
*adev)
return 0;
  }
  
+static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)

+{
+   int r;
+
+   r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+   AMDGPU_GEM_DOMAIN_GTT,
+   &adev->mes.event_log_gpu_obj,
+   &adev->mes.event_log_gpu_addr,
+   &adev->mes.event_log_cpu_addr);
+   if (r) {
+   dev_warn(adev->dev, "failed to create MES event log buffer (%d)", r);
+   return r;
+   }
+
+   memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE);
+
+   return  0;
+
+}
+
  static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
  {
bitmap_free(adev->mes.doorbell_bitmap);
@@ -181,6 +201,12 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
if (r)
goto error;
  
+	r = amdgpu_mes_event_log_init(adev);

+   if (r) {
+   amdgpu_mes_doorbell_free(adev);
+   goto error;


The usual preferred way of goto-error handling would be to add another 
error label and do all the cleanup in reverse order of initialization. 
Then just jump to the correct error label depending on where the error 
happened. So here you would goto error_doorbell. See below.




+   }
+
return 0;
  


So you'd create another error label here to handle the doorbell cleanup:

error_doorbell:
amdgpu_mes_doorbell_free(adev);

With that fixed, the patch is

Reviewed-by: Felix Kuehling 



  error:
@@ -198,6 +224,10 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
  
  void amdgpu_mes_fini(struct amdgpu_device *adev)

  {
+   amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
+ &adev->mes.event_log_gpu_addr,
+ &adev->mes.event_log_cpu_addr);
+
amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
@@ -1483,3 +1513,34 @@ int amdgpu_mes_init_microcode(struct amdgpu_device 
*adev, int pipe)
amdgpu_ucode_release(&adev->mes.fw[pipe]);
return r;
  }
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
+{
+   struct amdgpu_device *adev = m->private;
+   uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
+
+   seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
+mem, PAGE_SIZE, false);
+
+   return 0;
+}
+
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
+
+#endif
+
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
+{
+
+#if defined(CONFIG_DEBUG_FS)
+   struct drm_minor *minor = adev_to_drm(adev)->primary;
+   struct dentry *root = minor->debugfs_root;
+
+   debugfs_create_file("amdgpu_mes_event_log", 0444, root,
+   adev, &amdgpu_debugfs_mes_event_log_fops);
+
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index a27b424ffe00..894b9b133000 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h

[PATCH] drm/amdgpu: SW part of MES event log enablement

2023-11-23 Thread shaoyunl
This is the generic SW part, prepare the event log buffer and dump it through 
debugfs

Signed-off-by: shaoyunl 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 61 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h |  5 ++
 4 files changed, 70 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index a53f436fa9f1..8b2cbeae99ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -2140,6 +2140,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
amdgpu_debugfs_firmware_init(adev);
amdgpu_ta_if_debugfs_init(adev);
 
+   amdgpu_debugfs_mes_event_log_init(adev);
+
 #if defined(CONFIG_DRM_AMD_DC)
if (adev->dc_enabled)
dtn_debugfs_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
index 371a6f0deb29..0425432d8659 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
@@ -32,3 +32,5 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev);
 void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
 void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
 void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev);
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 45280fb0e00c..b7af24d7db0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -97,6 +97,26 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device 
*adev)
return 0;
 }
 
+static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
+{
+   int r;
+
+   r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+   AMDGPU_GEM_DOMAIN_GTT,
+   &adev->mes.event_log_gpu_obj,
+   &adev->mes.event_log_gpu_addr,
+   &adev->mes.event_log_cpu_addr);
+   if (r) {
+   dev_warn(adev->dev, "failed to create MES event log buffer (%d)", r);
+   return r;
+   }
+
+   memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE);
+
+   return  0;
+
+}
+
 static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
 {
bitmap_free(adev->mes.doorbell_bitmap);
@@ -181,6 +201,12 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
if (r)
goto error;
 
+   r = amdgpu_mes_event_log_init(adev);
+   if (r) {
+   amdgpu_mes_doorbell_free(adev);
+   goto error;
+   }
+
return 0;
 
 error:
@@ -198,6 +224,10 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
 
 void amdgpu_mes_fini(struct amdgpu_device *adev)
 {
+   amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
+ &adev->mes.event_log_gpu_addr,
+ &adev->mes.event_log_cpu_addr);
+
amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
@@ -1483,3 +1513,34 @@ int amdgpu_mes_init_microcode(struct amdgpu_device 
*adev, int pipe)
amdgpu_ucode_release(&adev->mes.fw[pipe]);
return r;
 }
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
+{
+   struct amdgpu_device *adev = m->private;
+   uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
+
+   seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
+mem, PAGE_SIZE, false);
+
+   return 0;
+}
+
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
+
+#endif
+
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
+{
+
+#if defined(CONFIG_DEBUG_FS)
+   struct drm_minor *minor = adev_to_drm(adev)->primary;
+   struct dentry *root = minor->debugfs_root;
+
+   debugfs_create_file("amdgpu_mes_event_log", 0444, root,
+   adev, &amdgpu_debugfs_mes_event_log_fops);
+
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index a27b424ffe00..894b9b133000 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -133,6 +133,11 @@ struct amdgpu_mes {
uint32_t num_mes_dbs;
unsigned long   *doorbell_bitmap;
 
+   /* MES event log buffer */
+   struct amdgpu_bo *event_log_gpu_obj;
+   uint64_t event_log_gpu_addr;
+   void *event_log_cpu_addr;
+
/* ip specific functions */
const struct amdgpu_mes_funcs   *funcs;
 };