Re: [PATCH v4 04/10] drm/amdgpu: Create hqd info structure

2026-04-13 Thread Alex Deucher
On Mon, Apr 13, 2026 at 2:34 PM Amber Lin  wrote:
>
> Create hung_queue_hqd_info structure and fill in hung queue information
> passed by MES, including queue type, pipe id, and queue id.
>
> Suggested-by: Jonathan Kim 
> Signed-off-by: Amber Lin 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 13 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 13 +
>  2 files changed, 17 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> index f1f8bbfc31e0..ae42fbaba34f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> @@ -447,7 +447,7 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct 
> amdgpu_device *adev,
>  {
> struct mes_detect_and_reset_queue_input input;
> u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr[xcc_id];
> -   int r, i;
> +   int hqd_info_offset = adev->mes.hung_queue_hqd_info_offset, r, i;
>
> if (!hung_db_num || !hung_db_array)
> return -EINVAL;
> @@ -481,18 +481,13 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct 
> amdgpu_device *adev,
> }
> }
>
> -   if (r && !hung_db_num) {
> +   if (r && !(*hung_db_num)) {
> dev_err(adev->dev, "Failed to detect and reset hung 
> queues\n");
> return r;
> }
>
> -   /*
> -* TODO: return HQD info for MES scheduled user compute queue reset 
> cases
> -* stored in hung_db_array hqd info offset to full array size
> -*/
> -
> -   if (r)
> -   dev_err(adev->dev, "failed to reset\n");
> +   for (i = hqd_info_offset; i < hqd_info_offset + *hung_db_num; i++)
> +   hung_db_array[i] = db_array[i];
>
> return r;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> index f80e3aca9c78..2e6ae9f84db0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> @@ -170,6 +170,19 @@ struct amdgpu_mes {
> uint64_t
> shared_cmd_buf_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];
>  };
>
> +struct amdgpu_mes_hung_queue_hqd_info {
> +   union {
> +   struct {
> +   uint32_t queue_type: 3; // queue type
> +   uint32_t pipe_index: 4; // pipe index
> +   uint32_t queue_index: 8; // queue index
> +   uint32_t reserved: 17;
> +   };
> +
> +   uint32_t bit0_31;
> +   };
> +};
> +
>  struct amdgpu_mes_gang {
> int gang_id;
> int priority;
> --
> 2.43.0
>


[PATCH v4 04/10] drm/amdgpu: Create hqd info structure

2026-04-13 Thread Amber Lin
Create hung_queue_hqd_info structure and fill in hung queue information
passed by MES, including queue type, pipe id, and queue id.

Suggested-by: Jonathan Kim 
Signed-off-by: Amber Lin 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 13 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 13 +
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index f1f8bbfc31e0..ae42fbaba34f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -447,7 +447,7 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct 
amdgpu_device *adev,
 {
struct mes_detect_and_reset_queue_input input;
u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr[xcc_id];
-   int r, i;
+   int hqd_info_offset = adev->mes.hung_queue_hqd_info_offset, r, i;
 
if (!hung_db_num || !hung_db_array)
return -EINVAL;
@@ -481,18 +481,13 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct 
amdgpu_device *adev,
}
}
 
-   if (r && !hung_db_num) {
+   if (r && !(*hung_db_num)) {
dev_err(adev->dev, "Failed to detect and reset hung queues\n");
return r;
}
 
-   /*
-* TODO: return HQD info for MES scheduled user compute queue reset 
cases
-* stored in hung_db_array hqd info offset to full array size
-*/
-
-   if (r)
-   dev_err(adev->dev, "failed to reset\n");
+   for (i = hqd_info_offset; i < hqd_info_offset + *hung_db_num; i++)
+   hung_db_array[i] = db_array[i];
 
return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index f80e3aca9c78..2e6ae9f84db0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -170,6 +170,19 @@ struct amdgpu_mes {
uint64_tshared_cmd_buf_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];
 };
 
+struct amdgpu_mes_hung_queue_hqd_info {
+   union {
+   struct {
+   uint32_t queue_type: 3; // queue type
+   uint32_t pipe_index: 4; // pipe index
+   uint32_t queue_index: 8; // queue index
+   uint32_t reserved: 17;
+   };
+
+   uint32_t bit0_31;
+   };
+};
+
 struct amdgpu_mes_gang {
int gang_id;
int priority;
-- 
2.43.0



[PATCH v4 04/10] drm/amdgpu: Create hqd info structure

2026-04-01 Thread Amber Lin
Create hung_queue_hqd_info structure and fill in hung queue information
passed by MES, including queue type, pipe id, and queue id.

Suggested-by: Jonathan Kim 
Signed-off-by: Amber Lin 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 20 
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 13 +
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index f1f8bbfc31e0..47c989980824 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -447,7 +447,7 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct 
amdgpu_device *adev,
 {
struct mes_detect_and_reset_queue_input input;
u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr[xcc_id];
-   int r, i;
+   int hqd_info_offset = adev->mes.hung_queue_hqd_info_offset, r, i;
 
if (!hung_db_num || !hung_db_array)
return -EINVAL;
@@ -466,8 +466,9 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct 
amdgpu_device *adev,
r = adev->mes.funcs->detect_and_reset_hung_queues(&adev->mes,
  &input);
 
-   if (r && detect_only) {
-   dev_err(adev->dev, "Failed to detect hung queues\n");
+   if (r) {
+   dev_warn(adev->dev, "Failed to %s hung queues\n",
+   detect_only? "detect" : "reset");
return r;
}
 
@@ -481,18 +482,13 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct 
amdgpu_device *adev,
}
}
 
-   if (r && !hung_db_num) {
-   dev_err(adev->dev, "Failed to detect and reset hung queues\n");
+   if (!hung_db_num) {
+   dev_warn(adev->dev, "No hung queues info from MES\n");
return r;
}
 
-   /*
-* TODO: return HQD info for MES scheduled user compute queue reset 
cases
-* stored in hung_db_array hqd info offset to full array size
-*/
-
-   if (r)
-   dev_err(adev->dev, "failed to reset\n");
+   for (i = hqd_info_offset; i < hqd_info_offset + *hung_db_num; i++)
+   hung_db_array[i] = db_array[i];
 
return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index f80e3aca9c78..2e6ae9f84db0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -170,6 +170,19 @@ struct amdgpu_mes {
uint64_tshared_cmd_buf_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];
 };
 
+struct amdgpu_mes_hung_queue_hqd_info {
+   union {
+   struct {
+   uint32_t queue_type: 3; // queue type
+   uint32_t pipe_index: 4; // pipe index
+   uint32_t queue_index: 8; // queue index
+   uint32_t reserved: 17;
+   };
+
+   uint32_t bit0_31;
+   };
+};
+
 struct amdgpu_mes_gang {
int gang_id;
int priority;
-- 
2.43.0