Re: [PATCH v4 04/10] drm/amdgpu: Create hqd info structure
On Mon, Apr 13, 2026 at 2:34 PM Amber Lin wrote:
>
> Create hung_queue_hqd_info structure and fill in hung queue information
> passed by MES, including queue type, pipe id, and queue id.
>
> Suggested-by: Jonathan Kim
> Signed-off-by: Amber Lin
Reviewed-by: Alex Deucher
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 13 -
> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 13 +
> 2 files changed, 17 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> index f1f8bbfc31e0..ae42fbaba34f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> @@ -447,7 +447,7 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct
> amdgpu_device *adev,
> {
> struct mes_detect_and_reset_queue_input input;
> u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr[xcc_id];
> - int r, i;
> + int hqd_info_offset = adev->mes.hung_queue_hqd_info_offset, r, i;
>
> if (!hung_db_num || !hung_db_array)
> return -EINVAL;
> @@ -481,18 +481,13 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct
> amdgpu_device *adev,
> }
> }
>
> - if (r && !hung_db_num) {
> + if (r && !(*hung_db_num)) {
> dev_err(adev->dev, "Failed to detect and reset hung
> queues\n");
> return r;
> }
>
> - /*
> -* TODO: return HQD info for MES scheduled user compute queue reset
> cases
> -* stored in hung_db_array hqd info offset to full array size
> -*/
> -
> - if (r)
> - dev_err(adev->dev, "failed to reset\n");
> + for (i = hqd_info_offset; i < hqd_info_offset + *hung_db_num; i++)
> + hung_db_array[i] = db_array[i];
>
> return r;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> index f80e3aca9c78..2e6ae9f84db0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> @@ -170,6 +170,19 @@ struct amdgpu_mes {
> uint64_t
> shared_cmd_buf_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];
> };
>
> +struct amdgpu_mes_hung_queue_hqd_info {
> + union {
> + struct {
> + uint32_t queue_type: 3; // queue type
> + uint32_t pipe_index: 4; // pipe index
> + uint32_t queue_index: 8; // queue index
> + uint32_t reserved: 17;
> + };
> +
> + uint32_t bit0_31;
> + };
> +};
> +
> struct amdgpu_mes_gang {
> int gang_id;
> int priority;
> --
> 2.43.0
>
[PATCH v4 04/10] drm/amdgpu: Create hqd info structure
Create hung_queue_hqd_info structure and fill in hung queue information
passed by MES, including queue type, pipe id, and queue id.
Suggested-by: Jonathan Kim
Signed-off-by: Amber Lin
---
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 13 -
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 13 +
2 files changed, 17 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index f1f8bbfc31e0..ae42fbaba34f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -447,7 +447,7 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct
amdgpu_device *adev,
{
struct mes_detect_and_reset_queue_input input;
u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr[xcc_id];
- int r, i;
+ int hqd_info_offset = adev->mes.hung_queue_hqd_info_offset, r, i;
if (!hung_db_num || !hung_db_array)
return -EINVAL;
@@ -481,18 +481,13 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct
amdgpu_device *adev,
}
}
- if (r && !hung_db_num) {
+ if (r && !(*hung_db_num)) {
dev_err(adev->dev, "Failed to detect and reset hung queues\n");
return r;
}
- /*
-* TODO: return HQD info for MES scheduled user compute queue reset
cases
-* stored in hung_db_array hqd info offset to full array size
-*/
-
- if (r)
- dev_err(adev->dev, "failed to reset\n");
+ for (i = hqd_info_offset; i < hqd_info_offset + *hung_db_num; i++)
+ hung_db_array[i] = db_array[i];
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index f80e3aca9c78..2e6ae9f84db0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -170,6 +170,19 @@ struct amdgpu_mes {
uint64_t shared_cmd_buf_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];
};
+struct amdgpu_mes_hung_queue_hqd_info {
+ union {
+ struct {
+ uint32_t queue_type: 3; // queue type
+ uint32_t pipe_index: 4; // pipe index
+ uint32_t queue_index: 8; // queue index
+ uint32_t reserved: 17;
+ };
+
+ uint32_t bit0_31;
+ };
+};
+
struct amdgpu_mes_gang {
int gang_id;
int priority;
--
2.43.0
[PATCH v4 04/10] drm/amdgpu: Create hqd info structure
Create hung_queue_hqd_info structure and fill in hung queue information
passed by MES, including queue type, pipe id, and queue id.
Suggested-by: Jonathan Kim
Signed-off-by: Amber Lin
---
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 20
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 13 +
2 files changed, 21 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index f1f8bbfc31e0..47c989980824 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -447,7 +447,7 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct
amdgpu_device *adev,
{
struct mes_detect_and_reset_queue_input input;
u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr[xcc_id];
- int r, i;
+ int hqd_info_offset = adev->mes.hung_queue_hqd_info_offset, r, i;
if (!hung_db_num || !hung_db_array)
return -EINVAL;
@@ -466,8 +466,9 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct
amdgpu_device *adev,
r = adev->mes.funcs->detect_and_reset_hung_queues(&adev->mes,
&input);
- if (r && detect_only) {
- dev_err(adev->dev, "Failed to detect hung queues\n");
+ if (r) {
+ dev_warn(adev->dev, "Failed to %s hung queues\n",
+ detect_only? "detect" : "reset");
return r;
}
@@ -481,18 +482,13 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct
amdgpu_device *adev,
}
}
- if (r && !hung_db_num) {
- dev_err(adev->dev, "Failed to detect and reset hung queues\n");
+ if (!hung_db_num) {
+ dev_warn(adev->dev, "No hung queues info from MES\n");
return r;
}
- /*
-* TODO: return HQD info for MES scheduled user compute queue reset
cases
-* stored in hung_db_array hqd info offset to full array size
-*/
-
- if (r)
- dev_err(adev->dev, "failed to reset\n");
+ for (i = hqd_info_offset; i < hqd_info_offset + *hung_db_num; i++)
+ hung_db_array[i] = db_array[i];
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index f80e3aca9c78..2e6ae9f84db0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -170,6 +170,19 @@ struct amdgpu_mes {
uint64_t shared_cmd_buf_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];
};
+struct amdgpu_mes_hung_queue_hqd_info {
+ union {
+ struct {
+ uint32_t queue_type: 3; // queue type
+ uint32_t pipe_index: 4; // pipe index
+ uint32_t queue_index: 8; // queue index
+ uint32_t reserved: 17;
+ };
+
+ uint32_t bit0_31;
+ };
+};
+
struct amdgpu_mes_gang {
int gang_id;
int priority;
--
2.43.0
