By design the MES will return an array result that is twice the number
of hung doorbells it can report.

i.e. if up to k reported doorbells are supported, then the
second half of the array, also of length k, holds the HQD information
(type/queue/pipe) where queue 1 corresponds to index 0 and k,
queue 2 corresponds to index 1 and k + 1 etc ...

The driver will use the HQD info to target queue/pipe reset for
hardware scheduled user compute queues.

v2: only extend memory to accommodate HQD info for now as it will
be used later.

Signed-off-by: Jonathan Kim <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c    | 7 ++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h    | 1 +
 drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 6 +++---
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c     | 8 +++++---
 drivers/gpu/drm/amd/amdgpu/mes_v12_0.c     | 8 +++++---
 5 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 01ac942c1bb1..45bf8309cb37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -420,12 +420,17 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct 
amdgpu_device *adev,
                dev_err(adev->dev, "failed to detect and reset\n");
        } else {
                *hung_db_num = 0;
-               for (i = 0; i < adev->mes.hung_queue_db_array_size; i++) {
+               for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) {
                        if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) {
                                hung_db_array[i] = db_array[i];
                                *hung_db_num += 1;
                        }
                }
+
+               /*
+                * TODO: return HQD info for MES scheduled user compute queue 
reset cases
+                * stored in hung_db_array hqd info offset to full array size
+                */
        }
 
        return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 6b506fc72f58..97c137c90f97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -149,6 +149,7 @@ struct amdgpu_mes {
        void                *resource_1_addr[AMDGPU_MAX_MES_PIPES];
 
        int                             hung_queue_db_array_size;
+       int                             hung_queue_hqd_info_offset;
        struct amdgpu_bo                *hung_queue_db_array_gpu_obj;
        uint64_t                        hung_queue_db_array_gpu_addr;
        void                            *hung_queue_db_array_cpu_addr;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c 
b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index 829129ad7bd1..5c63480dda9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -208,10 +208,10 @@ static int mes_userq_detect_and_reset(struct 
amdgpu_device *adev,
        struct amdgpu_userq_mgr *uqm, *tmp;
        unsigned int hung_db_num = 0;
        int queue_id, r, i;
-       u32 db_array[4];
+       u32 db_array[8];
 
-       if (db_array_size > 4) {
-               dev_err(adev->dev, "DB array size (%d vs 4) too small\n",
+       if (db_array_size > 8) {
+               dev_err(adev->dev, "DB array size (%d vs 8) too small\n",
                        db_array_size);
                return -EINVAL;
        }
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index e82188431f79..da575bb1377f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -66,7 +66,8 @@ static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev);
 #define GFX_MES_DRAM_SIZE      0x80000
 #define MES11_HW_RESOURCE_1_SIZE (128 * AMDGPU_GPU_PAGE_SIZE)
 
-#define MES11_HUNG_DB_OFFSET_ARRAY_SIZE 4
+#define MES11_HUNG_DB_OFFSET_ARRAY_SIZE 8 /* [0:3] = db offset, [4:7] = hqd 
info */
+#define MES11_HUNG_HQD_INFO_OFFSET     4
 
 static void mes_v11_0_ring_set_wptr(struct amdgpu_ring *ring)
 {
@@ -1720,8 +1721,9 @@ static int mes_v11_0_early_init(struct amdgpu_ip_block 
*ip_block)
        struct amdgpu_device *adev = ip_block->adev;
        int pipe, r;
 
-       adev->mes.hung_queue_db_array_size =
-               MES11_HUNG_DB_OFFSET_ARRAY_SIZE;
+       adev->mes.hung_queue_db_array_size = MES11_HUNG_DB_OFFSET_ARRAY_SIZE;
+       adev->mes.hung_queue_hqd_info_offset = MES11_HUNG_HQD_INFO_OFFSET;
+
        for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
                if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
                        continue;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index e3149196143e..7f3512d9de07 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -47,7 +47,8 @@ static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev);
 
 #define MES_EOP_SIZE   2048
 
-#define MES12_HUNG_DB_OFFSET_ARRAY_SIZE 4
+#define MES12_HUNG_DB_OFFSET_ARRAY_SIZE 8 /* [0:3] = db offset [4:7] hqd info 
*/
+#define MES12_HUNG_HQD_INFO_OFFSET     4
 
 static void mes_v12_0_ring_set_wptr(struct amdgpu_ring *ring)
 {
@@ -1904,8 +1905,9 @@ static int mes_v12_0_early_init(struct amdgpu_ip_block 
*ip_block)
        struct amdgpu_device *adev = ip_block->adev;
        int pipe, r;
 
-       adev->mes.hung_queue_db_array_size =
-               MES12_HUNG_DB_OFFSET_ARRAY_SIZE;
+       adev->mes.hung_queue_db_array_size = MES12_HUNG_DB_OFFSET_ARRAY_SIZE;
+       adev->mes.hung_queue_hqd_info_offset = MES12_HUNG_HQD_INFO_OFFSET;
+
        for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
                r = amdgpu_mes_init_microcode(adev, pipe);
                if (r)
-- 
2.34.1

Reply via email to