From: Mukul Joshi <[email protected]>

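This patch fixes the CU info calculations for gfx 12.1:

- Compute the active CU bitmap per SH directly from the inactive bitmap,
  masked to max_cu_per_sh bits, instead of expanding a WGP bitmap into
  two CUs per WGP.
- Size and parse the disable-CU masks using the configured number of SEs
  and SAs per SE (at most 2 x 2), and return -EINVAL if the configuration
  exceeds that.
- Store the bitmap per XCC in cu_info->bitmap[xcc_id][i][j].
- Report 4 SIMDs per CU.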

Signed-off-by: Mukul Joshi <[email protected]>
Reviewed-by: Lijo Lazar <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c | 78 +++++++++-----------------
 1 file changed, 27 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
index a8f020a375c92..f5a7ccf9e02d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
@@ -46,6 +46,7 @@
 #include "mes_v12_1.h"
 
 #define GFX12_MEC_HPD_SIZE     2048
+#define NUM_SIMD_PER_CU_GFX12_1        4
 
 #define RLCG_UCODE_LOADING_START_ADDRESS       0x00002000L
 
@@ -69,9 +70,6 @@ static int gfx_v12_1_get_cu_info(struct amdgpu_device *adev,
 static uint64_t gfx_v12_1_get_gpu_clock_counter(struct amdgpu_device *adev);
 static void gfx_v12_1_xcc_select_se_sh(struct amdgpu_device *adev, u32 se_num,
                                       u32 sh_num, u32 instance, int xcc_id);
-static u32 gfx_v12_1_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev,
-                                                 int xcc_id);
-
 static void gfx_v12_1_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
                                     uint32_t val);
 static int gfx_v12_1_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
@@ -3804,7 +3802,7 @@ static void gfx_v12_1_set_mqd_funcs(struct amdgpu_device *adev)
                gfx_v12_1_compute_mqd_init;
 }
 
-static void gfx_v12_1_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
+static void gfx_v12_1_set_user_cu_inactive_bitmap_per_sh(struct amdgpu_device *adev,
                                                          u32 bitmap, int xcc_id)
 {
        u32 data;
@@ -3818,39 +3816,20 @@ static void gfx_v12_1_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *
        WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG, data);
 }
 
-static u32 gfx_v12_1_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev,
-                                                 int xcc_id)
+static u32 gfx_v12_1_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev,
+                                                int xcc_id)
 {
-       u32 data, wgp_bitmask;
+       u32 data, mask;
+
        data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SHADER_ARRAY_CONFIG);
        data |= RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG);
 
        data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
        data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
 
-       wgp_bitmask =
-               amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
-
-       return (~data) & wgp_bitmask;
-}
-
-static u32 gfx_v12_1_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev,
-                                                int xcc_id)
-{
-       u32 wgp_idx, wgp_active_bitmap;
-       u32 cu_bitmap_per_wgp, cu_active_bitmap;
-
-       wgp_active_bitmap = gfx_v12_1_get_wgp_active_bitmap_per_sh(adev, xcc_id);
-       cu_active_bitmap = 0;
+       mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
 
-       for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
-               /* if there is one WGP enabled, it means 2 CUs will be enabled */
-               cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
-               if (wgp_active_bitmap & (1 << wgp_idx))
-                       cu_active_bitmap |= cu_bitmap_per_wgp;
-       }
-
-       return cu_active_bitmap;
+       return (~data) & mask;
 }
 
 static int gfx_v12_1_get_cu_info(struct amdgpu_device *adev,
@@ -3858,12 +3837,23 @@ static int gfx_v12_1_get_cu_info(struct amdgpu_device *adev,
 {
        int i, j, k, counter, xcc_id, active_cu_number = 0;
        u32 mask, bitmap;
-       unsigned disable_masks[8 * 2];
+       unsigned int disable_masks[2 * 2];
 
        if (!adev || !cu_info)
                return -EINVAL;
 
-       amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
+       if (adev->gfx.config.max_shader_engines > 2 ||
+           adev->gfx.config.max_sh_per_se > 2) {
+               dev_err(adev->dev,
+                       "Max SE (%d) and Max SA per SE (%d) is greater than expected\n",
+                       adev->gfx.config.max_shader_engines,
+                       adev->gfx.config.max_sh_per_se);
+               return -EINVAL;
+       }
+
+       amdgpu_gfx_parse_disable_cu(disable_masks,
+                                   adev->gfx.config.max_shader_engines,
+                                   adev->gfx.config.max_sh_per_se);
 
        mutex_lock(&adev->grbm_idx_mutex);
        for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
@@ -3875,27 +3865,13 @@ static int gfx_v12_1_get_cu_info(struct amdgpu_device *adev,
                                mask = 1;
                                counter = 0;
                                gfx_v12_1_xcc_select_se_sh(adev, i, j, 0xffffffff, xcc_id);
-                               if (i < 8 && j < 2)
-                                       gfx_v12_1_set_user_wgp_inactive_bitmap_per_sh(
-                                               adev, disable_masks[i * 2 + j], xcc_id);
+                               gfx_v12_1_set_user_cu_inactive_bitmap_per_sh(
+                                       adev,
+                                       disable_masks[i * adev->gfx.config.max_sh_per_se + j],
+                                       xcc_id);
                                bitmap = gfx_v12_1_get_cu_active_bitmap_per_sh(adev, xcc_id);
 
-                               /**
-                                * GFX12 could support more than 4 SEs, while the bitmap
-                                * in cu_info struct is 4x4 and ioctl interface struct
-                                * drm_amdgpu_info_device should keep stable.
-                                * So we use last two columns of bitmap to store cu mask for
-                                * SEs 4 to 7, the layout of the bitmap is as below:
-                                *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
-                                *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
-                                *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
-                                *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
-                                *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
-                                *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
-                                *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
-                                *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
-                                */
-                               cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
+                               cu_info->bitmap[xcc_id][i][j] = bitmap;
 
                                for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
                                        if (bitmap & mask)
@@ -3911,7 +3887,7 @@ static int gfx_v12_1_get_cu_info(struct amdgpu_device *adev,
        mutex_unlock(&adev->grbm_idx_mutex);
 
        cu_info->number = active_cu_number;
-       cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+       cu_info->simd_per_cu = NUM_SIMD_PER_CU_GFX12_1;
        cu_info->lds_size = 320;
 
        return 0;
-- 
2.52.0

Reply via email to