- Extend amdgpu_mqd_prop with CU mask fields and debug WA flags
- Implement GFX11 CU mask mapping (WGP mode) and MQD setters for CU mask/priority
- Integrate setters into gfx_v11_0_compute_mqd_update() to apply runtime changes

Signed-off-by: Jesse Zhang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  4 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 82 ++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 64e69f0f9a02..246d74205b48 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -809,6 +809,10 @@ struct amdgpu_mqd_prop {
        uint64_t fence_address;
        bool tmz_queue;
        bool kernel_queue;
+       uint32_t *cu_mask;
+       uint32_t cu_mask_count;
+       uint32_t cu_flags;
+       bool is_user_cu_masked;
 };
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index a47c91d33846..d807d77c56b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -4238,6 +4238,87 @@ static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
        return gfx_v11_0_cp_gfx_start(adev);
 }
 
+/**
+ * gfx_v11_0_mqd_symmetrically_map_cu_mask - distribute a linear CU mask over SEs
+ * @adev: amdgpu device pointer
+ * @cu_mask: array of 32-bit words holding the requested CU bits; may be NULL
+ * @cu_mask_count: number of 32-bit words in @cu_mask
+ * @se_mask: output array with one 32-bit mask per shader engine; it must have
+ *           room for adev->gfx.config.max_shader_engines entries
+ *
+ * Visits the CU pairs (WGPs) counted from adev->gfx.cu_info.bitmap in the order
+ * "next SE, then next SH, then next CU pair" and gives each visited pair the
+ * next examined bit of @cu_mask. Examined bits are 0, stride, 2 * stride, ...
+ * where stride is 2 * (number of bits set in adev->gfx.xcc_mask, at least 1).
+ * A set bit enables both CUs of the pair. The walk stops once @cu_mask has no
+ * more bits to examine.
+ *
+ * In every se_mask[] word, shader array N owns bits [16 * N, 16 * N + 15].
+ *
+ * If the reported topology does not fit that layout, an error is logged and
+ * @se_mask is left untouched.
+ */
+static void gfx_v11_0_mqd_symmetrically_map_cu_mask(struct amdgpu_device *adev,
+                                                   const uint32_t *cu_mask,
+                                                   uint32_t cu_mask_count,
+                                                   uint32_t *se_mask)
+{
+       enum {
+               CU_MAP_MAX_SE = 8,              /* rows of cu_per_sh[] */
+               CU_MAP_BITS_PER_SH = 16,        /* one SH field in se_mask[] */
+               /* a 32-bit se_mask[] word only has room for this many SHs */
+               CU_MAP_MAX_SH_PER_SE = 32 / CU_MAP_BITS_PER_SH,
+       };
+       struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+       struct amdgpu_gfx_config *gfx_info = &adev->gfx.config;
+       uint32_t cu_per_sh[CU_MAP_MAX_SE][CU_MAP_MAX_SH_PER_SE] = {0};
+       const int cu_bitmap_sh_mul = 2;
+       const int cu_inc = 2; /* WGP mode: CUs are handled in pairs */
+       const uint32_t en_mask = 3; /* both CUs of one pair */
+       int xcc_inst = ffs(adev->gfx.xcc_mask) - 1;
+       int num_xcc = hweight16(adev->gfx.xcc_mask);
+       int se, sh, cu;
+       uint32_t bit, inc;
+
+       /*
+        * Refuse topologies that would index past cu_per_sh[] or shift an
+        * enable mask beyond bit 31 of a se_mask[] word.
+        */
+       if (gfx_info->max_shader_engines > CU_MAP_MAX_SE ||
+           gfx_info->max_sh_per_se > CU_MAP_MAX_SH_PER_SE) {
+               dev_err(adev->dev,
+                       "unsupported topology for CU masking: %u SEs, %u SHs per SE\n",
+                       gfx_info->max_shader_engines,
+                       gfx_info->max_sh_per_se);
+               return;
+       }
+
+       if (xcc_inst < 0)
+               xcc_inst = 0;
+       if (!num_xcc)
+               num_xcc = 1;
+       inc = cu_inc * num_xcc;
+
+       /*
+        * Count the CUs of every SE/SH and clear the output masks. The bitmap
+        * is indexed so that SE4 and above share the row of (se % 4), offset
+        * by cu_bitmap_sh_mul columns.
+        */
+       for (se = 0; se < gfx_info->max_shader_engines; se++) {
+               se_mask[se] = 0;
+               for (sh = 0; sh < gfx_info->max_sh_per_se; sh++)
+                       cu_per_sh[se][sh] = hweight32(
+                               cu_info->bitmap[xcc_inst][se % 4]
+                                              [sh + (se / 4) * cu_bitmap_sh_mul]);
+       }
+
+       if (!cu_mask)
+               return;
+
+       bit = 0;
+       for (cu = 0; cu < CU_MAP_BITS_PER_SH; cu += cu_inc) {
+               for (sh = 0; sh < gfx_info->max_sh_per_se; sh++) {
+                       for (se = 0; se < gfx_info->max_shader_engines; se++) {
+                               /* this SE/SH has no CU pair at index cu */
+                               if (cu_per_sh[se][sh] <= cu)
+                                       continue;
+                               /* user mask exhausted, nothing left to map */
+                               if (bit / 32 >= cu_mask_count)
+                                       return;
+                               if (cu_mask[bit / 32] & (1U << (bit % 32)))
+                                       se_mask[se] |= en_mask <<
+                                               (cu + sh * CU_MAP_BITS_PER_SH);
+                               bit += inc;
+                       }
+               }
+       }
+}
+
+/*
+ * Program the compute_static_thread_mgmt_se0..se3 fields of a GFX11 compute
+ * MQD from the queue properties.
+ *
+ * When either debug workaround flag is set in prop->cu_flags, all four fields
+ * get one fixed value (0xffff if the ENABLE flag is set, 0xffffffff otherwise)
+ * and prop->cu_mask is ignored. Otherwise, if a CU mask is present, it is
+ * mapped onto the shader engines and the result for SE0..SE3 is written. With
+ * neither a flag nor a mask the MQD is left unchanged.
+ *
+ * NOTE(review): the mapper can fill up to eight per-SE masks but only the
+ * first four are programmed here - confirm that is intended for parts that
+ * report more than four shader engines.
+ */
+static void gfx_v11_0_compute_mqd_set_cu_mask(struct amdgpu_device *adev,
+                                             struct v11_compute_mqd *mqd,
+                                             struct amdgpu_mqd_prop *prop)
+{
+       uint32_t per_se[8] = {0};
+
+       /* Debug workaround request takes precedence over any user CU mask. */
+       if (prop->cu_flags & (AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE |
+                             AMDGPU_UPDATE_FLAG_DBG_WA_DISABLE)) {
+               uint32_t fixed;
+
+               if (prop->cu_flags & AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE)
+                       fixed = 0xffff;
+               else
+                       fixed = 0xffffffff;
+
+               mqd->compute_static_thread_mgmt_se0 = fixed;
+               mqd->compute_static_thread_mgmt_se1 = fixed;
+               mqd->compute_static_thread_mgmt_se2 = fixed;
+               mqd->compute_static_thread_mgmt_se3 = fixed;
+               return;
+       }
+
+       /* No mask supplied: keep whatever the MQD already holds. */
+       if (!prop->cu_mask || !prop->cu_mask_count)
+               return;
+
+       gfx_v11_0_mqd_symmetrically_map_cu_mask(adev, prop->cu_mask,
+                                               prop->cu_mask_count, per_se);
+
+       mqd->compute_static_thread_mgmt_se0 = per_se[0];
+       mqd->compute_static_thread_mgmt_se1 = per_se[1];
+       mqd->compute_static_thread_mgmt_se2 = per_se[2];
+       mqd->compute_static_thread_mgmt_se3 = per_se[3];
+}
+
 /**
  * gfx_v11_0_compute_update_queue - Update runtime-configurable queue parameters
  * @adev: amdgpu device pointer
@@ -4278,6 +4359,7 @@ static int gfx_v11_0_compute_mqd_update(struct amdgpu_device *adev, void *m,
        mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
 
        mqd->cp_hqd_active = prop->hqd_active;
+       gfx_v11_0_compute_mqd_set_cu_mask(adev, mqd, prop);
 
        return 0;
 }
-- 
2.49.0

Reply via email to