- Extend amdgpu_mqd_prop with CU mask fields and debug WA flags
- Implement GFX11 CU mask mapping (WGP mode) and MQD setters for CU mask/priority
- Integrate setters into gfx_v11_0_compute_mqd_update() to apply runtime changes

Signed-off-by: Jesse Zhang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    |   4 +
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 118 ++++++++++++++++++++++++++
 2 files changed, 122 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 64e69f0f9a02..246d74205b48 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -809,6 +809,10 @@ struct amdgpu_mqd_prop {
 	uint64_t fence_address;
 	bool tmz_queue;
 	bool kernel_queue;
+	uint32_t *cu_mask;
+	uint32_t cu_mask_count;
+	uint32_t cu_flags;
+	bool is_user_cu_masked;
 };
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index a47c91d33846..d807d77c56b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -4238,6 +4238,123 @@ static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
 	return gfx_v11_0_cp_gfx_start(adev);
 }
 
+/**
+ * gfx_v11_0_mqd_symmetrically_map_cu_mask - spread a CU mask across SEs
+ * @adev: amdgpu device pointer
+ * @cu_mask: CU bitmask to map, as an array of 32-bit words
+ * @cu_mask_count: number of 32-bit words in @cu_mask
+ * @se_mask: output array, one 32-bit mask per shader engine
+ *
+ * Visits the active CUs in (cu, sh, se) order, two CUs at a time (WGP
+ * mode). For each pair, bit i of @cu_mask is tested and i then advances by
+ * 2 * (number of XCCs). A set bit enables both CUs of the pair in
+ * @se_mask[se], where each SH occupies 16 bits.
+ */
+static void gfx_v11_0_mqd_symmetrically_map_cu_mask(struct amdgpu_device *adev,
+						    const uint32_t *cu_mask,
+						    uint32_t cu_mask_count,
+						    uint32_t *se_mask)
+{
+	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+	struct amdgpu_gfx_config *gfx_info = &adev->gfx.config;
+	uint32_t cu_per_sh[8][4] = {0};
+	int i, se, sh, cu, cu_bitmap_sh_mul;
+	int xcc_inst = ffs(adev->gfx.xcc_mask) - 1;
+	int cu_inc = 2; /* WGP mode */
+	int num_xcc, inc, inst = 0;
+	uint32_t en_mask = 3;
+
+	/*
+	 * cu_per_sh[] holds 8 SEs and each SH takes 16 bits of a 32-bit
+	 * se_mask[] word, so larger topologies would overflow the array or
+	 * shift past bit 31. Leave @se_mask untouched in that case.
+	 */
+	if (WARN_ON_ONCE(gfx_info->max_shader_engines > ARRAY_SIZE(cu_per_sh) ||
+			 gfx_info->max_sh_per_se > 2))
+		return;
+
+	if (xcc_inst < 0)
+		xcc_inst = 0;
+
+	num_xcc = hweight16(adev->gfx.xcc_mask);
+	if (!num_xcc)
+		num_xcc = 1;
+
+	inc = cu_inc * num_xcc;
+
+	cu_bitmap_sh_mul = 2;
+
+	for (se = 0; se < gfx_info->max_shader_engines; se++)
+		for (sh = 0; sh < gfx_info->max_sh_per_se; sh++)
+			cu_per_sh[se][sh] = hweight32(
+				cu_info->bitmap[xcc_inst][se % 4][sh + (se / 4) *
+				cu_bitmap_sh_mul]);
+
+	for (i = 0; i < gfx_info->max_shader_engines; i++)
+		se_mask[i] = 0;
+
+	i = inst;
+	for (cu = 0; cu < 16; cu += cu_inc) {
+		for (sh = 0; sh < gfx_info->max_sh_per_se; sh++) {
+			for (se = 0; se < gfx_info->max_shader_engines; se++) {
+				if (cu_per_sh[se][sh] > cu) {
+					if ((i / 32) < cu_mask_count && (cu_mask[i / 32] & (1U << (i % 32))))
+						se_mask[se] |= en_mask << (cu + sh * 16);
+					i += inc;
+					if (i >= cu_mask_count * 32)
+						return;
+				}
+			}
+		}
+	}
+}
+
+/**
+ * gfx_v11_0_compute_mqd_set_cu_mask - program the static CU masks of an MQD
+ * @adev: amdgpu device pointer
+ * @mqd: compute MQD to update
+ * @prop: queue properties carrying the CU mask and debug workaround flags
+ *
+ * Does nothing when neither a debug workaround flag nor a CU mask is
+ * supplied. A workaround flag takes precedence over the CU mask and writes
+ * a fixed mask (0xffff for enable, 0xffffffff for disable) to SE0..SE3.
+ */
+static void gfx_v11_0_compute_mqd_set_cu_mask(struct amdgpu_device *adev,
+					      struct v11_compute_mqd *mqd,
+					      struct amdgpu_mqd_prop *prop)
+{
+	uint32_t se_mask[8] = {0};
+	uint32_t wa_mask;
+	bool has_wa_flag = prop->cu_flags & (AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE |
+					     AMDGPU_UPDATE_FLAG_DBG_WA_DISABLE);
+
+	if (!has_wa_flag && (!prop->cu_mask || !prop->cu_mask_count))
+		return;
+
+	if (has_wa_flag) {
+		wa_mask = (prop->cu_flags & AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE) ?
+			  0xffff : 0xffffffff;
+		mqd->compute_static_thread_mgmt_se0 = wa_mask;
+		mqd->compute_static_thread_mgmt_se1 = wa_mask;
+		mqd->compute_static_thread_mgmt_se2 = wa_mask;
+		mqd->compute_static_thread_mgmt_se3 = wa_mask;
+		return;
+	}
+
+	gfx_v11_0_mqd_symmetrically_map_cu_mask(adev, prop->cu_mask,
+						prop->cu_mask_count, se_mask);
+
+	/*
+	 * NOTE(review): se_mask[] has 8 entries but only SE0..SE3 are written
+	 * to the MQD here; confirm that parts with more than four shader
+	 * engines do not need se_mask[4..7] programmed as well.
+	 */
+	mqd->compute_static_thread_mgmt_se0 = se_mask[0];
+	mqd->compute_static_thread_mgmt_se1 = se_mask[1];
+	mqd->compute_static_thread_mgmt_se2 = se_mask[2];
+	mqd->compute_static_thread_mgmt_se3 = se_mask[3];
+}
+
 /**
  * gfx_v11_0_compute_update_queue - Update runtime-configurable queue parameters
  * @adev: amdgpu device pointer
@@ -4278,6 +4395,7 @@ static int gfx_v11_0_compute_mqd_update(struct amdgpu_device *adev, void *m,
 	mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
 	mqd->cp_hqd_active = prop->hqd_active;
 
+	gfx_v11_0_compute_mqd_set_cu_mask(adev, mqd, prop);
 	return 0;
 }
 
-- 
2.49.0
