On Thu, Sep 18, 2025 at 9:04 PM Mario Limonciello (AMD) <[email protected]> wrote:
>
> From: Mario Limonciello <[email protected]>
>
> The MES set resources packet has an optional bit 'lr_compute_wa'
> which can be used for preventing MES hangs on long compute jobs.
>
> Set this bit by default.
>
> Co-developed-by: Yifan Zhang <[email protected]>
> Signed-off-by: Yifan Zhang <[email protected]>
> Signed-off-by: Mario Limonciello <[email protected]>

Acked-by: Alex Deucher <[email protected]>

> ---
> v3:
>  * gate on fw version
> v2:
>  * drop module parameter
>  * add more description to commit text
> ---
>  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 6 ++++++
>  drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 5 +++++
>  drivers/gpu/drm/amd/include/mes_v11_api_def.h | 3 ++-
>  drivers/gpu/drm/amd/include/mes_v12_api_def.h | 3 ++-
>  4 files changed, 15 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> index 3b91ea601add4..e82188431f796 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> @@ -713,6 +713,12 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes
> *mes)
>  	mes_set_hw_res_pkt.enable_reg_active_poll = 1;
>  	mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
>  	mes_set_hw_res_pkt.oversubscription_timer = 50;
> +	if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x7f)
> +		mes_set_hw_res_pkt.enable_lr_compute_wa = 1;
> +	else
> +		dev_info_once(mes->adev->dev,
> +			      "MES FW version must be >= 0x7f to enable LR
> compute workaround.\n");
> +
>  	if (amdgpu_mes_log_enable) {
>  		mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
>  		mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr =
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> index 998893dff08e9..aff06f06aeeec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> @@ -769,6 +769,11 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes
> *mes, int pipe)
>  	mes_set_hw_res_pkt.use_different_vmid_compute = 1;
>  	mes_set_hw_res_pkt.enable_reg_active_poll = 1;
>  	mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
> +	if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x82)
> +		mes_set_hw_res_pkt.enable_lr_compute_wa = 1;
> +	else
> +		dev_info_once(adev->dev,
> +			      "MES FW version must be >= 0x82 to enable LR
> compute workaround.\n");
>
>  	/*
>  	 * Keep oversubscribe timer for sdma . When we have unmapped doorbell
> diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h
> b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
> index 15680c3f49704..ab1cfc92dbeb1 100644
> --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h
> +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
> @@ -238,7 +238,8 @@ union MESAPI_SET_HW_RESOURCES {
>  			uint32_t enable_mes_sch_stb_log : 1;
>  			uint32_t limit_single_process : 1;
>  			uint32_t is_strix_tmz_wa_enabled :1;
> -			uint32_t reserved : 13;
> +			uint32_t enable_lr_compute_wa : 1;
> +			uint32_t reserved : 12;
>  		};
>  		uint32_t uint32_t_all;
>  	};
> diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h
> b/drivers/gpu/drm/amd/include/mes_v12_api_def.h
> index c04bd351b2505..69611c7e30e35 100644
> --- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h
> +++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h
> @@ -287,7 +287,8 @@ union MESAPI_SET_HW_RESOURCES {
>  			uint32_t limit_single_process : 1;
>  			uint32_t unmapped_doorbell_handling: 2;
>  			uint32_t enable_mes_fence_int: 1;
> -			uint32_t reserved : 10;
> +			uint32_t enable_lr_compute_wa : 1;
> +			uint32_t reserved : 9;
>  		};
>  		uint32_t uint32_all;
>  	};
> --
> 2.43.0
>
