On 3/12/2025 7:49 PM, Alex Deucher wrote:
> We need to make sure the workload profile ref counts are
> balanced.  This isn't currently the case because we can
> increment the count on submissions, but the decrement may
> be delayed as work comes in.  Track when we enable the
> workload profile so the references are balanced.
> 
> v2: switch to a mutex and active flag
> 
> Fixes: 8fdb3958e396 ("drm/amdgpu/gfx: add ring helpers for setting workload 
> profile")
> Cc: Yang Wang <[email protected]>
> Cc: Kenneth Feng <[email protected]>
> Signed-off-by: Alex Deucher <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 30 ++++++++++++++++---------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  2 ++
>  2 files changed, 22 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 984e6ff6e4632..099329d15b9ff 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -2160,11 +2160,16 @@ void amdgpu_gfx_profile_idle_work_handler(struct 
> work_struct *work)
>       for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); 
> ++i)
>               fences += 
> amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
>       if (!fences && !atomic_read(&adev->gfx.total_submission_cnt)) {
> -             r = amdgpu_dpm_switch_power_profile(adev, profile, false);
> -             if (r)
> -                     dev_warn(adev->dev, "(%d) failed to disable %s power 
> profile mode\n", r,
> -                              profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
> -                              "fullscreen 3D" : "compute");
> +             mutex_lock(&adev->gfx.workload_profile_mutex);
> +             if (adev->gfx.workload_profile_active) {
> +                     r = amdgpu_dpm_switch_power_profile(adev, profile, 
> false);
> +                     if (r)
> +                             dev_warn(adev->dev, "(%d) failed to disable %s 
> power profile mode\n", r,
> +                                      profile == 
> PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
> +                                      "fullscreen 3D" : "compute");
> +                     adev->gfx.workload_profile_active = false;
> +             }
> +             mutex_unlock(&adev->gfx.workload_profile_mutex);
>       } else {
>               schedule_delayed_work(&adev->gfx.idle_work, 
> GFX_PROFILE_IDLE_TIMEOUT);
>       }
> @@ -2184,11 +2189,16 @@ void amdgpu_gfx_profile_ring_begin_use(struct 
> amdgpu_ring *ring)
>       atomic_inc(&adev->gfx.total_submission_cnt);
>  
>       if (!cancel_delayed_work_sync(&adev->gfx.idle_work)) {

I guess this has the same problem as mentioned in the earlier patch.
AFAIU, this will switch profile only if no idle work is scheduled. If a
begin_use call comes when idle_work is being executed, there is a chance
that idle_work completes amdgpu_dpm_switch_power_profile(adev, profile,
false). Then this would skip changing the profile.

Thanks,
Lijo

> -             r = amdgpu_dpm_switch_power_profile(adev, profile, true);
> -             if (r)
> -                     dev_warn(adev->dev, "(%d) failed to disable %s power 
> profile mode\n", r,
> -                              profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
> -                              "fullscreen 3D" : "compute");
> +             mutex_lock(&adev->gfx.workload_profile_mutex);
> +             if (!adev->gfx.workload_profile_active) {
> +                     r = amdgpu_dpm_switch_power_profile(adev, profile, 
> true);
> +                     if (r)
> +                             dev_warn(adev->dev, "(%d) failed to disable %s 
> power profile mode\n", r,
> +                                      profile == 
> PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
> +                                      "fullscreen 3D" : "compute");
> +                     adev->gfx.workload_profile_active = true;
> +             }
> +             mutex_unlock(&adev->gfx.workload_profile_mutex);
>       }
>  }
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index ddf4533614bac..75af4f25a133b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -483,6 +483,8 @@ struct amdgpu_gfx {
>  
>       atomic_t                        total_submission_cnt;
>       struct delayed_work             idle_work;
> +     bool                            workload_profile_active;
> +     struct mutex                    workload_profile_mutex;
>  };
>  
>  struct amdgpu_gfx_ras_reg_entry {

Reply via email to