[AMD Official Use Only]


> -----Original Message-----
> From: Lazar, Lijo <lijo.la...@amd.com>
> Sent: Tuesday, January 11, 2022 11:32 AM
> To: Quan, Evan <evan.q...@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander <alexander.deuc...@amd.com>
> Subject: Re: [PATCH] drm/amd/pm: correct the checks for fan attributes
> support
> 
> 
> 
> On 1/11/2022 8:02 AM, Quan, Evan wrote:
> > [AMD Official Use Only]
> >
> >
> >
> >> -----Original Message-----
> >> From: Lazar, Lijo <lijo.la...@amd.com>
> >> Sent: Monday, January 10, 2022 4:31 PM
> >> To: Quan, Evan <evan.q...@amd.com>; amd-gfx@lists.freedesktop.org
> >> Cc: Deucher, Alexander <alexander.deuc...@amd.com>
> >> Subject: Re: [PATCH] drm/amd/pm: correct the checks for fan
> >> attributes support
> >>
> >>
> >>
> >> On 1/10/2022 1:25 PM, Quan, Evan wrote:
> >>> [AMD Official Use Only]
> >>>
> >>>
> >>>
> >>>> -----Original Message-----
> >>>> From: Lazar, Lijo <lijo.la...@amd.com>
> >>>> Sent: Monday, January 10, 2022 3:36 PM
> >>>> To: Quan, Evan <evan.q...@amd.com>; amd-
> g...@lists.freedesktop.org
> >>>> Cc: Deucher, Alexander <alexander.deuc...@amd.com>
> >>>> Subject: Re: [PATCH] drm/amd/pm: correct the checks for fan
> >>>> attributes support
> >>>>
> >>>>
> >>>>
> >>>> On 1/10/2022 11:30 AM, Evan Quan wrote:
> >>>>> Before we relied on the return values from the corresponding
> interfaces.
> >>>>> That is with low efficiency. And the wrong intermediate variable
> >>>>> used makes the fan mode stuck at manual mode which then causes
> >>>>> overheating
> >>>> in
> >>>>> 3D graphics tests.
> >>>>>
> >>>>> Signed-off-by: Evan Quan <evan.q...@amd.com>
> >>>>> Change-Id: Ia93ccf3b929c12e6d10b50c8f3596783ac63f0e3
> >>>>> ---
> >>>>>     drivers/gpu/drm/amd/pm/amdgpu_dpm.c     | 23
> >>>> +++++++++++++++++++++++
> >>>>>     drivers/gpu/drm/amd/pm/amdgpu_pm.c      | 20 ++++++++++-------
> ---
> >>>>>     drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 12 ++++++++++++
> >>>>>     3 files changed, 45 insertions(+), 10 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> >>>> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> >>>>> index 68d2e80a673b..e732418a9558 100644
> >>>>> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> >>>>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> >>>>> @@ -1547,3 +1547,26 @@ int
> >> amdgpu_dpm_get_dpm_clock_table(struct
> >>>> amdgpu_device *adev,
> >>>>>
> >>>>>         return ret;
> >>>>>     }
> >>>>> +
> >>>>> +int amdgpu_dpm_is_fan_operation_supported(struct
> amdgpu_device
> >>>> *adev,
> >>>>> +                                         enum fan_operation_id id)
> >>>>> +{
> >>>>> +       const struct amd_pm_funcs *pp_funcs = adev-
> >powerplay.pp_funcs;
> >>>>> +
> >>>>> +       switch (id) {
> >>>>> +       case FAN_CONTROL_MODE_RETRIEVING:
> >>>>> +               return pp_funcs->get_fan_control_mode ? 1 : 0;
> >>>>> +       case FAN_CONTROL_MODE_SETTING:
> >>>>> +               return pp_funcs->set_fan_control_mode ? 1 : 0;
> >>>>> +       case FAN_SPEED_PWM_RETRIEVING:
> >>>>> +               return pp_funcs->get_fan_speed_pwm ? 1 : 0;
> >>>>> +       case FAN_SPEED_PWM_SETTING:
> >>>>> +               return pp_funcs->set_fan_speed_pwm ? 1 : 0;
> >>>>> +       case FAN_SPEED_RPM_RETRIEVING:
> >>>>> +               return pp_funcs->get_fan_speed_rpm ? 1 : 0;
> >>>>> +       case FAN_SPEED_RPM_SETTING:
> >>>>> +               return pp_funcs->set_fan_speed_rpm ? 1 : 0;
> >>>>> +       default:
> >>>>> +               return 0;
> >>>>> +       }
> >>>>> +}
> >>>>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> >>>> b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> >>>>> index d3eab245e0fe..57721750c51a 100644
> >>>>> --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> >>>>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> >>>>> @@ -3263,15 +3263,15 @@ static umode_t
> >>>> hwmon_attributes_visible(struct kobject *kobj,
> >>>>>                 return 0;
> >>>>>
> >>>>>         /* mask fan attributes if we have no bindings for this asic
> >>>>> to expose
> >>>> */
> >>>>> -       if (((amdgpu_dpm_get_fan_speed_pwm(adev, &speed) ==
> -EINVAL)
> >>>> &&
> >>>>> +       if ((!amdgpu_dpm_is_fan_operation_supported(adev,
> >>>> FAN_SPEED_PWM_RETRIEVING) &&
> >>>>
> >>>> As per the current logic, it's really checking the hardware registers.
> >>> [Quan, Evan] I probably should mention the "current" version you see
> >>> now
> >> is actually a regression introduced by the commit below:
> >>> 801771de0331 drm/amd/pm: do not expose power implementation
> details
> >> to
> >>> amdgpu_pm.c
> >>>
> >>> The very early version(which works good) is something like below:
> >>> -       if (!is_support_sw_smu(adev)) {
> >>> -               /* mask fan attributes if we have no bindings for this 
> >>> asic to
> expose
> >> */
> >>> -               if ((!adev->powerplay.pp_funcs->get_fan_speed_pwm &&
> >>> -                    attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* 
> >>> can't
> >> query fan */
> >>> -                   (!adev->powerplay.pp_funcs->get_fan_control_mode &&
> >>> -                    attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) 
> >>> /*
> can't
> >> query state */
> >>> -                       effective_mode &= ~S_IRUGO;
> >>>
> >>> So, the changes here are really just back to old working version. It
> >>> aims to
> >> provide a quick fix for the failures reported by CQE.
> >>
> >> I see. Could you model on it based on below one? This is preferrable
> >> rather than introducing new API.
> >>
> >> drm/amdgpu/pm: Don't show pp_power_profile_mode for unsupported
> >> devices.
> > [Quan, Evan] In fact, those piece of code from the mentioned change was
> updated as below
> >          } else if (DEVICE_ATTR_IS(pp_power_profile_mode)) {
> >                  if (amdgpu_dpm_get_power_profile_mode(adev, NULL) == -
> EOPNOTSUPP)
> >                          *states = ATTR_STATE_UNSUPPORTED;
> >          }
> > As the access for adev->powerplay.pp_funcs from amdgpu_pm.c was
> forbidden after the pm cleanups.
> > So, we have to rely on some (new)API in amdgpu_dpm.c to do those
> checks.
> >
> 
> To be clear, the model is to use a dummy call to check if the API is
> implemented -
> 
>       amdgpu_dpm_get_fan_speed_rpm(adev, NULL) == -EOPNOTSUPP
>       amdgpu_dpm_set_fan_speed_rpm(adev, -1) == -EOPNOTSUPP
> 
> That is better instead of adding another API and flags for each set/get API.
[Quan, Evan] I see. OK, I can refactor this patch that way.

BR
Evan
> 
> Thanks,
> Lijo
> 
> > A more proper way to cleanup all those attributes support checks stuff is to
> have a flag like "adev->pm.sysfs_attribtues_flags".
> > It labels all those sysfs attributes supported on each ASIC. However,
> considering the ASICs involved and the difference between them, that may
> be not an easy job.
> >
> > BR
> > Evan
> >>
> >> Thanks,
> >> Lijo
> >>
> >>>> For ex: we could have some SKUs that have PMFW based fan control
> >>>> and for some other SKUs, AIBs could be having a different cooling
> >>>> solution which doesn't make use of PMFW.
> >>>>
> >>>>
> >>>>>               attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /*
> >>>>> can't query
> >>>> fan */
> >>>>> -           ((amdgpu_dpm_get_fan_control_mode(adev, &speed) ==
> -
> >>>> EOPNOTSUPP) &&
> >>>>> +           (!amdgpu_dpm_is_fan_operation_supported(adev,
> >>>> FAN_CONTROL_MODE_RETRIEVING) &&
> >>>>>              attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr))
> /*
> >>>>> can't
> >>>> query state */
> >>>>>                 effective_mode &= ~S_IRUGO;
> >>>>>
> >>>>> -       if (((amdgpu_dpm_set_fan_speed_pwm(adev, speed) == -
> EINVAL)
> >>>> &&
> >>>>> +       if ((!amdgpu_dpm_is_fan_operation_supported(adev,
> >>>> FAN_SPEED_PWM_SETTING) &&
> >>>>>               attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /*
> >>>>> can't
> >>>> manage fan */
> >>>>> -             ((amdgpu_dpm_set_fan_control_mode(adev, speed) ==
> -
> >>>> EOPNOTSUPP) &&
> >>>>> +           (!amdgpu_dpm_is_fan_operation_supported(adev,
> >>>> FAN_CONTROL_MODE_SETTING) &&
> >>>>>               attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr))
> /*
> >>>>> can't
> >>>> manage state */
> >>>>>                 effective_mode &= ~S_IWUSR;
> >>>>>
> >>>>> @@ -3291,16 +3291,16 @@ static umode_t
> >>>> hwmon_attributes_visible(struct kobject *kobj,
> >>>>>                 return 0;
> >>>>>
> >>>>>         /* hide max/min values if we can't both query and manage
> the fan */
> >>>>> -       if (((amdgpu_dpm_set_fan_speed_pwm(adev, speed) == -
> EINVAL)
> >>>> &&
> >>>>> -             (amdgpu_dpm_get_fan_speed_pwm(adev, &speed) == -
> EINVAL)
> >>>> &&
> >>>>> -             (amdgpu_dpm_set_fan_speed_rpm(adev, speed) == -
> EINVAL)
> >>>> &&
> >>>>> -             (amdgpu_dpm_get_fan_speed_rpm(adev, &speed) == -
> EINVAL))
> >>>> &&
> >>>>> +       if ((!amdgpu_dpm_is_fan_operation_supported(adev,
> >>>> FAN_SPEED_PWM_SETTING) &&
> >>>>> +            !amdgpu_dpm_is_fan_operation_supported(adev,
> >>>> FAN_SPEED_PWM_RETRIEVING) &&
> >>>>> +            !amdgpu_dpm_is_fan_operation_supported(adev,
> >>>> FAN_SPEED_RPM_SETTING) &&
> >>>>> +            !amdgpu_dpm_is_fan_operation_supported(adev,
> >>>> FAN_SPEED_RPM_RETRIEVING)) &&
> >>>>
> >>>> If this is the case, I think we should set pm.no_fan since nothing
> >>>> is possible.
> >>> [Quan, Evan] Yep, I agree a more optimized version should be
> >>> something
> >> like that.
> >>> Let's take this a quick solution and do further optimizations later.
> >>>
> >>> BR
> >>> Evan
> >>>>
> >>>> Thanks,
> >>>> Lijo
> >>>>
> >>>>>             (attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
> >>>>>              attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
> >>>>>                 return 0;
> >>>>>
> >>>>> -       if ((amdgpu_dpm_set_fan_speed_rpm(adev, speed) == -
> EINVAL)
> >>>> &&
> >>>>> -            (amdgpu_dpm_get_fan_speed_rpm(adev, &speed) == -
> EINVAL)
> >>>> &&
> >>>>> +       if ((!amdgpu_dpm_is_fan_operation_supported(adev,
> >>>> FAN_SPEED_RPM_SETTING) &&
> >>>>> +            !amdgpu_dpm_is_fan_operation_supported(adev,
> >>>> FAN_SPEED_RPM_RETRIEVING)) &&
> >>>>>              (attr == &sensor_dev_attr_fan1_max.dev_attr.attr ||
> >>>>>              attr == &sensor_dev_attr_fan1_min.dev_attr.attr))
> >>>>>                 return 0;
> >>>>> diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> >>>> b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> >>>>> index ba857ca75392..9e18151a3c46 100644
> >>>>> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> >>>>> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> >>>>> @@ -330,6 +330,16 @@ struct amdgpu_pm {
> >>>>>         bool                    pp_force_state_enabled;
> >>>>>     };
> >>>>>
> >>>>> +enum fan_operation_id
> >>>>> +{
> >>>>> +       FAN_CONTROL_MODE_RETRIEVING = 0,
> >>>>> +       FAN_CONTROL_MODE_SETTING    = 1,
> >>>>> +       FAN_SPEED_PWM_RETRIEVING    = 2,
> >>>>> +       FAN_SPEED_PWM_SETTING       = 3,
> >>>>> +       FAN_SPEED_RPM_RETRIEVING    = 4,
> >>>>> +       FAN_SPEED_RPM_SETTING       = 5,
> >>>>> +};
> >>>>> +
> >>>>>     u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev);
> >>>>>     int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum
> >>>> amd_pp_sensors sensor,
> >>>>>                            void *data, uint32_t *size); @@ -510,4
> +520,6 @@
> >> enum
> >>>>> pp_smu_status
> >>>> amdgpu_dpm_get_uclk_dpm_states(struct amdgpu_device *adev,
> >>>>>                                                   unsigned int
> *num_states);
> >>>>>     int amdgpu_dpm_get_dpm_clock_table(struct amdgpu_device
> *adev,
> >>>>>                                    struct dpm_clocks *clock_table);
> >>>>> +int amdgpu_dpm_is_fan_operation_supported(struct
> amdgpu_device
> >>>> *adev,
> >>>>> +                                         enum fan_operation_id id);
> >>>>>     #endif
> >>>>>

Reply via email to