On 2019.08.07 00:06 Viresh Kumar wrote:

Thanks for your work on this.

> The intel_pstate driver exposes the min_perf_pct and max_perf_pct sysfs
> files, which can be used to force a limit on the min/max P-state of the
> driver. Though these files eventually control the min/max frequencies
> that the CPUs will run at, they don't change the policy->min/max values.
>
> When the values of these files are changed (in the passive mode of the
> driver), the ->limits() callback of the cpufreq governors, like
> schedutil, gets called. On such a call the governors forcefully update
> the frequency to bring it within the limits. Since the limits, i.e.
> policy->min/max, aren't updated by the driver, the governors fail to
> get the target frequency within the limits and sometimes abort the
> update, believing that the frequency is already set to the target
> value.
>
> This patch implements QoS-supported frequency constraints to update the
> policy->min/max values whenever the min_perf_pct or max_perf_pct files
> are updated. This is only done for the passive mode as of now, as the
> driver already works fine in active mode.
>
> Fixes: ecd288429126 ("cpufreq: schedutil: Don't set next_freq to UINT_MAX")
> Reported-by: Doug Smythies <dsmyth...@telus.net>
> Signed-off-by: Viresh Kumar <viresh.ku...@linaro.org>

Tested-by: Doug Smythies <dsmyth...@telus.net>
Thermald now seems to work O.K. with all of the governors.

I do note that if one sets
/sys/devices/system/cpu/cpufreq/policy*/scaling_max_freq,
it seems to override subsequent attempts via
/sys/devices/system/cpu/intel_pstate/max_perf_pct.
I find this confusing.

So the question becomes: which one is the "master"?

Example:

# for file in /sys/devices/system/cpu/cpufreq/policy*/scaling_max_freq; do echo "2200000" > $file; done
# cat /sys/devices/system/cpu/cpufreq/policy*/scaling_max_freq
2200000
2200000
2200000
2200000
2200000
2200000
2200000
2200000
root@s15:/home/doug/temp# cat /sys/devices/system/cpu/intel_pstate/max_perf_pct
... (Note: 50% = 1900000)
root@s15:/home/doug/temp# cat /sys/devices/system/cpu/cpufreq/policy*/scaling_max_freq
1900000
1900000
1900000
1900000
1900000
1900000
1900000
1900000
root@s15:/home/doug/temp# echo 100 > /sys/devices/system/cpu/intel_pstate/max_perf_pct
... (Note: 100% = 3800000, and my expectation is 3.8 GHz below)
root@s15:/home/doug/temp# cat /sys/devices/system/cpu/cpufreq/policy*/scaling_max_freq
2200000
2200000
2200000
2200000
2200000
2200000
2200000
2200000

Similarly for the minimum side of things:

root@s15:/home/doug/temp# for file in /sys/devices/system/cpu/cpufreq/policy*/scaling_min_freq; do echo "3200000" > $file; done
root@s15:/home/doug/temp# cat /sys/devices/system/cpu/cpufreq/policy*/scaling_min_freq
3200000
3200000
3200000
3200000
3200000
3200000
3200000
3200000
root@s15:/home/doug/temp# echo 42 > /sys/devices/system/cpu/intel_pstate/min_perf_pct
root@s15:/home/doug/temp# cat /sys/devices/system/cpu/intel_pstate/min_perf_pct
42   ... (note: 42% = 1600000 = processor minimum, and that is my expectation below.)
root@s15:/home/doug/temp# cat /sys/devices/system/cpu/cpufreq/policy*/scaling_min_freq
3200000
3200000
3200000
3200000
3200000
3200000
3200000
3200000

I thought these minimum anomalies would cause problems for thermald, but
for whatever reason, it seems to work properly.
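
For reference, the percentages and frequencies in the notes above
(42% = 1600000, 50% = 1900000, 100% = 3800000) are consistent with the
rounding the patch uses below (DIV_ROUND_UP(turbo_max * perf_pct, 100) *
scaling). Here is a small stand-alone C sketch of that arithmetic; the
turbo P-state of 38 and the 100000 kHz per P-state step are my assumed
values for this machine, not numbers taken from the patch:

#include <stdio.h>

/* Same rounding as the kernel's DIV_ROUND_UP() macro. */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	int turbo_max = 38;	/* assumed highest turbo P-state (3.8 GHz) */
	int scaling = 100000;	/* assumed kHz per P-state step */
	int pcts[] = { 42, 50, 100 };

	for (int i = 0; i < (int)(sizeof(pcts) / sizeof(pcts[0])); i++) {
		/* Mirrors: freq = DIV_ROUND_UP(turbo_max * perf_pct, 100) * scaling */
		int freq = DIV_ROUND_UP(turbo_max * pcts[i], 100) * scaling;
		printf("%3d%% -> %d kHz\n", pcts[i], freq);
	}
	return 0;
}

That prints 1600000, 1900000 and 3800000 kHz respectively, which matches
the notes above.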

> ---
> V3->V4:
> - Reimplemented the solution using QoS constraints instead of
>   resolve_freq() callback.
>
> drivers/cpufreq/intel_pstate.c | 120 +++++++++++++++++++++++++++++++--
> 1 file changed, 116 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index cc27d4c59dca..e9fbd6c36822 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -24,6 +24,7 @@
>  #include <linux/fs.h>
>  #include <linux/acpi.h>
>  #include <linux/vmalloc.h>
> +#include <linux/pm_qos.h>
>  #include <trace/events/power.h>
>  
>  #include <asm/div64.h>
> @@ -1085,6 +1086,47 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
>       return count;
>  }
>  
> +static struct cpufreq_driver intel_pstate;
> +
> +static void update_qos_request(enum dev_pm_qos_req_type type)
> +{
> +     int max_state, turbo_max, freq, i, perf_pct;
> +     struct dev_pm_qos_request *req;
> +     struct cpufreq_policy *policy;
> +
> +     for_each_possible_cpu(i) {
> +             struct cpudata *cpu = all_cpu_data[i];
> +
> +             policy = cpufreq_cpu_get(i);
> +             if (!policy)
> +                     continue;
> +
> +             req = policy->driver_data;
> +             cpufreq_cpu_put(policy);
> +
> +             if (!req)
> +                     continue;
> +
> +             if (hwp_active)
> +                     intel_pstate_get_hwp_max(i, &turbo_max, &max_state);
> +             else
> +                     turbo_max = cpu->pstate.turbo_pstate;
> +
> +             if (type == DEV_PM_QOS_MIN_FREQUENCY) {

Is it O.K. to assume that if the passed request type is not
DEV_PM_QOS_MIN_FREQUENCY, then it must have been
DEV_PM_QOS_MAX_FREQUENCY?

It is within this patch, but what about in the future?
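
Purely to illustrate the concern (not necessarily something this patch
has to change): a switch with an explicit default in update_qos_request()
would make an unexpected request type loud instead of silently treating
it as the max-frequency case. The WARN_ON_ONCE() here is my own
addition, not something in the patch:

		/*
		 * Illustrative sketch only: handle each request type
		 * explicitly so a future DEV_PM_QOS_* value cannot silently
		 * fall into the max-frequency branch.
		 */
		switch (type) {
		case DEV_PM_QOS_MIN_FREQUENCY:
			perf_pct = global.min_perf_pct;
			break;
		case DEV_PM_QOS_MAX_FREQUENCY:
			req++;
			perf_pct = global.max_perf_pct;
			break;
		default:
			WARN_ON_ONCE(1);
			continue;	/* skip this CPU, unknown request type */
		}

With only two frequency request types today the if/else is fine; the
switch just future-proofs it.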

> +                     perf_pct = global.min_perf_pct;
> +             } else {
> +                     req++;
> +                     perf_pct = global.max_perf_pct;
> +             }
> +
> +             freq = DIV_ROUND_UP(turbo_max * perf_pct, 100);
> +             freq *= cpu->pstate.scaling;
> +
> +             if (dev_pm_qos_update_request(req, freq))
> +                     pr_warn("Failed to update freq constraint: CPU%d\n", i);

I get many of these messages (4520 so far, always in groups of 8; I have
8 CPUs) and have yet to figure out exactly why. It seems to actually be
working fine.
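
A guess as to why, which I have not verified: dev_pm_qos_update_request()
returns 1 when the aggregate constraint value actually changes, 0 when it
does not, and a negative errno only on failure, so checking the return
value for "non-zero" prints the warning on every successful change as
well. A sketch of a check that only treats the negative case as an error:

		/*
		 * dev_pm_qos_update_request() returns 1 if the effective
		 * constraint changed, 0 if it did not, and a negative errno
		 * on failure; only the last one is a real error.
		 */
		if (dev_pm_qos_update_request(req, freq) < 0)
			pr_warn("Failed to update freq constraint: CPU%d\n", i);

That would also explain why the messages come in groups of 8 whenever the
limits really do change.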

> +     }
> +}
> +
>  static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b,
>                                 const char *buf, size_t count)
>  {
> @@ -1108,7 +1150,10 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b,
>  
>       mutex_unlock(&intel_pstate_limits_lock);
>  
> -     intel_pstate_update_policies();
> +     if (intel_pstate_driver == &intel_pstate)
> +             intel_pstate_update_policies();
> +     else
> +             update_qos_request(DEV_PM_QOS_MAX_FREQUENCY);
>  
>       mutex_unlock(&intel_pstate_driver_lock);
>  
> @@ -1139,7 +1184,10 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct kobj_attribute *b,
>  
>       mutex_unlock(&intel_pstate_limits_lock);
>  
> -     intel_pstate_update_policies();
> +     if (intel_pstate_driver == &intel_pstate)
> +             intel_pstate_update_policies();
> +     else
> +             update_qos_request(DEV_PM_QOS_MIN_FREQUENCY);
>  
>       mutex_unlock(&intel_pstate_driver_lock);
>  
> @@ -2332,8 +2380,16 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
>  
>  static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
>  {
> -     int ret = __intel_pstate_cpu_init(policy);
> +     int max_state, turbo_max, min_freq, max_freq, ret;
> +     struct dev_pm_qos_request *req;
> +     struct cpudata *cpu;
> +     struct device *dev;
> +
> +     dev = get_cpu_device(policy->cpu);
> +     if (!dev)
> +             return -ENODEV;
>  
> +     ret = __intel_pstate_cpu_init(policy);
>       if (ret)
>               return ret;
>  
> @@ -2342,7 +2398,63 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
>       /* This reflects the intel_pstate_get_cpu_pstates() setting. */
>       policy->cur = policy->cpuinfo.min_freq;
>  
> +     req = kcalloc(2, sizeof(*req), GFP_KERNEL);
> +     if (!req) {
> +             ret = -ENOMEM;
> +             goto pstate_exit;
> +     }
> +
> +     cpu = all_cpu_data[policy->cpu];
> +
> +     if (hwp_active)
> +             intel_pstate_get_hwp_max(policy->cpu, &turbo_max, &max_state);
> +     else
> +             turbo_max = cpu->pstate.turbo_pstate;
> +
> +     min_freq = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100);
> +     min_freq *= cpu->pstate.scaling;
> +     max_freq = DIV_ROUND_UP(turbo_max * global.max_perf_pct, 100);
> +     max_freq *= cpu->pstate.scaling;
> +
> +     ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_MIN_FREQUENCY,
> +                                  min_freq);
> +     if (ret < 0) {
> +             dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
> +             goto free_req;
> +     }
> +
> +     ret = dev_pm_qos_add_request(dev, req + 1, DEV_PM_QOS_MAX_FREQUENCY,
> +                                  max_freq);
> +     if (ret < 0) {
> +             dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
> +             goto remove_min_req;
> +     }
> +
> +     policy->driver_data = req;
> +
>       return 0;
> +
> +remove_min_req:
> +     dev_pm_qos_remove_request(req);
> +free_req:
> +     kfree(req);
> +pstate_exit:
> +     intel_pstate_exit_perf_limits(policy);
> +
> +     return ret;
> +}
> +
> +static int intel_cpufreq_cpu_exit(struct cpufreq_policy *policy)
> +{
> +     struct dev_pm_qos_request *req;
> +
> +     req = policy->driver_data;
> +
> +     dev_pm_qos_remove_request(req + 1);
> +     dev_pm_qos_remove_request(req);
> +     kfree(req);
> +
> +     return intel_pstate_cpu_exit(policy);
>  }
>  
>  static struct cpufreq_driver intel_cpufreq = {
> @@ -2351,7 +2463,7 @@ static struct cpufreq_driver intel_cpufreq = {
>       .target         = intel_cpufreq_target,
>       .fast_switch    = intel_cpufreq_fast_switch,
>       .init           = intel_cpufreq_cpu_init,
> -     .exit           = intel_pstate_cpu_exit,
> +     .exit           = intel_cpufreq_cpu_exit,
>       .stop_cpu       = intel_cpufreq_stop_cpu,
>       .update_limits  = intel_pstate_update_limits,
>       .name           = "intel_cpufreq",
> -- 
> 2.21.0.rc0.269.g1a574e7a288b

