[Intel-gfx] [PATCH 07/10] cpufreq: intel_pstate: Implement VLP controller for HWP parts.

2020-03-10 Thread Francisco Jerez
This implements a simple variably low-pass-filtering governor in
control of the HWP MIN/MAX PERF range based on the previously
introduced get_vlp_target_range().  See "cpufreq: intel_pstate:
Implement VLP controller target P-state range estimation." for the
rationale.
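
The "variably low-pass-filtering" in the name refers to an exponential averaging step whose gain (and therefore response frequency, cf. the gain and last_response_frequency_hz fields touched below) is adjusted at run time.  For illustration only, and not part of the patch, a minimal fixed-point sketch of such a filter in the FRAC_BITS style intel_pstate already uses -- the function and variable names here are illustrative, not taken from the series:

#include <stdint.h>

#define FRAC_BITS 8
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)

/*
 * Exponentially-weighted average acting as a low-pass filter:
 * y += gain * (x - y), with "gain" in fixed point (0 < gain <= 1.0).
 * A larger gain tracks the input quickly; a smaller gain averages
 * over a longer response period.
 */
static int64_t lp_filter(int64_t y, int64_t x, int64_t gain)
{
	return y + ((gain * (x - y)) >> FRAC_BITS);
}

/* Example: lp_filter(int_tofp(8), int_tofp(12), int_tofp(1) / 4) == int_tofp(9). */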

Signed-off-by: Francisco Jerez 
---
 drivers/cpufreq/intel_pstate.c | 79 +-
 1 file changed, 77 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index cecadfec8bc1..a01eed40d897 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1905,6 +1905,20 @@ static void intel_pstate_reset_vlp(struct cpudata *cpu)
vlp->gain = max(1, div_fp(1000, vlp_params.setpoint_0_pml));
vlp->target.p_base = 0;
vlp->stats.last_response_frequency_hz = vlp_params.avg_hz;
+
+   if (hwp_active) {
+   const uint32_t p0 = max(cpu->pstate.min_pstate,
+   cpu->min_perf_ratio);
+   const uint32_t p1 = max_t(uint32_t, p0, cpu->max_perf_ratio);
+   const uint64_t hwp_req = (READ_ONCE(cpu->hwp_req_cached) &
+ ~(HWP_MAX_PERF(~0L) |
+   HWP_MIN_PERF(~0L) |
+   HWP_DESIRED_PERF(~0L))) |
+HWP_MIN_PERF(p0) | HWP_MAX_PERF(p1);
+
+   wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, hwp_req);
+   cpu->hwp_req_cached = hwp_req;
+   }
 }
 
 /**
@@ -2222,6 +2236,46 @@ static void intel_pstate_adjust_pstate(struct cpudata *cpu)
fp_toint(cpu->iowait_boost * 100));
 }
 
+static void intel_pstate_adjust_pstate_range(struct cpudata *cpu,
+const unsigned int range[])
+{
+   const int from = cpu->hwp_req_cached;
+   unsigned int p0, p1, p_min, p_max;
+   struct sample *sample;
+   uint64_t hwp_req;
+
+   update_turbo_state();
+
+   p0 = max(cpu->pstate.min_pstate, cpu->min_perf_ratio);
+   p1 = max_t(unsigned int, p0, cpu->max_perf_ratio);
+   p_min = clamp_t(unsigned int, range[0], p0, p1);
+   p_max = clamp_t(unsigned int, range[1], p0, p1);
+
+   trace_cpu_frequency(p_max * cpu->pstate.scaling, cpu->cpu);
+
+   hwp_req = (READ_ONCE(cpu->hwp_req_cached) &
+  ~(HWP_MAX_PERF(~0L) | HWP_MIN_PERF(~0L) |
+HWP_DESIRED_PERF(~0L))) |
+ HWP_MIN_PERF(vlp_params.debug & 2 ? p0 : p_min) |
+ HWP_MAX_PERF(vlp_params.debug & 4 ? p1 : p_max);
+
+   if (hwp_req != cpu->hwp_req_cached) {
+   wrmsrl(MSR_HWP_REQUEST, hwp_req);
+   cpu->hwp_req_cached = hwp_req;
+   }
+
+   sample = &cpu->sample;
+   trace_pstate_sample(mul_ext_fp(100, sample->core_avg_perf),
+   fp_toint(sample->busy_scaled),
+   from,
+   hwp_req,
+   sample->mperf,
+   sample->aperf,
+   sample->tsc,
+   get_avg_frequency(cpu),
+   fp_toint(cpu->iowait_boost * 100));
+}
+
 static void intel_pstate_update_util(struct update_util_data *data, u64 time,
 unsigned int flags)
 {
@@ -2260,6 +2314,22 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time,
intel_pstate_adjust_pstate(cpu);
 }
 
+/**
+ * Implementation of the cpufreq update_util hook based on the VLP
+ * controller (see get_vlp_target_range()).
+ */
+static void intel_pstate_update_util_hwp_vlp(struct update_util_data *data,
+u64 time, unsigned int flags)
+{
+   struct cpudata *cpu = container_of(data, struct cpudata, update_util);
+
+   if (update_vlp_sample(cpu, time, flags)) {
+   const struct vlp_target_range *target =
+   get_vlp_target_range(cpu);
+   intel_pstate_adjust_pstate_range(cpu, target->value);
+   }
+}
+
 static struct pstate_funcs core_funcs = {
.get_max = core_get_max_pstate,
.get_max_physical = core_get_max_pstate_physical,
@@ -2389,6 +2459,9 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 
intel_pstate_get_cpu_pstates(cpu);
 
+   if (pstate_funcs.update_util == intel_pstate_update_util_hwp_vlp)
+   intel_pstate_reset_vlp(cpu);
+
pr_debug("controlling: cpu %d\n", cpunum);
 
return 0;
@@ -2398,7 +2471,8 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 {
struct cpudata *cpu = all_cpu_data[cpu_num];
 
-   if (hwp_active && !hwp_boost)
+   if (hwp_active && !hwp_boost &&
+   pstate_funcs.update_util != intel_pstate_update_util_hwp_vlp)
return;
 
if (cpu->update_util_set
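
A note on the mask-and-replace pattern used in both hunks above: MSR_HWP_REQUEST packs several 8-bit fields (minimum, maximum and desired performance, EPP, activity window) into one 64-bit value, and the patch rewrites only the MIN/MAX fields while clearing DESIRED and preserving everything else.  A stand-alone sketch of that manipulation, using helpers equivalent to the HWP_*_PERF() macros from asm/msr-index.h -- the function name below is illustrative:

#include <stdint.h>

/* Equivalent to the HWP_*_PERF() helpers in asm/msr-index.h. */
#define HWP_MIN_PERF(x)     ((x) & 0xff)
#define HWP_MAX_PERF(x)     (((x) & 0xff) << 8)
#define HWP_DESIRED_PERF(x) (((x) & 0xff) << 16)

/*
 * Build a new HWP_REQUEST value from the cached one, replacing only the
 * MIN/MAX performance fields and clearing DESIRED, so EPP, activity
 * window and the package-control bit are carried over unchanged.
 */
static uint64_t hwp_req_set_range(uint64_t cached, uint32_t p_min, uint32_t p_max)
{
	const uint64_t mask = HWP_MAX_PERF(~0L) | HWP_MIN_PERF(~0L) |
			      HWP_DESIRED_PERF(~0L);

	return (cached & ~mask) | HWP_MIN_PERF(p_min) | HWP_MAX_PERF(p_max);
}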

Re: [Intel-gfx] [PATCH 07/10] cpufreq: intel_pstate: Implement VLP controller for HWP parts.

2020-03-17 Thread Pandruvada, Srinivas
On Tue, 2020-03-10 at 14:42 -0700, Francisco Jerez wrote:
> This implements a simple variably low-pass-filtering governor in
> control of the HWP MIN/MAX PERF range based on the previously
> introduced get_vlp_target_range().  See "cpufreq: intel_pstate:
> Implement VLP controller target P-state range estimation." for the
> rationale.

I just gave this a try on a pretty idle system with just systemd processes
and the usual background tasks, with nomodeset.

I see that the HWP min is getting changed between 4 and 8. Why are we
changing the HWP dynamic range even on an idle system running nowhere
close to TDP?
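
(As an aside, the fluctuation is easy to watch from user space by polling the min/max fields of MSR_HWP_REQUEST, 0x774, through the msr driver.  A rough sketch, assuming root and a loaded msr module -- not part of the patch:)

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

#define MSR_HWP_REQUEST 0x774

int main(void)
{
	uint64_t req;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}

	/* Print the HWP min/max perf fields of CPU 0 once per second. */
	while (pread(fd, &req, sizeof(req), MSR_HWP_REQUEST) == sizeof(req)) {
		printf("hwp_req=%#llx min=%llu max=%llu\n",
		       (unsigned long long)req,
		       (unsigned long long)(req & 0xff),
		       (unsigned long long)((req >> 8) & 0xff));
		sleep(1);
	}

	close(fd);
	return 0;
}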

Thanks,
Srinivas


> [...]

Re: [Intel-gfx] [PATCH 07/10] cpufreq: intel_pstate: Implement VLP controller for HWP parts.

2020-03-18 Thread Francisco Jerez
"Pandruvada, Srinivas"  writes:

> On Tue, 2020-03-10 at 14:42 -0700, Francisco Jerez wrote:
>> This implements a simple variably low-pass-filtering governor in
>> control of the HWP MIN/MAX PERF range based on the previously
>> introduced get_vlp_target_range().  See "cpufreq: intel_pstate:
>> Implement VLP controller target P-state range estimation." for the
>> rationale.
>
> I just gave a try on a pretty idle system with just systemd processes
> and usual background tasks with nomodset. 
>
> I see that there HWP min is getting changed between 4-8. Why are
> changing HWP dynamic range even on an idle system running no where
> close to TDP?
>

The HWP request range is clamped to the frequency range specified by the
CPUFREQ policy and to the cpu->pstate.min_pstate bound.

If you see the HWP minimum fluctuating above that, it's likely a sign of
your system not being completely idle -- if that's the case, it's likely
to go away after you do:

 echo 0 > /sys/kernel/debug/pstate_snb/vlp_realtime_gain_pml
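
In other words, intel_pstate_adjust_pstate_range() essentially never programs a minimum below max(cpu->pstate.min_pstate, policy minimum) nor a maximum above the policy maximum.  A small worked restatement of that clamp with illustrative numbers (not driver code):

#include <stdio.h>

static unsigned int clamp_uint(unsigned int v, unsigned int lo, unsigned int hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
	/* Illustrative values: min_pstate 4, policy range [4, 28]. */
	const unsigned int min_pstate = 4, policy_min = 4, policy_max = 28;
	const unsigned int p0 = min_pstate > policy_min ? min_pstate : policy_min;
	const unsigned int p1 = policy_max > p0 ? policy_max : p0;

	/* A VLP target range of [2, 35] gets pulled into [p0, p1] = [4, 28]. */
	printf("clamped range: [%u, %u]\n",
	       clamp_uint(2, p0, p1), clamp_uint(35, p0, p1));
	return 0;
}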

> Thanks,
> Srinivas
>
>
>> [...]

Re: [Intel-gfx] [PATCH 07/10] cpufreq: intel_pstate: Implement VLP controller for HWP parts.

2020-03-18 Thread Pandruvada, Srinivas
On Wed, 2020-03-18 at 12:51 -0700, Francisco Jerez wrote:
> "Pandruvada, Srinivas"  writes:
> 
> > On Tue, 2020-03-10 at 14:42 -0700, Francisco Jerez wrote:
> > > This implements a simple variably low-pass-filtering governor in
> > > control of the HWP MIN/MAX PERF range based on the previously
> > > introduced get_vlp_target_range().  See "cpufreq: intel_pstate:
> > > Implement VLP controller target P-state range estimation." for
> > > the
> > > rationale.
> > 
> > I just gave a try on a pretty idle system with just systemd
> > processes
> > and usual background tasks with nomodset. 
> > 
> > I see that there HWP min is getting changed between 4-8. Why are
> > changing HWP dynamic range even on an idle system running no where
> > close to TDP?
> > 
> 
> The HWP request range is clamped to the frequency range specified by
> the
> CPUFREQ policy and to the cpu->pstate.min_pstate bound.
> 
> If you see the HWP minimum fluctuating above that it's likely a sign
> of
> your system not being completely idle -- If that's the case it's
> likely
> to go away after you do:
> 
>  echo 0 > /sys/kernel/debug/pstate_snb/vlp_realtime_gain_pml
> 
The objective, as I thought, was to improve the performance of GPU
workloads that are limited by TDP because P-states ramp up and leave
less power for the GPU to complete a task.

HWP takes decisions based not just on the load on a CPU but on several
other factors like total SoC power and scalability. We don't want to
disturb the HWP algorithms when there is no TDP limitation. If writing 0
causes this behavior, then that should be the default.

Thanks,
Srinivas





> > Thanks,
> > Srinivas
> > 
> > 
> > > [...]

Re: [Intel-gfx] [PATCH 07/10] cpufreq: intel_pstate: Implement VLP controller for HWP parts.

2020-03-18 Thread Francisco Jerez
"Pandruvada, Srinivas"  writes:

> On Wed, 2020-03-18 at 12:51 -0700, Francisco Jerez wrote:
>> "Pandruvada, Srinivas"  writes:
>> 
>> > On Tue, 2020-03-10 at 14:42 -0700, Francisco Jerez wrote:
>> > > This implements a simple variably low-pass-filtering governor in
>> > > control of the HWP MIN/MAX PERF range based on the previously
>> > > introduced get_vlp_target_range().  See "cpufreq: intel_pstate:
>> > > Implement VLP controller target P-state range estimation." for
>> > > the
>> > > rationale.
>> > 
>> > I just gave a try on a pretty idle system with just systemd
>> > processes
>> > and usual background tasks with nomodset. 
>> > 
>> > I see that there HWP min is getting changed between 4-8. Why are
>> > changing HWP dynamic range even on an idle system running no where
>> > close to TDP?
>> > 
>> 
>> The HWP request range is clamped to the frequency range specified by
>> the
>> CPUFREQ policy and to the cpu->pstate.min_pstate bound.
>> 
>> If you see the HWP minimum fluctuating above that it's likely a sign
>> of
>> your system not being completely idle -- If that's the case it's
>> likely
>> to go away after you do:
>> 
>>  echo 0 > /sys/kernel/debug/pstate_snb/vlp_realtime_gain_pml
>> 
> The objective which I though was to improve performance of GPU
> workloads limited by TDP because of P-states ramping up and resulting
> in less power to GPU to complete a task.
>  
> HWP takes decision not on just load on a CPU but several other factors
> like total SoC power and scalability. We don't want to disturb HWP
> algorithms when there is no TDP limitations. If writing 0, causes this
> behavior then that should be the default.
>

The heuristic disabled by that debugfs file is there to avoid
regressions in latency-sensitive workloads as you can probably get from
the ecomments.  However ISTR those regressions were specific to non-HWP
systems, so I wouldn't mind disabling it for the moment (or punting it
to the non-HWP series if you like)j.  But first I need to verify that
there are no performance regressions on HWP systems after changing that.
Can you confirm that the debugfs write above prevents the behavior you'd
like to avoid?
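
For reference, the knob above is an ordinary debugfs integer.  A minimal sketch of how such a tunable can be exposed, matching the path quoted in the thread -- the surrounding structure, function name and default value are assumptions for illustration, not taken from the series:

#include <linux/debugfs.h>
#include <linux/types.h>

/* Assumed container for the tunable discussed above. */
static struct {
	u32 realtime_gain_pml;
} vlp_debug_params = {
	.realtime_gain_pml = 100,	/* illustrative default */
};

static struct dentry *vlp_debugfs_dir;

static void vlp_debugfs_init(void)
{
	/* Exposes /sys/kernel/debug/pstate_snb/vlp_realtime_gain_pml. */
	vlp_debugfs_dir = debugfs_create_dir("pstate_snb", NULL);
	debugfs_create_u32("vlp_realtime_gain_pml", 0644, vlp_debugfs_dir,
			   &vlp_debug_params.realtime_gain_pml);
}

Since the controller presumably re-reads the value on each update, echoing 0 into that file would take effect on the next sample.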

> Thanks,
> Srinivas
>
>
>
>
>
>> > Thanks,
>> > Srinivas
>> > 
>> > 
>> > > [...]

Re: [Intel-gfx] [PATCH 07/10] cpufreq: intel_pstate: Implement VLP controller for HWP parts.

2020-03-23 Thread Pandruvada, Srinivas
On Wed, 2020-03-18 at 13:22 -0700, Francisco Jerez wrote:
> "Pandruvada, Srinivas"  writes:
> 
> > On Wed, 2020-03-18 at 12:51 -0700, Francisco Jerez wrote:
> > > "Pandruvada, Srinivas"  writes:
> > > 
> > > > On Tue, 2020-03-10 at 14:42 -0700, Francisco Jerez wrote:
> > > > > This implements a simple variably low-pass-filtering governor
> > > > > in
> > > > > control of the HWP MIN/MAX PERF range based on the previously
> > > > > introduced get_vlp_target_range().  See "cpufreq:
> > > > > intel_pstate:
> > > > > Implement VLP controller target P-state range estimation."
> > > > > for
> > > > > the
> > > > > rationale.
> > > > 
> > > > I just gave a try on a pretty idle system with just systemd
> > > > processes
> > > > and usual background tasks with nomodset. 
> > > > 
> > > > I see that there HWP min is getting changed between 4-8. Why
> > > > are
> > > > changing HWP dynamic range even on an idle system running no
> > > > where
> > > > close to TDP?
> > > > 
> > > 
> > > The HWP request range is clamped to the frequency range specified
> > > by
> > > the
> > > CPUFREQ policy and to the cpu->pstate.min_pstate bound.
> > > 
> > > If you see the HWP minimum fluctuating above that it's likely a
> > > sign
> > > of
> > > your system not being completely idle -- If that's the case it's
> > > likely
> > > to go away after you do:
> > > 
> > >  echo 0 > /sys/kernel/debug/pstate_snb/vlp_realtime_gain_pml
> > > 
> > The objective which I though was to improve performance of GPU
> > workloads limited by TDP because of P-states ramping up and
> > resulting
> > in less power to GPU to complete a task.
> >  
> > HWP takes decision not on just load on a CPU but several other
> > factors
> > like total SoC power and scalability. We don't want to disturb HWP
> > algorithms when there is no TDP limitations. If writing 0, causes
> > this
> > behavior then that should be the default.
> > 
> 
> The heuristic disabled by that debugfs file is there to avoid
> regressions in latency-sensitive workloads as you can probably get
> from
> the ecomments.  However ISTR those regressions were specific to non-
> HWP
> systems, so I wouldn't mind disabling it for the moment (or punting
> it
> to the non-HWP series if you like)j.  But first I need to verify that
> there are no performance regressions on HWP systems after changing
> that.
> Can you confirm that the debugfs write above prevents the behavior
> you'd
> like to avoid?
It does prevent it. I monitored for 10 minutes and didn't see any
hwp_req update.

Thanks,
Srinivas

> 
> > Thanks,
> > Srinivas
> > 
> > 
> > 
> > 
> > 
> > > > Thanks,
> > > > Srinivas
> > > > 
> > > > 
> > > > > [...]