LGTM

On Fri, Aug 12, 2016 at 02:04:42AM +0200, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wyso...@intel.com>
> 
> It is useful to know the reason why cpufreq_update_util() has just
> been called and that can be passed as flags to cpufreq_update_util()
> and to the ->func() callback in struct update_util_data.  However,
> doing that in addition to passing the util and max arguments they
> already take would be clumsy, so avoid it.
> 
> Instead, use the observation that the schedutil governor is part
> of the scheduler proper, so it can access scheduler data directly.
> This allows the util and max arguments of cpufreq_update_util()
> and the ->func() callback in struct update_util_data to be replaced
> with a flags one, but schedutil has to be modified to follow.
> 
> Thus make the schedutil governor obtain the CFS utilization
> information from the scheduler and use the "RT" and "DL" flags
> instead of the special utilization value of ULONG_MAX to track
> updates from the RT and DL sched classes.  Make it non-modular
> too to avoid having to export scheduler variables to modules at
> large.
> 
> Next, update all of the other users of cpufreq_update_util()
> and the ->func() callback in struct update_util_data accordingly.
> 
> Suggested-by: Peter Zijlstra <pet...@infradead.org>
> Signed-off-by: Rafael J. Wysocki <rafael.j.wyso...@intel.com>
> ---
> 
> v1 -> v2: Do not check cpu_of(rq) against smp_processor_id() in
>           cfs_rq_util_change().
> 
> ---
>  drivers/cpufreq/Kconfig            |    5 --
>  drivers/cpufreq/cpufreq_governor.c |    2 -
>  drivers/cpufreq/intel_pstate.c     |    2 -
>  include/linux/sched.h              |   12 ++++--
>  kernel/sched/cpufreq.c             |    2 -
>  kernel/sched/cpufreq_schedutil.c   |   67 
> ++++++++++++++++++++-----------------
>  kernel/sched/deadline.c            |    4 +-
>  kernel/sched/fair.c                |   10 +----
>  kernel/sched/rt.c                  |    4 +-
>  kernel/sched/sched.h               |   31 +++++------------
>  10 files changed, 66 insertions(+), 73 deletions(-)
> 
> Index: linux-pm/drivers/cpufreq/cpufreq_governor.c
> ===================================================================
> --- linux-pm.orig/drivers/cpufreq/cpufreq_governor.c
> +++ linux-pm/drivers/cpufreq/cpufreq_governor.c
> @@ -260,7 +260,7 @@ static void dbs_irq_work(struct irq_work
>  }
>  
>  static void dbs_update_util_handler(struct update_util_data *data, u64 time,
> -                                 unsigned long util, unsigned long max)
> +                                 unsigned int flags)
>  {
>       struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, 
> update_util);
>       struct policy_dbs_info *policy_dbs = cdbs->policy_dbs;
> Index: linux-pm/drivers/cpufreq/intel_pstate.c
> ===================================================================
> --- linux-pm.orig/drivers/cpufreq/intel_pstate.c
> +++ linux-pm/drivers/cpufreq/intel_pstate.c
> @@ -1329,7 +1329,7 @@ static inline void intel_pstate_adjust_b
>  }
>  
>  static void intel_pstate_update_util(struct update_util_data *data, u64 time,
> -                                  unsigned long util, unsigned long max)
> +                                  unsigned int flags)
>  {
>       struct cpudata *cpu = container_of(data, struct cpudata, update_util);
>       u64 delta_ns = time - cpu->sample.time;
> Index: linux-pm/include/linux/sched.h
> ===================================================================
> --- linux-pm.orig/include/linux/sched.h
> +++ linux-pm/include/linux/sched.h
> @@ -3469,15 +3469,19 @@ static inline unsigned long rlimit_max(u
>       return task_rlimit_max(current, limit);
>  }
>  
> +#define SCHED_CPUFREQ_RT     (1U << 0)
> +#define SCHED_CPUFREQ_DL     (1U << 1)
> +
> +#define SCHED_CPUFREQ_RT_DL  (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL)
> +
>  #ifdef CONFIG_CPU_FREQ
>  struct update_util_data {
> -     void (*func)(struct update_util_data *data,
> -                  u64 time, unsigned long util, unsigned long max);
> +       void (*func)(struct update_util_data *data, u64 time, unsigned int 
> flags);
>  };
>  
>  void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
> -                     void (*func)(struct update_util_data *data, u64 time,
> -                                  unsigned long util, unsigned long max));
> +                       void (*func)(struct update_util_data *data, u64 time,
> +                                 unsigned int flags));
>  void cpufreq_remove_update_util_hook(int cpu);
>  #endif /* CONFIG_CPU_FREQ */
>  
> Index: linux-pm/kernel/sched/cpufreq.c
> ===================================================================
> --- linux-pm.orig/kernel/sched/cpufreq.c
> +++ linux-pm/kernel/sched/cpufreq.c
> @@ -33,7 +33,7 @@ DEFINE_PER_CPU(struct update_util_data *
>   */
>  void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
>                       void (*func)(struct update_util_data *data, u64 time,
> -                                  unsigned long util, unsigned long max))
> +                                  unsigned int flags))
>  {
>       if (WARN_ON(!data || !func))
>               return;
> Index: linux-pm/kernel/sched/cpufreq_schedutil.c
> ===================================================================
> --- linux-pm.orig/kernel/sched/cpufreq_schedutil.c
> +++ linux-pm/kernel/sched/cpufreq_schedutil.c
> @@ -12,7 +12,6 @@
>  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>  
>  #include <linux/cpufreq.h>
> -#include <linux/module.h>
>  #include <linux/slab.h>
>  #include <trace/events/power.h>
>  
> @@ -53,6 +52,7 @@ struct sugov_cpu {
>       unsigned long util;
>       unsigned long max;
>       u64 last_update;
> +     unsigned int flags;
>  };
>  
>  static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
> @@ -144,24 +144,39 @@ static unsigned int get_next_freq(struct
>       return cpufreq_driver_resolve_freq(policy, freq);
>  }
>  
> +static void sugov_get_util(unsigned long *util, unsigned long *max)
> +{
> +     struct rq *rq = this_rq();
> +     unsigned long cfs_max = rq->cpu_capacity_orig;
> +
> +     *util = min(rq->cfs.avg.util_avg, cfs_max);
> +     *max = cfs_max;
> +}
> +
>  static void sugov_update_single(struct update_util_data *hook, u64 time,
> -                             unsigned long util, unsigned long max)
> +                             unsigned int flags)
>  {
>       struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, 
> update_util);
>       struct sugov_policy *sg_policy = sg_cpu->sg_policy;
>       struct cpufreq_policy *policy = sg_policy->policy;
> +     unsigned long util, max;
>       unsigned int next_f;
>  
>       if (!sugov_should_update_freq(sg_policy, time))
>               return;
>  
> -     next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq :
> -                     get_next_freq(sg_cpu, util, max);
> +     if (flags & SCHED_CPUFREQ_RT_DL) {
> +             next_f = policy->cpuinfo.max_freq;
> +     } else {
> +             sugov_get_util(&util, &max);
> +             next_f = get_next_freq(sg_cpu, util, max);
> +     }
>       sugov_update_commit(sg_policy, time, next_f);
>  }
>  
>  static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
> -                                        unsigned long util, unsigned long 
> max)
> +                                        unsigned long util, unsigned long 
> max,
> +                                        unsigned int flags)
>  {
>       struct sugov_policy *sg_policy = sg_cpu->sg_policy;
>       struct cpufreq_policy *policy = sg_policy->policy;
> @@ -169,7 +184,7 @@ static unsigned int sugov_next_freq_shar
>       u64 last_freq_update_time = sg_policy->last_freq_update_time;
>       unsigned int j;
>  
> -     if (util == ULONG_MAX)
> +     if (flags & SCHED_CPUFREQ_RT_DL)
>               return max_f;
>  
>       for_each_cpu(j, policy->cpus) {
> @@ -192,10 +207,10 @@ static unsigned int sugov_next_freq_shar
>               if (delta_ns > TICK_NSEC)
>                       continue;
>  
> -             j_util = j_sg_cpu->util;
> -             if (j_util == ULONG_MAX)
> +             if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL)
>                       return max_f;
>  
> +             j_util = j_sg_cpu->util;
>               j_max = j_sg_cpu->max;
>               if (j_util * max > j_max * util) {
>                       util = j_util;
> @@ -207,20 +222,24 @@ static unsigned int sugov_next_freq_shar
>  }
>  
>  static void sugov_update_shared(struct update_util_data *hook, u64 time,
> -                             unsigned long util, unsigned long max)
> +                             unsigned int flags)
>  {
>       struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, 
> update_util);
>       struct sugov_policy *sg_policy = sg_cpu->sg_policy;
> +     unsigned long util, max;
>       unsigned int next_f;
>  
> +     sugov_get_util(&util, &max);
> +
>       raw_spin_lock(&sg_policy->update_lock);
>  
>       sg_cpu->util = util;
>       sg_cpu->max = max;
> +     sg_cpu->flags = flags;
>       sg_cpu->last_update = time;
>  
>       if (sugov_should_update_freq(sg_policy, time)) {
> -             next_f = sugov_next_freq_shared(sg_cpu, util, max);
> +             next_f = sugov_next_freq_shared(sg_cpu, util, max, flags);
>               sugov_update_commit(sg_policy, time, next_f);
>       }
>  
> @@ -444,8 +463,9 @@ static int sugov_start(struct cpufreq_po
>  
>               sg_cpu->sg_policy = sg_policy;
>               if (policy_is_shared(policy)) {
> -                     sg_cpu->util = ULONG_MAX;
> +                     sg_cpu->util = 0;
>                       sg_cpu->max = 0;
> +                     sg_cpu->flags = SCHED_CPUFREQ_RT;
>                       sg_cpu->last_update = 0;
>                       sg_cpu->cached_raw_freq = 0;
>                       cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
> @@ -495,28 +515,15 @@ static struct cpufreq_governor schedutil
>       .limits = sugov_limits,
>  };
>  
> -static int __init sugov_module_init(void)
> -{
> -     return cpufreq_register_governor(&schedutil_gov);
> -}
> -
> -static void __exit sugov_module_exit(void)
> -{
> -     cpufreq_unregister_governor(&schedutil_gov);
> -}
> -
> -MODULE_AUTHOR("Rafael J. Wysocki <rafael.j.wyso...@intel.com>");
> -MODULE_DESCRIPTION("Utilization-based CPU frequency selection");
> -MODULE_LICENSE("GPL");
> -
>  #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
>  struct cpufreq_governor *cpufreq_default_governor(void)
>  {
>       return &schedutil_gov;
>  }
> -
> -fs_initcall(sugov_module_init);
> -#else
> -module_init(sugov_module_init);
>  #endif
> -module_exit(sugov_module_exit);
> +
> +static int __init sugov_register(void)
> +{
> +     return cpufreq_register_governor(&schedutil_gov);
> +}
> +fs_initcall(sugov_register);
> Index: linux-pm/kernel/sched/fair.c
> ===================================================================
> --- linux-pm.orig/kernel/sched/fair.c
> +++ linux-pm/kernel/sched/fair.c
> @@ -2875,11 +2875,8 @@ static inline void update_tg_load_avg(st
>  
>  static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
>  {
> -     struct rq *rq = rq_of(cfs_rq);
> -     int cpu = cpu_of(rq);
> -
> -     if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
> -             unsigned long max = rq->cpu_capacity_orig;
> +     if (&this_rq()->cfs == cfs_rq) {
> +             struct rq *rq = rq_of(cfs_rq);
>  
>               /*
>                * There are a few boundary cases this might miss but it should
> @@ -2897,8 +2894,7 @@ static inline void cfs_rq_util_change(st
>                *
>                * See cpu_util().
>                */
> -             cpufreq_update_util(rq_clock(rq),
> -                                 min(cfs_rq->avg.util_avg, max), max);
> +             cpufreq_update_util(rq_clock(rq), 0);
>       }
>  }
>  
> Index: linux-pm/kernel/sched/sched.h
> ===================================================================
> --- linux-pm.orig/kernel/sched/sched.h
> +++ linux-pm/kernel/sched/sched.h
> @@ -1764,26 +1764,12 @@ DECLARE_PER_CPU(struct update_util_data
>  /**
>   * cpufreq_update_util - Take a note about CPU utilization changes.
>   * @time: Current time.
> - * @util: Current utilization.
> - * @max: Utilization ceiling.
> + * @flags: Update reason flags.
>   *
> - * This function is called by the scheduler on every invocation of
> - * update_load_avg() on the CPU whose utilization is being updated.
> + * This function is called by the scheduler on the CPU whose utilization is
> + * being updated.
>   *
>   * It can only be called from RCU-sched read-side critical sections.
> - */
> -static inline void cpufreq_update_util(u64 time, unsigned long util, 
> unsigned long max)
> -{
> -       struct update_util_data *data;
> -
> -       data = 
> rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
> -       if (data)
> -               data->func(data, time, util, max);
> -}
> -
> -/**
> - * cpufreq_trigger_update - Trigger CPU performance state evaluation if 
> needed.
> - * @time: Current time.
>   *
>   * The way cpufreq is currently arranged requires it to evaluate the CPU
>   * performance state (frequency/voltage) on a regular basis to prevent it 
> from
> @@ -1797,13 +1783,16 @@ static inline void cpufreq_update_util(u
>   * but that really is a band-aid.  Going forward it should be replaced with
>   * solutions targeted more specifically at RT and DL tasks.
>   */
> -static inline void cpufreq_trigger_update(u64 time)
> +static inline void cpufreq_update_util(u64 time, unsigned int flags)
>  {
> -     cpufreq_update_util(time, ULONG_MAX, 0);
> +     struct update_util_data *data;
> +
> +     data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
> +     if (data)
> +             data->func(data, time, flags);
>  }
>  #else
> -static inline void cpufreq_update_util(u64 time, unsigned long util, 
> unsigned long max) {}
> -static inline void cpufreq_trigger_update(u64 time) {}
> +static inline void cpufreq_update_util(u64 time, unsigned int flags) {}
>  #endif /* CONFIG_CPU_FREQ */
>  
>  #ifdef arch_scale_freq_capacity
> Index: linux-pm/kernel/sched/deadline.c
> ===================================================================
> --- linux-pm.orig/kernel/sched/deadline.c
> +++ linux-pm/kernel/sched/deadline.c
> @@ -732,9 +732,9 @@ static void update_curr_dl(struct rq *rq
>               return;
>       }
>  
> -     /* kick cpufreq (see the comment in linux/cpufreq.h). */
> +     /* kick cpufreq (see the comment in kernel/sched/sched.h). */
>       if (cpu_of(rq) == smp_processor_id())
> -             cpufreq_trigger_update(rq_clock(rq));
> +             cpufreq_update_util(rq_clock(rq), SCHED_CPUFREQ_DL);
>  
>       schedstat_set(curr->se.statistics.exec_max,
>                     max(curr->se.statistics.exec_max, delta_exec));
> Index: linux-pm/kernel/sched/rt.c
> ===================================================================
> --- linux-pm.orig/kernel/sched/rt.c
> +++ linux-pm/kernel/sched/rt.c
> @@ -957,9 +957,9 @@ static void update_curr_rt(struct rq *rq
>       if (unlikely((s64)delta_exec <= 0))
>               return;
>  
> -     /* Kick cpufreq (see the comment in linux/cpufreq.h). */
> +     /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
>       if (cpu_of(rq) == smp_processor_id())
> -             cpufreq_trigger_update(rq_clock(rq));
> +             cpufreq_update_util(rq_clock(rq), SCHED_CPUFREQ_RT);
>  
>       schedstat_set(curr->se.statistics.exec_max,
>                     max(curr->se.statistics.exec_max, delta_exec));
> Index: linux-pm/drivers/cpufreq/Kconfig
> ===================================================================
> --- linux-pm.orig/drivers/cpufreq/Kconfig
> +++ linux-pm/drivers/cpufreq/Kconfig
> @@ -194,7 +194,7 @@ config CPU_FREQ_GOV_CONSERVATIVE
>         If in doubt, say N.
>  
>  config CPU_FREQ_GOV_SCHEDUTIL
> -     tristate "'schedutil' cpufreq policy governor"
> +     bool "'schedutil' cpufreq policy governor"
>       depends on CPU_FREQ && SMP
>       select CPU_FREQ_GOV_ATTR_SET
>       select IRQ_WORK
> @@ -208,9 +208,6 @@ config CPU_FREQ_GOV_SCHEDUTIL
>         frequency tipping point is at utilization/capacity equal to 80% in
>         both cases.
>  
> -       To compile this driver as a module, choose M here: the module will
> -       be called cpufreq_schedutil.
> -
>         If in doubt, say N.
>  
>  comment "CPU frequency scaling drivers"

Reply via email to