Re: [Update][PATCH 3/3] cpufreq: governor: Replace timers with utilization update callbacks

2016-02-04 Thread Rafael J. Wysocki
On Thursday, February 04, 2016 10:19:59 AM Viresh Kumar wrote:
> On 03-02-16, 02:16, Rafael J. Wysocki wrote:
> > Index: linux-pm/drivers/cpufreq/cpufreq_governor.c
> > -void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay)
> > +void gov_set_update_util(struct cpu_common_dbs_info *shared,
> > +                         unsigned int delay_us)
> >  {
> > +   struct cpufreq_policy *policy = shared->policy;
> > struct dbs_data *dbs_data = policy->governor_data;
> > -   struct cpu_dbs_info *cdbs;
> > int cpu;
> >  
> > +   shared->sample_delay_ns = delay_us * NSEC_PER_USEC;
> > +   shared->time_stamp = ktime_get();
> > +
> > for_each_cpu(cpu, policy->cpus) {
> > -   cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
> > -   cdbs->timer.expires = jiffies + delay;
> > -   add_timer_on(&cdbs->timer, cpu);
> > +   struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
> > +
> > +   cdbs->last_sample_time = 0;
> > +   cpufreq_set_update_util_data(cpu, &cdbs->update_util);
> 
> Why no synchronize_rcu() here?

Because it is not needed: this path always changes a NULL pointer into a non-NULL one.

> This can be called from the ondemand governor on sampling-rate updates ...

But that path calls gov_cancel_work() first, right?
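
For reference, a minimal sketch of the publication side (assuming the helper
uses rcu_assign_pointer(), as the scheduler patch in this series does):

DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);

void cpufreq_set_update_util_data(int cpu, struct update_util_data *data)
{
	/*
	 * Publishing a non-NULL pointer needs no grace period: readers
	 * either still see NULL and skip the callback, or see the fully
	 * initialized structure thanks to rcu_assign_pointer().
	 */
	rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
}

A grace period only matters when the pointer is cleared, which is why
gov_clear_update_util() below does call synchronize_rcu() before the
callback's data structures can be reused or freed.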

> 
> > }
> >  }
> > -EXPORT_SYMBOL_GPL(gov_add_timers);
> > +EXPORT_SYMBOL_GPL(gov_set_update_util);
> >  
> > -static inline void gov_cancel_timers(struct cpufreq_policy *policy)
> > +static inline void gov_clear_update_util(struct cpufreq_policy *policy)
> >  {
> > -   struct dbs_data *dbs_data = policy->governor_data;
> > -   struct cpu_dbs_info *cdbs;
> > int i;
> >  
> > -   for_each_cpu(i, policy->cpus) {
> > -   cdbs = dbs_data->cdata->get_cpu_cdbs(i);
> > -   del_timer_sync(&cdbs->timer);
> > -   }
> > +   for_each_cpu(i, policy->cpus)
> > +   cpufreq_set_update_util_data(i, NULL);
> > +
> > +   synchronize_rcu();
> >  }
> >  
> >  void gov_cancel_work(struct cpu_common_dbs_info *shared)
> >  {
> > -   /* Tell dbs_timer_handler() to skip queuing up work items. */
> > +   /* Tell dbs_update_util_handler() to skip queuing up work items. */
> > atomic_inc(&shared->skip_work);
> > /*
> > -* If dbs_timer_handler() is already running, it may not notice the
> > -* incremented skip_work, so wait for it to complete to prevent its work
> > -* item from being queued up after the cancel_work_sync() below.
> > -*/
> > -   gov_cancel_timers(shared->policy);
> > -   /*
> > -* In case dbs_timer_handler() managed to run and spawn a work item
> > -* before the timers have been canceled, wait for that work item to
> > -* complete and then cancel all of the timers set up by it.  If
> > -* dbs_timer_handler() runs again at that point, it will see the
> > -* positive value of skip_work and won't spawn any more work items.
> > +* If dbs_update_util_handler() is already running, it may not notice
> > +* the incremented skip_work, so wait for it to complete to prevent its
> > +* work item from being queued up after the cancel_work_sync() below.
> >  */
> > +   gov_clear_update_util(shared->policy);
> > cancel_work_sync(&shared->work);
> 
> How can we be sure that the irq_work isn't still pending at this point,
> which would queue the work item up again?

Good point.  The irq_work has to be waited for here too.
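
Something like this should close that race (a sketch; the exact placement may
change in the next revision):

void gov_cancel_work(struct cpu_common_dbs_info *shared)
{
	/* Tell dbs_update_util_handler() to skip queuing up work items. */
	atomic_inc(&shared->skip_work);
	/* Stop new callbacks and wait for the ones already running. */
	gov_clear_update_util(shared->policy);
	/* Wait for a possibly pending irq_work before the work item. */
	irq_work_sync(&shared->irq_work);
	cancel_work_sync(&shared->work);
	atomic_set(&shared->skip_work, 0);
}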

> > -   gov_cancel_timers(shared->policy);
> > atomic_set(&shared->skip_work, 0);
> >  }
> >  EXPORT_SYMBOL_GPL(gov_cancel_work);
> >  
> > -/* Will return if we need to evaluate cpu load again or not */
> > -static bool need_load_eval(struct cpu_common_dbs_info *shared,
> > -  unsigned int sampling_rate)
> > -{
> > -   if (policy_is_shared(shared->policy)) {
> > -   ktime_t time_now = ktime_get();
> > -   s64 delta_us = ktime_us_delta(time_now, shared->time_stamp);
> > -
> > -   /* Do nothing if we recently have sampled */
> > -   if (delta_us < (s64)(sampling_rate / 2))
> > -   return false;
> > -   else
> > -   shared->time_stamp = time_now;
> > -   }
> > -
> > -   return true;
> > -}
> > -
> >  static void dbs_work_handler(struct work_struct *work)
> >  {
> > struct cpu_common_dbs_info *shared = container_of(work, struct
> > @@ -235,14 +212,10 @@ static void dbs_work_handler(struct work
> > struct cpufreq_policy *policy;
> > struct dbs_data *dbs_data;
> > unsigned int sampling_rate, delay;
> > -   bool eval_load;
> >  
> > policy = shared->policy;
> > dbs_data = policy->governor_data;
> >  
> > -   /* Kill all timers */
> > -   gov_cancel_timers(policy);
> > -
> > if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
> > struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
> >  
> > @@ -253,37 +226,53 @@ static void dbs_work_handler(struct work
> > sampling_rate = od_tuners->sampling_rate;
> > }
> >  
> > -   eval_load = need_load_eval(shared, sampling_rate);

Re: [Update][PATCH 3/3] cpufreq: governor: Replace timers with utilization update callbacks

2016-02-03 Thread Viresh Kumar
On 03-02-16, 02:16, Rafael J. Wysocki wrote:
> Index: linux-pm/drivers/cpufreq/cpufreq_governor.c
> -void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay)
> +void gov_set_update_util(struct cpu_common_dbs_info *shared,
> +                         unsigned int delay_us)
>  {
> + struct cpufreq_policy *policy = shared->policy;
>   struct dbs_data *dbs_data = policy->governor_data;
> - struct cpu_dbs_info *cdbs;
>   int cpu;
>  
> + shared->sample_delay_ns = delay_us * NSEC_PER_USEC;
> + shared->time_stamp = ktime_get();
> +
>   for_each_cpu(cpu, policy->cpus) {
> - cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
> - cdbs->timer.expires = jiffies + delay;
> - add_timer_on(&cdbs->timer, cpu);
> + struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
> +
> + cdbs->last_sample_time = 0;
> + cpufreq_set_update_util_data(cpu, &cdbs->update_util);

Why no synchronize_rcu() here? This can be called from the ondemand
governor on sampling-rate updates ...

>   }
>  }
> -EXPORT_SYMBOL_GPL(gov_add_timers);
> +EXPORT_SYMBOL_GPL(gov_set_update_util);
>  
> -static inline void gov_cancel_timers(struct cpufreq_policy *policy)
> +static inline void gov_clear_update_util(struct cpufreq_policy *policy)
>  {
> - struct dbs_data *dbs_data = policy->governor_data;
> - struct cpu_dbs_info *cdbs;
>   int i;
>  
> - for_each_cpu(i, policy->cpus) {
> - cdbs = dbs_data->cdata->get_cpu_cdbs(i);
> - del_timer_sync(&cdbs->timer);
> - }
> + for_each_cpu(i, policy->cpus)
> + cpufreq_set_update_util_data(i, NULL);
> +
> + synchronize_rcu();
>  }
>  
>  void gov_cancel_work(struct cpu_common_dbs_info *shared)
>  {
> - /* Tell dbs_timer_handler() to skip queuing up work items. */
> + /* Tell dbs_update_util_handler() to skip queuing up work items. */
>   atomic_inc(&shared->skip_work);
>   /*
> -  * If dbs_timer_handler() is already running, it may not notice the
> -  * incremented skip_work, so wait for it to complete to prevent its work
> -  * item from being queued up after the cancel_work_sync() below.
> -  */
> - gov_cancel_timers(shared->policy);
> - /*
> -  * In case dbs_timer_handler() managed to run and spawn a work item
> -  * before the timers have been canceled, wait for that work item to
> -  * complete and then cancel all of the timers set up by it.  If
> -  * dbs_timer_handler() runs again at that point, it will see the
> -  * positive value of skip_work and won't spawn any more work items.
> +  * If dbs_update_util_handler() is already running, it may not notice
> +  * the incremented skip_work, so wait for it to complete to prevent its
> +  * work item from being queued up after the cancel_work_sync() below.
>    */
> + gov_clear_update_util(shared->policy);
>   cancel_work_sync(&shared->work);

How can we be sure that the irq_work isn't still pending at this point,
which would queue the work item up again?

> - gov_cancel_timers(shared->policy);
>   atomic_set(&shared->skip_work, 0);
>  }
>  EXPORT_SYMBOL_GPL(gov_cancel_work);
>  
> -/* Will return if we need to evaluate cpu load again or not */
> -static bool need_load_eval(struct cpu_common_dbs_info *shared,
> -unsigned int sampling_rate)
> -{
> - if (policy_is_shared(shared->policy)) {
> - ktime_t time_now = ktime_get();
> - s64 delta_us = ktime_us_delta(time_now, shared->time_stamp);
> -
> - /* Do nothing if we recently have sampled */
> - if (delta_us < (s64)(sampling_rate / 2))
> - return false;
> - else
> - shared->time_stamp = time_now;
> - }
> -
> - return true;
> -}
> -
>  static void dbs_work_handler(struct work_struct *work)
>  {
>   struct cpu_common_dbs_info *shared = container_of(work, struct
> @@ -235,14 +212,10 @@ static void dbs_work_handler(struct work
>   struct cpufreq_policy *policy;
>   struct dbs_data *dbs_data;
>   unsigned int sampling_rate, delay;
> - bool eval_load;
>  
>   policy = shared->policy;
>   dbs_data = policy->governor_data;
>  
> - /* Kill all timers */
> - gov_cancel_timers(policy);
> -
>   if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
>   struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
>  
> @@ -253,37 +226,53 @@ static void dbs_work_handler(struct work
>   sampling_rate = od_tuners->sampling_rate;
>   }
>  
> - eval_load = need_load_eval(shared, sampling_rate);
> -
>   /*
> -  * Make sure cpufreq_governor_limits() isn't evaluating load in
> +  * Make sure cpufreq_governor_limits() isn't evaluating load or the
> +  * ondemand governor isn't reading the time stamp and sampling rate in
>    * parallel.
>    */
>   mutex_lock(&shared->timer_mutex);

[Update][PATCH 3/3] cpufreq: governor: Replace timers with utilization update callbacks

2016-02-02 Thread Rafael J. Wysocki
From: Rafael J. Wysocki 
Subject: [PATCH] cpufreq: governor: Replace timers with utilization update 
callbacks

Instead of using a per-CPU deferrable timer for queuing up governor
work items, register a utilization update callback that will be
invoked from the scheduler on utilization changes.

The sampling rate is still the same as what was used for the
deferrable timers and the added irq_work overhead should be offset by
the eliminated timers overhead, so in theory the functional impact of
this patch should not be significant.

Signed-off-by: Rafael J. Wysocki 
---

I realized that the previous version of this patch didn't remove some code
that wasn't necessary any more, so here's an update.
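
In short, the new callback rate-limits itself roughly like this (a simplified
sketch of the handler; see the patch body for the real thing, which the
archive may truncate):

static void dbs_update_util_handler(struct update_util_data *data, u64 time,
				    unsigned long util, unsigned long max)
{
	struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info,
						 update_util);
	struct cpu_common_dbs_info *shared = cdbs->shared;

	/*
	 * Queue up a work item only if no other CPU got here first
	 * (skip_work was 0) and at least sample_delay_ns has passed
	 * since the last sample taken for this policy.
	 */
	if (atomic_inc_return(&shared->skip_work) == 1) {
		u64 delta_ns = time - cdbs->last_sample_time;

		if ((s64)delta_ns >= shared->sample_delay_ns) {
			cdbs->last_sample_time = time;
			/* dbs_work_handler() drops skip_work when done. */
			irq_work_queue(&shared->irq_work);
			return;
		}
	}
	atomic_dec(&shared->skip_work);
}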

---
 drivers/cpufreq/cpufreq_conservative.c |    6 -
 drivers/cpufreq/cpufreq_governor.c     |  136 ++---
 drivers/cpufreq/cpufreq_governor.h     |   13 +--
 drivers/cpufreq/cpufreq_ondemand.c     |   25 ++
 4 files changed, 83 insertions(+), 97 deletions(-)

Index: linux-pm/drivers/cpufreq/cpufreq_governor.h
===================================================================
--- linux-pm.orig/drivers/cpufreq/cpufreq_governor.h
+++ linux-pm/drivers/cpufreq/cpufreq_governor.h
@@ -18,6 +18,7 @@
 #define _CPUFREQ_GOVERNOR_H
 
#include <linux/atomic.h>
+#include <linux/irq_work.h>
#include <linux/cpufreq.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
@@ -139,7 +140,9 @@ struct cpu_common_dbs_info {
struct mutex timer_mutex;
 
ktime_t time_stamp;
+   s64 sample_delay_ns;
atomic_t skip_work;
+   struct irq_work irq_work;
struct work_struct work;
 };
 
@@ -155,7 +158,8 @@ struct cpu_dbs_info {
 * wake-up from idle.
 */
unsigned int prev_load;
-   struct timer_list timer;
+   u64 last_sample_time;
+   struct update_util_data update_util;
struct cpu_common_dbs_info *shared;
 };
 
@@ -212,8 +216,7 @@ struct common_dbs_data {
 
struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu);
void *(*get_cpu_dbs_info_s)(int cpu);
-   unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy,
- bool modify_all);
+   unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy);
void (*gov_check_cpu)(int cpu, unsigned int load);
int (*init)(struct dbs_data *dbs_data, bool notify);
void (*exit)(struct dbs_data *dbs_data, bool notify);
@@ -270,8 +273,8 @@ static ssize_t show_sampling_rate_min_go
 }
 
 extern struct mutex cpufreq_governor_lock;
-
-void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay);
+void gov_set_update_util(struct cpu_common_dbs_info *shared,
+                         unsigned int delay_us);
 void gov_cancel_work(struct cpu_common_dbs_info *shared);
 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu);
 int cpufreq_governor_dbs(struct cpufreq_policy *policy,
Index: linux-pm/drivers/cpufreq/cpufreq_governor.c
===================================================================
--- linux-pm.orig/drivers/cpufreq/cpufreq_governor.c
+++ linux-pm/drivers/cpufreq/cpufreq_governor.c
@@ -128,10 +128,10 @@ void dbs_check_cpu(struct dbs_data *dbs_
 * dropped down. So we perform the copy only once, upon the
 * first wake-up from idle.)
 *
-* Detecting this situation is easy: the governor's deferrable
-* timer would not have fired during CPU-idle periods. Hence
-* an unusually large 'wall_time' (as compared to the sampling
-* rate) indicates this scenario.
+* Detecting this situation is easy: the governor's utilization
+* update handler would not have run during CPU-idle periods.
+* Hence, an unusually large 'wall_time' (as compared to the
+* sampling rate) indicates this scenario.
 *
 * prev_load can be zero in two cases and we must recalculate it
 * for both cases:
@@ -161,73 +161,50 @@ void dbs_check_cpu(struct dbs_data *dbs_
 }
 EXPORT_SYMBOL_GPL(dbs_check_cpu);
 
-void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay)
+void gov_set_update_util(struct cpu_common_dbs_info *shared,
+                         unsigned int delay_us)
 {
+   struct cpufreq_policy *policy = shared->policy;
struct dbs_data *dbs_data = policy->governor_data;
-   struct cpu_dbs_info *cdbs;
int cpu;
 
+   shared->sample_delay_ns = delay_us * NSEC_PER_USEC;
+   shared->time_stamp = ktime_get();
+
for_each_cpu(cpu, policy->cpus) {
-   cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
-   cdbs->timer.expires = jiffies + delay;
-   add_timer_on(&cdbs->timer, cpu);
+   struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
+
+   cdbs->last_sample_time = 0;
+   cpufreq_set_update_util_data(cpu, &cdbs->update_util);
}
 }
-EXPORT_SYMBOL_GPL(gov_add_timers);
+EXPORT_SYMBOL_GPL(gov_set_update_util);