Record the overhead of idle entry in micro-second

Signed-off-by: Aubrey Li <aubrey...@linux.intel.com>
---
 drivers/cpuidle/cpuidle.c | 33 +++++++++++++++++++++++++++++++++
 include/linux/cpuidle.h   | 14 ++++++++++++++
 kernel/sched/idle.c       |  7 +++++++
 3 files changed, 54 insertions(+)

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 60bb64f..4066308 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -302,6 +302,39 @@ void cpuidle_reflect(struct cpuidle_device *dev, int index)
 		cpuidle_curr_governor->reflect(dev, index);
 }
 
+/* cpuidle_entry_start - record idle entry start */
+void cpuidle_entry_start(void)
+{
+	struct cpuidle_device *dev = cpuidle_get_device();
+
+	if (dev)
+		dev->idle_stat.entry_start = local_clock();
+}
+
+/*
+ * cpuidle_entry_end - record idle entry end, and maintain
+ * the entry overhead average in micro-second
+ */
+void cpuidle_entry_end(void)
+{
+	struct cpuidle_device *dev = cpuidle_get_device();
+	u64 overhead;
+	s64 diff;
+
+	if (dev) {
+		dev->idle_stat.entry_end = local_clock();
+		overhead = div_u64(dev->idle_stat.entry_end -
+				dev->idle_stat.entry_start, NSEC_PER_USEC);
+		diff = overhead - dev->idle_stat.overhead;
+		dev->idle_stat.overhead += diff >> 3;
+		/*
+		 * Floor the average at 1us so a non-zero overhead is reported.
+		 */
+		if (dev->idle_stat.overhead == 0)
+			dev->idle_stat.overhead = 1;
+	}
+}
+
 /**
  * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
  */
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index fc1e5d7..cad9b71 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -72,6 +72,15 @@ struct cpuidle_device_kobj;
 struct cpuidle_state_kobj;
 struct cpuidle_driver_kobj;
 
+struct cpuidle_stat {
+	u64		entry_start;	/* nanosecond */
+	u64		entry_end;	/* nanosecond */
+	u64		overhead;	/* microsecond */
+	unsigned int	predicted_us;	/* microsecond */
+	bool		predicted;	/* ever predicted? */
+	bool		fast_idle;	/* fast idle? */
+};
+
 struct cpuidle_device {
 	unsigned int		registered:1;
 	unsigned int		enabled:1;
@@ -89,6 +98,7 @@ struct cpuidle_device {
 	cpumask_t		coupled_cpus;
 	struct cpuidle_coupled	*coupled;
 #endif
+	struct cpuidle_stat	idle_stat;
 };
 
 DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
@@ -131,6 +141,8 @@ extern bool cpuidle_not_available(struct cpuidle_driver *drv,
 				  struct cpuidle_device *dev);
 extern int cpuidle_select(struct cpuidle_driver *drv,
 			  struct cpuidle_device *dev);
+extern void cpuidle_entry_start(void);
+extern void cpuidle_entry_end(void);
 extern int cpuidle_enter(struct cpuidle_driver *drv,
 			 struct cpuidle_device *dev, int index);
 extern void cpuidle_reflect(struct cpuidle_device *dev, int index);
@@ -164,6 +176,8 @@ static inline bool cpuidle_not_available(struct cpuidle_driver *drv,
 static inline int cpuidle_select(struct cpuidle_driver *drv,
 				 struct cpuidle_device *dev)
 {return -ENODEV; }
+static inline void cpuidle_entry_start(void) { }
+static inline void cpuidle_entry_end(void) { }
 static inline int cpuidle_enter(struct cpuidle_driver *drv,
 				struct cpuidle_device *dev, int index)
 {return -ENODEV; }
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 6c23e30..0951dac 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -210,6 +210,12 @@ static void cpuidle_idle_call(void)
 static void do_idle(void)
 {
 	/*
+	 * We record idle entry overhead from here, so any deferrable items
+	 * in the idle entry path need to be placed between
+	 * cpuidle_entry_start() and cpuidle_entry_end().
+	 */
+	cpuidle_entry_start();
+	/*
 	 * If the arch has a polling bit, we maintain an invariant:
 	 *
 	 * Our polling bit is clear if we're not scheduled (i.e. if rq->curr !=
@@ -217,10 +223,11 @@ static void do_idle(void)
 	 * then setting need_resched is guaranteed to cause the CPU to
 	 * reschedule.
 	 */
 
 	__current_set_polling();
 	quiet_vmstat();
 	tick_nohz_idle_enter();
+	cpuidle_entry_end();
 
 	while (!need_resched()) {
 		check_pgt_cache();
-- 
2.7.4