Now an observer cpu can refer to both the idle entry time and the iowait
exit time of the observed sleeping cpu, so the observer can get the
idle/iowait time of the sleeping cpu by calculating the cputime that has
not been accounted yet.

Not-Tested-by: Hidetoshi Seto <seto.hideto...@jp.fujitsu.com>
---
 include/linux/sched.h    |    1 +
 kernel/sched/core.c      |   27 +++++++++++++++++++++++++
 kernel/time/tick-sched.c |   48 +++++++++++++++++++++++++++++++++------------
 3 files changed, 63 insertions(+), 13 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 306f4f0..29e1af0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -168,6 +168,7 @@ extern int nr_processes(void);
 extern unsigned long nr_running(void);
 extern unsigned long nr_iowait(void);
 extern unsigned long nr_iowait_cpu(int cpu);
+extern void nr_iowait_deltas(int cpu, ktime_t start, ktime_t now, ktime_t *iowait, ktime_t *idle);
 extern unsigned long this_cpu_load(void);
 
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e759238..814ee2e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2384,6 +2384,33 @@ unsigned long nr_iowait_cpu(int cpu)
        return cpu_rq(cpu)->nr_iowait;
 }
 
+/*
+ * nr_iowait_deltas - divide idle time into idle delta and iowait delta
+ * @cpu: cpu whose rq->nr_iowait and rq->last_iowait decide the split
+ * @start: time stamp at start of idle span
+ * @now: time stamp at end of idle span
+ * @iowait_delta: out: part of [start, now] counted as iowait
+ * @idle_delta: out: remaining part of [start, now] counted as idle
+ */
+void nr_iowait_deltas(int cpu, ktime_t start, ktime_t now,
+                     ktime_t *iowait_delta, ktime_t *idle_delta)
+{
+       struct rq *rq = cpu_rq(cpu);
+
+       raw_spin_lock(&rq->iowait_lock);
+       if (rq->nr_iowait || ktime_compare(rq->last_iowait, now) > 0) {
+               *iowait_delta = ktime_sub(now, start);
+               *idle_delta = ktime_set(0, 0);
+       } else if (ktime_compare(rq->last_iowait, start) > 0) {
+               *iowait_delta = ktime_sub(rq->last_iowait, start);
+               *idle_delta = ktime_sub(now, rq->last_iowait);
+       } else {
+               *iowait_delta = ktime_set(0, 0);
+               *idle_delta = ktime_sub(now, start);
+       }
+       raw_spin_unlock(&rq->iowait_lock);
+}
+
 #ifdef CONFIG_SMP
 
 /*
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 44eb187..8d23af5 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -408,16 +408,22 @@ static void tick_nohz_update_jiffies(ktime_t now)
 
 static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
 {
-       ktime_t delta;
+       static const ktime_t ktime_zero = { .tv64 = 0 };
+       ktime_t iowait_delta = ktime_zero, idle_delta = ktime_zero;
 
        write_seqcount_begin(&ts->idle_sleeptime_seq);
 
        /* Updates the per cpu time idle statistics counters */
-       delta = ktime_sub(now, ts->idle_entrytime);
-       if (nr_iowait_cpu(smp_processor_id()) > 0)
-               ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
-       else
-               ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
+       if (ts->idle_active == 2) {
+               nr_iowait_deltas(smp_processor_id(), ts->idle_entrytime, now,
+                                &iowait_delta, &idle_delta);
+               ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime,
+                                                iowait_delta);
+       } else {
+               idle_delta = ktime_sub(now, ts->idle_entrytime);
+       }
+       ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, idle_delta);
+
        ts->idle_entrytime = now;
        ts->idle_active = 0;
 
@@ -432,7 +438,13 @@ static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
 
        write_seqcount_begin(&ts->idle_sleeptime_seq);
        ts->idle_entrytime = now;
-       ts->idle_active = 1;
+       /*
+        * idle_active:
+        *  0: cpu is not idle
+        *  1: cpu is performing idle
+        *  2: cpu is performing iowait and idle
+        */
+       ts->idle_active = 1 + !!nr_iowait_cpu(smp_processor_id());
        write_seqcount_end(&ts->idle_sleeptime_seq);
 
        sched_clock_idle_sleep_event();
@@ -467,10 +479,18 @@ u64 get_cpu_idle_time_us(int cpu, u64 *wall)
 
        do {
                seq = read_seqcount_begin(&ts->idle_sleeptime_seq);
- 
-               if (ts->idle_active && !nr_iowait_cpu(cpu)) {
-                       ktime_t delta = ktime_sub(now, ts->idle_entrytime);
 
+               if (ts->idle_active) {
+                       ktime_t delta;
+
+                       if (ts->idle_active == 2) {
+                               ktime_t unused;
+
+                               nr_iowait_deltas(cpu, ts->idle_entrytime, now,
+                                                &unused, &delta);
+                       } else {
+                               delta = ktime_sub(now, ts->idle_entrytime);
+                       }
                        idle = ktime_add(ts->idle_sleeptime, delta);
                } else {
                        idle = ts->idle_sleeptime;
@@ -510,10 +530,12 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *wall)
 
        do {
                seq = read_seqcount_begin(&ts->idle_sleeptime_seq);
- 
-               if (ts->idle_active && nr_iowait_cpu(cpu) > 0) {
-                       ktime_t delta = ktime_sub(now, ts->idle_entrytime);
 
+               if (ts->idle_active == 2) {
+                       ktime_t delta, unused;
+
+                       nr_iowait_deltas(cpu, ts->idle_entrytime, now,
+                                        &delta, &unused);
                        iowait = ktime_add(ts->iowait_sleeptime, delta);
                } else {
                        iowait = ts->iowait_sleeptime;
-- 
1.7.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to