CONFIG_NO_HZ=y can cause idle/iowait values to decrease.

If /proc/stat is monitored at a short interval (e.g. 1 or 2 seconds) using
the sysstat package, sar reports bogus %idle/%iowait values because it expects
the idle/iowait values not to decrease unless wraparound happens.

This patch makes the idle/iowait values visible in /proc/stat increase
monotonically, on the assumption that we don't need to worry about
wraparound.

Signed-off-by: Tetsuo Handa <penguin-ker...@i-love.sakura.ne.jp>
---
 fs/proc/stat.c |   42 ++++++++++++++++++++++++++++++++++++++----
 1 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index e296572..9fff534 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -19,6 +19,40 @@
 #define arch_irq_stat() 0
 #endif
 
+/*
+ * CONFIG_NO_HZ=y can cause idle/iowait values to decrease.
+ * Clamp values shown in /proc/stat to the largest one seen so far per CPU.
+ */
+static inline u64 validate_iowait(u64 iowait, const int cpu)
+{
+#ifdef CONFIG_NO_HZ
+       static u64 max_iowait[NR_CPUS]; /* largest iowait seen so far, by cpu */
+       static DEFINE_SPINLOCK(lock); /* guards max_iowait[] */
+       spin_lock(&lock);
+       if (likely(iowait >= max_iowait[cpu]))
+               max_iowait[cpu] = iowait; /* did not go backwards: record it */
+       else
+               iowait = max_iowait[cpu]; /* went backwards: reuse old maximum */
+       spin_unlock(&lock);
+#endif
+       return iowait; /* returned unmodified when CONFIG_NO_HZ is not set */
+}
+
+static inline u64 validate_idle(u64 idle, const int cpu)
+{
+#ifdef CONFIG_NO_HZ
+       static u64 max_idle[NR_CPUS]; /* largest idle seen so far, by cpu */
+       static DEFINE_SPINLOCK(lock); /* guards max_idle[] */
+       spin_lock(&lock);
+       if (likely(idle >= max_idle[cpu]))
+               max_idle[cpu] = idle; /* did not go backwards: record it */
+       else
+               idle = max_idle[cpu]; /* went backwards: reuse old maximum */
+       spin_unlock(&lock);
+#endif
+       return idle; /* returned unmodified when CONFIG_NO_HZ is not set */
+}
+
 #ifdef arch_idle_time
 
 static cputime64_t get_idle_time(int cpu)
@@ -28,7 +62,7 @@ static cputime64_t get_idle_time(int cpu)
        idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
        if (cpu_online(cpu) && !nr_iowait_cpu(cpu))
                idle += arch_idle_time(cpu);
-       return idle;
+       return validate_idle(idle, cpu);
 }
 
 static cputime64_t get_iowait_time(int cpu)
@@ -38,7 +72,7 @@ static cputime64_t get_iowait_time(int cpu)
        iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
        if (cpu_online(cpu) && nr_iowait_cpu(cpu))
                iowait += arch_idle_time(cpu);
-       return iowait;
+       return validate_iowait(iowait, cpu);
 }
 
 #else
@@ -56,7 +90,7 @@ static u64 get_idle_time(int cpu)
        else
                idle = usecs_to_cputime64(idle_time);
 
-       return idle;
+       return validate_idle(idle, cpu);
 }
 
 static u64 get_iowait_time(int cpu)
@@ -72,7 +106,7 @@ static u64 get_iowait_time(int cpu)
        else
                iowait = usecs_to_cputime64(iowait_time);
 
-       return iowait;
+       return validate_iowait(iowait, cpu);
 }
 
 #endif
-- 
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to