Hi,

I was looking into making mod_timer() somewhat imprecise about when
it sets the target timer to fire.  The goal is to save power,
as measured indirectly by powertop wakeups per second.

There is already a notion of slack (or delta) time in
kernel/hrtimer.c::hrtimer_start_range_ns() and in
kernel/hrtimer.c::schedule_hrtimeout_range(), so maybe I'm attempting
to modify the wrong function, or maybe another interface function
is needed for this?

I did find one user of hrtimer_start_range_ns() (in fs/ubifs/io.c).
Is that the preferred interface to use when imprecise timers are
acceptable?


The results of this patch (below) are inconclusive IMO.
I made multiple runs of a short database test on an HP ProLiant BladeCenter
(BL685c G1), which also has firmware for power usage measurements.

averages:
kernel 2.6.31 unpatched:        159 Watts, 302.7 wakeups/second
kernel 2.6.31 + patch(N=9):     161 Watts, 291.1 wakeups/second
kernel 2.6.31 + patch(N=19):    163 Watts, 316.3 wakeups/second
kernel 2.6.31 + patch(N=99):    161 Watts, 284 wakeups/second


Any comments or suggestions?

Thanks,
---
~Randy



From: Randy Dunlap <[email protected]>

In __mod_timer(), when CONFIG_NO_HZ is enabled, be a little
imprecise with timer expiry values: if the next scheduled timer
interrupt falls after the requested expiry but no more than N jiffies
from now (N = 9 in this patch), use that interrupt's jiffy as the
expiry instead.  This allows timers to expire a bit later than
requested, but not earlier.

Signed-off-by: Randy Dunlap <[email protected]>
---
 kernel/timer.c |   39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

--- linux-2.6.31.orig/kernel/timer.c
+++ linux-2.6.31/kernel/timer.c
@@ -614,6 +614,10 @@ __mod_timer(struct timer_list *timer, un
        struct tvec_base *base, *new_base;
        unsigned long flags;
        int ret = 0 , cpu;
+#ifdef CONFIG_NO_HZ
+       unsigned long next_timer_int;
+       static unsigned int exp_debugs = 0;
+#endif
 
        timer_stats_timer_set_start_info(timer);
        BUG_ON(!timer->function);
@@ -662,6 +666,24 @@ __mod_timer(struct timer_list *timer, un
                }
        }
 
+#ifdef CONFIG_NO_HZ
+       if (system_state == SYSTEM_RUNNING) {
+
+               unsigned long __get_next_timer_interrupt(unsigned long now);
+
+               next_timer_int = __get_next_timer_interrupt(jiffies);
+
+               if (time_before(expires, next_timer_int) &&
+                   time_before_eq(next_timer_int, jiffies + 9UL)) {
+#if 1
+                       if (++exp_debugs <= 10)
+                               printk(KERN_DEBUG "%s: expires changes from %lu to %lu @jiffies=%lu\n",
+                                       __func__, expires, next_timer_int, jiffies);
+#endif
+                       expires = next_timer_int;
+               }
+       }
+#endif
        timer->expires = expires;
        internal_add_timer(base, timer);
 
@@ -1125,6 +1147,23 @@ static unsigned long cmp_next_hrtimer_ev
 }
 
 /**
+ * __get_next_timer_interrupt - jiffy of the next pending timer (per-CPU base)
+ * @now: current time (in jiffies); returned as-is if a timer is already due
+ */
+unsigned long __get_next_timer_interrupt(unsigned long now)
+{
+       struct tvec_base *base = __get_cpu_var(tvec_bases);
+       unsigned long expires;
+
+       expires = __next_timer_interrupt(base);
+
+       if (time_before_eq(expires, now))
+               return now;
+
+       return cmp_next_hrtimer_event(now, expires);
+}
+
+/**
  * get_next_timer_interrupt - return the jiffy of the next pending timer
  * @now: current time (in jiffies)
  */
_______________________________________________
Discuss mailing list
[email protected]
http://lists.lesswatts.org/listinfo/discuss

Reply via email to