On 14.07.2005 [13:40:11 -0700], Nishanth Aravamudan wrote:
> From: Nishanth Aravamudan <[EMAIL PROTECTED]>
> 
> Description: The core revision to the soft-timer subsystem to divorce it
> from the timer interrupt in software, i.e. jiffies. Instead, use
> getnstimeofday() (via do_monotonic_clock()) as the basis for addition
> and expiration of timers. Add a new unit, the timerinterval, which is
> a 2^TIMERINTERVAL_BITS nanoseconds in length. The converted value in
> timerintervals is used where we would have used the timer's expires
> member before. Add set_timer_nsecs() and set_timer_nsecs_on() functions
> to directly request nanosecond delays. These functions replace
> add_timer(), mod_timer() and add_timer_on().
> 
> Signed-off-by: Nishanth Aravamudan <[EMAIL PROTECTED]>

Sigh, one version of my development patches removed the export of
mod_timer(). Of course, I forgot to revert that hunk before sending it
out. If anyone (maybe not likely) is testing out these patches, please
use this version.

Thanks,
Nish

---

 include/linux/time.h  |    1 
 include/linux/timer.h |   27 +-----
 kernel/time.c         |   18 ++++
 kernel/timer.c        |  215 +++++++++++++++++++++++++++++++++++++++++++++-----
 4 files changed, 221 insertions(+), 40 deletions(-)

diff -urpN 2.6.13-rc3-base/include/linux/time.h 
2.6.13-rc3-dev/include/linux/time.h
--- 2.6.13-rc3-base/include/linux/time.h        2005-03-01 23:38:12.000000000 
-0800
+++ 2.6.13-rc3-dev/include/linux/time.h 2005-07-14 12:44:40.000000000 -0700
@@ -103,6 +103,7 @@ struct itimerval;
 extern int do_setitimer(int which, struct itimerval *value, struct itimerval 
*ovalue);
 extern int do_getitimer(int which, struct itimerval *value);
 extern void getnstimeofday (struct timespec *tv);
+extern u64 do_monotonic_clock(void);
 
 extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
 
diff -urpN 2.6.13-rc3-base/include/linux/timer.h 
2.6.13-rc3-dev/include/linux/timer.h
--- 2.6.13-rc3-base/include/linux/timer.h       2005-07-13 15:52:14.000000000 
-0700
+++ 2.6.13-rc3-dev/include/linux/timer.h        2005-07-14 12:44:40.000000000 
-0700
@@ -11,6 +11,7 @@ struct timer_base_s;
 struct timer_list {
        struct list_head entry;
        unsigned long expires;
+       u64 expires_nsecs;
 
        unsigned long magic;
 
@@ -27,6 +28,7 @@ extern struct timer_base_s __init_timer_
 #define TIMER_INITIALIZER(_function, _expires, _data) {                \
                .function = (_function),                        \
                .expires = (_expires),                          \
+               .expires_nsecs = 0,                             \
                .data = (_data),                                \
                .base = &__init_timer_base,                     \
                .magic = TIMER_MAGIC,                           \
@@ -51,30 +53,15 @@ static inline int timer_pending(const st
 
 extern void add_timer_on(struct timer_list *timer, int cpu);
 extern int del_timer(struct timer_list * timer);
-extern int __mod_timer(struct timer_list *timer, unsigned long expires);
+extern int __mod_timer(struct timer_list *timer);
 extern int mod_timer(struct timer_list *timer, unsigned long expires);
+extern void add_timer(struct timer_list *timer);
+extern int set_timer_nsecs(struct timer_list *timer, u64 expires_nsecs);
+extern void set_timer_on_nsecs(struct timer_list *timer, u64 expires_nsecs,
+                                                               int cpu);
 
 extern unsigned long next_timer_interrupt(void);
 
-/***
- * add_timer - start a timer
- * @timer: the timer to be added
- *
- * The kernel will do a ->function(->data) callback from the
- * timer interrupt at the ->expired point in the future. The
- * current time is 'jiffies'.
- *
- * The timer's ->expired, ->function (and if the handler uses it, ->data)
- * fields must be set prior calling this function.
- *
- * Timers with an ->expired field in the past will be executed in the next
- * timer tick.
- */
-static inline void add_timer(struct timer_list * timer)
-{
-       __mod_timer(timer, timer->expires);
-}
-
 #ifdef CONFIG_SMP
   extern int try_to_del_timer_sync(struct timer_list *timer);
   extern int del_timer_sync(struct timer_list *timer);
diff -urpN 2.6.13-rc3-base/kernel/time.c 2.6.13-rc3-dev/kernel/time.c
--- 2.6.13-rc3-base/kernel/time.c       2005-07-13 15:51:57.000000000 -0700
+++ 2.6.13-rc3-dev/kernel/time.c        2005-07-14 12:44:40.000000000 -0700
@@ -589,3 +589,21 @@ EXPORT_SYMBOL(get_jiffies_64);
 #endif
 
 EXPORT_SYMBOL(jiffies);
+
+u64 do_monotonic_clock(void)
+{
+       struct timespec now, now_w2m;
+       unsigned long seq;
+
+       getnstimeofday(&now);
+
+       do {
+               seq = read_seqbegin(&xtime_lock);
+               now_w2m = wall_to_monotonic;
+       } while (read_seqretry(&xtime_lock, seq));
+
+       return (u64)(now.tv_sec + now_w2m.tv_sec) * NSEC_PER_SEC +
+                               (now.tv_nsec + now_w2m.tv_nsec);
+}
+
+EXPORT_SYMBOL_GPL(do_monotonic_clock);
diff -urpN 2.6.13-rc3-base/kernel/timer.c 2.6.13-rc3-dev/kernel/timer.c
--- 2.6.13-rc3-base/kernel/timer.c      2005-07-13 15:52:14.000000000 -0700
+++ 2.6.13-rc3-dev/kernel/timer.c       2005-07-14 12:44:40.000000000 -0700
@@ -56,6 +56,15 @@ static void time_interpolator_update(lon
 #define TVR_SIZE (1 << TVR_BITS)
 #define TVN_MASK (TVN_SIZE - 1)
 #define TVR_MASK (TVR_SIZE - 1)
+/*
+ * Modifying TIMERINTERVAL_BITS changes the software resolution of
+ * soft-timers. While 20 bits would be closer to a millisecond, there
+ * are performance gains from allowing a software resolution finer than
+ * the hardware (HZ=1000)
+ */
+#define TIMERINTERVAL_BITS 19
+#define TIMERINTERVAL_SIZE (1 << TIMERINTERVAL_BITS)
+#define TIMERINTERVAL_MASK (TIMERINTERVAL_SIZE - 1)
 
 struct timer_base_s {
        spinlock_t lock;
@@ -72,7 +81,7 @@ typedef struct tvec_root_s {
 
 struct tvec_t_base_s {
        struct timer_base_s t_base;
-       unsigned long timer_jiffies;
+       unsigned long last_timer_time;
        tvec_root_t tv1;
        tvec_t tv2;
        tvec_t tv3;
@@ -114,11 +123,88 @@ static inline void check_timer(struct ti
                check_timer_failed(timer);
 }
 
+/*
+ * nsecs_to_timerintervals_ceiling - convert nanoseconds to timerintervals
+ * @n: number of nanoseconds to convert
+ *
+ * This is where changes to TIMERINTERVAL_BITS affect the soft-timer
+ * subsystem.
+ *
+ * Some explanation of the math is necessary:
+ * Rather than do decimal arithmetic, we shift for the sake of speed.
+ * This does mean that the actual requestable sleeps are
+ *     2^(sizeof(unsigned long)*8 - TIMERINTERVAL_BITS)
+ * timerintervals.
+ *
+ * The conditional takes care of the corner case where we request a 0
+ * nanosecond sleep; if the quantity were unsigned, we would not
+ * propogate the carry and force a wrap when adding the 1.
+ *
+ * To prevent timers from being expired early, we:
+ *     Take the ceiling when we add; and
+ *     Take the floor when we expire.
+ */
+static inline unsigned long nsecs_to_timerintervals_ceiling(u64 nsecs)
+{
+       if (nsecs)
+               return (unsigned long)(((nsecs - 1) >> TIMERINTERVAL_BITS) + 1);
+       else
+               return 0UL;
+}
+
+/*
+ * nsecs_to_timerintervals_floor - convert nanoseconds to timerintervals
+ * @n: number of nanoseconds to convert
+ *
+ * This is where changes to TIMERINTERVAL_BITS affect the soft-timer
+ * subsystem.
+ *
+ * Some explanation of the math is necessary:
+ * Rather than do decimal arithmetic, we shift for the sake of speed.
+ * This does mean that the actual requestable sleeps are
+ *     2^(sizeof(unsigned long)*8 - TIMERINTERVAL_BITS)
+ *
+ * There is no special case for 0 in the floor function, since we do not
+ * do any subtraction or addition of 1
+ *
+ * To prevent timers from being expired early, we:
+ *     Take the ceiling when we add; and
+ *     Take the floor when we expire.
+ */
+static inline unsigned long nsecs_to_timerintervals_floor(u64 nsecs)
+{
+       return (unsigned long)(nsecs >> TIMERINTERVAL_BITS);
+}
+
+/*
+ * jiffies_to_timerintervals - convert absolute jiffies to timerintervals
+ * @abs_jiffies: number of jiffies to convert
+ *
+ * First, we convert the absolute jiffies parameter to a relative
+ * jiffies value. To maintain precision, we convert the relative
+ * jiffies value to a relative nanosecond value and then convert that
+ * to a relative soft-timer interval unit value. We then add this
+ * relative value to the current time according to the timeofday-
+ * subsystem, converted to soft-timer interval units.
+ *
+ * We only use this function when adding timers, so we are free to
+ * always use the ceiling version of nsecs_to_timerintervals.
+ *
+ * This function only exists to support deprecated interfaces. Once
+ * those interfaces have been converted to the alternatives, it should
+ * be removed.
+ */
+static inline unsigned long jiffies_to_timerintervals(unsigned long 
abs_jiffies)
+{
+       unsigned long relative_jiffies = abs_jiffies - jiffies;
+       return nsecs_to_timerintervals_ceiling(do_monotonic_clock() +
+               jiffies_to_nsecs(relative_jiffies));
+}
 
 static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
 {
-       unsigned long expires = timer->expires;
-       unsigned long idx = expires - base->timer_jiffies;
+       unsigned long expires = 
nsecs_to_timerintervals_ceiling(timer->expires_nsecs);
+       unsigned long idx = expires - base->last_timer_time;
        struct list_head *vec;
 
        if (idx < TVR_SIZE) {
@@ -138,7 +224,7 @@ static void internal_add_timer(tvec_base
                 * Can happen if you add a timer with expires == jiffies,
                 * or you set a timer to go off in the past
                 */
-               vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
+               vec = base->tv1.vec + (base->last_timer_time & TVR_MASK);
        } else {
                int i;
                /* If the timeout is larger than 0xffffffff on 64-bit
@@ -146,7 +232,7 @@ static void internal_add_timer(tvec_base
                 */
                if (idx > 0xffffffffUL) {
                        idx = 0xffffffffUL;
-                       expires = idx + base->timer_jiffies;
+                       expires = idx + base->last_timer_time;
                }
                i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
                vec = base->tv5.vec + i;
@@ -222,7 +308,7 @@ static timer_base_t *lock_timer_base(str
        }
 }
 
-int __mod_timer(struct timer_list *timer, unsigned long expires)
+int __mod_timer(struct timer_list *timer)
 {
        timer_base_t *base;
        tvec_base_t *new_base;
@@ -261,7 +347,7 @@ int __mod_timer(struct timer_list *timer
                }
        }
 
-       timer->expires = expires;
+       /* expires should be in timerintervals, and is currently ignored? */
        internal_add_timer(new_base, timer);
        spin_unlock_irqrestore(&new_base->t_base.lock, flags);
 
@@ -281,21 +367,50 @@ void add_timer_on(struct timer_list *tim
 {
        tvec_base_t *base = &per_cpu(tvec_bases, cpu);
        unsigned long flags;
-
+  
        BUG_ON(timer_pending(timer) || !timer->function);
 
        check_timer(timer);
 
        spin_lock_irqsave(&base->t_base.lock, flags);
+       timer->expires_nsecs = do_monotonic_clock() +
+               jiffies_to_nsecs(timer->expires - jiffies);
        timer->base = &base->t_base;
        internal_add_timer(base, timer);
        spin_unlock_irqrestore(&base->t_base.lock, flags);
 }
 
+/***
+ * add_timer - start a timer
+ * @timer: the timer to be added
+ *
+ * The kernel will do a ->function(->data) callback from the
+ * timer interrupt at the ->expired point in the future. The
+ * current time is 'jiffies'.
+ *
+ * The timer's ->expired, ->function (and if the handler uses it, ->data)
+ * fields must be set prior calling this function.
+ *
+ * Timers with an ->expired field in the past will be executed in the next
+ * timer tick.
+ *
+ * The callers of add_timer() should be aware that the interface is now
+ * deprecated. set_timer_nsecs() is the single interface for adding and
+ * modifying timers.
+ */
+void add_timer(struct timer_list * timer)
+{
+       timer->expires_nsecs = do_monotonic_clock() +
+               jiffies_to_nsecs(timer->expires - jiffies);
+       __mod_timer(timer);
+}
+
+EXPORT_SYMBOL(add_timer);
 
 /***
  * mod_timer - modify a timer's timeout
  * @timer: the timer to be modified
+ * @expires: absolute time, in jiffies, when timer should expire
  *
  * mod_timer is a more efficient way to update the expire field of an
  * active timer (if the timer is inactive it will be activated)
@@ -311,6 +426,10 @@ void add_timer_on(struct timer_list *tim
  * The function returns whether it has modified a pending timer or not.
  * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
  * active timer returns 1.)
+ *
+ * The callers of mod_timer() should be aware that the interface is now
+ * deprecated. set_timer_nsecs() is the single interface for adding and
+ * modifying timers.
  */
 int mod_timer(struct timer_list *timer, unsigned long expires)
 {
@@ -318,6 +437,9 @@ int mod_timer(struct timer_list *timer, 
 
        check_timer(timer);
 
+       timer->expires_nsecs = do_monotonic_clock() +
+               jiffies_to_nsecs(expires - jiffies);
+
        /*
         * This is a common optimization triggered by the
         * networking code - if the timer is re-modified
@@ -326,10 +448,56 @@ int mod_timer(struct timer_list *timer, 
        if (timer->expires == expires && timer_pending(timer))
                return 1;
 
-       return __mod_timer(timer, expires);
+       return __mod_timer(timer);
 }
 
EXPORT_SYMBOL(mod_timer);
+
+/*
+ * set_timer_nsecs - modify a timer's timeout in nsecs
+ * @timer: the timer to be modified
+ *
+ * set_timer_nsecs replaces both add_timer and mod_timer. The caller
+ * should call do_monotonic_clock() to determine the absolute timeout
+ * necessary.
+ */
+int set_timer_nsecs(struct timer_list *timer, u64 expires_nsecs)
+{
+       BUG_ON(!timer->function);
+
+       check_timer(timer);
+
+       if (timer_pending(timer) && timer->expires_nsecs == expires_nsecs)
+               return 1;
+
+       timer->expires_nsecs = expires_nsecs;
+
+       return __mod_timer(timer);
+}
+
+EXPORT_SYMBOL_GPL(set_timer_nsecs);
+
+/***
+ * set_timer_on_nsecs - start a timer on a particular CPU
+ * @timer: the timer to be added
+ * @expires_nsecs: absolute time in nsecs when timer should expire
+ * @cpu: the CPU to start it on
+ *
+ * This is not very scalable on SMP. Double adds are not possible.
+ */
+void set_timer_on_nsecs(struct timer_list *timer, u64 expires_nsecs, int cpu)
+{
+       tvec_base_t *base = &per_cpu(tvec_bases, cpu);
+       unsigned long flags;
+
+       BUG_ON(timer_pending(timer) || !timer->function);
+
+       check_timer(timer);
+
+       spin_lock_irqsave(&base->t_base.lock, flags);
+       timer->expires_nsecs = expires_nsecs;
+       timer->base = &base->t_base;
+       internal_add_timer(base, timer);
+       spin_unlock_irqrestore(&base->t_base.lock, flags);
+}
 
 /***
  * del_timer - deactive a timer.
@@ -455,17 +623,17 @@ static int cascade(tvec_base_t *base, tv
  * This function cascades all vectors and executes all expired timer
  * vectors.
  */
-#define INDEX(N) (base->timer_jiffies >> (TVR_BITS + N * TVN_BITS)) & TVN_MASK
+#define INDEX(N) (base->last_timer_time >> (TVR_BITS + N * TVN_BITS)) & 
TVN_MASK
 
-static inline void __run_timers(tvec_base_t *base)
+static inline void __run_timers(tvec_base_t *base, unsigned long 
current_timer_time)
 {
        struct timer_list *timer;
 
        spin_lock_irq(&base->t_base.lock);
-       while (time_after_eq(jiffies, base->timer_jiffies)) {
+       while (time_after_eq(current_timer_time, base->last_timer_time)) {
                struct list_head work_list = LIST_HEAD_INIT(work_list);
                struct list_head *head = &work_list;
-               int index = base->timer_jiffies & TVR_MASK;
+               int index = base->last_timer_time & TVR_MASK;
  
                /*
                 * Cascade timers:
@@ -475,7 +643,7 @@ static inline void __run_timers(tvec_bas
                                (!cascade(base, &base->tv3, INDEX(1))) &&
                                        !cascade(base, &base->tv4, INDEX(2)))
                        cascade(base, &base->tv5, INDEX(3));
-               ++base->timer_jiffies; 
+               ++base->last_timer_time;
                list_splice_init(base->tv1.vec + index, &work_list);
                while (!list_empty(head)) {
                        void (*fn)(unsigned long);
@@ -524,20 +692,20 @@ unsigned long next_timer_interrupt(void)
 
        base = &__get_cpu_var(tvec_bases);
        spin_lock(&base->t_base.lock);
-       expires = base->timer_jiffies + (LONG_MAX >> 1);
+       expires = base->last_timer_time + (LONG_MAX >> 1);
        list = 0;
 
        /* Look for timer events in tv1. */
-       j = base->timer_jiffies & TVR_MASK;
+       j = base->last_timer_time & TVR_MASK;
        do {
                list_for_each_entry(nte, base->tv1.vec + j, entry) {
                        expires = nte->expires;
-                       if (j < (base->timer_jiffies & TVR_MASK))
+                       if (j < (base->last_timer_time & TVR_MASK))
                                list = base->tv2.vec + (INDEX(0));
                        goto found;
                }
                j = (j + 1) & TVR_MASK;
-       } while (j != (base->timer_jiffies & TVR_MASK));
+       } while (j != (base->last_timer_time & TVR_MASK));
 
        /* Check tv2-tv5. */
        varray[0] = &base->tv2;
@@ -910,10 +1078,15 @@ EXPORT_SYMBOL(xtime_lock);
  */
 static void run_timer_softirq(struct softirq_action *h)
 {
+       unsigned long current_timer_time;
        tvec_base_t *base = &__get_cpu_var(tvec_bases);
 
-       if (time_after_eq(jiffies, base->timer_jiffies))
-               __run_timers(base);
+       /* cache the converted current time, rounding down */
+       current_timer_time =
+               nsecs_to_timerintervals_floor(do_monotonic_clock());
+
+       if (time_after_eq(current_timer_time, base->last_timer_time))
+               __run_timers(base, current_timer_time);
 }
 
 /*
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to