There are problems with del_timer_sync().

1. Scalability. All cpus are scanned to determine if the timer is
running on that cpu.

2. It is racy. The timer can be fired again after del_timer_sync
have checked all cpus and before it will recheck timer_pending().

This patch adds 'pending flag' to timer_list. This flag is encoded
in the least significant bit of timer->base. This way we do not
waste the space and can read/write base+pending atomically.

__run_timers(), del_timer() do not set ->base = NULL anymore, they
only clear pending flag, so that del_timer_sync can wait while
timer->base->running_timer == timer.

It works only if recurring timer do not use add_timer_on().

The following patch  renames ->base to ->_base. Now this field is
used directly only in __get_base/__set_base helpers.

No changes in kernel/timer.o.

Signed-off-by: Oleg Nesterov <[EMAIL PROTECTED]>

--- 2.6.12-rc1/include/linux/timer.h~1_BASE     2005-03-19 14:54:58.000000000 
+0300
+++ 2.6.12-rc1/include/linux/timer.h    2005-03-19 15:05:46.000000000 +0300
@@ -18,16 +18,21 @@ struct timer_list {
        void (*function)(unsigned long);
        unsigned long data;
 
-       struct tvec_t_base_s *base;
+       struct tvec_t_base_s *_base;
 };
 
+static inline int __timer_pending(struct tvec_t_base_s *base)
+{
+       return base != NULL;
+}
+
 #define TIMER_MAGIC    0x4b87ad6e
 
 #define TIMER_INITIALIZER(_function, _expires, _data) {                \
                .function = (_function),                        \
                .expires = (_expires),                          \
                .data = (_data),                                \
-               .base = NULL,                                   \
+               ._base = NULL,                                  \
                .magic = TIMER_MAGIC,                           \
                .lock = SPIN_LOCK_UNLOCKED,                     \
        }
@@ -41,7 +46,7 @@ struct timer_list {
  */
 static inline void init_timer(struct timer_list * timer)
 {
-       timer->base = NULL;
+       timer->_base = NULL;
        timer->magic = TIMER_MAGIC;
        spin_lock_init(&timer->lock);
 }
@@ -58,7 +63,7 @@ static inline void init_timer(struct tim
  */
 static inline int timer_pending(const struct timer_list * timer)
 {
-       return timer->base != NULL;
+       return __timer_pending(timer->_base);
 }
 
 extern void add_timer_on(struct timer_list *timer, int cpu);
--- 2.6.12-rc1/kernel/timer.c~1_BASE    2005-03-19 14:55:17.000000000 +0300
+++ 2.6.12-rc1/kernel/timer.c   2005-03-19 15:05:46.000000000 +0300
@@ -86,6 +86,20 @@ static inline void set_running_timer(tve
 #endif
 }
 
+static inline tvec_base_t *__get_base(struct timer_list *timer)
+{
+       return timer->_base;
+}
+
+static inline void __set_base(struct timer_list *timer,
+                               tvec_base_t *base, int pending)
+{
+       if (pending)
+               timer->_base = base;
+       else
+               timer->_base = NULL;
+}
+
 /* Fake initialization */
 static DEFINE_PER_CPU(tvec_base_t, tvec_bases) = { SPIN_LOCK_UNLOCKED };
 
@@ -169,7 +183,7 @@ int __mod_timer(struct timer_list *timer
        spin_lock_irqsave(&timer->lock, flags);
        new_base = &__get_cpu_var(tvec_bases);
 repeat:
-       old_base = timer->base;
+       old_base = __get_base(timer);
 
        /*
         * Prevent deadlocks via ordering by old_base < new_base.
@@ -186,14 +200,14 @@ repeat:
                 * The timer base might have been cancelled while we were
                 * trying to take the lock(s):
                 */
-               if (timer->base != old_base) {
+               if (__get_base(timer) != old_base) {
                        spin_unlock(&new_base->lock);
                        spin_unlock(&old_base->lock);
                        goto repeat;
                }
        } else {
                spin_lock(&new_base->lock);
-               if (timer->base != old_base) {
+               if (__get_base(timer) != old_base) {
                        spin_unlock(&new_base->lock);
                        goto repeat;
                }
@@ -209,7 +223,7 @@ repeat:
        }
        timer->expires = expires;
        internal_add_timer(new_base, timer);
-       timer->base = new_base;
+       __set_base(timer, new_base, 1);
 
        if (old_base && (new_base != old_base))
                spin_unlock(&old_base->lock);
@@ -239,7 +253,7 @@ void add_timer_on(struct timer_list *tim
 
        spin_lock_irqsave(&base->lock, flags);
        internal_add_timer(base, timer);
-       timer->base = base;
+       __set_base(timer, base, 1);
        spin_unlock_irqrestore(&base->lock, flags);
 }
 
@@ -301,18 +315,18 @@ int del_timer(struct timer_list *timer)
        check_timer(timer);
 
 repeat:
-       base = timer->base;
+       base = __get_base(timer);
        if (!base)
                return 0;
        spin_lock_irqsave(&base->lock, flags);
-       if (base != timer->base) {
+       if (base != __get_base(timer)) {
                spin_unlock_irqrestore(&base->lock, flags);
                goto repeat;
        }
        list_del(&timer->entry);
        /* Need to make sure that anybody who sees a NULL base also sees the 
list ops */
        smp_wmb();
-       timer->base = NULL;
+       __set_base(timer, base, 0);
        spin_unlock_irqrestore(&base->lock, flags);
 
        return 1;
@@ -415,7 +429,7 @@ static int cascade(tvec_base_t *base, tv
                struct timer_list *tmp;
 
                tmp = list_entry(curr, struct timer_list, entry);
-               BUG_ON(tmp->base != base);
+               BUG_ON(__get_base(tmp) != base);
                curr = curr->next;
                internal_add_timer(base, tmp);
        }
@@ -465,7 +479,7 @@ repeat:
                        list_del(&timer->entry);
                        set_running_timer(base, timer);
                        smp_wmb();
-                       timer->base = NULL;
+                       __set_base(timer, base, 0);
                        spin_unlock_irq(&base->lock);
                        {
                                u32 preempt_count = preempt_count();
@@ -1314,7 +1328,7 @@ static int migrate_timer_list(tvec_base_
                        return 0;
                list_del(&timer->entry);
                internal_add_timer(new_base, timer);
-               timer->base = new_base;
+               __set_base(timer, new_base, 1);
                spin_unlock(&timer->lock);
        }
        return 1;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to