On Tue, 31 Mar 2015, Viresh Kumar wrote:
> @@ -1189,12 +1195,41 @@ static inline void __run_timers(struct tvec_base 
> *base)
>                       cascade(base, &base->tv5, INDEX(3));
>               ++base->timer_jiffies;
>               list_replace_init(base->tv1.vec + index, head);
> +
> +again:
>               while (!list_empty(head)) {
>                       void (*fn)(unsigned long);
>                       unsigned long data;
>                       bool irqsafe;
>  
> -                     timer = list_first_entry(head, struct timer_list,entry);
> +                     timer = list_first_entry(head, struct timer_list, 
> entry);
> +
> +                     if (unlikely(timer_running(timer))) {
> +                             /*
> +                              * The only way to get here is if the handler,
> +                              * running on another base, re-queued itself on
> +                              * this base, and the handler hasn't finished
> +                              * yet.
> +                              */
> +
> +                             if (list_is_last(&timer->entry, head)) {
> +                                     /*
> +                                      * Drop lock, so that TIMER_RUNNING can
> +                                      * be cleared on another base.
> +                                      */
> +                                     spin_unlock(&base->lock);
> +
> +                                     while (timer_running(timer))
> +                                             cpu_relax();
> +
> +                                     spin_lock(&base->lock);
> +                             } else {
> +                                     /* Rotate the list and try someone else 
> */
> +                                     list_move_tail(&timer->entry, head);
> +                             }

Can we please stick that timer into the next bucket and be done with it?

> +                             goto again;

"continue;" is old school, right?

> +                     }
> +
>                       fn = timer->function;
>                       data = timer->data;
>                       irqsafe = tbase_get_irqsafe(timer->base);
> @@ -1202,6 +1237,7 @@ static inline void __run_timers(struct tvec_base *base)
>                       timer_stats_account_timer(timer);
>  
>                       base->running_timer = timer;
> +                     timer_set_running(timer);
>                       detach_expired_timer(timer, base);
>  
>                       if (irqsafe) {
> @@ -1213,6 +1249,25 @@ static inline void __run_timers(struct tvec_base *base)
>                               call_timer_fn(timer, fn, data);
>                               spin_lock_irq(&base->lock);
>                       }
> +
> +                     /*
> +                      * Handler running on this base, re-queued itself on
> +                      * another base ?
> +                      */
> +                     if (unlikely(timer->base != base)) {
> +                             unsigned long flags;
> +                             struct tvec_base *tbase;
> +
> +                             spin_unlock(&base->lock);
> +
> +                             tbase = lock_timer_base(timer, &flags);
> +                             timer_clear_running(timer);
> +                             spin_unlock(&tbase->lock);
> +
> +                             spin_lock(&base->lock);
> +                     } else {
> +                             timer_clear_running(timer);
> +                     }

Aside of the above this is really horrible. Why not doing the obvious:

__mod_timer()

        if (base != newbase) {
                if (timer_running()) {
                   list_add(&base->migration_list);
                   goto out_unlock;
                }
                .....

__run_timers()

        while(!list_empty(head)) {
                ....
        }

        if (unlikely(!list_empty(&base->migration_list)) {
                /* dequeue and requeue again */
        }

Simple, isn't it?

No new flags in the timer base, no concurrent expiry, no extra
conditionals in the expiry path. Just a single extra check at the end
of the softirq handler for this rare condition instead of imposing all
that nonsense to the hotpath.

We might even optimize that:

                if (timer_running()) {
                   list_add(&base->migration_list);
                   base->preferred_target = newbase;
                   goto out_unlock;
                }

        if (unlikely(!list_empty(&base->migration_list)) {
                /* dequeue and requeue again */
                while (!list_empty(&base->migration_list)) {
                        dequeue_timer();
                        newbase = base->preferred_target;
                        unlock(base);
                        lock(newbase);
                        enqueue(newbase);
                        unlock(newbase);
                        lock(base);
                }
        }

Thanks,

        tglx
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to