If the expiry happens in task context, there is no point in collecting the expired timers on a list first. Just expire them directly.
Signed-off-by: Thomas Gleixner <t...@linutronix.de> --- kernel/time/posix-cpu-timers.c | 92 +++++++++++++++++++++++++++++++++++------ 1 file changed, 79 insertions(+), 13 deletions(-) --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -753,13 +753,71 @@ static void posix_cpu_timer_get(struct k #define MAX_COLLECTED 20 -static u64 collect_timerqueue(struct timerqueue_head *head, +#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK +/* + * Expiry in task work context. Access to sighand->siglock is safe here. + */ +static void handle_expired_timer(struct cpu_timer *ctmr, + struct list_head *firing) +{ + struct k_itimer *timer; + int cpu_firing; + + /* + * Unlock sighand lock so the timer can be locked. Keep interrupts + * disabled across the lock switch. + */ + spin_unlock(&current->sighand->siglock); + timer = container_of(ctmr, struct k_itimer, it.cpu); + spin_lock(&timer->it_lock); + cpu_firing = timer->it.cpu.firing; + timer->it.cpu.firing = 0; + /* + * The firing flag is -1 if this raced with a reset of the timer, + * which already reported this almost-firing as an overrun. So + * don't generate an event. + */ + if (likely(cpu_firing >= 0)) + cpu_timer_fire(timer); + /* + * Drop timer lock again and reacquire sighand lock. Allow + * interrupts to come in between so this won't block interrupts + * across the delivery of a gazillion of timers. + */ + spin_unlock_irq(&timer->it_lock); + spin_lock_irq(&current->sighand->siglock); +} +#else +/* + * Expiry in interrupt context. Just move them to the firing list. + */ +static void handle_expired_timer(struct cpu_timer *ctmr, + struct list_head *firing) +{ + list_add_tail(&ctmr->elist, firing); +} +#endif + +static u64 collect_timerqueue(struct posix_cputimer_base *base, struct list_head *firing, u64 now) { struct timerqueue_node *next; int i = 0; - while ((next = timerqueue_getnext(head))) { + /* + * Reset the expiry cache first when expiry context is task work. 
+ * This is required because when sighand lock is dropped new timers + * can be enqueued. That's not a problem for regular posix timers + * as the expiry time would be correct when collect_timerqueue() + * returns, but the expiry cache is also used by itimers which do + * not have a corresponding posix timer and therefore a simple + * update after collect_timerqueue() might overwrite their newly + * written expiry time. + */ + if (IS_ENABLED(CONFIG_POSIX_CPU_TIMERS_TASK_WORK)) + base->nextevt = U64_MAX; + + while ((next = timerqueue_getnext(&base->tqhead))) { struct cpu_timer *ctmr; u64 expires; @@ -771,7 +829,7 @@ static u64 collect_timerqueue(struct tim ctmr->firing = 1; cpu_timer_dequeue(ctmr); - list_add_tail(&ctmr->elist, firing); + handle_expired_timer(ctmr, firing); } return U64_MAX; @@ -783,10 +841,8 @@ static void collect_posix_cputimers(stru struct posix_cputimer_base *base = pct->bases; int i; - for (i = 0; i < CPUCLOCK_MAX; i++, base++) { - base->nextevt = collect_timerqueue(&base->tqhead, firing, - samples[i]); - } + for (i = 0; i < CPUCLOCK_MAX; i++, base++) + base->nextevt = collect_timerqueue(base, firing, samples[i]); } static inline void check_dl_overrun(struct task_struct *tsk) @@ -812,9 +868,10 @@ static bool check_rlimit(u64 time, u64 l } /* - * Check for any per-thread CPU timers that have fired and move them off - * the tsk->cpu_timers[N] list onto the firing list. Here we update the - * tsk->it_*_expires values to reflect the remaining thread CPU timers. + * Check for any per-thread CPU timers that have fired and depending on the + * context (task work or interrupt) move them off the tsk->cpu_timers[N] + * list onto the firing list or expire them directly. Update the expiry + * cache as well to reflect the remaining thread CPU timers. 
*/ static void check_thread_timers(struct task_struct *tsk, struct list_head *firing) @@ -889,9 +946,11 @@ static void check_cpu_itimer(struct task } /* - * Check for any per-thread CPU timers that have fired and move them - * off the tsk->*_timers list onto the firing list. Per-thread timers - * have already been taken off. + * Check for any per-process CPU timers that have fired and depending on + * the context (task work or interrupt) move them off the tsk->signal timer + * list onto the firing list or expire them directly. Update the expiry + * cache to reflect the resulting state. Per-thread timers have already + * been handled. */ static void check_process_timers(struct task_struct *tsk, struct list_head *firing) @@ -1115,6 +1174,12 @@ static void handle_posix_cpu_timers(stru unlock_task_sighand(tsk, &flags); /* + * If task work delivery is enabled, the timers are already + * expired. + */ + if (IS_ENABLED(CONFIG_POSIX_CPU_TIMERS_TASK_WORK)) + goto out; + /* * Now that all the timers on our list have the firing flag, * no one will touch their list entries but us. We'll take * each timer's lock before clearing its firing flag, so no @@ -1136,6 +1201,7 @@ static void handle_posix_cpu_timers(stru cpu_timer_fire(timer); spin_unlock(&timer->it_lock); } +out: lockdep_posixtimer_exit(); }