As we plan to handle the full nohz IPI using irq work, we need to force non-lazy works to run outside the tick: the tick is called under the hrtimer lock, and that is not desirable for the nohz callback that re-evaluates the tick, because it can take the hrtimer lock itself.

To that end, split the single per-cpu irq work list into a "raised" list, processed from the irq work interrupt, and a "lazy" list, processed from the tick through the new irq_work_run_tick() (which also flushes the raised list on archs without an irq work IPI).
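For illustration only (not part of this patch; my_func, my_raised_work, my_lazy_work and my_queue_example are made-up names, the rest is the existing irq_work API), a rough sketch of how the two kinds of works behave after this change:

  #include <linux/irq_work.h>

  /* Hypothetical callback: runs with irqs disabled, from the IPI or from the tick */
  static void my_func(struct irq_work *work)
  {
  }

  /*
   * Non-lazy: goes to raised_list and is always run from the irq work IPI,
   * so its callback may safely take the hrtimer lock (e.g. to re-evaluate
   * the tick).
   */
  static struct irq_work my_raised_work = {
          .func  = my_func,
  };

  /*
   * Lazy: goes to lazy_list and is deferred to the next tick, i.e. run from
   * irq_work_run_tick(), unless the tick is stopped in which case the IPI
   * is raised anyway.
   */
  static struct irq_work my_lazy_work = {
          .flags = IRQ_WORK_LAZY,
          .func  = my_func,
  };

  static void my_queue_example(void)
  {
          irq_work_queue(&my_raised_work);  /* raises the irq work IPI */
          irq_work_queue(&my_lazy_work);    /* waits for the next tick */
  }

Only the lazy work is handled by the new irq_work_run_tick() from update_process_times(); the non-lazy one stays on the IPI path.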
Cc: Andrew Morton <a...@linux-foundation.org>
Cc: Ingo Molnar <mi...@kernel.org>
Cc: Kevin Hilman <khil...@linaro.org>
Cc: Paul E. McKenney <paul...@linux.vnet.ibm.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Viresh Kumar <viresh.ku...@linaro.org>
Signed-off-by: Frederic Weisbecker <fweis...@gmail.com>
---
 include/linux/irq_work.h |  1 +
 kernel/irq_work.c        | 58 ++++++++++++++++++++++++++++++++----------------------
 kernel/timer.c           |  2 +-
 3 files changed, 34 insertions(+), 27 deletions(-)

diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index 19ae05d..429b1ba 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -34,6 +34,7 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
 
 bool irq_work_queue(struct irq_work *work);
 void irq_work_run(void);
+void irq_work_run_tick(void);
 void irq_work_sync(struct irq_work *work);
 
 #ifdef CONFIG_IRQ_WORK
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 2559383..0a554a6 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -19,8 +19,8 @@
 #include <asm/processor.h>
 
 
-static DEFINE_PER_CPU(struct llist_head, irq_work_list);
-static DEFINE_PER_CPU(int, irq_work_raised);
+static DEFINE_PER_CPU(struct llist_head, lazy_list);
+static DEFINE_PER_CPU(struct llist_head, raised_list);
 
 /*
  * Claim the entry so that no one else will poke at it.
@@ -63,14 +63,14 @@ void __weak arch_irq_work_raise(int cpu)
  */
 bool irq_work_queue(struct irq_work *work)
 {
+        unsigned long flags;
+
         /* Only queue if not already pending */
         if (!irq_work_claim(work))
                 return false;
 
         /* Queue the entry and raise the IPI if needed. */
-        preempt_disable();
-
-        llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
+        local_irq_save(flags);
 
         /*
          * If the work is not "lazy" or the tick is stopped, raise the irq
@@ -78,11 +78,13 @@ bool irq_work_queue(struct irq_work *work)
          * for the next tick.
          */
         if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) {
-                if (!this_cpu_cmpxchg(irq_work_raised, 0, 1))
+                if (llist_add(&work->llnode, &__get_cpu_var(raised_list)))
                         arch_irq_work_raise(smp_processor_id());
+        } else {
+                llist_add(&work->llnode, &__get_cpu_var(lazy_list));
         }
 
-        preempt_enable();
+        local_irq_restore(flags);
 
         return true;
 }
@@ -90,10 +92,7 @@ EXPORT_SYMBOL_GPL(irq_work_queue);
 
 bool irq_work_needs_cpu(void)
 {
-        struct llist_head *this_list;
-
-        this_list = &__get_cpu_var(irq_work_list);
-        if (llist_empty(this_list))
+        if (llist_empty(&__get_cpu_var(lazy_list)))
                 return false;
 
         /* All work should have been flushed before going offline */
@@ -102,28 +101,18 @@ bool irq_work_needs_cpu(void)
         return true;
 }
 
-static void __irq_work_run(void)
+static void __irq_work_run(struct llist_head *list)
 {
         unsigned long flags;
         struct irq_work *work;
-        struct llist_head *this_list;
         struct llist_node *llnode;
 
-
-        /*
-         * Reset the "raised" state right before we check the list because
-         * an NMI may enqueue after we find the list empty from the runner.
-         */
-        __this_cpu_write(irq_work_raised, 0);
-        barrier();
-
-        this_list = &__get_cpu_var(irq_work_list);
-        if (llist_empty(this_list))
+        if (llist_empty(list))
                 return;
 
         BUG_ON(!irqs_disabled());
 
-        llnode = llist_del_all(this_list);
+        llnode = llist_del_all(list);
         while (llnode != NULL) {
                 work = llist_entry(llnode, struct irq_work, llnode);
 
@@ -155,11 +144,27 @@ static void __irq_work_run(void)
 void irq_work_run(void)
 {
         BUG_ON(!in_irq());
-        __irq_work_run();
+        __irq_work_run(&__get_cpu_var(raised_list));
+        __irq_work_run(&__get_cpu_var(lazy_list));
 }
 EXPORT_SYMBOL_GPL(irq_work_run);
 
 /*
+ * Run the lazy irq_work entries on this cpu from the tick. But let
+ * the IPI handle the others. Some works may require to work outside
+ * the tick due to its locking dependencies (hrtimer lock).
+ */
+void irq_work_run_tick(void)
+{
+        BUG_ON(!in_irq());
+#ifndef CONFIG_HAVE_IRQ_WORK_IPI
+        /* No IPI support, we don't have the choice... */
+        __irq_work_run(&__get_cpu_var(raised_list));
+#endif
+        __irq_work_run(&__get_cpu_var(lazy_list));
+}
+
+/*
  * Synchronize against the irq_work @entry, ensures the entry is not
  * currently in use.
  */
@@ -183,7 +188,8 @@ static int irq_work_cpu_notify(struct notifier_block *self,
                 /* Called from stop_machine */
                 if (WARN_ON_ONCE(cpu != smp_processor_id()))
                         break;
-                __irq_work_run();
+                __irq_work_run(&__get_cpu_var(raised_list));
+                __irq_work_run(&__get_cpu_var(lazy_list));
                 break;
         default:
                 break;
diff --git a/kernel/timer.c b/kernel/timer.c
index 3bb01a3..0251dfa 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1384,7 +1384,7 @@ void update_process_times(int user_tick)
         rcu_check_callbacks(cpu, user_tick);
 #ifdef CONFIG_IRQ_WORK
         if (in_irq())
-                irq_work_run();
+                irq_work_run_tick();
 #endif
         scheduler_tick();
         run_posix_cpu_timers(p);
-- 
1.8.3.1