Seperate the storage space for pinned timers. This is preparatory work for changing the NOHZ timer placement from a push at enqueue time to a pull at expiry time model.
No functional change. Signed-off-by: Richard Cochran <rcoch...@linutronix.de> Signed-off-by: Anna-Maria Gleixner <anna-ma...@linutronix.de> Signed-off-by: Thomas Gleixner <t...@linutronix.de> --- kernel/time/timer.c | 98 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 70 insertions(+), 28 deletions(-) --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -185,12 +185,14 @@ EXPORT_SYMBOL(jiffies_64); #define WHEEL_SIZE (LVL_SIZE * LVL_DEPTH) #ifdef CONFIG_NO_HZ_COMMON -# define NR_BASES 2 -# define BASE_STD 0 -# define BASE_DEF 1 +# define NR_BASES 3 +# define BASE_LOCAL 0 +# define BASE_GLOBAL 1 +# define BASE_DEF 2 #else # define NR_BASES 1 -# define BASE_STD 0 +# define BASE_LOCAL 0 +# define BASE_GLOBAL 0 # define BASE_DEF 0 #endif @@ -218,16 +220,18 @@ void timers_update_migration(bool update unsigned int cpu; /* Avoid the loop, if nothing to update */ - if (this_cpu_read(timer_bases[BASE_STD].migration_enabled) == on) + if (this_cpu_read(timer_bases[BASE_GLOBAL].migration_enabled) == on) return; for_each_possible_cpu(cpu) { - per_cpu(timer_bases[BASE_STD].migration_enabled, cpu) = on; + per_cpu(timer_bases[BASE_LOCAL].migration_enabled, cpu) = on; + per_cpu(timer_bases[BASE_GLOBAL].migration_enabled, cpu) = on; per_cpu(timer_bases[BASE_DEF].migration_enabled, cpu) = on; per_cpu(hrtimer_bases.migration_enabled, cpu) = on; if (!update_nohz) continue; - per_cpu(timer_bases[BASE_STD].nohz_active, cpu) = true; + per_cpu(timer_bases[BASE_LOCAL].nohz_active, cpu) = true; + per_cpu(timer_bases[BASE_GLOBAL].nohz_active, cpu) = true; per_cpu(timer_bases[BASE_DEF].nohz_active, cpu) = true; per_cpu(hrtimer_bases.nohz_active, cpu) = true; } @@ -810,7 +814,10 @@ static int detach_if_pending(struct time static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu) { - struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu); + int index = tflags & TIMER_PINNED ? BASE_LOCAL : BASE_GLOBAL; + struct timer_base *base; + + base = per_cpu_ptr(&timer_bases[index], cpu); /* * If the timer is deferrable and nohz is active then we need to use @@ -824,7 +831,10 @@ static inline struct timer_base *get_tim static inline struct timer_base *get_timer_this_cpu_base(u32 tflags) { - struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); + int index = tflags & TIMER_PINNED ? BASE_LOCAL : BASE_GLOBAL; + struct timer_base *base; + + base = this_cpu_ptr(&timer_bases[index]); /* * If the timer is deferrable and nohz is active then we need to use @@ -1468,10 +1478,10 @@ static u64 cmp_next_hrtimer_event(u64 ba */ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) { - struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); + unsigned long nextevt, nextevt_local, nextevt_global; + bool local_empty, global_empty, local_first, is_idle; + struct timer_base *base_local, *base_global; u64 expires = KTIME_MAX; - unsigned long nextevt; - bool is_empty; /* * Pretend that there is no timer pending if the cpu is offline. @@ -1480,26 +1490,49 @@ u64 get_next_timer_interrupt(unsigned lo if (cpu_is_offline(smp_processor_id())) return expires; - spin_lock(&base->lock); - is_empty = __next_timer_interrupt(base); - nextevt = base->next_expiry; + base_local = this_cpu_ptr(&timer_bases[BASE_LOCAL]); + base_global = this_cpu_ptr(&timer_bases[BASE_GLOBAL]); + + spin_lock(&base_global->lock); + spin_lock_nested(&base_local->lock, SINGLE_DEPTH_NESTING); + + local_empty = __next_timer_interrupt(base_local); + nextevt_local = base_local->next_expiry; + + global_empty = __next_timer_interrupt(base_global); + nextevt_global = base_global->next_expiry; + /* * We have a fresh next event. Check whether we can forward the * base. We can only do that when @basej is past base->clk * otherwise we might rewind base->clk. */ - if (time_after(basej, base->clk)) { - if (time_after(nextevt, basej)) - base->clk = basej; - else if (time_after(nextevt, base->clk)) - base->clk = nextevt; + if (time_after(basej, base_local->clk)) { + if (time_after(nextevt_local, basej)) + base_local->clk = basej; + else if (time_after(nextevt_local, base_local->clk)) + base_local->clk = nextevt_local; + } + + if (time_after(basej, base_global->clk)) { + if (time_after(nextevt_global, basej)) + base_global->clk = basej; + else if (time_after(nextevt_global, base_global->clk)) + base_global->clk = nextevt_global; } /* Base is idle if the next event is more than a tick away. */ - base->is_idle = time_after(nextevt, basej + 1); - spin_unlock(&base->lock); + local_first = time_before_eq(nextevt_local, nextevt_global); + nextevt = local_first ? nextevt_local : nextevt_global; + is_idle = time_after(nextevt, basej + 1); + + /* We need to mark both bases in sync */ + base_local->is_idle = base_global->is_idle = is_idle; - if (!is_empty) { + spin_unlock(&base_local->lock); + spin_unlock(&base_global->lock); + + if (!local_empty || !global_empty) { /* If we missed a tick already, force 0 delta */ if (time_before_eq(nextevt, basej)) nextevt = basej; @@ -1516,7 +1549,7 @@ u64 get_next_timer_interrupt(unsigned lo */ void timer_clear_idle(void) { - struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); + struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_LOCAL]); /* * We do this unlocked. The worst outcome is a remote enqueue sending @@ -1525,6 +1558,9 @@ void timer_clear_idle(void) * the lock in the exit from idle path. */ base->is_idle = false; + + base = this_cpu_ptr(&timer_bases[BASE_GLOBAL]); + base->is_idle = false; } static int collect_expired_timers(struct timer_base *base, @@ -1614,11 +1650,17 @@ static inline void __run_timers(struct t */ static __latent_entropy void run_timer_softirq(struct softirq_action *h) { - struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); + struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_LOCAL]); __run_timers(base); - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) - __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); + if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) { + base = this_cpu_ptr(&timer_bases[BASE_GLOBAL]); + __run_timers(base); + + base = this_cpu_ptr(&timer_bases[BASE_DEF]); + if (base->nohz_active) + __run_timers(base); + } } /* @@ -1626,7 +1668,7 @@ static __latent_entropy void run_timer_s */ void run_local_timers(void) { - struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); + struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_LOCAL]); hrtimer_run_queues(); /* Raise the softirq only if required. */