To provide nohz_full tick support, there is a set of tick dependency masks that need to be evaluated on every IRQ and context switch. Switching on nohz_full tick support at runtime is problematic because some of the tick dependency masks may not be properly set, causing problems down the road.
Allow the nohz_full boot option to be specified without any parameter to force-enable nohz_full tick support without any CPU in the tick_nohz_full_mask yet. The context_tracking_key and the tick_nohz_full_running flag will be enabled in this case to make tick_nohz_full_enabled() return true. There is still a small performance overhead when force-enabling nohz_full this way, so it should only be used if there is a chance that some CPUs may become isolated later via the cpuset isolated partition functionality and CPU isolation close to that of nohz_full is desired. Signed-off-by: Waiman Long <[email protected]> --- Documentation/admin-guide/kernel-parameters.txt | 15 +++++++++------ include/linux/context_tracking.h | 7 ++++++- kernel/context_tracking.c | 4 +++- kernel/rcu/tree_nocb.h | 2 +- kernel/sched/isolation.c | 13 ++++++++++++- kernel/time/tick-sched.c | 11 +++++++++-- 6 files changed, 40 insertions(+), 12 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 95f97ce487a4..f0eedaebe9d6 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4550,13 +4550,16 @@ Kernel parameters Valid arguments: on, off Default: on - nohz_full= [KNL,BOOT,SMP,ISOL] - The argument is a cpu list, as described above. + nohz_full[=cpu-list] + [KNL,BOOT,SMP,ISOL] In kernels built with CONFIG_NO_HZ_FULL=y, set - the specified list of CPUs whose tick will be stopped - whenever possible. The boot CPU will be forced outside - the range to maintain the timekeeping. Any CPUs - in this list will have their RCU callbacks offloaded, + the specified list of CPUs whose tick will be + stopped whenever possible. If the argument is + not specified, nohz_full will be forced enabled + without any CPU in the nohz_full list yet. + The boot CPU will be forced outside the range + to maintain the timekeeping. 
Any CPUs in this + list will have their RCU callbacks offloaded, just as if they had also been called out in the rcu_nocbs= boot parameter. diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index af9fe87a0922..a3fea7f9fef6 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -9,8 +9,13 @@ #include <asm/ptrace.h> - #ifdef CONFIG_CONTEXT_TRACKING_USER +/* + * Pass CONTEXT_TRACKING_FORCE_ENABLE to ct_cpu_track_user() to force enable + * user context tracking. + */ +#define CONTEXT_TRACKING_FORCE_ENABLE (-1) + extern void ct_cpu_track_user(int cpu); /* Called with interrupts disabled. */ diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index a743e7ffa6c0..925999de1a28 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -678,7 +678,9 @@ void __init ct_cpu_track_user(int cpu) { static __initdata bool initialized = false; - if (!per_cpu(context_tracking.active, cpu)) { + if (cpu == CONTEXT_TRACKING_FORCE_ENABLE) { + static_branch_inc(&context_tracking_key); + } else if (!per_cpu(context_tracking.active, cpu)) { per_cpu(context_tracking.active, cpu) = true; static_branch_inc(&context_tracking_key); } diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index b3337c7231cc..2d06dcb61f37 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -1267,7 +1267,7 @@ void __init rcu_init_nohz(void) struct shrinker * __maybe_unused lazy_rcu_shrinker; #if defined(CONFIG_NO_HZ_FULL) - if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask)) + if (tick_nohz_full_running) cpumask = tick_nohz_full_mask; #endif diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 965d6f8fe344..c233d55a1e95 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -268,6 +268,7 @@ static int __init housekeeping_setup(char *str, unsigned long flags) } alloc_bootmem_cpumask_var(&non_housekeeping_mask); + if (cpulist_parse(str, 
non_housekeeping_mask) < 0) { pr_warn("Housekeeping: nohz_full= or isolcpus= incorrect CPU range\n"); goto free_non_housekeeping_mask; @@ -277,6 +278,13 @@ static int __init housekeeping_setup(char *str, unsigned long flags) cpumask_andnot(housekeeping_staging, cpu_possible_mask, non_housekeeping_mask); + /* + * Allow "nohz_full" without parameter to force enable nohz_full + * at boot time without any CPUs in the nohz_full list yet. + */ + if ((flags & HK_FLAG_KERNEL_NOISE) && !*str) + goto setup_housekeeping_staging; + first_cpu = cpumask_first_and(cpu_present_mask, housekeeping_staging); if (first_cpu >= nr_cpu_ids || first_cpu >= setup_max_cpus) { __cpumask_set_cpu(smp_processor_id(), housekeeping_staging); @@ -290,6 +298,7 @@ static int __init housekeeping_setup(char *str, unsigned long flags) if (cpumask_empty(non_housekeeping_mask)) goto free_housekeeping_staging; +setup_housekeeping_staging: if (!housekeeping.flags) { /* First setup call ("nohz_full=" or "isolcpus=") */ enum hk_type type; @@ -357,10 +366,12 @@ static int __init housekeeping_nohz_full_setup(char *str) unsigned long flags; flags = HK_FLAG_KERNEL_NOISE | HK_FLAG_KERNEL_NOISE_BOOT; + if (*str == '=') + str++; return housekeeping_setup(str, flags); } -__setup("nohz_full=", housekeeping_nohz_full_setup); +__setup("nohz_full", housekeeping_nohz_full_setup); static int __init housekeeping_isolcpus_setup(char *str) { diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 9e5264458414..ed877b2c9040 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -676,8 +676,15 @@ void __init tick_nohz_init(void) } } - for_each_cpu(cpu, tick_nohz_full_mask) - ct_cpu_track_user(cpu); + /* + * Force enable context_tracking_key if tick_nohz_full_mask empty + */ + if (cpumask_empty(tick_nohz_full_mask)) { + ct_cpu_track_user(CONTEXT_TRACKING_FORCE_ENABLE); + } else { + for_each_cpu(cpu, tick_nohz_full_mask) + ct_cpu_track_user(cpu); + } ret = 
cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "kernel/nohz:predown", NULL, -- 2.53.0

