Chris, in principle the change looks o.k. to me, even though I'm not really familiar with the watchdog_nmi_disable_all() and watchdog_nmi_enable_all() functions. It is my understanding that those functions are only called once via 'initcall' early during kernel startup as shown in the following flow of execution:
kernel_init { kernel_init_freeable { lockup_detector_init { cpumask_andnot(watchdog_cpumask, cpu_possible_mask,tick_nohz_full_mask) watchdog_enable_all_cpus smpboot_register_percpu_thread(&watchdog_threads) smpboot_update_cpumask_percpu_thread(&watchdog_threads,watchdog_cpumask) // here we make sure that watchdog threads don't run on nohz_full CPUs // only the watchdog threads of housekeeping CPUs keep on running } do_basic_setup do_initcalls do_initcall_level do_one_initcall fixup_ht_bug // subsys_initcall(fixup_ht_bug) { watchdog_nmi_disable_all // here we disable NMI watchdog only on housekeeping CPUs for_each_cpu_and(cpu,cpu_online_mask,watchdog_cpumask) watchdog_nmi_disable watchdog_nmi_enable_all // here we enable NMI watchdog only on housekeeping CPUs for_each_cpu_and(cpu,cpu_online_mask,watchdog_cpumask) watchdog_nmi_enable } } } It seems crucial that lockup_detector_init() is executed before fixup_ht_bug(). Regards, Uli On 04/16/2015 06:46 AM, Ulrich Obergfell wrote: > if a user changes watchdog parameters in /proc/sys/kernel, the watchdog > threads > are not stopped and restarted in all cases. Parameters can also be changed 'on > the fly', for example like 'watchdog_thresh' in the following flow of > execution: > > proc_watchdog_thresh > proc_watchdog_update > if (watchdog_enabled && watchdog_thresh) > watchdog_enable_all_cpus > if (!watchdog_running) { > // watchdog threads are already running so we don't get here > } else { > update_watchdog_all_cpus > for_each_online_cpu <-----------------------------. > update_watchdog | > watchdog_nmi_disable | > watchdog_nmi_enable | > } | > | > I think we would not want to call watchdog_nmi_enable() for each_online_ CPU, > but rather for each CPU that has an_unparked_ watchdog thread (i.e. where the > watchdog mechanism is actually enabled). How about something like this? I'll fold it into v9 of the patchset. Thanks! diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 0c5a37cdbedd..a4e1c9a2e769 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -61,6 +61,10 @@ static cpumask_var_t watchdog_cpumask_for_smpboot; static cpumask_var_t watchdog_cpumask; unsigned long *watchdog_cpumask_bits; +/* Helper for online, unparked cpus. */ +#define for_each_watchdog_cpu(cpu) \ + for_each_cpu_and((cpu), cpu_online_mask, watchdog_cpumask) + static int __read_mostly watchdog_running; static u64 __read_mostly sample_period; @@ -209,7 +213,7 @@ void touch_all_softlockup_watchdogs(void) * do we care if a 0 races with a timestamp? * all it means is the softlock check starts one cycle later */ - for_each_online_cpu(cpu) + for_each_watchdog_cpu(cpu) per_cpu(watchdog_touch_ts, cpu) = 0; } @@ -616,7 +620,7 @@ void watchdog_nmi_enable_all(void) return; get_online_cpus(); - for_each_online_cpu(cpu) + for_each_watchdog_cpu(cpu) watchdog_nmi_enable(cpu); put_online_cpus(); } @@ -629,7 +633,7 @@ void watchdog_nmi_disable_all(void) return; get_online_cpus(); - for_each_online_cpu(cpu) + for_each_watchdog_cpu(cpu) watchdog_nmi_disable(cpu); put_online_cpus(); } @@ -688,7 +692,7 @@ static void update_watchdog_all_cpus(void) int cpu; get_online_cpus(); - for_each_online_cpu(cpu) + for_each_watchdog_cpu(cpu) update_watchdog(cpu); put_online_cpus(); } -- Chris Metcalf, EZChip Semiconductor http://www.ezchip.com -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/