From: Peter Zijlstra <pet...@infradead.org> With the goal of deprecating / removing VOLUNTARY preempt, live-patch needs to stop relying on cond_resched() to make forward progress.
Instead, rely on schedule() with TASK_FREEZABLE set. Just like live-patching, the freezer needs to be able to stop tasks in a safe / known state. Compile tested only. [bigeasy: use likely() in __klp_sched_try_switch() and update comments] Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org> Signed-off-by: Sebastian Andrzej Siewior <bige...@linutronix.de> --- v1…v2: https://lore.kernel.org/all/20250324134909.ga14...@noisy.programming.kicks-ass.net/ - Updated comments in __klp_sched_try_switch() - Replaced unlikely with likely in __klp_sched_try_switch() - Dropped RFC include/linux/livepatch_sched.h | 14 ++++----- include/linux/sched.h | 6 ---- kernel/livepatch/transition.c | 52 ++++++++++----------------------- kernel/sched/core.c | 50 +++++-------------------------- 4 files changed, 29 insertions(+), 93 deletions(-) diff --git a/include/linux/livepatch_sched.h b/include/linux/livepatch_sched.h index 013794fb5da08..065c185f27638 100644 --- a/include/linux/livepatch_sched.h +++ b/include/linux/livepatch_sched.h @@ -3,27 +3,23 @@ #define _LINUX_LIVEPATCH_SCHED_H_ #include <linux/jump_label.h> -#include <linux/static_call_types.h> +#include <linux/sched.h> #ifdef CONFIG_LIVEPATCH void __klp_sched_try_switch(void); -#if !defined(CONFIG_PREEMPT_DYNAMIC) || !defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) - DECLARE_STATIC_KEY_FALSE(klp_sched_try_switch_key); -static __always_inline void klp_sched_try_switch(void) +static __always_inline void klp_sched_try_switch(struct task_struct *curr) { - if (static_branch_unlikely(&klp_sched_try_switch_key)) + if (static_branch_unlikely(&klp_sched_try_switch_key) && + READ_ONCE(curr->__state) & TASK_FREEZABLE) __klp_sched_try_switch(); } -#endif /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */ - #else /* !CONFIG_LIVEPATCH */ -static inline void klp_sched_try_switch(void) {} -static inline void __klp_sched_try_switch(void) {} +static inline void klp_sched_try_switch(struct task_struct *curr) {} #endif /* CONFIG_LIVEPATCH */ #endif /* _LINUX_LIVEPATCH_SCHED_H_ */ diff --git a/include/linux/sched.h b/include/linux/sched.h index f96ac19828934..b98195991031c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -44,7 +44,6 @@ #include <linux/seqlock_types.h> #include <linux/kcsan.h> #include <linux/rv.h> -#include <linux/livepatch_sched.h> #include <linux/uidgid_types.h> #include <linux/tracepoint-defs.h> #include <asm/kmap_size.h> @@ -2089,9 +2088,6 @@ extern int __cond_resched(void); #if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) -void sched_dynamic_klp_enable(void); -void sched_dynamic_klp_disable(void); - DECLARE_STATIC_CALL(cond_resched, __cond_resched); static __always_inline int _cond_resched(void) @@ -2112,7 +2108,6 @@ static __always_inline int _cond_resched(void) static inline int _cond_resched(void) { - klp_sched_try_switch(); return __cond_resched(); } @@ -2122,7 +2117,6 @@ static inline int _cond_resched(void) static inline int _cond_resched(void) { - klp_sched_try_switch(); return 0; } diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index ba069459c1017..25b9372a4b66f 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -29,22 +29,13 @@ static unsigned int klp_signals_cnt; /* * When a livepatch is in progress, enable klp stack checking in - * cond_resched(). This helps CPU-bound kthreads get patched. + * schedule(). This helps CPU-bound kthreads get patched. */ -#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) - -#define klp_cond_resched_enable() sched_dynamic_klp_enable() -#define klp_cond_resched_disable() sched_dynamic_klp_disable() - -#else /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */ DEFINE_STATIC_KEY_FALSE(klp_sched_try_switch_key); -EXPORT_SYMBOL(klp_sched_try_switch_key); -#define klp_cond_resched_enable() static_branch_enable(&klp_sched_try_switch_key) -#define klp_cond_resched_disable() static_branch_disable(&klp_sched_try_switch_key) - -#endif /* CONFIG_PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */ +#define klp_resched_enable() static_branch_enable(&klp_sched_try_switch_key) +#define klp_resched_disable() static_branch_disable(&klp_sched_try_switch_key) /* * This work can be performed periodically to finish patching or unpatching any @@ -365,27 +356,20 @@ static bool klp_try_switch_task(struct task_struct *task) void __klp_sched_try_switch(void) { + /* + * This function is called from __schedule() while a context switch is + * about to happen. Preemption is already disabled and klp_mutex + * can't be acquired. + * Disabled preemption is used to prevent racing with other callers of + * klp_try_switch_task(). Thanks to task_call_func() they won't be + * able to switch to this task while it's running. + */ + lockdep_assert_preemption_disabled(); + + /* Make sure current didn't get patched */ if (likely(!klp_patch_pending(current))) return; - /* - * This function is called from cond_resched() which is called in many - * places throughout the kernel. Using the klp_mutex here might - * deadlock. - * - * Instead, disable preemption to prevent racing with other callers of - * klp_try_switch_task(). Thanks to task_call_func() they won't be - * able to switch this task while it's running. - */ - preempt_disable(); - - /* - * Make sure current didn't get patched between the above check and - * preempt_disable(). - */ - if (unlikely(!klp_patch_pending(current))) - goto out; - /* * Enforce the order of the TIF_PATCH_PENDING read above and the * klp_target_state read in klp_try_switch_task(). The corresponding @@ -395,11 +379,7 @@ void __klp_sched_try_switch(void) smp_rmb(); klp_try_switch_task(current); - -out: - preempt_enable(); } -EXPORT_SYMBOL(__klp_sched_try_switch); /* * Sends a fake signal to all non-kthread tasks with TIF_PATCH_PENDING set. @@ -508,7 +488,7 @@ void klp_try_complete_transition(void) } /* Done! Now cleanup the data structures. */ - klp_cond_resched_disable(); + klp_resched_disable(); patch = klp_transition_patch; klp_complete_transition(); @@ -560,7 +540,7 @@ void klp_start_transition(void) set_tsk_thread_flag(task, TIF_PATCH_PENDING); } - klp_cond_resched_enable(); + klp_resched_enable(); klp_signals_cnt = 0; } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c81cf642dba05..2a973d0e414a3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -66,6 +66,7 @@ #include <linux/vtime.h> #include <linux/wait_api.h> #include <linux/workqueue_api.h> +#include <linux/livepatch_sched.h> #ifdef CONFIG_PREEMPT_DYNAMIC # ifdef CONFIG_GENERIC_ENTRY @@ -6668,6 +6669,8 @@ static void __sched notrace __schedule(int sched_mode) if (sched_feat(HRTICK) || sched_feat(HRTICK_DL)) hrtick_clear(rq); + klp_sched_try_switch(prev); + local_irq_disable(); rcu_note_context_switch(preempt); @@ -7328,7 +7331,6 @@ EXPORT_STATIC_CALL_TRAMP(might_resched); static DEFINE_STATIC_KEY_FALSE(sk_dynamic_cond_resched); int __sched dynamic_cond_resched(void) { - klp_sched_try_switch(); if (!static_branch_unlikely(&sk_dynamic_cond_resched)) return 0; return __cond_resched(); @@ -7500,7 +7502,6 @@ int sched_dynamic_mode(const char *str) #endif static DEFINE_MUTEX(sched_dynamic_mutex); -static bool klp_override; static void __sched_dynamic_update(int mode) { @@ -7508,8 +7509,7 @@ static void __sched_dynamic_update(int mode) * Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in * the ZERO state, which is invalid. */ - if (!klp_override) - preempt_dynamic_enable(cond_resched); + preempt_dynamic_enable(cond_resched); preempt_dynamic_enable(might_resched); preempt_dynamic_enable(preempt_schedule); preempt_dynamic_enable(preempt_schedule_notrace); @@ -7518,8 +7518,7 @@ static void __sched_dynamic_update(int mode) switch (mode) { case preempt_dynamic_none: - if (!klp_override) - preempt_dynamic_enable(cond_resched); + preempt_dynamic_enable(cond_resched); preempt_dynamic_disable(might_resched); preempt_dynamic_disable(preempt_schedule); preempt_dynamic_disable(preempt_schedule_notrace); @@ -7530,8 +7529,7 @@ static void __sched_dynamic_update(int mode) break; case preempt_dynamic_voluntary: - if (!klp_override) - preempt_dynamic_enable(cond_resched); + preempt_dynamic_enable(cond_resched); preempt_dynamic_enable(might_resched); preempt_dynamic_disable(preempt_schedule); preempt_dynamic_disable(preempt_schedule_notrace); @@ -7542,8 +7540,7 @@ static void __sched_dynamic_update(int mode) break; case preempt_dynamic_full: - if (!klp_override) - preempt_dynamic_disable(cond_resched); + preempt_dynamic_disable(cond_resched); preempt_dynamic_disable(might_resched); preempt_dynamic_enable(preempt_schedule); preempt_dynamic_enable(preempt_schedule_notrace); @@ -7554,8 +7551,7 @@ static void __sched_dynamic_update(int mode) break; case preempt_dynamic_lazy: - if (!klp_override) - preempt_dynamic_disable(cond_resched); + preempt_dynamic_disable(cond_resched); preempt_dynamic_disable(might_resched); preempt_dynamic_enable(preempt_schedule); preempt_dynamic_enable(preempt_schedule_notrace); @@ -7576,36 +7572,6 @@ void sched_dynamic_update(int mode) mutex_unlock(&sched_dynamic_mutex); } -#ifdef CONFIG_HAVE_PREEMPT_DYNAMIC_CALL - -static int klp_cond_resched(void) -{ - __klp_sched_try_switch(); - return __cond_resched(); -} - -void sched_dynamic_klp_enable(void) -{ - mutex_lock(&sched_dynamic_mutex); - - klp_override = true; - static_call_update(cond_resched, klp_cond_resched); - - mutex_unlock(&sched_dynamic_mutex); -} - -void sched_dynamic_klp_disable(void) -{ - mutex_lock(&sched_dynamic_mutex); - - klp_override = false; - __sched_dynamic_update(preempt_dynamic_mode); - - mutex_unlock(&sched_dynamic_mutex); -} - -#endif /* CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */ - static int __init setup_preempt_mode(char *str) { int mode = sched_dynamic_mode(str); -- 2.49.0