On 2/16/26 5:49 PM, Vishal Chourasia wrote:
Expedite synchronize_rcu during the SMT mode switch operation when it is
initiated via the /sys/devices/system/cpu/smt/control interface.

SMT mode switch operations, i.e. transitions between SMT 8 and SMT 1 or
vice versa, and others, are user-driven operations and should therefore
complete as soon as possible. Switching SMT states involves iterating
over a list of CPUs and performing hotplug operations. These transitions
were found to take a significantly large amount of time to complete,
particularly on high-core-count systems, because the system was blocked
on synchronize_rcu calls.

Below is one of the call-stacks that accounted for most of the blocking
time overhead as reported by offcputime bcc script for CPU offline
operation,

     finish_task_switch
     __schedule
     schedule
     schedule_timeout
     wait_for_completion
     __wait_rcu_gp
     synchronize_rcu
     cpuidle_uninstall_idle_handler
     powernv_cpuidle_cpu_dead
     cpuhp_invoke_callback
     __cpuhp_invoke_callback_range
     _cpu_down
     cpu_device_down
     cpu_subsys_offline
     device_offline
     online_store
     dev_attr_store
     sysfs_kf_write
     kernfs_fop_write_iter
     vfs_write
     ksys_write
     system_call_exception
     system_call_common
    -                bash (29705)
         5771569  ------------------------>  Duration (us)

Signed-off-by: Vishal Chourasia <[email protected]>
---
  include/linux/rcupdate.h | 3 +++
  kernel/cpu.c             | 4 ++++
  2 files changed, 7 insertions(+)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 7729fef249e1..f12d0d0f008d 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -1190,6 +1190,9 @@ rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f)
  extern int rcu_expedited;
  extern int rcu_normal;
+extern void rcu_expedite_gp(void);
+extern void rcu_unexpedite_gp(void);
+
  DEFINE_LOCK_GUARD_0(rcu, rcu_read_lock(), rcu_read_unlock())
  DECLARE_LOCK_GUARD_0_ATTRS(rcu, __acquires_shared(RCU), __releases_shared(RCU))

IMHO, the below may be better.

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index f12d0d0f008d..61b80c29d53b 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -1190,8 +1190,13 @@ rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f)
 extern int rcu_expedited;
 extern int rcu_normal;

-extern void rcu_expedite_gp(void);
-extern void rcu_unexpedite_gp(void);
+#ifdef CONFIG_TINY_RCU
+static inline void rcu_expedite_gp(void) { }
+static inline void rcu_unexpedite_gp(void) { }
+#else
+void rcu_expedite_gp(void);
+void rcu_unexpedite_gp(void);
+#endif

 DEFINE_LOCK_GUARD_0(rcu, rcu_read_lock(), rcu_read_unlock())
 DECLARE_LOCK_GUARD_0_ATTRS(rcu, __acquires_shared(RCU), __releases_shared(RCU))
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index dc5d614b372c..41a0d262e964 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -512,8 +512,6 @@ do { \
 static inline bool rcu_gp_is_normal(void) { return true; }
 static inline bool rcu_gp_is_expedited(void) { return false; }
 static inline bool rcu_async_should_hurry(void) { return false; }
-static inline void rcu_expedite_gp(void) { }
-static inline void rcu_unexpedite_gp(void) { }
 static inline void rcu_async_hurry(void) { }
 static inline void rcu_async_relax(void) { }
 static inline bool rcu_cpu_online(int cpu) { return true; }
@@ -521,8 +519,6 @@ static inline bool rcu_cpu_online(int cpu) { return true; }
 bool rcu_gp_is_normal(void);     /* Internal RCU use. */
 bool rcu_gp_is_expedited(void);  /* Internal RCU use. */
 bool rcu_async_should_hurry(void);  /* Internal RCU use. */
-void rcu_expedite_gp(void);
-void rcu_unexpedite_gp(void);
 void rcu_async_hurry(void);
 void rcu_async_relax(void);
 void rcupdate_announce_bootup_oddness(void);


Reply via email to