Re: [PATCH v2 1/2] rcu: Enable rcu_normal_wake_from_gp on small systems

2025-06-12 Thread Uladzislau Rezki
On Wed, Jun 11, 2025 at 05:48:57PM -0400, Joel Fernandes wrote:
> 
> 
> On 6/11/2025 1:56 PM, Uladzislau Rezki (Sony) wrote:
> > Automatically enable the rcu_normal_wake_from_gp parameter on
> > systems with a small number of CPUs. The activation threshold
> > is set to 16 CPUs.
> > 
> > This helps to reduce the latency of the normal synchronize_rcu() API
> > by waking up GP-waiters earlier and decoupling synchronize_rcu()
> > callers from regular callback handling.
> > 
> > A benchmark running 64 parallel jobs (on a system with 64 CPUs) invoking
> > synchronize_rcu() demonstrates a notable latency reduction with the
> > setting enabled.
> 
> Reviewed-by: Joel Fernandes 
> 
Thank you for the review!

> Just one nit below:
> 
> > 
> > Latency distribution (microseconds):
> > 
> > <default>
> >  0      - 9999   : 1
> >  10000  - 19999  : 4
> >  20000  - 29999  : 399
> >  30000  - 39999  : 3197
> >  40000  - 49999  : 10428
> >  50000  - 59999  : 17363
> >  60000  - 69999  : 15529
> >  70000  - 79999  : 9287
> >  80000  - 89999  : 4249
> >  90000  - 99999  : 1915
> >  100000 - 109999 : 922
> >  110000 - 119999 : 390
> >  120000 - 129999 : 187
> >  ...
> > 
> > 
> > <rcu_normal_wake_from_gp>
> >  0      - 9999  : 1
> >  10000  - 19999 : 234
> >  20000  - 29999 : 6678
> >  30000  - 39999 : 33463
> >  40000  - 49999 : 20669
> >  50000  - 59999 : 2766
> >  60000  - 69999 : 183
> >  ...
> > 
> > 
> > Signed-off-by: Uladzislau Rezki (Sony) 
> > ---
> >  kernel/rcu/tree.c | 14 +++---
> >  1 file changed, 11 insertions(+), 3 deletions(-)
> > 
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index e8a4b720d7d2..b88ceb35cebd 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -1625,8 +1625,10 @@ static void rcu_sr_put_wait_head(struct llist_node *node)
> > atomic_set_release(&sr_wn->inuse, 0);
> >  }
> >  
> > -/* Disabled by default. */
> > -static int rcu_normal_wake_from_gp;
> > +/* Enable rcu_normal_wake_from_gp automatically on small systems. */
> > +#define WAKE_FROM_GP_CPU_THRESHOLD 16
> > +
> > +static int rcu_normal_wake_from_gp = -1;
> >  module_param(rcu_normal_wake_from_gp, int, 0644);
> >  static struct workqueue_struct *sync_wq;
> >  
> > @@ -3239,7 +3241,7 @@ static void synchronize_rcu_normal(void)
> >  
> > trace_rcu_sr_normal(rcu_state.name, &rs.head, TPS("request"));
> >  
> > -   if (!READ_ONCE(rcu_normal_wake_from_gp)) {
> > +   if (READ_ONCE(rcu_normal_wake_from_gp) < 1) {
> > wait_rcu_gp(call_rcu_hurry);
> > goto trace_complete_out;
> > }
> > @@ -4843,6 +4845,12 @@ void __init rcu_init(void)
> > sync_wq = alloc_workqueue("sync_wq", WQ_MEM_RECLAIM, 0);
> > WARN_ON(!sync_wq);
> >  
> > +   /* Respect if explicitly disabled via a boot parameter. */
> > +   if (rcu_normal_wake_from_gp < 0) {
> > +   if (num_possible_cpus() <= WAKE_FROM_GP_CPU_THRESHOLD)
> > +   rcu_normal_wake_from_gp = 1;
> > +   }
> 
> nit: better to just do:
> 
>   if (rcu_normal_wake_from_gp < 0 &&
>       num_possible_cpus() <= WAKE_FROM_GP_CPU_THRESHOLD)
>           rcu_normal_wake_from_gp = 1;
> 
Two lines anyway :)

--
Uladzislau Rezki




Re: [PATCH v2 1/2] rcu: Enable rcu_normal_wake_from_gp on small systems

2025-06-11 Thread Joel Fernandes



On 6/11/2025 1:56 PM, Uladzislau Rezki (Sony) wrote:
> Automatically enable the rcu_normal_wake_from_gp parameter on
> systems with a small number of CPUs. The activation threshold
> is set to 16 CPUs.
> 
> This helps to reduce the latency of the normal synchronize_rcu() API
> by waking up GP-waiters earlier and decoupling synchronize_rcu()
> callers from regular callback handling.
> 
> A benchmark running 64 parallel jobs (on a system with 64 CPUs) invoking
> synchronize_rcu() demonstrates a notable latency reduction with the
> setting enabled.

Reviewed-by: Joel Fernandes 

Just one nit below:

> 
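> Latency distribution (microseconds):
> 
> <default>
>  0      - 9999   : 1
>  10000  - 19999  : 4
>  20000  - 29999  : 399
>  30000  - 39999  : 3197
>  40000  - 49999  : 10428
>  50000  - 59999  : 17363
>  60000  - 69999  : 15529
>  70000  - 79999  : 9287
>  80000  - 89999  : 4249
>  90000  - 99999  : 1915
>  100000 - 109999 : 922
>  110000 - 119999 : 390
>  120000 - 129999 : 187
>  ...
> 
> 
> <rcu_normal_wake_from_gp>
>  0      - 9999  : 1
>  10000  - 19999 : 234
>  20000  - 29999 : 6678
>  30000  - 39999 : 33463
>  40000  - 49999 : 20669
>  50000  - 59999 : 2766
>  60000  - 69999 : 183
>  ...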
> 
> 
> Signed-off-by: Uladzislau Rezki (Sony) 
> ---
>  kernel/rcu/tree.c | 14 +++---
>  1 file changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index e8a4b720d7d2..b88ceb35cebd 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1625,8 +1625,10 @@ static void rcu_sr_put_wait_head(struct llist_node *node)
>   atomic_set_release(&sr_wn->inuse, 0);
>  }
>  
> -/* Disabled by default. */
> -static int rcu_normal_wake_from_gp;
> +/* Enable rcu_normal_wake_from_gp automatically on small systems. */
> +#define WAKE_FROM_GP_CPU_THRESHOLD 16
> +
> +static int rcu_normal_wake_from_gp = -1;
>  module_param(rcu_normal_wake_from_gp, int, 0644);
>  static struct workqueue_struct *sync_wq;
>  
> @@ -3239,7 +3241,7 @@ static void synchronize_rcu_normal(void)
>  
>   trace_rcu_sr_normal(rcu_state.name, &rs.head, TPS("request"));
>  
> - if (!READ_ONCE(rcu_normal_wake_from_gp)) {
> + if (READ_ONCE(rcu_normal_wake_from_gp) < 1) {
>   wait_rcu_gp(call_rcu_hurry);
>   goto trace_complete_out;
>   }
> @@ -4843,6 +4845,12 @@ void __init rcu_init(void)
>   sync_wq = alloc_workqueue("sync_wq", WQ_MEM_RECLAIM, 0);
>   WARN_ON(!sync_wq);
>  
> + /* Respect if explicitly disabled via a boot parameter. */
> + if (rcu_normal_wake_from_gp < 0) {
> + if (num_possible_cpus() <= WAKE_FROM_GP_CPU_THRESHOLD)
> + rcu_normal_wake_from_gp = 1;
> + }

nit: better to just do:

    if (rcu_normal_wake_from_gp < 0 &&
        num_possible_cpus() <= WAKE_FROM_GP_CPU_THRESHOLD)
            rcu_normal_wake_from_gp = 1;

> +
>   /* Fill in default value for rcutree.qovld boot parameter. */
>   /* -After- the rcu_node ->lock fields are initialized! */
>   if (qovld < 0)