Re: [PATCH 02/10] tick/nohz: Add tick_nohz_full_this_cpu()

2021-03-16 Thread Frederic Weisbecker
On Tue, Mar 16, 2021 at 01:28:01PM +0100, Peter Zijlstra wrote:
> On Thu, Mar 11, 2021 at 01:37:00PM +0100, Frederic Weisbecker wrote:
> > Optimize further the check for local full dynticks CPU. Testing directly
> > tick_nohz_full_cpu(smp_processor_id()) is suboptimal because the
> > compiler first fetches the CPU number and only then processes the
> > static key.
> > 
> > It's best to evaluate the static branch before anything.
> 
> Or you do tricky things like this ;-)

Good point!

I'll check the asm diff to see if that really does what we want.
I expect it will.

Thanks.

> 
> diff --git a/include/linux/tick.h b/include/linux/tick.h
> index 7340613c7eff..bd4a6b055b80 100644
> --- a/include/linux/tick.h
> +++ b/include/linux/tick.h
> @@ -185,13 +185,12 @@ static inline bool tick_nohz_full_enabled(void)
>   return tick_nohz_full_running;
>  }
>  
> -static inline bool tick_nohz_full_cpu(int cpu)
> -{
> - if (!tick_nohz_full_enabled())
> - return false;
> -
> - return cpumask_test_cpu(cpu, tick_nohz_full_mask);
> -}
> +#define tick_nohz_full_cpu(_cpu) ({  \
> + bool __ret = false; \
> + if (tick_nohz_full_enabled())   \
> + __ret = cpumask_test_cpu((_cpu), tick_nohz_full_mask);  \
> + __ret;  \
> +})
>  
>  static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask)
>  {
> 
> 


Re: [PATCH 02/10] tick/nohz: Add tick_nohz_full_this_cpu()

2021-03-16 Thread Peter Zijlstra
On Thu, Mar 11, 2021 at 01:37:00PM +0100, Frederic Weisbecker wrote:
> Optimize further the check for local full dynticks CPU. Testing directly
> tick_nohz_full_cpu(smp_processor_id()) is suboptimal because the
> compiler first fetches the CPU number and only then processes the
> static key.
> 
> It's best to evaluate the static branch before anything.

Or you do tricky things like this ;-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 7340613c7eff..bd4a6b055b80 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -185,13 +185,12 @@ static inline bool tick_nohz_full_enabled(void)
return tick_nohz_full_running;
 }
 
-static inline bool tick_nohz_full_cpu(int cpu)
-{
-   if (!tick_nohz_full_enabled())
-   return false;
-
-   return cpumask_test_cpu(cpu, tick_nohz_full_mask);
-}
+#define tick_nohz_full_cpu(_cpu) ({\
+   bool __ret = false; \
+   if (tick_nohz_full_enabled())   \
+   __ret = cpumask_test_cpu((_cpu), tick_nohz_full_mask);  \
+   __ret;  \
+})
 
 static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask)
 {




[PATCH 02/10] tick/nohz: Add tick_nohz_full_this_cpu()

2021-03-11 Thread Frederic Weisbecker
Optimize further the check for local full dynticks CPU. Testing directly
tick_nohz_full_cpu(smp_processor_id()) is suboptimal because the
compiler first fetches the CPU number and only then processes the
static key.

It's best to evaluate the static branch before anything. Provide with a
function that handles that correctly and convert some users along.

Signed-off-by: Frederic Weisbecker 
Cc: Yunfeng Ye 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Marcelo Tosatti 
Cc: Rafael J. Wysocki 
---
 include/linux/tick.h | 11 ++-
 kernel/time/tick-sched.c | 12 +---
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 7340613c7eff..bfc96cbe955c 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -193,6 +193,14 @@ static inline bool tick_nohz_full_cpu(int cpu)
return cpumask_test_cpu(cpu, tick_nohz_full_mask);
 }
 
+static inline bool tick_nohz_full_this_cpu(void)
+{
+   if (!tick_nohz_full_enabled())
+   return false;
+
+   return cpumask_test_cpu(smp_processor_id(), tick_nohz_full_mask);
+}
+
 static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask)
 {
if (tick_nohz_full_enabled())
@@ -271,6 +279,7 @@ extern void __init tick_nohz_full_setup(cpumask_var_t 
cpumask);
 #else
 static inline bool tick_nohz_full_enabled(void) { return false; }
 static inline bool tick_nohz_full_cpu(int cpu) { return false; }
+static inline bool tick_nohz_full_this_cpu(void) { return false; }
 static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
 
 static inline void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
@@ -296,7 +305,7 @@ static inline void tick_nohz_full_setup(cpumask_var_t 
cpumask) { }
 
 static inline void tick_nohz_task_switch(void)
 {
-   if (tick_nohz_full_enabled())
+   if (tick_nohz_full_this_cpu())
__tick_nohz_task_switch();
 }
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 22b6a46818cf..af76cfa51b57 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -304,7 +304,7 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) 
=
  */
 static void tick_nohz_full_kick(void)
 {
-   if (!tick_nohz_full_cpu(smp_processor_id()))
+   if (!tick_nohz_full_this_cpu())
return;
 
irq_work_queue(this_cpu_ptr(_full_kick_work));
@@ -452,9 +452,6 @@ void __tick_nohz_task_switch(void)
 
local_irq_save(flags);
 
-   if (!tick_nohz_full_cpu(smp_processor_id()))
-   goto out;
-
ts = this_cpu_ptr(_cpu_sched);
 
if (ts->tick_stopped) {
@@ -462,7 +459,6 @@ void __tick_nohz_task_switch(void)
atomic_read(>signal->tick_dep_mask))
tick_nohz_full_kick();
}
-out:
local_irq_restore(flags);
 }
 
@@ -929,14 +925,16 @@ static void tick_nohz_restart_sched_tick(struct 
tick_sched *ts, ktime_t now)
 static void tick_nohz_full_update_tick(struct tick_sched *ts)
 {
 #ifdef CONFIG_NO_HZ_FULL
-   int cpu = smp_processor_id();
+   int cpu;
 
-   if (!tick_nohz_full_cpu(cpu))
+   if (!tick_nohz_full_this_cpu())
return;
 
if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
return;
 
+   cpu = smp_processor_id();
+
if (can_stop_full_tick(cpu, ts))
tick_nohz_stop_sched_tick(ts, cpu);
else if (ts->tick_stopped)
-- 
2.25.1