On Wed, May 18, 2016 at 02:30:44PM +0200, Peter Zijlstra wrote:
>  void cpu_idle (void)
>  {
> +     int cpu = smp_processor_id();
> +
>       /* endless idle loop with no priority at all */
>       while (1) {
>               while (!need_resched()) {
>                       void (*idle)(void);
> -                     /*
> -                      * Mark this as an RCU critical section so that
> -                      * synchronize_kernel() in the unload path waits
> -                      * for our completion.
> -                      */
> -                     rcu_read_lock();
> +
> +                     if (cpu_isset(cpu, cpu_idle_map))
> +                             cpu_clear(cpu, cpu_idle_map);
> +                     rmb();
>                       idle = pm_idle;
>  
>                       if (!idle)
>                               idle = default_idle;
>  
> -                     irq_stat[smp_processor_id()].idle_timestamp = jiffies;
> +                     irq_stat[cpu].idle_timestamp = jiffies;
>                       idle();
> -                     rcu_read_unlock();
>               }
>               schedule();
>       }
>  }
>  
> +void cpu_idle_wait(void)
> +{
> +     int cpu;
> +     cpumask_t map;
> +
> +     for_each_online_cpu(cpu)
> +             cpu_set(cpu, cpu_idle_map);
> +
> +     wmb();
> +     do {
> +             ssleep(1);
> +             cpus_and(map, cpu_idle_map, cpu_online_map);
> +     } while (!cpus_empty(map));
> +}
> +EXPORT_SYMBOL_GPL(cpu_idle_wait);


That code then got 'wrecked' by the commit below.

That commit removes cpu_idle_state, and thereby removes the need for the
rmb(): with the cpu_idle_state load gone there is nothing left to order
the pm_idle load against, and you cannot 'order' a single load.

All the idle loop needs to guarantee (and in today's code that's
non-obvious) is that it _must_ reload all values on every iteration of
the loop.


---
commit 783e391b7b5b273cd20856d8f6f4878da8ec31b3
Author: Venki Pallipadi <[email protected]>
Date:   Thu Apr 10 09:49:58 2008 -0700

    x86: Simplify cpu_idle_wait
    
    This patch also resolves hangs on boot:
        http://lkml.org/lkml/2008/2/23/263
        http://bugzilla.kernel.org/show_bug.cgi?id=10093
    
    The bug was causing once-in-few-reboots 10-15 sec wait during boot on
    certain laptops.
    
    Earlier commit 40d6a146629b98d8e322b6f9332b182c7cbff3df added
    smp_call_function in cpu_idle_wait() to kick cpus that are in tickless
    idle.  Looking at cpu_idle_wait code at that time, code seemed to be
    over-engineered for a case which is rarely used (while changing idle
    handler).
    
    Below is a simplified version of cpu_idle_wait, which just makes a dummy
    smp_call_function to all cpus, to make them come out of old idle handler
    and start using the new idle handler.  It eliminates code in the idle
    loop to handle cpu_idle_wait.
    
    Signed-off-by: Venkatesh Pallipadi <[email protected]>
    Signed-off-by: Linus Torvalds <[email protected]>

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index be3c7a299f02..43930e73f657 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -82,7 +82,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
  */
 void (*pm_idle)(void);
 EXPORT_SYMBOL(pm_idle);
-static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
 
 void disable_hlt(void)
 {
@@ -190,9 +189,6 @@ void cpu_idle(void)
                while (!need_resched()) {
                        void (*idle)(void);
 
-                       if (__get_cpu_var(cpu_idle_state))
-                               __get_cpu_var(cpu_idle_state) = 0;
-
                        check_pgt_cache();
                        rmb();
                        idle = pm_idle;
@@ -220,40 +216,19 @@ static void do_nothing(void *unused)
 {
 }
 
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+ * pm_idle and update to new pm_idle value. Required while changing pm_idle
+ * handler on SMP systems.
+ *
+ * Caller must have changed pm_idle to the new value before the call. Old
+ * pm_idle value will not be used by any CPU after the return of this function.
+ */
 void cpu_idle_wait(void)
 {
-       unsigned int cpu, this_cpu = get_cpu();
-       cpumask_t map, tmp = current->cpus_allowed;
-
-       set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
-       put_cpu();
-
-       cpus_clear(map);
-       for_each_online_cpu(cpu) {
-               per_cpu(cpu_idle_state, cpu) = 1;
-               cpu_set(cpu, map);
-       }
-
-       __get_cpu_var(cpu_idle_state) = 0;
-
-       wmb();
-       do {
-               ssleep(1);
-               for_each_online_cpu(cpu) {
-                       if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
-                               cpu_clear(cpu, map);
-               }
-               cpus_and(map, map, cpu_online_map);
-               /*
-                * We waited 1 sec, if a CPU still did not call idle
-                * it may be because it is in idle and not waking up
-                * because it has nothing to do.
-                * Give all the remaining CPUS a kick.
-                */
-               smp_call_function_mask(map, do_nothing, NULL, 0);
-       } while (!cpus_empty(map));
-
-       set_cpus_allowed(current, tmp);
+       smp_mb();
+       /* kick all the CPUs so that they exit out of pm_idle */
+       smp_call_function(do_nothing, NULL, 0, 1);
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3baf9b9f4c87..46c4c546b499 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -63,7 +63,6 @@ EXPORT_SYMBOL(boot_option_idle_override);
  */
 void (*pm_idle)(void);
 EXPORT_SYMBOL(pm_idle);
-static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
 
 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
 
@@ -173,9 +172,6 @@ void cpu_idle(void)
                while (!need_resched()) {
                        void (*idle)(void);
 
-                       if (__get_cpu_var(cpu_idle_state))
-                               __get_cpu_var(cpu_idle_state) = 0;
-
                        rmb();
                        idle = pm_idle;
                        if (!idle)
@@ -207,40 +203,19 @@ static void do_nothing(void *unused)
 {
 }
 
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+ * pm_idle and update to new pm_idle value. Required while changing pm_idle
+ * handler on SMP systems.
+ *
+ * Caller must have changed pm_idle to the new value before the call. Old
+ * pm_idle value will not be used by any CPU after the return of this function.
+ */
 void cpu_idle_wait(void)
 {
-       unsigned int cpu, this_cpu = get_cpu();
-       cpumask_t map, tmp = current->cpus_allowed;
-
-       set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
-       put_cpu();
-
-       cpus_clear(map);
-       for_each_online_cpu(cpu) {
-               per_cpu(cpu_idle_state, cpu) = 1;
-               cpu_set(cpu, map);
-       }
-
-       __get_cpu_var(cpu_idle_state) = 0;
-
-       wmb();
-       do {
-               ssleep(1);
-               for_each_online_cpu(cpu) {
-                       if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
-                               cpu_clear(cpu, map);
-               }
-               cpus_and(map, map, cpu_online_map);
-               /*
-                * We waited 1 sec, if a CPU still did not call idle
-                * it may be because it is in idle and not waking up
-                * because it has nothing to do.
-                * Give all the remaining CPUS a kick.
-                */
-               smp_call_function_mask(map, do_nothing, 0, 0);
-       } while (!cpus_empty(map));
-
-       set_cpus_allowed(current, tmp);
+       smp_mb();
+       /* kick all the CPUs so that they exit out of pm_idle */
+       smp_call_function(do_nothing, NULL, 0, 1);
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
 

Reply via email to