On Tue, Oct 13, 2020 at 02:13:28PM -0300, Marcelo Tosatti wrote:

> > Yes, but if the task isn't running, run_posix_cpu_timers() doesn't have
> > anything to elapse. So indeed we can spare the IPI if the task is not
> > running. Provided, of course, that ordering makes sure the task sees the
> > new dependency when it schedules in.
> 
> True.
> 
>  * p->on_cpu <- { 0, 1 }:
>  *
>  *   is set by prepare_task() and cleared by finish_task() such that it will be
>  *   set before p is scheduled-in and cleared after p is scheduled-out, both
>  *   under rq->lock. Non-zero indicates the task is running on its CPU.
> 
> 
> CPU-0 (tick_set_dep)            CPU-1 (task switch)
> 
> STORE p->tick_dep_mask
> smp_mb() (atomic_fetch_or())
> LOAD p->on_cpu
> 
> 
>                                 context_switch(prev, next)
>                                 STORE next->on_cpu = 1
>                                 ...                             [*]
> 
>                                 LOAD current->tick_dep_mask
> 

That load is in tick_nohz_task_switch(), right? (Which, BTW, is placed
completely wrong.) You could easily do something like the below, I
suppose.

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 81632cd5e3b7..2a5fafe66bb0 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -410,6 +410,14 @@ void __tick_nohz_task_switch(void)
        ts = this_cpu_ptr(&tick_cpu_sched);
 
        if (ts->tick_stopped) {
+               /*
+                * tick_set_dep()               (this)
+                *
+                * STORE p->tick_dep_mask       STORE p->on_cpu
+                * smp_mb()                     smp_mb()
+                * LOAD p->on_cpu               LOAD p->tick_dep_mask
+                */
+               smp_mb();
                if (atomic_read(&current->tick_dep_mask) ||
                    atomic_read(&current->signal->tick_dep_mask))
                        tick_nohz_full_kick();
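
For the other half of that pairing, the dep-setting side would then only
need to send the IPI when the task is actually running. Something like
the sketch below, I suppose (the function name, the READ_ONCE() and the
kick helper choice are guesses here, not an actual patch):

/*
 * Sketch only: pairs with the smp_mb() added above. atomic_fetch_or()
 * is fully ordered, so either this CPU observes p->on_cpu == 1 and
 * sends the kick, or the target observes the new tick_dep_mask in
 * __tick_nohz_task_switch() when it schedules in.
 */
static void tick_nohz_dep_set_task_sketch(struct task_struct *p,
                                          enum tick_dep_bits bit)
{
        if (!atomic_fetch_or(BIT(bit), &p->tick_dep_mask)) {
                if (READ_ONCE(p->on_cpu))
                        tick_nohz_full_kick_cpu(task_cpu(p));
        }
}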



Re tick_nohz_task_switch() being placed wrong: it should probably be
placed before finish_lock_switch(). Something like so.


diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index cf044580683c..5c92c959824f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4084,6 +4084,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
        vtime_task_switch(prev);
        perf_event_task_sched_in(prev, current);
        finish_task(prev);
+       tick_nohz_task_switch();
        finish_lock_switch(rq);
        finish_arch_post_lock_switch();
        kcov_finish_switch(current);
@@ -4121,7 +4122,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
                put_task_struct_rcu_user(prev);
        }
 
-       tick_nohz_task_switch();
        return rq;
 }
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 81632cd5e3b7..c8bddc9fb792 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -402,10 +402,7 @@ void __tick_nohz_task_switch(void)
-       unsigned long flags;
        struct tick_sched *ts;
 
-       local_irq_save(flags);
-
        if (!tick_nohz_full_cpu(smp_processor_id()))
-               goto out;
+               return;
 
        ts = this_cpu_ptr(&tick_cpu_sched);
 
@@ -414,8 +412,6 @@ void __tick_nohz_task_switch(void)
                    atomic_read(&current->signal->tick_dep_mask))
                        tick_nohz_full_kick();
        }
-out:
-       local_irq_restore(flags);
 }
 
 /* Get the boot-time nohz CPU list from the kernel parameters. */
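
With the call moved before finish_lock_switch() it already runs with IRQs
disabled under the rq lock, which is why the local_irq_save()/restore()
pair and the flags variable can go. For reference, the combined result of
both hunks would look roughly like so (a sketch, not a committed patch):

/* Runs from finish_task_switch(), so IRQs are still disabled here. */
void __tick_nohz_task_switch(void)
{
        struct tick_sched *ts;

        if (!tick_nohz_full_cpu(smp_processor_id()))
                return;

        ts = this_cpu_ptr(&tick_cpu_sched);

        if (ts->tick_stopped) {
                /*
                 * Pairs with the full barrier in tick_set_dep(): either
                 * we observe the new tick_dep_mask here, or the setter
                 * observed p->on_cpu and sent the kick itself.
                 */
                smp_mb();
                if (atomic_read(&current->tick_dep_mask) ||
                    atomic_read(&current->signal->tick_dep_mask))
                        tick_nohz_full_kick();
        }
}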
