Re: [RFC PATCH 5/8] entry: Explicitly flush pending rcuog wakeup before last rescheduling points

2021-01-11 Thread Frederic Weisbecker
On Mon, Jan 11, 2021 at 01:08:08PM +0100, Peter Zijlstra wrote:
> On Sat, Jan 09, 2021 at 03:05:33AM +0100, Frederic Weisbecker wrote:
> > Following the idle loop model, cleanly check for pending rcuog wakeup
> > before the last rescheduling point on resuming to user mode. This
> > way we can avoid to do it from rcu_user_enter() with the last resort
> > self-IPI hack that enforces rescheduling.
> > 
> > Signed-off-by: Frederic Weisbecker 
> > Cc: Peter Zijlstra 
> > Cc: Thomas Gleixner 
> > Cc: Ingo Molnar
> > Cc: Paul E. McKenney 
> > Cc: Rafael J. Wysocki 
> > ---
> >  kernel/entry/common.c |  6 ++
> >  kernel/rcu/tree.c | 12 +++-
> >  2 files changed, 13 insertions(+), 5 deletions(-)
> > 
> > diff --git a/kernel/entry/common.c b/kernel/entry/common.c
> > index 378341642f94..8f3292b5f9b7 100644
> > --- a/kernel/entry/common.c
> > +++ b/kernel/entry/common.c
> > @@ -178,6 +178,9 @@ static unsigned long exit_to_user_mode_loop(struct 
> > pt_regs *regs,
> > /* Architecture specific TIF work */
> > arch_exit_to_user_mode_work(regs, ti_work);
> >  
> > +   /* Check if any of the above work has queued a deferred wakeup 
> > */
> > +   rcu_nocb_flush_deferred_wakeup();
> > +
> > /*
> >  * Disable interrupts and reevaluate the work flags as they
> >  * might have changed while interrupts and preemption was
> > @@ -197,6 +200,9 @@ static void exit_to_user_mode_prepare(struct pt_regs 
> > *regs)
> >  
> > lockdep_assert_irqs_disabled();
> >  
> > +   /* Flush pending rcuog wakeup before the last need_resched() check */
> > +   rcu_nocb_flush_deferred_wakeup();
> > +
> > if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
> > ti_work = exit_to_user_mode_loop(regs, ti_work);
> >  
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index 2920dfc9f58c..3c4c0d5cea65 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -705,12 +705,14 @@ noinstr void rcu_user_enter(void)
> >  
> > lockdep_assert_irqs_disabled();
> > /*
> > -* We may be past the last rescheduling opportunity in the entry code.
> > -* Trigger a self IPI that will fire and reschedule once we resume to
> > -* user/guest mode.
> > +* Other than generic entry implementation, we may be past the last
> > +* rescheduling opportunity in the entry code. Trigger a self IPI
> > +* that will fire and reschedule once we resume in user/guest mode.
> >  */
> > -   if (do_nocb_deferred_wakeup(rdp) && need_resched())
> > > -   irq_work_queue(this_cpu_ptr(&late_wakeup_work));
> > +   if (!IS_ENABLED(CONFIG_GENERIC_ENTRY) || (current->flags & PF_VCPU)) {
> 
> We have xfer_to_guest_mode_work() for that PF_VCPU case.

Ah, very nice! I'll integrate that in the next iteration.

Thanks.

> 
> > +   if (do_nocb_deferred_wakeup(rdp) && need_resched())
> > +   irq_work_queue(this_cpu_ptr(&late_wakeup_work));
> > +   }


Re: [RFC PATCH 5/8] entry: Explicitly flush pending rcuog wakeup before last rescheduling points

2021-01-11 Thread Peter Zijlstra
On Sat, Jan 09, 2021 at 03:05:33AM +0100, Frederic Weisbecker wrote:
> Following the idle loop model, cleanly check for pending rcuog wakeup
> before the last rescheduling point on resuming to user mode. This
> way we can avoid to do it from rcu_user_enter() with the last resort
> self-IPI hack that enforces rescheduling.
> 
> Signed-off-by: Frederic Weisbecker 
> Cc: Peter Zijlstra 
> Cc: Thomas Gleixner 
> Cc: Ingo Molnar
> Cc: Paul E. McKenney 
> Cc: Rafael J. Wysocki 
> ---
>  kernel/entry/common.c |  6 ++
>  kernel/rcu/tree.c | 12 +++-
>  2 files changed, 13 insertions(+), 5 deletions(-)
> 
> diff --git a/kernel/entry/common.c b/kernel/entry/common.c
> index 378341642f94..8f3292b5f9b7 100644
> --- a/kernel/entry/common.c
> +++ b/kernel/entry/common.c
> @@ -178,6 +178,9 @@ static unsigned long exit_to_user_mode_loop(struct 
> pt_regs *regs,
>   /* Architecture specific TIF work */
>   arch_exit_to_user_mode_work(regs, ti_work);
>  
> + /* Check if any of the above work has queued a deferred wakeup 
> */
> + rcu_nocb_flush_deferred_wakeup();
> +
>   /*
>* Disable interrupts and reevaluate the work flags as they
>* might have changed while interrupts and preemption was
> @@ -197,6 +200,9 @@ static void exit_to_user_mode_prepare(struct pt_regs 
> *regs)
>  
>   lockdep_assert_irqs_disabled();
>  
> + /* Flush pending rcuog wakeup before the last need_resched() check */
> + rcu_nocb_flush_deferred_wakeup();
> +
>   if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
>   ti_work = exit_to_user_mode_loop(regs, ti_work);
>  
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 2920dfc9f58c..3c4c0d5cea65 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -705,12 +705,14 @@ noinstr void rcu_user_enter(void)
>  
>   lockdep_assert_irqs_disabled();
>   /*
> -  * We may be past the last rescheduling opportunity in the entry code.
> -  * Trigger a self IPI that will fire and reschedule once we resume to
> -  * user/guest mode.
> +  * Other than generic entry implementation, we may be past the last
> +  * rescheduling opportunity in the entry code. Trigger a self IPI
> +  * that will fire and reschedule once we resume in user/guest mode.
>*/
> - if (do_nocb_deferred_wakeup(rdp) && need_resched())
> - irq_work_queue(this_cpu_ptr(&late_wakeup_work));
> + if (!IS_ENABLED(CONFIG_GENERIC_ENTRY) || (current->flags & PF_VCPU)) {

We have xfer_to_guest_mode_work() for that PF_VCPU case.

> + if (do_nocb_deferred_wakeup(rdp) && need_resched())
> + irq_work_queue(this_cpu_ptr(&late_wakeup_work));
> + }


Re: [RFC PATCH 5/8] entry: Explicitly flush pending rcuog wakeup before last rescheduling points

2021-01-11 Thread Frederic Weisbecker
On Sun, Jan 10, 2021 at 09:13:18PM -0800, Paul E. McKenney wrote:
> On Mon, Jan 11, 2021 at 01:40:14AM +0100, Frederic Weisbecker wrote:
> > On Sat, Jan 09, 2021 at 03:05:33AM +0100, Frederic Weisbecker wrote:
> > > Following the idle loop model, cleanly check for pending rcuog wakeup
> > > before the last rescheduling point on resuming to user mode. This
> > > way we can avoid to do it from rcu_user_enter() with the last resort
> > > self-IPI hack that enforces rescheduling.
> > > 
> > > Signed-off-by: Frederic Weisbecker 
> > > Cc: Peter Zijlstra 
> > > Cc: Thomas Gleixner 
> > > Cc: Ingo Molnar
> > > Cc: Paul E. McKenney 
> > > Cc: Rafael J. Wysocki 
> > > ---
> > >  kernel/entry/common.c |  6 ++
> > >  kernel/rcu/tree.c | 12 +++-
> > >  2 files changed, 13 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/kernel/entry/common.c b/kernel/entry/common.c
> > > index 378341642f94..8f3292b5f9b7 100644
> > > --- a/kernel/entry/common.c
> > > +++ b/kernel/entry/common.c
> > > @@ -178,6 +178,9 @@ static unsigned long exit_to_user_mode_loop(struct 
> > > pt_regs *regs,
> > >   /* Architecture specific TIF work */
> > >   arch_exit_to_user_mode_work(regs, ti_work);
> > >  
> > > + /* Check if any of the above work has queued a deferred wakeup 
> > > */
> > > + rcu_nocb_flush_deferred_wakeup();
> > 
> > So this needs to be moved to the IRQs disabled section, just a few lines 
> > later,
> > otherwise preemption may schedule another task that in turn do call_rcu() 
> > and create
> > new deferred wake up (thank Paul for the warning). Not to mention moving to
> > another CPU with its own deferred wakeups to flush...
> > 
> > I'll fix that for the next version.
> 
> Ah, so it was not just my laptop dying, then!  ;-)

Note that it fixes the "smp_processor_id() in preemptible" warnings you
reported but it shouldn't fix the other issues.


Re: [RFC PATCH 5/8] entry: Explicitly flush pending rcuog wakeup before last rescheduling points

2021-01-10 Thread Paul E. McKenney
On Mon, Jan 11, 2021 at 01:40:14AM +0100, Frederic Weisbecker wrote:
> On Sat, Jan 09, 2021 at 03:05:33AM +0100, Frederic Weisbecker wrote:
> > Following the idle loop model, cleanly check for pending rcuog wakeup
> > before the last rescheduling point on resuming to user mode. This
> > way we can avoid to do it from rcu_user_enter() with the last resort
> > self-IPI hack that enforces rescheduling.
> > 
> > Signed-off-by: Frederic Weisbecker 
> > Cc: Peter Zijlstra 
> > Cc: Thomas Gleixner 
> > Cc: Ingo Molnar
> > Cc: Paul E. McKenney 
> > Cc: Rafael J. Wysocki 
> > ---
> >  kernel/entry/common.c |  6 ++
> >  kernel/rcu/tree.c | 12 +++-
> >  2 files changed, 13 insertions(+), 5 deletions(-)
> > 
> > diff --git a/kernel/entry/common.c b/kernel/entry/common.c
> > index 378341642f94..8f3292b5f9b7 100644
> > --- a/kernel/entry/common.c
> > +++ b/kernel/entry/common.c
> > @@ -178,6 +178,9 @@ static unsigned long exit_to_user_mode_loop(struct 
> > pt_regs *regs,
> > /* Architecture specific TIF work */
> > arch_exit_to_user_mode_work(regs, ti_work);
> >  
> > +   /* Check if any of the above work has queued a deferred wakeup 
> > */
> > +   rcu_nocb_flush_deferred_wakeup();
> 
> So this needs to be moved to the IRQs disabled section, just a few lines 
> later,
> otherwise preemption may schedule another task that in turn do call_rcu() and 
> create
> new deferred wake up (thank Paul for the warning). Not to mention moving to
> another CPU with its own deferred wakeups to flush...
> 
> I'll fix that for the next version.

Ah, so it was not just my laptop dying, then!  ;-)

Thanx, Paul


Re: [RFC PATCH 5/8] entry: Explicitly flush pending rcuog wakeup before last rescheduling points

2021-01-10 Thread Frederic Weisbecker
On Sat, Jan 09, 2021 at 03:05:33AM +0100, Frederic Weisbecker wrote:
> Following the idle loop model, cleanly check for pending rcuog wakeup
> before the last rescheduling point on resuming to user mode. This
> way we can avoid to do it from rcu_user_enter() with the last resort
> self-IPI hack that enforces rescheduling.
> 
> Signed-off-by: Frederic Weisbecker 
> Cc: Peter Zijlstra 
> Cc: Thomas Gleixner 
> Cc: Ingo Molnar
> Cc: Paul E. McKenney 
> Cc: Rafael J. Wysocki 
> ---
>  kernel/entry/common.c |  6 ++
>  kernel/rcu/tree.c | 12 +++-
>  2 files changed, 13 insertions(+), 5 deletions(-)
> 
> diff --git a/kernel/entry/common.c b/kernel/entry/common.c
> index 378341642f94..8f3292b5f9b7 100644
> --- a/kernel/entry/common.c
> +++ b/kernel/entry/common.c
> @@ -178,6 +178,9 @@ static unsigned long exit_to_user_mode_loop(struct 
> pt_regs *regs,
>   /* Architecture specific TIF work */
>   arch_exit_to_user_mode_work(regs, ti_work);
>  
> + /* Check if any of the above work has queued a deferred wakeup 
> */
> + rcu_nocb_flush_deferred_wakeup();

So this needs to be moved to the IRQs-disabled section, just a few lines later,
otherwise preemption may schedule another task that in turn calls call_rcu() and
creates a new deferred wakeup (thanks Paul for the warning). Not to mention moving to
another CPU with its own deferred wakeups to flush...

I'll fix that for the next version.

Thanks.


[RFC PATCH 5/8] entry: Explicitly flush pending rcuog wakeup before last rescheduling points

2021-01-08 Thread Frederic Weisbecker
Following the idle loop model, cleanly check for pending rcuog wakeup
before the last rescheduling point on resuming to user mode. This
way we can avoid doing it from rcu_user_enter() with the last-resort
self-IPI hack that enforces rescheduling.

Signed-off-by: Frederic Weisbecker 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Ingo Molnar
Cc: Paul E. McKenney 
Cc: Rafael J. Wysocki 
---
 kernel/entry/common.c |  6 ++
 kernel/rcu/tree.c | 12 +++-
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 378341642f94..8f3292b5f9b7 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -178,6 +178,9 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
/* Architecture specific TIF work */
arch_exit_to_user_mode_work(regs, ti_work);
 
+   /* Check if any of the above work has queued a deferred wakeup */
+   rcu_nocb_flush_deferred_wakeup();
+
/*
 * Disable interrupts and reevaluate the work flags as they
 * might have changed while interrupts and preemption was
@@ -197,6 +200,9 @@ static void exit_to_user_mode_prepare(struct pt_regs *regs)
 
lockdep_assert_irqs_disabled();
 
+   /* Flush pending rcuog wakeup before the last need_resched() check */
+   rcu_nocb_flush_deferred_wakeup();
+
if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
ti_work = exit_to_user_mode_loop(regs, ti_work);
 
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 2920dfc9f58c..3c4c0d5cea65 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -705,12 +705,14 @@ noinstr void rcu_user_enter(void)
 
lockdep_assert_irqs_disabled();
/*
-* We may be past the last rescheduling opportunity in the entry code.
-* Trigger a self IPI that will fire and reschedule once we resume to
-* user/guest mode.
+* Other than generic entry implementation, we may be past the last
+* rescheduling opportunity in the entry code. Trigger a self IPI
+* that will fire and reschedule once we resume in user/guest mode.
 */
-   if (do_nocb_deferred_wakeup(rdp) && need_resched())
-   irq_work_queue(this_cpu_ptr(&late_wakeup_work));
+   if (!IS_ENABLED(CONFIG_GENERIC_ENTRY) || (current->flags & PF_VCPU)) {
+   if (do_nocb_deferred_wakeup(rdp) && need_resched())
+   irq_work_queue(this_cpu_ptr(&late_wakeup_work));
+   }
 
rcu_eqs_enter(true);
 }
-- 
2.25.1