On Wed, 5 Apr 2017 12:39:28 -0700
"Paul E. McKenney" <paul...@linux.vnet.ibm.com> wrote:


> > Here's the crucial part of that stack dump again:
> > 
> >  save_stack_trace+0x1b/0x1d
> >  check_stack+0xec/0x24a
> >  stack_trace_call+0x40/0x53
> >  0xffffffffa0026077
> >  ? ftrace_graph_caller+0x78/0xa8
> >  ? trace_hardirqs_off+0xd/0xf
> >  ? rcu_eqs_enter_common.constprop.71+0x5/0x108
> >  rcu_eqs_enter_common.constprop.71+0x5/0x108
> >  rcu_idle_enter+0x51/0x72
> > 
> > 
> > The stack trace was called on rcu_eqs_enter_common() inside the
> > rcu_idle_enter() function call.
> > 
> > Perhaps if I just let rcu disable stack tracing? Something like this:
> > 
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index 50fee76..f894fc3 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -853,8 +853,10 @@ void rcu_idle_enter(void)
> >     unsigned long flags;
> > 
> >     local_irq_save(flags);
> > +   disable_stack_tracer();
> >     rcu_eqs_enter(false);
> >     rcu_sysidle_enter(0);
> > +   enable_stack_tracer();
> >     local_irq_restore(flags);
> >  }
> >  EXPORT_SYMBOL_GPL(rcu_idle_enter);  
> 
> That only covers one of two code paths, and still end up with the
> trace code thinking that RCU is paying attention to it when it really
> is not.

OK, that sounds like a separate bug to what I'm tracking.

> 
> But if there is a disable_stack_tracer() and enable_stack_tracer(),
> I could use them as shown below.  (Hand-edited patch, probably doesn't
> apply, let alone build.)
> 
>                                                       Thanx, Paul
> 
> ------------------------------------------------------------------------
> 
> commit 1a11d6fd0ae344f7dccd57d3c587d9ef2a4bf08e
> Author: Paul E. McKenney <paul...@linux.vnet.ibm.com>
> Date:   Wed Apr 5 09:05:18 2017 -0700
> 
>     rcu: Fix dyntick-idle tracing
>     
>     The tracing subsystem started using rcu_irq_entry() and rcu_irq_exit()
>     (with my blessing) to allow the current _rcuidle alternative tracepoint
>     name to be dispensed with while still maintaining good performance.
>     Unfortunately, this causes RCU's dyntick-idle entry code's tracing to
>     appear to RCU like an interrupt that occurs where RCU is not designed
>     to handle interrupts.
>     
>     This commit fixes this problem by moving the zeroing of ->dynticks_nesting
>     after the offending trace_rcu_dyntick() statement, which narrows the
>     window of vulnerability to a pair of adjacent statements that are now
>     marked with comments to that effect.
>     
>     Reported-by: Steven Rostedt <rost...@goodmis.org>
>     Signed-off-by: Paul E. McKenney <paul...@linux.vnet.ibm.com>
> 
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 50fee7689e71..7308c88ee57a 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -771,25 +771,24 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct 
> rcu_data *rdp)
>  }
>  
>  /*
> - * rcu_eqs_enter_common - current CPU is moving towards extended quiescent 
> state
> + * rcu_eqs_enter_common - current CPU is entering an extended quiescent state
>   *
> - * If the new value of the ->dynticks_nesting counter now is zero,
> - * we really have entered idle, and must do the appropriate accounting.
> - * The caller must have disabled interrupts.
> + * Enter idle, doing appropriate accounting.  The caller must have
> + * disabled interrupts.
>   */
> -static void rcu_eqs_enter_common(long long oldval, bool user)
> +static void rcu_eqs_enter_common(bool user)
>  {
>       struct rcu_state *rsp;
>       struct rcu_data *rdp;
> -     RCU_TRACE(struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);)
> +     struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
>  
> -     trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
> +     trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0);
>       if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
>           !user && !is_idle_task(current)) {
>               struct task_struct *idle __maybe_unused =
>                       idle_task(smp_processor_id());
>  
> -             trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 
> 0);
> +             trace_rcu_dyntick(TPS("Error on entry: not idle task"), 
> rdtp->dynticks_nesting, 0);
>               rcu_ftrace_dump(DUMP_ORIG);
>               WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
>                         current->pid, current->comm,
> @@ -800,7 +799,8 @@ static void rcu_eqs_enter_common(long long oldval, bool 
> user)
>               do_nocb_deferred_wakeup(rdp);
>       }
>       rcu_prepare_for_idle();
> -     rcu_dynticks_eqs_enter();
> +     disable_stack_tracer();
> +     rdtp->dynticks_nesting = 0; /* Breaks tracing momentarily. */
> +     rcu_dynticks_eqs_enter(); /* After this, tracing works again. */
> +     enable_stack_tracer();

OK, do you want me to send you a patch, or should I take your patch
into my tree and add this?

-- Steve

>       rcu_dynticks_task_enter();
>  
>       /*
> @@ -821,19 +821,15 @@ static void rcu_eqs_enter_common(long long oldval, bool 
> user)
>   */
>  static void rcu_eqs_enter(bool user)
>  {
> -     long long oldval;
>       struct rcu_dynticks *rdtp;
>  
>       rdtp = this_cpu_ptr(&rcu_dynticks);
> -     oldval = rdtp->dynticks_nesting;
>       WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
> -                  (oldval & DYNTICK_TASK_NEST_MASK) == 0);
> -     if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) {
> -             rdtp->dynticks_nesting = 0;
> -             rcu_eqs_enter_common(oldval, user);
> -     } else {
> +                  (rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0);
> +     if ((rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 
> DYNTICK_TASK_NEST_VALUE)
> +             rcu_eqs_enter_common(user);
> +     else
>               rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
> -     }
>  }
>  
>  /**
> @@ -892,19 +888,18 @@ void rcu_user_enter(void)
>   */
>  void rcu_irq_exit(void)
>  {
> -     long long oldval;
>       struct rcu_dynticks *rdtp;
>  
>       RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs 
> enabled!!!");
>       rdtp = this_cpu_ptr(&rcu_dynticks);
> -     oldval = rdtp->dynticks_nesting;
> -     rdtp->dynticks_nesting--;
>       WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
> -                  rdtp->dynticks_nesting < 0);
> -     if (rdtp->dynticks_nesting)
> -             trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting);
> -     else
> -             rcu_eqs_enter_common(oldval, true);
> +                  rdtp->dynticks_nesting < 1);
> +     if (rdtp->dynticks_nesting <= 1) {
> +             rcu_eqs_enter_common(true);
> +     } else {
> +             trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nesting, 
> rdtp->dynticks_nesting - 1);
> +             rdtp->dynticks_nesting--;
> +     }
>       rcu_sysidle_enter(1);
>  }
>  

Reply via email to