Re: [PATCH v9 4/7] rcu/trace: Add tracing for how segcb list changes

2020-11-11 Thread Joel Fernandes
On Wed, Nov 11, 2020 at 08:49:41AM -0500, Steven Rostedt wrote:
> On Tue, 10 Nov 2020 16:35:30 -0800
> "Paul E. McKenney"  wrote:
> 
> > > +void __trace_rcu_segcb_stats(struct rcu_segcblist *rsclp, const char 
> > > *context)
> > > +{
> > > + int cbs[RCU_CBLIST_NSEGS];
> > > + unsigned long gps[RCU_CBLIST_NSEGS];
> > > +
> > > + if (!trace_rcu_segcb_stats_enabled())
> > > + return;  
> > 
> > Can't you rely on the trace system to enable and disable this trace
> > event?  If the thought is to save instructions, then moving all this
> > into TP_fast_assign() enables the trace system to deal with that as well.

Makes sense.

> > > + rcu_segcblist_countseq(rsclp, cbs, gps);
> > > +
> > > + trace_rcu_segcb_stats(context, cbs, gps);
> > > +}
> > > +#endif
> 
> Yeah, I agree with Paul. I think it is possible to move this all into the
> TP_fast_assign. If you have trouble doing so, let me know.

Sure. Last time I tried this for this patch, I ran into some issue. I will
try again and let you know if I need help.

thanks,

 - Joel

> 
> -- Steve


Re: [PATCH v9 4/7] rcu/trace: Add tracing for how segcb list changes

2020-11-11 Thread Steven Rostedt
On Tue, 10 Nov 2020 16:35:30 -0800
"Paul E. McKenney"  wrote:

> > +void __trace_rcu_segcb_stats(struct rcu_segcblist *rsclp, const char 
> > *context)
> > +{
> > +   int cbs[RCU_CBLIST_NSEGS];
> > +   unsigned long gps[RCU_CBLIST_NSEGS];
> > +
> > +   if (!trace_rcu_segcb_stats_enabled())
> > +   return;  
> 
> Can't you rely on the trace system to enable and disable this trace
> event?  If the thought is to save instructions, then moving all this
> into TP_fast_assign() enables the trace system to deal with that as well.
> 
> > +   rcu_segcblist_countseq(rsclp, cbs, gps);
> > +
> > +   trace_rcu_segcb_stats(context, cbs, gps);
> > +}
> > +#endif

Yeah, I agree with Paul. I think it is possible to move this all into the
TP_fast_assign. If you have trouble doing so, let me know.

-- Steve


Re: [PATCH v9 4/7] rcu/trace: Add tracing for how segcb list changes

2020-11-10 Thread Paul E. McKenney
On Tue, Nov 03, 2020 at 09:26:00AM -0500, Joel Fernandes (Google) wrote:
> Track how the segcb list changes before/after acceleration, during
> queuing and during dequeuing.
> 
> This has proved useful to discover an optimization to avoid unwanted GP
> requests when there are no callbacks accelerated. The overhead is minimal as
> each segment's length is now stored in the respective segment.
> 
> Reviewed-by: Frederic Weisbecker 
> Reviewed-by: Neeraj Upadhyay 
> Signed-off-by: Joel Fernandes (Google) 

I thought I had responded to this one, but I find no record of having
done so.  But please see below for some questions and comments.

> ---
>  include/trace/events/rcu.h | 25 +
>  kernel/rcu/rcu_segcblist.c | 34 ++
>  kernel/rcu/rcu_segcblist.h |  5 +
>  kernel/rcu/tree.c  |  9 +
>  4 files changed, 73 insertions(+)
> 
> diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
> index 155b5cb43cfd..5f8f2ee1a936 100644
> --- a/include/trace/events/rcu.h
> +++ b/include/trace/events/rcu.h
> @@ -505,6 +505,31 @@ TRACE_EVENT_RCU(rcu_callback,
> __entry->qlen)
>  );
>  
> +TRACE_EVENT_RCU(rcu_segcb_stats,
> +
> + TP_PROTO(const char *ctx, int *cb_count, unsigned long *gp_seq),
> +
> + TP_ARGS(ctx, cb_count, gp_seq),
> +
> + TP_STRUCT__entry(
> + __field(const char *, ctx)
> + __array(int, cb_count, RCU_CBLIST_NSEGS)
> + __array(unsigned long, gp_seq, RCU_CBLIST_NSEGS)
> + ),
> +
> + TP_fast_assign(
> + __entry->ctx = ctx;
> + memcpy(__entry->cb_count, cb_count, RCU_CBLIST_NSEGS * 
> sizeof(int));
> + memcpy(__entry->gp_seq, gp_seq, RCU_CBLIST_NSEGS * 
> sizeof(unsigned long));

OK, I will bite...

Why not include linux/rcu_segcblist.h and then fold all of
rcu_segcblist_countseq() and much of __trace_rcu_segcb_stats() into
this TP_fast_assign()?  If you must have "unsigned int" for cb_count,
then you can use "do {} while (0)" to declare the index variable, but
if you can use unsigned long as suggested by Neeraj, you can just use
a pair of memcpy()s.

> + ),
> +
> + TP_printk("%s cb_count: (DONE=%d, WAIT=%d, NEXT_READY=%d, 
> NEXT=%d) "
> +   "gp_seq: (DONE=%lu, WAIT=%lu, NEXT_READY=%lu, 
> NEXT=%lu)", __entry->ctx,
> +   __entry->cb_count[0], __entry->cb_count[1], 
> __entry->cb_count[2], __entry->cb_count[3],
> +   __entry->gp_seq[0], __entry->gp_seq[1], 
> __entry->gp_seq[2], __entry->gp_seq[3])
> +
> +);
> +
>  /*
>   * Tracepoint for the registration of a single RCU callback of the special
>   * kvfree() form.  The first argument is the RCU type, the second argument
> diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
> index 357c19bbcb00..2a03949d0b82 100644
> --- a/kernel/rcu/rcu_segcblist.c
> +++ b/kernel/rcu/rcu_segcblist.c
> @@ -14,6 +14,7 @@
>  #include 
>  
>  #include "rcu_segcblist.h"
> +#include "rcu.h"
>  
>  /* Initialize simple callback list. */
>  void rcu_cblist_init(struct rcu_cblist *rclp)
> @@ -328,6 +329,39 @@ void rcu_segcblist_extract_done_cbs(struct rcu_segcblist 
> *rsclp,
>   rcu_segcblist_set_seglen(rsclp, RCU_DONE_TAIL, 0);
>  }
>  
> +/*
> + * Return how many CBs each segment along with their gp_seq values.
> + *
> + * This function is O(N) where N is the number of segments. Only used from
> + * tracing code which is usually disabled in production.
> + */
> +#ifdef CONFIG_RCU_TRACE
> +static void rcu_segcblist_countseq(struct rcu_segcblist *rsclp,
> +  int cbcount[RCU_CBLIST_NSEGS],
> +  unsigned long gpseq[RCU_CBLIST_NSEGS])
> +{
> + int i;
> +
> + for (i = 0; i < RCU_CBLIST_NSEGS; i++) {
> + cbcount[i] = rcu_segcblist_get_seglen(rsclp, i);
> + gpseq[i] = rsclp->gp_seq[i];
> + }
> +}
> +
> +void __trace_rcu_segcb_stats(struct rcu_segcblist *rsclp, const char 
> *context)
> +{
> + int cbs[RCU_CBLIST_NSEGS];
> + unsigned long gps[RCU_CBLIST_NSEGS];
> +
> + if (!trace_rcu_segcb_stats_enabled())
> + return;

Can't you rely on the trace system to enable and disable this trace
event?  If the thought is to save instructions, then moving all this
into TP_fast_assign() enables the trace system to deal with that as well.

> + rcu_segcblist_countseq(rsclp, cbs, gps);
> +
> + trace_rcu_segcb_stats(context, cbs, gps);
> +}
> +#endif
> +
>  /*
>   * Extract only those callbacks still pending (not yet ready to be
>   * invoked) from the specified rcu_segcblist structure and place them in
> diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
> index cd35c9faaf51..7750734fa116 100644
> --- a/kernel/rcu/rcu_segcblist.h
> +++ b/kernel/rcu/rcu_segcblist.h
> @@ -103,3 +103,8 @@ void 

Re: [PATCH v9 4/7] rcu/trace: Add tracing for how segcb list changes

2020-11-06 Thread Joel Fernandes
On Wed, Nov 04, 2020 at 03:33:14PM +0100, Frederic Weisbecker wrote:
> On Wed, Nov 04, 2020 at 06:08:07AM -0800, Paul E. McKenney wrote:
> > On Tue, Nov 03, 2020 at 04:17:31PM +0100, Frederic Weisbecker wrote:
> > > On Tue, Nov 03, 2020 at 09:26:00AM -0500, Joel Fernandes (Google) wrote:
> > > > +/*
> > > > + * Return how many CBs each segment along with their gp_seq values.
> > > > + *
> > > > + * This function is O(N) where N is the number of segments. Only used 
> > > > from
> > > > + * tracing code which is usually disabled in production.
> > > > + */
> > > > +#ifdef CONFIG_RCU_TRACE
> > > > +static void rcu_segcblist_countseq(struct rcu_segcblist *rsclp,
> > > > +int cbcount[RCU_CBLIST_NSEGS],
> > > > +unsigned long gpseq[RCU_CBLIST_NSEGS])
> > > > +{
> > > > +   int i;
> > > > +
> > > > +   for (i = 0; i < RCU_CBLIST_NSEGS; i++) {
> > > > +   cbcount[i] = rcu_segcblist_get_seglen(rsclp, i);
> > > > +   gpseq[i] = rsclp->gp_seq[i];
> > > > +   }
> > > > +}
> > > > +
> > > > +void __trace_rcu_segcb_stats(struct rcu_segcblist *rsclp, const char 
> > > > *context)
> > > > +{
> > > > +   int cbs[RCU_CBLIST_NSEGS];
> > > > +   unsigned long gps[RCU_CBLIST_NSEGS];
> > > > +
> > > > +   if (!trace_rcu_segcb_stats_enabled())
> > > > +   return;
> > > 
> > > Yes, very good!
> > > 
> > > Paul just told me that RCU_TRACE can be used in production so that 
> > > confirms that we
> > > wanted to avoid this loop of 8 iterations when tracing is disabled.
> > 
> > RCU's "don't try this in production" Kconfig option is PROVE_RCU.
> > 
> > I would be looking for checks that the sum of the segment lengths
> > match the overall ->len or checks that all of the segment lengths
> > are zero when ->cblist is empty to be guarded by something like
> > IS_ENABLED(CONFIG_PROVE_RCU).  Of course, checks of this sort need to
> > be confined to those portions of rcu_do_batch() that are excluding other
> > accesses to ->cblist.
> 
> Right.
> 
> > 
> > But if rcu_segcblist_countseq() is invoked only when a specific trace
> > event is enabled, it should be OK to have it guarded only by RCU_TRACE.
> 
> Indeed, so I think we are good.

Thanks, so the only changes are to patches 2/7 and 4/7 which I will work on.
1/7 was already taken by Paul. For 7/7, it sounds like I did not understand
Paul's reworks on the comments and we're still discussing it; but some
comments are better than none, so I am Ok with Pauls version of it.

thanks,

 - Joel



Re: [PATCH v9 4/7] rcu/trace: Add tracing for how segcb list changes

2020-11-04 Thread Joel Fernandes
On Wed, Nov 04, 2020 at 07:10:17PM +0530, Neeraj Upadhyay wrote:
> 
> 
> On 11/3/2020 7:56 PM, Joel Fernandes (Google) wrote:
> > Track how the segcb list changes before/after acceleration, during
> > queuing and during dequeuing.
> > 
> > This has proved useful to discover an optimization to avoid unwanted GP
> > requests when there are no callbacks accelerated. The overhead is minimal as
> > each segment's length is now stored in the respective segment.
> > 
> > Reviewed-by: Frederic Weisbecker 
> > Reviewed-by: Neeraj Upadhyay 
> > Signed-off-by: Joel Fernandes (Google) 
> > ---
> >   include/trace/events/rcu.h | 25 +
> >   kernel/rcu/rcu_segcblist.c | 34 ++
> >   kernel/rcu/rcu_segcblist.h |  5 +
> >   kernel/rcu/tree.c  |  9 +
> >   4 files changed, 73 insertions(+)
> > 
> > diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
> > index 155b5cb43cfd..5f8f2ee1a936 100644
> > --- a/include/trace/events/rcu.h
> > +++ b/include/trace/events/rcu.h
> > @@ -505,6 +505,31 @@ TRACE_EVENT_RCU(rcu_callback,
> >   __entry->qlen)
> >   );
> > +TRACE_EVENT_RCU(rcu_segcb_stats,
> > +
> > +   TP_PROTO(const char *ctx, int *cb_count, unsigned long *gp_seq),
> 
> I think we need to use long[] instead of int[] for cb_count everywhere in
> this patch?

More than 4 billion callbacks on a single cblist sounds like a bug though so
int should work. Plus I prefer to keep the tracepoint size small (on 64-bit
systems, long is 64 bits, int is 32 bits).

Thanks for all the reviews on this and other patches!

 - Joel


> 
> 
> Thanks
> Neeraj
> 
> > +
> > +   TP_ARGS(ctx, cb_count, gp_seq),
> > +
> > +   TP_STRUCT__entry(
> > +   __field(const char *, ctx)
> > +   __array(int, cb_count, RCU_CBLIST_NSEGS)
> > +   __array(unsigned long, gp_seq, RCU_CBLIST_NSEGS)
> > +   ),
> > +
> > +   TP_fast_assign(
> > +   __entry->ctx = ctx;
> > +   memcpy(__entry->cb_count, cb_count, RCU_CBLIST_NSEGS * 
> > sizeof(int));
> > +   memcpy(__entry->gp_seq, gp_seq, RCU_CBLIST_NSEGS * 
> > sizeof(unsigned long));
> > +   ),
> > +
> > +   TP_printk("%s cb_count: (DONE=%d, WAIT=%d, NEXT_READY=%d, 
> > NEXT=%d) "
> > + "gp_seq: (DONE=%lu, WAIT=%lu, NEXT_READY=%lu, 
> > NEXT=%lu)", __entry->ctx,
> > + __entry->cb_count[0], __entry->cb_count[1], 
> > __entry->cb_count[2], __entry->cb_count[3],
> > + __entry->gp_seq[0], __entry->gp_seq[1], 
> > __entry->gp_seq[2], __entry->gp_seq[3])
> > +
> > +);
> > +
> >   /*
> >* Tracepoint for the registration of a single RCU callback of the special
> >* kvfree() form.  The first argument is the RCU type, the second argument
> > diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
> > index 357c19bbcb00..2a03949d0b82 100644
> > --- a/kernel/rcu/rcu_segcblist.c
> > +++ b/kernel/rcu/rcu_segcblist.c
> > @@ -14,6 +14,7 @@
> >   #include 
> >   #include "rcu_segcblist.h"
> > +#include "rcu.h"
> >   /* Initialize simple callback list. */
> >   void rcu_cblist_init(struct rcu_cblist *rclp)
> > @@ -328,6 +329,39 @@ void rcu_segcblist_extract_done_cbs(struct 
> > rcu_segcblist *rsclp,
> > rcu_segcblist_set_seglen(rsclp, RCU_DONE_TAIL, 0);
> >   }
> > +/*
> > + * Return how many CBs each segment along with their gp_seq values.
> > + *
> > + * This function is O(N) where N is the number of segments. Only used from
> > + * tracing code which is usually disabled in production.
> > + */
> > +#ifdef CONFIG_RCU_TRACE
> > +static void rcu_segcblist_countseq(struct rcu_segcblist *rsclp,
> > +int cbcount[RCU_CBLIST_NSEGS],
> > +unsigned long gpseq[RCU_CBLIST_NSEGS])
> > +{
> > +   int i;
> > +
> > +   for (i = 0; i < RCU_CBLIST_NSEGS; i++) {
> > +   cbcount[i] = rcu_segcblist_get_seglen(rsclp, i);
> > +   gpseq[i] = rsclp->gp_seq[i];
> > +   }
> > +}
> > +
> > +void __trace_rcu_segcb_stats(struct rcu_segcblist *rsclp, const char 
> > *context)
> > +{
> > +   int cbs[RCU_CBLIST_NSEGS];
> > +   unsigned long gps[RCU_CBLIST_NSEGS];
> > +
> > +   if (!trace_rcu_segcb_stats_enabled())
> > +   return;
> > +
> > +   rcu_segcblist_countseq(rsclp, cbs, gps);
> > +
> > +   trace_rcu_segcb_stats(context, cbs, gps);
> > +}
> > +#endif
> > +
> >   /*
> >* Extract only those callbacks still pending (not yet ready to be
> >* invoked) from the specified rcu_segcblist structure and place them in
> > diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
> > index cd35c9faaf51..7750734fa116 100644
> > --- a/kernel/rcu/rcu_segcblist.h
> > +++ b/kernel/rcu/rcu_segcblist.h
> > @@ -103,3 +103,8 @@ void rcu_segcblist_advance(struct rcu_segcblist *rsclp, 
> > unsigned long seq);
> >   bool rcu_segcblist_accelerate(struct rcu_segcblist 

Re: [PATCH v9 4/7] rcu/trace: Add tracing for how segcb list changes

2020-11-04 Thread Frederic Weisbecker
On Wed, Nov 04, 2020 at 06:08:07AM -0800, Paul E. McKenney wrote:
> On Tue, Nov 03, 2020 at 04:17:31PM +0100, Frederic Weisbecker wrote:
> > On Tue, Nov 03, 2020 at 09:26:00AM -0500, Joel Fernandes (Google) wrote:
> > > +/*
> > > + * Return how many CBs each segment along with their gp_seq values.
> > > + *
> > > + * This function is O(N) where N is the number of segments. Only used 
> > > from
> > > + * tracing code which is usually disabled in production.
> > > + */
> > > +#ifdef CONFIG_RCU_TRACE
> > > +static void rcu_segcblist_countseq(struct rcu_segcblist *rsclp,
> > > +  int cbcount[RCU_CBLIST_NSEGS],
> > > +  unsigned long gpseq[RCU_CBLIST_NSEGS])
> > > +{
> > > + int i;
> > > +
> > > + for (i = 0; i < RCU_CBLIST_NSEGS; i++) {
> > > + cbcount[i] = rcu_segcblist_get_seglen(rsclp, i);
> > > + gpseq[i] = rsclp->gp_seq[i];
> > > + }
> > > +}
> > > +
> > > +void __trace_rcu_segcb_stats(struct rcu_segcblist *rsclp, const char 
> > > *context)
> > > +{
> > > + int cbs[RCU_CBLIST_NSEGS];
> > > + unsigned long gps[RCU_CBLIST_NSEGS];
> > > +
> > > + if (!trace_rcu_segcb_stats_enabled())
> > > + return;
> > 
> > Yes, very good!
> > 
> > Paul just told me that RCU_TRACE can be used in production so that confirms 
> > that we
> > wanted to avoid this loop of 8 iterations when tracing is disabled.
> 
> RCU's "don't try this in production" Kconfig option is PROVE_RCU.
> 
> I would be looking for checks that the sum of the segment lengths
> match the overall ->len or checks that all of the segment lengths
> are zero when ->cblist is empty to be guarded by something like
> IS_ENABLED(CONFIG_PROVE_RCU).  Of course, checks of this sort need to
> be confined to those portions of rcu_do_batch() that are excluding other
> accesses to ->cblist.

Right.

> 
> But if rcu_segcblist_countseq() is invoked only when a specific trace
> event is enabled, it should be OK to have it guarded only by RCU_TRACE.

Indeed, so I think we are good.

Thanks.


Re: [PATCH v9 4/7] rcu/trace: Add tracing for how segcb list changes

2020-11-04 Thread Paul E. McKenney
On Tue, Nov 03, 2020 at 04:17:31PM +0100, Frederic Weisbecker wrote:
> On Tue, Nov 03, 2020 at 09:26:00AM -0500, Joel Fernandes (Google) wrote:
> > +/*
> > + * Return how many CBs each segment along with their gp_seq values.
> > + *
> > + * This function is O(N) where N is the number of segments. Only used from
> > + * tracing code which is usually disabled in production.
> > + */
> > +#ifdef CONFIG_RCU_TRACE
> > +static void rcu_segcblist_countseq(struct rcu_segcblist *rsclp,
> > +int cbcount[RCU_CBLIST_NSEGS],
> > +unsigned long gpseq[RCU_CBLIST_NSEGS])
> > +{
> > +   int i;
> > +
> > +   for (i = 0; i < RCU_CBLIST_NSEGS; i++) {
> > +   cbcount[i] = rcu_segcblist_get_seglen(rsclp, i);
> > +   gpseq[i] = rsclp->gp_seq[i];
> > +   }
> > +}
> > +
> > +void __trace_rcu_segcb_stats(struct rcu_segcblist *rsclp, const char 
> > *context)
> > +{
> > +   int cbs[RCU_CBLIST_NSEGS];
> > +   unsigned long gps[RCU_CBLIST_NSEGS];
> > +
> > +   if (!trace_rcu_segcb_stats_enabled())
> > +   return;
> 
> Yes, very good!
> 
> Paul just told me that RCU_TRACE can be used in production so that confirms 
> that we
> wanted to avoid this loop of 8 iterations when tracing is disabled.

RCU's "don't try this in production" Kconfig option is PROVE_RCU.

I would be looking for checks that the sum of the segment lengths
match the overall ->len or checks that all of the segment lengths
are zero when ->cblist is empty to be guarded by something like
IS_ENABLED(CONFIG_PROVE_RCU).  Of course, checks of this sort need to
be confined to those portions of rcu_do_batch() that are excluding other
accesses to ->cblist.

But if rcu_segcblist_countseq() is invoked only when a specific trace
event is enabled, it should be OK to have it guarded only by RCU_TRACE.

Thanx, Paul


Re: [PATCH v9 4/7] rcu/trace: Add tracing for how segcb list changes

2020-11-04 Thread Neeraj Upadhyay




On 11/3/2020 7:56 PM, Joel Fernandes (Google) wrote:

Track how the segcb list changes before/after acceleration, during
queuing and during dequeuing.

This has proved useful to discover an optimization to avoid unwanted GP
requests when there are no callbacks accelerated. The overhead is minimal as
each segment's length is now stored in the respective segment.

Reviewed-by: Frederic Weisbecker 
Reviewed-by: Neeraj Upadhyay 
Signed-off-by: Joel Fernandes (Google) 
---
  include/trace/events/rcu.h | 25 +
  kernel/rcu/rcu_segcblist.c | 34 ++
  kernel/rcu/rcu_segcblist.h |  5 +
  kernel/rcu/tree.c  |  9 +
  4 files changed, 73 insertions(+)

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 155b5cb43cfd..5f8f2ee1a936 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -505,6 +505,31 @@ TRACE_EVENT_RCU(rcu_callback,
  __entry->qlen)
  );
  
+TRACE_EVENT_RCU(rcu_segcb_stats,

+
+   TP_PROTO(const char *ctx, int *cb_count, unsigned long *gp_seq),


I think we need to use long[] instead of int[] for cb_count everywhere 
in this patch?



Thanks
Neeraj


+
+   TP_ARGS(ctx, cb_count, gp_seq),
+
+   TP_STRUCT__entry(
+   __field(const char *, ctx)
+   __array(int, cb_count, RCU_CBLIST_NSEGS)
+   __array(unsigned long, gp_seq, RCU_CBLIST_NSEGS)
+   ),
+
+   TP_fast_assign(
+   __entry->ctx = ctx;
+   memcpy(__entry->cb_count, cb_count, RCU_CBLIST_NSEGS * 
sizeof(int));
+   memcpy(__entry->gp_seq, gp_seq, RCU_CBLIST_NSEGS * 
sizeof(unsigned long));
+   ),
+
+   TP_printk("%s cb_count: (DONE=%d, WAIT=%d, NEXT_READY=%d, NEXT=%d) 
"
+ "gp_seq: (DONE=%lu, WAIT=%lu, NEXT_READY=%lu, 
NEXT=%lu)", __entry->ctx,
+ __entry->cb_count[0], __entry->cb_count[1], 
__entry->cb_count[2], __entry->cb_count[3],
+ __entry->gp_seq[0], __entry->gp_seq[1], 
__entry->gp_seq[2], __entry->gp_seq[3])
+
+);
+
  /*
   * Tracepoint for the registration of a single RCU callback of the special
   * kvfree() form.  The first argument is the RCU type, the second argument
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 357c19bbcb00..2a03949d0b82 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -14,6 +14,7 @@
  #include 
  
  #include "rcu_segcblist.h"

+#include "rcu.h"
  
  /* Initialize simple callback list. */

  void rcu_cblist_init(struct rcu_cblist *rclp)
@@ -328,6 +329,39 @@ void rcu_segcblist_extract_done_cbs(struct rcu_segcblist 
*rsclp,
rcu_segcblist_set_seglen(rsclp, RCU_DONE_TAIL, 0);
  }
  
+/*

+ * Return how many CBs each segment along with their gp_seq values.
+ *
+ * This function is O(N) where N is the number of segments. Only used from
+ * tracing code which is usually disabled in production.
+ */
+#ifdef CONFIG_RCU_TRACE
+static void rcu_segcblist_countseq(struct rcu_segcblist *rsclp,
+int cbcount[RCU_CBLIST_NSEGS],
+unsigned long gpseq[RCU_CBLIST_NSEGS])
+{
+   int i;
+
+   for (i = 0; i < RCU_CBLIST_NSEGS; i++) {
+   cbcount[i] = rcu_segcblist_get_seglen(rsclp, i);
+   gpseq[i] = rsclp->gp_seq[i];
+   }
+}
+
+void __trace_rcu_segcb_stats(struct rcu_segcblist *rsclp, const char *context)
+{
+   int cbs[RCU_CBLIST_NSEGS];
+   unsigned long gps[RCU_CBLIST_NSEGS];
+
+   if (!trace_rcu_segcb_stats_enabled())
+   return;
+
+   rcu_segcblist_countseq(rsclp, cbs, gps);
+
+   trace_rcu_segcb_stats(context, cbs, gps);
+}
+#endif
+
  /*
   * Extract only those callbacks still pending (not yet ready to be
   * invoked) from the specified rcu_segcblist structure and place them in
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index cd35c9faaf51..7750734fa116 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -103,3 +103,8 @@ void rcu_segcblist_advance(struct rcu_segcblist *rsclp, 
unsigned long seq);
  bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq);
  void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
 struct rcu_segcblist *src_rsclp);
+#ifdef CONFIG_RCU_TRACE
+void __trace_rcu_segcb_stats(struct rcu_segcblist *rsclp, const char *context);
+#else
+#define __trace_rcu_segcb_stats(...)
+#endif
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 24c00020ab83..f6c6653b3ec2 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1497,6 +1497,8 @@ static bool rcu_accelerate_cbs(struct rcu_node *rnp, 
struct rcu_data *rdp)
if (!rcu_segcblist_pend_cbs(>cblist))
return false;
  
+	__trace_rcu_segcb_stats(>cblist, 

Re: [PATCH v9 4/7] rcu/trace: Add tracing for how segcb list changes

2020-11-03 Thread Frederic Weisbecker
On Tue, Nov 03, 2020 at 09:26:00AM -0500, Joel Fernandes (Google) wrote:
> +/*
> + * Return how many CBs each segment along with their gp_seq values.
> + *
> + * This function is O(N) where N is the number of segments. Only used from
> + * tracing code which is usually disabled in production.
> + */
> +#ifdef CONFIG_RCU_TRACE
> +static void rcu_segcblist_countseq(struct rcu_segcblist *rsclp,
> +  int cbcount[RCU_CBLIST_NSEGS],
> +  unsigned long gpseq[RCU_CBLIST_NSEGS])
> +{
> + int i;
> +
> + for (i = 0; i < RCU_CBLIST_NSEGS; i++) {
> + cbcount[i] = rcu_segcblist_get_seglen(rsclp, i);
> + gpseq[i] = rsclp->gp_seq[i];
> + }
> +}
> +
> +void __trace_rcu_segcb_stats(struct rcu_segcblist *rsclp, const char 
> *context)
> +{
> + int cbs[RCU_CBLIST_NSEGS];
> + unsigned long gps[RCU_CBLIST_NSEGS];
> +
> + if (!trace_rcu_segcb_stats_enabled())
> + return;

Yes, very good!

Paul just told me that RCU_TRACE can be used in production so that confirms 
that we
wanted to avoid this loop of 8 iterations when tracing is disabled.

Thanks.


[PATCH v9 4/7] rcu/trace: Add tracing for how segcb list changes

2020-11-03 Thread Joel Fernandes (Google)
Track how the segcb list changes before/after acceleration, during
queuing and during dequeuing.

This has proved useful to discover an optimization to avoid unwanted GP
requests when there are no callbacks accelerated. The overhead is minimal as
each segment's length is now stored in the respective segment.

Reviewed-by: Frederic Weisbecker 
Reviewed-by: Neeraj Upadhyay 
Signed-off-by: Joel Fernandes (Google) 
---
 include/trace/events/rcu.h | 25 +
 kernel/rcu/rcu_segcblist.c | 34 ++
 kernel/rcu/rcu_segcblist.h |  5 +
 kernel/rcu/tree.c  |  9 +
 4 files changed, 73 insertions(+)

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 155b5cb43cfd..5f8f2ee1a936 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -505,6 +505,31 @@ TRACE_EVENT_RCU(rcu_callback,
  __entry->qlen)
 );
 
+TRACE_EVENT_RCU(rcu_segcb_stats,
+
+   TP_PROTO(const char *ctx, int *cb_count, unsigned long *gp_seq),
+
+   TP_ARGS(ctx, cb_count, gp_seq),
+
+   TP_STRUCT__entry(
+   __field(const char *, ctx)
+   __array(int, cb_count, RCU_CBLIST_NSEGS)
+   __array(unsigned long, gp_seq, RCU_CBLIST_NSEGS)
+   ),
+
+   TP_fast_assign(
+   __entry->ctx = ctx;
+   memcpy(__entry->cb_count, cb_count, RCU_CBLIST_NSEGS * 
sizeof(int));
+   memcpy(__entry->gp_seq, gp_seq, RCU_CBLIST_NSEGS * 
sizeof(unsigned long));
+   ),
+
+   TP_printk("%s cb_count: (DONE=%d, WAIT=%d, NEXT_READY=%d, 
NEXT=%d) "
+ "gp_seq: (DONE=%lu, WAIT=%lu, NEXT_READY=%lu, 
NEXT=%lu)", __entry->ctx,
+ __entry->cb_count[0], __entry->cb_count[1], 
__entry->cb_count[2], __entry->cb_count[3],
+ __entry->gp_seq[0], __entry->gp_seq[1], 
__entry->gp_seq[2], __entry->gp_seq[3])
+
+);
+
 /*
  * Tracepoint for the registration of a single RCU callback of the special
  * kvfree() form.  The first argument is the RCU type, the second argument
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 357c19bbcb00..2a03949d0b82 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -14,6 +14,7 @@
 #include 
 
 #include "rcu_segcblist.h"
+#include "rcu.h"
 
 /* Initialize simple callback list. */
 void rcu_cblist_init(struct rcu_cblist *rclp)
@@ -328,6 +329,39 @@ void rcu_segcblist_extract_done_cbs(struct rcu_segcblist 
*rsclp,
rcu_segcblist_set_seglen(rsclp, RCU_DONE_TAIL, 0);
 }
 
+/*
+ * Return how many CBs each segment along with their gp_seq values.
+ *
+ * This function is O(N) where N is the number of segments. Only used from
+ * tracing code which is usually disabled in production.
+ */
+#ifdef CONFIG_RCU_TRACE
+static void rcu_segcblist_countseq(struct rcu_segcblist *rsclp,
+int cbcount[RCU_CBLIST_NSEGS],
+unsigned long gpseq[RCU_CBLIST_NSEGS])
+{
+   int i;
+
+   for (i = 0; i < RCU_CBLIST_NSEGS; i++) {
+   cbcount[i] = rcu_segcblist_get_seglen(rsclp, i);
+   gpseq[i] = rsclp->gp_seq[i];
+   }
+}
+
+void __trace_rcu_segcb_stats(struct rcu_segcblist *rsclp, const char *context)
+{
+   int cbs[RCU_CBLIST_NSEGS];
+   unsigned long gps[RCU_CBLIST_NSEGS];
+
+   if (!trace_rcu_segcb_stats_enabled())
+   return;
+
+   rcu_segcblist_countseq(rsclp, cbs, gps);
+
+   trace_rcu_segcb_stats(context, cbs, gps);
+}
+#endif
+
 /*
  * Extract only those callbacks still pending (not yet ready to be
  * invoked) from the specified rcu_segcblist structure and place them in
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index cd35c9faaf51..7750734fa116 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -103,3 +103,8 @@ void rcu_segcblist_advance(struct rcu_segcblist *rsclp, 
unsigned long seq);
 bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq);
 void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
 struct rcu_segcblist *src_rsclp);
+#ifdef CONFIG_RCU_TRACE
+void __trace_rcu_segcb_stats(struct rcu_segcblist *rsclp, const char *context);
+#else
+#define __trace_rcu_segcb_stats(...)
+#endif
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 24c00020ab83..f6c6653b3ec2 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1497,6 +1497,8 @@ static bool rcu_accelerate_cbs(struct rcu_node *rnp, 
struct rcu_data *rdp)
if (!rcu_segcblist_pend_cbs(>cblist))
return false;
 
+   __trace_rcu_segcb_stats(>cblist, TPS("SegCbPreAcc"));
+
/*
 * Callbacks are often registered with incomplete grace-period
 * information.  Something about the fact that getting exact
@@ -1517,6 +1519,8 @@