On Sun, 25 Mar 2018 19:09:39 +0200
Matthias Schiffer <mschif...@universe-factory.net> wrote:

> 
> I have two more questions/issues regarding the generic code:
> 
> 1) There is a small window when enabling the profiler where rec->counter
> gets incremented, but profile_graph_return() does not record times yet. For
> functions that are called a lot, the effect is negligible, but for very
> infrequent calls, the shown average time and variance are way off. Moving
> the rec->counter increment to profile_graph_return() when
> CONFIG_FUNCTION_GRAPH_TRACER is enabled would fix this. Does this sound
> reasonable?

Because the counter is done with function_profile_call() which is used
by function tracer (when graph tracer is not enabled), I wouldn't move
it.

But instead, we could make a function_profile_call() helper that does
all that function_profile_call does, and return a value if it does
something. For example:

static int
calculate_profile_call(unsigned long ip, unsigned long parent_ip,
                       struct ftrace_ops *ops, struct pt_regs *regs)
{
        struct ftrace_profile_stat *stat;
        struct ftrace_profile *rec;
        unsigned long flags;
        int ret = 0;

        if (!ftrace_profile_enabled)
                return 0;

        local_irq_save(flags);

        stat = this_cpu_ptr(&ftrace_profile_stats);
        if (!stat->hash || !ftrace_profile_enabled)
                goto out;

        rec = ftrace_find_profiled_func(stat, ip);
        if (!rec) {
                rec = ftrace_profile_alloc(stat, ip);
                if (!rec)
                        goto out;
        }

        rec->counter++;
        ret = 1;
 out:
        local_irq_restore(flags);
        return ret;
}

static void
function_profile_call(unsigned long ip, unsigned long parent_ip,
                      struct ftrace_ops *ops, struct pt_regs *regs)
{
        calculate_profile_call(ip, parent_ip, ops, regs);
}

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static int profile_graph_entry(struct ftrace_graph_ent *trace)
{
        int index = trace->depth;

        if (!calculate_profile_call(trace->func, 0, NULL, NULL))
                return 0;

        /* If function graph is shutting down, ret_stack can be NULL */
        if (!current->ret_stack)
                return 0;

        if (index >= 0 && index < FTRACE_RETFUNC_DEPTH)
                current->ret_stack[index].subtime = 0;

        return 1;
}

If we don't calculate the profile call, then we return 0, this will
tell the function graph tracer not to perform the return part of the
tracing.

> 
> 2) I noticed that my patch does not handle rec->counter overflows correctly
> and will cause a zero division. I can think of two solutions for handling
> overflows that keep the avg / stddev fields meaningful:
> - Reset time/avg/stddev to 0 on overflows
> - Stop updating the record at 0xffffffff, avoiding the overflow

Or add an option that lets the user decide. But I still would like to
find a way not to add a that division on every function call. That's
really heavy weight.

-- Steve

> 
> Matthias
> 
> 
> > 
> > 
> >   
> >> ---
> >>  kernel/trace/ftrace.c | 33 +++++++++++++++++++--------------
> >>  1 file changed, 19 insertions(+), 14 deletions(-)
> >>
> >> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> >> index eac9ce2c57a2..16dce67b855a 100644
> >> --- a/kernel/trace/ftrace.c
> >> +++ b/kernel/trace/ftrace.c
> >> @@ -460,7 +460,8 @@ struct ftrace_profile {
> >>    unsigned long                   counter;
> >>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
> >>    unsigned long long              time;
> >> -  unsigned long long              time_squared;
> >> +  unsigned long long              avg;
> >> +  unsigned long long              stddev;
> >>  #endif
> >>  };
> >>  
> >> @@ -580,7 +581,6 @@ static int function_stat_show(struct seq_file *m, void 
> >> *v)
> >>    int ret = 0;
> >>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
> >>    static struct trace_seq s;
> >> -  unsigned long long avg;
> >>    unsigned long long stddev;
> >>  #endif
> >>    mutex_lock(&ftrace_profile_lock);
> >> @@ -592,9 +592,7 @@ static int function_stat_show(struct seq_file *m, void 
> >> *v)
> >>    }
> >>  
> >>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
> >> -  avg = rec->time;
> >> -  do_div(avg, rec->counter);
> >> -  if (tracing_thresh && (avg < tracing_thresh))
> >> +  if (tracing_thresh && (rec->avg < tracing_thresh))
> >>            goto out;
> >>  #endif
> >>  
> >> @@ -608,24 +606,19 @@ static int function_stat_show(struct seq_file *m, 
> >> void *v)
> >>    if (rec->counter <= 1)
> >>            stddev = 0;
> >>    else {
> >> -          /*
> >> -           * Apply Welford's method:
> >> -           * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2)
> >> -           */
> >> -          stddev = rec->counter * rec->time_squared -
> >> -                   rec->time * rec->time;
> >> +          stddev = rec->stddev;
> >>  
> >>            /*
> >>             * Divide only 1000 for ns^2 -> us^2 conversion.
> >>             * trace_print_graph_duration will divide 1000 again.
> >>             */
> >> -          do_div(stddev, rec->counter * (rec->counter - 1) * 1000);
> >> +          do_div(stddev, 1000 * (rec->counter - 1));
> >>    }
> >>  
> >>    trace_seq_init(&s);
> >>    trace_print_graph_duration(rec->time, &s);
> >>    trace_seq_puts(&s, "    ");
> >> -  trace_print_graph_duration(avg, &s);
> >> +  trace_print_graph_duration(rec->avg, &s);
> >>    trace_seq_puts(&s, "    ");
> >>    trace_print_graph_duration(stddev, &s);
> >>    trace_print_seq(m, &s);
> >> @@ -905,8 +898,20 @@ static void profile_graph_return(struct 
> >> ftrace_graph_ret *trace)
> >>  
> >>    rec = ftrace_find_profiled_func(stat, trace->func);
> >>    if (rec) {
> >> +          unsigned long long avg, delta1, delta2;
> >> +
> >>            rec->time += calltime;
> >> -          rec->time_squared += calltime * calltime;
> >> +
> >> +          /* Apply Welford's method */
> >> +          delta1 = calltime - rec->avg;
> >> +
> >> +          avg = rec->time;
> >> +          do_div(avg, rec->counter);
> >> +          rec->avg = avg;
> >> +
> >> +          delta2 = calltime - rec->avg;
> >> +
> >> +          rec->stddev += delta1 * delta2;
> >>    }
> >>  
> >>   out:
> >>  
> > 
> >   
> 
> 

Reply via email to