Hi Kan,

On Fri, Mar 13, 2015 at 02:18:07AM +0000, kan.li...@intel.com wrote:
> From: Kan Liang <kan.li...@intel.com>
> 
> When multiple events are sampled it may not be necessary to collect
> callgraphs for all of them. The sample sites are usually nearby, and
> it's enough to collect the callgraphs on a reference event (such as
> precise cycles or precise instructions). Similarly we also don't need
> fine grained time stamps on all events, as it's enough to have time
> stamps on the regular reference events. This patchkit adds the ability
> to turn off callgraphs and time stamps per event. This in turn can
> reduce sampling overhead and the size of the perf.data (add some data)

Have you taken a look at the group sampling feature?
(e.g. perf record -e '{ev1,ev2}:S')

Thanks,
Namhyung


> 
> Here is an example.
> 
> Collect callgraph and time for all events. The perf.data size is ~22M
> 
> $ sudo ./perf record -e
> cpu/cpu-cycles,period=100000/,cpu/instructions,period=20000/p
> --call-graph fp ./tchain_edit
> [ perf record: Woken up 92 times to write data ]
> [ perf record: Captured and wrote 22.909 MB perf.data (249446 samples) ]
> 
> Only collect callgraph and time on the first event. The size is ~12M
> 
> $ sudo ./perf record -e
> cpu/cpu-cycles,callgraph=1,time=1,period=100000/,
> cpu/instructions,callgraph=0,time=0,period=20000/p
> ./tchain_edit
> [ perf record: Woken up 50 times to write data ]
> [ perf record: Captured and wrote 12.489 MB perf.data (203267 samples) ]
> 
> perf report result for the second event.
> Samples: 101K of event
> 'cpu/instructions,callgraph=0,time=0,period=20000/p', Event count
> (approx.): 2035000000
>   Children      Self  Command      Shared Object     Symbol
> -   97.79%     0.00%  tchain_edit  libc-2.15.so      [.]
> __libc_start_main
>      __libc_start_main
> -   97.79%     0.00%  tchain_edit  tchain_edit       [.] main
>      main
>      __libc_start_main
> -   97.79%     0.00%  tchain_edit  tchain_edit       [.] f1
>      f1
>      main
>      __libc_start_main
> -   97.79%     0.00%  tchain_edit  tchain_edit       [.] f2
>      f2
>      f1
>      main
>      __libc_start_main
> -   97.79%    97.42%  tchain_edit  tchain_edit       [.] f3
>      f3
>      f2
>      f1
>      main
>      __libc_start_main
> 
> Signed-off-by: Kan Liang <kan.li...@intel.com>
> ---
>  tools/perf/Documentation/perf-record.txt | 13 +++++++++
>  tools/perf/builtin-record.c              |  7 +++--
>  tools/perf/perf.h                        |  2 ++
>  tools/perf/util/evsel.c                  | 50 
> ++++++++++++++++++++++++++++++--
>  tools/perf/util/parse-events.c           | 33 +++++++++++++++++++++
>  tools/perf/util/parse-events.h           |  3 ++
>  tools/perf/util/parse-events.l           |  3 ++
>  tools/perf/util/parse-options.c          |  2 ++
>  tools/perf/util/parse-options.h          |  4 +++
>  9 files changed, 111 insertions(+), 6 deletions(-)
> 
> diff --git a/tools/perf/Documentation/perf-record.txt 
> b/tools/perf/Documentation/perf-record.txt
> index 355c4f5..8b3e76c0 100644
> --- a/tools/perf/Documentation/perf-record.txt
> +++ b/tools/perf/Documentation/perf-record.txt
> @@ -45,6 +45,19 @@ OPTIONS
>            param1 and param2 are defined as formats for the PMU in:
>            /sys/bus/event_sources/devices/<pmu>/format/*
>  
> +          There are also some params which are not defined in 
> .../<pmu>/format/*.
> +          These params can be used to set event defaults.
> +          Here is a list of the params.
> +          - 'period': Set event sampling period
> +          - 'callgraph': Disable/enable callgraph. Acceptable values are
> +                         1 for FP mode, 2 for dwarf mode, 3 for LBR mode,
> +                         0 for disabling callgraph.
> +          - 'stack_size': user stack size for dwarf mode
> +          - 'time': Disable/enable time stamping. Acceptable values are
> +                    1 for enabling time stamping. 0 for disabling time 
> stamping.
> +          Note: If user explicitly sets options which conflict with the 
> params,
> +          the value set by the params will be overridden.
> +
>          - a hardware breakpoint event in the form of 
> '\mem:addr[/len][:access]'
>            where addr is the address in memory you want to break in.
>            Access is the memory access type (read, write, execute) it can
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 5a2ff51..bf536d1 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -795,10 +795,10 @@ struct option __record_options[] = {
>                    perf_evlist__parse_mmap_pages),
>       OPT_BOOLEAN(0, "group", &record.opts.group,
>                   "put the counters into a counter group"),
> -     OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
> +     OPT_CALLBACK_NOOPT_SET('g', NULL, &record.opts, 
> &record.opts.callgraph_set,
>                          NULL, "enables call-graph recording" ,
>                          &record_callchain_opt),
> -     OPT_CALLBACK(0, "call-graph", &record.opts,
> +     OPT_CALLBACK_SET(0, "call-graph", &record.opts, 
> &record.opts.callgraph_set,
>                    "mode[,dump_size]", record_callchain_help,
>                    &record_parse_callchain_opt),
>       OPT_INCR('v', "verbose", &verbose,
> @@ -808,7 +808,8 @@ struct option __record_options[] = {
>                   "per thread counts"),
>       OPT_BOOLEAN('d', "data", &record.opts.sample_address,
>                   "Sample addresses"),
> -     OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample 
> timestamps"),
> +     OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
> +                     &record.opts.sample_time_set, "Sample timestamps"),
>       OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
>       OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
>                   "don't sample"),
> diff --git a/tools/perf/perf.h b/tools/perf/perf.h
> index 1caa70a..72ebc91 100644
> --- a/tools/perf/perf.h
> +++ b/tools/perf/perf.h
> @@ -51,9 +51,11 @@ struct record_opts {
>       bool         sample_address;
>       bool         sample_weight;
>       bool         sample_time;
> +     bool         sample_time_set;
>       bool         period;
>       bool         sample_intr_regs;
>       bool         running_time;
> +     bool         callgraph_set;
>       unsigned int freq;
>       unsigned int mmap_pages;
>       unsigned int user_freq;
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index bb4eff2..88a695b 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -614,10 +614,53 @@ void perf_evsel__config(struct perf_evsel *evsel, 
> struct record_opts *opts)
>       struct perf_event_attr *attr = &evsel->attr;
>       int track = evsel->tracking;
>       bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread;
> +     bool sample_time = opts->sample_time;
> +     bool callgraph = callchain_param.enabled;
>  
>       attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1;
>       attr->inherit       = !opts->no_inherit;
>  
> +     /*
> +      * If user doesn't explicitly set callgraph or time option,
> +      * let event attribute decide.
> +      */
> +     if (!opts->callgraph_set) {
> +             if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) {
> +                     callgraph = true;
> +                     if (attr->sample_type & PERF_SAMPLE_STACK_USER) {
> +                             callchain_param.record_mode = CALLCHAIN_DWARF;
> +                             if (attr->sample_stack_user)
> +                                     callchain_param.dump_size = 
> attr->sample_stack_user;
> +                             else
> +                                     callchain_param.dump_size = 8192;
> +                     } else if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK)
> +                             callchain_param.record_mode = CALLCHAIN_LBR;
> +                     else
> +                             callchain_param.record_mode = CALLCHAIN_FP;
> +             } else
> +                     callgraph = false;
> +     }
> +     /*
> +      * Clear the bit which parse event may be set,
> +      * Let perf_evsel__config_callgraph check and reset later.
> +      */
> +     attr->sample_type &= ~(PERF_SAMPLE_CALLCHAIN |
> +                            PERF_SAMPLE_STACK_USER |
> +                            PERF_SAMPLE_BRANCH_STACK);
> +
> +
> +     if (!opts->sample_time_set) {
> +             if (attr->sample_type & PERF_SAMPLE_TIME)
> +                     sample_time = true;
> +             else
> +                     sample_time = false;
> +     }
> +     if (attr->sample_type & PERF_SAMPLE_TIME) {
> +             attr->sample_type &= ~PERF_SAMPLE_TIME;
> +             /* remove the size which add in perf_evsel__init */
> +             evsel->sample_size -= sizeof(u64);
> +     }
> +
>       perf_evsel__set_sample_bit(evsel, IP);
>       perf_evsel__set_sample_bit(evsel, TID);
>  
> @@ -683,7 +726,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct 
> record_opts *opts)
>       if (perf_evsel__is_function_event(evsel))
>               evsel->attr.exclude_callchain_user = 1;
>  
> -     if (callchain_param.enabled && !evsel->no_aux_samples)
> +     if (callgraph && !evsel->no_aux_samples)
>               perf_evsel__config_callgraph(evsel, opts);
>  
>       if (opts->sample_intr_regs) {
> @@ -700,13 +743,14 @@ void perf_evsel__config(struct perf_evsel *evsel, 
> struct record_opts *opts)
>       /*
>        * When the user explicitely disabled time don't force it here.
>        */
> -     if (opts->sample_time &&
> +     if (sample_time &&
>           (!perf_missing_features.sample_id_all &&
>           (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu)))
>               perf_evsel__set_sample_bit(evsel, TIME);
>  
>       if (opts->raw_samples && !evsel->no_aux_samples) {
> -             perf_evsel__set_sample_bit(evsel, TIME);
> +             if (sample_time)
> +                     perf_evsel__set_sample_bit(evsel, TIME);
>               perf_evsel__set_sample_bit(evsel, RAW);
>               perf_evsel__set_sample_bit(evsel, CPU);
>       }
> diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
> index fe07573..d0f30fb 100644
> --- a/tools/perf/util/parse-events.c
> +++ b/tools/perf/util/parse-events.c
> @@ -17,6 +17,7 @@
>  #include "parse-events-flex.h"
>  #include "pmu.h"
>  #include "thread_map.h"
> +#include "callchain.h"
>  
>  #define MAX_NAME_LEN 100
>  
> @@ -570,6 +571,38 @@ do {                                                     
>         \
>                * attr->branch_sample_type = term->val.num;
>                */
>               break;
> +     case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
> +             CHECK_TYPE_VAL(NUM);
> +             switch (term->val.num) {
> +             case CALLCHAIN_FP:
> +                     attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
> +                     break;
> +             case CALLCHAIN_DWARF:
> +                     attr->sample_type |= PERF_SAMPLE_CALLCHAIN |
> +                                          PERF_SAMPLE_STACK_USER;
> +                     break;
> +             case CALLCHAIN_LBR:
> +                     attr->sample_type |= PERF_SAMPLE_CALLCHAIN |
> +                                          PERF_SAMPLE_BRANCH_STACK;
> +                     break;
> +             case CALLCHAIN_NONE:
> +                     break;
> +             default:
> +                     return -EINVAL;;
> +             }
> +             break;
> +     case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
> +             CHECK_TYPE_VAL(NUM);
> +             attr->sample_stack_user = term->val.num;
> +             break;
> +     case PARSE_EVENTS__TERM_TYPE_TIME:
> +             CHECK_TYPE_VAL(NUM);
> +
> +             if (term->val.num > 1)
> +                     return -EINVAL;
> +             if (term->val.num == 1)
> +                     attr->sample_type |= PERF_SAMPLE_TIME;
> +             break;
>       case PARSE_EVENTS__TERM_TYPE_NAME:
>               CHECK_TYPE_VAL(STR);
>               break;
> diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
> index 52a2dda..2c16367 100644
> --- a/tools/perf/util/parse-events.h
> +++ b/tools/perf/util/parse-events.h
> @@ -60,6 +60,9 @@ enum {
>       PARSE_EVENTS__TERM_TYPE_NAME,
>       PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD,
>       PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
> +     PARSE_EVENTS__TERM_TYPE_CALLGRAPH,
> +     PARSE_EVENTS__TERM_TYPE_STACKSIZE,
> +     PARSE_EVENTS__TERM_TYPE_TIME,
>  };
>  
>  struct parse_events_term {
> diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
> index 94eacb6..655306b 100644
> --- a/tools/perf/util/parse-events.l
> +++ b/tools/perf/util/parse-events.l
> @@ -151,6 +151,9 @@ config2                   { return term(yyscanner, 
> PARSE_EVENTS__TERM_TYPE_CONFIG2); }
>  name                 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); 
> }
>  period                       { return term(yyscanner, 
> PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
>  branch_type          { return term(yyscanner, 
> PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
> +callgraph            { return term(yyscanner, 
> PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
> +stack_size           { return term(yyscanner, 
> PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
> +time                 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); 
> }
>  ,                    { return ','; }
>  "/"                  { BEGIN(INITIAL); return '/'; }
>  {name_minus}         { return str(yyscanner, PE_NAME); }
> diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c
> index 1457d66..3786abb 100644
> --- a/tools/perf/util/parse-options.c
> +++ b/tools/perf/util/parse-options.c
> @@ -123,6 +123,8 @@ static int get_value(struct parse_opt_ctx_t *p,
>               return 0;
>  
>       case OPTION_CALLBACK:
> +             if (opt->set)
> +                     *(bool *)opt->set = true;
>               if (unset)
>                       return (*opt->callback)(opt, NULL, 1) ? (-1) : 0;
>               if (opt->flags & PARSE_OPT_NOARG)
> diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h
> index 97b153f..20d0d29 100644
> --- a/tools/perf/util/parse-options.h
> +++ b/tools/perf/util/parse-options.h
> @@ -126,8 +126,12 @@ struct option {
>       { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value 
> = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb }
>  #define OPT_CALLBACK(s, l, v, a, h, f) \
>       { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value 
> = (v), (a), .help = (h), .callback = (f) }
> +#define OPT_CALLBACK_SET(s, l, v, os, a, h, f) \
> +     { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value 
> = (v), (a), .help = (h), .callback = (f), .set = check_vtype(os, bool *) }
>  #define OPT_CALLBACK_NOOPT(s, l, v, a, h, f) \
>       { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value 
> = (v), (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG }
> +#define OPT_CALLBACK_NOOPT_SET(s, l, v, os, a, h, f) \
> +     { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value 
> = (v), (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG, .set = 
> check_vtype(os, bool *) }
>  #define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \
>       { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value 
> = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = 
> PARSE_OPT_LASTARG_DEFAULT }
>  #define OPT_CALLBACK_DEFAULT_NOOPT(s, l, v, a, h, f, d) \
> -- 
> 1.7.11.7
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to