Hi, So I have the patch below + the kernel code to handle it, i.e. per-event max-stack, in addition to the system wide kernel.perf_event_max_stack knob.
Interested people should try my perf/max-stack branch, where the two patches to implement this are at the top: https://git.kernel.org/cgit/linux/kernel/git/acme/linux.git/log/?h=perf/max-stack But people have expressed interested in being able to have kernel/user separate max-stack knobs, which should be easy to do with the way things are laid out now (that perf_callchain_entry_ctx thing). One way would be to use a one bit flag sample_max_stack_split and then a union to split perf_event_attr.sample_max_stack in two u16, that way we wouldn't extend the perf_event_attr size, but then we wouldn't be able to have both per event limits for the kernel+user stack and separate max-stack limits for for kernel and user, is that an acceptable limitation? I couldn't find other members I could put in a union where to store the sample_max_stack_{user,kernel} values, ideas? I think that we should push what I have there anyway, the split would come later, be it by using: __u64 sample_max_stack_split:1; __u64 __reserved_1:35; <SNIP> union { __u32 sample_max_stack; struct { __u16 sample_max_stack_user; __u16 sample_max_stack_kernel; }; }; Or by having space for all three. - Arnaldo ------------------------------------------------------------------------------------- The tooling counterpart, now it is possible to do have some events using "fp" stacks traces, while others use 'dwarf' or something else with all of them having different max-stack settings, as needed: # perf record -e sched:sched_switch/max-stack=10/ -e cycles/call-graph=dwarf,max-stack=4/ -e cpu-cycles/call-graph=dwarf,max-stack=1024/ usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.052 MB perf.data (5 samples) ] # perf evlist -v sched:sched_switch: type: 2, size: 112, config: 0x110, { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|RAW|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, sample_max_stack: 10 cycles/call-graph=dwarf,max-stack=4/: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|PERIOD|REGS_USER|STACK_USER|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1, exclude_callchain_user: 1, sample_regs_user: 0xff0fff, sample_stack_user: 8192, sample_max_stack: 4 cpu-cycles/call-graph=dwarf,max-stack=1024/: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|PERIOD|REGS_USER|STACK_USER|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1, exclude_callchain_user: 1, sample_regs_user: 0xff0fff, sample_stack_user: 8192, sample_max_stack: 1024 # Tip: use 'perf evlist --trace-fields' to show fields for tracepoint events Using just /max-stack=N/ means /call-graph=fp,max-stack=N/, that should be further configurable by means of some .perfconfig knob. Signed-off-by: Arnaldo Carvalho de Melo <a...@redhat.com> --- tools/perf/util/callchain.h | 1 + tools/perf/util/evsel.c | 16 ++++++++++++++-- tools/perf/util/evsel.h | 2 ++ tools/perf/util/parse-events.c | 8 ++++++++ tools/perf/util/parse-events.h | 1 + tools/perf/util/parse-events.l | 1 + tools/perf/util/session.c | 1 + 7 files changed, 28 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 65e2a4f7cb4e..9101dbed4ac3 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -95,6 +95,7 @@ struct callchain_param { u32 dump_size; enum chain_mode mode; u32 print_limit; + u32 max_stack; double min_percent; sort_chain_func_t sort; enum chain_order order; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a23f54793e51..9f3871185550 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -571,6 +571,8 @@ void perf_evsel__config_callchain(struct perf_evsel *evsel, perf_evsel__set_sample_bit(evsel, CALLCHAIN); + attr->sample_max_stack = param->max_stack; + if (param->record_mode == CALLCHAIN_LBR) { if (!opts->branch_stack) { if (attr->exclude_user) { @@ -634,7 +636,8 @@ static void apply_config_terms(struct perf_evsel *evsel, struct perf_event_attr *attr = &evsel->attr; struct callchain_param param; u32 dump_size = 0; - char *callgraph_buf = NULL; + u32 max_stack = 0; + const char *callgraph_buf = NULL; /* callgraph default */ param.record_mode = callchain_param.record_mode; @@ -661,6 +664,9 @@ static void apply_config_terms(struct perf_evsel *evsel, case PERF_EVSEL__CONFIG_TERM_STACK_USER: dump_size = term->val.stack_user; break; + case PERF_EVSEL__CONFIG_TERM_MAX_STACK: + max_stack = term->val.max_stack; + break; case PERF_EVSEL__CONFIG_TERM_INHERIT: /* * attr->inherit should has already been set by @@ -676,7 +682,12 @@ static void apply_config_terms(struct perf_evsel *evsel, } /* User explicitly set per-event callgraph, clear the old setting and reset. */ - if ((callgraph_buf != NULL) || (dump_size > 0)) { + if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) { + if (max_stack) { + param.max_stack = max_stack; + if (callgraph_buf == NULL) + callgraph_buf = "fp"; + } /* parse callgraph parameters */ if (callgraph_buf != NULL) { @@ -1328,6 +1339,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(clockid, p_signed); PRINT_ATTRf(sample_regs_intr, p_hex); PRINT_ATTRf(aux_watermark, p_unsigned); + PRINT_ATTRf(sample_max_stack, p_unsigned); return ret; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 8a644fef452c..caaab94aea95 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -44,6 +44,7 @@ enum { PERF_EVSEL__CONFIG_TERM_CALLGRAPH, PERF_EVSEL__CONFIG_TERM_STACK_USER, PERF_EVSEL__CONFIG_TERM_INHERIT, + PERF_EVSEL__CONFIG_TERM_MAX_STACK, PERF_EVSEL__CONFIG_TERM_MAX, }; @@ -56,6 +57,7 @@ struct perf_evsel_config_term { bool time; char *callgraph; u64 stack_user; + u32 max_stack; bool inherit; } val; }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index bcbc983d4b12..89d40bb425e1 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -900,6 +900,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_STACKSIZE] = "stack-size", [PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit", [PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit", + [PARSE_EVENTS__TERM_TYPE_MAX_STACK] = "max-stack", }; static bool config_term_shrinked; @@ -995,6 +996,9 @@ do { \ case PARSE_EVENTS__TERM_TYPE_NAME: CHECK_TYPE_VAL(STR); break; + case PARSE_EVENTS__TERM_TYPE_MAX_STACK: + CHECK_TYPE_VAL(NUM); + break; default: err->str = strdup("unknown term"); err->idx = term->err_term; @@ -1040,6 +1044,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr, case PARSE_EVENTS__TERM_TYPE_STACKSIZE: case PARSE_EVENTS__TERM_TYPE_INHERIT: case PARSE_EVENTS__TERM_TYPE_NOINHERIT: + case PARSE_EVENTS__TERM_TYPE_MAX_STACK: return config_term_common(attr, term, err); default: if (err) { @@ -1109,6 +1114,9 @@ do { \ case PARSE_EVENTS__TERM_TYPE_NOINHERIT: ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 0 : 1); break; + case PARSE_EVENTS__TERM_TYPE_MAX_STACK: + ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num); + break; default: break; } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index d740c3ca9a1d..46c05ccd5dfe 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -68,6 +68,7 @@ enum { PARSE_EVENTS__TERM_TYPE_STACKSIZE, PARSE_EVENTS__TERM_TYPE_NOINHERIT, PARSE_EVENTS__TERM_TYPE_INHERIT, + PARSE_EVENTS__TERM_TYPE_MAX_STACK, __PARSE_EVENTS__TERM_TYPE_NR, }; diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 1477fbc78993..01af1ee90a27 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -199,6 +199,7 @@ branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); } call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); } stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); } +max-stack { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); } inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); } no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); } , { return ','; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 2335b2824d8a..802c4ef5a47c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -608,6 +608,7 @@ do { \ bswap_field_64(sample_regs_user); bswap_field_32(sample_stack_user); bswap_field_32(aux_watermark); + bswap_field_32(sample_max_stack); /* * After read_format are bitfields. Check read_format because -- 2.5.5