Perf already has support for disassembling the branch instruction and using the branch type for filtering. The patch just records the branch type in perf_branch_entry.
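For context, that existing filtering is already reachable from user space
through perf record's -j/--branch-filter option; an illustrative invocation
that samples only user-space calls (./workload is a placeholder binary):

  perf record -j any_call,u -- ./workload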
Before recording, the patch converts the x86 branch classification to
the common branch classification.

Signed-off-by: Jin Yao <[email protected]>
---
 arch/x86/events/intel/lbr.c | 69 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 68 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 81b321a..57d17a4 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -109,6 +109,9 @@ enum {
 	X86_BR_ZERO_CALL	= 1 << 15,/* zero length call */
 	X86_BR_CALL_STACK	= 1 << 16,/* call stack */
 	X86_BR_IND_JMP		= 1 << 17,/* indirect jump */
+
+	X86_BR_TYPE_SAVE	= 1 << 18,/* indicate to save branch type */
+
 };
 
 #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
@@ -670,6 +673,10 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 
 	if (br_type & PERF_SAMPLE_BRANCH_CALL)
 		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
+
+	if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
+		mask |= X86_BR_TYPE_SAVE;
+
 	/*
 	 * stash actual user request into reg, it may
 	 * be used by fixup code for some CPU
@@ -923,6 +930,58 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
 	return ret;
 }
 
+static int
+common_branch_type(int type, u64 from, u64 to)
+{
+	int ret;
+
+	type = type & (~(X86_BR_KERNEL | X86_BR_USER));
+
+	switch (type) {
+	case X86_BR_CALL:
+	case X86_BR_ZERO_CALL:
+		ret = PERF_BR_CALL;
+		break;
+
+	case X86_BR_RET:
+		ret = PERF_BR_RET;
+		break;
+
+	case X86_BR_SYSCALL:
+	case X86_BR_SYSRET:
+	case X86_BR_INT:
+	case X86_BR_IRET:
+	case X86_BR_IRQ:
+	case X86_BR_ABORT:
+		ret = PERF_BR_FAR_BRANCH;
+		break;
+
+	case X86_BR_JCC:
+		if (to > from)
+			ret = PERF_BR_JCC_FWD;
+		else
+			ret = PERF_BR_JCC_BWD;
+		break;
+
+	case X86_BR_JMP:
+		ret = PERF_BR_JMP;
+		break;
+
+	case X86_BR_IND_CALL:
+		ret = PERF_BR_IND_CALL;
+		break;
+
+	case X86_BR_IND_JMP:
+		ret = PERF_BR_IND_JMP;
+		break;
+
+	default:
+		ret = PERF_BR_NONE;
+	}
+
+	return ret;
+}
+
 /*
  * implement actual branch filter based on user demand.
  * Hardware may not exactly satisfy that request, thus
@@ -939,7 +998,8 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 	bool compress = false;
 
 	/* if sampling all branches, then nothing to filter */
-	if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
+	if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
+	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
 		return;
 
 	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
@@ -960,6 +1020,13 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 			cpuc->lbr_entries[i].from = 0;
 			compress = true;
 		}
+
+		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
+			cpuc->lbr_entries[i].type = common_branch_type(type,
+								       from,
+								       to);
+		else
+			cpuc->lbr_entries[i].type = PERF_BR_NONE;
 	}
 
 	if (!compress)
-- 
2.7.4
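As a usage note beyond the patch itself: a minimal user-space sketch of how a
consumer of the sampled branch stack might decode the type recorded above.
The PERF_BR_* values and the 4-bit "type" field of struct perf_branch_entry
come from the companion uapi patches in this series (not shown here); the
enumerator ordering and struct layout below are illustrative stand-ins, not
the real definitions from include/uapi/linux/perf_event.h.

#include <stdio.h>
#include <stdint.h>

/*
 * Illustrative stand-ins for the companion uapi patches in this series.
 * Only the PERF_BR_* names are taken from this patch; the enumerator
 * ordering and the bitfield layout are assumptions for this sketch.
 */
enum {
	PERF_BR_NONE,
	PERF_BR_JCC_FWD,
	PERF_BR_JCC_BWD,
	PERF_BR_JMP,
	PERF_BR_IND_JMP,
	PERF_BR_CALL,
	PERF_BR_IND_CALL,
	PERF_BR_RET,
	PERF_BR_FAR_BRANCH,
	PERF_BR_MAX,
};

struct perf_branch_entry {
	uint64_t from;
	uint64_t to;
	uint64_t mispred:1, predicted:1, in_tx:1, abort:1,
		 cycles:16, type:4, reserved:40;
};

static const char * const branch_type_name[PERF_BR_MAX] = {
	[PERF_BR_NONE]		= "unknown",
	[PERF_BR_JCC_FWD]	= "conditional forward",
	[PERF_BR_JCC_BWD]	= "conditional backward",
	[PERF_BR_JMP]		= "jump",
	[PERF_BR_IND_JMP]	= "indirect jump",
	[PERF_BR_CALL]		= "call",
	[PERF_BR_IND_CALL]	= "indirect call",
	[PERF_BR_RET]		= "return",
	[PERF_BR_FAR_BRANCH]	= "far branch",
};

/* print one sampled branch and the type recorded by intel_pmu_lbr_filter() */
static void print_branch(const struct perf_branch_entry *e)
{
	const char *name = "unknown";

	if (e->type < PERF_BR_MAX && branch_type_name[e->type])
		name = branch_type_name[e->type];

	printf("0x%016llx -> 0x%016llx: %s\n",
	       (unsigned long long)e->from, (unsigned long long)e->to, name);
}

With the real headers applied, the local enum and struct definitions would be
dropped in favor of <linux/perf_event.h>.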

