Perf already supports disassembling the branch instruction
and using the branch type for filtering. This patch records
the branch type in perf_branch_entry.

Before recording, it converts the x86 branch classification
to the common branch classification.
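
For illustration only, here is a minimal user-space sketch of how the
saved branch type could be requested and consumed. It assumes the
PERF_SAMPLE_BRANCH_TYPE_SAVE flag, the PERF_BR_* values and the
perf_branch_entry::type field introduced by the uapi patches earlier in
this series; ring-buffer setup and sample parsing are elided:

  #include <stdio.h>
  #include <linux/perf_event.h>

  /* Request the branch stack plus the per-entry branch type. */
  static struct perf_event_attr attr = {
          .type               = PERF_TYPE_HARDWARE,
          .config             = PERF_COUNT_HW_CPU_CYCLES,
          .sample_type        = PERF_SAMPLE_BRANCH_STACK,
          .branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
                                PERF_SAMPLE_BRANCH_TYPE_SAVE,
  };

  /* Walk the branch stack of one already-decoded sample. */
  static void walk_branch_stack(const struct perf_branch_entry *entries,
                                unsigned long nr)
  {
          for (unsigned long i = 0; i < nr; i++) {
                  /* entries[i].type holds PERF_BR_CALL, PERF_BR_RET, ... */
                  if (entries[i].type == PERF_BR_CALL)
                          printf("call: 0x%llx -> 0x%llx\n",
                                 (unsigned long long)entries[i].from,
                                 (unsigned long long)entries[i].to);
          }
  }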

Signed-off-by: Jin Yao <[email protected]>
---
 arch/x86/events/intel/lbr.c | 69 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 68 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 81b321a..57d17a4 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -109,6 +109,9 @@ enum {
        X86_BR_ZERO_CALL        = 1 << 15,/* zero length call */
        X86_BR_CALL_STACK       = 1 << 16,/* call stack */
        X86_BR_IND_JMP          = 1 << 17,/* indirect jump */
+
+       X86_BR_TYPE_SAVE        = 1 << 18,/* indicate to save branch type */
+
 };
 
 #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
@@ -670,6 +673,10 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 
        if (br_type & PERF_SAMPLE_BRANCH_CALL)
                mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
+
+       if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
+               mask |= X86_BR_TYPE_SAVE;
+
        /*
         * stash actual user request into reg, it may
         * be used by fixup code for some CPU
@@ -923,6 +930,58 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
        return ret;
 }
 
+static int
+common_branch_type(int type, u64 from, u64 to)
+{
+       int ret;
+
+       type = type & (~(X86_BR_KERNEL | X86_BR_USER));
+
+       switch (type) {
+       case X86_BR_CALL:
+       case X86_BR_ZERO_CALL:
+               ret = PERF_BR_CALL;
+               break;
+
+       case X86_BR_RET:
+               ret = PERF_BR_RET;
+               break;
+
+       case X86_BR_SYSCALL:
+       case X86_BR_SYSRET:
+       case X86_BR_INT:
+       case X86_BR_IRET:
+       case X86_BR_IRQ:
+       case X86_BR_ABORT:
+               ret = PERF_BR_FAR_BRANCH;
+               break;
+
+       case X86_BR_JCC:
+               if (to > from)
+                       ret = PERF_BR_JCC_FWD;
+               else
+                       ret = PERF_BR_JCC_BWD;
+               break;
+
+       case X86_BR_JMP:
+               ret = PERF_BR_JMP;
+               break;
+
+       case X86_BR_IND_CALL:
+               ret = PERF_BR_IND_CALL;
+               break;
+
+       case X86_BR_IND_JMP:
+               ret = PERF_BR_IND_JMP;
+               break;
+
+       default:
+               ret = PERF_BR_NONE;
+       }
+
+       return ret;
+}
+
 /*
  * implement actual branch filter based on user demand.
  * Hardware may not exactly satisfy that request, thus
@@ -939,7 +998,8 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
        bool compress = false;
 
        /* if sampling all branches, then nothing to filter */
-       if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
+       if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
+           ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
                return;
 
        for (i = 0; i < cpuc->lbr_stack.nr; i++) {
@@ -960,6 +1020,13 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
                        cpuc->lbr_entries[i].from = 0;
                        compress = true;
                }
+
+               if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
+                       cpuc->lbr_entries[i].type = common_branch_type(type,
+                                                                      from,
+                                                                      to);
+               else
+                       cpuc->lbr_entries[i].type = PERF_BR_NONE;
        }
 
        if (!compress)
-- 
2.7.4
