Hi,

These here patches improve the perf branch-stack support and add branch-stack support to perf-annotate.
/*
 * They appear to work for me; but some of it is fairly hairy code so please
 * have a hard look.
 *
 * The last patch includes the userspace changes; and includes sample output
 * of 'perf annotate branches', but since email doesn't do color, lots of
 * information is lost. A screenshot of the same data can be found here:
 *
 *   http://programming.kicks-ass.net/sekrit/peterz1.png
 *
 * And the actual program can be found below.
 *
 * --- branches.c ---
 */

/*
 * branches.c - small branch-heavy test program.
 *
 * Usage: branches [bits [seed]]
 *
 *   bits  selects the entry of lfsr_taps[] to use and sets the number of
 *         loop iterations to 2^bits (default 22)
 *   seed  initial shift-register value (default 2)
 *
 * The program steps a linear feedback shift register and takes conditional
 * branches on its low bit. It produces no output; main() returns 0 on
 * success and EXIT_FAILURE when an argument is malformed or out of range.
 */
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <stdio.h>

/*
 * Mask with only bit x set. The constant is unsigned long so that B(31) does
 * not shift into the sign bit of an int, which is undefined and in practice
 * sign-extends into the upper half of a 64-bit table entry.
 */
#define B(x) (1UL << (x))

/* First and last index of lfsr_taps[] that carries a tap mask. */
#define LFSR_MIN_BITS 2
#define LFSR_MAX_BITS 32

/*
 * Feedback tap masks, indexed by register width in bits. The highest tap of
 * entry n is bit n-1. Indices 0 and 1 have no entry and are rejected.
 */
const unsigned long lfsr_taps[] = {
	[2]  = B(0) | B(1),
	[3]  = B(0) | B(2),
	[4]  = B(0) | B(3),
	[5]  = B(1) | B(4),
	[6]  = B(0) | B(5),
	[7]  = B(0) | B(6),
	[8]  = B(1) | B(2) | B(3) | B(7),
	[9]  = B(3) | B(8),
	[10] = B(2) | B(9),
	[11] = B(1) | B(10),
	[12] = B(0) | B(3) | B(5) | B(11),
	[13] = B(0) | B(2) | B(3) | B(12),
	[14] = B(0) | B(2) | B(4) | B(13),
	[15] = B(0) | B(14),
	[16] = B(1) | B(2) | B(4) | B(15),
	[17] = B(2) | B(16),
	[18] = B(6) | B(17),
	[19] = B(0) | B(1) | B(4) | B(18),
	[20] = B(2) | B(19),
	[21] = B(1) | B(20),
	[22] = B(0) | B(21),
	[23] = B(4) | B(22),
	[24] = B(0) | B(2) | B(3) | B(23),
	[25] = B(2) | B(24),
	[26] = B(0) | B(1) | B(5) | B(25),
	[27] = B(0) | B(1) | B(4) | B(26),
	[28] = B(2) | B(27),
	[29] = B(1) | B(28),
	[30] = B(0) | B(3) | B(5) | B(29),
	[31] = B(2) | B(30),
	[32] = B(1) | B(5) | B(6) | B(31),
};

/* Tap mask currently in use; set by lfsr_init(), read by lfsr(). */
unsigned long taps;

/*
 * Advance the shift register by one step and return the new state.
 *
 * The register is shifted right by one; if the bit shifted out was set, the
 * tap mask is XORed in. (0 - bit) is either 0 or all-ones, so the AND selects
 * the mask without a conditional branch.
 */
static unsigned long lfsr(unsigned long lfsr)
{
	lfsr = (lfsr >> 1) ^ ((0x0u - (lfsr & 0x1u)) & taps);
	return lfsr;
}

/*
 * Select the tap mask for a register that is 'bits' wide.
 *
 * 'bits' must lie in [LFSR_MIN_BITS, LFSR_MAX_BITS]. Anything else would
 * index outside the populated part of lfsr_taps[], so it is reported on
 * stderr and the program exits with EXIT_FAILURE.
 */
void lfsr_init(long bits)
{
	if (bits < LFSR_MIN_BITS || bits > LFSR_MAX_BITS) {
		fprintf(stderr, "lfsr_init: unsupported register width %ld\n",
			bits);
		exit(EXIT_FAILURE);
	}

	taps = lfsr_taps[bits];
}

/*
 * Accumulator updated by branches(). It is volatile, presumably so that the
 * compiler has to keep every update (and with it every branch) in the loop.
 */
unsigned volatile long acc = 0;

/*
 * Run 'iterations' rounds of the branch workload, starting the register at
 * 'seed'. lfsr_init() must have been called first.
 *
 * Each round contains:
 *   - a branch on the low bit of the register,
 *   - a branch on the low bit of 'seed', which never changes inside the loop,
 *   - a second branch on the low bit of the register, with the sense
 *     inverted, after the register has been stepped once.
 *
 * The only result is the final value of 'acc'.
 */
void branches(unsigned long seed, unsigned long iterations)
{
	unsigned long i, reg = seed;

	for (i = 0; i < iterations; i++) {
		if (reg & 0x1)
			acc++;
		else
			acc--;

		reg = lfsr(reg);

		if (seed & 1)
			acc >>= 2;

		if (~reg & 0x1)
			acc--;
		else
			acc++;

		reg = lfsr(reg);
	}
}

/*
 * Parse 'text' as a base-10 long.
 *
 * Returns 0 and stores the value in *out on success. Returns -1 and leaves
 * *out untouched if 'text' is empty, has trailing characters, or is out of
 * range for long.
 */
static int parse_long(const char *text, long *out)
{
	char *end;
	long value;

	errno = 0;
	value = strtol(text, &end, 10);
	if (end == text || *end != '\0' || errno == ERANGE)
		return -1;

	*out = value;
	return 0;
}

int main(int argc, char **argv)
{
	long bits = 22;
	long seed = 2;

	if (argc > 1 && parse_long(argv[1], &bits) < 0) {
		fprintf(stderr, "branches: invalid bits '%s'\n", argv[1]);
		return EXIT_FAILURE;
	}

	if (argc > 2 && parse_long(argv[2], &seed) < 0) {
		fprintf(stderr, "branches: invalid seed '%s'\n", argv[2]);
		return EXIT_FAILURE;
	}

	/*
	 * 'bits' is used both as a table index and as a shift count, so it
	 * must be inside the table and smaller than the width of the shifted
	 * type.
	 */
	if (bits < LFSR_MIN_BITS || bits > LFSR_MAX_BITS ||
	    (size_t)bits >= sizeof(unsigned long) * CHAR_BIT) {
		fprintf(stderr, "branches: bits must be in [%d, %d]\n",
			LFSR_MIN_BITS, LFSR_MAX_BITS);
		return EXIT_FAILURE;
	}

	lfsr_init(bits);
	branches(seed, 1UL << bits);

	return 0;
}