Re: [PATCH v2 1/3] perf-stat: introduce bperf, share hardware PMCs with BPF

2021-03-23 Thread Arnaldo Carvalho de Melo
Em Tue, Mar 23, 2021 at 09:37:42AM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Tue, Mar 23, 2021 at 09:25:52AM -0300, Arnaldo Carvalho de Melo escreveu:
> > Em Fri, Mar 19, 2021 at 03:41:57PM -0300, Arnaldo Carvalho de Melo escreveu:
> > > Em Thu, Mar 18, 2021 at 10:15:13PM +0100, Jiri Olsa escreveu:
> > > > On Tue, Mar 16, 2021 at 02:18:35PM -0700, Song Liu wrote:
> > > > > bperf is off by default. To enable it, pass --bpf-counters option to
> > > > > perf-stat. bperf uses a BPF hashmap to share information about BPF
> > > > > programs and maps used by bperf. This map is pinned to bpffs. The default
> > > > > path is /sys/fs/bpf/perf_attr_map. The user could change the path with
> > > > > option --bpf-attr-map.
> > > > > 
> > > > > Signed-off-by: Song Liu 
> > > > 
> > > > Reviewed-by: Jiri Olsa 
> > > 
> > > After applying just this first patch in the series I'm getting this
> > > after a 'make -C tools/ clean', now I'm checking if I need some new
> > > clang, ideas?
> > 
> > Works now with clang from fedora 33, I was using a locally built, older,
> > now I get this when trying as non-root, expected, but we need to improve
> > the wording.
> 
> Fails as root as well, investigating:
> 
> [root@five ~]# ls -lad /sys/fs/bpf/
> drwx-T. 2 root root 0 Mar 23 06:03 /sys/fs/bpf/
> [root@five ~]# strace -e bpf perf stat --bpf-counters sleep 1
> bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_HASH, key_size=120, value_size=8, 
> max_entries=16, map_flags=0, inner_map_fd=0, map_name="", map_ifindex=0, 
> btf_fd=0, btf_key_type_id=0, btf_value_type_id=0, 
> btf_vmlinux_value_type_id=0}, 120) = -1 EPERM (Operation not permitted)
> Failed to lock perf_event_attr map
> --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_KILLED, si_pid=13916, si_uid=0, 
> si_status=SIGTERM, si_utime=0, si_stime=0} ---
> +++ exited with 255 +++
> [root@five ~]#
>  
> > [acme@five perf]$ perf stat --bpf-counters sleep 1
> > Failed to lock perf_event_attr map
> > [acme@five perf]$

Now it works, on 5.12-rc2+

[root@five pahole]# perf stat --bpf-counters sleep 1
libbpf: elf: skipping unrecognized data section(7) .eh_frame
libbpf: elf: skipping relo section(12) .rel.eh_frame for section(7) .eh_frame
libbpf: elf: skipping unrecognized data section(8) .eh_frame
libbpf: elf: skipping relo section(13) .rel.eh_frame for section(8) .eh_frame
libbpf: elf: skipping unrecognized data section(7) .eh_frame
libbpf: elf: skipping relo section(12) .rel.eh_frame for section(7) .eh_frame
libbpf: elf: skipping unrecognized data section(8) .eh_frame
libbpf: elf: skipping relo section(13) .rel.eh_frame for section(8) .eh_frame
libbpf: elf: skipping unrecognized data section(7) .eh_frame
libbpf: elf: skipping relo section(12) .rel.eh_frame for section(7) .eh_frame
libbpf: elf: skipping unrecognized data section(8) .eh_frame
libbpf: elf: skipping relo section(13) .rel.eh_frame for section(8) .eh_frame
libbpf: elf: skipping unrecognized data section(7) .eh_frame
libbpf: elf: skipping relo section(12) .rel.eh_frame for section(7) .eh_frame
libbpf: elf: skipping unrecognized data section(8) .eh_frame
libbpf: elf: skipping relo section(13) .rel.eh_frame for section(8) .eh_frame
libbpf: elf: skipping unrecognized data section(7) .eh_frame
libbpf: elf: skipping relo section(12) .rel.eh_frame for section(7) .eh_frame
libbpf: elf: skipping unrecognized data section(8) .eh_frame
libbpf: elf: skipping relo section(13) .rel.eh_frame for section(8) .eh_frame
libbpf: elf: skipping unrecognized data section(7) .eh_frame
libbpf: elf: skipping relo section(12) .rel.eh_frame for section(7) .eh_frame
libbpf: elf: skipping unrecognized data section(8) .eh_frame
libbpf: elf: skipping relo section(13) .rel.eh_frame for section(8) .eh_frame
libbpf: elf: skipping unrecognized data section(7) .eh_frame
libbpf: elf: skipping relo section(12) .rel.eh_frame for section(7) .eh_frame
libbpf: elf: skipping unrecognized data section(8) .eh_frame
libbpf: elf: skipping relo section(13) .rel.eh_frame for section(8) .eh_frame
libbpf: elf: skipping unrecognized data section(7) .eh_frame
libbpf: elf: skipping relo section(12) .rel.eh_frame for section(7) .eh_frame
libbpf: elf: skipping unrecognized data section(8) .eh_frame
libbpf: elf: skipping relo section(13) .rel.eh_frame for section(8) .eh_frame
libbpf: elf: skipping unrecognized data section(7) .eh_frame
libbpf: elf: skipping relo section(12) .rel.eh_frame for section(7) .eh_frame
libbpf: elf: skipping unrecognized data section(8) .eh_frame
libbpf: elf: skipping relo section(13) .rel.eh_frame for section(8) .eh_frame
libbpf: elf: skipping unrecognized data section(7) .eh_frame
libbpf: elf: skipping relo section(12) .rel.eh_frame for section(7) .eh_frame
libbpf: elf: skipping unrecognized data section(8) .eh_frame
libbpf: elf: skipping relo section(13) .rel.eh_frame for section(8) .eh_frame

 Performance counter stats for 'sleep 1':

  0.84 msec task-clock                #    0.001 CPUs utilized
 3  context-

Re: [PATCH v2 1/3] perf-stat: introduce bperf, share hardware PMCs with BPF

2021-03-23 Thread Arnaldo Carvalho de Melo
Em Tue, Mar 23, 2021 at 09:25:52AM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Fri, Mar 19, 2021 at 03:41:57PM -0300, Arnaldo Carvalho de Melo escreveu:
> > Em Thu, Mar 18, 2021 at 10:15:13PM +0100, Jiri Olsa escreveu:
> > > On Tue, Mar 16, 2021 at 02:18:35PM -0700, Song Liu wrote:
> > > > bperf is off by default. To enable it, pass --bpf-counters option to
> > > > perf-stat. bperf uses a BPF hashmap to share information about BPF
> > > > programs and maps used by bperf. This map is pinned to bpffs. The default
> > > > path is /sys/fs/bpf/perf_attr_map. The user could change the path with
> > > > option --bpf-attr-map.
> > > > 
> > > > Signed-off-by: Song Liu 
> > > 
> > > Reviewed-by: Jiri Olsa 
> > 
> > After applying just this first patch in the series I'm getting this
> > after a 'make -C tools/ clean', now I'm checking if I need some new
> > clang, ideas?
> 
> Works now with clang from fedora 33, I was using a locally built, older,
> now I get this when trying as non-root, expected, but we need to improve
> the wording.

Fails as root as well, investigating:

[root@five ~]# ls -lad /sys/fs/bpf/
drwx-----T. 2 root root 0 Mar 23 06:03 /sys/fs/bpf/
[root@five ~]# strace -e bpf perf stat --bpf-counters sleep 1
bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_HASH, key_size=120, value_size=8, 
max_entries=16, map_flags=0, inner_map_fd=0, map_name="", map_ifindex=0, 
btf_fd=0, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 
120) = -1 EPERM (Operation not permitted)
Failed to lock perf_event_attr map
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_KILLED, si_pid=13916, si_uid=0, 
si_status=SIGTERM, si_utime=0, si_stime=0} ---
+++ exited with 255 +++
[root@five ~]#
 
> [acme@five perf]$ perf stat --bpf-counters sleep 1
> Failed to lock perf_event_attr map
> [acme@five perf]$
>  
> > - Arnaldo
> > 
> > [acme@quaco perf]$ make O=/tmp/build/perf -C tools/perf BUILD_BPF_SKEL=1 
> > PYTHON=python3 install-bin
> > make: Entering directory '/home/acme/git/perf/tools/perf'
> >   BUILD:   Doing 'make -j8' parallel build
> > Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from 
> > latest version at 'include/uapi/linux/kvm.h'
> > diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h
> > Warning: Kernel ABI header at 
> > 'tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl' differs from latest 
> > version at 'arch/mips/kernel/syscalls/syscall_n64.tbl'
> > diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl 
> > arch/mips/kernel/syscalls/syscall_n64.tbl
> > 
> > Auto-detecting system features:
> > ... dwarf: [ on  ]
> > ...dwarf_getlocations: [ on  ]
> > ... glibc: [ on  ]
> > ...libbfd: [ on  ]
> > ...libbfd-buildid: [ on  ]
> > ...libcap: [ on  ]
> > ...libelf: [ on  ]
> > ...   libnuma: [ on  ]
> > ...numa_num_possible_cpus: [ on  ]
> > ...   libperl: [ on  ]
> > ... libpython: [ on  ]
> > ... libcrypto: [ on  ]
> > ... libunwind: [ on  ]
> > ...libdw-dwarf-unwind: [ on  ]
> > ...  zlib: [ on  ]
> > ...  lzma: [ on  ]
> > ... get_cpuid: [ on  ]
> > ...   bpf: [ on  ]
> > ...libaio: [ on  ]
> > ...   libzstd: [ on  ]
> > ...disassembler-four-args: [ on  ]
> > 
> >   GEN  /tmp/build/perf/common-cmds.h
> >   CC   /tmp/build/perf/exec-cmd.o
> >   MKDIR/tmp/build/perf/fd/
> >   MKDIR/tmp/build/perf/fs/
> >   CC   /tmp/build/perf/fs/fs.o
> >   CC   /tmp/build/perf/event-parse.o
> >   CC   /tmp/build/perf/fd/array.o
> >   CC   /tmp/build/perf/core.o
> >   GEN  /tmp/build/perf/bpf_helper_defs.h
> >   CC   /tmp/build/perf/event-plugin.o
> >   MKDIR/tmp/build/perf/staticobjs/
> >   PERF_VERSION = 5.12.rc2.g3df07f57f205
> >   CC   /tmp/build/perf/staticobjs/libbpf.o
> >   CC   /tmp/build/perf/cpu.o
> >   LD   /tmp/build/perf/fd/libapi-in.o
> >   CC   /tmp/build/perf/cpumap.o
> >   CC   /tmp/build/perf/help.o
> >   MKDIR/tmp/build/perf/fs/
> >   CC   /tmp/build/perf/fs/tracing_path.o
> >   CC   /tmp/build/perf/fs/cgroup.o
> >   CC   /tmp/build/perf/trace-seq.o
> >   CC   /tmp/build/perf/pager.o
> >   CC   /tmp/build/perf/parse-options.o
> >   LD   /tmp/build/perf/fs/libapi-in.o
> >   CC   /tmp/build/perf/debug.o
> >   CC   /tmp/build/perf/str_error_r.o
> >   CC   /tmp/build/perf/run-command.o
> >   CC   /tmp/build/perf/sigchain.o
> >   LD   /tmp/build/perf/libapi-in.o
> >   AR   /tmp/build/perf/libapi.a
> >   CC   /tmp/build/perf/subcmd-config.o
> >   CC   /tmp/build/perf/threadmap.o
> >   CC   /tmp/build/perf/evsel.o
> >   CC   /

Re: [PATCH v2 1/3] perf-stat: introduce bperf, share hardware PMCs with BPF

2021-03-23 Thread Arnaldo Carvalho de Melo
Em Fri, Mar 19, 2021 at 03:41:57PM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Thu, Mar 18, 2021 at 10:15:13PM +0100, Jiri Olsa escreveu:
> > On Tue, Mar 16, 2021 at 02:18:35PM -0700, Song Liu wrote:
> > > bperf is off by default. To enable it, pass --bpf-counters option to
> > > perf-stat. bperf uses a BPF hashmap to share information about BPF
> > > programs and maps used by bperf. This map is pinned to bpffs. The default
> > > path is /sys/fs/bpf/perf_attr_map. The user could change the path with
> > > option --bpf-attr-map.
> > > 
> > > Signed-off-by: Song Liu 
> > 
> > Reviewed-by: Jiri Olsa 
> 
> After applying just this first patch in the series I'm getting this
> after a 'make -C tools/ clean', now I'm checking if I need some new
> clang, ideas?

Works now with clang from Fedora 33; I was using a locally built, older
one. Now I get this when trying as non-root, which is expected, but we
need to improve the wording.

[acme@five perf]$ perf stat --bpf-counters sleep 1
Failed to lock perf_event_attr map
[acme@five perf]$
 
> - Arnaldo
> 
> [acme@quaco perf]$ make O=/tmp/build/perf -C tools/perf BUILD_BPF_SKEL=1 
> PYTHON=python3 install-bin
> make: Entering directory '/home/acme/git/perf/tools/perf'
>   BUILD:   Doing 'make -j8' parallel build
> Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from 
> latest version at 'include/uapi/linux/kvm.h'
> diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h
> Warning: Kernel ABI header at 
> 'tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl' differs from latest 
> version at 'arch/mips/kernel/syscalls/syscall_n64.tbl'
> diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl 
> arch/mips/kernel/syscalls/syscall_n64.tbl
> 
> Auto-detecting system features:
> ... dwarf: [ on  ]
> ...dwarf_getlocations: [ on  ]
> ... glibc: [ on  ]
> ...libbfd: [ on  ]
> ...libbfd-buildid: [ on  ]
> ...libcap: [ on  ]
> ...libelf: [ on  ]
> ...   libnuma: [ on  ]
> ...numa_num_possible_cpus: [ on  ]
> ...   libperl: [ on  ]
> ... libpython: [ on  ]
> ... libcrypto: [ on  ]
> ... libunwind: [ on  ]
> ...libdw-dwarf-unwind: [ on  ]
> ...  zlib: [ on  ]
> ...  lzma: [ on  ]
> ... get_cpuid: [ on  ]
> ...   bpf: [ on  ]
> ...libaio: [ on  ]
> ...   libzstd: [ on  ]
> ...disassembler-four-args: [ on  ]
> 
>   GEN  /tmp/build/perf/common-cmds.h
>   CC   /tmp/build/perf/exec-cmd.o
>   MKDIR/tmp/build/perf/fd/
>   MKDIR/tmp/build/perf/fs/
>   CC   /tmp/build/perf/fs/fs.o
>   CC   /tmp/build/perf/event-parse.o
>   CC   /tmp/build/perf/fd/array.o
>   CC   /tmp/build/perf/core.o
>   GEN  /tmp/build/perf/bpf_helper_defs.h
>   CC   /tmp/build/perf/event-plugin.o
>   MKDIR/tmp/build/perf/staticobjs/
>   PERF_VERSION = 5.12.rc2.g3df07f57f205
>   CC   /tmp/build/perf/staticobjs/libbpf.o
>   CC   /tmp/build/perf/cpu.o
>   LD   /tmp/build/perf/fd/libapi-in.o
>   CC   /tmp/build/perf/cpumap.o
>   CC   /tmp/build/perf/help.o
>   MKDIR/tmp/build/perf/fs/
>   CC   /tmp/build/perf/fs/tracing_path.o
>   CC   /tmp/build/perf/fs/cgroup.o
>   CC   /tmp/build/perf/trace-seq.o
>   CC   /tmp/build/perf/pager.o
>   CC   /tmp/build/perf/parse-options.o
>   LD   /tmp/build/perf/fs/libapi-in.o
>   CC   /tmp/build/perf/debug.o
>   CC   /tmp/build/perf/str_error_r.o
>   CC   /tmp/build/perf/run-command.o
>   CC   /tmp/build/perf/sigchain.o
>   LD   /tmp/build/perf/libapi-in.o
>   AR   /tmp/build/perf/libapi.a
>   CC   /tmp/build/perf/subcmd-config.o
>   CC   /tmp/build/perf/threadmap.o
>   CC   /tmp/build/perf/evsel.o
>   CC   /tmp/build/perf/parse-filter.o
>   MKDIR/tmp/build/perf/staticobjs/
>   CC   /tmp/build/perf/staticobjs/bpf.o
>   CC   /tmp/build/perf/evlist.o
>   CC   /tmp/build/perf/parse-utils.o
>   CC   /tmp/build/perf/kbuffer-parse.o
>   CC   /tmp/build/perf/tep_strerror.o
>   CC   /tmp/build/perf/mmap.o
>   CC   /tmp/build/perf/zalloc.o
>   CC   /tmp/build/perf/event-parse-api.o
>   LD   /tmp/build/perf/libsubcmd-in.o
>   AR   /tmp/build/perf/libsubcmd.a
>   CC   /tmp/build/perf/xyarray.o
>   LD   /tmp/build/perf/libtraceevent-in.o
>   LINK /tmp/build/perf/libtraceevent.a
>   CC   /tmp/build/perf/staticobjs/nlattr.o
>   CC   /tmp/build/perf/staticobjs/btf.o
>   CC   /tmp/build/perf/lib.o
>   CC   /tmp/build/perf/staticobjs/libbpf_errno.o
>   CC   /tmp/build/perf/staticobjs/str_error.o
>   CC   /tmp/build/perf/staticobjs/netlink.o
>   CC   /tmp/build/perf/

Re: [PATCH v2 1/3] perf-stat: introduce bperf, share hardware PMCs with BPF

2021-03-22 Thread Song Liu



> On Mar 19, 2021, at 11:41 AM, Arnaldo Carvalho de Melo wrote:
> 
> Em Thu, Mar 18, 2021 at 10:15:13PM +0100, Jiri Olsa escreveu:
>> On Tue, Mar 16, 2021 at 02:18:35PM -0700, Song Liu wrote:
>>> bperf is off by default. To enable it, pass --bpf-counters option to
>>> perf-stat. bperf uses a BPF hashmap to share information about BPF
>>> programs and maps used by bperf. This map is pinned to bpffs. The default
>>> path is /sys/fs/bpf/perf_attr_map. The user could change the path with
>>> option --bpf-attr-map.
>>> 
>>> Signed-off-by: Song Liu 
>> 
>> Reviewed-by: Jiri Olsa 
> 
> After applying just this first patch in the series I'm getting this
> after a 'make -C tools/ clean', now I'm checking if I need some new
> clang, ideas?
> 
> - Arnaldo

Hi Arnaldo, 

Are you still getting this error? 

Thanks,
Song

> 
> [acme@quaco perf]$ make O=/tmp/build/perf -C tools/perf BUILD_BPF_SKEL=1 
> PYTHON=python3 install-bin
> make: Entering directory '/home/acme/git/perf/tools/perf'
>  BUILD:   Doing 'make -j8' parallel build
> Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from 
> latest version at 'include/uapi/linux/kvm.h'
> diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h
> Warning: Kernel ABI header at 
> 'tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl' differs from latest 
> version at 'arch/mips/kernel/syscalls/syscall_n64.tbl'
> diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl 
> arch/mips/kernel/syscalls/syscall_n64.tbl
> 
> Auto-detecting system features:
> ... dwarf: [ on  ]
> ...dwarf_getlocations: [ on  ]
> ... glibc: [ on  ]
> ...libbfd: [ on  ]
> ...libbfd-buildid: [ on  ]
> ...libcap: [ on  ]
> ...libelf: [ on  ]
> ...   libnuma: [ on  ]
> ...numa_num_possible_cpus: [ on  ]
> ...   libperl: [ on  ]
> ... libpython: [ on  ]
> ... libcrypto: [ on  ]
> ... libunwind: [ on  ]
> ...libdw-dwarf-unwind: [ on  ]
> ...  zlib: [ on  ]
> ...  lzma: [ on  ]
> ... get_cpuid: [ on  ]
> ...   bpf: [ on  ]
> ...libaio: [ on  ]
> ...   libzstd: [ on  ]
> ...disassembler-four-args: [ on  ]
> 
>  GEN  /tmp/build/perf/common-cmds.h
>  CC   /tmp/build/perf/exec-cmd.o
>  MKDIR/tmp/build/perf/fd/
>  MKDIR/tmp/build/perf/fs/
>  CC   /tmp/build/perf/fs/fs.o
>  CC   /tmp/build/perf/event-parse.o
>  CC   /tmp/build/perf/fd/array.o
>  CC   /tmp/build/perf/core.o
>  GEN  /tmp/build/perf/bpf_helper_defs.h
>  CC   /tmp/build/perf/event-plugin.o
>  MKDIR/tmp/build/perf/staticobjs/
>  PERF_VERSION = 5.12.rc2.g3df07f57f205
>  CC   /tmp/build/perf/staticobjs/libbpf.o
>  CC   /tmp/build/perf/cpu.o
>  LD   /tmp/build/perf/fd/libapi-in.o
>  CC   /tmp/build/perf/cpumap.o
>  CC   /tmp/build/perf/help.o
>  MKDIR/tmp/build/perf/fs/
>  CC   /tmp/build/perf/fs/tracing_path.o
>  CC   /tmp/build/perf/fs/cgroup.o
>  CC   /tmp/build/perf/trace-seq.o
>  CC   /tmp/build/perf/pager.o
>  CC   /tmp/build/perf/parse-options.o
>  LD   /tmp/build/perf/fs/libapi-in.o
>  CC   /tmp/build/perf/debug.o
>  CC   /tmp/build/perf/str_error_r.o
>  CC   /tmp/build/perf/run-command.o
>  CC   /tmp/build/perf/sigchain.o
>  LD   /tmp/build/perf/libapi-in.o
>  AR   /tmp/build/perf/libapi.a
>  CC   /tmp/build/perf/subcmd-config.o
>  CC   /tmp/build/perf/threadmap.o
>  CC   /tmp/build/perf/evsel.o
>  CC   /tmp/build/perf/parse-filter.o
>  MKDIR/tmp/build/perf/staticobjs/
>  CC   /tmp/build/perf/staticobjs/bpf.o
>  CC   /tmp/build/perf/evlist.o
>  CC   /tmp/build/perf/parse-utils.o
>  CC   /tmp/build/perf/kbuffer-parse.o
>  CC   /tmp/build/perf/tep_strerror.o
>  CC   /tmp/build/perf/mmap.o
>  CC   /tmp/build/perf/zalloc.o
>  CC   /tmp/build/perf/event-parse-api.o
>  LD   /tmp/build/perf/libsubcmd-in.o
>  AR   /tmp/build/perf/libsubcmd.a
>  CC   /tmp/build/perf/xyarray.o
>  LD   /tmp/build/perf/libtraceevent-in.o
>  LINK /tmp/build/perf/libtraceevent.a
>  CC   /tmp/build/perf/staticobjs/nlattr.o
>  CC   /tmp/build/perf/staticobjs/btf.o
>  CC   /tmp/build/perf/lib.o
>  CC   /tmp/build/perf/staticobjs/libbpf_errno.o
>  CC   /tmp/build/perf/staticobjs/str_error.o
>  CC   /tmp/build/perf/staticobjs/netlink.o
>  CC   /tmp/build/perf/staticobjs/bpf_prog_linfo.o
>  CC   /tmp/build/perf/staticobjs/libbpf_probes.o
>  LD   /tmp/build/perf/libperf-in.o
>  AR   /tmp/build/perf/libperf.a
>  MKDIR/tmp/build/perf/pmu-events/
>  HOSTCC   /tmp/build/perf/pmu-events/json.o
>  CC   /tmp/build/perf

Re: [PATCH v2 1/3] perf-stat: introduce bperf, share hardware PMCs with BPF

2021-03-19 Thread Song Liu



> On Mar 19, 2021, at 11:55 AM, Jiri Olsa  wrote:
> 
> On Fri, Mar 19, 2021 at 03:41:57PM -0300, Arnaldo Carvalho de Melo wrote:
> 
> SNIP
> 
>>  LD   
>> /tmp/build/perf/util/bpf_skel/.tmp//bootstrap/libbpf/staticobjs/libbpf-in.o
>>  LINK /tmp/build/perf/util/bpf_skel/.tmp//bootstrap/libbpf/libbpf.a
>>  LINK /tmp/build/perf/util/bpf_skel/.tmp//bootstrap/bpftool
>>  GEN-SKEL /tmp/build/perf/util/bpf_skel/bpf_prog_profiler.skel.h
>>  GEN-SKEL /tmp/build/perf/util/bpf_skel/bperf_leader.skel.h
>>  GEN-SKEL /tmp/build/perf/util/bpf_skel/bperf_follower.skel.h
>> libbpf: map 'prev_readings': unexpected def kind var.
>> Error: failed to open BPF object file: Invalid argument
>> libbpf: map 'diff_readings': unexpected def kind var.
>> Error: failed to open BPF object file: Invalid argument
> 
> I'm getting clean build for the same options,
> could you please send the same output also with 'JOBS=1 V=1'
> 
> 
>> make[2]: *** [Makefile.perf:1029: 
>> /tmp/build/perf/util/bpf_skel/bperf_leader.skel.h] Error 255
>> make[2]: *** Waiting for unfinished jobs
>> make[2]: *** [Makefile.perf:1029: 
>> /tmp/build/perf/util/bpf_skel/bperf_follower.skel.h] Error 255
>> make[1]: *** [Makefile.perf:236: sub-make] Error 2
>> make: *** [Makefile:110: install-bin] Error 2
>> make: Leaving directory '/home/acme/git/perf/tools/perf'
>> [acme@quaco perf]$ clang -v
>> clang version 11.0.0 (https://github.com/llvm/llvm-project 
>> 67420f1b0e9c673ee638f2680fa83f468019004f)
>> Target: x86_64-unknown-linux-gnu
>> Thread model: posix
>> InstalledDir: /usr/local/bin
>> Found candidate GCC installation: /usr/lib/gcc/x86_64-redhat-linux/10
>> Selected GCC installation: /usr/lib/gcc/x86_64-redhat-linux/10
>> Candidate multilib: .;@m64
>> Candidate multilib: 32;@m32
>> Selected multilib: .;@m64
>> [acme@quaco perf]$
>> 
> 
> I have:
> 
> [jolsa@dell-r440-01 linux-perf]$ clang --version
> clang version 11.0.0 (Fedora 11.0.0-2.fc33)
> Target: x86_64-unknown-linux-gnu
> Thread model: posix
> InstalledDir: /usr/bin

I am not able to repro this error either. I tried two versions of clang:

clang version 11.0.0 (Red Hat 11.0.0-0.2.rc2.module_el8.4.0+533+50191577)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /bin

clang version 12.0.0 (https://github.com/llvm/llvm-project.git 
07f1e1f44c87d1ee84caf13d6e5aa64eb7e1b068)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /usr/local/bin

Thanks,
Song



Re: [PATCH v2 1/3] perf-stat: introduce bperf, share hardware PMCs with BPF

2021-03-19 Thread Jiri Olsa
On Fri, Mar 19, 2021 at 03:41:57PM -0300, Arnaldo Carvalho de Melo wrote:

SNIP

>   LD   
> /tmp/build/perf/util/bpf_skel/.tmp//bootstrap/libbpf/staticobjs/libbpf-in.o
>   LINK /tmp/build/perf/util/bpf_skel/.tmp//bootstrap/libbpf/libbpf.a
>   LINK /tmp/build/perf/util/bpf_skel/.tmp//bootstrap/bpftool
>   GEN-SKEL /tmp/build/perf/util/bpf_skel/bpf_prog_profiler.skel.h
>   GEN-SKEL /tmp/build/perf/util/bpf_skel/bperf_leader.skel.h
>   GEN-SKEL /tmp/build/perf/util/bpf_skel/bperf_follower.skel.h
> libbpf: map 'prev_readings': unexpected def kind var.
> Error: failed to open BPF object file: Invalid argument
> libbpf: map 'diff_readings': unexpected def kind var.
> Error: failed to open BPF object file: Invalid argument

I'm getting clean build for the same options,
could you please send the same output also with 'JOBS=1 V=1'


> make[2]: *** [Makefile.perf:1029: 
> /tmp/build/perf/util/bpf_skel/bperf_leader.skel.h] Error 255
> make[2]: *** Waiting for unfinished jobs
> make[2]: *** [Makefile.perf:1029: 
> /tmp/build/perf/util/bpf_skel/bperf_follower.skel.h] Error 255
> make[1]: *** [Makefile.perf:236: sub-make] Error 2
> make: *** [Makefile:110: install-bin] Error 2
> make: Leaving directory '/home/acme/git/perf/tools/perf'
> [acme@quaco perf]$ clang -v
> clang version 11.0.0 (https://github.com/llvm/llvm-project 
> 67420f1b0e9c673ee638f2680fa83f468019004f)
> Target: x86_64-unknown-linux-gnu
> Thread model: posix
> InstalledDir: /usr/local/bin
> Found candidate GCC installation: /usr/lib/gcc/x86_64-redhat-linux/10
> Selected GCC installation: /usr/lib/gcc/x86_64-redhat-linux/10
> Candidate multilib: .;@m64
> Candidate multilib: 32;@m32
> Selected multilib: .;@m64
> [acme@quaco perf]$
> 

I have:

[jolsa@dell-r440-01 linux-perf]$ clang --version
clang version 11.0.0 (Fedora 11.0.0-2.fc33)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /usr/bin


jirka



Re: [PATCH v2 1/3] perf-stat: introduce bperf, share hardware PMCs with BPF

2021-03-19 Thread Arnaldo Carvalho de Melo
Em Thu, Mar 18, 2021 at 10:15:13PM +0100, Jiri Olsa escreveu:
> On Tue, Mar 16, 2021 at 02:18:35PM -0700, Song Liu wrote:
> > bperf is off by default. To enable it, pass --bpf-counters option to
> > perf-stat. bperf uses a BPF hashmap to share information about BPF
> > programs and maps used by bperf. This map is pinned to bpffs. The default
> > path is /sys/fs/bpf/perf_attr_map. The user could change the path with
> > option --bpf-attr-map.
> > 
> > Signed-off-by: Song Liu 
> 
> Reviewed-by: Jiri Olsa 

After applying just this first patch in the series I'm getting this
after a 'make -C tools/ clean', now I'm checking if I need some new
clang, ideas?

- Arnaldo

[acme@quaco perf]$ make O=/tmp/build/perf -C tools/perf BUILD_BPF_SKEL=1 
PYTHON=python3 install-bin
make: Entering directory '/home/acme/git/perf/tools/perf'
  BUILD:   Doing 'make -j8' parallel build
Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from 
latest version at 'include/uapi/linux/kvm.h'
diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h
Warning: Kernel ABI header at 
'tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl' differs from latest 
version at 'arch/mips/kernel/syscalls/syscall_n64.tbl'
diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl 
arch/mips/kernel/syscalls/syscall_n64.tbl

Auto-detecting system features:
... dwarf: [ on  ]
...dwarf_getlocations: [ on  ]
... glibc: [ on  ]
...libbfd: [ on  ]
...libbfd-buildid: [ on  ]
...libcap: [ on  ]
...libelf: [ on  ]
...   libnuma: [ on  ]
...numa_num_possible_cpus: [ on  ]
...   libperl: [ on  ]
... libpython: [ on  ]
... libcrypto: [ on  ]
... libunwind: [ on  ]
...libdw-dwarf-unwind: [ on  ]
...  zlib: [ on  ]
...  lzma: [ on  ]
... get_cpuid: [ on  ]
...   bpf: [ on  ]
...libaio: [ on  ]
...   libzstd: [ on  ]
...disassembler-four-args: [ on  ]

  GEN  /tmp/build/perf/common-cmds.h
  CC   /tmp/build/perf/exec-cmd.o
  MKDIR/tmp/build/perf/fd/
  MKDIR/tmp/build/perf/fs/
  CC   /tmp/build/perf/fs/fs.o
  CC   /tmp/build/perf/event-parse.o
  CC   /tmp/build/perf/fd/array.o
  CC   /tmp/build/perf/core.o
  GEN  /tmp/build/perf/bpf_helper_defs.h
  CC   /tmp/build/perf/event-plugin.o
  MKDIR/tmp/build/perf/staticobjs/
  PERF_VERSION = 5.12.rc2.g3df07f57f205
  CC   /tmp/build/perf/staticobjs/libbpf.o
  CC   /tmp/build/perf/cpu.o
  LD   /tmp/build/perf/fd/libapi-in.o
  CC   /tmp/build/perf/cpumap.o
  CC   /tmp/build/perf/help.o
  MKDIR/tmp/build/perf/fs/
  CC   /tmp/build/perf/fs/tracing_path.o
  CC   /tmp/build/perf/fs/cgroup.o
  CC   /tmp/build/perf/trace-seq.o
  CC   /tmp/build/perf/pager.o
  CC   /tmp/build/perf/parse-options.o
  LD   /tmp/build/perf/fs/libapi-in.o
  CC   /tmp/build/perf/debug.o
  CC   /tmp/build/perf/str_error_r.o
  CC   /tmp/build/perf/run-command.o
  CC   /tmp/build/perf/sigchain.o
  LD   /tmp/build/perf/libapi-in.o
  AR   /tmp/build/perf/libapi.a
  CC   /tmp/build/perf/subcmd-config.o
  CC   /tmp/build/perf/threadmap.o
  CC   /tmp/build/perf/evsel.o
  CC   /tmp/build/perf/parse-filter.o
  MKDIR/tmp/build/perf/staticobjs/
  CC   /tmp/build/perf/staticobjs/bpf.o
  CC   /tmp/build/perf/evlist.o
  CC   /tmp/build/perf/parse-utils.o
  CC   /tmp/build/perf/kbuffer-parse.o
  CC   /tmp/build/perf/tep_strerror.o
  CC   /tmp/build/perf/mmap.o
  CC   /tmp/build/perf/zalloc.o
  CC   /tmp/build/perf/event-parse-api.o
  LD   /tmp/build/perf/libsubcmd-in.o
  AR   /tmp/build/perf/libsubcmd.a
  CC   /tmp/build/perf/xyarray.o
  LD   /tmp/build/perf/libtraceevent-in.o
  LINK /tmp/build/perf/libtraceevent.a
  CC   /tmp/build/perf/staticobjs/nlattr.o
  CC   /tmp/build/perf/staticobjs/btf.o
  CC   /tmp/build/perf/lib.o
  CC   /tmp/build/perf/staticobjs/libbpf_errno.o
  CC   /tmp/build/perf/staticobjs/str_error.o
  CC   /tmp/build/perf/staticobjs/netlink.o
  CC   /tmp/build/perf/staticobjs/bpf_prog_linfo.o
  CC   /tmp/build/perf/staticobjs/libbpf_probes.o
  LD   /tmp/build/perf/libperf-in.o
  AR   /tmp/build/perf/libperf.a
  MKDIR/tmp/build/perf/pmu-events/
  HOSTCC   /tmp/build/perf/pmu-events/json.o
  CC   /tmp/build/perf/plugin_jbd2.o
  CC   /tmp/build/perf/staticobjs/xsk.o
  MKDIR/tmp/build/perf/pmu-events/
  HOSTCC   /tmp/build/perf/pmu-events/jsmn.o
  CC   /tmp/build/perf/staticobjs/hashmap.o
  LD   /tmp/build/perf/plugin_jbd2-in.o
  CC   /tmp/build/perf/staticobjs/btf_dump.o
  CC  

Re: [PATCH v2 1/3] perf-stat: introduce bperf, share hardware PMCs with BPF

2021-03-18 Thread Jiri Olsa
On Tue, Mar 16, 2021 at 02:18:35PM -0700, Song Liu wrote:
> perf uses performance monitoring counters (PMCs) to monitor system
> performance. The PMCs are limited hardware resources. For example,
> Intel CPUs have 3x fixed PMCs and 4x programmable PMCs per cpu.
> 
> Modern data center systems use these PMCs in many different ways:
> system level monitoring, (maybe nested) container level monitoring, per
> process monitoring, profiling (in sample mode), etc. In some cases,
> there are more active perf_events than available hardware PMCs. To allow
> all perf_events to have a chance to run, it is necessary to do expensive
> time multiplexing of events.
> 
> On the other hand, many monitoring tools count the common metrics (cycles,
> instructions). It is a waste to have multiple tools create multiple
> perf_events of "cycles" and occupy multiple PMCs.
> 
> bperf tries to reduce such wastes by allowing multiple perf_events of
> "cycles" or "instructions" (at different scopes) to share PMUs. Instead
> of having each perf-stat session to read its own perf_events, bperf uses
> BPF programs to read the perf_events and aggregate readings to BPF maps.
> Then, the perf-stat session(s) reads the values from these BPF maps.
> 
> Please refer to the comment before the definition of bperf_ops for the
> description of bperf architecture.
> 
> bperf is off by default. To enable it, pass --bpf-counters option to
> perf-stat. bperf uses a BPF hashmap to share information about BPF
> programs and maps used by bperf. This map is pinned to bpffs. The default
> path is /sys/fs/bpf/perf_attr_map. The user could change the path with
> option --bpf-attr-map.
> 
> Signed-off-by: Song Liu 

Reviewed-by: Jiri Olsa 

thanks,
jirka

> 
> ---
> Known limitations:
> 1. Do not support per cgroup events;
> 2. Do not support monitoring of BPF program (perf-stat -b);
> 3. Do not support event groups;
> 4. Do not support inherit events during fork().
> 
> The following commands have been tested:
> 
>perf stat --bpf-counters -e cycles,ref-cycles -a
>perf stat --bpf-counters -e cycles,instructions -C 1,3,4
>perf stat --bpf-counters -e cycles -p 123
>perf stat --bpf-counters -e cycles -t 100,101
>perf stat --bpf-counters -e cycles,ref-cycles -- stressapptest ...
> ---
>  tools/perf/Documentation/perf-stat.txt|  11 +
>  tools/perf/Makefile.perf  |   1 +
>  tools/perf/builtin-stat.c |  10 +
>  tools/perf/util/bpf_counter.c | 519 +-
>  tools/perf/util/bpf_skel/bperf.h  |  14 +
>  tools/perf/util/bpf_skel/bperf_follower.bpf.c |  69 +++
>  tools/perf/util/bpf_skel/bperf_leader.bpf.c   |  46 ++
>  tools/perf/util/bpf_skel/bperf_u.h|  14 +
>  tools/perf/util/evsel.h   |  20 +-
>  tools/perf/util/target.h  |   4 +-
>  10 files changed, 701 insertions(+), 7 deletions(-)
>  create mode 100644 tools/perf/util/bpf_skel/bperf.h
>  create mode 100644 tools/perf/util/bpf_skel/bperf_follower.bpf.c
>  create mode 100644 tools/perf/util/bpf_skel/bperf_leader.bpf.c
>  create mode 100644 tools/perf/util/bpf_skel/bperf_u.h
> 
> diff --git a/tools/perf/Documentation/perf-stat.txt 
> b/tools/perf/Documentation/perf-stat.txt
> index 08a1714494f87..d2e7656b5ef81 100644
> --- a/tools/perf/Documentation/perf-stat.txt
> +++ b/tools/perf/Documentation/perf-stat.txt
> @@ -93,6 +93,17 @@ report::
>  
>  1.102235068 seconds time elapsed
>  
> +--bpf-counters::
> + Use BPF programs to aggregate readings from perf_events.  This
> + allows multiple perf-stat sessions that are counting the same metric (cycles,
> + instructions, etc.) to share hardware counters.
> +
> +--bpf-attr-map::
> + With option "--bpf-counters", different perf-stat sessions share
> + information about shared BPF programs and maps via a pinned hashmap.
> + Use "--bpf-attr-map" to specify the path of this pinned hashmap.
> + The default path is /sys/fs/bpf/perf_attr_map.
> +
>  ifdef::HAVE_LIBPFM[]
>  --pfm-events events::
>  Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net)
> diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
> index f6e609673de2b..ca9aa08e85a1f 100644
> --- a/tools/perf/Makefile.perf
> +++ b/tools/perf/Makefile.perf
> @@ -1007,6 +1007,7 @@ python-clean:
>  SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel)
>  SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp)
>  SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h
> +SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h 
> $(SKEL_OUT)/bperf_follower.skel.h
>  
>  ifdef BUILD_BPF_SKEL
>  BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index 2e2e4a8345ea2..92696373da994 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -792,6 +792,12 @@ static int __run_perf_stat(int argc, const char **argv, 
> int run_idx)
>   }
>  
>   evlist__for_

Re: [PATCH v2 1/3] perf-stat: introduce bperf, share hardware PMCs with BPF

2021-03-18 Thread Song Liu



> On Mar 18, 2021, at 6:49 AM, Namhyung Kim  wrote:
> 
> On Thu, Mar 18, 2021 at 4:22 PM Song Liu  wrote:
>> 
>> 
>> 
>>> On Mar 17, 2021, at 10:54 PM, Namhyung Kim  wrote:
>>> 
>> 
>> [...]
>> 
 +
 +static int bperf_reload_leader_program(struct evsel *evsel, int 
 attr_map_fd,
 +  struct perf_event_attr_map_entry 
 *entry)
 +{
 +   struct bperf_leader_bpf *skel = bperf_leader_bpf__open();
 +   int link_fd, diff_map_fd, err;
 +   struct bpf_link *link = NULL;
 +
 +   if (!skel) {
 +   pr_err("Failed to open leader skeleton\n");
 +   return -1;
 +   }
 +
 +   bpf_map__resize(skel->maps.events, libbpf_num_possible_cpus());
 +   err = bperf_leader_bpf__load(skel);
 +   if (err) {
 +   pr_err("Failed to load leader skeleton\n");
 +   goto out;
 +   }
 +
 +   err = -1;
 +   link = bpf_program__attach(skel->progs.on_switch);
 +   if (!link) {
 +   pr_err("Failed to attach leader program\n");
 +   goto out;
 +   }
 +
 +   link_fd = bpf_link__fd(link);
 +   diff_map_fd = bpf_map__fd(skel->maps.diff_readings);
 +   entry->link_id = bpf_link_get_id(link_fd);
 +   entry->diff_map_id = bpf_map_get_id(diff_map_fd);
 +   err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, entry, 
 BPF_ANY);
 +   assert(err == 0);
 +
 +   evsel->bperf_leader_link_fd = 
 bpf_link_get_fd_by_id(entry->link_id);
 +   assert(evsel->bperf_leader_link_fd >= 0);
>>> 
>>> Isn't it the same as link_fd?
>> 
>> This is a different fd on the same link.
> 
> Ok
> 
>> 
>>> 
 +
 +   /*
 +* save leader_skel for install_pe, which is called within
 +* following evsel__open_per_cpu call
 +*/
 +   evsel->leader_skel = skel;
 +   evsel__open_per_cpu(evsel, all_cpu_map, -1);
 +
 +out:
 +   bperf_leader_bpf__destroy(skel);
 +   bpf_link__destroy(link);
>>> 
>>> Why do we destroy it?  Is it because we get another reference?
>> 
>> Yes. We only need evsel->bperf_leader_link_fd to keep the whole
>> skeleton attached.
>> 
>> When multiple perf-stat sessions are sharing the leader skeleton,
>> only the first one loads the leader skeleton, by calling
>> bperf_reload_leader_program(). Other sessions simply hold a fd to
>> the bpf_link. More explanation in bperf__load() below.
> 
> Ok.
> 
>> 
>> 
>>> 
 +   return err;
 +}
 +
 +static int bperf__load(struct evsel *evsel, struct target *target)
 +{
 +   struct perf_event_attr_map_entry entry = {0x, 0x};
 +   int attr_map_fd, diff_map_fd = -1, err;
 +   enum bperf_filter_type filter_type;
 +   __u32 filter_entry_cnt, i;
 +
 +   if (bperf_check_target(evsel, target, &filter_type, 
 &filter_entry_cnt))
 +   return -1;
 +
 +   if (!all_cpu_map) {
 +   all_cpu_map = perf_cpu_map__new(NULL);
 +   if (!all_cpu_map)
 +   return -1;
 +   }
 +
 +   evsel->bperf_leader_prog_fd = -1;
 +   evsel->bperf_leader_link_fd = -1;
 +
 +   /*
 +* Step 1: hold a fd on the leader program and the bpf_link, if
 +* the program is not already gone, reload the program.
 +* Use flock() to ensure exclusive access to the perf_event_attr
 +* map.
 +*/
 +   attr_map_fd = bperf_lock_attr_map(target);
 +   if (attr_map_fd < 0) {
 +   pr_err("Failed to lock perf_event_attr map\n");
 +   return -1;
 +   }
 +
 +   err = bpf_map_lookup_elem(attr_map_fd, &evsel->core.attr, &entry);
 +   if (err) {
 +   err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, 
 &entry, BPF_ANY);
 +   if (err)
 +   goto out;
 +   }
 +
 +   evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry.link_id);
 +   if (evsel->bperf_leader_link_fd < 0 &&
 +   bperf_reload_leader_program(evsel, attr_map_fd, &entry))
 +   goto out;
>> 
>> Continuing the previous explanation: in bperf_reload_leader_program(),
>> we open another reference to the link and destroy the skeleton. This
>> brings the code to the same state as in the
>> evsel->bperf_leader_link_fd >= 0 case above.
> 
> Thanks for the explanation.
> 
>> 
 +
 +   /*
 +* The bpf_link holds reference to the leader program, and the
 +* leader program holds reference to the maps. Therefore, if
 +* link_id is valid, diff_map_id should also be valid.
 +*/
 +   evsel->bperf_l

Re: [PATCH v2 1/3] perf-stat: introduce bperf, share hardware PMCs with BPF

2021-03-18 Thread Namhyung Kim
On Thu, Mar 18, 2021 at 4:22 PM Song Liu  wrote:
>
>
>
> > On Mar 17, 2021, at 10:54 PM, Namhyung Kim  wrote:
> >
>
> [...]
>
> >> +
> >> +static int bperf_reload_leader_program(struct evsel *evsel, int 
> >> attr_map_fd,
> >> +  struct perf_event_attr_map_entry 
> >> *entry)
> >> +{
> >> +   struct bperf_leader_bpf *skel = bperf_leader_bpf__open();
> >> +   int link_fd, diff_map_fd, err;
> >> +   struct bpf_link *link = NULL;
> >> +
> >> +   if (!skel) {
> >> +   pr_err("Failed to open leader skeleton\n");
> >> +   return -1;
> >> +   }
> >> +
> >> +   bpf_map__resize(skel->maps.events, libbpf_num_possible_cpus());
> >> +   err = bperf_leader_bpf__load(skel);
> >> +   if (err) {
> >> +   pr_err("Failed to load leader skeleton\n");
> >> +   goto out;
> >> +   }
> >> +
> >> +   err = -1;
> >> +   link = bpf_program__attach(skel->progs.on_switch);
> >> +   if (!link) {
> >> +   pr_err("Failed to attach leader program\n");
> >> +   goto out;
> >> +   }
> >> +
> >> +   link_fd = bpf_link__fd(link);
> >> +   diff_map_fd = bpf_map__fd(skel->maps.diff_readings);
> >> +   entry->link_id = bpf_link_get_id(link_fd);
> >> +   entry->diff_map_id = bpf_map_get_id(diff_map_fd);
> >> +   err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, entry, 
> >> BPF_ANY);
> >> +   assert(err == 0);
> >> +
> >> +   evsel->bperf_leader_link_fd = 
> >> bpf_link_get_fd_by_id(entry->link_id);
> >> +   assert(evsel->bperf_leader_link_fd >= 0);
> >
> > Isn't it the same as link_fd?
>
> This is a different fd on the same link.

Ok

>
> >
> >> +
> >> +   /*
> >> +* save leader_skel for install_pe, which is called within
> >> +* following evsel__open_per_cpu call
> >> +*/
> >> +   evsel->leader_skel = skel;
> >> +   evsel__open_per_cpu(evsel, all_cpu_map, -1);
> >> +
> >> +out:
> >> +   bperf_leader_bpf__destroy(skel);
> >> +   bpf_link__destroy(link);
> >
> > Why do we destroy it?  Is it because we get another reference?
>
> Yes. We only need evsel->bperf_leader_link_fd to keep the whole
> skeleton attached.
>
> When multiple perf-stat sessions are sharing the leader skeleton,
> only the first one loads the leader skeleton, by calling
> bperf_reload_leader_program(). Other sessions simply hold a fd to
> the bpf_link. More explanation in bperf__load() below.

Ok.

>
>
> >
> >> +   return err;
> >> +}
> >> +
> >> +static int bperf__load(struct evsel *evsel, struct target *target)
> >> +{
> >> +   struct perf_event_attr_map_entry entry = {0x, 0x};
> >> +   int attr_map_fd, diff_map_fd = -1, err;
> >> +   enum bperf_filter_type filter_type;
> >> +   __u32 filter_entry_cnt, i;
> >> +
> >> +   if (bperf_check_target(evsel, target, &filter_type, 
> >> &filter_entry_cnt))
> >> +   return -1;
> >> +
> >> +   if (!all_cpu_map) {
> >> +   all_cpu_map = perf_cpu_map__new(NULL);
> >> +   if (!all_cpu_map)
> >> +   return -1;
> >> +   }
> >> +
> >> +   evsel->bperf_leader_prog_fd = -1;
> >> +   evsel->bperf_leader_link_fd = -1;
> >> +
> >> +   /*
> >> +* Step 1: hold a fd on the leader program and the bpf_link, if
> >> +* the program is not already gone, reload the program.
> >> +* Use flock() to ensure exclusive access to the perf_event_attr
> >> +* map.
> >> +*/
> >> +   attr_map_fd = bperf_lock_attr_map(target);
> >> +   if (attr_map_fd < 0) {
> >> +   pr_err("Failed to lock perf_event_attr map\n");
> >> +   return -1;
> >> +   }
> >> +
> >> +   err = bpf_map_lookup_elem(attr_map_fd, &evsel->core.attr, &entry);
> >> +   if (err) {
> >> +   err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, 
> >> &entry, BPF_ANY);
> >> +   if (err)
> >> +   goto out;
> >> +   }
> >> +
> >> +   evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry.link_id);
> >> +   if (evsel->bperf_leader_link_fd < 0 &&
> >> +   bperf_reload_leader_program(evsel, attr_map_fd, &entry))
> >> +   goto out;
>
> Continuing the previous explanation: in bperf_reload_leader_program(),
> we open another reference to the link and destroy the skeleton. This
> brings the code to the same state as in the
> evsel->bperf_leader_link_fd >= 0 case above.

Thanks for the explanation.

>
> >> +
> >> +   /*
> >> +* The bpf_link holds reference to the leader program, and the
> >> +* leader program holds reference to the maps. Therefore, if
> >> +* link_id is valid, diff_map_id should also be valid.
> >> +*/
> >> +   evsel->bperf_leader_prog_fd = bpf_prog_get_fd_by_id(
> >> +   bpf_link_get_prog_id(evsel->bperf_leader_link_fd));
> >> +   

Re: [PATCH v2 1/3] perf-stat: introduce bperf, share hardware PMCs with BPF

2021-03-18 Thread Song Liu



> On Mar 17, 2021, at 10:54 PM, Namhyung Kim  wrote:
> 

[...]

>> +
>> +static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
>> +  struct perf_event_attr_map_entry 
>> *entry)
>> +{
>> +   struct bperf_leader_bpf *skel = bperf_leader_bpf__open();
>> +   int link_fd, diff_map_fd, err;
>> +   struct bpf_link *link = NULL;
>> +
>> +   if (!skel) {
>> +   pr_err("Failed to open leader skeleton\n");
>> +   return -1;
>> +   }
>> +
>> +   bpf_map__resize(skel->maps.events, libbpf_num_possible_cpus());
>> +   err = bperf_leader_bpf__load(skel);
>> +   if (err) {
>> +   pr_err("Failed to load leader skeleton\n");
>> +   goto out;
>> +   }
>> +
>> +   err = -1;
>> +   link = bpf_program__attach(skel->progs.on_switch);
>> +   if (!link) {
>> +   pr_err("Failed to attach leader program\n");
>> +   goto out;
>> +   }
>> +
>> +   link_fd = bpf_link__fd(link);
>> +   diff_map_fd = bpf_map__fd(skel->maps.diff_readings);
>> +   entry->link_id = bpf_link_get_id(link_fd);
>> +   entry->diff_map_id = bpf_map_get_id(diff_map_fd);
>> +   err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, entry, 
>> BPF_ANY);
>> +   assert(err == 0);
>> +
>> +   evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry->link_id);
>> +   assert(evsel->bperf_leader_link_fd >= 0);
> 
> Isn't it the same as link_fd?

This is a different fd on the same link. 

> 
>> +
>> +   /*
>> +* save leader_skel for install_pe, which is called within
>> +* following evsel__open_per_cpu call
>> +*/
>> +   evsel->leader_skel = skel;
>> +   evsel__open_per_cpu(evsel, all_cpu_map, -1);
>> +
>> +out:
>> +   bperf_leader_bpf__destroy(skel);
>> +   bpf_link__destroy(link);
> 
> Why do we destroy it?  Is it because we get another reference?

Yes. We only need evsel->bperf_leader_link_fd to keep the whole 
skeleton attached. 

When multiple perf-stat sessions are sharing the leader skeleton, 
only the first one loads the leader skeleton, by calling 
bperf_reload_leader_program(). Other sessions simply hold a fd to 
the bpf_link. More explanation in bperf__load() below.  


> 
>> +   return err;
>> +}
>> +
>> +static int bperf__load(struct evsel *evsel, struct target *target)
>> +{
>> +   struct perf_event_attr_map_entry entry = {0x, 0x};
>> +   int attr_map_fd, diff_map_fd = -1, err;
>> +   enum bperf_filter_type filter_type;
>> +   __u32 filter_entry_cnt, i;
>> +
>> +   if (bperf_check_target(evsel, target, &filter_type, 
>> &filter_entry_cnt))
>> +   return -1;
>> +
>> +   if (!all_cpu_map) {
>> +   all_cpu_map = perf_cpu_map__new(NULL);
>> +   if (!all_cpu_map)
>> +   return -1;
>> +   }
>> +
>> +   evsel->bperf_leader_prog_fd = -1;
>> +   evsel->bperf_leader_link_fd = -1;
>> +
>> +   /*
>> +* Step 1: hold a fd on the leader program and the bpf_link, if
>> +* the program is not already gone, reload the program.
>> +* Use flock() to ensure exclusive access to the perf_event_attr
>> +* map.
>> +*/
>> +   attr_map_fd = bperf_lock_attr_map(target);
>> +   if (attr_map_fd < 0) {
>> +   pr_err("Failed to lock perf_event_attr map\n");
>> +   return -1;
>> +   }
>> +
>> +   err = bpf_map_lookup_elem(attr_map_fd, &evsel->core.attr, &entry);
>> +   if (err) {
>> +   err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, 
>> &entry, BPF_ANY);
>> +   if (err)
>> +   goto out;
>> +   }
>> +
>> +   evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry.link_id);
>> +   if (evsel->bperf_leader_link_fd < 0 &&
>> +   bperf_reload_leader_program(evsel, attr_map_fd, &entry))
>> +   goto out;

Continuing the previous explanation: in bperf_reload_leader_program(),
we open another reference to the link and destroy the skeleton. This
brings the code to the same state as in the
evsel->bperf_leader_link_fd >= 0 case above.

>> +
>> +   /*
>> +* The bpf_link holds reference to the leader program, and the
>> +* leader program holds reference to the maps. Therefore, if
>> +* link_id is valid, diff_map_id should also be valid.
>> +*/
>> +   evsel->bperf_leader_prog_fd = bpf_prog_get_fd_by_id(
>> +   bpf_link_get_prog_id(evsel->bperf_leader_link_fd));
>> +   assert(evsel->bperf_leader_prog_fd >= 0);
>> +
>> +   diff_map_fd = bpf_map_get_fd_by_id(entry.diff_map_id);
>> +   assert(diff_map_fd >= 0);
>> +

[...]

>> +static int bperf__read(struct evsel *evsel)
>> +{
>> +   struct bperf_follower_bpf *skel = evsel->follower_skel;
>> +   __u32 num_cpu_bpf = cpu__max_cpu();
>> +   st

Re: [PATCH v2 1/3] perf-stat: introduce bperf, share hardware PMCs with BPF

2021-03-17 Thread Namhyung Kim
On Wed, Mar 17, 2021 at 6:18 AM Song Liu  wrote:
> +static int bperf_check_target(struct evsel *evsel,
> + struct target *target,
> + enum bperf_filter_type *filter_type,
> + __u32 *filter_entry_cnt)
> +{
> +   if (evsel->leader->core.nr_members > 1) {
> +   pr_err("bpf managed perf events do not yet support 
> groups.\n");
> +   return -1;
> +   }
> +
> +   /* determine filter type based on target */
> +   if (target->system_wide) {
> +   *filter_type = BPERF_FILTER_GLOBAL;
> +   *filter_entry_cnt = 1;
> +   } else if (target->cpu_list) {
> +   *filter_type = BPERF_FILTER_CPU;
> +   *filter_entry_cnt = perf_cpu_map__nr(evsel__cpus(evsel));
> +   } else if (target->tid) {
> +   *filter_type = BPERF_FILTER_PID;
> +   *filter_entry_cnt = perf_thread_map__nr(evsel->core.threads);
> +   } else if (target->pid || evsel->evlist->workload.pid != -1) {
> +   *filter_type = BPERF_FILTER_TGID;
> +   *filter_entry_cnt = perf_thread_map__nr(evsel->core.threads);
> +   } else {
> +   pr_err("bpf managed perf events do not yet support these 
> targets.\n");
> +   return -1;
> +   }
> +
> +   return 0;
> +}
> +
> +static struct perf_cpu_map *all_cpu_map;
> +
> +static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
> +  struct perf_event_attr_map_entry 
> *entry)
> +{
> +   struct bperf_leader_bpf *skel = bperf_leader_bpf__open();
> +   int link_fd, diff_map_fd, err;
> +   struct bpf_link *link = NULL;
> +
> +   if (!skel) {
> +   pr_err("Failed to open leader skeleton\n");
> +   return -1;
> +   }
> +
> +   bpf_map__resize(skel->maps.events, libbpf_num_possible_cpus());
> +   err = bperf_leader_bpf__load(skel);
> +   if (err) {
> +   pr_err("Failed to load leader skeleton\n");
> +   goto out;
> +   }
> +
> +   err = -1;
> +   link = bpf_program__attach(skel->progs.on_switch);
> +   if (!link) {
> +   pr_err("Failed to attach leader program\n");
> +   goto out;
> +   }
> +
> +   link_fd = bpf_link__fd(link);
> +   diff_map_fd = bpf_map__fd(skel->maps.diff_readings);
> +   entry->link_id = bpf_link_get_id(link_fd);
> +   entry->diff_map_id = bpf_map_get_id(diff_map_fd);
> +   err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, entry, 
> BPF_ANY);
> +   assert(err == 0);
> +
> +   evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry->link_id);
> +   assert(evsel->bperf_leader_link_fd >= 0);

Isn't it the same as link_fd?

> +
> +   /*
> +* save leader_skel for install_pe, which is called within
> +* following evsel__open_per_cpu call
> +*/
> +   evsel->leader_skel = skel;
> +   evsel__open_per_cpu(evsel, all_cpu_map, -1);
> +
> +out:
> +   bperf_leader_bpf__destroy(skel);
> +   bpf_link__destroy(link);

Why do we destroy it?  Is it because we get another reference?

> +   return err;
> +}
> +
> +static int bperf__load(struct evsel *evsel, struct target *target)
> +{
> +   struct perf_event_attr_map_entry entry = {0x, 0x};
> +   int attr_map_fd, diff_map_fd = -1, err;
> +   enum bperf_filter_type filter_type;
> +   __u32 filter_entry_cnt, i;
> +
> +   if (bperf_check_target(evsel, target, &filter_type, 
> &filter_entry_cnt))
> +   return -1;
> +
> +   if (!all_cpu_map) {
> +   all_cpu_map = perf_cpu_map__new(NULL);
> +   if (!all_cpu_map)
> +   return -1;
> +   }
> +
> +   evsel->bperf_leader_prog_fd = -1;
> +   evsel->bperf_leader_link_fd = -1;
> +
> +   /*
> +* Step 1: hold a fd on the leader program and the bpf_link, if
> +* the program is not already gone, reload the program.
> +* Use flock() to ensure exclusive access to the perf_event_attr
> +* map.
> +*/
> +   attr_map_fd = bperf_lock_attr_map(target);
> +   if (attr_map_fd < 0) {
> +   pr_err("Failed to lock perf_event_attr map\n");
> +   return -1;
> +   }
> +
> +   err = bpf_map_lookup_elem(attr_map_fd, &evsel->core.attr, &entry);
> +   if (err) {
> +   err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, 
> &entry, BPF_ANY);
> +   if (err)
> +   goto out;
> +   }
> +
> +   evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry.link_id);
> +   if (evsel->bperf_leader_link_fd < 0 &&
> +   bperf_reload_leader_program(evsel, attr_map_fd, &entry))
> +   goto out;
> +
> +   /*
> +* The bpf_link holds reference to the leader program, and the
> + 

[PATCH v2 1/3] perf-stat: introduce bperf, share hardware PMCs with BPF

2021-03-16 Thread Song Liu
perf uses performance monitoring counters (PMCs) to monitor system
performance. The PMCs are limited hardware resources. For example,
Intel CPUs have 3x fixed PMCs and 4x programmable PMCs per cpu.

Modern data center systems use these PMCs in many different ways:
system level monitoring, (maybe nested) container level monitoring, per
process monitoring, profiling (in sample mode), etc. In some cases,
there are more active perf_events than available hardware PMCs. To allow
all perf_events to have a chance to run, it is necessary to do expensive
time multiplexing of events.

On the other hand, many monitoring tools count the common metrics (cycles,
instructions). It is a waste to have multiple tools create multiple
perf_events of "cycles" and occupy multiple PMCs.

bperf tries to reduce such waste by allowing multiple perf_events of
"cycles" or "instructions" (at different scopes) to share PMUs. Instead
of having each perf-stat session read its own perf_events, bperf uses
BPF programs to read the perf_events and aggregate the readings into BPF
maps. The perf-stat session(s) then read the values from these BPF maps.

Please refer to the comment before the definition of bperf_ops for the
description of bperf architecture.

bperf is off by default. To enable it, pass --bpf-counters option to
perf-stat. bperf uses a BPF hashmap to share information about BPF
programs and maps used by bperf. This map is pinned to bpffs. The default
path is /sys/fs/bpf/perf_attr_map. The user could change the path with
option --bpf-attr-map.

Signed-off-by: Song Liu 

---
Known limitations:
1. Do not support per-cgroup events;
2. Do not support monitoring of BPF programs (perf-stat -b);
3. Do not support event groups;
4. Do not support inherited events across fork().

The following commands have been tested:

   perf stat --bpf-counters -e cycles,ref-cycles -a
   perf stat --bpf-counters -e cycles,instructions -C 1,3,4
   perf stat --bpf-counters -e cycles -p 123
   perf stat --bpf-counters -e cycles -t 100,101
   perf stat --bpf-counters -e cycles,ref-cycles -- stressapptest ...
---
 tools/perf/Documentation/perf-stat.txt|  11 +
 tools/perf/Makefile.perf  |   1 +
 tools/perf/builtin-stat.c |  10 +
 tools/perf/util/bpf_counter.c | 519 +-
 tools/perf/util/bpf_skel/bperf.h  |  14 +
 tools/perf/util/bpf_skel/bperf_follower.bpf.c |  69 +++
 tools/perf/util/bpf_skel/bperf_leader.bpf.c   |  46 ++
 tools/perf/util/bpf_skel/bperf_u.h|  14 +
 tools/perf/util/evsel.h   |  20 +-
 tools/perf/util/target.h  |   4 +-
 10 files changed, 701 insertions(+), 7 deletions(-)
 create mode 100644 tools/perf/util/bpf_skel/bperf.h
 create mode 100644 tools/perf/util/bpf_skel/bperf_follower.bpf.c
 create mode 100644 tools/perf/util/bpf_skel/bperf_leader.bpf.c
 create mode 100644 tools/perf/util/bpf_skel/bperf_u.h

diff --git a/tools/perf/Documentation/perf-stat.txt 
b/tools/perf/Documentation/perf-stat.txt
index 08a1714494f87..d2e7656b5ef81 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -93,6 +93,17 @@ report::
 
 1.102235068 seconds time elapsed
 
+--bpf-counters::
+   Use BPF programs to aggregate readings from perf_events.  This
+   allows multiple perf-stat sessions that are counting the same metric 
(cycles,
+   instructions, etc.) to share hardware counters.
+
+--bpf-attr-map::
+   With option "--bpf-counters", different perf-stat sessions share
+   information about shared BPF programs and maps via a pinned hashmap.
+   Use "--bpf-attr-map" to specify the path of this pinned hashmap.
+   The default path is /sys/fs/bpf/perf_attr_map.
+
 ifdef::HAVE_LIBPFM[]
 --pfm-events events::
 Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index f6e609673de2b..ca9aa08e85a1f 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -1007,6 +1007,7 @@ python-clean:
 SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel)
 SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp)
 SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h
+SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
 
 ifdef BUILD_BPF_SKEL
 BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 2e2e4a8345ea2..92696373da994 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -792,6 +792,12 @@ static int __run_perf_stat(int argc, const char **argv, 
int run_idx)
}
 
evlist__for_each_cpu (evsel_list, i, cpu) {
+   /*
+* bperf calls evsel__open_per_cpu() in bperf__load(), so
+* no need to call it again here.
+*/
+   if (target.use_bpf)
+   break;
affinity__set(&affinit