> On Mar 17, 2021, at 10:54 PM, Namhyung Kim <namhy...@kernel.org> wrote:
>
[...]
>> +
>> +static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
>> + struct perf_event_attr_map_entry
>> *entry)
>> +{
>> + struct bperf_leader_bpf *skel = bperf_leader_bpf__open();
>> + int link_fd, diff_map_fd, err;
>> + struct bpf_link *link = NULL;
>> +
>> + if (!skel) {
>> + pr_err("Failed to open leader skeleton\n");
>> + return -1;
>> + }
>> +
>> + bpf_map__resize(skel->maps.events, libbpf_num_possible_cpus());
>> + err = bperf_leader_bpf__load(skel);
>> + if (err) {
>> + pr_err("Failed to load leader skeleton\n");
>> + goto out;
>> + }
>> +
>> + err = -1;
>> + link = bpf_program__attach(skel->progs.on_switch);
>> + if (!link) {
>> + pr_err("Failed to attach leader program\n");
>> + goto out;
>> + }
>> +
>> + link_fd = bpf_link__fd(link);
>> + diff_map_fd = bpf_map__fd(skel->maps.diff_readings);
>> + entry->link_id = bpf_link_get_id(link_fd);
>> + entry->diff_map_id = bpf_map_get_id(diff_map_fd);
>> + err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, entry,
>> BPF_ANY);
>> + assert(err == 0);
>> +
>> + evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry->link_id);
>> + assert(evsel->bperf_leader_link_fd >= 0);
>
> Isn't it the same as link_fd?
This is a different fd on the same link.
>
>> +
>> + /*
>> + * save leader_skel for install_pe, which is called within
>> + * following evsel__open_per_cpu call
>> + */
>> + evsel->leader_skel = skel;
>> + evsel__open_per_cpu(evsel, all_cpu_map, -1);
>> +
>> +out:
>> + bperf_leader_bpf__destroy(skel);
>> + bpf_link__destroy(link);
>
> Why do we destroy it? Is it because we get an another reference?
Yes. We only need evsel->bperf_leader_link_fd to keep the whole
skeleton attached.
When multiple perf-stat sessions are sharing the leader skeleton,
only the first one loads the leader skeleton, by calling
bperf_reload_leader_program(). Other sessions simply hold an fd to
the bpf_link. More explanation in bperf__load() below.
>
>> + return err;
>> +}
>> +
>> +static int bperf__load(struct evsel *evsel, struct target *target)
>> +{
>> + struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff};
>> + int attr_map_fd, diff_map_fd = -1, err;
>> + enum bperf_filter_type filter_type;
>> + __u32 filter_entry_cnt, i;
>> +
>> + if (bperf_check_target(evsel, target, &filter_type,
>> &filter_entry_cnt))
>> + return -1;
>> +
>> + if (!all_cpu_map) {
>> + all_cpu_map = perf_cpu_map__new(NULL);
>> + if (!all_cpu_map)
>> + return -1;
>> + }
>> +
>> + evsel->bperf_leader_prog_fd = -1;
>> + evsel->bperf_leader_link_fd = -1;
>> +
>> + /*
>> + * Step 1: hold a fd on the leader program and the bpf_link, if
>> + * the program is not already gone, reload the program.
>> + * Use flock() to ensure exclusive access to the perf_event_attr
>> + * map.
>> + */
>> + attr_map_fd = bperf_lock_attr_map(target);
>> + if (attr_map_fd < 0) {
>> + pr_err("Failed to lock perf_event_attr map\n");
>> + return -1;
>> + }
>> +
>> + err = bpf_map_lookup_elem(attr_map_fd, &evsel->core.attr, &entry);
>> + if (err) {
>> + err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr,
>> &entry, BPF_ANY);
>> + if (err)
>> + goto out;
>> + }
>> +
>> + evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry.link_id);
>> + if (evsel->bperf_leader_link_fd < 0 &&
>> + bperf_reload_leader_program(evsel, attr_map_fd, &entry))
>> + goto out;
To continue the previous explanation: in bperf_reload_leader_program(),
we open another reference to the link, and destroy the skeleton. This
brings the code to the same state as the
evsel->bperf_leader_link_fd >= 0 case above.
>> +
>> + /*
>> + * The bpf_link holds reference to the leader program, and the
>> + * leader program holds reference to the maps. Therefore, if
>> + * link_id is valid, diff_map_id should also be valid.
>> + */
>> + evsel->bperf_leader_prog_fd = bpf_prog_get_fd_by_id(
>> + bpf_link_get_prog_id(evsel->bperf_leader_link_fd));
>> + assert(evsel->bperf_leader_prog_fd >= 0);
>> +
>> + diff_map_fd = bpf_map_get_fd_by_id(entry.diff_map_id);
>> + assert(diff_map_fd >= 0);
>> +
[...]
>> +static int bperf__read(struct evsel *evsel)
>> +{
>> + struct bperf_follower_bpf *skel = evsel->follower_skel;
>> + __u32 num_cpu_bpf = cpu__max_cpu();
>> + struct bpf_perf_event_value values[num_cpu_bpf];
>> + int reading_map_fd, err = 0;
>> + __u32 i, j, num_cpu;
>> +
>> + bperf_sync_counters(evsel);
>> + reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
>> +
>> + for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings);
>> i++) {
>> + __u32 cpu;
>> +
>> + err = bpf_map_lookup_elem(reading_map_fd, &i, values);
>> + if (err)
>> + goto out;
>> + switch (evsel->follower_skel->bss->type) {
>> + case BPERF_FILTER_GLOBAL:
>> + assert(i == 0);
>> +
>> + num_cpu = all_cpu_map->nr;
>> + for (j = 0; j < num_cpu; j++) {
>> + cpu = all_cpu_map->map[j];
>> + perf_counts(evsel->counts, cpu, 0)->val =
>> values[cpu].counter;
>> + perf_counts(evsel->counts, cpu, 0)->ena =
>> values[cpu].enabled;
>> + perf_counts(evsel->counts, cpu, 0)->run =
>> values[cpu].running;
>
> I'm confused with this. Does the accum_readings map contain values
> for all cpus? IIUC it has only a single entry but you access it for each cpu.
> What am I missing?
accum_readings is a percpu array. In this case, each cpu has its own
bpf_perf_event_value at index 0. The BPF program can only access the
data on the current cpu. When reading from user space, we get #-of-cpus
entries for index 0.
Does this make sense?
Thanks,
Song