Hi Serhei,
On Mon, Apr 13, 2026 at 5:31 PM Serhei Makarov <[email protected]> wrote:
>
> eu-stackprof is a new tool which profiles processes on a Linux system
> using perf_events and outputs gprof gmon.out format program counter
> histograms and callgraph-arc profiles; intended as an updated demo of
> libdwfl_stacktrace functionality and as a data-gathering tool for the
> profiledb initiative.
>
> * configure.ac: Add configure checks for C++20, eu-stackprof
> perf/libpfm dependencies.
> * src/Makefile.am (bin_PROGRAMS): Add stackprof.
> (stackprof_*): Add stackprof SOURCES, LDADD, and so forth.
> * src/stackprof.cxx: New file.
>
> Co-authored-by: <[email protected]>
> Signed-off-by: <[email protected]>
> ---
> configure.ac | 23 +-
> src/Makefile.am | 11 +-
> src/stackprof.cxx | 2083 +++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 2110 insertions(+), 7 deletions(-)
> create mode 100644 src/stackprof.cxx
>
> diff --git a/configure.ac b/configure.ac
> index f22a3f90..e5be95b8 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -888,10 +888,21 @@ fi
> AC_CHECK_PROG(HAVE_ZSTD, zstd, yes, no)
> AM_CONDITIONAL([HAVE_ZSTD],[test "x$HAVE_ZSTD" = "xyes"])
>
> -# For tests that need to use C++11
> -AX_CXX_COMPILE_STDCXX(11, noext, optional)
> -AS_IF([test "x$HAVE_CXX11" = "x1"], [HAVE_CXX11=yes], [HAVE_CXX11=no])
> -AM_CONDITIONAL([HAVE_CXX11],[test "x$HAVE_CXX11" = "xyes"])
> +# For tests that need to use C++20
> +AX_CXX_COMPILE_STDCXX(20, noext, optional)
> +AS_IF([test "x$HAVE_CXX20" = "x1"], [HAVE_CXX20=yes], [HAVE_CXX20=no])
> +AM_CONDITIONAL([HAVE_CXX20],[test "x$HAVE_CXX20" = "xyes"])
> +
> +
> +# For eu-stackprof
> +# optional:
> +AC_CHECK_HEADERS([perfmon/pfmlib_perf_event.h])
> +AM_CONDITIONAL([HAVE_LIBPFM], [test
> "x${ac_cv_header_perfmon_pfmlib_perf_event_h}" = "xyes" ])
> +AC_CHECK_LIB(pfm, pfm_get_os_event_encoding, [AC_SUBST(libpfm_LIBS,
> '-lpfm')])
> +# required:
> +AC_CHECK_HEADERS([linux/perf_event.h])
> +AM_CONDITIONAL([ENABLE_STACKPROF],[test
> "x${ac_cv_header_linux_perf_event_h}x${HAVE_CXX20}" = "xyesxyes" ])
> +
>
> AC_CHECK_HEADERS([execinfo.h])
>
> @@ -941,7 +952,7 @@ AS_IF([test "x$with_libarchive" = "xyes" -a
> "x$have_libarchive" != "xyes"], [
> # pronounce judgement on ability to build server, overridden by =yes/=no
> if test "x$enable_debuginfod" = "xno"; then
> true
> -elif test "x$have_jsonc$HAVE_CXX11$have_libarchive$have_sqlite3" =
> "xyesyesyesyes"; then
> +elif test "x$have_jsonc$HAVE_CXX20$have_libarchive$have_sqlite3" =
> "xyesyesyesyes"; then
I would prefer that we not make C++20 mandatory for debuginfod, to
keep it compatible with older compilers. Also, some GCC versions (like
GCC 11, the RHEL 9 default) support -std=c++20 but do not provide
<format>, which eu-stackprof includes. In that case
AX_CXX_COMPILE_STDCXX(20, noext, optional) detects -std=c++20 and
ENABLE_STACKPROF may get set, but eu-stackprof then fails to compile
because <format> is missing. I haven't tested this patch in such an
environment, but I've run into this kind of issue elsewhere.
I suggest keeping the existing HAVE_CXX11 checks for debuginfod and
the testsuite, and adding separate checks for HAVE_CXX20. These should
use AX_CXX_COMPILE_STDCXX plus checks for the presence of <format> and
any other C++20 headers used by eu-stackprof that could be missing.
> enable_debuginfod=yes
> elif test "x$enable_debuginfod" = "xyes"; then
> AC_MSG_ERROR([unable to build debuginfod, missing libmicrohttpd, sqlite3
> or libarchive])
> @@ -1098,7 +1109,7 @@ AC_MSG_NOTICE([
> EXTRA TEST FEATURES (used with make check)
> have bunzip2 installed (required) : ${HAVE_BUNZIP2}
> have zstd installed : ${HAVE_ZSTD}
> - C++11 : ${HAVE_CXX11}
> + C++20 : ${HAVE_CXX20}
> debug branch prediction : ${use_debugpred}
> gprof support : ${use_gprof}
> gcov support : ${use_gcov}
> diff --git a/src/Makefile.am b/src/Makefile.am
> index f041d458..f753c70c 100644
> --- a/src/Makefile.am
> +++ b/src/Makefile.am
> @@ -35,6 +35,9 @@ bin_PROGRAMS = readelf nm size strip elflint findtextrel
> addr2line \
> if ENABLE_STACKTRACE
> bin_PROGRAMS += stacktrace
> endif
> +if ENABLE_STACKPROF
> +bin_PROGRAMS += stackprof
> +endif
>
> noinst_LIBRARIES = libar.a
>
> @@ -127,7 +130,13 @@ endif
> elfcompress_LDADD = $(libebl) $(libelf) $(libdw) $(libeu) $(argp_LDADD)
> elfclassify_LDADD = $(libelf) $(libdw) $(libeu) $(argp_LDADD)
> srcfiles_SOURCES = srcfiles.cxx
> -srcfiles_LDADD = $(libdw) $(libelf) $(libeu) $(argp_LDADD)
> $(libarchive_LIBS) $(libdebuginfod)
> +srcfiles_LDADD = $(libdw) $(libelf) $(libeu) $(argp_LDADD)
> $(libarchive_LIBS) $(libdebuginfod)
> +if ENABLE_STACKPROF
> +stackprof_SOURCES = stackprof.cxx
> +stackprof_CPPFLAGS = $(AM_CPPFLAGS) $(jsonc_CXXFLAGS)
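jsonc_CXXFLAGS isn't set anywhere, so this expands to nothing. Did you
mean $(jsonc_CFLAGS), to match the $(jsonc_LIBS) used in
stackprof_LDADD below?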
> +stackprof_CXXFLAGS = -Wall
> +stackprof_LDADD = $(libebl) $(libdw) $(libelf) $(libeu) $(argp_LDADD)
> $(libpfm_LIBS) $(jsonc_LIBS)
> +endif
>
> installcheck-binPROGRAMS: $(bin_PROGRAMS)
> bad=0; pid=$$$$; list="$(bin_PROGRAMS)"; for p in $$list; do \
> diff --git a/src/stackprof.cxx b/src/stackprof.cxx
> new file mode 100644
> index 00000000..33720e80
> --- /dev/null
> +++ b/src/stackprof.cxx
> @@ -0,0 +1,2083 @@
> +/* Collect stack-trace profiles of running program(s).
> + Copyright (C) 2025-2026 Red Hat, Inc.
> + This file is part of elfutils.
> +
> + This file is free software; you can redistribute it and/or modify
> + it under the terms of the GNU General Public License as published by
> + the Free Software Foundation; either version 3 of the License, or
> + (at your option) any later version.
> +
> + elfutils is distributed in the hope that it will be useful, but
> + WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + GNU General Public License for more details.
> +
> + You should have received a copy of the GNU General Public License
> + along with this program. If not, see <http://www.gnu.org/licenses/>. */
> +
> +#ifdef HAVE_CONFIG_H
> +# include <config.h>
> +#endif
> +
> +#include "printversion.h"
> +
> +#include <string>
> +#include <memory>
> +#include <iomanip>
> +#include <map>
> +#include <unordered_map>
> +#include <vector>
> +#include <bitset>
> +#include <stdexcept>
> +#include <cstring>
> +#include <csignal>
> +#include <cassert>
> +#include <chrono>
> +#include <iostream>
> +#include <fstream>
> +#include <sstream>
> +#include <cinttypes>
> +#include <format>
> +#include <filesystem>
> +
> +#include <sys/utsname.h>
> +
> +#include <sys/syscall.h>
> +#include <sys/ioctl.h>
> +#include <sys/mman.h>
> +#include <sys/wait.h>
> +#include <poll.h>
> +#ifdef HAVE_LINUX_PERF_EVENT_H
> +#include <linux/perf_event.h>
> +#endif
> +#include <argp.h>
> +#include <fcntl.h>
> +#include <dirent.h>
> +
> +#include <system.h>
> +
> +#ifdef HAVE_PERFMON_PFMLIB_PERF_EVENT_H
> +#include <perfmon/pfmlib_perf_event.h>
> +#endif
> +
> +#include <json-c/json.h>
> +
> +#include <gelf.h>
> +#include <dwarf.h>
> +#include <libdwfl.h>
> +#include <libdw.h>
> +#include "../libebl/libebl.h"
> +#include "../libdwfl_stacktrace/libdwfl_stacktrace.h"
> +
> +using namespace std; // so we don't have to std:: prefix everything in here
> +
> +////////////////////////////////////////////////////////////////////////
> +// find_debuginfo callbacks
> +
> +#ifdef FIND_DEBUGINFO
> +
> +static char *debuginfo_path = NULL;
> +
> +static const Dwfl_Callbacks dwfl_cfi_callbacks =
> + {
> + .find_elf = dwflst_tracker_linux_proc_find_elf,
> + .find_debuginfo = dwfl_standard_find_debuginfo,
> + .debuginfo_path = &debuginfo_path,
> + };
> +
> +#else
> +
> +int
> +nop_find_debuginfo (Dwfl_Module *mod __attribute__((unused)),
> + void **userdata __attribute__((unused)),
> + const char *modname __attribute__((unused)),
> + GElf_Addr base __attribute__((unused)),
> + const char *file_name __attribute__((unused)),
> + const char *debuglink_file __attribute__((unused)),
> + GElf_Word debuglink_crc __attribute__((unused)),
> + char **debuginfo_file_name __attribute__((unused)))
> +{
> +#ifdef DEBUG_MODULES
> + cerr << format("nop_find_debuginfo: modname={} file_name={}
> debuglink_file={}\n", modname, file_name, debuglink_file);
> +#endif
> + return -1;
> +}
> +
> +static const Dwfl_Callbacks dwfl_cfi_callbacks =
> +{
> + .find_elf = dwflst_tracker_linux_proc_find_elf,
> + .find_debuginfo = nop_find_debuginfo, /* work with CFI only */
> +};
> +
> +#endif /* FIND_DEBUGINFO */
> +
> +
> +////////////////////////////////////////////////////////////////////////
> +// class decls
> +
> +// Unwind statistics for a Dwfl and associated process.
> +struct UnwindDwflStats {
> + Dwfl *dwfl;
> + string comm;
> + int max_frames; /* for diagnostic purposes */
> + int total_samples; /* for diagnostic purposes */
> + int lost_samples; /* for diagnostic purposes */
> + int shown_errors; /* for diagnostic purposes */
> + Dwfl_Unwound_Source last_unwound; /* track CFI source, for diagnostic
> purposes */
> + Dwfl_Unwound_Source worst_unwound; /* track CFI source, for diagnostic
> purposes */
> +};
> +
> +struct hash_arc {
> + template <class T1, class T2>
> + size_t operator()(const pair<T1, T2> &p) const {
> + return hash<T1>()(p.first) ^ hash<T2>()(p.second);
> + }
> +};
> +
> +// Unwind statistics for a single module identified by build-id.
> +struct UnwindModuleStats {
> + map<uint64_t, uint32_t> histogram; /* sorted by pc */
> + unordered_map<pair<uint64_t, uint64_t>, uint32_t, hash_arc> callgraph;
> +
> + void record_pc(Dwarf_Addr pc) {
> + if (histogram.count(pc) == 0)
> + histogram[pc]=1;
> + else
> + histogram[pc]++;
> + }
> + void record_callgraph_arc(Dwarf_Addr from, Dwarf_Addr to) {
> + pair<uint64_t, uint64_t> arc(from, to);
> + if (callgraph.count(arc) == 0)
> + callgraph[arc]=1;
> + else
> + callgraph[arc]++;
> + }
> +};
> +
> +struct UnwindStatsTable
> +{
> + unordered_map<pid_t, UnwindDwflStats> dwfl_tab;
> + unordered_map<string, UnwindModuleStats> buildid_tab;
> + typedef map<string, UnwindModuleStats> buildid_map_t;
> +
> + UnwindStatsTable () {}
> + ~UnwindStatsTable () {}
> +
> + UnwindDwflStats *pid_find_or_create(pid_t pid);
> + string pid_find_comm(pid_t pid);
> + Dwfl *pid_find_dwfl(pid_t pid);
> + void pid_store_dwfl(pid_t pid, Dwfl *dwfl);
> +
> + UnwindModuleStats *buildid_find(string buildid);
> + UnwindModuleStats *buildid_find_or_create(string buildid, Dwfl_Module
> *mod);
> +
> + void print_summary() const;
> +};
> +
> +class PerfConsumer;
> +
> +// A PerfReader creates perf_events file descriptors, monitors the
> +// mmap'd ring buffers for events, and dispatches decoded forms to a
> +// PerfConsumer.
> +class PerfReader
> +{
> +private:
> + /* Sized by number of CPUs or threads: */
> + vector<int> perf_fds;
> + vector<perf_event_mmap_page *> perf_headers;
> + vector<pollfd> pollfds;
> +
> + PerfConsumer* consumer; // pluralize!
> + Ebl* default_ebl;
> + uint64_t sample_regs_user;
> + int sample_regs_count;
> + bool enabled;
> + int page_size;
> + int page_count;
> + int mmap_size;
> + vector<uint8_t> event_wraparound_temp; // for events straddling ring
> buffer end
> +
> + void decode_event(const perf_event_header* ehdr);
> +
> +public:
> + // PerfReader(perf_event_attr* attr, int pid, PerfConsumer* consumer); //
> attach to process hierarchy; may modify *attr
> + PerfReader(perf_event_attr* attr, PerfConsumer* consumer, int pid=-1);
> // systemwide; may modify *attr
> +
> + ~PerfReader();
> +
> + void process_some(); // run briefly, relay decoded perf_events to consumer
> + uint64_t regs_mask() { return this->sample_regs_user; }
> + Ebl *ebl() { return this->default_ebl; }
> +};
> +
> +// A PerfConsumer receives both raw and decoded (fields split out into
> function parameters)
> +// perf event records from a PerfReader. Pure interface.
> +class PerfConsumer
> +{
> +protected:
> + PerfReader *reader; /* access sample_regs_user etc. metadata */
> +
> +public:
> + PerfConsumer() {}
> + PerfConsumer(PerfReader *reader) : reader(reader) {}
> + void set_reader(PerfReader *reader) { this->reader = reader; }
> +
> + virtual ~PerfConsumer() {}
> + virtual void process(const perf_event_header* sample) {}
> +
> + virtual void process_comm(const perf_event_header* sample,
> + uint32_t pid, uint32_t tid, bool exec, const
> string& comm) {}
> + virtual void process_exit(const perf_event_header* sample,
> + uint32_t pid, uint32_t ppid,
> + uint32_t tid, uint32_t ptid) {}
> + virtual void process_fork(const perf_event_header* sample,
> + uint32_t pid, uint32_t ppid,
> + uint32_t tid, uint32_t ptid) {}
> + virtual void process_sample(const perf_event_header* sample,
> + uint64_t ip,
> + uint32_t pid, uint32_t tid,
> + uint64_t time,
> + uint64_t abi,
> + uint32_t nregs, const uint64_t *regs,
> + uint64_t data_size, const uint8_t *data) {}
> + virtual void process_mmap2(const perf_event_header* sample,
> + uint32_t pid, uint32_t tid,
> + uint64_t addr, uint64_t len, uint64_t pgoff,
> + uint8_t build_id_size, const uint8_t *build_id,
> + const char *filename) {}
> +};
> +
> +// A StatsPerfConsumer is a toy concrete object that accepts decoded
> +// perf events and logs and records basic stats about them.
> +class StatsPerfConsumer: public PerfConsumer
> +{
> + unordered_map<int,unsigned> event_type_counts;
> +
> +public:
> + StatsPerfConsumer() {}
> + ~StatsPerfConsumer(); // report to stdout
> + void process_comm(const perf_event_header* sample,
> + uint32_t pid, uint32_t tid, bool exec, const string&
> comm);
> + void process_exit(const perf_event_header* sample,
> + uint32_t pid, uint32_t ppid,
> + uint32_t tid, uint32_t ptid);
> + void process_fork(const perf_event_header* sample,
> + uint32_t pid, uint32_t ppid,
> + uint32_t tid, uint32_t ptid);
> + void process_sample(const perf_event_header* sample,
> + uint64_t ip,
> + uint32_t pid, uint32_t tid,
> + uint64_t time,
> + uint64_t abi,
> + uint32_t nregs, const uint64_t *regs,
> + uint64_t data_size, const uint8_t *data);
> + void process_mmap2(const perf_event_header* sample,
> + uint32_t pid, uint32_t tid,
> + uint64_t addr, uint64_t len, uint64_t pgoff,
> + uint8_t build_id_size, const uint8_t *build_id,
> + const char *filename);
> + void process(const perf_event_header* sample);
> +};
> +
> +// An UnwindSample records an unwound call stack from a perf-event
> +// sample.
> +struct UnwindSample
> +{
> + const perf_event_header *event;
> + Dwfl *dwfl;
> + uint32_t pid, tid;
> + vector<Dwarf_Addr> addrs;
> + int elfclass;
> +
> + Dwarf_Addr base; /* for diagnostic purposes */
> + Dwarf_Addr sp; /* for diagnostic purposes */
> +};
> +
> +class UnwindSampleConsumer;
> +
> +// A PerfConsumerUnwinder accepts decoded perf events, and produces
> +// UnwindSample objects from them for relaying to an
> +// UnwindSampleConsumer.
> +class PerfConsumerUnwinder: public PerfConsumer
> +{
> + UnwindSampleConsumer *consumer;
> + UnwindSample last_us; // XXX: why & is this safe to hang onto?
> + Dwflst_Process_Tracker *tracker;
> + UnwindStatsTable *stats;
> + unsigned maxframes;
> +
> + int find_procfile(Dwfl *dwfl, pid_t *pid, Elf **elf, int *elf_fd);
> + Dwfl *find_dwfl(pid_t pid, const uint64_t *regs, uint32_t nregs,
> + Elf **elf, bool *cached);
> +
> + int get_sp_reg(bool is_abi32);
> +
> +public:
> + PerfConsumerUnwinder(UnwindSampleConsumer* usc, UnwindStatsTable *ust);
> + PerfConsumerUnwinder(UnwindSampleConsumer* usc, UnwindStatsTable *ust,
> PerfReader *reader);
> + ~PerfConsumerUnwinder();
> +
> + /* libdwfl{st} callbacks */
> + Dwfl *init_dwfl(pid_t pid);
> + int unwind_frame_cb(Dwfl_Frame *state);
> +
> + void process_comm(const perf_event_header* sample,
> + uint32_t pid, uint32_t tid, bool exec, const string&
> comm);
> + void process_exit(const perf_event_header* sample,
> + uint32_t pid, uint32_t ppid,
> + uint32_t tid, uint32_t ptid);
> + void process_fork(const perf_event_header* sample,
> + uint32_t pid, uint32_t ppid,
> + uint32_t tid, uint32_t ptid);
> + void process_sample(const perf_event_header* sample,
> + uint64_t ip,
> + uint32_t pid, uint32_t tid,
> + uint64_t time,
> + uint64_t abi,
> + uint32_t nregs, const uint64_t *regs,
> + uint64_t data_size, const uint8_t *data);
> + void process_mmap2(const perf_event_header* sample,
> + uint32_t pid, uint32_t tid,
> + uint64_t addr, uint64_t len, uint64_t pgoff,
> + uint8_t build_id_size, const uint8_t *build_id,
> + const char *filename);
> +};
> +
> +// An UnwindSampleConsumer receives an UnwindSample from a
> PerfConsumerUnwinder.
> +// Pure abstract.
> +class UnwindSampleConsumer
> +{
> +public:
> + UnwindSampleConsumer() {}
> + virtual ~UnwindSampleConsumer() {}
> + virtual void process(const UnwindSample* sample) = 0;
> + virtual int maxframes() = 0;
> +};
> +
> +
> +// An UnwindStatsConsumer is a toy that just collects statistics about
> +// a received stream of UnwindSamples.
> +class UnwindStatsConsumer: public UnwindSampleConsumer
> +{
> + UnwindStatsTable *stats;
> +
> +public:
> + UnwindStatsConsumer(UnwindStatsTable *usc) : stats(usc) {}
> + ~UnwindStatsConsumer();
> + void process(const UnwindSample* sample);
> + int maxframes();
> +};
> +
> +
> +// An GprofUnwindSampleConsumer instance consumes UnwindSamples and tabulates
> +// them by buildid, for eventual writing out into gmon.out format files.
> +class GprofUnwindSampleConsumer: public UnwindSampleConsumer
> +{
> + UnwindStatsTable *stats;
> + unordered_map<string, string> buildid_to_mainfile;
> + unordered_map<string, string> buildid_to_debugfile;
> + void record_gmon_hist(ostream &of, map<uint64_t, uint32_t> &histogram,
> uint64_t low_pc, uint64_t high_pc, uint64_t alignment);
> +
> +public:
> + GprofUnwindSampleConsumer(UnwindStatsTable *usc) : stats(usc) {}
> + ~GprofUnwindSampleConsumer(); // write out all the gmon.$BUILDID.out files
> + void record_gmon_out(const string& buildid, UnwindModuleStats& m); //
> write out one gmon.$BUILDID.out file
> + void process(const UnwindSample* sample); // accumulate hits / callgraph
> edges (need maxdepth=1 only)
> + int maxframes();
> +};
> +
> +// hypothetical: FlamegraphUnwindSampleConsumer, taking in a bigger maxdepth
> +// hypothetical: PprofUnwindSampleConsumer, https://github.com/google/pprof
> +
> +
> +////////////////////////////////////////////////////////////////////////
> +// command line parsing and main()
> +
> +/* Name and version of program. */
> +ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
> +
> +/* Bug report address. */
> +ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
> +
> +#define HIST_SPLIT_OPTS "none/even/flex"
> +
> +/* Definitions of arguments for argp functions. */
> +static const struct argp_option options[] =
> +{
> + { NULL, 0, NULL, OPTION_DOC, N_("Output options:"), 1 },
> + { "verbose", 'v', NULL, 0, N_ ("Increase verbosity of logging messages
> (modules/samples/frames/more)."), 0 },
> + /* TODO: Add "quiet" option suppressing summary table. */
> + { "gmon", 'g', NULL, 0, N_("Generate gmon.BUILDID.out files for each
> binary."), 0 },
> + { "hist-split",'G', HIST_SPLIT_OPTS, 0, N_("Histogram splitting method for
> gmon, default 'even'."), 0 },
> + { "maxframes", 'n', "MAXFRAMES", 0, N_("Maximum number of frames to
> unwind, default 1 with --gmon, 256 otherwise."), 0 }, /* TODO */
> + { "output", 'o', "DIR", 0, N_("Output directory for gmon files."), 0 },
> + { "force", 'f', NULL, 0, N_("Unlink output files to force writing as
> new."), 0 },
> + { "pid", 'p', "PID", 0, N_("Profile given PID, and its future children."),
> 0 },
> +#ifdef HAVE_PERFMON_PFMLIB_PERF_EVENT_H
> + { "event", 'e', "EVENT", 0, N_("Sample given LIBPFM event
> specification."), 0 },
> +#define ARGP_KEY_EVENT_LIST 0x1000
> + { "event-list", ARGP_KEY_EVENT_LIST, NULL, 0, N_("Sample given LIBPFM
> event specification."), 0 },
> +#endif
> + { NULL, 0, NULL, 0, NULL, 0 }
> +};
> +
> +static error_t parse_opt (int key, char *arg, struct argp_state *state);
> +static const struct argp argp =
> + {
> + options, parse_opt, "[--] [CMD]...", N_("Collect systemwide stack-trace
> profiles."),
> + NULL, NULL, NULL
> + };
> +
> +// How to divide the program counter histograms in gmon output:
> +enum hist_split_method {
> + HIST_SPLIT_NONE = 0, /* one histogram for the entire executable */
> + HIST_SPLIT_EVEN = 1, /* all histograms the same size */
> + HIST_SPLIT_FLEX = 2, /* variable-size histograms */
> +};
> +
> +// Globals set based on command line options:
> +static unsigned verbose;
> +static bool gmon;
> +static hist_split_method gmon_hist_split = HIST_SPLIT_EVEN;
> +static string output_dir = ".";
> +static bool output_force = false; // overwrite preexisting output files?
> +static int pid;
> +static int opt_maxframes = -1; // set to >= 0 to override default maxframes
> in consumer
> +static string libpfm_event;
> +static string libpfm_event_decoded;
> +static perf_event_attr attr;
> +static bool branch_record = false; // using accurate branch recording for
> call-graph arcs rather than backtrace heuristics
> +
> +// Verbosity categories:
> +static bool show_summary = true; /* XXX could suppress with --quiet */
> +static bool show_modules = false; /* -> first sample for each module */
> +static bool show_samples = false; /* -> every sample */
> +static bool show_frames = false;
> +static bool show_debugfile = false;
> +static bool show_tmi = false; /* -> perf, cfi details */
> +
> +static error_t
> +parse_opt (int key, char *arg, struct argp_state *state)
> +{
> + (void)state;
> +
> + switch (key)
> + {
> + case ARGP_KEY_INIT:
> + break;
> +
> + case 'v':
> + verbose ++;
> + break;
> +
> + case 'g':
> + gmon = true;
> + break;
> +
> + case 'G':
> + gmon = true; /* Automatically enable gmon mode if they set a gmon
> option. */
> + if (std::string_view(arg) == "none")
> + gmon_hist_split = HIST_SPLIT_NONE;
> + else if (std::string_view(arg) == "even")
> + gmon_hist_split = HIST_SPLIT_EVEN;
> + else if (std::string_view(arg) == "flex")
> + gmon_hist_split = HIST_SPLIT_FLEX;
> + break;
> +
> + case 'o':
> + gmon = true;
> + output_dir = arg;
> + break;
> +
> + case 'p':
> + pid = atoi(arg);
> + break;
> +
> + case 'n':
> + opt_maxframes = atoi(arg);
> + if (opt_maxframes < 0)
> + {
> + argp_error (state, N_("-n MAXFRAMES should be 0 or higher."));
> + return EINVAL;
> + }
> + break;
> +
> + case 'f':
> + output_force = true;
> + break;
> +
> +#ifdef HAVE_PERFMON_PFMLIB_PERF_EVENT_H
> + case 'e':
> + libpfm_event = arg;
> + break;
> +
> + case ARGP_KEY_EVENT_LIST:
> + {
> + pfm_pmu_info_t pinfo;
> + pfm_event_info_t info;
> +
> + pfm_err_t rc = pfm_initialize();
> + if (rc != PFM_SUCCESS)
> + {
> + cerr << format("ERROR: pfm_initialized failed: {}\n",
> pfm_strerror(rc));
> + exit(1);
> + }
> +
> + memset(&pinfo, 0, sizeof(pinfo));
> + memset(&info, 0, sizeof(info));
> + pinfo.size = sizeof(pinfo);
> + info.size = sizeof(info);
> +
> + for(int j= PFM_PMU_NONE ; j< PFM_PMU_MAX; j++)
> + {
> + pfm_err_t ret = pfm_get_pmu_info((pfm_pmu_t) j, &pinfo);
> + if (ret != PFM_SUCCESS)
> + continue;
> + if (! pinfo.is_present)
> + continue;
> + for (int i = pinfo.first_event; i != -1; i =
> pfm_get_event_next(i))
> + {
> + ret = pfm_get_event_info(i, PFM_OS_PERF_EVENT_EXT, &info);
> + if (ret == PFM_SUCCESS)
> + clog << format("{}::{}\n", pinfo.name, info.name);
> + }
> + }
> + }
> + exit(0);
> +#endif
> +
> + default:
> + return ARGP_ERR_UNKNOWN;
> + }
> + return 0;
> +}
> +
> +sig_atomic_t interrupted;
> +
> +void sigint_handler(int sig)
> +{
> + interrupted ++;
> + if (interrupted > 1)
> + _exit(1);
> +}
> +
> +int
> +main (int argc, char *argv[])
> +{
> + int remaining;
> + int pipefd[2] = {-1, -1}; // for CMD child process post-fork sync
> + bool has_cmd = false;
> + (void) argp_parse (&argp, argc, argv, 0, &remaining, NULL);
> +
> + /* show_summary is true by default */
> + if (verbose > 0) show_modules = true;
> + if (verbose > 1) show_samples = true;
> + if (verbose > 2) show_frames = true;
> + if (verbose > 3) show_debugfile = true;
> + if (verbose > 4) show_tmi = true;
> +
> + if (pid > 0 && remaining < argc) // got a pid AND a cmd? reject
> + {
> + cerr << format("ERROR: Must not specify both -p PID and CMD\n");
> + exit(1);
> + }
> +
> + bool systemwide = (pid == 0) || (remaining == argc);
> + (void) systemwide;
> +
> + try
> + {
> + memset(&attr, 0, sizeof(attr));
> + attr.size = sizeof(attr);
> +
> + if (libpfm_event != "")
> + {
> +#if HAVE_PERFMON_PFMLIB_PERF_EVENT_H
> + pfm_err_t rc = pfm_initialize();
> + if (rc != PFM_SUCCESS)
> + {
> + cerr << format("ERROR: pfm_initialized failed: {}\n",
> pfm_strerror(rc));
> + exit(1);
> + }
> + char* fstr = nullptr;
> + pfm_perf_encode_arg_t arg = { .attr = &attr, .fstr=&fstr, .size =
> sizeof(arg) };
> + rc = pfm_get_os_event_encoding(libpfm_event.c_str(),
> + PFM_PLM3, /* userspace, whether
> systemwide or not */
> + PFM_OS_PERF_EVENT_EXT, &arg);
> + if (rc != PFM_SUCCESS)
> + {
> + cerr << format("ERROR: pfm_get_os_event_encoding failed: {}\n",
> pfm_strerror(rc));
> + exit(1);
> + }
> + if (verbose)
> + {
> + clog << format("libpfm expanded {} to {}\n", libpfm_event,
> fstr);
> + }
> + libpfm_event_decoded = fstr; // overwrite
> + free(fstr);
> +#endif
> + }
> + else
> + {
> + // same as: -e perf::CPU-CLOCK:freq=1000
> + attr.type = PERF_TYPE_SOFTWARE;
> + attr.config = PERF_COUNT_SW_CPU_CLOCK;
> + attr.sample_freq = 1000;
> + attr.freq = 1;
> + attr.exclude_kernel = 1;
> + attr.exclude_hv = 1;
> + attr.exclude_guest = 1;
> + }
> +
> + if (show_summary)
> + {
> + clog << format("perf_event_attr configuration type={:x} config={:x}
> {}{}\n",
> + attr.type, attr.config,
> + (attr.freq ? "sample_freq=" : "sample_period="),
> + (attr.freq ? attr.sample_freq :
> attr.sample_period));
> + clog << endl;
> + }
> +
> + if (remaining < argc) // got a CMD... suffix? ok start it
> + {
> + has_cmd = true;
> + int rc = pipe (pipefd); // will use pipefd[] >= 0 as flag for
> synchronization just below
> + if (rc < 0)
> + {
> + cerr << format("ERROR: pipe failed: {}\n", strerror(errno));
> + exit(1);
> + }
> +
> + pid = fork();
> + if (pid == 0) // in child
> + {
> + close (pipefd[1]); // close write end
> + char dummy;
> + int rc = read (pipefd[0], &dummy, 1); // block until parent is
> ready
> + if (rc != 1)
> + {
> + cerr << format("ERROR: child sync read failed: {}\n",
> strerror(errno));
> + exit(1);
> + }
> + close (pipefd[0]);
> + execvp (argv[remaining], & argv[remaining] /* not +1: child
> argv[0] included! */ );
> + // notreached unless error
> + cerr << format("ERROR: execvp failed: {}\n", strerror(errno));
> + exit(1);
> + }
> + else if (pid > 0) // in parent
> + {
> + close (pipefd[0]); // close read end
> + // will write to pipefd[1] after perfreader sicced at child
> + }
> + else // error
> + {
> + cerr << format("ERROR: fork failed: {}\n", strerror(errno));
> + exit(1);
> + }
> + }
> +
> + // Create the perf processing pipeline as per command line options
> + PerfReader *pr = nullptr;
> + UnwindStatsTable *tab = nullptr;
> + UnwindSampleConsumer *usc = nullptr;
> + PerfConsumerUnwinder *pcu = nullptr;
> + StatsPerfConsumer *spc = nullptr;
> +
> + if (gmon)
> + {
> + tab = new UnwindStatsTable();
> + usc = new GprofUnwindSampleConsumer(tab);
> + pcu = new PerfConsumerUnwinder(usc, tab);
> + pr = new PerfReader(&attr, pcu, pid);
> + }
> + else
> + {
> + tab = new UnwindStatsTable();
> + usc = new UnwindStatsConsumer(tab);
> + pcu = new PerfConsumerUnwinder (usc, tab);
> + pr = new PerfReader(&attr, pcu, pid);
> +#if 0
> + spc = new StatsPerfConsumer();
> + pr = new PerfReader(&attr, spc, pid);
> +#endif
> + }
> +
> + signal(SIGINT, sigint_handler);
> + signal(SIGTERM, sigint_handler);
> +
> + if (pid > 0 && has_cmd) // need to release child CMD process?
> + {
> + int rc = write(pipefd[1], "x", 1); // unblock child
> + assert (rc == 1);
> + close(pipefd[1]);
> + }
> +
> + if (show_summary)
> + {
> + clog << "Starting stack profile collection ";
> + if (pid) clog << format("pid {}", pid);
> + else clog << "systemwide";
> + clog << "\n";
> + }
> +
> + while (true) // main loop
> + {
> + if (interrupted) break;
> + if (pid > 0) waitpid(pid, NULL, WNOHANG); // reap dead child to
> allow kill(pid, 0) to signal death
> + if (pid > 0 && kill(pid, 0) != 0) break; // exit if child or
> targeted non-child process died
> + pr->process_some();
> + }
> +
> + delete pr;
> + delete usc;
> + delete pcu;
> + delete spc;
> + delete tab;
> +
> + // reporting done in various destructors
> + }
> + catch (const exception& e)
> + {
> + cerr << format("{}\n", e.what());
> + }
> +
> + return 0;
> +}
> +
> +
> +////////////////////////////////////////////////////////////////////////
> +// perf reader
> +
> +PerfReader::PerfReader(perf_event_attr* attr, PerfConsumer* consumer, int
> pid)
> +{
> + this->page_size = getpagesize();
> + this->page_count = 64; /* XXX May want to verify if this is a large-enough
> power-of-2. */
> + this->mmap_size = this->page_size * (this->page_count + 1); // total mmap
> size, incl header page
> + this->event_wraparound_temp.resize(this->mmap_size); // NB: never resize
> this object again!
> + this->consumer = consumer;
> + this->consumer->set_reader(this);
> + this->enabled = false;
> +
> + struct utsname u;
> + uname(&u);
> + int em = EM_NONE;
> + std::string_view machine = u.machine;
> + if (machine == "x86_64") em = EM_X86_64;
> + else if (machine == "i686" || machine == "i386") em = EM_386;
> + else if (machine == "aarch64" || machine == "armv7l") em = EM_ARM;
> + else {
> + cerr << format("ERROR: Unsupported architecture: {}\n", u.machine);
> + exit(1);
> + }
> + this->default_ebl = ebl_openbackend_machine(em);
> + this->sample_regs_user = ebl_perf_frame_regs_mask (this->default_ebl);
> + this->sample_regs_count = bitset<64>(this->sample_regs_user).count();
> +
> + attr->sample_regs_user = this->sample_regs_user;
> + attr->sample_stack_user = 8192; // enough?
> + attr->sample_type = (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME);
> + attr->sample_type |= PERF_SAMPLE_REGS_USER;
> + attr->sample_type |= PERF_SAMPLE_STACK_USER;
> + // XXX Maybe: ask for PERF_SAMPLE_CALLCHAIN, in case kernel can
> + // unwind for us? Would want an option to control this, to allow
> + // eu-stackprof to exercise our own unwinding functionality when
> + // testing.
> + attr->mmap = 1;
> + attr->mmap2 = 1;
> + attr->exclude_kernel = 1; /* in-kernel unwinding not relevant for our
> usecase */
> + attr->disabled = 1; /* will get enabled soon */
> + attr->task = 1; // catch FORK/EXIT
> + attr->comm = 1; // catch EXEC
> + attr->comm_exec = 1; // catch EXEC
> + // attr->precise_ip = 2; // request 0 skid ... but that conflicts with
> PERF_COUNT_HW_BRANCH_INSTRUCTIONS:freq=4000
> + attr->build_id = 1; // request build ids in MMAP2 events
> +
> + if (pid > 0) // actually only once, to allow break in case of error
> + attr->inherit = 1; // propagate to child processes
> +
> +
> + if (show_tmi)
> + { // hexdump attr
> + clog << "perf_event_attr hexdump: ";
> + auto bytes = (unsigned char*) attr;
> + for (size_t x = 0; x<sizeof(*attr); x++)
> + clog << ((x % 8) ? "" : " ")
> + << ((x % 32) ? "" : "\n")
> + << format("{:02x}", (unsigned)bytes[x]);
> + clog << "\n";
> + }
> +
> + // Iterate over all cpus, even if attaching to a single pid, because
> + // we set ->inherit=1. That requires possible concurrency, which is
> + // enabled by per-cpu ring buffers.
> + int ncpus = sysconf(_SC_NPROCESSORS_CONF);
> + for (int cpu=0; cpu<ncpus; cpu++)
> + {
> + int fd = syscall(__NR_perf_event_open, attr,
> + (pid > 0 ? pid : -1), cpu, -1,
> + PERF_FLAG_FD_CLOEXEC);
> + if (fd < 0)
> + {
> + cerr << format("WARNING: unable to open perf event for cpu {}:
> {}\n", cpu, strerror(errno));
> + continue;
> + }
> + void *buf = mmap(NULL, this->mmap_size, PROT_READ | PROT_WRITE,
> MAP_SHARED, fd, 0);
> + if (buf == MAP_FAILED)
> + {
> + cerr << format("ERROR: perf event mmap failed: {}\n",
> strerror(errno));
> + close(fd);
> + continue;
> + }
> + this->perf_fds.push_back(fd);
> + this->perf_headers.push_back((perf_event_mmap_page*) buf);
> + struct pollfd pfd = {.fd = fd, .events=POLLIN};
> + this->pollfds.push_back(pfd);
> + }
> +
> + if (this->perf_fds.size() == 0)
> + throw runtime_error("ERROR: no perf events opened");
> +}
> +
> +// Tear down the reader: close every perf event fd, unmap every ring
> +// buffer mapping, then release the default Ebl backend.
> +PerfReader::~PerfReader()
> +{
> +  for (int perf_fd : this->perf_fds)
> +    close(perf_fd);
> +  for (perf_event_mmap_page *mapping : this->perf_headers)
> +    munmap(static_cast<void*>(mapping), this->mmap_size);
> +  ebl_closebackend (this->default_ebl);
> +}
> +
> +// Current steady_clock reading, expressed in whole milliseconds.
> +uint64_t millis_monotonic()
> +{
> +  auto since_epoch = chrono::steady_clock::now().time_since_epoch();
> +  return chrono::duration_cast<chrono::milliseconds>(since_epoch).count();
> +}
> +
> +// Read the kernel-written data_head of a perf ring buffer.
> +// The load uses acquire ordering so that record bytes read afterwards
> +// cannot be observed before the head value; a bare compiler barrier
> +// does not give that guarantee on weakly ordered CPUs.
> +static inline uint64_t
> +ring_buffer_read_head(volatile struct perf_event_mmap_page *base)
> +{
> +  return __atomic_load_n(&base->data_head, __ATOMIC_ACQUIRE);
> +}
> +
> +// Publish the new data_tail of a perf ring buffer.
> +// The store uses release ordering so that all of our reads of record
> +// bytes complete before the kernel can see the space as free; a bare
> +// compiler barrier does not give that guarantee on weakly ordered CPUs.
> +static inline void
> +ring_buffer_write_tail(volatile struct perf_event_mmap_page *base,
> +                       uint64_t tail)
> +{
> +  __atomic_store_n(&base->data_tail, tail, __ATOMIC_RELEASE);
> +}
> +
> +// Enable the perf events on the first call, then for at most one second
> +// poll the ring buffers and pass every queued record to decode_event().
> +// Stops early when `interrupted` becomes set or poll() reports an error.
> +void PerfReader::process_some()
> +{
> +  if (! this->enabled)
> +    {
> +      for (auto fd : this->perf_fds)
> +        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0 /* value ignored */);
> +      this->enabled = true;
> +    }
> +
> +  uint64_t starttime = millis_monotonic();
> +  uint64_t endtime = starttime + 1000; // run at most one second
> +  uint64_t ring_buffer_size = this->page_size * this->page_count; // just the ring buffer size
> +
> +  while (! interrupted)
> +    {
> +      uint64_t now = millis_monotonic();
> +      if (endtime < now)
> +        break;
> +      int ready = poll(this->pollfds.data(), this->pollfds.size(), (int)(endtime-now)); // wait a little while
> +      if (ready < 0)
> +        break;
> +
> +      for (size_t i = 0; i < pollfds.size(); i++)
> +        if (this->pollfds[i].revents & POLLIN) // found an fd with fresh yummy events
> +          {
> +            perf_event_mmap_page *header = perf_headers[i];
> +            uint64_t data_head = ring_buffer_read_head(header);
> +            uint64_t data_tail = header->data_tail;
> +            // The record area starts one page after the metadata page.
> +            uint8_t *base = ((uint8_t *) header) + this->page_size;
> +            struct perf_event_header *ehdr;
> +            size_t ehdr_size;
> +
> +            while (data_head != data_tail) // consume all packets in ring buffer XXX why?
> +              {
> +                // The mask assumes ring_buffer_size is a power of two.
> +                ehdr = (perf_event_header*) (base + (data_tail & (ring_buffer_size - 1)));
> +                ehdr_size = ehdr->size;
> +                if (show_tmi)
> +                  clog << format("perf head={:p} tail={:p} ehdr={:p} size={:d}{:x}\n",
> +                                 (void*) data_head, (void*) data_tail, (void*) ehdr, ehdr_size, 0);
> +
> +                if (ehdr_size < sizeof(perf_event_header))
> +                  {
> +                    // A record cannot be smaller than its own header.  A
> +                    // zero size would never advance data_tail and this
> +                    // loop would spin forever, so drop what is left.
> +                    data_tail = data_head;
> +                    break;
> +                  }
> +
> +                if (((uint8_t *)ehdr) + ehdr_size > base + ring_buffer_size) // mmap region wraparound?
> +                  {
> +                    // need to copy it to a contiguous temporary
> +                    uint8_t *copy_start = (uint8_t*) ehdr;
> +                    size_t len_first = base + ring_buffer_size - copy_start;
> +                    size_t len_secnd = ehdr_size - len_first;
> +                    uint8_t *event_temp = this->event_wraparound_temp.data();
> +                    memcpy(event_temp, copy_start, len_first); // part at end of mmap'd region
> +                    memcpy(event_temp + len_first, base, len_secnd); // part at beginning of mmap'd region
> +                    ehdr = (perf_event_header*) event_temp;
> +                  }
> +
> +                this->decode_event(ehdr);
> +                data_tail += ehdr_size;
> +              }
> +
> +            // Tell the kernel how far we have consumed.
> +            ring_buffer_write_tail(header, data_tail);
> +          }
> +    }
> +}
> +
> +// Copy one fixed-width field out of a perf record and advance the cursor.
> +// Going through memcpy keeps the read valid whatever the field alignment.
> +template <typename T>
> +static inline T perf_record_take(const uint8_t *&cursor)
> +{
> +  T value;
> +  memcpy(&value, cursor, sizeof(value));
> +  cursor += sizeof(value);
> +  return value;
> +}
> +
> +// Hand the raw record to consumer->process(), then decode the record
> +// types we understand (SAMPLE, COMM, EXIT, FORK, MMAP2) and forward the
> +// fields to the matching consumer callback.  Other types are ignored.
> +void PerfReader::decode_event(const perf_event_header* ehdr)
> +{
> +  consumer->process(ehdr); // allow general processing
> +
> +  // Every record body starts right after the common header.
> +  const uint8_t* data = reinterpret_cast<const uint8_t*>(ehdr) + sizeof(perf_event_header);
> +
> +  // and decode into individual event types
> +  switch (ehdr->type)
> +    {
> +    case PERF_RECORD_SAMPLE:
> +      {
> +        uint64_t ip = perf_record_take<uint64_t>(data);
> +        uint32_t pid = perf_record_take<uint32_t>(data);
> +        uint32_t tid = perf_record_take<uint32_t>(data);
> +        uint64_t time = perf_record_take<uint64_t>(data);
> +        // PERF_SAMPLE_CALLCHAIN would be here if requested
> +        uint64_t abi = perf_record_take<uint64_t>(data);
> +        // The kernel writes no register values when it reports ABI_NONE,
> +        // so the stack size field follows the abi word directly.
> +        uint32_t nregs = (abi == PERF_SAMPLE_REGS_ABI_NONE) ? 0 : this->sample_regs_count;
> +        const uint64_t* regs = reinterpret_cast<const uint64_t*>(data);
> +        data += nregs * sizeof(uint64_t);
> +        uint64_t data_size = perf_record_take<uint64_t>(data);
> +        const uint8_t* stack_data = data;
> +        consumer->process_sample(ehdr, ip, pid, tid, time, abi, nregs, regs, data_size, stack_data);
> +        break;
> +      }
> +    case PERF_RECORD_COMM:
> +      {
> +        uint32_t pid = perf_record_take<uint32_t>(data);
> +        uint32_t tid = perf_record_take<uint32_t>(data);
> +        const char* comm = reinterpret_cast<const char*>(data);
> +        consumer->process_comm(ehdr, pid, tid, (ehdr->misc & PERF_RECORD_MISC_COMM_EXEC), comm);
> +        break;
> +      }
> +    case PERF_RECORD_EXIT:
> +      {
> +        uint32_t pid = perf_record_take<uint32_t>(data);
> +        uint32_t ppid = perf_record_take<uint32_t>(data);
> +        uint32_t tid = perf_record_take<uint32_t>(data);
> +        uint32_t ptid = perf_record_take<uint32_t>(data);
> +        consumer->process_exit(ehdr, pid, ppid, tid, ptid);
> +        break;
> +      }
> +    case PERF_RECORD_FORK:
> +      {
> +        uint32_t pid = perf_record_take<uint32_t>(data);
> +        uint32_t ppid = perf_record_take<uint32_t>(data);
> +        uint32_t tid = perf_record_take<uint32_t>(data);
> +        uint32_t ptid = perf_record_take<uint32_t>(data);
> +        consumer->process_fork(ehdr, pid, ppid, tid, ptid);
> +        break;
> +      }
> +    case PERF_RECORD_MMAP2:
> +      {
> +        // Size of the build_id[] array in the MMAP2 record; it is fixed,
> +        // independent of how many bytes of it are meaningful.
> +        constexpr size_t mmap2_build_id_field_size = 20;
> +
> +        uint32_t pid = perf_record_take<uint32_t>(data);
> +        uint32_t tid = perf_record_take<uint32_t>(data);
> +        uint64_t addr = perf_record_take<uint64_t>(data);
> +        uint64_t len = perf_record_take<uint64_t>(data);
> +        uint64_t pgoff = perf_record_take<uint64_t>(data);
> +        uint8_t build_id_size = 0;
> +        const uint8_t* build_id = nullptr;
> +        if (ehdr->misc & PERF_RECORD_MISC_MMAP_BUILD_ID)
> +          {
> +            build_id_size = perf_record_take<uint8_t>(data);
> +            if (build_id_size > mmap2_build_id_field_size)
> +              build_id_size = mmap2_build_id_field_size;
> +            data += sizeof(uint8_t) + sizeof(uint16_t); // skip padding
> +            build_id = data;
> +            // Skip the whole array, not just the used bytes, so that
> +            // prot/flags/filename are found for short build-ids too.
> +            data += mmap2_build_id_field_size;
> +          }
> +        else
> +          {
> +            data += 4 + 4 + 8 + 8; // maj, min, ino, ino_generation
> +          }
> +        data += sizeof(uint32_t) + sizeof(uint32_t); // prot, flags
> +        const char* filename = reinterpret_cast<const char*>(data);
> +        consumer->process_mmap2(ehdr, pid, tid, addr, len, pgoff, build_id_size, build_id, filename);
> +        break;
> +      }
> +    default:
> +      break;
> +    }
> +}
> +
> +
> +////////////////////////////////////////////////////////////////////////
> +// perf event consumers
> +
> +// Log a COMM record to clog when show_modules is set; otherwise do nothing.
> +void StatsPerfConsumer::process_comm(const perf_event_header *sample,
> +                                     uint32_t pid, uint32_t tid, bool exec,
> +                                     const string &comm)
> +{
> +  if (! show_modules)
> +    return;
> +  clog << format("process_comm: pid={} tid={} exec={} comm={}\n", pid, tid, exec, comm);
> +}
> +
> +// Log an EXIT record to clog when show_modules is set; otherwise do nothing.
> +void StatsPerfConsumer::process_exit(const perf_event_header *sample,
> +                                     uint32_t pid, uint32_t ppid,
> +                                     uint32_t tid, uint32_t ptid)
> +{
> +  if (! show_modules)
> +    return;
> +  clog << format("process_exit: pid={} ppid={} tid={} ptid={}\n", pid, ppid, tid, ptid);
> +}
> +
> +// Log a FORK record to clog when show_modules is set; otherwise do nothing.
> +void StatsPerfConsumer::process_fork(const perf_event_header *sample,
> +                                     uint32_t pid, uint32_t ppid,
> +                                     uint32_t tid, uint32_t ptid)
> +{
> +  if (! show_modules)
> +    return;
> +  clog << format("process_fork: pid={} ppid={} tid={} ptid={}\n", pid, ppid, tid, ptid);
> +}
> +
> +// Log the scalar fields of a SAMPLE record to clog when show_samples is
> +// set.  The register values and stack bytes themselves are not printed.
> +void StatsPerfConsumer::process_sample(const perf_event_header *sample,
> +                                       uint64_t ip,
> +                                       uint32_t pid, uint32_t tid,
> +                                       uint64_t time,
> +                                       uint64_t abi,
> +                                       uint32_t nregs, const uint64_t *regs,
> +                                       uint64_t data_size, const uint8_t *data)
> +{
> +  if (! show_samples)
> +    return;
> +  clog << format("process_sample: pid={:d} tid={:d} ip={:x} time={:d} abi={:d} nregs={:d} data_size={:d}\n",
> +                 pid, tid, ip, time, abi, nregs, data_size);
> +}
> +
> +// Log an MMAP2 record to clog when show_modules is set.  Only the
> +// build-id length is printed, not the build-id bytes.
> +void StatsPerfConsumer::process_mmap2(const perf_event_header *sample,
> +                                      uint32_t pid, uint32_t tid,
> +                                      uint64_t addr, uint64_t len, uint64_t pgoff,
> +                                      uint8_t build_id_size, const uint8_t *build_id,
> +                                      const char *filename)
> +{
> +  if (! show_modules)
> +    return;
> +  clog << format("process_mmap2: pid={:d} tid={:d} addr={:x} len={:x} pgoff={:x} build_id_size={:d} filename={:s}\n",
> +                 pid, tid, addr, len, pgoff,
> +                 (unsigned)build_id_size, filename);
> +}
> +
> +// On destruction, print how many records of each event type were seen.
> +StatsPerfConsumer::~StatsPerfConsumer()
> +{
> +  for (const auto& [event_type, seen] : this->event_type_counts)
> +    clog << format("event type {} count {}\n", event_type, seen);
> +}
> +
> +// Count one more record of this record's event type.
> +void StatsPerfConsumer::process(const perf_event_header* ehdr)
> +{
> +  ++this->event_type_counts[ehdr->type];
> +}
> +
> +
> +//////////////////////////////////////////////////////////////////////
> +// unwind stats table for PerfConsumerUnwinder + downstream consumers
> +
> +// Return the stats entry for PID, inserting a default-constructed one
> +// first if the table has none yet.
> +UnwindDwflStats *UnwindStatsTable::pid_find_or_create (pid_t pid)
> +{
> +  auto slot = this->dwfl_tab.find(pid);
> +  if (slot == this->dwfl_tab.end())
> +    slot = this->dwfl_tab.emplace(pid, UnwindDwflStats()).first;
> +  return &slot->second;
> +}
> +
> +static const string unknown_comm = "<unknown>";
> +
> +// Return the command name recorded for PID.  When none is recorded yet,
> +// read the first line of /proc/PID/comm, remember it in the entry and
> +// return it; if that read fails, remember and return "<unknown>".
> +string UnwindStatsTable::pid_find_comm (pid_t pid)
> +{
> +  UnwindDwflStats *entry = this->pid_find_or_create(pid);
> +  if (entry == NULL)
> +    return unknown_comm;
> +
> +  if (entry->comm.empty())
> +    {
> +      string path = format("/proc/{}/comm", pid);
> +      ifstream procfile(path);
> +      string first_line;
> +      if (procfile && getline(procfile, first_line))
> +        entry->comm = first_line;
> +      else
> +        entry->comm = unknown_comm;
> +    }
> +
> +  return entry->comm;
> +}
> +
> +// Return the Dwfl stored for PID, or NULL when PID has no table entry.
> +// Never creates an entry.
> +Dwfl *UnwindStatsTable::pid_find_dwfl (pid_t pid)
> +{
> +  auto slot = this->dwfl_tab.find(pid);
> +  if (slot != this->dwfl_tab.end())
> +    return slot->second.dwfl;
> +  return NULL;
> +}
> +
> +// Record DWFL in the entry for PID, creating the entry if needed.  When
> +// show_summary is set, also look up the command name now so that it is
> +// stored in the entry.
> +void UnwindStatsTable::pid_store_dwfl (pid_t pid, Dwfl *dwfl)
> +{
> +  UnwindDwflStats *entry = this->pid_find_or_create(pid);
> +  if (entry != NULL)
> +    {
> +      entry->dwfl = dwfl;
> +      if (show_summary)
> +        this->pid_find_comm(pid);
> +    }
> +}
> +
> +// Return the module stats stored for BUILDID, or NULL when there are
> +// none.  Never creates an entry.
> +UnwindModuleStats *UnwindStatsTable::buildid_find (string buildid)
> +{
> +  auto slot = this->buildid_tab.find(buildid);
> +  if (slot != this->buildid_tab.end())
> +    return &slot->second;
> +  return NULL;
> +}
> +
> +// Return the module stats for BUILDID, inserting a default-constructed
> +// entry first if there is none.  MOD is currently unused.
> +UnwindModuleStats *UnwindStatsTable::buildid_find_or_create (string buildid, Dwfl_Module *mod)
> +{
> +  auto slot = this->buildid_tab.find(buildid);
> +  if (slot == this->buildid_tab.end())
> +    {
> +      slot = this->buildid_tab.emplace(buildid, UnwindModuleStats()).first;
> +      /* TODO: Guess text range for mod? */
> +      (void)mod;
> +    }
> +  return &slot->second;
> +}
> +
> +// Print to clog one line of sample statistics per known pid, followed
> +// by a line of totals across all pids.
> +void UnwindStatsTable::print_summary () const
> +{
> +  // Share of X in TOT as a percentage; reported as 0 when both are zero.
> +  auto percent = [](auto x, auto tot) -> double
> +    {
> +      if (x + tot == 0)
> +        return 0.0;
> +      return ((double)x) / ((double)tot) * 100.0;
> +    };
> +
> +  int total_samples = 0;
> +  int total_lost_samples = 0;
> +  clog << "\n=== pid / sample counts ===\n";
> +  for (auto& row : this->dwfl_tab)
> +    {
> +      pid_t pid = row.first;
> +      const UnwindDwflStats& d = row.second;
> +      clog << format(N_("{} {} -- max {} frames, received {} samples, lost {} samples ({:.1f}%) (last {}, worst {})\n"),
> +                     pid, d.comm, d.max_frames,
> +                     d.total_samples, d.lost_samples,
> +                     percent(d.lost_samples, d.total_samples),
> +                     dwfl_unwound_source_str(d.last_unwound),
> +                     dwfl_unwound_source_str(d.worst_unwound));
> +      total_samples += d.total_samples;
> +      total_lost_samples += d.lost_samples;
> +    }
> +  clog << "===\n";
> +  clog << format(N_("TOTAL -- received {} samples, lost {} samples, loaded {} processes\n"),
> +                 total_samples, total_lost_samples,
> +                 this->dwfl_tab.size() /* TODO: If implementing eviction, need to maintain a separate count of evicted pids. */);
> +  clog << "\n";
> +}
> +
> +////////////////////////////////////////////////////////////////////////
> +// real perf consumer: unwind helpers
> +
> +// Construct an unwinder with no PerfReader attached.  Delegates to the
> +// three-argument form so that `reader` always has a defined value
> +// (nullptr) and the setup code exists only once.
> +PerfConsumerUnwinder::PerfConsumerUnwinder(UnwindSampleConsumer* usc, UnwindStatsTable *ust)
> +  : PerfConsumerUnwinder(usc, ust, nullptr) {
> +}
> +
> +// Construct an unwinder that passes unwound samples to USC, keeps its
> +// statistics in UST and asks READER for register layout information.
> +// Takes the frame limit from USC and starts a dwflst process tracker.
> +PerfConsumerUnwinder::PerfConsumerUnwinder(UnwindSampleConsumer* usc, UnwindStatsTable *ust, PerfReader *reader)
> +  : consumer(usc), stats(ust) {
> +  maxframes = usc->maxframes();
> +  this->reader = reader;
> +  this->tracker = dwflst_tracker_begin (&dwfl_cfi_callbacks);
> +}
> +
> +// End the dwflst process tracker that the constructor started.
> +PerfConsumerUnwinder::~PerfConsumerUnwinder()
> +{
> +  dwflst_tracker_end (this->tracker);
> +}
> +
> +/* TODO: Could be relocated to libdwfl/linux-pid-attach.c
> +   to remove some duplication of existing linux-pid-attach code. */
> +/* Resolve *PID to its thread group leader via /proc/PID/status, check
> +   that /proc/PID/task exists, and try to open /proc/PID/exe.
> +
> +   On success returns 0 with *PID updated.  *ELF_FD holds the open
> +   descriptor or a negative value, and *ELF holds the Elf handle or NULL
> +   when the executable could not be opened or read.  On failure returns a
> +   positive errno value and leaves *ELF and *ELF_FD untouched.  DWFL is
> +   currently unused.  */
> +int PerfConsumerUnwinder::find_procfile (Dwfl *dwfl, pid_t *pid, Elf **elf, int *elf_fd)
> +{
> +  /* Make sure to report the actual PID (thread group leader) to
> +     dwfl_attach_state. */
> +  string buffer = format("/proc/{}/status", *pid);
> +  errno = 0;
> +  ifstream procfile(buffer);
> +  if (!procfile)
> +    /* The stream may fail without setting errno; never report success. */
> +    return errno != 0 ? errno : ESRCH;
> +
> +  string line;
> +  while (getline (procfile, line))
> +    if (startswith (line.c_str(), "Tgid:"))
> +      {
> +        errno = 0;
> +        char *endptr;
> +        long val = strtol (line.c_str() + 5, &endptr, 10);
> +        /* std::getline has already removed the newline, so a well-formed
> +           number must be followed directly by the end of the string. */
> +        if ((errno == ERANGE && val == LONG_MAX)
> +            || *endptr != '\0' || val < 0 || val != (pid_t) val)
> +          *pid = 0;
> +        else
> +          *pid = (pid_t) val;
> +        break;
> +      }
> +
> +  if (*pid == 0)
> +    return ESRCH;
> +
> +  {
> +    string name = format("/proc/{}/task", *pid);
> +    DIR *dir = opendir (name.c_str());
> +    if (dir == NULL)
> +      return errno;
> +    closedir(dir);
> +  }
> +
> +  {
> +    string name = format("/proc/{}/exe", *pid);
> +    *elf_fd = open (name.c_str(), O_RDONLY);
> +  }
> +  if (*elf_fd >= 0)
> +    {
> +      *elf = elf_begin (*elf_fd, ELF_C_READ_MMAP, NULL);
> +      if (*elf == NULL)
> +        {
> +          /* Just ignore, dwfl_attach_state will fall back to trying
> +             to associate the Dwfl with one of the existing Dwfl_Module
> +             ELF images (to know the machine/class backend to use). */
> +          if (verbose)
> +            cerr << format(N_("WARNING: find_procfile pid {}: elf not found\n"), (long long)*pid);
> +          close (*elf_fd);
> +          *elf_fd = -1;
> +        }
> +    }
> +  else
> +    *elf = NULL;
> +  return 0;
> +}
> +
> +/* Create a Dwfl in this unwinder's tracker and report the modules of
> +   process PID into it.  Returns NULL (after a warning when verbose) if
> +   either reporting step fails.  */
> +Dwfl *PerfConsumerUnwinder::init_dwfl(pid_t pid)
> +{
> +  Dwfl *dwfl = dwflst_tracker_dwfl_begin (this->tracker);
> +
> +  int err = dwfl_linux_proc_report (dwfl, pid);
> +  if (err < 0)
> +    {
> +      if (verbose)
> +        cerr << format("WARNING: dwfl_linux_proc_report pid {}: {}\n", (long long) pid, dwfl_errmsg(-1));
> +      return NULL;
> +    }
> +  err = dwfl_report_end (dwfl, NULL, NULL);
> +  if (err != 0)
> +    {
> +      if (verbose)
> +        cerr << format("WARNING: dwfl_report_end pid {}: {}\n", (long long) pid, dwfl_errmsg(-1));
> +      return NULL;
> +    }
> +
> +  return dwfl;
> +}
> +
> +/* Plain-function trampoline handed to dwflst_tracker_find_pid; ARG is
> +   the PerfConsumerUnwinder whose init_dwfl() should run.  */
> +Dwfl *pcu_init_dwfl_cb (Dwflst_Process_Tracker *cb_tracker __attribute__ ((unused)),
> +                        pid_t pid,
> +                        void *arg)
> +{
> +  PerfConsumerUnwinder *pcu = (PerfConsumerUnwinder *)arg;
> +  return pcu->init_dwfl (pid);
> +}
> +
> +/* Smallest number of sampled registers find_dwfl() accepts for the
> +   machine behind EBL.  */
> +uint32_t expected_frame_nregs (Ebl *ebl)
> +{
> +  int m = ebl_get_elfmachine(ebl);
> +  /* For aarch64, we actually use fewer than ebl->frame_nregs to unwind.
> +     NOTE(review): the test below is for EM_ARM, not EM_AARCH64 -- confirm
> +     which machine the value 14 is meant for. */
> +  if (m == EM_ARM)
> +    return 14; /* XXX 16 for 32-bit ARM */
> +  /* On x86, expect everything except FLAGS: */
> +  if (m == EM_X86_64 || m == EM_386)
> +    return ebl_frame_nregs(ebl);
> +  /* In general, it's better to be on the permissive side. */
> +  return 1;
> +}
> +
> +/* Find (or have the tracker create) the Dwfl for PID and prime last_us
> +   with the sampled stack pointer from REGS.
> +
> +   Returns NULL when too few registers were sampled, when no Dwfl could
> +   be obtained, or when the /proc lookup for a not-yet-attached process
> +   fails.  *CACHED is set to true when the Dwfl was already attached;
> +   *OUT_ELF receives the executable's Elf handle (NULL in the cached
> +   case).  */
> +Dwfl *PerfConsumerUnwinder::find_dwfl(pid_t pid, const uint64_t *regs, uint32_t nregs,
> +                                      Elf **out_elf, bool *cached)
> +{
> +  uint32_t expected_nregs = expected_frame_nregs(this->reader->ebl());
> +  if (nregs < expected_nregs)
> +    {
> +      if (verbose)
> +        cerr << format(N_("WARNING: find_dwfl: nregs={}, expected at least {}\n"), nregs, expected_nregs);
> +      return NULL;
> +    }
> +
> +  Elf *elf = NULL;
> +  Dwfl *dwfl = dwflst_tracker_find_pid (this->tracker, pid, pcu_init_dwfl_cb, this);
> +  if (dwfl == NULL)
> +    /* Nothing to unwind with; do not open /proc/PID/exe only to drop it. */
> +    return NULL;
> +
> +  if (dwfl_pid(dwfl) != -1 /* dwfl is attached */)
> +    *cached = true;
> +  else
> +    {
> +      int elf_fd = -1;
> +      /* find_procfile reports failure as a positive errno value. */
> +      int err = this->find_procfile (dwfl, &pid, &elf, &elf_fd);
> +      if (err != 0)
> +        {
> +          if (verbose)
> +            cerr << format("WARNING: find_procfile pid {}: {}\n", (long long) pid, strerror(err));
> +          return NULL;
> +        }
> +    }
> +
> +  this->last_us.sp = regs[this->get_sp_reg(this->last_us.elfclass == ELFCLASS32)];
> +  this->last_us.base = this->last_us.sp;
> +
> +  if (!*cached)
> +    this->stats->pid_store_dwfl (pid, dwfl);
> +  *out_elf = elf;
> +  return dwfl;
> +}
> +
> +/* Index of stack pointer within dwarf_regs order.  IS_ABI32 selects the
> +   32-bit register numbering of the machine family.  EM_AARCH64 is
> +   accepted next to EM_ARM, since 31 is only meaningful as the 64-bit
> +   ARM stack pointer.  Other machines are unsupported.  */
> +int PerfConsumerUnwinder::get_sp_reg(bool is_abi32)
> +{
> +  int machine = ebl_get_elfmachine(this->reader->ebl());
> +  if (machine == EM_X86_64 || machine == EM_386)
> +    return is_abi32 ? 4 : 7;
> +  if (machine == EM_ARM || machine == EM_AARCH64)
> +    return is_abi32 ? 13 : 31;
> +  assert(0);
> +  return 7;
> +}
> +
> +// Per-frame callback for the unwinder.  Reads the frame's pc and stack
> +// pointer, updates the per-pid unwind-source statistics, prints the
> +// optional diagnostics and appends the pc to last_us.addrs.  Returns
> +// DWARF_CB_ABORT when the pc or sp cannot be read or once more than
> +// maxframes addresses have been collected; DWARF_CB_OK otherwise.
> +int PerfConsumerUnwinder::unwind_frame_cb(Dwfl_Frame *state)
> +{
> +  Dwarf_Addr pc;
> +  bool isactivation;
> +  if (! dwfl_frame_pc (state, &pc, &isactivation))
> +    {
> +      if (verbose)
> +        cerr << format("WARNING: dwfl_frame_pc: {}\n", dwfl_errmsg(-1));
> +      return DWARF_CB_ABORT;
> +    }
> +
> +  // For a non-activation frame, step back one byte from the pc.
> +  Dwarf_Addr pc_adjusted = isactivation ? pc : pc - 1;
> +
> +  Dwarf_Addr sp;
> +  int sp_regno = this->get_sp_reg(this->last_us.elfclass == ELFCLASS32);
> +  if (dwfl_frame_reg (state, sp_regno, &sp) < 0)
> +    {
> +      if (verbose)
> +        cerr << format("WARNING: dwfl_frame_reg: {}\n", dwfl_errmsg(-1));
> +      return DWARF_CB_ABORT;
> +    }
> +
> +  // Module-relative form of pc_adjusted, needed only for show_frames.
> +  auto relocated_pc = [&]() -> uint64_t
> +    {
> +      Dwfl_Module *mod = dwfl_addrmodule(this->last_us.dwfl, pc);
> +      uint64_t rel = pc_adjusted;
> +      (void) dwfl_module_relocate_address (mod, &rel);
> +      return rel;
> +    };
> +
> +  UnwindDwflStats *pid_stats = this->stats->pid_find_or_create(this->last_us.pid);
> +  if (pid_stats == NULL)
> +    {
> +      if (show_frames)
> +        {
> +          uint64_t rel_pc = relocated_pc();
> +          clog << format(N_("* frame {:d}: rel_pc={:x} raw_pc={:x} sp={:x}+{:x} [dwfl_ent not found]\n"),
> +                         this->last_us.addrs.size(), rel_pc, pc_adjusted,
> +                         this->last_us.base, (sp - this->last_us.base));
> +        }
> +    }
> +  else
> +    {
> +      Dwfl_Unwound_Source how = dwfl_frame_unwound_source(state);
> +      if (how > pid_stats->worst_unwound)
> +        pid_stats->worst_unwound = how;
> +      pid_stats->last_unwound = how;
> +      if (show_frames)
> +        {
> +          uint64_t rel_pc = relocated_pc();
> +          clog << format("* frame {:d}: rel_pc={:x} raw_pc={:x} sp={:x}+{:x} [{}]\n",
> +                         this->last_us.addrs.size(), rel_pc, pc_adjusted,
> +                         this->last_us.base, (sp - this->last_us.base),
> +                         dwfl_unwound_source_str(how));
> +        }
> +    }
> +
> +  if (show_debugfile)
> +    {
> +      Dwfl_Module *mod = dwfl_addrmodule(this->last_us.dwfl, pc);
> +      if (mod != NULL)
> +        {
> +          const unsigned char *id_bytes;
> +          GElf_Addr id_vaddr;
> +          int id_len = dwfl_module_build_id (mod, &id_bytes, &id_vaddr);
> +          clog << format("* pid {:d} build_id=", this->last_us.pid);
> +          for (int k = 0; k < id_len; ++k)
> +            clog << format("{:02x}", static_cast<int>(id_bytes[k]));
> +
> +          const char *mainfile;
> +          const char *debugfile;
> +          const char *modname = dwfl_module_info (mod, NULL, NULL, NULL, NULL,
> +                                                  NULL, &mainfile, &debugfile);
> +          clog << format("module={} mainfile={} debugfile={}\n",
> +                         modname,
> +                         mainfile ? mainfile : "<none>",
> +                         debugfile ? debugfile : "<none>");
> +          /* TODO: Also store this data to avoid repeated extraction for
> +             the final buildid summary? */
> +#ifdef DEBUG_MODULES
> +          Dwarf_Addr bias;
> +          Dwarf_CFI *cfi_eh = dwfl_module_eh_cfi (mod, &bias);
> +          if (cfi_eh == NULL)
> +            clog << format("* pc={:x} -> NO EH_CFI\n", pc);
> +#endif
> +        }
> +      else
> +        {
> +          clog << format("* pid {:d} pc={:x} -> MODULE NOT FOUND\n",
> +                         this->last_us.pid, pc);
> +        }
> +    }
> +
> +  this->last_us.sp = sp;
> +  this->last_us.addrs.push_back(pc);
> +
> +  /* e.g. gmon callgraphs only requires maxframes=1
> +     (initial pc + one frame for caller ID only) */
> +  /* XXX without maxframes, very rarely, the unwinder can loop
> +     infinitely; worth investigating? */
> +  if (this->last_us.addrs.size() > this->maxframes)
> +    return DWARF_CB_ABORT;
> +  return DWARF_CB_OK;
> +}
> +
> +// Plain-function trampoline for the frame callback; ARG is the
> +// PerfConsumerUnwinder whose unwind_frame_cb() should handle STATE.
> +int pcu_unwind_frame_cb(Dwfl_Frame *state, void *arg)
> +{
> +  auto *unwinder = static_cast<PerfConsumerUnwinder *>(arg);
> +  return unwinder->unwind_frame_cb(state);
> +}
> +
> +
> +////////////////////////////////////////////////////////////////////////
> +// real perf consumer: event handler callbacks
> +
> +// COMM records are deliberately ignored by the unwinder for now.
> +void PerfConsumerUnwinder::process_comm(const perf_event_header *sample,
> +                                        uint32_t pid, uint32_t tid,
> +                                        bool exec, const string &comm)
> +{
> +  // NB: On EXEC, dwflst could drop its data for the process and start over.
> +  // XXX: Is that needed to avoid gradual memory leaks or pid reuse?
> +}
> +
> +// EXIT records are deliberately ignored by the unwinder for now.
> +void PerfConsumerUnwinder::process_exit(const perf_event_header *sample,
> +                                        uint32_t pid, uint32_t ppid,
> +                                        uint32_t tid, uint32_t ptid)
> +{
> +  // NB: dwflst could drop its data for the process here.
> +  // XXX: Is that needed to avoid gradual memory leaks or pid reuse?
> +}
> +
> +// FORK records are deliberately ignored by the unwinder for now.
> +void PerfConsumerUnwinder::process_fork(const perf_event_header *sample,
> +                                        uint32_t pid, uint32_t ppid,
> +                                        uint32_t tid, uint32_t ptid)
> +{
> +  // NB: dwflst could start tracking the new process here, but that will
> +  // likely happen automatically once a packet arrives from it.  Because
> +  // fork and exec are typically close together, elfutils will usually
> +  // pick up the post-exec process -- it would take real effort to
> +  // reproduce a case where process_fork/process_comm handling is needed.
> +}
> +
> +// Unwind one SAMPLE record: find the Dwfl for PID, walk the sampled
> +// stack with the sampled registers, update the per-pid statistics and
> +// pass the collected addresses (last_us) to the downstream consumer.
> +// When no Dwfl can be found, the sample is counted as lost and the
> +// consumer is not called.
> +void PerfConsumerUnwinder::process_sample(const perf_event_header *sample,
> +                                          uint64_t ip,
> +                                          uint32_t pid, uint32_t tid,
> +                                          uint64_t time,
> +                                          uint64_t abi,
> +                                          uint32_t nregs, const uint64_t *regs,
> +                                          uint64_t data_size, const uint8_t *data)
> +{
> +  string comm;
> +  if (show_summary)
> +    comm = this->stats->pid_find_comm(pid);
> +
> +  if (show_frames)
> +    clog << "\n"; /* extra newline for padding */
> +
> +  /* find_dwfl() selects the stack pointer register from
> +     last_us.elfclass, so it must describe THIS sample before the call;
> +     otherwise the previous sample's ABI would be used. */
> +  bool is_abi32 = (abi == PERF_SAMPLE_REGS_ABI_32);
> +  this->last_us.elfclass = (is_abi32 ? ELFCLASS32 : ELFCLASS64);
> +
> +  Elf *elf = NULL; // Released during dwflst_tracker_end
> +  bool cached = false;
> +  Dwfl *dwfl = this->find_dwfl (pid, regs, nregs, &elf, &cached);
> +  UnwindDwflStats *dwfl_ent = NULL;
> +  bool first_load = false; /* -> for show_modules: pid is loaded first time */
> +  if (verbose || show_summary || show_modules)
> +    {
> +      dwfl_ent = this->stats->pid_find_or_create(pid);
> +      if (dwfl_ent->total_samples == 0)
> +        first_load = true;
> +    }
> +  if (dwfl == NULL)
> +    {
> +      if (show_summary || show_modules)
> +        {
> +          /* dwfl_ent loaded above */
> +          dwfl_ent->total_samples++;
> +          dwfl_ent->lost_samples++;
> +        }
> +      if (verbose && show_summary)
> +        {
> +          cerr << format("WARNING: find_dwfl pid {} ({}) (failed)\n", (long long)pid, comm);
> +        }
> +      else
> +        {
> +          cerr << format("WARNING: find_dwfl pid {} (failed)\n", (long long)pid);
> +        }
> +      return;
> +    }
> +
> +  if (show_samples || (first_load && show_modules))
> +    {
> +      clog << format("find_dwfl {}pid {:d} {}({}): hdr_size={:d} size={:d}{} pc={:x} sp={:x}+{:d}\n",
> +                     first_load ? "newly seen " : "", (long long)pid,
> +                     (cached ? "(cached) " : ""), comm,
> +                     sample->size, data_size,
> +                     (is_abi32 ? " (32-bit)" : ""), ip,
> +                     this->last_us.base, 0);
> +    }
> +
> +  this->last_us.addrs.clear();
> +  this->last_us.dwfl = dwfl;
> +  this->last_us.pid = pid;
> +  int rc = dwflst_perf_sample_getframes (dwfl, elf, pid, tid,
> +                                         data, data_size,
> +                                         regs, nregs,
> +                                         this->reader->regs_mask(), abi,
> +                                         pcu_unwind_frame_cb, this);
> +  if (rc < 0)
> +    {
> +      /* dwfl_ent loaded above */
> +      if (verbose && dwfl_ent->shown_errors < 10)
> +        {
> +          dwfl_ent->shown_errors ++;
> +          cerr << format("WARNING: dwflst_perf_sample_getframes pid {}: {}{}\n",
> +                         (long long)pid, dwfl_errmsg(-1),
> +                         dwfl_ent->shown_errors >= 10 ?
> +                         " (...suppressing further warnings for this pid)" : "");
> +        }
> +    }
> +  if (show_summary)
> +    {
> +      /* For final diagnostics. dwfl_ent loaded above */
> +      if (this->last_us.addrs.size() > (unsigned long)dwfl_ent->max_frames)
> +        dwfl_ent->max_frames = this->last_us.addrs.size();
> +      dwfl_ent->total_samples++;
> +      /* A sample that yields at most two addresses although more were
> +         allowed is counted as lost. */
> +      if (this->maxframes > 2 && this->last_us.addrs.size() <= 2)
> +        dwfl_ent->lost_samples++;
> +    }
> +
> +  this->consumer->process (&this->last_us);
> +  return;
> +}
> +
> +// When a Dwfl is already stored for PID, report the newly mapped file
> +// to it as an additional module covering [addr, addr+len).  Mappings of
> +// pids without a stored Dwfl are ignored.
> +void PerfConsumerUnwinder::process_mmap2(const perf_event_header *sample,
> +                                         uint32_t pid, uint32_t tid,
> +                                         uint64_t addr, uint64_t len, uint64_t pgoff,
> +                                         uint8_t build_id_size, const uint8_t *build_id,
> +                                         const char *filename)
> +{
> +  Dwfl *dwfl = this->stats->pid_find_dwfl(pid);
> +  if (dwfl == NULL)
> +    return;
> +
> +  dwfl_report_begin_add(dwfl);
> +  dwfl_report_module(dwfl, filename, /*start*/ addr, /*end*/ addr + len);
> +  dwfl_report_end(dwfl, NULL, NULL);
> +}
> +
> +
> +////////////////////////////////////////////////////////////////////////
> +// unwind data consumers // basic statistics
> +
> +// On destruction, print the accumulated unwind statistics.
> +UnwindStatsConsumer::~UnwindStatsConsumer()
> +{
> +  this->stats->print_summary();
> +}
> +
> +// Intentionally empty: the statistics are maintained by UnwindStatsTable.
> +void UnwindStatsConsumer::process(const UnwindSample* sample)
> +{
> +}
> +
> +// Frame limit for this consumer: opt_maxframes when it is non-negative,
> +// otherwise 256.
> +int UnwindStatsConsumer::maxframes()
> +{
> +  if (opt_maxframes >= 0)
> +    return opt_maxframes;
> +  return 256;
> +}
> +
> +
> +////////////////////////////////////////////////////////////////////////
> +// unwind data consumers // gprof
> +
> +/* gmon.out file format bits */
> +#define GMON_MAGIC "gmon"
> +#define GMON_VERSION 1
> +
> +/* File header written once at the start of each gmon.out file. */
> +struct gmon_hdr {
> + char cookie[4];   /* filled with GMON_MAGIC */
> + char version[4];  /* GMON_VERSION, copied in as a native 32-bit value */
> + char spare[3 * 4]; /* written as zeros */
> +};
> +
> +/* Tag values that introduce a record in a gmon.out file. */
> +enum gmon_entry_tag {
> + GMON_TAG_TIME_HIST = 0,
> + GMON_TAG_CG_ARC = 1,
> + GMON_TAG_BB_COUNT = 2,
> +};
> +
> +/* Fields of a histogram record header.
> +   NOTE(review): record_gmon_hist writes these fields one at a time
> +   (a single tag byte, then the addresses) instead of writing this
> +   struct, so this layout may not match the bytes on disk -- confirm
> +   whether the struct is used anywhere. */
> +struct gmon_hist_hdr {
> + uint8_t tag; /* GMON_TAG_TIME_HIST */
> + uint8_t unused[3];
> + uint64_t low_pc;
> + uint64_t high_pc;
> + uint32_t num_buckets;
> + uint32_t prof_rate;
> + char _dimension_string[16];
> +};
> +
> +
> +// Write one gmon time-histogram record covering low_pc ... high_pc to OF.
> +// The range is cut into buckets of ALIGNMENT addresses each; every
> +// bucket holds the sum of the HISTOGRAM counts that fall into it,
> +// saturated at the 16-bit maximum.  ALIGNMENT must be non-zero.
> +void GprofUnwindSampleConsumer::record_gmon_hist(ostream &of, map<uint64_t, uint32_t> &histogram, uint64_t low_pc, uint64_t high_pc, uint64_t alignment)
> +{
> +  // write one histogram from low_pc ... high_pc
> +  uint32_t num_buckets = (high_pc-low_pc)/alignment + 1;
> +  double result_scale = (double)((high_pc-low_pc)/sizeof(uint16_t))/num_buckets;
> +  if (verbose > 5)
> +    /* It's the @scale value that must be kept within 0.000001 of 0.5 to
> +       keep gprof from complaining. */
> +    clog << format("+histogram {:x}..{:x} (alignment {}) of {} buckets @scale {}\n",
> +                   low_pc, high_pc, alignment, num_buckets, result_scale);
> +
> +  // write histogram record header
> +  unsigned char tag = GMON_TAG_TIME_HIST;
> +  of.write(reinterpret_cast<const char *>(&tag), sizeof(tag));
> +  // Addresses are written at the word size of this host program.
> +  int wordsize = (sizeof (void *) == 8) ? 8 : 4;
> +  if (wordsize == 4) {
> +    uint32_t addr = low_pc;
> +    of.write(reinterpret_cast<const char *>(&addr), sizeof(addr));
> +    addr = high_pc;
> +    of.write(reinterpret_cast<const char *>(&addr), sizeof(addr));
> +  } else {
> +    of.write(reinterpret_cast<const char *>(&low_pc), sizeof(low_pc));
> +    of.write(reinterpret_cast<const char *>(&high_pc), sizeof(high_pc));
> +  }
> +  of.write(reinterpret_cast<const char *>(&num_buckets), sizeof(num_buckets));
> +  uint32_t prof_rate = attr.sample_freq;
> +  of.write(reinterpret_cast<const char *>(&prof_rate), sizeof(prof_rate));
> +  // dimension string is 15 chars long (not null terminated)
> +  std::string dimension_base = libpfm_event.empty() ? "ticks" :
> +    libpfm_event.substr(0, 15);
> +  dimension_base.resize(15, '\0'); // ensure exactly 15 bytes
> +  of.write(dimension_base.data(), 15);
> +  // dimension character abbreviation: just take the first char of above
> +  of.write(dimension_base.data(), 1);
> +
> +  // write histogram buckets
> +  const uint16_t count_max = numeric_limits<uint16_t>::max();
> +  uint64_t bucket_addr = low_pc;
> +  int n_overflows = 0, max_overflows = 5; // limit 'bucket overflow' spam
> +  for (uint32_t bucket = 0; bucket < num_buckets; bucket++)
> +    {
> +      uint16_t count = 0;
> +      // The end of the bucket does not change while we walk it.
> +      auto bucket_end = histogram.upper_bound(bucket_addr+alignment-1);
> +      for (auto it = histogram.lower_bound(bucket_addr); it != bucket_end; ++it)
> +        {
> +          // Add in 64 bits: a 32-bit count of 2^31 or more must not turn
> +          // negative and slip past the saturation test.
> +          uint64_t sum = (uint64_t) count + (uint64_t) it->second;
> +          if (sum >= count_max)
> +            {
> +              count = count_max;
> +              // XXX: a provisional error message to give a sense of
> +              // whether this happens often-enough to do something
> +              // more complex, such as adjusting the histogram
> +              // granularity:
> +              if (n_overflows >= max_overflows) break;
> +              n_overflows++;
> +              cerr << format("WARNING: histogram bucket overflow at {:x}{}",
> +                             bucket_addr,
> +                             n_overflows >= max_overflows ?
> +                             " (... suppressing further warnings for this histogram)" : "")
> +                   << endl;
> +              break;
> +            }
> +          count = (uint16_t) sum;
> +        }
> +      bucket_addr += alignment;
> +      of.write(reinterpret_cast<const char *>(&count), sizeof(count));
> +    }
> +}
> +
> +void GprofUnwindSampleConsumer::record_gmon_out(const string& buildid,
> UnwindModuleStats& m)
> +{
> + string filename = output_dir + "/" + "gmon." + buildid + ".out";
> + string exe_symlink_path = output_dir + "/" + "gmon." + buildid + ".exe";
> + string json_path = output_dir + "/" + "gmon." + buildid + ".json";
> +
> + if (output_force) {
> + filesystem::remove(filename);
> + filesystem::remove(exe_symlink_path);
> + filesystem::remove(json_path);
> + }
> +
> + string target_path = buildid_to_mainfile[buildid];
> + if (target_path != unknown_comm) // skip .exe symlink if there's no path
> + if (symlink(target_path.c_str(), exe_symlink_path.c_str()) == -1) {
> + // Handle error, e.g., print errno or throw exception
> + cerr << format("WARNING: symlink failed: {}\n", strerror(errno));
> + // NB: no return needed here; proceed to write out other bits.
> + // A smart enough consumer will make do with buildid based executable
> lookup.
> + }
> +
> + json_object *metadata = json_object_new_object();
> + if (!metadata) {
> + json_fail:
> + cerr << format("ERROR: json allocation failed: {}\n", strerror(errno));
> + return;
> + }
> + json_object *buildid_js = json_object_new_string(buildid.c_str());
> + if (NULL == buildid_js) goto json_fail;
> + json_object_object_add(metadata, "buildid", buildid_js);
> + if (buildid_to_mainfile.count(buildid) != 0) {
> + const string &mainfile = buildid_to_mainfile[buildid];
> + json_object *mainfile_js = json_object_new_string(mainfile.c_str());
> + if (NULL == mainfile_js) goto json_fail;
> + json_object_object_add(metadata, "mainfile", mainfile_js);
> + }
> + if (buildid_to_debugfile.count(buildid) != 0) {
> + const string &debugfile = buildid_to_debugfile[buildid];
> + json_object *debugfile_js = json_object_new_string(debugfile.c_str());
> + if (NULL == debugfile_js) goto json_fail;
> + json_object_object_add(metadata, "debugfile", debugfile_js);
> + }
> + if (libpfm_event != "") {
> + json_object *event_js = json_object_new_string(libpfm_event.c_str());
> + if (NULL == event_js) goto json_fail;
> + json_object_object_add(metadata, "libpfm-event", event_js);
> + }
> + if (libpfm_event_decoded != "") {
> + json_object *event_js =
> json_object_new_string(libpfm_event_decoded.c_str());
> + if (NULL == event_js) goto json_fail;
> + json_object_object_add(metadata, "libpfm-event-decoded", event_js);
> + }
> + {
> + json_object *br_js = json_object_new_boolean(branch_record);
> + if (NULL == br_js) goto json_fail;
> + json_object_object_add(metadata, "branch-record", br_js);
> + }
> +
> + const char *metadata_str = json_object_to_json_string(metadata);
> + if (!metadata_str) goto json_fail;
> + ofstream of_js (json_path);
> + of_js << metadata_str;
> + of_js.close();
> + json_object_put (metadata);
> +
> + ofstream of (filename, ios::binary);
> + if (!of)
> + {
> + cerr << format(N_("ERROR: buildid {} -- could not open '{}' for
> writing\n"), buildid, filename);
> + }
> +
> + /* Write gmon header. It and other headers mostly hold
> + native-endian and fixed (or native) bitwidth values. In
> + principle, we should get the bitness/endianness from the
> + particular executable associated with the buildid. But, being a
> + live profiler, we don't really have to deal with CROSS
> + architecture work, and for now can just hard-code the bitness to
> + match this host program. XXX
> + */
> + int wordsize = (sizeof (void *) == 8) ? 8 : 4;
> + struct gmon_hdr ghdr;
> + memcpy (&ghdr.cookie[0], GMON_MAGIC, 4);
> + uint32_t version = GMON_VERSION;
> + memcpy (&ghdr.version[0], reinterpret_cast<const char *>(&version), 4);
> + memset (&ghdr.spare[0], 0, sizeof(ghdr.spare));
> + of.write(reinterpret_cast<const char *>(&ghdr), sizeof(ghdr));
> +
> + if (m.histogram.size() > 0)
> + {
> + uint64_t low_pc = m.histogram.begin()->first;
> + uint64_t high_pc = m.histogram.rbegin()->first;
> + uint64_t alignment = (high_pc - low_pc + 1) / UINT_MAX + 1;
> +
> + if (gmon_hist_split == HIST_SPLIT_NONE)
> + {
> + /* Put everything into one histogram. */
> + this->record_gmon_hist(of, m.histogram, low_pc, high_pc, alignment);
> + }
> + else if (gmon_hist_split == HIST_SPLIT_EVEN)
> + {
> + /* This option attempts to satisfy gprof's histogram scale
> + consistency check, which requires all values
> + '(double)(high_pc-low_pc)/num_buckets' to fall within
> + EPSILON. In practice, we can only be sure of this if we
> + cover the address space with histograms all one size. */
> +
> + /* Keep the search for 'optimal' size simple -- we just need
> + a plausible order of magnitude. XXX Some rechecking of
> + correctness needed. */
> + //uint64_t min_size = 1; // this is 'optimal' much of the time
> + uint64_t min_size = 1024;
> + uint64_t max_size = high_pc - low_pc;
> + uint64_t opt_size = min_size;
> + uint64_t opt_est = 0;
> + uint64_t next_size = opt_size;
> + while (next_size < max_size)
> + {
> + if (next_size > max_size)
> + next_size = max_size;
> + uint64_t size_inc = sizeof(struct gmon_hdr) + next_size;
> + uint64_t size_est = size_inc;
> + uint64_t pc = low_pc;
> + while (pc + size_est < high_pc)
> + {
> + auto it = m.histogram.upper_bound(pc + size_est/alignment);
> + if (it == m.histogram.end())
> + break;
> + pc = it->first;
> + size_est += sizeof(struct gmon_hdr) + next_size;
> + }
> + if (opt_est == 0 || size_est < opt_est)
> + {
> + opt_size = next_size;
> + opt_est = size_est;
> + }
> + // if (opt_est > prev_est) break; /* XXX: We've hit the lowest
> point. */
> + next_size = 2 * next_size;
> + }
> +
> + /* Partition into histograms of opt_size.
> + XXX: May need to check if low_pc must be aligned. */
> + uint64_t prev_pc = low_pc;
> + uint64_t pc = prev_pc;
> + for (const auto& p : m.histogram)
> + {
> + pc = p.first;
> + if (pc - low_pc > opt_size)
> + {
> + /* Record a histogram from low_pc to low_pc+opt_size. */
> + this->record_gmon_hist(of, m.histogram,
> + low_pc, low_pc+opt_size-1 /* >=
> prev_pc */,
> + alignment);
> + low_pc = pc;
> + }
> + prev_pc = pc;
> + }
> + /* Record a final histogram from low_pc to low_pc+opt_size.
> + XXX: Edge case -- may want to adjust for overflow of
> + low_pc+opt_size at end of address space. */
> + this->record_gmon_hist(of, m.histogram,
> + low_pc, low_pc+opt_size-1 /* >= prev_pc */,
> + alignment);
> + }
> + else if (gmon_hist_split == HIST_SPLIT_FLEX)
> + {
> + /* Allow variable-size histograms to save on storage space.
> + Will fail gprof's input consistency checks, XXX but ok
> + for profiledb purposes? */
> + uint64_t prev_pc = low_pc;
> + uint64_t pc = prev_pc;
> + /* XXX Iterate histogram ascending by key, faster than by addr
> + when we just need to scan for gaps. */
> + for (const auto& p : m.histogram)
> + {
> + pc = p.first;
> + uint64_t bin_dist = (pc - prev_pc) / alignment;
> + if (bin_dist > sizeof(struct gmon_hist_hdr))
> + /* XXX If we add '&& low_pc != prev_pc && pc != high_pc',
> + this avoids producing a histogram with only 1 entry,
> + but this is still not enough to satisfy gprof's
> + histogram scale calculation. */
> + {
> + /* Record a histogram from low_pc to prev_pc. */
> + this->record_gmon_hist(of, m.histogram, low_pc, prev_pc,
> alignment);
> + low_pc = pc;
> + }
> + prev_pc = pc;
> + }
> + /* Record a final histogram from low_pc to pc. */
> + this->record_gmon_hist(of, m.histogram, low_pc, pc, alignment);
> + }
> + }
> +
> + /* Write call graph arcs. */
> + for (auto& p : m.callgraph)
> + {
> + unsigned char tag = GMON_TAG_CG_ARC;
> + of.write(reinterpret_cast<const char *>(&tag), sizeof(tag));
> + if (wordsize == 4) {
> + uint32_t addr = p.first.first;
> + of.write(reinterpret_cast<const char *>(&addr), sizeof(addr));
> + addr = p.first.second;
> + of.write(reinterpret_cast<const char *>(&addr), sizeof(addr));
> + } else {
> + uint64_t addr = p.first.first;
> + of.write(reinterpret_cast<const char *>(&addr), sizeof(addr));
> + addr = p.first.second;
> + of.write(reinterpret_cast<const char *>(&addr), sizeof(addr));
> + }
> + /* p is (from,to) -> count */
> + uint32_t count = p.second;
> + of.write(reinterpret_cast<const char *>(&count), sizeof(count));
> + }
> +
> + of.close();
> +}
> +
> +/* On teardown, flush everything that was collected: call
> +   record_gmon_out once per buildid and, when show_summary is set,
> +   print the overall statistics followed by one line per buildid and
> +   a final total to clog.  */
> +GprofUnwindSampleConsumer::~GprofUnwindSampleConsumer()
> +{
> +  if (show_summary)
> +    {
> +      this->stats->print_summary ();
> +      clog << "=== buildid / sample counts ===\n";
> +    }
> +
> +  /* Copy the table into a buildid_map_t so the buildids are visited
> +     in sorted order.  */
> +  UnwindStatsTable::buildid_map_t by_buildid (this->stats->buildid_tab.begin(),
> +                                              this->stats->buildid_tab.end());
> +  for (auto& entry : by_buildid)
> +    {
> +      const string& buildid = entry.first;
> +      UnwindModuleStats& module_stats = entry.second;
> +      this->record_gmon_out(buildid, module_stats);
> +      if (!show_summary)
> +        continue;
> +
> +      /* record_gmon_out writes the buildid-->path mapping to a json
> +         metadata file.  That makes for a reasonable hint;
> +         debuginfod-find can be used as a mostly-functional fallback
> +         (for packaged rather than locally built executables) if the
> +         results are moved to another system.  */
> +      string mainfile = "<unknown>";
> +      if (auto main_it = buildid_to_mainfile.find(buildid);
> +          main_it != buildid_to_mainfile.end())
> +        mainfile = main_it->second;
> +
> +      string debugfile = "";
> +      if (auto debug_it = buildid_to_debugfile.find(buildid);
> +          debug_it != buildid_to_debugfile.end())
> +        debugfile = debug_it->second;
> +
> +      /* TODO also count samples / estimated histogram size? */
> +      const char *debugfile_label = debugfile.empty() ? "" : " +debugfile ";
> +      clog << format(N_("buildid {} ({}{}{}) -- received {} distinct pcs, {} callgraph arcs\n"),
> +                     buildid,
> +                     mainfile,
> +                     debugfile_label,
> +                     debugfile,
> +                     module_stats.histogram.size(),
> +                     module_stats.callgraph.size());
> +    }
> +
> +  if (show_summary)
> +    clog << "===\n"
> +         << format(N_("TOTAL -- received {} buildids\n"),
> +                   this->stats->buildid_tab.size());
> +
> +  /* The trailing newline is emitted whether or not a summary was shown.  */
> +  clog << "\n";
> +}
> +
> +
> +/* Number of stack frames this consumer wants per sample.  gprof only
> +   needs one level of backtracing, so that is the default; a
> +   non-negative value from the --maxframes option overrides it.  */
> +int
> +GprofUnwindSampleConsumer::maxframes()
> +{
> +  if (opt_maxframes < 0)
> +    return 1;
> +  return opt_maxframes;
> +}
> +
> +
> +/* Consume one unwound sample.  The sampled pc (addrs[0]) is attributed
> +   to the module that contains it, keyed by that module's build-id, and
> +   recorded as a module-relative address.  When an immediate caller
> +   (addrs[1]) exists and lies in the same module, a caller->callee
> +   call-graph arc is recorded as well.
> +
> +   The sample (or just its arc) is dropped when the pc has no module,
> +   the module has no build-id, the address lies outside the range
> +   reported by dwfl_module_info, or dwfl_module_relocate_address
> +   reports an error.  */
> +void GprofUnwindSampleConsumer::process(const UnwindSample *sample)
> +{
> +  if (sample->addrs.size() < 1)
> +    return; /* edge case -- no pc or callgraph arc */
> +
> +  const bool have_caller = sample->addrs.size() >= 2;
> +  Dwarf_Addr pc = sample->addrs[0];
> +  Dwarf_Addr pc2 = have_caller ? sample->addrs[1] : 0;
> +
> +  Dwfl_Module *mod = dwfl_addrmodule(sample->dwfl, pc);
> +  if (mod == NULL)
> +    return;
> +
> +  /* Only look up the caller's module when a caller frame exists,
> +     rather than querying for a made-up address 0.
> +     XXX: allowing mod2 == NULL -- callgraph arc will be skipped.  */
> +  Dwfl_Module *mod2 = have_caller ? dwfl_addrmodule(sample->dwfl, pc2) : NULL;
> +
> +  // extract buildid for pc (hit callee)
> +  const unsigned char *desc = nullptr;
> +  GElf_Addr vaddr;
> +  int build_id_len = dwfl_module_build_id(mod, &desc, &vaddr);
> +  if (build_id_len <= 0)
> +    return; // TODO: report/tabulate hit outside known modules
> +
> +  // possible optimization would be to use the unconverted build_id_desc as hash key
> +  string buildid;
> +  buildid.reserve(2 * static_cast<size_t>(build_id_len));
> +  for (int i = 0; i < build_id_len; ++i)
> +    buildid += format("{:02x}", static_cast<int>(desc[i]));
> +
> +  /* Initialise the out-parameters so they hold defined values even if
> +     dwfl_module_info were to leave them untouched.  */
> +  const char *mainfile_cstr = NULL;
> +  const char *debugfile_cstr = NULL;
> +  Dwarf_Addr low_addr = 0;
> +  Dwarf_Addr high_addr = 0;
> +  dwfl_module_info (mod, NULL, &low_addr, &high_addr, NULL,
> +                    NULL, &mainfile_cstr, &debugfile_cstr);
> +  string mainfile = mainfile_cstr ? mainfile_cstr : "<unknown>";
> +  string debugfile = debugfile_cstr ? debugfile_cstr : "";
> +  if (!buildid_to_mainfile.count(buildid))
> +    buildid_to_mainfile[buildid] = mainfile;
> +  if (!buildid_to_debugfile.count(buildid))
> +    buildid_to_debugfile[buildid] = debugfile;
> +  /* XXX: Also monitor for collisions here? */
> +
> +  /* NOTE(review): the table entry is created before the range check
> +     below, so a module whose samples are all skipped still gets an
> +     entry -- confirm whether that is intended.  */
> +  UnwindModuleStats *buildid_ent = this->stats->buildid_find_or_create(buildid, mod);
> +
> +  /* Relocate the callee pc to a module-relative address; raw_pc keeps
> +     the original value for the range check and diagnostics.  */
> +  uint64_t raw_pc = pc;
> +  int reloc_idx = dwfl_module_relocate_address (mod, &pc);
> +  /* XXX: Out-of-range address seen with ld-linux.so, not useful for profiledb purposes: */
> +  if (raw_pc < low_addr || raw_pc > high_addr)
> +    {
> +      if (verbose)
> +        clog << format(N_("{}: Skipping pc={:x} raw_pc={:x} outside module range start={:x}..end={:x}\n"),
> +                       mainfile, pc, raw_pc, low_addr, high_addr);
> +      return;
> +    }
> +  /* dwfl_module_relocate_address returns -1 on error; do not record an
> +     address that was never relocated.  */
> +  if (reloc_idx < 0)
> +    {
> +      if (verbose)
> +        clog << format(N_("{}: Skipping raw_pc={:x}, relocation failed: {}\n"),
> +                       mainfile, raw_pc, dwfl_errmsg (-1));
> +      return;
> +    }
> +  // XXX: could get dwfl_module_relocation_info (mod, reloc_idx, NULL), but no need?
> +  buildid_ent->record_pc(pc);
> +
> +  // If caller & callee are in different modules, this is a cross-shared-library
> +  // call, so we can't track it as a call-graph arc.  TODO: at least count them
> +  if (!have_caller || mod != mod2)
> +    return;
> +
> +  // intra-module call: map pc2 also
> +  raw_pc = pc2;
> +  reloc_idx = dwfl_module_relocate_address (mod, &pc2);
> +  if (raw_pc < low_addr || raw_pc > high_addr)
> +    {
> +      if (verbose)
> +        clog << format(N_("{}: Skipping pc={:x} raw_pc={:x} outside module range start={:x}..end={:x}\n"),
> +                       mainfile, pc2, raw_pc, low_addr, high_addr);
> +      return;
> +    }
> +  if (reloc_idx < 0)
> +    {
> +      if (verbose)
> +        clog << format(N_("{}: Skipping raw_pc={:x}, relocation failed: {}\n"),
> +                       mainfile, raw_pc, dwfl_errmsg (-1));
> +      return;
> +    }
> +  /* Arc is (from, to): caller pc2 -> callee pc.  */
> +  buildid_ent->record_callgraph_arc(pc2, pc);
> +}
> +
> +
> --
> 2.53.0
>
In po/POTFILES.in, src/stacktrace.c should be replaced with src/stackprof.cxx.
I'll spend more time later reviewing this series and send another
round of comments.
Aaron