Ilya Leoshkevich <i...@linux.ibm.com> writes:
> Add ability to dump /tmp/perf-<pid>.map and jit-<pid>.dump. > The first one allows the perf tool to map samples to each individual > translation block. The second one adds the ability to resolve symbol > names, line numbers and inspect JITed code. > > Example of use: > > perf record qemu-x86_64 -perfmap ./a.out > perf report > > or > > perf record -k 1 qemu-x86_64 -jitdump ./a.out > perf inject -j -i perf.data -o perf.data.jitted > perf report -i perf.data.jitted > > Co-developed-by: Vanderson M. do Rosario <vanderson...@gmail.com> > Co-developed-by: Alex Bennée <alex.ben...@linaro.org> > Signed-off-by: Ilya Leoshkevich <i...@linux.ibm.com> > --- > accel/tcg/debuginfo.c | 108 +++++++++++++ > accel/tcg/debuginfo.h | 54 +++++++ > accel/tcg/meson.build | 2 + > accel/tcg/perf.c | 333 ++++++++++++++++++++++++++++++++++++++ > accel/tcg/perf.h | 28 ++++ > accel/tcg/translate-all.c | 3 + > docs/devel/tcg.rst | 20 +++ > linux-user/elfload.c | 3 + > linux-user/exit.c | 2 + > linux-user/main.c | 15 ++ > linux-user/meson.build | 1 + > meson.build | 8 + > qemu-options.hx | 20 +++ > softmmu/vl.c | 11 ++ > tcg/tcg.c | 2 + > 15 files changed, 610 insertions(+) > create mode 100644 accel/tcg/debuginfo.c > create mode 100644 accel/tcg/debuginfo.h > create mode 100644 accel/tcg/perf.c > create mode 100644 accel/tcg/perf.h > > diff --git a/accel/tcg/debuginfo.c b/accel/tcg/debuginfo.c > new file mode 100644 > index 0000000000..904eb23103 > --- /dev/null > +++ b/accel/tcg/debuginfo.c > @@ -0,0 +1,108 @@ > +/* > + * Debug information support. 
> + * > + * SPDX-License-Identifier: GPL-2.0-or-later > + */ > + > +#include "qemu/osdep.h" > + > +#include <elfutils/libdwfl.h> > + > +#include "debuginfo.h" > + > +static QemuMutex lock; > +static Dwfl *dwfl; > +static const Dwfl_Callbacks dwfl_callbacks = { > + .find_elf = NULL, > + .find_debuginfo = dwfl_standard_find_debuginfo, > + .section_address = NULL, > + .debuginfo_path = NULL, > +}; > + > +__attribute__((constructor)) > +static void debuginfo_init(void) > +{ > + qemu_mutex_init(&lock); > +} > + > +bool debuginfo_report_elf(const char *image_name, int image_fd, > + target_ulong load_bias) > +{ > + qemu_mutex_lock(&lock); You can wrap this up with a QEMU_LOCK_GUARD(&lock) { and avoid having to catch all your exit cases. > + > + if (dwfl == NULL) { > + dwfl = dwfl_begin(&dwfl_callbacks); > + } else { > + dwfl_report_begin_add(dwfl); > + } > + > + if (dwfl == NULL) { > + qemu_mutex_unlock(&lock); > + return false; > + } > + > + dwfl_report_elf(dwfl, image_name, image_name, image_fd, load_bias, true); > + dwfl_report_end(dwfl, NULL, NULL); > + qemu_mutex_unlock(&lock); > + return true; > +} > + > +bool debuginfo_get_symbol(target_ulong address, > + const char **symbol, target_ulong *offset) > +{ > + Dwfl_Module *dwfl_module; > + GElf_Off dwfl_offset; > + GElf_Sym dwfl_sym; > + > + qemu_mutex_lock(&lock); > + > + if (dwfl == NULL) { > + qemu_mutex_unlock(&lock); > + return false; > + } > + > + dwfl_module = dwfl_addrmodule(dwfl, address); > + if (dwfl_module == NULL) { > + qemu_mutex_unlock(&lock); > + return false; > + } > + > + *symbol = dwfl_module_addrinfo(dwfl_module, address, &dwfl_offset, > + &dwfl_sym, NULL, NULL, NULL); > + if (*symbol == NULL) { > + qemu_mutex_unlock(&lock); > + return false; > + } > + *offset = dwfl_offset; > + qemu_mutex_unlock(&lock); > + return true; > +} > + > +bool debuginfo_get_line(target_ulong address, > + const char **file, int *line) > +{ > + Dwfl_Module *dwfl_module; > + Dwfl_Line *dwfl_line; > + > + 
qemu_mutex_lock(&lock); ditto. > + > + if (dwfl == NULL) { > + qemu_mutex_unlock(&lock); > + return false; > + } > + > + dwfl_module = dwfl_addrmodule(dwfl, address); > + if (dwfl_module == NULL) { > + qemu_mutex_unlock(&lock); > + return false; > + } > + > + dwfl_line = dwfl_module_getsrc(dwfl_module, address); > + if (dwfl_line == NULL) { > + qemu_mutex_unlock(&lock); > + return false; > + } > + *file = dwfl_lineinfo(dwfl_line, NULL, line, 0, NULL, NULL); > + qemu_mutex_unlock(&lock); > + return true; > +} > diff --git a/accel/tcg/debuginfo.h b/accel/tcg/debuginfo.h > new file mode 100644 > index 0000000000..f4f22aa786 > --- /dev/null > +++ b/accel/tcg/debuginfo.h > @@ -0,0 +1,54 @@ > +/* > + * Debug information support. > + * > + * SPDX-License-Identifier: GPL-2.0-or-later > + */ > + > +#ifndef ACCEL_TCG_DEBUGINFO_H > +#define ACCEL_TCG_DEBUGINFO_H > + > +#include "exec/cpu-defs.h" > + > +#ifdef CONFIG_LIBDW > +/* > + * Load debuginfo for the specified guest ELF image. > + * Return true on success, false on failure. > + */ > +bool debuginfo_report_elf(const char *image_name, int image_fd, > + target_ulong load_bias); > + > +/* > + * Find a symbol name associated with the specified guest PC. > + * Return true on success, false if there is no associated symbol. > + */ > +bool debuginfo_get_symbol(target_ulong address, > + const char **symbol, target_ulong *offset); > + > +/* > + * Find a line number associated with the specified guest PC. > + * Return true on success, false if there is no associated line number. 
> + */ > +bool debuginfo_get_line(target_ulong address, > + const char **file, int *line); > +#else > +static inline bool debuginfo_report_elf(const char *image_name, int image_fd, > + target_ulong load_bias) > +{ > + return false; > +} > + > +static inline bool debuginfo_get_symbol(target_ulong address, > + const char **symbol, > + target_ulong *offset) > +{ > + return false; > +} > + > +static inline bool debuginfo_get_line(target_ulong address, > + const char **file, int *line) > +{ > + return false; > +} > +#endif > + > +#endif > diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build > index 7a0a79d731..e206e3471b 100644 > --- a/accel/tcg/meson.build > +++ b/accel/tcg/meson.build > @@ -1,5 +1,6 @@ > tcg_ss = ss.source_set() > tcg_ss.add(files( > + 'perf.c', > 'tcg-all.c', > 'cpu-exec-common.c', > 'cpu-exec.c', > @@ -11,6 +12,7 @@ tcg_ss.add(files( > tcg_ss.add(when: 'CONFIG_USER_ONLY', if_true: files('user-exec.c')) > tcg_ss.add(when: 'CONFIG_SOFTMMU', if_false: files('user-exec-stub.c')) > tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c')]) > +tcg_ss.add(when: libdw, if_true: files('debuginfo.c')) > specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss) > > specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files( > diff --git a/accel/tcg/perf.c b/accel/tcg/perf.c > new file mode 100644 > index 0000000000..80b5a1bf8b > --- /dev/null > +++ b/accel/tcg/perf.c > @@ -0,0 +1,333 @@ > +/* > + * Linux perf perf-<pid>.map and jit-<pid>.dump integration. > + * > + * The jitdump spec can be found at [1]. 
> + * > + * [1] > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/tools/perf/Documentation/jitdump-specification.txt > + * > + * SPDX-License-Identifier: GPL-2.0-or-later > + */ > + > +#include "qemu/osdep.h" > +#include "elf.h" > +#include "qemu/timer.h" > +#include "tcg/tcg.h" > + > +#include "debuginfo.h" > +#include "perf.h" > + > +static FILE *safe_fopen_w(const char *path) > +{ > + int saved_errno; > + FILE *f; > + int fd; > + > + /* Delete the old file, if any. */ > + unlink(path); > + > + /* Avoid symlink attacks by using O_CREAT | O_EXCL. */ > + fd = open(path, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR); > + if (fd == -1) { > + return NULL; > + } > + > + /* Convert fd to FILE*. */ > + f = fdopen(fd, "w"); > + if (f == NULL) { > + saved_errno = errno; > + close(fd); > + errno = saved_errno; > + return NULL; > + } > + > + return f; > +} > + > +static FILE *perfmap; > + > +void perf_enable_perfmap(void) > +{ > + char map_file[32]; > + > + snprintf(map_file, sizeof(map_file), "/tmp/perf-%d.map", getpid()); > + perfmap = safe_fopen_w(map_file); > + if (perfmap == NULL) { > + warn_report("Could not open %s: %s, proceeding without perfmap", > + map_file, strerror(errno)); > + } > +} > + > +static FILE *jitdump; > + > +#define JITHEADER_MAGIC 0x4A695444 > +#define JITHEADER_VERSION 1 > + > +struct jitheader { > + uint32_t magic; > + uint32_t version; > + uint32_t total_size; > + uint32_t elf_mach; > + uint32_t pad1; > + uint32_t pid; > + uint64_t timestamp; > + uint64_t flags; > +}; > + > +enum jit_record_type { > + JIT_CODE_LOAD = 0, > + JIT_CODE_DEBUG_INFO = 2, > +}; > + > +struct jr_prefix { > + uint32_t id; > + uint32_t total_size; > + uint64_t timestamp; > +}; > + > +struct jr_code_load { > + struct jr_prefix p; > + > + uint32_t pid; > + uint32_t tid; > + uint64_t vma; > + uint64_t code_addr; > + uint64_t code_size; > + uint64_t code_index; > +}; > + > +struct debug_entry { > + uint64_t addr; > + int lineno; > + int discrim; > + 
const char name[]; > +}; > + > +struct jr_code_debug_info { > + struct jr_prefix p; > + > + uint64_t code_addr; > + uint64_t nr_entry; > + struct debug_entry entries[]; > +}; > + > +static uint32_t get_e_machine(void) > +{ > + Elf64_Ehdr elf_header; > + FILE *exe; > + size_t n; > + > + QEMU_BUILD_BUG_ON(offsetof(Elf32_Ehdr, e_machine) != > + offsetof(Elf64_Ehdr, e_machine)); > + > + exe = fopen("/proc/self/exe", "r"); > + if (exe == NULL) { > + return EM_NONE; > + } > + > + n = fread(&elf_header, sizeof(elf_header), 1, exe); > + fclose(exe); > + if (n != 1) { > + return EM_NONE; > + } > + > + return elf_header.e_machine; > +} > + > +void perf_enable_jitdump(void) > +{ > + struct jitheader header; > + char jitdump_file[32]; > +#ifdef CONFIG_LINUX > + void *perf_marker; > +#endif > + > + if (!use_rt_clock) { > + warn_report("CLOCK_MONOTONIC is not available, proceeding without > jitdump"); > + return; > + } > + > + snprintf(jitdump_file, sizeof(jitdump_file), "jit-%d.dump", getpid()); > + jitdump = safe_fopen_w(jitdump_file); > + if (jitdump == NULL) { > + warn_report("Could not open %s: %s, proceeding without jitdump", > + jitdump_file, strerror(errno)); > + return; > + } > + > +#ifdef CONFIG_LINUX > + /* > + * `perf inject` will see that the mapped file name in the corresponding > + * PERF_RECORD_MMAP or PERF_RECORD_MMAP2 event is of the form jit-%d.dump > + * and will process it as a jitdump file. 
> + */ > + perf_marker = mmap(NULL, qemu_real_host_page_size(), PROT_READ | > PROT_EXEC, > + MAP_PRIVATE, fileno(jitdump), 0); > + if (perf_marker == MAP_FAILED) { > + warn_report("Could not map %s: %s, proceeding without jitdump", > + jitdump_file, strerror(errno)); > + fclose(jitdump); > + jitdump = NULL; > + return; > + } > +#endif > + > + header.magic = JITHEADER_MAGIC; > + header.version = JITHEADER_VERSION; > + header.total_size = sizeof(header); > + header.elf_mach = get_e_machine(); > + header.pad1 = 0; > + header.pid = getpid(); > + header.timestamp = get_clock(); > + header.flags = 0; > + fwrite(&header, sizeof(header), 1, jitdump); > +} > + > +void perf_report_prologue(void *start, size_t size) > +{ > + if (perfmap) { > + fprintf(perfmap, "%"PRIxPTR" %zx tcg-prologue-buffer\n", > + (uintptr_t)start, size); > + } > +} > + > +/* > + * Append a single line mapping to a JIT_CODE_DEBUG_INFO jitdump entry. > + * Return 1 on success, 0 if there is no line number information for > guest_pc. > + */ > +static int append_debug_entry(GArray *raw, void *host_pc, > + target_ulong guest_pc) > +{ > + struct debug_entry ent; > + const char *file; > + int line; > + > + if (!debuginfo_get_line(guest_pc, &file, &line)) { > + return 0; > + } > + > + ent.addr = (uint64_t)host_pc; > + ent.lineno = line; > + ent.discrim = 0; > + g_array_append_vals(raw, &ent, sizeof(ent)); > + g_array_append_vals(raw, file, strlen(file) + 1); > + return 1; > +} > + > +/* Write a JIT_CODE_DEBUG_INFO jitdump entry. */ > +static void write_jr_code_debug_info(void *start, size_t size, int icount) > +{ > + GArray *raw = g_array_new(false, false, 1); > + struct jr_code_debug_info rec; > + struct debug_entry ent; > + target_ulong guest_pc; > + void *host_pc; > + int insn; > + > + /* Reserve space for the header. */ > + g_array_set_size(raw, sizeof(rec)); > + > + /* Create debug entries. 
*/ > + rec.nr_entry = 0; > + for (insn = 0; insn < icount; insn++) { > + host_pc = start; > + if (insn != 0) { > + host_pc += tcg_ctx->gen_insn_end_off[insn - 1]; > + } > + guest_pc = tcg_ctx->gen_insn_data[insn][0]; > + rec.nr_entry += append_debug_entry(raw, host_pc, guest_pc); > + } > + > + /* Trailing debug_entry. */ > + ent.addr = (uint64_t)start + size; > + ent.lineno = 0; > + ent.discrim = 0; > + g_array_append_vals(raw, &ent, sizeof(ent)); > + g_array_append_vals(raw, "", 1); > + rec.nr_entry++; > + > + /* Create header. */ > + rec.p.id = JIT_CODE_DEBUG_INFO; > + rec.p.total_size = raw->len; > + rec.p.timestamp = get_clock(); > + rec.code_addr = (uint64_t)start; > + memcpy(raw->data, &rec, sizeof(rec)); > + > + /* Flush. */ > + fwrite(raw->data, raw->len, 1, jitdump); > + g_array_unref(raw); > +} > + > +/* Write a JIT_CODE_LOAD jitdump entry. */ > +static void write_jr_code_load(void *start, size_t size, > + const char *symbol, const char *suffix) > +{ > + static uint64_t code_index; > + struct jr_code_load rec; > + size_t suffix_size; > + size_t name_size; > + > + name_size = strlen(symbol); > + suffix_size = strlen(suffix) + 1; > + rec.p.id = JIT_CODE_LOAD; > + rec.p.total_size = sizeof(rec) + name_size + suffix_size + size; > + rec.p.timestamp = get_clock(); > + rec.pid = getpid(); > + rec.tid = gettid(); > + rec.vma = (uint64_t)start; > + rec.code_addr = (uint64_t)start; > + rec.code_size = size; > + rec.code_index = code_index++; > + fwrite(&rec, sizeof(rec), 1, jitdump); > + fwrite(symbol, name_size, 1, jitdump); > + fwrite(suffix, suffix_size, 1, jitdump); > + fwrite(start, size, 1, jitdump); > +} > + > +void perf_report_code(void *start, size_t size, int icount, uint64_t pc) > +{ > + char suffix[32] = ""; > + char symbol_buf[32]; > + const char *symbol; > + target_ulong offset; > + > + /* Symbolize guest PC. 
*/ > + if (perfmap || jitdump) { > + if (!debuginfo_get_symbol(pc, &symbol, &offset)) { > + snprintf(symbol_buf, sizeof(symbol_buf), "subject-%"PRIx64, pc); > + symbol = symbol_buf; > + offset = 0; > + } > + if (offset != 0) { > + snprintf(suffix, sizeof(suffix), "+0x%"PRIx64, (uint64_t)offset); > + } > + } > + > + /* Emit a perfmap entry if needed. */ > + if (perfmap) { > + flockfile(perfmap); > + fprintf(perfmap, "%"PRIxPTR" %zx %s%s\n", > + (uintptr_t)start, size, symbol, suffix); > + funlockfile(perfmap); > + } > + > + /* Emit jitdump entries if needed. */ > + if (jitdump) { > + flockfile(jitdump); > + write_jr_code_debug_info(start, size, icount); > + write_jr_code_load(start, size, symbol, suffix); > + funlockfile(jitdump); > + } > +} > + > +void perf_exit(void) > +{ > + if (perfmap) { > + fclose(perfmap); > + perfmap = NULL; > + } > + > + if (jitdump) { > + fclose(jitdump); > + jitdump = NULL; > + } > +} > diff --git a/accel/tcg/perf.h b/accel/tcg/perf.h > new file mode 100644 > index 0000000000..df54be9ccd > --- /dev/null > +++ b/accel/tcg/perf.h > @@ -0,0 +1,28 @@ > +/* > + * Linux perf perf-<pid>.map and jit-<pid>.dump integration. > + * > + * SPDX-License-Identifier: GPL-2.0-or-later > + */ > + > +#ifndef ACCEL_TCG_PERF_H > +#define ACCEL_TCG_PERF_H > + > +#include <stddef.h> > +#include <stdint.h> > + > +/* Start writing perf-<pid>.map. */ > +void perf_enable_perfmap(void); > + > +/* Start writing jit-<pid>.dump. */ > +void perf_enable_jitdump(void); > + > +/* Add information about TCG prologue to profiler maps. */ > +void perf_report_prologue(void *start, size_t size); > + > +/* Add information about JITted guest code to profiler maps. */ > +void perf_report_code(void *start, size_t size, int icount, uint64_t pc); > + > +/* Stop writing perf-<pid>.map and/or jit-<pid>.dump. 
*/ > +void perf_exit(void); > + > +#endif > diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c > index 4ed75a13e1..b9e8d8066f 100644 > --- a/accel/tcg/translate-all.c > +++ b/accel/tcg/translate-all.c > @@ -62,6 +62,7 @@ > #include "tb-hash.h" > #include "tb-context.h" > #include "internal.h" > +#include "perf.h" > > /* #define DEBUG_TB_INVALIDATE */ > /* #define DEBUG_TB_FLUSH */ > @@ -1492,6 +1493,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu, > } > tb->tc.size = gen_code_size; > > + perf_report_code(gen_code_buf, gen_code_size, tb->icount, tb->pc); > + I think the recent code refactoring means this needs updating. If it's the guest PC, I think that is already in a local variable. > #ifdef CONFIG_PROFILER > qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti); > qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size); > diff --git a/docs/devel/tcg.rst b/docs/devel/tcg.rst > index a65fb7b1c4..63e42b0426 100644 > --- a/docs/devel/tcg.rst > +++ b/docs/devel/tcg.rst > @@ -188,3 +188,23 @@ memory areas instead calls out to C code for device > emulation. > Finally, the MMU helps tracking dirty pages and pages pointed to by > translation blocks. > > +Profiling JITted code > +--------------------- > + > +The Linux ``perf`` tool will treat all JITted code as a single block as > +unlike the main code it can't use debug information to link individual > +program counter samples with larger functions. To overcome this > +limitation you can use the ``-perfmap`` or the ``-jitdump`` option to > generate > +map files. ``-perfmap`` is lightweight and produces only guest-host mappings. > +``-jitdump`` additionally saves JITed code and guest debug information (if > +available); its output needs to be integrated with the ``perf.data`` file > +before the final report can be viewed. I think this needs to be a bit clearer. Does jitdump only make sense if the guest has debug information?
Running: perf record -k 1 ./qemu-system-aarch64 -jitdump -monitor none \ -display none -chardev stdio,id=output \ -M virt -cpu max -display none \ -semihosting-config enable=on,target=native,chardev=output \ -kernel tests/tcg/aarch64-softmmu/memory perf inject -j -i perf.data -o perf.data.jitted perf report -i perf.data.jitted gives me pretty much the same report as the -perfmap. I think this is because we only properly look at the guest elf data for linux-user? > + > +.. code:: > + > + perf record $QEMU -perfmap $REMAINING_ARGS > + perf report > + > + perf record -k 1 $QEMU -jitdump $REMAINING_ARGS > + perf inject -j -i perf.data -o perf.data.jitted > + perf report -i perf.data.jitted > diff --git a/linux-user/elfload.c b/linux-user/elfload.c > index 20894b633f..5928c14dfc 100644 > --- a/linux-user/elfload.c > +++ b/linux-user/elfload.c > @@ -19,6 +19,7 @@ > #include "qemu/selfmap.h" > #include "qapi/error.h" > #include "target_signal.h" > +#include "accel/tcg/debuginfo.h" > > #ifdef _ARCH_PPC64 > #undef ARCH_DLINFO > @@ -3261,6 +3262,8 @@ static void load_elf_image(const char *image_name, int > image_fd, > load_symbols(ehdr, image_fd, load_bias); > } > > + debuginfo_report_elf(image_name, image_fd, load_bias); > + > mmap_unlock(); > > close(image_fd); > diff --git a/linux-user/exit.c b/linux-user/exit.c > index fa6ef0b9b4..607b6da9fc 100644 > --- a/linux-user/exit.c > +++ b/linux-user/exit.c > @@ -17,6 +17,7 @@ > * along with this program; if not, see <http://www.gnu.org/licenses/>. 
> */ > #include "qemu/osdep.h" > +#include "accel/tcg/perf.h" > #include "exec/gdbstub.h" > #include "qemu.h" > #include "user-internals.h" > @@ -38,4 +39,5 @@ void preexit_cleanup(CPUArchState *env, int code) > #endif > gdb_exit(code); > qemu_plugin_user_exit(); > + perf_exit(); > } > diff --git a/linux-user/main.c b/linux-user/main.c > index a17fed045b..4290651c3c 100644 > --- a/linux-user/main.c > +++ b/linux-user/main.c > @@ -53,6 +53,7 @@ > #include "signal-common.h" > #include "loader.h" > #include "user-mmap.h" > +#include "accel/tcg/perf.h" > > #ifdef CONFIG_SEMIHOSTING > #include "semihosting/semihost.h" > @@ -423,6 +424,16 @@ static void handle_arg_abi_call0(const char *arg) > } > #endif > > +static void handle_arg_perfmap(const char *arg) > +{ > + perf_enable_perfmap(); > +} > + > +static void handle_arg_jitdump(const char *arg) > +{ > + perf_enable_jitdump(); > +} > + > static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins); > > #ifdef CONFIG_PLUGIN > @@ -493,6 +504,10 @@ static const struct qemu_argument arg_table[] = { > {"xtensa-abi-call0", "QEMU_XTENSA_ABI_CALL0", false, > handle_arg_abi_call0, > "", "assume CALL0 Xtensa ABI"}, > #endif > + {"perfmap", "QEMU_PERFMAP", false, handle_arg_perfmap, > + "", "Generate a /tmp/perf-${pid}.map file for perf"}, > + {"jitdump", "QEMU_JITDUMP", false, handle_arg_jitdump, > + "", "Generate a jit-${pid}.dump file for perf"}, > {NULL, NULL, false, NULL, NULL, NULL} > }; > > diff --git a/linux-user/meson.build b/linux-user/meson.build > index de4320af05..7171dc60be 100644 > --- a/linux-user/meson.build > +++ b/linux-user/meson.build > @@ -22,6 +22,7 @@ linux_user_ss.add(files( > 'uname.c', > )) > linux_user_ss.add(rt) > +linux_user_ss.add(libdw) > > linux_user_ss.add(when: 'TARGET_HAS_BFLT', if_true: files('flatload.c')) > linux_user_ss.add(when: 'TARGET_I386', if_true: files('vm86.c')) > diff --git a/meson.build b/meson.build > index b686dfef75..be625efcc5 100644 > --- a/meson.build > +++ b/meson.build > 
@@ -1631,6 +1631,12 @@ if libbpf.found() and not cc.links(''' > endif > endif > > +# libdw > +libdw = dependency('libdw', > + method: 'pkg-config', > + kwargs: static_kwargs, > + required: false) > + > ################# > # config-host.h # > ################# > @@ -1897,6 +1903,7 @@ config_host_data.set('CONFIG_DBUS_DISPLAY', > dbus_display) > config_host_data.set('CONFIG_CFI', get_option('cfi')) > config_host_data.set('CONFIG_SELINUX', selinux.found()) > config_host_data.set('CONFIG_XEN_BACKEND', xen.found()) > +config_host_data.set('CONFIG_LIBDW', libdw.found()) > if xen.found() > # protect from xen.version() having less than three components > xen_version = xen.version().split('.') + ['0', '0'] > @@ -3937,6 +3944,7 @@ summary_info += {'libudev': libudev} > # Dummy dependency, keep .found() > summary_info += {'FUSE lseek': fuse_lseek.found()} > summary_info += {'selinux': selinux} > +summary_info += {'libdw': libdw} > summary(summary_info, bool_yn: true, section: 'Dependencies') > > if not supported_cpus.contains(cpu) > diff --git a/qemu-options.hx b/qemu-options.hx > index 95b998a13b..b0c64c4a31 100644 > --- a/qemu-options.hx > +++ b/qemu-options.hx > @@ -4799,6 +4799,26 @@ SRST > Enable synchronization profiling. > ERST > > +#ifdef CONFIG_TCG > +DEF("perfmap", 0, QEMU_OPTION_perfmap, > + "-perfmap generate a /tmp/perf-${pid}.map file for perf\n", > + QEMU_ARCH_ALL) > +SRST > +``-perfmap`` > + Generate a map file for Linux perf tools that will allow basic profiling > + information to be broken down into basic blocks. > +ERST > + > +DEF("jitdump", 0, QEMU_OPTION_jitdump, > + "-jitdump generate a jit-${pid}.dump file for perf\n", > + QEMU_ARCH_ALL) > +SRST > +``-jitdump`` > + Generate a dump file for Linux perf tools that maps basic blocks to > symbol > + names, line numbers and JITted code. 
> +ERST > +#endif > + > DEFHEADING() > > DEFHEADING(Generic object creation:) > diff --git a/softmmu/vl.c b/softmmu/vl.c > index b464da25bc..40e371a0c2 100644 > --- a/softmmu/vl.c > +++ b/softmmu/vl.c > @@ -96,6 +96,9 @@ > #include "fsdev/qemu-fsdev.h" > #endif > #include "sysemu/qtest.h" > +#ifdef CONFIG_TCG > +#include "accel/tcg/perf.h" > +#endif > > #include "disas/disas.h" > > @@ -2900,6 +2903,14 @@ void qemu_init(int argc, char **argv) > case QEMU_OPTION_DFILTER: > qemu_set_dfilter_ranges(optarg, &error_fatal); > break; > +#ifdef CONFIG_TCG > + case QEMU_OPTION_perfmap: > + perf_enable_perfmap(); > + break; > + case QEMU_OPTION_jitdump: > + perf_enable_jitdump(); > + break; > +#endif > case QEMU_OPTION_seed: > qemu_guest_random_seed_main(optarg, &error_fatal); > break; > diff --git a/tcg/tcg.c b/tcg/tcg.c > index 612a12f58f..cd1ccf2bff 100644 > --- a/tcg/tcg.c > +++ b/tcg/tcg.c > @@ -61,6 +61,7 @@ > #include "exec/log.h" > #include "tcg/tcg-ldst.h" > #include "tcg-internal.h" > +#include "accel/tcg/perf.h" > > #ifdef CONFIG_TCG_INTERPRETER > #include <ffi.h> > @@ -749,6 +750,7 @@ void tcg_prologue_init(TCGContext *s) > #endif > > prologue_size = tcg_current_code_size(s); > + perf_report_prologue(s->code_gen_ptr, prologue_size); > > #ifndef CONFIG_TCG_INTERPRETER > flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), -- Alex Bennée