Hi Serhei,
On Tue, Mar 24, 2026 at 6:37 PM Serhei Makarov <[email protected]> wrote:
>
> Expanding the libdwflst support for handling perf sample_regs register
> files to other architectures. First, add the pieces for aarch64.
>
> * backends/Makefile.am (aarch64_SRCS): Add aarch64_initreg_sample.c.
> * backends/aarch64_init.c (aarch64_init): Add hooks for
> set_initial_registers_sample, sample_sp_pc, perf_frame_regs_mask.
> * backends/aarch64_initreg_sample.c: New file. Implement
> aarch64_set_initial_registers_sample mirroring the ptrace->dwarf_regs
> logic in aarch64_initreg.c.
> * backends/libebl_PERF_FLAGS.h (PERF_FRAME_REGISTERS_AARCH64): New
> constant describing registers needed for aarch64 unwinding.
> (generic_sample_sp_pc): New inline function generalizing existing
> x86_sample_sp_pc code to any arch given sp_index and pc_index.
> * libebl/eblinitreg_sample.c (ebl_sample_perf_regs_mapping): Update
> with a default implementation for arches where perf_regs and
> dwarf_regs order coincides (we just need the mapping to account for
> present/absent registers in perf_regs mask).
> * libebl/eblopenbackend.c (__libebl_init_cached_regs_mapping): New
> function generalizing a bit of ARCH_init regs_mapping boilerplate.
> * libebl/eblopenbackend.c (__libebl_init_cached_regs_mapping):
> Implement new function.
> ---
> backends/Makefile.am | 4 +-
> backends/aarch64_init.c | 8 ++-
> backends/aarch64_initreg_sample.c | 104 ++++++++++++++++++++++++++++++
> backends/libebl_PERF_FLAGS.h | 49 ++++++++++++--
> libebl/eblinitreg_sample.c | 49 ++++++++++++--
> libebl/eblopenbackend.c | 13 +++-
> libebl/libeblP.h | 5 +-
> 7 files changed, 217 insertions(+), 15 deletions(-)
> create mode 100644 backends/aarch64_initreg_sample.c
>
> diff --git a/backends/Makefile.am b/backends/Makefile.am
> index 7a820df0..bebd990e 100644
> --- a/backends/Makefile.am
> +++ b/backends/Makefile.am
> @@ -1,6 +1,6 @@
> ## Process this file with automake to create Makefile.in
> ##
> -## Copyright (C) 2000-2010, 2013, 2014, 2025 Red Hat, Inc.
> +## Copyright (C) 2000-2010, 2013, 2014, 2025-2026 Red Hat, Inc.
> ## Copyright (C) 2012 Tilera Corporation
> ## This file is part of elfutils.
> ##
> @@ -61,7 +61,7 @@ arm_SRCS = arm_init.c arm_symbol.c arm_regs.c
> arm_corenote.c \
>
> aarch64_SRCS = aarch64_init.c aarch64_regs.c aarch64_symbol.c \
> aarch64_corenote.c aarch64_retval.c aarch64_cfi.c \
> - aarch64_initreg.c aarch64_unwind.c
> + aarch64_initreg.c aarch64_initreg_sample.c aarch64_unwind.c
>
> sparc_SRCS = sparc_init.c sparc_symbol.c sparc_regs.c sparc_retval.c \
> sparc_corenote.c sparc64_corenote.c sparc_auxv.c sparc_attrs.c \
> diff --git a/backends/aarch64_init.c b/backends/aarch64_init.c
> index c61767d5..f6505bd7 100644
> --- a/backends/aarch64_init.c
> +++ b/backends/aarch64_init.c
> @@ -1,5 +1,5 @@
> /* Initialization of AArch64 specific backend library.
> - Copyright (C) 2013, 2017 Red Hat, Inc.
> + Copyright (C) 2013, 2017, 2026 Red Hat, Inc.
> This file is part of elfutils.
>
> This file is free software; you can redistribute it and/or modify
> @@ -33,6 +33,7 @@
> #define BACKEND aarch64_
> #define RELOC_PREFIX R_AARCH64_
> #include "libebl_CPU.h"
> +#include "libebl_PERF_FLAGS.h"
>
> /* This defines the common reloc hooks based on aarch64_reloc.def. */
> #include "common-reloc.c"
> @@ -61,6 +62,11 @@ aarch64_init (Elf *elf __attribute__ ((unused)),
> + ALT_FRAME_RETURN_COLUMN (used when LR isn't used) = 97 DWARF regs. */
> eh->frame_nregs = 97;
> HOOK (eh, set_initial_registers_tid);
> + HOOK (eh, set_initial_registers_sample);
> + HOOK (eh, sample_sp_pc);
> + /* sample_perf_regs_mapping is default ver */
> + eh->perf_frame_regs_mask = PERF_FRAME_REGISTERS_AARCH64;
> + __libebl_init_cached_regs_mapping (eh);
> HOOK (eh, unwind);
>
> return eh;
> diff --git a/backends/aarch64_initreg_sample.c
> b/backends/aarch64_initreg_sample.c
> new file mode 100644
> index 00000000..1ea17324
> --- /dev/null
> +++ b/backends/aarch64_initreg_sample.c
> @@ -0,0 +1,104 @@
> +/* Populate process registers from a register sample.
> + Copyright (C) 2026 Red Hat Inc.
> + This file is part of elfutils.
> +
> + This file is free software; you can redistribute it and/or modify
> + it under the terms of either
> +
> + * the GNU Lesser General Public License as published by the Free
> + Software Foundation; either version 3 of the License, or (at
> + your option) any later version
> +
> + or
> +
> + * the GNU General Public License as published by the Free
> + Software Foundation; either version 2 of the License, or (at
> + your option) any later version
> +
> + or both in parallel, as here.
> +
> + elfutils is distributed in the hope that it will be useful, but
> + WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + General Public License for more details.
> +
> + You should have received copies of the GNU General Public License and
> + the GNU Lesser General Public License along with this program. If
> + not, see <http://www.gnu.org/licenses/>. */
> +
> +#ifdef HAVE_CONFIG_H
> +# include <config.h>
> +#endif
> +
> +#include <stdlib.h>
> +#include <assert.h>
> +
> +#define BACKEND aarch64_
> +#include "libebl_CPU.h"
> +#include "libebl_PERF_FLAGS.h"
> +/* TODO libebl_PERF_FLAGS.h now includes generic_sample_sp_pc
> + -- may want to rename the header? */
> +
> +bool
> +aarch64_sample_sp_pc (const Dwarf_Word *regs, uint32_t n_regs,
> + const int *regs_mapping, uint32_t n_regs_mapping,
> + Dwarf_Word *sp, Dwarf_Word *pc)
> +{
> + return generic_sample_sp_pc (regs, n_regs, regs_mapping, n_regs_mapping,
> + sp, 31 /* index of sp in dwarf_regs */,
> + pc, 32 /* index of pc in dwarf_regs */);
> +}
> +
> +bool
> +aarch64_set_initial_registers_sample (const Dwarf_Word *regs, uint32_t
> n_regs,
> + const int *regs_mapping, size_t
> n_regs_mapping,
> + ebl_tid_registers_t *setfunc,
> + void *arg)
> +{
> +/* TODO(REVIEW): The #ifdef here seems strictly optional as we don't
> + refer to perf_events or ptrace arch-specific declarations. */
This builds for me on x86_64 with the #if !defined(__aarch64__) stub
removed. If we wanted to support reading aarch64 samples on a
non-aarch64 host then we don't want the stub in any case.
> +#if !defined(__aarch64__)
> + (void)regs; (void)n_regs;
> + (void)regs_mapping; (void)n_regs_mapping;
> + (void)setfunc; (void)arg;
> + return false;
> +#else
> +#define N_GREGS 33
> + Dwarf_Word dwarf_regs[N_GREGS];
> + bool scratch_present = false;
> + size_t i;
> + for (i = 0; i < N_GREGS; i++)
> + dwarf_regs[i] = 0x0;
> + for (i = 0; i < n_regs; i++)
> + {
> + if (i >= n_regs_mapping)
> + break;
> + if (regs_mapping[i] < 0 || regs_mapping[i] >= N_GREGS)
> + continue;
> + if (regs_mapping[i] < 19)
> + scratch_present = true;
> + dwarf_regs[regs_mapping[i]] = regs[i];
> + }
> +
> + /* X0..X18 only if present. */
> + if (scratch_present && ! setfunc (0, 19, &dwarf_regs[0], arg))
> + return false;
> +
> + /* X19..X29, X30(LR) plus SP. */
> + if (! setfunc (19, 32 - 18, &dwarf_regs[19], arg))
> + return false;
> +
> + /* PC. */
> + if (! setfunc (-1, 1, &dwarf_regs[32], arg))
> + return false;
> +
> + /* TODO(REVIEW): May need to obtain PAC mask since the unwinder
> + needs to strip it from LR/X30 to handle pointer
> + authentication. */
It looks like PAC support was added to aarch64_initreg.c recently in
commit 52a747a316. I'm not familiar with the details of aarch64 PAC
but it sounds like a mask may need to be applied to sample register
values in some cases.
> +
> + /* Skip ELR, RA_SIGN_STATE */
> +
> + /* XXX Skip FP registers. */
> + return true;
> +#endif /* __aarch64__ */
> +}
> diff --git a/backends/libebl_PERF_FLAGS.h b/backends/libebl_PERF_FLAGS.h
> index 51c20ea6..54eb38a9 100644
> --- a/backends/libebl_PERF_FLAGS.h
> +++ b/backends/libebl_PERF_FLAGS.h
> @@ -1,7 +1,7 @@
> /* Linux perf_events sample_regs_user flags required for unwinding.
> Internal only; elfutils library users should use
> ebl_perf_frame_regs_mask().
>
> - Copyright (C) 2025 Red Hat, Inc.
> + Copyright (C) 2025-2026 Red Hat, Inc.
> This file is part of elfutils.
>
> This file is free software; you can redistribute it and/or modify
> @@ -33,7 +33,7 @@
>
> #if defined(__linux__)
> /* XXX Need to exclude __linux__ arches without perf_regs.h. */
> -#if defined(__x86_64__) || defined(__i386__)
> +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__)
> /* || defined(other_architecture)... */
> # include <asm/perf_regs.h>
> #endif
> @@ -54,9 +54,50 @@
> and note how regs are added in the same order as the perf_regs.h enum. */
> #else
> /* Since asm/perf_regs.h is absent, or gives the register layout for a
> - different arch, we can't unwind i386 and x86_64 frames. */
> + different arch, we can't unwind i386 and x86_64 perf sample frames. */
> #define PERF_FRAME_REGISTERS_I386 0
> #define PERF_FRAME_REGISTERS_X86_64 0
> -#endif
> +#endif /* _ASM_X86_PERF_REGS_H */
> +
> +#if defined(_ASM_ARM64_PERF_REGS_H)
> +#define REG(R) (1ULL << PERF_REG_ARM64_ ## R)
> +/* Proper unwind set: callee-saved X19..X28, then X29 for FP,
> + LR for return addr, and SP, PC. */
> +#define PERF_FRAME_REGISTERS_AARCH64 (REG(X19) | REG(X20) | REG(X21) \
> + | REG(X22) | REG(X23) | REG(X24) | REG(X25) | REG(X26) | REG(X27) \
> + | REG(X28) | REG(X29) /*FP*/ | REG(LR) | REG(SP) | REG(PC))
> +/* Register ordering defined in linux
> arch/arm64/include/uapi/asm/perf_regs.h. */
> +#else
> +/* Since asm/perf_regs.h is absent, or gives the register layout for a
> + different arch, we can't unwind aarch64 perf sample frames. */
> +#define PERF_FRAME_REGISTERS_AARCH64 0
> +#endif /* _ASM_ARM64_PERF_REGS_H */
> +
> +static inline bool
> +generic_sample_sp_pc (const Dwarf_Word *regs, uint32_t n_regs,
> + const int *regs_mapping, uint32_t n_regs_mapping,
> + Dwarf_Word *sp, uint sp_index /* into dwarf_regs */,
> + Dwarf_Word *pc, uint pc_index /* into dwarf_regs */)
> +{
> + if (sp != NULL) *sp = 0;
> + if (pc != NULL) *pc = 0;
> + /* TODO: Register locations could be cached and rechecked on a
> + fastpath without needing to loop, though the overhead reduction
> + is minimal. */
That's fine to leave for a future patch.
> + int j, need_sp = (sp != NULL), need_pc = (pc != NULL);
> + for (j = 0; (need_sp || need_pc) && n_regs_mapping > (uint32_t)j; j++)
> + {
> + if (n_regs < (uint32_t)j) break;
I think this should be <= instead of <. Otherwise, when j == n_regs,
regs[n_regs] could be read, which is one past the end.
> + if (need_sp && regs_mapping[j] == (int)sp_index)
> + {
> + *sp = regs[j]; need_sp = false;
> + }
> + if (need_pc && regs_mapping[j] == (int)pc_index)
> + {
> + *pc = regs[j]; need_pc = false;
> + }
> + }
> + return (!need_sp && !need_pc);
> +}
>
> #endif /* libebl_PERF_FLAGS.h */
> diff --git a/libebl/eblinitreg_sample.c b/libebl/eblinitreg_sample.c
> index d5704dfa..0cf264b0 100644
> --- a/libebl/eblinitreg_sample.c
> +++ b/libebl/eblinitreg_sample.c
> @@ -1,6 +1,6 @@
> /* Populate process Dwfl_Frame from perf_events sample.
>
> - Copyright (C) 2025 Red Hat, Inc.
> + Copyright (C) 2025-2026 Red Hat, Inc.
> This file is part of elfutils.
>
> This file is free software; you can redistribute it and/or modify
> @@ -31,9 +31,11 @@
> # include <config.h>
> #endif
>
> -#include <libeblP.h>
> +#include <stdlib.h>
> #include <assert.h>
>
> +#include <libeblP.h>
> +
> bool
> ebl_sample_sp_pc (Ebl *ebl,
> const Dwarf_Word *regs, uint32_t n_regs,
> @@ -83,10 +85,45 @@ ebl_sample_perf_regs_mapping (Ebl *ebl,
> uint64_t perf_regs_mask, uint32_t abi,
> const int **regs_mapping, size_t
> *n_regs_mapping)
> {
> - /* If sample_perf_regs_mapping is unsupported then PERF_FRAME_REGS_MASK is
> zero. */
> - assert (ebl->sample_perf_regs_mapping != NULL);
> - return ebl->sample_perf_regs_mapping (ebl, perf_regs_mask, abi,
> - regs_mapping, n_regs_mapping);
> + /* If sample_perf_regs_mapping is unsupported then perf_frame_regs_mask is
> zero. */
> + assert (ebl->perf_frame_regs_mask != 0);
Can we replace this assert with error handling? We have been
gradually removing asserts from elfutils libraries.
> +
> + /* If sample_perf_regs_mapping is defined for this arch, use it. */
> + if (ebl->sample_perf_regs_mapping != NULL)
> + return ebl->sample_perf_regs_mapping (ebl, perf_regs_mask, abi,
> + regs_mapping, n_regs_mapping);
> +
> + /* If sample_perf_regs_mapping is unspecified, then it is safe
> + to return a linear 1:1 mapping between perf_regs and dwarf_regs. */
> +
> + if (perf_regs_mask != 0 && ebl->cached_perf_regs_mask == perf_regs_mask)
Does cached_perf_regs_mask ever get set on ARM arches beyond the
default value of 0? It's set in x86_initreg_sample.c but I don't see
an ARM equivalent.
> + {
> + *regs_mapping = ebl->cached_regs_mapping;
> + *n_regs_mapping = ebl->cached_n_regs_mapping;
> + return true;
> + }
> +
> + /* XXX Unwind-relevant register file should be no bigger than this: */
> + int count = 64;
> +
> + ebl->cached_regs_mapping = (int *)calloc (count, sizeof(int));
Can this code ever be reached if ebl->cached_regs_mapping isn't NULL?
If so then the previous cached_regs_mapping should be freed.
> + ebl->cached_n_regs_mapping = count;
> +
> + int j, k; uint64_t bit;
> + for (j = 0, k = 0, bit = 1;
> + k < count; k++, bit <<= 1)
> + {
> + ebl->cached_regs_mapping[k] = -1;
> + if ((bit & perf_regs_mask)) {
> + ebl->cached_regs_mapping[j] = k;
> + j++;
> + }
> + }
If I understand this correctly then the length of
ebl->cached_regs_mapping is always 64 and only the first j entries are
meaningful. In x86_initreg_sample.c it looks like the
cached_regs_mapping has only as many entries as needed. Would that
approach work here?
> +
> + *regs_mapping = ebl->cached_regs_mapping;
> + *n_regs_mapping = ebl->cached_n_regs_mapping;
> + return true;
> +
> }
>
> uint64_t
> diff --git a/libebl/eblopenbackend.c b/libebl/eblopenbackend.c
> index b68dea7a..1be0e5fe 100644
> --- a/libebl/eblopenbackend.c
> +++ b/libebl/eblopenbackend.c
> @@ -1,5 +1,5 @@
> /* Generate ELF backend handle.
> - Copyright (C) 2000-2017 Red Hat, Inc.
> + Copyright (C) 2000-2017, 2026 Red Hat, Inc.
> This file is part of elfutils.
>
> This file is free software; you can redistribute it and/or modify
> @@ -271,6 +271,17 @@ fill_defaults (Ebl *result)
> result->sysvhash_entrysize = sizeof (Elf32_Word);
> }
>
> +/* Called by the initialization functions for backends which support
> + hook sample_perf_regs_mapping(). */
> +void
> +internal_function
> +__libebl_init_cached_regs_mapping (Ebl *eh)
> +{
> + eh->cached_perf_regs_mask = 0;
> + eh->cached_regs_mapping = NULL;
> + eh->cached_n_regs_mapping = -1;
cached_n_regs_mapping is a size_t so this actually sets it to
SIZE_MAX. I would just use SIZE_MAX here to make this clear and to
avoid triggering warnings. Does code related to cached_n_regs_mapping
account for SIZE_MAX being the invalid/unset marker? Another option is
to use 0 plus a NULL cached_regs_mapping to indicate invalid/unset.
Aaron
> +}
> +
> /* Find an appropriate backend for the file associated with ELF. */
> static Ebl *
> openbackend (Elf *elf, const char *emulation, GElf_Half machine)
> diff --git a/libebl/libeblP.h b/libebl/libeblP.h
> index 348da49e..f9b76d4e 100644
> --- a/libebl/libeblP.h
> +++ b/libebl/libeblP.h
> @@ -1,5 +1,5 @@
> /* Internal definitions for interface for libebl.
> - Copyright (C) 2000-2009, 2013, 2014, 2025 Red Hat, Inc.
> + Copyright (C) 2000-2009, 2013, 2014, 2025-2026 Red Hat, Inc.
> This file is part of elfutils.
>
> This file is free software; you can redistribute it and/or modify
> @@ -96,6 +96,9 @@ struct ebl
> initialize for the given Elf or machine. */
> typedef Ebl *(*ebl_bhinit_t) (Elf *, GElf_Half, Ebl *);
>
> +/* Additional helper to init cached perf_events mapping data. */
> +void __libebl_init_cached_regs_mapping (Ebl *ebl)
> + internal_function;
>
> /* LEB128 constant helper macros. */
> #define ULEB128_7(x) (BUILD_BUG_ON_ZERO ((x) >= (1U << 7)) + (x))
> --
> 2.53.0
>