Re: [PATCH v3 06/20] gendwarfksyms: Add a cache for processed DIEs

2024-10-01 Thread Sami Tolvanen
On Tue, Oct 1, 2024 at 2:10 PM Petr Pavlu  wrote:
>
> On 9/23/24 20:18, Sami Tolvanen wrote:
> > +static void update_fqn(struct die *cache, Dwarf_Die *die)
> > +{
> > + if (!cache->fqn)
> > + cache->fqn = get_fqn(die) ?: "";
> > +}
>
> When is update_fqn() called with cache->fqn being already non-NULL?

In patch 16, because we need the name before process_fqn() is called,
and if we end up outputting the name after that, cache->fqn is already
non-NULL.

> In general, I find handling of cache->fqn slightly confusing, mostly
> because the member has three states: NULL initially,
> a statically-allocated empty string if the DIE doesn't have a name and
> a dynamically-allocated non-zero-length string otherwise.
>
> I wonder if it would be possible to reduce it to two states: NULL
> initially and when the DIE doesn't have a name, or a regular string.

I also thought about it, but using an empty string to represent an
unnamed DIE and NULL to represent an uninitialized value seemed the
most reasonable option.

> > +static inline const char *die_state_name(enum die_state state)
> > +{
> > + switch (state) {
> > + default:
> > + CASE_CONST_TO_STR(DIE_INCOMPLETE)
> > + CASE_CONST_TO_STR(DIE_COMPLETE)
> > + }
>
> Nit: I suggest to move the default case out of the switch statement:
>
> switch (state) {
> CASE_CONST_TO_STR(DIE_INCOMPLETE)
> CASE_CONST_TO_STR(DIE_COMPLETE)
> }
> error("unexpected die_state: %d", state);
>
> This way, if someone adds a new value in die_state and forgets to handle
> it in die_state_name(), they get a compiler warning.. or a runtime error
> later.

Sure, I'll change this.

Sami



Re: [PATCH v3 06/20] gendwarfksyms: Add a cache for processed DIEs

2024-10-01 Thread Petr Pavlu
On 9/23/24 20:18, Sami Tolvanen wrote:
> Basic types in DWARF repeat frequently and traversing the DIEs using
> libdw is relatively slow. Add a simple hashtable based cache for the
> processed DIEs.
> 
> Signed-off-by: Sami Tolvanen 
> ---
>  scripts/gendwarfksyms/Makefile|   1 +
>  scripts/gendwarfksyms/die.c   | 143 ++
>  scripts/gendwarfksyms/dwarf.c | 136 +---
>  scripts/gendwarfksyms/gendwarfksyms.c |   6 ++
>  scripts/gendwarfksyms/gendwarfksyms.h |  62 ++-
>  5 files changed, 307 insertions(+), 41 deletions(-)
>  create mode 100644 scripts/gendwarfksyms/die.c
> 
> diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
> index 9f8fec4fd39b..c0d4ce50fc27 100644
> --- a/scripts/gendwarfksyms/Makefile
> +++ b/scripts/gendwarfksyms/Makefile
> @@ -2,6 +2,7 @@
>  hostprogs-always-y += gendwarfksyms
>  
>  gendwarfksyms-objs += gendwarfksyms.o
> +gendwarfksyms-objs += die.o
>  gendwarfksyms-objs += dwarf.o
>  gendwarfksyms-objs += symbols.o
>  
> diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c
> new file mode 100644
> index ..28d89fce89fc
> --- /dev/null
> +++ b/scripts/gendwarfksyms/die.c
> @@ -0,0 +1,143 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2024 Google LLC
> + */
> +
> +#include 
> +#include "gendwarfksyms.h"
> +
> +#define DIE_HASH_BITS 20
> +
> +/* {die->addr, state} -> struct die * */
> +static HASHTABLE_DEFINE(die_map, 1 << DIE_HASH_BITS);
> +
> +static unsigned int map_hits;
> +static unsigned int map_misses;
> +
> +static inline unsigned int die_hash(uintptr_t addr, enum die_state state)
> +{
> + return hash_32(addr_hash(addr) ^ (unsigned int)state);
> +}
> +
> +static void init_die(struct die *cd)
> +{
> + cd->state = DIE_INCOMPLETE;
> + cd->fqn = NULL;
> + cd->tag = -1;
> + cd->addr = 0;
> + INIT_LIST_HEAD(&cd->fragments);
> +}
> +
> +static struct die *create_die(Dwarf_Die *die, enum die_state state)
> +{
> + struct die *cd;
> +
> + cd = xmalloc(sizeof(struct die));
> + init_die(cd);
> + cd->addr = (uintptr_t)die->addr;
> +
> + hash_add(die_map, &cd->hash, die_hash(cd->addr, state));
> + return cd;
> +}
> +
> +int __die_map_get(uintptr_t addr, enum die_state state, struct die **res)
> +{
> + struct die *cd;
> +
> + hash_for_each_possible(die_map, cd, hash, die_hash(addr, state)) {
> + if (cd->addr == addr && cd->state == state) {
> + *res = cd;
> + return 0;
> + }
> + }
> +
> + return -1;
> +}
> +
> +struct die *die_map_get(Dwarf_Die *die, enum die_state state)
> +{
> + struct die *cd;
> +
> + if (__die_map_get((uintptr_t)die->addr, state, &cd) == 0) {
> + map_hits++;
> + return cd;
> + }
> +
> + map_misses++;
> + return create_die(die, state);
> +}
> +
> +static void reset_die(struct die *cd)
> +{
> + struct die_fragment *tmp;
> + struct die_fragment *df;
> +
> + list_for_each_entry_safe(df, tmp, &cd->fragments, list) {
> + if (df->type == FRAGMENT_STRING)
> + free(df->data.str);
> + free(df);
> + }
> +
> + if (cd->fqn && *cd->fqn)
> + free(cd->fqn);
> + init_die(cd);
> +}
> +
> +void die_map_free(void)
> +{
> + struct hlist_node *tmp;
> + unsigned int stats[DIE_LAST + 1];
> + struct die *cd;
> + int i;
> +
> + memset(stats, 0, sizeof(stats));
> +
> + hash_for_each_safe(die_map, cd, tmp, hash) {
> + stats[cd->state]++;
> + reset_die(cd);
> + free(cd);
> + }
> + hash_init(die_map);
> +
> + if (map_hits + map_misses > 0)
> + debug("hits %u, misses %u (hit rate %.02f%%)", map_hits,
> +   map_misses,
> +   (100.0f * map_hits) / (map_hits + map_misses));
> +
> + for (i = 0; i <= DIE_LAST; i++)
> + debug("%s: %u entries", die_state_name(i), stats[i]);
> +}
> +
> +static struct die_fragment *append_item(struct die *cd)
> +{
> + struct die_fragment *df;
> +
> + df = xmalloc(sizeof(struct die_fragment));
> + df->type = FRAGMENT_EMPTY;
> + list_add_tail(&df->list, &cd->fragments);
> + return df;
> +}
> +
> +void die_map_add_string(struct die *cd, const char *str)
> +{
> + struct die_fragment *df;
> +
> + if (!cd)
> + return;
> +
> + df = append_item(cd);
> + df->data.str = xstrdup(str);
> + df->type = FRAGMENT_STRING;
> +}
> +
> +void die_map_add_die(struct die *cd, struct die *child)
> +{
> + struct die_fragment *df;
> +
> + if (!cd)
> + return;
> +
> + df = append_item(cd);
> + df->data.addr = child->addr;
> + df->type = FRAGMENT_DIE;
> +}
> diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
> index 3e9e8500f448..f0c881bef026 100644
> --- a/scripts/gendwarfksy