The compiler may choose not to emit type information in DWARF for
external symbols. Clang, for example, does this for symbols not
defined in the current TU.

To provide a way to work around this issue, add support for
__gendwarfksyms_ptr_<symbol> pointers that force the compiler to emit
the necessary type information in DWARF also for the missing symbols.

Example usage:

  #define GENDWARFKSYMS_PTR(sym) \
      static typeof(sym) *__gendwarfksyms_ptr_##sym __used  \
          __section(".discard.gendwarfksyms") = &sym;

  extern int external_symbol(void);
  GENDWARFKSYMS_PTR(external_symbol);

Signed-off-by: Sami Tolvanen <samitolva...@google.com>
Acked-by: Neal Gompa <n...@gompa.dev>
Reviewed-by: Petr Pavlu <petr.pa...@suse.com>
---
 scripts/gendwarfksyms/dwarf.c              | 55 +++++++++++++++++++++-
 scripts/gendwarfksyms/examples/symbolptr.c | 33 +++++++++++++
 scripts/gendwarfksyms/gendwarfksyms.h      |  7 +++
 scripts/gendwarfksyms/symbols.c            | 27 +++++++++++
 4 files changed, 121 insertions(+), 1 deletion(-)
 create mode 100644 scripts/gendwarfksyms/examples/symbolptr.c

diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index ba0f502ad20c..10224756f2af 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -1013,6 +1013,31 @@ static void process_variable(struct state *state, Dwarf_Die *die)
        process_symbol(state, die, __process_variable);
 }
 
+static void save_symbol_ptr(struct state *state)
+{
+       Dwarf_Die ptr_type;
+       Dwarf_Die type;
+
+       if (!get_ref_die_attr(&state->die, DW_AT_type, &ptr_type) ||
+           dwarf_tag(&ptr_type) != DW_TAG_pointer_type)
+               error("%s must be a pointer type!",
+                     get_symbol_name(&state->die));
+
+       if (!get_ref_die_attr(&ptr_type, DW_AT_type, &type))
+               error("%s pointer missing a type attribute?",
+                     get_symbol_name(&state->die));
+
+       /*
+        * Save the symbol pointer DIE in case the actual symbol is
+        * missing from the DWARF. Clang, for example, intentionally
+        * omits external symbols from the debugging information.
+        */
+       if (dwarf_tag(&type) == DW_TAG_subroutine_type)
+               symbol_set_ptr(state->sym, &type);
+       else
+               symbol_set_ptr(state->sym, &ptr_type);
+}
+
 static int process_exported_symbols(struct state *unused, struct die *cache,
                                    Dwarf_Die *die)
 {
@@ -1036,7 +1061,9 @@ static int process_exported_symbols(struct state *unused, struct die *cache,
 
                state_init(&state);
 
-               if (tag == DW_TAG_subprogram)
+               if (is_symbol_ptr(get_symbol_name(&state.die)))
+                       save_symbol_ptr(&state);
+               else if (tag == DW_TAG_subprogram)
                        process_subprogram(&state, &state.die);
                else
                        process_variable(&state, &state.die);
@@ -1049,8 +1076,34 @@ static int process_exported_symbols(struct state *unused, struct die *cache,
        }
 }
 
+static void process_symbol_ptr(struct symbol *sym, void *arg)
+{
+       struct state state;
+       Dwarf *dwarf = arg;
+
+       if (sym->state != SYMBOL_UNPROCESSED || !sym->ptr_die_addr)
+               return;
+
+       debug("%s", sym->name);
+       state_init(&state);
+       state.sym = sym;
+
+       if (!dwarf_die_addr_die(dwarf, (void *)sym->ptr_die_addr, &state.die))
+               error("dwarf_die_addr_die failed for symbol ptr: '%s'",
+                     sym->name);
+
+       if (dwarf_tag(&state.die) == DW_TAG_subroutine_type)
+               process_subprogram(&state, &state.die);
+       else
+               process_variable(&state, &state.die);
+
+       cache_clear_expanded(&state.expansion_cache);
+}
+
 void process_cu(Dwarf_Die *cudie)
 {
        check(process_die_container(NULL, NULL, cudie, process_exported_symbols,
                                    match_all));
+
+       symbol_for_each(process_symbol_ptr, dwarf_cu_getdwarf(cudie->cu));
 }
diff --git a/scripts/gendwarfksyms/examples/symbolptr.c b/scripts/gendwarfksyms/examples/symbolptr.c
new file mode 100644
index 000000000000..b7b97cd39769
--- /dev/null
+++ b/scripts/gendwarfksyms/examples/symbolptr.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Google LLC
+ *
+ * Example for symbol pointers. When compiled with Clang, gendwarfksyms
+ * uses a symbol pointer for `f`.
+ *
+ * $ clang -g -c examples/symbolptr.c -o examples/symbolptr.o
+ * $ echo -e "f\ng\np" | ./gendwarfksyms -d examples/symbolptr.o
+ */
+
+/* Kernel macros for userspace testing. */
+#ifndef __used
+#define __used __attribute__((__used__))
+#endif
+#ifndef __section
+#define __section(section) __attribute__((__section__(section)))
+#endif
+
+#define __GENDWARFKSYMS_EXPORT(sym)                            \
+       static typeof(sym) *__gendwarfksyms_ptr_##sym __used    \
+               __section(".discard.gendwarfksyms") = &sym;
+
+extern void f(unsigned int arg);
+void g(int *arg);
+void g(int *arg) {}
+
+struct s;
+extern struct s *p;
+
+__GENDWARFKSYMS_EXPORT(f);
+__GENDWARFKSYMS_EXPORT(g);
+__GENDWARFKSYMS_EXPORT(p);
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index 962c36326ccc..c9277af76d7b 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -92,6 +92,10 @@ extern int symtypes;
  * symbols.c
  */
 
+/* See symbols.c:is_symbol_ptr */
+#define SYMBOL_PTR_PREFIX "__gendwarfksyms_ptr_"
+#define SYMBOL_PTR_PREFIX_LEN (sizeof(SYMBOL_PTR_PREFIX) - 1)
+
 static inline unsigned int addr_hash(uintptr_t addr)
 {
        return hash_ptr((const void *)addr);
@@ -115,14 +119,17 @@ struct symbol {
        struct hlist_node name_hash;
        enum symbol_state state;
        uintptr_t die_addr;
+       uintptr_t ptr_die_addr;
        unsigned long crc;
 };
 
 typedef void (*symbol_callback_t)(struct symbol *, void *arg);
 
+bool is_symbol_ptr(const char *name);
 void symbol_read_exports(FILE *file);
 void symbol_read_symtab(int fd);
 struct symbol *symbol_get(const char *name);
+void symbol_set_ptr(struct symbol *sym, Dwarf_Die *ptr);
 void symbol_set_die(struct symbol *sym, Dwarf_Die *die);
 void symbol_set_crc(struct symbol *sym, unsigned long crc);
 void symbol_for_each(symbol_callback_t func, void *arg);
diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
index e0c9007f7250..e00c86fa0ba2 100644
--- a/scripts/gendwarfksyms/symbols.c
+++ b/scripts/gendwarfksyms/symbols.c
@@ -39,6 +39,20 @@ static unsigned int __for_each_addr(struct symbol *sym, symbol_callback_t func,
        return processed;
 }
 
+/*
+ * For symbols without debugging information (e.g. symbols defined in other
+ * TUs), we also match __gendwarfksyms_ptr_<symbol_name> symbols, which the
+ * kernel uses to ensure type information is present in the TU that exports
+ * the symbol. A __gendwarfksyms_ptr pointer must have the same type as the
+ * exported symbol, e.g.:
+ *
+ *   typeof(symname) *__gendwarfksyms_ptr_symname = &symname;
+ */
+bool is_symbol_ptr(const char *name)
+{
+       return name && !strncmp(name, SYMBOL_PTR_PREFIX, SYMBOL_PTR_PREFIX_LEN);
+}
+
 static unsigned int for_each(const char *name, symbol_callback_t func,
                             void *data)
 {
@@ -47,6 +61,8 @@ static unsigned int for_each(const char *name, symbol_callback_t func,
 
        if (!name || !*name)
                return 0;
+       if (is_symbol_ptr(name))
+               name += SYMBOL_PTR_PREFIX_LEN;
 
        hash_for_each_possible_safe(symbol_names, match, tmp, name_hash,
                                    hash_str(name)) {
@@ -84,6 +100,17 @@ void symbol_set_crc(struct symbol *sym, unsigned long crc)
                error("no matching symbols: '%s'", sym->name);
 }
 
+static void set_ptr(struct symbol *sym, void *data)
+{
+       sym->ptr_die_addr = (uintptr_t)((Dwarf_Die *)data)->addr;
+}
+
+void symbol_set_ptr(struct symbol *sym, Dwarf_Die *ptr)
+{
+       if (for_each(sym->name, set_ptr, ptr) == 0)
+               error("no matching symbols: '%s'", sym->name);
+}
+
 static void set_die(struct symbol *sym, void *data)
 {
        sym->die_addr = (uintptr_t)((Dwarf_Die *)data)->addr;
-- 
2.47.0.163.g1226f6d8fa-goog


Reply via email to