Hi Tao,

You've managed to do nearly the same as my implementations of kallsyms,
but with far less code! Nice! Thank you for this.

A few comments inline.

Tao Liu <[email protected]> writes:
> This patch will parse kernel's kallsyms data, and store them into a hash
> table so they can be referenced later in a fast speed.
>
> Signed-off-by: Tao Liu <[email protected]>
> ---
>  Makefile       |   2 +-
>  kallsyms.c     | 265 +++++++++++++++++++++++++++++++++++++++++++++++++
>  kallsyms.h     |  17 ++++
>  makedumpfile.c |   3 +
>  makedumpfile.h |  11 ++
>  5 files changed, 297 insertions(+), 1 deletion(-)
>  create mode 100644 kallsyms.c
>  create mode 100644 kallsyms.h
>
> diff --git a/Makefile b/Makefile
> index 05ab5f2..6c450ac 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -45,7 +45,7 @@ CFLAGS_ARCH += -m32
>  endif
>  
>  SRC_BASE = makedumpfile.c makedumpfile.h diskdump_mod.h sadump_mod.h 
> sadump_info.h
> -SRC_PART = print_info.c dwarf_info.c elf_info.c erase_info.c sadump_info.c 
> cache.c tools.c printk.c detect_cycle.c
> +SRC_PART = print_info.c dwarf_info.c elf_info.c erase_info.c sadump_info.c 
> cache.c tools.c printk.c detect_cycle.c kallsyms.c
>  OBJ_PART=$(patsubst %.c,%.o,$(SRC_PART))
>  SRC_ARCH = arch/arm.c arch/arm64.c arch/x86.c arch/x86_64.c arch/ia64.c 
> arch/ppc64.c arch/s390x.c arch/ppc.c arch/sparc64.c arch/mips64.c 
> arch/loongarch64.c arch/riscv64.c
>  OBJ_ARCH=$(patsubst %.c,%.o,$(SRC_ARCH))
> diff --git a/kallsyms.c b/kallsyms.c
> new file mode 100644
> index 0000000..ecf64e0
> --- /dev/null
> +++ b/kallsyms.c
> @@ -0,0 +1,265 @@
> +#include <stdint.h>
> +#include <stdbool.h>
> +#include <string.h>
> +#include "makedumpfile.h"
> +#include "kallsyms.h"
> +
> +static uint32_t *kallsyms_offsets = NULL;
> +static uint16_t *kallsyms_token_index = NULL;
> +static uint8_t  *kallsyms_token_table = NULL;
> +static uint8_t  *kallsyms_names = NULL;
> +static unsigned long kallsyms_relative_base = 0;
> +static unsigned int kallsyms_num_syms = 0;
> +
> +#define NAME_HASH 512
> +static struct syment *name_hash_table[NAME_HASH] = {0};
> +
> +static uint64_t absolute_percpu(uint64_t base, int32_t val)
> +{
> +     if (val >= 0)
> +             return (uint64_t)val;
> +     else
> +             return base - 1 - val;
> +}
> +
> +static unsigned int hash_index(const char *name, unsigned int hash_size)
> +{
> +     unsigned int len, value;
> +
> +     len = strlen(name);
> +     value = name[len - 1] * name[len / 2];
> +
> +     return (name[0] ^ value) % hash_size;
> +}
> +
> +static void name_hash_install(struct syment *en)
> +{
> +     unsigned int index = hash_index(en->name, NAME_HASH);
> +     struct syment *sp = name_hash_table[index];
> +
> +     if (sp == NULL) {
> +             name_hash_table[index] = en;
> +     } else {
> +             while (sp) {
> +                     if (sp->name_hash_next) {
> +                             sp = sp->name_hash_next;
> +                     } else {
> +                             sp->name_hash_next = en;
> +                             break;
> +                     }
> +             }
> +     }
> +}
> +
> +static struct syment *search_kallsyms_by_name(char *name)
> +{
> +     unsigned int index;
> +     struct syment *sp;
> +
> +     index = hash_index(name, NAME_HASH);
> +     for (sp = name_hash_table[index]; sp; sp = sp->name_hash_next) {
> +             if (!strcmp(name, sp->name)) {
> +                     return sp;
> +             }
> +     }
> +     return sp;
> +}
> +
> +static bool is_unwanted_symbol(char *name)
> +{
> +     const char *unwanted_prefix[] = {
> +             "__pfx_",       // CFI symbols
> +             "_R",           // Rust symbols
> +     };
> +     for (int i = 0; i < sizeof(unwanted_prefix) / sizeof(char *); i++) {
> +             if (!strncmp(name, unwanted_prefix[i], 
> strlen(unwanted_prefix[i])))
> +                     return true;
> +     }
> +     return false;
> +}
> +
> +uint64_t get_kallsyms_value_by_name(char *name)
> +{
> +     struct syment *sp;
> +
> +     sp = search_kallsyms_by_name(name);
> +     if (!sp)
> +             return 0;
> +     return sp->value;
> +}
> +
> +#define BUFLEN 1024
> +static bool parse_kernel_kallsyms(void)
> +{
> +     char buf[BUFLEN];
> +     int index = 0, i;
> +     uint8_t *compressd_data;
> +     uint8_t *uncompressd_data;
> +     uint64_t stext;
> +     uint8_t len, len_old;
> +     struct syment *kern_syment;
> +     bool skip;
> +
> +     for (i = 0; i < kallsyms_num_syms; i++) {
> +             skip = false;
> +             memset(buf, 0, BUFLEN);
> +             len = kallsyms_names[index];
> +             if (len & 0x80) {
> +                     index++;
> +                     len_old = len;
> +                     len = kallsyms_names[index];
> +                     if (len & 0x80) {
> +                             fprintf(stderr, "%s: BUG! Unexpected 3-byte 
> length,"
> +                                     " should be detected in 
> init_kernel_kallsyms()\n",
> +                                     __func__);
> +                             goto out;
> +                     }
> +                     len = (len_old & 0x7F) | (len << 7);

The 2-byte representation was added in commit 73bbb94466fd3 ("kallsyms:
support "big" kernel symbols"), in v6.1. It seems useful to include a
comment about that, at a minimum.

It also seems to me that, for older kernel versions, this means lengths
128-255 are ambiguous: for v6.1+, they indicate a long symbol, but for
kernel versions prior to that, they are valid lengths.

I guess this is implemented for current kernels, but it might be worth
checking the kernel major/minor version for this. Though, I haven't
personally witnessed the issue, so maybe it's unnecessary. I will test
this on some older kernels and let you know.

> +             }
> +             index++;
> +
> +             compressd_data = &kallsyms_names[index];
> +             index += len;
> +             while (len--) {
> +                     uncompressd_data = 
> &kallsyms_token_table[kallsyms_token_index[*compressd_data]];
> +                     if (strlen(buf) + strlen((char *)uncompressd_data) >= 
> BUFLEN) {
> +                             skip = true;
> +                             break;
> +                     }
> +                     strcat(buf, (char *)uncompressd_data);
> +                     compressd_data++;
> +             }
> +             if (skip || is_unwanted_symbol(&buf[1]))
> +                     continue;
> +             kern_syment = (struct syment *)calloc(1, sizeof(struct syment));
> +             if (!kern_syment)
> +                     goto no_mem;
> +             kern_syment->value = kallsyms_offsets[i];
> +             kern_syment->name = strdup(&buf[1]);
> +             if (!kern_syment->name) {
> +                     free(kern_syment);
> +                     goto no_mem;
> +             }
> +             name_hash_install(kern_syment);

Like I mentioned in a prior email, if we were able to know the list of
symbols we care about up-front, we could entirely avoid creating the
hash table, and also avoid maintaining a list of symbol prefixes we want
to skip loading. I'm not certain that you would want to go that far, but
it's a thought.

> +     }
> +
> +     /* Now refresh the absolute each kallsyms address */

I think this could use a better comment. This is my understanding of the
history of the kallsyms address encoding history:

    Kallsyms originally stored absolute symbol addresses in a plain
    array called "kallsyms_addresses". This strategy was called
    "absolute kallsyms". In Linux v4.6, commit 2213e9a66bb87 ("kallsyms:
    add support for relative offsets in kallsyms address table"),
    introduced two ways of storing symbol addresses relative to a base
    address, so that 64-bit architectures could use 32-bit arrays. These
    methods were CONFIG_KALLSYMS_BASE_RELATIVE and
    CONFIG_KALLSYMS_ABSOLUTE_PERCPU. The ABSOLUTE_PERCPU mechanism was
    used by architectures like x86_64 with a percpu address range near
    0x0, but kernel address range in the negative address space. Some
    architectures, namely tile and ia64, had to continue using absolute
    kallsyms due to very large gaps in their address spaces.

    After both architectures were removed, absolute kallsyms was dropped
    in v6.11 commit 64e166099b69b ("kallsyms: get rid of code for
    absolute kallsyms"). In v6.15, the x86_64 percpu address range was
    moved away from 0x0, and as a result ABSOLUTE_PERCPU was no longer
    required. It was dropped in 01157ddc58dc2 ("kallsyms: Remove
    KALLSYMS_ABSOLUTE_PERCPU"), leaving only the BASE_RELATIVE scheme
    (which no longer has a kconfig entry, since there is no other
    scheme).

    This code implements support for BASE_RELATIVE and ABSOLUTE_PERCPU,
    but absolute kallsyms is not supported. The kallsyms symbols
    themselves were only added to vmcoreinfo in v6.0 with commit
    f09bddbd86619 ("vmcoreinfo: add kallsyms_num_syms symbol"). At that
    time, only ia64 would have used the absolute kallsyms mechanism. Even
    if these commits were backported to quite old kernels, BASE_RELATIVE
    and ABSOLUTE_PERCPU would suffice for most other architectures going
    back to v4.6, when the relative schemes were introduced.

I don't know if all of the context is necessary, but I find it helpful
to know.

Thanks,
Stephen

> +     stext = get_kallsyms_value_by_name("_stext");
> +     if (SYMBOL(_stext) == absolute_percpu(kallsyms_relative_base, stext)) {
> +             for (i = 0; i < NAME_HASH; i++) {
> +                     for (kern_syment = name_hash_table[i];
> +                          kern_syment;
> +                          kern_syment = kern_syment->name_hash_next)
> +                             kern_syment->value = 
> absolute_percpu(kallsyms_relative_base,
> +                                                     kern_syment->value);
> +             }
> +     } else if (SYMBOL(_stext) == kallsyms_relative_base + stext) {
> +             for (i = 0; i < NAME_HASH; i++) {
> +                     for (kern_syment = name_hash_table[i];
> +                          kern_syment;
> +                          kern_syment = kern_syment->name_hash_next)
> +                             kern_syment->value += kallsyms_relative_base;
> +             }
> +     } else {
> +             fprintf(stderr, "%s: Wrong calculate kallsyms symbol value!\n", 
> __func__);
> +             goto out;
> +     }
> +
> +     return true;
> +no_mem:
> +     fprintf(stderr, "%s: Not enough memory!\n", __func__);
> +out:
> +     return false;
> +}
> +
> +static bool vmcore_info_ready = false;
> +
> +bool read_vmcoreinfo_kallsyms(void)
> +{
> +     READ_SYMBOL("kallsyms_names", kallsyms_names);
> +     READ_SYMBOL("kallsyms_num_syms", kallsyms_num_syms);
> +     READ_SYMBOL("kallsyms_token_table", kallsyms_token_table);
> +     READ_SYMBOL("kallsyms_token_index", kallsyms_token_index);
> +     READ_SYMBOL("kallsyms_offsets", kallsyms_offsets);
> +     READ_SYMBOL("kallsyms_relative_base", kallsyms_relative_base);
> +     vmcore_info_ready = true;
> +     return true;
> +}
> +
> +bool init_kernel_kallsyms(void)
> +{
> +     const int token_index_size = (UINT8_MAX + 1) * sizeof(uint16_t);
> +     uint64_t last_token, len;
> +     unsigned char data, data_old;
> +     int i;
> +     bool ret = false;
> +
> +     if (vmcore_info_ready == false) {
> +             fprintf(stderr, "%s: vmcoreinfo not ready for kallsyms!\n",
> +                     __func__);
> +             return ret;
> +     }
> +
> +     readmem(VADDR, SYMBOL(kallsyms_num_syms), &kallsyms_num_syms,
> +             sizeof(kallsyms_num_syms));
> +     readmem(VADDR, SYMBOL(kallsyms_relative_base), &kallsyms_relative_base,
> +             sizeof(kallsyms_relative_base));
> +
> +     kallsyms_offsets = malloc(sizeof(uint32_t) * kallsyms_num_syms);
> +     if (!kallsyms_offsets)
> +             goto no_mem;
> +     readmem(VADDR, SYMBOL(kallsyms_offsets), kallsyms_offsets,
> +             kallsyms_num_syms * sizeof(uint32_t));
> +
> +     kallsyms_token_index = malloc(token_index_size);
> +     if (!kallsyms_token_index)
> +             goto no_mem;
> +     readmem(VADDR, SYMBOL(kallsyms_token_index), kallsyms_token_index,
> +             token_index_size);
> +
> +     last_token = SYMBOL(kallsyms_token_table) + 
> kallsyms_token_index[UINT8_MAX];
> +     do {
> +             readmem(VADDR, last_token++, &data, 1);
> +     } while(data);
> +     len = last_token - SYMBOL(kallsyms_token_table);
> +     kallsyms_token_table = malloc(len);
> +     if (!kallsyms_token_table)
> +             goto no_mem;
> +     readmem(VADDR, SYMBOL(kallsyms_token_table), kallsyms_token_table, len);
> +
> +     for (len = 0, i = 0; i < kallsyms_num_syms; i++) {
> +             readmem(VADDR, SYMBOL(kallsyms_names) + len, &data, 1);
> +             if (data & 0x80) {
> +                     len += 1;
> +                     data_old = data;
> +                     readmem(VADDR, SYMBOL(kallsyms_names) + len, &data, 1);
> +                     if (data & 0x80) {
> +                             fprintf(stderr, "%s: BUG! Unexpected 3-byte 
> length"
> +                                     " encoding in kallsyms names\n", 
> __func__);
> +                             goto out;
> +                     }
> +                     data = (data_old & 0x7F) | (data << 7);
> +             }
> +             len += data + 1;
> +     }
> +     kallsyms_names = malloc(len);
> +     if (!kallsyms_names)
> +             goto no_mem;
> +     readmem(VADDR, SYMBOL(kallsyms_names), kallsyms_names, len);
> +
> +     ret = parse_kernel_kallsyms();
> +     goto out;
> +
> +no_mem:
> +     fprintf(stderr, "%s: Not enough memory!\n", __func__);
> +out:
> +     if (kallsyms_offsets)
> +             free(kallsyms_offsets);
> +     if (kallsyms_token_index)
> +             free(kallsyms_token_index);
> +     if (kallsyms_token_table)
> +             free(kallsyms_token_table);
> +     if (kallsyms_names)
> +             free(kallsyms_names);
> +     return ret;
> +}
> diff --git a/kallsyms.h b/kallsyms.h
> new file mode 100644
> index 0000000..a4fbe10
> --- /dev/null
> +++ b/kallsyms.h
> @@ -0,0 +1,17 @@
> +#ifndef _KALLSYMS_H
> +#define _KALLSYMS_H
> +
> +#include <stdint.h>
> +#include <stdbool.h>
> +
> +struct __attribute__((packed)) syment {
> +     uint64_t value;
> +     char *name;
> +     struct syment *name_hash_next;
> +};
> +
> +bool read_vmcoreinfo_kallsyms(void);
> +bool init_kernel_kallsyms(void);
> +uint64_t get_kallsyms_value_by_name(char *);
> +
> +#endif /* _KALLSYMS_H */
> \ No newline at end of file
> diff --git a/makedumpfile.c b/makedumpfile.c
> index 12fb0d8..dba3628 100644
> --- a/makedumpfile.c
> +++ b/makedumpfile.c
> @@ -27,6 +27,7 @@
>  #include <limits.h>
>  #include <assert.h>
>  #include <zlib.h>
> +#include "kallsyms.h"
>  
>  struct symbol_table  symbol_table;
>  struct size_table    size_table;
> @@ -3105,6 +3106,8 @@ read_vmcoreinfo_from_vmcore(off_t offset, unsigned long 
> size, int flag_xen_hv)
>               if (!read_vmcoreinfo())
>                       goto out;
>       }
> +     read_vmcoreinfo_kallsyms();
> +
>       close_vmcoreinfo();
>  
>       ret = TRUE;
> diff --git a/makedumpfile.h b/makedumpfile.h
> index 134eb7a..0dec50e 100644
> --- a/makedumpfile.h
> +++ b/makedumpfile.h
> @@ -259,6 +259,7 @@ static inline int string_exists(char *s) { return (s ? 
> TRUE : FALSE); }
>  #define UINT(ADDR)   *((unsigned int *)(ADDR))
>  #define ULONG(ADDR)  *((unsigned long *)(ADDR))
>  #define ULONGLONG(ADDR)      *((unsigned long long *)(ADDR))
> +#define VOID_PTR(ADDR)  *((void **)(ADDR))
>  
>  
>  /*
> @@ -1919,6 +1920,16 @@ struct symbol_table {
>        * symbols on sparc64 arch
>        */
>       unsigned long long              vmemmap_table;
> +
> +     /*
> +      * kallsyms related
> +      */
> +     unsigned long long              kallsyms_names;
> +     unsigned long long              kallsyms_num_syms;
> +     unsigned long long              kallsyms_token_table;
> +     unsigned long long              kallsyms_token_index;
> +     unsigned long long              kallsyms_offsets;
> +     unsigned long long              kallsyms_relative_base;
>  };
>  
>  struct size_table {
> -- 
> 2.47.0

Reply via email to