Hi Tao,

You've managed to do nearly the same as my implementations of kallsyms, but
with far less code! Nice! Thank you for this.
A few comments inline.

Tao Liu <[email protected]> writes:

> This patch will parse kernel's kallsyms data, and store them into a hash
> table so they can be referenced later in a fast speed.
>
> Signed-off-by: Tao Liu <[email protected]>
> ---
>  Makefile       |   2 +-
>  kallsyms.c     | 265 +++++++++++++++++++++++++++++++++++++++++++++++++
>  kallsyms.h     |  17 ++++
>  makedumpfile.c |   3 +
>  makedumpfile.h |  11 ++
>  5 files changed, 297 insertions(+), 1 deletion(-)
>  create mode 100644 kallsyms.c
>  create mode 100644 kallsyms.h
>
> diff --git a/Makefile b/Makefile
> index 05ab5f2..6c450ac 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -45,7 +45,7 @@ CFLAGS_ARCH += -m32
>  endif
>
>  SRC_BASE = makedumpfile.c makedumpfile.h diskdump_mod.h sadump_mod.h sadump_info.h
> -SRC_PART = print_info.c dwarf_info.c elf_info.c erase_info.c sadump_info.c cache.c tools.c printk.c detect_cycle.c
> +SRC_PART = print_info.c dwarf_info.c elf_info.c erase_info.c sadump_info.c cache.c tools.c printk.c detect_cycle.c kallsyms.c
>  OBJ_PART=$(patsubst %.c,%.o,$(SRC_PART))
>  SRC_ARCH = arch/arm.c arch/arm64.c arch/x86.c arch/x86_64.c arch/ia64.c arch/ppc64.c arch/s390x.c arch/ppc.c arch/sparc64.c arch/mips64.c arch/loongarch64.c arch/riscv64.c
>  OBJ_ARCH=$(patsubst %.c,%.o,$(SRC_ARCH))
> diff --git a/kallsyms.c b/kallsyms.c
> new file mode 100644
> index 0000000..ecf64e0
> --- /dev/null
> +++ b/kallsyms.c
> @@ -0,0 +1,265 @@
> +#include <stdint.h>
> +#include <stdbool.h>
> +#include <string.h>
> +#include "makedumpfile.h"
> +#include "kallsyms.h"
> +
> +static uint32_t *kallsyms_offsets = NULL;
> +static uint16_t *kallsyms_token_index = NULL;
> +static uint8_t *kallsyms_token_table = NULL;
> +static uint8_t *kallsyms_names = NULL;
> +static unsigned long kallsyms_relative_base = 0;
> +static unsigned int kallsyms_num_syms = 0;
> +
> +#define NAME_HASH 512
> +static struct syment *name_hash_table[NAME_HASH] = {0};
> +
> +static uint64_t absolute_percpu(uint64_t base, int32_t val)
> +{
> +        if (val >= 0)
> +                return (uint64_t)val;
> +        else
> +                return base - 1 - val;
> +}
> +
> +static unsigned int hash_index(const char *name, unsigned int hash_size)
> +{
> +        unsigned int len, value;
> +
> +        len = strlen(name);
> +        value = name[len - 1] * name[len / 2];
> +
> +        return (name[0] ^ value) % hash_size;
> +}
> +
> +static void name_hash_install(struct syment *en)
> +{
> +        unsigned int index = hash_index(en->name, NAME_HASH);
> +        struct syment *sp = name_hash_table[index];
> +
> +        if (sp == NULL) {
> +                name_hash_table[index] = en;
> +        } else {
> +                while (sp) {
> +                        if (sp->name_hash_next) {
> +                                sp = sp->name_hash_next;
> +                        } else {
> +                                sp->name_hash_next = en;
> +                                break;
> +                        }
> +                }
> +        }
> +}
> +
> +static struct syment *search_kallsyms_by_name(char *name)
> +{
> +        unsigned int index;
> +        struct syment *sp;
> +
> +        index = hash_index(name, NAME_HASH);
> +        for (sp = name_hash_table[index]; sp; sp = sp->name_hash_next) {
> +                if (!strcmp(name, sp->name)) {
> +                        return sp;
> +                }
> +        }
> +        return sp;
> +}
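A small optional nit: since lookups walk the whole chain anyway and don't
depend on insertion order, name_hash_install() could simply prepend to the
bucket and drop the tail walk. A minimal sketch (untested):

        static void name_hash_install(struct syment *en)
        {
                unsigned int index = hash_index(en->name, NAME_HASH);

                /* Order within a bucket doesn't matter, so prepend
                 * instead of walking to the tail. */
                en->name_hash_next = name_hash_table[index];
                name_hash_table[index] = en;
        }

Not required for correctness, just a simplification.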
> +
> +static bool is_unwanted_symbol(char *name)
> +{
> +        const char *unwanted_prefix[] = {
> +                "__pfx_", // CFI symbols
> +                "_R", // Rust symbols
> +        };
> +        for (int i = 0; i < sizeof(unwanted_prefix) / sizeof(char *); i++) {
> +                if (!strncmp(name, unwanted_prefix[i], strlen(unwanted_prefix[i])))
> +                        return true;
> +        }
> +        return false;
> +}
> +
> +uint64_t get_kallsyms_value_by_name(char *name)
> +{
> +        struct syment *sp;
> +
> +        sp = search_kallsyms_by_name(name);
> +        if (!sp)
> +                return 0;
> +        return sp->value;
> +}
> +
> +#define BUFLEN 1024
> +static bool parse_kernel_kallsyms(void)
> +{
> +        char buf[BUFLEN];
> +        int index = 0, i;
> +        uint8_t *compressd_data;
> +        uint8_t *uncompressd_data;
> +        uint64_t stext;
> +        uint8_t len, len_old;
> +        struct syment *kern_syment;
> +        bool skip;
> +
> +        for (i = 0; i < kallsyms_num_syms; i++) {
> +                skip = false;
> +                memset(buf, 0, BUFLEN);
> +                len = kallsyms_names[index];
> +                if (len & 0x80) {
> +                        index++;
> +                        len_old = len;
> +                        len = kallsyms_names[index];
> +                        if (len & 0x80) {
> +                                fprintf(stderr, "%s: BUG! Unexpected 3-byte length,"
> +                                        " should be detected in init_kernel_kallsyms()\n",
> +                                        __func__);
> +                                goto out;
> +                        }
> +                        len = (len_old & 0x7F) | (len << 7);

The 2-byte representation was added in commit 73bbb94466fd3 ("kallsyms:
support "big" kernel symbols"), in v6.1. It seems useful to include a
comment about that, at a minimum.

It also seems to me that, for older kernel versions, this means lengths
128-255 are ambiguous: for v6.1+, they indicate a long symbol, but for
kernel versions prior to that, they are valid lengths. I guess this is
implemented for current kernels, but it might be worth checking the kernel
major/minor version for this. Though, I haven't personally witnessed the
issue, so maybe it's unnecessary. I will test this on some older kernels
and let you know.
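To illustrate, the version check could look something like this (an
untested sketch; it assumes info->kernel_version is already populated by
this point and that len is widened to an unsigned int):

        len = kallsyms_names[index++];
        if ((len & 0x80) && info->kernel_version >= KERNEL_VERSION(6, 1, 0)) {
                /* v6.1+ "big symbol" encoding: two bytes holding
                 * little-endian 7-bit groups; a 0x80 bit on the second
                 * byte would still be a bug. */
                uint8_t hi = kallsyms_names[index++];
                len = (len & 0x7F) | ((unsigned int)hi << 7);
        }

On pre-v6.1 kernels, a first byte in the 128-255 range would then be taken
as a plain one-byte length, avoiding the ambiguity described above.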
> +                }
> +                index++;
> +
> +                compressd_data = &kallsyms_names[index];
> +                index += len;
> +                while (len--) {
> +                        uncompressd_data = &kallsyms_token_table[kallsyms_token_index[*compressd_data]];
> +                        if (strlen(buf) + strlen((char *)uncompressd_data) >= BUFLEN) {
> +                                skip = true;
> +                                break;
> +                        }
> +                        strcat(buf, (char *)uncompressd_data);
> +                        compressd_data++;
> +                }
> +                if (skip || is_unwanted_symbol(&buf[1]))
> +                        continue;
> +                kern_syment = (struct syment *)calloc(1, sizeof(struct syment));
> +                if (!kern_syment)
> +                        goto no_mem;
> +                kern_syment->value = kallsyms_offsets[i];
> +                kern_syment->name = strdup(&buf[1]);
> +                if (!kern_syment->name) {
> +                        free(kern_syment);
> +                        goto no_mem;
> +                }
> +                name_hash_install(kern_syment);

Like I mentioned in a prior email, if we were able to know the list of
symbols we care about up-front, we could entirely avoid creating the hash
table, and also avoid maintaining a list of symbol prefixes we want to
skip loading. I'm not certain that you would want to go that far, but it's
a thought.
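Roughly what I had in mind, with a made-up symbol list (the real list
would be whatever makedumpfile actually ends up querying):

        /* Hypothetical allow-list; only these symbols get stored. */
        static const char * const wanted_syms[] = {
                "_stext",
                "log_buf",
        };

        static bool is_wanted_symbol(const char *name)
        {
                int i;

                for (i = 0; i < sizeof(wanted_syms) / sizeof(wanted_syms[0]); i++)
                        if (!strcmp(name, wanted_syms[i]))
                                return true;
                return false;
        }

The loop above would then do "if (skip || !is_wanted_symbol(&buf[1]))
continue;", and is_unwanted_symbol() could go away entirely.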
> +        }
> +
> +        /* Now refresh the absolute each kallsyms address */

I think this could use a better comment. This is my understanding of the
history of the kallsyms address encoding:

Kallsyms originally stored absolute symbol addresses in a plain array
called "kallsyms_addresses". This strategy was called "absolute kallsyms".
In Linux v4.6, commit 2213e9a66bb87 ("kallsyms: add support for relative
offsets in kallsyms address table") introduced two ways of storing symbol
addresses relative to a base address, so that 64-bit architectures could
use 32-bit arrays. These methods were CONFIG_KALLSYMS_BASE_RELATIVE and
CONFIG_KALLSYMS_ABSOLUTE_PERCPU.

The ABSOLUTE_PERCPU mechanism was used by architectures like x86_64, with
a percpu address range near 0x0 but a kernel address range in the negative
address space. Some architectures, namely tile and ia64, had to continue
using absolute kallsyms due to very large gaps in their address spaces.
After both architectures were removed, absolute kallsyms was dropped in
v6.11 commit 64e166099b69b ("kallsyms: get rid of code for absolute
kallsyms").

In v6.15, the x86_64 percpu address range was moved away from 0x0, and as
a result ABSOLUTE_PERCPU was no longer required. It was dropped in
01157ddc58dc2 ("kallsyms: Remove KALLSYMS_ABSOLUTE_PERCPU"), leaving only
the BASE_RELATIVE scheme (which no longer has a kconfig entry, since there
is no other scheme).

This code implements support for BASE_RELATIVE and ABSOLUTE_PERCPU;
absolute kallsyms is not supported. The kallsyms symbols themselves were
only added to vmcoreinfo in v6.0 with commit f09bddbd86619 ("vmcoreinfo:
add kallsyms_num_syms symbol"). At that time, only ia64 would have used
the absolute kallsyms mechanism. Even if these commits were backported to
quite old kernels, BASE_RELATIVE and ABSOLUTE_PERCPU would suffice for
most other architectures back to v4.6.

I don't know if all of the context is necessary, but I find it helpful to
know.

Thanks,
Stephen

> +        stext = get_kallsyms_value_by_name("_stext");
> +        if (SYMBOL(_stext) == absolute_percpu(kallsyms_relative_base, stext)) {
> +                for (i = 0; i < NAME_HASH; i++) {
> +                        for (kern_syment = name_hash_table[i];
> +                             kern_syment;
> +                             kern_syment = kern_syment->name_hash_next)
> +                                kern_syment->value = absolute_percpu(kallsyms_relative_base,
> +                                                                     kern_syment->value);
> +                }
> +        } else if (SYMBOL(_stext) == kallsyms_relative_base + stext) {
> +                for (i = 0; i < NAME_HASH; i++) {
> +                        for (kern_syment = name_hash_table[i];
> +                             kern_syment;
> +                             kern_syment = kern_syment->name_hash_next)
> +                                kern_syment->value += kallsyms_relative_base;
> +                }
> +        } else {
> +                fprintf(stderr, "%s: Wrong calculate kallsyms symbol value!\n", __func__);
> +                goto out;
> +        }
> +
> +        return true;
> +no_mem:
> +        fprintf(stderr, "%s: Not enough memory!\n", __func__);
> +out:
> +        return false;
> +}
> +
> +static bool vmcore_info_ready = false;
> +
> +bool read_vmcoreinfo_kallsyms(void)
> +{
> +        READ_SYMBOL("kallsyms_names", kallsyms_names);
> +        READ_SYMBOL("kallsyms_num_syms", kallsyms_num_syms);
> +        READ_SYMBOL("kallsyms_token_table", kallsyms_token_table);
> +        READ_SYMBOL("kallsyms_token_index", kallsyms_token_index);
> +        READ_SYMBOL("kallsyms_offsets", kallsyms_offsets);
> +        READ_SYMBOL("kallsyms_relative_base", kallsyms_relative_base);
> +        vmcore_info_ready = true;
> +        return true;
> +}
> +
> +bool init_kernel_kallsyms(void)
> +{
> +        const int token_index_size = (UINT8_MAX + 1) * sizeof(uint16_t);
> +        uint64_t last_token, len;
> +        unsigned char data, data_old;
> +        int i;
> +        bool ret = false;
> +
> +        if (vmcore_info_ready == false) {
> +                fprintf(stderr, "%s: vmcoreinfo not ready for kallsyms!\n",
> +                        __func__);
> +                return ret;
> +        }
> +
> +        readmem(VADDR, SYMBOL(kallsyms_num_syms), &kallsyms_num_syms,
> +                sizeof(kallsyms_num_syms));
> +        readmem(VADDR, SYMBOL(kallsyms_relative_base), &kallsyms_relative_base,
> +                sizeof(kallsyms_relative_base));
> +
> +        kallsyms_offsets = malloc(sizeof(uint32_t) * kallsyms_num_syms);
> +        if (!kallsyms_offsets)
> +                goto no_mem;
> +        readmem(VADDR, SYMBOL(kallsyms_offsets), kallsyms_offsets,
> +                kallsyms_num_syms * sizeof(uint32_t));
> +
> +        kallsyms_token_index = malloc(token_index_size);
> +        if (!kallsyms_token_index)
> +                goto no_mem;
> +        readmem(VADDR, SYMBOL(kallsyms_token_index), kallsyms_token_index,
> +                token_index_size);
> +
> +        last_token = SYMBOL(kallsyms_token_table) + kallsyms_token_index[UINT8_MAX];
> +        do {
> +                readmem(VADDR, last_token++, &data, 1);
> +        } while(data);
> +        len = last_token - SYMBOL(kallsyms_token_table);
> +        kallsyms_token_table = malloc(len);
> +        if (!kallsyms_token_table)
> +                goto no_mem;
> +        readmem(VADDR, SYMBOL(kallsyms_token_table), kallsyms_token_table, len);
> +
> +        for (len = 0, i = 0; i < kallsyms_num_syms; i++) {
> +                readmem(VADDR, SYMBOL(kallsyms_names) + len, &data, 1);
> +                if (data & 0x80) {
> +                        len += 1;
> +                        data_old = data;
> +                        readmem(VADDR, SYMBOL(kallsyms_names) + len, &data, 1);
> +                        if (data & 0x80) {
> +                                fprintf(stderr, "%s: BUG! Unexpected 3-byte length"
> +                                        " encoding in kallsyms names\n", __func__);
> +                                goto out;
> +                        }
> +                        data = (data_old & 0x7F) | (data << 7);
> +                }
> +                len += data + 1;
> +        }
> +        kallsyms_names = malloc(len);
> +        if (!kallsyms_names)
> +                goto no_mem;
> +        readmem(VADDR, SYMBOL(kallsyms_names), kallsyms_names, len);
> +
> +        ret = parse_kernel_kallsyms();
> +        goto out;
> +
> +no_mem:
> +        fprintf(stderr, "%s: Not enough memory!\n", __func__);
> +out:
> +        if (kallsyms_offsets)
> +                free(kallsyms_offsets);
> +        if (kallsyms_token_index)
> +                free(kallsyms_token_index);
> +        if (kallsyms_token_table)
> +                free(kallsyms_token_table);
> +        if (kallsyms_names)
> +                free(kallsyms_names);
> +        return ret;
> +}
> diff --git a/kallsyms.h b/kallsyms.h
> new file mode 100644
> index 0000000..a4fbe10
> --- /dev/null
> +++ b/kallsyms.h
> @@ -0,0 +1,17 @@
> +#ifndef _KALLSYMS_H
> +#define _KALLSYMS_H
> +
> +#include <stdint.h>
> +#include <stdbool.h>
> +
> +struct __attribute__((packed)) syment {
> +        uint64_t value;
> +        char *name;
> +        struct syment *name_hash_next;
> +};
> +
> +bool read_vmcoreinfo_kallsyms(void);
> +bool init_kernel_kallsyms(void);
> +uint64_t get_kallsyms_value_by_name(char *);
> +
> +#endif /* _KALLSYMS_H */
> \ No newline at end of file
> diff --git a/makedumpfile.c b/makedumpfile.c
> index 12fb0d8..dba3628 100644
> --- a/makedumpfile.c
> +++ b/makedumpfile.c
> @@ -27,6 +27,7 @@
>  #include <limits.h>
>  #include <assert.h>
>  #include <zlib.h>
> +#include "kallsyms.h"
>
>  struct symbol_table    symbol_table;
>  struct size_table      size_table;
> @@ -3105,6 +3106,8 @@ read_vmcoreinfo_from_vmcore(off_t offset, unsigned long size, int flag_xen_hv)
>                  if (!read_vmcoreinfo())
>                          goto out;
>          }
> +        read_vmcoreinfo_kallsyms();
> +
>          close_vmcoreinfo();
>
>          ret = TRUE;
> diff --git a/makedumpfile.h b/makedumpfile.h
> index 134eb7a..0dec50e 100644
> --- a/makedumpfile.h
> +++ b/makedumpfile.h
> @@ -259,6 +259,7 @@ static inline int string_exists(char *s) { return (s ? TRUE : FALSE); }
>  #define UINT(ADDR)      *((unsigned int *)(ADDR))
>  #define ULONG(ADDR)     *((unsigned long *)(ADDR))
>  #define ULONGLONG(ADDR) *((unsigned long long *)(ADDR))
> +#define VOID_PTR(ADDR)  *((void **)(ADDR))
>
>
>  /*
> @@ -1919,6 +1920,16 @@ struct symbol_table {
>           * symbols on sparc64 arch
>           */
>          unsigned long long      vmemmap_table;
> +
> +        /*
> +         * kallsyms related
> +         */
> +        unsigned long long      kallsyms_names;
> +        unsigned long long      kallsyms_num_syms;
> +        unsigned long long      kallsyms_token_table;
> +        unsigned long long      kallsyms_token_index;
> +        unsigned long long      kallsyms_offsets;
> +        unsigned long long      kallsyms_relative_base;
>  };
>
>  struct size_table {
> --
> 2.47.0
