This patch parses the kernel's kallsyms data and stores the symbols in a
hash table so that they can be looked up quickly later.

Signed-off-by: Tao Liu <[email protected]>
---
 Makefile       |   2 +-
 kallsyms.c     | 265 +++++++++++++++++++++++++++++++++++++++++++++++++
 kallsyms.h     |  17 ++++
 makedumpfile.c |   3 +
 makedumpfile.h |  11 ++
 5 files changed, 297 insertions(+), 1 deletion(-)
 create mode 100644 kallsyms.c
 create mode 100644 kallsyms.h

diff --git a/Makefile b/Makefile
index 05ab5f2..6c450ac 100644
--- a/Makefile
+++ b/Makefile
@@ -45,7 +45,7 @@ CFLAGS_ARCH += -m32
 endif
 
 SRC_BASE = makedumpfile.c makedumpfile.h diskdump_mod.h sadump_mod.h sadump_info.h
-SRC_PART = print_info.c dwarf_info.c elf_info.c erase_info.c sadump_info.c cache.c tools.c printk.c detect_cycle.c
+SRC_PART = print_info.c dwarf_info.c elf_info.c erase_info.c sadump_info.c cache.c tools.c printk.c detect_cycle.c kallsyms.c
 OBJ_PART=$(patsubst %.c,%.o,$(SRC_PART))
 SRC_ARCH = arch/arm.c arch/arm64.c arch/x86.c arch/x86_64.c arch/ia64.c arch/ppc64.c arch/s390x.c arch/ppc.c arch/sparc64.c arch/mips64.c arch/loongarch64.c arch/riscv64.c
 OBJ_ARCH=$(patsubst %.c,%.o,$(SRC_ARCH))
diff --git a/kallsyms.c b/kallsyms.c
new file mode 100644
index 0000000..ecf64e0
--- /dev/null
+++ b/kallsyms.c
@@ -0,0 +1,265 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include "makedumpfile.h"
+#include "kallsyms.h"
+
+/*
+ * Host-side copies of the kernel's kallsyms tables. They are allocated and
+ * filled from the dump in init_kernel_kallsyms() and freed there again after
+ * parse_kernel_kallsyms() has run.
+ */
+/* One 32-bit entry per symbol, read from SYMBOL(kallsyms_offsets). */
+static uint32_t *kallsyms_offsets = NULL;
+/* UINT8_MAX + 1 offsets into kallsyms_token_table, indexed by compressed byte. */
+static uint16_t *kallsyms_token_index = NULL;
+/* NUL-terminated token strings used to expand the compressed names. */
+static uint8_t  *kallsyms_token_table = NULL;
+/* Length-prefixed, token-compressed symbol names, one record per symbol. */
+static uint8_t  *kallsyms_names = NULL;
+/* Base value the raw offsets are combined with to form symbol values. */
+static unsigned long kallsyms_relative_base = 0;
+/* Number of symbols described by kallsyms_offsets and kallsyms_names. */
+static unsigned int kallsyms_num_syms = 0;
+
+/* Number of buckets in the name hash table. */
+#define NAME_HASH 512
+/* Buckets of singly linked syment chains, selected by hash_index(name). */
+static struct syment *name_hash_table[NAME_HASH] = {0};
+
+/*
+ * Convert a raw kallsyms offset into a symbol value.
+ *
+ * A non-negative @val is returned as-is; a negative @val is mapped to
+ * base - 1 - val, i.e. a value at or above @base.
+ */
+static uint64_t absolute_percpu(uint64_t base, int32_t val)
+{
+       return (val < 0) ? base - 1 - val : (uint64_t)val;
+}
+
+/*
+ * Map a symbol name to a bucket index in [0, hash_size).
+ *
+ * The hash mixes the first, middle and last characters of @name. An empty
+ * name has no last character (name[len - 1] would read before the start of
+ * the string), so it is sent to bucket 0 instead.
+ *
+ * @hash_size must be non-zero.
+ */
+static unsigned int hash_index(const char *name, unsigned int hash_size)
+{
+       size_t len = strlen(name);
+       unsigned int value;
+
+       if (len == 0)
+               return 0;
+
+       value = name[len - 1] * name[len / 2];
+
+       return (name[0] ^ value) % hash_size;
+}
+
+/*
+ * Append @en to the tail of the bucket chain selected by its name, so that
+ * entries within a bucket stay in insertion order. @en->name_hash_next is
+ * not modified.
+ */
+static void name_hash_install(struct syment *en)
+{
+       unsigned int bucket = hash_index(en->name, NAME_HASH);
+       struct syment *tail = name_hash_table[bucket];
+
+       /* Empty bucket: the new entry becomes the head. */
+       if (!tail) {
+               name_hash_table[bucket] = en;
+               return;
+       }
+
+       while (tail->name_hash_next)
+               tail = tail->name_hash_next;
+       tail->name_hash_next = en;
+}
+
+/*
+ * Look @name up in the hash table.
+ *
+ * Returns the first entry in the bucket chain whose name equals @name, or
+ * NULL when there is none.
+ */
+static struct syment *search_kallsyms_by_name(char *name)
+{
+       struct syment *cur = name_hash_table[hash_index(name, NAME_HASH)];
+
+       while (cur && strcmp(name, cur->name) != 0)
+               cur = cur->name_hash_next;
+
+       return cur;
+}
+
+/*
+ * Tell whether @name starts with one of the prefixes that should not be
+ * stored in the hash table.
+ */
+static bool is_unwanted_symbol(char *name)
+{
+       const char *unwanted_prefix[] = {
+               "__pfx_",       // CFI symbols
+               "_R",           // Rust symbols
+       };
+       const size_t count = sizeof(unwanted_prefix) / sizeof(unwanted_prefix[0]);
+       size_t n = 0;
+
+       while (n < count) {
+               const char *prefix = unwanted_prefix[n++];
+
+               if (strncmp(name, prefix, strlen(prefix)) == 0)
+                       return true;
+       }
+       return false;
+}
+
+/*
+ * Return the value stored for the symbol called @name, or 0 when no such
+ * symbol is in the hash table.
+ */
+uint64_t get_kallsyms_value_by_name(char *name)
+{
+       struct syment *found = search_kallsyms_by_name(name);
+
+       return found ? found->value : 0;
+}
+
+#define BUFLEN 1024
+/*
+ * Expand every compressed record in kallsyms_names and install the
+ * resulting symbols into name_hash_table, then convert the stored raw
+ * offsets into symbol values.
+ *
+ * Each record is a one- or two-byte length followed by that many token
+ * indices. The expanded text is a type character followed by the symbol
+ * name. Records that do not fit in BUFLEN, have no name after the type
+ * character, or match is_unwanted_symbol() are skipped.
+ *
+ * Which conversion applies to the offsets is decided by checking which of
+ * the two candidates reproduces SYMBOL(_stext) for the "_stext" entry.
+ *
+ * Returns true on success, false on allocation failure, on a malformed
+ * length, or when neither conversion matches SYMBOL(_stext). Entries that
+ * were already installed are not released on failure.
+ */
+static bool parse_kernel_kallsyms(void)
+{
+       char buf[BUFLEN];
+       size_t index = 0, used, token_len;
+       /* Wide enough for a two-byte length; uint8_t would truncate it. */
+       unsigned int i, len, len_low;
+       const uint8_t *compressed;
+       const char *token;
+       uint64_t stext;
+       struct syment *kern_syment;
+       bool skip, percpu;
+
+       for (i = 0; i < kallsyms_num_syms; i++) {
+               skip = false;
+               used = 0;
+               buf[0] = '\0';
+
+               len = kallsyms_names[index++];
+               if (len & 0x80) {
+                       /* Low 7 bits come first, the next byte holds the rest. */
+                       len_low = len & 0x7F;
+                       len = kallsyms_names[index++];
+                       if (len & 0x80) {
+                               fprintf(stderr, "%s: BUG! Unexpected 3-byte length,"
+                                       " should be detected in init_kernel_kallsyms()\n",
+                                       __func__);
+                               goto out;
+                       }
+                       len = len_low | (len << 7);
+               }
+
+               compressed = &kallsyms_names[index];
+               /* Always step over the whole record, even if it gets skipped. */
+               index += len;
+               while (len--) {
+                       token = (const char *)
+                               &kallsyms_token_table[kallsyms_token_index[*compressed]];
+                       token_len = strlen(token);
+                       if (used + token_len >= BUFLEN) {
+                               skip = true;
+                               break;
+                       }
+                       /* Copy the token together with its terminating NUL. */
+                       memcpy(buf + used, token, token_len + 1);
+                       used += token_len;
+                       compressed++;
+               }
+               /* buf[0] is the type character; require at least one name char. */
+               if (skip || used < 2 || is_unwanted_symbol(&buf[1]))
+                       continue;
+
+               kern_syment = calloc(1, sizeof(*kern_syment));
+               if (!kern_syment)
+                       goto no_mem;
+               kern_syment->value = kallsyms_offsets[i];
+               kern_syment->name = strdup(&buf[1]);
+               if (!kern_syment->name) {
+                       free(kern_syment);
+                       goto no_mem;
+               }
+               name_hash_install(kern_syment);
+       }
+
+       /* Now refresh the absolute each kallsyms address */
+       stext = get_kallsyms_value_by_name("_stext");
+       if (SYMBOL(_stext) == absolute_percpu(kallsyms_relative_base, stext)) {
+               percpu = true;
+       } else if (SYMBOL(_stext) == kallsyms_relative_base + stext) {
+               percpu = false;
+       } else {
+               fprintf(stderr, "%s: Wrong calculate kallsyms symbol value!\n", __func__);
+               goto out;
+       }
+
+       for (i = 0; i < NAME_HASH; i++) {
+               for (kern_syment = name_hash_table[i];
+                    kern_syment;
+                    kern_syment = kern_syment->name_hash_next) {
+                       if (percpu)
+                               kern_syment->value = absolute_percpu(kallsyms_relative_base,
+                                                       kern_syment->value);
+                       else
+                               kern_syment->value += kallsyms_relative_base;
+               }
+       }
+
+       return true;
+no_mem:
+       fprintf(stderr, "%s: Not enough memory!\n", __func__);
+out:
+       return false;
+}
+
+/* Set by read_vmcoreinfo_kallsyms(); checked by init_kernel_kallsyms(). */
+static bool vmcore_info_ready = false;
+
+/*
+ * Fetch the kallsyms_* entries of the symbol table through READ_SYMBOL and
+ * record that they are available, so init_kernel_kallsyms() may proceed.
+ *
+ * NOTE(review): READ_SYMBOL is defined outside this file; presumably it
+ * fills SYMBOL(x) from vmcoreinfo and may return early from this function
+ * on invalid data, in which case vmcore_info_ready stays false -- confirm.
+ *
+ * Returns true when all READ_SYMBOL invocations have been passed.
+ */
+bool read_vmcoreinfo_kallsyms(void)
+{
+       READ_SYMBOL("kallsyms_names", kallsyms_names);
+       READ_SYMBOL("kallsyms_num_syms", kallsyms_num_syms);
+       READ_SYMBOL("kallsyms_token_table", kallsyms_token_table);
+       READ_SYMBOL("kallsyms_token_index", kallsyms_token_index);
+       READ_SYMBOL("kallsyms_offsets", kallsyms_offsets);
+       READ_SYMBOL("kallsyms_relative_base", kallsyms_relative_base);
+       vmcore_info_ready = true;
+       return true;
+}
+
+/*
+ * Read the kernel's kallsyms tables out of the dump and build the name
+ * hash table from them.
+ *
+ * The sizes of the token table and of the names blob are not stored
+ * anywhere, so they are found by walking the data in the dump: the token
+ * table ends at the NUL that terminates the last token, and the names blob
+ * is the sum of all length-prefixed records.
+ *
+ * The temporary copies of the tables are released before returning, and the
+ * pointers are reset so that a later call cannot free them a second time.
+ *
+ * Returns true on success; false when read_vmcoreinfo_kallsyms() has not
+ * run, when memory cannot be read or allocated, or when parsing fails.
+ */
+bool init_kernel_kallsyms(void)
+{
+       const int token_index_size = (UINT8_MAX + 1) * sizeof(uint16_t);
+       uint64_t last_token, len;
+       unsigned char data;
+       /* Wide enough for a two-byte length; unsigned char would truncate it. */
+       unsigned int sym_len, i;
+       bool ret = false;
+
+       if (vmcore_info_ready == false) {
+               fprintf(stderr, "%s: vmcoreinfo not ready for kallsyms!\n",
+                       __func__);
+               return ret;
+       }
+
+       /*
+        * NOTE(review): the checks below assume readmem() returns 0 on
+        * failure, following the usual convention in this project -- confirm.
+        */
+       if (!readmem(VADDR, SYMBOL(kallsyms_num_syms), &kallsyms_num_syms,
+                    sizeof(kallsyms_num_syms)))
+               goto read_err;
+       if (!readmem(VADDR, SYMBOL(kallsyms_relative_base), &kallsyms_relative_base,
+                    sizeof(kallsyms_relative_base)))
+               goto read_err;
+
+       kallsyms_offsets = malloc(sizeof(uint32_t) * kallsyms_num_syms);
+       if (!kallsyms_offsets)
+               goto no_mem;
+       if (!readmem(VADDR, SYMBOL(kallsyms_offsets), kallsyms_offsets,
+                    kallsyms_num_syms * sizeof(uint32_t)))
+               goto read_err;
+
+       kallsyms_token_index = malloc(token_index_size);
+       if (!kallsyms_token_index)
+               goto no_mem;
+       if (!readmem(VADDR, SYMBOL(kallsyms_token_index), kallsyms_token_index,
+                    token_index_size))
+               goto read_err;
+
+       /* Scan from the start of the last token up to and including its NUL. */
+       last_token = SYMBOL(kallsyms_token_table) + kallsyms_token_index[UINT8_MAX];
+       do {
+               if (!readmem(VADDR, last_token++, &data, 1))
+                       goto read_err;
+       } while (data);
+       len = last_token - SYMBOL(kallsyms_token_table);
+       kallsyms_token_table = malloc(len);
+       if (!kallsyms_token_table)
+               goto no_mem;
+       if (!readmem(VADDR, SYMBOL(kallsyms_token_table), kallsyms_token_table, len))
+               goto read_err;
+
+       /* Walk the length prefixes to find the total size of the names blob. */
+       for (len = 0, i = 0; i < kallsyms_num_syms; i++) {
+               if (!readmem(VADDR, SYMBOL(kallsyms_names) + len, &data, 1))
+                       goto read_err;
+               sym_len = data;
+               if (sym_len & 0x80) {
+                       len += 1;
+                       if (!readmem(VADDR, SYMBOL(kallsyms_names) + len, &data, 1))
+                               goto read_err;
+                       if (data & 0x80) {
+                               fprintf(stderr, "%s: BUG! Unexpected 3-byte length"
+                                       " encoding in kallsyms names\n", __func__);
+                               goto out;
+                       }
+                       sym_len = (sym_len & 0x7F) | ((unsigned int)data << 7);
+               }
+               len += sym_len + 1;
+       }
+       kallsyms_names = malloc(len);
+       if (!kallsyms_names)
+               goto no_mem;
+       if (!readmem(VADDR, SYMBOL(kallsyms_names), kallsyms_names, len))
+               goto read_err;
+
+       ret = parse_kernel_kallsyms();
+       goto out;
+
+read_err:
+       fprintf(stderr, "%s: Can't read kallsyms data from the dump!\n", __func__);
+       goto out;
+no_mem:
+       fprintf(stderr, "%s: Not enough memory!\n", __func__);
+out:
+       /* free(NULL) is a no-op; clear the globals so nothing dangles. */
+       free(kallsyms_offsets);
+       kallsyms_offsets = NULL;
+       free(kallsyms_token_index);
+       kallsyms_token_index = NULL;
+       free(kallsyms_token_table);
+       kallsyms_token_table = NULL;
+       free(kallsyms_names);
+       kallsyms_names = NULL;
+       return ret;
+}
diff --git a/kallsyms.h b/kallsyms.h
new file mode 100644
index 0000000..a4fbe10
--- /dev/null
+++ b/kallsyms.h
@@ -0,0 +1,17 @@
+#ifndef _KALLSYMS_H
+#define _KALLSYMS_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+/*
+ * One kernel symbol stored in the name hash table.
+ *
+ * The structure is declared packed, so no padding is inserted between the
+ * members.
+ */
+struct __attribute__((packed)) syment {
+       /* Raw kallsyms offset at first, converted to the symbol value later. */
+       uint64_t value;
+       /* Heap-allocated copy of the symbol name, without the type character. */
+       char *name;
+       /* Next entry in the same hash bucket, or NULL at the end of the chain. */
+       struct syment *name_hash_next;
+};
+
+/* Fetch the kallsyms_* symbol addresses; must run before init_kernel_kallsyms(). */
+bool read_vmcoreinfo_kallsyms(void);
+/* Read the kallsyms tables from the dump and build the name hash table. */
+bool init_kernel_kallsyms(void);
+/* Return the value of the named symbol, or 0 when it is not in the table. */
+uint64_t get_kallsyms_value_by_name(char *);
+
+#endif /* _KALLSYMS_H */
\ No newline at end of file
diff --git a/makedumpfile.c b/makedumpfile.c
index 12fb0d8..dba3628 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -27,6 +27,7 @@
 #include <limits.h>
 #include <assert.h>
 #include <zlib.h>
+#include "kallsyms.h"
 
 struct symbol_table    symbol_table;
 struct size_table      size_table;
@@ -3105,6 +3106,8 @@ read_vmcoreinfo_from_vmcore(off_t offset, unsigned long size, int flag_xen_hv)
                if (!read_vmcoreinfo())
                        goto out;
        }
+       read_vmcoreinfo_kallsyms();
+
        close_vmcoreinfo();
 
        ret = TRUE;
diff --git a/makedumpfile.h b/makedumpfile.h
index 134eb7a..0dec50e 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -259,6 +259,7 @@ static inline int string_exists(char *s) { return (s ? TRUE : FALSE); }
 #define UINT(ADDR)     *((unsigned int *)(ADDR))
 #define ULONG(ADDR)    *((unsigned long *)(ADDR))
 #define ULONGLONG(ADDR)        *((unsigned long long *)(ADDR))
+#define VOID_PTR(ADDR)  *((void **)(ADDR))
 
 
 /*
@@ -1919,6 +1920,16 @@ struct symbol_table {
         * symbols on sparc64 arch
         */
        unsigned long long              vmemmap_table;
+
+       /*
+        * kallsyms related
+        */
+       unsigned long long              kallsyms_names;
+       unsigned long long              kallsyms_num_syms;
+       unsigned long long              kallsyms_token_table;
+       unsigned long long              kallsyms_token_index;
+       unsigned long long              kallsyms_offsets;
+       unsigned long long              kallsyms_relative_base;
 };
 
 struct size_table {
-- 
2.47.0


Reply via email to