This is mostly a port of the Xen hypervisor implementation. The U-Boot
binary is built as normal, then its symbol table is fed into
tools/symbols to generate an optimised lookup table. U-Boot is rebuilt
with the symbol table and handling code in lib/symbols.c.

Based on code from Xen at
  c20850540ad6("x86/altcall: always use a temporary parameter stashing 
variable")

Signed-off-by: Caleb Connolly <caleb.conno...@linaro.org>
---
 Makefile          |  15 +-
 include/symbols.h |  19 ++
 lib/Kconfig       |   6 +
 lib/Makefile      |   1 +
 lib/symbols.c     | 126 +++++++++++
 tools/Makefile    |   3 +
 tools/symbols.c   | 646 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 815 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index fcb9565d2fff..ae899fe97724 100644
--- a/Makefile
+++ b/Makefile
@@ -1781,16 +1781,29 @@ quiet_cmd_u-boot__ ?= LD      $@
       cmd_u-boot__ ?= $(LD) $(KBUILD_LDFLAGS) $(LDFLAGS_u-boot) -o $@          
\
                -T u-boot.lds $(u-boot-init)                                    
\
                --whole-archive                                                 
\
                        $(u-boot-main)                                          
\
+                       $(if $(shell [ "$@" = "u-boot" ] && echo 
"true"),lib/symbols.o u-boot.sym.o,)                   \
                --no-whole-archive                                              
\
                $(PLATFORM_LIBS) -Map u-boot.map;                               
\
                $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
 endif
 
-u-boot:        $(u-boot-init) $(u-boot-main) $(u-boot-keep-syms-lto) 
u-boot.lds FORCE
+u-boot.nosyms: $(u-boot-init) $(u-boot-main) $(u-boot-keep-syms-lto) 
u-boot.lds FORCE
        +$(call if_changed,u-boot__)
 
+ifeq ($(CONFIG_SYMBOL_LOOKUP),y)
+u-boot: u-boot.nosyms FORCE
+       @$(NM) -n -pa --format=sysv u-boot.nosyms | tools/symbols --all-symbols 
--sysv --sort > u-boot.sym.S
+       @$(CC) $(c_flags) -c $(srctree)/lib/symbols.c -o lib/symbols.o
+       @$(AS) u-boot.sym.S -o u-boot.sym.o
+       @$(call cmd,u-boot__)
+else
+u-boot: u-boot.nosyms FORCE
+       +$(call if_changed,copy)
+endif
+
+
 ifeq ($(CONFIG_RISCV),y)
        @tools/prelink-riscv $@
 endif
 
diff --git a/include/symbols.h b/include/symbols.h
new file mode 100644
index 000000000000..b17f122be339
--- /dev/null
+++ b/include/symbols.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#include <string.h>
+#include <asm/types.h>
+
+#define KSYM_NAME_LEN 127
+
+#if IS_ENABLED(CONFIG_SYMBOL_LOOKUP)
+const char * __attribute__((weak)) symbols_lookup(unsigned long addr, unsigned 
long *symaddr, unsigned long *offset,
+                          char *namebuf);
+#else
+static inline const char *symbols_lookup(unsigned long addr, unsigned long 
*symaddr, unsigned long *offset,
+                          char *namebuf)
+{
+       strcpy(namebuf, "???");
+       namebuf[3] = '\0';
+       return namebuf;
+}
+#endif
diff --git a/lib/Kconfig b/lib/Kconfig
index b3baa4b85b07..06a78f83b7d6 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -977,8 +977,14 @@ config VPL_OF_LIBFDT_ASSUME_MASK
          are made, and libfdt is able to deal with malicious data. A value of
          0xff means all assumptions are made and any invalid data may cause
          unsafe execution. See FDT_ASSUME_PERFECT, etc. in libfdt_internal.h
 
+config SYMBOL_LOOKUP
+       bool "Enable symbol lookup"
+       help
+         This enables support for looking up symbol names from addresses. The
+         primary usecase for this is improved debugging support.
+
 menu "System tables"
        depends on (!EFI && !SYS_COREBOOT) || (ARM && EFI_LOADER)
 
 config BLOBLIST_TABLES
diff --git a/lib/Makefile b/lib/Makefile
index e389ad014f89..a4ccda76f438 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -121,8 +121,9 @@ obj-y += linux_string.o
 obj-$(CONFIG_LMB) += lmb.o
 obj-y += membuff.o
 obj-$(CONFIG_REGEX) += slre.o
 obj-y += string.o
+#obj-y += symbols.o
 obj-y += tables_csum.o
 obj-y += time.o
 obj-y += hexdump.o
 obj-$(CONFIG_GETOPT) += getopt.o
diff --git a/lib/symbols.c b/lib/symbols.c
new file mode 100644
index 000000000000..e881d5603425
--- /dev/null
+++ b/lib/symbols.c
@@ -0,0 +1,126 @@
+/*
+ * symbols.c: in-kernel printing of symbolic oopses and stack traces.
+ * Ported from Xen hypervisor
+ *
+ * Copyright 2002 Rusty Russell <ru...@rustcorp.com.au> IBM Corporation
+ *
+ * ChangeLog:
+ *
+ * (25/Aug/2004) Paulo Marques <pmarq...@grupopie.com>
+ *      Changed the compression method from stem compression to "table lookup"
+ *      compression (see tools/symbols.c for a more complete description)
+ */
+
+#include <asm-generic/sections.h>
+#include <asm/esr.h>
+#include <asm/global_data.h>
+#include <asm/ptrace.h>
+#include <hexdump.h>
+#include <linux/compiler.h>
+#include <symbols.h>
+
+extern const uint64_t symbols_offsets[];
+extern const unsigned int symbols_num_syms;
+extern const u8 symbols_names[];
+extern const u8 symbols_token_table[];
+extern const u16 symbols_token_index[];
+extern const unsigned int symbols_markers[];
+
+#define symbols_address(n) (symbols_offsets[n] + 0UL)
+
+DECLARE_GLOBAL_DATA_PTR;
+
+/* expand a compressed symbol data into the resulting uncompressed string,
+   given the offset to where the symbol is in the compressed stream */
+static unsigned int symbols_expand_symbol(unsigned int off, char *result)
+{
+       int len, skipped_first = 0;
+       const u8 *tptr, *data;
+
+       /* get the compressed symbol length from the first symbol byte */
+       data = &symbols_names[off];
+       len = *data;
+       data++;
+
+       /* update the offset to return the offset for the next symbol on
+        * the compressed stream */
+       off += len + 1;
+
+       /* for every byte on the compressed symbol data, copy the table
+        * entry for that byte */
+       while (len) {
+               tptr = &symbols_token_table[symbols_token_index[*data]];
+               data++;
+               len--;
+
+               while (*tptr) {
+                       if (skipped_first) {
+                               *result = *tptr;
+                               result++;
+                       } else
+                               skipped_first = 1;
+                       tptr++;
+               }
+       }
+
+       *result = '\0';
+
+       /* return to offset to the next symbol */
+       return off;
+}
+
+/* find the offset on the compressed stream given and index in the
+ * symbols array */
+static unsigned int get_symbol_offset(unsigned long pos)
+{
+       const u8 *name;
+       int i;
+
+       /* use the closest marker we have. We have markers every 256 positions,
+        * so that should be close enough */
+       name = &symbols_names[symbols_markers[pos >> 8]];
+
+       /* sequentially scan all the symbols up to the point we're searching 
for.
+        * Every symbol is stored in a [<len>][<len> bytes of data] format, so 
we
+        * just need to add the len to the current pointer for every symbol we
+        * wish to skip */
+       for (i = 0; i < (pos & 0xFF); i++)
+               name = name + (*name) + 1;
+
+       return name - symbols_names;
+}
+
+const char *symbols_lookup(unsigned long addr, unsigned long *symaddr, 
unsigned long *offset, char *namebuf)
+{
+       unsigned long low, high, mid;
+
+       addr -= (unsigned long)_start;
+
+       namebuf[KSYM_NAME_LEN] = 0;
+       namebuf[0] = 0;
+
+       /* do a binary search on the sorted symbols_addresses array */
+       low = 0;
+       high = symbols_num_syms;
+
+       while (high - low > 1) {
+               mid = (low + high) / 2;
+               if (symbols_address(mid) <= addr) {
+                       low = mid;
+               } else {
+                       high = mid;
+               }
+       }
+
+       /* search for the first aliased symbol. Aliased symbols are
+        * symbols with the same address */
+       while (low && symbols_address(low - 1) == symbols_address(low))
+               --low;
+
+       /* Grab name */
+       symbols_expand_symbol(get_symbol_offset(low), namebuf);
+
+       *symaddr = symbols_address(low);
+       *offset = addr - symbols_address(low);
+       return namebuf;
+}
diff --git a/tools/Makefile b/tools/Makefile
index 6a4280e3668f..278539ea77be 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -225,8 +225,11 @@ hostprogs-$(CONFIG_ARCH_MVEBU) += kwboot
 
 hostprogs-y += proftool
 proftool-objs = proftool.o generated/lib/abuf.o
 
+HOSTCFLAGS_symbols.o += -DCONFIG_TEXT_BASE=$(CONFIG_TEXT_BASE)
+hostprogs-$(CONFIG_SYMBOL_LOOKUP) += symbols
+
 hostprogs-$(CONFIG_STATIC_RELA) += relocate-rela
 hostprogs-$(CONFIG_RISCV) += prelink-riscv
 
 hostprogs-$(CONFIG_ARCH_OCTEON) += update_octeon_header
diff --git a/tools/symbols.c b/tools/symbols.c
new file mode 100644
index 000000000000..9e3c4f62cf24
--- /dev/null
+++ b/tools/symbols.c
@@ -0,0 +1,646 @@
+/* Generate assembler source containing symbol information
+ *
+ * Copyright 2002       by Kai Germaschewski
+ *
+ * Based on Xen implementation as of March 2024
+ *     c20850540ad6 ("x86/altcall: always use a temporary parameter stashing 
variable")
+ *
+ * Simplified and adjusted for U-Boot, notably the generated ASM is simplified 
with the
+ * text offset applied at runtime to better support U-Boot relocation.
+ *
+ * This software may be used and distributed according to the terms
+ * of the GNU General Public License, incorporated herein by reference.
+ *
+ * Usage: nm -n vmlinux | scripts/symbols [--all-symbols] > symbols.S
+ *
+ * ChangeLog:
+ *
+ * (25/Aug/2004) Paulo Marques <pmarq...@grupopie.com>
+ *      Changed the compression method from stem compression to "table lookup"
+ *      compression
+ *
+ *      Table compression uses all the unused char codes on the symbols and
+ *  maps these to the most used substrings (tokens). For instance, it might
+ *  map char code 0xF7 to represent "write_" and then in every symbol where
+ *  "write_" appears it can be replaced by 0xF7, saving 5 bytes.
+ *      The used codes themselves are also placed in the table so that the
+ *  decompresion can work without "special cases".
+ *      Applied to kernel symbols, this usually produces a compression ratio
+ *  of about 50%.
+ *
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <ctype.h>
+
+#define KSYM_NAME_LEN          127
+
+
+struct sym_entry {
+       unsigned long long addr;
+       unsigned int len;
+       unsigned char *sym;
+       char *orig_symbol;
+       unsigned int addr_idx;
+       unsigned int stream_offset;
+       unsigned char type;
+};
+#define SYMBOL_NAME(s) ((char *)(s)->sym + 1)
+
+static struct sym_entry *table;
+static unsigned int table_size, table_cnt;
+static unsigned long long _stext, _etext, _sinittext, _einittext, _sextratext, 
_eextratext;
+static int all_symbols = 0;
+static int sort_by_name = 0;
+static int map_only = 0;
+static char symbol_prefix_char = '\0';
+static enum { fmt_bsd, fmt_sysv } input_format;
+static int compare_name(const void *p1, const void *p2);
+
+int token_profit[0x10000];
+
+/* the table that holds the result of the compression */
+unsigned char best_table[256][2];
+unsigned char best_table_len[256];
+
+
+static void usage(void)
+{
+       fprintf(stderr, "Usage: symbols [--all-symbols] 
[--symbol-prefix=<prefix char>] < in.map > out.S\n");
+       exit(1);
+}
+
+/*
+ * This ignores the intensely annoying "mapping symbols" found
+ * in ARM ELF files: $a, $t and $d.
+ */
+static inline int is_arm_mapping_symbol(const char *str)
+{
+       return str[0] == '$' && strchr("atd", str[1])
+              && (str[2] == '\0' || str[2] == '.');
+}
+
+static int read_symbol(FILE *in, struct sym_entry *s)
+{
+       char str[500], type[20] = "";
+       char *sym, stype;
+       static char *filename = NULL;
+       int rc = -1;
+
+       switch (input_format) {
+       case fmt_bsd:
+               rc = fscanf(in, "%llx %c %499s\n", &s->addr, &stype, str);
+               break;
+       case fmt_sysv:
+               while (fscanf(in, "\n") == 1)
+                       /* nothing */;
+               rc = fscanf(in, "%499[^ |] |%llx | %c |",
+                           str, &s->addr, &stype);
+               if (rc == 3 && fscanf(in, " %19[^ |] |", type) != 1)
+                       *type = '\0';
+               break;
+       }
+       if (rc != 3) {
+               if (rc != EOF) {
+                       /* skip line */
+                       if (fgets(str, 500, in) == NULL)
+                               return -1; /* must check fgets result */
+               
+                       //printf("SKIP: %s\n", str);
+               }
+               return -1;
+       }
+
+       sym = strrchr(str, '.');
+       if (strcasecmp(type, "FILE") == 0 ||
+           (/*
+             * GNU nm prior to binutils commit 552e55ed06 (expected to
+             * appear in 2.27) doesn't produce a type for EFI binaries.
+             */
+            input_format == fmt_sysv && !*type && stype == '?' && sym &&
+            sym[1] && strchr("cSsoh", sym[1]) && !sym[2])) {
+               free(filename);
+               //filename = *str ? strdup(str) : NULL;
+               goto skip_tail;
+       }
+
+       /*
+        * Keep all symbols relative to TEXT_BASE, it gets added back at 
runtime.
+        * this simplifies handling relocation.
+        */
+       s->addr -= CONFIG_TEXT_BASE;
+
+       rc = -1;
+
+       sym = str;
+       /* skip prefix char */
+       if (symbol_prefix_char && str[0] == symbol_prefix_char)
+               sym++;
+
+       /* Ignore most absolute/undefined (?) symbols. */
+       if (strcmp(sym, "_stext") == 0)
+               _stext = s->addr;
+       else if (strcmp(sym, "_etext") == 0)
+               _etext = s->addr;
+       else if (strcmp(sym, "_sinittext") == 0)
+               _sinittext = s->addr;
+       else if (strcmp(sym, "_einittext") == 0)
+               _einittext = s->addr;
+       else if (strcmp(sym, "_sextratext") == 0)
+               _sextratext = s->addr;
+       else if (strcmp(sym, "_eextratext") == 0)
+               _eextratext = s->addr;
+       else if (toupper((uint8_t)stype) == 'A')
+       {
+               /* Keep these useful absolute symbols */
+               if (strcmp(sym, "__gp"))
+                       goto skip_tail;
+       }
+       else if (toupper((uint8_t)stype) == 'U' ||
+                toupper((uint8_t)stype) == 'N' ||
+                is_arm_mapping_symbol(sym))
+               goto skip_tail;
+       /* exclude also MIPS ELF local symbols ($L123 instead of .L123) */
+       else if (str[0] == '$')
+               goto skip_tail;
+
+       /* include the type field in the symbol name, so that it gets
+        * compressed together */
+       s->len = strlen(str) + 1;
+       if (islower((unsigned char)stype) && filename)
+               s->len += strlen(filename) + 1;
+       s->sym = malloc(s->len + 1);
+       sym = SYMBOL_NAME(s);
+       if (islower((unsigned char)stype) && filename) {
+               sym = stpcpy(sym, filename);
+               *sym++ = ':';
+       }
+       strcpy(sym, str);
+       if (sort_by_name || map_only) {
+               s->orig_symbol = strdup(SYMBOL_NAME(s));
+               s->type = stype; /* As s->sym[0] ends mangled. */
+       }
+       s->sym[0] = stype;
+       rc = 0;
+
+ skip_tail:
+       if ((input_format == fmt_sysv) && fgets(str, 500, in) == NULL)
+               /* ignore errors while discarding rest of line */;
+
+       return rc;
+}
+
+static int symbol_valid(struct sym_entry *s)
+{
+       int offset = 1;
+
+       /* skip prefix char */
+       if (symbol_prefix_char && *(s->sym + 1) == symbol_prefix_char)
+               offset++;
+
+       /* if --all-symbols is not specified, then symbols outside the text
+        * and inittext sections are discarded */
+       if (!all_symbols) {
+               if ((s->addr < _stext || s->addr > _etext)
+                   && (s->addr < _sinittext || s->addr > _einittext)
+                   && (s->addr < _sextratext || s->addr > _eextratext))
+                       return 0;
+               /* Corner case.  Discard any symbols with the same value as
+                * _etext _einittext or _eextratext; they can move between pass
+                * 1 and 2 when the symbols data are added.  If these symbols
+                * move then they may get dropped in pass 2, which breaks the
+                * symbols rules.
+                */
+               if ((s->addr == _etext && strcmp((char*)s->sym + offset, 
"_etext")) ||
+                   (s->addr == _einittext && strcmp((char*)s->sym + offset, 
"_einittext")) ||
+                   (s->addr == _eextratext && strcmp((char*)s->sym + offset, 
"_eextratext")))
+                       return 0;
+       }
+
+       /* Exclude symbols which vary between passes. */
+       if (strstr((char *)s->sym + offset, "_compiled."))
+               return 0;
+
+       return 1;
+}
+
+static void read_map(FILE *in)
+{
+       while (!feof(in)) {
+               if (table_cnt >= table_size) {
+                       table_size += 10000;
+                       table = realloc(table, sizeof(*table) * table_size);
+                       if (!table) {
+                               fprintf(stderr, "out of memory\n");
+                               exit (1);
+                       }
+               }
+               if (read_symbol(in, &table[table_cnt]) == 0)
+                       table_cnt++;
+       }
+}
+
+static void output_label(char *label)
+{
+       if (symbol_prefix_char)
+               printf(".globl %c%s\n", symbol_prefix_char, label);
+       else
+               printf(".globl %s\n", label);
+       printf("\t.align 8\n");
+       if (symbol_prefix_char)
+               printf("%c%s:\n", symbol_prefix_char, label);
+       else
+               printf("%s:\n", label);
+}
+
+/* uncompress a compressed symbol. When this function is called, the best table
+ * might still be compressed itself, so the function needs to be recursive */
+static int expand_symbol(unsigned char *data, int len, char *result)
+{
+       int c, rlen, total=0;
+
+       while (len) {
+               c = *data;
+               /* if the table holds a single char that is the same as the one
+                * we are looking for, then end the search */
+               if (best_table[c][0]==c && best_table_len[c]==1) {
+                       *result++ = c;
+                       total++;
+               } else {
+                       /* if not, recurse and expand */
+                       rlen = expand_symbol(best_table[c], best_table_len[c], 
result);
+                       total += rlen;
+                       result += rlen;
+               }
+               data++;
+               len--;
+       }
+       *result=0;
+
+       return total;
+}
+
+/* Sort by original (non mangled) symbol name, then type. */
+static int compare_name_orig(const void *p1, const void *p2)
+{
+       const struct sym_entry *sym1 = p1;
+       const struct sym_entry *sym2 = p2;
+       int rc;
+
+       rc = strcmp(sym1->orig_symbol, sym2->orig_symbol);
+
+       if (!rc)
+               rc = sym1->type - sym2->type;
+
+       return rc;
+}
+
+static void write_src(void)
+{
+       unsigned int i, k, off;
+       unsigned int best_idx[256];
+       unsigned int *markers;
+       char buf[KSYM_NAME_LEN+1];
+
+       if (map_only) {
+               for (i = 0; i < table_cnt; i++)
+                       printf("%#llx %c %s\n", table[i].addr, table[i].type,
+                                               table[i].orig_symbol);
+
+               return;
+       }
+       printf("\t.section .rodata, \"a\"\n");
+
+       output_label("symbols_offsets");
+       for (i = 0; i < table_cnt; i++) {
+               printf("\t.quad\t%#llx\n", table[i].addr);
+       }
+       printf("\n");
+
+       output_label("symbols_num_syms");
+       printf("\t.long\t%d\n", table_cnt);
+       printf("\n");
+
+       /* table of offset markers, that give the offset in the compressed 
stream
+        * every 256 symbols */
+       markers = (unsigned int *) malloc(sizeof(unsigned int) * ((table_cnt + 
255) / 256));
+
+       output_label("symbols_names");
+       off = 0;
+       for (i = 0; i < table_cnt; i++) {
+               if ((i & 0xFF) == 0)
+                       markers[i >> 8] = off;
+
+               printf("\t.byte 0x%02x", table[i].len);
+               for (k = 0; k < table[i].len; k++)
+                       printf(", 0x%02x", table[i].sym[k]);
+               printf("\n");
+
+               table[i].stream_offset = off;
+               off += table[i].len + 1;
+       }
+       printf("\n");
+
+       output_label("symbols_markers");
+       for (i = 0; i < ((table_cnt + 255) >> 8); i++)
+               printf("\t.long\t%d\n", markers[i]);
+       printf("\n");
+
+
+       output_label("symbols_token_table");
+       off = 0;
+       for (i = 0; i < 256; i++) {
+               best_idx[i] = off;
+               expand_symbol(best_table[i], best_table_len[i], buf);
+               printf("\t.asciz\t\"%s\"\n", buf);
+               off += strlen(buf) + 1;
+       }
+       printf("\n");
+
+       output_label("symbols_token_index");
+       for (i = 0; i < 256; i++)
+               printf("\t.short\t%d\n", best_idx[i]);
+       printf("\n");
+
+       if (!sort_by_name) {
+               free(markers);
+               return;
+       }
+
+       /* Sorted by original symbol names and type. */
+       qsort(table, table_cnt, sizeof(*table), compare_name_orig);
+
+       output_label("symbols_sorted_offsets");
+       /* A fixed sized array with two entries: offset in the
+        * compressed stream (for symbol name), and offset in
+        * symbols_addresses (or symbols_offset). */
+       for (i = 0; i < table_cnt; i++) {
+               printf("\t.long %u, %u\n", table[i].stream_offset, 
table[i].addr_idx);
+       }
+       printf("\n");
+
+       free(markers);
+}
+
+
+/* table lookup compression functions */
+
+/* count all the possible tokens in a symbol */
+static void learn_symbol(unsigned char *symbol, int len)
+{
+       int i;
+
+       for (i = 0; i < len - 1; i++)
+               token_profit[ symbol[i] + (symbol[i + 1] << 8) ]++;
+}
+
+/* decrease the count for all the possible tokens in a symbol */
+static void forget_symbol(unsigned char *symbol, int len)
+{
+       int i;
+
+       for (i = 0; i < len - 1; i++)
+               token_profit[ symbol[i] + (symbol[i + 1] << 8) ]--;
+}
+
+/* remove all the invalid symbols from the table and do the initial token 
count */
+static void build_initial_tok_table(void)
+{
+       unsigned int i, pos;
+
+       pos = 0;
+       for (i = 0; i < table_cnt; i++) {
+               if ( symbol_valid(&table[i]) ) {
+                       if (pos != i)
+                               table[pos] = table[i];
+                       learn_symbol(table[pos].sym, table[pos].len);
+                       pos++;
+               }
+       }
+       table_cnt = pos;
+}
+
+static void *memmem_pvt(void *h, size_t hlen, void *n, size_t nlen)
+{
+       char *p;
+       for (p = h; (p - (char *)h) <= (long)(hlen - nlen); p++)
+               if (!memcmp(p, n, nlen)) return p;
+       return NULL;
+}
+
+/* replace a given token in all the valid symbols. Use the sampled symbols
+ * to update the counts */
+static void compress_symbols(unsigned char *str, int idx)
+{
+       unsigned int i, len, size;
+       unsigned char *p1, *p2;
+
+       for (i = 0; i < table_cnt; i++) {
+
+               len = table[i].len;
+               p1 = table[i].sym;
+
+               table[i].addr_idx = i;
+               /* find the token on the symbol */
+               p2 = memmem_pvt(p1, len, str, 2);
+               if (!p2) continue;
+
+               /* decrease the counts for this symbol's tokens */
+               forget_symbol(table[i].sym, len);
+
+               size = len;
+
+               do {
+                       *p2 = idx;
+                       p2++;
+                       size -= (p2 - p1);
+                       memmove(p2, p2 + 1, size);
+                       p1 = p2;
+                       len--;
+
+                       if (size < 2) break;
+
+                       /* find the token on the symbol */
+                       p2 = memmem_pvt(p1, size, str, 2);
+
+               } while (p2);
+
+               table[i].len = len;
+
+               /* increase the counts for this symbol's new tokens */
+               learn_symbol(table[i].sym, len);
+       }
+}
+
+/* search the token with the maximum profit */
+static int find_best_token(void)
+{
+       int i, best, bestprofit;
+
+       bestprofit=-10000;
+       best = 0;
+
+       for (i = 0; i < 0x10000; i++) {
+               if (token_profit[i] > bestprofit) {
+                       best = i;
+                       bestprofit = token_profit[i];
+               }
+       }
+       return best;
+}
+
+/* this is the core of the algorithm: calculate the "best" table */
+static void optimize_result(void)
+{
+       int i, best;
+
+       /* using the '\0' symbol last allows compress_symbols to use standard
+        * fast string functions */
+       for (i = 255; i >= 0; i--) {
+
+               /* if this table slot is empty (it is not used by an actual
+                * original char code */
+               if (!best_table_len[i]) {
+
+                       /* find the token with the breates profit value */
+                       best = find_best_token();
+                       if (token_profit[best] == 0)
+                               break;
+
+                       /* place it in the "best" table */
+                       best_table_len[i] = 2;
+                       best_table[i][0] = best & 0xFF;
+                       best_table[i][1] = (best >> 8) & 0xFF;
+
+                       /* replace this token in all the valid symbols */
+                       compress_symbols(best_table[i], i);
+               }
+       }
+}
+
+/* start by placing the symbols that are actually used on the table */
+static void insert_real_symbols_in_table(void)
+{
+       unsigned int i, j, c;
+
+       memset(best_table, 0, sizeof(best_table));
+       memset(best_table_len, 0, sizeof(best_table_len));
+
+       for (i = 0; i < table_cnt; i++) {
+               for (j = 0; j < table[i].len; j++) {
+                       c = table[i].sym[j];
+                       best_table[c][0]=c;
+                       best_table_len[c]=1;
+               }
+       }
+}
+
+static void optimize_token_table(void)
+{
+       build_initial_tok_table();
+
+       insert_real_symbols_in_table();
+
+       /* When valid symbol is not registered, exit to error */
+       if (!table_cnt) {
+               fprintf(stderr, "No valid symbol.\n");
+               exit(1);
+       }
+
+       optimize_result();
+}
+
+static int compare_value(const void *p1, const void *p2)
+{
+       const struct sym_entry *sym1 = p1;
+       const struct sym_entry *sym2 = p2;
+
+       if (sym1->addr < sym2->addr)
+               return -1;
+       if (sym1->addr > sym2->addr)
+               return +1;
+       /* Prefer global symbols. */
+       if (isupper(*sym1->sym))
+               return -1;
+       if (isupper(*sym2->sym))
+               return +1;
+       return 0;
+}
+
+static int compare_name(const void *p1, const void *p2)
+{
+       const struct sym_entry *sym1 = p1;
+       const struct sym_entry *sym2 = p2;
+
+       return strcmp(SYMBOL_NAME(sym1), SYMBOL_NAME(sym2));
+}
+
+int main(int argc, char **argv)
+{
+       unsigned int i;
+       bool unsorted = false, warn_dup = false, error_dup = false, found_dup = 
false;
+
+       if (argc >= 2) {
+               for (i = 1; i < argc; i++) {
+                       if(strcmp(argv[i], "--all-symbols") == 0)
+                               all_symbols = 1;
+                       else if (strncmp(argv[i], "--symbol-prefix=", 16) == 0) 
{
+                               char *p = &argv[i][16];
+                               /* skip quote */
+                               if ((*p == '"' && *(p+2) == '"') || (*p == '\'' 
&& *(p+2) == '\''))
+                                       p++;
+                               symbol_prefix_char = *p;
+                       } else if (strcmp(argv[i], "--sysv") == 0)
+                               input_format = fmt_sysv;
+                       else if (strcmp(argv[i], "--sort") == 0)
+                               unsorted = true;
+                       else if (strcmp(argv[i], "--sort-by-name") == 0)
+                               sort_by_name = 1;
+                       else if (strcmp(argv[i], "--warn-dup") == 0)
+                               warn_dup = true;
+                       else if (strcmp(argv[i], "--error-dup") == 0)
+                               warn_dup = error_dup = true;
+                       else if (strcmp(argv[i], "--xensyms") == 0)
+                               map_only = true;
+                       else
+                               usage();
+               }
+       } else if (argc != 1)
+               usage();
+
+       read_map(stdin);
+
+       if (warn_dup) {
+               qsort(table, table_cnt, sizeof(*table), compare_name);
+               for (i = 1; i < table_cnt; ++i)
+                       if (strcmp(SYMBOL_NAME(table + i - 1),
+                                  SYMBOL_NAME(table + i)) == 0 &&
+                           table[i - 1].addr != table[i].addr) {
+                               fprintf(stderr,
+                                       "Duplicate symbol '%s' (%llx != 
%llx)\n",
+                                       SYMBOL_NAME(table + i),
+                                       table[i].addr, table[i - 1].addr);
+                               found_dup = true;
+                       }
+               unsorted = true;
+       }
+
+       if (error_dup && found_dup)
+               exit(1);
+
+       if (unsorted)
+               qsort(table, table_cnt, sizeof(*table), compare_value);
+
+       optimize_token_table();
+       write_src();
+
+       return 0;
+}

-- 
2.45.2

Reply via email to