Hi Paul,

Am 16.06.2017 um 02:05 schrieb Paul Burton:
> U-Boot has up until now built with -fpic for the MIPS architecture,
> producing position independent code which uses indirection through a
> global offset table, making relocation fairly straightforward as it
> simply involves patching up GOT entries.
> 
> Using -fpic does however have some downsides. The biggest of these is
> that generated code is bloated in various ways. For example, function
> calls are indirected through the GOT & the t9 register:
> 
>   8f998064   lw     t9,-32668(gp)
>   0320f809   jalr   t9
> 
> Without -fpic the call is simply:
> 
>   0f803f01   jal    be00fc04 <puts>
> 
> This is more compact & faster (due to the lack of the load & the
> dependency the jump has on its result). It is also easier to read &
> debug because the disassembly shows what function is being called,
> rather than just an offset from gp which would then have to be looked up
> in the ELF to discover the target function.
> 
> Another disadvantage of -fpic is that each function begins with a
> sequence to calculate the value of the gp register, for example:
> 
>   3c1c0004   lui    gp,0x4
>   279c3384   addiu  gp,gp,13188
>   0399e021   addu   gp,gp,t9
> 
> Without using -fpic this sequence no longer appears at the start of each
> function, reducing code size considerably.
> 
> This patch switches U-Boot from building with -fpic to building with
> -fno-pic, in order to gain the benefits described above. The cost of
> this is an extra step during the build process to extract relocation
> data from the ELF & write it into a new .rel section in a compact
> format, plus the added complexity of dealing with multiple types of
> relocation rather than the single type that applied to the GOT. The
> benefit is smaller, cleaner, more debuggable code. The relocate_code()
> function is reimplemented in C to handle the new relocation scheme,
> which also makes it easier to read & debug.
> 
> Taking maltael_defconfig as an example the size of u-boot.bin built
> using the Codescape MIPS 2016.05-06 toolchain (gcc 4.9.2, binutils
> 2.24.90) shrinks from 254KiB to 224KiB.
> 
> Signed-off-by: Paul Burton <paul.bur...@imgtec.com>
> Cc: Daniel Schwierzeck <daniel.schwierz...@gmail.com>
> Cc: u-boot@lists.denx.de

Nice work, thanks. A few nits below.

> ---
> 
>  arch/mips/Makefile.postlink    |  23 +++
>  arch/mips/config.mk            |  19 +-
>  arch/mips/cpu/start.S          | 130 -------------
>  arch/mips/cpu/u-boot.lds       |  41 +---
>  arch/mips/include/asm/relocs.h |  24 +++
>  arch/mips/lib/Makefile         |   1 +
>  arch/mips/lib/reloc.c          | 164 ++++++++++++++++
>  common/board_f.c               |   2 +-
>  tools/.gitignore               |   1 +
>  tools/Makefile                 |   2 +
>  tools/mips-relocs.c            | 426 +++++++++++++++++++++++++++++++++++++++++
>  11 files changed, 656 insertions(+), 177 deletions(-)
>  create mode 100644 arch/mips/Makefile.postlink
>  create mode 100644 arch/mips/include/asm/relocs.h
>  create mode 100644 arch/mips/lib/reloc.c
>  create mode 100644 tools/mips-relocs.c
> 
> diff --git a/arch/mips/Makefile.postlink b/arch/mips/Makefile.postlink
> new file mode 100644
> index 0000000000..d6fbc0d404
> --- /dev/null
> +++ b/arch/mips/Makefile.postlink
> @@ -0,0 +1,23 @@
> +#
> +# Copyright (c) 2017 Imagination Technologies Ltd.
> +#
> +# SPDX-License-Identifier:   GPL-2.0+
> +#
> +
> +PHONY := __archpost
> +__archpost:
> +
> +-include include/config/auto.conf
> +include scripts/Kbuild.include
> +
> +CMD_RELOCS = tools/mips-relocs
> +quiet_cmd_relocs = RELOCS  $@
> +      cmd_relocs = $(CMD_RELOCS) $@ .$@.relocs

What's the purpose of .$@.relocs? The mips-relocs tool only takes one
argument, and the kernel Makefile doesn't have this either.

> +
> +u-boot: FORCE
> +     @true
> +     $(call if_changed,relocs)
> +
> +.PHONY: FORCE
> +
> +FORCE:
> diff --git a/arch/mips/config.mk b/arch/mips/config.mk
> index 2c72c1553d..56d150171e 100644
> --- a/arch/mips/config.mk
> +++ b/arch/mips/config.mk
> @@ -56,25 +56,16 @@ PLATFORM_ELFFLAGS += -B mips $(OBJCOPYFLAGS)
>  # LDFLAGS_vmlinux            += -G 0 -static -n -nostdlib
>  # MODFLAGS                   += -mlong-calls
>  #
> -# On the other hand, we want PIC in the U-Boot code to relocate it from ROM
> -# to RAM. $28 is always used as gp.
> -#
> -ifdef CONFIG_SPL_BUILD
> -PF_ABICALLS                  := -mno-abicalls
> -PF_PIC                               := -fno-pic
> -PF_PIE                               :=
> -else
> -PF_ABICALLS                  := -mabicalls
> -PF_PIC                               := -fpic
> -PF_PIE                               := -pie
> -PF_OBJCOPY                   := -j .got -j .rel.dyn -j .padding
> +ifndef CONFIG_SPL_BUILD
> +PF_OBJCOPY                   := -j .got -j .rel -j .padding
>  PF_OBJCOPY                   += -j .dtb.init.rodata
> +LDFLAGS_FINAL                        += --emit-relocs
>  endif

I think we could now drop the extra PF_OBJCOPY variable and assign the
values directly to OBJCOPYFLAGS.

>  
> -PLATFORM_CPPFLAGS            += -G 0 $(PF_ABICALLS) $(PF_PIC)
> +PLATFORM_CPPFLAGS            += -G 0 -mno-abicalls -fno-pic
>  PLATFORM_CPPFLAGS            += -msoft-float
>  PLATFORM_LDFLAGS             += -G 0 -static -n -nostdlib
>  PLATFORM_RELFLAGS            += -ffunction-sections -fdata-sections
> -LDFLAGS_FINAL                        += --gc-sections $(PF_PIE)
> +LDFLAGS_FINAL                        += --gc-sections
>  OBJCOPYFLAGS                 += -j .text -j .rodata -j .data -j .u_boot_list
>  OBJCOPYFLAGS                 += $(PF_OBJCOPY)
> diff --git a/arch/mips/cpu/start.S b/arch/mips/cpu/start.S
> index d01ee9f9bd..952c57afd7 100644
> --- a/arch/mips/cpu/start.S
> +++ b/arch/mips/cpu/start.S
> @@ -221,18 +221,6 @@ wr_done:
>       ehb
>  #endif
>  
> -     /*
> -      * Initialize $gp, force pointer sized alignment of bal instruction to
> -      * forbid the compiler to put nop's between bal and _gp. This is
> -      * required to keep _gp and ra aligned to 8 byte.
> -      */
> -     .align  PTRLOG
> -     bal     1f
> -      nop
> -     PTR     _gp
> -1:
> -     PTR_L   gp, 0(ra)
> -
>  #ifdef CONFIG_MIPS_CM
>       PTR_LA  t9, mips_cm_map
>       jalr    t9
> @@ -291,121 +279,3 @@ wr_done:
>        move   ra, zero
>  
>       END(_start)
> -
> -/*
> - * void relocate_code (addr_sp, gd, addr_moni)
> - *
> - * This "function" does not return, instead it continues in RAM
> - * after relocating the monitor code.
> - *
> - * a0 = addr_sp
> - * a1 = gd
> - * a2 = destination address
> - */
> -ENTRY(relocate_code)
> -     move    sp, a0                  # set new stack pointer
> -     move    fp, sp
> -
> -     move    s0, a1                  # save gd in s0
> -     move    s2, a2                  # save destination address in s2
> -
> -     PTR_LI  t0, CONFIG_SYS_MONITOR_BASE
> -     PTR_SUB s1, s2, t0              # s1 <-- relocation offset
> -
> -     PTR_LA  t2, __image_copy_end
> -     move    t1, a2
> -
> -     /*
> -      * t0 = source address
> -      * t1 = target address
> -      * t2 = source end address
> -      */
> -1:
> -     PTR_L   t3, 0(t0)
> -     PTR_S   t3, 0(t1)
> -     PTR_ADDU t0, PTRSIZE
> -     blt     t0, t2, 1b
> -      PTR_ADDU t1, PTRSIZE
> -
> -     /*
> -      * Now we want to update GOT.
> -      *
> -      * GOT[0] is reserved. GOT[1] is also reserved for the dynamic object
> -      * generated by GNU ld. Skip these reserved entries from relocation.
> -      */
> -     PTR_LA  t3, num_got_entries
> -     PTR_LA  t8, _GLOBAL_OFFSET_TABLE_
> -     PTR_ADD t8, s1                  # t8 now holds relocated _G_O_T_
> -     PTR_ADDIU t8, t8, 2 * PTRSIZE   # skipping first two entries
> -     PTR_LI  t2, 2
> -1:
> -     PTR_L   t1, 0(t8)
> -     beqz    t1, 2f
> -      PTR_ADD t1, s1
> -     PTR_S   t1, 0(t8)
> -2:
> -     PTR_ADDIU t2, 1
> -     blt     t2, t3, 1b
> -      PTR_ADDIU t8, PTRSIZE
> -
> -     /* Update dynamic relocations */
> -     PTR_LA  t1, __rel_dyn_start
> -     PTR_LA  t2, __rel_dyn_end
> -
> -     b       2f                      # skip first reserved entry
> -      PTR_ADDIU t1, 2 * PTRSIZE
> -
> -1:
> -     lw      t8, -4(t1)              # t8 <-- relocation info
> -
> -     PTR_LI  t3, MIPS_RELOC
> -     bne     t8, t3, 2f              # skip non-MIPS_RELOC entries
> -      nop
> -
> -     PTR_L   t3, -(2 * PTRSIZE)(t1)  # t3 <-- location to fix up in FLASH
> -
> -     PTR_L   t8, 0(t3)               # t8 <-- original pointer
> -     PTR_ADD t8, s1                  # t8 <-- adjusted pointer
> -
> -     PTR_ADD t3, s1                  # t3 <-- location to fix up in RAM
> -     PTR_S   t8, 0(t3)
> -
> -2:
> -     blt     t1, t2, 1b
> -      PTR_ADDIU t1, 2 * PTRSIZE      # each rel.dyn entry is 2*PTRSIZE bytes
> -
> -     /*
> -      * Flush caches to ensure our newly modified instructions are visible
> -      * to the instruction cache. We're still running with the old GOT, so
> -      * apply the reloc offset to the start address.
> -      */
> -     PTR_LA  a0, __text_start
> -     PTR_LA  a1, __text_end
> -     PTR_SUB a1, a1, a0
> -     PTR_LA  t9, flush_cache
> -     jalr    t9
> -      PTR_ADD        a0, s1
> -
> -     PTR_ADD gp, s1                  # adjust gp
> -
> -     /*
> -      * Clear BSS
> -      *
> -      * GOT is now relocated. Thus __bss_start and __bss_end can be
> -      * accessed directly via $gp.
> -      */
> -     PTR_LA  t1, __bss_start         # t1 <-- __bss_start
> -     PTR_LA  t2, __bss_end           # t2 <-- __bss_end
> -
> -1:
> -     PTR_S   zero, 0(t1)
> -     blt     t1, t2, 1b
> -      PTR_ADDIU t1, PTRSIZE
> -
> -     move    a0, s0                  # a0 <-- gd
> -     move    a1, s2
> -     PTR_LA  t9, board_init_r
> -     jr      t9
> -      move   ra, zero
> -
> -     END(relocate_code)
> diff --git a/arch/mips/cpu/u-boot.lds b/arch/mips/cpu/u-boot.lds
> index 0129c99611..bd5536f013 100644
> --- a/arch/mips/cpu/u-boot.lds
> +++ b/arch/mips/cpu/u-boot.lds
> @@ -34,15 +34,6 @@ SECTIONS
>               *(.data*)
>       }
>  
> -     . = .;
> -     _gp = ALIGN(16) + 0x7ff0;
> -
> -     .got : {
> -             *(.got)
> -     }
> -
> -     num_got_entries = SIZEOF(.got) >> PTR_COUNT_SHIFT;
> -
>       . = ALIGN(4);
>       .sdata : {
>               *(.sdata*)
> @@ -57,33 +48,19 @@ SECTIONS
>       __image_copy_end = .;
>       __init_end = .;
>  
> -     .rel.dyn : {
> -             __rel_dyn_start = .;
> -             *(.rel.dyn)
> -             __rel_dyn_end = .;
> -     }
> -
> -     .padding : {
> -             /*
> -              * Workaround for a binutils feature (or bug?).
> -              *
> -              * The GNU ld from binutils puts the dynamic relocation
> -              * entries into the .rel.dyn section. Sometimes it
> -              * allocates more dynamic relocation entries than it needs
> -              * and the unused slots are set to R_MIPS_NONE entries.
> -              *
> -              * However the size of the .rel.dyn section in the ELF
> -              * section header does not cover the unused entries, so
> -              * objcopy removes those during stripping.
> -              *
> -              * Create a small section here to avoid that.
> -              */
> -             LONG(0xFFFFFFFF)
> +     /*
> +      * .rel must come last so that the mips-relocs tool can shrink
> +      * the section size & the PT_LOAD program header filesz.
> +      */
> +     .rel : {
> +             __rel_start = .;
> +             BYTE(0x0)
> +             . += (32 * 1024) - 1;
>       }
>  
>       _end = .;
>  
> -     .bss __rel_dyn_start (OVERLAY) : {
> +     .bss __rel_start (OVERLAY) : {
>               __bss_start = .;
>               *(.sbss.*)
>               *(.bss.*)
> diff --git a/arch/mips/include/asm/relocs.h b/arch/mips/include/asm/relocs.h
> new file mode 100644
> index 0000000000..92e9d04f7c
> --- /dev/null
> +++ b/arch/mips/include/asm/relocs.h
> @@ -0,0 +1,24 @@
> +/*
> + * MIPS Relocations
> + *
> + * Copyright (c) 2017 Imagination Technologies Ltd.
> + *
> + * SPDX-License-Identifier:  GPL-2.0+
> + */
> +
> +#ifndef __ASM_MIPS_RELOCS_H__
> +#define __ASM_MIPS_RELOCS_H__
> +
> +#define R_MIPS_NONE          0
> +#define R_MIPS_32            2
> +#define R_MIPS_26            4
> +#define R_MIPS_HI16          5
> +#define R_MIPS_LO16          6
> +#define R_MIPS_PC16          10
> +#define R_MIPS_64            18
> +#define R_MIPS_HIGHER                28
> +#define R_MIPS_HIGHEST               29
> +#define R_MIPS_PC21_S2               60
> +#define R_MIPS_PC26_S2               61
> +
> +#endif /* __ASM_MIPS_RELOCS_H__ */
> diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile
> index 659c6ad187..ef557c6932 100644
> --- a/arch/mips/lib/Makefile
> +++ b/arch/mips/lib/Makefile
> @@ -8,6 +8,7 @@
>  obj-y        += cache.o
>  obj-y        += cache_init.o
>  obj-y        += genex.o
> +obj-y        += reloc.o
>  obj-y        += stack.o
>  obj-y        += traps.o
>  
> diff --git a/arch/mips/lib/reloc.c b/arch/mips/lib/reloc.c
> new file mode 100644
> index 0000000000..b7ae56df5a
> --- /dev/null
> +++ b/arch/mips/lib/reloc.c
> @@ -0,0 +1,164 @@
> +/*
> + * MIPS Relocation
> + *
> + * Copyright (c) 2017 Imagination Technologies Ltd.
> + *
> + * SPDX-License-Identifier:  GPL-2.0+
> + */
> +
> +#include <common.h>
> +#include <asm/relocs.h>
> +#include <asm/sections.h>
> +
> +/*
> + * __rel_start: Relocation data generated by the mips-relocs tool
> + *
> + * This data, found in the .rel section, is generated by the mips-relocs tool &
> + * contains a record of all locations in the U-Boot binary that need to be
> + * fixed up during relocation.
> + *
> + * The data is a sequence of unsigned integers, which are of somewhat arbitrary
> + * size. This is achieved by encoding integers as a sequence of bytes, each of
> + * which contains 7 bits of data with the most significant bit indicating
> + * whether any further bytes need to be read. The least significant bits of the
> + * integer are found in the first byte - ie. it somewhat resembles little
> + * endian.
> + *
> + * Each pair of two integers represents a relocation that must be applied. The
> + * first integer represents the type of relocation as a standard ELF relocation
> + * type (ie. R_MIPS_*). The second integer represents the offset at which to
> + * apply the relocation, relative to the previous relocation or for the first
> + * relocation the start of the relocated .text section.
> + *
> + * The end of the relocation data is indicated when type R_MIPS_NONE (0) is
> + * read, at which point no further integers should be read. That is, the
> + * terminating R_MIPS_NONE reloc includes no offset.
> + */
> +extern uint8_t __rel_start[];

Could you move this to asm/sections.h (or perhaps asm-generic/sections.h)?
That would suppress a checkpatch.pl warning and keep the linker script
exports in one place.

> +
> +/**
> + * read_uint() - Read an unsigned integer from the buffer
> + * @buf: pointer to a pointer to the reloc buffer
> + *
> + * Read one whole unsigned integer from the relocation data pointed to by @buf,
> + * advancing @buf past the bytes encoding the integer.
> + *
> + * Returns: the integer read from @buf
> + */
> +static unsigned long read_uint(uint8_t **buf)
> +{
> +     unsigned long val = 0;
> +     unsigned int shift = 0;
> +     uint8_t new;
> +
> +     do {
> +             new = *(*buf)++;
> +             val |= (new & 0x7f) << shift;
> +             shift += 7;
> +     } while (new & 0x80);
> +
> +     return val;
> +}
> +
> +/**
> + * apply_reloc() - Apply a single relocation
> + * @type: the type of reloc (R_MIPS_*)
> + * @addr: the address that the reloc should be applied to
> + * @off: the relocation offset, ie. number of bytes we're moving U-Boot by
> + *
> + * Apply a single relocation of type @type at @addr. This function is
> + * intentionally simple, and does the bare minimum needed to fixup the
> + * relocated U-Boot - in particular, it does not check for overflows.
> + */
> +static void apply_reloc(unsigned int type, void *addr, long off)
> +{
> +     switch (type) {
> +     case R_MIPS_26:
> +             *(uint32_t *)addr += (off >> 2) & 0x3ffffff;
> +             break;
> +
> +     case R_MIPS_32:
> +             *(uint32_t *)addr += off;
> +             break;
> +
> +     case R_MIPS_64:
> +             *(uint64_t *)addr += off;
> +             break;
> +
> +     case R_MIPS_HI16:
> +             *(uint32_t *)addr += off >> 16;
> +             break;
> +
> +     default:
> +             panic("Unhandled reloc type %u\n", type);
> +     }
> +}
> +
> +/**
> + * relocate_code() - Relocate U-Boot, generally from flash to DDR
> + * @start_addr_sp: new stack pointer
> + * @new_gd: pointer to relocated global data
> + * @relocaddr: the address to relocate to
> + *
> + * Relocate U-Boot from its current location (generally in flash) to a new one
> + * (generally in DDR). This function will copy the U-Boot binary & apply
> + * relocations as necessary, then jump to board_init_r in the new build of
> + * U-Boot. As such, this function does not return.
> + */
> +void relocate_code(ulong start_addr_sp, gd_t *new_gd, ulong relocaddr)
> +{
> +     unsigned long addr, length, bss_len;
> +     uint8_t *buf, *bss_start;
> +     unsigned int type;
> +     long off;
> +
> +     /*
> +      * Ensure that we're relocating by an offset which is a multiple of
> +      * 64KiB, ie. doesn't change the least significant 16 bits of any
> +      * addresses. This allows us to discard R_MIPS_LO16 relocs, saving
> +      * space in the U-Boot binary & complexity in handling them.
> +      */
> +     off = relocaddr - (unsigned long)__text_start;
> +     if (off & 0xffff)
> +             panic("Mis-aligned relocation\n");
> +
> +     /* Copy U-Boot to RAM */
> +     length = __image_copy_end - __text_start;
> +     memcpy((void *)relocaddr, __text_start, length);
> +
> +     /* Now apply relocations to the copy in RAM */
> +     buf = __rel_start;
> +     addr = relocaddr;
> +     while (true) {
> +             type = read_uint(&buf);
> +             if (type == R_MIPS_NONE)
> +                     break;
> +
> +             addr += read_uint(&buf) << 2;
> +             apply_reloc(type, (void *)addr, off);
> +     }
> +
> +     /* Ensure the icache is coherent */
> +     flush_cache(relocaddr, length);
> +
> +     /* Clear the .bss section */
> +     bss_start = (uint8_t *)((unsigned long)__bss_start + off);
> +     bss_len = (unsigned long)&__bss_end - (unsigned long)__bss_start;
> +     memset(bss_start, 0, bss_len);
> +
> +     /* Jump to the relocated U-Boot */
> +     asm volatile(
> +                    "move    $29, %0\n"
> +             "       move    $4, %1\n"
> +             "       move    $5, %2\n"
> +             "       move    $31, $0\n"
> +             "       jr      %3"
> +             : /* no outputs */
> +             : "r"(start_addr_sp),
> +               "r"(new_gd),
> +               "r"(relocaddr),
> +               "r"((unsigned long)board_init_r + off));
> +
> +     /* Since we jumped to the new U-Boot above, we won't get here */
> +     unreachable();
> +}
> diff --git a/common/board_f.c b/common/board_f.c
> index 46e52849fb..5983bf62e7 100644
> --- a/common/board_f.c
> +++ b/common/board_f.c
> @@ -418,7 +418,7 @@ static int reserve_uboot(void)
>        */
>       gd->relocaddr -= gd->mon_len;
>       gd->relocaddr &= ~(4096 - 1);
> -#ifdef CONFIG_E500
> +#if defined(CONFIG_E500) || defined(CONFIG_MIPS)
>       /* round down to next 64 kB limit so that IVPR stays aligned */
>       gd->relocaddr &= ~(65536 - 1);
>  #endif
> diff --git a/tools/.gitignore b/tools/.gitignore
> index 6ec71f5c7f..ac0c979319 100644
> --- a/tools/.gitignore
> +++ b/tools/.gitignore
> @@ -11,6 +11,7 @@
>  /img2srec
>  /kwboot
>  /dumpimage
> +/mips-relocs
>  /mkenvimage
>  /mkimage
>  /mkexynosspl
> diff --git a/tools/Makefile b/tools/Makefile
> index cb1683e153..56098e6943 100644
> --- a/tools/Makefile
> +++ b/tools/Makefile
> @@ -209,6 +209,8 @@ hostprogs-$(CONFIG_STATIC_RELA) += relocate-rela
>  hostprogs-y += fdtgrep
>  fdtgrep-objs += $(LIBFDT_OBJS) fdtgrep.o
>  
> +hostprogs-$(CONFIG_MIPS) += mips-relocs
> +
>  # We build some files with extra pedantic flags to try to minimize things
>  # that won't build on some weird host compiler -- though there are lots of
>  # exceptions for files that aren't complaint.
> diff --git a/tools/mips-relocs.c b/tools/mips-relocs.c
> new file mode 100644
> index 0000000000..b690fa53c4
> --- /dev/null
> +++ b/tools/mips-relocs.c
> @@ -0,0 +1,426 @@
> +/*
> + * MIPS Relocation Data Generator
> + *
> + * Copyright (c) 2017 Imagination Technologies Ltd.
> + *
> + * SPDX-License-Identifier:  GPL-2.0+
> + */
> +
> +#include <elf.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <limits.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <sys/mman.h>
> +#include <sys/stat.h>
> +#include <unistd.h>
> +
> +#include <asm/relocs.h>
> +
> +#define hdr_field(pfx, idx, field) ({                                \
> +     uint64_t _val;                                          \
> +     unsigned int _size;                                     \
> +                                                             \
> +     if (is_64) {                                            \
> +             _val = pfx##hdr64[idx].field;                   \
> +             _size = sizeof(pfx##hdr64[0].field);            \
> +     } else {                                                \
> +             _val = pfx##hdr32[idx].field;                   \
> +             _size = sizeof(pfx##hdr32[0].field);            \
> +     }                                                       \
> +                                                             \
> +     switch (_size) {                                        \
> +     case 1:                                                 \
> +             break;                                          \
> +     case 2:                                                 \
> +             _val = is_be ? be16toh(_val) : le16toh(_val);   \
> +             break;                                          \
> +     case 4:                                                 \
> +             _val = is_be ? be32toh(_val) : le32toh(_val);   \
> +             break;                                          \
> +     case 8:                                                 \
> +             _val = is_be ? be64toh(_val) : le64toh(_val);   \
> +             break;                                          \
> +     }                                                       \
> +                                                             \
> +     _val;                                                   \
> +})
> +
> +#define set_hdr_field(pfx, idx, field, val) ({                       \
> +     uint64_t _val;                                          \
> +     unsigned int _size;                                     \
> +                                                             \
> +     if (is_64)                                              \
> +             _size = sizeof(pfx##hdr64[0].field);            \
> +     else                                                    \
> +             _size = sizeof(pfx##hdr32[0].field);            \
> +                                                             \
> +     switch (_size) {                                        \
> +     case 1:                                                 \
> +             _val = val;                                     \
> +             break;                                          \
> +     case 2:                                                 \
> +             _val = is_be ? htobe16(val) : htole16(val);     \
> +             break;                                          \
> +     case 4:                                                 \
> +             _val = is_be ? htobe32(val) : htole32(val);     \
> +             break;                                          \
> +     case 8:                                                 \
> +             _val = is_be ? htobe64(val) : htole64(val);     \
> +             break;                                          \
> +     }                                                       \
> +                                                             \
> +     if (is_64)                                              \
> +             pfx##hdr64[idx].field = _val;                   \
> +     else                                                    \
> +             pfx##hdr32[idx].field = _val;                   \
> +})
> +
> +#define ehdr_field(field) \
> +     hdr_field(e, 0, field)
> +#define phdr_field(idx, field) \
> +     hdr_field(p, idx, field)
> +#define shdr_field(idx, field) \
> +     hdr_field(s, idx, field)
> +
> +#define set_phdr_field(idx, field, val) \
> +     set_hdr_field(p, idx, field, val)
> +#define set_shdr_field(idx, field, val) \
> +     set_hdr_field(s, idx, field, val)
> +
> +#define shstr(idx) (&shstrtab[idx])
> +
> +bool is_64, is_be;
> +uint64_t text_base;
> +
> +struct mips_reloc {
> +     uint8_t type;
> +     uint64_t offset;
> +} *relocs;
> +size_t relocs_sz, relocs_idx;
> +
> +static int add_reloc(unsigned int type, uint64_t off)
> +{
> +     struct mips_reloc *new;
> +     size_t new_sz;
> +
> +     switch (type) {
> +     case R_MIPS_NONE:
> +     case R_MIPS_LO16:
> +     case R_MIPS_PC16:
> +     case R_MIPS_HIGHER:
> +     case R_MIPS_HIGHEST:
> +     case R_MIPS_PC21_S2:
> +     case R_MIPS_PC26_S2:
> +             /* Skip these relocs */
> +             return 0;
> +
> +     default:
> +             break;
> +     }
> +
> +     if (relocs_idx == relocs_sz) {
> +             new_sz = relocs_sz ? relocs_sz * 2 : 128;
> +             new = realloc(relocs, new_sz * sizeof(*relocs));
> +             if (!new) {
> +                     fprintf(stderr, "Out of memory\n");
> +                     return -ENOMEM;
> +             }
> +
> +             relocs = new;
> +             relocs_sz = new_sz;
> +     }
> +
> +     relocs[relocs_idx++] = (struct mips_reloc){
> +             .type = type,
> +             .offset = off,
> +     };
> +
> +     return 0;
> +}
> +
> +static int parse_mips32_rel(const void *_rel)
> +{
> +     const Elf32_Rel *rel = _rel;
> +     uint32_t off, type;
> +
> +     off = is_be ? be32toh(rel->r_offset) : le32toh(rel->r_offset);
> +     off -= text_base;
> +
> +     type = is_be ? be32toh(rel->r_info) : le32toh(rel->r_info);
> +     type = ELF32_R_TYPE(type);
> +
> +     return add_reloc(type, off);
> +}
> +
> +static int parse_mips64_rela(const void *_rel)
> +{
> +     const Elf64_Rela *rel = _rel;
> +     uint64_t off, type;
> +
> +     off = is_be ? be64toh(rel->r_offset) : le64toh(rel->r_offset);
> +     off -= text_base;
> +
> +     type = rel->r_info >> (64 - 8);
> +
> +     return add_reloc(type, off);
> +}
> +
> +static void output_uint(uint8_t **buf, uint64_t val)
> +{
> +     uint64_t tmp;
> +
> +     do {
> +             tmp = val & 0x7f;
> +             val >>= 7;
> +             tmp |= !!val << 7;
> +             *(*buf)++ = tmp;
> +     } while (val);
> +}
> +
> +static int compare_relocs(const void *a, const void *b)
> +{
> +     const struct mips_reloc *ra = a, *rb = b;
> +
> +     return ra->offset - rb->offset;
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +     unsigned int i, j, i_rel_shdr, sh_type, sh_entsize, sh_entries;
> +     size_t rel_size, rel_actual_size, load_sz;
> +     const char *shstrtab, *sh_name, *rel_pfx;
> +     int (*parse_fn)(const void *rel);
> +     uint8_t *buf_start, *buf;
> +     const Elf32_Ehdr *ehdr32;
> +     const Elf64_Ehdr *ehdr64;
> +     uintptr_t sh_offset;
> +     Elf32_Phdr *phdr32;
> +     Elf64_Phdr *phdr64;
> +     Elf32_Shdr *shdr32;
> +     Elf64_Shdr *shdr64;
> +     struct stat st;
> +     int err, fd;
> +     void *elf;
> +     bool skip;
> +
> +     fd = open(argv[1], O_RDWR);
> +     if (fd == -1) {
> +             fprintf(stderr, "Unable to open input file %s\n", argv[1]);
> +             err = errno;
> +             goto out_ret;
> +     }
> +
> +     err = fstat(fd, &st);
> +     if (err) {
> +             fprintf(stderr, "Unable to fstat() input file\n");
> +             goto out_close_fd;
> +     }
> +
> +     elf = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
> +     if (elf == MAP_FAILED) {
> +             fprintf(stderr, "Unable to mmap() input file\n");
> +             err = errno;
> +             goto out_close_fd;
> +     }
> +
> +     ehdr32 = elf;
> +     ehdr64 = elf;
> +
> +     if (memcmp(&ehdr32->e_ident[EI_MAG0], ELFMAG, SELFMAG)) {
> +             fprintf(stderr, "Input file is not an ELF\n");
> +             err = -EINVAL;
> +             goto out_free_relocs;
> +     }
> +
> +     if (ehdr32->e_ident[EI_VERSION] != EV_CURRENT) {
> +             fprintf(stderr, "Unrecognised ELF version\n");
> +             err = -EINVAL;
> +             goto out_free_relocs;
> +     }
> +
> +     switch (ehdr32->e_ident[EI_CLASS]) {
> +     case ELFCLASS32:
> +             is_64 = false;
> +             break;
> +     case ELFCLASS64:
> +             is_64 = true;
> +             break;
> +     default:
> +             fprintf(stderr, "Unrecognised ELF class\n");
> +             err = -EINVAL;
> +             goto out_free_relocs;
> +     }
> +
> +     switch (ehdr32->e_ident[EI_DATA]) {
> +     case ELFDATA2LSB:
> +             is_be = false;
> +             break;
> +     case ELFDATA2MSB:
> +             is_be = true;
> +             break;
> +     default:
> +             fprintf(stderr, "Unrecognised ELF data encoding\n");
> +             err = -EINVAL;
> +             goto out_free_relocs;
> +     }
> +
> +     if (ehdr_field(e_type) != ET_EXEC) {
> +             fprintf(stderr, "Input ELF is not an executable\n");
> +             printf("type 0x%lx\n", ehdr_field(e_type));
> +             err = -EINVAL;
> +             goto out_free_relocs;
> +     }
> +
> +     if (ehdr_field(e_machine) != EM_MIPS) {
> +             fprintf(stderr, "Input ELF does not target MIPS\n");
> +             err = -EINVAL;
> +             goto out_free_relocs;
> +     }
> +
> +     phdr32 = elf + ehdr_field(e_phoff);
> +     phdr64 = elf + ehdr_field(e_phoff);
> +     shdr32 = elf + ehdr_field(e_shoff);
> +     shdr64 = elf + ehdr_field(e_shoff);
> +     shstrtab = elf + shdr_field(ehdr_field(e_shstrndx), sh_offset);
> +
> +     i_rel_shdr = UINT_MAX;
> +     for (i = 0; i < ehdr_field(e_shnum); i++) {
> +             sh_name = shstr(shdr_field(i, sh_name));
> +
> +             if (!strcmp(sh_name, ".rel")) {
> +                     i_rel_shdr = i;
> +                     continue;
> +             }
> +
> +             if (!strcmp(sh_name, ".text")) {
> +                     text_base = shdr_field(i, sh_addr);
> +                     continue;
> +             }
> +     }
> +     if (i_rel_shdr == UINT_MAX) {
> +             fprintf(stderr, "Unable to find .rel section\n");
> +             err = -EINVAL;
> +             goto out_free_relocs;
> +     }
> +     if (!text_base) {
> +             fprintf(stderr, "Unable to find .text base address\n");
> +             err = -EINVAL;
> +             goto out_free_relocs;
> +     }
> +
> +     rel_pfx = is_64 ? ".rela." : ".rel.";
> +
> +     for (i = 0; i < ehdr_field(e_shnum); i++) {
> +             sh_type = shdr_field(i, sh_type);
> +             if ((sh_type != SHT_REL) && (sh_type != SHT_RELA))
> +                     continue;
> +
> +             sh_name = shstr(shdr_field(i, sh_name));
> +             if (strncmp(sh_name, rel_pfx, strlen(rel_pfx))) {
> +                     if (strcmp(sh_name, ".rel") && strcmp(sh_name, ".rel.dyn"))
> +                             fprintf(stderr, "WARNING: Unexpected reloc section name '%s'\n", sh_name);
> +                     continue;
> +             }
> +
> +             /*
> +              * Skip reloc sections which either don't correspond to another
> +              * section in the ELF, or whose corresponding section isn't
> +              * loaded as part of the U-Boot binary (ie. doesn't have the
> +              * alloc flags set).
> +              */
> +             skip = true;
> +             for (j = 0; j < ehdr_field(e_shnum); j++) {
> +                     if (strcmp(&sh_name[strlen(rel_pfx) - 1], shstr(shdr_field(j, sh_name))))
> +                             continue;
> +
> +                     skip = !(shdr_field(j, sh_flags) & SHF_ALLOC);
> +                     break;
> +             }
> +             if (skip)
> +                     continue;
> +
> +             sh_offset = shdr_field(i, sh_offset);
> +             sh_entsize = shdr_field(i, sh_entsize);
> +             sh_entries = shdr_field(i, sh_size) / sh_entsize;
> +
> +             if (sh_type == SHT_REL) {
> +                     if (is_64) {
> +                             fprintf(stderr, "REL-style reloc in MIPS64 ELF?\n");
> +                             err = -EINVAL;
> +                             goto out_free_relocs;
> +                     } else {
> +                             parse_fn = parse_mips32_rel;
> +                     }
> +             } else {
> +                     if (is_64) {
> +                             parse_fn = parse_mips64_rela;
> +                     } else {
> +                             fprintf(stderr, "RELA-style reloc in MIPS32 ELF?\n");
> +                             err = -EINVAL;
> +                             goto out_free_relocs;
> +                     }
> +             }
> +
> +             for (j = 0; j < sh_entries; j++) {
> +                     err = parse_fn(elf + sh_offset + (j * sh_entsize));
> +                     if (err)
> +                             goto out_free_relocs;
> +             }
> +     }
> +
> +     /* Sort relocs in ascending order of offset */
> +     qsort(relocs, relocs_idx, sizeof(*relocs), compare_relocs);
> +
> +     /* Make reloc offsets relative to their predecessor */
> +     for (i = relocs_idx - 1; i > 0; i--)
> +             relocs[i].offset -= relocs[i - 1].offset;
> +
> +     /* Write the relocations to the .rel section */
> +     buf = buf_start = elf + shdr_field(i_rel_shdr, sh_offset);
> +     for (i = 0; i < relocs_idx; i++) {
> +             output_uint(&buf, relocs[i].type);
> +             output_uint(&buf, relocs[i].offset >> 2);
> +     }
> +
> +     /* Write a terminating R_MIPS_NONE (0) */
> +     output_uint(&buf, R_MIPS_NONE);
> +
> +     /* Ensure the relocs didn't overflow the .rel section */
> +     rel_size = shdr_field(i_rel_shdr, sh_size);
> +     rel_actual_size = buf - buf_start;
> +     if (rel_actual_size > rel_size) {
> +             fprintf(stderr, "Relocs overflowed .rel section\n");
> +             return -ENOMEM;
> +     }
> +
> +     /* Update the .rel section's size */
> +     set_shdr_field(i_rel_shdr, sh_size, rel_actual_size);
> +
> +     /* Shrink the PT_LOAD program header filesz (ie. shrink u-boot.bin) */
> +     for (i = 0; i < ehdr_field(e_phnum); i++) {
> +             if (phdr_field(i, p_type) != PT_LOAD)
> +                     continue;
> +
> +             load_sz = phdr_field(i, p_filesz);
> +             load_sz -= rel_size - rel_actual_size;
> +             set_phdr_field(i, p_filesz, load_sz);
> +             break;
> +     }
> +
> +     /* Make sure data is written back to the file */
> +     err = msync(elf, st.st_size, MS_SYNC);
> +     if (err) {
> +             fprintf(stderr, "Failed to msync: %d\n", errno);
> +             goto out_free_relocs;
> +     }
> +
> +out_free_relocs:
> +     free(relocs);
> +     munmap(elf, st.st_size);
> +out_close_fd:
> +     close(fd);
> +out_ret:
> +     return err;
> +}
> 

-- 
- Daniel

Attachment: signature.asc
Description: OpenPGP digital signature

_______________________________________________
U-Boot mailing list
U-Boot@lists.denx.de
https://lists.denx.de/listinfo/u-boot

Reply via email to