This implements CONFIG_RELOCATABLE for 64-bit by building the kernel as
a position-independent executable (PIE).  This involves processing the
dynamic relocations in the image in the early stages of booting, even
if the kernel is being run at the address it is linked at, since the
linker does not necessarily fill in words in the image for which there
are dynamic relocations.

The dynamic relocations are processed by a new function relocate(addr),
where the addr parameter is the virtual address where the image will be
run.  In fact we call it twice: once before calling prom_init, and again
when starting the main kernel.  This means that reloc_offset() returns
0 in prom_init (since it has been relocated to the address it is running
at), which necessitated a few adjustments.

The relocate() function currently only handles R_PPC64_RELATIVE
relocs, which are very simple to process (and the linker puts them all
first in the dynamic relocation section, and tells us how many of them
there are).  Currently we only get R_PPC64_RELATIVE relocs, plus one
R_PPC64_NONE reloc which we can ignore, plus some relocs against weak
undefined symbols (e.g. mach_iseries, mach_powermac) which we can also
ignore.  Ideally we would have a little program to check that we
hadn't inadvertently ended up with any other relocs.

This also changes __va and __pa to use an equivalent definition that is
simpler.  With the relocatable kernel, PAGE_OFFSET and MEMORY_START are
constants (for 64-bit) whereas PHYSICAL_START is a variable (and
KERNELBASE ideally should be too, but isn't yet).

With this, relocatable kernels still copy themselves down to physical
address 0 and run there.

Signed-off-by: Paul Mackerras <[EMAIL PROTECTED]>
---
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 63c9caf..5a5cf3f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -809,6 +809,19 @@ config PIN_TLB
 endmenu
 
 if PPC64
+config RELOCATABLE
+       bool "Build a relocatable kernel"
+       help
+         This builds a kernel image that is capable of running anywhere
+         in the RMA (real memory area) at any 16k-aligned base address.
+         The kernel is linked as a position-independent executable (PIE)
+         and contains dynamic relocations which are processed early
+         in the bootup process.
+
+         One use is for the kexec on panic case where the recovery kernel
+         must live at a different physical address than the primary
+         kernel.
+
 config PAGE_OFFSET
        hex
        default "0xc000000000000000"
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 9155c93..9e5a53f 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -63,7 +63,8 @@ override CC   += -m$(CONFIG_WORD_SIZE)
 override AR    := GNUTARGET=elf$(CONFIG_WORD_SIZE)-powerpc $(AR)
 endif
 
-LDFLAGS_vmlinux        := -Bstatic
+LDFLAGS_vmlinux-$(CONFIG_PPC64)$(CONFIG_RELOCATABLE) := -pie
+LDFLAGS_vmlinux        := -Bstatic $(LDFLAGS_vmlinux-yy)
 
 CFLAGS-$(CONFIG_PPC64) := -mminimal-toc -mtraceback=none  -mcall-aixdesc
 CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 -mmultiple
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 14174aa..9109e1f 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -310,8 +310,11 @@ $(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/%.dtb
 $(obj)/vmlinux.strip: vmlinux
        $(STRIP) -s -R .comment $< -o $@
 
+# The iseries hypervisor won't take an ET_DYN executable, so this
+# changes the type (byte 17) in the file to ET_EXEC (2).
 $(obj)/zImage.iseries: vmlinux
        $(STRIP) -s -R .comment $< -o $@
+       printf "\x02" | dd of=$@ conv=notrunc bs=1 seek=17
 
 $(obj)/uImage: vmlinux $(wrapperbits)
        $(call if_changed,wrap,uboot)
diff --git a/arch/powerpc/boot/elf_util.c b/arch/powerpc/boot/elf_util.c
index 7454aa4..1567a0c 100644
--- a/arch/powerpc/boot/elf_util.c
+++ b/arch/powerpc/boot/elf_util.c
@@ -27,7 +27,8 @@ int parse_elf64(void *hdr, struct elf_info *info)
              elf64->e_ident[EI_MAG3]  == ELFMAG3       &&
              elf64->e_ident[EI_CLASS] == ELFCLASS64    &&
              elf64->e_ident[EI_DATA]  == ELFDATA2MSB   &&
-             elf64->e_type            == ET_EXEC       &&
+             (elf64->e_type            == ET_EXEC ||
+              elf64->e_type            == ET_DYN)      &&
              elf64->e_machine         == EM_PPC64))
                return 0;
 
@@ -58,7 +59,8 @@ int parse_elf32(void *hdr, struct elf_info *info)
              elf32->e_ident[EI_MAG3]  == ELFMAG3       &&
              elf32->e_ident[EI_CLASS] == ELFCLASS32    &&
              elf32->e_ident[EI_DATA]  == ELFDATA2MSB   &&
-             elf32->e_type            == ET_EXEC       &&
+             (elf32->e_type            == ET_EXEC ||
+              elf32->e_type            == ET_DYN)      &&
              elf32->e_machine         == EM_PPC))
                return 0;
 
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 19c7a94..91af0cb 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -437,7 +437,7 @@ typedef struct {
        })
 #endif /* 1 */
 
-/* This is only valid for addresses >= KERNELBASE */
+/* This is only valid for addresses >= PAGE_OFFSET */
 static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
 {
        if (ssize == MMU_SEGSIZE_256M)
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index e088545..64e1445 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -71,15 +71,21 @@
 #define PAGE_OFFSET    ASM_CONST(CONFIG_PAGE_OFFSET)
 #define LOAD_OFFSET    ASM_CONST((CONFIG_KERNEL_START-CONFIG_PHYSICAL_START))
 
-#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_FLATMEM)
+#if defined(CONFIG_RELOCATABLE)
 #ifndef __ASSEMBLY__
 extern phys_addr_t memstart_addr;
 extern phys_addr_t kernstart_addr;
 #endif
 #define PHYSICAL_START kernstart_addr
-#define MEMORY_START   memstart_addr
 #else
 #define PHYSICAL_START ASM_CONST(CONFIG_PHYSICAL_START)
+#endif
+
+#ifdef CONFIG_PPC64
+#define MEMORY_START   0UL
+#elif defined(CONFIG_RELOCATABLE)
+#define MEMORY_START   memstart_addr
+#else
 #define MEMORY_START   (PHYSICAL_START + PAGE_OFFSET - KERNELBASE)
 #endif
 
@@ -92,8 +98,8 @@ extern phys_addr_t kernstart_addr;
 #define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
 #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
 
-#define __va(x) ((void *)((unsigned long)(x) - PHYSICAL_START + KERNELBASE))
-#define __pa(x) ((unsigned long)(x) + PHYSICAL_START - KERNELBASE)
+#define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET - MEMORY_START))
+#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET + MEMORY_START)
 
 /*
  * Unfortunately the PLT is in the BSS in the PPC32 ELF ABI,
diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
index 916018e..0336a6c 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -7,6 +7,7 @@
 #ifdef __powerpc64__
 
 extern char _end[];
+extern char __start_interrupts[], __end_interrupts[];
 
 static inline int in_kernel_text(unsigned long addr)
 {
@@ -16,6 +17,12 @@ static inline int in_kernel_text(unsigned long addr)
        return 0;
 }
 
+static inline int overlaps_kernel_text(unsigned long start, unsigned long end)
+{
+       return start < (unsigned long)__init_end &&
+               (unsigned long)_stext < end;
+}
+
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 64f5948..1fbc953 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_PPC64)           += setup_64.o sys_ppc32.o \
                                   paca.o cpu_setup_ppc970.o \
                                   cpu_setup_pa6t.o \
                                   firmware.o sysfs.o nvram_64.o
+obj64-$(CONFIG_RELOCATABLE)    += reloc_64.o
 obj-$(CONFIG_PPC64)            += vdso64/
 obj-$(CONFIG_ALTIVEC)          += vecemu.o vector.o
 obj-$(CONFIG_PPC_970_NAP)      += idle_power4.o
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index afbd530..abb3bfe 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -1353,6 +1353,12 @@ _INIT_STATIC(__boot_from_prom)
         */
        rldicr  r1,r1,0,59
 
+#ifdef CONFIG_RELOCATABLE
+       /* Relocate code for where we are now */
+       mr      r3,r26
+       bl      .relocate
+#endif
+
        /* Restore parameters */
        mr      r3,r31
        mr      r4,r30
@@ -1361,11 +1367,19 @@ _INIT_STATIC(__boot_from_prom)
        mr      r7,r27
 
        /* Do all of the interaction with OF client interface */
+       mr      r8,r26
        bl      .prom_init
        /* We never return */
        trap
 
 _STATIC(__after_prom_start)
+#ifdef CONFIG_RELOCATABLE
+       /* process relocations for the final address of the kernel */
+       lis     r25,PAGE_OFFSET@highest /* compute virtual base of kernel */
+       sldi    r25,r25,32
+       mr      r3,r25
+       bl      .relocate
+#endif
 
 /*
  * We need to run with _stext at physical address PHYSICAL_START.
@@ -1374,10 +1388,9 @@ _STATIC(__after_prom_start)
  *
  * Note: This process overwrites the OF exception vectors.
  */
-       LOAD_REG_IMMEDIATE(r3, PHYSICAL_START)  /* target addr */
-       cmpd    r3,r26                  /* In some cases the loader may  */
+       li      r3,0                    /* target addr */
+       mr.     r4,r26                  /* In some cases the loader may  */
        beq     9f                      /* have already put us at zero */
-       mr      r4,r26                  /* source address */
        lis     r5,(copy_to_here - _stext)@ha
        addi    r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
        li      r6,0x100                /* Start offset, the first 0x100 */
@@ -1610,6 +1623,13 @@ _INIT_STATIC(start_here_multiplatform)
        ori     r6,r6,MSR_RI
        mtmsrd  r6                      /* RI on */
 
+#ifdef CONFIG_RELOCATABLE
+       /* Save the physical address we're running at in kernstart_addr */
+       LOAD_REG_ADDR(r4, kernstart_addr)
+       clrldi  r0,r25,2
+       std     r0,0(r4)
+#endif
+
        /* The following gets the stack set up with the regs */
        /* pointing to the real addr of the kernel stack.  This is   */
        /* all done to support the C function call below which sets  */
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 623e8c3..48a3471 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -12,6 +12,7 @@
 
 #include <asm/lppaca.h>
 #include <asm/paca.h>
+#include <asm/sections.h>
 
 /* This symbol is provided by the linker - let it fill in the paca
  * field correctly */
@@ -79,7 +80,7 @@ void __init initialise_pacas(void)
                new_paca->lock_token = 0x8000;
                new_paca->paca_index = cpu;
                new_paca->kernel_toc = kernel_toc;
-               new_paca->kernelbase = KERNELBASE;
+               new_paca->kernelbase = (unsigned long) _stext;
                new_paca->kernel_msr = MSR_KERNEL;
                new_paca->hw_cpu_id = 0xffff;
                new_paca->slb_shadow_ptr = &slb_shadow[cpu];
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 87d83c5..4df797e 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -1163,7 +1163,9 @@ void __init early_init_devtree(void *params)
        parse_early_param();
 
        /* Reserve LMB regions used by kernel, initrd, dt, etc... */
-       lmb_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START);
+       lmb_reserve(0, __end_interrupts - _stext);
+       lmb_reserve(__pa(__end_interrupts),
+                   klimit - (unsigned long)__end_interrupts);
        reserve_kdump_trampoline();
        reserve_crashkernel();
        early_reserve_mem();
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 1f89885..2cbbbc7 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -2309,13 +2309,14 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
 
 unsigned long __init prom_init(unsigned long r3, unsigned long r4,
                               unsigned long pp,
-                              unsigned long r6, unsigned long r7)
+                              unsigned long r6, unsigned long r7,
+                              unsigned long kbase)
 {      
        struct prom_t *_prom;
        unsigned long hdr;
-       unsigned long offset = reloc_offset();
 
 #ifdef CONFIG_PPC32
+       unsigned long offset = reloc_offset();
        reloc_got2(offset);
 #endif
 
@@ -2371,7 +2372,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
         * Copy the CPU hold code
         */
        if (RELOC(of_platform) != PLATFORM_POWERMAC)
-               copy_and_flush(0, KERNELBASE + offset, 0x100, 0);
+               copy_and_flush(0, kbase, 0x100, 0);
 
        /*
         * Do early parsing of command line
@@ -2474,7 +2475,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
        reloc_got2(-offset);
 #endif
 
-       __start(hdr, KERNELBASE + offset, 0);
+       __start(hdr, kbase, 0);
 
        return 0;
 }
diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S
new file mode 100644
index 0000000..b47a0e1
--- /dev/null
+++ b/arch/powerpc/kernel/reloc_64.S
@@ -0,0 +1,87 @@
+/*
+ * Code to process dynamic relocations in the kernel.
+ *
+ * Copyright 2008 Paul Mackerras, IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/ppc_asm.h>
+
+RELA = 7
+RELACOUNT = 0x6ffffff9
+R_PPC64_RELATIVE = 22
+
+/*
+ * r3 = desired final address of kernel
+ */
+_GLOBAL(relocate)
+       mflr    r0
+       bcl     20,31,$+4
+0:     mflr    r12             /* r12 has runtime addr of label 0 */
+       mtlr    r0
+       ld      r11,(p_dyn - 0b)(r12)
+       add     r11,r11,r12     /* r11 has runtime addr of .dynamic section */
+       ld      r9,(p_rela - 0b)(r12)
+       add     r9,r9,r12       /* r9 has runtime addr of .rela.dyn section */
+       ld      r10,(p_st - 0b)(r12)
+       add     r10,r10,r12     /* r10 has runtime addr of _stext */
+
+       /*
+        * Scan the dynamic section for the RELA and RELACOUNT entries.
+        */
+       li      r7,0
+       li      r8,0
+1:     ld      r6,0(r11)       /* get tag */
+       cmpdi   r6,0
+       beq     4f              /* end of list */
+       cmpdi   r6,RELA
+       bne     2f
+       ld      r7,8(r11)       /* get RELA pointer in r7 */
+       b       3f
+2:     addis   r6,r6,(-RELACOUNT)@ha
+       cmpdi   r6,RELACOUNT@l
+       bne     3f
+       ld      r8,8(r11)       /* get RELACOUNT value in r8 */
+3:     addi    r11,r11,16
+       b       1b
+4:     cmpdi   r7,0            /* check we have both RELA and RELACOUNT */
+       cmpdi   cr1,r8,0
+       beq     6f
+       beq     cr1,6f
+
+       /*
+        * Work out linktime address of _stext and hence the
+        * relocation offset to be applied.
+        * cur_offset [r7] = rela.run [r9] - rela.link [r7]
+        * _stext.link [r10] = _stext.run [r10] - cur_offset [r7]
+        * final_offset [r3] = _stext.final [r3] - _stext.link [r10]
+        */
+       subf    r7,r7,r9        /* cur_offset */
+       subf    r10,r7,r10
+       subf    r3,r10,r3       /* final_offset */
+
+       /*
+        * Run through the list of relocations and process the
+        * R_PPC64_RELATIVE ones.
+        */
+       mtctr   r8
+5:     lwz     r0,12(9)        /* ELF64_R_TYPE(reloc->r_info) */
+       cmpwi   r0,R_PPC64_RELATIVE
+       bne     6f
+       ld      r6,0(r9)        /* reloc->r_offset */
+       ld      r0,16(r9)       /* reloc->r_addend */
+       add     r0,r0,r3
+       stdx    r0,r7,r6
+       addi    r9,r9,24
+       bdnz    5b
+
+6:     blr
+
+p_dyn: .llong  __dynamic_start - 0b
+p_rela:        .llong  __rela_dyn_start - 0b
+p_st:  .llong  _stext - 0b
+
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 4a8ce62..115c16d 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -186,6 +186,21 @@ SECTIONS
                *(.machine.desc)
                __machine_desc_end = . ;
        }
+       . = ALIGN(8);
+       .dynsym : { *(.dynsym) }
+       .dynstr : { *(.dynstr) }
+       .dynamic :
+       {
+               __dynamic_start = .;
+               *(.dynamic)
+       }
+       .hash : { *(.hash) }
+       .interp : { *(.interp) }
+       .rela.dyn :
+       {
+               __rela_dyn_start = .;
+               *(.rela*)
+       }
 
        /* freed after init ends here */
        . = ALIGN(PAGE_SIZE);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 5ce5a4d..f5dc515 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -173,14 +173,12 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
                tmp_mode = mode;
                
                /* Make non-kernel text non-executable */
-               if (!in_kernel_text(vaddr))
+               if (!overlaps_kernel_text(vaddr, vaddr + step))
                        tmp_mode = mode | HPTE_R_N;
 
                hash = hpt_hash(va, shift, ssize);
                hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
 
-               DBG("htab_bolt_mapping: calling %p\n", ppc_md.hpte_insert);
-
                BUG_ON(!ppc_md.hpte_insert);
                ret = ppc_md.hpte_insert(hpteg, va, paddr,
                                tmp_mode, HPTE_V_BOLTED, psize, ssize);
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index 4ae3d00..40f72c2 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -787,7 +787,7 @@ static void __devinit smp_core99_kick_cpu(int nr)
 {
        unsigned int save_vector;
        unsigned long target, flags;
-       unsigned int *vector = (unsigned int *)(KERNELBASE+0x100);
+       unsigned int *vector = (unsigned int *)(PAGE_OFFSET+0x100);
 
        if (nr < 0 || nr > 3)
                return;
@@ -801,7 +801,7 @@ static void __devinit smp_core99_kick_cpu(int nr)
        save_vector = *vector;
 
        /* Setup fake reset vector that does
-        *   b __secondary_start_pmac_0 + nr*8 - KERNELBASE
+        *   b __secondary_start_pmac_0 + nr*8
         */
        target = (unsigned long) __secondary_start_pmac_0 + nr * 8;
        patch_branch(vector, target, BRANCH_SET_LINK);
_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev

Reply via email to