Eric and Hugh have reported an instant reboot caused by my recent changes
to the decompression code.

The root cause is that I didn't realize that we need to adjust the GOT to
be able to run C code that early.

The problem is only visible with an old toolchain. Binutils >= 2.24 is able
to eliminate GOT references by replacing them with RIP-relative address
loads[1].

We need to adjust the GOT twice:
 - before calling paging_prepare(), using the address the binary was loaded
   at by the bootloader
 - before relocating the kernel to its new place, using the relocation
   address

[1] 
https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;a=commitdiff;h=80d873266dec

Signed-off-by: Kirill A. Shutemov <[email protected]>
Fixes: 194a9749c73d ("x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G")
Reported-by: Eric Dumazet <[email protected]>
Reported-by: Hugh Dickins <[email protected]>
---
 arch/x86/boot/compressed/head_64.S | 66 ++++++++++++++++++++++++------
 1 file changed, 53 insertions(+), 13 deletions(-)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index fca012baba19..6cbb2d64c91e 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -305,6 +305,21 @@ ENTRY(startup_64)
        /* Set up the stack */
        leaq    boot_stack_end(%rbx), %rsp
 
+       /*
+        * paging_prepare() and cleanup_trampoline() below can have GOT
+        * references. Adjust the table with address we are running at.
+        */
+
+       /* The GOT was not adjusted before */
+       xorq    %rax, %rax
+
+       /* Calculate the address the binary is loaded at. */
+       call    1f
+1:     popq    %rdi
+       subq    $1b, %rdi
+
+       call    adjust_gop
+
        /*
         * At this point we are in long mode with 4-level paging enabled,
         * but we might want to enable 5-level paging or vice versa.
@@ -381,6 +396,24 @@ trampoline_return:
        pushq   $0
        popfq
 
+       /*
+        * Previously we've adjusted the GOT with the address the binary was
+        * loaded at. Now we need to re-adjust it for the relocation address.
+        */
+
+       /*
+        * Calculate the address the binary is loaded at.
+        * This address was used to adjust the table before and we need to
+        * undo the change.
+        */
+       call    1f
+1:     popq    %rax
+       subq    $1b, %rax
+
+       /* The new adjustment is the relocation address */
+       movq    %rbx, %rdi
+       call    adjust_gop
+
 /*
  * Copy the compressed kernel to the end of our buffer
  * where decompression in place becomes safe.
@@ -481,19 +514,6 @@ relocated:
        shrq    $3, %rcx
        rep     stosq
 
-/*
- * Adjust our own GOT
- */
-       leaq    _got(%rip), %rdx
-       leaq    _egot(%rip), %rcx
-1:
-       cmpq    %rcx, %rdx
-       jae     2f
-       addq    %rbx, (%rdx)
-       addq    $8, %rdx
-       jmp     1b
-2:
-       
 /*
  * Do the extraction, and jump to the new kernel..
  */
@@ -512,6 +532,26 @@ relocated:
  */
        jmp     *%rax
 
+/*
+ * Adjust global offset table: rebase every 8-byte entry in [_got, _egot).
+ *
+ * RAX is the previous adjustment of the table to undo (0 if it's the first time we touch the GOT).
+ * RDI is the new adjustment to apply. Clobbers RDX, RCX and flags.
+ */
+adjust_gop:
+       /* Walk through the GOT adding the address to the entries */
+       leaq    _got(%rip), %rdx        /* RDX = current entry, starts at _got */
+       leaq    _egot(%rip), %rcx       /* RCX = end of the table (_egot) */
+1:
+       cmpq    %rcx, %rdx
+       jae     2f                      /* Done once RDX reaches _egot */
+       subq    %rax, (%rdx)    /* Undo previous adjustment */
+       addq    %rdi, (%rdx)    /* Apply the new adjustment */
+       addq    $8, %rdx                /* Advance to the next 8-byte entry */
+       jmp     1b
+2:
+       ret
+
        .code32
 /*
  * This is the 32-bit trampoline that will be copied over to low memory.
-- 
2.17.0

Reply via email to