Linus,

please pull the latest x86-urgent-for-linus git tree from:
   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-for-linus

This update contains:

 - The final fix for the hibernation resume path which addresses the
   observed crashes exposed by a recent change that sets NX on gap pages.

 - A trivial update to the maintainers file

Thanks,

	tglx

------------------>
Jon Mason (1):
      MAINTAINERS: Update the Calgary IOMMU entry

Rafael J. Wysocki (1):
      x86/power/64: Fix crash whan the hibernation code passes control to the image kernel


 MAINTAINERS                       |  6 ++--
 arch/x86/power/hibernate_64.c     | 69 +++++++++++++++++++++++++++++++++++----
 arch/x86/power/hibernate_asm_64.S | 31 +++++++++---------
 3 files changed, 81 insertions(+), 25 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 16700e4fcc4a..f589a9d0fb87 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2773,9 +2773,9 @@ F:	include/net/caif/
 F:	net/caif/
 
 CALGARY x86-64 IOMMU
-M:	Muli Ben-Yehuda <m...@il.ibm.com>
-M:	"Jon D. Mason" <jdma...@kudzu.us>
-L:	disc...@x86-64.org
+M:	Muli Ben-Yehuda <mu...@mulix.org>
+M:	Jon Mason <jdma...@kudzu.us>
+L:	io...@lists.linux-foundation.org
 S:	Maintained
 F:	arch/x86/kernel/pci-calgary_64.c
 F:	arch/x86/kernel/tce_64.c
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 009947d419a6..aba6e26d3891 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -27,7 +27,8 @@ extern asmlinkage __visible int restore_image(void);
  * Address to jump to in the last phase of restore in order to get to the image
  * kernel's text (this value is passed in the image header).
  */
-unsigned long restore_jump_address __visible;
+void *restore_jump_address __visible;
+unsigned long jump_address_phys;
 
 /*
  * Value of the cr3 register from before the hibernation (this value is passed
@@ -37,8 +38,51 @@ unsigned long restore_cr3 __visible;
 
 pgd_t *temp_level4_pgt __visible;
 
+void *restore_pgd_addr __visible;
+pgd_t restore_pgd __visible;
+
 void *relocated_restore_code __visible;
 
+static int prepare_temporary_text_mapping(void)
+{
+	unsigned long vaddr = (unsigned long)restore_jump_address;
+	unsigned long paddr = jump_address_phys & PMD_MASK;
+	pmd_t *pmd;
+	pud_t *pud;
+
+	/*
+	 * The new mapping only has to cover the page containing the image
+	 * kernel's entry point (jump_address_phys), because the switch over to
+	 * it is carried out by relocated code running from a page allocated
+	 * specifically for this purpose and covered by the identity mapping, so
+	 * the temporary kernel text mapping is only needed for the final jump.
+	 * However, in that mapping the virtual address of the image kernel's
+	 * entry point must be the same as its virtual address in the image
+	 * kernel (restore_jump_address), so the image kernel's
+	 * restore_registers() code doesn't find itself in a different area of
+	 * the virtual address space after switching over to the original page
+	 * tables used by the image kernel.
+	 */
+	pud = (pud_t *)get_safe_page(GFP_ATOMIC);
+	if (!pud)
+		return -ENOMEM;
+
+	restore_pgd = __pgd(__pa(pud) | _KERNPG_TABLE);
+
+	pud += pud_index(vaddr);
+	pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
+	if (!pmd)
+		return -ENOMEM;
+
+	set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+
+	pmd += pmd_index(vaddr);
+	set_pmd(pmd, __pmd(paddr | __PAGE_KERNEL_LARGE_EXEC));
+
+	restore_pgd_addr = temp_level4_pgt + pgd_index(vaddr);
+	return 0;
+}
+
 static void *alloc_pgt_page(void *context)
 {
 	return (void *)get_safe_page(GFP_ATOMIC);
@@ -59,10 +103,19 @@ static int set_up_temporary_mappings(void)
 	if (!temp_level4_pgt)
 		return -ENOMEM;
 
-	/* It is safe to reuse the original kernel mapping */
+	/* Re-use the original kernel text mapping for now */
 	set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
 		init_level4_pgt[pgd_index(__START_KERNEL_map)]);
 
+	/*
+	 * Prepare a temporary mapping for the kernel text, but don't use it
+	 * just yet, we'll switch over to it later. It only has to cover one
+	 * piece of code: the page containing the image kernel's entry point.
+	 */
+	result = prepare_temporary_text_mapping();
+	if (result)
+		return result;
+
 	/* Set up the direct mapping from scratch */
 	for (i = 0; i < nr_pfn_mapped; i++) {
 		mstart = pfn_mapped[i].start << PAGE_SHIFT;
@@ -89,8 +142,7 @@ int swsusp_arch_resume(void)
 	relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
 	if (!relocated_restore_code)
 		return -ENOMEM;
-	memcpy(relocated_restore_code, &core_restore_code,
-	       &restore_registers - &core_restore_code);
+	memcpy(relocated_restore_code, &core_restore_code, PAGE_SIZE);
 
 	restore_image();
 	return 0;
@@ -108,12 +160,13 @@ int pfn_is_nosave(unsigned long pfn)
 }
 
 struct restore_data_record {
-	unsigned long jump_address;
+	void *jump_address;
+	unsigned long jump_address_phys;
 	unsigned long cr3;
 	unsigned long magic;
 };
 
-#define RESTORE_MAGIC	0x0123456789ABCDEFUL
+#define RESTORE_MAGIC	0x123456789ABCDEF0UL
 
 /**
  *	arch_hibernation_header_save - populate the architecture specific part
@@ -126,7 +179,8 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
 
 	if (max_size < sizeof(struct restore_data_record))
 		return -EOVERFLOW;
-	rdr->jump_address = restore_jump_address;
+	rdr->jump_address = &restore_registers;
+	rdr->jump_address_phys = __pa_symbol(&restore_registers);
 	rdr->cr3 = restore_cr3;
 	rdr->magic = RESTORE_MAGIC;
 	return 0;
@@ -142,6 +196,7 @@ int arch_hibernation_header_restore(void *addr)
 	struct restore_data_record *rdr = addr;
 
 	restore_jump_address = rdr->jump_address;
+	jump_address_phys = rdr->jump_address_phys;
 	restore_cr3 = rdr->cr3;
 	return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
 }
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index 4400a43b9e28..3856ea4c9299 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -44,9 +44,6 @@ ENTRY(swsusp_arch_suspend)
 	pushfq
 	popq	pt_regs_flags(%rax)
 
-	/* save the address of restore_registers */
-	movq	$restore_registers, %rax
-	movq	%rax, restore_jump_address(%rip)
 	/* save cr3 */
 	movq	%cr3, %rax
 	movq	%rax, restore_cr3(%rip)
@@ -72,8 +69,10 @@ ENTRY(restore_image)
 	movq	%rax, %cr4;  # turn PGE back on
 
 	/* prepare to jump to the image kernel */
-	movq	restore_jump_address(%rip), %rax
 	movq	restore_cr3(%rip), %rbx
+	movq	restore_jump_address(%rip), %r10
+	movq	restore_pgd(%rip), %r8
+	movq	restore_pgd_addr(%rip), %r9
 
 	/* prepare to copy image data to their original locations */
 	movq	restore_pblist(%rip), %rdx
@@ -96,20 +95,22 @@ ENTRY(core_restore_code)
 	/* progress to the next pbe */
 	movq	pbe_next(%rdx), %rdx
 	jmp	.Lloop
+
 .Ldone:
+	/* switch over to the temporary kernel text mapping */
+	movq	%r8, (%r9)
+	/* flush TLB */
+	movq	%rax, %rdx
+	andq	$~(X86_CR4_PGE), %rdx
+	movq	%rdx, %cr4;  # turn off PGE
+	movq	%cr3, %rcx;  # flush TLB
+	movq	%rcx, %cr3;
+	movq	%rax, %cr4;  # turn PGE back on
 	/* jump to the restore_registers address from the image header */
-	jmpq	*%rax
-	/*
-	 * NOTE: This assumes that the boot kernel's text mapping covers the
-	 * image kernel's page containing restore_registers and the address of
-	 * this page is the same as in the image kernel's text mapping (it
-	 * should always be true, because the text mapping is linear, starting
-	 * from 0, and is supposed to cover the entire kernel text for every
-	 * kernel).
-	 *
-	 * code below belongs to the image kernel
-	 */
+	jmpq	*%r10
 
+	/* code below belongs to the image kernel */
+	.align PAGE_SIZE
 ENTRY(restore_registers)
 	FRAME_BEGIN
 	/* go back to the original page tables */