From: Andy Lutomirski <l...@kernel.org>

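Make VSYSCALLs work fully in PTI mode by copying the kernel's vsyscall page
mapping into the user-space visible page tables and setting _PAGE_USER on
the page table entries leading to it.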

Signed-off-by: Andy Lutomirski <l...@kernel.org>
Signed-off-by: Thomas Gleixner <t...@linutronix.de>
Cc: Kees Cook <keesc...@chromium.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Brian Gerst <brge...@gmail.com>
Cc: David Laight <david.lai...@aculab.com>
Cc: Borislav Petkov <b...@alien8.de>

---
 arch/x86/entry/vsyscall/vsyscall_64.c |    6 +--
 arch/x86/include/asm/vsyscall.h       |    1 
 arch/x86/mm/pti.c                     |   63 ++++++++++++++++++++++++++++++++++
 3 files changed, 67 insertions(+), 3 deletions(-)

--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -343,14 +343,14 @@ int in_gate_area_no_mm(unsigned long add
  * vsyscalls but leave the page not present.  If so, we skip calling
  * this.
  */
-static void __init set_vsyscall_pgtable_user_bits(void)
+void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
 {
        pgd_t *pgd;
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
 
-       pgd = pgd_offset_k(VSYSCALL_ADDR);
+       pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
        pgd->pgd |= _PAGE_USER;
        p4d = p4d_offset(pgd, VSYSCALL_ADDR);
 #if CONFIG_PGTABLE_LEVELS >= 5
@@ -372,7 +372,7 @@ void __init map_vsyscall(void)
                             vsyscall_mode == NATIVE
                             ? PAGE_KERNEL_VSYSCALL
                             : PAGE_KERNEL_VVAR);
-               set_vsyscall_pgtable_user_bits();
+               set_vsyscall_pgtable_user_bits(swapper_pg_dir);
        }
 
        BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -7,6 +7,7 @@
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
 extern void map_vsyscall(void);
+extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
 
 /*
  * Called on instruction fetch fault in vsyscall page.
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -38,6 +38,7 @@
 
 #include <asm/cpufeature.h>
 #include <asm/hypervisor.h>
+#include <asm/vsyscall.h>
 #include <asm/cmdline.h>
 #include <asm/pti.h>
 #include <asm/pgtable.h>
@@ -191,6 +192,48 @@ static pmd_t *pti_user_pagetable_walk_pm
        return pmd_offset(pud, address);
 }
 
+/*
+ * Walk the user/shadow page tables to the PTE for @address, allocating a
+ * zeroed PTE page if the PMD entry is empty.  Does not support large pages.
+ *
+ * Only used to map *new* kernel data into the user/shadow page tables,
+ * never userspace data: a PTE already marked _PAGE_USER is rejected.
+ *
+ * Returns a pointer to a PTE on success, or NULL on failure.
+ */
+static pte_t *pti_user_pagetable_walk_pte(unsigned long address)
+{
+       gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+       pmd_t *pmd = pti_user_pagetable_walk_pmd(address);
+       pte_t *pte;
+
+       /* The PMD walk can fail; never dereference a NULL result. */
+       if (!pmd)
+               return NULL;
+
+       /* We can't do anything sensible if we hit a large mapping. */
+       if (WARN_ON(pmd_large(*pmd)))
+               return NULL;
+
+       if (pmd_none(*pmd)) {
+               unsigned long new_pte_page = __get_free_page(gfp);
+               if (!new_pte_page)
+                       return NULL;
+
+               if (pmd_none(*pmd))
+                       set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
+               else
+                       free_page(new_pte_page);
+       }
+
+       pte = pte_offset_kernel(pmd, address);
+       if (pte_flags(*pte) & _PAGE_USER) {
+               WARN_ONCE(1, "attempt to walk to user pte\n");
+               return NULL;
+       }
+       return pte;
+}
+
 static void __init
 pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
 {
@@ -250,6 +293,25 @@ static void __init pti_setup_espfix64(vo
 #endif
 }
 
+static void __init pti_setup_vsyscall(void)
+{
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
+       unsigned int lvl;
+       pte_t *kpte = lookup_address(VSYSCALL_ADDR, &lvl);
+       pte_t *upte;
+
+       /* Only a populated 4K kernel mapping of the page can be cloned. */
+       if (!kpte || WARN_ON(lvl != PG_LEVEL_4K) || pte_none(*kpte))
+               return;
+       upte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR);
+       if (WARN_ON(!upte))
+               return;
+
+       *upte = *kpte;
+       set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir));
+#endif
+}
+
 /*
  * Clone the populated PMDs of the user shared fixmaps into the user space
  * visible page table.
@@ -289,4 +351,5 @@ void __init pti_init(void)
        pti_clone_user_shared();
        pti_clone_entry_text();
        pti_setup_espfix64();
+       pti_setup_vsyscall();
 }


Reply via email to