Back when we would dynamically add mappings to the usermode tables,
we needed to preallocate all the high top-level entries in the
usermode tables.  We don't need this in recent versions of PTI, so
get rid of preallocation.

With preallocation gone, the comments in pti_set_user_pgd() make
even less sense.  Rearrange the function to make it entirely obvious
what it does and does not do.  FWIW, I haven't even tried to wrap my
head around the old logic, since it seemed to be somewhere between
incomprehensible and wrong.

I admit that a bit of the earlier complexity was based on my
suggestions.  Mea culpa.

Signed-off-by: Andy Lutomirski <l...@kernel.org>
---
 arch/x86/include/asm/pgtable_64.h | 74 +++++++++++++++------------------------
 arch/x86/mm/pti.c                 | 52 ++++++---------------------
 2 files changed, 39 insertions(+), 87 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index f5adf92091c6..be8d086de927 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -195,14 +195,6 @@ static inline bool pgdp_maps_userspace(void *__ptr)
 }
 
 /*
- * Does this PGD allow access from userspace?
- */
-static inline bool pgd_userspace_access(pgd_t pgd)
-{
-       return pgd.pgd & _PAGE_USER;
-}
-
-/*
  * Take a PGD location (pgdp) and a pgd value that needs to be set there.
  * Populates the user and returns the resulting PGD that must be set in
  * the kernel copy of the page tables.
@@ -213,50 +205,42 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
        if (!static_cpu_has_bug(X86_BUG_CPU_SECURE_MODE_PTI))
                return pgd;
 
-       if (pgd_userspace_access(pgd)) {
-               if (pgdp_maps_userspace(pgdp)) {
-                       /*
-                        * The user page tables get the full PGD,
-                        * accessible from userspace:
-                        */
-                       kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
-                       /*
-                        * For the copy of the pgd that the kernel uses,
-                        * make it unusable to userspace.  This ensures on
-                        * in case that a return to userspace with the
-                        * kernel CR3 value, userspace will crash instead
-                        * of running.
-                        *
-                        * Note: NX might be not available or disabled.
-                        */
-                       if (__supported_pte_mask & _PAGE_NX)
-                               pgd.pgd |= _PAGE_NX;
-               }
-       } else if (pgd_userspace_access(*pgdp)) {
+       if (pgdp_maps_userspace(pgdp)) {
                /*
-                * We are clearing a _PAGE_USER PGD for which we presumably
-                * populated the user PGD.  We must now clear the user PGD
-                * entry.
+                * The user page tables get the full PGD,
+                * accessible from userspace:
                 */
-               if (pgdp_maps_userspace(pgdp)) {
-                       kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
-               } else {
-                       /*
-                        * Attempted to clear a _PAGE_USER PGD which is in
-                        * the kernel portion of the address space.  PGDs
-                        * are pre-populated and we never clear them.
-                        */
-                       WARN_ON_ONCE(1);
-               }
+               kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
+
+               /*
+                * If this is normal user memory, make it NX in the kernel
+                * pagetables so that, if we somehow screw up and return to
+                * usermode with the kernel CR3 loaded, we'll get a page
+                * fault instead of allowing user code to execute with
+                * the wrong CR3.
+                *
+                * As exceptions, we don't set NX if:
+                *  - this is EFI or similar, the kernel may execute from it
+                *  - we don't have NX support
+                *  - we're clearing the PGD (i.e. pgd.pgd == 0).
+                */
+               if ((pgd.pgd & _PAGE_USER) && (__supported_pte_mask & _PAGE_NX))
+                       pgd.pgd |= _PAGE_NX;
        } else {
                /*
-                * _PAGE_USER was not set in either the PGD being set or
-                * cleared.  All kernel PGDs should be pre-populated so
-                * this should never happen after boot.
+                * Changes to the high (kernel) portion of the kernelmode
+                * page tables are not automatically propagated to the
+                * usermode tables.
+                *
+                * Users should keep in mind that, unlike the kernelmode
+                * tables, there is no vmalloc_fault equivalent for the
+                * usermode tables.  Top-level entries added to init_mm's
+                * usermode pgd after boot will not be automatically
+                * propagated to other mms.
                 */
-               WARN_ON_ONCE(system_state == SYSTEM_RUNNING);
        }
 #endif
+
        /* return the copy of the PGD we want the kernel to use: */
        return pgd;
 }
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index bd5d042adb3c..f48645d2f3fd 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -83,8 +83,16 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
        }
 
        if (pgd_none(*pgd)) {
-               WARN_ONCE(1, "All user pgds should have been populated\n");
-               return NULL;
+               unsigned long new_p4d_page = __get_free_page(gfp);
+               if (!new_p4d_page)
+                       return NULL;
+
+               if (pgd_none(*pgd)) {
+                       set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
+                       new_p4d_page = 0;
+               }
+               if (new_p4d_page)
+                       free_page(new_p4d_page);
        }
        BUILD_BUG_ON(pgd_large(*pgd) != 0);
 
@@ -193,45 +201,6 @@ static void __init pti_clone_entry_text(void)
 }
 
 /*
- * Ensure that the top level of the user page tables are entirely
- * populated.  This ensures that all processes that get forked have the
- * same entries.  This way, we do not have to ever go set up new entries in
- * older processes.
- *
- * Note: we never free these, so there are no updates to them after this.
- */
-static void __init pti_init_all_pgds(void)
-{
-       pgd_t *pgd;
-       int i;
-
-       pgd = kernel_to_user_pgdp(pgd_offset_k(0UL));
-       for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
-               /*
-                * Each PGD entry moves up PGDIR_SIZE bytes through the
-                * address space, so get the first virtual address mapped
-                * by PGD #i:
-                */
-               unsigned long addr = i * PGDIR_SIZE;
-#if CONFIG_PGTABLE_LEVELS > 4
-               p4d_t *p4d = p4d_alloc_one(&init_mm, addr);
-               if (!p4d) {
-                       WARN_ON(1);
-                       break;
-               }
-               set_pgd(pgd + i, __pgd(_KERNPG_TABLE | __pa(p4d)));
-#else /* CONFIG_PGTABLE_LEVELS <= 4 */
-               pud_t *pud = pud_alloc_one(&init_mm, addr);
-               if (!pud) {
-                       WARN_ON(1);
-                       break;
-               }
-               set_pgd(pgd + i, __pgd(_KERNPG_TABLE | __pa(pud)));
-#endif /* CONFIG_PGTABLE_LEVELS */
-       }
-}
-
-/*
  * Initialize kernel page table isolation
  */
 void __init pti_init(void)
@@ -241,7 +210,6 @@ void __init pti_init(void)
 
        pr_info("enabled\n");
 
-       pti_init_all_pgds();
        pti_clone_user_shared();
        pti_clone_entry_text();
 }
-- 
2.13.6

Reply via email to