The branch main has been updated by mhorne:

URL: https://cgit.FreeBSD.org/src/commit/?id=de09dcebd720d5776df4cc4e67ffc7da757e4305

commit de09dcebd720d5776df4cc4e67ffc7da757e4305
Author:     Mitchell Horne <mho...@freebsd.org>
AuthorDate: 2024-06-20 18:30:17 +0000
Commit:     Mitchell Horne <mho...@freebsd.org>
CommitDate: 2024-06-20 18:33:19 +0000

    riscv: rework page table bootstrap
    
    The overall goal of the change is to reduce the amount of work done in
    locore assembly, and defer as much as possible until pmap_bootstrap().
    Currently, half the setup is done in assembly, and then we pass the l1pt
    address to pmap_bootstrap() where it is amended with other mappings.
    
    Inspiration and understanding have been taken from amd64's
    create_pagetables() routine, and I try to present the page table
    construction in the same way: a linear procedure with commentary
    explaining what we are doing and why. Thus the core of the new
    implementation is contained in pmap_create_pagetables().
    
    Once pmap_create_pagetables() has finished, we switch to the new
    pagetable root and leave the bootstrap ones created by locore behind,
    resulting in a minimal 8kB of wasted space.
    
    Having the whole procedure in one place, in C code, allows it to be more
    easily understood, while also making it more amenable to future changes
    which depend on CPU feature/errata detection.
    
    Note that with this change the size of the early devmap is bumped up
    from one to four L2 pages (8MB).
    
    Reviewed by:    markj
    MFC after:      1 month
    Sponsored by:   The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D45327
---
 sys/riscv/include/pte.h     |   2 -
 sys/riscv/include/vmparam.h |   7 +-
 sys/riscv/riscv/genassym.c  |   2 +
 sys/riscv/riscv/locore.S    |  87 +++++------
 sys/riscv/riscv/pmap.c      | 355 ++++++++++++++++++++++++++++----------------
 5 files changed, 272 insertions(+), 181 deletions(-)

diff --git a/sys/riscv/include/pte.h b/sys/riscv/include/pte.h
index da7bd051e122..031cae667f0c 100644
--- a/sys/riscv/include/pte.h
+++ b/sys/riscv/include/pte.h
@@ -93,5 +93,3 @@ typedef       uint64_t        pn_t;                   /* page number */
 #define        PTE_SIZE        8
 
 #endif /* !_MACHINE_PTE_H_ */
-
-/* End of pte.h */
diff --git a/sys/riscv/include/vmparam.h b/sys/riscv/include/vmparam.h
index 7bfa587ce37c..c750791bb280 100644
--- a/sys/riscv/include/vmparam.h
+++ b/sys/riscv/include/vmparam.h
@@ -238,13 +238,16 @@
 extern vm_paddr_t dmap_phys_base;
 extern vm_paddr_t dmap_phys_max;
 extern vm_offset_t dmap_max_addr;
-extern vm_offset_t init_pt_va;
 #endif
 
 #define        ZERO_REGION_SIZE        (64 * 1024)     /* 64KB */
 
+/*
+ * The top of KVA is reserved for early device mappings.
+ */
 #define        DEVMAP_MAX_VADDR        VM_MAX_KERNEL_ADDRESS
-#define        PMAP_MAPDEV_EARLY_SIZE  L2_SIZE
+#define        DEVMAP_MIN_VADDR        (DEVMAP_MAX_VADDR - PMAP_MAPDEV_EARLY_SIZE)
+#define        PMAP_MAPDEV_EARLY_SIZE  (4 * L2_SIZE)
 
 /*
  * No non-transparent large page support in the pmap.
diff --git a/sys/riscv/riscv/genassym.c b/sys/riscv/riscv/genassym.c
index b1e1034fd479..105e17e679b7 100644
--- a/sys/riscv/riscv/genassym.c
+++ b/sys/riscv/riscv/genassym.c
@@ -60,6 +60,8 @@ ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS);
 ASSYM(VM_MAX_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS);
 ASSYM(PMAP_MAPDEV_EARLY_SIZE, PMAP_MAPDEV_EARLY_SIZE);
 
+ASSYM(PM_SATP, offsetof(struct pmap, pm_satp));
+
 ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
 ASSYM(PCB_SIZE, sizeof(struct pcb));
 ASSYM(PCB_RA, offsetof(struct pcb, pcb_ra));
diff --git a/sys/riscv/riscv/locore.S b/sys/riscv/riscv/locore.S
index 17fdcc8ef55c..f7363fd025a7 100644
--- a/sys/riscv/riscv/locore.S
+++ b/sys/riscv/riscv/locore.S
@@ -1,6 +1,10 @@
 /*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
  * Copyright (c) 2015-2018 Ruslan Bukin <b...@bsdpad.com>
  * All rights reserved.
+ * Copyright (c) 2019-2021 Mitchell Horne <mho...@freebsd.org>
+ * Copyright (c) 2022-2024 The FreeBSD Foundation
  *
  * Portions of this software were developed by SRI International and the
  * University of Cambridge Computer Laboratory under DARPA/AFRL contract
@@ -10,6 +14,9 @@
  * Computer Laboratory as part of the CTSRD Project, with support from the
  * UK Higher Education Innovation Fund (HEIF).
  *
+ * Portions of this software were developed by Mitchell Horne
+ * <mho...@freebsd.org> under sponsorship from the FreeBSD Foundation.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -36,7 +43,6 @@
 
 #include <machine/asm.h>
 #include <machine/param.h>
-#include <machine/trap.h>
 #include <machine/riscvreg.h>
 #include <machine/pte.h>
 
@@ -104,16 +110,24 @@ _start:
        mv      a1, zero
 
        /*
-        * Set up page tables: map a 1GB region starting at KERNBASE using 2MB
-        * superpages, starting from the first 2MB physical page into which the
-        * kernel was loaded.  Also reserve an L2 page for the early device map
-        * and map the DTB, if any, using the second-last entry of that L2
-        * page.  This is hopefully enough to get us to pmap_bootstrap().
+        * Set up page tables: Our goal is to enable virtual memory, doing the
+        * minimum amount of work in assembly; just what is required to
+        * bootstrap. We will construct the real page tables in C code, in
+        * pmap_bootstrap().
+        *
+        * Here we map a 1GB region starting at KERNBASE using 2MB superpages,
+        * starting from the first 2MB physical page into which the kernel was
+        * loaded.
         *
-        * Implementations are required to provide SV39 mode, so we use that
-        * initially and will optionally enable SV48 mode during kernel pmap
-        * initialization.
+        * We also use an L1 entry to create a 1GB identity map (1:1 PA->VA).
+        * This is useful for two reasons:
+        *  - handling the DTB pointer passed from SBI firmware (physical addr)
+        *  - simpler construction of pagetables in pmap_bootstrap()
         *
+        * Implementations are required to provide Sv39 mode, so we use that
+        * here and will conditionally enable Sv48 (or higher) later.
+        *
+        * We arrive here with:
         *  a0 - modulep or zero
         *  a1 - zero or dtbp
         */
@@ -122,7 +136,7 @@ pagetables:
        jal     get_physmem
 
        /* Construct 1GB Identity Map (1:1 PA->VA) */
-       lla     s1, pagetable_l1
+       lla     s1, bootstrap_pt_l1
 
        srli    s2, s9, L1_SHIFT        /* kernstart >> L1_SHIFT */
        andi    a5, s2, Ln_ADDR_MASK    /* & Ln_ADDR_MASK */
@@ -136,11 +150,11 @@ pagetables:
        add     t0, s1, a5
        sd      t6, (t0)                /* Store new PTE */
 
-       /* Construct the virtual address space */
+       /* Construct the virtual address space at KERNBASE */
 
        /* Add L1 entry for kernel */
-       lla     s1, pagetable_l1
-       lla     s2, pagetable_l2        /* Link to next level PN */
+       lla     s1, bootstrap_pt_l1
+       lla     s2, bootstrap_pt_l2     /* Link to next level PN */
        srli    s2, s2, PAGE_SHIFT
 
        li      a5, KERNBASE
@@ -157,9 +171,9 @@ pagetables:
        sd      t6, (t0)
 
        /* Level 2 superpages (512 x 2MiB) */
-       lla     s1, pagetable_l2
+       lla     s1, bootstrap_pt_l2
        srli    t4, s9, L2_SHIFT        /* Div physmem base by 2 MiB */
-       li      t2, 512                 /* Build 512 entries */
+       li      t2, Ln_ENTRIES          /* Build 512 entries */
        add     t3, t4, t2
        li      t0, (PTE_KERN | PTE_X)
 1:
@@ -171,24 +185,6 @@ pagetables:
        addi    t4, t4, 1
        bltu    t4, t3, 1b
 
-       /* Create an L1 table entry for early devmap */
-       lla     s1, pagetable_l1
-       lla     s2, pagetable_l2_devmap /* Link to next level PN */
-       srli    s2, s2, PAGE_SHIFT
-
-       li      a5, (VM_MAX_KERNEL_ADDRESS - PMAP_MAPDEV_EARLY_SIZE)
-       srli    a5, a5, L1_SHIFT        /* >> L1_SHIFT */
-       andi    a5, a5, Ln_ADDR_MASK    /* & Ln_ADDR_MASK */
-       li      t4, PTE_V
-       slli    t5, s2, PTE_PPN0_S      /* (s2 << PTE_PPN0_S) */
-       or      t6, t4, t5
-
-       /* Store the L1 table entry */
-       li      a6, PTE_SIZE
-       mulw    a5, a5, a6
-       add     t0, s1, a5
-       sd      t6, (t0)
-
        /* Page tables END */
 
        /*
@@ -203,7 +199,7 @@ pagetables:
        csrw    stvec, t0
 
        /* Set page tables base register */
-       lla     s2, pagetable_l1
+       lla     s2, bootstrap_pt_l1
        srli    s2, s2, PAGE_SHIFT
        li      t0, SATP_MODE_SV39
        or      s2, s2, t0
@@ -244,8 +240,6 @@ va:
        bltu    t0, t1, 1b
 
        /* Fill riscv_bootparams */
-       la      t0, pagetable_l1
-       sd      t0, RISCV_BOOTPARAMS_KERN_L1PT(sp)
        sd      s9, RISCV_BOOTPARAMS_KERN_PHYS(sp)
 
        la      t0, initstack
@@ -278,12 +272,13 @@ initstack:
        .space  (PAGE_SIZE * KSTACK_PAGES)
 initstack_end:
 
-       .align  12
-pagetable_l1:
-       .space  PAGE_SIZE
-pagetable_l2:
+/*
+ * Static space for the bootstrap page tables. Unused after pmap_bootstrap().
+ */
+       .balign PAGE_SIZE
+bootstrap_pt_l1:
        .space  PAGE_SIZE
-pagetable_l2_devmap:
+bootstrap_pt_l2:
        .space  PAGE_SIZE
 
        .align 3
@@ -292,10 +287,6 @@ virt_map:
 hart_lottery:
        .space  4
 
-       .globl init_pt_va
-init_pt_va:
-       .quad pagetable_l2      /* XXX: Keep page tables VA */
-
 #ifndef SMP
 ENTRY(mpentry)
 1:
@@ -343,10 +334,8 @@ ENTRY(mpentry)
        csrw    stvec, t0
 
        /* Set page tables base register */
-       lla     s2, pagetable_l1
-       srli    s2, s2, PAGE_SHIFT
-       li      t0, SATP_MODE_SV39
-       or      s2, s2, t0
+       lla     t2, kernel_pmap_store
+       ld      s2, PM_SATP(t2)
        sfence.vma
        csrw    satp, s2
 
diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c
index ca051a9e4416..937bb22371e2 100644
--- a/sys/riscv/riscv/pmap.c
+++ b/sys/riscv/riscv/pmap.c
@@ -243,8 +243,7 @@ CTASSERT((DMAP_MIN_ADDRESS  & ~L1_OFFSET) == DMAP_MIN_ADDRESS);
 CTASSERT((DMAP_MAX_ADDRESS  & ~L1_OFFSET) == DMAP_MAX_ADDRESS);
 
 /*
- * This code assumes that the early DEVMAP is L2_SIZE aligned and is fully
- * contained within a single L2 entry.
+ * This code assumes that the early DEVMAP is L2_SIZE aligned.
  */
 CTASSERT((PMAP_MAPDEV_EARLY_SIZE & L2_OFFSET) == 0);
 
@@ -324,6 +323,8 @@ static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
 
 static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode);
 
+static uint64_t pmap_satp_mode(void);
+
 #define        pmap_clear(pte)                 pmap_store(pte, 0)
 #define        pmap_clear_bits(pte, bits)      atomic_clear_64(pte, bits)
 #define        pmap_load_store(pte, entry)     atomic_swap_64(pte, entry)
@@ -361,6 +362,28 @@ pagezero(void *p)
     ((((l2) & ~PTE_HI_MASK) >> PTE_PPN1_S) << L2_SHIFT)
 #define PTE_TO_VM_PAGE(pte) PHYS_TO_VM_PAGE(PTE_TO_PHYS(pte))
 
+/*
+ * Construct a page table entry of the specified level pointing to physical
+ * address pa, with PTE bits 'bits'.
+ *
+ * A leaf PTE of any level must point to an address matching its alignment,
+ * e.g. L2 pages must be 2MB aligned in memory.
+ */
+#define        L1_PTE(pa, bits)        ((((pa) >> L1_SHIFT) << PTE_PPN2_S) | (bits))
+#define        L2_PTE(pa, bits)        ((((pa) >> L2_SHIFT) << PTE_PPN1_S) | (bits))
+#define        L3_PTE(pa, bits)        ((((pa) >> L3_SHIFT) << PTE_PPN0_S) | (bits))
+
+/*
+ * Construct a page directory entry (PDE), pointing to next level entry at pa,
+ * with PTE bits 'bits'.
+ *
+ * Unlike PTEs, page directory entries can point to any 4K-aligned physical
+ * address.
+ */
+#define        L0_PDE(pa, bits)        L3_PTE(pa, bits)
+#define        L1_PDE(pa, bits)        L3_PTE(pa, bits)
+#define        L2_PDE(pa, bits)        L3_PTE(pa, bits)
+
 static __inline pd_entry_t *
 pmap_l0(pmap_t pmap, vm_offset_t va)
 {
@@ -501,45 +524,20 @@ pmap_distribute_l1(struct pmap *pmap, vm_pindex_t l1index,
        mtx_unlock(&allpmaps_lock);
 }
 
+/*
+ * This should only be used during pmap bootstrap e.g. by
+ * pmap_create_pagetables().
+ */
 static pt_entry_t *
-pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
-    u_int *l2_slot)
+pmap_early_alloc_tables(vm_paddr_t *freemempos, int npages)
 {
-       pt_entry_t *l2;
-       pd_entry_t *l1 __diagused;
-
-       l1 = (pd_entry_t *)l1pt;
-       *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;
+       pt_entry_t *pt;
 
-       /* Check locore has used a table L1 map */
-       KASSERT((l1[*l1_slot] & PTE_RX) == 0,
-               ("Invalid bootstrap L1 table"));
+       pt = (pt_entry_t *)*freemempos;
+       *freemempos += npages * PAGE_SIZE;
+       bzero(pt, npages * PAGE_SIZE);
 
-       /* Find the address of the L2 table */
-       l2 = (pt_entry_t *)init_pt_va;
-       *l2_slot = pmap_l2_index(va);
-
-       return (l2);
-}
-
-static vm_paddr_t
-pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
-{
-       u_int l1_slot, l2_slot;
-       pt_entry_t *l2;
-       vm_paddr_t ret;
-
-       l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);
-
-       /* Check locore has used L2 superpages */
-       KASSERT((l2[l2_slot] & PTE_RX) != 0,
-               ("Invalid bootstrap L2 table"));
-
-       /* L2 is superpages */
-       ret = L2PTE_TO_PHYS(l2[l2_slot]);
-       ret += (va & L2_OFFSET);
-
-       return (ret);
+       return (pt);
 }
 
 static void
@@ -575,38 +573,152 @@ pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa)
        sfence_vma();
 }
 
-static vm_offset_t
-pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
+/*
+ *     Create a new set of pagetables to run the kernel with.
+ *
+ *     An initial, temporary setup was created in locore.S, which serves well
+ *     enough to get us this far. It mapped kernstart -> KERNBASE, using 2MB
+ *     superpages, and created a 1GB identity map, which allows this function
+ *     to dereference physical addresses.
+ *
+ *     The memory backing these page tables is allocated in the space
+ *     immediately following the kernel's preload area. Depending on the size
+ *     of this area, some, all, or none of these pages can be implicitly
+ *     mapped by the kernel's 2MB mappings. This memory will only ever be
+ *     accessed through the direct map, however.
+ */
+static vm_paddr_t
+pmap_create_pagetables(vm_paddr_t kernstart, vm_size_t kernlen,
+    vm_paddr_t min_pa, vm_paddr_t max_pa, vm_paddr_t *root_pt_phys)
 {
-       vm_offset_t l3pt;
-       pt_entry_t entry;
-       pd_entry_t *l2;
-       vm_paddr_t pa;
-       u_int l2_slot;
-       pn_t pn;
+       pt_entry_t *l0, *l1, *kern_l2, *kern_l3, *devmap_l3;
+       pd_entry_t *devmap_l2;
+       vm_paddr_t kernend, freemempos, pa;
+       int nkernl2, nkernl3, ndevmapl3;
+       int i, slot;
+       int mode;
 
-       KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));
+       kernend = kernstart + kernlen;
 
-       l2 = pmap_l2(kernel_pmap, va);
-       l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1));
-       l2_slot = pmap_l2_index(va);
-       l3pt = l3_start;
+       /* Static allocations begin after the kernel staging area. */
+       freemempos = roundup2(kernend, PAGE_SIZE);
 
-       for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
-               KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));
+       /* Detect Sv48 mode. */
+       mode = PMAP_MODE_SV39;
+       TUNABLE_INT_FETCH("vm.pmap.mode", &mode);
 
-               pa = pmap_early_vtophys(l1pt, l3pt);
-               pn = (pa / PAGE_SIZE);
-               entry = (PTE_V);
-               entry |= (pn << PTE_PPN0_S);
-               pmap_store(&l2[l2_slot], entry);
-               l3pt += PAGE_SIZE;
+       if (mode == PMAP_MODE_SV48 && (mmu_caps & MMU_SV48) != 0) {
+               /*
+                * Sv48 mode: allocate an L0 page table to be the root. The
+                * layout of KVA is otherwise identical to Sv39.
+                */
+               l0 = pmap_early_alloc_tables(&freemempos, 1);
+               *root_pt_phys = (vm_paddr_t)l0;
+               pmap_mode = PMAP_MODE_SV48;
+       } else {
+               l0 = NULL;
+       }
+
+       /*
+        * Allocate an L1 page table.
+        */
+       l1 = pmap_early_alloc_tables(&freemempos, 1);
+       if (pmap_mode == PMAP_MODE_SV39)
+               *root_pt_phys = (vm_paddr_t)l1;
+
+       /*
+        * Allocate a set of L2 page tables for KVA. Most likely, only 1 is
+        * needed.
+        */
+       nkernl2 = howmany(howmany(kernlen, L2_SIZE), Ln_ENTRIES);
+       kern_l2 = pmap_early_alloc_tables(&freemempos, nkernl2);
+
+       /*
+        * Allocate an L2 page table for the static devmap, located at the end
+        * of KVA. We can expect that the devmap will always be less than 1GB
+        * in size.
+        */
+       devmap_l2 = pmap_early_alloc_tables(&freemempos, 1);
+
+       /* Allocate L3 page tables for the devmap. */
+       ndevmapl3 = howmany(howmany(PMAP_MAPDEV_EARLY_SIZE, L3_SIZE),
+           Ln_ENTRIES);
+       devmap_l3 = pmap_early_alloc_tables(&freemempos, ndevmapl3);
+
+       /*
+        * Allocate some L3 bootstrap pages, for early KVA allocations before
+        * vm_mem_init() has run. For example, the message buffer.
+        *
+        * A somewhat arbitrary choice of 32MB. This should be more than enough
+        * for any early allocations. There is no need to worry about waste, as
+        * whatever is not used will be consumed by later calls to
+        * pmap_growkernel().
+        */
+       nkernl3 = 16;
+       kern_l3 = pmap_early_alloc_tables(&freemempos, nkernl3);
+
+       /* Allocations are done. */
+       if (freemempos < roundup2(kernend, L2_SIZE))
+               freemempos = roundup2(kernend, L2_SIZE);
+
+       /*
+        * Map the kernel (and preloaded modules or data) using L2 superpages.
+        *
+        * kernstart is 2MB-aligned. This is enforced by loader(8) and required
+        * by locore assembly.
+        *
+        * TODO: eventually, this should be done with proper permissions for
+        * each segment, rather than mapping the entire kernel and preloaded
+        * modules RWX.
+        */
+       slot = pmap_l2_index(KERNBASE);
+       for (pa = kernstart; pa < kernend; pa += L2_SIZE, slot++) {
+               pmap_store(&kern_l2[slot], L2_PTE(pa, PTE_KERN | PTE_X));
+       }
+
+       /*
+        * Connect the L3 bootstrap pages to the kernel L2 table. The L3 PTEs
+        * themselves are invalid.
+        */
+       slot = pmap_l2_index(freemempos - kernstart + KERNBASE);
+       for (i = 0; i < nkernl3; i++, slot++) {
+               pa = (vm_paddr_t)kern_l3 + ptoa(i);
+               pmap_store(&kern_l2[slot], L2_PDE(pa, PTE_V));
+       }
+
+       /* Connect the L2 tables to the L1 table. */
+       slot = pmap_l1_index(KERNBASE);
+       for (i = 0; i < nkernl2; i++, slot++) {
+               pa = (vm_paddr_t)kern_l2 + ptoa(i);
+               pmap_store(&l1[slot], L1_PDE(pa, PTE_V));
+       }
+
+       /* Connect the L1 table to L0, if in use. */
+       if (pmap_mode == PMAP_MODE_SV48) {
+               slot = pmap_l0_index(KERNBASE);
+               pmap_store(&l0[slot], L0_PDE((vm_paddr_t)l1, PTE_V));
+       }
+
+       /*
+        * Connect the devmap L3 pages to the L2 table. The devmap PTEs
+        * themselves are invalid.
+        */
+       slot = pmap_l2_index(DEVMAP_MIN_VADDR);
+       for (i = 0; i < ndevmapl3; i++, slot++) {
+               pa = (vm_paddr_t)devmap_l3 + ptoa(i);
+               pmap_store(&devmap_l2[slot], L2_PDE(pa, PTE_V));
        }
 
-       /* Clean the L2 page table */
-       memset((void *)l3_start, 0, l3pt - l3_start);
+       /* Connect the devmap L2 pages to the L1 table. */
+       slot = pmap_l1_index(DEVMAP_MIN_VADDR);
+       pa = (vm_paddr_t)devmap_l2;
+       pmap_store(&l1[slot], L1_PDE(pa, PTE_V));
+
+       /* Bootstrap the direct map. */
+       pmap_bootstrap_dmap((vm_offset_t)l1, min_pa, max_pa);
 
-       return (l3pt);
+       /* Return the next position of free memory */
+       return (freemempos);
 }
 
 /*
@@ -616,19 +728,17 @@ void
 pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
 {
        vm_paddr_t physmap[PHYS_AVAIL_ENTRIES];
-       uint64_t satp;
-       vm_offset_t dpcpu, freemempos, l0pv, msgbufpv;
-       vm_paddr_t l0pa, l1pa, max_pa, min_pa, pa;
-       pd_entry_t *l0p;
-       u_int l1_slot, l2_slot;
+       vm_paddr_t freemempos;
+       vm_paddr_t max_pa, min_pa, pa;
+       vm_paddr_t root_pt_phys;
+       vm_offset_t freeva;
+       vm_offset_t dpcpu, msgbufpv;
+       pt_entry_t *pte;
        u_int physmap_idx;
-       int i, mode;
+       int i;
 
        printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
 
-       /* Set this early so we can use the pagetable walking functions */
-       kernel_pmap_store.pm_top = (pd_entry_t *)l1pt;
-       kernel_pmap_store.pm_stage = PM_STAGE1;
        PMAP_LOCK_INIT(kernel_pmap);
        TAILQ_INIT(&kernel_pmap->pm_pvchunk);
        vm_radix_init(&kernel_pmap->pm_root);
@@ -664,74 +774,63 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
        printf("min_pa %lx\n", min_pa);
        printf("max_pa %lx\n", max_pa);
 
-       /* Create a direct map region early so we can use it for pa -> va */
-       pmap_bootstrap_dmap(l1pt, min_pa, max_pa);
-
-       /*
-        * Read the page table to find out what is already mapped.
-        * This assumes we have mapped a block of memory from KERNBASE
-        * using a single L1 entry.
-        */
-       (void)pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot);
-
-       /* Sanity check the index, KERNBASE should be the first VA */
-       KASSERT(l2_slot == 0, ("The L2 index is non-zero"));
-
-       freemempos = roundup2(KERNBASE + kernlen, PAGE_SIZE);
-
-       /* Create the l3 tables for the early devmap */
-       freemempos = pmap_bootstrap_l3(l1pt,
-           VM_MAX_KERNEL_ADDRESS - PMAP_MAPDEV_EARLY_SIZE, freemempos);
+       /* Create a new set of pagetables to run the kernel in. */
+       freemempos = pmap_create_pagetables(kernstart, kernlen, min_pa, max_pa,
+           &root_pt_phys);
 
+       /* Switch to the newly created page tables. */
+       kernel_pmap->pm_stage = PM_STAGE1;
+       kernel_pmap->pm_top = (pd_entry_t *)PHYS_TO_DMAP(root_pt_phys);
+       kernel_pmap->pm_satp = atop(root_pt_phys) | pmap_satp_mode();
+       csr_write(satp, kernel_pmap->pm_satp);
        sfence_vma();
 
-#define alloc_pages(var, np)                                           \
-       (var) = freemempos;                                             \
-       freemempos += (np * PAGE_SIZE);                                 \
-       memset((char *)(var), 0, ((np) * PAGE_SIZE));
-
-       mode = 0;
-       TUNABLE_INT_FETCH("vm.pmap.mode", &mode);
-       if (mode == PMAP_MODE_SV48 && (mmu_caps & MMU_SV48) != 0) {
-               /*
-                * Enable SV48 mode: allocate an L0 page and set SV48 mode in
-                * SATP.  If the implementation does not provide SV48 mode,
-                * the mode read back from the (WARL) SATP register will be
-                * unchanged, and we continue in SV39 mode.
-                */
-               alloc_pages(l0pv, 1);
-               l0p = (void *)l0pv;
-               l1pa = pmap_early_vtophys(l1pt, l1pt);
-               l0p[pmap_l0_index(KERNBASE)] = PTE_V |
-                   ((l1pa >> PAGE_SHIFT) << PTE_PPN0_S);
-
-               l0pa = pmap_early_vtophys(l1pt, l0pv);
-               csr_write(satp, (l0pa >> PAGE_SHIFT) | SATP_MODE_SV48);
-               satp = csr_read(satp);
-               if ((satp & SATP_MODE_M) == SATP_MODE_SV48) {
-                       pmap_mode = PMAP_MODE_SV48;
-                       kernel_pmap_store.pm_top = l0p;
-               } else {
-                       /* Mode didn't change, give the page back. */
-                       freemempos -= PAGE_SIZE;
-               }
-       }
-
-       /* Allocate dynamic per-cpu area. */
-       alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
+       /*
+        * Now, we need to make a few more static reservations from KVA.
+        *
+        * Set freeva to freemempos virtual address, and be sure to advance
+        * them together.
+        */
+       freeva = freemempos - kernstart + KERNBASE;
+#define reserve_space(var, pa, size)                                   \
+       do {                                                            \
+               var = freeva;                                           \
+               pa = freemempos;                                        \
+               freeva += size;                                         \
+               freemempos += size;                                     \
+       } while (0)
+
+       /* Allocate the dynamic per-cpu area. */
+       reserve_space(dpcpu, pa, DPCPU_SIZE);
+
+       /* Map it. */
+       pte = pmap_l3(kernel_pmap, dpcpu);
+       KASSERT(pte != NULL, ("Bootstrap pages missing"));
+       for (i = 0; i < howmany(DPCPU_SIZE, PAGE_SIZE); i++)
+               pmap_store(&pte[i], L3_PTE(pa + ptoa(i), PTE_KERN));
+
+       /* Now, it can be initialized. */
        dpcpu_init((void *)dpcpu, 0);
 
        /* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
-       alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
+       reserve_space(msgbufpv, pa, round_page(msgbufsize));
        msgbufp = (void *)msgbufpv;
 
-       virtual_avail = roundup2(freemempos, L2_SIZE);
-       virtual_end = VM_MAX_KERNEL_ADDRESS - PMAP_MAPDEV_EARLY_SIZE;
-       kernel_vm_end = virtual_avail;
+       /* Map it. */
+       pte = pmap_l3(kernel_pmap, msgbufpv);
+       KASSERT(pte != NULL, ("Bootstrap pages missing"));
+       for (i = 0; i < howmany(msgbufsize, PAGE_SIZE); i++)
+               pmap_store(&pte[i], L3_PTE(pa + ptoa(i), PTE_KERN));
+
+#undef reserve_space
 
-       pa = pmap_early_vtophys(l1pt, freemempos);
+       /* Mark the bounds of our available virtual address space */
+       virtual_avail = kernel_vm_end = freeva;
+       virtual_end = DEVMAP_MIN_VADDR;
 
-       physmem_exclude_region(kernstart, pa - kernstart, EXFLAG_NOALLOC);
+       /* Exclude the reserved physical memory from allocations. */
+       physmem_exclude_region(kernstart, freemempos - kernstart,
+           EXFLAG_NOALLOC);
 }
 
 /*

Reply via email to