Author: andrew
Date: Thu Mar 31 11:07:24 2016
New Revision: 297446
URL: https://svnweb.freebsd.org/changeset/base/297446

Log:
  Add support for 4-level page tables. The userland address space has
  been increased to 256TiB. The kernel address space can also be
  increased to the same size, but this will be done in a later change.
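
  As a rough illustration (this sketch is not part of the commit), the
  256TiB figure follows directly from using 48 virtual-address bits with
  a 4KiB translation granule, where each of the four table levels
  resolves 9 bits and the page offset uses the remaining 12:

      /* Hypothetical sketch, not from the commit: 48 VA bits, 4KiB granule. */
      #include <stdio.h>

      int
      main(void)
      {
          unsigned va_bits = 48;     /* VIRT_BITS after this change */
          unsigned page_shift = 12;  /* 4KiB pages */
          unsigned level_bits = 9;   /* 512 entries per table level */

          /* (48 - 12) / 9 = 4 translation levels: L0..L3 */
          printf("levels: %u\n", (va_bits - page_shift) / level_bits);
          /* 2^48 bytes = 256TiB of user address space */
          printf("user VA: %llu TiB\n", (1ULL << va_bits) >> 40);
          return (0);
      }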
  
  To help work with the extra level of page tables, two new functions
  have been added: one to find the lowest-level table entry and one to
  find the lowest valid block or page entry. Both take a pmap and a
  virtual address and also report the level at which the entry was
  found.
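
  A minimal usage sketch of the new block/page walker (modelled on how
  pmap_extract() below uses it; the helper name example_extract and the
  omission of locking are assumptions made for brevity):

      /* Translate a VA to a PA with the level-returning walker. */
      static vm_paddr_t
      example_extract(pmap_t pmap, vm_offset_t va)
      {
          pt_entry_t *pte, tpte;
          int lvl;

          pte = pmap_pte(pmap, va, &lvl);
          if (pte == NULL)           /* lvl holds the first invalid level */
              return (0);
          tpte = pmap_load(pte);
          /* Mask off attributes; add the offset bits for the level found. */
          switch (lvl) {
          case 1:
              return ((tpte & ~ATTR_MASK) | (va & L1_OFFSET));
          case 2:
              return ((tpte & ~ATTR_MASK) | (va & L2_OFFSET));
          default:                   /* lvl == 3 */
              return ((tpte & ~ATTR_MASK) | (va & L3_OFFSET));
          }
      }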
  
  This has been tested with a combination of buildworld, the stress2
  tests, and by sorting /dev/zero with sort(1) to consume a large
  amount of memory. No new issues are known to have been introduced by
  this change.
  
  Reviewed by:  kib
  Obtained from:        ABT Systems Ltd
  Relnotes:     yes
  Sponsored by: The FreeBSD Foundation
  Differential Revision:        https://reviews.freebsd.org/D5720

Modified:
  head/sys/arm64/arm64/genassym.c
  head/sys/arm64/arm64/locore.S
  head/sys/arm64/arm64/machdep.c
  head/sys/arm64/arm64/minidump_machdep.c
  head/sys/arm64/arm64/pmap.c
  head/sys/arm64/arm64/swtch.S
  head/sys/arm64/arm64/vm_machdep.c
  head/sys/arm64/include/machdep.h
  head/sys/arm64/include/pcb.h
  head/sys/arm64/include/pmap.h
  head/sys/arm64/include/pte.h
  head/sys/arm64/include/vmparam.h

Modified: head/sys/arm64/arm64/genassym.c
==============================================================================
--- head/sys/arm64/arm64/genassym.c     Thu Mar 31 09:55:21 2016        (r297445)
+++ head/sys/arm64/arm64/genassym.c     Thu Mar 31 11:07:24 2016        (r297446)
@@ -52,7 +52,7 @@ ASSYM(PCB_SIZE, roundup2(sizeof(struct p
 ASSYM(PCB_SINGLE_STEP_SHIFT, PCB_SINGLE_STEP_SHIFT);
 ASSYM(PCB_REGS, offsetof(struct pcb, pcb_x));
 ASSYM(PCB_SP, offsetof(struct pcb, pcb_sp));
-ASSYM(PCB_L1ADDR, offsetof(struct pcb, pcb_l1addr));
+ASSYM(PCB_L0ADDR, offsetof(struct pcb, pcb_l0addr));
 ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
 ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
 

Modified: head/sys/arm64/arm64/locore.S
==============================================================================
--- head/sys/arm64/arm64/locore.S       Thu Mar 31 09:55:21 2016        (r297445)
+++ head/sys/arm64/arm64/locore.S       Thu Mar 31 11:07:24 2016        (r297446)
@@ -35,7 +35,7 @@
 #include <machine/param.h>
 #include <machine/pte.h>
 
-#define        VIRT_BITS       39
+#define        VIRT_BITS       48
 
        .globl  kernbase
        .set    kernbase, KERNBASE
@@ -89,7 +89,8 @@ _start:
        /*
         * At this point:
         * x27 = TTBR0 table
-        * x26 = TTBR1 table
+        * x26 = Kernel L1 table
+        * x24 = TTBR1 table
         */
 
        /* Enable the mmu */
@@ -100,16 +101,6 @@ _start:
        br      x15
 
 virtdone:
-       /*
-        * Now that we are in virtual address space,
-        * we don't need the identity mapping in TTBR0 and
-        * can set the TCR to a more useful value.
-        */
-       ldr     x2, tcr
-       mrs     x3, id_aa64mmfr0_el1
-       bfi     x2, x3, #32, #3
-       msr     tcr_el1, x2
-
        /* Set up the stack */
        adr     x25, initstack_end
        mov     sp, x25
@@ -128,6 +119,7 @@ virtdone:
 
        /* Make the page table base a virtual address */
        sub     x26, x26, x29
+       sub     x24, x24, x29
 
        sub     sp, sp, #(64 * 4)
        mov     x0, sp
@@ -139,6 +131,7 @@ virtdone:
        str     x26, [x0, 8]    /* kern_l1pt */
        str     x29, [x0, 16]   /* kern_delta */
        str     x25, [x0, 24]   /* kern_stack */
+       str     x24, [x0, 32]   /* kern_l0pt */
 
        /* trace back starts here */
        mov     fp, #0
@@ -175,7 +168,7 @@ ENTRY(mpentry)
        msr     contextidr_el1, x1
 
        /* Load the kernel page table */
-       adr     x26, pagetable_l1_ttbr1
+       adr     x24, pagetable_l0_ttbr1
        /* Load the identity page table */
        adr     x27, pagetable_l0_ttbr0
 
@@ -187,16 +180,6 @@ ENTRY(mpentry)
        br      x15
 
 mp_virtdone:
-       /*
-        * Now that we are in virtual address space,
-        * we don't need the identity mapping in TTBR0 and
-        * can set the TCR to a more useful value.
-        */
-       ldr     x2, tcr
-       mrs     x3, id_aa64mmfr0_el1
-       bfi     x2, x3, #32, #3
-       msr     tcr_el1, x2
-
        ldr     x4, =secondary_stacks
        mov     x5, #(PAGE_SIZE * KSTACK_PAGES)
        mul     x5, x0, x5
@@ -388,11 +371,18 @@ create_pagetables:
        mov     x6, x26
        bl      link_l1_pagetable
 
+       /* Move to the l0 table */
+       add     x24, x26, #PAGE_SIZE
+
+       /* Link the l0 -> l1 table */
+       mov     x9, x6
+       mov     x6, x24
+       bl      link_l0_pagetable
 
        /*
         * Build the TTBR0 maps.
         */
-       add     x27, x26, #PAGE_SIZE
+       add     x27, x24, #PAGE_SIZE
 
        mov     x6, x27         /* The initial page table */
 #if defined(SOCDEV_PA) && defined(SOCDEV_VA)
@@ -440,7 +430,7 @@ link_l0_pagetable:
         */
        /* Find the table index */
        lsr     x11, x8, #L0_SHIFT
-       and     x11, x11, #Ln_ADDR_MASK
+       and     x11, x11, #L0_ADDR_MASK
 
        /* Build the L0 block entry */
        mov     x12, #L0_TABLE
@@ -582,7 +572,7 @@ start_mmu:
 
        /* Load ttbr0 and ttbr1 */
        msr     ttbr0_el1, x27
-       msr     ttbr1_el1, x26
+       msr     ttbr1_el1, x24
        isb
 
        /* Clear the Monitor Debug System control register */
@@ -596,11 +586,8 @@ start_mmu:
 
        /*
         * Setup TCR according to PARange bits from ID_AA64MMFR0_EL1.
-        * Some machines have physical memory mapped >512GiB, which can not
-        * be identity-mapped using the default 39 VA bits. Thus, use
-        * 48 VA bits for now and switch back to 39 after the VA jump.
         */
-       ldr     x2, tcr_early
+       ldr     x2, tcr
        mrs     x3, id_aa64mmfr0_el1
        bfi     x2, x3, #32, #3
        msr     tcr_el1, x2
@@ -623,9 +610,6 @@ mair:
 tcr:
        .quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_ASID_16 | TCR_TG1_4K | \
            TCR_CACHE_ATTRS | TCR_SMP_ATTRS)
-tcr_early:
-       .quad (TCR_T1SZ(64 - VIRT_BITS) | TCR_T0SZ(64 - 48) | \
-           TCR_ASID_16 | TCR_TG1_4K | TCR_CACHE_ATTRS | TCR_SMP_ATTRS)
 sctlr_set:
        /* Bits to set */
        .quad (SCTLR_UCI | SCTLR_nTWE | SCTLR_nTWI | SCTLR_UCT | SCTLR_DZE | \
@@ -651,6 +635,8 @@ pagetable:
        .space  PAGE_SIZE
 pagetable_l1_ttbr1:
        .space  PAGE_SIZE
+pagetable_l0_ttbr1:
+       .space  PAGE_SIZE
 pagetable_l1_ttbr0:
        .space  PAGE_SIZE
 pagetable_l0_ttbr0:
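
A quick check on the locore.S change above (a sketch, not part of the commit):
with VIRT_BITS raised from 39 to 48, TCR_TxSZ(64 - VIRT_BITS) programs
TxSZ = 16 for both TTBR0 and TTBR1, i.e. 48-bit address spaces on both
sides. The identity map no longer needs a wider TTBR0 than the kernel
map, so the separate tcr_early value and the post-VA-jump TCR reload
become unnecessary and are removed.

    /* TxSZ encodes the unused top VA bits; the name TXSZ is hypothetical. */
    #define VIRT_BITS  48
    #define TXSZ       (64 - VIRT_BITS)   /* = 16 -> 48-bit VA */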

Modified: head/sys/arm64/arm64/machdep.c
==============================================================================
--- head/sys/arm64/arm64/machdep.c      Thu Mar 31 09:55:21 2016        (r297445)
+++ head/sys/arm64/arm64/machdep.c      Thu Mar 31 11:07:24 2016        (r297446)
@@ -896,8 +896,8 @@ initarm(struct arm64_bootparams *abp)
        cache_setup();
 
        /* Bootstrap enough of pmap  to enter the kernel proper */
-       pmap_bootstrap(abp->kern_l1pt, KERNBASE - abp->kern_delta,
-           lastaddr - KERNBASE);
+       pmap_bootstrap(abp->kern_l0pt, abp->kern_l1pt,
+           KERNBASE - abp->kern_delta, lastaddr - KERNBASE);
 
        arm_devmap_bootstrap(0, NULL);
 

Modified: head/sys/arm64/arm64/minidump_machdep.c
==============================================================================
--- head/sys/arm64/arm64/minidump_machdep.c     Thu Mar 31 09:55:21 2016        (r297445)
+++ head/sys/arm64/arm64/minidump_machdep.c     Thu Mar 31 11:07:24 2016        (r297446)
@@ -218,7 +218,7 @@ blk_write(struct dumperinfo *di, char *p
 int
 minidumpsys(struct dumperinfo *di)
 {
-       pd_entry_t *l1, *l2;
+       pd_entry_t *l0, *l1, *l2;
        pt_entry_t *l3;
        uint32_t pmapsize;
        vm_offset_t va;
@@ -236,7 +236,7 @@ minidumpsys(struct dumperinfo *di)
        pmapsize = 0;
        for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) {
                pmapsize += PAGE_SIZE;
-               if (!pmap_get_tables(pmap_kernel(), va, &l1, &l2, &l3))
+               if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3))
                        continue;
 
                /* We should always be using the l2 table for kvm */
@@ -335,7 +335,7 @@ minidumpsys(struct dumperinfo *di)
        /* Dump kernel page directory pages */
        bzero(&tmpbuffer, sizeof(tmpbuffer));
        for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) {
-               if (!pmap_get_tables(pmap_kernel(), va, &l1, &l2, &l3)) {
+               if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3)) {
                        /* We always write a page, even if it is zero */
                        error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE);
                        if (error)

Modified: head/sys/arm64/arm64/pmap.c
==============================================================================
--- head/sys/arm64/arm64/pmap.c Thu Mar 31 09:55:21 2016        (r297445)
+++ head/sys/arm64/arm64/pmap.c Thu Mar 31 11:07:24 2016        (r297446)
@@ -11,7 +11,7 @@
  * All rights reserved.
  * Copyright (c) 2014 Andrew Turner
  * All rights reserved.
- * Copyright (c) 2014 The FreeBSD Foundation
+ * Copyright (c) 2014-2016 The FreeBSD Foundation
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
@@ -142,9 +142,14 @@ __FBSDID("$FreeBSD$");
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 
-#define        NPDEPG          (PAGE_SIZE/(sizeof (pd_entry_t)))
-#define        NUPDE                   (NPDEPG * NPDEPG)
-#define        NUSERPGTBLS             (NUPDE + NPDEPG)
+#define        NL0PG           (PAGE_SIZE/(sizeof (pd_entry_t)))
+#define        NL1PG           (PAGE_SIZE/(sizeof (pd_entry_t)))
+#define        NL2PG           (PAGE_SIZE/(sizeof (pd_entry_t)))
+#define        NL3PG           (PAGE_SIZE/(sizeof (pt_entry_t)))
+
+#define        NUL0E           L0_ENTRIES
+#define        NUL1E           (NUL0E * NL1PG)
+#define        NUL2E           (NUL1E * NL2PG)
 
 #if !defined(DIAGNOSTIC)
 #ifdef __GNUC_GNU_INLINE__
@@ -273,15 +278,37 @@ pagezero(void *p)
        bzero(p, PAGE_SIZE);
 }
 
+#define        pmap_l0_index(va)       (((va) >> L0_SHIFT) & L0_ADDR_MASK)
 #define        pmap_l1_index(va)       (((va) >> L1_SHIFT) & Ln_ADDR_MASK)
 #define        pmap_l2_index(va)       (((va) >> L2_SHIFT) & Ln_ADDR_MASK)
 #define        pmap_l3_index(va)       (((va) >> L3_SHIFT) & Ln_ADDR_MASK)
 
 static __inline pd_entry_t *
+pmap_l0(pmap_t pmap, vm_offset_t va)
+{
+
+       return (&pmap->pm_l0[pmap_l0_index(va)]);
+}
+
+static __inline pd_entry_t *
+pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va)
+{
+       pd_entry_t *l1;
+
+       l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
+       return (&l1[pmap_l1_index(va)]);
+}
+
+static __inline pd_entry_t *
 pmap_l1(pmap_t pmap, vm_offset_t va)
 {
+       pd_entry_t *l0;
+
+       l0 = pmap_l0(pmap, va);
+       if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE)
+               return (NULL);
 
-       return (&pmap->pm_l1[pmap_l1_index(va)]);
+       return (pmap_l0_to_l1(l0, va));
 }
 
 static __inline pd_entry_t *
@@ -314,28 +341,103 @@ pmap_l2_to_l3(pd_entry_t *l2, vm_offset_
        return (&l3[pmap_l3_index(va)]);
 }
 
+/*
+ * Returns the lowest valid pde for a given virtual address.
+ * The next level may or may not point to a valid page or block.
+ */
+static __inline pd_entry_t *
+pmap_pde(pmap_t pmap, vm_offset_t va, int *level)
+{
+       pd_entry_t *l0, *l1, *l2, desc;
+
+       l0 = pmap_l0(pmap, va);
+       desc = pmap_load(l0) & ATTR_DESCR_MASK;
+       if (desc != L0_TABLE) {
+               *level = -1;
+               return (NULL);
+       }
+
+       l1 = pmap_l0_to_l1(l0, va);
+       desc = pmap_load(l1) & ATTR_DESCR_MASK;
+       if (desc != L1_TABLE) {
+               *level = 0;
+               return (l0);
+       }
+
+       l2 = pmap_l1_to_l2(l1, va);
+       desc = pmap_load(l2) & ATTR_DESCR_MASK;
+       if (desc != L2_TABLE) {
+               *level = 1;
+               return (l1);
+       }
+
+       *level = 2;
+       return (l2);
+}
+
+/*
+ * Returns the lowest valid pte block or table entry for a given virtual
+ * address. If there are no valid entries return NULL and set the level to
+ * the first invalid level.
+ */
 static __inline pt_entry_t *
-pmap_l3(pmap_t pmap, vm_offset_t va)
+pmap_pte(pmap_t pmap, vm_offset_t va, int *level)
 {
-       pd_entry_t *l2;
+       pd_entry_t *l1, *l2, desc;
+       pt_entry_t *l3;
 
-       l2 = pmap_l2(pmap, va);
-       if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE)
+       l1 = pmap_l1(pmap, va);
+       if (l1 == NULL) {
+               *level = 0;
                return (NULL);
+       }
+       desc = pmap_load(l1) & ATTR_DESCR_MASK;
+       if (desc == L1_BLOCK) {
+               *level = 1;
+               return (l1);
+       }
 
-       return (pmap_l2_to_l3(l2, va));
+       if (desc != L1_TABLE) {
+               *level = 1;
+               return (NULL);
+       }
+
+       l2 = pmap_l1_to_l2(l1, va);
+       desc = pmap_load(l2) & ATTR_DESCR_MASK;
+       if (desc == L2_BLOCK) {
+               *level = 2;
+               return (l2);
+       }
+
+       if (desc != L2_TABLE) {
+               *level = 2;
+               return (NULL);
+       }
+
+       *level = 3;
+       l3 = pmap_l2_to_l3(l2, va);
+       if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE)
+               return (NULL);
+
+       return (l3);
 }
 
 bool
-pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l1, pd_entry_t **l2,
-    pt_entry_t **l3)
+pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1,
+    pd_entry_t **l2, pt_entry_t **l3)
 {
-       pd_entry_t *l1p, *l2p;
+       pd_entry_t *l0p, *l1p, *l2p;
+
+       if (pmap->pm_l0 == NULL)
+               return (false);
+
+       l0p = pmap_l0(pmap, va);
+       *l0 = l0p;
 
-       if (pmap->pm_l1 == NULL)
+       if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE)
                return (false);
 
-       l1p = pmap_l1(pmap, va);
+       l1p = pmap_l0_to_l1(l0p, va);
        *l1 = l1p;
 
        if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
@@ -544,7 +646,8 @@ pmap_bootstrap_l3(vm_offset_t l1pt, vm_o
  *     Bootstrap the system enough to run with virtual memory.
  */
 void
-pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
+pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart,
+    vm_size_t kernlen)
 {
        u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot;
        uint64_t kern_delta;
@@ -562,7 +665,7 @@ pmap_bootstrap(vm_offset_t l1pt, vm_padd
        printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);
 
        /* Set this early so we can use the pagetable walking functions */
-       kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt;
+       kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt;
        PMAP_LOCK_INIT(kernel_pmap);
 
        /*
@@ -805,30 +908,40 @@ pmap_invalidate_all(pmap_t pmap)
 vm_paddr_t 
 pmap_extract(pmap_t pmap, vm_offset_t va)
 {
-       pd_entry_t *l2p, l2;
-       pt_entry_t *l3p, l3;
+       pt_entry_t *pte, tpte;
        vm_paddr_t pa;
+       int lvl;
 
        pa = 0;
        PMAP_LOCK(pmap);
        /*
-        * Start with the l2 tabel. We are unable to allocate
-        * pages in the l1 table.
+        * Find the block or page map for this virtual address. pmap_pte
+        * will return either a valid block/page entry, or NULL.
         */
-       l2p = pmap_l2(pmap, va);
-       if (l2p != NULL) {
-               l2 = pmap_load(l2p);
-               if ((l2 & ATTR_DESCR_MASK) == L2_TABLE) {
-                       l3p = pmap_l2_to_l3(l2p, va);
-                       if (l3p != NULL) {
-                               l3 = pmap_load(l3p);
-
-                               if ((l3 & ATTR_DESCR_MASK) == L3_PAGE)
-                                       pa = (l3 & ~ATTR_MASK) |
-                                           (va & L3_OFFSET);
-                       }
-               } else if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK)
-                       pa = (l2 & ~ATTR_MASK) | (va & L2_OFFSET);
+       pte = pmap_pte(pmap, va, &lvl);
+       if (pte != NULL) {
+               tpte = pmap_load(pte);
+               pa = tpte & ~ATTR_MASK;
+               switch(lvl) {
+               case 1:
+                       KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
+                           ("pmap_extract: Invalid L1 pte found: %lx",
+                           tpte & ATTR_DESCR_MASK));
+                       pa |= (va & L1_OFFSET);
+                       break;
+               case 2:
+                       KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
+                           ("pmap_extract: Invalid L2 pte found: %lx",
+                           tpte & ATTR_DESCR_MASK));
+                       pa |= (va & L2_OFFSET);
+                       break;
+               case 3:
+                       KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
+                           ("pmap_extract: Invalid L3 pte found: %lx",
+                           tpte & ATTR_DESCR_MASK));
+                       pa |= (va & L3_OFFSET);
+                       break;
+               }
        }
        PMAP_UNLOCK(pmap);
        return (pa);
@@ -844,21 +957,31 @@ pmap_extract(pmap_t pmap, vm_offset_t va
 vm_page_t
 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
 {
-       pt_entry_t *l3p, l3;
+       pt_entry_t *pte, tpte;
        vm_paddr_t pa;
        vm_page_t m;
+       int lvl;
 
        pa = 0;
        m = NULL;
        PMAP_LOCK(pmap);
 retry:
-       l3p = pmap_l3(pmap, va);
-       if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) {
-               if (((l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) ||
+       pte = pmap_pte(pmap, va, &lvl);
+       if (pte != NULL) {
+               tpte = pmap_load(pte);
+
+               KASSERT(lvl > 0 && lvl <= 3,
+                   ("pmap_extract_and_hold: Invalid level %d", lvl));
+               CTASSERT(L1_BLOCK == L2_BLOCK);
+               KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) ||
+                   (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK),
+                   ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl,
+                    tpte & ATTR_DESCR_MASK));
+               if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) ||
                    ((prot & VM_PROT_WRITE) == 0)) {
-                       if (vm_page_pa_tryrelock(pmap, l3 & ~ATTR_MASK, &pa))
+                       if (vm_page_pa_tryrelock(pmap, tpte & ~ATTR_MASK, &pa))
                                goto retry;
-                       m = PHYS_TO_VM_PAGE(l3 & ~ATTR_MASK);
+                       m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK);
                        vm_page_hold(m);
                }
        }
@@ -870,25 +993,39 @@ retry:
 vm_paddr_t
 pmap_kextract(vm_offset_t va)
 {
-       pd_entry_t *l2p, l2;
-       pt_entry_t *l3;
+       pt_entry_t *pte, tpte;
        vm_paddr_t pa;
+       int lvl;
 
        if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
                pa = DMAP_TO_PHYS(va);
        } else {
-               l2p = pmap_l2(kernel_pmap, va);
-               if (l2p == NULL)
-                       panic("pmap_kextract: No l2");
-               l2 = pmap_load(l2p);
-               if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK)
-                       return ((l2 & ~ATTR_MASK) |
-                           (va & L2_OFFSET));
-
-               l3 = pmap_l2_to_l3(l2p, va);
-               if (l3 == NULL)
-                       panic("pmap_kextract: No l3...");
-               pa = (pmap_load(l3) & ~ATTR_MASK) | (va & PAGE_MASK);
+               pa = 0;
+               pte = pmap_pte(kernel_pmap, va, &lvl);
+               if (pte != NULL) {
+                       tpte = pmap_load(pte);
+                       pa = tpte & ~ATTR_MASK;
+                       switch(lvl) {
+                       case 1:
+                               KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
+                                   ("pmap_kextract: Invalid L1 pte found: %lx",
+                                   tpte & ATTR_DESCR_MASK));
+                               pa |= (va & L1_OFFSET);
+                               break;
+                       case 2:
+                               KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
+                                   ("pmap_kextract: Invalid L2 pte found: %lx",
+                                   tpte & ATTR_DESCR_MASK));
+                               pa |= (va & L2_OFFSET);
+                               break;
+                       case 3:
+                               KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
+                                   ("pmap_kextract: Invalid L3 pte found: %lx",
+                                   tpte & ATTR_DESCR_MASK));
+                               pa |= (va & L3_OFFSET);
+                               break;
+                       }
+               }
        }
        return (pa);
 }
@@ -900,8 +1037,10 @@ pmap_kextract(vm_offset_t va)
 void
 pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
 {
-       pt_entry_t *l3;
+       pd_entry_t *pde;
+       pt_entry_t *pte;
        vm_offset_t va;
+       int lvl;
 
        KASSERT((pa & L3_OFFSET) == 0,
           ("pmap_kenter_device: Invalid physical address"));
@@ -912,11 +1051,16 @@ pmap_kenter_device(vm_offset_t sva, vm_s
 
        va = sva;
        while (size != 0) {
-               l3 = pmap_l3(kernel_pmap, va);
-               KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
-               pmap_load_store(l3, (pa & ~L3_OFFSET) | ATTR_DEFAULT |
+               pde = pmap_pde(kernel_pmap, va, &lvl);
+               KASSERT(pde != NULL,
+                   ("pmap_kenter_device: Invalid page entry, va: 0x%lx", va));
+               KASSERT(lvl == 2,
+                   ("pmap_kenter_device: Invalid level %d", lvl));
+
+               pte = pmap_l2_to_l3(pde, va);
+               pmap_load_store(pte, (pa & ~L3_OFFSET) | ATTR_DEFAULT |
                    ATTR_IDX(DEVICE_MEMORY) | L3_PAGE);
-               PTE_SYNC(l3);
+               PTE_SYNC(pte);
 
                va += PAGE_SIZE;
                pa += PAGE_SIZE;
@@ -927,28 +1071,30 @@ pmap_kenter_device(vm_offset_t sva, vm_s
 
 /*
  * Remove a page from the kernel pagetables.
- * Note: not SMP coherent.
  */
 PMAP_INLINE void
 pmap_kremove(vm_offset_t va)
 {
-       pt_entry_t *l3;
+       pt_entry_t *pte;
+       int lvl;
 
-       l3 = pmap_l3(kernel_pmap, va);
-       KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));
+       pte = pmap_pte(kernel_pmap, va, &lvl);
+       KASSERT(pte != NULL, ("pmap_kremove: Invalid address"));
+       KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl));
 
-       if (pmap_l3_valid_cacheable(pmap_load(l3)))
+       if (pmap_l3_valid_cacheable(pmap_load(pte)))
                cpu_dcache_wb_range(va, L3_SIZE);
-       pmap_load_clear(l3);
-       PTE_SYNC(l3);
+       pmap_load_clear(pte);
+       PTE_SYNC(pte);
        pmap_invalidate_page(kernel_pmap, va);
 }
 
 void
 pmap_kremove_device(vm_offset_t sva, vm_size_t size)
 {
-       pt_entry_t *l3;
+       pt_entry_t *pte;
        vm_offset_t va;
+       int lvl;
 
        KASSERT((sva & L3_OFFSET) == 0,
           ("pmap_kremove_device: Invalid virtual address"));
@@ -957,10 +1103,12 @@ pmap_kremove_device(vm_offset_t sva, vm_
 
        va = sva;
        while (size != 0) {
-               l3 = pmap_l3(kernel_pmap, va);
-               KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
-               pmap_load_clear(l3);
-               PTE_SYNC(l3);
+               pte = pmap_pte(kernel_pmap, va, &lvl);
+               KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va));
+               KASSERT(lvl == 3,
+                   ("Invalid device pagetable level: %d != 3", lvl));
+               pmap_load_clear(pte);
+               PTE_SYNC(pte);
 
                va += PAGE_SIZE;
                size -= PAGE_SIZE;
@@ -999,19 +1147,26 @@ pmap_map(vm_offset_t *virt, vm_paddr_t s
 void
 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 {
-       pt_entry_t *l3, pa;
+       pd_entry_t *pde;
+       pt_entry_t *pte, pa;
        vm_offset_t va;
        vm_page_t m;
-       int i;
+       int i, lvl;
 
        va = sva;
        for (i = 0; i < count; i++) {
+               pde = pmap_pde(kernel_pmap, va, &lvl);
+               KASSERT(pde != NULL,
+                   ("pmap_qenter: Invalid page entry, va: 0x%lx", va));
+               KASSERT(lvl == 2,
+                   ("pmap_qenter: Invalid level %d", lvl));
+
                m = ma[i];
                pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) |
                    ATTR_IDX(m->md.pv_memattr) | L3_PAGE;
-               l3 = pmap_l3(kernel_pmap, va);
-               pmap_load_store(l3, pa);
-               PTE_SYNC(l3);
+               pte = pmap_l2_to_l3(pde, va);
+               pmap_load_store(pte, pa);
+               PTE_SYNC(pte);
 
                va += L3_SIZE;
        }
@@ -1021,25 +1176,27 @@ pmap_qenter(vm_offset_t sva, vm_page_t *
 /*
  * This routine tears out page mappings from the
  * kernel -- it is meant only for temporary mappings.
- * Note: SMP coherent.  Uses a ranged shootdown IPI.
  */
 void
 pmap_qremove(vm_offset_t sva, int count)
 {
-       pt_entry_t *l3;
+       pt_entry_t *pte;
        vm_offset_t va;
+       int lvl;
 
        KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva));
 
        va = sva;
        while (count-- > 0) {
-               l3 = pmap_l3(kernel_pmap, va);
-               KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));
-
-               if (pmap_l3_valid_cacheable(pmap_load(l3)))
-                       cpu_dcache_wb_range(va, L3_SIZE);
-               pmap_load_clear(l3);
-               PTE_SYNC(l3);
+               pte = pmap_pte(kernel_pmap, va, &lvl);
+               KASSERT(lvl == 3,
+                   ("Invalid device pagetable level: %d != 3", lvl));
+               if (pte != NULL) {
+                       if (pmap_l3_valid_cacheable(pmap_load(pte)))
+                               cpu_dcache_wb_range(va, L3_SIZE);
+                       pmap_load_clear(pte);
+                       PTE_SYNC(pte);
+               }
 
                va += PAGE_SIZE;
        }
@@ -1104,26 +1261,47 @@ _pmap_unwire_l3(pmap_t pmap, vm_offset_t
        /*
         * unmap the page table page
         */
-       if (m->pindex >= NUPDE) {
-               /* PD page */
+       if (m->pindex >= (NUL2E + NUL1E)) {
+               /* l1 page */
+               pd_entry_t *l0;
+
+               l0 = pmap_l0(pmap, va);
+               pmap_load_clear(l0);
+               PTE_SYNC(l0);
+       } else if (m->pindex >= NUL2E) {
+               /* l2 page */
                pd_entry_t *l1;
+
                l1 = pmap_l1(pmap, va);
                pmap_load_clear(l1);
                PTE_SYNC(l1);
        } else {
-               /* PTE page */
+               /* l3 page */
                pd_entry_t *l2;
+
                l2 = pmap_l2(pmap, va);
                pmap_load_clear(l2);
                PTE_SYNC(l2);
        }
        pmap_resident_count_dec(pmap, 1);
-       if (m->pindex < NUPDE) {
-               /* We just released a PT, unhold the matching PD */
-               vm_page_t pdpg;
+       if (m->pindex < NUL2E) {
+               /* We just released an l3, unhold the matching l2 */
+               pd_entry_t *l1, tl1;
+               vm_page_t l2pg;
 
-               pdpg = PHYS_TO_VM_PAGE(*pmap_l1(pmap, va) & ~ATTR_MASK);
-               pmap_unwire_l3(pmap, va, pdpg, free);
+               l1 = pmap_l1(pmap, va);
+               tl1 = pmap_load(l1);
+               l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
+               pmap_unwire_l3(pmap, va, l2pg, free);
+       } else if (m->pindex < (NUL2E + NUL1E)) {
+               /* We just released an l2, unhold the matching l1 */
+               pd_entry_t *l0, tl0;
+               vm_page_t l1pg;
+
+               l0 = pmap_l0(pmap, va);
+               tl0 = pmap_load(l0);
+               l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
+               pmap_unwire_l3(pmap, va, l1pg, free);
        }
        pmap_invalidate_page(pmap, va);
 
@@ -1164,27 +1342,27 @@ pmap_pinit0(pmap_t pmap)
 
        PMAP_LOCK_INIT(pmap);
        bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
-       pmap->pm_l1 = kernel_pmap->pm_l1;
+       pmap->pm_l0 = kernel_pmap->pm_l0;
 }
 
 int
 pmap_pinit(pmap_t pmap)
 {
-       vm_paddr_t l1phys;
-       vm_page_t l1pt;
+       vm_paddr_t l0phys;
+       vm_page_t l0pt;
 
        /*
-        * allocate the l1 page
+        * allocate the l0 page
         */
-       while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL |
+       while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
            VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
                VM_WAIT;
 
-       l1phys = VM_PAGE_TO_PHYS(l1pt);
-       pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys);
+       l0phys = VM_PAGE_TO_PHYS(l0pt);
+       pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys);
 
-       if ((l1pt->flags & PG_ZERO) == 0)
-               pagezero(pmap->pm_l1);
+       if ((l0pt->flags & PG_ZERO) == 0)
+               pagezero(pmap->pm_l0);
 
        bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
 
@@ -1205,7 +1383,7 @@ pmap_pinit(pmap_t pmap)
 static vm_page_t
 _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
 {
-       vm_page_t m, /*pdppg, */pdpg;
+       vm_page_t m, l1pg, l2pg;
 
        PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
@@ -1237,33 +1415,84 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t 
         * it isn't already there.
         */
 
-       if (ptepindex >= NUPDE) {
-               pd_entry_t *l1;
-               vm_pindex_t l1index;
+       if (ptepindex >= (NUL2E + NUL1E)) {
+               pd_entry_t *l0;
+               vm_pindex_t l0index;
+
+               l0index = ptepindex - (NUL2E + NUL1E);
+               l0 = &pmap->pm_l0[l0index];
+               pmap_load_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE);
+               PTE_SYNC(l0);
+       } else if (ptepindex >= NUL2E) {
+               vm_pindex_t l0index, l1index;
+               pd_entry_t *l0, *l1;
+               pd_entry_t tl0;
+
+               l1index = ptepindex - NUL2E;
+               l0index = l1index >> L0_ENTRIES_SHIFT;
+
+               l0 = &pmap->pm_l0[l0index];
+               tl0 = pmap_load(l0);
+               if (tl0 == 0) {
+                       /* recurse for allocating page dir */
+                       if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index,
+                           lockp) == NULL) {
+                               --m->wire_count;
+                               /* XXX: release mem barrier? */
+                               atomic_subtract_int(&vm_cnt.v_wire_count, 1);
+                               vm_page_free_zero(m);
+                               return (NULL);
+                       }
+               } else {
+                       l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
+                       l1pg->wire_count++;
+               }
 
-               l1index = ptepindex - NUPDE;
-               l1 = &pmap->pm_l1[l1index];
+               l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
+               l1 = &l1[ptepindex & Ln_ADDR_MASK];
                pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE);
                PTE_SYNC(l1);
-
        } else {
-               vm_pindex_t l1index;
-               pd_entry_t *l1, *l2;
-
-               l1index = ptepindex >> (L1_SHIFT - L2_SHIFT);
-               l1 = &pmap->pm_l1[l1index];
-               if (pmap_load(l1) == 0) {
+               vm_pindex_t l0index, l1index;
+               pd_entry_t *l0, *l1, *l2;
+               pd_entry_t tl0, tl1;
+
+               l1index = ptepindex >> Ln_ENTRIES_SHIFT;
+               l0index = l1index >> L0_ENTRIES_SHIFT;
+
+               l0 = &pmap->pm_l0[l0index];
+               tl0 = pmap_load(l0);
+               if (tl0 == 0) {
                        /* recurse for allocating page dir */
-                       if (_pmap_alloc_l3(pmap, NUPDE + l1index,
+                       if (_pmap_alloc_l3(pmap, NUL2E + l1index,
                            lockp) == NULL) {
                                --m->wire_count;
                                atomic_subtract_int(&vm_cnt.v_wire_count, 1);
                                vm_page_free_zero(m);
                                return (NULL);
                        }
+                       tl0 = pmap_load(l0);
+                       l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
+                       l1 = &l1[l1index & Ln_ADDR_MASK];
                } else {
-                       pdpg = PHYS_TO_VM_PAGE(pmap_load(l1) & ~ATTR_MASK);
-                       pdpg->wire_count++;
+                       l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
+                       l1 = &l1[l1index & Ln_ADDR_MASK];
+                       tl1 = pmap_load(l1);
+                       if (tl1 == 0) {
+                               /* recurse for allocating page dir */
+                               if (_pmap_alloc_l3(pmap, NUL2E + l1index,
+                                   lockp) == NULL) {
+                                       --m->wire_count;
+                                       /* XXX: release mem barrier? */
+                                       atomic_subtract_int(
+                                           &vm_cnt.v_wire_count, 1);
+                                       vm_page_free_zero(m);
+                                       return (NULL);
+                               }
+                       } else {
+                               l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
+                               l2pg->wire_count++;
+                       }
                }
 
                l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
@@ -1281,8 +1510,9 @@ static vm_page_t
 pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
 {
        vm_pindex_t ptepindex;
-       pd_entry_t *l2;
+       pd_entry_t *pde, tpde;
        vm_page_t m;
+       int lvl;
 
        /*
         * Calculate pagetable page index
@@ -1292,24 +1522,29 @@ retry:
        /*
         * Get the page directory entry
         */
-       l2 = pmap_l2(pmap, va);
+       pde = pmap_pde(pmap, va, &lvl);
 
        /*
-        * If the page table page is mapped, we just increment the
-        * hold count, and activate it.
+        * If the page table page is mapped, we just increment the hold count,
+        * and activate it. If we get a level 2 pde it will point to a level 3
+        * table.
         */
-       if (l2 != NULL && pmap_load(l2) != 0) {
-               m = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
-               m->wire_count++;
-       } else {
-               /*
-                * Here if the pte page isn't mapped, or if it has been
-                * deallocated.
-                */
-               m = _pmap_alloc_l3(pmap, ptepindex, lockp);
-               if (m == NULL && lockp != NULL)
-                       goto retry;
+       if (lvl == 2) {
+               tpde = pmap_load(pde);
+               if (tpde != 0) {
+                       m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK);
+                       m->wire_count++;
+                       return (m);
+               }
        }
+
+       /*
+        * Here if the pte page isn't mapped, or if it has been deallocated.
+        */
+       m = _pmap_alloc_l3(pmap, ptepindex, lockp);
+       if (m == NULL && lockp != NULL)
+               goto retry;
+
        return (m);
 }
 
@@ -1332,7 +1567,7 @@ pmap_release(pmap_t pmap)
            ("pmap_release: pmap resident count %ld != 0",
            pmap->pm_stats.resident_count));
 
-       m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1));
+       m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0));
 
        m->wire_count--;
        atomic_subtract_int(&vm_cnt.v_wire_count, 1);
@@ -1369,7 +1604,7 @@ pmap_growkernel(vm_offset_t addr)
 {
        vm_paddr_t paddr;
        vm_page_t nkpg;
-       pd_entry_t *l1, *l2;
+       pd_entry_t *l0, *l1, *l2;
 
        mtx_assert(&kernel_map->system_mtx, MA_OWNED);
 
@@ -1377,7 +1612,11 @@ pmap_growkernel(vm_offset_t addr)
        if (addr - 1 >= kernel_map->max_offset)
                addr = kernel_map->max_offset;
        while (kernel_vm_end < addr) {
-               l1 = pmap_l1(kernel_pmap, kernel_vm_end);
+               l0 = pmap_l0(kernel_pmap, kernel_vm_end);
+               KASSERT(pmap_load(l0) != 0,
+                   ("pmap_growkernel: No level 0 kernel entry"));
+
+               l1 = pmap_l0_to_l1(l0, kernel_vm_end);
                if (pmap_load(l1) == 0) {
                        /* We need a new PDP entry */

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***