Optimize pmap on amd64

Based on a patch for i386 by Art from 2008 that removes the APTE stuff.
Some additional bits were taken from a patch by Art for amd64 from 2005.

Instead of mapping the PTEs of an inactive pmap into the APTE range and
then doing an unconditional remote TLB flush on all CPUs, we just switch
to the other pmap locally by loading its page directory into %cr3, which
costs only a local TLB flush.
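
In pmap_map_ptes()/pmap_unmap_ptes() terms the idea boils down to this
(a simplified sketch of the patch below; locking and the pm_cpus
bookkeeping are left out):

	paddr_t scr3 = rcr3();			/* current page directory */

	if (pmap == pmap_kernel() || pmap->pm_pdirpa == scr3)
		scr3 = 0;			/* already reachable, no switch needed */
	else
		lcr3(pmap->pm_pdirpa);		/* local switch, local TLB flush only */

	/* ... access the PTEs through the normal recursive mapping ... */

	if (scr3 != 0)
		lcr3(scr3);			/* switch back */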

In cases where we only need to access a single PTE, we walk the page
tables by hand through the direct map instead. This saves even more TLB
flushes.
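
This is what the new pmap_find_pte_direct() below does; callers like
pmap_extract() can then look up a mapping without touching %cr3 at all,
roughly like this (sketch):

	pt_entry_t *ptes;
	int level, offs;

	level = pmap_find_pte_direct(pmap, va, &ptes, &offs);
	if (level == 0 && pmap_valid_entry(ptes[offs]))
		pa = (ptes[offs] & PG_FRAME) | (va & 0xfff);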

I have also reordered things so that we usually send the TLB shootdown
IPIs first, do some local work, and only then wait for the remote
shootdowns to finish.
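
In pmap_do_remove() and pmap_write_protect(), for example, the sequence
now looks like this (sketch):

	pmap_tlb_shootrange(pmap, sva, eva, scr3 == 0);	/* send the IPIs */
	pmap_unmap_ptes(pmap, scr3);	/* local work overlaps the remote flushes */
	pmap_tlb_shootwait();		/* only now wait for the other CPUs */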


diff --git a/sys/arch/amd64/amd64/pmap.c b/sys/arch/amd64/amd64/pmap.c
index 05cbd3e..c137412 100644
--- a/sys/arch/amd64/amd64/pmap.c
+++ b/sys/arch/amd64/amd64/pmap.c
@@ -203,7 +203,6 @@ long nkptp[] = NKPTP_INITIALIZER;
 long nkptpmax[] = NKPTPMAX_INITIALIZER;
 long nbpd[] = NBPD_INITIALIZER;
 pd_entry_t *normal_pdes[] = PDES_INITIALIZER;
-pd_entry_t *alternate_pdes[] = APDES_INITIALIZER;
 
 /* int nkpde = NKPTP; */
 
@@ -285,11 +284,12 @@ void  pmap_enter_pv(struct vm_page *, struct pv_entry *, struct pmap *,
     vaddr_t, struct vm_page *);
 struct vm_page *pmap_get_ptp(struct pmap *, vaddr_t, pd_entry_t **);
 struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int);
+int pmap_find_pte_direct(struct pmap *pm, vaddr_t va, pt_entry_t **pd, int *offs);
 void pmap_free_ptp(struct pmap *, struct vm_page *,
     vaddr_t, pt_entry_t *, pd_entry_t **, struct pg_to_free *);
 void pmap_freepage(struct pmap *, struct vm_page *, int, struct pg_to_free *);
 static boolean_t pmap_is_active(struct pmap *, int);
-void pmap_map_ptes(struct pmap *, pt_entry_t **, pd_entry_t ***);
+void pmap_map_ptes(struct pmap *, pt_entry_t **, pd_entry_t ***, paddr_t *);
 struct pv_entry *pmap_remove_pv(struct vm_page *, struct pmap *, vaddr_t);
 void pmap_do_remove(struct pmap *, vaddr_t, vaddr_t, int);
 boolean_t pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *,
@@ -299,14 +299,23 @@ void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t,
 #define PMAP_REMOVE_ALL                0       /* remove all mappings */
 #define PMAP_REMOVE_SKIPWIRED  1       /* skip wired mappings */
 
-void pmap_unmap_ptes(struct pmap *);
+void pmap_unmap_ptes(struct pmap *, paddr_t);
 boolean_t pmap_get_physpage(vaddr_t, int, paddr_t *);
 boolean_t pmap_pdes_valid(vaddr_t, pd_entry_t **, pd_entry_t *);
 void pmap_alloc_level(pd_entry_t **, vaddr_t, int, long *);
-void pmap_apte_flush(struct pmap *pmap);
 
 void pmap_sync_flags_pte(struct vm_page *, u_long);
 
+void   pmap_tlb_shootpage(struct pmap *, vaddr_t, int);
+void   pmap_tlb_shootrange(struct pmap *, vaddr_t, vaddr_t, int);
+void   pmap_tlb_shoottlb(struct pmap *, int);
+#ifdef MULTIPROCESSOR
+void   pmap_tlb_shootwait(void);
+#else
+#define        pmap_tlb_shootwait()
+#endif
+
+
 /*
  * p m a p   i n l i n e   h e l p e r   f u n c t i o n s
  */
@@ -349,57 +358,75 @@ pmap_sync_flags_pte(struct vm_page *pg, u_long pte)
        }
 }
 
-void
-pmap_apte_flush(struct pmap *pmap)
-{
-       pmap_tlb_shoottlb();
-       pmap_tlb_shootwait();
-}
-
 /*
  * pmap_map_ptes: map a pmap's PTEs into KVM
- *
- * => we lock enough pmaps to keep things locked in
- * => must be undone with pmap_unmap_ptes before returning
  */
 
 void
-pmap_map_ptes(struct pmap *pmap, pt_entry_t **ptepp, pd_entry_t ***pdeppp)
+pmap_map_ptes(struct pmap *pmap, pt_entry_t **ptepp, pd_entry_t ***pdeppp, paddr_t *save_cr3)
 {
-       pd_entry_t opde, npde;
+       paddr_t cr3 = rcr3();
 
-       /* if curpmap then we are always mapped */
-       if (pmap_is_curpmap(pmap)) {
-               *ptepp = PTE_BASE;
-               *pdeppp = normal_pdes;
-               return;
-       }
+       /* the kernel's pmap is always accessible */
+       if (pmap == pmap_kernel() || pmap->pm_pdirpa == cr3) {
+               *save_cr3 = 0;
+       } else {
+               *save_cr3 = cr3;
 
-       /* need to load a new alternate pt space into curpmap? */
-       opde = *APDP_PDE;
-       if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdirpa) {
-               npde = (pd_entry_t) (pmap->pm_pdirpa | PG_RW | PG_V);
-               *APDP_PDE = npde;
-               if (pmap_valid_entry(opde))
-                       pmap_apte_flush(curpcb->pcb_pmap);
+               /*
+                * Not sure if we need this, but better be safe.
+                * We don't have the current pmap in order to unset its
+                * active bit, but this just means that we may receive
+                * an unnecessary cross-CPU TLB flush now and then.
+                */
+               x86_atomic_setbits_u64(&pmap->pm_cpus, (1ULL << cpu_number()));
+
+               lcr3(pmap->pm_pdirpa);
        }
-       *ptepp = APTE_BASE;
-       *pdeppp = alternate_pdes;
+
+       *ptepp = PTE_BASE;
+       *pdeppp = normal_pdes;
+       return;
 }
 
 void
-pmap_unmap_ptes(struct pmap *pmap)
+pmap_unmap_ptes(struct pmap *pmap, paddr_t save_cr3)
 {
-       if (pmap_is_curpmap(pmap))
-               return;
+       if (save_cr3 != 0) {
+               x86_atomic_clearbits_u64(&pmap->pm_cpus, (1ULL << cpu_number()));
+               lcr3(save_cr3);
+       }
+}
 
-#if defined(MULTIPROCESSOR)
-       *APDP_PDE = 0;
-       pmap_apte_flush(curpcb->pcb_pmap);
-#endif
-       COUNT(apdp_pde_unmap);
+int
+pmap_find_pte_direct(struct pmap *pm, vaddr_t va, pt_entry_t **pd, int *offs)
+{
+       u_long mask, shift;
+       pd_entry_t pde;
+       paddr_t pdpa;
+       int lev;
+
+       pdpa = pm->pm_pdirpa;
+       shift = L4_SHIFT;
+       mask = L4_MASK;
+       for (lev = PTP_LEVELS; lev > 0; lev--) {
+               *pd = (pd_entry_t *)PMAP_DIRECT_MAP(pdpa);
+               *offs = (VA_SIGN_POS(va) & mask) >> shift;
+               pde = (*pd)[*offs];
+
+               /* Large pages are different, break early if we run into one. */
+               if ((pde & (PG_PS|PG_V)) != PG_V)
+                       return (lev - 1);
+
+               pdpa = ((*pd)[*offs] & PG_FRAME);
+               shift -= 9;
+               mask >>= 9;
+       }
+
+       return (0);
 }
 
+
 /*
  * p m a p   k e n t e r   f u n c t i o n s
  *
@@ -442,7 +469,7 @@ pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
                if (pa & PMAP_NOCACHE && (opte & PG_N) == 0)
                        wbinvd();
                /* This shouldn't happen */
-               pmap_tlb_shootpage(pmap_kernel(), va);
+               pmap_tlb_shootpage(pmap_kernel(), va, 1);
                pmap_tlb_shootwait();
        }
 }
@@ -476,7 +503,7 @@ pmap_kremove(vaddr_t sva, vsize_t len)
                KASSERT((opte & PG_PVLIST) == 0);
        }
 
-       pmap_tlb_shootrange(pmap_kernel(), sva, eva);
+       pmap_tlb_shootrange(pmap_kernel(), sva, eva, 1);
        pmap_tlb_shootwait();
 }
 
@@ -822,11 +849,13 @@ pmap_free_ptp(struct pmap *pmap, struct vm_page *ptp, vaddr_t va,
                invaladdr = level == 1 ? (vaddr_t)ptes :
                    (vaddr_t)pdes[level - 2];
                pmap_tlb_shootpage(curpcb->pcb_pmap,
-                   invaladdr + index * PAGE_SIZE);
+                   invaladdr + index * PAGE_SIZE,
+                   pmap_is_curpmap(curpcb->pcb_pmap));
 #if defined(MULTIPROCESSOR)
                invaladdr = level == 1 ? (vaddr_t)PTE_BASE :
                    (vaddr_t)normal_pdes[level - 2];
-               pmap_tlb_shootpage(pmap, invaladdr + index * PAGE_SIZE);
+               pmap_tlb_shootpage(pmap, invaladdr + index * PAGE_SIZE,
+                   pmap_is_curpmap(curpcb->pcb_pmap));
 #endif
                if (level < PTP_LEVELS - 1) {
                        ptp = pmap_find_ptp(pmap, va, (paddr_t)-1, level + 1);
@@ -1054,11 +1083,7 @@ pmap_destroy(struct pmap *pmap)
                }
        }
 
-       /*
-        * MULTIPROCESSOR -- no need to flush out of other processors'
-        * APTE space because we do that in pmap_unmap_ptes().
-        */
-       /* XXX: need to flush it out of other processor's APTE space? */
+       /* XXX: need to flush it out of other processor's space? */
        pool_put(&pmap_pdp_pool, pmap->pm_pdir);
 
        pool_put(&pmap_pmap_pool, pmap);
@@ -1147,8 +1172,8 @@ pmap_pdes_valid(vaddr_t va, pd_entry_t **pdes, pd_entry_t *lastpde)
 boolean_t
 pmap_extract(struct pmap *pmap, vaddr_t va, paddr_t *pap)
 {
-       pt_entry_t *ptes, pte;
-       pd_entry_t pde, **pdes;
+       pt_entry_t *ptes;
+       int level, offs;
 
        if (pmap == pmap_kernel() && va >= PMAP_DIRECT_BASE &&
            va < PMAP_DIRECT_END) {
@@ -1156,24 +1181,17 @@ pmap_extract(struct pmap *pmap, vaddr_t va, paddr_t *pap)
                return (TRUE);
        }
 
-       pmap_map_ptes(pmap, &ptes, &pdes);
-       if (pmap_pdes_valid(va, pdes, &pde) == FALSE) {
-               return FALSE;
-       }
+       level = pmap_find_pte_direct(pmap, va, &ptes, &offs);
 
-       if (pde & PG_PS) {
+       if (__predict_true(level == 0 && pmap_valid_entry(ptes[offs]))) {
                if (pap != NULL)
-                       *pap = (pde & PG_LGFRAME) | (va & 0x1fffff);
-               pmap_unmap_ptes(pmap);
+                       *pap = (ptes[offs] & PG_FRAME) | (va & 0xfff);
                return (TRUE);
        }
-
-       pte = ptes[pl1_i(va)];
-       pmap_unmap_ptes(pmap);
-
-       if (__predict_true(pmap_valid_entry(pte))) {
+       if (level == 1 &&
+           (ptes[offs] & (PG_PS|PG_V)) == (PG_PS|PG_V)) {
                if (pap != NULL)
-                       *pap = (pte & PG_FRAME) | (va & 0xfff);
+                       *pap = (ptes[offs] & PG_LGFRAME) | (va & 0x1fffff);
                return (TRUE);
        }
 
@@ -1438,11 +1456,12 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
        vaddr_t va;
        int shootall = 0;
        struct pg_to_free empty_ptps;
+       paddr_t scr3;
 
        TAILQ_INIT(&empty_ptps);
 
        PMAP_MAP_TO_HEAD_LOCK();
-       pmap_map_ptes(pmap, &ptes, &pdes);
+       pmap_map_ptes(pmap, &ptes, &pdes, &scr3);
 
        /*
         * removing one page?  take shortcut function.
@@ -1480,11 +1499,12 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
                        if (result && ptp && ptp->wire_count <= 1)
                                pmap_free_ptp(pmap, ptp, sva, ptes, pdes,
                                    &empty_ptps);
-                       pmap_tlb_shootpage(pmap, sva);
+                       pmap_tlb_shootpage(pmap, sva, scr3 == 0);
+                       pmap_unmap_ptes(pmap, scr3);
+                       pmap_tlb_shootwait();
+               } else {
+                       pmap_unmap_ptes(pmap, scr3);
                }
-
-               pmap_tlb_shootwait();
-               pmap_unmap_ptes(pmap);
                PMAP_MAP_TO_HEAD_UNLOCK();
 
                while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
@@ -1549,13 +1569,12 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
        }
 
        if (shootall)
-               pmap_tlb_shoottlb();
+               pmap_tlb_shoottlb(pmap, scr3 == 0);
        else
-               pmap_tlb_shootrange(pmap, sva, eva);
+               pmap_tlb_shootrange(pmap, sva, eva, scr3 == 0);
 
+       pmap_unmap_ptes(pmap, scr3);
        pmap_tlb_shootwait();
-
-       pmap_unmap_ptes(pmap);
        PMAP_MAP_TO_HEAD_UNLOCK();
 
        while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
@@ -1581,6 +1600,7 @@ pmap_page_remove(struct vm_page *pg)
 #endif
        struct pg_to_free empty_ptps;
        struct vm_page *ptp;
+       paddr_t scr3;
 
        TAILQ_INIT(&empty_ptps);
 
@@ -1589,7 +1609,8 @@ pmap_page_remove(struct vm_page *pg)
        while ((pve = pg->mdpage.pv_list) != NULL) {
                pg->mdpage.pv_list = pve->pv_next;
 
-               pmap_map_ptes(pve->pv_pmap, &ptes, &pdes);
+               /* XXX use direct map? */
+               pmap_map_ptes(pve->pv_pmap, &ptes, &pdes, &scr3);
 
 #ifdef DIAGNOSTIC
                if (pve->pv_ptp && pmap_pdes_valid(pve->pv_va, pdes, &pde) &&
@@ -1612,7 +1633,7 @@ pmap_page_remove(struct vm_page *pg)
                        pve->pv_pmap->pm_stats.wired_count--;
                pve->pv_pmap->pm_stats.resident_count--;
 
-               pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va);
+               pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va, scr3 == 0);
 
                pmap_sync_flags_pte(pg, opte);
 
@@ -1624,7 +1645,7 @@ pmap_page_remove(struct vm_page *pg)
                                    pve->pv_va, ptes, pdes, &empty_ptps);
                        }
                }
-               pmap_unmap_ptes(pve->pv_pmap);
+               pmap_unmap_ptes(pve->pv_pmap, scr3);
                pool_put(&pmap_pv_pool, pve);
        }
 
@@ -1652,8 +1673,8 @@ boolean_t
 pmap_test_attrs(struct vm_page *pg, unsigned int testbits)
 {
        struct pv_entry *pve;
-       pt_entry_t *ptes, pte;
-       pd_entry_t **pdes;
+       pt_entry_t *ptes;
+       int level, offs;
        u_long mybits, testflags;
 
        testflags = pmap_pte2flags(testbits);
@@ -1665,10 +1686,10 @@ pmap_test_attrs(struct vm_page *pg, unsigned int testbits)
        mybits = 0;
        for (pve = pg->mdpage.pv_list; pve != NULL && mybits == 0;
            pve = pve->pv_next) {
-               pmap_map_ptes(pve->pv_pmap, &ptes, &pdes);
-               pte = ptes[pl1_i(pve->pv_va)];
-               pmap_unmap_ptes(pve->pv_pmap);
-               mybits |= (pte & testbits);
+               level = pmap_find_pte_direct(pve->pv_pmap, pve->pv_va, &ptes,
+                   &offs);
+               KASSERT(level == 0);
+               mybits |= (ptes[offs] & testbits);
        }
        PMAP_HEAD_TO_MAP_UNLOCK();
 
@@ -1691,9 +1712,8 @@ pmap_clear_attrs(struct vm_page *pg, unsigned long clearbits)
 {
        struct pv_entry *pve;
        pt_entry_t *ptes, opte;
-       pd_entry_t **pdes;
        u_long clearflags;
-       int result;
+       int result, level, offs;
 
        clearflags = pmap_pte2flags(clearbits);
 
@@ -1704,20 +1724,19 @@ pmap_clear_attrs(struct vm_page *pg, unsigned long clearbits)
                atomic_clearbits_int(&pg->pg_flags, clearflags);
 
        for (pve = pg->mdpage.pv_list; pve != NULL; pve = pve->pv_next) {
-               pmap_map_ptes(pve->pv_pmap, &ptes, &pdes);
+               level = pmap_find_pte_direct(pve->pv_pmap, pve->pv_va, &ptes,
+                   &offs);
 #ifdef DIAGNOSTIC
-               if (!pmap_pdes_valid(pve->pv_va, pdes, NULL))
-                       panic("%s: mapping without PTP detected", __func__);
+               KASSERT(level == 0);
 #endif
-
-               opte = ptes[pl1_i(pve->pv_va)];
+               opte = ptes[offs];
                if (opte & clearbits) {
                        result = 1;
-                       pmap_pte_clearbits(&ptes[pl1_i(pve->pv_va)],
+                       pmap_pte_clearbits(&ptes[offs],
                            (opte & clearbits));
-                       pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va);
+                       pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va,
+                               pmap_is_curpmap(pve->pv_pmap));
                }
-               pmap_unmap_ptes(pve->pv_pmap);
        }
 
        PMAP_HEAD_TO_MAP_UNLOCK();
@@ -1760,8 +1779,9 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
        vaddr_t blockend;
        int shootall = 0;
        vaddr_t va;
+       paddr_t scr3;
 
-       pmap_map_ptes(pmap, &ptes, &pdes);
+       pmap_map_ptes(pmap, &ptes, &pdes, &scr3);
 
        /* should be ok, but just in case ... */
        sva &= PG_FRAME;
@@ -1813,13 +1833,13 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
        }
 
        if (shootall)
-               pmap_tlb_shoottlb();
+               pmap_tlb_shoottlb(pmap, scr3 == 0);
        else
-               pmap_tlb_shootrange(pmap, sva, eva);
+               pmap_tlb_shootrange(pmap, sva, eva, scr3 == 0);
 
-       pmap_tlb_shootwait();
+       pmap_unmap_ptes(pmap, scr3);
 
-       pmap_unmap_ptes(pmap);
+       pmap_tlb_shootwait();
 }
 
 /*
@@ -1836,18 +1856,18 @@ void
 pmap_unwire(struct pmap *pmap, vaddr_t va)
 {
        pt_entry_t *ptes;
-       pd_entry_t **pdes;
+       int level, offs;
 
-       pmap_map_ptes(pmap, &ptes, &pdes);
+       level = pmap_find_pte_direct(pmap, va, &ptes, &offs);
 
-       if (pmap_pdes_valid(va, pdes, NULL)) {
+       if (level == 0) {
 
 #ifdef DIAGNOSTIC
-               if (!pmap_valid_entry(ptes[pl1_i(va)]))
+               if (!pmap_valid_entry(ptes[offs]))
                        panic("%s: invalid (unmapped) va 0x%lx", __func__, va);
 #endif
-               if (__predict_true((ptes[pl1_i(va)] & PG_W) != 0)) {
-                       pmap_pte_clearbits(&ptes[pl1_i(va)], PG_W);
+               if (__predict_true((ptes[offs] & PG_W) != 0)) {
+                       pmap_pte_clearbits(&ptes[offs], PG_W);
                        pmap->pm_stats.wired_count--;
                }
 #ifdef DIAGNOSTIC
@@ -1856,7 +1876,6 @@ pmap_unwire(struct pmap *pmap, vaddr_t va)
                               "didn't change!\n", __func__, pmap, va);
                }
 #endif
-               pmap_unmap_ptes(pmap);
        }
 #ifdef DIAGNOSTIC
        else {
@@ -1914,12 +1933,13 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
        boolean_t nocache = (pa & PMAP_NOCACHE) != 0;
        boolean_t wc = (pa & PMAP_WC) != 0;
        int error;
+       paddr_t scr3;
 
        KASSERT(!(wc && nocache));
        pa &= PMAP_PA_MASK;
 
 #ifdef DIAGNOSTIC
-       if (va == (vaddr_t) PDP_BASE || va == (vaddr_t) APDP_BASE)
+       if (va == (vaddr_t) PDP_BASE)
                panic("%s: trying to map over PDP/APDP!", __func__);
 
        /* sanity check: kernel PTPs should already have been pre-allocated */
@@ -1936,7 +1956,7 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
         * map in ptes and get a pointer to our PTP (unless we are the kernel)
         */
 
-       pmap_map_ptes(pmap, &ptes, &pdes);
+       pmap_map_ptes(pmap, &ptes, &pdes, &scr3);
        if (pmap == pmap_kernel()) {
                ptp = NULL;
        } else {
@@ -2109,14 +2129,14 @@ enter_now:
        if (pmap_valid_entry(opte)) {
                if (nocache && (opte & PG_N) == 0)
                        wbinvd();
-               pmap_tlb_shootpage(pmap, va);
+               pmap_tlb_shootpage(pmap, va, scr3 == 0);
                pmap_tlb_shootwait();
        }
 
        error = 0;
 
 out:
-       pmap_unmap_ptes(pmap);
+       pmap_unmap_ptes(pmap, scr3);
        PMAP_MAP_TO_HEAD_UNLOCK();
 
        return error;
@@ -2337,6 +2357,7 @@ pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
        pt_entry_t *ptes, *pte;
        pd_entry_t **pdes;
        vaddr_t blkendva;
+       paddr_t scr3;
 
        /*
         * if end is out of range truncate.
@@ -2348,7 +2369,7 @@ pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
 
 
        PMAP_MAP_TO_HEAD_LOCK();
-       pmap_map_ptes(pmap, &ptes, &pdes);
+       pmap_map_ptes(pmap, &ptes, &pdes, &scr3);
 
        /*
         * dumping a range of pages: we dump in PTP sized blocks (4MB)
@@ -2373,7 +2394,7 @@ pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
                               sva, *pte, *pte & PG_FRAME);
                }
        }
-       pmap_unmap_ptes(pmap);
+       pmap_unmap_ptes(pmap, scr3);
        PMAP_MAP_TO_HEAD_UNLOCK();
 }
 #endif
@@ -2414,7 +2435,7 @@ volatile vaddr_t tlb_shoot_addr1;
 volatile vaddr_t tlb_shoot_addr2;
 
 void
-pmap_tlb_shootpage(struct pmap *pm, vaddr_t va)
+pmap_tlb_shootpage(struct pmap *pm, vaddr_t va, int shootself)
 {
        struct cpu_info *ci, *self = curcpu();
        CPU_INFO_ITERATOR cii;
@@ -2446,12 +2467,12 @@ pmap_tlb_shootpage(struct pmap *pm, vaddr_t va)
                splx(s);
        }
 
-       if (pmap_is_curpmap(pm))
+       if (shootself)
                pmap_update_pg(va);
 }
 
 void
-pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva)
+pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva, int shootself)
 {
        struct cpu_info *ci, *self = curcpu();
        CPU_INFO_ITERATOR cii;
@@ -2485,13 +2506,13 @@ pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva)
                splx(s);
        }
 
-       if (pmap_is_curpmap(pm))
+       if (shootself)
                for (va = sva; va < eva; va += PAGE_SIZE)
                        pmap_update_pg(va);
 }
 
 void
-pmap_tlb_shoottlb(void)
+pmap_tlb_shoottlb(struct pmap *pm, int shootself)
 {
        struct cpu_info *ci, *self = curcpu();
        CPU_INFO_ITERATOR cii;
@@ -2499,7 +2520,7 @@ pmap_tlb_shoottlb(void)
        u_int64_t mask = 0;
 
        CPU_INFO_FOREACH(cii, ci) {
-               if (ci == self || !(ci->ci_flags & CPUF_RUNNING))
+               if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) || !(ci->ci_flags & CPUF_RUNNING))
                        continue;
                mask |= (1ULL << ci->ci_cpuid);
                wait++;
@@ -2522,7 +2543,8 @@ pmap_tlb_shoottlb(void)
                splx(s);
        }
 
-       tlbflush();
+       if (shootself)
+               tlbflush();
 }
 
 void
@@ -2535,26 +2557,30 @@ pmap_tlb_shootwait(void)
 #else
 
 void
-pmap_tlb_shootpage(struct pmap *pm, vaddr_t va)
+pmap_tlb_shootpage(struct pmap *pm, vaddr_t va, int shootself)
 {
-       if (pmap_is_curpmap(pm))
+       if (shootself)
                pmap_update_pg(va);
 
 }
 
 void
-pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva)
+pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva, int shootself)
 {
        vaddr_t va;
 
+       if (!shootself)
+               return;
+
        for (va = sva; va < eva; va += PAGE_SIZE)
                pmap_update_pg(va);
 
 }
 
 void
-pmap_tlb_shoottlb(void)
+pmap_tlb_shoottlb(struct pmap *pm, int shootself)
 {
-       tlbflush();
+       if (shootself)
+               tlbflush();
 }
 #endif /* MULTIPROCESSOR */
diff --git a/sys/arch/amd64/include/pmap.h b/sys/arch/amd64/include/pmap.h
index 3796fd3..0342128 100644
--- a/sys/arch/amd64/include/pmap.h
+++ b/sys/arch/amd64/include/pmap.h
@@ -79,35 +79,32 @@
 
 /*
  * The x86_64 pmap module closely resembles the i386 one. It uses
- * the same recursive entry scheme, and the same alternate area
- * trick for accessing non-current pmaps. See the i386 pmap.h
- * for a description. The first obvious difference is that 2 extra
- * levels of page table need to be dealt with. The level 1 page
- * table pages are at:
+ * the same recursive entry scheme. See the i386 pmap.h for a
+ * description. The alternate area trick for accessing non-current
+ * pmaps has been removed, though, because it performs badly on SMP
+ * systems.
+ * The most obvious difference from i386 is that 2 extra levels of page
+ * table need to be dealt with. The level 1 page table pages are at:
  *
  * l1: 0x00007f8000000000 - 0x00007fffffffffff     (39 bits, needs PML4 entry)
  *
- * The alternate space is at:
- *
- * l1: 0xffffff0000000000 - 0xffffff7fffffffff     (39 bits, needs PML4 entry)
- *
  * The other levels are kept as physical pages in 3 UVM objects and are
  * temporarily mapped for virtual access when needed.
  *
  * The other obvious difference from i386 is that it has a direct map of all
  * physical memory in the VA range:
  *
- *     0xfffffe8000000000 - 0xfffffeffffffffff
+ *     0xffffff0000000000 - 0xffffff7fffffffff
+ *
+ * The direct map is used in some cases to access PTEs of non-current pmaps.
  *
  * Note that address space is signed, so the layout for 48 bits is:
  *
  *  +---------------------------------+ 0xffffffffffffffff
  *  |         Kernel Image            |
  *  +---------------------------------+ 0xffffff8000000000
- *  |    alt.L1 table (PTE pages)     |
- *  +---------------------------------+ 0xffffff0000000000
  *  |         Direct Map              |
- *  +---------------------------------+ 0xfffffe8000000000
+ *  +---------------------------------+ 0xffffff0000000000
  *  ~                                 ~
  *  |                                 |
  *  |         Kernel Space            |
@@ -155,44 +152,34 @@
 #define L4_SLOT_PTE            255
 #define L4_SLOT_KERN           256
 #define L4_SLOT_KERNBASE       511
-#define L4_SLOT_APTE           510
-#define L4_SLOT_DIRECT         509
+#define L4_SLOT_DIRECT         510
 
 #define PDIR_SLOT_KERN         L4_SLOT_KERN
 #define PDIR_SLOT_PTE          L4_SLOT_PTE
-#define PDIR_SLOT_APTE         L4_SLOT_APTE
 #define PDIR_SLOT_DIRECT       L4_SLOT_DIRECT
 
 /*
  * the following defines give the virtual addresses of various MMU
  * data structures:
- * PTE_BASE and APTE_BASE: the base VA of the linear PTE mappings
- * PTD_BASE and APTD_BASE: the base VA of the recursive mapping of the PTD
- * PDP_PDE and APDP_PDE: the VA of the PDE that points back to the PDP/APDP
+ * PTE_BASE: the base VA of the linear PTE mappings
+ * PTD_BASE: the base VA of the recursive mapping of the PTD
+ * PDP_PDE: the VA of the PDE that points back to the PDP
  *
  */
 
 #define PTE_BASE  ((pt_entry_t *) (L4_SLOT_PTE * NBPD_L4))
-#define APTE_BASE ((pt_entry_t *) (VA_SIGN_NEG((L4_SLOT_APTE * NBPD_L4))))
 #define PMAP_DIRECT_BASE       (VA_SIGN_NEG((L4_SLOT_DIRECT * NBPD_L4)))
 #define PMAP_DIRECT_END                (VA_SIGN_NEG(((L4_SLOT_DIRECT + 1) * NBPD_L4)))
 
 #define L1_BASE                PTE_BASE
-#define AL1_BASE       APTE_BASE
 
 #define L2_BASE ((pd_entry_t *)((char *)L1_BASE + L4_SLOT_PTE * NBPD_L3))
 #define L3_BASE ((pd_entry_t *)((char *)L2_BASE + L4_SLOT_PTE * NBPD_L2))
 #define L4_BASE ((pd_entry_t *)((char *)L3_BASE + L4_SLOT_PTE * NBPD_L1))
 
-#define AL2_BASE ((pd_entry_t *)((char *)AL1_BASE + L4_SLOT_PTE * NBPD_L3))
-#define AL3_BASE ((pd_entry_t *)((char *)AL2_BASE + L4_SLOT_PTE * NBPD_L2))
-#define AL4_BASE ((pd_entry_t *)((char *)AL3_BASE + L4_SLOT_PTE * NBPD_L1))
-
 #define PDP_PDE                (L4_BASE + PDIR_SLOT_PTE)
-#define APDP_PDE       (L4_BASE + PDIR_SLOT_APTE)
 
 #define PDP_BASE       L4_BASE
-#define APDP_BASE      AL4_BASE
 
 #define NKL4_MAX_ENTRIES       (unsigned long)1
 #define NKL3_MAX_ENTRIES       (unsigned long)(NKL4_MAX_ENTRIES * 512)
@@ -247,7 +234,6 @@
                                  NKL3_MAX_ENTRIES, NKL4_MAX_ENTRIES }
 #define NBPD_INITIALIZER       { NBPD_L1, NBPD_L2, NBPD_L3, NBPD_L4 }
 #define PDES_INITIALIZER       { L2_BASE, L3_BASE, L4_BASE }
-#define APDES_INITIALIZER      { AL2_BASE, AL3_BASE, AL4_BASE }
 
 /*
  * PTP macros:
@@ -410,15 +396,6 @@ void               pmap_write_protect(struct pmap *, vaddr_t,
 
 vaddr_t reserve_dumppages(vaddr_t); /* XXX: not a pmap fn */
 
-void   pmap_tlb_shootpage(struct pmap *, vaddr_t);
-void   pmap_tlb_shootrange(struct pmap *, vaddr_t, vaddr_t);
-void   pmap_tlb_shoottlb(void);
-#ifdef MULTIPROCESSOR
-void   pmap_tlb_shootwait(void);
-#else
-#define        pmap_tlb_shootwait()
-#endif
-
 paddr_t        pmap_prealloc_lowmem_ptps(paddr_t);
 
 void   pagezero(vaddr_t);
