Module Name: src
Committed By: ad
Date: Sun Jun 14 21:47:15 UTC 2020
Modified Files:
src/sys/arch/aarch64/aarch64: pmap.c
src/sys/arch/aarch64/include: pmap.h
Log Message:
- Fix a lock order reversal in pmap_page_protect().
- Make sure the pmap is always locked when updating stats; atomics are no
longer needed for that.
- Remove the unneeded traversal of the pv list in _pmap_enter_pv().
- Shrink struct vm_page from 136 to 128 bytes (cache line sized) and struct
pv_entry from 48 to 32 bytes (power of 2 sized).
- Embed a pv_entry in each vm_page. This means PV entries don't need to
be allocated for private anonymous memory / COW pages / most UBC mappings.
Dynamic PV entries are then used only for stuff like shared libraries and
shared memory.
Proposed on port-arm@.
To generate a diff of this commit:
cvs rdiff -u -r1.77 -r1.78 src/sys/arch/aarch64/aarch64/pmap.c
cvs rdiff -u -r1.39 -r1.40 src/sys/arch/aarch64/include/pmap.h
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/arch/aarch64/aarch64/pmap.c
diff -u src/sys/arch/aarch64/aarch64/pmap.c:1.77 src/sys/arch/aarch64/aarch64/pmap.c:1.78
--- src/sys/arch/aarch64/aarch64/pmap.c:1.77 Wed Jun 10 22:24:22 2020
+++ src/sys/arch/aarch64/aarch64/pmap.c Sun Jun 14 21:47:14 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: pmap.c,v 1.77 2020/06/10 22:24:22 ad Exp $ */
+/* $NetBSD: pmap.c,v 1.78 2020/06/14 21:47:14 ad Exp $ */
/*
* Copyright (c) 2017 Ryo Shimizu <[email protected]>
@@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.77 2020/06/10 22:24:22 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.78 2020/06/14 21:47:14 ad Exp $");
#include "opt_arm_debug.h"
#include "opt_ddb.h"
@@ -102,8 +102,9 @@ PMAP_COUNTER(pdp_alloc_boot, "page table
PMAP_COUNTER(pdp_alloc, "page table page allocate (uvm_pagealloc)");
PMAP_COUNTER(pdp_free, "page table page free (uvm_pagefree)");
-PMAP_COUNTER(pv_enter, "pv_entry allocate and link");
-PMAP_COUNTER(pv_remove, "pv_entry free and unlink");
+PMAP_COUNTER(pv_enter, "pv_entry fill");
+PMAP_COUNTER(pv_remove_dyn, "pv_entry free and unlink dynamic");
+PMAP_COUNTER(pv_remove_emb, "pv_entry clear embedded");
PMAP_COUNTER(pv_remove_nopv, "no pv_entry found when removing pv");
PMAP_COUNTER(activate, "pmap_activate call");
@@ -184,15 +185,6 @@ PMAP_COUNTER(unwire_failure, "pmap_unwir
#define VM_PAGE_TO_PP(pg) (&(pg)->mdpage.mdpg_pp)
-struct pv_entry {
- LIST_ENTRY(pv_entry) pv_link;
- struct pmap *pv_pmap;
- vaddr_t pv_va;
- paddr_t pv_pa; /* debug */
- pt_entry_t *pv_ptep; /* for fast pte lookup */
-};
-#define pv_next pv_link.le_next
-
#define L3INDEXMASK (L3_SIZE * Ln_ENTRIES - 1)
#define PDPSWEEP_TRIGGER 512
@@ -204,7 +196,7 @@ static void _pmap_remove(struct pmap *,
struct pv_entry **);
static int _pmap_enter(struct pmap *, vaddr_t, paddr_t, vm_prot_t, u_int, bool);
-static struct pmap kernel_pmap;
+static struct pmap kernel_pmap __cacheline_aligned;
struct pmap * const kernel_pmap_ptr = &kernel_pmap;
static vaddr_t pmap_maxkvaddr;
@@ -223,27 +215,48 @@ static inline void
pmap_pv_lock(struct pmap_page *pp)
{
- mutex_enter(&pp->pp_pvlock);
+ mutex_spin_enter(&pp->pp_pvlock);
}
static inline void
pmap_pv_unlock(struct pmap_page *pp)
{
- mutex_exit(&pp->pp_pvlock);
+ mutex_spin_exit(&pp->pp_pvlock);
}
static inline void
pm_lock(struct pmap *pm)
{
- mutex_enter(&pm->pm_lock);
+ mutex_spin_enter(&pm->pm_lock);
}
static inline void
pm_unlock(struct pmap *pm)
{
- mutex_exit(&pm->pm_lock);
+ mutex_spin_exit(&pm->pm_lock);
+}
+
+static bool
+pm_reverse_lock(struct pmap *pm, struct pmap_page *pp)
+{
+
+ KASSERT(mutex_owned(&pp->pp_pvlock));
+
+ if (__predict_true(mutex_tryenter(&pm->pm_lock)))
+ return true;
+
+ if (pm != pmap_kernel())
+ pmap_reference(pm);
+ mutex_spin_exit(&pp->pp_pvlock);
+ mutex_spin_enter(&pm->pm_lock);
+ /* nothing, just wait for lock */
+ mutex_spin_exit(&pm->pm_lock);
+ if (pm != pmap_kernel())
+ pmap_destroy(pm);
+ mutex_spin_enter(&pp->pp_pvlock);
+ return false;
}
static inline struct pmap_page *
@@ -466,14 +479,22 @@ pmap_bootstrap(vaddr_t vstart, vaddr_t v
CTASSERT(sizeof(kpm->pm_stats.wired_count) == sizeof(long));
CTASSERT(sizeof(kpm->pm_stats.resident_count) == sizeof(long));
-#define PMSTAT_INC_WIRED_COUNT(pm) \
- atomic_inc_ulong(&(pm)->pm_stats.wired_count)
-#define PMSTAT_DEC_WIRED_COUNT(pm) \
- atomic_dec_ulong(&(pm)->pm_stats.wired_count)
-#define PMSTAT_INC_RESIDENT_COUNT(pm) \
- atomic_inc_ulong(&(pm)->pm_stats.resident_count)
-#define PMSTAT_DEC_RESIDENT_COUNT(pm) \
- atomic_dec_ulong(&(pm)->pm_stats.resident_count)
+#define PMSTAT_INC_WIRED_COUNT(pm) do { \
+ KASSERT(mutex_owned(&(pm)->pm_lock)); \
+ (pm)->pm_stats.wired_count++; \
+} while (/* CONSTCOND */ 0);
+#define PMSTAT_DEC_WIRED_COUNT(pm) do{ \
+ KASSERT(mutex_owned(&(pm)->pm_lock)); \
+ (pm)->pm_stats.wired_count--; \
+} while (/* CONSTCOND */ 0);
+#define PMSTAT_INC_RESIDENT_COUNT(pm) do { \
+ KASSERT(mutex_owned(&(pm)->pm_lock)); \
+ (pm)->pm_stats.resident_count++; \
+} while (/* CONSTCOND */ 0);
+#define PMSTAT_DEC_RESIDENT_COUNT(pm) do { \
+ KASSERT(mutex_owned(&(pm)->pm_lock)); \
+ (pm)->pm_stats.resident_count--; \
+} while (/* CONSTCOND */ 0);
}
inline static int
@@ -501,10 +522,12 @@ pmap_init(void)
{
pool_cache_bootstrap(&_pmap_cache, sizeof(struct pmap),
- 0, 0, 0, "pmappl", NULL, IPL_NONE, _pmap_pmap_ctor, NULL, NULL);
- pool_cache_bootstrap(&_pmap_pv_pool, sizeof(struct pv_entry),
- 0, 0, 0, "pvpl", NULL, IPL_VM, _pmap_pv_ctor, NULL, NULL);
+ coherency_unit, 0, 0, "pmappl", NULL, IPL_NONE, _pmap_pmap_ctor,
+ NULL, NULL);
+ pool_cache_bootstrap(&_pmap_pv_pool, sizeof(struct pv_entry),
+ 32, 0, PR_LARGECACHE, "pvpl", NULL, IPL_NONE, _pmap_pv_ctor,
+ NULL, NULL);
}
void
@@ -584,17 +607,12 @@ pmap_alloc_pdp(struct pmap *pm, struct v
return POOL_PADDR_INVALID;
}
- LIST_INSERT_HEAD(&pm->pm_vmlist, pg, mdpage.mdpg_vmlist);
+ LIST_INSERT_HEAD(&pm->pm_vmlist, pg, pageq.list);
pg->flags &= ~PG_BUSY; /* never busy */
pg->wire_count = 1; /* max = 1 + Ln_ENTRIES = 513 */
pa = VM_PAGE_TO_PHYS(pg);
PMAP_COUNT(pdp_alloc);
-
- VM_PAGE_TO_MD(pg)->mdpg_ptep_parent = NULL;
-
- struct pmap_page *pp = VM_PAGE_TO_PP(pg);
- pp->pp_flags = 0;
-
+ PMAP_PAGE_INIT(VM_PAGE_TO_PP(pg));
} else {
/* uvm_pageboot_alloc() returns AARCH64 KSEG address */
pg = NULL;
@@ -614,13 +632,13 @@ pmap_alloc_pdp(struct pmap *pm, struct v
static void
pmap_free_pdp(struct pmap *pm, struct vm_page *pg)
{
- LIST_REMOVE(pg, mdpage.mdpg_vmlist);
- pg->flags |= PG_BUSY;
- pg->wire_count = 0;
- struct pmap_page *pp __diagused = VM_PAGE_TO_PP(pg);
- KASSERT(LIST_EMPTY(&pp->pp_pvhead));
+ KASSERT(pm != pmap_kernel());
+ KASSERT(VM_PAGE_TO_PP(pg)->pp_pv.pv_pmap == NULL);
+ KASSERT(VM_PAGE_TO_PP(pg)->pp_pv.pv_next == NULL);
+ LIST_REMOVE(pg, pageq.list);
+ pg->wire_count = 0;
uvm_pagefree(pg);
PMAP_COUNT(pdp_free);
}
@@ -635,8 +653,10 @@ _pmap_sweep_pdp(struct pmap *pm)
int nsweep;
uint16_t wirecount __diagused;
+ KASSERT(mutex_owned(&pm->pm_lock) || pm->pm_refcnt == 0);
+
nsweep = 0;
- LIST_FOREACH_SAFE(pg, &pm->pm_vmlist, mdpage.mdpg_vmlist, tmp) {
+ LIST_FOREACH_SAFE(pg, &pm->pm_vmlist, pageq.list, tmp) {
if (pg->wire_count != 1)
continue;
@@ -655,7 +675,7 @@ _pmap_sweep_pdp(struct pmap *pm)
/* unlink from parent */
opte = atomic_swap_64(ptep_in_parent, 0);
KASSERT(lxpde_valid(opte));
- wirecount = atomic_add_32_nv(&pg->wire_count, -1); /* 1 -> 0 */
+ wirecount = --pg->wire_count; /* 1 -> 0 */
KASSERT(wirecount == 0);
pmap_free_pdp(pm, pg);
nsweep++;
@@ -670,12 +690,12 @@ _pmap_sweep_pdp(struct pmap *pm)
KASSERTMSG(pg->wire_count >= 1,
"wire_count=%d", pg->wire_count);
/* decrement wire_count of parent */
- wirecount = atomic_add_32_nv(&pg->wire_count, -1);
+ wirecount = --pg->wire_count;
KASSERTMSG(pg->wire_count <= (Ln_ENTRIES + 1),
"pm=%p[%d], pg=%p, wire_count=%d",
pm, pm->pm_asid, pg, pg->wire_count);
}
- atomic_swap_uint(&pm->pm_idlepdp, 0);
+ pm->pm_idlepdp = 0;
return nsweep;
}
@@ -683,9 +703,9 @@ _pmap_sweep_pdp(struct pmap *pm)
static void
_pmap_free_pdp_all(struct pmap *pm)
{
- struct vm_page *pg, *tmp;
+ struct vm_page *pg;
- LIST_FOREACH_SAFE(pg, &pm->pm_vmlist, mdpage.mdpg_vmlist, tmp) {
+ while ((pg = LIST_FIRST(&pm->pm_vmlist)) != NULL) {
pmap_free_pdp(pm, pg);
}
}
@@ -1015,9 +1035,10 @@ _pmap_pte_adjust_cacheflags(pt_entry_t p
}
static struct pv_entry *
-_pmap_remove_pv(struct pmap_page *pp, struct pmap *pm, vaddr_t va, pt_entry_t pte)
+_pmap_remove_pv(struct pmap_page *pp, struct pmap *pm, vaddr_t va,
+ pt_entry_t pte)
{
- struct pv_entry *pv;
+ struct pv_entry *pv, *ppv;
UVMHIST_FUNC(__func__);
UVMHIST_CALLED(pmaphist);
@@ -1025,18 +1046,26 @@ _pmap_remove_pv(struct pmap_page *pp, st
UVMHIST_LOG(pmaphist, "pp=%p, pm=%p, va=%llx, pte=%llx",
pp, pm, va, pte);
- LIST_FOREACH(pv, &pp->pp_pvhead, pv_link) {
- if ((pm == pv->pv_pmap) && (va == pv->pv_va)) {
- LIST_REMOVE(pv, pv_link);
- PMAP_COUNT(pv_remove);
+ KASSERT(mutex_owned(&pp->pp_pvlock));
+
+ for (ppv = NULL, pv = &pp->pp_pv; pv != NULL; pv = pv->pv_next) {
+ if (pv->pv_pmap == pm && trunc_page(pv->pv_va) == va) {
break;
}
+ ppv = pv;
}
-#ifdef PMAPCOUNTERS
- if (pv == NULL) {
+ if (ppv == NULL) {
+ /* embedded in pmap_page */
+ pv->pv_pmap = NULL;
+ pv = NULL;
+ PMAP_COUNT(pv_remove_emb);
+ } else if (pv != NULL) {
+ /* dynamically allocated */
+ ppv->pv_next = pv->pv_next;
+ PMAP_COUNT(pv_remove_dyn);
+ } else {
PMAP_COUNT(pv_remove_nopv);
}
-#endif
return pv;
}
@@ -1082,23 +1111,25 @@ static void
pv_dump(struct pmap_page *pp, void (*pr)(const char *, ...) __printflike(1, 2))
{
struct pv_entry *pv;
- int i;
+ int i, flags;
i = 0;
+ flags = pp->pp_pv.pv_va & (PAGE_SIZE - 1);
pr("pp=%p\n", pp);
- pr(" pp->pp_flags=%08x %s\n", pp->pp_flags,
- str_vmflags(pp->pp_flags));
+ pr(" pp flags=%08x %s\n", flags, str_vmflags(flags));
- LIST_FOREACH(pv, &pp->pp_pvhead, pv_link) {
+ for (pv = &pp->pp_pv; pv != NULL; pv = pv->pv_next) {
+ if (pv->pv_pmap == NULL) {
+ KASSERT(pv == &pp->pp_pv);
+ continue;
+ }
pr(" pv[%d] pv=%p\n",
i, pv);
pr(" pv[%d].pv_pmap = %p (asid=%d)\n",
i, pv->pv_pmap, pv->pv_pmap->pm_asid);
pr(" pv[%d].pv_va = %016lx (color=%d)\n",
- i, pv->pv_va, _pmap_color(pv->pv_va));
- pr(" pv[%d].pv_pa = %016lx (color=%d)\n",
- i, pv->pv_pa, _pmap_color(pv->pv_pa));
+ i, trunc_page(pv->pv_va), _pmap_color(pv->pv_va));
pr(" pv[%d].pv_ptep = %p\n",
i, pv->pv_ptep);
i++;
@@ -1118,14 +1149,20 @@ _pmap_enter_pv(struct pmap_page *pp, str
UVMHIST_LOG(pmaphist, "pp=%p, pm=%p, va=%llx, pa=%llx", pp, pm, va, pa);
UVMHIST_LOG(pmaphist, "ptep=%p, flags=%08x", ptep, flags, 0, 0);
- /* pv is already registered? */
- LIST_FOREACH(pv, &pp->pp_pvhead, pv_link) {
- if ((pm == pv->pv_pmap) && (va == pv->pv_va)) {
- break;
- }
- }
+ KASSERT(mutex_owned(&pp->pp_pvlock));
+ KASSERT(trunc_page(va) == va);
- if (pv == NULL) {
+ /*
+ * mapping cannot be already registered at this VA.
+ */
+ if (pp->pp_pv.pv_pmap == NULL) {
+ /*
+ * claim pv_entry embedded in pmap_page.
+ * take care not to wipe out acc/mod flags.
+ */
+ pv = &pp->pp_pv;
+ pv->pv_va = (pv->pv_va & (PAGE_SIZE - 1)) | va;
+ } else {
/*
* create and link new pv.
* pv is already allocated at beginning of _pmap_enter().
@@ -1134,23 +1171,18 @@ _pmap_enter_pv(struct pmap_page *pp, str
if (pv == NULL)
return ENOMEM;
*pvp = NULL;
-
- pv->pv_pmap = pm;
+ pv->pv_next = pp->pp_pv.pv_next;
+ pp->pp_pv.pv_next = pv;
pv->pv_va = va;
- pv->pv_pa = pa;
- pv->pv_ptep = ptep;
-
- LIST_INSERT_HEAD(&pp->pp_pvhead, pv, pv_link);
- PMAP_COUNT(pv_enter);
+ }
+ pv->pv_pmap = pm;
+ pv->pv_ptep = ptep;
+ PMAP_COUNT(pv_enter);
#ifdef PMAP_PV_DEBUG
- if (!LIST_EMPTY(&pp->pp_pvhead)){
- printf("pv %p alias added va=%016lx -> pa=%016lx\n",
- pv, va, pa);
- pv_dump(pp, printf);
- }
+ printf("pv %p alias added va=%016lx -> pa=%016lx\n", pv, va, pa);
+ pv_dump(pp, printf);
#endif
- }
return 0;
}
@@ -1158,18 +1190,14 @@ _pmap_enter_pv(struct pmap_page *pp, str
void
pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
{
- int s;
- s = splvm();
_pmap_enter(pmap_kernel(), va, pa, prot, flags | PMAP_WIRED, true);
- splx(s);
}
void
pmap_kremove(vaddr_t va, vsize_t size)
{
struct pmap *kpm = pmap_kernel();
- int s;
UVMHIST_FUNC(__func__);
UVMHIST_CALLED(pmaphist);
@@ -1182,11 +1210,9 @@ pmap_kremove(vaddr_t va, vsize_t size)
KDASSERT(!IN_KSEG_ADDR(va));
KDASSERT(IN_RANGE(va, VM_MIN_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS));
- s = splvm();
pm_lock(kpm);
_pmap_remove(kpm, va, va + size, true, NULL);
pm_unlock(kpm);
- splx(s);
}
static void
@@ -1201,13 +1227,10 @@ _pmap_protect_pv(struct pmap_page *pp, s
UVMHIST_CALLED(pmaphist);
UVMHIST_LOG(pmaphist, "pp=%p, pv=%p, prot=%08x", pp, pv, prot, 0);
+ KASSERT(mutex_owned(&pv->pv_pmap->pm_lock));
/* get prot mask from referenced/modified */
- mdattr = pp->pp_flags &
- (VM_PROT_READ | VM_PROT_WRITE);
-
- pm_lock(pv->pv_pmap);
-
+ mdattr = pp->pp_pv.pv_va & (VM_PROT_READ | VM_PROT_WRITE);
ptep = pv->pv_ptep;
pte = *ptep;
@@ -1223,9 +1246,8 @@ _pmap_protect_pv(struct pmap_page *pp, s
/* new prot = prot & pteprot & mdattr */
pte = _pmap_pte_adjust_prot(pte, prot & pteprot, mdattr, user);
atomic_swap_64(ptep, pte);
- AARCH64_TLBI_BY_ASID_VA(pv->pv_pmap->pm_asid, pv->pv_va, true);
-
- pm_unlock(pv->pv_pmap);
+ AARCH64_TLBI_BY_ASID_VA(pv->pv_pmap->pm_asid, trunc_page(pv->pv_va),
+ true);
}
void
@@ -1307,7 +1329,8 @@ pmap_protect(struct pmap *pm, vaddr_t sv
if (pp != NULL) {
/* get prot mask from referenced/modified */
- mdattr = pp->pp_flags & (VM_PROT_READ | VM_PROT_WRITE);
+ mdattr = pp->pp_pv.pv_va &
+ (VM_PROT_READ | VM_PROT_WRITE);
} else {
/* unmanaged page */
mdattr = VM_PROT_ALL;
@@ -1471,8 +1494,11 @@ pmap_destroy(struct pmap *pm)
static inline void
_pmap_pdp_setparent(struct pmap *pm, struct vm_page *pg, pt_entry_t *ptep)
{
- if ((pm != pmap_kernel()) && (pg != NULL))
+
+ if ((pm != pmap_kernel()) && (pg != NULL)) {
+ KASSERT(mutex_owned(&pm->pm_lock));
VM_PAGE_TO_MD(pg)->mdpg_ptep_parent = ptep;
+ }
}
/*
@@ -1488,6 +1514,9 @@ _pmap_pdp_addref(struct pmap *pm, paddr_
/* kernel L0-L3 page will be never freed */
if (pm == pmap_kernel())
return;
+
+ KASSERT(mutex_owned(&pm->pm_lock));
+
/* no need for L0 page */
if (pm->pm_l0table_pa == pdppa)
return;
@@ -1497,8 +1526,7 @@ _pmap_pdp_addref(struct pmap *pm, paddr_
pg = PHYS_TO_VM_PAGE(pdppa);
KASSERT(pg != NULL);
- CTASSERT(sizeof(pg->wire_count) == sizeof(uint32_t));
- atomic_add_32(&pg->wire_count, 1);
+ pg->wire_count++;
KASSERTMSG(pg->wire_count <= (Ln_ENTRIES + 1),
"pg=%p, wire_count=%d", pg, pg->wire_count);
@@ -1520,6 +1548,9 @@ _pmap_pdp_delref(struct pmap *pm, paddr_
/* kernel L0-L3 page will be never freed */
if (pm == pmap_kernel())
return false;
+
+ KASSERT(mutex_owned(&pm->pm_lock));
+
/* no need for L0 page */
if (pm->pm_l0table_pa == pdppa)
return false;
@@ -1527,7 +1558,7 @@ _pmap_pdp_delref(struct pmap *pm, paddr_
pg = PHYS_TO_VM_PAGE(pdppa);
KASSERT(pg != NULL);
- wirecount = atomic_add_32_nv(&pg->wire_count, -1);
+ wirecount = --pg->wire_count;
if (!do_free_pdp) {
/*
@@ -1536,7 +1567,7 @@ _pmap_pdp_delref(struct pmap *pm, paddr_
* pmap_enter(), but useful hint to just sweep.
*/
if (wirecount == 1)
- atomic_inc_uint(&pm->pm_idlepdp);
+ pm->pm_idlepdp++;
return false;
}
@@ -1854,9 +1885,8 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
if (pp != NULL) {
/* update referenced/modified flags */
- pp->pp_flags |=
- (flags & (VM_PROT_READ | VM_PROT_WRITE));
- mdattr &= pp->pp_flags;
+ pp->pp_pv.pv_va |= (flags & (VM_PROT_READ | VM_PROT_WRITE));
+ mdattr &= (uint32_t)pp->pp_pv.pv_va;
}
#ifdef PMAPCOUNTERS
@@ -2028,37 +2058,53 @@ pmap_page_remove(struct pmap_page *pp, v
{
struct pv_entry *pv, *pvtmp;
struct pv_entry *pvtofree = NULL;
+ struct pmap *pm;
pt_entry_t opte;
- /* remove all pages reference to this physical page */
- pmap_pv_lock(pp);
- LIST_FOREACH_SAFE(pv, &pp->pp_pvhead, pv_link, pvtmp) {
-
- opte = atomic_swap_64(pv->pv_ptep, 0);
- if (lxpde_valid(opte)) {
- _pmap_pdp_delref(pv->pv_pmap,
- AARCH64_KVA_TO_PA(trunc_page(
- (vaddr_t)pv->pv_ptep)), false);
- AARCH64_TLBI_BY_ASID_VA(pv->pv_pmap->pm_asid,
- pv->pv_va, true);
+ /* remove all pages reference to this physical page */
+ pmap_pv_lock(pp);
+ for (pv = &pp->pp_pv; pv != NULL;) {
+ if ((pm = pv->pv_pmap) == NULL) {
+ KASSERT(pv == &pp->pp_pv);
+ pv = pp->pp_pv.pv_next;
+ continue;
+ }
+ if (!pm_reverse_lock(pm, pp)) {
+ /* now retry */
+ pv = &pp->pp_pv;
+ continue;
+ }
+ opte = atomic_swap_64(pv->pv_ptep, 0);
+ if (lxpde_valid(opte)) {
+ _pmap_pdp_delref(pv->pv_pmap,
+ AARCH64_KVA_TO_PA(trunc_page(
+ (vaddr_t)pv->pv_ptep)), false);
+ AARCH64_TLBI_BY_ASID_VA(pv->pv_pmap->pm_asid,
+ trunc_page(pv->pv_va), true);
- if ((opte & LX_BLKPAG_OS_WIRED) != 0) {
- PMSTAT_DEC_WIRED_COUNT(pv->pv_pmap);
- }
- PMSTAT_DEC_RESIDENT_COUNT(pv->pv_pmap);
+ if ((opte & LX_BLKPAG_OS_WIRED) != 0) {
+ PMSTAT_DEC_WIRED_COUNT(pv->pv_pmap);
}
- LIST_REMOVE(pv, pv_link);
- PMAP_COUNT(pv_remove);
-
+ PMSTAT_DEC_RESIDENT_COUNT(pv->pv_pmap);
+ }
+ pvtmp = _pmap_remove_pv(pp, pm, trunc_page(pv->pv_va), opte);
+ if (pvtmp == NULL) {
+ KASSERT(pv == &pp->pp_pv);
+ } else {
+ KASSERT(pv == pvtmp);
+ pp->pp_pv.pv_next = pv->pv_next;
pv->pv_next = pvtofree;
pvtofree = pv;
}
- pmap_pv_unlock(pp);
+ pm_unlock(pm);
+ pv = pp->pp_pv.pv_next;
+ }
+ pmap_pv_unlock(pp);
- for (pv = pvtofree; pv != NULL; pv = pvtmp) {
- pvtmp = pv->pv_next;
- pool_cache_put(&_pmap_pv_pool, pv);
- }
+ for (pv = pvtofree; pv != NULL; pv = pvtmp) {
+ pvtmp = pv->pv_next;
+ pool_cache_put(&_pmap_pv_pool, pv);
+ }
}
#ifdef __HAVE_PMAP_PV_TRACK
@@ -2087,6 +2133,7 @@ pmap_page_protect(struct vm_page *pg, vm
{
struct pv_entry *pv;
struct pmap_page *pp;
+ struct pmap *pm;
KASSERT((prot & VM_PROT_READ) || !(prot & VM_PROT_WRITE));
@@ -2098,13 +2145,32 @@ pmap_page_protect(struct vm_page *pg, vm
UVMHIST_LOG(pmaphist, "pg=%p, pp=%p, pa=%016lx, prot=%08x",
pg, pp, VM_PAGE_TO_PHYS(pg), prot);
+ /* do an unlocked check first */
+ if (atomic_load_relaxed(&pp->pp_pv.pv_pmap) == NULL &&
+ atomic_load_relaxed(&pp->pp_pv.pv_next) == NULL) {
+ return;
+ }
+
if ((prot & (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
VM_PROT_NONE) {
pmap_page_remove(pp, prot);
} else {
pmap_pv_lock(pp);
- LIST_FOREACH(pv, &pp->pp_pvhead, pv_link) {
+ pv = &pp->pp_pv;
+ while (pv != NULL) {
+ if ((pm = pv->pv_pmap) == NULL) {
+ KASSERT(pv == &pp->pp_pv);
+ pv = pv->pv_next;
+ continue;
+ }
+ if (!pm_reverse_lock(pm, pp)) {
+ /* retry */
+ pv = &pp->pp_pv;
+ continue;
+ }
_pmap_protect_pv(pp, pv, prot);
+ pm_unlock(pm);
+ pv = pv->pv_next;
}
pmap_pv_unlock(pp);
}
@@ -2243,7 +2309,7 @@ pmap_fault_fixup(struct pmap *pm, vaddr_
"REFERENCED:"
" va=%016lx, pa=%016lx, pte_prot=%08x, accessprot=%08x",
va, pa, pmap_prot, accessprot);
- pp->pp_flags |= VM_PROT_READ; /* set referenced */
+ pp->pp_pv.pv_va |= VM_PROT_READ; /* set referenced */
pte |= LX_BLKPAG_AF;
PMAP_COUNT(fixup_referenced);
@@ -2255,7 +2321,7 @@ pmap_fault_fixup(struct pmap *pm, vaddr_
UVMHIST_LOG(pmaphist, "MODIFIED:"
" va=%016lx, pa=%016lx, pte_prot=%08x, accessprot=%08x",
va, pa, pmap_prot, accessprot);
- pp->pp_flags |= VM_PROT_WRITE; /* set modified */
+ pp->pp_pv.pv_va |= VM_PROT_WRITE; /* set modified */
pte &= ~LX_BLKPAG_AP;
pte |= LX_BLKPAG_AP_RW;
@@ -2284,23 +2350,40 @@ pmap_clear_modify(struct vm_page *pg)
UVMHIST_FUNC(__func__);
UVMHIST_CALLED(pmaphist);
- UVMHIST_LOG(pmaphist, "pg=%p, pp_flags=%08x",
- pg, pp->pp_flags, 0, 0);
+ UVMHIST_LOG(pmaphist, "pg=%p, flags=%08x",
+ pg, (int)(pp->pp_pv.pv_va & (PAGE_SIZE - 1)), 0, 0);
+
+ PMAP_COUNT(clear_modify);
+
+ /*
+ * if this is a new page, assert it has no mappings and simply zap
+ * the stored attributes without taking any locks.
+ */
+ if ((pg->flags & PG_FAKE) != 0) {
+ KASSERT(atomic_load_relaxed(&pp->pp_pv.pv_pmap) == NULL);
+ KASSERT(atomic_load_relaxed(&pp->pp_pv.pv_next) == NULL);
+ atomic_store_relaxed(&pp->pp_pv.pv_va, 0);
+ return false;
+ }
pmap_pv_lock(pp);
- if ((pp->pp_flags & VM_PROT_WRITE) == 0) {
+ if ((pp->pp_pv.pv_va & VM_PROT_WRITE) == 0) {
pmap_pv_unlock(pp);
return false;
}
- pp->pp_flags &= ~VM_PROT_WRITE;
+ pp->pp_pv.pv_va &= ~(vaddr_t)VM_PROT_WRITE;
+
+ for (pv = &pp->pp_pv; pv != NULL; pv = pv->pv_next) {
+ if (pv->pv_pmap == NULL) {
+ KASSERT(pv == &pp->pp_pv);
+ continue;
+ }
- PMAP_COUNT(clear_modify);
- LIST_FOREACH(pv, &pp->pp_pvhead, pv_link) {
PMAP_COUNT(clear_modify_pages);
- va = pv->pv_va;
+ va = trunc_page(pv->pv_va);
ptep = pv->pv_ptep;
opte = pte = *ptep;
@@ -2341,22 +2424,27 @@ pmap_clear_reference(struct vm_page *pg)
UVMHIST_FUNC(__func__);
UVMHIST_CALLED(pmaphist);
- UVMHIST_LOG(pmaphist, "pg=%p, pp=%p, pp_flags=%08x",
- pg, pp, pp->pp_flags, 0);
+ UVMHIST_LOG(pmaphist, "pg=%p, pp=%p, flags=%08x",
+ pg, pp, (int)(pp->pp_pv.pv_va & (PAGE_SIZE - 1)), 0);
pmap_pv_lock(pp);
- if ((pp->pp_flags & VM_PROT_READ) == 0) {
+ if ((pp->pp_pv.pv_va & VM_PROT_READ) == 0) {
pmap_pv_unlock(pp);
return false;
}
- pp->pp_flags &= ~VM_PROT_READ;
+ pp->pp_pv.pv_va &= ~(vaddr_t)VM_PROT_READ;
PMAP_COUNT(clear_reference);
- LIST_FOREACH(pv, &pp->pp_pvhead, pv_link) {
+ for (pv = &pp->pp_pv; pv != NULL; pv = pv->pv_next) {
+ if (pv->pv_pmap == NULL) {
+ KASSERT(pv == &pp->pp_pv);
+ continue;
+ }
+
PMAP_COUNT(clear_reference_pages);
- va = pv->pv_va;
+ va = trunc_page(pv->pv_va);
ptep = pv->pv_ptep;
opte = pte = *ptep;
@@ -2389,7 +2477,7 @@ pmap_is_modified(struct vm_page *pg)
{
struct pmap_page * const pp = VM_PAGE_TO_PP(pg);
- return (pp->pp_flags & VM_PROT_WRITE);
+ return (pp->pp_pv.pv_va & VM_PROT_WRITE);
}
bool
@@ -2397,7 +2485,7 @@ pmap_is_referenced(struct vm_page *pg)
{
struct pmap_page * const pp = VM_PAGE_TO_PP(pg);
- return (pp->pp_flags & VM_PROT_READ);
+ return (pp->pp_pv.pv_va & VM_PROT_READ);
}
#ifdef DDB
Index: src/sys/arch/aarch64/include/pmap.h
diff -u src/sys/arch/aarch64/include/pmap.h:1.39 src/sys/arch/aarch64/include/pmap.h:1.40
--- src/sys/arch/aarch64/include/pmap.h:1.39 Thu May 14 07:59:03 2020
+++ src/sys/arch/aarch64/include/pmap.h Sun Jun 14 21:47:15 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: pmap.h,v 1.39 2020/05/14 07:59:03 skrll Exp $ */
+/* $NetBSD: pmap.h,v 1.40 2020/06/14 21:47:15 ad Exp $ */
/*-
* Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -84,34 +84,38 @@ struct pmap {
bool pm_activated;
};
-struct pv_entry;
+/* sized to reduce memory consumption & cache misses (32 bytes) */
+struct pv_entry {
+ struct pv_entry *pv_next;
+ struct pmap *pv_pmap;
+ vaddr_t pv_va; /* for embedded entry (pp_pv) also includes flags */
+ void *pv_ptep; /* pointer for fast pte lookup */
+};
struct pmap_page {
kmutex_t pp_pvlock;
- LIST_HEAD(, pv_entry) pp_pvhead;
-
- /* VM_PROT_READ means referenced, VM_PROT_WRITE means modified */
- uint32_t pp_flags;
+ struct pv_entry pp_pv;
};
+/* try to keep vm_page at or under 128 bytes to reduce cache misses */
struct vm_page_md {
- LIST_ENTRY(vm_page) mdpg_vmlist; /* L[0123] table vm_page list */
- pd_entry_t *mdpg_ptep_parent; /* for page descriptor page only */
-
struct pmap_page mdpg_pp;
};
+/* for page descriptor page only */
+#define mdpg_ptep_parent mdpg_pp.pp_pv.pv_ptep
#define VM_MDPAGE_INIT(pg) \
do { \
- (pg)->mdpage.mdpg_ptep_parent = NULL; \
PMAP_PAGE_INIT(&(pg)->mdpage.mdpg_pp); \
} while (/*CONSTCOND*/ 0)
#define PMAP_PAGE_INIT(pp) \
do { \
mutex_init(&(pp)->pp_pvlock, MUTEX_NODEBUG, IPL_VM); \
- LIST_INIT(&(pp)->pp_pvhead); \
- (pp)->pp_flags = 0; \
+ (pp)->pp_pv.pv_next = NULL; \
+ (pp)->pp_pv.pv_pmap = NULL; \
+ (pp)->pp_pv.pv_va = 0; \
+ (pp)->pp_pv.pv_ptep = NULL; \
} while (/*CONSTCOND*/ 0)
/* saved permission bit for referenced/modified emulation */