Module Name: src
Committed By: reinoud
Date: Sun Jan 1 21:40:23 UTC 2012
Modified Files:
src/sys/arch/usermode/usermode: pmap.c
Log Message:
Rewrite usermode's pmap to use indirect addressing, significantly reducing pmap
memory usage. Typically a decimation. This is done using L1 and L2 mappings.
To generate a diff of this commit:
cvs rdiff -u -r1.87 -r1.88 src/sys/arch/usermode/usermode/pmap.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/arch/usermode/usermode/pmap.c
diff -u src/sys/arch/usermode/usermode/pmap.c:1.87 src/sys/arch/usermode/usermode/pmap.c:1.88
--- src/sys/arch/usermode/usermode/pmap.c:1.87 Sun Jan 1 13:52:51 2012
+++ src/sys/arch/usermode/usermode/pmap.c Sun Jan 1 21:40:22 2012
@@ -1,4 +1,4 @@
-/* $NetBSD: pmap.c,v 1.87 2012/01/01 13:52:51 reinoud Exp $ */
+/* $NetBSD: pmap.c,v 1.88 2012/01/01 21:40:22 reinoud Exp $ */
/*-
* Copyright (c) 2011 Reinoud Zandijk <[email protected]>
@@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.87 2012/01/01 13:52:51 reinoud Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.88 2012/01/01 21:40:22 reinoud Exp $");
#include "opt_memsize.h"
#include "opt_kmempages.h"
@@ -58,16 +58,25 @@ struct pv_entry {
#define PV_MODIFIED 0x02
};
+#define PMAP_L2_SIZE PAGE_SIZE
+#define PMAP_L2_NENTRY (PMAP_L2_SIZE / sizeof(struct pv_entry *))
+
+struct pmap_l2 {
+ struct pv_entry *pm_l2[PMAP_L2_NENTRY];
+};
+
struct pmap {
int pm_count;
int pm_flags;
#define PM_ACTIVE 0x01
struct pmap_statistics pm_stats;
- struct pv_entry **pm_entries;
+ struct pmap_l2 **pm_l1;
};
-static struct pv_entry *pv_table; /* physical pages */
-static struct pv_entry **tlb; /* current tlb mappings */
+
+static struct pv_entry **kernel_pm_entries;
+static struct pv_entry *pv_table; /* physical pages info (direct mapped) */
+static struct pv_entry **tlb; /* current tlb mappings (direct mapped) */
static struct pmap pmap_kernel_store;
struct pmap * const kernel_pmap_ptr = &pmap_kernel_store;
@@ -79,9 +88,14 @@ static void *mem_uvm; /* keeps all memor
static int phys_npages = 0;
static int pm_nentries = 0;
+static int pm_nl1 = 0;
+static int pm_l1_size = 0;
static uint64_t pm_entries_size = 0;
static struct pool pmap_pool;
+static struct pool pmap_l1_pool;
+static struct pool pmap_l2_pool;
+static struct pool pmap_pventry_pool;
/* forwards */
void pmap_bootstrap(void);
@@ -107,6 +121,7 @@ vaddr_t kmem_ext_cur_start, kmem_ext_cur
/* amount of physical memory */
int physmem;
int num_pv_entries = 0;
+int num_pmaps = 0;
#define SPARSE_MEMFILE
@@ -118,7 +133,7 @@ pmap_bootstrap(void)
struct pmap *pmap;
paddr_t totmem_len;
paddr_t fpos, file_len;
- paddr_t pv_fpos, pm_fpos, tlb_fpos;
+ paddr_t pv_fpos, tlb_fpos, pm_l1_fpos, pm_fpos;
paddr_t wlen;
paddr_t user_len, barrier_len;
paddr_t pv_table_size;
@@ -126,7 +141,7 @@ pmap_bootstrap(void)
vaddr_t mpos;
paddr_t pa;
vaddr_t va;
- uintptr_t pg;
+ uintptr_t pg, l1;
void *addr;
int err;
@@ -250,19 +265,30 @@ pmap_bootstrap(void)
assert(err == 0);
#endif
- /* set up pv_table; bootstrap problem! */
+ /* initialize counters */
fpos = 0;
free_start = fpos; /* in physical space ! */
free_end = file_len; /* in physical space ! */
+ kmem_ext_cur_start = kmem_ext_start;
+ /* calculate pv table size */
phys_npages = (free_end - free_start) / PAGE_SIZE;
pv_table_size = round_page(phys_npages * sizeof(struct pv_entry));
-
dprintf_debug("claiming %"PRIu64" KB of pv_table for "
"%"PRIdPTR" pages of physical memory\n",
(uint64_t) pv_table_size/1024, (uintptr_t) phys_npages);
- kmem_ext_cur_start = kmem_ext_start;
+ /* calculate number of pmap entries needed for a complete map */
+ pm_nentries = (VM_MAX_KERNEL_ADDRESS - VM_MIN_ADDRESS) / PAGE_SIZE;
+ pm_entries_size = round_page(pm_nentries * sizeof(struct pv_entry *));
+ dprintf_debug("tlb va->pa lookup table is %"PRIu64" KB for "
+ "%d logical pages\n", pm_entries_size/1024, pm_nentries);
+
+ /* calculate how big the l1 tables are going to be */
+ pm_nl1 = pm_nentries / PMAP_L2_NENTRY;
+ pm_l1_size = pm_nl1 * sizeof(struct pmap_l1 *);
+
+ /* claim pv table */
pv_fpos = fpos;
pv_table = (struct pv_entry *) kmem_ext_cur_start;
addr = thunk_mmap(pv_table, pv_table_size,
@@ -280,53 +306,70 @@ pmap_bootstrap(void)
kmem_ext_cur_start += pv_table_size;
fpos += pv_table_size;
- /* set up kernel pmap */
- pm_nentries = (VM_MAX_KERNEL_ADDRESS - VM_MIN_ADDRESS) / PAGE_SIZE;
- pm_entries_size = round_page(pm_nentries * sizeof(struct pv_entry *));
- dprintf_debug("pmap va->pa lookup table is %"PRIu64" KB for %d logical pages\n",
- pm_entries_size/1024, pm_nentries);
+ /* set up tlb space */
+ tlb = (struct pv_entry **) kmem_ext_cur_start;
+ tlb_fpos = fpos;
+ addr = thunk_mmap(tlb, pm_entries_size,
+ THUNK_PROT_READ | THUNK_PROT_WRITE,
+ THUNK_MAP_FILE | THUNK_MAP_FIXED | THUNK_MAP_SHARED,
+ mem_fh, tlb_fpos);
+ if (addr != (void *) tlb)
+ panic("pmap_bootstrap: can't map in tlb entries\n");
+ memset(tlb, 0, pm_entries_size); /* test and clear */
+
+ dprintf_debug("kernel tlb entries initialized correctly\n");
+
+ /* advance */
+ kmem_ext_cur_start += pm_entries_size;
+ fpos += pm_entries_size;
+
+ /* set up kernel pmap and add a l1 map */
pmap = pmap_kernel();
memset(pmap, 0, sizeof(*pmap));
pmap->pm_count = 1; /* reference */
pmap->pm_flags = PM_ACTIVE; /* kernel pmap is allways active */
- pmap->pm_entries = (struct pv_entry **) kmem_ext_cur_start;
+ pmap->pm_l1 = (struct pmap_l2 **) kmem_ext_cur_start;
- pm_fpos = fpos;
- addr = thunk_mmap(pmap->pm_entries, pm_entries_size,
+ pm_l1_fpos = fpos;
+ addr = thunk_mmap(pmap->pm_l1, pm_l1_size,
THUNK_PROT_READ | THUNK_PROT_WRITE,
THUNK_MAP_FILE | THUNK_MAP_FIXED | THUNK_MAP_SHARED,
- mem_fh, pm_fpos);
- if (addr != (void *) pmap->pm_entries)
- panic("pmap_bootstrap: can't map in pmap entries\n");
-
- memset(pmap->pm_entries, 0, pm_entries_size); /* test and clear */
+ mem_fh, pm_l1_fpos);
+ if (addr != (void *) pmap->pm_l1)
+ panic("pmap_bootstrap: can't map in pmap l1 entries\n");
- dprintf_debug("kernel pmap entries initialiased correctly\n");
+ memset(pmap->pm_l1, 0, pm_l1_size); /* test and clear */
- /* advance */
- kmem_ext_cur_start += pm_entries_size;
- fpos += pm_entries_size;
+ dprintf_debug("kernel pmap l1 table initialiased correctly\n");
- /* set up tlb space */
- tlb = (struct pv_entry **) kmem_ext_cur_start;
+ /* advance for l1 tables */
+ kmem_ext_cur_start += round_page(pm_l1_size);
+ fpos += round_page(pm_l1_size);
- tlb_fpos = fpos;
- addr = thunk_mmap(tlb, pm_entries_size,
+ /* followed by the pm entries */
+ pm_fpos = fpos;
+ kernel_pm_entries = (struct pv_entry **) kmem_ext_cur_start;
+ addr = thunk_mmap(kernel_pm_entries, pm_entries_size,
THUNK_PROT_READ | THUNK_PROT_WRITE,
THUNK_MAP_FILE | THUNK_MAP_FIXED | THUNK_MAP_SHARED,
- mem_fh, tlb_fpos);
- if (addr != (void *) tlb)
- panic("pmap_bootstrap: can't map in tlb entries\n");
-
- memset(tlb, 0, pm_entries_size); /* test and clear */
+ mem_fh, pm_fpos);
+ if (addr != (void *) kernel_pm_entries)
+ panic("pmap_bootstrap: can't map in kernel pmap entries\n");
- dprintf_debug("kernel tlb entries initialized correctly\n");
+ memset(kernel_pm_entries, 0, pm_entries_size); /* test and clear */
- /* advance */
+ /* advance for the statically allocated pm_entries */
kmem_ext_cur_start += pm_entries_size;
fpos += pm_entries_size;
+ /* put pointers in the l1 to point to the pv_entry space */
+ for (l1 = 0; l1 < pm_nl1; l1++) {
+ pmap = pmap_kernel();
+ pmap->pm_l1[l1] = (struct pmap_l2 *)
+ ((vaddr_t) kernel_pm_entries + l1 * PMAP_L2_SIZE);
+ }
+
/* kmem used [kmem_ext_start - kmem_ext_cur_start] */
kmem_ext_cur_end = kmem_ext_cur_start;
@@ -338,17 +381,23 @@ pmap_bootstrap(void)
}
dprintf_debug("pv_table mem added to the kernel pmap\n");
for (pg = 0; pg < pm_entries_size; pg += PAGE_SIZE) {
- pa = pm_fpos + pg;
- va = (vaddr_t) pmap->pm_entries + pg;
- pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0);
- }
- dprintf_debug("kernel pmap entries mem added to the kernel pmap\n");
- for (pg = 0; pg < pm_entries_size; pg += PAGE_SIZE) {
pa = tlb_fpos + pg;
va = (vaddr_t) tlb + pg;
pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0);
}
dprintf_debug("kernel tlb entries mem added to the kernel pmap\n");
+ for (pg = 0; pg < pm_l1_size; pg += PAGE_SIZE) {
+ pa = pm_l1_fpos + pg;
+ va = (vaddr_t) pmap->pm_l1 + pg;
+ pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0);
+ }
+ dprintf_debug("kernel pmap l1 mem added to the kernel pmap\n");
+ for (pg = 0; pg < pm_entries_size; pg += PAGE_SIZE) {
+ pa = pm_fpos + pg;
+ va = (vaddr_t) kernel_pm_entries + pg;
+ pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0);
+ }
+ dprintf_debug("kernel pmap entries mem added to the kernel pmap\n");
/* add file space to uvm's FREELIST */
/* XXX really from 0? or from fpos to have better stats */
@@ -388,6 +437,12 @@ pmap_deferred_init(void)
/* create pmap pool */
pool_init(&pmap_pool, sizeof(struct pmap), 0, 0, 0,
"pmappool", NULL, IPL_NONE);
+ pool_init(&pmap_l2_pool, PMAP_L2_SIZE, 0, 0, 0,
+ "pmapl2pool", NULL, IPL_HIGH);
+ pool_init(&pmap_l1_pool, pm_l1_size, 0, 0, 0,
+ "pmapl1pool", NULL, IPL_NONE);
+ pool_init(&pmap_pventry_pool, sizeof(struct pv_entry), 0, 0, 0,
+ "pventry", NULL, IPL_HIGH);
}
pmap_t
@@ -402,14 +457,26 @@ pmap_create(void)
}
dprintf_debug("pmap_create\n");
+ num_pmaps++;
+#if 0
+ printf("%s: pre alloc: num_pmaps %"PRIu64" (%"PRIu64" kb), "
+ "num_pv_entries %"PRIu64" (%"PRIu64" kb)\n",
+ __func__,
+ (uint64_t) num_pmaps,
+ (uint64_t) num_pmaps * (sizeof(*pmap) + pm_l1_size) / 1024,
+ (uint64_t) num_pv_entries,
+ (uint64_t) num_pv_entries * (sizeof(struct pv_entry)) / 1024);
+#endif
+
pmap = pool_get(&pmap_pool, PR_WAITOK);
memset(pmap, 0, sizeof(*pmap));
-
pmap->pm_count = 1;
pmap->pm_flags = 0;
- pmap->pm_entries = (struct pv_entry **) malloc(
- pm_entries_size, M_VMPMAP,
- M_WAITOK | M_ZERO);
+
+ /* claim l1 table */
+ pmap->pm_l1 = pool_get(&pmap_l1_pool, PR_WAITOK);
+ memset(pmap->pm_l1, 0, pm_l1_size);
+
dprintf_debug("\tpmap %p\n", pmap);
return pmap;
@@ -418,23 +485,31 @@ pmap_create(void)
+/*
+ * Drop one reference on pmap. When the last reference goes away, return
+ * every allocated L2 table, the L1 table and the pmap itself to their pools.
+ * The pmap must be inactive and hold no mappings at that point.
+ */
void
pmap_destroy(pmap_t pmap)
{
- int i;
+	struct pmap_l2 *l2tbl;
+	int l1;
+#ifdef DIAGNOSTIC
+	int l2;
+#endif
/* if multiple references exist just remove a reference */
dprintf_debug("pmap_destroy %p\n", pmap);
if (--pmap->pm_count > 0)
return;
+	num_pmaps--;
/* safe guard against silly errors */
KASSERT((pmap->pm_flags & PM_ACTIVE) == 0);
KASSERT(pmap->pm_stats.resident_count == 0);
KASSERT(pmap->pm_stats.wired_count == 0);
+	/* walk the L1 table; check and release each L2 table that was claimed */
+	for (l1 = 0; l1 < pm_nl1; l1++) {
+		l2tbl = pmap->pm_l1[l1];
+		if (l2tbl == NULL)
+			continue;
#ifdef DIAGNOSTIC
- for (i = 0; i < pm_nentries; i++)
- if (pmap->pm_entries[i] != NULL)
- panic("pmap_destroy: pmap isn't empty");
+		/* every mapping must have been removed before destruction */
+		for (l2 = 0; l2 < PMAP_L2_NENTRY; l2++) {
+			if (l2tbl->pm_l2[l2] != NULL)
+				panic("pmap_destroy: pmap isn't empty");
+		}
#endif
+		/* L2 tables are claimed by pmap_set_pv(); give them back here */
+		pool_put(&pmap_l2_pool, l2tbl);
+	}
- free((void *)pmap->pm_entries, M_VMPMAP);
+	pool_put(&pmap_l1_pool, pmap->pm_l1);
pool_put(&pmap_pool, pmap);
}
@@ -460,15 +535,20 @@ pmap_wired_count(pmap_t pmap)
static struct pv_entry *
pv_alloc(void)
{
+ struct pv_entry *pv;
+
num_pv_entries++;
- return malloc(sizeof(struct pv_entry), M_VMPMAP, M_NOWAIT | M_ZERO);
+ pv = pool_get(&pmap_pventry_pool, PR_WAITOK);
+ memset(pv, 0, sizeof(struct pv_entry));
+
+ return pv;
}
static void
pv_free(struct pv_entry *pv)
{
num_pv_entries--;
- free(pv, M_VMPMAP);
+ pool_put(&pmap_pventry_pool, pv);
}
static struct pv_entry *
@@ -507,6 +587,34 @@ pv_get(pmap_t pmap, uintptr_t ppn, uintp
return pv;
}
+/*
+ * Record pv as the entry for logical page lpn in pmap's two-level table.
+ * The L2 table covering lpn is claimed from pmap_l2_pool and zeroed the
+ * first time a non-NULL entry is stored in it. Storing NULL into a region
+ * that has no L2 table is a no-op, since a lookup there yields NULL anyway.
+ */
+static void
+pmap_set_pv(pmap_t pmap, uint lpn, struct pv_entry *pv)
+{
+	struct pmap_l2 *l2tbl;
+	uint l1 = lpn / PMAP_L2_NENTRY;
+	uint l2 = lpn % PMAP_L2_NENTRY;
+
+	l2tbl = pmap->pm_l1[l1];
+	if (l2tbl == NULL) {
+		/* don't claim a whole table just to store an empty slot */
+		if (pv == NULL)
+			return;
+		l2tbl = pool_get(&pmap_l2_pool, PR_WAITOK);
+		memset(l2tbl, 0, PMAP_L2_SIZE);
+		pmap->pm_l1[l1] = l2tbl;
+	}
+	l2tbl->pm_l2[l2] = pv;
+}
+
+/*
+ * Return the entry stored for logical page lpn in pmap's two-level table,
+ * or NULL when the L1 slot covering lpn has no L2 table attached.
+ */
+static struct pv_entry *
+pmap_lookup_pv(pmap_t pmap, uint lpn)
+{
+	int l1_idx = lpn / PMAP_L2_NENTRY;
+	int l2_idx = lpn % PMAP_L2_NENTRY;
+	struct pmap_l2 *tbl = pmap->pm_l1[l1_idx];
+
+	if (tbl == NULL)
+		return NULL;
+	return tbl->pm_l2[l2_idx];
+}
+
/*
* Check if the given page fault was our reference / modified emulation fault;
* if so return true otherwise return false and let uvm handle it
@@ -522,7 +630,7 @@ pmap_fault(pmap_t pmap, vaddr_t va, vm_p
/* get logical page from vaddr */
lpn = atop(va - VM_MIN_ADDRESS); /* V->L */
- pv = pmap->pm_entries[lpn];
+ pv = pmap_lookup_pv(pmap, lpn);
/* not known! then it must be UVM's work */
if (pv == NULL) {
@@ -732,8 +840,8 @@ pmap_do_enter(pmap_t pmap, vaddr_t va, p
s = splvm();
/* remove existing mapping at this lpn */
- if (pmap->pm_entries[lpn] &&
- pmap->pm_entries[lpn]->pv_ppn != ppn)
+ pv = pmap_lookup_pv(pmap, lpn);
+ if (pv && pv->pv_ppn != ppn)
pmap_remove(pmap, va, va + PAGE_SIZE);
/* get our entry */
@@ -768,7 +876,7 @@ pmap_do_enter(pmap_t pmap, vaddr_t va, p
/* map it in */
pmap_update_page(ppn);
- pmap->pm_entries[lpn] = pv;
+ pmap_set_pv(pmap, lpn, pv);
/* adjust stats */
if (pv->pv_vflags & PV_WIRED)
@@ -808,9 +916,9 @@ pv_release(pmap_t pmap, uintptr_t ppn, u
if ((pmap == pv->pv_pmap) && (lpn == pv->pv_lpn)) {
npv = pv->pv_next;
if (npv) {
- /* Pull up first entry from chain. */
+ /* pull up first entry from chain. */
memcpy(pv, npv, offsetof(struct pv_entry, pv_pflags));
- pv->pv_pmap->pm_entries[pv->pv_lpn] = pv;
+ pmap_set_pv(pv->pv_pmap, pv->pv_lpn, pv);
pv_free(npv);
} else {
memset(pv, 0, offsetof(struct pv_entry, pv_pflags));
@@ -825,7 +933,7 @@ pv_release(pmap_t pmap, uintptr_t ppn, u
pv->pv_next = npv->pv_next;
pv_free(npv);
}
- pmap->pm_entries[lpn] = NULL;
+ pmap_set_pv(pmap, lpn, NULL);
pmap->pm_stats.resident_count--;
}
@@ -844,14 +952,14 @@ pmap_remove(pmap_t pmap, vaddr_t sva, va
s = splvm();
for (lpn = slpn; lpn < elpn; lpn++) {
- pv = pmap->pm_entries[lpn];
+ pv = pmap_lookup_pv(pmap, lpn);
if (pv != NULL) {
if (pmap->pm_flags & PM_ACTIVE) {
pmap_page_deactivate(pv);
// MEMC_WRITE(pv->pv_deactivate);
// cpu_cache_flush();
}
- pmap->pm_entries[lpn] = NULL;
+ pmap_set_pv(pmap, lpn, NULL);
if (pv->pv_vflags & PV_WIRED)
pmap->pm_stats.wired_count--;
pv_release(pmap, pv->pv_ppn, lpn);
@@ -906,7 +1014,7 @@ pmap_protect(pmap_t pmap, vaddr_t sva, v
s = splvm();
for (lpn = slpn; lpn < elpn; lpn++) {
- pv = pmap->pm_entries[lpn];
+ pv = pmap_lookup_pv(pmap, lpn);
if (pv != NULL) {
pv->pv_prot &= prot;
pv_update(pv);
@@ -928,7 +1036,7 @@ pmap_unwire(pmap_t pmap, vaddr_t va)
return;
lpn = atop(va - VM_MIN_ADDRESS); /* V->L */
- pv = pmap->pm_entries[lpn];
+ pv = pmap_lookup_pv(pmap, lpn);
if (pv == NULL)
return;
/* but is it wired? */
@@ -953,7 +1061,7 @@ pmap_extract(pmap_t pmap, vaddr_t va, pa
panic("pmap_extract: invalid va isued\n");
#endif
lpn = atop(va - VM_MIN_ADDRESS); /* V->L */
- pv = pmap->pm_entries[lpn];
+ pv = pmap_lookup_pv(pmap, lpn);
if (pv == NULL)
return false;
@@ -1016,8 +1124,10 @@ void
pmap_deactivate(struct lwp *l)
{
struct proc *p = l->l_proc;
+ struct pv_entry *pv;
+ struct pmap_l2 *l2tbl;
pmap_t pmap;
- int i;
+ int l1, l2;
pmap = p->p_vmspace->vm_map.pmap;
dprintf_debug("pmap_DEactivate for lwp %p, pmap = %p\n", l, pmap);
@@ -1030,12 +1140,20 @@ pmap_deactivate(struct lwp *l)
active_pmap = NULL;
pmap->pm_flags &=~ PM_ACTIVE;
- for (i = 0; i < pm_nentries; i++) {
- if (pmap->pm_entries[i] != NULL) {
- pmap_page_deactivate(pmap->pm_entries[i]);
-// MEMC_WRITE(pmap->pm_entries[i]->pv_deactivate);
+
+ for (l1 = 0; l1 < pm_nl1; l1++) {
+ l2tbl = pmap->pm_l1[l1];
+ if (!l2tbl)
+ continue;
+ for (l2 = 0; l2 < PMAP_L2_NENTRY; l2++) {
+ pv = l2tbl->pm_l2[l2];
+ if (pv) {
+ pmap_page_deactivate(pv);
+ // MEMC_WRITE(pmap->pm_entries[i]->pv_deactivate);
+ }
}
}
+
/* dummy */
// cpu_cache_flush();
}
@@ -1124,7 +1242,7 @@ pmap_page_protect(struct vm_page *page,
npv = pv->pv_next;
/* remove from pmap */
- pv->pv_pmap->pm_entries[pv->pv_lpn] = NULL;
+ pmap_set_pv(pv->pv_pmap, pv->pv_lpn, NULL);
if (pv->pv_vflags & PV_WIRED)
pv->pv_pmap->pm_stats.wired_count--;
pv_release(pv->pv_pmap, ppn, pv->pv_lpn);