Module Name: src Committed By: reinoud Date: Sun Jan 1 21:40:23 UTC 2012
Modified Files: src/sys/arch/usermode/usermode: pmap.c Log Message: Rewrite usermodes pmap to use indirect adressing significantly reducing pmap memory usage. Typically a decimation. This is done using L1 and L2 mappings. To generate a diff of this commit: cvs rdiff -u -r1.87 -r1.88 src/sys/arch/usermode/usermode/pmap.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/usermode/usermode/pmap.c diff -u src/sys/arch/usermode/usermode/pmap.c:1.87 src/sys/arch/usermode/usermode/pmap.c:1.88 --- src/sys/arch/usermode/usermode/pmap.c:1.87 Sun Jan 1 13:52:51 2012 +++ src/sys/arch/usermode/usermode/pmap.c Sun Jan 1 21:40:22 2012 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.c,v 1.87 2012/01/01 13:52:51 reinoud Exp $ */ +/* $NetBSD: pmap.c,v 1.88 2012/01/01 21:40:22 reinoud Exp $ */ /*- * Copyright (c) 2011 Reinoud Zandijk <rein...@netbsd.org> @@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.87 2012/01/01 13:52:51 reinoud Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.88 2012/01/01 21:40:22 reinoud Exp $"); #include "opt_memsize.h" #include "opt_kmempages.h" @@ -58,16 +58,25 @@ struct pv_entry { #define PV_MODIFIED 0x02 }; +#define PMAP_L2_SIZE PAGE_SIZE +#define PMAP_L2_NENTRY (PMAP_L2_SIZE / sizeof(struct pv_entry *)) + +struct pmap_l2 { + struct pv_entry *pm_l2[PMAP_L2_NENTRY]; +}; + struct pmap { int pm_count; int pm_flags; #define PM_ACTIVE 0x01 struct pmap_statistics pm_stats; - struct pv_entry **pm_entries; + struct pmap_l2 **pm_l1; }; -static struct pv_entry *pv_table; /* physical pages */ -static struct pv_entry **tlb; /* current tlb mappings */ + +static struct pv_entry **kernel_pm_entries; +static struct pv_entry *pv_table; /* physical pages info (direct mapped) */ +static struct pv_entry **tlb; /* current tlb mappings (direct mapped) */ static struct pmap pmap_kernel_store; struct pmap * const kernel_pmap_ptr = &pmap_kernel_store; @@ -79,9 +88,14 @@ static void *mem_uvm; /* keeps all memor static int phys_npages = 0; static int pm_nentries = 0; +static int pm_nl1 = 0; +static int pm_l1_size = 0; static uint64_t pm_entries_size = 0; static struct pool pmap_pool; +static struct pool pmap_l1_pool; +static struct pool pmap_l2_pool; +static struct pool pmap_pventry_pool; /* forwards */ void pmap_bootstrap(void); @@ -107,6 +121,7 @@ vaddr_t kmem_ext_cur_start, kmem_ext_cur /* amount of physical memory */ int physmem; int num_pv_entries = 0; +int num_pmaps = 0; #define SPARSE_MEMFILE @@ -118,7 +133,7 @@ pmap_bootstrap(void) struct pmap *pmap; paddr_t totmem_len; paddr_t fpos, file_len; - paddr_t pv_fpos, pm_fpos, tlb_fpos; + paddr_t pv_fpos, tlb_fpos, pm_l1_fpos, pm_fpos; paddr_t wlen; paddr_t user_len, barrier_len; paddr_t pv_table_size; @@ -126,7 +141,7 @@ pmap_bootstrap(void) vaddr_t mpos; paddr_t pa; vaddr_t va; - uintptr_t pg; + uintptr_t pg, l1; void *addr; int err; @@ -250,19 +265,30 @@ pmap_bootstrap(void) assert(err == 0); #endif - /* set up pv_table; bootstrap problem! */ + /* initialize counters */ fpos = 0; free_start = fpos; /* in physical space ! */ free_end = file_len; /* in physical space ! */ + kmem_ext_cur_start = kmem_ext_start; + /* calculate pv table size */ phys_npages = (free_end - free_start) / PAGE_SIZE; pv_table_size = round_page(phys_npages * sizeof(struct pv_entry)); - dprintf_debug("claiming %"PRIu64" KB of pv_table for " "%"PRIdPTR" pages of physical memory\n", (uint64_t) pv_table_size/1024, (uintptr_t) phys_npages); - kmem_ext_cur_start = kmem_ext_start; + /* calculate number of pmap entries needed for a complete map */ + pm_nentries = (VM_MAX_KERNEL_ADDRESS - VM_MIN_ADDRESS) / PAGE_SIZE; + pm_entries_size = round_page(pm_nentries * sizeof(struct pv_entry *)); + dprintf_debug("tlb va->pa lookup table is %"PRIu64" KB for " + "%d logical pages\n", pm_entries_size/1024, pm_nentries); + + /* calculate how big the l1 tables are going to be */ + pm_nl1 = pm_nentries / PMAP_L2_NENTRY; + pm_l1_size = pm_nl1 * sizeof(struct pmap_l1 *); + + /* claim pv table */ pv_fpos = fpos; pv_table = (struct pv_entry *) kmem_ext_cur_start; addr = thunk_mmap(pv_table, pv_table_size, @@ -280,53 +306,70 @@ pmap_bootstrap(void) kmem_ext_cur_start += pv_table_size; fpos += pv_table_size; - /* set up kernel pmap */ - pm_nentries = (VM_MAX_KERNEL_ADDRESS - VM_MIN_ADDRESS) / PAGE_SIZE; - pm_entries_size = round_page(pm_nentries * sizeof(struct pv_entry *)); - dprintf_debug("pmap va->pa lookup table is %"PRIu64" KB for %d logical pages\n", - pm_entries_size/1024, pm_nentries); + /* set up tlb space */ + tlb = (struct pv_entry **) kmem_ext_cur_start; + tlb_fpos = fpos; + addr = thunk_mmap(tlb, pm_entries_size, + THUNK_PROT_READ | THUNK_PROT_WRITE, + THUNK_MAP_FILE | THUNK_MAP_FIXED | THUNK_MAP_SHARED, + mem_fh, tlb_fpos); + if (addr != (void *) tlb) + panic("pmap_bootstrap: can't map in tlb entries\n"); + memset(tlb, 0, pm_entries_size); /* test and clear */ + + dprintf_debug("kernel tlb entries initialized correctly\n"); + + /* advance */ + kmem_ext_cur_start += pm_entries_size; + fpos += pm_entries_size; + + /* set up kernel pmap and add a l1 map */ pmap = pmap_kernel(); memset(pmap, 0, sizeof(*pmap)); pmap->pm_count = 1; /* reference */ pmap->pm_flags = PM_ACTIVE; /* kernel pmap is allways active */ - pmap->pm_entries = (struct pv_entry **) kmem_ext_cur_start; + pmap->pm_l1 = (struct pmap_l2 **) kmem_ext_cur_start; - pm_fpos = fpos; - addr = thunk_mmap(pmap->pm_entries, pm_entries_size, + pm_l1_fpos = fpos; + addr = thunk_mmap(pmap->pm_l1, pm_l1_size, THUNK_PROT_READ | THUNK_PROT_WRITE, THUNK_MAP_FILE | THUNK_MAP_FIXED | THUNK_MAP_SHARED, - mem_fh, pm_fpos); - if (addr != (void *) pmap->pm_entries) - panic("pmap_bootstrap: can't map in pmap entries\n"); - - memset(pmap->pm_entries, 0, pm_entries_size); /* test and clear */ + mem_fh, pm_l1_fpos); + if (addr != (void *) pmap->pm_l1) + panic("pmap_bootstrap: can't map in pmap l1 entries\n"); - dprintf_debug("kernel pmap entries initialiased correctly\n"); + memset(pmap->pm_l1, 0, pm_l1_size); /* test and clear */ - /* advance */ - kmem_ext_cur_start += pm_entries_size; - fpos += pm_entries_size; + dprintf_debug("kernel pmap l1 table initialiased correctly\n"); - /* set up tlb space */ - tlb = (struct pv_entry **) kmem_ext_cur_start; + /* advance for l1 tables */ + kmem_ext_cur_start += round_page(pm_l1_size); + fpos += round_page(pm_l1_size); - tlb_fpos = fpos; - addr = thunk_mmap(tlb, pm_entries_size, + /* followed by the pm entries */ + pm_fpos = fpos; + kernel_pm_entries = (struct pv_entry **) kmem_ext_cur_start; + addr = thunk_mmap(kernel_pm_entries, pm_entries_size, THUNK_PROT_READ | THUNK_PROT_WRITE, THUNK_MAP_FILE | THUNK_MAP_FIXED | THUNK_MAP_SHARED, - mem_fh, tlb_fpos); - if (addr != (void *) tlb) - panic("pmap_bootstrap: can't map in tlb entries\n"); - - memset(tlb, 0, pm_entries_size); /* test and clear */ + mem_fh, pm_fpos); + if (addr != (void *) kernel_pm_entries) + panic("pmap_bootstrap: can't map in kernel pmap entries\n"); - dprintf_debug("kernel tlb entries initialized correctly\n"); + memset(kernel_pm_entries, 0, pm_entries_size); /* test and clear */ - /* advance */ + /* advance for the statically allocated pm_entries */ kmem_ext_cur_start += pm_entries_size; fpos += pm_entries_size; + /* put pointers in the l1 to point to the pv_entry space */ + for (l1 = 0; l1 < pm_nl1; l1++) { + pmap = pmap_kernel(); + pmap->pm_l1[l1] = (struct pmap_l2 *) + ((vaddr_t) kernel_pm_entries + l1 * PMAP_L2_SIZE); + } + /* kmem used [kmem_ext_start - kmem_ext_cur_start] */ kmem_ext_cur_end = kmem_ext_cur_start; @@ -338,17 +381,23 @@ pmap_bootstrap(void) } dprintf_debug("pv_table mem added to the kernel pmap\n"); for (pg = 0; pg < pm_entries_size; pg += PAGE_SIZE) { - pa = pm_fpos + pg; - va = (vaddr_t) pmap->pm_entries + pg; - pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0); - } - dprintf_debug("kernel pmap entries mem added to the kernel pmap\n"); - for (pg = 0; pg < pm_entries_size; pg += PAGE_SIZE) { pa = tlb_fpos + pg; va = (vaddr_t) tlb + pg; pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0); } dprintf_debug("kernel tlb entries mem added to the kernel pmap\n"); + for (pg = 0; pg < pm_l1_size; pg += PAGE_SIZE) { + pa = pm_l1_fpos + pg; + va = (vaddr_t) pmap->pm_l1 + pg; + pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0); + } + dprintf_debug("kernel pmap l1 mem added to the kernel pmap\n"); + for (pg = 0; pg < pm_entries_size; pg += PAGE_SIZE) { + pa = pm_fpos + pg; + va = (vaddr_t) kernel_pm_entries + pg; + pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0); + } + dprintf_debug("kernel pmap entries mem added to the kernel pmap\n"); /* add file space to uvm's FREELIST */ /* XXX really from 0? or from fpos to have better stats */ @@ -388,6 +437,12 @@ pmap_deferred_init(void) /* create pmap pool */ pool_init(&pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappool", NULL, IPL_NONE); + pool_init(&pmap_l2_pool, PMAP_L2_SIZE, 0, 0, 0, + "pmapl2pool", NULL, IPL_HIGH); + pool_init(&pmap_l1_pool, pm_l1_size, 0, 0, 0, + "pmapl1pool", NULL, IPL_NONE); + pool_init(&pmap_pventry_pool, sizeof(struct pv_entry), 0, 0, 0, + "pventry", NULL, IPL_HIGH); } pmap_t @@ -402,14 +457,26 @@ pmap_create(void) } dprintf_debug("pmap_create\n"); + num_pmaps++; +#if 0 + printf("%s: pre alloc: num_pmaps %"PRIu64" (%"PRIu64" kb), " + "num_pv_entries %"PRIu64" (%"PRIu64" kb)\n", + __func__, + (uint64_t) num_pmaps, + (uint64_t) num_pmaps * (sizeof(*pmap) + pm_l1_size) / 1024, + (uint64_t) num_pv_entries, + (uint64_t) num_pv_entries * (sizeof(struct pv_entry)) / 1024); +#endif + pmap = pool_get(&pmap_pool, PR_WAITOK); memset(pmap, 0, sizeof(*pmap)); - pmap->pm_count = 1; pmap->pm_flags = 0; - pmap->pm_entries = (struct pv_entry **) malloc( - pm_entries_size, M_VMPMAP, - M_WAITOK | M_ZERO); + + /* claim l1 table */ + pmap->pm_l1 = pool_get(&pmap_l1_pool, PR_WAITOK); + memset(pmap->pm_l1, 0, pm_l1_size); + dprintf_debug("\tpmap %p\n", pmap); return pmap; @@ -418,23 +485,31 @@ pmap_create(void) void pmap_destroy(pmap_t pmap) { - int i; + struct pmap_l2 *l2tbl; + int l1, l2; /* if multiple references exist just remove a reference */ dprintf_debug("pmap_destroy %p\n", pmap); if (--pmap->pm_count > 0) return; + num_pmaps--; /* safe guard against silly errors */ KASSERT((pmap->pm_flags & PM_ACTIVE) == 0); KASSERT(pmap->pm_stats.resident_count == 0); KASSERT(pmap->pm_stats.wired_count == 0); #ifdef DIAGNOSTIC - for (i = 0; i < pm_nentries; i++) - if (pmap->pm_entries[i] != NULL) - panic("pmap_destroy: pmap isn't empty"); + for (l1 = 0; l1 < pm_nl1; l1++) { + l2tbl = pmap->pm_l1[l1]; + if (!l2tbl) + continue; + for (l2 = 0; l2 < PMAP_L2_NENTRY; l2++) { + if (l2tbl->pm_l2[l2]) + panic("pmap_destroy: pmap isn't empty"); + } + } #endif - free((void *)pmap->pm_entries, M_VMPMAP); + pool_put(&pmap_l1_pool, pmap->pm_l1); pool_put(&pmap_pool, pmap); } @@ -460,15 +535,20 @@ pmap_wired_count(pmap_t pmap) static struct pv_entry * pv_alloc(void) { + struct pv_entry *pv; + num_pv_entries++; - return malloc(sizeof(struct pv_entry), M_VMPMAP, M_NOWAIT | M_ZERO); + pv = pool_get(&pmap_pventry_pool, PR_WAITOK); + memset(pv, 0, sizeof(struct pv_entry)); + + return pv; } static void pv_free(struct pv_entry *pv) { num_pv_entries--; - free(pv, M_VMPMAP); + pool_put(&pmap_pventry_pool, pv); } static struct pv_entry * @@ -507,6 +587,34 @@ pv_get(pmap_t pmap, uintptr_t ppn, uintp return pv; } +static void +pmap_set_pv(pmap_t pmap, uint lpn, struct pv_entry *pv) +{ + struct pmap_l2 *l2tbl; + int l1 = lpn / PMAP_L2_NENTRY; + int l2 = lpn % PMAP_L2_NENTRY; + + l2tbl = pmap->pm_l1[l1]; + if (!l2tbl) { + l2tbl = pmap->pm_l1[l1] = pool_get(&pmap_l2_pool, PR_WAITOK); + memset(l2tbl, 0, PMAP_L2_SIZE); + } + l2tbl->pm_l2[l2] = pv; +} + +static struct pv_entry * +pmap_lookup_pv(pmap_t pmap, uint lpn) +{ + struct pmap_l2 *l2tbl; + int l1 = lpn / PMAP_L2_NENTRY; + int l2 = lpn % PMAP_L2_NENTRY; + + l2tbl = pmap->pm_l1[l1]; + if (l2tbl) + return l2tbl->pm_l2[l2]; + return NULL; +} + /* * Check if the given page fault was our reference / modified emulation fault; * if so return true otherwise return false and let uvm handle it @@ -522,7 +630,7 @@ pmap_fault(pmap_t pmap, vaddr_t va, vm_p /* get logical page from vaddr */ lpn = atop(va - VM_MIN_ADDRESS); /* V->L */ - pv = pmap->pm_entries[lpn]; + pv = pmap_lookup_pv(pmap, lpn); /* not known! then it must be UVM's work */ if (pv == NULL) { @@ -732,8 +840,8 @@ pmap_do_enter(pmap_t pmap, vaddr_t va, p s = splvm(); /* remove existing mapping at this lpn */ - if (pmap->pm_entries[lpn] && - pmap->pm_entries[lpn]->pv_ppn != ppn) + pv = pmap_lookup_pv(pmap, lpn); + if (pv && pv->pv_ppn != ppn) pmap_remove(pmap, va, va + PAGE_SIZE); /* get our entry */ @@ -768,7 +876,7 @@ pmap_do_enter(pmap_t pmap, vaddr_t va, p /* map it in */ pmap_update_page(ppn); - pmap->pm_entries[lpn] = pv; + pmap_set_pv(pmap, lpn, pv); /* adjust stats */ if (pv->pv_vflags & PV_WIRED) @@ -808,9 +916,9 @@ pv_release(pmap_t pmap, uintptr_t ppn, u if ((pmap == pv->pv_pmap) && (lpn == pv->pv_lpn)) { npv = pv->pv_next; if (npv) { - /* Pull up first entry from chain. */ + /* pull up first entry from chain. */ memcpy(pv, npv, offsetof(struct pv_entry, pv_pflags)); - pv->pv_pmap->pm_entries[pv->pv_lpn] = pv; + pmap_set_pv(pv->pv_pmap, pv->pv_lpn, pv); pv_free(npv); } else { memset(pv, 0, offsetof(struct pv_entry, pv_pflags)); @@ -825,7 +933,7 @@ pv_release(pmap_t pmap, uintptr_t ppn, u pv->pv_next = npv->pv_next; pv_free(npv); } - pmap->pm_entries[lpn] = NULL; + pmap_set_pv(pmap, lpn, NULL); pmap->pm_stats.resident_count--; } @@ -844,14 +952,14 @@ pmap_remove(pmap_t pmap, vaddr_t sva, va s = splvm(); for (lpn = slpn; lpn < elpn; lpn++) { - pv = pmap->pm_entries[lpn]; + pv = pmap_lookup_pv(pmap, lpn); if (pv != NULL) { if (pmap->pm_flags & PM_ACTIVE) { pmap_page_deactivate(pv); // MEMC_WRITE(pv->pv_deactivate); // cpu_cache_flush(); } - pmap->pm_entries[lpn] = NULL; + pmap_set_pv(pmap, lpn, NULL); if (pv->pv_vflags & PV_WIRED) pmap->pm_stats.wired_count--; pv_release(pmap, pv->pv_ppn, lpn); @@ -906,7 +1014,7 @@ pmap_protect(pmap_t pmap, vaddr_t sva, v s = splvm(); for (lpn = slpn; lpn < elpn; lpn++) { - pv = pmap->pm_entries[lpn]; + pv = pmap_lookup_pv(pmap, lpn); if (pv != NULL) { pv->pv_prot &= prot; pv_update(pv); @@ -928,7 +1036,7 @@ pmap_unwire(pmap_t pmap, vaddr_t va) return; lpn = atop(va - VM_MIN_ADDRESS); /* V->L */ - pv = pmap->pm_entries[lpn]; + pv = pmap_lookup_pv(pmap, lpn); if (pv == NULL) return; /* but is it wired? */ @@ -953,7 +1061,7 @@ pmap_extract(pmap_t pmap, vaddr_t va, pa panic("pmap_extract: invalid va isued\n"); #endif lpn = atop(va - VM_MIN_ADDRESS); /* V->L */ - pv = pmap->pm_entries[lpn]; + pv = pmap_lookup_pv(pmap, lpn); if (pv == NULL) return false; @@ -1016,8 +1124,10 @@ void pmap_deactivate(struct lwp *l) { struct proc *p = l->l_proc; + struct pv_entry *pv; + struct pmap_l2 *l2tbl; pmap_t pmap; - int i; + int l1, l2; pmap = p->p_vmspace->vm_map.pmap; dprintf_debug("pmap_DEactivate for lwp %p, pmap = %p\n", l, pmap); @@ -1030,12 +1140,20 @@ pmap_deactivate(struct lwp *l) active_pmap = NULL; pmap->pm_flags &=~ PM_ACTIVE; - for (i = 0; i < pm_nentries; i++) { - if (pmap->pm_entries[i] != NULL) { - pmap_page_deactivate(pmap->pm_entries[i]); -// MEMC_WRITE(pmap->pm_entries[i]->pv_deactivate); + + for (l1 = 0; l1 < pm_nl1; l1++) { + l2tbl = pmap->pm_l1[l1]; + if (!l2tbl) + continue; + for (l2 = 0; l2 < PMAP_L2_NENTRY; l2++) { + pv = l2tbl->pm_l2[l2]; + if (pv) { + pmap_page_deactivate(pv); + // MEMC_WRITE(pmap->pm_entries[i]->pv_deactivate); + } } } + /* dummy */ // cpu_cache_flush(); } @@ -1124,7 +1242,7 @@ pmap_page_protect(struct vm_page *page, npv = pv->pv_next; /* remove from pmap */ - pv->pv_pmap->pm_entries[pv->pv_lpn] = NULL; + pmap_set_pv(pv->pv_pmap, pv->pv_lpn, NULL); if (pv->pv_vflags & PV_WIRED) pv->pv_pmap->pm_stats.wired_count--; pv_release(pv->pv_pmap, ppn, pv->pv_lpn);