Module Name:    src
Committed By:   reinoud
Date:           Sun Jan  1 21:40:23 UTC 2012

Modified Files:
        src/sys/arch/usermode/usermode: pmap.c

Log Message:
Rewrite usermode's pmap to use indirect addressing, significantly reducing
pmap memory usage (typically a tenfold reduction). This is done using L1 and
L2 mappings.
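
The change replaces each pmap's flat array of pv_entry pointers with a
two-level scheme: an L1 table whose slots point to page-sized L2 tables,
each holding PMAP_L2_NENTRY pv_entry pointers and allocated only once a
page in its range is actually mapped. The following is a minimal userland
sketch of the lookup and insert paths, simplified from pmap_lookup_pv()
and pmap_set_pv() in the diff below; malloc() stands in for the kernel's
pool allocator, and PAGE_SIZE is assumed to be 4096 here:

#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE	4096
#define PMAP_L2_SIZE	PAGE_SIZE
#define PMAP_L2_NENTRY	(PMAP_L2_SIZE / sizeof(struct pv_entry *))

struct pv_entry;		/* opaque in this sketch */

struct pmap_l2 {
	struct pv_entry *pm_l2[PMAP_L2_NENTRY];
};

/* Return the pv_entry for logical page 'lpn', or NULL if unmapped. */
static struct pv_entry *
lookup_pv(struct pmap_l2 **pm_l1, unsigned lpn)
{
	struct pmap_l2 *l2tbl = pm_l1[lpn / PMAP_L2_NENTRY];

	if (l2tbl == NULL)	/* entire L2 range has no mappings */
		return NULL;
	return l2tbl->pm_l2[lpn % PMAP_L2_NENTRY];
}

/* Enter 'pv' for 'lpn', allocating the covering L2 table on demand. */
static void
set_pv(struct pmap_l2 **pm_l1, unsigned lpn, struct pv_entry *pv)
{
	struct pmap_l2 **slot = &pm_l1[lpn / PMAP_L2_NENTRY];

	if (*slot == NULL) {
		*slot = malloc(PMAP_L2_SIZE);	/* pool_get() in the kernel */
		memset(*slot, 0, PMAP_L2_SIZE);
	}
	(*slot)->pm_l2[lpn % PMAP_L2_NENTRY] = pv;
}

A pmap's footprint thus scales with the number of L2-sized regions it
actually maps rather than with the full managed address range.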


To generate a diff of this commit:
cvs rdiff -u -r1.87 -r1.88 src/sys/arch/usermode/usermode/pmap.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/usermode/usermode/pmap.c
diff -u src/sys/arch/usermode/usermode/pmap.c:1.87 src/sys/arch/usermode/usermode/pmap.c:1.88
--- src/sys/arch/usermode/usermode/pmap.c:1.87	Sun Jan  1 13:52:51 2012
+++ src/sys/arch/usermode/usermode/pmap.c	Sun Jan  1 21:40:22 2012
@@ -1,4 +1,4 @@
-/* $NetBSD: pmap.c,v 1.87 2012/01/01 13:52:51 reinoud Exp $ */
+/* $NetBSD: pmap.c,v 1.88 2012/01/01 21:40:22 reinoud Exp $ */
 
 /*-
  * Copyright (c) 2011 Reinoud Zandijk <rein...@netbsd.org>
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.87 2012/01/01 13:52:51 reinoud Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.88 2012/01/01 21:40:22 reinoud Exp $");
 
 #include "opt_memsize.h"
 #include "opt_kmempages.h"
@@ -58,16 +58,25 @@ struct pv_entry {
 #define PV_MODIFIED	0x02
 };
 
+#define PMAP_L2_SIZE	 PAGE_SIZE
+#define PMAP_L2_NENTRY	(PMAP_L2_SIZE / sizeof(struct pv_entry *))
+
+struct pmap_l2 {
+	struct pv_entry *pm_l2[PMAP_L2_NENTRY];
+};
+
 struct pmap {
 	int	pm_count;
 	int	pm_flags;
 #define PM_ACTIVE 0x01
 	struct	pmap_statistics pm_stats;
-	struct	pv_entry **pm_entries;
+	struct	pmap_l2 **pm_l1;
 };
 
-static struct pv_entry  *pv_table;	/* physical pages */
-static struct pv_entry **tlb;		/* current tlb mappings */
+
+static struct pv_entry **kernel_pm_entries;
+static struct pv_entry  *pv_table;	/* physical pages info (direct mapped) */
+static struct pv_entry **tlb;		/* current tlb mappings (direct mapped) */
 static struct pmap	 pmap_kernel_store;
 struct pmap * const	 kernel_pmap_ptr = &pmap_kernel_store;
 
@@ -79,9 +88,14 @@ static void *mem_uvm;	/* keeps all memor
 
 static int phys_npages = 0;
 static int pm_nentries = 0;
+static int pm_nl1 = 0;
+static int pm_l1_size = 0;
 static uint64_t pm_entries_size = 0;
 
 static struct pool pmap_pool;
+static struct pool pmap_l1_pool;
+static struct pool pmap_l2_pool;
+static struct pool pmap_pventry_pool;
 
 /* forwards */
 void		pmap_bootstrap(void);
@@ -107,6 +121,7 @@ vaddr_t kmem_ext_cur_start, kmem_ext_cur
 /* amount of physical memory */
 int	physmem; 
 int	num_pv_entries = 0;
+int	num_pmaps = 0;
 
 #define SPARSE_MEMFILE
 
@@ -118,7 +133,7 @@ pmap_bootstrap(void)
 	struct pmap *pmap;
 	paddr_t totmem_len;
 	paddr_t fpos, file_len;
-	paddr_t pv_fpos, pm_fpos, tlb_fpos;
+	paddr_t pv_fpos, tlb_fpos, pm_l1_fpos, pm_fpos;
 	paddr_t wlen;
 	paddr_t user_len, barrier_len;
 	paddr_t pv_table_size;
@@ -126,7 +141,7 @@ pmap_bootstrap(void)
 	vaddr_t mpos;
 	paddr_t pa;
 	vaddr_t va;
-	uintptr_t pg;
+	uintptr_t pg, l1;
 	void *addr;
 	int err;
 
@@ -250,19 +265,30 @@ pmap_bootstrap(void)
 	assert(err == 0);
 #endif
 
-	/* set up pv_table; bootstrap problem! */
+	/* initialize counters */
 	fpos = 0;
 	free_start = fpos;     /* in physical space ! */
 	free_end   = file_len; /* in physical space ! */
+	kmem_ext_cur_start = kmem_ext_start;
 
+	/* calculate pv table size */
 	phys_npages = (free_end - free_start) / PAGE_SIZE;
 	pv_table_size = round_page(phys_npages * sizeof(struct pv_entry));
-
 	dprintf_debug("claiming %"PRIu64" KB of pv_table for "
 		"%"PRIdPTR" pages of physical memory\n",
 		(uint64_t) pv_table_size/1024, (uintptr_t) phys_npages);
 
-	kmem_ext_cur_start = kmem_ext_start;
+	/* calculate number of pmap entries needed for a complete map */
+	pm_nentries = (VM_MAX_KERNEL_ADDRESS - VM_MIN_ADDRESS) / PAGE_SIZE;
+	pm_entries_size = round_page(pm_nentries * sizeof(struct pv_entry *));
+	dprintf_debug("tlb va->pa lookup table is %"PRIu64" KB for "
+		"%d logical pages\n", pm_entries_size/1024, pm_nentries);
+
+	/* calculate how big the l1 tables are going to be */
+	pm_nl1 = pm_nentries / PMAP_L2_NENTRY;
+	pm_l1_size = pm_nl1 * sizeof(struct pmap_l2 *);
+
+	/* claim pv table */
 	pv_fpos = fpos;
 	pv_table = (struct pv_entry *) kmem_ext_cur_start;
 	addr = thunk_mmap(pv_table, pv_table_size,
@@ -280,53 +306,70 @@ pmap_bootstrap(void)
 	kmem_ext_cur_start += pv_table_size;
 	fpos += pv_table_size;
 
-	/* set up kernel pmap */
-	pm_nentries = (VM_MAX_KERNEL_ADDRESS - VM_MIN_ADDRESS) / PAGE_SIZE;
-	pm_entries_size = round_page(pm_nentries * sizeof(struct pv_entry *));
-	dprintf_debug("pmap va->pa lookup table is %"PRIu64" KB for %d logical pages\n",
-		pm_entries_size/1024, pm_nentries);
+	/* set up tlb space */
+	tlb = (struct pv_entry **) kmem_ext_cur_start;
+	tlb_fpos = fpos;
+	addr = thunk_mmap(tlb, pm_entries_size,
+		THUNK_PROT_READ | THUNK_PROT_WRITE,
+		THUNK_MAP_FILE | THUNK_MAP_FIXED | THUNK_MAP_SHARED,
+		mem_fh, tlb_fpos);
+	if (addr != (void *) tlb)
+		panic("pmap_bootstrap: can't map in tlb entries\n");
 
+	memset(tlb, 0, pm_entries_size);	/* test and clear */
+
+	dprintf_debug("kernel tlb entries initialized correctly\n");
+
+	/* advance */
+	kmem_ext_cur_start += pm_entries_size;
+	fpos += pm_entries_size;
+
+	/* set up kernel pmap and add a l1 map */
         pmap = pmap_kernel(); 
         memset(pmap, 0, sizeof(*pmap)); 
 	pmap->pm_count = 1;		/* reference */
 	pmap->pm_flags = PM_ACTIVE;	/* kernel pmap is always active */
-	pmap->pm_entries = (struct pv_entry **) kmem_ext_cur_start;
+	pmap->pm_l1 = (struct pmap_l2 **) kmem_ext_cur_start;
 
-	pm_fpos = fpos;
-	addr = thunk_mmap(pmap->pm_entries, pm_entries_size,
+	pm_l1_fpos = fpos;
+	addr = thunk_mmap(pmap->pm_l1, pm_l1_size,
 		THUNK_PROT_READ | THUNK_PROT_WRITE,
 		THUNK_MAP_FILE | THUNK_MAP_FIXED | THUNK_MAP_SHARED,
-		mem_fh, pm_fpos);
-	if (addr != (void *) pmap->pm_entries)
-		panic("pmap_bootstrap: can't map in pmap entries\n");
-
-	memset(pmap->pm_entries, 0, pm_entries_size);	/* test and clear */
+		mem_fh, pm_l1_fpos);
+	if (addr != (void *) pmap->pm_l1)
+		panic("pmap_bootstrap: can't map in pmap l1 entries\n");
 
-	dprintf_debug("kernel pmap entries initialiased correctly\n");
+	memset(pmap->pm_l1, 0, pm_l1_size);	/* test and clear */
 
-	/* advance */
-	kmem_ext_cur_start += pm_entries_size;
-	fpos += pm_entries_size;
+	dprintf_debug("kernel pmap l1 table initialiased correctly\n");
 
-	/* set up tlb space */
-	tlb = (struct pv_entry **) kmem_ext_cur_start;
+	/* advance for l1 tables */
+	kmem_ext_cur_start += round_page(pm_l1_size);
+	fpos += round_page(pm_l1_size);
 
-	tlb_fpos = fpos;
-	addr = thunk_mmap(tlb, pm_entries_size,
+	/* followed by the pm entries */
+	pm_fpos = fpos;
+	kernel_pm_entries = (struct pv_entry **) kmem_ext_cur_start;
+	addr = thunk_mmap(kernel_pm_entries, pm_entries_size,
 		THUNK_PROT_READ | THUNK_PROT_WRITE,
 		THUNK_MAP_FILE | THUNK_MAP_FIXED | THUNK_MAP_SHARED,
-		mem_fh, tlb_fpos);
-	if (addr != (void *) tlb)
-		panic("pmap_bootstrap: can't map in tlb entries\n");
-
-	memset(tlb, 0, pm_entries_size);	/* test and clear */
+		mem_fh, pm_fpos);
+	if (addr != (void *) kernel_pm_entries)
+		panic("pmap_bootstrap: can't map in kernel pmap entries\n");
 
-	dprintf_debug("kernel tlb entries initialized correctly\n");
+	memset(kernel_pm_entries, 0, pm_entries_size);	/* test and clear */
 
-	/* advance */
+	/* advance for the statically allocated pm_entries */
 	kmem_ext_cur_start += pm_entries_size;
 	fpos += pm_entries_size;
 
+	/* put pointers in the l1 to point to the pv_entry space */
+	for (l1 = 0; l1 < pm_nl1; l1++) {
+		pmap = pmap_kernel();
+		pmap->pm_l1[l1] = (struct pmap_l2 *)
+			((vaddr_t) kernel_pm_entries + l1 * PMAP_L2_SIZE);
+	}
+
 	/* kmem used [kmem_ext_start - kmem_ext_cur_start] */
 	kmem_ext_cur_end = kmem_ext_cur_start;
 
@@ -338,17 +381,23 @@ pmap_bootstrap(void)
 	}
 	dprintf_debug("pv_table mem added to the kernel pmap\n");
 	for (pg = 0; pg < pm_entries_size; pg += PAGE_SIZE) {
-		pa = pm_fpos + pg;
-		va = (vaddr_t) pmap->pm_entries + pg;
-		pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0);
-	}
-	dprintf_debug("kernel pmap entries mem added to the kernel pmap\n");
-	for (pg = 0; pg < pm_entries_size; pg += PAGE_SIZE) {
 		pa = tlb_fpos + pg;
 		va = (vaddr_t) tlb + pg;
 		pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0);
 	}
 	dprintf_debug("kernel tlb entries mem added to the kernel pmap\n");
+	for (pg = 0; pg < pm_l1_size; pg += PAGE_SIZE) {
+		pa = pm_l1_fpos + pg;
+		va = (vaddr_t) pmap->pm_l1 + pg;
+		pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0);
+	}
+	dprintf_debug("kernel pmap l1 mem added to the kernel pmap\n");
+	for (pg = 0; pg < pm_entries_size; pg += PAGE_SIZE) {
+		pa = pm_fpos + pg;
+		va = (vaddr_t) kernel_pm_entries + pg;
+		pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0);
+	}
+	dprintf_debug("kernel pmap entries mem added to the kernel pmap\n");
 
 	/* add file space to uvm's FREELIST */
 	/* XXX really from 0? or from fpos to have better stats */
@@ -388,6 +437,12 @@ pmap_deferred_init(void)
 	/* create pmap pool */
 	pool_init(&pmap_pool, sizeof(struct pmap), 0, 0, 0,
 	    "pmappool", NULL, IPL_NONE);
+	pool_init(&pmap_l2_pool, PMAP_L2_SIZE, 0, 0, 0,
+	    "pmapl2pool", NULL, IPL_HIGH);
+	pool_init(&pmap_l1_pool, pm_l1_size, 0, 0, 0,
+	    "pmapl1pool", NULL, IPL_NONE);
+	pool_init(&pmap_pventry_pool, sizeof(struct pv_entry), 0, 0, 0,
+	    "pventry", NULL, IPL_HIGH);
 }
 
 pmap_t
@@ -402,14 +457,26 @@ pmap_create(void)
 	}
 
 	dprintf_debug("pmap_create\n");
+	num_pmaps++;
+#if 0
+	printf("%s: pre alloc: num_pmaps %"PRIu64" (%"PRIu64" kb), "
+		   "num_pv_entries %"PRIu64" (%"PRIu64" kb)\n",
+		__func__,
+		(uint64_t) num_pmaps,
+		(uint64_t) num_pmaps * (sizeof(*pmap) + pm_l1_size)   / 1024,
+		(uint64_t) num_pv_entries,
+		(uint64_t) num_pv_entries * (sizeof(struct pv_entry)) / 1024);
+#endif
+
 	pmap = pool_get(&pmap_pool, PR_WAITOK);
 	memset(pmap, 0, sizeof(*pmap));
-		
 	pmap->pm_count = 1;
 	pmap->pm_flags = 0;
-	pmap->pm_entries = (struct pv_entry **) malloc(
-		pm_entries_size, M_VMPMAP,
-		M_WAITOK | M_ZERO);
+
+	/* claim l1 table */
+	pmap->pm_l1 = pool_get(&pmap_l1_pool, PR_WAITOK);
+	memset(pmap->pm_l1, 0, pm_l1_size);
+
 	dprintf_debug("\tpmap %p\n", pmap);
 
 	return pmap;
@@ -418,23 +485,31 @@ pmap_create(void)
 void
 pmap_destroy(pmap_t pmap)
 {
-	int i;
+	struct pmap_l2 *l2tbl;
+	int l1, l2;
 
 	/* if multiple references exist just remove a reference */
 	dprintf_debug("pmap_destroy %p\n", pmap);
 	if (--pmap->pm_count > 0)
 		return;
+	num_pmaps--;
 
 	/* safe guard against silly errors */
 	KASSERT((pmap->pm_flags & PM_ACTIVE) == 0);
 	KASSERT(pmap->pm_stats.resident_count == 0);
 	KASSERT(pmap->pm_stats.wired_count == 0);
 #ifdef DIAGNOSTIC
-	for (i = 0; i < pm_nentries; i++)
-		if (pmap->pm_entries[i] != NULL)
-			panic("pmap_destroy: pmap isn't empty");
+	for (l1 = 0; l1 < pm_nl1; l1++) {
+		l2tbl = pmap->pm_l1[l1];
+		if (!l2tbl)
+			continue;
+		for (l2 = 0; l2 < PMAP_L2_NENTRY; l2++) {
+			if (l2tbl->pm_l2[l2])
+				panic("pmap_destroy: pmap isn't empty");
+		}
+	}
 #endif
-	free((void *)pmap->pm_entries, M_VMPMAP);
+	pool_put(&pmap_l1_pool, pmap->pm_l1);
 	pool_put(&pmap_pool, pmap);
 }
 
@@ -460,15 +535,20 @@ pmap_wired_count(pmap_t pmap)
 static struct pv_entry *
 pv_alloc(void)
 {
+	struct pv_entry *pv;
+
 	num_pv_entries++;
-	return malloc(sizeof(struct pv_entry), M_VMPMAP, M_NOWAIT | M_ZERO);
+	pv = pool_get(&pmap_pventry_pool, PR_WAITOK);
+	memset(pv, 0, sizeof(struct pv_entry));
+
+	return pv;
 }
 
 static void
 pv_free(struct pv_entry *pv)
 {
 	num_pv_entries--;
-	free(pv, M_VMPMAP);
+	pool_put(&pmap_pventry_pool, pv);
 }
 
 static struct pv_entry *
@@ -507,6 +587,34 @@ pv_get(pmap_t pmap, uintptr_t ppn, uintp
 	return pv;
 }
 
+static void
+pmap_set_pv(pmap_t pmap, uint lpn, struct pv_entry *pv)
+{
+	struct pmap_l2 *l2tbl;
+	int l1   = lpn / PMAP_L2_NENTRY;
+	int l2 = lpn % PMAP_L2_NENTRY;
+
+	l2tbl = pmap->pm_l1[l1];
+	if (!l2tbl) {
+		l2tbl = pmap->pm_l1[l1] = pool_get(&pmap_l2_pool, PR_WAITOK);
+		memset(l2tbl, 0, PMAP_L2_SIZE);
+	}
+	l2tbl->pm_l2[l2] = pv;
+}
+
+static struct pv_entry *
+pmap_lookup_pv(pmap_t pmap, uint lpn)
+{
+	struct pmap_l2 *l2tbl;
+	int l1   = lpn / PMAP_L2_NENTRY;
+	int l2 = lpn % PMAP_L2_NENTRY;
+
+	l2tbl = pmap->pm_l1[l1];
+	if (l2tbl)
+		return l2tbl->pm_l2[l2];
+	return NULL;
+}
+
 /*
  * Check if the given page fault was our reference / modified emulation fault;
  * if so return true otherwise return false and let uvm handle it
@@ -522,7 +630,7 @@ pmap_fault(pmap_t pmap, vaddr_t va, vm_p
 
 	/* get logical page from vaddr */
 	lpn = atop(va - VM_MIN_ADDRESS);	/* V->L */
-	pv  = pmap->pm_entries[lpn];
+	pv = pmap_lookup_pv(pmap, lpn);
 
 	/* not known! then it must be UVM's work */
 	if (pv == NULL) {
@@ -732,8 +840,8 @@ pmap_do_enter(pmap_t pmap, vaddr_t va, p
 	s = splvm();
 
 	/* remove existing mapping at this lpn */
-	if (pmap->pm_entries[lpn] &&
-	    pmap->pm_entries[lpn]->pv_ppn != ppn)
+	pv = pmap_lookup_pv(pmap, lpn);
+	if (pv && pv->pv_ppn != ppn)
 		pmap_remove(pmap, va, va + PAGE_SIZE);
 
 	/* get our entry */
@@ -768,7 +876,7 @@ pmap_do_enter(pmap_t pmap, vaddr_t va, p
 
 	/* map it in */
 	pmap_update_page(ppn);
-	pmap->pm_entries[lpn] = pv;
+	pmap_set_pv(pmap, lpn, pv);
 
 	/* adjust stats */
 	if (pv->pv_vflags & PV_WIRED)
@@ -808,9 +916,9 @@ pv_release(pmap_t pmap, uintptr_t ppn, u
 	if ((pmap == pv->pv_pmap) && (lpn == pv->pv_lpn)) {
 		npv = pv->pv_next;
 		if (npv) {
-			/* Pull up first entry from chain. */
+			/* pull up first entry from chain. */
 			memcpy(pv, npv, offsetof(struct pv_entry, pv_pflags));
-			pv->pv_pmap->pm_entries[pv->pv_lpn] = pv;
+			pmap_set_pv(pv->pv_pmap, pv->pv_lpn, pv);
 			pv_free(npv);
 		} else {
 			memset(pv, 0, offsetof(struct pv_entry, pv_pflags));
@@ -825,7 +933,7 @@ pv_release(pmap_t pmap, uintptr_t ppn, u
 		pv->pv_next = npv->pv_next;
 		pv_free(npv);
 	}
-	pmap->pm_entries[lpn] = NULL;
+	pmap_set_pv(pmap, lpn, NULL);
 	pmap->pm_stats.resident_count--;
 }
 
@@ -844,14 +952,14 @@ pmap_remove(pmap_t pmap, vaddr_t sva, va
 
 	s = splvm();
 	for (lpn = slpn; lpn < elpn; lpn++) {
-		pv = pmap->pm_entries[lpn];
+		pv = pmap_lookup_pv(pmap, lpn);
 		if (pv != NULL) {
 			if (pmap->pm_flags & PM_ACTIVE) {
 				pmap_page_deactivate(pv);
 //				MEMC_WRITE(pv->pv_deactivate);
 //				cpu_cache_flush();
 			}
-			pmap->pm_entries[lpn] = NULL;
+			pmap_set_pv(pmap, lpn, NULL);
 			if (pv->pv_vflags & PV_WIRED)
 				pmap->pm_stats.wired_count--;
 			pv_release(pmap, pv->pv_ppn, lpn);
@@ -906,7 +1014,7 @@ pmap_protect(pmap_t pmap, vaddr_t sva, v
 
 	s = splvm();
 	for (lpn = slpn; lpn < elpn; lpn++) {
-		pv = pmap->pm_entries[lpn];
+		pv = pmap_lookup_pv(pmap, lpn);
 		if (pv != NULL) {
 			pv->pv_prot &= prot;
 			pv_update(pv);
@@ -928,7 +1036,7 @@ pmap_unwire(pmap_t pmap, vaddr_t va)
 		return;
 
 	lpn = atop(va - VM_MIN_ADDRESS);	/* V->L */
-	pv = pmap->pm_entries[lpn];
+	pv = pmap_lookup_pv(pmap, lpn);
 	if (pv == NULL)
 		return;
 	/* but is it wired? */
@@ -953,7 +1061,7 @@ pmap_extract(pmap_t pmap, vaddr_t va, pa
 		panic("pmap_extract: invalid va isued\n");
 #endif
 	lpn = atop(va - VM_MIN_ADDRESS);	/* V->L */
-	pv = pmap->pm_entries[lpn];
+	pv = pmap_lookup_pv(pmap, lpn);
 
 	if (pv == NULL)
 		return false;
@@ -1016,8 +1124,10 @@ void
 pmap_deactivate(struct lwp *l)
 {
 	struct proc *p = l->l_proc;
+	struct pv_entry *pv;
+	struct pmap_l2 *l2tbl;
 	pmap_t pmap;
-	int i;
+	int l1, l2;
 
 	pmap = p->p_vmspace->vm_map.pmap;
 	dprintf_debug("pmap_DEactivate for lwp %p, pmap = %p\n", l, pmap);
@@ -1030,12 +1140,20 @@ pmap_deactivate(struct lwp *l)
 
 	active_pmap = NULL;
 	pmap->pm_flags &=~ PM_ACTIVE;
-	for (i = 0; i < pm_nentries; i++) {
-		if (pmap->pm_entries[i] != NULL) {
-			pmap_page_deactivate(pmap->pm_entries[i]);
-//			MEMC_WRITE(pmap->pm_entries[i]->pv_deactivate);
+
+	for (l1 = 0; l1 < pm_nl1; l1++) {
+		l2tbl = pmap->pm_l1[l1];
+		if (!l2tbl)
+			continue;
+		for (l2 = 0; l2 < PMAP_L2_NENTRY; l2++) {
+			pv = l2tbl->pm_l2[l2];
+			if (pv) {
+				pmap_page_deactivate(pv);
+	//			MEMC_WRITE(pmap->pm_entries[i]->pv_deactivate);
+			}
 		}
 	}
+
 	/* dummy */
 //	cpu_cache_flush();
 }
@@ -1124,7 +1242,7 @@ pmap_page_protect(struct vm_page *page, 
 				npv = pv->pv_next;
 
 			/* remove from pmap */
-			pv->pv_pmap->pm_entries[pv->pv_lpn] = NULL;
+			pmap_set_pv(pv->pv_pmap, pv->pv_lpn, NULL);
 			if (pv->pv_vflags & PV_WIRED)
 				pv->pv_pmap->pm_stats.wired_count--;
 			pv_release(pv->pv_pmap, ppn, pv->pv_lpn);

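As a rough check on the savings, assume 4 KiB pages and 64-bit pointers
(illustrative values, not taken from the commit): PMAP_L2_NENTRY is
4096 / 8 = 512, so one page-sized L2 table covers 512 logical pages,
i.e. 2 MiB of address space. A flat table spanning a 4 GiB managed range
needs 2^20 pointers (8 MiB) per pmap, allocated up front; the two-level
layout needs only a 16 KiB L1 (2048 slots) plus one 4 KiB L2 table per
2 MiB region that actually contains mappings. A sparsely mapped process
therefore drops from 8 MiB to a few hundred KiB of pmap overhead, in line
with the roughly tenfold reduction the log message claims.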