Module Name: src
Committed By: pooka
Date: Thu Jun 3 10:56:20 UTC 2010
Modified Files:
src/sys/rump/librump/rumpkern: locks_up.c memalloc.c rump.c
rump_private.h vm.c
Log Message:
Implement a sort-of pagedaemon: adjust all memory allocators to go
through an in-rumpkernel hypermemory allocator which knows it should
kick the pagedaemon and block in case ``waitok'' memory allocation
fails.
This allows us to recover from some out-of-memory situations.
Realworld'istically speaking (as opposed to whatever "should be"
theory), these OOM situations will happen extremely rarely if ever
when our hypervisor is a regular process. Speculatively, this
should be useful for other types of hosts.
Issues remaining:
* the hypervisor does not know how to reclaim kernel memory (and
for the reason I stated above, I'm not sure if it makes sense
to teach the current implementation about that)
* vfs memory (buffers, vm object pages etc.) is not reclaimed
To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/rump/librump/rumpkern/locks_up.c
cvs rdiff -u -r1.6 -r1.7 src/sys/rump/librump/rumpkern/memalloc.c
cvs rdiff -u -r1.174 -r1.175 src/sys/rump/librump/rumpkern/rump.c
cvs rdiff -u -r1.49 -r1.50 src/sys/rump/librump/rumpkern/rump_private.h
cvs rdiff -u -r1.79 -r1.80 src/sys/rump/librump/rumpkern/vm.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/rump/librump/rumpkern/locks_up.c
diff -u src/sys/rump/librump/rumpkern/locks_up.c:1.2 src/sys/rump/librump/rumpkern/locks_up.c:1.3
--- src/sys/rump/librump/rumpkern/locks_up.c:1.2 Tue Jun 1 20:11:33 2010
+++ src/sys/rump/librump/rumpkern/locks_up.c Thu Jun 3 10:56:20 2010
@@ -1,4 +1,4 @@
-/* $NetBSD: locks_up.c,v 1.2 2010/06/01 20:11:33 pooka Exp $ */
+/* $NetBSD: locks_up.c,v 1.3 2010/06/03 10:56:20 pooka Exp $ */
/*
* Copyright (c) 2010 Antti Kantee. All Rights Reserved.
@@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: locks_up.c,v 1.2 2010/06/01 20:11:33 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: locks_up.c,v 1.3 2010/06/03 10:56:20 pooka Exp $");
#include <sys/param.h>
#include <sys/kernel.h>
@@ -74,7 +74,7 @@
* XXX: pool_cache would be nice, but not easily possible,
* as pool cache init wants to call mutex_init() ...
*/
- upm = rumpuser_malloc(sizeof(*upm), 0);
+ upm = rump_hypermalloc(sizeof(*upm), 0, true, "mutex_init");
memset(upm, 0, sizeof(*upm));
rumpuser_cv_init(&upm->upm_rucv);
memcpy(mtx, &upm, sizeof(void *));
@@ -178,7 +178,7 @@
CTASSERT(sizeof(krwlock_t) >= sizeof(void *));
checkncpu();
- uprw = rumpuser_malloc(sizeof(*uprw), 0);
+ uprw = rump_hypermalloc(sizeof(*uprw), 0, true, "rwinit");
memset(uprw, 0, sizeof(*uprw));
rumpuser_cv_init(&uprw->uprw_rucv_reader);
rumpuser_cv_init(&uprw->uprw_rucv_writer);
Index: src/sys/rump/librump/rumpkern/memalloc.c
diff -u src/sys/rump/librump/rumpkern/memalloc.c:1.6 src/sys/rump/librump/rumpkern/memalloc.c:1.7
--- src/sys/rump/librump/rumpkern/memalloc.c:1.6 Tue Jun 1 20:11:33 2010
+++ src/sys/rump/librump/rumpkern/memalloc.c Thu Jun 3 10:56:20 2010
@@ -1,4 +1,4 @@
-/* $NetBSD: memalloc.c,v 1.6 2010/06/01 20:11:33 pooka Exp $ */
+/* $NetBSD: memalloc.c,v 1.7 2010/06/03 10:56:20 pooka Exp $ */
/*
* Copyright (c) 2009 Antti Kantee. All Rights Reserved.
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: memalloc.c,v 1.6 2010/06/01 20:11:33 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: memalloc.c,v 1.7 2010/06/03 10:56:20 pooka Exp $");
#include <sys/param.h>
#include <sys/kmem.h>
@@ -73,7 +73,7 @@
{
void *rv;
- rv = rumpuser_malloc(size, 0);
+ rv = rump_hypermalloc(size, 0, (flags & M_WAITOK) != 0, "malloc");
if (rv && flags & M_ZERO)
memset(rv, 0, size);
@@ -110,7 +110,7 @@
kmem_alloc(size_t size, km_flag_t kmflag)
{
- return rumpuser_malloc(size, 0);
+ return rump_hypermalloc(size, 0, kmflag == KM_SLEEP, "kmem_alloc");
}
void *
@@ -240,18 +240,14 @@
void *
pool_get(struct pool *pp, int flags)
{
- void *rv;
#ifdef DIAGNOSTIC
if (pp->pr_size == 0)
panic("%s: pool unit size 0. not initialized?", __func__);
#endif
- rv = rumpuser_malloc(pp->pr_size, pp->pr_align);
- if (rv == NULL && (flags & PR_WAITOK && (flags & PR_LIMITFAIL) == 0))
- panic("%s: out of memory and PR_WAITOK", __func__);
-
- return rv;
+ return rump_hypermalloc(pp->pr_size, pp->pr_align,
+ (flags & PR_WAITOK) != 0, "pget");
}
void
@@ -299,6 +295,20 @@
pc->pc_pool.pr_drain_hook_arg = arg;
}
+void
+pool_drain_start(struct pool **ppp, uint64_t *wp)
+{
+
+ /* nada */
+}
+
+void
+pool_drain_end(struct pool *pp, uint64_t w)
+{
+
+ /* nada again */
+}
+
int
pool_prime(struct pool *pp, int nitems)
{
Index: src/sys/rump/librump/rumpkern/rump.c
diff -u src/sys/rump/librump/rumpkern/rump.c:1.174 src/sys/rump/librump/rumpkern/rump.c:1.175
--- src/sys/rump/librump/rumpkern/rump.c:1.174 Wed Jun 2 10:55:18 2010
+++ src/sys/rump/librump/rumpkern/rump.c Thu Jun 3 10:56:20 2010
@@ -1,4 +1,4 @@
-/* $NetBSD: rump.c,v 1.174 2010/06/02 10:55:18 pooka Exp $ */
+/* $NetBSD: rump.c,v 1.175 2010/06/03 10:56:20 pooka Exp $ */
/*
* Copyright (c) 2007 Antti Kantee. All Rights Reserved.
@@ -28,7 +28,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rump.c,v 1.174 2010/06/02 10:55:18 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rump.c,v 1.175 2010/06/03 10:56:20 pooka Exp $");
#include <sys/systm.h>
#define ELFSIZE ARCH_ELFSIZE
@@ -49,6 +49,7 @@
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/kprintf.h>
+#include <sys/kthread.h>
#include <sys/ksyms.h>
#include <sys/msgbuf.h>
#include <sys/module.h>
@@ -357,6 +358,15 @@
pipe_init();
resource_init();
+ /* start page baroness */
+ if (rump_threads) {
+ if (kthread_create(PRI_PGDAEMON, KTHREAD_MPSAFE, NULL,
+ uvm_pageout, NULL, &uvm.pagedaemon_lwp, "pdaemon") != 0)
+ panic("pagedaemon create failed");
+ } else
+ uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */
+
+ /* process dso's */
rumpuser_dl_bootstrap(add_linkedin_modules, rump_kernelfsym_load);
/* these do nothing if not present */
Index: src/sys/rump/librump/rumpkern/rump_private.h
diff -u src/sys/rump/librump/rumpkern/rump_private.h:1.49 src/sys/rump/librump/rumpkern/rump_private.h:1.50
--- src/sys/rump/librump/rumpkern/rump_private.h:1.49 Wed Jun 2 10:55:18 2010
+++ src/sys/rump/librump/rumpkern/rump_private.h Thu Jun 3 10:56:20 2010
@@ -1,4 +1,4 @@
-/* $NetBSD: rump_private.h,v 1.49 2010/06/02 10:55:18 pooka Exp $ */
+/* $NetBSD: rump_private.h,v 1.50 2010/06/03 10:56:20 pooka Exp $ */
/*
* Copyright (c) 2007 Antti Kantee. All Rights Reserved.
@@ -129,4 +129,6 @@
void rump_intr_init(void);
void rump_softint_run(struct cpu_info *);
+void *rump_hypermalloc(size_t, int, bool, const char *);
+
#endif /* _SYS_RUMP_PRIVATE_H_ */
Index: src/sys/rump/librump/rumpkern/vm.c
diff -u src/sys/rump/librump/rumpkern/vm.c:1.79 src/sys/rump/librump/rumpkern/vm.c:1.80
--- src/sys/rump/librump/rumpkern/vm.c:1.79 Wed Jun 2 10:55:18 2010
+++ src/sys/rump/librump/rumpkern/vm.c Thu Jun 3 10:56:20 2010
@@ -1,4 +1,4 @@
-/* $NetBSD: vm.c,v 1.79 2010/06/02 10:55:18 pooka Exp $ */
+/* $NetBSD: vm.c,v 1.80 2010/06/03 10:56:20 pooka Exp $ */
/*
* Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved.
@@ -43,15 +43,16 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.79 2010/06/02 10:55:18 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.80 2010/06/03 10:56:20 pooka Exp $");
#include <sys/param.h>
#include <sys/atomic.h>
+#include <sys/buf.h>
+#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/mman.h>
#include <sys/null.h>
#include <sys/vnode.h>
-#include <sys/buf.h>
#include <machine/pmap.h>
@@ -87,6 +88,10 @@
static struct vm_map_kernel kernel_map_store;
struct vm_map *kernel_map = &kernel_map_store.vmk_map;
+static unsigned int pdaemon_waiters;
+static kmutex_t pdaemonmtx;
+static kcondvar_t pdaemoncv, oomwait;
+
/*
* vm pages
*/
@@ -233,12 +238,15 @@
{
uvmexp.free = 1024*1024; /* XXX */
- uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */
rump_vmspace.vm_map.pmap = pmap_kernel();
mutex_init(&pagermtx, MUTEX_DEFAULT, 0);
mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, 0);
+ mutex_init(&pdaemonmtx, MUTEX_DEFAULT, 0);
+ cv_init(&pdaemoncv, "pdaemon");
+ cv_init(&oomwait, "oomwait");
+
kernel_map->pmap = pmap_kernel();
callback_head_init(&kernel_map_store.vmk_reclaim_callback, IPL_VM);
kmem_map->pmap = pmap_kernel();
@@ -460,31 +468,10 @@
vm_map_starved_p(struct vm_map *map)
{
- return false;
-}
+ if (map->flags & VM_MAP_WANTVA)
+ return true;
-void
-uvm_pageout_start(int npages)
-{
-
- uvmexp.paging += npages;
-}
-
-void
-uvm_pageout_done(int npages)
-{
-
- uvmexp.paging -= npages;
-
- /*
- * wake up either of pagedaemon or LWPs waiting for it.
- */
-
- if (uvmexp.free <= uvmexp.reserve_kernel) {
- wakeup(&uvm.pagedaemon);
- } else {
- wakeup(&uvmexp.free);
- }
+ return false;
}
int
@@ -582,14 +569,15 @@
uvm_km_alloc_poolpage(struct vm_map *map, bool waitok)
{
- return (vaddr_t)rumpuser_malloc(PAGE_SIZE, PAGE_SIZE);
+ return (vaddr_t)rump_hypermalloc(PAGE_SIZE, PAGE_SIZE,
+ waitok, "kmalloc");
}
void
uvm_km_free_poolpage(struct vm_map *map, vaddr_t addr)
{
- rumpuser_unmap((void *)addr, PAGE_SIZE);
+ rumpuser_free((void *)addr);
}
vaddr_t
@@ -648,13 +636,6 @@
}
void
-uvm_wait(const char *msg)
-{
-
- /* nothing to wait for */
-}
-
-void
uvmspace_free(struct vmspace *vm)
{
@@ -703,3 +684,129 @@
/* nada */
}
+
+/*
+ * Routines related to the Page Baroness.
+ */
+
+void
+uvm_wait(const char *msg)
+{
+
+ if (__predict_false(curlwp == uvm.pagedaemon_lwp))
+ panic("pagedaemon out of memory");
+ if (__predict_false(rump_threads == 0))
+ panic("pagedaemon missing (RUMP_THREADS = 0)");
+
+ mutex_enter(&pdaemonmtx);
+ pdaemon_waiters++;
+ cv_signal(&pdaemoncv);
+ cv_wait(&oomwait, &pdaemonmtx);
+ mutex_exit(&pdaemonmtx);
+}
+
+void
+uvm_pageout_start(int npages)
+{
+
+ /* we don't have the heuristics */
+}
+
+void
+uvm_pageout_done(int npages)
+{
+
+ /* could wakeup waiters, but just let the pagedaemon do it */
+}
+
+/*
+ * Under-construction page mistress. This is lacking vfs support, namely:
+ *
+ * 1) draining vfs buffers
+ * 2) paging out pages in vm vnode objects
+ * (we will not page out anon memory on the basis that
+ * that's the task of the host)
+ */
+
+void
+uvm_pageout(void *arg)
+{
+ struct pool *pp, *pp_first;
+ uint64_t where;
+ int timo = 0;
+ bool succ;
+
+ mutex_enter(&pdaemonmtx);
+ for (;;) {
+ cv_timedwait(&pdaemoncv, &pdaemonmtx, timo);
+ uvmexp.pdwoke++;
+ kernel_map->flags |= VM_MAP_WANTVA;
+ mutex_exit(&pdaemonmtx);
+
+ succ = false;
+ pool_drain_start(&pp_first, &where);
+ pp = pp_first;
+ for (;;) {
+ succ = pool_drain_end(pp, where);
+ if (succ)
+ break;
+ pool_drain_start(&pp, &where);
+ if (pp == pp_first) {
+ succ = pool_drain_end(pp, where);
+ break;
+ }
+ }
+ mutex_enter(&pdaemonmtx);
+
+ if (!succ) {
+ rumpuser_dprintf("pagedaemoness: failed to reclaim "
+ "memory ... sleeping (deadlock?)\n");
+ timo = hz;
+ continue;
+ }
+ kernel_map->flags &= ~VM_MAP_WANTVA;
+ timo = 0;
+
+ if (pdaemon_waiters) {
+ pdaemon_waiters = 0;
+ cv_broadcast(&oomwait);
+ }
+ }
+
+ panic("you can swap out any time you like, but you can never leave");
+}
+
+/*
+ * In a regular kernel the pagedaemon is activated when memory becomes
+ * low. In a virtual rump kernel we do not know exactly how much memory
+ * we have available -- it depends on the conditions on the host.
+ * Therefore, we cannot preemptively kick the pagedaemon. Rather, we
+ * wait until things are desperate and we're forced to uvm_wait().
+ *
+ * The alternative would be to allocate a huge chunk of memory at
+ * startup, but that solution has a number of problems including
+ * being a resource hog, failing anyway due to host memory overcommit
+ * and core dump size.
+ */
+
+void
+uvm_kick_pdaemon()
+{
+
+ /* nada */
+}
+
+void *
+rump_hypermalloc(size_t howmuch, int alignment, bool waitok, const char *wmsg)
+{
+ void *rv;
+
+ again:
+ rv = rumpuser_malloc(howmuch, alignment);
+ if (__predict_false(rv == NULL && waitok)) {
+ uvm_wait(wmsg);
+ goto again;
+ }
+
+ return rv;
+}