Module Name: src
Committed By: chs
Date: Mon Feb 29 00:34:17 UTC 2016
Modified Files:
src/share/man/man9: kmem.9 vmem.9
src/sys/kern: subr_kmem.c subr_vmem.c
Log Message:
fix vmem_alloc() to never return an error for VM_SLEEP requests,
thus fixing kmem_alloc() to never return NULL for KM_SLEEP requests.
instead these operations will retry forever, which was the intent.
To generate a diff of this commit:
cvs rdiff -u -r1.19 -r1.20 src/share/man/man9/kmem.9
cvs rdiff -u -r1.15 -r1.16 src/share/man/man9/vmem.9
cvs rdiff -u -r1.61 -r1.62 src/sys/kern/subr_kmem.c
cvs rdiff -u -r1.93 -r1.94 src/sys/kern/subr_vmem.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/share/man/man9/kmem.9
diff -u src/share/man/man9/kmem.9:1.19 src/share/man/man9/kmem.9:1.20
--- src/share/man/man9/kmem.9:1.19 Fri Dec 11 10:05:17 2015
+++ src/share/man/man9/kmem.9 Mon Feb 29 00:34:17 2016
@@ -1,4 +1,4 @@
-.\" $NetBSD: kmem.9,v 1.19 2015/12/11 10:05:17 wiz Exp $
+.\" $NetBSD: kmem.9,v 1.20 2016/02/29 00:34:17 chs Exp $
.\"
.\" Copyright (c)2006 YAMAMOTO Takashi,
.\" All rights reserved.
@@ -25,7 +25,7 @@
.\" SUCH DAMAGE.
.\"
.\" ------------------------------------------------------------
-.Dd December 10, 2015
+.Dd February 28, 2016
.Dt KMEM 9
.Os
.\" ------------------------------------------------------------
@@ -77,15 +77,9 @@ Either of the following:
.It Dv KM_SLEEP
If the allocation cannot be satisfied immediately, sleep until enough
memory is available.
-Note that this does not mean that if
+If
.Dv KM_SLEEP
is specified, then the allocation cannot fail.
-Under resource stress conditions, the allocation can fail and the
-function will return
-.Dv NULL .
-One such scenario is when the allocation size is larger than it can ever
-be allocated; another is when the system memory resources are exhausted
-to even allocate pools of pages.
.It Dv KM_NOSLEEP
Don't sleep.
Immediately return
@@ -143,9 +137,6 @@ It must be the one returned by
.Fn kmem_alloc
or
.Fn kmem_zalloc .
-One such scenario is when the allocation size is larger than it can ever
-be allocated; another is when the system memory resources are exhausted
-to even allocate pools of pages.
.It Fa size
The size of the memory being freed, in bytes.
It must be the same as the
@@ -185,10 +176,6 @@ It should be noted that
.Fn kmem_free
may also block.
.Pp
-Always check the return value of the allocators, even when
-.Dv KM_SLEEP
-is specified to avoid kernel crashes during resource stress conditions.
-.Pp
For some locks this is permissible or even unavoidable.
For others, particularly locks that may be taken from soft interrupt context,
it is a serious problem.
Index: src/share/man/man9/vmem.9
diff -u src/share/man/man9/vmem.9:1.15 src/share/man/man9/vmem.9:1.16
--- src/share/man/man9/vmem.9:1.15 Tue Jan 29 22:02:17 2013
+++ src/share/man/man9/vmem.9 Mon Feb 29 00:34:17 2016
@@ -1,4 +1,4 @@
-.\" $NetBSD: vmem.9,v 1.15 2013/01/29 22:02:17 wiz Exp $
+.\" $NetBSD: vmem.9,v 1.16 2016/02/29 00:34:17 chs Exp $
.\"
.\" Copyright (c)2006 YAMAMOTO Takashi,
.\" All rights reserved.
@@ -25,7 +25,7 @@
.\" SUCH DAMAGE.
.\"
.\" ------------------------------------------------------------
-.Dd January 29, 2013
+.Dd February 28, 2016
.Dt VMEM 9
.Os
.\" ------------------------------------------------------------
@@ -83,7 +83,7 @@ other than virtual memory.
.Fn vmem_create
creates a new vmem arena.
.Pp
-.Bl -tag -width qcache_max
+.Bl -tag -offset indent -width qcache_max
.It Fa name
The string to describe the vmem.
.It Fa base
@@ -118,7 +118,7 @@ calls
to import a span of size at least
.Fa size .
.Fa allocfn
-should accept the same
+must accept the same
.Fa flags
as
.Fn vmem_alloc .
@@ -169,7 +169,8 @@ It is merely a hint and can be ignored.
Either of:
.Bl -tag -width VM_NOSLEEP
.It Dv VM_SLEEP
-Can sleep until enough resources are available.
+If the allocation cannot be satisfied immediately, sleep until enough
+resources are available.
.It Dv VM_NOSLEEP
Don't sleep.
Immediately return
@@ -184,7 +185,7 @@ Interrupt level to be blocked for alloca
.Fn vmem_xcreate
creates a new vmem arena.
.Pp
-.Bl -tag -width qcache_max
+.Bl -tag -offset indent -width qcache_max
.It Fa name
The string to describe the vmem.
.It Fa base
@@ -220,7 +221,7 @@ calls
to import a span of size at least
.Fa size .
.Fa allocfn
-should accept the same
+must accept the same
.Fa flags
as
.Fn vmem_alloc .
@@ -274,7 +275,8 @@ It is merely a hint and can be ignored.
Either of:
.Bl -tag -width VM_NOSLEEP
.It Dv VM_SLEEP
-Can sleep until enough resources are available.
+If the allocation cannot be satisfied immediately, sleep until enough
+resources are available.
.It Dv VM_NOSLEEP
Don't sleep.
Immediately return
@@ -297,23 +299,26 @@ Returns
on success,
.Dv ENOMEM
on failure.
-.Fa flags
-should be one of:
+.Bl -tag -offset indent -width flags
+.It Fa flags
+Either of:
.Bl -tag -width VM_NOSLEEP
.It Dv VM_SLEEP
-Can sleep until enough resources are available.
+If the allocation cannot be satisfied immediately, sleep until enough
+resources are available.
.It Dv VM_NOSLEEP
Don't sleep.
Immediately return
.Dv ENOMEM
if there are not enough resources available.
.El
+.El
.Pp
.\" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
.Fn vmem_xalloc
allocates a resource from the arena.
.Pp
-.Bl -tag -width nocross
+.Bl -tag -offset indent -width nocross
.It Fa vm
The arena which we allocate from.
.It Fa size
@@ -333,10 +338,10 @@ If
.Fa align
is zero,
.Fa phase
-should be zero.
+must be zero.
Otherwise,
.Fa phase
-should be smaller than
+must be smaller than
.Fa align .
.It Fa nocross
Request a resource which doesn't cross
@@ -353,7 +358,7 @@ if the caller does not care.
.It Fa flags
A bitwise OR of an allocation strategy and a sleep flag.
.Pp
-The allocation strategy is one of:
+The allocation strategy must be one of:
.Bl -tag -width VM_INSTANTFIT
.It Dv VM_BESTFIT
Prefer space efficiency.
@@ -361,10 +366,11 @@ Prefer space efficiency.
Prefer performance.
.El
.Pp
-The sleep flag should be one of:
+The sleep flag must be one of:
.Bl -tag -width VM_NOSLEEP
.It Dv VM_SLEEP
-Can sleep until enough resources are available.
+If the allocation cannot be satisfied immediately, sleep until enough
+resources are available.
.It Dv VM_NOSLEEP
Don't sleep.
Immediately return
@@ -386,14 +392,14 @@ frees resource allocated by
.Fn vmem_xalloc
to the arena.
.Pp
-.Bl -tag -width addr
+.Bl -tag -offset indent -width addr
.It Fa vm
The arena which we free to.
.It Fa addr
The resource being freed.
-It must be the one returned by
+It must have been allocated via
.Fn vmem_xalloc .
-Notably, it must not be the one from
+Notably, it must not have been allocated via
.Fn vmem_alloc .
Otherwise, the behaviour is undefined.
.It Fa size
@@ -408,7 +414,7 @@ argument used for
.Fn vmem_alloc
allocates a resource from the arena.
.Pp
-.Bl -tag -width flags
+.Bl -tag -offset indent -width flags
.It Fa vm
The arena which we allocate from.
.It Fa size
@@ -416,7 +422,7 @@ Specify the size of the allocation.
.It Fa flags
A bitwise OR of an allocation strategy and a sleep flag.
.Pp
-The allocation strategy is one of:
+The allocation strategy must be one of:
.Bl -tag -width VM_INSTANTFIT
.It Dv VM_BESTFIT
Prefer space efficiency.
@@ -424,10 +430,11 @@ Prefer space efficiency.
Prefer performance.
.El
.Pp
-The sleep flag should be one of:
+The sleep flag must be one of:
.Bl -tag -width VM_NOSLEEP
.It Dv VM_SLEEP
-Can sleep until enough resources are available.
+If the allocation cannot be satisfied immediately, sleep until enough
+resources are available.
.It Dv VM_NOSLEEP
Don't sleep.
Immediately return
@@ -449,14 +456,14 @@ frees resource allocated by
.Fn vmem_alloc
to the arena.
.Pp
-.Bl -tag -width addr
+.Bl -tag -offset indent -width addr
.It Fa vm
The arena which we free to.
.It Fa addr
The resource being freed.
-It must be the one returned by
+It must have been allocated via
.Fn vmem_alloc .
-Notably, it must not be the one from
+Notably, it must not have been allocated via
.Fn vmem_xalloc .
Otherwise, the behaviour is undefined.
.It Fa size
@@ -471,10 +478,10 @@ argument used for
.Fn vmem_destroy
destroys a vmem arena.
.Pp
-.Bl -tag -width vm
+.Bl -tag -offset indent -width vm
.It Fa vm
The vmem arena being destroyed.
-The caller should ensure that no one will use it anymore.
+The caller must ensure that no one will use it anymore.
.El
.\" ------------------------------------------------------------
.Sh RETURN VALUES
Index: src/sys/kern/subr_kmem.c
diff -u src/sys/kern/subr_kmem.c:1.61 src/sys/kern/subr_kmem.c:1.62
--- src/sys/kern/subr_kmem.c:1.61 Mon Jul 27 09:24:28 2015
+++ src/sys/kern/subr_kmem.c Mon Feb 29 00:34:17 2016
@@ -1,4 +1,4 @@
-/* $NetBSD: subr_kmem.c,v 1.61 2015/07/27 09:24:28 maxv Exp $ */
+/* $NetBSD: subr_kmem.c,v 1.62 2016/02/29 00:34:17 chs Exp $ */
/*-
* Copyright (c) 2009-2015 The NetBSD Foundation, Inc.
@@ -100,7 +100,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: subr_kmem.c,v 1.61 2015/07/27 09:24:28 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: subr_kmem.c,v 1.62 2016/02/29 00:34:17 chs Exp $");
#include <sys/param.h>
#include <sys/callback.h>
@@ -383,9 +383,13 @@ kmem_intr_free(void *p, size_t requested
void *
kmem_alloc(size_t size, km_flag_t kmflags)
{
+ void *v;
+
KASSERTMSG((!cpu_intr_p() && !cpu_softintr_p()),
"kmem(9) should not be used from the interrupt context");
- return kmem_intr_alloc(size, kmflags);
+ v = kmem_intr_alloc(size, kmflags);
+ KASSERT(v || (kmflags & KM_NOSLEEP) != 0);
+ return v;
}
/*
@@ -396,9 +400,13 @@ kmem_alloc(size_t size, km_flag_t kmflag
void *
kmem_zalloc(size_t size, km_flag_t kmflags)
{
+ void *v;
+
KASSERTMSG((!cpu_intr_p() && !cpu_softintr_p()),
"kmem(9) should not be used from the interrupt context");
- return kmem_intr_zalloc(size, kmflags);
+ v = kmem_intr_zalloc(size, kmflags);
+ KASSERT(v || (kmflags & KM_NOSLEEP) != 0);
+ return v;
}
/*
Index: src/sys/kern/subr_vmem.c
diff -u src/sys/kern/subr_vmem.c:1.93 src/sys/kern/subr_vmem.c:1.94
--- src/sys/kern/subr_vmem.c:1.93 Mon Aug 24 22:50:32 2015
+++ src/sys/kern/subr_vmem.c Mon Feb 29 00:34:17 2016
@@ -1,4 +1,4 @@
-/* $NetBSD: subr_vmem.c,v 1.93 2015/08/24 22:50:32 pooka Exp $ */
+/* $NetBSD: subr_vmem.c,v 1.94 2016/02/29 00:34:17 chs Exp $ */
/*-
* Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi,
@@ -46,7 +46,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.93 2015/08/24 22:50:32 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.94 2016/02/29 00:34:17 chs Exp $");
#if defined(_KERNEL) && defined(_KERNEL_OPT)
#include "opt_ddb.h"
@@ -194,9 +194,19 @@ static LIST_HEAD(, vmem_btag) vmem_btag_
static size_t vmem_btag_freelist_count = 0;
static struct pool vmem_btag_pool;
+static void
+vmem_kick_pdaemon(void)
+{
+#if defined(_KERNEL)
+ mutex_spin_enter(&uvm_fpageqlock);
+ uvm_kick_pdaemon();
+ mutex_spin_exit(&uvm_fpageqlock);
+#endif
+}
+
/* ---- boundary tag */
-static int bt_refill(vmem_t *vm, vm_flag_t flags);
+static int bt_refill(vmem_t *vm);
static void *
pool_page_alloc_vmem_meta(struct pool *pp, int flags)
@@ -226,12 +236,10 @@ struct pool_allocator pool_allocator_vme
};
static int
-bt_refill(vmem_t *vm, vm_flag_t flags)
+bt_refill(vmem_t *vm)
{
bt_t *bt;
- KASSERT(flags & VM_NOSLEEP);
-
VMEM_LOCK(vm);
if (vm->vm_nfreetags > BT_MINRESERVE) {
VMEM_UNLOCK(vm);
@@ -270,12 +278,9 @@ bt_refill(vmem_t *vm, vm_flag_t flags)
VMEM_UNLOCK(vm);
if (kmem_meta_arena != NULL) {
- bt_refill(kmem_arena, (flags & ~VM_FITMASK)
- | VM_INSTANTFIT | VM_POPULATING);
- bt_refill(kmem_va_meta_arena, (flags & ~VM_FITMASK)
- | VM_INSTANTFIT | VM_POPULATING);
- bt_refill(kmem_meta_arena, (flags & ~VM_FITMASK)
- | VM_INSTANTFIT | VM_POPULATING);
+ (void)bt_refill(kmem_arena);
+ (void)bt_refill(kmem_va_meta_arena);
+ (void)bt_refill(kmem_meta_arena);
}
return 0;
@@ -288,8 +293,22 @@ bt_alloc(vmem_t *vm, vm_flag_t flags)
VMEM_LOCK(vm);
while (vm->vm_nfreetags <= BT_MINRESERVE && (flags & VM_POPULATING) == 0) {
VMEM_UNLOCK(vm);
- if (bt_refill(vm, VM_NOSLEEP | VM_INSTANTFIT)) {
- return NULL;
+ if (bt_refill(vm)) {
+ if ((flags & VM_NOSLEEP) != 0) {
+ return NULL;
+ }
+
+ /*
+ * It would be nice to wait for something specific here
+ * but there are multiple ways that a retry could
+ * succeed and we can't wait for multiple things
+ * simultaneously. So we'll just sleep for an arbitrary
+ * short period of time and retry regardless.
+ * This should be a very rare case.
+ */
+
+ vmem_kick_pdaemon();
+ kpause("btalloc", false, 1, NULL);
}
VMEM_LOCK(vm);
}
@@ -940,7 +959,7 @@ vmem_init(vmem_t *vm, const char *name,
#if defined(_KERNEL)
if (flags & VM_BOOTSTRAP) {
- bt_refill(vm, VM_NOSLEEP);
+ bt_refill(vm);
}
mutex_enter(&vmem_list_lock);
@@ -1177,11 +1196,7 @@ retry:
/* XXX */
if ((flags & VM_SLEEP) != 0) {
-#if defined(_KERNEL)
- mutex_spin_enter(&uvm_fpageqlock);
- uvm_kick_pdaemon();
- mutex_spin_exit(&uvm_fpageqlock);
-#endif
+ vmem_kick_pdaemon();
VMEM_LOCK(vm);
VMEM_CONDVAR_WAIT(vm);
VMEM_UNLOCK(vm);