Here's the diff I have against -current with just the tmpfs pieces (the activation bits and the uvm changes).
Pedro, I know some of your stuff is not in there (I dropped the mount_tmpfs diff); feel free to send me a complete, corrected, updated diff. Index: lib/libc/sys/mount.2 =================================================================== RCS file: /home/openbsd/cvs/src/lib/libc/sys/mount.2,v retrieving revision 1.40 diff -u -p -r1.40 mount.2 --- lib/libc/sys/mount.2 17 Nov 2011 14:26:14 -0000 1.40 +++ lib/libc/sys/mount.2 2 May 2013 18:56:39 -0000 @@ -227,6 +227,23 @@ struct udf_args { }; .Ed .Pp +.Dv MOUNT_TMPFS +.Bd -literal -offset indent -compact +#define TMPFS_ARGS_VERSION 1 +struct tmpfs_args { + int ta_version; + + /* Size counters. */ + ino_t ta_nodes_max; + off_t ta_size_max; + + /* Root node attributes. */ + uid_t ta_root_uid; + gid_t ta_root_gid; + mode_t ta_root_mode; +}; +.Ed +.Pp The .Fn unmount function call disassociates the filesystem from the specified Index: sbin/mount/mount.8 =================================================================== RCS file: /home/openbsd/cvs/src/sbin/mount/mount.8,v retrieving revision 1.70 diff -u -p -r1.70 mount.8 --- sbin/mount/mount.8 23 Aug 2012 14:43:04 -0000 1.70 +++ sbin/mount/mount.8 2 May 2013 18:56:39 -0000 @@ -390,6 +390,7 @@ with option .Xr mount_ntfs 8 , .Xr mount_procfs 8 , .Xr mount_udf 8 , +.Xr mount_tmpfs 8 , .Xr mount_vnd 8 , .Xr sysctl 8 , .Xr umount 8 Index: share/man/man4/options.4 =================================================================== RCS file: /home/openbsd/cvs/src/share/man/man4/options.4,v retrieving revision 1.232 diff -u -p -r1.232 options.4 --- share/man/man4/options.4 23 May 2013 01:47:43 -0000 1.232 +++ share/man/man4/options.4 25 May 2013 14:56:27 -0000 @@ -273,6 +273,11 @@ Includes code for the UDF file systems t See .Xr mount_udf 8 for details. +.It Cd option TMPFS +Includes code for the TMPFS efficient memory file system. +See +.Xr mount_tmpfs 8 +for details.
.El .Sh FILE SYSTEM OPTIONS .Bl -ohang Index: share/man/man5/fstab.5 =================================================================== RCS file: /home/openbsd/cvs/src/share/man/man5/fstab.5,v retrieving revision 1.45 diff -u -p -r1.45 fstab.5 --- share/man/man5/fstab.5 17 Apr 2011 20:17:12 -0000 1.45 +++ share/man/man5/fstab.5 2 May 2013 18:56:39 -0000 @@ -127,6 +127,8 @@ A disk partition to be used for swapping A UDF filesystem. .It vnd A VND image file. +.It tmpfs +A local, efficient memory-based file system. .El .Pp The fourth field, Index: sys/conf/GENERIC =================================================================== RCS file: /home/openbsd/cvs/src/sys/conf/GENERIC,v retrieving revision 1.196 diff -u -p -r1.196 GENERIC --- sys/conf/GENERIC 15 Apr 2013 15:32:19 -0000 1.196 +++ sys/conf/GENERIC 2 May 2013 18:56:39 -0000 @@ -47,6 +47,7 @@ option CD9660 # ISO 9660 + Rock Ridge option UDF # UDF (DVD) file system option MSDOSFS # MS-DOS file system option FIFO # FIFOs; RECOMMENDED +option TMPFS # efficient memory file system option SOCKET_SPLICE # Socket Splicing for TCP option TCP_SACK # Selective Acknowledgements for TCP Index: sys/conf/files =================================================================== RCS file: /home/openbsd/cvs/src/sys/conf/files,v retrieving revision 1.547 diff -u -p -r1.547 files --- sys/conf/files 3 Jun 2013 15:54:47 -0000 1.547 +++ sys/conf/files 5 Jun 2013 10:35:24 -0000 @@ -756,6 +756,12 @@ file ntfs/ntfs_ihash.c ntfs file ntfs/ntfs_subr.c ntfs file ntfs/ntfs_vfsops.c ntfs file ntfs/ntfs_vnops.c ntfs +file tmpfs/tmpfs_mem.c tmpfs +file tmpfs/tmpfs_subr.c tmpfs +file tmpfs/tmpfs_vfsops.c tmpfs +file tmpfs/tmpfs_vnops.c tmpfs +file tmpfs/tmpfs_specops.c tmpfs +file tmpfs/tmpfs_fifoops.c tmpfs & fifo file net/bpf.c bpfilter needs-count file net/bpf_filter.c bpfilter file net/if.c Index: sys/kern/vfs_init.c =================================================================== RCS file: /home/openbsd/cvs/src/sys/kern/vfs_init.c,v 
retrieving revision 1.32 diff -u -p -r1.32 vfs_init.c --- sys/kern/vfs_init.c 3 Jun 2013 15:54:48 -0000 1.32 +++ sys/kern/vfs_init.c 5 Jun 2013 10:42:54 -0000 @@ -90,6 +90,10 @@ extern const struct vfsops udf_vfsops; extern const struct vfsops fusefs_vfsops; #endif +#ifdef TMPFS +extern const struct vfsops tmpfs_vfsops; +#endif + /* Set up the filesystem operations for vnodes. */ static struct vfsconf vfsconflist[] = { #ifdef FFS @@ -130,6 +134,10 @@ static struct vfsconf vfsconflist[] = { #ifdef FUSE { &fusefs_vfsops, MOUNT_FUSEFS, 18, 0, MNT_LOCAL, NULL }, +#endif + +#ifdef TMPFS + { &tmpfs_vfsops, MOUNT_TMPFS, 42, 0, MNT_LOCAL, NULL }, #endif }; Index: sys/sys/mount.h =================================================================== RCS file: /home/openbsd/cvs/src/sys/sys/mount.h,v retrieving revision 1.111 diff -u -p -r1.111 mount.h --- sys/sys/mount.h 3 Jun 2013 15:56:01 -0000 1.111 +++ sys/sys/mount.h 5 Jun 2013 10:40:07 -0000 @@ -245,6 +245,23 @@ struct udf_args { }; /* + * Arguments to mount tmpfs file systems + */ +#define TMPFS_ARGS_VERSION 1 +struct tmpfs_args { + int ta_version; + + /* Size counters. */ + ino_t ta_nodes_max; + off_t ta_size_max; + + /* Root node attributes. 
*/ + uid_t ta_root_uid; + gid_t ta_root_gid; + mode_t ta_root_mode; +}; + +/* * Arguments to mount procfs filesystems */ struct procfs_args { @@ -284,6 +301,7 @@ union mount_info { struct procfs_args procfs_args; struct msdosfs_args msdosfs_args; struct ntfs_args ntfs_args; + struct tmpfs_args tmpfs_args; char __align[160]; /* 64-bit alignment and room to grow */ }; @@ -369,6 +387,7 @@ struct statfs53 { #define MOUNT_NCPFS "ncpfs" /* NetWare Network File System */ #define MOUNT_NTFS "ntfs" /* NTFS */ #define MOUNT_UDF "udf" /* UDF */ +#define MOUNT_TMPFS "tmpfs" /* tmpfs */ #define MOUNT_FUSEFS "fuse" /* FUSE */ /* Index: sys/sys/vnode.h =================================================================== RCS file: /home/openbsd/cvs/src/sys/sys/vnode.h,v retrieving revision 1.116 diff -u -p -r1.116 vnode.h --- sys/sys/vnode.h 3 Jun 2013 15:54:48 -0000 1.116 +++ sys/sys/vnode.h 5 Jun 2013 10:41:00 -0000 @@ -69,13 +69,13 @@ enum vtype { VNON, VREG, VDIR, VBLK, VCH enum vtagtype { VT_NON, VT_UFS, VT_NFS, VT_MFS, VT_MSDOSFS, VT_PORTAL, VT_PROCFS, VT_AFS, VT_ISOFS, VT_ADOSFS, - VT_EXT2FS, VT_VFS, VT_NTFS, VT_UDF, VT_FUSEFS + VT_EXT2FS, VT_VFS, VT_NTFS, VT_UDF, VT_FUSEFS, VT_TMPFS, }; #define VTAG_NAMES \ "NON", "UFS", "NFS", "MFS", "MSDOSFS", \ "PORTAL", "PROCFS", "AFS", "ISOFS", "ADOSFS", \ - "EXT2FS", "VFS", "NTFS", "UDF", "FUSEFS" + "EXT2FS", "VFS", "NTFS", "UDF", "FUSEFS", "TMPFS" /* * Each underlying filesystem allocates its own private area and hangs Index: sys/uvm/uvm_aobj.c =================================================================== RCS file: /home/openbsd/cvs/src/sys/uvm/uvm_aobj.c,v retrieving revision 1.58 diff -u -p -r1.58 uvm_aobj.c --- sys/uvm/uvm_aobj.c 30 May 2013 16:39:26 -0000 1.58 +++ sys/uvm/uvm_aobj.c 31 May 2013 21:39:34 -0000 @@ -50,6 +50,7 @@ #include <sys/kernel.h> #include <sys/pool.h> #include <sys/kernel.h> +#include <sys/stdint.h> #include <uvm/uvm.h> @@ -77,6 +78,8 @@ /* get the "tag" for this page index */ #define 
UAO_SWHASH_ELT_TAG(PAGEIDX) \ ((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT) +#define UAO_SWHASH_ELT_PAGESLOT_IDX(idx) \ + ((idx) & (UAO_SWHASH_CLUSTER_SIZE - 1)) /* given an ELT and a page index, find the swap slot */ #define UAO_SWHASH_ELT_PAGESLOT(ELT, PAGEIDX) \ @@ -99,16 +102,15 @@ */ #define UAO_SWHASH_THRESHOLD (UAO_SWHASH_CLUSTER_SIZE * 4) -#define UAO_USES_SWHASH(AOBJ) \ - ((AOBJ)->u_pages > UAO_SWHASH_THRESHOLD) /* use hash? */ +#define UAO_USES_SWHASH(pages) \ + ((pages) > UAO_SWHASH_THRESHOLD) /* use hash? */ /* * the number of buckets in a swhash, with an upper bound */ #define UAO_SWHASH_MAXBUCKETS 256 -#define UAO_SWHASH_BUCKETS(AOBJ) \ - (min((AOBJ)->u_pages >> UAO_SWHASH_CLUSTER_SHIFT, \ - UAO_SWHASH_MAXBUCKETS)) +#define UAO_SWHASH_BUCKETS(pages) \ + (min((pages) >> UAO_SWHASH_CLUSTER_SHIFT, UAO_SWHASH_MAXBUCKETS)) /* @@ -183,6 +185,16 @@ static int uao_get(struct uvm_object static boolean_t uao_pagein(struct uvm_aobj *, int, int); static boolean_t uao_pagein_page(struct uvm_aobj *, int); +void uao_dropswap_range(struct uvm_object *, voff_t, voff_t); +void uao_shrink_flush(struct uvm_object *, int, int); +int uao_shrink_hash(struct uvm_object *, int); +int uao_shrink_array(struct uvm_object *, int); +int uao_shrink_convert(struct uvm_object *, int); + +int uao_grow_hash(struct uvm_object *, int); +int uao_grow_array(struct uvm_object *, int); +int uao_grow_convert(struct uvm_object *, int); + /* * aobj_pager * @@ -275,7 +287,7 @@ uao_find_swslot(struct uvm_aobj *aobj, i * if hashing, look in hash table. */ - if (UAO_USES_SWHASH(aobj)) { + if (UAO_USES_SWHASH(aobj->u_pages)) { struct uao_swhash_elt *elt = uao_find_swhash_elt(aobj, pageidx, FALSE); @@ -320,7 +332,7 @@ uao_set_swslot(struct uvm_object *uobj, * are we using a hash table? if so, add it in the hash. 
*/ - if (UAO_USES_SWHASH(aobj)) { + if (UAO_USES_SWHASH(aobj->u_pages)) { /* * Avoid allocating an entry just to free it again if @@ -377,7 +389,7 @@ static void uao_free(struct uvm_aobj *aobj) { - if (UAO_USES_SWHASH(aobj)) { + if (UAO_USES_SWHASH(aobj->u_pages)) { int i, hashbuckets = aobj->u_swhashmask + 1; /* @@ -443,9 +455,300 @@ uao_free(struct uvm_aobj *aobj) */ /* + * Shrink an aobj to a given number of pages. The procedure is always the same: + * assess the necessity of data structure conversion (hash to array), secure + * resources, flush pages and drop swap slots. + * + * XXX pedro: We need a uao_flush() that returns success only when the + * requested pages have been free'd. + */ + +void +uao_shrink_flush(struct uvm_object *uobj, int startpg, int endpg) +{ + KASSERT(startpg < endpg); + KASSERT(uobj->uo_refs == 1); + uao_flush(uobj, startpg << PAGE_SHIFT, endpg << PAGE_SHIFT, PGO_FREE); + uao_dropswap_range(uobj, startpg, endpg); +} + +int +uao_shrink_hash(struct uvm_object *uobj, int pages) +{ + struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; + struct uao_swhash *new_swhash; + struct uao_swhash_elt *elt; + unsigned long new_hashmask; + int i; + + KASSERT(UAO_USES_SWHASH(aobj->u_pages) != 0); + + /* + * If the size of the hash table doesn't change, all we need to do is + * to adjust the page count. + */ + + if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) { + aobj->u_pages = pages; + return 0; + } + + new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ, + M_WAITOK | M_CANFAIL, &new_hashmask); + if (new_swhash == NULL) + return ENOMEM; + + uao_shrink_flush(uobj, pages, aobj->u_pages); + + /* + * Even though the hash table size is changing, the hash of the buckets + * we are interested in copying should not change. + */ + + for (i = 0; i < UAO_SWHASH_BUCKETS(pages); i++) { + /* XXX pedro: shouldn't copying the list pointers be enough? 
*/ + while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) { + elt = LIST_FIRST(&aobj->u_swhash[i]); + LIST_REMOVE(elt, list); + LIST_INSERT_HEAD(&new_swhash[i], elt, list); + } + } + + free(aobj->u_swhash, M_UVMAOBJ); + + aobj->u_swhash = new_swhash; + aobj->u_pages = pages; + aobj->u_swhashmask = new_hashmask; + + return 0; +} + +int +uao_shrink_convert(struct uvm_object *uobj, int pages) +{ + struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; + struct uao_swhash_elt *elt; + int i, *new_swslots; + + new_swslots = malloc(pages * sizeof(int), M_UVMAOBJ, + M_WAITOK | M_CANFAIL | M_ZERO); + if (new_swslots == NULL) + return ENOMEM; + + uao_shrink_flush(uobj, pages, aobj->u_pages); + + /* + * Convert swap slots from hash to array. + */ + + for (i = 0; i < pages; i++) { + elt = uao_find_swhash_elt(aobj, i, FALSE); + if (elt != NULL) { + new_swslots[i] = UAO_SWHASH_ELT_PAGESLOT(elt, i); + if (new_swslots[i] != 0) + elt->count--; + if (elt->count == 0) { + LIST_REMOVE(elt, list); + pool_put(&uao_swhash_elt_pool, elt); + } + } + } + + free(aobj->u_swhash, M_UVMAOBJ); + + aobj->u_swslots = new_swslots; + aobj->u_pages = pages; + + return 0; +} + +int +uao_shrink_array(struct uvm_object *uobj, int pages) +{ + struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; + int i, *new_swslots; + + new_swslots = malloc(pages * sizeof(int), M_UVMAOBJ, + M_WAITOK | M_CANFAIL | M_ZERO); + if (new_swslots == NULL) + return ENOMEM; + + uao_shrink_flush(uobj, pages, aobj->u_pages); + + for (i = 0; i < pages; i++) + new_swslots[i] = aobj->u_swslots[i]; + + free(aobj->u_swslots, M_UVMAOBJ); + + aobj->u_swslots = new_swslots; + aobj->u_pages = pages; + + return 0; +} + +int +uao_shrink(struct uvm_object *uobj, int pages) +{ + struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; + + KASSERT(pages < aobj->u_pages); + + /* + * Distinguish between three possible cases: + * 1. aobj uses hash and must be converted to array. + * 2. aobj uses array and array size needs to be adjusted. + * 3. 
aobj uses hash and hash size needs to be adjusted. + */ + + if (UAO_USES_SWHASH(pages) != 0) + return uao_shrink_hash(uobj, pages); /* case 3 */ + else if (UAO_USES_SWHASH(aobj->u_pages) != 0) + return uao_shrink_convert(uobj, pages); /* case 1 */ + else + return uao_shrink_array(uobj, pages); /* case 2 */ +} + +/* + * Grow an aobj to a given number of pages. Right now we only adjust the swap + * slots. We could additionally handle page allocation directly, so that they + * don't happen through uvm_fault(). That would allow us to use another + * mechanism for the swap slots other than malloc(). It is thus mandatory that + * the caller of these functions does not allow faults to happen in case of + * growth error. + */ + +int +uao_grow_array(struct uvm_object *uobj, int pages) +{ + struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; + int i, *new_swslots; + + KASSERT(UAO_USES_SWHASH(aobj->u_pages) == 0); + + new_swslots = malloc(pages * sizeof(int), M_UVMAOBJ, + M_WAITOK | M_CANFAIL | M_ZERO); + if (new_swslots == NULL) + return ENOMEM; + + for (i = 0; i < aobj->u_pages; i++) + new_swslots[i] = aobj->u_swslots[i]; + + free(aobj->u_swslots, M_UVMAOBJ); + + aobj->u_swslots = new_swslots; + aobj->u_pages = pages; + + return 0; +} + +int +uao_grow_hash(struct uvm_object *uobj, int pages) +{ + struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; + struct uao_swhash *new_swhash; + struct uao_swhash_elt *elt; + unsigned long new_hashmask; + int i; + + KASSERT(UAO_USES_SWHASH(pages) != 0); + + /* + * If the size of the hash table doesn't change, all we need to do is + * to adjust the page count. 
+ */ + + if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) { + aobj->u_pages = pages; + return 0; + } + + KASSERT(UAO_SWHASH_BUCKETS(aobj->u_pages) < UAO_SWHASH_BUCKETS(pages)); + + new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ, + M_WAITOK | M_CANFAIL, &new_hashmask); + if (new_swhash == NULL) + return ENOMEM; + + for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) { + /* XXX pedro: shouldn't copying the list pointers be enough? */ + while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) { + elt = LIST_FIRST(&aobj->u_swhash[i]); + LIST_REMOVE(elt, list); + LIST_INSERT_HEAD(&new_swhash[i], elt, list); + } + } + + free(aobj->u_swhash, M_UVMAOBJ); + + aobj->u_swhash = new_swhash; + aobj->u_pages = pages; + aobj->u_swhashmask = new_hashmask; + + return 0; +} + +int +uao_grow_convert(struct uvm_object *uobj, int pages) +{ + struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; + struct uao_swhash *new_swhash; + struct uao_swhash_elt *elt; + unsigned long new_hashmask; + int i, *old_swslots; + + new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ, + M_WAITOK | M_CANFAIL, &new_hashmask); + if (new_swhash == NULL) + return ENOMEM; + + /* + * Set these now, so we can use uao_find_swhash_elt(). + */ + + old_swslots = aobj->u_swslots; + aobj->u_swhash = new_swhash; + aobj->u_swhashmask = new_hashmask; + + for (i = 0; i < aobj->u_pages; i++) { + if (old_swslots[i] != 0) { + elt = uao_find_swhash_elt(aobj, i, TRUE); + elt->count++; + UAO_SWHASH_ELT_PAGESLOT(elt, i) = old_swslots[i]; + } + } + + free(old_swslots, M_UVMAOBJ); + aobj->u_pages = pages; + + return 0; +} + +int +uao_grow(struct uvm_object *uobj, int pages) +{ + struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; + + KASSERT(pages > aobj->u_pages); + + /* + * Distinguish between three possible cases: + * 1. aobj uses hash and hash size needs to be adjusted. + * 2. aobj uses array and array size needs to be adjusted. + * 3. aobj uses array and must be converted to hash. 
+ */ + + if (UAO_USES_SWHASH(pages) == 0) + return uao_grow_array(uobj, pages); /* case 2 */ + else if (UAO_USES_SWHASH(aobj->u_pages) != 0) + return uao_grow_hash(uobj, pages); /* case 1 */ + else + return uao_grow_convert(uobj, pages); +} + +/* * uao_create: create an aobj of the given size and return its uvm_object. * - * => for normal use, flags are always zero + * => for normal use, flags are zero or UAO_FLAG_CANFAIL. * => for the kernel object, the flags are: * UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once) * UAO_FLAG_KERNSWAP - enable swapping of kernel object (" ") @@ -457,6 +760,7 @@ uao_create(vsize_t size, int flags) static int kobj_alloced = 0; /* not allocated yet */ int pages = round_page(size) >> PAGE_SHIFT; int refs = UVM_OBJ_KERN; + int mflags; struct uvm_aobj *aobj; /* @@ -486,24 +790,36 @@ uao_create(vsize_t size, int flags) /* * allocate hash/array if necessary */ - if (flags == 0 || (flags & UAO_FLAG_KERNSWAP) != 0) { - int mflags = (flags & UAO_FLAG_KERNSWAP) != 0 ? 
- M_NOWAIT : M_WAITOK; + if (flags == 0 || (flags & (UAO_FLAG_KERNSWAP | UAO_FLAG_CANFAIL))) { + if (flags) + mflags = M_NOWAIT; + else + mflags = M_WAITOK; /* allocate hash table or array depending on object size */ - if (UAO_USES_SWHASH(aobj)) { - aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(aobj), + if (UAO_USES_SWHASH(aobj->u_pages)) { + aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ, mflags, &aobj->u_swhashmask); - if (aobj->u_swhash == NULL) + if (aobj->u_swhash == NULL) { + if (flags & UAO_FLAG_CANFAIL) { + pool_put(&uvm_aobj_pool, aobj); + return (NULL); + } panic("uao_create: hashinit swhash failed"); + } } else { aobj->u_swslots = malloc(pages * sizeof(int), M_UVMAOBJ, mflags|M_ZERO); - if (aobj->u_swslots == NULL) + if (aobj->u_swslots == NULL) { + if (flags & UAO_FLAG_CANFAIL) { + pool_put(&uvm_aobj_pool, aobj); + return (NULL); + } panic("uao_create: malloc swslots failed"); + } } - if (flags) { + if (flags & UAO_FLAG_KERNSWAP) { aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */ return(&aobj->u_obj); /* done! */ @@ -1138,7 +1454,7 @@ uao_pagein(struct uvm_aobj *aobj, int st { boolean_t rv; - if (UAO_USES_SWHASH(aobj)) { + if (UAO_USES_SWHASH(aobj->u_pages)) { struct uao_swhash_elt *elt; int bucket; @@ -1249,4 +1565,114 @@ uao_pagein_page(struct uvm_aobj *aobj, i uvm_unlock_pageq(); return FALSE; +} + +/* + * XXX pedro: Once we are comfortable enough with this function, we can adapt + * uao_free() to use it. + * + * uao_dropswap_range: drop swapslots in the range. + * + * => aobj must be locked and is returned locked. + * => start is inclusive. end is exclusive. 
+ */ + +void +uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end) +{ + struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; + int swpgonlydelta = 0; + + /* KASSERT(mutex_owned(uobj->vmobjlock)); */ + + if (end == 0) { + end = INT64_MAX; + } + + if (UAO_USES_SWHASH(aobj->u_pages)) { + int i, hashbuckets = aobj->u_swhashmask + 1; + voff_t taghi; + voff_t taglo; + + taglo = UAO_SWHASH_ELT_TAG(start); + taghi = UAO_SWHASH_ELT_TAG(end); + + for (i = 0; i < hashbuckets; i++) { + struct uao_swhash_elt *elt, *next; + + for (elt = LIST_FIRST(&aobj->u_swhash[i]); + elt != NULL; + elt = next) { + int startidx, endidx; + int j; + + next = LIST_NEXT(elt, list); + + if (elt->tag < taglo || taghi < elt->tag) { + continue; + } + + if (elt->tag == taglo) { + startidx = + UAO_SWHASH_ELT_PAGESLOT_IDX(start); + } else { + startidx = 0; + } + + if (elt->tag == taghi) { + endidx = + UAO_SWHASH_ELT_PAGESLOT_IDX(end); + } else { + endidx = UAO_SWHASH_CLUSTER_SIZE; + } + + for (j = startidx; j < endidx; j++) { + int slot = elt->slots[j]; + + KASSERT(uvm_pagelookup(&aobj->u_obj, + (UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + + j) << PAGE_SHIFT) == NULL); + + if (slot > 0) { + uvm_swap_free(slot, 1); + swpgonlydelta++; + KASSERT(elt->count > 0); + elt->slots[j] = 0; + elt->count--; + } + } + + if (elt->count == 0) { + LIST_REMOVE(elt, list); + pool_put(&uao_swhash_elt_pool, elt); + } + } + } + } else { + int i; + + if (aobj->u_pages < end) { + end = aobj->u_pages; + } + for (i = start; i < end; i++) { + int slot = aobj->u_swslots[i]; + + if (slot > 0) { + uvm_swap_free(slot, 1); + swpgonlydelta++; + } + } + } + + /* + * adjust the counter of pages only in swap for all + * the swap slots we've freed. 
+ */ + + if (swpgonlydelta > 0) { + simple_lock(&uvm.swap_data_lock); + KASSERT(uvmexp.swpgonly >= swpgonlydelta); + uvmexp.swpgonly -= swpgonlydelta; + simple_unlock(&uvm.swap_data_lock); + } } Index: sys/uvm/uvm_aobj.h =================================================================== RCS file: /home/openbsd/cvs/src/sys/uvm/uvm_aobj.h,v retrieving revision 1.13 diff -u -p -r1.13 uvm_aobj.h --- sys/uvm/uvm_aobj.h 10 May 2011 21:48:17 -0000 1.13 +++ sys/uvm/uvm_aobj.h 2 May 2013 18:56:40 -0000 @@ -53,6 +53,7 @@ /* flags for uao_create: can only be used one time (at bootup) */ #define UAO_FLAG_KERNOBJ 0x1 /* create kernel object */ #define UAO_FLAG_KERNSWAP 0x2 /* enable kernel swap */ +#define UAO_FLAG_CANFAIL 0x4 /* creation can fail */ /* internal flags */ #define UAO_FLAG_NOSWAP 0x8 /* aobj can't swap (kernel obj only!) */ @@ -67,6 +68,8 @@ void uao_init(void); int uao_set_swslot(struct uvm_object *, int, int); int uao_dropswap(struct uvm_object *, int); int uao_swap_off(int, int); +int uao_shrink(struct uvm_object *, int); +int uao_grow(struct uvm_object *, int); /* * globals