Author: markj
Date: Sat May 18 01:46:38 2019
New Revision: 347949
URL: https://svnweb.freebsd.org/changeset/base/347949

Log:
  Implement the M_NEXTFIT allocation strategy for vmem(9).
  
  This is described in the vmem paper: "directs vmem to use the next free
  segment after the one previously allocated."  The implementation adds a
new boundary tag type, BT_TYPE_CURSOR, which is linked into the segment list
  and precedes the segment following the previous M_NEXTFIT allocation.
  The cursor is used to locate the next free segment satisfying the
  allocation constraints.
  
  This implementation isn't O(1) since busy tags aren't coalesced, and we
  may potentially scan the entire segment list during an M_NEXTFIT
  allocation.
  
  Reviewed by:  alc
  MFC after:    1 month
  Differential Revision:        https://reviews.freebsd.org/D17226

Modified:
  head/share/man/man9/vmem.9
  head/sys/kern/subr_vmem.c
  head/sys/sys/malloc.h

Modified: head/share/man/man9/vmem.9
==============================================================================
--- head/share/man/man9/vmem.9  Sat May 18 00:22:28 2019        (r347948)
+++ head/share/man/man9/vmem.9  Sat May 18 01:46:38 2019        (r347949)
@@ -27,7 +27,7 @@
 .\" $FreeBSD$
 .\"
 .\" ------------------------------------------------------------
-.Dd July 12, 2013
+.Dd May 17, 2019
 .Dt VMEM 9
 .Os
 .\" ------------------------------------------------------------
@@ -95,18 +95,9 @@ The smallest unit of allocation.
 The largest size of allocations which can be served by quantum cache.
 It is merely a hint and can be ignored.
 .It Fa flags
-Combination of
 .Xr malloc 9
-wait flag and
-.Nm
-allocation strategy flag:
-.Bl -tag -width M_FIRSTFIT
-.It Dv M_FIRSTFIT
-Prefer allocation performance.
-.It Dv M_BESTFIT
-Prefer space efficiency.
+wait flag.
 .El
-.El
 .Pp
 .\" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 .Fn vmem_add
@@ -169,10 +160,16 @@ if the caller does not care.
 A bitwise OR of an allocation strategy and a
 .Xr malloc 9
 wait flag.
-The allocation strategy is one of
-.Dv M_FIRSTFIT
-and
-.Dv M_BESTFIT .
+The allocation strategy is one of:
+.Bl -tag -width indent
+.It Dv M_FIRSTFIT
+Prefer allocation performance.
+.It Dv M_BESTFIT
+Prefer space efficiency.
+.It Dv M_NEXTFIT
+Perform an address-ordered search for free addresses, beginning where
+the previous search ended.
+.El
 .It Fa addrp
 On success, if
 .Fa addrp

Modified: head/sys/kern/subr_vmem.c
==============================================================================
--- head/sys/kern/subr_vmem.c   Sat May 18 00:22:28 2019        (r347948)
+++ head/sys/kern/subr_vmem.c   Sat May 18 01:46:38 2019        (r347949)
@@ -89,10 +89,10 @@ int vmem_startup_count(void);
 
 #define        VMEM_QCACHE_IDX_MAX     16
 
-#define        VMEM_FITMASK    (M_BESTFIT | M_FIRSTFIT)
+#define        VMEM_FITMASK    (M_BESTFIT | M_FIRSTFIT | M_NEXTFIT)
 
-#define        VMEM_FLAGS                                              \
-    (M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM | M_BESTFIT | M_FIRSTFIT)
+#define        VMEM_FLAGS      (M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM | \
+    M_BESTFIT | M_FIRSTFIT | M_NEXTFIT)
 
 #define        BT_FLAGS        (M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM)
 
@@ -120,6 +120,20 @@ typedef struct qcache qcache_t;
 
 #define        VMEM_NAME_MAX   16
 
+/* boundary tag */
+struct vmem_btag {
+       TAILQ_ENTRY(vmem_btag) bt_seglist;
+       union {
+               LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
+               LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
+       } bt_u;
+#define        bt_hashlist     bt_u.u_hashlist
+#define        bt_freelist     bt_u.u_freelist
+       vmem_addr_t     bt_start;
+       vmem_size_t     bt_size;
+       int             bt_type;
+};
+
 /* vmem arena */
 struct vmem {
        struct mtx_padalign     vm_lock;
@@ -145,6 +159,7 @@ struct vmem {
        vmem_size_t             vm_inuse;
        vmem_size_t             vm_size;
        vmem_size_t             vm_limit;
+       struct vmem_btag        vm_cursor;
 
        /* Used on import. */
        vmem_import_t           *vm_importfn;
@@ -158,24 +173,11 @@ struct vmem {
        qcache_t                vm_qcache[VMEM_QCACHE_IDX_MAX];
 };
 
-/* boundary tag */
-struct vmem_btag {
-       TAILQ_ENTRY(vmem_btag) bt_seglist;
-       union {
-               LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
-               LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
-       } bt_u;
-#define        bt_hashlist     bt_u.u_hashlist
-#define        bt_freelist     bt_u.u_freelist
-       vmem_addr_t     bt_start;
-       vmem_size_t     bt_size;
-       int             bt_type;
-};
-
 #define        BT_TYPE_SPAN            1       /* Allocated from importfn */
 #define        BT_TYPE_SPAN_STATIC     2       /* vmem_add() or create. */
 #define        BT_TYPE_FREE            3       /* Available space. */
 #define        BT_TYPE_BUSY            4       /* Used space. */
+#define        BT_TYPE_CURSOR          5       /* Cursor for nextfit allocations. */
 #define        BT_ISSPAN_P(bt) ((bt)->bt_type <= BT_TYPE_SPAN_STATIC)
 
 #define        BT_END(bt)      ((bt)->bt_start + (bt)->bt_size - 1)
@@ -990,6 +992,162 @@ vmem_clip(vmem_t *vm, bt_t *bt, vmem_addr_t start, vme
        MPASS(bt->bt_size >= size);
 }
 
+static int
+vmem_try_fetch(vmem_t *vm, const vmem_size_t size, vmem_size_t align, int flags)
+{
+       vmem_size_t avail;
+
+       VMEM_ASSERT_LOCKED(vm);
+
+       /*
+        * XXX it is possible to fail to meet xalloc constraints with the
+        * imported region.  It is up to the user to specify the
+        * import quantum such that it can satisfy any allocation.
+        */
+       if (vmem_import(vm, size, align, flags) == 0)
+               return (1);
+
+       /*
+        * Try to free some space from the quantum cache or reclaim
+        * functions if available.
+        */
+       if (vm->vm_qcache_max != 0 || vm->vm_reclaimfn != NULL) {
+               avail = vm->vm_size - vm->vm_inuse;
+               VMEM_UNLOCK(vm);
+               if (vm->vm_qcache_max != 0)
+                       qc_drain(vm);
+               if (vm->vm_reclaimfn != NULL)
+                       vm->vm_reclaimfn(vm, flags);
+               VMEM_LOCK(vm);
+               /* If we were successful retry even NOWAIT. */
+               if (vm->vm_size - vm->vm_inuse > avail)
+                       return (1);
+       }
+       if ((flags & M_NOWAIT) != 0)
+               return (0);
+       VMEM_CONDVAR_WAIT(vm);
+       return (1);
+}
+
+static int
+vmem_try_release(vmem_t *vm, struct vmem_btag *bt, const bool remfree)
+{
+       struct vmem_btag *prev;
+
+       MPASS(bt->bt_type == BT_TYPE_FREE);
+
+       if (vm->vm_releasefn == NULL)
+               return (0);
+
+       prev = TAILQ_PREV(bt, vmem_seglist, bt_seglist);
+       MPASS(prev != NULL);
+       MPASS(prev->bt_type != BT_TYPE_FREE);
+
+       if (prev->bt_type == BT_TYPE_SPAN && prev->bt_size == bt->bt_size) {
+               vmem_addr_t spanaddr;
+               vmem_size_t spansize;
+
+               MPASS(prev->bt_start == bt->bt_start);
+               spanaddr = prev->bt_start;
+               spansize = prev->bt_size;
+               if (remfree)
+                       bt_remfree(vm, bt);
+               bt_remseg(vm, bt);
+               bt_remseg(vm, prev);
+               vm->vm_size -= spansize;
+               VMEM_CONDVAR_BROADCAST(vm);
+               bt_freetrim(vm, BT_MAXFREE);
+               vm->vm_releasefn(vm->vm_arg, spanaddr, spansize);
+               return (1);
+       }
+       return (0);
+}
+
+static int
+vmem_xalloc_nextfit(vmem_t *vm, const vmem_size_t size, vmem_size_t align,
+    const vmem_size_t phase, const vmem_size_t nocross, int flags,
+    vmem_addr_t *addrp)
+{
+       struct vmem_btag *bt, *cursor, *next, *prev;
+       int error;
+
+       error = ENOMEM;
+       VMEM_LOCK(vm);
+retry:
+       /*
+        * Make sure we have enough tags to complete the operation.
+        */
+       if (vm->vm_nfreetags < BT_MAXALLOC && bt_fill(vm, flags) != 0)
+               goto out;
+
+       /*
+        * Find the next free tag meeting our constraints.  If one is found,
+        * perform the allocation.
+        */
+       for (cursor = &vm->vm_cursor, bt = TAILQ_NEXT(cursor, bt_seglist);
+           bt != cursor; bt = TAILQ_NEXT(bt, bt_seglist)) {
+               if (bt == NULL)
+                       bt = TAILQ_FIRST(&vm->vm_seglist);
+               if (bt->bt_type == BT_TYPE_FREE && bt->bt_size >= size &&
+                   (error = vmem_fit(bt, size, align, phase, nocross,
+                   VMEM_ADDR_MIN, VMEM_ADDR_MAX, addrp)) == 0) {
+                       vmem_clip(vm, bt, *addrp, size);
+                       break;
+               }
+       }
+
+       /*
+        * Try to coalesce free segments around the cursor.  If we succeed, and
+        * have not yet satisfied the allocation request, try again with the
+        * newly coalesced segment.
+        */
+       if ((next = TAILQ_NEXT(cursor, bt_seglist)) != NULL &&
+           (prev = TAILQ_PREV(cursor, vmem_seglist, bt_seglist)) != NULL &&
+           next->bt_type == BT_TYPE_FREE && prev->bt_type == BT_TYPE_FREE &&
+           prev->bt_start + prev->bt_size == next->bt_start) {
+               prev->bt_size += next->bt_size;
+               bt_remfree(vm, next);
+               bt_remseg(vm, next);
+
+               /*
+                * The coalesced segment might be able to satisfy our request.
+                * If not, we might need to release it from the arena.
+                */
+               if (error == ENOMEM && prev->bt_size >= size &&
+                   (error = vmem_fit(prev, size, align, phase, nocross,
+                   VMEM_ADDR_MIN, VMEM_ADDR_MAX, addrp)) == 0) {
+                       vmem_clip(vm, prev, *addrp, size);
+                       bt = prev;
+               } else
+                       (void)vmem_try_release(vm, prev, true);
+       }
+
+       /*
+        * If the allocation was successful, advance the cursor.
+        */
+       if (error == 0) {
+               TAILQ_REMOVE(&vm->vm_seglist, cursor, bt_seglist);
+               for (; bt != NULL && bt->bt_start < *addrp + size;
+                   bt = TAILQ_NEXT(bt, bt_seglist))
+                       ;
+               if (bt != NULL)
+                       TAILQ_INSERT_BEFORE(bt, cursor, bt_seglist);
+               else
+                       TAILQ_INSERT_HEAD(&vm->vm_seglist, cursor, bt_seglist);
+       }
+
+       /*
+        * Attempt to bring additional resources into the arena.  If that fails
+        * and M_WAITOK is specified, sleep waiting for resources to be freed.
+        */
+       if (error == ENOMEM && vmem_try_fetch(vm, size, align, flags))
+               goto retry;
+
+out:
+       VMEM_UNLOCK(vm);
+       return (error);
+}
+
 /* ---- vmem API */
 
 void
@@ -1051,9 +1209,13 @@ vmem_init(vmem_t *vm, const char *name, vmem_addr_t ba
        qc_init(vm, qcache_max);
 
        TAILQ_INIT(&vm->vm_seglist);
-       for (i = 0; i < VMEM_MAXORDER; i++) {
+       vm->vm_cursor.bt_start = vm->vm_cursor.bt_size = 0;
+       vm->vm_cursor.bt_type = BT_TYPE_CURSOR;
+       TAILQ_INSERT_TAIL(&vm->vm_seglist, &vm->vm_cursor, bt_seglist);
+
+       for (i = 0; i < VMEM_MAXORDER; i++)
                LIST_INIT(&vm->vm_freelist[i]);
-       }
+
        memset(&vm->vm_hash0, 0, sizeof(vm->vm_hash0));
        vm->vm_hashsize = VMEM_HASHSIZE_MIN;
        vm->vm_hashlist = vm->vm_hash0;
@@ -1120,7 +1282,7 @@ vmem_alloc(vmem_t *vm, vmem_size_t size, int flags, vm
 
        flags &= VMEM_FLAGS;
        MPASS(size > 0);
-       MPASS(strat == M_BESTFIT || strat == M_FIRSTFIT);
+       MPASS(strat == M_BESTFIT || strat == M_FIRSTFIT || strat == M_NEXTFIT);
        if ((flags & M_NOWAIT) == 0)
                WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "vmem_alloc");
 
@@ -1151,7 +1313,6 @@ vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_
        struct vmem_freelist *list;
        struct vmem_freelist *first;
        struct vmem_freelist *end;
-       vmem_size_t avail;
        bt_t *bt;
        int error;
        int strat;
@@ -1160,7 +1321,7 @@ vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_
        strat = flags & VMEM_FITMASK;
        MPASS(size0 > 0);
        MPASS(size > 0);
-       MPASS(strat == M_BESTFIT || strat == M_FIRSTFIT);
+       MPASS(strat == M_BESTFIT || strat == M_FIRSTFIT || strat == M_NEXTFIT);
        MPASS((flags & (M_NOWAIT|M_WAITOK)) != (M_NOWAIT|M_WAITOK));
        if ((flags & M_NOWAIT) == 0)
                WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "vmem_xalloc");
@@ -1173,11 +1334,20 @@ vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_
        MPASS(nocross == 0 || nocross >= size);
        MPASS(minaddr <= maxaddr);
        MPASS(!VMEM_CROSS_P(phase, phase + size - 1, nocross));
+       if (strat == M_NEXTFIT)
+               MPASS(minaddr == VMEM_ADDR_MIN && maxaddr == VMEM_ADDR_MAX);
 
        if (align == 0)
                align = vm->vm_quantum_mask + 1;
-
        *addrp = 0;
+
+       /*
+        * Next-fit allocations don't use the freelists.
+        */
+       if (strat == M_NEXTFIT)
+               return (vmem_xalloc_nextfit(vm, size0, align, phase, nocross,
+                   flags, addrp));
+
        end = &vm->vm_freelist[VMEM_MAXORDER];
        /*
         * choose a free block from which we allocate.
@@ -1194,6 +1364,7 @@ vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_
                        error = ENOMEM;
                        break;
                }
+
                /*
                 * Scan freelists looking for a tag that satisfies the
                 * allocation.  If we're doing BESTFIT we may encounter
@@ -1215,6 +1386,7 @@ vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_
                                        break;
                        }
                }
+
                /*
                 * Retry if the fast algorithm failed.
                 */
@@ -1223,35 +1395,16 @@ vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_
                        first = bt_freehead_toalloc(vm, size, strat);
                        continue;
                }
-               /*
-                * XXX it is possible to fail to meet restrictions with the
-                * imported region.  It is up to the user to specify the
-                * import quantum such that it can satisfy any allocation.
-                */
-               if (vmem_import(vm, size, align, flags) == 0)
-                       continue;
 
                /*
-                * Try to free some space from the quantum cache or reclaim
-                * functions if available.
+                * Try a few measures to bring additional resources into the
+                * arena.  If all else fails, we will sleep waiting for
+                * resources to be freed.
                 */
-               if (vm->vm_qcache_max != 0 || vm->vm_reclaimfn != NULL) {
-                       avail = vm->vm_size - vm->vm_inuse;
-                       VMEM_UNLOCK(vm);
-                       if (vm->vm_qcache_max != 0)
-                               qc_drain(vm);
-                       if (vm->vm_reclaimfn != NULL)
-                               vm->vm_reclaimfn(vm, flags);
-                       VMEM_LOCK(vm);
-                       /* If we were successful retry even NOWAIT. */
-                       if (vm->vm_size - vm->vm_inuse > avail)
-                               continue;
-               }
-               if ((flags & M_NOWAIT) != 0) {
+               if (!vmem_try_fetch(vm, size, align, flags)) {
                        error = ENOMEM;
                        break;
                }
-               VMEM_CONDVAR_WAIT(vm);
        }
 out:
        VMEM_UNLOCK(vm);
@@ -1313,24 +1466,7 @@ vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t s
                bt_remseg(vm, t);
        }
 
-       t = TAILQ_PREV(bt, vmem_seglist, bt_seglist);
-       MPASS(t != NULL);
-       MPASS(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
-       if (vm->vm_releasefn != NULL && t->bt_type == BT_TYPE_SPAN &&
-           t->bt_size == bt->bt_size) {
-               vmem_addr_t spanaddr;
-               vmem_size_t spansize;
-
-               MPASS(t->bt_start == bt->bt_start);
-               spanaddr = bt->bt_start;
-               spansize = bt->bt_size;
-               bt_remseg(vm, bt);
-               bt_remseg(vm, t);
-               vm->vm_size -= spansize;
-               VMEM_CONDVAR_BROADCAST(vm);
-               bt_freetrim(vm, BT_MAXFREE);
-               (*vm->vm_releasefn)(vm->vm_arg, spanaddr, spansize);
-       } else {
+       if (!vmem_try_release(vm, bt, false)) {
                bt_insfree(vm, bt);
                VMEM_CONDVAR_BROADCAST(vm);
                bt_freetrim(vm, BT_MAXFREE);
@@ -1409,6 +1545,8 @@ bt_type_string(int type)
                return "span";
        case BT_TYPE_SPAN_STATIC:
                return "static span";
+       case BT_TYPE_CURSOR:
+               return "cursor";
        default:
                break;
        }

Modified: head/sys/sys/malloc.h
==============================================================================
--- head/sys/sys/malloc.h       Sat May 18 00:22:28 2019        (r347948)
+++ head/sys/sys/malloc.h       Sat May 18 01:46:38 2019        (r347949)
@@ -57,9 +57,10 @@
 #define        M_NOVM          0x0200          /* don't ask VM for pages */
 #define        M_USE_RESERVE   0x0400          /* can alloc out of reserve memory */
 #define        M_NODUMP        0x0800          /* don't dump pages in this allocation */
-#define        M_FIRSTFIT      0x1000          /* Only for vmem, fast fit. */
-#define        M_BESTFIT       0x2000          /* Only for vmem, low fragmentation. */
-#define        M_EXEC          0x4000          /* allocate executable space. */
+#define        M_FIRSTFIT      0x1000          /* only for vmem, fast fit */
+#define        M_BESTFIT       0x2000          /* only for vmem, low fragmentation */
+#define        M_EXEC          0x4000          /* allocate executable space */
+#define        M_NEXTFIT       0x8000          /* only for vmem, follow cursor */
 
 #define        M_MAGIC         877983977       /* time when first defined :-) */
 
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to