The branch main has been updated by kib:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=7685aaea8850f5b6995a17740a016019e0956c70

commit 7685aaea8850f5b6995a17740a016019e0956c70
Author:     Konstantin Belousov <[email protected]>
AuthorDate: 2025-12-20 16:03:40 +0000
Commit:     Konstantin Belousov <[email protected]>
CommitDate: 2025-12-30 03:25:36 +0000

    vm_object_coalesce(): return swap reservation back if overcharged
    
    It is possible for both vm_map_insert() and vm_object_coalesce() to
    charge for the same region.  The issue is that vm_map_insert() must
    charge in advance to ensure that the mapping would not exceed the swap
    limit, but then the coalesce might decide to extend an object that
    already (partially) backs the mapped region.
    
    Handle this by passing to vm_object_coalesce() exact information about
    the charging mode of the extending range ('not charging' or 'charged'),
    using flags instead of a simple boolean.  In vm_object_coalesce(),
    detect overcharge and undo it if needed.
    
    Note that this relies on the vm_object_coalesce() call being the last
    action in vm_map_insert() before extending the previous map entry.
    
    Reported and tested by: pho
    Reviewed by:    markj
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D54338
---
 sys/vm/vm_map.c    | 44 ++++++++++++++++++++++++++++++--------------
 sys/vm/vm_object.c | 41 +++++++++++++++++++++++++++++++----------
 sys/vm/vm_object.h |  8 +++++++-
 3 files changed, 68 insertions(+), 25 deletions(-)

diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 6b09552c5fee..68dcadd2b2f1 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -1620,6 +1620,7 @@ vm_map_insert1(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
        vm_inherit_t inheritance;
        u_long bdry;
        u_int bidx;
+       int cflags;
 
        VM_MAP_ASSERT_LOCKED(map);
        KASSERT(object != kernel_object ||
@@ -1696,20 +1697,36 @@ vm_map_insert1(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
        }
 
        cred = NULL;
-       if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0)
-               goto charged;
-       if ((cow & MAP_ACC_CHARGED) || ((prot & VM_PROT_WRITE) &&
-           ((protoeflags & MAP_ENTRY_NEEDS_COPY) || object == NULL))) {
-               if (!(cow & MAP_ACC_CHARGED) && !swap_reserve(end - start))
-                       return (KERN_RESOURCE_SHORTAGE);
-               KASSERT(object == NULL ||
-                   (protoeflags & MAP_ENTRY_NEEDS_COPY) != 0 ||
-                   object->cred == NULL,
-                   ("overcommit: vm_map_insert o %p", object));
-               cred = curthread->td_ucred;
+       if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0) {
+               cflags = OBJCO_NO_CHARGE;
+       } else {
+               cflags = 0;
+               if ((cow & MAP_ACC_CHARGED) != 0 ||
+                   ((prot & VM_PROT_WRITE) != 0 &&
+                   ((protoeflags & MAP_ENTRY_NEEDS_COPY) != 0 ||
+                   object == NULL))) {
+                       if ((cow & MAP_ACC_CHARGED) == 0) {
+                               if (!swap_reserve(end - start))
+                                       return (KERN_RESOURCE_SHORTAGE);
+
+                               /*
+                                * Only inform vm_object_coalesce()
+                                * that the object was charged if
+                                * there is no need for CoW, so the
+                                * swap amount reserved is applicable
+                                * to the prev_entry->object.
+                                */
+                               if ((protoeflags & MAP_ENTRY_NEEDS_COPY) == 0)
+                                       cflags |= OBJCO_CHARGED;
+                       }
+                       KASSERT(object == NULL ||
+                           (protoeflags & MAP_ENTRY_NEEDS_COPY) != 0 ||
+                           object->cred == NULL,
+                           ("overcommit: vm_map_insert o %p", object));
+                       cred = curthread->td_ucred;
+               }
        }
 
-charged:
        /* Expand the kernel pmap, if necessary. */
        if (map == kernel_map && end > kernel_vm_end) {
                int rv;
@@ -1741,8 +1758,7 @@ charged:
            vm_object_coalesce(prev_entry->object.vm_object,
            prev_entry->offset,
            (vm_size_t)(prev_entry->end - prev_entry->start),
-           (vm_size_t)(end - prev_entry->end), cred != NULL &&
-           (protoeflags & MAP_ENTRY_NEEDS_COPY) == 0)) {
+           (vm_size_t)(end - prev_entry->end), cflags)) {
                /*
                 * We were able to extend the object.  Determine if we
                 * can extend the previous map entry to include the
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index c216fdc01af1..f4c54ba91742 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -2161,7 +2161,7 @@ vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
  */
 boolean_t
 vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset,
-    vm_size_t prev_size, vm_size_t next_size, boolean_t reserved)
+    vm_size_t prev_size, vm_size_t next_size, int cflags)
 {
        vm_pindex_t next_end, next_pindex;
 
@@ -2202,8 +2202,7 @@ vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset,
        /*
         * Account for the charge.
         */
-       if (prev_object->cred != NULL &&
-           next_pindex + next_size > prev_object->size) {
+       if (prev_object->cred != NULL && (cflags & OBJCO_NO_CHARGE) == 0) {
                /*
                 * If prev_object was charged, then this mapping,
                 * although not charged now, may become writable
@@ -2214,14 +2213,36 @@ vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset,
                 * entry, and swap reservation for this entry is
                 * managed in appropriate time.
                 */
-               vm_size_t charge = ptoa(next_pindex + next_size -
-                   prev_object->size);
-               if (!reserved &&
-                   !swap_reserve_by_cred(charge, prev_object->cred)) {
-                       VM_OBJECT_WUNLOCK(prev_object);
-                       return (FALSE);
+               if (next_end > prev_object->size) {
+                       vm_size_t charge = ptoa(next_end - prev_object->size);
+
+                       if ((cflags & OBJCO_CHARGED) == 0) {
+                               if (!swap_reserve_by_cred(charge,
+                                   prev_object->cred)) {
+                                       VM_OBJECT_WUNLOCK(prev_object);
+                                       return (FALSE);
+                               }
+                       } else if (prev_object->size > next_pindex) {
+                               /*
+                                * The caller charged, but:
+                                * - the object has already accounted for the
+                                *   space,
+                                * - and the object end is between previous
+                                *   mapping end and next_end.
+                                */
+                               swap_release_by_cred(ptoa(prev_object->size -
+                                   next_pindex), prev_object->cred);
+                       }
+                       prev_object->charge += charge;
+               } else if ((cflags & OBJCO_CHARGED) != 0) {
+                       /*
+                        * The caller charged, but the object has
+                        * already accounted for the space.  The whole
+                        * new mapping charge should be released.
+                        */
+                       swap_release_by_cred(ptoa(next_size),
+                           prev_object->cred);
                }
-               prev_object->charge += charge;
        }
 
        /*
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index e58fae5f0090..98afb93f8389 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -228,6 +228,12 @@ struct vm_object {
 #define        OBJPR_NOTMAPPED 0x2             /* Don't unmap pages. */
 #define        OBJPR_VALIDONLY 0x4             /* Ignore invalid pages. */
 
+/*
+ * Options for vm_object_coalesce().
+ */
+#define        OBJCO_CHARGED   0x1             /* The next_size was charged already */
+#define        OBJCO_NO_CHARGE 0x2             /* Do not do swap accounting at all */
+
 TAILQ_HEAD(object_q, vm_object);
 
 extern struct object_q vm_object_list; /* list of allocated objects */
@@ -354,7 +360,7 @@ vm_object_t vm_object_allocate_anon(vm_pindex_t, vm_object_t, struct ucred *,
    vm_size_t);
 vm_object_t vm_object_allocate_dyn(objtype_t, vm_pindex_t, u_short);
 boolean_t vm_object_coalesce(vm_object_t, vm_ooffset_t, vm_size_t, vm_size_t,
-   boolean_t);
+   int);
 void vm_object_collapse (vm_object_t);
 void vm_object_deallocate (vm_object_t);
 void vm_object_destroy (vm_object_t);

Reply via email to