Author: kib
Date: Wed Sep  9 22:02:30 2020
New Revision: 365520
URL: https://svnweb.freebsd.org/changeset/base/365520

Log:
  vm_map: Add a map entry kind that can only be clipped at a specific boundary.
  
  The entries and their clip boundaries must be aligned on supported
  superpage sizes from pagesizes[].  vm_map operations return the Mach
  error KERN_INVALID_ARGUMENT, which is usually translated to EINVAL, if
  they would require clipping such an entry at an address that is not
  on its boundary.
  
  In other words, such entries force preservation of the superpage
  properties of the virtual addresses they cover.
  
  Reviewed by:  markj
  Tested by:    pho
  Sponsored by: The FreeBSD Foundation
  MFC after:    1 week
  Differential revision:        https://reviews.freebsd.org/D24652

Modified:
  head/sys/vm/vm_map.c
  head/sys/vm/vm_map.h

Modified: head/sys/vm/vm_map.c
==============================================================================
--- head/sys/vm/vm_map.c        Wed Sep  9 21:57:55 2020        (r365519)
+++ head/sys/vm/vm_map.c        Wed Sep  9 22:02:30 2020        (r365520)
@@ -1554,13 +1554,17 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_oof
        struct ucred *cred;
        vm_eflags_t protoeflags;
        vm_inherit_t inheritance;
+       u_long bdry;
+       u_int bidx;
 
        VM_MAP_ASSERT_LOCKED(map);
        KASSERT(object != kernel_object ||
            (cow & MAP_COPY_ON_WRITE) == 0,
            ("vm_map_insert: kernel object and COW"));
-       KASSERT(object == NULL || (cow & MAP_NOFAULT) == 0,
-           ("vm_map_insert: paradoxical MAP_NOFAULT request"));
+       KASSERT(object == NULL || (cow & MAP_NOFAULT) == 0 ||
+           (cow & MAP_SPLIT_BOUNDARY_MASK) != 0,
+           ("vm_map_insert: paradoxical MAP_NOFAULT request, obj %p cow %#x",
+           object, cow));
        KASSERT((prot & ~max) == 0,
            ("prot %#x is not subset of max_prot %#x", prot, max));
 
@@ -1615,6 +1619,17 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_oof
                inheritance = VM_INHERIT_SHARE;
        else
                inheritance = VM_INHERIT_DEFAULT;
+       if ((cow & MAP_SPLIT_BOUNDARY_MASK) != 0) {
+               /* This magically ignores index 0, for usual page size. */
+               bidx = (cow & MAP_SPLIT_BOUNDARY_MASK) >>
+                   MAP_SPLIT_BOUNDARY_SHIFT;
+               if (bidx >= MAXPAGESIZES)
+                       return (KERN_INVALID_ARGUMENT);
+               bdry = pagesizes[bidx] - 1;
+               if ((start & bdry) != 0 || (end & bdry) != 0)
+                       return (KERN_INVALID_ARGUMENT);
+               protoeflags |= bidx << MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
+       }
 
        cred = NULL;
        if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0)
@@ -2342,31 +2357,40 @@ vm_map_entry_clone(vm_map_t map, vm_map_entry_t entry)
  *     the specified address; if necessary,
  *     it splits the entry into two.
  */
-static inline void
-vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
+static int
+vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t startaddr)
 {
        vm_map_entry_t new_entry;
+       int bdry_idx;
 
        if (!map->system_map)
                WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
                    "%s: map %p entry %p start 0x%jx", __func__, map, entry,
-                   (uintmax_t)start);
+                   (uintmax_t)startaddr);
 
-       if (start <= entry->start)
-               return;
+       if (startaddr <= entry->start)
+               return (KERN_SUCCESS);
 
        VM_MAP_ASSERT_LOCKED(map);
-       KASSERT(entry->end > start && entry->start < start,
+       KASSERT(entry->end > startaddr && entry->start < startaddr,
            ("%s: invalid clip of entry %p", __func__, entry));
 
+       bdry_idx = (entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK) >>
+           MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
+       if (bdry_idx != 0) {
+               if ((startaddr & (pagesizes[bdry_idx] - 1)) != 0)
+                       return (KERN_INVALID_ARGUMENT);
+       }
+
        new_entry = vm_map_entry_clone(map, entry);
 
        /*
         * Split off the front portion.  Insert the new entry BEFORE this one,
         * so that this entry has the specified starting address.
         */
-       new_entry->end = start;
+       new_entry->end = startaddr;
        vm_map_entry_link(map, new_entry);
+       return (KERN_SUCCESS);
 }
 
 /*
@@ -2376,11 +2400,12 @@ vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, 
  *     the interior of the entry.  Return entry after 'start', and in
  *     prev_entry set the entry before 'start'.
  */
-static inline vm_map_entry_t
+static int
 vm_map_lookup_clip_start(vm_map_t map, vm_offset_t start,
-    vm_map_entry_t *prev_entry)
+    vm_map_entry_t *res_entry, vm_map_entry_t *prev_entry)
 {
        vm_map_entry_t entry;
+       int rv;
 
        if (!map->system_map)
                WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
@@ -2389,11 +2414,14 @@ vm_map_lookup_clip_start(vm_map_t map, vm_offset_t sta
 
        if (vm_map_lookup_entry(map, start, prev_entry)) {
                entry = *prev_entry;
-               vm_map_clip_start(map, entry, start);
+               rv = vm_map_clip_start(map, entry, start);
+               if (rv != KERN_SUCCESS)
+                       return (rv);
                *prev_entry = vm_map_entry_pred(entry);
        } else
                entry = vm_map_entry_succ(*prev_entry);
-       return (entry);
+       *res_entry = entry;
+       return (KERN_SUCCESS);
 }
 
 /*
@@ -2403,31 +2431,41 @@ vm_map_lookup_clip_start(vm_map_t map, vm_offset_t sta
  *     the specified address; if necessary,
  *     it splits the entry into two.
  */
-static inline void
-vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
+static int
+vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t endaddr)
 {
        vm_map_entry_t new_entry;
+       int bdry_idx;
 
        if (!map->system_map)
                WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
                    "%s: map %p entry %p end 0x%jx", __func__, map, entry,
-                   (uintmax_t)end);
+                   (uintmax_t)endaddr);
 
-       if (end >= entry->end)
-               return;
+       if (endaddr >= entry->end)
+               return (KERN_SUCCESS);
 
        VM_MAP_ASSERT_LOCKED(map);
-       KASSERT(entry->start < end && entry->end > end,
+       KASSERT(entry->start < endaddr && entry->end > endaddr,
            ("%s: invalid clip of entry %p", __func__, entry));
 
+       bdry_idx = (entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK) >>
+           MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
+       if (bdry_idx != 0) {
+               if ((endaddr & (pagesizes[bdry_idx] - 1)) != 0)
+                       return (KERN_INVALID_ARGUMENT);
+       }
+
        new_entry = vm_map_entry_clone(map, entry);
 
        /*
         * Split off the back portion.  Insert the new entry AFTER this one,
         * so that this entry has the specified ending address.
         */
-       new_entry->start = end;
+       new_entry->start = endaddr;
        vm_map_entry_link(map, new_entry);
+
+       return (KERN_SUCCESS);
 }
 
 /*
@@ -2469,12 +2507,17 @@ vm_map_submap(
        if (vm_map_lookup_entry(map, start, &entry) && entry->end >= end &&
            (entry->eflags & MAP_ENTRY_COW) == 0 &&
            entry->object.vm_object == NULL) {
-               vm_map_clip_start(map, entry, start);
-               vm_map_clip_end(map, entry, end);
+               result = vm_map_clip_start(map, entry, start);
+               if (result != KERN_SUCCESS)
+                       goto unlock;
+               result = vm_map_clip_end(map, entry, end);
+               if (result != KERN_SUCCESS)
+                       goto unlock;
                entry->object.sub_map = submap;
                entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
                result = KERN_SUCCESS;
        }
+unlock:
        vm_map_unlock(map);
 
        if (result != KERN_SUCCESS) {
@@ -2661,11 +2704,18 @@ again:
         * of this loop early and let the next loop simplify the entries, since
         * some may now be mergeable.
         */
-       rv = KERN_SUCCESS;
-       vm_map_clip_start(map, first_entry, start);
+       rv = vm_map_clip_start(map, first_entry, start);
+       if (rv != KERN_SUCCESS) {
+               vm_map_unlock(map);
+               return (rv);
+       }
        for (entry = first_entry; entry->start < end;
            entry = vm_map_entry_succ(entry)) {
-               vm_map_clip_end(map, entry, end);
+               rv = vm_map_clip_end(map, entry, end);
+               if (rv != KERN_SUCCESS) {
+                       vm_map_unlock(map);
+                       return (rv);
+               }
 
                if (set_max ||
                    ((new_prot & ~entry->protection) & VM_PROT_WRITE) == 0 ||
@@ -2785,6 +2835,7 @@ vm_map_madvise(
        int behav)
 {
        vm_map_entry_t entry, prev_entry;
+       int rv;
        bool modify_map;
 
        /*
@@ -2830,13 +2881,22 @@ vm_map_madvise(
                 * We clip the vm_map_entry so that behavioral changes are
                 * limited to the specified address range.
                 */
-               for (entry = vm_map_lookup_clip_start(map, start, &prev_entry);
-                   entry->start < end;
-                   prev_entry = entry, entry = vm_map_entry_succ(entry)) {
+               rv = vm_map_lookup_clip_start(map, start, &entry, &prev_entry);
+               if (rv != KERN_SUCCESS) {
+                       vm_map_unlock(map);
+                       return (vm_mmap_to_errno(rv));
+               }
+
+               for (; entry->start < end; prev_entry = entry,
+                   entry = vm_map_entry_succ(entry)) {
                        if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)
                                continue;
 
-                       vm_map_clip_end(map, entry, end);
+                       rv = vm_map_clip_end(map, entry, end);
+                       if (rv != KERN_SUCCESS) {
+                               vm_map_unlock(map);
+                               return (vm_mmap_to_errno(rv));
+                       }
 
                        switch (behav) {
                        case MADV_NORMAL:
@@ -2969,7 +3029,8 @@ int
 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
               vm_inherit_t new_inheritance)
 {
-       vm_map_entry_t entry, prev_entry;
+       vm_map_entry_t entry, lentry, prev_entry, start_entry;
+       int rv;
 
        switch (new_inheritance) {
        case VM_INHERIT_NONE:
@@ -2984,18 +3045,37 @@ vm_map_inherit(vm_map_t map, vm_offset_t start, vm_off
                return (KERN_SUCCESS);
        vm_map_lock(map);
        VM_MAP_RANGE_CHECK(map, start, end);
-       for (entry = vm_map_lookup_clip_start(map, start, &prev_entry);
-           entry->start < end;
-           prev_entry = entry, entry = vm_map_entry_succ(entry)) {
-               vm_map_clip_end(map, entry, end);
+       rv = vm_map_lookup_clip_start(map, start, &start_entry, &prev_entry);
+       if (rv != KERN_SUCCESS)
+               goto unlock;
+       if (vm_map_lookup_entry(map, end - 1, &lentry)) {
+               rv = vm_map_clip_end(map, lentry, end);
+               if (rv != KERN_SUCCESS)
+                       goto unlock;
+       }
+       if (new_inheritance == VM_INHERIT_COPY) {
+               for (entry = start_entry; entry->start < end;
+                   prev_entry = entry, entry = vm_map_entry_succ(entry)) {
+                       if ((entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK)
+                           != 0) {
+                               rv = KERN_INVALID_ARGUMENT;
+                               goto unlock;
+                       }
+               }
+       }
+       for (entry = start_entry; entry->start < end; prev_entry = entry,
+           entry = vm_map_entry_succ(entry)) {
+               KASSERT(entry->end <= end, ("non-clipped entry %p end %jx %jx",
+                   entry, (uintmax_t)entry->end, (uintmax_t)end));
                if ((entry->eflags & MAP_ENTRY_GUARD) == 0 ||
                    new_inheritance != VM_INHERIT_ZERO)
                        entry->inheritance = new_inheritance;
                vm_map_try_merge_entries(map, prev_entry, entry);
        }
        vm_map_try_merge_entries(map, prev_entry, entry);
+unlock:
        vm_map_unlock(map);
-       return (KERN_SUCCESS);
+       return (rv);
 }
 
 /*
@@ -3094,8 +3174,13 @@ vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offs
                            next_entry : NULL;
                        continue;
                }
-               vm_map_clip_start(map, entry, start);
-               vm_map_clip_end(map, entry, end);
+               rv = vm_map_clip_start(map, entry, start);
+               if (rv != KERN_SUCCESS)
+                       break;
+               rv = vm_map_clip_end(map, entry, end);
+               if (rv != KERN_SUCCESS)
+                       break;
+
                /*
                 * Mark the entry in case the map lock is released.  (See
                 * above.)
@@ -3262,8 +3347,8 @@ vm_map_wire_locked(vm_map_t map, vm_offset_t start, vm
 {
        vm_map_entry_t entry, first_entry, next_entry, prev_entry;
        vm_offset_t faddr, saved_end, saved_start;
-       u_long npages;
-       u_int last_timestamp;
+       u_long incr, npages;
+       u_int bidx, last_timestamp;
        int rv;
        bool holes_ok, need_wakeup, user_wire;
        vm_prot_t prot;
@@ -3301,8 +3386,13 @@ vm_map_wire_locked(vm_map_t map, vm_offset_t start, vm
                            next_entry : NULL;
                        continue;
                }
-               vm_map_clip_start(map, entry, start);
-               vm_map_clip_end(map, entry, end);
+               rv = vm_map_clip_start(map, entry, start);
+               if (rv != KERN_SUCCESS)
+                       goto done;
+               rv = vm_map_clip_end(map, entry, end);
+               if (rv != KERN_SUCCESS)
+                       goto done;
+
                /*
                 * Mark the entry in case the map lock is released.  (See
                 * above.)
@@ -3339,20 +3429,23 @@ vm_map_wire_locked(vm_map_t map, vm_offset_t start, vm
                        saved_start = entry->start;
                        saved_end = entry->end;
                        last_timestamp = map->timestamp;
+                       bidx = (entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK)
+                           >> MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
+                       incr =  pagesizes[bidx];
                        vm_map_busy(map);
                        vm_map_unlock(map);
 
-                       faddr = saved_start;
-                       do {
+                       for (faddr = saved_start; faddr < saved_end;
+                           faddr += incr) {
                                /*
                                 * Simulate a fault to get the page and enter
                                 * it into the physical map.
                                 */
-                               if ((rv = vm_fault(map, faddr,
-                                   VM_PROT_NONE, VM_FAULT_WIRE, NULL)) !=
-                                   KERN_SUCCESS)
+                               rv = vm_fault(map, faddr, VM_PROT_NONE,
+                                   VM_FAULT_WIRE, NULL);
+                               if (rv != KERN_SUCCESS)
                                        break;
-                       } while ((faddr += PAGE_SIZE) < saved_end);
+                       }
                        vm_map_lock(map);
                        vm_map_unbusy(map);
                        if (last_timestamp + 1 != map->timestamp) {
@@ -3427,10 +3520,14 @@ done:
                 * Moreover, another thread could be simultaneously
                 * wiring this new mapping entry.  Detect these cases
                 * and skip any entries marked as in transition not by us.
+                *
+                * Another way to get an entry not marked with
+                * MAP_ENTRY_IN_TRANSITION is after failed clipping,
+                * which set rv to KERN_INVALID_ARGUMENT.
                 */
                if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 ||
                    entry->wiring_thread != curthread) {
-                       KASSERT(holes_ok,
+                       KASSERT(holes_ok || rv == KERN_INVALID_ARGUMENT,
                            ("vm_map_wire: !HOLESOK and new/changed entry"));
                        continue;
                }
@@ -3508,6 +3605,7 @@ vm_map_sync(
        vm_object_t object;
        vm_ooffset_t offset;
        unsigned int last_timestamp;
+       int bdry_idx;
        boolean_t failed;
 
        vm_map_lock_read(map);
@@ -3519,14 +3617,26 @@ vm_map_sync(
                start = first_entry->start;
                end = first_entry->end;
        }
+
        /*
-        * Make a first pass to check for user-wired memory and holes.
+        * Make a first pass to check for user-wired memory, holes,
+        * and partial invalidation of largepage mappings.
         */
        for (entry = first_entry; entry->start < end; entry = next_entry) {
-               if (invalidate &&
-                   (entry->eflags & MAP_ENTRY_USER_WIRED) != 0) {
-                       vm_map_unlock_read(map);
-                       return (KERN_INVALID_ARGUMENT);
+               if (invalidate) {
+                       if ((entry->eflags & MAP_ENTRY_USER_WIRED) != 0) {
+                               vm_map_unlock_read(map);
+                               return (KERN_INVALID_ARGUMENT);
+                       }
+                       bdry_idx = (entry->eflags &
+                           MAP_ENTRY_SPLIT_BOUNDARY_MASK) >>
+                           MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
+                       if (bdry_idx != 0 &&
+                           ((start & (pagesizes[bdry_idx] - 1)) != 0 ||
+                           (end & (pagesizes[bdry_idx] - 1)) != 0)) {
+                               vm_map_unlock_read(map);
+                               return (KERN_INVALID_ARGUMENT);
+                       }
                }
                next_entry = vm_map_entry_succ(entry);
                if (end > entry->end &&
@@ -3703,7 +3813,8 @@ vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry
 int
 vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
 {
-       vm_map_entry_t entry, next_entry;
+       vm_map_entry_t entry, next_entry, scratch_entry;
+       int rv;
 
        VM_MAP_ASSERT_LOCKED(map);
 
@@ -3714,8 +3825,10 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offs
         * Find the start of the region, and clip it.
         * Step through all entries in this region.
         */
-       for (entry = vm_map_lookup_clip_start(map, start, &entry);
-           entry->start < end; entry = next_entry) {
+       rv = vm_map_lookup_clip_start(map, start, &entry, &scratch_entry);
+       if (rv != KERN_SUCCESS)
+               return (rv);
+       for (; entry->start < end; entry = next_entry) {
                /*
                 * Wait for wiring or unwiring of an entry to complete.
                 * Also wait for any system wirings to disappear on
@@ -3739,13 +3852,19 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offs
                                 * Specifically, the entry may have been
                                 * clipped, merged, or deleted.
                                 */
-                               next_entry = vm_map_lookup_clip_start(map,
-                                   saved_start, &next_entry);
+                               rv = vm_map_lookup_clip_start(map, saved_start,
+                                   &next_entry, &scratch_entry);
+                               if (rv != KERN_SUCCESS)
+                                       break;
                        } else
                                next_entry = entry;
                        continue;
                }
-               vm_map_clip_end(map, entry, end);
+
+               /* XXXKIB or delete to the upper superpage boundary ? */
+               rv = vm_map_clip_end(map, entry, end);
+               if (rv != KERN_SUCCESS)
+                       break;
                next_entry = vm_map_entry_succ(entry);
 
                /*
@@ -3775,7 +3894,7 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offs
                 */
                vm_map_entry_delete(map, entry);
        }
-       return (KERN_SUCCESS);
+       return (rv);
 }
 
 /*
@@ -4219,7 +4338,8 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_c
                        new_entry->end = old_entry->end;
                        new_entry->eflags = old_entry->eflags &
                            ~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION |
-                           MAP_ENTRY_WRITECNT | MAP_ENTRY_VN_EXEC);
+                           MAP_ENTRY_WRITECNT | MAP_ENTRY_VN_EXEC |
+                           MAP_ENTRY_SPLIT_BOUNDARY_MASK);
                        new_entry->protection = old_entry->protection;
                        new_entry->max_protection = old_entry->max_protection;
                        new_entry->inheritance = VM_INHERIT_ZERO;

Modified: head/sys/vm/vm_map.h
==============================================================================
--- head/sys/vm/vm_map.h        Wed Sep  9 21:57:55 2020        (r365519)
+++ head/sys/vm/vm_map.h        Wed Sep  9 22:02:30 2020        (r365520)
@@ -149,6 +149,10 @@ struct vm_map_entry {
 #define        MAP_ENTRY_STACK_GAP_UP          0x00040000
 #define        MAP_ENTRY_HEADER                0x00080000
 
+#define        MAP_ENTRY_SPLIT_BOUNDARY_MASK   0x00300000
+
+#define        MAP_ENTRY_SPLIT_BOUNDARY_SHIFT  20
+
 #ifdef _KERNEL
 static __inline u_char
 vm_map_entry_behavior(vm_map_entry_t entry)
@@ -373,6 +377,9 @@ long vmspace_resident_count(struct vmspace *vmspace);
 #define        MAP_CREATE_STACK_GAP_UP 0x00010000
 #define        MAP_CREATE_STACK_GAP_DN 0x00020000
 #define        MAP_VN_EXEC             0x00040000
+#define        MAP_SPLIT_BOUNDARY_MASK 0x00180000
+
+#define        MAP_SPLIT_BOUNDARY_SHIFT 19
 
 /*
  * vm_fault option flags
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to