Author: kib
Date: Wed Jul  5 06:18:00 2017
New Revision: 320666
URL: https://svnweb.freebsd.org/changeset/base/320666

Log:
  Add MAP_GUARD and use it for stack grow area protection.
  Bump __FreeBSD_version.
  
  MFC r320317:
  Implement address space guards.
  
  MFC r320338:
  Remove stale part of the comment.
  
  MFC r320339:
  Correctly handle small MAP_STACK requests.
  
  MFC r320344:
  For now, allow mprotect(2) over the guards to succeed regardless of
  the requested protection.
  
  MFC r320430:
  Treat the addr argument for an mmap(2) request without the MAP_FIXED flag
  as a hint.
  
  MFC r320560 (by alc):
  Modify vm_map_growstack() to protect itself from the possibility of the
  gap entry in the vm map being smaller than the sysctl-derived stack guard
  size.

Modified:
  stable/11/lib/libc/sys/mmap.2
  stable/11/lib/libc/sys/munmap.2
  stable/11/sys/sys/mman.h
  stable/11/sys/sys/param.h
  stable/11/sys/vm/vm.h
  stable/11/sys/vm/vm_fault.c
  stable/11/sys/vm/vm_map.c
  stable/11/sys/vm/vm_map.h
  stable/11/sys/vm/vm_mmap.c
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/lib/libc/sys/mmap.2
==============================================================================
--- stable/11/lib/libc/sys/mmap.2       Wed Jul  5 06:12:21 2017        (r320665)
+++ stable/11/lib/libc/sys/mmap.2       Wed Jul  5 06:18:00 2017        (r320666)
@@ -199,6 +199,21 @@ In contrast, if
 .Dv MAP_EXCL
 is specified, the request will fail if a mapping
 already exists within the range.
+.It Dv MAP_GUARD
+Instead of a mapping, create a guard of the specified size.
+Guards allow a process to create reservations in its address space,
+which can later be replaced by actual mappings.
+.Pp
+.Fa mmap
+will not create mappings in the address range of a guard unless
+the request specifies
+.Dv MAP_FIXED .
+Guards can be destroyed with
+.Xr munmap 2 .
+Any memory access by a thread to the guarded range results
+in the delivery of a
+.Dv SIGSEGV
+signal to that thread.
 .It Dv MAP_NOCORE
 Region is not included in a core file.
 .It Dv MAP_NOSYNC
@@ -303,6 +318,7 @@ must include at least
 .Dv PROT_READ
 and
 .Dv PROT_WRITE .
+.Pp
 This option creates
 a memory region that grows to at most
 .Fa len
@@ -313,6 +329,10 @@ stack top is the starting address returned by the call
 bytes.
 The bottom of the stack at maximum growth is the starting
 address returned by the call.
+The system uses guards to prevent the inadvertent use of
+regions into which stacks created with
+.Dv MAP_STACK
+will automatically grow, without mapping the whole stack in advance.
 .El
 .Pp
 The
@@ -406,6 +426,7 @@ were specified.
 .It Bq Er EINVAL
 None of
 .Dv MAP_ANON ,
+.Dv MAP_GUARD ,
 .Dv MAP_PRIVATE ,
 .Dv MAP_SHARED ,
 or
@@ -455,6 +476,25 @@ were specified, but the requested region is already us
 was specified, but
 .Dv MAP_FIXED
 was not.
+.It Bq Er EINVAL
+.Dv MAP_GUARD
+was specified, but the
+.Fa offset
+argument was not zero, the
+.Fa fd
+argument was not -1, or the
+.Fa prot
+argument was not
+.Dv PROT_NONE .
+.It Bq Er EINVAL
+.Dv MAP_GUARD
+was specified together with one of the flags
+.Dv MAP_ANON ,
+.Dv MAP_PREFAULT ,
+.Dv MAP_PREFAULT_READ ,
+.Dv MAP_PRIVATE ,
+.Dv MAP_SHARED ,
+.Dv MAP_STACK .
 .It Bq Er ENODEV
 .Dv MAP_ANON
 has not been specified and

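As a point of reference, here is a minimal userland sketch (not part of
this change) of the semantics documented above: reserve a range with
MAP_GUARD, later replace part of it through MAP_FIXED, and destroy the
remainder with munmap(2).  The size and layout are illustrative only.

#include <sys/mman.h>
#include <err.h>

int
main(void)
{
	size_t len = 16 * 4096;		/* illustrative: a 16-page reservation */
	char *res, *p;

	/* Reservation: prot must be PROT_NONE, fd -1 and offset 0. */
	res = mmap(NULL, len, PROT_NONE, MAP_GUARD, -1, 0);
	if (res == MAP_FAILED)
		err(1, "mmap(MAP_GUARD)");

	/* Touching the guard now delivers SIGSEGV to the faulting thread. */

	/* Mapping over a guard requires MAP_FIXED. */
	p = mmap(res, 4 * 4096, PROT_READ | PROT_WRITE,
	    MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap(MAP_FIXED)");
	p[0] = 1;

	/* munmap(2) destroys guards as well as mappings. */
	if (munmap(res + 4 * 4096, len - 4 * 4096) == -1)
		err(1, "munmap");
	return (0);
}
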
Modified: stable/11/lib/libc/sys/munmap.2
==============================================================================
--- stable/11/lib/libc/sys/munmap.2     Wed Jul  5 06:12:21 2017        (r320665)
+++ stable/11/lib/libc/sys/munmap.2     Wed Jul  5 06:18:00 2017        (r320666)
@@ -28,7 +28,7 @@
 .\"    @(#)munmap.2    8.3 (Berkeley) 5/27/94
 .\" $FreeBSD$
 .\"
-.Dd May 27, 1994
+.Dd June 22, 2017
 .Dt MUNMAP 2
 .Os
 .Sh NAME
@@ -44,7 +44,7 @@
 The
 .Fn munmap
 system call
-deletes the mappings for the specified address range,
+deletes the mappings and guards for the specified address range,
 and causes further references to addresses within the range
 to generate invalid memory references.
 .Sh RETURN VALUES

Modified: stable/11/sys/sys/mman.h
==============================================================================
--- stable/11/sys/sys/mman.h    Wed Jul  5 06:12:21 2017        (r320665)
+++ stable/11/sys/sys/mman.h    Wed Jul  5 06:18:00 2017        (r320666)
@@ -90,6 +90,7 @@
 /*
  * Extended flags
  */
+#define        MAP_GUARD        0x00002000 /* reserve but don't map address range */
 #define        MAP_EXCL         0x00004000 /* for MAP_FIXED, fail if address is used */
 #define        MAP_NOCORE       0x00020000 /* dont include these pages in a coredump */
 #define        MAP_PREFAULT_READ 0x00040000 /* prefault mapping for reading */

Modified: stable/11/sys/sys/param.h
==============================================================================
--- stable/11/sys/sys/param.h   Wed Jul  5 06:12:21 2017        (r320665)
+++ stable/11/sys/sys/param.h   Wed Jul  5 06:18:00 2017        (r320666)
@@ -58,7 +58,7 @@
  *             in the range 5 to 9.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 1101500      /* Master, propagated to newvers */
+#define __FreeBSD_version 1101501      /* Master, propagated to newvers */
 
 /*
  * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
@@ -76,12 +76,14 @@
 #undef __FreeBSD_kernel__
 #define __FreeBSD_kernel__
 
-#ifdef _KERNEL
+#if defined(_KERNEL) || defined(IN_RTLD)
 #define        P_OSREL_SIGWAIT                 700000
 #define        P_OSREL_SIGSEGV                 700004
 #define        P_OSREL_MAP_ANON                800104
 #define        P_OSREL_MAP_FSTRICT             1100036
 #define        P_OSREL_SHUTDOWN_ENOTCONN       1100077
+#define        P_OSREL_MAP_GUARD               1200035
+#define        P_OSREL_MAP_GUARD_11            1101501
 
 #define        P_OSREL_MAJOR(x)                ((x) / 100000)
 #endif

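Two osrel values are added because the feature exists both in 12-current
(1200035) and in this stable/11 merge (1101501).  A hedged sketch, not
part of this commit, of how a consumer with access to these macros
(e.g. one built with IN_RTLD defined) might test the running kernel; the
helper name is hypothetical:

#include <sys/param.h>	/* P_OSREL_MAP_GUARD*, visible with IN_RTLD */
#include <unistd.h>	/* getosreldate(3) */

/*
 * Hypothetical helper: true when the running kernel accepts MAP_GUARD.
 * Kernels reporting 1200000..1200034 are 12-current builds older than
 * the feature, hence the two-range test.
 */
static int
have_map_guard(void)
{
	int osrel;

	osrel = getosreldate();
	return (osrel >= P_OSREL_MAP_GUARD ||
	    (osrel >= P_OSREL_MAP_GUARD_11 && osrel < 1200000));
}
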
Modified: stable/11/sys/vm/vm.h
==============================================================================
--- stable/11/sys/vm/vm.h       Wed Jul  5 06:12:21 2017        (r320665)
+++ stable/11/sys/vm/vm.h       Wed Jul  5 06:18:00 2017        (r320666)
@@ -78,6 +78,7 @@ typedef u_char vm_prot_t;     /* protection codes */
 #define        VM_PROT_WRITE           ((vm_prot_t) 0x02)
 #define        VM_PROT_EXECUTE         ((vm_prot_t) 0x04)
 #define        VM_PROT_COPY            ((vm_prot_t) 0x08)      /* copy-on-read */
+#define        VM_PROT_FAULT_LOOKUP    ((vm_prot_t) 0x010)
 
 #define        VM_PROT_ALL             (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)
 #define VM_PROT_RW             (VM_PROT_READ|VM_PROT_WRITE)

Modified: stable/11/sys/vm/vm_fault.c
==============================================================================
--- stable/11/sys/vm/vm_fault.c Wed Jul  5 06:12:21 2017        (r320665)
+++ stable/11/sys/vm/vm_fault.c Wed Jul  5 06:18:00 2017        (r320666)
@@ -495,13 +495,12 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot
        int locked, nera, result, rv;
        u_char behavior;
        boolean_t wired;        /* Passed by reference. */
-       bool dead, growstack, hardfault, is_first_object_locked;
+       bool dead, hardfault, is_first_object_locked;
 
        PCPU_INC(cnt.v_vm_faults);
        fs.vp = NULL;
        faultcount = 0;
        nera = -1;
-       growstack = true;
        hardfault = false;
 
 RetryFault:;
@@ -511,17 +510,10 @@ RetryFault:;
         * search.
         */
        fs.map = map;
-       result = vm_map_lookup(&fs.map, vaddr, fault_type, &fs.entry,
-           &fs.first_object, &fs.first_pindex, &prot, &wired);
+       result = vm_map_lookup(&fs.map, vaddr, fault_type |
+           VM_PROT_FAULT_LOOKUP, &fs.entry, &fs.first_object,
+           &fs.first_pindex, &prot, &wired);
        if (result != KERN_SUCCESS) {
-               if (growstack && result == KERN_INVALID_ADDRESS &&
-                   map != kernel_map) {
-                       result = vm_map_growstack(curproc, vaddr);
-                       if (result != KERN_SUCCESS)
-                               return (KERN_FAILURE);
-                       growstack = false;
-                       goto RetryFault;
-               }
                unlock_vp(&fs);
                return (result);
        }
@@ -546,6 +538,8 @@ RetryFault:;
                        vm_map_unlock(fs.map);
                goto RetryFault;
        }
+
+       MPASS((fs.entry->eflags & MAP_ENTRY_GUARD) == 0);
 
        if (wired)
                fault_type = prot | (fault_type & VM_PROT_COPY);

Modified: stable/11/sys/vm/vm_map.c
==============================================================================
--- stable/11/sys/vm/vm_map.c   Wed Jul  5 06:12:21 2017        (r320665)
+++ stable/11/sys/vm/vm_map.c   Wed Jul  5 06:18:00 2017        (r320666)
@@ -133,6 +133,8 @@ static void _vm_map_init(vm_map_t map, pmap_t pmap, vm
 static void vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map);
 static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry);
 static void vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry);
+static int vm_map_growstack(vm_map_t map, vm_offset_t addr,
+    vm_map_entry_t gap_entry);
 static void vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
     vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags);
 #ifdef INVARIANTS
@@ -1214,6 +1216,10 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_oof
        if (prev_entry->next != &map->header && prev_entry->next->start < end)
                return (KERN_NO_SPACE);
 
+       if ((cow & MAP_CREATE_GUARD) != 0 && (object != NULL ||
+           max != VM_PROT_NONE))
+               return (KERN_INVALID_ARGUMENT);
+
        protoeflags = 0;
        if (cow & MAP_COPY_ON_WRITE)
                protoeflags |= MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY;
@@ -1229,13 +1235,19 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_oof
                protoeflags |= MAP_ENTRY_GROWS_UP;
        if (cow & MAP_VN_WRITECOUNT)
                protoeflags |= MAP_ENTRY_VN_WRITECNT;
+       if ((cow & MAP_CREATE_GUARD) != 0)
+               protoeflags |= MAP_ENTRY_GUARD;
+       if ((cow & MAP_CREATE_STACK_GAP_DN) != 0)
+               protoeflags |= MAP_ENTRY_STACK_GAP_DN;
+       if ((cow & MAP_CREATE_STACK_GAP_UP) != 0)
+               protoeflags |= MAP_ENTRY_STACK_GAP_UP;
        if (cow & MAP_INHERIT_SHARE)
                inheritance = VM_INHERIT_SHARE;
        else
                inheritance = VM_INHERIT_DEFAULT;
 
        cred = NULL;
-       if (cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT))
+       if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0)
                goto charged;
        if ((cow & MAP_ACC_CHARGED) || ((prot & VM_PROT_WRITE) &&
            ((protoeflags & MAP_ENTRY_NEEDS_COPY) || object == NULL))) {
@@ -1284,7 +1296,8 @@ charged:
                if (prev_entry->inheritance == inheritance &&
                    prev_entry->protection == prot &&
                    prev_entry->max_protection == max) {
-                       map->size += end - prev_entry->end;
+                       if ((prev_entry->eflags & MAP_ENTRY_GUARD) == 0)
+                               map->size += end - prev_entry->end;
                        prev_entry->end = end;
                        vm_map_entry_resize_free(map, prev_entry);
                        vm_map_simplify_entry(map, prev_entry);
@@ -1321,7 +1334,6 @@ charged:
        new_entry->eflags = protoeflags;
        new_entry->object.vm_object = object;
        new_entry->offset = offset;
-       new_entry->avail_ssize = 0;
 
        new_entry->inheritance = inheritance;
        new_entry->protection = prot;
@@ -1339,7 +1351,8 @@ charged:
         * Insert the new entry into the list
         */
        vm_map_entry_link(map, prev_entry, new_entry);
-       map->size += new_entry->end - new_entry->start;
+       if ((new_entry->eflags & MAP_ENTRY_GUARD) == 0)
+               map->size += new_entry->end - new_entry->start;
 
        /*
         * Try to coalesce the new entry with both the previous and next
@@ -1543,6 +1556,25 @@ again:
        return (result);
 }
 
+int
+vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
+    vm_offset_t *addr, vm_size_t length, vm_offset_t min_addr,
+    vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max,
+    int cow)
+{
+       vm_offset_t hint;
+       int rv;
+
+       hint = *addr;
+       for (;;) {
+               rv = vm_map_find(map, object, offset, addr, length, max_addr,
+                   find_space, prot, max, cow);
+               if (rv == KERN_SUCCESS || min_addr >= hint)
+                       return (rv);
+               *addr = min_addr;
+       }
+}
+
 /*
  *     vm_map_simplify_entry:
  *
@@ -1674,7 +1706,8 @@ _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry,
         * map.  This is a bit of a hack, but is also about the best place to
         * put this improvement.
         */
-       if (entry->object.vm_object == NULL && !map->system_map) {
+       if (entry->object.vm_object == NULL && !map->system_map &&
+           (entry->eflags & MAP_ENTRY_GUARD) == 0) {
                vm_object_t object;
                object = vm_object_allocate(OBJT_DEFAULT,
                                atop(entry->end - entry->start));
@@ -1753,7 +1786,8 @@ _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, v
         * map.  This is a bit of a hack, but is also about the best place to
         * put this improvement.
         */
-       if (entry->object.vm_object == NULL && !map->system_map) {
+       if (entry->object.vm_object == NULL && !map->system_map &&
+           (entry->eflags & MAP_ENTRY_GUARD) == 0) {
                vm_object_t object;
                object = vm_object_allocate(OBJT_DEFAULT,
                                atop(entry->end - entry->start));
@@ -1988,6 +2022,8 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_off
         */
        for (current = entry; current != &map->header && current->start < end;
            current = current->next) {
+               if ((current->eflags & MAP_ENTRY_GUARD) != 0)
+                       continue;
                if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
                        vm_map_unlock(map);
                        return (KERN_INVALID_ARGUMENT);
@@ -2010,7 +2046,8 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_off
 
                if (set_max ||
                    ((new_prot & ~(current->protection)) & VM_PROT_WRITE) == 0 ||
-                   ENTRY_CHARGED(current)) {
+                   ENTRY_CHARGED(current) ||
+                   (current->eflags & MAP_ENTRY_GUARD) != 0) {
                        continue;
                }
 
@@ -2059,6 +2096,9 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_off
         */
        for (current = entry; current != &map->header && current->start < end;
            current = current->next) {
+               if ((current->eflags & MAP_ENTRY_GUARD) != 0)
+                       continue;
+
                old_prot = current->protection;
 
                if (set_max)
@@ -2312,7 +2352,9 @@ vm_map_inherit(vm_map_t map, vm_offset_t start, vm_off
                entry = temp_entry->next;
        while ((entry != &map->header) && (entry->start < end)) {
                vm_map_clip_end(map, entry, end);
-               entry->inheritance = new_inheritance;
+               if ((entry->eflags & MAP_ENTRY_GUARD) == 0 ||
+                   new_inheritance != VM_INHERIT_ZERO)
+                       entry->inheritance = new_inheritance;
                vm_map_simplify_entry(map, entry);
                entry = entry->next;
        }
@@ -2918,6 +2960,15 @@ vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry
 
        vm_map_entry_unlink(map, entry);
        object = entry->object.vm_object;
+
+       if ((entry->eflags & MAP_ENTRY_GUARD) != 0) {
+               MPASS(entry->cred == NULL);
+               MPASS((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0);
+               MPASS(object == NULL);
+               vm_map_entry_deallocate(entry, map->system_map);
+               return;
+       }
+
        size = entry->end - entry->start;
        map->size -= size;
 
@@ -3276,6 +3327,8 @@ vmspace_map_entry_forked(const struct vmspace *vm1, st
        vm_size_t entrysize;
        vm_offset_t newend;
 
+       if ((entry->eflags & MAP_ENTRY_GUARD) != 0)
+               return;
        entrysize = entry->end - entry->start;
        vm2->vm_map.size += entrysize;
        if (entry->eflags & (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP)) {
@@ -3312,6 +3365,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_c
        vm_map_entry_t new_entry, old_entry;
        vm_object_t object;
        int locked;
+       vm_inherit_t inh;
 
        old_map = &vm1->vm_map;
        /* Copy immutable fields of vm1 to vm2. */
@@ -3334,7 +3388,12 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_c
                if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
                        panic("vm_map_fork: encountered a submap");
 
-               switch (old_entry->inheritance) {
+               inh = old_entry->inheritance;
+               if ((old_entry->eflags & MAP_ENTRY_GUARD) != 0 &&
+                   inh != VM_INHERIT_NONE)
+                       inh = VM_INHERIT_COPY;
+
+               switch (inh) {
                case VM_INHERIT_NONE:
                        break;
 
@@ -3467,7 +3526,6 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_c
 
                        new_entry->start = old_entry->start;
                        new_entry->end = old_entry->end;
-                       new_entry->avail_ssize = old_entry->avail_ssize;
                        new_entry->eflags = old_entry->eflags &
                            ~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION |
                            MAP_ENTRY_VN_WRITECNT);
@@ -3530,30 +3588,40 @@ out:
        return (rv);
 }
 
+static int stack_guard_page = 1;
+SYSCTL_INT(_security_bsd, OID_AUTO, stack_guard_page, CTLFLAG_RWTUN,
+    &stack_guard_page, 0,
+    "Specifies the number of guard pages for a stack that grows");
+
 static int
 vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
     vm_size_t growsize, vm_prot_t prot, vm_prot_t max, int cow)
 {
        vm_map_entry_t new_entry, prev_entry;
-       vm_offset_t bot, top;
-       vm_size_t init_ssize;
+       vm_offset_t bot, gap_bot, gap_top, top;
+       vm_size_t init_ssize, sgp;
        int orient, rv;
 
        /*
         * The stack orientation is piggybacked with the cow argument.
         * Extract it into orient and mask the cow argument so that we
         * don't pass it around further.
-        * NOTE: We explicitly allow bi-directional stacks.
         */
-       orient = cow & (MAP_STACK_GROWS_DOWN|MAP_STACK_GROWS_UP);
+       orient = cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP);
        KASSERT(orient != 0, ("No stack grow direction"));
+       KASSERT(orient != (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP),
+           ("bi-dir stack"));
 
+       sgp = (vm_size_t)stack_guard_page * PAGE_SIZE;
        if (addrbos < vm_map_min(map) ||
            addrbos > vm_map_max(map) ||
-           addrbos + max_ssize < addrbos)
+           addrbos + max_ssize < addrbos ||
+           sgp >= max_ssize)
                return (KERN_NO_SPACE);
 
-       init_ssize = (max_ssize < growsize) ? max_ssize : growsize;
+       init_ssize = growsize;
+       if (max_ssize < init_ssize + sgp)
+               init_ssize = max_ssize - sgp;
 
        /* If addr is already mapped, no go */
        if (vm_map_lookup_entry(map, addrbos, &prev_entry))
@@ -3561,12 +3629,6 @@ vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos,
 
        /*
         * If we can't accommodate max_ssize in the current mapping, no go.
-        * However, we need to be aware that subsequent user mappings might
-        * map into the space we have reserved for stack, and currently this
-        * space is not protected.
-        *
-        * Hopefully we will at least detect this condition when we try to
-        * grow the stack.
         */
        if ((prev_entry->next != &map->header) &&
            (prev_entry->next->start < addrbos + max_ssize))
@@ -3582,57 +3644,53 @@ vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos,
         * and cow to be 0.  Possibly we should eliminate these as input
         * parameters, and just pass these values here in the insert call.
         */
-       if (orient == MAP_STACK_GROWS_DOWN)
+       if (orient == MAP_STACK_GROWS_DOWN) {
                bot = addrbos + max_ssize - init_ssize;
-       else if (orient == MAP_STACK_GROWS_UP)
+               top = bot + init_ssize;
+               gap_bot = addrbos;
+               gap_top = bot;
+       } else /* if (orient == MAP_STACK_GROWS_UP) */ {
                bot = addrbos;
-       else
-               bot = round_page(addrbos + max_ssize/2 - init_ssize/2);
-       top = bot + init_ssize;
-       rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow);
-
-       /* Now set the avail_ssize amount. */
-       if (rv == KERN_SUCCESS) {
-               new_entry = prev_entry->next;
-               if (new_entry->end != top || new_entry->start != bot)
-                       panic("Bad entry start/end for new stack entry");
-
-               new_entry->avail_ssize = max_ssize - init_ssize;
-               KASSERT((orient & MAP_STACK_GROWS_DOWN) == 0 ||
-                   (new_entry->eflags & MAP_ENTRY_GROWS_DOWN) != 0,
-                   ("new entry lacks MAP_ENTRY_GROWS_DOWN"));
-               KASSERT((orient & MAP_STACK_GROWS_UP) == 0 ||
-                   (new_entry->eflags & MAP_ENTRY_GROWS_UP) != 0,
-                   ("new entry lacks MAP_ENTRY_GROWS_UP"));
+               top = bot + init_ssize;
+               gap_bot = top;
+               gap_top = addrbos + max_ssize;
        }
-
+       rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow);
+       if (rv != KERN_SUCCESS)
+               return (rv);
+       new_entry = prev_entry->next;
+       KASSERT(new_entry->end == top || new_entry->start == bot,
+           ("Bad entry start/end for new stack entry"));
+       KASSERT((orient & MAP_STACK_GROWS_DOWN) == 0 ||
+           (new_entry->eflags & MAP_ENTRY_GROWS_DOWN) != 0,
+           ("new entry lacks MAP_ENTRY_GROWS_DOWN"));
+       KASSERT((orient & MAP_STACK_GROWS_UP) == 0 ||
+           (new_entry->eflags & MAP_ENTRY_GROWS_UP) != 0,
+           ("new entry lacks MAP_ENTRY_GROWS_UP"));
+       rv = vm_map_insert(map, NULL, 0, gap_bot, gap_top, VM_PROT_NONE,
+           VM_PROT_NONE, MAP_CREATE_GUARD | (orient == MAP_STACK_GROWS_DOWN ?
+           MAP_CREATE_STACK_GAP_DN : MAP_CREATE_STACK_GAP_UP));
+       if (rv != KERN_SUCCESS)
+               (void)vm_map_delete(map, bot, top);
        return (rv);
 }
 
-static int stack_guard_page = 0;
-SYSCTL_INT(_security_bsd, OID_AUTO, stack_guard_page, CTLFLAG_RWTUN,
-    &stack_guard_page, 0,
-    "Insert stack guard page ahead of the growable segments.");
-
-/* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
- * desired address is already mapped, or if we successfully grow
- * the stack.  Also returns KERN_SUCCESS if addr is outside the
- * stack range (this is strange, but preserves compatibility with
- * the grow function in vm_machdep.c).
+/*
+ * Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if we
+ * successfully grow the stack.
  */
-int
-vm_map_growstack(struct proc *p, vm_offset_t addr)
+static int
+vm_map_growstack(vm_map_t map, vm_offset_t addr, vm_map_entry_t gap_entry)
 {
-       vm_map_entry_t next_entry, prev_entry;
-       vm_map_entry_t new_entry, stack_entry;
-       struct vmspace *vm = p->p_vmspace;
-       vm_map_t map = &vm->vm_map;
-       vm_offset_t end;
-       vm_size_t growsize;
-       size_t grow_amount, max_grow;
-       rlim_t lmemlim, stacklim, vmemlim;
-       int is_procstack, rv;
+       vm_map_entry_t stack_entry;
+       struct proc *p;
+       struct vmspace *vm;
        struct ucred *cred;
+       vm_offset_t gap_end, gap_start, grow_start;
+       size_t grow_amount, guard, max_grow;
+       rlim_t lmemlim, stacklim, vmemlim;
+       int rv, rv1;
+       bool gap_deleted, grow_down, is_procstack;
 #ifdef notyet
        uint64_t limit;
 #endif
@@ -3640,125 +3698,74 @@ vm_map_growstack(struct proc *p, vm_offset_t addr)
        int error;
 #endif
 
+       p = curproc;
+       vm = p->p_vmspace;
+       MPASS(map == &p->p_vmspace->vm_map);
+       MPASS(!map->system_map);
+
+       guard = stack_guard_page * PAGE_SIZE;
        lmemlim = lim_cur(curthread, RLIMIT_MEMLOCK);
        stacklim = lim_cur(curthread, RLIMIT_STACK);
        vmemlim = lim_cur(curthread, RLIMIT_VMEM);
-Retry:
-
-       vm_map_lock_read(map);
-
-       /* If addr is already in the entry range, no need to grow.*/
-       if (vm_map_lookup_entry(map, addr, &prev_entry)) {
-               vm_map_unlock_read(map);
+retry:
+       /* If addr is not in a hole for a stack grow area, no need to grow. */
+       if (gap_entry == NULL && !vm_map_lookup_entry(map, addr, &gap_entry))
+               return (KERN_FAILURE);
+       if ((gap_entry->eflags & MAP_ENTRY_GUARD) == 0)
                return (KERN_SUCCESS);
-       }
-
-       next_entry = prev_entry->next;
-       if (!(prev_entry->eflags & MAP_ENTRY_GROWS_UP)) {
-               /*
-                * This entry does not grow upwards. Since the address lies
-                * beyond this entry, the next entry (if one exists) has to
-                * be a downward growable entry. The entry list header is
-                * never a growable entry, so it suffices to check the flags.
-                */
-               if (!(next_entry->eflags & MAP_ENTRY_GROWS_DOWN)) {
-                       vm_map_unlock_read(map);
-                       return (KERN_SUCCESS);
-               }
-               stack_entry = next_entry;
+       if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_DN) != 0) {
+               stack_entry = gap_entry->next;
+               if ((stack_entry->eflags & MAP_ENTRY_GROWS_DOWN) == 0 ||
+                   stack_entry->start != gap_entry->end)
+                       return (KERN_FAILURE);
+               grow_amount = round_page(stack_entry->start - addr);
+               grow_down = true;
+       } else if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_UP) != 0) {
+               stack_entry = gap_entry->prev;
+               if ((stack_entry->eflags & MAP_ENTRY_GROWS_UP) == 0 ||
+                   stack_entry->end != gap_entry->start)
+                       return (KERN_FAILURE);
+               grow_amount = round_page(addr + 1 - stack_entry->end);
+               grow_down = false;
        } else {
-               /*
-                * This entry grows upward. If the next entry does not at
-                * least grow downwards, this is the entry we need to grow.
-                * otherwise we have two possible choices and we have to
-                * select one.
-                */
-               if (next_entry->eflags & MAP_ENTRY_GROWS_DOWN) {
-                       /*
-                        * We have two choices; grow the entry closest to
-                        * the address to minimize the amount of growth.
-                        */
-                       if (addr - prev_entry->end <= next_entry->start - addr)
-                               stack_entry = prev_entry;
-                       else
-                               stack_entry = next_entry;
-               } else
-                       stack_entry = prev_entry;
+               return (KERN_FAILURE);
        }
-
-       if (stack_entry == next_entry) {
-               KASSERT(stack_entry->eflags & MAP_ENTRY_GROWS_DOWN, ("foo"));
-               KASSERT(addr < stack_entry->start, ("foo"));
-               end = (prev_entry != &map->header) ? prev_entry->end :
-                   stack_entry->start - stack_entry->avail_ssize;
-               grow_amount = roundup(stack_entry->start - addr, PAGE_SIZE);
-               max_grow = stack_entry->start - end;
-       } else {
-               KASSERT(stack_entry->eflags & MAP_ENTRY_GROWS_UP, ("foo"));
-               KASSERT(addr >= stack_entry->end, ("foo"));
-               end = (next_entry != &map->header) ? next_entry->start :
-                   stack_entry->end + stack_entry->avail_ssize;
-               grow_amount = roundup(addr + 1 - stack_entry->end, PAGE_SIZE);
-               max_grow = end - stack_entry->end;
-       }
-
-       if (grow_amount > stack_entry->avail_ssize) {
-               vm_map_unlock_read(map);
+       max_grow = gap_entry->end - gap_entry->start;
+       if (guard > max_grow)
                return (KERN_NO_SPACE);
-       }
-
-       /*
-        * If there is no longer enough space between the entries nogo, and
-        * adjust the available space.  Note: this  should only happen if the
-        * user has mapped into the stack area after the stack was created,
-        * and is probably an error.
-        *
-        * This also effectively destroys any guard page the user might have
-        * intended by limiting the stack size.
-        */
-       if (grow_amount + (stack_guard_page ? PAGE_SIZE : 0) > max_grow) {
-               if (vm_map_lock_upgrade(map))
-                       goto Retry;
-
-               stack_entry->avail_ssize = max_grow;
-
-               vm_map_unlock(map);
+       max_grow -= guard;
+       if (grow_amount > max_grow)
                return (KERN_NO_SPACE);
-       }
 
-       is_procstack = (addr >= (vm_offset_t)vm->vm_maxsaddr &&
-           addr < (vm_offset_t)p->p_sysent->sv_usrstack) ? 1 : 0;
-
        /*
         * If this is the main process stack, see if we're over the stack
         * limit.
         */
-       if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) {
-               vm_map_unlock_read(map);
+       is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr &&
+           addr < (vm_offset_t)p->p_sysent->sv_usrstack;
+       if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim))
                return (KERN_NO_SPACE);
-       }
+
 #ifdef RACCT
        if (racct_enable) {
                PROC_LOCK(p);
                if (is_procstack && racct_set(p, RACCT_STACK,
                    ctob(vm->vm_ssize) + grow_amount)) {
                        PROC_UNLOCK(p);
-                       vm_map_unlock_read(map);
                        return (KERN_NO_SPACE);
                }
                PROC_UNLOCK(p);
        }
 #endif
 
-       /* Round up the grow amount modulo sgrowsiz */
-       growsize = sgrowsiz;
-       grow_amount = roundup(grow_amount, growsize);
-       if (grow_amount > stack_entry->avail_ssize)
-               grow_amount = stack_entry->avail_ssize;
+       grow_amount = roundup(grow_amount, sgrowsiz);
+       if (grow_amount > max_grow)
+               grow_amount = max_grow;
        if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) {
                grow_amount = trunc_page((vm_size_t)stacklim) -
                    ctob(vm->vm_ssize);
        }
+
 #ifdef notyet
        PROC_LOCK(p);
        limit = racct_get_available(p, RACCT_STACK);
@@ -3766,9 +3773,9 @@ Retry:
        if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > limit))
                grow_amount = limit - ctob(vm->vm_ssize);
 #endif
-       if (!old_mlock && map->flags & MAP_WIREFUTURE) {
+
+       if (!old_mlock && (map->flags & MAP_WIREFUTURE) != 0) {
                if (ptoa(pmap_wired_count(map->pmap)) + grow_amount > lmemlim) {
-                       vm_map_unlock_read(map);
                        rv = KERN_NO_SPACE;
                        goto out;
                }
@@ -3778,7 +3785,6 @@ Retry:
                        if (racct_set(p, RACCT_MEMLOCK,
                            ptoa(pmap_wired_count(map->pmap)) + grow_amount)) {
                                PROC_UNLOCK(p);
-                               vm_map_unlock_read(map);
                                rv = KERN_NO_SPACE;
                                goto out;
                        }
@@ -3786,9 +3792,9 @@ Retry:
                }
 #endif
        }
+
        /* If we would blow our VMEM resource limit, no go */
        if (map->size + grow_amount > vmemlim) {
-               vm_map_unlock_read(map);
                rv = KERN_NO_SPACE;
                goto out;
        }
@@ -3797,7 +3803,6 @@ Retry:
                PROC_LOCK(p);
                if (racct_set(p, RACCT_VMEM, map->size + grow_amount)) {
                        PROC_UNLOCK(p);
-                       vm_map_unlock_read(map);
                        rv = KERN_NO_SPACE;
                        goto out;
                }
@@ -3805,62 +3810,42 @@ Retry:
        }
 #endif
 
-       if (vm_map_lock_upgrade(map))
-               goto Retry;
+       if (vm_map_lock_upgrade(map)) {
+               gap_entry = NULL;
+               vm_map_lock_read(map);
+               goto retry;
+       }
 
-       if (stack_entry == next_entry) {
-               /*
-                * Growing downward.
-                */
-               /* Get the preliminary new entry start value */
-               addr = stack_entry->start - grow_amount;
-
-               /*
-                * If this puts us into the previous entry, cut back our
-                * growth to the available space. Also, see the note above.
-                */
-               if (addr < end) {
-                       stack_entry->avail_ssize = max_grow;
-                       addr = end;
-                       if (stack_guard_page)
-                               addr += PAGE_SIZE;
+       if (grow_down) {
+               grow_start = gap_entry->end - grow_amount;
+               if (gap_entry->start + grow_amount == gap_entry->end) {
+                       gap_start = gap_entry->start;
+                       gap_end = gap_entry->end;
+                       vm_map_entry_delete(map, gap_entry);
+                       gap_deleted = true;
+               } else {
+                       MPASS(gap_entry->start < gap_entry->end - grow_amount);
+                       gap_entry->end -= grow_amount;
+                       vm_map_entry_resize_free(map, gap_entry);
+                       gap_deleted = false;
                }
-
-               rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
-                   next_entry->protection, next_entry->max_protection,
+               rv = vm_map_insert(map, NULL, 0, grow_start,
+                   grow_start + grow_amount,
+                   stack_entry->protection, stack_entry->max_protection,
                    MAP_STACK_GROWS_DOWN);
-
-               /* Adjust the available stack space by the amount we grew. */
-               if (rv == KERN_SUCCESS) {
-                       new_entry = prev_entry->next;
-                       KASSERT(new_entry == stack_entry->prev, ("foo"));
-                       KASSERT(new_entry->end == stack_entry->start, ("foo"));
-                       KASSERT(new_entry->start == addr, ("foo"));
-                       KASSERT((new_entry->eflags & MAP_ENTRY_GROWS_DOWN) !=
-                           0, ("new entry lacks MAP_ENTRY_GROWS_DOWN"));
-                       grow_amount = new_entry->end - new_entry->start;
-                       new_entry->avail_ssize = stack_entry->avail_ssize -
-                           grow_amount;
-                       stack_entry->eflags &= ~MAP_ENTRY_GROWS_DOWN;
+               if (rv != KERN_SUCCESS) {
+                       if (gap_deleted) {
+                               rv1 = vm_map_insert(map, NULL, 0, gap_start,
+                                   gap_end, VM_PROT_NONE, VM_PROT_NONE,
+                                   MAP_CREATE_GUARD | MAP_CREATE_STACK_GAP_DN);
+                               MPASS(rv1 == KERN_SUCCESS);
+                       } else {
+                               gap_entry->end += grow_amount;
+                               vm_map_entry_resize_free(map, gap_entry);
+                       }
                }
        } else {
-               /*
-                * Growing upward.
-                */
-               addr = stack_entry->end + grow_amount;
-
-               /*
-                * If this puts us into the next entry, cut back our growth
-                * to the available space. Also, see the note above.
-                */
-               if (addr > end) {
-                       stack_entry->avail_ssize = end - stack_entry->end;
-                       addr = end;
-                       if (stack_guard_page)
-                               addr -= PAGE_SIZE;
-               }
-
-               grow_amount = addr - stack_entry->end;
+               grow_start = stack_entry->end;
                cred = stack_entry->cred;
                if (cred == NULL && stack_entry->object.vm_object != NULL)
                        cred = stack_entry->object.vm_object->cred;
@@ -3872,30 +3857,30 @@ Retry:
                    stack_entry->offset,
                    (vm_size_t)(stack_entry->end - stack_entry->start),
                    (vm_size_t)grow_amount, cred != NULL)) {
-                       map->size += (addr - stack_entry->end);
-                       /* Update the current entry. */
-                       stack_entry->end = addr;
-                       stack_entry->avail_ssize -= grow_amount;
+                       if (gap_entry->start + grow_amount == gap_entry->end)
+                               vm_map_entry_delete(map, gap_entry);
+                       else
+                               gap_entry->start += grow_amount;
+                       stack_entry->end += grow_amount;
+                       map->size += grow_amount;
                        vm_map_entry_resize_free(map, stack_entry);
                        rv = KERN_SUCCESS;
                } else
                        rv = KERN_FAILURE;
        }
-
        if (rv == KERN_SUCCESS && is_procstack)
                vm->vm_ssize += btoc(grow_amount);
 
-       vm_map_unlock(map);
-
        /*
         * Heed the MAP_WIREFUTURE flag if it was set for this process.
         */
-       if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE)) {
-               vm_map_wire(map,
-                   (stack_entry == next_entry) ? addr : addr - grow_amount,
-                   (stack_entry == next_entry) ? stack_entry->start : addr,
+       if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE) != 0) {
+               vm_map_unlock(map);
+               vm_map_wire(map, grow_start, grow_start + grow_amount,
                    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
-       }
+               vm_map_lock_read(map);
+       } else
+               vm_map_lock_downgrade(map);
 
 out:
 #ifdef RACCT
@@ -4019,10 +4004,11 @@ vm_map_lookup(vm_map_t *var_map,                /* IN/OUT */
        vm_size_t size;
        struct ucred *cred;
 
-RetryLookup:;
+RetryLookup:
 
        vm_map_lock_read(map);
 
+RetryLookupLocked:
        /*
         * Lookup the faulting address.
         */
@@ -4048,7 +4034,16 @@ RetryLookup:;
         * Check whether this task is allowed to have this page.
         */
        prot = entry->protection;
-       fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
+       if ((fault_typea & VM_PROT_FAULT_LOOKUP) != 0) {
+               fault_typea &= ~VM_PROT_FAULT_LOOKUP;
+               if (prot == VM_PROT_NONE && map != kernel_map &&
+                   (entry->eflags & MAP_ENTRY_GUARD) != 0 &&
+                   (entry->eflags & (MAP_ENTRY_STACK_GAP_DN |
+                   MAP_ENTRY_STACK_GAP_UP)) != 0 &&
+                   vm_map_growstack(map, vaddr, entry) == KERN_SUCCESS)
+                       goto RetryLookupLocked;
+       }
+       fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
        if ((fault_type & prot) != fault_type || prot == VM_PROT_NONE) {
                vm_map_unlock_read(map);
                return (KERN_PROTECTION_FAILURE);
@@ -4282,8 +4277,9 @@ vm_map_print(vm_map_t map)
        db_indent += 2;
        for (entry = map->header.next; entry != &map->header;
            entry = entry->next) {
-               db_iprintf("map entry %p: start=%p, end=%p\n",
-                   (void *)entry, (void *)entry->start, (void *)entry->end);
+               db_iprintf("map entry %p: start=%p, end=%p, eflags=%#x, \n",
+                   (void *)entry, (void *)entry->start, (void *)entry->end,
+                   entry->eflags);
                {
                        static char *inheritance_name[4] =
                        {"share", "copy", "none", "donate_copy"};

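Note that the security.bsd.stack_guard_page knob above changes meaning:
it now defaults to 1 and counts the guard pages reserved in the gap next
to a growable stack entry, instead of acting as an on/off switch.  A
minimal sketch, using only the standard sysctl(3) interface, for reading
it from userland:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	int pages;
	size_t len = sizeof(pages);

	if (sysctlbyname("security.bsd.stack_guard_page", &pages, &len,
	    NULL, 0) == -1)
		err(1, "sysctlbyname");
	printf("stack guard pages: %d\n", pages);
	return (0);
}
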
Modified: stable/11/sys/vm/vm_map.h
==============================================================================
--- stable/11/sys/vm/vm_map.h   Wed Jul  5 06:12:21 2017        (r320665)
+++ stable/11/sys/vm/vm_map.h   Wed Jul  5 06:18:00 2017        (r320666)
@@ -103,7 +103,6 @@ struct vm_map_entry {
        struct vm_map_entry *right;     /* right child in binary search tree */
        vm_offset_t start;              /* start address */
        vm_offset_t end;                /* end address */
-       vm_offset_t avail_ssize;        /* amt can grow if this is a stack */
        vm_offset_t next_read;          /* vaddr of the next sequential read */
        vm_size_t adj_free;             /* amount of adjacent free space */
        vm_size_t max_free;             /* max free space in subtree */
@@ -142,6 +141,9 @@ struct vm_map_entry {
 
 #define        MAP_ENTRY_WIRE_SKIPPED          0x4000
 #define        MAP_ENTRY_VN_WRITECNT           0x8000  /* writeable vnode mapping */
+#define        MAP_ENTRY_GUARD                 0x10000
+#define        MAP_ENTRY_STACK_GAP_DN          0x20000
+#define        MAP_ENTRY_STACK_GAP_UP          0x40000
 
 #ifdef _KERNEL
 static __inline u_char
@@ -315,6 +317,7 @@ long vmspace_resident_count(struct vmspace *vmspace);
 #define MAP_PREFAULT_PARTIAL   0x0010
 #define MAP_DISABLE_SYNCER     0x0020
 #define        MAP_CHECK_EXCL          0x0040
+#define        MAP_CREATE_GUARD        0x0080
 #define MAP_DISABLE_COREDUMP   0x0100
 #define MAP_PREFAULT_MADVISE   0x0200  /* from (user) madvise request */
 #define        MAP_VN_WRITECOUNT       0x0400
@@ -322,6 +325,8 @@ long vmspace_resident_count(struct vmspace *vmspace);
 #define        MAP_STACK_GROWS_UP      0x2000
 #define        MAP_ACC_CHARGED         0x4000
 #define        MAP_ACC_NO_CHARGE       0x8000
+#define        MAP_CREATE_STACK_GAP_UP 0x10000
+#define        MAP_CREATE_STACK_GAP_DN 0x20000
 
 /*
  * vm_fault option flags
@@ -367,6 +372,8 @@ vm_map_t vm_map_create(pmap_t, vm_offset_t, vm_offset_
 int vm_map_delete(vm_map_t, vm_offset_t, vm_offset_t);
 int vm_map_find(vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t *, vm_size_t,
     vm_offset_t, int, vm_prot_t, vm_prot_t, int);
+int vm_map_find_min(vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t *,
+    vm_size_t, vm_offset_t, vm_offset_t, int, vm_prot_t, vm_prot_t, int);
 int vm_map_fixed(vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t, vm_size_t,
     vm_prot_t, vm_prot_t, int);
 int vm_map_findspace (vm_map_t, vm_offset_t, vm_size_t, vm_offset_t *);
@@ -387,7 +394,6 @@ int vm_map_submap (vm_map_t, vm_offset_t, vm_offset_t,
 int vm_map_sync(vm_map_t, vm_offset_t, vm_offset_t, boolean_t, boolean_t);
 int vm_map_madvise (vm_map_t, vm_offset_t, vm_offset_t, int);
 int vm_map_stack (vm_map_t, vm_offset_t, vm_size_t, vm_prot_t, vm_prot_t, int);
-int vm_map_growstack (struct proc *p, vm_offset_t addr);
 int vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
     int flags);
 int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,

Modified: stable/11/sys/vm/vm_mmap.c
==============================================================================
--- stable/11/sys/vm/vm_mmap.c  Wed Jul  5 06:12:21 2017        (r320665)
+++ stable/11/sys/vm/vm_mmap.c  Wed Jul  5 06:18:00 2017        (r320666)
@@ -226,7 +226,7 @@ kern_mmap(struct thread *td, uintptr_t addr0, size_t s
        }

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***