Getting uvm_fault() out of the KERNEL_LOCK() alone is not enough to
reduce the contention due to page faults.  A single part of the handler
spinning on the lock is enough to hide bugs and increase latency.  One
recent example is the uvm_map_inentry() check.

uvm_grow() is another small function called in trap that currently needs
the KERNEL_LOCK().  Diff below changes this requirement without removing
the KERNEL_LOCK() yet.

It uses the underlying vm_map lock to serialize writes to the fields
of "struct vmspace".

While here I also documented that the reference counting is currently
protected by the KERNEL_LOCK() and introduced a wrapper to help with
future changes and reduce the differences with NetBSD.

Once uvm_grow() is safe to be called without the KERNEL_LOCK() MD trap
functions can be adapted on a case-by-case basis.

Comments, Oks?

Index: kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.379
diff -u -p -r1.379 kern_sysctl.c
--- kern/kern_sysctl.c  1 Sep 2020 01:53:50 -0000       1.379
+++ kern/kern_sysctl.c  14 Oct 2020 09:35:00 -0000
@@ -1783,7 +1783,7 @@ sysctl_proc_args(int *name, u_int namele
        /* Execing - danger. */
        if ((vpr->ps_flags & PS_INEXEC))
                return (EBUSY);
-       
+
        /* Only owner or root can get env */
        if ((op == KERN_PROC_NENV || op == KERN_PROC_ENV) &&
            (vpr->ps_ucred->cr_uid != cp->p_ucred->cr_uid &&
@@ -1792,7 +1792,7 @@ sysctl_proc_args(int *name, u_int namele
 
        ps_strings = vpr->ps_strings;
        vm = vpr->ps_vmspace;
-       vm->vm_refcnt++;
+       uvmspace_addref(vm);
        vpr = NULL;
 
        buf = malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
Index: kern/sys_process.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_process.c,v
retrieving revision 1.83
diff -u -p -r1.83 sys_process.c
--- kern/sys_process.c  16 Mar 2020 11:58:46 -0000      1.83
+++ kern/sys_process.c  14 Oct 2020 09:35:00 -0000
@@ -850,13 +850,12 @@ process_domem(struct proc *curp, struct 
        if ((error = process_checkioperm(curp, tr)) != 0)
                return error;
 
-       /* XXXCDC: how should locking work here? */
        vm = tr->ps_vmspace;
        if ((tr->ps_flags & PS_EXITING) || (vm->vm_refcnt < 1))
                return EFAULT;
        addr = uio->uio_offset;
 
-       vm->vm_refcnt++;
+       uvmspace_addref(vm);
 
        error = uvm_io(&vm->vm_map, uio,
            (uio->uio_rw == UIO_WRITE) ? UVM_IO_FIXPROT : 0);
@@ -892,7 +891,7 @@ process_auxv_offset(struct proc *curp, s
        if ((tr->ps_flags & PS_EXITING) || (vm->vm_refcnt < 1))
                return EFAULT;
 
-       vm->vm_refcnt++;
+       uvmspace_addref(vm);
        error = uvm_io(&vm->vm_map, &uio, 0);
        uvmspace_free(vm);
 
Index: uvm/uvm_extern.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_extern.h,v
retrieving revision 1.153
diff -u -p -r1.153 uvm_extern.h
--- uvm/uvm_extern.h    13 Sep 2020 10:05:25 -0000      1.153
+++ uvm/uvm_extern.h    14 Oct 2020 09:35:00 -0000
@@ -192,11 +192,13 @@ struct pmap;
  * Several fields are temporary (text, data stuff).
  *
  *  Locks used to protect struct members in this file:
+ *     K       kernel lock
  *     I       immutable after creation
+ *     v       vm_map's lock
  */
 struct vmspace {
        struct  vm_map vm_map;  /* VM address map */
-       int     vm_refcnt;      /* number of references */
+       int     vm_refcnt;      /* [K] number of references */
        caddr_t vm_shm;         /* SYS5 shared memory private data XXX */
 /* we copy from vm_startcopy to the end of the structure on fork */
 #define vm_startcopy vm_rssize
@@ -205,9 +207,9 @@ struct vmspace {
        segsz_t vm_tsize;       /* text size (pages) XXX */
        segsz_t vm_dsize;       /* data size (pages) XXX */
        segsz_t vm_dused;       /* data segment length (pages) XXX */
-       segsz_t vm_ssize;       /* stack size (pages) */
-       caddr_t vm_taddr;       /* user virtual address of text XXX */
-       caddr_t vm_daddr;       /* user virtual address of data XXX */
+       segsz_t vm_ssize;       /* [v] stack size (pages) */
+       caddr_t vm_taddr;       /* [I] user virtual address of text */
+       caddr_t vm_daddr;       /* [I] user virtual address of data */
        caddr_t vm_maxsaddr;    /* [I] user VA at max stack growth */
        caddr_t vm_minsaddr;    /* [I] user VA at top of stack */
 };
@@ -413,6 +415,7 @@ void                        uvmspace_init(struct vmspace *, 
s
                            vaddr_t, vaddr_t, boolean_t, boolean_t);
 void                   uvmspace_exec(struct proc *, vaddr_t, vaddr_t);
 struct vmspace         *uvmspace_fork(struct process *);
+void                   uvmspace_addref(struct vmspace *);
 void                   uvmspace_free(struct vmspace *);
 struct vmspace         *uvmspace_share(struct process *);
 int                    uvm_share(vm_map_t, vaddr_t, vm_prot_t,
Index: uvm/uvm_map.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_map.c,v
retrieving revision 1.268
diff -u -p -r1.268 uvm_map.c
--- uvm/uvm_map.c       22 Sep 2020 14:31:08 -0000      1.268
+++ uvm/uvm_map.c       14 Oct 2020 09:35:00 -0000
@@ -3517,7 +3517,6 @@ uvmspace_init(struct vmspace *vm, struct
 /*
  * uvmspace_share: share a vmspace between two processes
  *
- * - XXX: no locking on vmspace
  * - used for vfork
  */
 
@@ -3526,7 +3525,7 @@ uvmspace_share(struct process *pr)
 {
        struct vmspace *vm = pr->ps_vmspace;
 
-       vm->vm_refcnt++;
+       uvmspace_addref(vm);
        return vm;
 }
 
@@ -3630,13 +3629,25 @@ uvmspace_exec(struct proc *p, vaddr_t st
 }
 
 /*
+ * uvmspace_addref: add a reference to a vmspace.
+ */
+void
+uvmspace_addref(struct vmspace *vm)
+{
+       KERNEL_ASSERT_LOCKED();
+       KASSERT(vm->vm_refcnt > 0);
+
+       vm->vm_refcnt++;
+}
+
+/*
  * uvmspace_free: free a vmspace data structure
- *
- * - XXX: no locking on vmspace
  */
 void
 uvmspace_free(struct vmspace *vm)
 {
+       KERNEL_ASSERT_LOCKED();
+
        if (--vm->vm_refcnt == 0) {
                /*
                 * lock the map, to wait out all other references to it.  delete
Index: uvm/uvm_unix.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_unix.c,v
retrieving revision 1.68
diff -u -p -r1.68 uvm_unix.c
--- uvm/uvm_unix.c      6 Jul 2020 13:33:09 -0000       1.68
+++ uvm/uvm_unix.c      14 Oct 2020 09:35:00 -0000
@@ -108,11 +108,14 @@ void
 uvm_grow(struct proc *p, vaddr_t sp)
 {
        struct vmspace *vm = p->p_vmspace;
+       vm_map_t map = &vm->vm_map;
        int si;
 
+       vm_map_lock(map);
+
        /* For user defined stacks (from sendsig). */
        if (sp < (vaddr_t)vm->vm_maxsaddr)
-               return;
+               goto out;
 
        /* For common case of already allocated (from trap). */
 #ifdef MACHINE_STACK_GROWS_UP
@@ -120,7 +123,7 @@ uvm_grow(struct proc *p, vaddr_t sp)
 #else
        if (sp >= (vaddr_t)vm->vm_minsaddr - ptoa(vm->vm_ssize))
 #endif
-               return;
+               goto out;
 
        /* Really need to check vs limit and increment stack size if ok. */
 #ifdef MACHINE_STACK_GROWS_UP
@@ -130,6 +133,8 @@ uvm_grow(struct proc *p, vaddr_t sp)
 #endif
        if (vm->vm_ssize + si <= atop(lim_cur(RLIMIT_STACK)))
                vm->vm_ssize += si;
+out:
+       vm_map_unlock(map);
 }
 
 #ifndef SMALL_KERNEL

Reply via email to