> Date: Wed, 14 Oct 2020 12:01:10 +0200
> From: Martin Pieuchot <[email protected]>
>
> Getting uvm_fault() out of the KERNEL_LOCK() alone is not enough to
> reduce the contention due to page faults. A single part of the handler
> spinning on the lock is enough to hide bugs and increase latency. One
> recent example is the uvm_map_inentry() check.
>
> uvm_grow() is another small function called in trap that currently needs
> the KERNEL_LOCK(). Diff below changes this requirement without removing
> the KERNEL_LOCK() yet.
>
> It uses the underlying vm_space lock to serialize writes to the fields
> of "truct vmspace".
>
> While here I also documented that the reference counting is currently
> protected by the KERNEL_LOCK() and introduced a wrapper to help with
> future changes and reduce the differences with NetBSD.
>
> Once uvm_grow() is safe to be called without the KERNEL_LOCK() MD trap
> functions can be adapted on a case-per-case basis.
>
> Comments, Oks?
I considered the same approach of using the lock of the underlying
vm_map. I have seen some evidence of contention on that lock, but I
don't think it is too bad (yet). I looked at a lock-free approach as
well, but it got a bit messy. So I think the approach is fine.
However...
>
> Index: kern/kern_sysctl.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
> retrieving revision 1.379
> diff -u -p -r1.379 kern_sysctl.c
> --- kern/kern_sysctl.c 1 Sep 2020 01:53:50 -0000 1.379
> +++ kern/kern_sysctl.c 14 Oct 2020 09:35:00 -0000
> @@ -1783,7 +1783,7 @@ sysctl_proc_args(int *name, u_int namele
> /* Execing - danger. */
> if ((vpr->ps_flags & PS_INEXEC))
> return (EBUSY);
> -
> +
> /* Only owner or root can get env */
> if ((op == KERN_PROC_NENV || op == KERN_PROC_ENV) &&
> (vpr->ps_ucred->cr_uid != cp->p_ucred->cr_uid &&
> @@ -1792,7 +1792,7 @@ sysctl_proc_args(int *name, u_int namele
>
> ps_strings = vpr->ps_strings;
> vm = vpr->ps_vmspace;
> - vm->vm_refcnt++;
> + uvmspace_addref(vm);
> vpr = NULL;
>
> buf = malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
> Index: kern/sys_process.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/sys_process.c,v
> retrieving revision 1.83
> diff -u -p -r1.83 sys_process.c
> --- kern/sys_process.c 16 Mar 2020 11:58:46 -0000 1.83
> +++ kern/sys_process.c 14 Oct 2020 09:35:00 -0000
> @@ -850,13 +850,12 @@ process_domem(struct proc *curp, struct
> if ((error = process_checkioperm(curp, tr)) != 0)
> return error;
>
> - /* XXXCDC: how should locking work here? */
> vm = tr->ps_vmspace;
> if ((tr->ps_flags & PS_EXITING) || (vm->vm_refcnt < 1))
> return EFAULT;
> addr = uio->uio_offset;
>
> - vm->vm_refcnt++;
> + uvmspace_addref(vm);
>
> error = uvm_io(&vm->vm_map, uio,
> (uio->uio_rw == UIO_WRITE) ? UVM_IO_FIXPROT : 0);
> @@ -892,7 +891,7 @@ process_auxv_offset(struct proc *curp, s
> if ((tr->ps_flags & PS_EXITING) || (vm->vm_refcnt < 1))
> return EFAULT;
>
> - vm->vm_refcnt++;
> + uvmspace_addref(vm);
> error = uvm_io(&vm->vm_map, &uio, 0);
> uvmspace_free(vm);
>
> Index: uvm/uvm_extern.h
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_extern.h,v
> retrieving revision 1.153
> diff -u -p -r1.153 uvm_extern.h
> --- uvm/uvm_extern.h 13 Sep 2020 10:05:25 -0000 1.153
> +++ uvm/uvm_extern.h 14 Oct 2020 09:35:00 -0000
> @@ -192,11 +192,13 @@ struct pmap;
> * Several fields are temporary (text, data stuff).
> *
> * Locks used to protect struct members in this file:
> + * K kernel lock
> * I immutable after creation
> + * v vm_map's lock
> */
> struct vmspace {
> struct vm_map vm_map; /* VM address map */
> - int vm_refcnt; /* number of references */
> + int vm_refcnt; /* [K] number of references */
> caddr_t vm_shm; /* SYS5 shared memory private data XXX */
> /* we copy from vm_startcopy to the end of the structure on fork */
> #define vm_startcopy vm_rssize
> @@ -205,9 +207,9 @@ struct vmspace {
> segsz_t vm_tsize; /* text size (pages) XXX */
> segsz_t vm_dsize; /* data size (pages) XXX */
> segsz_t vm_dused; /* data segment length (pages) XXX */
> - segsz_t vm_ssize; /* stack size (pages) */
> - caddr_t vm_taddr; /* user virtual address of text XXX */
> - caddr_t vm_daddr; /* user virtual address of data XXX */
> + segsz_t vm_ssize; /* [v] stack size (pages) */
> + caddr_t vm_taddr; /* [I] user virtual address of text */
> + caddr_t vm_daddr; /* [I] user virtual address of data */
> caddr_t vm_maxsaddr; /* [I] user VA at max stack growth */
> caddr_t vm_minsaddr; /* [I] user VA at top of stack */
> };
> @@ -413,6 +415,7 @@ void uvmspace_init(struct vmspace *,
> s
> vaddr_t, vaddr_t, boolean_t, boolean_t);
> void uvmspace_exec(struct proc *, vaddr_t, vaddr_t);
> struct vmspace *uvmspace_fork(struct process *);
> +void uvmspace_addref(struct vmspace *);
> void uvmspace_free(struct vmspace *);
> struct vmspace *uvmspace_share(struct process *);
> int uvm_share(vm_map_t, vaddr_t, vm_prot_t,
> Index: uvm/uvm_map.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_map.c,v
> retrieving revision 1.268
> diff -u -p -r1.268 uvm_map.c
> --- uvm/uvm_map.c 22 Sep 2020 14:31:08 -0000 1.268
> +++ uvm/uvm_map.c 14 Oct 2020 09:35:00 -0000
> @@ -3517,7 +3517,6 @@ uvmspace_init(struct vmspace *vm, struct
> /*
> * uvmspace_share: share a vmspace between two processes
> *
> - * - XXX: no locking on vmspace
> * - used for vfork
> */
>
> @@ -3526,7 +3525,7 @@ uvmspace_share(struct process *pr)
> {
> struct vmspace *vm = pr->ps_vmspace;
>
> - vm->vm_refcnt++;
> + uvmspace_addref(vm);
> return vm;
> }
>
> @@ -3630,13 +3629,25 @@ uvmspace_exec(struct proc *p, vaddr_t st
> }
>
> /*
> + * uvmspace_addref: add a reference to a vmspace.
> + */
> +void
> +uvmspace_addref(struct vmspace *vm)
> +{
> + KERNEL_ASSERT_LOCKED();
> + KASSERT(vm->vm_refcnt > 0);
> +
> + vm->vm_refcnt++;
> +}
> +
> +/*
> * uvmspace_free: free a vmspace data structure
> - *
> - * - XXX: no locking on vmspace
> */
> void
> uvmspace_free(struct vmspace *vm)
> {
> + KERNEL_ASSERT_LOCKED();
> +
> if (--vm->vm_refcnt == 0) {
> /*
> * lock the map, to wait out all other references to it. delete
> Index: uvm/uvm_unix.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_unix.c,v
> retrieving revision 1.68
> diff -u -p -r1.68 uvm_unix.c
> --- uvm/uvm_unix.c 6 Jul 2020 13:33:09 -0000 1.68
> +++ uvm/uvm_unix.c 14 Oct 2020 09:35:00 -0000
> @@ -108,11 +108,14 @@ void
> uvm_grow(struct proc *p, vaddr_t sp)
> {
> struct vmspace *vm = p->p_vmspace;
> + vm_map_t map = &vm->vm_map;
> int si;
>
> + vm_map_lock(map);
> +
> /* For user defined stacks (from sendsig). */
> if (sp < (vaddr_t)vm->vm_maxsaddr)
> - return;
> + goto out;
Since vm_maxsaddr is ummutable, this check can be done without holding
the lock. I think that's worth it as it will prevent contention in
multi-threaded processes as this check will almost always be true for
anything but the first thread since those will use a user-defined
stack.
> /* For common case of already allocated (from trap). */
> #ifdef MACHINE_STACK_GROWS_UP
> @@ -120,7 +123,7 @@ uvm_grow(struct proc *p, vaddr_t sp)
> #else
> if (sp >= (vaddr_t)vm->vm_minsaddr - ptoa(vm->vm_ssize))
> #endif
> - return;
> + goto out;
>
> /* Really need to check vs limit and increment stack size if ok. */
> #ifdef MACHINE_STACK_GROWS_UP
> @@ -130,6 +133,8 @@ uvm_grow(struct proc *p, vaddr_t sp)
> #endif
> if (vm->vm_ssize + si <= atop(lim_cur(RLIMIT_STACK)))
> vm->vm_ssize += si;
> +out:
> + vm_map_unlock(map);
> }
>
> #ifndef SMALL_KERNEL
>
>