Module Name: src Committed By: jdolecek Date: Sat May 19 15:13:26 UTC 2018
Modified Files: src/sys/uvm: uvm_bio.c Log Message: Change the code to take advantage of the direct map when available, avoiding the need to map pages into the kernel. This improves performance of UBC-based (read(2)/write(2)) I/O, especially for cached block I/O - sequential read on my NVMe goes from 1.7 GB/s to 1.9 GB/s for non-cached, and from 2.2 GB/s to 5.6 GB/s for cached read. The new code is conditional and off for now, so that it can be tested further; it can be turned on by setting the ubc_direct variable to true. Part of the fix for PR kern/53124. To generate a diff of this commit: cvs rdiff -u -r1.94 -r1.95 src/sys/uvm/uvm_bio.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/uvm/uvm_bio.c diff -u src/sys/uvm/uvm_bio.c:1.94 src/sys/uvm/uvm_bio.c:1.95 --- src/sys/uvm/uvm_bio.c:1.94 Fri Apr 20 18:58:10 2018 +++ src/sys/uvm/uvm_bio.c Sat May 19 15:13:26 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_bio.c,v 1.94 2018/04/20 18:58:10 jdolecek Exp $ */ +/* $NetBSD: uvm_bio.c,v 1.95 2018/05/19 15:13:26 jdolecek Exp $ */ /* * Copyright (c) 1998 Chuck Silvers. @@ -34,7 +34,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.94 2018/04/20 18:58:10 jdolecek Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.95 2018/05/19 15:13:26 jdolecek Exp $"); #include "opt_uvmhist.h" #include "opt_ubc.h" @@ -48,9 +48,9 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v #include <uvm/uvm.h> -/* - * global data structures - */ +#ifdef PMAP_DIRECT +# define UBC_USE_PMAP_DIRECT +#endif /* * local functions @@ -59,6 +59,13 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v static int ubc_fault(struct uvm_faultinfo *, vaddr_t, struct vm_page **, int, int, vm_prot_t, int); static struct ubc_map *ubc_find_mapping(struct uvm_object *, voff_t); +#ifdef UBC_USE_PMAP_DIRECT +static int __noinline ubc_uiomove_direct(struct uvm_object *, struct uio *, vsize_t, + int, int); +static void __noinline ubc_zerorange_direct(struct uvm_object *, off_t, size_t, int); + +bool ubc_direct = false; /* XXX */ +#endif /* * local data structues @@ -149,15 +156,12 @@ UBC_EVCNT_DEFINE(faultbusy) void ubc_init(void) { - struct ubc_map *umap; - vaddr_t va; - int i; - /* * Make sure ubc_winshift is sane. */ if (ubc_winshift < PAGE_SHIFT) ubc_winshift = PAGE_SHIFT; + ubc_winsize = 1 << ubc_winshift; /* * init ubc_object. 
@@ -174,10 +178,7 @@ ubc_init(void) if (ubc_object.umap == NULL) panic("ubc_init: failed to allocate ubc_map"); - if (ubc_winshift < PAGE_SHIFT) { - ubc_winshift = PAGE_SHIFT; - } - va = (vaddr_t)1L; + vaddr_t va = (vaddr_t)1L; #ifdef PMAP_PREFER PMAP_PREFER(0, &va, 0, 0); /* kernel is never topdown */ ubc_nqueues = va >> ubc_winshift; @@ -185,13 +186,13 @@ ubc_init(void) ubc_nqueues = 1; } #endif - ubc_winsize = 1 << ubc_winshift; ubc_object.inactive = kmem_alloc(UBC_NQUEUES * sizeof(struct ubc_inactive_head), KM_SLEEP); - for (i = 0; i < UBC_NQUEUES; i++) { + for (int i = 0; i < UBC_NQUEUES; i++) { TAILQ_INIT(&ubc_object.inactive[i]); } - for (i = 0; i < ubc_nwins; i++) { + for (int i = 0; i < ubc_nwins; i++) { + struct ubc_map *umap; umap = &ubc_object.umap[i]; TAILQ_INSERT_TAIL(&ubc_object.inactive[i & (UBC_NQUEUES - 1)], umap, inactive); @@ -199,7 +200,7 @@ ubc_init(void) ubc_object.hash = hashinit(ubc_nwins, HASH_LIST, true, &ubc_object.hashmask); - for (i = 0; i <= ubc_object.hashmask; i++) { + for (int i = 0; i <= ubc_object.hashmask; i++) { LIST_INIT(&ubc_object.hash[i]); } @@ -562,6 +563,7 @@ again: (uintptr_t)umap, umap->refcount, (uintptr_t)va, flags); if (flags & UBC_FAULTBUSY) { + // XXX add offset from slot_offset? 
int npages = (*lenp + PAGE_SIZE - 1) >> PAGE_SHIFT; struct vm_page *pgs[npages]; int gpflags = @@ -732,6 +734,12 @@ ubc_uiomove(struct uvm_object *uobj, str KASSERT(((flags & UBC_WRITE) != 0 && uio->uio_rw == UIO_WRITE) || ((flags & UBC_READ) != 0 && uio->uio_rw == UIO_READ)); +#ifdef UBC_USE_PMAP_DIRECT + if (ubc_direct) { + return ubc_uiomove_direct(uobj, uio, todo, advice, flags); + } +#endif + off = uio->uio_offset; error = 0; while (todo > 0) { @@ -769,13 +777,20 @@ ubc_uiomove(struct uvm_object *uobj, str void ubc_zerorange(struct uvm_object *uobj, off_t off, size_t len, int flags) { - void *win; + +#ifdef UBC_USE_PMAP_DIRECT + if (ubc_direct) { + ubc_zerorange_direct(uobj, off, len, flags); + return; + } +#endif /* * XXXUBC invent kzero() and use it */ while (len) { + void *win; vsize_t bytelen = len; win = ubc_alloc(uobj, off, &bytelen, UVM_ADV_NORMAL, UBC_WRITE); @@ -787,6 +802,196 @@ ubc_zerorange(struct uvm_object *uobj, o } } +#ifdef UBC_USE_PMAP_DIRECT +/* Copy data using direct map */ + +/* + * ubc_alloc_direct: allocate a file mapping window using direct map + */ +static int __noinline +ubc_alloc_direct(struct uvm_object *uobj, voff_t offset, vsize_t *lenp, + int advice, int flags, struct vm_page **pgs, int *npages) +{ + voff_t pgoff; + int error; + int gpflags = flags | PGO_NOTIMESTAMP | PGO_SYNCIO | PGO_ALLPAGES; + int access_type = VM_PROT_READ; + + if (flags & UBC_WRITE) { + if (flags & UBC_FAULTBUSY) + gpflags |= PGO_OVERWRITE; +#if 0 + KASSERT(!UVM_OBJ_NEEDS_WRITEFAULT(uobj)); +#endif + + gpflags |= PGO_PASTEOF; + access_type |= VM_PROT_WRITE; + } + + pgoff = (offset & PAGE_MASK); + *lenp = MIN(*lenp, ubc_winsize - pgoff); + +again: + *npages = (*lenp + pgoff + PAGE_SIZE - 1) >> PAGE_SHIFT; + KASSERT((*npages * PAGE_SIZE) <= ubc_winsize); + KASSERT(*lenp + pgoff <= ubc_winsize); + memset(pgs, 0, *npages * sizeof(pgs[0])); + + mutex_enter(uobj->vmobjlock); + error = (*uobj->pgops->pgo_get)(uobj, trunc_page(offset), pgs, + npages, 0, access_type, 
advice, gpflags); + UVMHIST_LOG(ubchist, "alloc_direct getpages %jd", error, 0, 0, 0); + if (error) { + if (error == EAGAIN) { + kpause("ubc_alloc_directg", false, hz >> 2, NULL); + goto again; + } + return error; + } + + mutex_enter(uobj->vmobjlock); + for (int i = 0; i < *npages; i++) { + struct vm_page *pg = pgs[i]; + + KASSERT(pg != NULL); + KASSERT(pg != PGO_DONTCARE); + KASSERT((pg->flags & PG_FAKE) == 0 || (gpflags & PGO_OVERWRITE)); + KASSERT(pg->uobject->vmobjlock == uobj->vmobjlock); + + /* Avoid breaking loan if possible, only do it on write */ + if ((flags & UBC_WRITE) && pg->loan_count != 0) { + pg = uvm_loanbreak(pg); + if (pg == NULL) { + uvm_page_unbusy(pgs, *npages); + mutex_exit(uobj->vmobjlock); + uvm_wait("ubc_alloc_directl"); + goto again; + } + pgs[i] = pg; + } + + /* Page must be writable by now */ + KASSERT((pg->flags & PG_RDONLY) == 0 || (flags & UBC_WRITE) == 0); + + mutex_enter(&uvm_pageqlock); + uvm_pageactivate(pg); + mutex_exit(&uvm_pageqlock); + + /* Page will be changed, no longer clean */ + /* XXX do this AFTER the write? 
*/ + if (flags & UBC_WRITE) + pg->flags &= ~(PG_FAKE|PG_CLEAN); + } + mutex_exit(uobj->vmobjlock); + + return 0; +} + +static int +ubc_uiomove_process(void *win, size_t len, void *arg) +{ + struct uio *uio = (struct uio *)arg; + + return uiomove(win, len, uio); +} + +static int +ubc_zerorange_process(void *win, size_t len, void *arg) +{ + memset(win, 0, len); + return 0; +} + +static int __noinline +ubc_uiomove_direct(struct uvm_object *uobj, struct uio *uio, vsize_t todo, int advice, + int flags) +{ + const bool overwrite = (flags & UBC_FAULTBUSY) != 0; + voff_t off; + int error, npages; + struct vm_page *pgs[ubc_winsize >> PAGE_SHIFT]; + + KASSERT(todo <= uio->uio_resid); + KASSERT(((flags & UBC_WRITE) != 0 && uio->uio_rw == UIO_WRITE) || + ((flags & UBC_READ) != 0 && uio->uio_rw == UIO_READ)); + + off = uio->uio_offset; + error = 0; + while (todo > 0) { + vsize_t bytelen = todo; + + error = ubc_alloc_direct(uobj, off, &bytelen, advice, flags, + pgs, &npages); + if (error != 0) { + /* can't do anything, failed to get the pages */ + break; + } + + if (error == 0) { + error = uvm_direct_process(pgs, npages, off, bytelen, + ubc_uiomove_process, uio); + } + if (error != 0 && overwrite) { + /* + * if we haven't initialized the pages yet, + * do it now. it's safe to use memset here + * because we just mapped the pages above. 
+ */ + printf("%s: error=%d\n", __func__, error); + (void) uvm_direct_process(pgs, npages, off, bytelen, + ubc_zerorange_process, NULL); + } + + mutex_enter(uobj->vmobjlock); + uvm_page_unbusy(pgs, npages); + mutex_exit(uobj->vmobjlock); + + off += bytelen; + todo -= bytelen; + + if (error != 0 && ISSET(flags, UBC_PARTIALOK)) { + break; + } + } + + return error; +} + +static void __noinline +ubc_zerorange_direct(struct uvm_object *uobj, off_t off, size_t todo, int flags) +{ + int error, npages; + struct vm_page *pgs[ubc_winsize >> PAGE_SHIFT]; + + error = 0; + while (todo > 0) { + vsize_t bytelen = todo; + + error = ubc_alloc_direct(uobj, off, &bytelen, UVM_ADV_NORMAL, + UBC_WRITE, pgs, &npages); + if (error != 0) { + /* can't do anything, failed to get the pages */ + break; + } + + error = uvm_direct_process(pgs, npages, off, bytelen, + ubc_zerorange_process, NULL); + + mutex_enter(uobj->vmobjlock); + uvm_page_unbusy(pgs, npages); + mutex_exit(uobj->vmobjlock); + + off += bytelen; + todo -= bytelen; + + if (error != 0 && ISSET(flags, UBC_PARTIALOK)) { + break; + } + } +} + +#endif /* UBC_USE_PMAP_DIRECT */ + /* * ubc_purge: disassociate ubc_map structures from an empty uvm_object. */