Author: kib
Date: Mon Mar 30 22:13:32 2020
New Revision: 359473
URL: https://svnweb.freebsd.org/changeset/base/359473
Log:
  kern_sendfile.c: fix bugs with handling of busy page states.

  - Do not call into a vnode pager while leaving some pages from the same
    block as the current run xbusy.  This immediately deadlocks if the
    pager needs to instantiate the buffer.
  - Only relookup bogus pages after I/O finished, otherwise we might
    obliterate the valid pages with out-of-date disk content.  While
    there, expand the comment explaining this peculiarity.
  - Do not double-unbusy on error.  Split unbusy for the error case,
    which is left in sendfile_swapin(), from the more properly coded
    normal case in sendfile_iodone().
  - Add an XXXKIB comment explaining the serious bug in the validation
    algorithm, not fixed by this patch series.

  PR:             244713
  Reviewed by:    glebius, markj
  Tested by:      pho
  Sponsored by:   The FreeBSD Foundation
  MFC after:      1 week
  Differential revision:  https://reviews.freebsd.org/D24038

Modified:
  head/sys/kern/kern_sendfile.c

Modified: head/sys/kern/kern_sendfile.c
==============================================================================
--- head/sys/kern/kern_sendfile.c   Mon Mar 30 22:07:11 2020   (r359472)
+++ head/sys/kern/kern_sendfile.c   Mon Mar 30 22:13:32 2020   (r359473)
@@ -92,6 +92,7 @@ struct sf_io {
         struct socket   *so;
         struct mbuf     *m;
         vm_object_t     obj;
+        vm_pindex_t     pindex0;
 #ifdef KERN_TLS
         struct ktls_session *tls;
 #endif
@@ -270,17 +271,42 @@ sendfile_iowait(struct sf_io *sfio, const char *wmesg)
  * I/O completion callback.
  */
 static void
-sendfile_iodone(void *arg, vm_page_t *pg, int count, int error)
+sendfile_iodone(void *arg, vm_page_t *pa, int count, int error)
 {
         struct sf_io *sfio = arg;
         struct socket *so;
+        int i;
 
-        for (int i = 0; i < count; i++)
-                if (pg[i] != bogus_page)
-                        vm_page_xunbusy_unchecked(pg[i]);
-
-        if (error)
+        if (error != 0) {
                 sfio->error = error;
+                /*
+                 * Restore of the pg[] elements is done by
+                 * sendfile_swapin().
+                 */
+        } else {
+                /*
+                 * Restore the valid page pointers.  They are already
+                 * unbusied, but still wired.  For error != 0 case,
+                 * sendfile_swapin() handles unbusy.
+                 *
+                 * XXXKIB since pages are only wired, and we do not
+                 * own the object lock, other users might have
+                 * invalidated them in meantime.  Similarly, after we
+                 * unbusied the swapped-in pages, they can become
+                 * invalid under us.
+                 */
+                for (i = 0; i < count; i++) {
+                        if (pa[i] == bogus_page) {
+                                pa[i] = vm_page_relookup(sfio->obj,
+                                    sfio->pindex0 + i + (sfio->pa - pa));
+                                KASSERT(pa[i] != NULL,
+                                    ("%s: page %p[%d] disappeared",
+                                    __func__, pa, i));
+                        } else {
+                                vm_page_xunbusy_unchecked(pa[i]);
+                        }
+                }
+        }
 
         if (!refcount_release(&sfio->nios))
                 return;
@@ -361,11 +387,13 @@ static int
 sendfile_swapin(vm_object_t obj, struct sf_io *sfio, int *nios, off_t off,
     off_t len, int npages, int rhpages, int flags)
 {
-        vm_page_t *pa = sfio->pa;
-        int grabbed;
+        vm_page_t *pa;
+        int a, count, count1, grabbed, i, j, rv;
 
+        pa = sfio->pa;
         *nios = 0;
         flags = (flags & SF_NODISKIO) ? VM_ALLOC_NOWAIT : 0;
+        sfio->pindex0 = OFF_TO_IDX(off);
 
         /*
          * First grab all the pages and wire them.  Note that we grab
@@ -380,9 +408,7 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
                 rhpages = 0;
         }
 
-        for (int i = 0; i < npages;) {
-                int j, a, count, rv;
-
+        for (i = 0; i < npages;) {
                 /* Skip valid pages. */
                 if (vm_page_is_valid(pa[i], vmoff(i, off) & PAGE_MASK,
                     xfsize(i, npages, off, len))) {
@@ -422,19 +448,41 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
                 count = min(a + 1, npages - i);
 
                 /*
-                 * We should not pagein into a valid page, thus we first trim
-                 * any valid pages off the end of request, and substitute
-                 * to bogus_page those, that are in the middle.
+                 * We should not pagein into a valid page because
+                 * there might be still unfinished write tracked by
+                 * e.g. a buffer, thus we substitute any valid pages
+                 * with the bogus one.
+                 *
+                 * We must not leave around xbusy pages which are not
+                 * part of the run passed to vm_pager_getpages(),
+                 * otherwise pager might deadlock waiting for the busy
+                 * status of the page, e.g. if it constitues the
+                 * buffer needed to validate other page.
+                 *
+                 * First trim the end of the run consisting of the
+                 * valid pages, then replace the rest of the valid
+                 * with bogus.
                  */
+                count1 = count;
                 for (j = i + count - 1; j > i; j--) {
                         if (vm_page_is_valid(pa[j], vmoff(j, off) & PAGE_MASK,
                             xfsize(j, npages, off, len))) {
+                                vm_page_xunbusy(pa[j]);
+                                SFSTAT_INC(sf_pages_valid);
                                 count--;
-                                rhpages = 0;
-                        } else
+                        } else {
                                 break;
+                        }
                 }
-                for (j = i + 1; j < i + count - 1; j++)
+
+                /*
+                 * The last page in the run pa[i + count - 1] is
+                 * guaranteed to be invalid by the trim above, so it
+                 * is not replaced with bogus, thus -1 in the loop end
+                 * condition.
+                 */
+                MPASS(pa[i + count - 1]->valid != VM_PAGE_BITS_ALL);
+                for (j = i + 1; j < i + count - 1; j++) {
                         if (vm_page_is_valid(pa[j], vmoff(j, off) & PAGE_MASK,
                             xfsize(j, npages, off, len))) {
                                 vm_page_xunbusy(pa[j]);
@@ -442,6 +490,7 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
                                 SFSTAT_INC(sf_pages_bogus);
                                 pa[j] = bogus_page;
                         }
+                }
 
                 refcount_acquire(&sfio->nios);
                 rv = vm_pager_get_pages_async(obj, pa + i, count, NULL,
@@ -453,12 +502,16 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
                         /*
                          * Perform full pages recovery before returning EIO.
                          * Pages from 0 to npages are wired.
-                         * Pages from i to npages are also busied.
                          * Pages from (i + 1) to (i + count - 1) may be
                          * substituted to bogus page, and not busied.
+                         * Pages from (i + count) to (i + count1 - 1) are
+                         * not busied.
+                         * Rest of the pages from i to npages are busied.
                          */
                         for (j = 0; j < npages; j++) {
-                                if (j > i && j < i + count - 1 &&
+                                if (j >= i + count && j < i + count1)
+                                        ;
+                                else if (j > i && j < i + count - 1 &&
                                     pa[j] == bogus_page)
                                         pa[j] = vm_page_relookup(obj,
                                             OFF_TO_IDX(vmoff(j, off)));
@@ -477,19 +530,7 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
                 if (i + count == npages)
                         SFSTAT_ADD(sf_rhpages_read, rhpages);
 
-                /*
-                 * Restore the valid page pointers.  They are already
-                 * unbusied, but still wired.
-                 */
-                for (j = i + 1; j < i + count - 1; j++)
-                        if (pa[j] == bogus_page) {
-                                pa[j] = vm_page_relookup(obj,
-                                    OFF_TO_IDX(vmoff(j, off)));
-                                KASSERT(pa[j], ("%s: page %p[%d] disappeared",
-                                    __func__, pa, j));
-
-                        }
-                i += count;
+                i += count1;
                 (*nios)++;
         }
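The core of the change is an ordering rule: unbusy and trim the valid pages off the tail of a run before handing the run to the pager, substitute interior valid pages with bogus_page so the pager cannot overwrite their contents, and only relook up the real pages once the I/O has completed.  The sketch below is a minimal userspace model of that rule, assuming made-up stand-ins (toy_page, toy_pager_getpages(), relookup()) for vm_page_t, vm_pager_getpages() and vm_page_relookup(); it is an illustration only, not kernel code.

/*
 * Userspace toy model of the pattern above; toy_page, toy_pager_getpages()
 * and relookup() are made-up stand-ins for vm_page_t, vm_pager_getpages()
 * and vm_page_relookup().  Only the ordering rules are modeled.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define NPAGES  8

struct toy_page {
        int     pindex;         /* index within the backing object */
        bool    valid;          /* contents already up to date */
        bool    xbusy;          /* exclusive-busied by us */
};

static struct toy_page object_pages[NPAGES];    /* the "VM object" */
static struct toy_page bogus;                   /* stand-in for bogus_page */

/* Fake pager: fills every non-bogus page handed to it from "disk". */
static void
toy_pager_getpages(struct toy_page **run, int count)
{
        for (int i = 0; i < count; i++)
                if (run[i] != &bogus)
                        run[i]->valid = true;
}

/* Stand-in for vm_page_relookup(): find the real page by index again. */
static struct toy_page *
relookup(int pindex)
{
        return (&object_pages[pindex]);
}

/*
 * Page in the run pa[0..count-1], all initially xbusy and wired:
 * 1. trim trailing valid pages off the run and unbusy them, so the pager
 *    never blocks on an xbusy page it was not handed;
 * 2. substitute interior valid pages with the bogus page so the pager
 *    cannot overwrite their up-to-date contents with stale disk blocks;
 * 3. only after the pager has finished, relook up the real pages to
 *    restore the pointers.
 */
static void
swapin_run(struct toy_page **pa, int count)
{
        int trimmed, j;

        assert(count > 0 && !pa[0]->valid);     /* a run starts at an invalid page */

        trimmed = count;
        while (trimmed > 1 && pa[trimmed - 1]->valid) {
                pa[trimmed - 1]->xbusy = false;         /* step 1 */
                trimmed--;
        }
        /* The last page of the trimmed run is invalid, so it is skipped. */
        for (j = 1; j < trimmed - 1; j++) {
                if (pa[j]->valid) {
                        pa[j]->xbusy = false;
                        pa[j] = &bogus;                 /* step 2 */
                }
        }

        toy_pager_getpages(pa, trimmed);        /* the "async" I/O, done inline */

        for (j = 0; j < trimmed; j++) {         /* iodone-style completion */
                if (pa[j] == &bogus)
                        pa[j] = relookup(j);    /* step 3; pindex0 == 0 here */
                else
                        pa[j]->xbusy = false;
        }
}

int
main(void)
{
        struct toy_page *pa[NPAGES];

        for (int i = 0; i < NPAGES; i++) {
                object_pages[i].pindex = i;
                object_pages[i].valid = (i == 2 || i >= 6);
                object_pages[i].xbusy = true;           /* grabbed and busied */
                pa[i] = &object_pages[i];
        }
        swapin_run(pa, NPAGES);
        for (int i = 0; i < NPAGES; i++)
                printf("page %d: valid=%d xbusy=%d\n", pa[i]->pindex,
                    pa[i]->valid, pa[i]->xbusy);
        return (0);
}

The pages trimmed off the tail of the run are what the new count/count1 bookkeeping in sendfile_swapin() tracks, and in the kernel the relookup of bogus entries happens in sendfile_iodone() only once the pager has finished with the run.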