Author: kib
Date: Mon Mar 30 22:13:32 2020
New Revision: 359473
URL: https://svnweb.freebsd.org/changeset/base/359473

Log:
  kern_sendfile.c: fix bugs with handling of busy page states.
  
  - Do not call into a vnode pager while leaving some pages from the
    same block as the current run, xbusy. This immediately deadlocks if
    pager needs to instantiate the buffer.
  - Only relookup bogus pages after io finished, otherwise we might
    obliterate the valid pages by out of date disk content.  While there,
    expand the comment explaining this pecularity.
  - Do not double-unbusy on error.  Split unbusy for error case, which
    is left in the sendfile_swapin(), from the more properly coded
    normal case in sendfile_iodone().
  - Add an XXXKIB comment explaining the serious bug in the validation
    algorithm, not fixed by this patch series.
  
  PR:   244713
  Reviewed by:  glebius, markj
  Tested by:    pho
  Sponsored by: The FreeBSD Foundation
  MFC after:    1 week
  Differential revision:        https://reviews.freebsd.org/D24038

Modified:
  head/sys/kern/kern_sendfile.c

Modified: head/sys/kern/kern_sendfile.c
==============================================================================
--- head/sys/kern/kern_sendfile.c       Mon Mar 30 22:07:11 2020        
(r359472)
+++ head/sys/kern/kern_sendfile.c       Mon Mar 30 22:13:32 2020        
(r359473)
@@ -92,6 +92,7 @@ struct sf_io {
        struct socket   *so;
        struct mbuf     *m;
        vm_object_t     obj;
+       vm_pindex_t     pindex0;
 #ifdef KERN_TLS
        struct ktls_session *tls;
 #endif
@@ -270,17 +271,42 @@ sendfile_iowait(struct sf_io *sfio, const char *wmesg)
  * I/O completion callback.
  */
 static void
-sendfile_iodone(void *arg, vm_page_t *pg, int count, int error)
+sendfile_iodone(void *arg, vm_page_t *pa, int count, int error)
 {
        struct sf_io *sfio = arg;
        struct socket *so;
+       int i;
 
-       for (int i = 0; i < count; i++)
-               if (pg[i] != bogus_page)
-                       vm_page_xunbusy_unchecked(pg[i]);
-
-       if (error)
+       if (error != 0) {
                sfio->error = error;
+               /*
+                * Restore of the pg[] elements is done by
+                * sendfile_swapin().
+                */
+       } else {
+               /*
+                * Restore the valid page pointers.  They are already
+                * unbusied, but still wired.  For error != 0 case,
+                * sendfile_swapin() handles unbusy.
+                *
+                * XXXKIB since pages are only wired, and we do not
+                * own the object lock, other users might have
+                * invalidated them in meantime.  Similarly, after we
+                * unbusied the swapped-in pages, they can become
+                * invalid under us.
+                */
+               for (i = 0; i < count; i++) {
+                       if (pa[i] == bogus_page) {
+                               pa[i] = vm_page_relookup(sfio->obj,
+                                   sfio->pindex0 + i + (sfio->pa - pa));
+                               KASSERT(pa[i] != NULL,
+                                   ("%s: page %p[%d] disappeared",
+                                   __func__, pa, i));
+                       } else {
+                               vm_page_xunbusy_unchecked(pa[i]);
+                       }
+               }
+       }
 
        if (!refcount_release(&sfio->nios))
                return;
@@ -361,11 +387,13 @@ static int
 sendfile_swapin(vm_object_t obj, struct sf_io *sfio, int *nios, off_t off,
     off_t len, int npages, int rhpages, int flags)
 {
-       vm_page_t *pa = sfio->pa;
-       int grabbed;
+       vm_page_t *pa;
+       int a, count, count1, grabbed, i, j, rv;
 
+       pa = sfio->pa;
        *nios = 0;
        flags = (flags & SF_NODISKIO) ? VM_ALLOC_NOWAIT : 0;
+       sfio->pindex0 = OFF_TO_IDX(off);
 
        /*
         * First grab all the pages and wire them.  Note that we grab
@@ -380,9 +408,7 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
                rhpages = 0;
        }
 
-       for (int i = 0; i < npages;) {
-               int j, a, count, rv;
-
+       for (i = 0; i < npages;) {
                /* Skip valid pages. */
                if (vm_page_is_valid(pa[i], vmoff(i, off) & PAGE_MASK,
                    xfsize(i, npages, off, len))) {
@@ -422,19 +448,41 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
                count = min(a + 1, npages - i);
 
                /*
-                * We should not pagein into a valid page, thus we first trim
-                * any valid pages off the end of request, and substitute
-                * to bogus_page those, that are in the middle.
+                * We should not pagein into a valid page because
+                * there might be still unfinished write tracked by
+                * e.g. a buffer, thus we substitute any valid pages
+                * with the bogus one.
+                *
+                * We must not leave around xbusy pages which are not
+                * part of the run passed to vm_pager_getpages(),
+                * otherwise pager might deadlock waiting for the busy
+                * status of the page, e.g. if it constitues the
+                * buffer needed to validate other page.
+                *
+                * First trim the end of the run consisting of the
+                * valid pages, then replace the rest of the valid
+                * with bogus.
                 */
+               count1 = count;
                for (j = i + count - 1; j > i; j--) {
                        if (vm_page_is_valid(pa[j], vmoff(j, off) & PAGE_MASK,
                            xfsize(j, npages, off, len))) {
+                               vm_page_xunbusy(pa[j]);
+                               SFSTAT_INC(sf_pages_valid);
                                count--;
-                               rhpages = 0;
-                       } else
+                       } else {
                                break;
+                       }
                }
-               for (j = i + 1; j < i + count - 1; j++)
+
+               /*
+                * The last page in the run pa[i + count - 1] is
+                * guaranteed to be invalid by the trim above, so it
+                * is not replaced with bogus, thus -1 in the loop end
+                * condition.
+                */
+               MPASS(pa[i + count - 1]->valid != VM_PAGE_BITS_ALL);
+               for (j = i + 1; j < i + count - 1; j++) {
                        if (vm_page_is_valid(pa[j], vmoff(j, off) & PAGE_MASK,
                            xfsize(j, npages, off, len))) {
                                vm_page_xunbusy(pa[j]);
@@ -442,6 +490,7 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
                                SFSTAT_INC(sf_pages_bogus);
                                pa[j] = bogus_page;
                        }
+               }
 
                refcount_acquire(&sfio->nios);
                rv = vm_pager_get_pages_async(obj, pa + i, count, NULL,
@@ -453,12 +502,16 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
                        /*
                         * Perform full pages recovery before returning EIO.
                         * Pages from 0 to npages are wired.
-                        * Pages from i to npages are also busied.
                         * Pages from (i + 1) to (i + count - 1) may be
                         * substituted to bogus page, and not busied.
+                        * Pages from (i + count) to (i + count1 - 1) are
+                        * not busied.
+                        * Rest of the pages from i to npages are busied.
                         */
                        for (j = 0; j < npages; j++) {
-                               if (j > i && j < i + count - 1 &&
+                               if (j >= i + count && j < i + count1)
+                                       ;
+                               else if (j > i && j < i + count - 1 &&
                                    pa[j] == bogus_page)
                                        pa[j] = vm_page_relookup(obj,
                                            OFF_TO_IDX(vmoff(j, off)));
@@ -477,19 +530,7 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
                if (i + count == npages)
                        SFSTAT_ADD(sf_rhpages_read, rhpages);
 
-               /*
-                * Restore the valid page pointers.  They are already
-                * unbusied, but still wired.
-                */
-               for (j = i + 1; j < i + count - 1; j++)
-                       if (pa[j] == bogus_page) {
-                               pa[j] = vm_page_relookup(obj,
-                                   OFF_TO_IDX(vmoff(j, off)));
-                               KASSERT(pa[j], ("%s: page %p[%d] disappeared",
-                                   __func__, pa, j));
-
-                       }
-               i += count;
+               i += count1;
                (*nios)++;
        }
 
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to