Author: jeff
Date: Tue Oct 29 20:37:59 2019
New Revision: 354155
URL: https://svnweb.freebsd.org/changeset/base/354155

Log:
  Drop the object lock in vfs_bio and cluster where it is now safe to do so.
  
  Recent changes to busy/valid/dirty have enabled page-based synchronization,
  and the object lock is no longer required in many cases.
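  
  In outline, the pattern applied in vfs_vmio_iodone() and
  vfs_unbusy_pages() below is this (a condensed sketch of the hunks,
  not a verbatim excerpt): the pages themselves are synchronized by
  their busy state, so the write lock around the whole page loop goes
  away, and a read lock is taken lazily only when a bogus_page
  placeholder has to be resolved through the object's page tree:
  
    bogus = false;
    for (i = 0; i < bp->b_npages; i++) {
            m = bp->b_pages[i];
            if (m == bogus_page) {
                    if (!bogus) {
                            bogus = true;
                            VM_OBJECT_RLOCK(obj);   /* only for lookup */
                    }
                    m = vm_page_lookup(obj, OFF_TO_IDX(foff));
            }
            /* ... per-page valid/dirty updates, vm_page_sunbusy(),
             * etc., need no object lock ... */
    }
    if (bogus)
            VM_OBJECT_RUNLOCK(obj);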
  
  Reviewed by:  kib
  Sponsored by: Netflix, Intel
  Differential Revision:        https://reviews.freebsd.org/D21597

Modified:
  head/sys/kern/vfs_bio.c
  head/sys/kern/vfs_cluster.c

Modified: head/sys/kern/vfs_bio.c
==============================================================================
--- head/sys/kern/vfs_bio.c     Tue Oct 29 20:28:02 2019        (r354154)
+++ head/sys/kern/vfs_bio.c     Tue Oct 29 20:37:59 2019        (r354155)
@@ -162,7 +162,7 @@ static void vfs_page_set_valid(struct buf *bp, vm_ooff
 static void vfs_page_set_validclean(struct buf *bp, vm_ooffset_t off,
                vm_page_t m);
 static void vfs_clean_pages_dirty_buf(struct buf *bp);
-static void vfs_setdirty_locked_object(struct buf *bp);
+static void vfs_setdirty_range(struct buf *bp);
 static void vfs_vmio_invalidate(struct buf *bp);
 static void vfs_vmio_truncate(struct buf *bp, int npages);
 static void vfs_vmio_extend(struct buf *bp, int npages, int size);
@@ -955,8 +955,6 @@ vfs_buf_test_cache(struct buf *bp, vm_ooffset_t foff, 
     vm_offset_t size, vm_page_t m)
 {
 
-       VM_OBJECT_ASSERT_LOCKED(m->object);
-
        /*
         * This function and its results are protected by higher level
         * synchronization requiring vnode and buf locks to page in and
@@ -2865,7 +2863,6 @@ vfs_vmio_iodone(struct buf *bp)
 
        bogus = false;
        iosize = bp->b_bcount - bp->b_resid;
-       VM_OBJECT_WLOCK(obj);
        for (i = 0; i < bp->b_npages; i++) {
                resid = ((foff + PAGE_SIZE) & ~(off_t)PAGE_MASK) - foff;
                if (resid > iosize)
@@ -2876,7 +2873,10 @@ vfs_vmio_iodone(struct buf *bp)
                 */
                m = bp->b_pages[i];
                if (m == bogus_page) {
-                       bogus = true;
+                       if (bogus == false) {
+                               bogus = true;
+                               VM_OBJECT_RLOCK(obj);
+                       }
                        m = vm_page_lookup(obj, OFF_TO_IDX(foff));
                        if (m == NULL)
                                panic("biodone: page disappeared!");
@@ -2900,8 +2900,9 @@ vfs_vmio_iodone(struct buf *bp)
                foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
                iosize -= resid;
        }
+       if (bogus)
+               VM_OBJECT_RUNLOCK(obj);
        vm_object_pip_wakeupn(obj, bp->b_npages);
-       VM_OBJECT_WUNLOCK(obj);
        if (bogus && buf_mapped(bp)) {
                BUF_CHECK_MAPPED(bp);
                pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
@@ -3029,7 +3030,6 @@ vfs_vmio_extend(struct buf *bp, int desiredpages, int 
         * are not valid for the range covered by the buffer.
         */
        obj = bp->b_bufobj->bo_object;
-       VM_OBJECT_WLOCK(obj);
        if (bp->b_npages < desiredpages) {
                /*
                 * We must allocate system pages since blocking
@@ -3041,11 +3041,13 @@ vfs_vmio_extend(struct buf *bp, int desiredpages, int 
                 * deadlocks once allocbuf() is called after
                 * pages are vfs_busy_pages().
                 */
+               VM_OBJECT_WLOCK(obj);
                (void)vm_page_grab_pages(obj,
                    OFF_TO_IDX(bp->b_offset) + bp->b_npages,
                    VM_ALLOC_SYSTEM | VM_ALLOC_IGN_SBUSY |
                    VM_ALLOC_NOBUSY | VM_ALLOC_WIRED,
                    &bp->b_pages[bp->b_npages], desiredpages - bp->b_npages);
+               VM_OBJECT_WUNLOCK(obj);
                bp->b_npages = desiredpages;
        }
 
@@ -3076,7 +3078,6 @@ vfs_vmio_extend(struct buf *bp, int desiredpages, int 
                toff += tinc;
                tinc = PAGE_SIZE;
        }
-       VM_OBJECT_WUNLOCK(obj);
 
        /*
         * Step 3, fixup the KVA pmap.
@@ -3656,9 +3657,8 @@ vfs_clean_pages_dirty_buf(struct buf *bp)
        KASSERT(bp->b_offset != NOOFFSET,
            ("vfs_clean_pages_dirty_buf: no buffer offset"));
 
-       VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
        vfs_busy_pages_acquire(bp);
-       vfs_setdirty_locked_object(bp);
+       vfs_setdirty_range(bp);
        for (i = 0; i < bp->b_npages; i++) {
                noff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
                eoff = noff;
@@ -3670,69 +3670,57 @@ vfs_clean_pages_dirty_buf(struct buf *bp)
                foff = noff;
        }
        vfs_busy_pages_release(bp);
-       VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
 }
 
 static void
-vfs_setdirty_locked_object(struct buf *bp)
+vfs_setdirty_range(struct buf *bp)
 {
-       vm_object_t object;
+       vm_offset_t boffset;
+       vm_offset_t eoffset;
        int i;
 
-       object = bp->b_bufobj->bo_object;
-       VM_OBJECT_ASSERT_WLOCKED(object);
+       /*
+        * test the pages to see if they have been modified directly
+        * by users through the VM system.
+        */
+       for (i = 0; i < bp->b_npages; i++)
+               vm_page_test_dirty(bp->b_pages[i]);
 
        /*
-        * We qualify the scan for modified pages on whether the
-        * object has been flushed yet.
+        * Calculate the encompassing dirty range, boffset and eoffset,
+        * (eoffset - boffset) bytes.
         */
-       if ((object->flags & OBJ_MIGHTBEDIRTY) != 0) {
-               vm_offset_t boffset;
-               vm_offset_t eoffset;
 
-               /*
-                * test the pages to see if they have been modified directly
-                * by users through the VM system.
-                */
-               for (i = 0; i < bp->b_npages; i++)
-                       vm_page_test_dirty(bp->b_pages[i]);
+       for (i = 0; i < bp->b_npages; i++) {
+               if (bp->b_pages[i]->dirty)
+                       break;
+       }
+       boffset = (i << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
 
-               /*
-                * Calculate the encompassing dirty range, boffset and eoffset,
-                * (eoffset - boffset) bytes.
-                */
-
-               for (i = 0; i < bp->b_npages; i++) {
-                       if (bp->b_pages[i]->dirty)
-                               break;
+       for (i = bp->b_npages - 1; i >= 0; --i) {
+               if (bp->b_pages[i]->dirty) {
+                       break;
                }
-               boffset = (i << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
+       }
+       eoffset = ((i + 1) << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
 
-               for (i = bp->b_npages - 1; i >= 0; --i) {
-                       if (bp->b_pages[i]->dirty) {
-                               break;
-                       }
-               }
-               eoffset = ((i + 1) << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
+       /*
+        * Fit it to the buffer.
+        */
 
-               /*
-                * Fit it to the buffer.
-                */
+       if (eoffset > bp->b_bcount)
+               eoffset = bp->b_bcount;
 
-               if (eoffset > bp->b_bcount)
-                       eoffset = bp->b_bcount;
+       /*
+        * If we have a good dirty range, merge with the existing
+        * dirty range.
+        */
 
-               /*
-                * If we have a good dirty range, merge with the existing
-                * dirty range.
-                */
-
-               if (boffset < eoffset) {
-                       if (bp->b_dirtyoff > boffset)
-                               bp->b_dirtyoff = boffset;
-                       if (bp->b_dirtyend < eoffset)
-                               bp->b_dirtyend = eoffset;
-               }
+       if (boffset < eoffset) {
+               if (bp->b_dirtyoff > boffset)
+                       bp->b_dirtyoff = boffset;
+               if (bp->b_dirtyend < eoffset)
+                       bp->b_dirtyend = eoffset;
        }
 }
 
@@ -4472,16 +4460,21 @@ vfs_unbusy_pages(struct buf *bp)
        int i;
        vm_object_t obj;
        vm_page_t m;
+       bool bogus;
 
        runningbufwakeup(bp);
        if (!(bp->b_flags & B_VMIO))
                return;
 
        obj = bp->b_bufobj->bo_object;
-       VM_OBJECT_WLOCK(obj);
+       bogus = false;
        for (i = 0; i < bp->b_npages; i++) {
                m = bp->b_pages[i];
                if (m == bogus_page) {
+                       if (bogus == false) {
+                               bogus = true;
+                               VM_OBJECT_RLOCK(obj);
+                       }
                        m = vm_page_lookup(obj, OFF_TO_IDX(bp->b_offset) + i);
                        if (!m)
                                panic("vfs_unbusy_pages: page missing\n");
@@ -4495,8 +4488,9 @@ vfs_unbusy_pages(struct buf *bp)
                }
                vm_page_sunbusy(m);
        }
+       if (bogus)
+               VM_OBJECT_RUNLOCK(obj);
        vm_object_pip_wakeupn(obj, bp->b_npages);
-       VM_OBJECT_WUNLOCK(obj);
 }
 
 /*
@@ -4573,7 +4567,6 @@ vfs_busy_pages_acquire(struct buf *bp)
 {
        int i;
 
-       VM_OBJECT_ASSERT_WLOCKED(bp->b_bufobj->bo_object);
        for (i = 0; i < bp->b_npages; i++)
                vm_page_busy_acquire(bp->b_pages[i], VM_ALLOC_SBUSY);
 }
@@ -4583,7 +4576,6 @@ vfs_busy_pages_release(struct buf *bp)
 {
        int i;
 
-       VM_OBJECT_ASSERT_WLOCKED(bp->b_bufobj->bo_object);
        for (i = 0; i < bp->b_npages; i++)
                vm_page_sunbusy(bp->b_pages[i]);
 }
@@ -4616,13 +4608,12 @@ vfs_busy_pages(struct buf *bp, int clear_modify)
        foff = bp->b_offset;
        KASSERT(bp->b_offset != NOOFFSET,
            ("vfs_busy_pages: no buffer offset"));
-       VM_OBJECT_WLOCK(obj);
        if ((bp->b_flags & B_CLUSTER) == 0) {
                vm_object_pip_add(obj, bp->b_npages);
                vfs_busy_pages_acquire(bp);
        }
        if (bp->b_bufsize != 0)
-               vfs_setdirty_locked_object(bp);
+               vfs_setdirty_range(bp);
        bogus = false;
        for (i = 0; i < bp->b_npages; i++) {
                m = bp->b_pages[i];
@@ -4653,7 +4644,6 @@ vfs_busy_pages(struct buf *bp, int clear_modify)
                }
                foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
        }
-       VM_OBJECT_WUNLOCK(obj);
        if (bogus && buf_mapped(bp)) {
                BUF_CHECK_MAPPED(bp);
                pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
@@ -4686,8 +4676,6 @@ vfs_bio_set_valid(struct buf *bp, int base, int size)
        base += (bp->b_offset & PAGE_MASK);
        n = PAGE_SIZE - (base & PAGE_MASK);
 
-       VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
-
        /*
         * Busy may not be strictly necessary here because the pages are
         * unlikely to be fully valid and the vnode lock will synchronize
@@ -4705,7 +4693,6 @@ vfs_bio_set_valid(struct buf *bp, int base, int size)
                n = PAGE_SIZE;
        }
        vfs_busy_pages_release(bp);
-       VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
 }
 
 /*
@@ -4731,22 +4718,7 @@ vfs_bio_clrbuf(struct buf *bp) 
        }
        bp->b_flags &= ~B_INVAL;
        bp->b_ioflags &= ~BIO_ERROR;
-       VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
        vfs_busy_pages_acquire(bp);
-       if ((bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) &&
-           (bp->b_offset & PAGE_MASK) == 0) {
-               if (bp->b_pages[0] == bogus_page)
-                       goto unlock;
-               mask = (1 << (bp->b_bufsize / DEV_BSIZE)) - 1;
-               VM_OBJECT_ASSERT_WLOCKED(bp->b_pages[0]->object);
-               if ((bp->b_pages[0]->valid & mask) == mask)
-                       goto unlock;
-               if ((bp->b_pages[0]->valid & mask) == 0) {
-                       pmap_zero_page_area(bp->b_pages[0], 0, bp->b_bufsize);
-                       bp->b_pages[0]->valid |= mask;
-                       goto unlock;
-               }
-       }
        sa = bp->b_offset & PAGE_MASK;
        slide = 0;
        for (i = 0; i < bp->b_npages; i++, sa = 0) {
@@ -4758,7 +4730,6 @@ vfs_bio_clrbuf(struct buf *bp) 
                        continue;
                j = sa / DEV_BSIZE;
                mask = ((1 << ((ea - sa) / DEV_BSIZE)) - 1) << j;
-               VM_OBJECT_ASSERT_WLOCKED(bp->b_pages[i]->object);
                if ((bp->b_pages[i]->valid & mask) == mask)
                        continue;
                if ((bp->b_pages[i]->valid & mask) == 0)
@@ -4771,11 +4742,10 @@ vfs_bio_clrbuf(struct buf *bp) 
                                }
                        }
                }
-               bp->b_pages[i]->valid |= mask;
+               vm_page_set_valid_range(bp->b_pages[i], j * DEV_BSIZE,
+                   roundup2(ea - sa, DEV_BSIZE));
        }
-unlock:
        vfs_busy_pages_release(bp);
-       VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
        bp->b_resid = 0;
 }
 
@@ -5186,11 +5156,9 @@ vfs_bio_getpages(struct vnode *vp, vm_page_t *ma, int 
 
        br_flags = (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMAPPED_BUFS)
            != 0) ? GB_UNMAPPED : 0;
-       VM_OBJECT_WLOCK(object);
 again:
        for (i = 0; i < count; i++)
                vm_page_busy_downgrade(ma[i]);
-       VM_OBJECT_WUNLOCK(object);
 
        lbnp = -1;
        for (i = 0; i < count; i++) {
@@ -5249,11 +5217,9 @@ again:
                    vm_page_all_valid(m) || i == count - 1,
                    ("buf %d %p invalid", i, m));
                if (i == count - 1 && lpart) {
-                       VM_OBJECT_WLOCK(object);
                        if (!vm_page_none_valid(m) &&
                            !vm_page_all_valid(m))
                                vm_page_zero_invalid(m, TRUE);
-                       VM_OBJECT_WUNLOCK(object);
                }
 next_page:;
        }
@@ -5281,9 +5247,9 @@ end_pages:
                if (!vm_page_all_valid(ma[i]))
                        redo = true;
        }
+       VM_OBJECT_WUNLOCK(object);
        if (redo && error == 0)
                goto again;
-       VM_OBJECT_WUNLOCK(object);
        return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
 }
 

Modified: head/sys/kern/vfs_cluster.c
==============================================================================
--- head/sys/kern/vfs_cluster.c Tue Oct 29 20:28:02 2019        (r354154)
+++ head/sys/kern/vfs_cluster.c Tue Oct 29 20:37:59 2019        (r354155)
@@ -417,11 +417,9 @@ cluster_rbuild(struct vnode *vp, u_quad_t filesize, da
        inc = btodb(size);
        for (bn = blkno, i = 0; i < run; ++i, bn += inc) {
                if (i == 0) {
-                       VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
                        vm_object_pip_add(tbp->b_bufobj->bo_object,
                            tbp->b_npages);
                        vfs_busy_pages_acquire(tbp);
-                       VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);
                } else {
                        if ((bp->b_npages * PAGE_SIZE) +
                            round_page(size) > vp->v_mount->mnt_iosize_max) {
@@ -458,13 +456,11 @@ cluster_rbuild(struct vnode *vp, u_quad_t filesize, da
                         */
                        off = tbp->b_offset;
                        tsize = size;
-                       VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
                        for (j = 0; tsize > 0; j++) {
                                toff = off & PAGE_MASK;
                                tinc = tsize;
                                if (toff + tinc > PAGE_SIZE)
                                        tinc = PAGE_SIZE - toff;
-                               VM_OBJECT_ASSERT_WLOCKED(tbp->b_pages[j]->object);
                                if (vm_page_trysbusy(tbp->b_pages[j]) == 0)
                                        break;
                                if ((tbp->b_pages[j]->valid &
@@ -482,11 +478,9 @@ clean_sbusy:
                                    j);
                                for (k = 0; k < j; k++)
                                        vm_page_sunbusy(tbp->b_pages[k]);
-                               VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);
                                bqrelse(tbp);
                                break;
                        }
-                       VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);
 
                        /*
                         * Set a read-ahead mark as appropriate
@@ -506,7 +500,6 @@ clean_sbusy:
                        if (tbp->b_blkno == tbp->b_lblkno) {
                                tbp->b_blkno = bn;
                        } else if (tbp->b_blkno != bn) {
-                               VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
                                goto clean_sbusy;
                        }
                }
@@ -517,9 +510,9 @@ clean_sbusy:
                BUF_KERNPROC(tbp);
                TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
                        tbp, b_cluster.cluster_entry);
-               VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
                for (j = 0; j < tbp->b_npages; j += 1) {
                        vm_page_t m;
+
                        m = tbp->b_pages[j];
                        if ((bp->b_npages == 0) ||
                            (bp->b_pages[bp->b_npages-1] != m)) {
@@ -529,7 +522,7 @@ clean_sbusy:
                        if (vm_page_all_valid(m))
                                tbp->b_pages[j] = bogus_page;
                }
-               VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);
+
                /*
                 * Don't inherit tbp->b_bufsize as it may be larger due to
                 * a non-page-aligned size.  Instead just aggregate using
@@ -547,13 +540,10 @@ clean_sbusy:
         * Fully valid pages in the cluster are already good and do not need
         * to be re-read from disk.  Replace the page with bogus_page
         */
-       VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
        for (j = 0; j < bp->b_npages; j++) {
-               VM_OBJECT_ASSERT_WLOCKED(bp->b_pages[j]->object);
                if (vm_page_all_valid(bp->b_pages[j]))
                        bp->b_pages[j] = bogus_page;
        }
-       VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
        if (bp->b_bufsize > bp->b_kvasize)
                panic("cluster_rbuild: b_bufsize(%ld) > b_kvasize(%d)\n",
                    bp->b_bufsize, bp->b_kvasize);
@@ -988,7 +978,6 @@ cluster_wbuild(struct vnode *vp, long size, daddr_t st
                        if (tbp->b_flags & B_VMIO) {
                                vm_page_t m;
 
-                               VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
                                if (i == 0) {
                                        vfs_busy_pages_acquire(tbp);
                                } else { /* if not first buffer */
@@ -998,8 +987,6 @@ cluster_wbuild(struct vnode *vp, long size, daddr_t st
                                                        for (j--; j >= 0; j--)
                                                                vm_page_sunbusy(
                                                                    tbp->b_pages[j]);
-                                                       VM_OBJECT_WUNLOCK(
-                                                           tbp->b_bufobj->bo_object);
                                                        bqrelse(tbp);
                                                        goto finishcluster;
                                                }
@@ -1015,7 +1002,6 @@ cluster_wbuild(struct vnode *vp, long size, daddr_t st
                                                bp->b_npages++;
                                        }
                                }
-                               VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);
                        }
                        bp->b_bcount += size;
                        bp->b_bufsize += size;
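
For reference, the byte-range arithmetic in the new vfs_setdirty_range()
is the same as in the old vfs_setdirty_locked_object(); what changed is
that the scan is no longer qualified by OBJ_MIGHTBEDIRTY and no longer
needs the object lock.  A hand-worked example of that arithmetic,
assuming 4 KB pages (PAGE_SHIFT = 12):

  /*
   * first/last = indices of the first and last dirty pages found by
   * the two scans; subtracting b_offset's page offset makes the
   * result relative to the start of the buffer:
   *
   *   boffset = (first << PAGE_SHIFT) - (b_offset & PAGE_MASK)
   *   eoffset = ((last + 1) << PAGE_SHIFT) - (b_offset & PAGE_MASK)
   *
   * E.g. with b_offset = 0x1200 and only page 1 dirty:
   *   boffset = (1 << 12) - 0x200 = 0x0e00
   *   eoffset = (2 << 12) - 0x200 = 0x1e00
   * eoffset is then clipped to b_bcount, and [boffset, eoffset) is
   * merged into b_dirtyoff/b_dirtyend.
   */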