Author: mav
Date: Tue Jan 29 20:35:09 2019
New Revision: 343562
URL: https://svnweb.freebsd.org/changeset/base/343562

Log:
  Reimplement BIO_ORDERED handling in nvd(4).
  
  This fixes BIO_ORDERED semantics while also improving performance by:
   - sleeping also before BIO_ORDERED bio, as defined, not only after;
   - not queueing BIO_ORDERED bio to taskqueue if no other bios running;
   - waking up sleeping taskqueue explicitly rather then rely on polling.
  
  On Samsung SSD 970 PRO this shows sync write latency, measured with
  `diskinfo -wS`, reduction from ~2ms to ~1.1ms by not sleeping without
  reason till next HZ tick.
  
  On the same device ZFS pool with 8 ZVOLs synchronously writing 4KB blocks
  shows ~950 IOPS instead of ~750 IOPS before.  I suspect ZFS does not need
  BIO_ORDERED on BIO_FLUSH at all, but that will be next question.
  
  MFC after:    2 weeks
  Sponsored by: iXsystems, Inc.

Modified:
  head/sys/dev/nvd/nvd.c

Modified: head/sys/dev/nvd/nvd.c
==============================================================================
--- head/sys/dev/nvd/nvd.c      Tue Jan 29 20:10:27 2019        (r343561)
+++ head/sys/dev/nvd/nvd.c      Tue Jan 29 20:35:09 2019        (r343562)
@@ -82,6 +82,7 @@ struct nvd_disk {
        struct nvme_namespace   *ns;
 
        uint32_t                cur_depth;
+#define        NVD_ODEPTH      (1 << 31)
        uint32_t                ordered_in_flight;
        u_int                   unit;
 
@@ -181,39 +182,50 @@ nvd_unload()
        mtx_destroy(&nvd_lock);
 }
 
-static int
+static void
 nvd_bio_submit(struct nvd_disk *ndisk, struct bio *bp)
 {
        int err;
 
        bp->bio_driver1 = NULL;
-       atomic_add_int(&ndisk->cur_depth, 1);
+       if (__predict_false(bp->bio_flags & BIO_ORDERED))
+               atomic_add_int(&ndisk->cur_depth, NVD_ODEPTH);
+       else
+               atomic_add_int(&ndisk->cur_depth, 1);
        err = nvme_ns_bio_process(ndisk->ns, bp, nvd_done);
        if (err) {
-               atomic_add_int(&ndisk->cur_depth, -1);
-               if (__predict_false(bp->bio_flags & BIO_ORDERED))
+               if (__predict_false(bp->bio_flags & BIO_ORDERED)) {
+                       atomic_add_int(&ndisk->cur_depth, -NVD_ODEPTH);
                        atomic_add_int(&ndisk->ordered_in_flight, -1);
+                       wakeup(&ndisk->cur_depth);
+               } else {
+                       if (atomic_fetchadd_int(&ndisk->cur_depth, -1) == 1 &&
+                           __predict_false(ndisk->ordered_in_flight != 0))
+                               wakeup(&ndisk->cur_depth);
+               }
                bp->bio_error = err;
                bp->bio_flags |= BIO_ERROR;
                bp->bio_resid = bp->bio_bcount;
                biodone(bp);
-               return (-1);
        }
-
-       return (0);
 }
 
 static void
 nvd_strategy(struct bio *bp)
 {
-       struct nvd_disk *ndisk;
+       struct nvd_disk *ndisk = (struct nvd_disk *)bp->bio_disk->d_drv1;
 
-       ndisk = (struct nvd_disk *)bp->bio_disk->d_drv1;
-
-       if (__predict_false(bp->bio_flags & BIO_ORDERED))
-               atomic_add_int(&ndisk->ordered_in_flight, 1);
-
-       if (__predict_true(ndisk->ordered_in_flight == 0)) {
+       /*
+        * bio with BIO_ORDERED flag must be executed after all previous
+        * bios in the queue, and before any successive bios.
+        */
+       if (__predict_false(bp->bio_flags & BIO_ORDERED)) {
+               if (atomic_fetchadd_int(&ndisk->ordered_in_flight, 1) == 0 &&
+                   ndisk->cur_depth == 0 && bioq_first(&ndisk->bioq) == NULL) {
+                       nvd_bio_submit(ndisk, bp);
+                       return;
+               }
+       } else if (__predict_true(ndisk->ordered_in_flight == 0)) {
                nvd_bio_submit(ndisk, bp);
                return;
        }
@@ -281,28 +293,27 @@ nvd_ioctl(struct disk *ndisk, u_long cmd, void *data, 
 static int
 nvd_dump(void *arg, void *virt, vm_offset_t phys, off_t offset, size_t len)
 {
-       struct nvd_disk *ndisk;
-       struct disk *dp;
+       struct disk *dp = arg;
+       struct nvd_disk *ndisk = dp->d_drv1;
 
-       dp = arg;
-       ndisk = dp->d_drv1;
-
        return (nvme_ns_dump(ndisk->ns, virt, offset, len));
 }
 
 static void
 nvd_done(void *arg, const struct nvme_completion *cpl)
 {
-       struct bio *bp;
-       struct nvd_disk *ndisk;
+       struct bio *bp = (struct bio *)arg;
+       struct nvd_disk *ndisk = bp->bio_disk->d_drv1;
 
-       bp = (struct bio *)arg;
-
-       ndisk = bp->bio_disk->d_drv1;
-
-       atomic_add_int(&ndisk->cur_depth, -1);
-       if (__predict_false(bp->bio_flags & BIO_ORDERED))
+       if (__predict_false(bp->bio_flags & BIO_ORDERED)) {
+               atomic_add_int(&ndisk->cur_depth, -NVD_ODEPTH);
                atomic_add_int(&ndisk->ordered_in_flight, -1);
+               wakeup(&ndisk->cur_depth);
+       } else {
+               if (atomic_fetchadd_int(&ndisk->cur_depth, -1) == 1 &&
+                   __predict_false(ndisk->ordered_in_flight != 0))
+                       wakeup(&ndisk->cur_depth);
+       }
 
        biodone(bp);
 }
@@ -320,22 +331,23 @@ nvd_bioq_process(void *arg, int pending)
                if (bp == NULL)
                        break;
 
-               if (nvd_bio_submit(ndisk, bp) != 0) {
-                       continue;
+               if (__predict_false(bp->bio_flags & BIO_ORDERED)) {
+                       /*
+                        * bio with BIO_ORDERED flag set must be executed
+                        * after all previous bios.
+                        */
+                       while (ndisk->cur_depth > 0)
+                               tsleep(&ndisk->cur_depth, 0, "nvdorb", 1);
+               } else {
+                       /*
+                        * bio with BIO_ORDERED flag set must be completed
+                        * before proceeding with additional bios.
+                        */
+                       while (ndisk->cur_depth >= NVD_ODEPTH)
+                               tsleep(&ndisk->cur_depth, 0, "nvdora", 1);
                }
 
-#ifdef BIO_ORDERED
-               /*
-                * BIO_ORDERED flag dictates that the bio with BIO_ORDERED
-                *  flag set must be completed before proceeding with
-                *  additional bios.
-                */
-               if (bp->bio_flags & BIO_ORDERED) {
-                       while (ndisk->cur_depth > 0) {
-                               pause("nvd flush", 1);
-                       }
-               }
-#endif
+               nvd_bio_submit(ndisk, bp);
        }
 }
 
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to