Author: jimharris
Date: Sun Nov  3 20:50:48 2013
New Revision: 257587
URL: http://svnweb.freebsd.org/changeset/base/257587

Log:
  MFC r256151:
  
  Add driver-assisted striping for upcoming Intel NVMe controllers that can
  benefit from it.
  
  Sponsored by: Intel

Modified:
  stable/9/sys/dev/nvd/nvd.c
  stable/9/sys/dev/nvme/nvme.h
  stable/9/sys/dev/nvme/nvme_ns.c
  stable/9/sys/dev/nvme/nvme_private.h
Directory Properties:
  stable/9/sys/   (props changed)
  stable/9/sys/dev/   (props changed)

Modified: stable/9/sys/dev/nvd/nvd.c
==============================================================================
--- stable/9/sys/dev/nvd/nvd.c  Sun Nov  3 20:38:51 2013        (r257586)
+++ stable/9/sys/dev/nvd/nvd.c  Sun Nov  3 20:50:48 2013        (r257587)
@@ -187,17 +187,6 @@ nvd_done(void *arg, const struct nvme_co
 
        atomic_add_int(&ndisk->cur_depth, -1);
 
-       /*
-        * TODO: add more extensive translation of NVMe status codes
-        *  to different bio error codes (i.e. EIO, EINVAL, etc.)
-        */
-       if (nvme_completion_is_error(cpl)) {
-               bp->bio_error = EIO;
-               bp->bio_flags |= BIO_ERROR;
-               bp->bio_resid = bp->bio_bcount;
-       } else
-               bp->bio_resid = 0;
-
        biodone(bp);
 }
 

Modified: stable/9/sys/dev/nvme/nvme.h
==============================================================================
--- stable/9/sys/dev/nvme/nvme.h        Sun Nov  3 20:38:51 2013        (r257586)
+++ stable/9/sys/dev/nvme/nvme.h        Sun Nov  3 20:50:48 2013        (r257587)
@@ -532,7 +532,7 @@ struct nvme_controller_data {
        uint8_t                 reserved6[1024];
 
        /* bytes 3072-4095: vendor specific */
-       uint8_t                 reserved7[1024];
+       uint8_t                 vs[1024];
 } __packed __aligned(4);
 
 struct nvme_namespace_data {

Modified: stable/9/sys/dev/nvme/nvme_ns.c
==============================================================================
--- stable/9/sys/dev/nvme/nvme_ns.c     Sun Nov  3 20:38:51 2013        (r257586)
+++ stable/9/sys/dev/nvme/nvme_ns.c     Sun Nov  3 20:50:48 2013        (r257587)
@@ -34,13 +34,31 @@ __FBSDID("$FreeBSD$");
 #include <sys/disk.h>
 #include <sys/fcntl.h>
 #include <sys/ioccom.h>
+#include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/proc.h>
 
 #include <dev/pci/pcivar.h>
 
+#include <geom/geom.h>
+
 #include "nvme_private.h"
 
+static void            nvme_bio_child_inbed(struct bio *parent, int bio_error);
+static void            nvme_bio_child_done(void *arg,
+                                           const struct nvme_completion *cpl);
+static uint32_t                nvme_get_num_segments(uint64_t addr, uint64_t size,
+                                             uint32_t alignment);
+static void            nvme_free_child_bios(int num_bios,
+                                            struct bio **child_bios);
+static struct bio **   nvme_allocate_child_bios(int num_bios);
+static struct bio **   nvme_construct_child_bios(struct bio *bp,
+                                                 uint32_t alignment,
+                                                 int *num_bios);
+static int             nvme_ns_split_bio(struct nvme_namespace *ns,
+                                         struct bio *bp,
+                                         uint32_t alignment);
+
 static int
 nvme_ns_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
     struct thread *td)
@@ -202,18 +220,218 @@ nvme_ns_bio_done(void *arg, const struct
        if (bp->bio_driver2)
                free(bp->bio_driver2, M_NVME);
 
+       if (nvme_completion_is_error(status)) {
+               bp->bio_flags |= BIO_ERROR;
+               if (bp->bio_error == 0)
+                       bp->bio_error = EIO;
+       }
+
+       if ((bp->bio_flags & BIO_ERROR) == 0)
+               bp->bio_resid = 0;
+       else
+               bp->bio_resid = bp->bio_bcount;
+
        bp_cb_fn(bp, status);
 }
 
+static void
+nvme_bio_child_inbed(struct bio *parent, int bio_error)
+{
+       struct nvme_completion  parent_cpl;
+       int                     inbed;
+
+       if (bio_error != 0) {
+               parent->bio_flags |= BIO_ERROR;
+               parent->bio_error = bio_error;
+       }
+
+       /*
+        * atomic_fetchadd will return value before adding 1, so we still
+        *  must add 1 to get the updated inbed number.
+        */
+       inbed = atomic_fetchadd_int(&parent->bio_inbed, 1) + 1;
+       if (inbed == parent->bio_children) {
+               bzero(&parent_cpl, sizeof(parent_cpl));
+               if (parent->bio_flags & BIO_ERROR)
+                       parent_cpl.status.sc = NVME_SC_DATA_TRANSFER_ERROR;
+               nvme_ns_bio_done(parent, &parent_cpl);
+       }
+}
+
+static void
+nvme_bio_child_done(void *arg, const struct nvme_completion *cpl)
+{
+       struct bio              *child = arg;
+       struct bio              *parent;
+       int                     bio_error;
+
+       parent = child->bio_parent;
+       g_destroy_bio(child);
+       bio_error = nvme_completion_is_error(cpl) ? EIO : 0;
+       nvme_bio_child_inbed(parent, bio_error);
+}
+
+static uint32_t
+nvme_get_num_segments(uint64_t addr, uint64_t size, uint32_t align)
+{
+       uint32_t        num_segs, offset, remainder;
+
+       if (align == 0)
+               return (1);
+
+       KASSERT((align & (align - 1)) == 0, ("alignment not power of 2\n"));
+
+       num_segs = size / align;
+       remainder = size & (align - 1);
+       offset = addr & (align - 1);
+       if (remainder > 0 || offset > 0)
+               num_segs += 1 + (remainder + offset - 1) / align;
+       return (num_segs);
+}
+
+static void
+nvme_free_child_bios(int num_bios, struct bio **child_bios)
+{
+       int i;
+
+       for (i = 0; i < num_bios; i++) {
+               if (child_bios[i] != NULL)
+                       g_destroy_bio(child_bios[i]);
+       }
+
+       free(child_bios, M_NVME);
+}
+
+static struct bio **
+nvme_allocate_child_bios(int num_bios)
+{
+       struct bio **child_bios;
+       int err = 0, i;
+
+       child_bios = malloc(num_bios * sizeof(struct bio *), M_NVME, M_NOWAIT);
+       if (child_bios == NULL)
+               return (NULL);
+
+       for (i = 0; i < num_bios; i++) {
+               child_bios[i] = g_new_bio();
+               if (child_bios[i] == NULL)
+                       err = ENOMEM;
+       }
+
+       if (err == ENOMEM) {
+               nvme_free_child_bios(num_bios, child_bios);
+               return (NULL);
+       }
+
+       return (child_bios);
+}
+
+static struct bio **
+nvme_construct_child_bios(struct bio *bp, uint32_t alignment, int *num_bios)
+{
+       struct bio      **child_bios;
+       struct bio      *child;
+       uint64_t        cur_offset;
+       caddr_t         data;
+       uint32_t        rem_bcount;
+       int             i;
+#ifdef NVME_UNMAPPED_BIO_SUPPORT
+       struct vm_page  **ma;
+       uint32_t        ma_offset;
+#endif
+
+       *num_bios = nvme_get_num_segments(bp->bio_offset, bp->bio_bcount,
+           alignment);
+       child_bios = nvme_allocate_child_bios(*num_bios);
+       if (child_bios == NULL)
+               return (NULL);
+
+       bp->bio_children = *num_bios;
+       bp->bio_inbed = 0;
+       cur_offset = bp->bio_offset;
+       rem_bcount = bp->bio_bcount;
+       data = bp->bio_data;
+#ifdef NVME_UNMAPPED_BIO_SUPPORT
+       ma_offset = bp->bio_ma_offset;
+       ma = bp->bio_ma;
+#endif
+
+       for (i = 0; i < *num_bios; i++) {
+               child = child_bios[i];
+               child->bio_parent = bp;
+               child->bio_cmd = bp->bio_cmd;
+               child->bio_offset = cur_offset;
+               child->bio_bcount = min(rem_bcount,
+                   alignment - (cur_offset & (alignment - 1)));
+               child->bio_flags = bp->bio_flags;
+#ifdef NVME_UNMAPPED_BIO_SUPPORT
+               if (bp->bio_flags & BIO_UNMAPPED) {
+                       child->bio_ma_offset = ma_offset;
+                       child->bio_ma = ma;
+                       child->bio_ma_n =
+                           nvme_get_num_segments(child->bio_ma_offset,
+                               child->bio_bcount, PAGE_SIZE);
+                       ma_offset = (ma_offset + child->bio_bcount) &
+                           PAGE_MASK;
+                       ma += child->bio_ma_n;
+                       if (ma_offset != 0)
+                               ma -= 1;
+               } else
+#endif
+               {
+                       child->bio_data = data;
+                       data += child->bio_bcount;
+               }
+               cur_offset += child->bio_bcount;
+               rem_bcount -= child->bio_bcount;
+       }
+
+       return (child_bios);
+}
+
+static int
+nvme_ns_split_bio(struct nvme_namespace *ns, struct bio *bp,
+    uint32_t alignment)
+{
+       struct bio      *child;
+       struct bio      **child_bios;
+       int             err, i, num_bios;
+
+       child_bios = nvme_construct_child_bios(bp, alignment, &num_bios);
+       if (child_bios == NULL)
+               return (ENOMEM);
+
+       for (i = 0; i < num_bios; i++) {
+               child = child_bios[i];
+               err = nvme_ns_bio_process(ns, child, nvme_bio_child_done);
+               if (err != 0) {
+                       nvme_bio_child_inbed(bp, err);
+                       g_destroy_bio(child);
+               }
+       }
+
+       free(child_bios, M_NVME);
+       return (0);
+}
+
 int
 nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp,
        nvme_cb_fn_t cb_fn)
 {
        struct nvme_dsm_range   *dsm_range;
+       uint32_t                num_bios;
        int                     err;
 
        bp->bio_driver1 = cb_fn;
 
+       if (ns->stripesize > 0 &&
+           (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
+               num_bios = nvme_get_num_segments(bp->bio_offset,
+                   bp->bio_bcount, ns->stripesize);
+               if (num_bios > 1)
+                       return (nvme_ns_split_bio(ns, bp, ns->stripesize));
+       }
+
        switch (bp->bio_cmd) {
        case BIO_READ:
                err = nvme_ns_cmd_read_bio(ns, bp, nvme_ns_bio_done, bp);
@@ -276,6 +494,11 @@ nvme_ns_construct(struct nvme_namespace 
 
        ns->ctrlr = ctrlr;
        ns->id = id;
+       ns->stripesize = 0;
+
+       if (pci_get_devid(ctrlr->dev) == 0x09538086 && ctrlr->cdata.vs[3] != 0)
+               ns->stripesize =
+                   (1 << ctrlr->cdata.vs[3]) * ctrlr->min_page_size;
 
        /*
         * Namespaces are reconstructed after a controller reset, so check

Modified: stable/9/sys/dev/nvme/nvme_private.h
==============================================================================
--- stable/9/sys/dev/nvme/nvme_private.h        Sun Nov  3 20:38:51 2013        (r257586)
+++ stable/9/sys/dev/nvme/nvme_private.h        Sun Nov  3 20:50:48 2013        (r257587)
@@ -238,6 +238,7 @@ struct nvme_namespace {
        uint16_t                        flags;
        struct cdev                     *cdev;
        void                            *cons_cookie[NVME_MAX_CONSUMERS];
+       uint32_t                        stripesize;
        struct mtx                      lock;
 };
 
_______________________________________________
[email protected] mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-stable-9
To unsubscribe, send any mail to "[email protected]"

Reply via email to