Based on reverse engineering and original patch by

Paul Pawlowski <p...@mrarm.io>

This adds support for Apple's weird implementation of NVMe in their
2018 or later machines. It accounts for the twice-as-big SQ entries
for the IO queues, and the fact that only interrupt vector 0 appears
to function properly.

Signed-off-by: Benjamin Herrenschmidt <b...@kernel.crashing.org>
---

I reworked Paul's patch to be less invasive in the nvme_submit_cmd()
hot path, effectively only adding a shift with a value hopefully
coming from the same cache line as existing stuff.

It could probably be made even less invasive by turning sq_extra_shift
into "sq_shift", containing the complete shift between entries,
i.e. 6 or 7, and then replacing the memcpy to

        &nvmeq->sq_cmds[nvmeq->sq_tail]

With something like

        ((char *)nvmeq->sq_cmds) + (((size_t)nvmeq->sq_tail) << nvmeq->sq_shift)

But I doubt the difference will be measurable anywhere and it makes
the code grosser imho.

Note: I'm not subscribed to linux-nvme, please CC me on replies.

 drivers/nvme/host/nvme.h |  5 ++++
 drivers/nvme/host/pci.c  | 59 ++++++++++++++++++++++++++++------------
 2 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 55553d293a98..9ae53cbfb320 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -92,6 +92,11 @@ enum nvme_quirks {
         * Broken Write Zeroes.
         */
        NVME_QUIRK_DISABLE_WRITE_ZEROES         = (1 << 9),
+
+       /*
+        * Apple 2018 and later variants have a few issues
+        */
+       NVME_QUIRK_APPLE_2018                   = (1 << 10),
 };
 
 /*
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 524d6bd6d095..1a41412fc48b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -27,8 +27,8 @@
 #include "trace.h"
 #include "nvme.h"
 
-#define SQ_SIZE(depth)         (depth * sizeof(struct nvme_command))
-#define CQ_SIZE(depth)         (depth * sizeof(struct nvme_completion))
+#define SQ_SIZE(q)     ((((size_t)(q)->q_depth) << (q)->sq_extra_shift) * sizeof(struct nvme_command))
+#define CQ_SIZE(q)     (((size_t)(q)->q_depth) * sizeof(struct nvme_completion))
 
 #define SGES_PER_PAGE  (PAGE_SIZE / sizeof(struct nvme_sgl_desc))
 
@@ -195,6 +195,7 @@ struct nvme_queue {
        u16 last_cq_head;
        u16 qid;
        u8 cq_phase;
+       u8 sq_extra_shift;
        unsigned long flags;
 #define NVMEQ_ENABLED          0
 #define NVMEQ_SQ_CMB           1
@@ -504,8 +505,11 @@ static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
 static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
                            bool write_sq)
 {
+       u16 sq_actual_pos;
+
        spin_lock(&nvmeq->sq_lock);
-       memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
+       sq_actual_pos = nvmeq->sq_tail << nvmeq->sq_extra_shift;
+       memcpy(&nvmeq->sq_cmds[sq_actual_pos], cmd, sizeof(*cmd));
        if (++nvmeq->sq_tail == nvmeq->q_depth)
                nvmeq->sq_tail = 0;
        nvme_write_sq_db(nvmeq, write_sq);
@@ -1361,16 +1365,16 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 
 static void nvme_free_queue(struct nvme_queue *nvmeq)
 {
-       dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq->q_depth),
+       dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq),
                                (void *)nvmeq->cqes, nvmeq->cq_dma_addr);
        if (!nvmeq->sq_cmds)
                return;
 
        if (test_and_clear_bit(NVMEQ_SQ_CMB, &nvmeq->flags)) {
                pci_free_p2pmem(to_pci_dev(nvmeq->dev->dev),
-                               nvmeq->sq_cmds, SQ_SIZE(nvmeq->q_depth));
+                               nvmeq->sq_cmds, SQ_SIZE(nvmeq));
        } else {
-               dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq->q_depth),
+               dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq),
                                nvmeq->sq_cmds, nvmeq->sq_dma_addr);
        }
 }
@@ -1450,12 +1454,12 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
 }
 
 static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
-                               int qid, int depth)
+                               int qid)
 {
        struct pci_dev *pdev = to_pci_dev(dev->dev);
 
        if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
-               nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth));
+               nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(nvmeq));
                nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev,
                                                nvmeq->sq_cmds);
                if (nvmeq->sq_dma_addr) {
@@ -1464,7 +1468,7 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
                }
        }
 
-       nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
+       nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(nvmeq),
                                &nvmeq->sq_dma_addr, GFP_KERNEL);
        if (!nvmeq->sq_cmds)
                return -ENOMEM;
@@ -1478,12 +1482,24 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
        if (dev->ctrl.queue_count > qid)
                return 0;
 
-       nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(depth),
+       /*
+        * On Apple 2018 or later implementations, the IO submission
+        * queue(s) have twice as large entries. Current implementations
+        * seem to only have qid 1, let's assume this is true of all IO
+        * queues until we know better.
+        */
+       if (qid && (dev->ctrl.quirks & NVME_QUIRK_APPLE_2018))
+               nvmeq->sq_extra_shift = 1;
+       else
+               nvmeq->sq_extra_shift = 0;
+
+       nvmeq->q_depth = depth;
+       nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(nvmeq),
                                         &nvmeq->cq_dma_addr, GFP_KERNEL);
        if (!nvmeq->cqes)
                goto free_nvmeq;
 
-       if (nvme_alloc_sq_cmds(dev, nvmeq, qid, depth))
+       if (nvme_alloc_sq_cmds(dev, nvmeq, qid))
                goto free_cqdma;
 
        nvmeq->dev = dev;
@@ -1492,15 +1508,14 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
        nvmeq->cq_head = 0;
        nvmeq->cq_phase = 1;
        nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
-       nvmeq->q_depth = depth;
        nvmeq->qid = qid;
        dev->ctrl.queue_count++;
 
        return 0;
 
  free_cqdma:
-       dma_free_coherent(dev->dev, CQ_SIZE(depth), (void *)nvmeq->cqes,
-                                                       nvmeq->cq_dma_addr);
+       dma_free_coherent(dev->dev, CQ_SIZE(nvmeq), (void *)nvmeq->cqes,
+                         nvmeq->cq_dma_addr);
  free_nvmeq:
        return -ENOMEM;
 }
@@ -1527,8 +1542,9 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
        nvmeq->last_sq_tail = 0;
        nvmeq->cq_head = 0;
        nvmeq->cq_phase = 1;
+
        nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
-       memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
+       memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq));
        nvme_dbbuf_init(dev, nvmeq, qid);
        dev->online_queues++;
        wmb(); /* ensure the first interrupt sees the initialization */
@@ -1546,9 +1562,16 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
         * A queue's vector matches the queue identifier unless the controller
         * has only one vector available.
         */
-       if (!polled)
+       if (!polled) {
                vector = dev->num_vecs == 1 ? 0 : qid;
-       else
+
+               /*
+                * On Apple 2018 or later implementations, only vector 0 is accepted
+                * by the drive firmware
+                */
+               if (dev->ctrl.quirks & NVME_QUIRK_APPLE_2018)
+                       vector = 0;
+       } else
                set_bit(NVMEQ_POLLED, &nvmeq->flags);
 
        result = adapter_alloc_cq(dev, qid, nvmeq, vector);
@@ -2949,6 +2972,8 @@ static const struct pci_device_id nvme_id_table[] = {
        { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
+       { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2005),
+               .driver_data = NVME_QUIRK_APPLE_2018},
        { 0, }
 };
 MODULE_DEVICE_TABLE(pci, nvme_id_table);


Reply via email to