Add the possibility to rely on an additional MDMA channel and to
chain the DMA and MDMA channels, allowing to achieve faster capture.
Indeed, on the MP15 platform, the MDMA engine has a higher
bandwidth to the DDR than the DMA engine.  Relying on that,
it is possible to capture frames from the DCMI IP into the DDR
by using two channels as follows:

  DCMI -> (DMA) -> SRAM -> (MDMA) -> DDR

The DMA is able by itself to trigger an MDMA request, hence, once
properly configured, the DCMI IP can simply trigger the DMA in order
to have the data pushed all the way to the DDR (via the SRAM and an
MDMA channel).

This behavior is detailed in the document
Documentation/arch/arm/stm32/stm32-dma-mdma-chaining.rst
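
As done in dcmi_start_dma() below, the MDMA descriptor is submitted
and issued before the DMA one, so that the consumer side of the chain
is armed before the producer starts. A minimal sketch of the per-frame
start sequence (names as in this patch):

  /* Arm the chained channels for one frame */
  mdma_cookie = dmaengine_submit(buf->mdma_desc); /* consumer first */
  dma_cookie = dmaengine_submit(buf->dma_desc);   /* then producer  */
  dma_async_issue_pending(mdma_chan);
  dma_async_issue_pending(dma_chan);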

Signed-off-by: Alain Volmat <[email protected]>
---
 drivers/media/platform/st/stm32/stm32-dcmi.c | 250 +++++++++++++++++++++++----
 1 file changed, 220 insertions(+), 30 deletions(-)

diff --git a/drivers/media/platform/st/stm32/stm32-dcmi.c b/drivers/media/platform/st/stm32/stm32-dcmi.c
index 4c0f8ed0f87e..e4a9f96a989b 100644
--- a/drivers/media/platform/st/stm32/stm32-dcmi.c
+++ b/drivers/media/platform/st/stm32/stm32-dcmi.c
@@ -15,6 +15,7 @@
 #include <linux/completion.h>
 #include <linux/delay.h>
 #include <linux/dmaengine.h>
+#include <linux/genalloc.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
@@ -113,7 +114,9 @@ struct dcmi_buf {
        struct vb2_v4l2_buffer  vb;
        bool                    prepared;
        struct sg_table         sgt;
+       struct sg_table         sgt_mdma;
        struct dma_async_tx_descriptor *dma_desc;
+       struct dma_async_tx_descriptor *mdma_desc;
        size_t                  size;
        struct list_head        list;
 };
@@ -159,6 +162,15 @@ struct stm32_dcmi {
        struct dma_chan                 *dma_chan;
        dma_cookie_t                    dma_cookie;
        u32                             dma_max_burst;
+
+       /* Elements for the MDMA - DMA chaining */
+       struct gen_pool                 *sram_pool;
+       struct dma_chan                 *mdma_chan;
+       void                            *sram_buf;
+       u32                             sram_buf_size;
+       dma_addr_t                      sram_dma_buf;
+       dma_cookie_t                    mdma_cookie;
+
        u32                             misr;
        int                             errors_count;
        int                             overrun_count;
@@ -247,12 +259,22 @@ static int dcmi_start_dma(struct stm32_dcmi *dcmi,
                          struct dcmi_buf *buf)
 {
        /* Push current DMA transaction in the pending queue */
+       if (dcmi->mdma_chan) {
+               dcmi->mdma_cookie = dmaengine_submit(buf->mdma_desc);
+               if (dma_submit_error(dcmi->mdma_cookie)) {
+                       dev_err(dcmi->dev, "%s: MDMA submission failed\n", __func__);
+                       return -ENXIO;
+               }
+       }
+
        dcmi->dma_cookie = dmaengine_submit(buf->dma_desc);
        if (dma_submit_error(dcmi->dma_cookie)) {
                dev_err(dcmi->dev, "%s: DMA submission failed\n", __func__);
                return -ENXIO;
        }
 
+       if (dcmi->mdma_chan)
+               dma_async_issue_pending(dcmi->mdma_chan);
        dma_async_issue_pending(dcmi->dma_chan);
 
        return 0;
@@ -301,7 +323,9 @@ static void dcmi_set_crop(struct stm32_dcmi *dcmi)
 
 static void dcmi_process_frame(struct stm32_dcmi *dcmi)
 {
-       struct dma_tx_state state;
+       struct dma_tx_state state, state_dma;
+       size_t bytes_used;
+
        enum dma_status status;
        struct dcmi_buf *buf = dcmi->active;
 
@@ -309,23 +333,36 @@ static void dcmi_process_frame(struct stm32_dcmi *dcmi)
                return;
 
        /*
-        * At the time FRAME interrupt is received, all dma req have been sent to the DMA,
-        * however DMA might still be transferring data hence first synchronize prior to
-        * getting the status of the DMA transfer.
-        * Then DMA tx status gives the amount of data transferred to memory, which is then
-        * returned to V4L2 through the active buffer payload.
+        * Pause the DMA transfer: this triggers the MDMA channel read while
+        * keeping a valid residue value on the DMA channel.
         */
+       if (dcmi->mdma_chan) {
+               spin_unlock_irq(&dcmi->irqlock);
+               dmaengine_pause(dcmi->dma_chan);
+               spin_lock_irq(&dcmi->irqlock);
+
+               do {
+                       status = dmaengine_tx_status(dcmi->mdma_chan, dcmi->mdma_cookie, &state);
+                       cpu_relax();
+               } while (status != DMA_ERROR && status != DMA_COMPLETE &&
+                        state.residue && state.in_flight_bytes);
+       } else {
+               status = dmaengine_tx_status(dcmi->dma_chan, dcmi->dma_cookie, &state);
+       }
 
-       spin_unlock_irq(&dcmi->irqlock);
-       /* Drain DMA */
-       dmaengine_synchronize(dcmi->dma_chan);
-       spin_lock_irq(&dcmi->irqlock);
-
-       /* Get DMA status and residue size */
-       status = dmaengine_tx_status(dcmi->dma_chan, dcmi->dma_cookie, &state);
        if (status != DMA_ERROR && state.residue < buf->size) {
+               bytes_used = buf->size - state.residue;
+
+               if (state.residue && dcmi->mdma_chan) {
+                       dmaengine_tx_status(dcmi->dma_chan, dcmi->dma_cookie, &state_dma);
+                       /* A residue equal to the whole SRAM buffer size means no residue */
+                       if (state_dma.residue == dcmi->sram_buf_size)
+                               state_dma.residue = 0;
+                       bytes_used -= state_dma.residue;
+               }
+
                /* Return buffer to V4L2 with received data size */
-               dcmi_buffer_done(dcmi, buf, buf->size - state.residue, 0);
+               dcmi_buffer_done(dcmi, buf, bytes_used, 0);
        } else {
                dcmi->errors_count++;
                dev_err(dcmi->dev, "%s: DMA error. status: 0x%x, residue: %d\n",
@@ -336,6 +373,8 @@ static void dcmi_process_frame(struct stm32_dcmi *dcmi)
 
        /* Abort DMA operation */
        dmaengine_terminate_async(dcmi->dma_chan);
+       if (dcmi->mdma_chan)
+               dmaengine_terminate_async(dcmi->mdma_chan);
 }
 
 static irqreturn_t dcmi_irq_thread(int irq, void *arg)
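
For clarity, a worked example of the payload computation above, with
hypothetical sizes (illustration only, not taken from the driver):

  /* 153600-byte frame, 2048-byte SRAM bounce buffer (two 1 KiB halves) */
  /* MDMA residue reported after pausing the DMA: 2048 bytes            */
  bytes_used = 153600 - 2048;   /* 151552 bytes already in DDR          */
  /* DMA residue == sram_buf_size, i.e. a full cyclic period, which is  */
  /* treated as no residue, so nothing more is subtracted               */
  bytes_used -= 0;              /* payload returned to V4L2: 151552     */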
@@ -354,13 +393,15 @@ static irqreturn_t dcmi_irq_thread(int irq, void *arg)
                        dcmi->errors_count++;
 
                dmaengine_terminate_async(dcmi->dma_chan);
+               if (dcmi->mdma_chan)
+                       dmaengine_terminate_async(dcmi->mdma_chan);
 
                if (dcmi_restart_capture(dcmi))
                        dev_err(dcmi->dev, "%s: Cannot restart capture\n", __func__);
                spin_unlock_irq(&dcmi->irqlock);
-
                return IRQ_HANDLED;
        }
+
        if (dcmi->misr & IT_ERR)
                dcmi->errors_count++;
 
@@ -447,28 +488,70 @@ static int dcmi_buf_prepare(struct vb2_buffer *vb)
        vb2_set_plane_payload(vb, 0, size);
 
        if (!buf->prepared) {
+               u32 max_size = dcmi->dma_max_burst;
+               unsigned int dma_nents;
+
                /* Get memory addresses */
                buf->size = vb2_plane_size(&buf->vb.vb2_buf, 0);
-               if (buf->size > dcmi->dma_max_burst)
-                       num_sgs = DIV_ROUND_UP(buf->size, dcmi->dma_max_burst);
+               if (dcmi->mdma_chan)
+                       max_size = dcmi->sram_buf_size / 2;
 
-               ret = sg_alloc_table(&buf->sgt, num_sgs, GFP_ATOMIC);
+               if (buf->size > max_size)
+                       num_sgs = DIV_ROUND_UP(buf->size, max_size);
+
+               /*
+                * If we use MDMA chaining, the DMA runs in cyclic mode and its
+                * scatterlist is thus at most 2 nodes long.
+                */
+               dma_nents = num_sgs;
+               if (dcmi->mdma_chan)
+                       dma_nents = min_t(unsigned int, num_sgs, 2);
+
+               ret = sg_alloc_table(&buf->sgt, dma_nents, GFP_ATOMIC);
                if (ret) {
-                       dev_err(dcmi->dev, "sg table alloc failed\n");
+                       dev_err(dcmi->dev, "sg table alloc failed for DMA\n");
                        return ret;
                }
 
+               if (dcmi->mdma_chan) {
+                       ret = sg_alloc_table(&buf->sgt_mdma, num_sgs, GFP_ATOMIC);
+                       if (ret) {
+                               dev_err(dcmi->dev, "sg table alloc failed for MDMA\n");
+                               sg_free_table(&buf->sgt);
+                               return ret;
+                       }
+               }
+
                dma_buf = vb2_dma_contig_plane_dma_addr(&buf->vb.vb2_buf, 0);
 
                dev_dbg(dcmi->dev, "buffer[%d] phy=%pad size=%zu\n",
                        vb->index, &dma_buf, buf->size);
 
-               for_each_sg(buf->sgt.sgl, sg, num_sgs, i) {
-                       size_t bytes = min_t(size_t, size, dcmi->dma_max_burst);
+               for_each_sg(buf->sgt.sgl, sg, dma_nents, i) {
+                       size_t bytes = min_t(size_t, size, max_size);
+
+                       if (!dcmi->mdma_chan) {
+                               sg_dma_address(sg) = dma_buf;
+                               dma_buf += bytes;
+                       } else {
+                               /* Even nodes target the first half of sram_buf */
+                               sg_dma_address(sg) = dcmi->sram_dma_buf;
+                               /* Odd nodes target the second half of sram_buf */
+                               if (i & 1)
+                                       sg_dma_address(sg) += dcmi->sram_buf_size / 2;
+                       }
+                       /*
+                        * In case of DMA-MDMA chaining, since the DMA works in
+                        * cyclic mode, we need to provide two linked-list nodes
+                        * of the same size in order to get a correct residue
+                        * value computed.
+                        */
+                       if (!dcmi->mdma_chan)
+                               sg_dma_len(sg) = bytes;
+                       else
+                               sg_dma_len(sg) = dcmi->sram_buf_size / 2;
 
-                       sg_dma_address(sg) = dma_buf;
-                       sg_dma_len(sg) = bytes;
-                       dma_buf += bytes;
                        size -= bytes;
                }
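
With MDMA chaining, the DMA scatterlist built above thus degenerates
into a two-node ping-pong over the SRAM bounce buffer (illustrative
layout):

  node 0: sram_dma_buf                        len = sram_buf_size / 2
  node 1: sram_dma_buf + sram_buf_size / 2    len = sram_buf_size / 2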
 
@@ -480,6 +563,8 @@ static int dcmi_buf_prepare(struct vb2_buffer *vb)
                                                        DMA_PREP_INTERRUPT);
                if (!buf->dma_desc) {
                        dev_err(dcmi->dev, "%s: DMA dmaengine_prep_slave_sg 
failed\n", __func__);
+                       if (dcmi->mdma_chan)
+                               sg_free_table(&buf->sgt_mdma);
                        sg_free_table(&buf->sgt);
                        return -EIO;
                }
@@ -489,10 +574,48 @@ static int dcmi_buf_prepare(struct vb2_buffer *vb)
                if (ret) {
                        dev_err(dcmi->dev, "%s: DMA dmaengine_desc_set_reuse 
failed\n", __func__);
                        dmaengine_desc_free(buf->dma_desc);
+                       if (dcmi->mdma_chan)
+                               sg_free_table(&buf->sgt_mdma);
                        sg_free_table(&buf->sgt);
                        return -EIO;
                }
 
+               if (dcmi->mdma_chan) {
+                       size = dcmi->fmt.fmt.pix.sizeimage;
+                       for_each_sg(buf->sgt_mdma.sgl, sg, num_sgs, i) {
+                               size_t bytes = min_t(size_t, size, max_size);
+
+                               sg_dma_address(sg) = dma_buf;
+                               sg_dma_len(sg) = bytes;
+                               dma_buf += bytes;
+                               size -= bytes;
+                       }
+
+                       buf->mdma_desc = dmaengine_prep_slave_sg(dcmi->mdma_chan, buf->sgt_mdma.sgl,
+                                                                buf->sgt_mdma.nents,
+                                                                DMA_DEV_TO_MEM,
+                                                                DMA_PREP_INTERRUPT);
+                       if (!buf->mdma_desc) {
+                               dev_err(dcmi->dev, "%s: failed 
dmaengine_prep_slave_sg for MDMA\n",
+                                       __func__);
+                               dmaengine_desc_free(buf->dma_desc);
+                               sg_free_table(&buf->sgt_mdma);
+                               sg_free_table(&buf->sgt);
+                               return -EIO;
+                       }
+
+                       ret = dmaengine_desc_set_reuse(buf->mdma_desc);
+                       if (ret) {
+                               dev_err(dcmi->dev, "%s: failed to set reuse for 
mdma desc\n",
+                                       __func__);
+                               dmaengine_desc_free(buf->mdma_desc);
+                               dmaengine_desc_free(buf->dma_desc);
+                               sg_free_table(&buf->sgt_mdma);
+                               sg_free_table(&buf->sgt);
+                               return -EIO;
+                       }
+               }
+
                buf->prepared = true;
 
                vb2_set_plane_payload(&buf->vb.vb2_buf, 0, buf->size);
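
As a reminder of the dmaengine reuse contract relied upon above
(sketch only, not driver code): each descriptor is prepared once per
buffer, marked reusable so dcmi_start_dma() can resubmit it for every
frame, and only released in dcmi_buf_cleanup():

  desc = dmaengine_prep_slave_sg(chan, sgl, nents, dir, DMA_PREP_INTERRUPT);
  dmaengine_desc_set_reuse(desc); /* descriptor survives completion */
  /* ... dmaengine_submit() / dma_async_issue_pending() per frame ... */
  dmaengine_desc_free(desc);      /* explicit release when done */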
@@ -511,6 +634,14 @@ static void dcmi_buf_cleanup(struct vb2_buffer *vb)
        if (!buf->prepared)
                return;
 
+       if (dcmi->mdma_chan) {
+               ret = dmaengine_desc_free(buf->mdma_desc);
+               if (ret)
+                       dev_err(dcmi->dev, "%s: Failed to free the mdma 
descriptor (0x%x)\n",
+                               __func__, ret);
+               sg_free_table(&buf->sgt_mdma);
+       }
+
        ret = dmaengine_desc_free(buf->dma_desc);
        if (ret)
                dev_err(dcmi->dev, "%s: Failed to free the mdma descriptor 
(0x%x)\n",
@@ -816,6 +947,8 @@ static void dcmi_stop_streaming(struct vb2_queue *vq)
 
        /* Stop all pending DMA operations */
        dmaengine_terminate_sync(dcmi->dma_chan);
+       if (dcmi->mdma_chan)
+               dmaengine_terminate_sync(dcmi->mdma_chan);
 
        pm_runtime_put(dcmi->dev);
 
@@ -1824,9 +1957,9 @@ static int dcmi_probe(struct platform_device *pdev)
        struct v4l2_fwnode_endpoint ep = { .bus_type = 0 };
        struct stm32_dcmi *dcmi;
        struct vb2_queue *q;
-       struct dma_chan *chan;
+       struct dma_chan *chan, *mdma_chan;
        struct dma_slave_caps caps;
-       struct dma_slave_config dma_config;
+       struct dma_slave_config dma_config, mdma_config;
        struct clk *mclk;
        int ret = 0;
 
@@ -1888,15 +2021,21 @@ static int dcmi_probe(struct platform_device *pdev)
                return dev_err_probe(&pdev->dev, PTR_ERR(chan),
                                     "Failed to request DMA channel\n");
 
-       dcmi->dma_max_burst = UINT_MAX;
-       ret = dma_get_slave_caps(chan, &caps);
-       if (!ret && caps.max_sg_burst)
-               dcmi->dma_max_burst = caps.max_sg_burst * DMA_SLAVE_BUSWIDTH_4_BYTES;
+       mdma_chan = dma_request_chan(&pdev->dev, "mdma_tx");
+       if (IS_ERR(mdma_chan)) {
+               ret = PTR_ERR(mdma_chan);
+               if (ret != -ENODEV)
+                       return dev_err_probe(&pdev->dev, ret,
+                                            "Failed to request MDMA channel\n");
+               mdma_chan = NULL;
+       }
 
        memset(&dma_config, 0, sizeof(dma_config));
 
        dma_config.src_addr = (dma_addr_t)dcmi->res->start + DCMI_DR;
        dma_config.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+       if (mdma_chan)
+               dma_config.peripheral_size = 1; /* Indicates chaining */
 
        /* Configure DMA channel */
        ret = dmaengine_slave_config(chan, &dma_config);
@@ -1906,6 +2045,47 @@ static int dcmi_probe(struct platform_device *pdev)
                goto err_dma_slave_config;
        }
 
+       /* If we want MDMA, we also need an SRAM pool */
+       if (mdma_chan) {
+               dcmi->sram_pool = of_gen_pool_get(pdev->dev.of_node, "sram", 0);
+               if (!dcmi->sram_pool) {
+                       dev_info(&pdev->dev, "No SRAM pool, can't use MDMA chaining\n");
+                       ret = -ENODEV;
+                       goto err_dma_slave_config;
+               }
+
+               dev_info(&pdev->dev, "SRAM pool: %zu KiB for DMA/MDMA 
chaining\n",
+                        gen_pool_size(dcmi->sram_pool) / 1024);
+
+               dcmi->sram_buf_size = gen_pool_size(dcmi->sram_pool);
+               dcmi->sram_buf = gen_pool_dma_zalloc(dcmi->sram_pool, dcmi->sram_buf_size,
+                                                    &dcmi->sram_dma_buf);
+               if (!dcmi->sram_buf) {
+                       dev_err(dcmi->dev, "Failed to allocate from SRAM\n");
+                       ret = -ENOMEM;
+                       goto err_dma_slave_config;
+               }
+
+               /* Configure the MDMA Channel */
+               memset(&mdma_config, 0, sizeof(mdma_config));
+               mdma_config.direction = DMA_DEV_TO_MEM;
+               mdma_config.src_addr = dcmi->sram_dma_buf;
+               mdma_config.peripheral_size = dma_config.peripheral_size;
+               mdma_config.peripheral_config = dma_config.peripheral_config;
+               ret = dmaengine_slave_config(mdma_chan, &mdma_config);
+               if (ret < 0) {
+                       dev_err(dcmi->dev, "%s: MDMA channel config failed 
(%d)\n",
+                               __func__, ret);
+                       goto err_mdma_slave_config;
+               }
+       }
+
+       dcmi->dma_max_burst = UINT_MAX;
+       /* When DMA-MDMA chaining is used, consider the maximum burst as infinite */
+       if (!mdma_chan) {
+               ret = dma_get_slave_caps(chan, &caps);
+               if (!ret && caps.max_sg_burst)
+                       dcmi->dma_max_burst = caps.max_sg_burst * DMA_SLAVE_BUSWIDTH_4_BYTES;
+       }
+
        spin_lock_init(&dcmi->irqlock);
        mutex_init(&dcmi->lock);
        init_completion(&dcmi->complete);
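
Note the sizing chain set up here: the whole "sram" gen_pool (looked
up by phandle, with the MDMA channel requested by the "mdma_tx" name)
becomes the bounce buffer, and dcmi_buf_prepare() later splits it in
two cyclic halves. A worked example with hypothetical sizes:

  max_size = sram_buf_size / 2;                /* e.g. 8192 / 2 = 4096 */
  num_sgs  = DIV_ROUND_UP(buf_size, max_size); /* 614400 / 4096 = 150  */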
@@ -1915,6 +2095,7 @@ static int dcmi_probe(struct platform_device *pdev)
        dcmi->mclk = mclk;
        dcmi->state = STOPPED;
        dcmi->dma_chan = chan;
+       dcmi->mdma_chan = mdma_chan;
 
        q = &dcmi->queue;
 
@@ -2023,8 +2204,13 @@ static int dcmi_probe(struct platform_device *pdev)
        v4l2_device_unregister(&dcmi->v4l2_dev);
 err_media_device_cleanup:
        media_device_cleanup(&dcmi->mdev);
+err_mdma_slave_config:
+       if (dcmi->sram_buf)
+               gen_pool_free(dcmi->sram_pool, (unsigned long)dcmi->sram_buf,
+                             dcmi->sram_buf_size);
 err_dma_slave_config:
        dma_release_channel(dcmi->dma_chan);
+       if (mdma_chan)
+               dma_release_channel(mdma_chan);
 
        return ret;
 }
@@ -2042,6 +2228,10 @@ static void dcmi_remove(struct platform_device *pdev)
        media_device_cleanup(&dcmi->mdev);
 
        dma_release_channel(dcmi->dma_chan);
+       if (dcmi->mdma_chan) {
+               gen_pool_free(dcmi->sram_pool, (unsigned long)dcmi->sram_buf,
+                             dcmi->sram_buf_size);
+               dma_release_channel(dcmi->mdma_chan);
+       }
 }
 
 static int dcmi_runtime_suspend(struct device *dev)

-- 
2.34.1
