From: Jun Yang <jun.y...@nxp.com>

Perform a scatter-gather (SG) operation either through the copy_sg
callback of the DMA library or when the application submits a burst
of requests.
Perform a simple (non-SG) operation if the burst contains only one request.

Signed-off-by: Jun Yang <jun.y...@nxp.com>
---
 drivers/dma/dpaa/dpaa_qdma.c | 856 ++++++++++++++++++++++++++---------
 drivers/dma/dpaa/dpaa_qdma.h | 184 +++++---
 2 files changed, 763 insertions(+), 277 deletions(-)

diff --git a/drivers/dma/dpaa/dpaa_qdma.c b/drivers/dma/dpaa/dpaa_qdma.c
index 646542eb8f..75e92a1b0c 100644
--- a/drivers/dma/dpaa/dpaa_qdma.c
+++ b/drivers/dma/dpaa/dpaa_qdma.c
@@ -4,45 +4,31 @@
 
 #include <bus_dpaa_driver.h>
 #include <rte_dmadev_pmd.h>
+#include <rte_pmd_dpaax_qdma.h>
 
 #include "dpaa_qdma.h"
 #include "dpaa_qdma_logs.h"
 
+static int s_data_validation;
+static int s_hw_err_check;
+static int s_sg_disable;
+
 static inline void
-qdma_desc_addr_set64(struct fsl_qdma_format *ccdf, u64 addr)
+qdma_desc_addr_set64(struct fsl_qdma_comp_cmd_desc *ccdf, u64 addr)
 {
        ccdf->addr_hi = upper_32_bits(addr);
        ccdf->addr_lo = rte_cpu_to_le_32(lower_32_bits(addr));
 }
 
 static inline void
-qdma_ccdf_set_format(struct fsl_qdma_format *ccdf, int offset)
-{
-       ccdf->cfg = rte_cpu_to_le_32(QDMA_CCDF_FOTMAT | offset);
-}
-
-static inline int
-qdma_ccdf_get_status(const struct fsl_qdma_format *ccdf)
-{
-       return (rte_le_to_cpu_32(ccdf->status) & QDMA_CCDF_MASK)
-               >> QDMA_CCDF_STATUS;
-}
-
-static inline void
-qdma_ccdf_set_ser(struct fsl_qdma_format *ccdf, int status)
+qdma_desc_sge_addr_set64(struct fsl_qdma_comp_sg_desc *sge, u64 addr)
 {
-       ccdf->status = rte_cpu_to_le_32(QDMA_CCDF_SER | status);
-}
-
-static inline void
-qdma_ccdf_set_queue(struct fsl_qdma_format *ccdf,
-       uint8_t queue_idx)
-{
-       ccdf->queue = queue_idx;
+       sge->addr_hi = upper_32_bits(addr);
+       sge->addr_lo = rte_cpu_to_le_32(lower_32_bits(addr));
 }
 
 static inline int
-qdma_ccdf_get_queue(struct fsl_qdma_format *ccdf,
+qdma_ccdf_get_queue(struct fsl_qdma_comp_cmd_desc *ccdf,
        uint8_t *queue_idx)
 {
        uint64_t addr = ((uint64_t)ccdf->addr_hi) << 32 | ccdf->addr_lo;
@@ -58,18 +44,6 @@ qdma_ccdf_get_queue(struct fsl_qdma_format *ccdf,
        return false;
 }
 
-static inline void
-qdma_csgf_set_len(struct fsl_qdma_format *csgf, int len)
-{
-       csgf->cfg = rte_cpu_to_le_32(len & QDMA_SG_LEN_MASK);
-}
-
-static inline void
-qdma_csgf_set_f(struct fsl_qdma_format *csgf, int len)
-{
-       csgf->cfg = rte_cpu_to_le_32(QDMA_SG_FIN | (len & QDMA_SG_LEN_MASK));
-}
-
 static inline int ilog2(int x)
 {
        int log = 0;
@@ -126,11 +100,11 @@ qdma_writel_be(uint32_t val, void *addr)
 }
 
 static void *
-dma_pool_alloc(int size, int aligned, dma_addr_t *phy_addr)
+dma_pool_alloc(char *nm, int size, int aligned, dma_addr_t *phy_addr)
 {
        void *virt_addr;
 
-       virt_addr = rte_malloc("dma pool alloc", size, aligned);
+       virt_addr = rte_zmalloc(nm, size, aligned);
        if (!virt_addr)
                return NULL;
 
@@ -149,28 +123,46 @@ fsl_qdma_pre_comp_sd_desc(struct fsl_qdma_queue *queue)
        struct fsl_qdma_engine *fsl_qdma = queue->engine;
        struct fsl_qdma_sdf *sdf;
        struct fsl_qdma_ddf *ddf;
-       struct fsl_qdma_format *ccdf;
+       struct fsl_qdma_comp_cmd_desc *ccdf;
        uint16_t i, j;
-       struct fsl_qdma_format *head;
        struct fsl_qdma_cmpd_ft *ft;
        struct fsl_qdma_df *df;
 
-       head = queue->cmd_desc;
-
        for (i = 0; i < queue->n_cq; i++) {
                dma_addr_t phy_ft = 0, phy_df = 0;
 
-               queue->ft[i] =
-                       dma_pool_alloc(sizeof(struct fsl_qdma_cmpd_ft),
-                               RTE_CACHE_LINE_SIZE, &phy_ft);
+               queue->ft[i] = dma_pool_alloc(NULL,
+                       sizeof(struct fsl_qdma_cmpd_ft),
+                       RTE_CACHE_LINE_SIZE, &phy_ft);
                if (!queue->ft[i])
                        goto fail;
-
-               queue->df[i] =
-                       dma_pool_alloc(sizeof(struct fsl_qdma_df),
-                               RTE_CACHE_LINE_SIZE, &phy_df);
+               if (((uint64_t)queue->ft[i]) &
+                       (RTE_CACHE_LINE_SIZE - 1)) {
+                       DPAA_QDMA_ERR("FD[%d] addr(%p) not cache aligned",
+                               i, queue->ft[i]);
+                       rte_free(queue->ft[i]);
+                       queue->ft[i] = NULL;
+                       goto fail;
+               }
+               if (((uint64_t)(&queue->ft[i]->desc_ssge[0])) &
+                       (RTE_CACHE_LINE_SIZE - 1)) {
+                       DPAA_QDMA_ERR("FD[%d] SGE addr(%p) not cache aligned",
+                               i, &queue->ft[i]->desc_ssge[0]);
+                       rte_free(queue->ft[i]);
+                       queue->ft[i] = NULL;
+                       goto fail;
+               }
+               queue->ft[i]->phy_ssge = phy_ft +
+                       offsetof(struct fsl_qdma_cmpd_ft, desc_ssge);
+               queue->ft[i]->phy_dsge = phy_ft +
+                       offsetof(struct fsl_qdma_cmpd_ft, desc_dsge);
+
+               queue->df[i] = dma_pool_alloc(NULL,
+                       sizeof(struct fsl_qdma_df),
+                       RTE_CACHE_LINE_SIZE, &phy_df);
                if (!queue->df[i]) {
                        rte_free(queue->ft[i]);
+                       queue->ft[i] = NULL;
                        goto fail;
                }
 
@@ -182,31 +174,25 @@ fsl_qdma_pre_comp_sd_desc(struct fsl_qdma_queue *queue)
                sdf = &df->sdf;
                ddf = &df->ddf;
                /* Compound Command Descriptor(Frame List Table) */
-               qdma_desc_addr_set64(&ft->desc_buf, phy_df);
-
+               qdma_desc_sge_addr_set64(&ft->desc_buf, phy_df);
                /* It must be 32 as Compound S/G Descriptor */
-               qdma_csgf_set_len(&ft->desc_buf,
-                       sizeof(struct fsl_qdma_df));
+               ft->desc_buf.length = sizeof(struct fsl_qdma_df);
+
                /* Descriptor Buffer */
-               sdf->cmd = rte_cpu_to_le_32(FSL_QDMA_CMD_RWTTYPE <<
-                              FSL_QDMA_CMD_RWTTYPE_OFFSET);
+               sdf->srttype = FSL_QDMA_CMD_RWTTYPE;
 #ifdef RTE_DMA_DPAA_ERRATA_ERR050265
-               sdf->cmd |= rte_cpu_to_le_32(FSL_QDMA_CMD_PF);
+               sdf->prefetch = 1;
 #endif
-               ddf->cmd = rte_cpu_to_le_32(FSL_QDMA_CMD_RWTTYPE <<
-                              FSL_QDMA_CMD_RWTTYPE_OFFSET);
-               ddf->cmd |= rte_cpu_to_le_32(FSL_QDMA_CMD_LWC <<
-                               FSL_QDMA_CMD_LWC_OFFSET);
+               ddf->dwttype = FSL_QDMA_CMD_RWTTYPE;
+               ddf->lwc = FSL_QDMA_CMD_LWC;
 
-               ccdf = queue->cmd_desc;
+               ccdf = &queue->cq[i];
                qdma_desc_addr_set64(ccdf, phy_ft);
-               qdma_ccdf_set_format(ccdf, 0);
+               ccdf->format = FSL_QDMA_COMP_SG_FORMAT;
                if (!fsl_qdma->is_slient)
-                       qdma_ccdf_set_ser(ccdf, 0);
-               qdma_ccdf_set_queue(ccdf, queue->queue_id);
-               queue->cmd_desc++;
+                       ccdf->ser = 1;
+               ccdf->queue = queue->queue_id;
        }
-       queue->cmd_desc = head;
        queue->ci = 0;
 
        return 0;
@@ -226,40 +212,107 @@ fsl_qdma_alloc_queue_resources(struct fsl_qdma_engine *fsl_qdma,
 {
        struct fsl_qdma_queue *cmd_queue;
        uint32_t queue_size;
+       char nm[RTE_MEMZONE_NAMESIZE];
 
        cmd_queue = &fsl_qdma->cmd_queues[block_id][queue_id];
        cmd_queue->engine = fsl_qdma;
 
-       queue_size = sizeof(struct fsl_qdma_format) * QDMA_QUEUE_SIZE;
+       queue_size = sizeof(struct fsl_qdma_comp_cmd_desc) *
+               QDMA_QUEUE_SIZE;
 
-       cmd_queue->cq = dma_pool_alloc(queue_size,
+       sprintf(nm, "Command queue_%d_%d",
+               block_id, queue_id);
+       cmd_queue->cq = dma_pool_alloc(nm, queue_size,
                queue_size, &cmd_queue->bus_addr);
-       if (!cmd_queue->cq)
+       if (!cmd_queue->cq) {
+               DPAA_QDMA_ERR("%s alloc failed!", nm);
                return -ENOMEM;
-
-       memset(cmd_queue->cq, 0x0, queue_size);
+       }
 
        cmd_queue->block_vir = fsl_qdma->block_base +
                FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, block_id);
        cmd_queue->n_cq = QDMA_QUEUE_SIZE;
        cmd_queue->queue_id = queue_id;
        cmd_queue->block_id = block_id;
-       cmd_queue->pending = 0;
-       cmd_queue->cmd_desc = cmd_queue->cq;
-       cmd_queue->ft = rte_malloc("Compound Frame Table",
+       cmd_queue->pending_start = 0;
+       cmd_queue->pending_num = 0;
+       cmd_queue->complete_start = 0;
+
+       sprintf(nm, "Compound Table_%d_%d",
+               block_id, queue_id);
+       cmd_queue->ft = rte_zmalloc(nm,
                        sizeof(void *) * QDMA_QUEUE_SIZE, 0);
        if (!cmd_queue->ft) {
+               DPAA_QDMA_ERR("%s zmalloc failed!", nm);
                rte_free(cmd_queue->cq);
                return -ENOMEM;
        }
-       cmd_queue->df = rte_malloc("Descriptor Buffer",
+       sprintf(nm, "Descriptor Buf_%d_%d",
+               block_id, queue_id);
+       cmd_queue->df = rte_zmalloc(nm,
                        sizeof(void *) * QDMA_QUEUE_SIZE, 0);
        if (!cmd_queue->df) {
+               DPAA_QDMA_ERR("%s zmalloc failed!", nm);
+               rte_free(cmd_queue->ft);
+               rte_free(cmd_queue->cq);
+               return -ENOMEM;
+       }
+       sprintf(nm, "Pending_desc_%d_%d",
+               block_id, queue_id);
+       cmd_queue->pending_desc = rte_zmalloc(nm,
+               sizeof(struct fsl_qdma_desc) * FSL_QDMA_MAX_DESC_NUM, 0);
+       if (!cmd_queue->pending_desc) {
+               DPAA_QDMA_ERR("%s zmalloc failed!", nm);
+               rte_free(cmd_queue->df);
+               rte_free(cmd_queue->ft);
+               rte_free(cmd_queue->cq);
+               return -ENOMEM;
+       }
+       sprintf(nm, "complete-burst_ring_%d_%d",
+               block_id, queue_id);
+       cmd_queue->complete_burst = rte_ring_create(nm,
+               QDMA_QUEUE_SIZE * 2, 0,
+               RING_F_SP_ENQ | RING_F_SC_DEQ);
+       if (!cmd_queue->complete_burst) {
+               DPAA_QDMA_ERR("%s create failed!", nm);
+               rte_free(cmd_queue->pending_desc);
                rte_free(cmd_queue->ft);
                rte_free(cmd_queue->cq);
+               rte_free(cmd_queue->df);
                return -ENOMEM;
        }
+       sprintf(nm, "complete-desc_ring_%d_%d",
+               block_id, queue_id);
+       cmd_queue->complete_desc = rte_ring_create(nm,
+               FSL_QDMA_MAX_DESC_NUM * 2, 0,
+               RING_F_SP_ENQ | RING_F_SC_DEQ);
+       if (!cmd_queue->complete_desc) {
+               DPAA_QDMA_ERR("%s create failed!", nm);
+               rte_ring_free(cmd_queue->complete_burst);
+               rte_free(cmd_queue->pending_desc);
+               rte_free(cmd_queue->ft);
+               rte_free(cmd_queue->cq);
+               rte_free(cmd_queue->df);
+               return -ENOMEM;
+       }
+       sprintf(nm, "complete-pool-desc_ring_%d_%d",
+               block_id, queue_id);
+       cmd_queue->complete_pool = rte_ring_create(nm,
+               FSL_QDMA_MAX_DESC_NUM * 2, 0,
+               RING_F_SP_ENQ | RING_F_SC_DEQ);
+       if (!cmd_queue->complete_pool) {
+               DPAA_QDMA_ERR("%s create failed!", nm);
+               rte_ring_free(cmd_queue->complete_desc);
+               rte_ring_free(cmd_queue->complete_burst);
+               rte_free(cmd_queue->pending_desc);
+               rte_free(cmd_queue->ft);
+               rte_free(cmd_queue->cq);
+               rte_free(cmd_queue->df);
+               return -ENOMEM;
+       }
+
        memset(&cmd_queue->stats, 0, sizeof(struct rte_dma_stats));
+       cmd_queue->pending_max = FSL_QDMA_MAX_DESC_NUM;
 
        return 0;
 }
@@ -270,6 +323,10 @@ fsl_qdma_free_cmdq_res(struct fsl_qdma_queue *queue)
        rte_free(queue->df);
        rte_free(queue->ft);
        rte_free(queue->cq);
+       rte_free(queue->pending_desc);
+       rte_ring_free(queue->complete_burst);
+       rte_ring_free(queue->complete_desc);
+       rte_ring_free(queue->complete_pool);
 }
 
 static void
@@ -289,9 +346,10 @@ fsl_qdma_prep_status_queue(struct fsl_qdma_engine *fsl_qdma,
        status = &fsl_qdma->stat_queues[block_id];
        status->engine = fsl_qdma;
 
-       status_size = QDMA_STATUS_SIZE * sizeof(struct fsl_qdma_format);
+       status_size = QDMA_STATUS_SIZE *
+               sizeof(struct fsl_qdma_comp_cmd_desc);
 
-       status->cq = dma_pool_alloc(status_size,
+       status->cq = dma_pool_alloc(NULL, status_size,
                status_size, &status->bus_addr);
 
        if (!status->cq)
@@ -352,31 +410,116 @@ fsl_qdma_halt(struct fsl_qdma_engine *fsl_qdma)
        return 0;
 }
 
+static void
+fsl_qdma_data_validation(struct fsl_qdma_desc *desc[],
+       uint8_t num, struct fsl_qdma_queue *fsl_queue)
+{
+       uint32_t i, j;
+       uint8_t *v_src, *v_dst;
+       char err_msg[512];
+       int offset;
+
+       if (likely(!s_data_validation))
+               return;
+
+       offset = sprintf(err_msg, "Fatal TC%d/queue%d: ",
+               fsl_queue->block_id,
+               fsl_queue->queue_id);
+       for (i = 0; i < num; i++) {
+               v_src = rte_mem_iova2virt(desc[i]->src);
+               v_dst = rte_mem_iova2virt(desc[i]->dst);
+               for (j = 0; j < desc[i]->len; j++) {
+                       if (v_src[j] != v_dst[j]) {
+                               sprintf(&err_msg[offset],
+                                       "job[%"PRId64"]:src(%p)[%d](%d)!=dst(%p)[%d](%d)",
+                                       desc[i]->flag, v_src, j, v_src[j],
+                                       v_dst, j, v_dst[j]);
+                               DPAA_QDMA_ERR("%s, stop validating!\r\n",
+                                       err_msg);
+                               return;
+                       }
+               }
+       }
+}
+
 static int
-fsl_qdma_queue_transfer_complete(struct fsl_qdma_queue *fsl_queue,
-       const uint16_t nb_cpls,
-       enum rte_dma_status_code *status)
+fsl_qdma_queue_drain(struct fsl_qdma_queue *fsl_queue)
 {
        uint32_t reg;
-       int count = 0;
+       int count = 0, ret;
        uint8_t *block = fsl_queue->block_vir;
+       uint16_t *dq_complete, drain_num = 0;
+       struct fsl_qdma_desc *desc[FSL_QDMA_SG_MAX_ENTRY];
 
-       while (count < nb_cpls) {
+       while (1) {
+               if (rte_ring_free_count(fsl_queue->complete_pool) <
+                       (FSL_QDMA_SG_MAX_ENTRY * 2))
+                       break;
                reg = qdma_readl_be(block + FSL_QDMA_BSQSR);
                if (reg & FSL_QDMA_BSQSR_QE_BE)
-                       return count;
+                       break;
 
                qdma_writel_be(FSL_QDMA_BSQMR_DI, block + FSL_QDMA_BSQMR);
-               if (status != NULL)
-                       status[count] = RTE_DMA_STATUS_SUCCESSFUL;
-               fsl_queue->complete++;
-               if (unlikely(fsl_queue->complete >= fsl_queue->n_cq))
-                       fsl_queue->complete = 0;
+               ret = rte_ring_dequeue(fsl_queue->complete_burst,
+                       (void **)&dq_complete);
+               if (ret)
+                       DPAA_QDMA_ERR("DQ desc number failed!\n");
+
+               ret = rte_ring_dequeue_bulk(fsl_queue->complete_desc,
+                       (void **)desc, *dq_complete, NULL);
+               if (ret != (*dq_complete)) {
+                       DPAA_QDMA_ERR("DQ %d descs failed!(%d)\n",
+                               *dq_complete, ret);
+               }
+
+               fsl_qdma_data_validation(desc, *dq_complete, fsl_queue);
+
+               ret = rte_ring_enqueue_bulk(fsl_queue->complete_pool,
+                       (void **)desc, (*dq_complete), NULL);
+               if (ret != (*dq_complete)) {
+                       DPAA_QDMA_ERR("EQ %d descs to return queue failed!(%d)\n",
+                               *dq_complete, ret);
+               }
+
+               drain_num += *dq_complete;
+               fsl_queue->complete_start =
+                       (fsl_queue->complete_start + (*dq_complete)) &
+                       (fsl_queue->pending_max - 1);
                fsl_queue->stats.completed++;
 
                count++;
        }
-       return count;
+
+       return drain_num;
+}
+
+static int
+fsl_qdma_queue_transfer_complete(struct fsl_qdma_queue *fsl_queue,
+       const uint16_t nb_cpls, uint16_t *last_idx,
+       enum rte_dma_status_code *status)
+{
+       int ret;
+       uint16_t dq_num = 0, i;
+       struct fsl_qdma_desc *desc_complete[nb_cpls];
+
+       ret = fsl_qdma_queue_drain(fsl_queue);
+       if (ret < 0) {
+               DPAA_QDMA_ERR("Drain TX%d/Q%d failed!(%d)",
+                       fsl_queue->block_id, fsl_queue->queue_id,
+                       ret);
+       }
+
+       dq_num = rte_ring_dequeue_burst(fsl_queue->complete_pool,
+               (void **)desc_complete, nb_cpls, NULL);
+       for (i = 0; i < dq_num; i++)
+               last_idx[i] = desc_complete[i]->flag;
+
+       if (status) {
+               for (i = 0; i < dq_num; i++)
+                       status[i] = RTE_DMA_STATUS_SUCCESSFUL;
+       }
+
+       return dq_num;
 }
 
 static int
@@ -473,87 +616,253 @@ fsl_qdma_reg_init(struct fsl_qdma_engine *fsl_qdma)
 }
 
 static int
-fsl_qdma_enqueue_desc(struct fsl_qdma_queue *fsl_queue,
-       uint64_t flags, dma_addr_t dst,
-       dma_addr_t src, size_t len)
+fsl_qdma_enqueue_desc_to_ring(struct fsl_qdma_queue *fsl_queue,
+       int is_burst)
+{
+       uint16_t i, num = fsl_queue->pending_num, idx, start;
+       int ret;
+
+       num = is_burst ? fsl_queue->pending_num : 1;
+
+       fsl_queue->desc_in_hw[fsl_queue->ci] = num;
+       ret = rte_ring_enqueue(fsl_queue->complete_burst,
+                       &fsl_queue->desc_in_hw[fsl_queue->ci]);
+       if (ret) {
+               DPAA_QDMA_ERR("%s: Queue is full, try dequeue first",
+                       __func__);
+               DPAA_QDMA_ERR("%s: submitted:%"PRIu64", completed:%"PRIu64"",
+                       __func__, fsl_queue->stats.submitted,
+                       fsl_queue->stats.completed);
+               return ret;
+       }
+       start = fsl_queue->pending_start;
+       for (i = 0; i < num; i++) {
+               idx = (start + i) & (fsl_queue->pending_max - 1);
+               ret = rte_ring_enqueue(fsl_queue->complete_desc,
+                               &fsl_queue->pending_desc[idx]);
+               if (ret) {
+                       DPAA_QDMA_ERR("Descriptors eq failed!\r\n");
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+static int
+fsl_qdma_enqueue_desc_single(struct fsl_qdma_queue *fsl_queue,
+       dma_addr_t dst, dma_addr_t src, size_t len)
 {
        uint8_t *block = fsl_queue->block_vir;
-       struct fsl_qdma_format *csgf_src, *csgf_dest;
+       struct fsl_qdma_comp_sg_desc *csgf_src, *csgf_dest;
        struct fsl_qdma_cmpd_ft *ft;
-#ifdef RTE_DMA_DPAA_ERRATA_ERR050757
-       struct fsl_qdma_sdf *sdf;
-       uint32_t cfg = 0;
-#endif
+       int ret;
 
-#ifdef CONFIG_RTE_DMA_DPAA_ERR_CHK
-       uint32_t reg;
+       ft = fsl_queue->ft[fsl_queue->ci];
+       csgf_src = &ft->desc_sbuf;
+       csgf_dest = &ft->desc_dbuf;
+       qdma_desc_sge_addr_set64(csgf_src, src);
+       csgf_src->length = len;
+       csgf_src->extion = 0;
+       qdma_desc_sge_addr_set64(csgf_dest, dst);
+       csgf_dest->length = len;
+       csgf_dest->extion = 0;
+       /* This entry is the last entry. */
+       csgf_dest->final = 1;
+
+       ret = fsl_qdma_enqueue_desc_to_ring(fsl_queue, 0);
+       if (ret)
+               return ret;
+       fsl_queue->ci = (fsl_queue->ci + 1) & (fsl_queue->n_cq - 1);
+
+       qdma_writel(fsl_queue->le_cqmr | FSL_QDMA_BCQMR_EI,
+               block + FSL_QDMA_BCQMR(fsl_queue->queue_id));
+       fsl_queue->stats.submitted++;
+
+       return 0;
+}
+
+static int
+fsl_qdma_enqueue_overflow(struct fsl_qdma_queue *fsl_queue)
+{
+       int overflow = 0, drain;
+       uint32_t reg, check_num, drain_num;
+       uint8_t *block = fsl_queue->block_vir;
+       const struct rte_dma_stats *st = &fsl_queue->stats;
 
-       /* retrieve and store the register value in big endian
-        * to avoid bits swap
-        */
-       reg = qdma_readl_be(block +
+       check_num = 0;
+overflow_check:
+       if (unlikely(s_hw_err_check)) {
+               reg = qdma_readl_be(block +
                         FSL_QDMA_BCQSR(fsl_queue->queue_id));
-       if (reg & (FSL_QDMA_BCQSR_QF_XOFF_BE)) {
-               DPAA_QDMA_ERR("QDMA Engine is busy\n");
-               return -EBUSY;
+               overflow = (reg & FSL_QDMA_BCQSR_QF_XOFF_BE) ?
+                       1 : 0;
+       } else {
+               overflow = (fsl_qdma_queue_bd_in_hw(fsl_queue) >=
+                       QDMA_QUEUE_CR_WM) ? 1 : 0;
+       }
+
+       if (likely(!overflow))
+               return 0;
+
+       DPAA_QDMA_ERR("TC%d/Q%d submitted(%"PRIu64")-completed(%"PRIu64") >= %d",
+               fsl_queue->block_id, fsl_queue->queue_id,
+               st->submitted, st->completed, QDMA_QUEUE_CR_WM);
+       drain_num = 0;
+
+drain_again:
+       drain = fsl_qdma_queue_drain(fsl_queue);
+       if (drain <= 0) {
+               drain_num++;
+               if (drain_num > 100) {
+                       DPAA_QDMA_ERR("TC%d/Q%d failed drain, %"PRIu64" bd in HW.",
+                               fsl_queue->block_id, fsl_queue->queue_id,
+                               st->submitted - st->completed);
+                       return -ENOSPC;
+               }
+               goto drain_again;
        }
-#else
-       /* check whether critical watermark level reached,
-        * below check is valid for only single queue per block
-        */
-       if (fsl_qdma_queue_bd_in_hw(fsl_queue) >= QDMA_QUEUE_CR_WM) {
-               DPAA_QDMA_DEBUG("Queue is full, try dequeue first\n");
+       check_num++;
+       if (check_num > 10) {
+               DPAA_QDMA_ERR("TC%d/Q%d failed drain, %"PRIu64" bd in HW.",
+                       fsl_queue->block_id, fsl_queue->queue_id,
+                       st->submitted - st->completed);
                return -ENOSPC;
        }
+       goto overflow_check;
+
+       return -ENOSPC;
+}
+
+static int
+fsl_qdma_enqueue_desc(struct fsl_qdma_queue *fsl_queue)
+{
+       uint8_t *block = fsl_queue->block_vir, i;
+       struct fsl_qdma_comp_sg_desc *csgf_src, *csgf_dest;
+       struct fsl_qdma_cmpd_ft *ft;
+       uint32_t total_len = 0;
+       uint8_t num = fsl_queue->pending_num;
+       uint16_t start = fsl_queue->pending_start, idx;
+       int ret;
+#ifdef RTE_DMA_DPAA_ERRATA_ERR050757
+       struct fsl_qdma_sdf *sdf;
 #endif
 
-       if (unlikely(fsl_queue->pending == fsl_queue->n_cq)) {
-               DPAA_QDMA_DEBUG("Queue is full, try dma submit first\n");
-               return -ENOSPC;
-       }
+       ret = fsl_qdma_enqueue_overflow(fsl_queue);
+       if (unlikely(ret))
+               return ret;
 
        ft = fsl_queue->ft[fsl_queue->ci];
        csgf_src = &ft->desc_sbuf;
        csgf_dest = &ft->desc_dbuf;
 #ifdef RTE_DMA_DPAA_ERRATA_ERR050757
-       sdf = fsl_queue->df[fsl_queue->ci];
-       sdf->cmd = rte_cpu_to_le_32(FSL_QDMA_CMD_RWTTYPE <<
-                       FSL_QDMA_CMD_RWTTYPE_OFFSET);
+       sdf = &fsl_queue->df[fsl_queue->ci]->sdf;
+       sdf->srttype = FSL_QDMA_CMD_RWTTYPE;
 #ifdef RTE_DMA_DPAA_ERRATA_ERR050265
-       sdf->cmd |= rte_cpu_to_le_32(FSL_QDMA_CMD_PF);
+       sdf->prefetch = 1;
 #endif
-       if (len > FSL_QDMA_CMD_SSS_DISTANCE) {
-               sdf->cmd |= rte_cpu_to_le_32(FSL_QDMA_CMD_SSEN);
-               cfg |= rte_cpu_to_le_32(FSL_QDMA_CMD_SSS_STRIDE <<
-                                       FSL_QDMA_CFG_SSS_OFFSET |
-                                       FSL_QDMA_CMD_SSS_DISTANCE);
-               sdf->cfg = cfg;
-       } else
-               sdf->cfg = 0;
 #endif
-       qdma_desc_addr_set64(csgf_src, src);
-       qdma_csgf_set_len(csgf_src, len);
-       qdma_desc_addr_set64(csgf_dest, dst);
-       qdma_csgf_set_len(csgf_dest, len);
-       /* This entry is the last entry. */
-       qdma_csgf_set_f(csgf_dest, len);
-       fsl_queue->ci++;
 
-       if (fsl_queue->ci == fsl_queue->n_cq)
-               fsl_queue->ci = 0;
+       if (num == 1) {
+#ifdef RTE_DMA_DPAA_ERRATA_ERR050757
+               if (fsl_queue->pending_desc[start].len >
+                       FSL_QDMA_CMD_SSS_DISTANCE) {
+                       sdf->ssen = 1;
+                       sdf->sss = FSL_QDMA_CMD_SSS_STRIDE;
+                       sdf->ssd = FSL_QDMA_CMD_SSS_DISTANCE;
+               } else {
+                       sdf->sss = 0;
+                       sdf->ssd = 0;
+               }
+#endif
+               ret = fsl_qdma_enqueue_desc_single(fsl_queue,
+                       fsl_queue->pending_desc[start].dst,
+                       fsl_queue->pending_desc[start].src,
+                       fsl_queue->pending_desc[start].len);
+               if (!ret) {
+                       fsl_queue->pending_start =
+                               (start + 1) & (fsl_queue->pending_max - 1);
+                       fsl_queue->pending_num = 0;
+               }
+               return ret;
+       } else if (s_sg_disable) {
+               while (fsl_queue->pending_num > 0) {
+#ifdef RTE_DMA_DPAA_ERRATA_ERR050757
+                       if (fsl_queue->pending_desc[start].len >
+                               FSL_QDMA_CMD_SSS_DISTANCE) {
+                               sdf->ssen = 1;
+                               sdf->sss = FSL_QDMA_CMD_SSS_STRIDE;
+                               sdf->ssd = FSL_QDMA_CMD_SSS_DISTANCE;
+                       } else {
+                               sdf->sss = 0;
+                               sdf->ssd = 0;
+                       }
+#endif
+                       ret = fsl_qdma_enqueue_desc_single(fsl_queue,
+                               fsl_queue->pending_desc[start].dst,
+                               fsl_queue->pending_desc[start].src,
+                               fsl_queue->pending_desc[start].len);
+                       if (!ret) {
+                               start = (start + 1) &
+                                       (fsl_queue->pending_max - 1);
+                               fsl_queue->pending_start = start;
+                               fsl_queue->pending_num--;
+                       } else {
+                               DPAA_QDMA_ERR("Eq pending desc failed(%d)",
+                                       ret);
+                               return -EIO;
+                       }
+               }
 
-       if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
-               qdma_writel(fsl_queue->le_cqmr | FSL_QDMA_BCQMR_EI,
-                       block + FSL_QDMA_BCQMR(fsl_queue->queue_id));
-               fsl_queue->stats.submitted++;
+               return 0;
+       }
+       qdma_desc_sge_addr_set64(csgf_src, ft->phy_ssge);
+       csgf_src->extion = 1;
+       qdma_desc_sge_addr_set64(csgf_dest, ft->phy_dsge);
+       csgf_dest->extion = 1;
+       /* This entry is the last entry. */
+       csgf_dest->final = 1;
+       for (i = 0; i < num; i++) {
+               idx = (start + i) & (fsl_queue->pending_max - 1);
+               qdma_desc_sge_addr_set64(&ft->desc_ssge[i],
+                       fsl_queue->pending_desc[idx].src);
+               ft->desc_ssge[i].length = fsl_queue->pending_desc[idx].len;
+               ft->desc_ssge[i].final = 0;
+               qdma_desc_sge_addr_set64(&ft->desc_dsge[i],
+                       fsl_queue->pending_desc[idx].dst);
+               ft->desc_dsge[i].length = fsl_queue->pending_desc[idx].len;
+               ft->desc_dsge[i].final = 0;
+               total_len += fsl_queue->pending_desc[idx].len;
+       }
+       ft->desc_ssge[num - 1].final = 1;
+       ft->desc_dsge[num - 1].final = 1;
+       csgf_src->length = total_len;
+       csgf_dest->length = total_len;
+#ifdef RTE_DMA_DPAA_ERRATA_ERR050757
+       if (total_len > FSL_QDMA_CMD_SSS_DISTANCE) {
+               sdf->ssen = 1;
+               sdf->sss = FSL_QDMA_CMD_SSS_STRIDE;
+               sdf->ssd = FSL_QDMA_CMD_SSS_DISTANCE;
        } else {
-               fsl_queue->pending++;
+               sdf->sss = 0;
+               sdf->ssd = 0;
        }
+#endif
+       ret = fsl_qdma_enqueue_desc_to_ring(fsl_queue, 1);
+       if (ret)
+               return ret;
+
+       fsl_queue->ci = (fsl_queue->ci + 1) & (fsl_queue->n_cq - 1);
 
-       if (fsl_queue->ci)
-               return fsl_queue->ci - 1;
-       else
-               return fsl_queue->n_cq;
+       qdma_writel(fsl_queue->le_cqmr | FSL_QDMA_BCQMR_EI,
+               block + FSL_QDMA_BCQMR(fsl_queue->queue_id));
+       fsl_queue->stats.submitted++;
+
+       fsl_queue->pending_start =
+               (start + num) & (fsl_queue->pending_max - 1);
+       fsl_queue->pending_num = 0;
+
+       return 0;
 }
 
 static int
@@ -564,8 +873,9 @@ dpaa_info_get(const struct rte_dma_dev *dev, struct rte_dma_info *dev_info,
 
        dev_info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM |
                RTE_DMA_CAPA_SILENT | RTE_DMA_CAPA_OPS_COPY;
+       dev_info->dev_capa |= RTE_DMA_CAPA_DPAAX_QDMA_FLAGS_INDEX;
        dev_info->max_vchans = fsl_qdma->n_queues;
-       dev_info->max_desc = QDMA_QUEUE_SIZE;
+       dev_info->max_desc = FSL_QDMA_MAX_DESC_NUM;
        dev_info->min_desc = QDMA_QUEUE_SIZE;
 
        return 0;
@@ -651,16 +961,11 @@ dpaa_qdma_submit(void *dev_private, uint16_t vchan)
 {
        struct fsl_qdma_engine *fsl_qdma = dev_private;
        struct fsl_qdma_queue *fsl_queue = fsl_qdma->chan[vchan];
-       void *block = fsl_queue->block_vir;
 
-       while (fsl_queue->pending) {
-               qdma_writel(fsl_queue->le_cqmr | FSL_QDMA_BCQMR_EI,
-                       block + FSL_QDMA_BCQMR(fsl_queue->queue_id));
-               fsl_queue->pending--;
-               fsl_queue->stats.submitted++;
-       }
+       if (!fsl_queue->pending_num)
+               return 0;
 
-       return 0;
+       return fsl_qdma_enqueue_desc(fsl_queue);
 }
 
 static int
@@ -670,9 +975,86 @@ dpaa_qdma_enqueue(void *dev_private, uint16_t vchan,
 {
        struct fsl_qdma_engine *fsl_qdma = dev_private;
        struct fsl_qdma_queue *fsl_queue = fsl_qdma->chan[vchan];
+       uint16_t start = fsl_queue->pending_start;
+       uint8_t pending = fsl_queue->pending_num;
+       uint16_t idx;
        int ret;
 
-       ret = fsl_qdma_enqueue_desc(fsl_queue, flags, dst, src, length);
+       if (pending >= FSL_QDMA_SG_MAX_ENTRY) {
+               DPAA_QDMA_ERR("Too many pending jobs on queue%d",
+                       vchan);
+               return -ENOSPC;
+       }
+       idx = (start + pending) & (fsl_queue->pending_max - 1);
+
+       fsl_queue->pending_desc[idx].src = src;
+       fsl_queue->pending_desc[idx].dst = dst;
+       fsl_queue->pending_desc[idx].flag =
+               DPAA_QDMA_IDX_FROM_FLAG(flags);
+       fsl_queue->pending_desc[idx].len = length;
+       fsl_queue->pending_num++;
+
+       if (!(flags & RTE_DMA_OP_FLAG_SUBMIT))
+               return idx;
+
+       ret = fsl_qdma_enqueue_desc(fsl_queue);
+       if (!ret)
+               return fsl_queue->pending_start;
+
+       return ret;
+}
+
+static int
+dpaa_qdma_copy_sg(void *dev_private,
+       uint16_t vchan,
+       const struct rte_dma_sge *src,
+       const struct rte_dma_sge *dst,
+       uint16_t nb_src, uint16_t nb_dst,
+       uint64_t flags)
+{
+       int ret;
+       uint16_t i, start, idx;
+       struct fsl_qdma_engine *fsl_qdma = dev_private;
+       struct fsl_qdma_queue *fsl_queue = fsl_qdma->chan[vchan];
+       const uint16_t *idx_addr = NULL;
+
+       if (unlikely(nb_src != nb_dst)) {
+               DPAA_QDMA_ERR("%s: nb_src(%d) != nb_dst(%d) on  queue%d",
+                       __func__, nb_src, nb_dst, vchan);
+               return -EINVAL;
+       }
+
+       if ((fsl_queue->pending_num + nb_src) > FSL_QDMA_SG_MAX_ENTRY) {
+               DPAA_QDMA_ERR("Too many pending jobs on queue%d",
+                       vchan);
+               return -ENOSPC;
+       }
+       start = fsl_queue->pending_start + fsl_queue->pending_num;
+       start = start & (fsl_queue->pending_max - 1);
+       idx = start;
+
+       idx_addr = DPAA_QDMA_IDXADDR_FROM_SG_FLAG(flags);
+
+       for (i = 0; i < nb_src; i++) {
+               if (unlikely(src[i].length != dst[i].length)) {
+                       DPAA_QDMA_ERR("src.len(%d) != dst.len(%d)",
+                               src[i].length, dst[i].length);
+                       return -EINVAL;
+               }
+               idx = (start + i) & (fsl_queue->pending_max - 1);
+               fsl_queue->pending_desc[idx].src = src[i].addr;
+               fsl_queue->pending_desc[idx].dst = dst[i].addr;
+               fsl_queue->pending_desc[idx].len = dst[i].length;
+               fsl_queue->pending_desc[idx].flag = idx_addr[i];
+       }
+       fsl_queue->pending_num += nb_src;
+
+       if (!(flags & RTE_DMA_OP_FLAG_SUBMIT))
+               return idx;
+
+       ret = fsl_qdma_enqueue_desc(fsl_queue);
+       if (!ret)
+               return fsl_queue->pending_start;
 
        return ret;
 }
@@ -683,7 +1065,7 @@ dpaa_qdma_block_dequeue(struct fsl_qdma_engine *fsl_qdma,
 {
        struct fsl_qdma_status_queue *stat_queue;
        struct fsl_qdma_queue *cmd_queue;
-       struct fsl_qdma_format *cq;
+       struct fsl_qdma_comp_cmd_desc *cq;
        uint16_t start, count = 0;
        uint8_t qid;
        int ret;
@@ -697,9 +1079,6 @@ dpaa_qdma_block_dequeue(struct fsl_qdma_engine *fsl_qdma,
                if (ret == true) {
                        cmd_queue = &fsl_qdma->cmd_queues[block_id][qid];
                        cmd_queue->stats.completed++;
-                       cmd_queue->complete++;
-                       if (unlikely(cmd_queue->complete == cmd_queue->n_cq))
-                               cmd_queue->complete = 0;
                        start++;
                        if (unlikely(start == stat_queue->n_cq))
                                start = 0;
@@ -713,16 +1092,81 @@ dpaa_qdma_block_dequeue(struct fsl_qdma_engine *fsl_qdma,
        return count;
 }
 
+/*
+ * Append str to the message held in msg, starting at offset, without
+ * ever writing past size bytes. Returns the new offset; on truncation
+ * it is clamped to the position of the terminating NUL.
+ */
+static size_t
+dpaa_qdma_err_msg_add(char *msg, size_t size, size_t offset,
+       const char *str)
+{
+       int len;
+
+       if (offset >= size)
+               return offset;
+       len = snprintf(&msg[offset], size - offset, "%s", str);
+       if (len < 0)
+               return offset;
+       if ((size_t)len >= size - offset)
+               return size - 1;
+
+       return offset + len;
+}
+
+/**
+ * Report and acknowledge an error latched in the qDMA error registers.
+ *
+ * Reads dedr_be; when it is zero nothing else is touched. Otherwise the
+ * set error flags are logged, the captured command descriptor words
+ * (copied in reversed word order), deccqidr_be and decbr are read, the
+ * decoded command and its block/queue are logged, and the dedr value
+ * that was read is written back (presumably write-1-to-clear - confirm
+ * against the qDMA reference manual).
+ *
+ * @param reg
+ *   Error register block of the engine.
+ *
+ * @return
+ *   0 when no error is latched, -EIO otherwise.
+ */
+static int
+dpaa_qdma_err_handle(struct fsl_qdma_err_reg *reg)
+{
+       struct fsl_qdma_err_reg local;
+       size_t i, offset;
+       char err_msg[512];
+
+       local.dedr_be = rte_read32(&reg->dedr_be);
+       if (!local.dedr_be)
+               return 0;
+
+       err_msg[0] = '\0';
+       offset = dpaa_qdma_err_msg_add(err_msg, sizeof(err_msg), 0,
+               "ERR detected:\n");
+       if (local.dedr.ere) {
+               offset = dpaa_qdma_err_msg_add(err_msg, sizeof(err_msg),
+                       offset, " ere(Enqueue rejection error)\n");
+       }
+       if (local.dedr.dde) {
+               offset = dpaa_qdma_err_msg_add(err_msg, sizeof(err_msg),
+                       offset, " dde(Destination descriptor error)\n");
+       }
+       if (local.dedr.sde) {
+               offset = dpaa_qdma_err_msg_add(err_msg, sizeof(err_msg),
+                       offset, " sde(Source descriptor error)\n");
+       }
+       if (local.dedr.cde) {
+               offset = dpaa_qdma_err_msg_add(err_msg, sizeof(err_msg),
+                       offset, " cde(Command descriptor error)\n");
+       }
+       if (local.dedr.wte) {
+               offset = dpaa_qdma_err_msg_add(err_msg, sizeof(err_msg),
+                       offset, " wte(Write transaction error)\n");
+       }
+       if (local.dedr.rte) {
+               offset = dpaa_qdma_err_msg_add(err_msg, sizeof(err_msg),
+                       offset, " rte(Read transaction error)\n");
+       }
+       if (local.dedr.me) {
+               dpaa_qdma_err_msg_add(err_msg, sizeof(err_msg),
+                       offset, " me(Multiple errors of the same type)\n");
+       }
+       DPAA_QDMA_ERR("%s", err_msg);
+
+       /* The captured command words are stored in reversed order so that
+        * local.err_cmd can decode them as a descriptor.
+        */
+       for (i = 0; i < FSL_QDMA_DECCD_ERR_NUM; i++) {
+               local.deccd_le[FSL_QDMA_DECCD_ERR_NUM - 1 - i] =
+                       QDMA_IN(&reg->deccd_le[i]);
+       }
+       local.deccqidr_be = rte_read32(&reg->deccqidr_be);
+       local.decbr = rte_read32(&reg->decbr);
+
+       snprintf(err_msg, sizeof(err_msg),
+               "ERR command:\n"
+               " status: %02x, ser: %d, offset:%d, fmt: %02x\n"
+               " address: 0x%"PRIx64", queue: %d, dd: %02x\n",
+               local.err_cmd.status, local.err_cmd.ser,
+               local.err_cmd.offset, local.err_cmd.format,
+               (uint64_t)local.err_cmd.addr_hi << 32 |
+               local.err_cmd.addr_lo,
+               local.err_cmd.queue, local.err_cmd.dd);
+       DPAA_QDMA_ERR("%s", err_msg);
+       DPAA_QDMA_ERR("ERR command block: %d, queue: %d\n",
+               local.deccqidr.block, local.deccqidr.queue);
+
+       /* Acknowledge exactly the error bits that were reported above. */
+       rte_write32(local.dedr_be, &reg->dedr_be);
+
+       return -EIO;
+}
+
 static uint16_t
 dpaa_qdma_dequeue_status(void *dev_private, uint16_t vchan,
        const uint16_t nb_cpls, uint16_t *last_idx,
        enum rte_dma_status_code *st)
 {
        struct fsl_qdma_engine *fsl_qdma = dev_private;
-       int ret;
+       int ret, err;
        struct fsl_qdma_queue *fsl_queue = fsl_qdma->chan[vchan];
        void *status = fsl_qdma->status_base;
-       int intr;
 
        if (unlikely(fsl_qdma->is_slient)) {
                DPAA_QDMA_WARN("Can't dq in silent mode\n");
@@ -734,55 +1178,27 @@ dpaa_qdma_dequeue_status(void *dev_private, uint16_t 
vchan,
                                fsl_queue->block_id);
        } else {
                ret = fsl_qdma_queue_transfer_complete(fsl_queue,
-                               nb_cpls, st);
+                               nb_cpls, last_idx, st);
        }
-       if (!ret) {
-               intr = qdma_readl_be(status + FSL_QDMA_DEDR);
-               if (intr) {
-#ifdef CONFIG_RTE_DMA_DPAA_ERR_CHK
-                       DPAA_QDMA_ERR("DMA transaction error! %x\n", intr);
-                       intr = qdma_readl(status + FSL_QDMA_DECFDW0R);
-                       DPAA_QDMA_INFO("reg FSL_QDMA_DECFDW0R %x\n", intr);
-                       intr = qdma_readl(status + FSL_QDMA_DECFDW1R);
-                       DPAA_QDMA_INFO("reg FSL_QDMA_DECFDW1R %x\n", intr);
-                       intr = qdma_readl(status + FSL_QDMA_DECFDW2R);
-                       DPAA_QDMA_INFO("reg FSL_QDMA_DECFDW2R %x\n", intr);
-                       intr = qdma_readl(status + FSL_QDMA_DECFDW3R);
-                       DPAA_QDMA_INFO("reg FSL_QDMA_DECFDW3R %x\n", intr);
-                       intr = qdma_readl(status + FSL_QDMA_DECFQIDR);
-                       DPAA_QDMA_INFO("reg FSL_QDMA_DECFQIDR %x\n", intr);
-                       intr = qdma_readl(status + FSL_QDMA_DECBR);
-                       DPAA_QDMA_INFO("reg FSL_QDMA_DECBR %x\n", intr);
-#endif
-                       qdma_writel_be(0xbf,
-                                   status + FSL_QDMA_DEDR);
+       if (s_hw_err_check) {
+               err = dpaa_qdma_err_handle(status +
+                       FSL_QDMA_ERR_REG_STATUS_OFFSET);
+               if (err)
                        fsl_queue->stats.errors++;
-               }
-       }
-
-       if (last_idx) {
-               if (unlikely(!fsl_queue->complete))
-                       *last_idx = fsl_queue->n_cq - 1;
-               else
-                       *last_idx = fsl_queue->complete - 1;
        }
 
        return ret;
 }
 
-
 static uint16_t
 dpaa_qdma_dequeue(void *dev_private,
        uint16_t vchan, const uint16_t nb_cpls,
        uint16_t *last_idx, bool *has_error)
 {
        struct fsl_qdma_engine *fsl_qdma = dev_private;
-       int ret;
+       int ret, err;
        struct fsl_qdma_queue *fsl_queue = fsl_qdma->chan[vchan];
-#ifdef CONFIG_RTE_DMA_DPAA_ERR_CHK
        void *status = fsl_qdma->status_base;
-       int intr;
-#endif
 
        if (unlikely(fsl_qdma->is_slient)) {
                DPAA_QDMA_WARN("Can't dq in silent mode\n");
@@ -796,39 +1212,17 @@ dpaa_qdma_dequeue(void *dev_private,
                                fsl_queue->block_id);
        } else {
                ret = fsl_qdma_queue_transfer_complete(fsl_queue,
-                               nb_cpls, NULL);
+                               nb_cpls, last_idx, NULL);
        }
-#ifdef CONFIG_RTE_DMA_DPAA_ERR_CHK
-       if (!ret) {
-               intr = qdma_readl_be(status + FSL_QDMA_DEDR);
-               if (intr) {
-                       DPAA_QDMA_ERR("DMA transaction error! %x\n", intr);
-                       intr = qdma_readl(status + FSL_QDMA_DECFDW0R);
-                       DPAA_QDMA_INFO("reg FSL_QDMA_DECFDW0R %x\n", intr);
-                       intr = qdma_readl(status + FSL_QDMA_DECFDW1R);
-                       DPAA_QDMA_INFO("reg FSL_QDMA_DECFDW1R %x\n", intr);
-                       intr = qdma_readl(status + FSL_QDMA_DECFDW2R);
-                       DPAA_QDMA_INFO("reg FSL_QDMA_DECFDW2R %x\n", intr);
-                       intr = qdma_readl(status + FSL_QDMA_DECFDW3R);
-                       DPAA_QDMA_INFO("reg FSL_QDMA_DECFDW3R %x\n", intr);
-                       intr = qdma_readl(status + FSL_QDMA_DECFQIDR);
-                       DPAA_QDMA_INFO("reg FSL_QDMA_DECFQIDR %x\n", intr);
-                       intr = qdma_readl(status + FSL_QDMA_DECBR);
-                       DPAA_QDMA_INFO("reg FSL_QDMA_DECBR %x\n", intr);
-                       qdma_writel_be(0xbf,
-                                   status + FSL_QDMA_DEDR);
-                       intr = qdma_readl(status + FSL_QDMA_DEDR);
-                       *has_error = true;
+       if (s_hw_err_check) {
+               err = dpaa_qdma_err_handle(status +
+                       FSL_QDMA_ERR_REG_STATUS_OFFSET);
+               if (err) {
+                       if (has_error)
+                               *has_error = true;
                        fsl_queue->stats.errors++;
                }
        }
-#endif
-       if (last_idx) {
-               if (unlikely(!fsl_queue->complete))
-                       *last_idx = fsl_queue->n_cq - 1;
-               else
-                       *last_idx = fsl_queue->complete - 1;
-       }
 
        return ret;
 }
@@ -868,7 +1262,7 @@ dpaa_qdma_burst_capacity(const void *dev_private, uint16_t 
vchan)
        const struct fsl_qdma_engine *fsl_qdma = dev_private;
        struct fsl_qdma_queue *fsl_queue = fsl_qdma->chan[vchan];
 
-       return fsl_queue->n_cq - fsl_queue->pending;
+       return fsl_queue->pending_max - fsl_queue->pending_num;
 }
 
 static struct rte_dma_dev_ops dpaa_qdma_ops = {
@@ -891,6 +1285,15 @@ dpaa_qdma_init(struct rte_dma_dev *dmadev)
        int ret;
        uint32_t i, j, k;
 
+       if (getenv("DPAA_QDMA_DATA_VALIDATION"))
+               s_data_validation = 1;
+
+       if (getenv("DPAA_QDMA_HW_ERR_CHECK"))
+               s_hw_err_check = 1;
+
+       if (getenv("DPAA_QDMA_SG_DISABLE"))
+               s_sg_disable = 1;
+
        fsl_qdma->n_queues = QDMA_QUEUES * QDMA_BLOCKS;
        fsl_qdma->num_blocks = QDMA_BLOCKS;
        fsl_qdma->block_offset = QDMA_BLOCK_OFFSET;
@@ -981,6 +1384,7 @@ dpaa_qdma_probe(__rte_unused struct rte_dpaa_driver 
*dpaa_drv,
        dmadev->device = &dpaa_dev->device;
        dmadev->fp_obj->dev_private = dmadev->data->dev_private;
        dmadev->fp_obj->copy = dpaa_qdma_enqueue;
+       dmadev->fp_obj->copy_sg = dpaa_qdma_copy_sg;
        dmadev->fp_obj->submit = dpaa_qdma_submit;
        dmadev->fp_obj->completed = dpaa_qdma_dequeue;
        dmadev->fp_obj->completed_status = dpaa_qdma_dequeue_status;
diff --git a/drivers/dma/dpaa/dpaa_qdma.h b/drivers/dma/dpaa/dpaa_qdma.h
index a767da0a3f..f4535af3dd 100644
--- a/drivers/dma/dpaa/dpaa_qdma.h
+++ b/drivers/dma/dpaa/dpaa_qdma.h
@@ -83,29 +83,15 @@
 #define FSL_QDMA_CIRCULAR_DESC_SIZE_MAX        16384
 #define FSL_QDMA_QUEUE_NUM_MAX         8
 
+#define FSL_QDMA_COMP_SG_FORMAT                0x1
+
 #define FSL_QDMA_CMD_RWTTYPE           0x4
 #define FSL_QDMA_CMD_LWC               0x2
 
-#define FSL_QDMA_CMD_RWTTYPE_OFFSET    28
-#define FSL_QDMA_CMD_LWC_OFFSET                16
-#define FSL_QDMA_CMD_PF                        BIT(17)
-
-#define FSL_QDMA_CMD_SSEN              BIT(19)
 #define FSL_QDMA_CFG_SSS_OFFSET                12
 #define FSL_QDMA_CMD_SSS_STRIDE                128
 #define FSL_QDMA_CMD_SSS_DISTANCE      128
 
-#define QDMA_CCDF_STATUS               20
-#define QDMA_CCDF_OFFSET               20
-#define QDMA_CCDF_MASK                 GENMASK(28, 20)
-#define QDMA_CCDF_FOTMAT               BIT(29)
-#define QDMA_CCDF_SER                  BIT(30)
-
-#define QDMA_SG_FIN                    BIT(30)
-#define QDMA_SG_LEN_MASK               GENMASK(29, 0)
-
-#define COMMAND_QUEUE_OVERFLOW         10
-
 /* qdma engine attribute */
 #define QDMA_QUEUE_SIZE FSL_QDMA_CIRCULAR_DESC_SIZE_MIN
 #define QDMA_STATUS_SIZE QDMA_QUEUE_SIZE
@@ -132,64 +118,160 @@
        (((fsl_qdma_engine)->block_offset) * (x))
 
 /* qDMA Command Descriptor Formats */
-struct fsl_qdma_format {
-       uint32_t status; /* ser, status */
-       uint32_t cfg;   /* format, offset */
-       union {
-               struct {
-                       uint32_t addr_lo; /* low 32-bits of 40-bit address */
-                       uint8_t addr_hi; /* high 8-bits of 40-bit address */
-                       uint8_t __reserved1[2];
-                       uint8_t queue:3;
-                       uint8_t rsv:3;
-                       uint8_t dd:2;
-               };
-               uint64_t data;
-       };
-};
+struct fsl_qdma_comp_cmd_desc { /* bit-field view, 16 bytes packed */
+       uint8_t status;
+       uint32_t rsv0:22; /* rsv*: presumably reserved - confirm */
+       uint32_t ser:1;
+       uint32_t rsv1:21;
+       uint32_t offset:9;
+       uint32_t format:3;
+       uint32_t addr_lo; /* low 32 bits of the address */
+       uint8_t addr_hi; /* address bits above the low 32 */
+       uint16_t rsv3;
+       uint8_t queue:3; /* logged as "queue" by the error handler */
+       uint8_t rsv4:3;
+       uint8_t dd:2;
+} __rte_packed;
+
+struct fsl_qdma_comp_sg_desc { /* one SG/frame entry, 16 bytes packed */
+       uint32_t offset:13;
+       uint32_t rsv0:19;
+       uint32_t length:30; /* 30-bit length field */
+       uint32_t final:1; /* presumably marks the last entry - confirm */
+       uint32_t extion:1; /* presumably "extension" - confirm */
+       uint32_t addr_lo; /* low 32 bits of the address */
+       uint8_t addr_hi; /* address bits above the low 32 */
+       uint32_t rsv1:24;
+} __rte_packed;
 
-/* qDMA Source Descriptor Format */
 struct fsl_qdma_sdf {
-       uint32_t rev3;
-       uint32_t cfg; /* rev4, bit[0-11] - ssd, bit[12-23] sss */
-       uint32_t rev5;
-       uint32_t cmd;
-};
+       uint32_t rsv0; /* word 0 of the qDMA source descriptor */
+       uint32_t ssd:12; /* word 1, bit[0-11] */
+       uint32_t sss:12; /* word 1, bit[12-23] */
+       uint32_t rsv1:8;
+       uint32_t rsv2; /* word 2 */
+
+       uint32_t rsv3:17; /* word 3 (the former cmd word) starts here */
+       uint32_t prefetch:1;
+       uint32_t rsv4:1;
+       uint32_t ssen:1; /* presumably enables ssd/sss use - confirm */
+       uint32_t rthrotl:4;
+       uint32_t sqos:3;
+       uint32_t ns:1;
+       uint32_t srttype:4; /* last 4 bits of word 3 */
+} __rte_packed;
 
-/* qDMA Destination Descriptor Format */
 struct fsl_qdma_ddf {
-       uint32_t rev1;
-       uint32_t cfg; /* rev2, bit[0-11] - dsd, bit[12-23] - dss */
-       uint32_t rev3;
-       uint32_t cmd;
-};
+       uint32_t rsv0; /* word 0 of the qDMA destination descriptor */
+       uint32_t dsd:12; /* word 1, bit[0-11] */
+       uint32_t dss:12; /* word 1, bit[12-23] */
+       uint32_t rsv1:8;
+       uint32_t rsv2; /* word 2 */
+
+       uint16_t rsv3; /* first 16 bits of word 3 (the former cmd word) */
+       uint32_t lwc:2;
+       uint32_t rsv4:1;
+       uint32_t dsen:1; /* presumably enables dsd/dss use - confirm */
+       uint32_t wthrotl:4;
+       uint32_t dqos:3;
+       uint32_t ns:1;
+       uint32_t dwttype:4; /* last 4 bits of word 3 */
+} __rte_packed;
 
 struct fsl_qdma_df { /* source descriptor followed by destination one */
        struct fsl_qdma_sdf sdf;
        struct fsl_qdma_ddf ddf;
 };
 
+#define FSL_QDMA_SG_MAX_ENTRY RTE_DPAAX_QDMA_JOB_SUBMIT_MAX /* SG cap */
+#define FSL_QDMA_MAX_DESC_NUM (FSL_QDMA_SG_MAX_ENTRY * QDMA_QUEUE_SIZE)
 struct fsl_qdma_cmpd_ft {
-       struct fsl_qdma_format desc_buf;
-       struct fsl_qdma_format desc_sbuf;
-       struct fsl_qdma_format desc_dbuf;
+       struct fsl_qdma_comp_sg_desc desc_buf; /* 3 x 16-byte entries */
+       struct fsl_qdma_comp_sg_desc desc_sbuf;
+       struct fsl_qdma_comp_sg_desc desc_dbuf;
+       uint64_t cache_align[2]; /* pads the 3 entries above to 64 bytes */
+       struct fsl_qdma_comp_sg_desc desc_ssge[FSL_QDMA_SG_MAX_ENTRY];
+       struct fsl_qdma_comp_sg_desc desc_dsge[FSL_QDMA_SG_MAX_ENTRY];
+       uint64_t phy_ssge; /* presumably bus address of desc_ssge - confirm */
+       uint64_t phy_dsge; /* presumably bus address of desc_dsge - confirm */
+} __rte_packed;
+
+#define FSL_QDMA_ERR_REG_STATUS_OFFSET 0xe00 /* added to status_base */
+
+struct fsl_qdma_dedr_reg { /* error flags decoded by the error handler */
+       uint32_t me:1; /* Multiple errors of the same type */
+       uint32_t rsv0:1;
+       uint32_t rte:1; /* Read transaction error */
+       uint32_t wte:1; /* Write transaction error */
+       uint32_t cde:1; /* Command descriptor error */
+       uint32_t sde:1; /* Source descriptor error */
+       uint32_t dde:1; /* Destination descriptor error */
+       uint32_t ere:1; /* Enqueue rejection error */
+       uint32_t rsv1:24;
+};
+
+struct fsl_qdma_deccqidr_reg { /* origin of the command in error */
+       uint32_t rsv:27;
+       uint32_t block:2; /* logged as "ERR command block" */
+       uint32_t queue:3; /* logged as "queue" next to the block */
+};
+
+#define FSL_QDMA_DECCD_ERR_NUM /* 32-bit words per command desc */ \
+       (sizeof(struct fsl_qdma_comp_cmd_desc) / sizeof(uint32_t))
+
+struct fsl_qdma_err_reg { /* accessed at status_base + 0xe00 */
+       uint32_t deier;
+       union {
+               rte_be32_t dedr_be; /* raw 32-bit register value */
+               struct fsl_qdma_dedr_reg dedr; /* same, as error flags */
+       };
+       uint32_t rsv0[2];
+       union {
+               rte_le32_t deccd_le[FSL_QDMA_DECCD_ERR_NUM];
+               struct fsl_qdma_comp_cmd_desc err_cmd; /* decoded view */
+       };
+       uint32_t rsv1[4];
+       union {
+               rte_be32_t deccqidr_be; /* raw 32-bit register value */
+               struct fsl_qdma_deccqidr_reg deccqidr; /* block/queue */
+       };
+       rte_be32_t decbr; /* read by the error handler, not logged */
+};
+
+#define DPAA_QDMA_IDXADDR_FROM_SG_FLAG(flag) /* NULL if no address */ \
+       ((void *)((flag) - ((flag) & RTE_DPAAX_QDMA_SG_IDX_ADDR_MASK)))
+
+#define DPAA_QDMA_IDX_FROM_FLAG(flag) /* drops the low flag bits */ \
+       ((flag) >> RTE_DPAAX_QDMA_COPY_IDX_OFFSET)
+
+struct fsl_qdma_desc { /* one pending copy job of a queue */
+       rte_iova_t src; /* source address of the job */
+       rte_iova_t dst; /* destination address of the job */
+       uint64_t flag; /* SG path: value taken from the caller's array */
+       uint64_t len; /* SG path: dst[i].length (equal to the source) */
 };
 
 struct fsl_qdma_queue {
-       struct fsl_qdma_format *cmd_desc;
        int used;
        struct fsl_qdma_cmpd_ft **ft;
        uint16_t ci;
-       uint16_t complete;
+       struct rte_ring *complete_burst;
+       struct rte_ring *complete_desc;
+       struct rte_ring *complete_pool;
        uint16_t n_cq;
        uint8_t block_id;
        uint8_t queue_id;
        uint8_t channel_id;
        void *block_vir;
        uint32_t le_cqmr;
-       struct fsl_qdma_format *cq;
+       struct fsl_qdma_comp_cmd_desc *cq;
+       uint16_t desc_in_hw[QDMA_QUEUE_SIZE];
        struct rte_dma_stats stats;
-       uint8_t pending;
+       struct fsl_qdma_desc *pending_desc;
+       uint16_t pending_max;
+       uint16_t pending_start;
+       uint8_t pending_num;
+       uint16_t complete_start;
        dma_addr_t bus_addr;
        struct fsl_qdma_df **df;
        void *engine;
@@ -200,7 +282,7 @@ struct fsl_qdma_status_queue {
        uint16_t complete;
        uint8_t block_id;
        void *block_vir;
-       struct fsl_qdma_format *cq;
+       struct fsl_qdma_comp_cmd_desc *cq;
        struct rte_dma_stats stats;
        dma_addr_t bus_addr;
        void *engine;
-- 
2.25.1

Reply via email to