On 6/26/2026 2:47 AM, Raghavendra Ningoji wrote: > Implement the dmadev control path for the AMD AE4DMA PMD. > > This commit adds: > - dev_configure / vchan_setup: accept a single virtual channel per > dmadev and clamp the requested ring size to the hardware maximum > of 32 descriptors (rounded up to a power of two). > - dev_start / dev_stop / dev_close: program the per-queue control > register to enable/disable the hardware queue and release the > descriptor ring memzone on close. > - dev_info_get: advertise RTE_DMA_CAPA_MEM_TO_MEM and the fixed > ring depth.
It seems to declare support for a depth of 2~32, not a fixed depth. > - dev_dump: print the queue identifiers, ring layout and software > completion counters. > - stats_get / stats_reset: expose submitted / completed / errors > counters maintained by the driver. > - vchan_status: report IDLE / ACTIVE based on hardware read_idx vs > write_idx, and HALTED_ERROR when the queue is not enabled. > > The dmadev framework is wired through dev_ops in ae4dma_dmadev_create(). > > Signed-off-by: Raghavendra Ningoji <[email protected]> > --- > drivers/dma/ae4dma/ae4dma_dmadev.c | 211 +++++++++++++++++++++++++++++ > 1 file changed, 211 insertions(+) > > diff --git a/drivers/dma/ae4dma/ae4dma_dmadev.c > b/drivers/dma/ae4dma/ae4dma_dmadev.c > index 3d82f86906..607f288623 100644 > --- a/drivers/dma/ae4dma/ae4dma_dmadev.c > +++ b/drivers/dma/ae4dma/ae4dma_dmadev.c > @@ -53,6 +53,203 @@ ae4dma_queue_dma_zone_reserve(const char *queue_name, > socket_id, RTE_MEMZONE_IOVA_CONTIG, queue_size); > } > > +static int > +ae4dma_dev_configure(struct rte_dma_dev *dev __rte_unused, > + const struct rte_dma_conf *dev_conf, > + uint32_t conf_sz) > +{ > + if (sizeof(struct rte_dma_conf) != conf_sz) > + return -EINVAL; This may break ABI compatibility. > + > + if (dev_conf->nb_vchans != 1) > + return -EINVAL; > + > + return 0; > +} > + > +/* Setup a virtual channel for AE4DMA, only 1 vchan is supported per dmadev. > */ > +static int > +ae4dma_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan __rte_unused, > + const struct rte_dma_vchan_conf *qconf, uint32_t qconf_sz) > +{ > + struct ae4dma_dmadev *ae4dma = dev->fp_obj->dev_private; > + struct ae4dma_cmd_queue *cmd_q = &ae4dma->cmd_q; > + uint16_t max_desc = qconf->nb_desc; > + > + if (sizeof(struct rte_dma_vchan_conf) != qconf_sz) > + return -EINVAL; This may break ABI compatibility. > + > + if (max_desc < 2) > + return -EINVAL; No need to do this because rte_dma_vchan_setup already does it. 
> + > + if (!rte_is_power_of_2(max_desc)) > + max_desc = rte_align32pow2(max_desc); > + > + if (max_desc > AE4DMA_DESCRIPTORS_PER_CMDQ) { > + AE4DMA_PMD_DEBUG("DMA dev %u nb_desc clamped to %u", > + dev->data->dev_id, AE4DMA_DESCRIPTORS_PER_CMDQ); > + max_desc = AE4DMA_DESCRIPTORS_PER_CMDQ; > + } No need to do this because rte_dma_vchan_setup already does it. > + > + cmd_q->qcfg = *qconf; > + cmd_q->qcfg.nb_desc = max_desc; > + > + /* Ensure all counters are reset, if reconfiguring/restarting device. */ > + memset(&cmd_q->stats, 0, sizeof(cmd_q->stats)); > + return 0; > +} > + > +static int > +ae4dma_dev_start(struct rte_dma_dev *dev) > +{ > + struct ae4dma_dmadev *ae4dma = dev->fp_obj->dev_private; > + struct ae4dma_cmd_queue *cmd_q = &ae4dma->cmd_q; > + uint16_t nb = cmd_q->qcfg.nb_desc; > + > + if (nb == 0) > + return -EBUSY; > + > + /* Program ring depth expected by hardware. */ > + AE4DMA_WRITE_REG(&cmd_q->hwq_regs->max_idx, nb); > + return 0; > +} > + > +static int > +ae4dma_dev_stop(struct rte_dma_dev *dev) > +{ > + struct ae4dma_dmadev *ae4dma = dev->fp_obj->dev_private; > + struct ae4dma_cmd_queue *cmd_q = &ae4dma->cmd_q; > + > + if (cmd_q->hwq_regs != NULL) > + AE4DMA_WRITE_REG(&cmd_q->hwq_regs->control_reg.control_raw, > + AE4DMA_CMD_QUEUE_DISABLE); > + return 0; > +} > + > +static int > +ae4dma_dev_info_get(const struct rte_dma_dev *dev __rte_unused, > + struct rte_dma_info *info, uint32_t size) > +{ > + if (size < sizeof(*info)) > + return -EINVAL; > + info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM; You also need to declare support for RTE_DMA_CAPA_OPS_COPY; please use dpdk-test dmadev_autotest to test it. dpdk-test-dma-perf can also be used to test the dmadev. 
> + info->max_vchans = 1; > + info->min_desc = 2; > + info->max_desc = AE4DMA_DESCRIPTORS_PER_CMDQ; > + info->nb_vchans = 1; > + return 0; > +} > + > +static int > +ae4dma_dev_close(struct rte_dma_dev *dev) > +{ > + struct ae4dma_dmadev *ae4dma = dev->fp_obj->dev_private; > + struct ae4dma_cmd_queue *cmd_q = &ae4dma->cmd_q; > + > + if (cmd_q->hwq_regs != NULL) > + AE4DMA_WRITE_REG(&cmd_q->hwq_regs->control_reg.control_raw, > + AE4DMA_CMD_QUEUE_DISABLE); > + > + rte_memzone_free(cmd_q->mz); > + cmd_q->mz = NULL; > + cmd_q->qbase_desc = NULL; > + cmd_q->qbase_addr = NULL; > + cmd_q->qbase_phys_addr = 0; > + return 0; > +} > + > +static int > +ae4dma_dev_dump(const struct rte_dma_dev *dev, FILE *f) > +{ > + struct ae4dma_dmadev *ae4dma = dev->fp_obj->dev_private; > + struct ae4dma_cmd_queue *cmd_q; > + void *ae4dma_mmio_base_addr = (uint8_t *)ae4dma->io_regs; > + > + cmd_q = &ae4dma->cmd_q; > + fprintf(f, "cmd_q->id = %" PRIx64 "\n", cmd_q->id); > + fprintf(f, "cmd_q->qidx = %" PRIx64 "\n", cmd_q->qidx); > + fprintf(f, "cmd_q->qsize = %" PRIx64 "\n", cmd_q->qsize); > + fprintf(f, "mmio_base_addr = %p\n", ae4dma_mmio_base_addr); > + fprintf(f, "queues per ae4dma engine = %d\n", > AE4DMA_READ_REG_OFFSET( > + ae4dma_mmio_base_addr, > AE4DMA_COMMON_CONFIG_OFFSET)); > + fprintf(f, "== Private Data ==\n"); > + fprintf(f, " Config: { ring_size: %u }\n", cmd_q->qcfg.nb_desc); > + fprintf(f, " Ring virt: %p\tphys: %#" PRIx64 "\n", > + (void *)cmd_q->qbase_desc, > + (uint64_t)cmd_q->qbase_phys_addr); > + fprintf(f, " Next write: %u\n", cmd_q->next_write); > + fprintf(f, " Next read: %u\n", cmd_q->next_read); > + fprintf(f, " current queue depth: %u\n", cmd_q->ring_buff_count); > + fprintf(f, " }\n"); > + fprintf(f, " Key Stats { submitted: %" PRIu64 ", comp: %" PRIu64 ", > failed: %" PRIu64 " }\n", > + cmd_q->stats.submitted, > + cmd_q->stats.completed, > + cmd_q->stats.errors); > + return 0; > +} > +static int > +ae4dma_stats_get(const struct rte_dma_dev *dev, uint16_t vchan 
__rte_unused, > + struct rte_dma_stats *rte_stats, uint32_t size) > +{ > + const struct ae4dma_dmadev *ae4dma = dev->fp_obj->dev_private; > + const struct ae4dma_cmd_queue *cmd_q = &ae4dma->cmd_q; > + const struct rte_dma_stats *stats = &cmd_q->stats; > + > + if (size < sizeof(*rte_stats)) > + return -EINVAL; > + if (rte_stats == NULL) > + return -EINVAL; No need to do this check because rte_dma_stats_get already checks it. Please review the other ops for similar redundant checks. > + > + *rte_stats = *stats; > + return 0; > +} > + > +static int > +ae4dma_stats_reset(struct rte_dma_dev *dev, uint16_t vchan __rte_unused) > +{ > + struct ae4dma_dmadev *ae4dma = dev->fp_obj->dev_private; > + struct ae4dma_cmd_queue *cmd_q = &ae4dma->cmd_q; > + > + memset(&cmd_q->stats, 0, sizeof(cmd_q->stats)); > + return 0; > +} > + > +/* > + * Report channel state to the dmadev framework. > + * > + * RTE_DMA_VCHAN_HALTED_ERROR - HW queue is disabled (never started, or > + * stopped via dev_stop()). > + * RTE_DMA_VCHAN_IDLE - HW has caught up: read_idx == write_idx, > + * no descriptors in flight. > + * RTE_DMA_VCHAN_ACTIVE - HW still has descriptors to process. > + */ > +static int > +ae4dma_vchan_status(const struct rte_dma_dev *dev, uint16_t vchan > __rte_unused, > + enum rte_dma_vchan_status *status) > +{ > + const struct ae4dma_dmadev *ae4dma = dev->fp_obj->dev_private; > + const struct ae4dma_cmd_queue *cmd_q = &ae4dma->cmd_q; > + uint32_t ctrl, hw_read, hw_write; > + > + if (cmd_q->hwq_regs == NULL) { > + *status = RTE_DMA_VCHAN_HALTED_ERROR; > + return 0; > + } > + > + ctrl = AE4DMA_READ_REG(&cmd_q->hwq_regs->control_reg.control_raw); > + if ((ctrl & AE4DMA_CMD_QUEUE_ENABLE) == 0) { > + *status = RTE_DMA_VCHAN_HALTED_ERROR; > + return 0; > + } > + > + hw_read = AE4DMA_READ_REG(&cmd_q->hwq_regs->read_idx); > + hw_write = AE4DMA_READ_REG(&cmd_q->hwq_regs->write_idx); > + > + *status = (hw_read == hw_write) ? 
RTE_DMA_VCHAN_IDLE > + : RTE_DMA_VCHAN_ACTIVE; > + return 0; > +} > + > static int > ae4dma_add_queue(struct ae4dma_dmadev *dev, struct rte_pci_device *pci, > uint8_t qn, const char *pci_name) > @@ -115,6 +312,19 @@ ae4dma_channel_dev_name(char *out, size_t outlen, const > char *pci_name, > static int > ae4dma_dmadev_create(const char *name, struct rte_pci_device *dev, uint8_t > qn) > { > + static const struct rte_dma_dev_ops ae4dma_dmadev_ops = { > + .dev_close = ae4dma_dev_close, > + .dev_configure = ae4dma_dev_configure, > + .dev_dump = ae4dma_dev_dump, > + .dev_info_get = ae4dma_dev_info_get, > + .dev_start = ae4dma_dev_start, > + .dev_stop = ae4dma_dev_stop, > + .stats_get = ae4dma_stats_get, > + .stats_reset = ae4dma_stats_reset, > + .vchan_status = ae4dma_vchan_status, > + .vchan_setup = ae4dma_vchan_setup, > + }; > + > struct rte_dma_dev *dmadev; > struct ae4dma_dmadev *ae4dma; > char hwq_dev_name[RTE_DEV_NAME_MAX_LEN]; > @@ -130,6 +340,7 @@ ae4dma_dmadev_create(const char *name, struct > rte_pci_device *dev, uint8_t qn) > } > dmadev->device = &dev->device; > dmadev->fp_obj->dev_private = dmadev->data->dev_private; > + dmadev->dev_ops = &ae4dma_dmadev_ops; > > ae4dma = dmadev->data->dev_private; >

