Add the afu_mf driver to manage the various AFUs (Acceleration Function
Units) in an FPGA.

Signed-off-by: Wei Huang <wei.hu...@intel.com>
Acked-by: Tianfei Zhang <tianfei.zh...@intel.com>
---
v2: fix typo
---
v3: fix build error in FreeBSD13-64, UB2004-32 and UB2204-32
---
v4: fix coding style issue and build error in FreeBSD13-64
---
 drivers/raw/afu_mf/afu_mf_rawdev.c |  440 ++++++++
 drivers/raw/afu_mf/afu_mf_rawdev.h |   89 ++
 drivers/raw/afu_mf/he_hssi.c       |  369 +++++++
 drivers/raw/afu_mf/he_hssi.h       |  102 ++
 drivers/raw/afu_mf/he_lbk.c        |  427 ++++++++
 drivers/raw/afu_mf/he_lbk.h        |  121 +++
 drivers/raw/afu_mf/he_mem.c        |  181 ++++
 drivers/raw/afu_mf/he_mem.h        |   40 +
 drivers/raw/afu_mf/meson.build     |    8 +
 drivers/raw/afu_mf/n3000_afu.c     | 2005 ++++++++++++++++++++++++++++++++++++
 drivers/raw/afu_mf/n3000_afu.h     |  333 ++++++
 drivers/raw/afu_mf/rte_pmd_afu.h   |  134 +++
 drivers/raw/afu_mf/version.map     |    3 +
 drivers/raw/meson.build            |    1 +
 14 files changed, 4253 insertions(+)
 create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.c
 create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.h
 create mode 100644 drivers/raw/afu_mf/he_hssi.c
 create mode 100644 drivers/raw/afu_mf/he_hssi.h
 create mode 100644 drivers/raw/afu_mf/he_lbk.c
 create mode 100644 drivers/raw/afu_mf/he_lbk.h
 create mode 100644 drivers/raw/afu_mf/he_mem.c
 create mode 100644 drivers/raw/afu_mf/he_mem.h
 create mode 100644 drivers/raw/afu_mf/meson.build
 create mode 100644 drivers/raw/afu_mf/n3000_afu.c
 create mode 100644 drivers/raw/afu_mf/n3000_afu.h
 create mode 100644 drivers/raw/afu_mf/rte_pmd_afu.h
 create mode 100644 drivers/raw/afu_mf/version.map

diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.c b/drivers/raw/afu_mf/afu_mf_rawdev.c
new file mode 100644
index 0000000..f24c748
--- /dev/null
+++ b/drivers/raw/afu_mf/afu_mf_rawdev.c
@@ -0,0 +1,440 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memzone.h>
+#include <rte_rawdev_pmd.h>
+
+#include "rte_pmd_afu.h"
+#include "afu_mf_rawdev.h"
+#include "n3000_afu.h"
+#include "he_lbk.h"
+#include "he_mem.h"
+#include "he_hssi.h"
+
+#define AFU_MF_PMD_RAWDEV_NAME rawdev_afu_mf
+
+/*
+ * UUIDs (low half, high half) of the AFUs this PMD supports. The table is
+ * referenced as the id_table of afu_mf_pmd_drv and ends with an all-zero
+ * entry.
+ */
+static const struct rte_afu_uuid afu_uuid_map[] = {
+       { N3000_AFU_UUID_L, N3000_AFU_UUID_H },
+       { HE_LBK_UUID_L, HE_LBK_UUID_H },
+       { HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H },
+       { HE_MEM_TG_UUID_L, HE_MEM_TG_UUID_H },
+       { HE_HSSI_UUID_L, HE_HSSI_UUID_H },
+       { 0, 0 /* sentinel */ }
+};
+
+/*
+ * AFU specific driver descriptors, each pairing a UUID with its operation
+ * table. afu_mf_ops_get() walks this list; the NULL entry terminates it.
+ */
+static struct afu_mf_drv *afu_table[] = {
+       &n3000_afu_drv,
+       &he_lbk_drv,
+       &he_mem_lbk_drv,
+       &he_mem_tg_drv,
+       &he_hssi_drv,
+       NULL
+};
+
+/*
+ * Try to take the per-AFU lock stored in the shared data of the device.
+ *
+ * Never blocks. Returns 0 when the lock was acquired, -ENODEV when dev or
+ * its shared data is missing and -EBUSY when the lock is already held.
+ */
+static inline int afu_mf_trylock(struct afu_mf_rawdev *dev)
+{
+       int32_t x = 0;
+
+       if (!dev || !dev->shared)
+               return -ENODEV;
+
+       /* cheap check first so a held lock is not hit with a write attempt */
+       x = __atomic_load_n(&dev->shared->lock, __ATOMIC_RELAXED);
+       if (x != 0)
+               return -EBUSY;
+
+       /*
+        * There is no retry loop, so the strong form of compare-exchange is
+        * required: the weak form may fail spuriously and would then report
+        * -EBUSY for a lock that nobody holds.
+        */
+       if (__atomic_compare_exchange_n(&dev->shared->lock, &x, 1,
+                               0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) == 0)
+               return -EBUSY;
+
+       return 0;
+}
+
+/*
+ * Release the per-AFU lock with release ordering. Does nothing when dev or
+ * its shared data is missing.
+ */
+static inline void afu_mf_unlock(struct afu_mf_rawdev *dev)
+{
+       if (dev && dev->shared)
+               __atomic_store_n(&dev->shared->lock, 0, __ATOMIC_RELEASE);
+}
+
+/*
+ * rawdev dev_configure callback: pass the configuration blob on to the AFU
+ * specific config handler.
+ *
+ * Returns -ENODEV when the raw device has no private data, 0 when the AFU
+ * provides no config handler, otherwise the handler's return value.
+ */
+static int afu_mf_rawdev_configure(const struct rte_rawdev *rawdev,
+       rte_rawdev_obj_t config, size_t config_size)
+{
+       struct afu_mf_rawdev *dev;
+
+       AFU_MF_PMD_FUNC_TRACE();
+
+       dev = afu_mf_rawdev_get_priv(rawdev);
+       if (dev == NULL)
+               return -ENODEV;
+
+       if (dev->ops == NULL || dev->ops->config == NULL)
+               return 0;
+
+       return dev->ops->config(dev, config, config_size);
+}
+
+/*
+ * rawdev dev_start callback: run the AFU specific start handler while
+ * holding the per-AFU lock.
+ *
+ * Returns -ENODEV without private data, the afu_mf_trylock() error when the
+ * lock cannot be taken, 0 when there is no start handler, otherwise the
+ * handler's return value.
+ */
+static int afu_mf_rawdev_start(struct rte_rawdev *rawdev)
+{
+       struct afu_mf_rawdev *dev;
+       int ret;
+
+       AFU_MF_PMD_FUNC_TRACE();
+
+       dev = afu_mf_rawdev_get_priv(rawdev);
+       if (dev == NULL)
+               return -ENODEV;
+
+       ret = afu_mf_trylock(dev);
+       if (ret != 0) {
+               AFU_MF_PMD_WARN("AFU is busy, please start it later");
+               return ret;
+       }
+
+       /* ret is 0 here, which is also the result without a handler */
+       if (dev->ops != NULL && dev->ops->start != NULL)
+               ret = dev->ops->start(dev);
+
+       afu_mf_unlock(dev);
+
+       return ret;
+}
+
+/*
+ * rawdev dev_stop callback: run the AFU specific stop handler while holding
+ * the per-AFU lock.
+ *
+ * The callback returns void, so a failing stop handler cannot be reported
+ * to the caller; it is logged instead of being dropped silently.
+ */
+static void afu_mf_rawdev_stop(struct rte_rawdev *rawdev)
+{
+       struct afu_mf_rawdev *dev = NULL;
+       int ret = 0;
+
+       AFU_MF_PMD_FUNC_TRACE();
+
+       dev = afu_mf_rawdev_get_priv(rawdev);
+       if (!dev)
+               return;
+
+       ret = afu_mf_trylock(dev);
+       if (ret) {
+               AFU_MF_PMD_WARN("AFU is busy, please stop it later");
+               return;
+       }
+
+       if (dev->ops && dev->ops->stop) {
+               ret = (*dev->ops->stop)(dev);
+               if (ret)
+                       AFU_MF_PMD_ERR("Failed to stop AFU, error %d", ret);
+       }
+
+       afu_mf_unlock(dev);
+}
+
+/*
+ * rawdev dev_close callback: forward to the AFU specific close handler.
+ * The per-AFU lock is not taken here.
+ *
+ * Returns -ENODEV without private data, 0 when there is no close handler,
+ * otherwise the handler's return value.
+ */
+static int afu_mf_rawdev_close(struct rte_rawdev *rawdev)
+{
+       struct afu_mf_rawdev *dev;
+
+       AFU_MF_PMD_FUNC_TRACE();
+
+       dev = afu_mf_rawdev_get_priv(rawdev);
+       if (dev == NULL)
+               return -ENODEV;
+
+       if (dev->ops == NULL || dev->ops->close == NULL)
+               return 0;
+
+       return dev->ops->close(dev);
+}
+
+/*
+ * rawdev dev_reset callback: run the AFU specific reset handler while
+ * holding the per-AFU lock.
+ *
+ * Returns -ENODEV without private data, the afu_mf_trylock() error when the
+ * lock cannot be taken, 0 when there is no reset handler, otherwise the
+ * handler's return value.
+ */
+static int afu_mf_rawdev_reset(struct rte_rawdev *rawdev)
+{
+       struct afu_mf_rawdev *dev;
+       int ret;
+
+       AFU_MF_PMD_FUNC_TRACE();
+
+       dev = afu_mf_rawdev_get_priv(rawdev);
+       if (dev == NULL)
+               return -ENODEV;
+
+       ret = afu_mf_trylock(dev);
+       if (ret != 0) {
+               AFU_MF_PMD_WARN("AFU is busy, please reset it later");
+               return ret;
+       }
+
+       /* ret is 0 here, which is also the result without a handler */
+       if (dev->ops != NULL && dev->ops->reset != NULL)
+               ret = dev->ops->reset(dev);
+
+       afu_mf_unlock(dev);
+
+       return ret;
+}
+
+/*
+ * rawdev dev_selftest callback: run the AFU specific test handler of the
+ * raw device with index dev_id while holding the per-AFU lock.
+ *
+ * Returns -ENODEV for an invalid dev_id, -ENOENT without private data, the
+ * afu_mf_trylock() error when the lock cannot be taken, 0 when there is no
+ * test handler, otherwise the handler's return value.
+ */
+static int afu_mf_rawdev_selftest(uint16_t dev_id)
+{
+       struct afu_mf_rawdev *dev;
+       int ret;
+
+       AFU_MF_PMD_FUNC_TRACE();
+
+       if (!rte_rawdev_pmd_is_valid_dev(dev_id))
+               return -ENODEV;
+
+       dev = afu_mf_rawdev_get_priv(&rte_rawdevs[dev_id]);
+       if (dev == NULL)
+               return -ENOENT;
+
+       ret = afu_mf_trylock(dev);
+       if (ret != 0) {
+               AFU_MF_PMD_WARN("AFU is busy, please test it later");
+               return ret;
+       }
+
+       /* ret is 0 here, which is also the result without a handler */
+       if (dev->ops != NULL && dev->ops->test != NULL)
+               ret = dev->ops->test(dev);
+
+       afu_mf_unlock(dev);
+
+       return ret;
+}
+
+/*
+ * rawdev dump callback: forward to the AFU specific dump handler, which
+ * receives the stream f unchanged.
+ *
+ * Returns -ENODEV without private data, 0 when there is no dump handler,
+ * otherwise the handler's return value.
+ */
+static int afu_mf_rawdev_dump(struct rte_rawdev *rawdev, FILE *f)
+{
+       struct afu_mf_rawdev *dev;
+
+       AFU_MF_PMD_FUNC_TRACE();
+
+       dev = afu_mf_rawdev_get_priv(rawdev);
+       if (dev == NULL)
+               return -ENODEV;
+
+       if (dev->ops == NULL || dev->ops->dump == NULL)
+               return 0;
+
+       return dev->ops->dump(dev, f);
+}
+
+/*
+ * rawdev operation table installed on every raw device created by
+ * afu_mf_rawdev_create(). Only configure, start, stop, close, reset, dump
+ * and selftest are implemented; all other callbacks are left NULL.
+ */
+static const struct rte_rawdev_ops afu_mf_rawdev_ops = {
+       .dev_info_get = NULL,
+       .dev_configure = afu_mf_rawdev_configure,
+       .dev_start = afu_mf_rawdev_start,
+       .dev_stop = afu_mf_rawdev_stop,
+       .dev_close = afu_mf_rawdev_close,
+       .dev_reset = afu_mf_rawdev_reset,
+
+       .queue_def_conf = NULL,
+       .queue_setup = NULL,
+       .queue_release = NULL,
+       .queue_count = NULL,
+
+       .attr_get = NULL,
+       .attr_set = NULL,
+
+       .enqueue_bufs = NULL,
+       .dequeue_bufs = NULL,
+
+       .dump = afu_mf_rawdev_dump,
+
+       .xstats_get = NULL,
+       .xstats_get_names = NULL,
+       .xstats_get_by_name = NULL,
+       .xstats_reset = NULL,
+
+       .firmware_status_get = NULL,
+       .firmware_version_get = NULL,
+       .firmware_load = NULL,
+       .firmware_unload = NULL,
+
+       .dev_selftest = afu_mf_rawdev_selftest,
+};
+
+/*
+ * Look up, or reserve on first use, the memory zone holding the data shared
+ * between processes for one AFU and return it through *data.
+ *
+ * name:      name of the memory zone; must fit in RTE_MEMZONE_NAMESIZE
+ * data:      receives the pointer to the shared data
+ * socket_id: NUMA socket passed to rte_memzone_reserve()
+ *
+ * Returns 0 on success, -EINVAL for NULL arguments, -ENAMETOOLONG when the
+ * name does not fit in a memory zone name and -ENOMEM when the zone can
+ * neither be found nor reserved.
+ */
+static int
+afu_mf_shared_alloc(const char *name, struct afu_mf_shared **data,
+       int socket_id)
+{
+       const struct rte_memzone *mz;
+       char mz_name[RTE_MEMZONE_NAMESIZE];
+       struct afu_mf_shared *ptr = NULL;
+       int init_mz = 0;
+       int n = 0;
+
+       if (!name || !data)
+               return -EINVAL;
+
+       /*
+        * name format is afu_?|??:??.? which is unique. Reject anything that
+        * would be truncated: two different devices must never end up
+        * sharing one zone, and with it one lock.
+        */
+       n = snprintf(mz_name, sizeof(mz_name), "%s", name);
+       if (n < 0 || n >= (int)sizeof(mz_name)) {
+               AFU_MF_PMD_ERR("Name %s does not fit in a memory zone name",
+                       name);
+               return -ENAMETOOLONG;
+       }
+
+       mz = rte_memzone_lookup(mz_name);
+       if (!mz) {
+               mz = rte_memzone_reserve(mz_name,
+                               sizeof(struct afu_mf_shared),
+                               socket_id, 0);
+               init_mz = 1;
+       }
+
+       if (!mz) {
+               AFU_MF_PMD_ERR("Allocate memory zone %s failed!",
+                       mz_name);
+               return -ENOMEM;
+       }
+
+       ptr = (struct afu_mf_shared *)mz->addr;
+
+       if (init_mz)  /* initialize memory zone on the first time */
+               ptr->lock = 0;
+
+       *data = ptr;
+
+       return 0;
+}
+
+/*
+ * Build the raw device name "afu_<AFU device name>" in name.
+ *
+ * afu_dev: AFU device whose device.name is used
+ * name:    output buffer
+ * size:    size of the output buffer in bytes
+ *
+ * Returns 0 on success, -EINVAL for invalid arguments or a formatting
+ * error, and -ENAMETOOLONG when the result does not fit in the buffer.
+ */
+static int afu_mf_rawdev_name_get(struct rte_afu_device *afu_dev, char *name,
+       size_t size)
+{
+       int n = 0;
+
+       if (!afu_dev || !name || !size)
+               return -EINVAL;
+
+       n = snprintf(name, size, "afu_%s", afu_dev->device.name);
+       if (n < 0) {
+               /* output error: the buffer content cannot be trusted */
+               AFU_MF_PMD_ERR("Failed to format name of AFU device!");
+               return -EINVAL;
+       }
+       if ((size_t)n >= size) {
+               AFU_MF_PMD_ERR("Name of AFU device is too long!");
+               return -ENAMETOOLONG;
+       }
+
+       return 0;
+}
+
+/*
+ * Find the operation table of the AFU driver whose UUID equals *afu_id.
+ *
+ * Returns the ops pointer of the first matching afu_table entry, or NULL
+ * when afu_id is NULL or no entry matches.
+ */
+static struct afu_mf_ops *afu_mf_ops_get(struct rte_afu_uuid *afu_id)
+{
+       struct afu_mf_drv **slot;
+
+       if (afu_id == NULL)
+               return NULL;
+
+       for (slot = afu_table; *slot != NULL; slot++) {
+               const struct rte_afu_uuid *id = &(*slot)->uuid;
+
+               if (id->uuid_low == afu_id->uuid_low &&
+                       id->uuid_high == afu_id->uuid_high)
+                       return (*slot)->ops;
+       }
+
+       return NULL;
+}
+
+/*
+ * Create and initialize the raw device for one AFU.
+ *
+ * The raw device is allocated under the name built by
+ * afu_mf_rawdev_name_get(), bound to the operation table matching the AFU
+ * UUID, initialized through the AFU specific init handler (if any) and
+ * finally given its shared data.
+ *
+ * Returns 0 on success, -EINVAL for a NULL afu_dev, -ENOMEM when the raw
+ * device cannot be allocated, -ENODEV when it has no private data,
+ * -ENOTSUP when no driver matches the AFU UUID, otherwise the error of the
+ * failing step. On every failure after allocation the raw device is
+ * released again.
+ */
+static int afu_mf_rawdev_create(struct rte_afu_device *afu_dev, int socket_id)
+{
+       struct rte_rawdev *rawdev = NULL;
+       struct afu_mf_rawdev *dev = NULL;
+       char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
+       int ret = 0;
+
+       if (!afu_dev)
+               return -EINVAL;
+
+       ret = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
+       if (ret)
+               return ret;
+
+       AFU_MF_PMD_INFO("Create raw device %s on NUMA node %d",
+               name, socket_id);
+
+       /* Allocate device structure */
+       rawdev = rte_rawdev_pmd_allocate(name, sizeof(struct afu_mf_rawdev),
+                               socket_id);
+       if (!rawdev) {
+               AFU_MF_PMD_ERR("Unable to allocate raw device");
+               return -ENOMEM;
+       }
+
+       rawdev->dev_ops = &afu_mf_rawdev_ops;
+       rawdev->device = &afu_dev->device;
+       rawdev->driver_name = afu_dev->driver->driver.name;
+
+       dev = afu_mf_rawdev_get_priv(rawdev);
+       if (!dev) {
+               /* ret is still 0 here; report the failure explicitly */
+               ret = -ENODEV;
+               goto cleanup;
+       }
+
+       dev->rawdev = rawdev;
+       dev->port = afu_dev->id.port;
+       dev->addr = afu_dev->mem_resource[0].addr;
+       dev->ops = afu_mf_ops_get(&afu_dev->id.uuid);
+       if (dev->ops == NULL) {
+               AFU_MF_PMD_ERR("Unsupported AFU device");
+               /* ret is still 0 here; probe must not report success */
+               ret = -ENOTSUP;
+               goto cleanup;
+       }
+
+       if (dev->ops->init) {
+               ret = (*dev->ops->init)(dev);
+               if (ret) {
+                       AFU_MF_PMD_ERR("Failed to init %s", name);
+                       goto cleanup;
+               }
+       }
+
+       ret = afu_mf_shared_alloc(name, &dev->shared, socket_id);
+       if (ret)
+               goto cleanup;
+
+       return ret;
+
+cleanup:
+       rte_rawdev_pmd_release(rawdev);
+       return ret;
+}
+
+/*
+ * Release the raw device that belongs to afu_dev.
+ *
+ * Returns -EINVAL for a NULL afu_dev or when no raw device with the
+ * derived name exists, the afu_mf_rawdev_name_get() error when the name
+ * cannot be built, and 0 otherwise. A failing release is only logged at
+ * debug level and still yields 0.
+ */
+static int afu_mf_rawdev_destroy(struct rte_afu_device *afu_dev)
+{
+       char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
+       struct rte_rawdev *rawdev;
+       int ret;
+
+       if (afu_dev == NULL)
+               return -EINVAL;
+
+       ret = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
+       if (ret != 0)
+               return ret;
+
+       AFU_MF_PMD_INFO("Destroy raw device %s", name);
+
+       rawdev = rte_rawdev_pmd_get_named_dev(name);
+       if (rawdev == NULL) {
+               AFU_MF_PMD_ERR("Raw device %s not found", name);
+               return -EINVAL;
+       }
+
+       /* rte_rawdev_close is called by pmd_release */
+       if (rte_rawdev_pmd_release(rawdev) != 0)
+               AFU_MF_PMD_DEBUG("Device cleanup failed");
+
+       return 0;
+}
+
+/*
+ * Probe callback of the AFU driver: create the raw device for afu_dev on
+ * the socket reported by rte_socket_id().
+ */
+static int afu_mf_rawdev_probe(struct rte_afu_device *afu_dev)
+{
+       int socket_id;
+
+       AFU_MF_PMD_FUNC_TRACE();
+
+       socket_id = rte_socket_id();
+
+       return afu_mf_rawdev_create(afu_dev, socket_id);
+}
+
+/* Remove callback of the AFU driver: destroy the raw device of afu_dev. */
+static int afu_mf_rawdev_remove(struct rte_afu_device *afu_dev)
+{
+       int ret;
+
+       AFU_MF_PMD_FUNC_TRACE();
+
+       ret = afu_mf_rawdev_destroy(afu_dev);
+
+       return ret;
+}
+
+/*
+ * AFU driver description: the supported UUIDs plus the probe and remove
+ * callbacks defined above.
+ */
+static struct rte_afu_driver afu_mf_pmd_drv = {
+       .id_table = afu_uuid_map,
+       .probe = afu_mf_rawdev_probe,
+       .remove = afu_mf_rawdev_remove
+};
+
+/* Register the driver and the log type used by the AFU_MF_PMD_* macros. */
+RTE_PMD_REGISTER_AFU(AFU_MF_PMD_RAWDEV_NAME, afu_mf_pmd_drv);
+RTE_LOG_REGISTER_DEFAULT(afu_mf_pmd_logtype, NOTICE);
diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.h b/drivers/raw/afu_mf/afu_mf_rawdev.h
new file mode 100644
index 0000000..5a66f6c
--- /dev/null
+++ b/drivers/raw/afu_mf/afu_mf_rawdev.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Intel Corporation
+ */
+
+#ifndef __AFU_MF_RAWDEV_H__
+#define __AFU_MF_RAWDEV_H__
+
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <rte_cycles.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+extern int afu_mf_pmd_logtype;
+
+/*
+ * Log one message through rte_log() with the driver's log type. The name of
+ * the calling function is prepended and a newline is appended.
+ */
+#define AFU_MF_PMD_LOG(level, fmt, args...) \
+       rte_log(RTE_LOG_ ## level, afu_mf_pmd_logtype, "%s(): " fmt "\n", \
+               __func__, ##args)
+
+/* Emit a DEBUG level ">>" marker carrying the calling function's name. */
+#define AFU_MF_PMD_FUNC_TRACE() AFU_MF_PMD_LOG(DEBUG, ">>")
+
+/* Shorthands for the individual log levels. */
+#define AFU_MF_PMD_DEBUG(fmt, args...) \
+       AFU_MF_PMD_LOG(DEBUG, fmt, ## args)
+#define AFU_MF_PMD_INFO(fmt, args...) \
+       AFU_MF_PMD_LOG(INFO, fmt, ## args)
+#define AFU_MF_PMD_ERR(fmt, args...) \
+       AFU_MF_PMD_LOG(ERR, fmt, ## args)
+#define AFU_MF_PMD_WARN(fmt, args...) \
+       AFU_MF_PMD_LOG(WARNING, fmt, ## args)
+
+/* Cache line helpers; the shift by 6 means one cache line is 64 bytes. */
+#define CLS_TO_SIZE(n)  ((n) << 6)  /* get size of n cache lines */
+#define SIZE_TO_CLS(s)  ((s) >> 6)  /* convert size to number of cache lines */
+/*
+ * Multiply f by 1000000.
+ * NOTE(review): with an int argument above 2147 the product overflows int;
+ * confirm that callers pass small values or a 64-bit type.
+ */
+#define MHZ(f)  ((f) * 1000000)
+
+/*
+ * Poll *(addr) until cond holds or the timeout budget is used up.
+ *
+ * Each round copies *(addr) into val, evaluates cond and, while it is
+ * false, waits invl milliseconds via rte_delay_ms(). Rounds continue as
+ * long as the accumulated wait does not exceed timeout (same unit as invl).
+ *
+ * Evaluates to 0 when cond holds at the end and to 1 otherwise. cond is
+ * evaluated once more after the loop, and addr, val and cond are expanded
+ * several times, so they should be free of side effects.
+ *
+ * NOTE(review): with invl == 0 the wait counter never advances, so the loop
+ * only ends once cond becomes true.
+ */
+#define dsm_poll_timeout(addr, val, cond, invl, timeout) \
+({                                                       \
+       uint64_t __wait = 0;                                 \
+       uint64_t __invl = (invl);                            \
+       uint64_t __timeout = (timeout);                      \
+       for (; __wait <= __timeout; __wait += __invl) {      \
+               (val) = *(addr);                                 \
+               if (cond)                                        \
+                       break;                                       \
+               rte_delay_ms(__invl);                            \
+       }                                                    \
+       (cond) ? 0 : 1;                                      \
+})
+
+struct afu_mf_rawdev;
+
+/*
+ * Operations an AFU specific driver may provide. Every hook receives the
+ * AFU raw device and returns an int; a hook may be left NULL.
+ */
+struct afu_mf_ops {
+       int (*init)(struct afu_mf_rawdev *dev);
+       int (*config)(struct afu_mf_rawdev *dev, void *config,
+               size_t config_size);
+       int (*start)(struct afu_mf_rawdev *dev);
+       int (*stop)(struct afu_mf_rawdev *dev);
+       int (*test)(struct afu_mf_rawdev *dev);
+       int (*close)(struct afu_mf_rawdev *dev);
+       int (*reset)(struct afu_mf_rawdev *dev);
+       int (*dump)(struct afu_mf_rawdev *dev, FILE *f);
+};
+
+/* Binds the UUID of one AFU type to the operations implementing it. */
+struct afu_mf_drv {
+       struct rte_afu_uuid uuid;
+       struct afu_mf_ops *ops;
+};
+
+/* Per-AFU data shared between processes. */
+struct afu_mf_shared {
+       int32_t lock;  /* 0 when free, 1 while an operation holds the AFU */
+};
+
+/* Private data of an AFU raw device (stored in rte_rawdev.dev_private). */
+struct afu_mf_rawdev {
+       struct rte_rawdev *rawdev;  /* point to parent raw device */
+       struct afu_mf_shared *shared;  /* shared data for multi-process */
+       struct afu_mf_ops *ops;  /* device operation functions */
+       int port;  /* index of port the AFU attached */
+       void *addr;  /* base address of AFU registers */
+       void *priv;  /* private driver data */
+};
+
+/*
+ * Return the AFU private data attached to rawdev, or NULL when rawdev
+ * itself is NULL.
+ */
+static inline struct afu_mf_rawdev *
+afu_mf_rawdev_get_priv(const struct rte_rawdev *rawdev)
+{
+       if (rawdev == NULL)
+               return NULL;
+
+       return (struct afu_mf_rawdev *)rawdev->dev_private;
+}
+
+#endif /* __AFU_MF_RAWDEV_H__ */
diff --git a/drivers/raw/afu_mf/he_hssi.c b/drivers/raw/afu_mf/he_hssi.c
new file mode 100644
index 0000000..bedafbd
--- /dev/null
+++ b/drivers/raw/afu_mf/he_hssi.c
@@ -0,0 +1,369 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "he_hssi.h"
+
+/*
+ * Write value to the indirectly addressed HE-HSSI register addr.
+ *
+ * The value is placed in TRAFFIC_CTRL_DATA, a write command for addr is
+ * posted in TRAFFIC_CTRL_CMD, and the function polls until the command is
+ * acknowledged. The acknowledge bit is then written back until it reads as
+ * cleared.
+ *
+ * Returns 0 on success, -EINVAL for a NULL ctx and -ETIMEDOUT when either
+ * polling phase exceeds MAILBOX_TIMEOUT_MS.
+ */
+static int he_hssi_indirect_write(struct he_hssi_ctx *ctx, uint32_t addr,
+       uint32_t value)
+{
+       struct traffic_ctrl_cmd cmd;
+       struct traffic_ctrl_data data;
+       uint32_t i = 0;
+
+       AFU_MF_PMD_DEBUG("Indirect write 0x%x, value 0x%08x", addr, value);
+
+       if (!ctx)
+               return -EINVAL;
+
+       /*
+        * Clear the whole register image first; otherwise the read_data
+        * half would carry uninitialized stack content into the register.
+        */
+       data.csr = 0;
+       data.write_data = value;
+       rte_write64(data.csr, ctx->addr + TRAFFIC_CTRL_DATA);
+
+       cmd.csr = 0;
+       cmd.write_cmd = 1;
+       cmd.afu_cmd_addr = addr;
+       rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+
+       /* wait until the command is acknowledged */
+       while (i < MAILBOX_TIMEOUT_MS) {
+               rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+               cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+               if (cmd.ack_trans)
+                       break;
+               i += MAILBOX_POLL_INTERVAL_MS;
+       }
+       if (i >= MAILBOX_TIMEOUT_MS)
+               return -ETIMEDOUT;
+
+       /* write the acknowledge bit back until it reads as cleared */
+       i = 0;
+       cmd.csr = 0;
+       while (i < MAILBOX_TIMEOUT_MS) {
+               cmd.ack_trans = 1;
+               rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+               rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+               cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+               if (!cmd.ack_trans)
+                       break;
+               i += MAILBOX_POLL_INTERVAL_MS;
+       }
+       if (i >= MAILBOX_TIMEOUT_MS)
+               return -ETIMEDOUT;
+
+       return 0;
+}
+
+/*
+ * Read the indirectly addressed HE-HSSI register addr into *value.
+ *
+ * A read command for addr is posted in TRAFFIC_CTRL_CMD; once it is
+ * acknowledged the result is taken from TRAFFIC_CTRL_DATA. The acknowledge
+ * bit is then written back until it reads as cleared.
+ *
+ * Returns 0 on success, -EINVAL for a NULL ctx and -ETIMEDOUT when either
+ * polling phase exceeds MAILBOX_TIMEOUT_MS.
+ */
+static int he_hssi_indirect_read(struct he_hssi_ctx *ctx, uint32_t addr,
+       uint32_t *value)
+{
+       struct traffic_ctrl_cmd cmd;
+       struct traffic_ctrl_data data;
+       uint32_t waited;
+
+       if (ctx == NULL)
+               return -EINVAL;
+
+       /* post the read request for the indirect address */
+       cmd.csr = 0;
+       cmd.read_cmd = 1;
+       cmd.afu_cmd_addr = addr;
+       rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+
+       /* wait for the acknowledge and fetch the data once it shows up */
+       for (waited = 0; waited < MAILBOX_TIMEOUT_MS;
+                       waited += MAILBOX_POLL_INTERVAL_MS) {
+               rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+               cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+               if (cmd.ack_trans) {
+                       data.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_DATA);
+                       *value = data.read_data;
+                       break;
+               }
+       }
+       if (waited >= MAILBOX_TIMEOUT_MS)
+               return -ETIMEDOUT;
+
+       /* write the acknowledge bit back until it reads as cleared */
+       cmd.csr = 0;
+       for (waited = 0; waited < MAILBOX_TIMEOUT_MS;
+                       waited += MAILBOX_POLL_INTERVAL_MS) {
+               cmd.ack_trans = 1;
+               rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+               rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+               cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+               if (!cmd.ack_trans)
+                       break;
+       }
+       if (waited >= MAILBOX_TIMEOUT_MS)
+               return -ETIMEDOUT;
+
+       AFU_MF_PMD_DEBUG("Indirect read 0x%x, value 0x%08x", addr, *value);
+       return 0;
+}
+
+/*
+ * Print the receive counters, the AVST rx error flags and the loopback FIFO
+ * status to stdout. Reporting stops silently at the first register that
+ * cannot be read.
+ */
+static void he_hssi_report(struct he_hssi_ctx *ctx)
+{
+       static const struct {
+               uint32_t bit;
+               const char *text;
+       } rx_errs[] = {
+               { OVERFLOW_ERR, " overflow" },
+               { LENGTH_ERR, " length" },
+               { OVERSIZE_ERR, " oversize" },
+               { UNDERSIZE_ERR, " undersize" },
+               { MAC_CRC_ERR, " crc" },
+               { PHY_ERR, " phy" },
+       };
+       uint32_t reg = 0;
+       uint32_t hi = 0;
+       unsigned int k;
+
+       if (he_hssi_indirect_read(ctx, TM_PKT_GOOD, &reg))
+               return;
+       printf("Number of good packets received: %u\n", reg);
+
+       if (he_hssi_indirect_read(ctx, TM_PKT_BAD, &reg))
+               return;
+       printf("Number of bad packets received: %u\n", reg);
+
+       /* the byte counter is split over two registers, upper half first */
+       if (he_hssi_indirect_read(ctx, TM_BYTE_CNT1, &hi))
+               return;
+       if (he_hssi_indirect_read(ctx, TM_BYTE_CNT0, &reg))
+               return;
+       printf("Number of bytes received: %"PRIu64"\n",
+               ((uint64_t)hi << 32) | reg);
+
+       if (he_hssi_indirect_read(ctx, TM_AVST_RX_ERR, &reg))
+               return;
+       if (reg & ERR_VALID) {
+               printf("AVST rx error:");
+               for (k = 0; k < sizeof(rx_errs) / sizeof(rx_errs[0]); k++) {
+                       if (reg & rx_errs[k].bit)
+                               printf("%s", rx_errs[k].text);
+               }
+               printf("\n");
+       }
+
+       if (he_hssi_indirect_read(ctx, LOOPBACK_FIFO_STATUS, &reg))
+               return;
+       if (reg & (ALMOST_EMPTY | ALMOST_FULL)) {
+               printf("FIFO status:");
+               if (reg & ALMOST_EMPTY)
+                       printf(" almost empty");
+               if (reg & ALMOST_FULL)
+                       printf(" almost full");
+               printf("\n");
+       }
+}
+
+/*
+ * Run the HE-HSSI test described by the stored configuration.
+ *
+ * Any running transfer is stopped and the configured port is selected.
+ * When he_loopback is not negative, the function only enables or disables
+ * the HE loopback and returns. Otherwise the traffic generator is
+ * programmed (packet count and length, MAC addresses, length and payload
+ * type, random seeds) and started; the transferred packet count is then
+ * polled once per second for up to cfg->timeout rounds and the statistics
+ * are printed.
+ *
+ * Returns 0 on success (also when the poll loop ends before all packets
+ * were transferred), -EINVAL for a NULL dev, -ENOENT when the device has no
+ * private data, otherwise the error of the failing register access.
+ */
+static int he_hssi_test(struct afu_mf_rawdev *dev)
+{
+       struct he_hssi_priv *priv = NULL;
+       struct rte_pmd_afu_he_hssi_cfg *cfg = NULL;
+       struct he_hssi_ctx *ctx = NULL;
+       struct traffic_ctrl_ch_sel sel;
+       uint32_t val = 0;
+       uint32_t i = 0;
+       int ret = 0;
+
+       if (!dev)
+               return -EINVAL;
+
+       priv = (struct he_hssi_priv *)dev->priv;
+       if (!priv)
+               return -ENOENT;
+
+       cfg = &priv->he_hssi_cfg;
+       ctx = &priv->he_hssi_ctx;
+
+       ret = he_hssi_indirect_write(ctx, TG_STOP_XFR, 0);
+       if (ret)
+               return ret;
+
+       /* clear the reserved bits instead of writing stack garbage */
+       sel.csr = 0;
+       sel.channel_sel = cfg->port;
+       rte_write64(sel.csr, ctx->addr + TRAFFIC_CTRL_CH_SEL);
+
+       if (cfg->he_loopback >= 0) {
+               val = cfg->he_loopback ? 1 : 0;
+               AFU_MF_PMD_INFO("%s HE loopback on port %u",
+                       val ? "Enable" : "Disable", cfg->port);
+               return he_hssi_indirect_write(ctx, LOOPBACK_EN, val);
+       }
+
+       ret = he_hssi_indirect_write(ctx, TG_NUM_PKT, cfg->num_packets);
+       if (ret)
+               return ret;
+
+       ret = he_hssi_indirect_write(ctx, TG_PKT_LEN, cfg->packet_length);
+       if (ret)
+               return ret;
+
+       val = cfg->src_addr & 0xffffffff;
+       ret = he_hssi_indirect_write(ctx, TG_SRC_MAC_L, val);
+       if (ret)
+               return ret;
+       val = (cfg->src_addr >> 32) & 0xffff;
+       ret = he_hssi_indirect_write(ctx, TG_SRC_MAC_H, val);
+       if (ret)
+               return ret;
+
+       val = cfg->dest_addr & 0xffffffff;
+       ret = he_hssi_indirect_write(ctx, TG_DST_MAC_L, val);
+       if (ret)
+               return ret;
+       val = (cfg->dest_addr >> 32) & 0xffff;
+       ret = he_hssi_indirect_write(ctx, TG_DST_MAC_H, val);
+       if (ret)
+               return ret;
+
+       val = cfg->random_length ? 1 : 0;
+       ret = he_hssi_indirect_write(ctx, TG_PKT_LEN_TYPE, val);
+       if (ret)
+               return ret;
+
+       val = cfg->random_payload ? 1 : 0;
+       ret = he_hssi_indirect_write(ctx, TG_DATA_PATTERN, val);
+       if (ret)
+               return ret;
+
+       for (i = 0; i < 3; i++) {
+               ret = he_hssi_indirect_write(ctx, TG_RANDOM_SEED(i),
+                       cfg->rnd_seed[i]);
+               if (ret)
+                       return ret;
+       }
+
+       ret = he_hssi_indirect_write(ctx, TG_START_XFR, 1);
+       if (ret)
+               return ret;
+
+       /*
+        * Restart the counter: it still holds 3 from the seed loop, which
+        * would cut the wait short by three rounds.
+        */
+       i = 0;
+       while (i++ < cfg->timeout) {
+               ret = he_hssi_indirect_read(ctx, TG_PKT_XFRD, &val);
+               if (ret)
+                       break;
+               if (val == cfg->num_packets)
+                       break;
+               sleep(1);
+       }
+
+       he_hssi_report(ctx);
+
+       return ret;
+}
+
+/*
+ * Init handler: allocate the zeroed HE-HSSI private data on first use and
+ * record the AFU register base address in its context.
+ *
+ * Returns 0 on success, -EINVAL for a NULL dev and -ENOMEM when the private
+ * data cannot be allocated.
+ */
+static int he_hssi_init(struct afu_mf_rawdev *dev)
+{
+       struct he_hssi_priv *priv;
+
+       if (dev == NULL)
+               return -EINVAL;
+
+       if (dev->priv == NULL) {
+               dev->priv = rte_zmalloc(NULL, sizeof(struct he_hssi_priv), 0);
+               if (dev->priv == NULL)
+                       return -ENOMEM;
+       }
+
+       priv = (struct he_hssi_priv *)dev->priv;
+       priv->he_hssi_ctx.addr = (uint8_t *)dev->addr;
+
+       return 0;
+}
+
+/*
+ * Config handler: validate a struct rte_pmd_afu_he_hssi_cfg and copy it
+ * into the private data.
+ *
+ * Returns 0 on success, -ENOENT when the device has no private data, and
+ * -EINVAL for NULL arguments, a config_size that is not the size of the
+ * configuration structure, or a port that is not below NUM_HE_HSSI_PORTS.
+ */
+static int he_hssi_config(struct afu_mf_rawdev *dev, void *config,
+       size_t config_size)
+{
+       const struct rte_pmd_afu_he_hssi_cfg *req;
+       struct he_hssi_priv *priv;
+
+       if (dev == NULL || config == NULL || config_size == 0)
+               return -EINVAL;
+
+       priv = (struct he_hssi_priv *)dev->priv;
+       if (priv == NULL)
+               return -ENOENT;
+
+       if (config_size != sizeof(*req))
+               return -EINVAL;
+
+       req = (const struct rte_pmd_afu_he_hssi_cfg *)config;
+       if (req->port >= NUM_HE_HSSI_PORTS)
+               return -EINVAL;
+
+       rte_memcpy(&priv->he_hssi_cfg, req, sizeof(priv->he_hssi_cfg));
+
+       return 0;
+}
+
+/*
+ * Close handler: free the private data and clear the pointer to it.
+ * Returns 0 on success and -EINVAL for a NULL dev.
+ */
+static int he_hssi_close(struct afu_mf_rawdev *dev)
+{
+       void *old;
+
+       if (dev == NULL)
+               return -EINVAL;
+
+       old = dev->priv;
+       dev->priv = NULL;
+       rte_free(old);
+
+       return 0;
+}
+
+/*
+ * Dump handler: print the AFU register base address to f, or to stdout
+ * when f is NULL.
+ *
+ * Returns 0 on success, -EINVAL for a NULL dev and -ENOENT when the device
+ * has no private data.
+ */
+static int he_hssi_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+       struct he_hssi_priv *priv;
+       FILE *out;
+
+       if (dev == NULL)
+               return -EINVAL;
+
+       priv = (struct he_hssi_priv *)dev->priv;
+       if (priv == NULL)
+               return -ENOENT;
+
+       out = f ? f : stdout;
+
+       fprintf(out, "addr:\t\t%p\n", (void *)priv->he_hssi_ctx.addr);
+
+       return 0;
+}
+
+/*
+ * HE-HSSI operation table. The AFU provides no start, stop or reset
+ * handler.
+ */
+static struct afu_mf_ops he_hssi_ops = {
+       .init = he_hssi_init,
+       .config = he_hssi_config,
+       .start = NULL,
+       .stop = NULL,
+       .test = he_hssi_test,
+       .close = he_hssi_close,
+       .dump = he_hssi_dump,
+       .reset = NULL
+};
+
+/* Driver descriptor binding the HE-HSSI UUID to its operation table. */
+struct afu_mf_drv he_hssi_drv = {
+       .uuid = { HE_HSSI_UUID_L, HE_HSSI_UUID_H },
+       .ops = &he_hssi_ops
+};
diff --git a/drivers/raw/afu_mf/he_hssi.h b/drivers/raw/afu_mf/he_hssi.h
new file mode 100644
index 0000000..f8b9623
--- /dev/null
+++ b/drivers/raw/afu_mf/he_hssi.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _HE_HSSI_H_
+#define _HE_HSSI_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+#define HE_HSSI_UUID_L    0xbb370242ac130002
+#define HE_HSSI_UUID_H    0x823c334c98bf11ea
+#define NUM_HE_HSSI_PORTS 8
+
+extern struct afu_mf_drv he_hssi_drv;
+
+/*
+ * HE-HSSI registers definition.
+ * The TRAFFIC_CTRL_* values are byte offsets from the AFU register base
+ * and are accessed with 64-bit reads and writes.
+ */
+#define TRAFFIC_CTRL_CMD    0x30
+#define TRAFFIC_CTRL_DATA   0x38
+#define TRAFFIC_CTRL_CH_SEL 0x40
+#define AFU_SCRATCHPAD      0x48
+
+/*
+ * The TG_*, TM_* and LOOPBACK_* values below are addresses for the indirect
+ * access done through TRAFFIC_CTRL_CMD and TRAFFIC_CTRL_DATA.
+ * NOTE(review): TG presumably stands for traffic generator and TM for
+ * traffic monitor - confirm against the hardware specification.
+ */
+#define TG_NUM_PKT        0x3c00
+#define TG_PKT_LEN_TYPE   0x3c01
+#define TG_DATA_PATTERN   0x3c02
+#define TG_START_XFR      0x3c03
+#define TG_STOP_XFR       0x3c04
+#define TG_SRC_MAC_L      0x3c05
+#define TG_SRC_MAC_H      0x3c06
+#define TG_DST_MAC_L      0x3c07
+#define TG_DST_MAC_H      0x3c08
+#define TG_PKT_XFRD       0x3c09
+#define TG_RANDOM_SEED(n) (0x3c0a + (n))
+#define TG_PKT_LEN        0x3c0d
+
+#define TM_NUM_PKT        0x3d00
+#define TM_PKT_GOOD       0x3d01
+#define TM_PKT_BAD        0x3d02
+#define TM_BYTE_CNT0      0x3d03
+#define TM_BYTE_CNT1      0x3d04
+/* TM_AVST_RX_ERR and the flag bits tested in its value */
+#define TM_AVST_RX_ERR    0x3d07
+#define   OVERFLOW_ERR    (1 << 9)
+#define   LENGTH_ERR      (1 << 8)
+#define   OVERSIZE_ERR    (1 << 7)
+#define   UNDERSIZE_ERR   (1 << 6)
+#define   MAC_CRC_ERR     (1 << 5)
+#define   PHY_ERR         (1 << 4)
+#define   ERR_VALID       (1 << 3)
+
+#define LOOPBACK_EN          0x3e00
+/* LOOPBACK_FIFO_STATUS and the flag bits tested in its value */
+#define LOOPBACK_FIFO_STATUS 0x3e01
+#define   ALMOST_EMPTY    (1 << 1)
+#define   ALMOST_FULL     (1 << 0)
+
+/* Budget and poll period, in milliseconds, of one indirect access phase */
+#define MAILBOX_TIMEOUT_MS       100
+#define MAILBOX_POLL_INTERVAL_MS 10
+
+/*
+ * Image of the TRAFFIC_CTRL_CMD register: csr is the raw 64-bit value, the
+ * bit-fields give access to its parts.
+ * NOTE(review): the mapping of bit-fields onto csr bits depends on the
+ * compiler's bit-field layout - confirm for every supported target.
+ */
+struct traffic_ctrl_cmd {
+       union {
+               uint64_t csr;
+               struct {
+                       uint32_t read_cmd:1;
+                       uint32_t write_cmd:1;
+                       uint32_t ack_trans:1;
+                       uint32_t rsvd1:29;
+                       uint32_t afu_cmd_addr:16;
+                       uint32_t rsvd2:16;
+               };
+       };
+};
+
+/*
+ * Image of the TRAFFIC_CTRL_DATA register: csr is the raw 64-bit value,
+ * read_data and write_data are its two 32-bit halves.
+ */
+struct traffic_ctrl_data {
+       union {
+               uint64_t csr;
+               struct {
+                       uint32_t read_data;
+                       uint32_t write_data;
+               };
+       };
+};
+
+/*
+ * Image of the TRAFFIC_CTRL_CH_SEL register: csr is the raw 64-bit value,
+ * channel_sel is a 3-bit field that receives the configured port number.
+ */
+struct traffic_ctrl_ch_sel {
+       union {
+               uint64_t csr;
+               struct {
+                       uint32_t channel_sel:3;
+                       uint32_t rsvd1:29;
+                       uint32_t rsvd2;
+               };
+       };
+};
+
+/* Runtime context of the HE-HSSI AFU. */
+struct he_hssi_ctx {
+       uint8_t *addr;  /* base address of the AFU registers */
+};
+
+/* Private driver data: the stored configuration plus the runtime context. */
+struct he_hssi_priv {
+       struct rte_pmd_afu_he_hssi_cfg he_hssi_cfg;
+       struct he_hssi_ctx he_hssi_ctx;
+};
+
+#endif /* _HE_HSSI_H_ */
diff --git a/drivers/raw/afu_mf/he_lbk.c b/drivers/raw/afu_mf/he_lbk.c
new file mode 100644
index 0000000..8735647
--- /dev/null
+++ b/drivers/raw/afu_mf/he_lbk.c
@@ -0,0 +1,427 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "he_lbk.h"
+
+/*
+ * Build the CSR_CFG value from the stored user configuration and write it
+ * to the device.
+ *
+ * Returns 0 on success, -EINVAL if dev is NULL, -ENOENT if the private
+ * data has not been allocated.
+ */
+static int he_lbk_afu_config(struct afu_mf_rawdev *dev)
+{
+       struct he_lbk_csr_cfg reg;
+       struct he_lbk_priv *lbk;
+       struct rte_pmd_afu_he_lbk_cfg *user;
+
+       if (dev == NULL)
+               return -EINVAL;
+       if (dev->priv == NULL)
+               return -ENOENT;
+
+       lbk = (struct he_lbk_priv *)dev->priv;
+       user = &lbk->he_lbk_cfg;
+
+       /* start from an all-zero register image */
+       reg.csr = 0;
+       reg.cont = user->cont ? 1 : 0;
+       reg.mode = user->mode;
+       reg.trput_interleave = user->trput_interleave;
+       /* a burst of 4 cache lines is encoded as 2, otherwise as length - 1 */
+       reg.multicl_len = (user->multi_cl == 4) ? 2 : user->multi_cl - 1;
+
+       AFU_MF_PMD_DEBUG("cfg: 0x%08x", reg.csr);
+       rte_write32(reg.csr, lbk->he_lbk_ctx.addr + CSR_CFG);
+
+       return 0;
+}
+
+/*
+ * Print the result of one test iteration to stdout.
+ *
+ * dev: device whose private data holds the configuration and context.
+ * cl:  number of cache lines of the iteration, printed as-is.
+ *
+ * Read/write counts come from CSR_STATUS0, pending counts from CSR_STATUS1
+ * and the clock counters from the DSM status block. If cfg->freq_mhz is 0
+ * it is filled in here (side effect on the stored configuration).
+ * Returns silently when dev or dev->priv is NULL.
+ */
+static void he_lbk_report(struct afu_mf_rawdev *dev, uint32_t cl)
+{
+       struct he_lbk_priv *priv = NULL;
+       struct rte_pmd_afu_he_lbk_cfg *cfg = NULL;
+       struct he_lbk_ctx *ctx = NULL;
+       struct he_lbk_dsm_status *stat = NULL;
+       struct he_lbk_status0 stat0;
+       struct he_lbk_status1 stat1;
+       uint64_t swtest_msg = 0;
+       uint64_t ticks = 0;
+       uint64_t info = 0;
+       double num, rd_bw, wr_bw;
+
+       if (!dev || !dev->priv)
+               return;
+
+       priv = (struct he_lbk_priv *)dev->priv;
+       cfg = &priv->he_lbk_cfg;
+       ctx = &priv->he_lbk_ctx;
+
+       stat = ctx->status_ptr;
+
+       swtest_msg = rte_read64(ctx->addr + CSR_SWTEST_MSG);
+       stat0.csr = rte_read64(ctx->addr + CSR_STATUS0);
+       stat1.csr = rte_read64(ctx->addr + CSR_STATUS1);
+
+       /*
+        * Net clock count: the start overhead is always subtracted, the end
+        * overhead only when the test was not run in continuous mode.
+        */
+       if (cfg->cont)
+               ticks = stat->num_clocks - stat->start_overhead;
+       else
+               ticks = stat->num_clocks -
+                       (stat->start_overhead + stat->end_overhead);
+
+       /*
+        * No frequency configured: take the low 16 bits of CSR_HE_INFO0 and
+        * fall back to 350 MHz when those are zero as well.
+        */
+       if (cfg->freq_mhz == 0) {
+               info = rte_read64(ctx->addr + CSR_HE_INFO0);
+               AFU_MF_PMD_INFO("API version: %"PRIx64, info >> 16);
+               cfg->freq_mhz = info & 0xffff;
+               if (cfg->freq_mhz == 0) {
+                       AFU_MF_PMD_INFO("Frequency of AFU clock is unknown."
+                               " Assuming 350 MHz.");
+                       cfg->freq_mhz = 350;
+               }
+       }
+
+       /*
+        * bandwidth = count * bytes per cache line * clock rate / ticks
+        * NOTE(review): ticks is not checked for zero before dividing.
+        */
+       num = (double)stat0.num_reads;
+       rd_bw = (num * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+       num = (double)stat0.num_writes;
+       wr_bw = (num * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+
+       printf("Cachelines  Read_Count Write_Count Pend_Read Pend_Write "
+               "Clocks@%uMHz   Rd_Bandwidth   Wr_Bandwidth\n",
+               cfg->freq_mhz);
+       printf("%10u  %10u %10u %10u %10u  %12"PRIu64
+               "   %7.3f GB/s   %7.3f GB/s\n",
+               cl, stat0.num_reads, stat0.num_writes,
+               stat1.num_pend_reads, stat1.num_pend_writes,
+               ticks, rd_bw / 1e9, wr_bw / 1e9);
+       printf("Test Message: 0x%"PRIx64"\n", swtest_msg);
+}
+
+/*
+ * Run the loopback test over the configured range of cache line counts.
+ *
+ * The source buffer is filled with an incrementing 32-bit pattern, then one
+ * iteration is run for every cl in [cfg->begin, cfg->end] with a step of
+ * cfg->multi_cl. Each iteration programs the device, waits for bit 0 of
+ * test_complete in the DSM status block, prints a report and, in loopback
+ * mode, compares the destination buffer against the pattern.
+ *
+ * Returns 0 on success, -EINVAL if dev is NULL, -ENOENT if the private data
+ * is missing, the error of he_lbk_afu_config(), or the non-zero result of
+ * dsm_poll_timeout() when the device does not signal completion in time.
+ * A data mismatch is logged but does not change the return value.
+ */
+static int he_lbk_test(struct afu_mf_rawdev *dev)
+{
+       struct he_lbk_priv *priv = NULL;
+       struct rte_pmd_afu_he_lbk_cfg *cfg = NULL;
+       struct he_lbk_ctx *ctx = NULL;
+       struct he_lbk_csr_ctl ctl;
+       uint32_t *ptr = NULL;
+       uint32_t i, j, cl, val = 0;
+       uint64_t sval = 0;
+       int ret = 0;
+
+       if (!dev)
+               return -EINVAL;
+
+       priv = (struct he_lbk_priv *)dev->priv;
+       if (!priv)
+               return -ENOENT;
+
+       cfg = &priv->he_lbk_cfg;
+       ctx = &priv->he_lbk_ctx;
+
+       /* write CSR_CTL as 0, wait 1 ms, then write it with 'reset' set */
+       ctl.csr = 0;
+       rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+       rte_delay_us(1000);
+       ctl.reset = 1;
+       rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+       /* initialize DMA addresses, passed through SIZE_TO_CLS() */
+       AFU_MF_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova);
+       rte_write64(SIZE_TO_CLS(ctx->src_iova), ctx->addr + CSR_SRC_ADDR);
+
+       AFU_MF_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova);
+       rte_write64(SIZE_TO_CLS(ctx->dest_iova), ctx->addr + CSR_DST_ADDR);
+
+       /* the DSM base is written as two 32-bit halves */
+       AFU_MF_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova);
+       rte_write32(SIZE_TO_CLS(ctx->dsm_iova), ctx->addr + CSR_AFU_DSM_BASEL);
+       rte_write32(SIZE_TO_CLS(ctx->dsm_iova) >> 32,
+               ctx->addr + CSR_AFU_DSM_BASEH);
+
+       ret = he_lbk_afu_config(dev);
+       if (ret)
+               return ret;
+
+       /* initialize src data: 32-bit word k holds the value k */
+       ptr = (uint32_t *)ctx->src_ptr;
+       j = CLS_TO_SIZE(cfg->end) >> 2;
+       for (i = 0; i < j; i++)
+               *ptr++ = i;
+
+       /* start test */
+       for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) {
+               /* clear the previous result and completion flag */
+               memset(ctx->dest_ptr, 0, CLS_TO_SIZE(cl));
+               memset(ctx->dsm_ptr, 0, DSM_SIZE);
+
+               ctl.csr = 0;
+               rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+               rte_delay_us(1000);
+               ctl.reset = 1;
+               rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+               /* the register takes the line count minus one */
+               rte_write32(cl - 1, ctx->addr + CSR_NUM_LINES);
+
+               ctl.start = 1;
+               rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+               if (cfg->cont) {
+                       /* continuous mode: run for cfg->timeout seconds,
+                        * then force completion and wait for the flag
+                        */
+                       rte_delay_ms(cfg->timeout * 1000);
+                       ctl.force_completion = 1;
+                       rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+                       ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+                               val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+                               DSM_TIMEOUT);
+                       if (ret) {
+                               printf("DSM poll timeout\n");
+                               goto end;
+                       }
+               } else {
+                       /* wait for the flag first, then force completion */
+                       ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+                               val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+                               DSM_TIMEOUT);
+                       if (ret) {
+                               printf("DSM poll timeout\n");
+                               goto end;
+                       }
+                       ctl.force_completion = 1;
+                       rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+               }
+
+               he_lbk_report(dev, cl);
+
+               /* wait up to 100 x 1 ms for CSR_STATUS1 to read zero */
+               i = 0;
+               while (i++ < 100) {
+                       sval = rte_read64(ctx->addr + CSR_STATUS1);
+                       if (sval == 0)
+                               break;
+                       rte_delay_us(1000);
+               }
+
+               /* only loopback mode is checked against the source pattern */
+               if (cfg->mode == NLB_MODE_LPBK) {
+                       ptr = (uint32_t *)ctx->dest_ptr;
+                       j = CLS_TO_SIZE(cl) >> 2;
+                       for (i = 0; i < j; i++) {
+                               if (*ptr++ != i) {
+                                       AFU_MF_PMD_ERR("Data mismatch @ %u", i);
+                                       break;
+                               }
+                       }
+               }
+       }
+
+end:
+       /* propagate a poll timeout instead of always reporting success */
+       return ret;
+}
+
+/*
+ * Free the DSM, source and destination buffers of the context and clear
+ * the pointers that referred to them.
+ *
+ * Returns 0 on success, -EINVAL if dev is NULL, -ENOENT if the private
+ * data is missing.
+ */
+static int he_lbk_ctx_release(struct afu_mf_rawdev *dev)
+{
+       struct he_lbk_ctx *c;
+
+       if (dev == NULL)
+               return -EINVAL;
+       if (dev->priv == NULL)
+               return -ENOENT;
+
+       c = &((struct he_lbk_priv *)dev->priv)->he_lbk_ctx;
+
+       /* status_ptr is set from dsm_ptr at init, so it is cleared with it */
+       rte_free(c->dsm_ptr);
+       c->status_ptr = NULL;
+       c->dsm_ptr = NULL;
+
+       rte_free(c->src_ptr);
+       c->src_ptr = NULL;
+
+       rte_free(c->dest_ptr);
+       c->dest_ptr = NULL;
+
+       return 0;
+}
+
+/*
+ * Set up the runtime context: record the register base and allocate the
+ * DSM, source and destination buffers together with their IOVAs.
+ *
+ * Returns 0 on success, -EINVAL if dev is NULL, -ENOENT if the private
+ * data is missing, -ENOMEM if an allocation or an IOVA lookup fails (the
+ * context is released before returning in that case).
+ */
+static int he_lbk_ctx_init(struct afu_mf_rawdev *dev)
+{
+       struct he_lbk_ctx *c;
+       /* every failure past the argument checks reports -ENOMEM */
+       int err = -ENOMEM;
+
+       if (dev == NULL)
+               return -EINVAL;
+       if (dev->priv == NULL)
+               return -ENOENT;
+
+       c = &((struct he_lbk_priv *)dev->priv)->he_lbk_ctx;
+       c->addr = (uint8_t *)dev->addr;
+
+       /* device status memory */
+       c->dsm_ptr = (uint8_t *)rte_zmalloc(NULL, DSM_SIZE, TEST_MEM_ALIGN);
+       if (c->dsm_ptr == NULL)
+               goto fail;
+       c->dsm_iova = rte_malloc_virt2iova(c->dsm_ptr);
+       if (c->dsm_iova == RTE_BAD_IOVA)
+               goto fail;
+
+       /* source buffer */
+       c->src_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+               TEST_MEM_ALIGN);
+       if (c->src_ptr == NULL)
+               goto fail;
+       c->src_iova = rte_malloc_virt2iova(c->src_ptr);
+       if (c->src_iova == RTE_BAD_IOVA)
+               goto fail;
+
+       /* destination buffer */
+       c->dest_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+               TEST_MEM_ALIGN);
+       if (c->dest_ptr == NULL)
+               goto fail;
+       c->dest_iova = rte_malloc_virt2iova(c->dest_ptr);
+       if (c->dest_iova == RTE_BAD_IOVA)
+               goto fail;
+
+       /* the status block is read from the start of the DSM buffer */
+       c->status_ptr = (struct he_lbk_dsm_status *)c->dsm_ptr;
+       return 0;
+
+fail:
+       he_lbk_ctx_release(dev);
+       return err;
+}
+
+/*
+ * Allocate the zeroed private data if the device has none yet, then
+ * initialize the runtime context.
+ *
+ * Returns -EINVAL if dev is NULL, -ENOMEM if the private data cannot be
+ * allocated, otherwise the result of he_lbk_ctx_init().
+ */
+static int he_lbk_init(struct afu_mf_rawdev *dev)
+{
+       if (dev == NULL)
+               return -EINVAL;
+
+       if (dev->priv == NULL)
+               dev->priv = rte_zmalloc(NULL, sizeof(struct he_lbk_priv), 0);
+       if (dev->priv == NULL)
+               return -ENOMEM;
+
+       return he_lbk_ctx_init(dev);
+}
+
+/*
+ * Validate a user supplied configuration and store a copy of it in the
+ * private data.
+ *
+ * config must point to a struct rte_pmd_afu_he_lbk_cfg and config_size
+ * must be exactly its size.
+ *
+ * Returns 0 on success, -EINVAL on a NULL/zero argument, a size mismatch
+ * or an out-of-range field, -ENOENT if the private data is missing.
+ */
+static int he_lbk_config(struct afu_mf_rawdev *dev, void *config,
+       size_t config_size)
+{
+       struct he_lbk_priv *lbk;
+       struct rte_pmd_afu_he_lbk_cfg *in;
+
+       if (dev == NULL || config == NULL || config_size == 0)
+               return -EINVAL;
+
+       lbk = (struct he_lbk_priv *)dev->priv;
+       if (lbk == NULL)
+               return -ENOENT;
+
+       if (config_size != sizeof(lbk->he_lbk_cfg))
+               return -EINVAL;
+
+       in = (struct rte_pmd_afu_he_lbk_cfg *)config;
+
+       if (in->mode > NLB_MODE_TRPUT)
+               return -EINVAL;
+
+       /* only bursts of 1, 2 or 4 cache lines are accepted */
+       switch (in->multi_cl) {
+       case 1:
+       case 2:
+       case 4:
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       /* MIN_CACHE_LINES <= begin <= end <= MAX_CACHE_LINES */
+       if (in->begin < MIN_CACHE_LINES || in->begin > MAX_CACHE_LINES)
+               return -EINVAL;
+       if (in->end < in->begin || in->end > MAX_CACHE_LINES)
+               return -EINVAL;
+
+       rte_memcpy(&lbk->he_lbk_cfg, in, sizeof(lbk->he_lbk_cfg));
+
+       return 0;
+}
+
+/*
+ * Release the context buffers and free the private data.
+ *
+ * Returns 0 on success, -EINVAL if dev is NULL.
+ */
+static int he_lbk_close(struct afu_mf_rawdev *dev)
+{
+       if (dev == NULL)
+               return -EINVAL;
+
+       /* the result is ignored: a missing priv just means nothing to free */
+       (void)he_lbk_ctx_release(dev);
+
+       rte_free(dev->priv);
+       dev->priv = NULL;
+
+       return 0;
+}
+
+/*
+ * Print the addresses held in the runtime context.
+ *
+ * f: output stream, stdout is used when it is NULL.
+ *
+ * Returns 0 on success, -EINVAL if dev is NULL, -ENOENT if the private
+ * data is missing.
+ */
+static int he_lbk_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+       const struct he_lbk_ctx *c;
+       FILE *out;
+
+       if (dev == NULL)
+               return -EINVAL;
+       if (dev->priv == NULL)
+               return -ENOENT;
+
+       out = (f != NULL) ? f : stdout;
+       c = &((struct he_lbk_priv *)dev->priv)->he_lbk_ctx;
+
+       fprintf(out, "addr:\t\t%p\n", (void *)c->addr);
+       fprintf(out, "dsm_ptr:\t%p\n", (void *)c->dsm_ptr);
+       fprintf(out, "dsm_iova:\t0x%"PRIx64"\n", c->dsm_iova);
+       fprintf(out, "src_ptr:\t%p\n", (void *)c->src_ptr);
+       fprintf(out, "src_iova:\t0x%"PRIx64"\n", c->src_iova);
+       fprintf(out, "dest_ptr:\t%p\n", (void *)c->dest_ptr);
+       fprintf(out, "dest_iova:\t0x%"PRIx64"\n", c->dest_iova);
+       fprintf(out, "status_ptr:\t%p\n", (void *)c->status_ptr);
+
+       return 0;
+}
+
+/*
+ * Callbacks shared by the HE-LBK and HE-MEM-LBK drivers. The start, stop
+ * and reset callbacks are not provided and are left NULL by the
+ * initializer.
+ */
+static struct afu_mf_ops he_lbk_ops = {
+       .init   = he_lbk_init,
+       .config = he_lbk_config,
+       .test   = he_lbk_test,
+       .close  = he_lbk_close,
+       .dump   = he_lbk_dump,
+};
+
+/* HE-LBK driver entry: its UUID paired with the shared callbacks. */
+struct afu_mf_drv he_lbk_drv = {
+       .ops = &he_lbk_ops,
+       .uuid = { HE_LBK_UUID_L, HE_LBK_UUID_H },
+};
+
+/* HE-MEM-LBK driver entry: its UUID paired with the same callbacks. */
+struct afu_mf_drv he_mem_lbk_drv = {
+       .ops = &he_lbk_ops,
+       .uuid = { HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H },
+};
diff --git a/drivers/raw/afu_mf/he_lbk.h b/drivers/raw/afu_mf/he_lbk.h
new file mode 100644
index 0000000..c2e8a29
--- /dev/null
+++ b/drivers/raw/afu_mf/he_lbk.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _HE_LBK_H_
+#define _HE_LBK_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+/* Low and high 64-bit halves of the UUIDs matched by the two drivers */
+#define HE_LBK_UUID_L      0xb94b12284c31e02b
+#define HE_LBK_UUID_H      0x56e203e9864f49a7
+#define HE_MEM_LBK_UUID_L  0xbb652a578330a8eb
+#define HE_MEM_LBK_UUID_H  0x8568ab4e6ba54616
+
+/* Driver entries defined in he_lbk.c */
+extern struct afu_mf_drv he_lbk_drv;
+extern struct afu_mf_drv he_mem_lbk_drv;
+
+/*
+ * HE-LBK & HE-MEM-LBK registers definition.
+ * The values are byte offsets added to the register base address.
+ */
+#define CSR_SCRATCHPAD0    0x100
+#define CSR_SCRATCHPAD1    0x108
+#define CSR_AFU_DSM_BASEL  0x110
+#define CSR_AFU_DSM_BASEH  0x114
+#define CSR_SRC_ADDR       0x120
+#define CSR_DST_ADDR       0x128
+#define CSR_NUM_LINES      0x130
+#define CSR_CTL            0x138
+#define CSR_CFG            0x140
+#define CSR_INACT_THRESH   0x148
+#define CSR_INTERRUPT0     0x150
+#define CSR_SWTEST_MSG     0x158
+#define CSR_STATUS0        0x160
+#define CSR_STATUS1        0x168
+#define CSR_ERROR          0x170
+#define CSR_STRIDE         0x178
+#define CSR_HE_INFO0       0x180
+
+/* Size in bytes of the DSM buffer and the completion polling parameters */
+#define DSM_SIZE           0x200000
+#define DSM_POLL_INTERVAL  5  /* ms */
+#define DSM_TIMEOUT        1000  /* ms */
+
+/* Size in bytes of the source/destination buffers and their alignment */
+#define NLB_BUF_SIZE  0x400000
+#define TEST_MEM_ALIGN  1024
+
+/*
+ * Image of the 32-bit CSR_CTL register: built field by field through the
+ * bitfields and written to the device through 'csr'.
+ */
+struct he_lbk_csr_ctl {
+       union {
+               uint32_t csr;   /* whole register */
+               struct {
+                       uint32_t reset:1;
+                       uint32_t start:1;
+                       uint32_t force_completion:1;
+                       uint32_t reserved:29;
+               };
+       };
+};
+
+/*
+ * Image of the 32-bit CSR_CFG register: built field by field through the
+ * bitfields and written to the device through 'csr'. The bitfield widths
+ * add up to 32 bits.
+ */
+struct he_lbk_csr_cfg {
+       union {
+               uint32_t csr;   /* whole register */
+               struct {
+                       uint32_t rsvd1:1;
+                       uint32_t cont:1;
+                       uint32_t mode:3;
+                       uint32_t multicl_len:2;
+                       uint32_t rsvd2:13;
+                       uint32_t trput_interleave:3;
+                       uint32_t test_cfg:5;
+                       uint32_t interrupt_on_error:1;
+                       uint32_t interrupt_testmode:1;
+                       uint32_t rsvd3:2;
+               };
+       };
+};
+
+/*
+ * 64-bit value read from CSR_STATUS0, split into the two 32-bit counters
+ * that are reported as write and read counts.
+ */
+struct he_lbk_status0 {
+       union {
+               uint64_t csr;   /* whole register */
+               struct {
+                       uint32_t num_writes;
+                       uint32_t num_reads;
+               };
+       };
+};
+
+/*
+ * 64-bit value read from CSR_STATUS1, split into the two 32-bit counters
+ * that are reported as pending write and pending read counts.
+ */
+struct he_lbk_status1 {
+       union {
+               uint64_t csr;   /* whole register */
+               struct {
+                       uint32_t num_pend_writes;
+                       uint32_t num_pend_reads;
+               };
+       };
+};
+
+/*
+ * Status block read from the start of the DSM buffer.
+ * NOTE(review): presumably filled in by the device - confirm.
+ */
+struct he_lbk_dsm_status {
+       uint32_t test_complete;   /* bit 0 is polled to detect completion */
+       uint32_t test_error;
+       uint64_t num_clocks;      /* total clocks, before overhead removal */
+       uint32_t num_reads;
+       uint32_t num_writes;
+       uint32_t start_overhead;  /* subtracted from num_clocks in reports */
+       uint32_t end_overhead;    /* subtracted too when not continuous */
+};
+
+/* Runtime context of the HE-LBK / HE-MEM-LBK test. */
+struct he_lbk_ctx {
+       uint8_t *addr;       /* register base, taken from dev->addr */
+       uint8_t *dsm_ptr;    /* DSM buffer, DSM_SIZE bytes */
+       uint64_t dsm_iova;   /* IOVA of dsm_ptr */
+       uint8_t *src_ptr;    /* source buffer, NLB_BUF_SIZE bytes */
+       uint64_t src_iova;   /* IOVA of src_ptr */
+       uint8_t *dest_ptr;   /* destination buffer, NLB_BUF_SIZE bytes */
+       uint64_t dest_iova;  /* IOVA of dest_ptr */
+       struct he_lbk_dsm_status *status_ptr;  /* same address as dsm_ptr */
+};
+
+/* Private data hung off afu_mf_rawdev::priv by the HE-LBK drivers. */
+struct he_lbk_priv {
+       struct rte_pmd_afu_he_lbk_cfg he_lbk_cfg;  /* validated user config */
+       struct he_lbk_ctx he_lbk_ctx;              /* runtime context */
+};
+
+#endif /* _HE_LBK_H_ */
diff --git a/drivers/raw/afu_mf/he_mem.c b/drivers/raw/afu_mf/he_mem.c
new file mode 100644
index 0000000..ccbb3a8
--- /dev/null
+++ b/drivers/raw/afu_mf/he_mem.c
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "he_mem.h"
+
+/*
+ * Run the memory traffic generator test.
+ *
+ * The scratchpad register is written and read back first, then the
+ * channels selected by cfg->channel_mask (limited to the bits read from
+ * MEM_TG_CTRL) are started and MEM_TG_STAT is polled until every started
+ * channel is no longer active or MEM_TG_TIMEOUT_MS has elapsed. The result
+ * of each channel is printed to stdout.
+ *
+ * Returns 0 once all started channels have finished (whatever result was
+ * printed for them), -EINVAL if dev is NULL, -ENOENT if the private data
+ * is missing, -EIO if the scratchpad check fails.
+ * NOTE(review): on timeout the remaining channel mask is returned, which is
+ * a positive value unlike the negative errno codes above - confirm this is
+ * what callers expect.
+ */
+static int he_mem_tg_test(struct afu_mf_rawdev *dev)
+{
+       struct he_mem_tg_priv *priv = NULL;
+       struct rte_pmd_afu_he_mem_tg_cfg *cfg = NULL;
+       struct he_mem_tg_ctx *ctx = NULL;
+       uint64_t value = 0x12345678;
+       uint64_t cap = 0;
+       uint64_t channel_mask = 0;
+       int i, t = 0;
+
+       if (!dev)
+               return -EINVAL;
+
+       priv = (struct he_mem_tg_priv *)dev->priv;
+       if (!priv)
+               return -ENOENT;
+
+       cfg = &priv->he_mem_tg_cfg;
+       ctx = &priv->he_mem_tg_ctx;
+
+       AFU_MF_PMD_DEBUG("Channel mask: 0x%x", cfg->channel_mask);
+
+       /* register access sanity check: write a pattern and read it back */
+       rte_write64(value, ctx->addr + MEM_TG_SCRATCHPAD);
+       cap = rte_read64(ctx->addr + MEM_TG_SCRATCHPAD);
+       AFU_MF_PMD_DEBUG("Scratchpad value: 0x%"PRIx64, cap);
+       if (cap != value) {
+               AFU_MF_PMD_ERR("Test scratchpad register failed");
+               return -EIO;
+       }
+
+       /* the value read from MEM_TG_CTRL is used as a capability mask */
+       cap = rte_read64(ctx->addr + MEM_TG_CTRL);
+       AFU_MF_PMD_DEBUG("Capability: 0x%"PRIx64, cap);
+
+       channel_mask = cfg->channel_mask & cap;
+       /* start traffic generators */
+       rte_write64(channel_mask, ctx->addr + MEM_TG_CTRL);
+
+       /* check test status */
+       while (t < MEM_TG_TIMEOUT_MS) {
+               value = rte_read64(ctx->addr + MEM_TG_STAT);
+               for (i = 0; i < NUM_MEM_TG_CHANNELS; i++) {
+                       if (channel_mask & (1 << i)) {
+                               /* still running, look again next round */
+                               if (TGACTIVE(value, i))
+                                       continue;
+                               printf("TG channel %d test %s\n", i,
+                                       TGPASS(value, i) ? "pass" :
+                                       TGTIMEOUT(value, i) ? "timeout" :
+                                       TGFAIL(value, i) ? "fail" : "error");
+                               /* reported once, stop tracking the channel */
+                               channel_mask &= ~(1 << i);
+                       }
+               }
+               if (!channel_mask)
+                       break;
+               rte_delay_ms(MEM_TG_POLL_INTERVAL_MS);
+               t += MEM_TG_POLL_INTERVAL_MS;
+       }
+
+       /* bits still set belong to channels that never left active state */
+       if (channel_mask) {
+               AFU_MF_PMD_ERR("Timeout 0x%04lx", (unsigned long)value);
+               return channel_mask;
+       }
+
+       return 0;
+}
+
+/*
+ * Allocate the zeroed private data if the device has none yet and record
+ * the register base address in the context.
+ *
+ * Returns 0 on success, -EINVAL if dev is NULL, -ENOMEM if the private
+ * data cannot be allocated.
+ */
+static int he_mem_tg_init(struct afu_mf_rawdev *dev)
+{
+       struct he_mem_tg_priv *tg;
+
+       if (dev == NULL)
+               return -EINVAL;
+
+       tg = (struct he_mem_tg_priv *)dev->priv;
+       if (tg == NULL) {
+               tg = rte_zmalloc(NULL, sizeof(*tg), 0);
+               if (tg == NULL)
+                       return -ENOMEM;
+               dev->priv = tg;
+       }
+
+       tg->he_mem_tg_ctx.addr = (uint8_t *)dev->addr;
+
+       return 0;
+}
+
+/*
+ * Store a copy of the user supplied configuration in the private data.
+ *
+ * config must point to a struct rte_pmd_afu_he_mem_tg_cfg and config_size
+ * must be exactly its size.
+ *
+ * Returns 0 on success, -EINVAL on a NULL/zero argument or a size
+ * mismatch, -ENOENT if the private data is missing.
+ */
+static int he_mem_tg_config(struct afu_mf_rawdev *dev, void *config,
+       size_t config_size)
+{
+       struct he_mem_tg_priv *tg;
+
+       if (dev == NULL || config == NULL || config_size == 0)
+               return -EINVAL;
+
+       tg = (struct he_mem_tg_priv *)dev->priv;
+       if (tg == NULL)
+               return -ENOENT;
+
+       if (config_size != sizeof(tg->he_mem_tg_cfg))
+               return -EINVAL;
+
+       rte_memcpy(&tg->he_mem_tg_cfg, config, sizeof(tg->he_mem_tg_cfg));
+
+       return 0;
+}
+
+/*
+ * Free the private data of the device.
+ *
+ * Returns 0 on success, -EINVAL if dev is NULL.
+ */
+static int he_mem_tg_close(struct afu_mf_rawdev *dev)
+{
+       if (dev == NULL)
+               return -EINVAL;
+
+       /* rte_free() is called unconditionally; priv may already be NULL */
+       rte_free(dev->priv);
+       dev->priv = NULL;
+       return 0;
+}
+
+/*
+ * Print the register base address held in the context.
+ *
+ * f: output stream, stdout is used when it is NULL.
+ *
+ * Returns 0 on success, -EINVAL if dev is NULL, -ENOENT if the private
+ * data is missing.
+ */
+static int he_mem_tg_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+       const struct he_mem_tg_priv *tg;
+       FILE *out;
+
+       if (dev == NULL)
+               return -EINVAL;
+
+       tg = (struct he_mem_tg_priv *)dev->priv;
+       if (tg == NULL)
+               return -ENOENT;
+
+       out = (f != NULL) ? f : stdout;
+       fprintf(out, "addr:\t\t%p\n", (void *)tg->he_mem_tg_ctx.addr);
+
+       return 0;
+}
+
+/*
+ * Callbacks of the HE-MEM-TG driver. The start, stop and reset callbacks
+ * are not provided and are left NULL by the initializer.
+ */
+static struct afu_mf_ops he_mem_tg_ops = {
+       .init   = he_mem_tg_init,
+       .config = he_mem_tg_config,
+       .test   = he_mem_tg_test,
+       .close  = he_mem_tg_close,
+       .dump   = he_mem_tg_dump,
+};
+
+/* HE-MEM-TG driver entry: its UUID paired with its callbacks. */
+struct afu_mf_drv he_mem_tg_drv = {
+       .ops = &he_mem_tg_ops,
+       .uuid = { HE_MEM_TG_UUID_L, HE_MEM_TG_UUID_H },
+};
diff --git a/drivers/raw/afu_mf/he_mem.h b/drivers/raw/afu_mf/he_mem.h
new file mode 100644
index 0000000..82404b6
--- /dev/null
+++ b/drivers/raw/afu_mf/he_mem.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _HE_MEM_H_
+#define _HE_MEM_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+/* Low and high 64-bit halves of the UUID matched by the driver */
+#define HE_MEM_TG_UUID_L  0xa3dc5b831f5cecbb
+#define HE_MEM_TG_UUID_H  0x4dadea342c7848cb
+
+/* Number of channels examined by the test and its polling parameters */
+#define NUM_MEM_TG_CHANNELS      4
+#define MEM_TG_TIMEOUT_MS     5000
+#define MEM_TG_POLL_INTERVAL_MS 10
+
+/* Driver entry defined in he_mem.c */
+extern struct afu_mf_drv he_mem_tg_drv;
+
+/*
+ * MEM-TG registers definition.
+ * The register values are byte offsets added to the register base.
+ *
+ * MEM_TG_CTRL has one bit per channel, selected with TGCONTROL(n).
+ * MEM_TG_STAT has four bits per channel n, starting at bit 4 * n:
+ *   bit 0 active, bit 1 timeout, bit 2 fail, bit 3 pass.
+ * TGSTATUS() extracts all four bits, the other helpers a single one.
+ *
+ * Every macro argument is parenthesized so that an expression such as
+ * "i & 3" can be passed as channel number without changing precedence.
+ */
+#define MEM_TG_SCRATCHPAD   0x28
+#define MEM_TG_CTRL         0x30
+#define   TGCONTROL(n)      (1 << (n))
+#define MEM_TG_STAT         0x38
+#define   TGSTATUS(v, n)    (((v) >> ((n) << 2)) & 0xf)
+#define   TGPASS(v, n)      (((v) >> (((n) << 2) + 3)) & 0x1)
+#define   TGFAIL(v, n)      (((v) >> (((n) << 2) + 2)) & 0x1)
+#define   TGTIMEOUT(v, n)   (((v) >> (((n) << 2) + 1)) & 0x1)
+#define   TGACTIVE(v, n)    (((v) >> ((n) << 2)) & 0x1)
+
+/* Runtime context of the HE-MEM-TG test. */
+struct he_mem_tg_ctx {
+       uint8_t *addr;  /* register base, taken from dev->addr */
+};
+
+/* Private data hung off afu_mf_rawdev::priv by the HE-MEM-TG driver. */
+struct he_mem_tg_priv {
+       struct rte_pmd_afu_he_mem_tg_cfg he_mem_tg_cfg;  /* user config copy */
+       struct he_mem_tg_ctx he_mem_tg_ctx;              /* runtime context */
+};
+
+#endif /* _HE_MEM_H_ */
diff --git a/drivers/raw/afu_mf/meson.build b/drivers/raw/afu_mf/meson.build
new file mode 100644
index 0000000..f304bc8
--- /dev/null
+++ b/drivers/raw/afu_mf/meson.build
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2022 Intel Corporation
+
+# Libraries and bus drivers this driver depends on
+deps += ['rawdev', 'bus_pci', 'bus_ifpga']
+# Core rawdev glue plus one source file per supported AFU type
+sources = files('afu_mf_rawdev.c', 'n3000_afu.c', 'he_lbk.c', 'he_mem.c',
+       'he_hssi.c')
+
+# Header listed for export; presumably the public API of the driver
+headers = files('rte_pmd_afu.h')
diff --git a/drivers/raw/afu_mf/n3000_afu.c b/drivers/raw/afu_mf/n3000_afu.c
new file mode 100644
index 0000000..19d7c54
--- /dev/null
+++ b/drivers/raw/afu_mf/n3000_afu.c
@@ -0,0 +1,2005 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "n3000_afu.h"
+
+/*
+ * Build the NLB CSR_CFG value from the stored user configuration and write
+ * it to the device.
+ *
+ * Returns 0 on success, -EINVAL if dev is NULL, -ENOENT if the private
+ * data is missing.
+ */
+static int nlb_afu_config(struct afu_mf_rawdev *dev)
+{
+       struct n3000_afu_priv *priv = NULL;
+       struct rte_pmd_afu_nlb_cfg *cfg = NULL;
+       struct nlb_csr_cfg v;
+
+       if (!dev)
+               return -EINVAL;
+
+       if (!dev->priv)
+               return -ENOENT;
+
+       priv = (struct n3000_afu_priv *)dev->priv;
+       cfg = &priv->nlb_cfg;
+
+       /* start from an all-zero register image */
+       v.csr = 0;
+
+       if (cfg->cont)
+               v.cont = 1;
+
+       /* write-push policy has its own bit, other policies use wrthru_en */
+       if (cfg->cache_policy == NLB_WRPUSH_I)
+               v.wrpush_i = 1;
+       else
+               v.wrthru_en = cfg->cache_policy;
+
+       /* the mixed read hint is encoded as 3 */
+       if (cfg->cache_hint == NLB_RDLINE_MIXED)
+               v.rdsel = 3;
+       else
+               v.rdsel = cfg->cache_hint;
+
+       v.mode = cfg->mode;
+       v.chsel = cfg->read_vc;
+       v.wr_chsel = cfg->write_vc;
+       v.wrfence_chsel = cfg->wrfence_vc;
+       /*
+        * NOTE(review): this assignment runs unconditionally and therefore
+        * also in the NLB_WRPUSH_I case skipped by the branch above. Whether
+        * that changes the register depends on the width of wrthru_en and
+        * on the enum values, which are declared elsewhere - confirm.
+        */
+       v.wrthru_en = cfg->cache_policy;
+       v.multicl_len = cfg->multi_cl - 1;
+
+       AFU_MF_PMD_DEBUG("cfg: 0x%08x", v.csr);
+       rte_write32(v.csr, priv->nlb_ctx.addr + CSR_CFG);
+
+       return 0;
+}
+
+/*
+ * Print the result of one NLB test iteration to stdout.
+ *
+ * dev: device whose private data holds the configuration and context.
+ * cl:  number of cache lines of the iteration, printed as-is.
+ *
+ * All counters come from the DSM status block. If cfg->freq_mhz is 0 it is
+ * set to 200 here (side effect on the stored configuration).
+ * Returns silently when dev or dev->priv is NULL.
+ */
+static void nlb_afu_report(struct afu_mf_rawdev *dev, uint32_t cl)
+{
+       struct n3000_afu_priv *priv = NULL;
+       struct rte_pmd_afu_nlb_cfg *cfg = NULL;
+       struct nlb_dsm_status *stat = NULL;
+       uint64_t ticks = 0;
+       double num, rd_bw, wr_bw;
+
+       if (!dev || !dev->priv)
+               return;
+
+       priv = (struct n3000_afu_priv *)dev->priv;
+
+       cfg = &priv->nlb_cfg;
+       stat = priv->nlb_ctx.status_ptr;
+
+       /*
+        * Net clock count: the start overhead is always subtracted, the end
+        * overhead only when the test was not run in continuous mode.
+        */
+       if (cfg->cont)
+               ticks = stat->num_clocks - stat->start_overhead;
+       else
+               ticks = stat->num_clocks -
+                       (stat->start_overhead + stat->end_overhead);
+
+       /* no frequency configured: assume 200 MHz */
+       if (cfg->freq_mhz == 0)
+               cfg->freq_mhz = 200;
+
+       /*
+        * bandwidth = count * bytes per cache line * clock rate / ticks
+        * NOTE(review): ticks is not checked for zero before dividing.
+        */
+       num = (double)stat->num_reads;
+       rd_bw = (num * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+       num = (double)stat->num_writes;
+       wr_bw = (num * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+
+       printf("Cachelines  Read_Count Write_Count Clocks@%uMHz   "
+               "Rd_Bandwidth   Wr_Bandwidth\n", cfg->freq_mhz);
+       printf("%10u  %10u %11u  %12"PRIu64"   %7.3f GB/s   %7.3f GB/s\n",
+               cl, stat->num_reads, stat->num_writes, ticks,
+               rd_bw / 1e9, wr_bw / 1e9);
+}
+
+/*
+ * Run the NLB test over the configured range of cache line counts.
+ *
+ * The source buffer is filled with an incrementing 32-bit pattern, then one
+ * iteration is run for every cl in [cfg->begin, cfg->end] with a step of
+ * cfg->multi_cl. Each iteration programs the device, waits for bit 0 of
+ * test_complete in the DSM status block, prints a report and compares the
+ * destination buffer against the pattern.
+ *
+ * Returns 0 on success, -EINVAL if dev is NULL, -ENOENT if the private data
+ * is missing, the error of nlb_afu_config(), or the non-zero result of
+ * dsm_poll_timeout() when the device does not signal completion in time.
+ * A data mismatch is logged but does not change the return value.
+ */
+static int nlb_afu_test(struct afu_mf_rawdev *dev)
+{
+       struct n3000_afu_priv *priv = NULL;
+       struct nlb_afu_ctx *ctx = NULL;
+       struct rte_pmd_afu_nlb_cfg *cfg = NULL;
+       struct nlb_csr_ctl ctl;
+       uint32_t *ptr = NULL;
+       uint32_t i, j, cl, val = 0;
+       uint64_t sval = 0;
+       int ret = 0;
+
+       if (!dev)
+               return -EINVAL;
+
+       if (!dev->priv)
+               return -ENOENT;
+
+       priv = (struct n3000_afu_priv *)dev->priv;
+       ctx = &priv->nlb_ctx;
+       cfg = &priv->nlb_cfg;
+
+       /* initialize registers */
+       AFU_MF_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova);
+       rte_write64(ctx->dsm_iova, ctx->addr + CSR_AFU_DSM_BASEL);
+
+       /* write CSR_CTL as 0, then write it again with 'reset' set */
+       ctl.csr = 0;
+       rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+       ctl.reset = 1;
+       rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+       /* buffer addresses are passed through SIZE_TO_CLS() */
+       AFU_MF_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova);
+       rte_write64(SIZE_TO_CLS(ctx->src_iova), ctx->addr + CSR_SRC_ADDR);
+       AFU_MF_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova);
+       rte_write64(SIZE_TO_CLS(ctx->dest_iova), ctx->addr + CSR_DST_ADDR);
+
+       ret = nlb_afu_config(dev);
+       if (ret)
+               return ret;
+
+       /* initialize src data: 32-bit word k holds the value k */
+       ptr = (uint32_t *)ctx->src_ptr;
+       j = CLS_TO_SIZE(cfg->end) >> 2;
+       for (i = 0; i < j; i++)
+               *ptr++ = i;
+
+       /* start test */
+       for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) {
+               /* clear the previous result and completion flag */
+               memset(ctx->dest_ptr, 0, CLS_TO_SIZE(cl));
+               memset(ctx->dsm_ptr, 0, DSM_SIZE);
+
+               ctl.csr = 0;
+               rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+               ctl.reset = 1;
+               rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+               rte_write32(cl, ctx->addr + CSR_NUM_LINES);
+
+               rte_delay_us(10);
+
+               ctl.start = 1;
+               rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+               if (cfg->cont) {
+                       /* continuous mode: run for cfg->timeout seconds,
+                        * then force completion and wait for the flag
+                        */
+                       rte_delay_ms(cfg->timeout * 1000);
+                       ctl.force_completion = 1;
+                       rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+                       ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+                               val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+                               DSM_TIMEOUT);
+                       if (ret) {
+                               printf("DSM poll timeout\n");
+                               goto end;
+                       }
+               } else {
+                       /* wait for the flag first, then force completion */
+                       ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+                               val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+                               DSM_TIMEOUT);
+                       if (ret) {
+                               printf("DSM poll timeout\n");
+                               goto end;
+                       }
+                       ctl.force_completion = 1;
+                       rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+               }
+
+               nlb_afu_report(dev, cl);
+
+               /* wait up to 100 x 1 ms for CSR_STATUS1 to read zero */
+               i = 0;
+               while (i++ < 100) {
+                       sval = rte_read64(ctx->addr + CSR_STATUS1);
+                       if (sval == 0)
+                               break;
+                       rte_delay_us(1000);
+               }
+
+               /* compare the destination with the source pattern */
+               ptr = (uint32_t *)ctx->dest_ptr;
+               j = CLS_TO_SIZE(cl) >> 2;
+               for (i = 0; i < j; i++) {
+                       if (*ptr++ != i) {
+                               AFU_MF_PMD_ERR("Data mismatch @ %u", i);
+                               break;
+                       }
+               }
+       }
+
+end:
+       return ret;
+}
+
+/*
+ * Free every DMA buffer plus the data and reference buffers of the
+ * context and reset the pointers to NULL. Does nothing when ctx is NULL.
+ */
+static void dma_afu_buf_free(struct dma_afu_ctx *ctx)
+{
+       int idx = 0;
+
+       if (ctx == NULL)
+               return;
+
+       while (idx < NUM_DMA_BUF) {
+               rte_free(ctx->dma_buf[idx]);
+               ctx->dma_buf[idx] = NULL;
+               idx++;
+       }
+
+       rte_free(ctx->data_buf);
+       rte_free(ctx->ref_buf);
+       ctx->data_buf = NULL;
+       ctx->ref_buf = NULL;
+}
+
+/*
+ * Allocate the buffers used by the DMA test: NUM_DMA_BUF zeroed buffers of
+ * cfg->size bytes with their IOVAs, plus a data and a reference buffer of
+ * cfg->length bytes aligned to the system page size.
+ *
+ * Returns 0 on success, -EINVAL if ctx or cfg is NULL, -ENOMEM if an
+ * allocation or an IOVA lookup fails (everything is freed again in that
+ * case).
+ */
+static int dma_afu_buf_alloc(struct dma_afu_ctx *ctx,
+       struct rte_pmd_afu_dma_cfg *cfg)
+{
+       const size_t align = sysconf(_SC_PAGE_SIZE);
+       /* every failure past the argument check reports -ENOMEM */
+       int err = -ENOMEM;
+       int n;
+
+       if (ctx == NULL || cfg == NULL)
+               return -EINVAL;
+
+       for (n = 0; n < NUM_DMA_BUF; n++) {
+               ctx->dma_buf[n] = (uint64_t *)rte_zmalloc(NULL, cfg->size,
+                       TEST_MEM_ALIGN);
+               if (ctx->dma_buf[n] == NULL)
+                       goto fail;
+
+               ctx->dma_iova[n] = rte_malloc_virt2iova(ctx->dma_buf[n]);
+               if (ctx->dma_iova[n] == RTE_BAD_IOVA)
+                       goto fail;
+       }
+
+       ctx->data_buf = rte_malloc(NULL, cfg->length, align);
+       if (ctx->data_buf == NULL)
+               goto fail;
+
+       ctx->ref_buf = rte_malloc(NULL, cfg->length, align);
+       if (ctx->ref_buf == NULL)
+               goto fail;
+
+       return 0;
+
+fail:
+       dma_afu_buf_free(ctx);
+       return err;
+}
+
+/*
+ * Fill the reference buffer with test data and copy it to the data buffer.
+ *
+ * A non-zero ctx->pattern is used as fill byte for all 'size' bytes;
+ * otherwise size / 4 words are taken from rand() after seeding with the
+ * fixed value 99, so the sequence is the same on every call.
+ * Does nothing when ctx is NULL or size is 0.
+ */
+static void dma_afu_buf_init(struct dma_afu_ctx *ctx, size_t size)
+{
+       int *words;
+       size_t k;
+       size_t nwords;
+
+       if (ctx == NULL || size == 0)
+               return;
+
+       words = (int *)ctx->ref_buf;
+
+       if (ctx->pattern == 0) {
+               srand(99);
+               nwords = size / 4;
+               for (k = 0; k < nwords; k++)
+                       words[k] = rand();
+       } else {
+               memset(words, ctx->pattern, size);
+       }
+
+       rte_memcpy(ctx->data_buf, ctx->ref_buf, size);
+}
+
+/*
+ * Compare the first size bytes of the data buffer against the reference
+ * buffer and print the verdict.
+ *
+ * In verbose mode each differing byte is listed until ERR_CHECK_LIMIT
+ * differences have been counted, followed by a summary line.
+ *
+ * Returns 0 when the buffers match, -1 when they differ, or -EINVAL if
+ * ctx is NULL or size is 0.
+ */
+static int dma_afu_buf_verify(struct dma_afu_ctx *ctx, size_t size)
+{
+       const uint8_t *expect;
+       const uint8_t *actual;
+       size_t pos;
+       int errs = 0;
+
+       if (ctx == NULL || size == 0)
+               return -EINVAL;
+
+       expect = (const uint8_t *)ctx->ref_buf;
+       actual = (const uint8_t *)ctx->data_buf;
+
+       if (memcmp(expect, actual, size) == 0) {
+               printf("Transfer is verified\n");
+               return 0;
+       }
+
+       printf("Transfer is corrupted\n");
+       if (!ctx->verbose)
+               return -1;
+
+       for (pos = 0; pos < size; pos++) {
+               if (expect[pos] == actual[pos])
+                       continue;
+               /* the difference that reaches the limit is counted only */
+               if (++errs >= ERR_CHECK_LIMIT)
+                       break;
+               printf("Mismatch at 0x%zx, "
+                       "Expected %02x  Actual %02x\n",
+                       pos, expect[pos], actual[pos]);
+       }
+
+       if (errs < ERR_CHECK_LIMIT) {
+               printf("Found %d error bytes\n", errs);
+       } else {
+               printf("......\n");
+               printf("Found more than %d error bytes\n", errs);
+       }
+
+       return -1;
+}
+
+/*
+ * Copy bytes / 8 64-bit words from host memory to a device window, one
+ * rte_write64() per word in ascending address order.
+ * Nothing is written unless both dev_addr and bytes pass the
+ * IS_ALIGNED_QWORD check.
+ */
+static void blk_write64(uint64_t *dev_addr, uint64_t *host_addr,
+       uint64_t bytes)
+{
+       uint64_t nqwords = bytes / sizeof(uint64_t);
+       uint64_t k;
+
+       if (!IS_ALIGNED_QWORD((uint64_t)dev_addr))
+               return;
+       if (!IS_ALIGNED_QWORD((uint64_t)bytes))
+               return;
+
+       for (k = 0; k < nqwords; k++)
+               rte_write64(host_addr[k], &dev_addr[k]);
+}
+
+/*
+ * Copy bytes / 8 64-bit words from a device window to host memory, one
+ * rte_read64() per word in ascending address order.
+ * Nothing is read unless both dev_addr and bytes pass the
+ * IS_ALIGNED_QWORD check.
+ */
+static void blk_read64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t bytes)
+{
+       uint64_t nqwords = bytes / sizeof(uint64_t);
+       uint64_t k;
+
+       if (!IS_ALIGNED_QWORD((uint64_t)dev_addr))
+               return;
+       if (!IS_ALIGNED_QWORD((uint64_t)bytes))
+               return;
+
+       for (k = 0; k < nqwords; k++)
+               host_addr[k] = rte_read64(&dev_addr[k]);
+}
+
+/*
+ * Select the ASE page that contains addr.
+ * The page base (addr with the DMA_ASE_WINDOW_MASK bits cleared) is written
+ * to the ASE control register only when it differs from the page cached in
+ * ctx->cur_ase_page, which is then updated. A NULL ctx is ignored.
+ */
+static void switch_ase_page(struct dma_afu_ctx *ctx, uint64_t addr)
+{
+       uint64_t page;
+
+       if (ctx == NULL)
+               return;
+
+       page = addr & ~DMA_ASE_WINDOW_MASK;
+       if (page == ctx->cur_ase_page)
+               return;
+
+       rte_write64(page, ctx->ase_ctrl_addr);
+       ctx->cur_ase_page = page;
+}
+
+/*
+ * Write fewer than QWORD_BYTES bytes to an arbitrary device address with a
+ * read-modify-write of the enclosing QWORD in the ASE data window.
+ *
+ * ctx:       DMA AFU context
+ * dev_addr:  destination address in FPGA memory (any alignment)
+ * host_addr: source address in host memory, passed as an integer
+ * count:     number of bytes to write
+ *
+ * The bytes must fit inside the QWORD that contains dev_addr; a request
+ * that would run past it is rejected instead of overrunning the local
+ * staging word.
+ *
+ * Returns 0 on success (including count == 0) or -EINVAL on a bad argument.
+ */
+static int ase_write_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
+       uint64_t host_addr, uint32_t count)
+{
+       uint64_t dev_aligned_addr = 0;
+       uint64_t shift = 0;
+       uint64_t val = 0;
+       uintptr_t addr = (uintptr_t)host_addr;  /* transfer to pointer size */
+
+       AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%x)", host_addr,
+               dev_addr, count);
+
+       if (!ctx || (count >= QWORD_BYTES))
+               return -EINVAL;
+
+       if (!count)
+               return 0;
+
+       shift = dev_addr % QWORD_BYTES;
+       /* the patched bytes must stay inside the 8-byte staging word */
+       if (shift + count > QWORD_BYTES)
+               return -EINVAL;
+
+       switch_ase_page(ctx, dev_addr);
+
+       /* fetch the current QWORD, patch the requested bytes into it */
+       dev_aligned_addr = (dev_addr - shift) & DMA_ASE_WINDOW_MASK;
+       val = rte_read64(ctx->ase_data_addr + dev_aligned_addr);
+       rte_memcpy(((char *)(&val)) + shift, (void *)addr, count);
+
+       /* write back to device */
+       rte_write64(val, ctx->ase_data_addr + dev_aligned_addr);
+
+       return 0;
+}
+
+/*
+ * Copy the DWORD/QWORD-sized part of a host buffer to FPGA memory through
+ * the ASE data window.
+ *
+ * ctx:     DMA AFU context
+ * dst_ptr: in/out, FPGA destination address
+ * src_ptr: in/out, host source address (as an integer)
+ * count:   in/out, bytes still to transfer
+ *
+ * At most one leading DWORD is written to reach QWORD alignment, then
+ * blocks of QWORDs that never cross an ASE window boundary, then at most
+ * one trailing DWORD. On success the three in/out values are advanced past
+ * what was written; fewer than DWORD_BYTES bytes may remain for the caller.
+ *
+ * Returns 0 on success, -EINVAL if ctx is NULL or if there is at least one
+ * DWORD to write and dst is not DWORD aligned.
+ */
+static int ase_write(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
+       uint64_t *src_ptr, uint64_t *count)
+{
+       uint64_t src = *src_ptr;
+       uint64_t dst = *dst_ptr;
+       uint64_t align_bytes = *count;
+       uint64_t offset = 0;
+       uint64_t left_in_page = 0;
+       uint64_t size_to_copy = 0;
+
+       AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
+               align_bytes);
+
+       if (!ctx)
+               return -EINVAL;
+
+       /*
+        * Nothing for this helper to move. Checked before the alignment
+        * test so that a transfer which already ended in the middle of a
+        * DWORD is not reported as an error.
+        */
+       if (align_bytes < DWORD_BYTES)
+               return 0;
+
+       if (!IS_ALIGNED_DWORD(dst))
+               return -EINVAL;
+
+       if (!IS_ALIGNED_QWORD(dst)) {
+               /* Write out a single DWORD to get QWORD aligned */
+               switch_ase_page(ctx, dst);
+               offset = dst & DMA_ASE_WINDOW_MASK;
+
+               rte_write32(*(uint32_t *)(uintptr_t)src,
+                       ctx->ase_data_addr + offset);
+               src += DWORD_BYTES;
+               dst += DWORD_BYTES;
+               align_bytes -= DWORD_BYTES;
+       }
+
+       /* Write out blocks of 64-bit values */
+       while (align_bytes >= QWORD_BYTES) {
+               /*
+                * Room left in the window that contains dst. It has to be
+                * recomputed for every block: carrying it over from the
+                * previous iteration shrinks it to zero after two windows
+                * and stalls the transfer.
+                * NOTE(review): assumes DMA_ASE_WINDOW_MASK is
+                * DMA_ASE_WINDOW - 1 -- confirm in n3000_afu.h.
+                */
+               left_in_page = DMA_ASE_WINDOW - (dst & DMA_ASE_WINDOW_MASK);
+               size_to_copy =
+                       MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1)));
+               if (size_to_copy < QWORD_BYTES)
+                       break;
+               switch_ase_page(ctx, dst);
+               offset = dst & DMA_ASE_WINDOW_MASK;
+               blk_write64((uint64_t *)(ctx->ase_data_addr + offset),
+                       (uint64_t *)(uintptr_t)src, size_to_copy);
+               src += size_to_copy;
+               dst += size_to_copy;
+               align_bytes -= size_to_copy;
+       }
+
+       if (align_bytes >= DWORD_BYTES) {
+               /* Write out remaining DWORD */
+               switch_ase_page(ctx, dst);
+               offset = dst & DMA_ASE_WINDOW_MASK;
+               rte_write32(*(uint32_t *)(uintptr_t)src,
+                       ctx->ase_data_addr + offset);
+               src += DWORD_BYTES;
+               dst += DWORD_BYTES;
+               align_bytes -= DWORD_BYTES;
+       }
+
+       /* always report progress, even if the head DWORD was all there was */
+       *src_ptr = src;
+       *dst_ptr = dst;
+       *count = align_bytes;
+
+       return 0;
+}
+
+/*
+ * Copy count bytes from host memory to FPGA memory using MMIO through the
+ * ASE window, for any alignment of the destination.
+ *
+ * ctx:     DMA AFU context
+ * dst_ptr: in/out, FPGA destination address
+ * src_ptr: in/out, host source address (as an integer)
+ * count:   number of bytes to copy
+ *
+ * The copy is done in three steps: a byte-wise head up to the next QWORD
+ * boundary, a DWORD/QWORD body, and a byte-wise tail. On success *dst_ptr
+ * and *src_ptr are advanced past the copied data.
+ *
+ * Returns 0 on success or the negative error of the failing step.
+ */
+static int ase_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
+       uint64_t *src_ptr, uint64_t count)
+{
+       uint64_t dst = *dst_ptr;
+       uint64_t src = *src_ptr;
+       uint64_t count_left = count;
+       uint64_t unaligned_size = 0;
+       int ret = 0;
+
+       AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
+               count);
+
+       /* aligns address to 8 byte using dst masking method */
+       if (!IS_ALIGNED_DWORD(dst) && !IS_ALIGNED_QWORD(dst)) {
+               unaligned_size = QWORD_BYTES - (dst % QWORD_BYTES);
+               if (unaligned_size > count_left)
+                       unaligned_size = count_left;
+               ret = ase_write_unaligned(ctx, dst, src, unaligned_size);
+               if (ret)
+                       return ret;
+               count_left -= unaligned_size;
+               src += unaligned_size;
+               dst += unaligned_size;
+       }
+
+       /*
+        * Handles 8/4 byte MMIO transfer. Skipped when less than a DWORD
+        * is left: if the head step consumed everything, dst may still be
+        * unaligned and ase_write() would reject it although the copy is
+        * already complete.
+        */
+       if (count_left >= DWORD_BYTES) {
+               ret = ase_write(ctx, &dst, &src, &count_left);
+               if (ret)
+                       return ret;
+       }
+
+       /* Left over unaligned bytes transferred using dst masking method */
+       unaligned_size = QWORD_BYTES - (dst % QWORD_BYTES);
+       if (unaligned_size > count_left)
+               unaligned_size = count_left;
+
+       ret = ase_write_unaligned(ctx, dst, src, unaligned_size);
+       if (ret)
+               return ret;
+
+       *dst_ptr = dst + unaligned_size;
+       *src_ptr = src + unaligned_size;
+
+       return 0;
+}
+
+/*
+ * Read fewer than QWORD_BYTES bytes from an arbitrary device address by
+ * fetching the enclosing QWORD from the ASE data window and copying the
+ * requested bytes out of it.
+ *
+ * ctx:       DMA AFU context
+ * dev_addr:  source address in FPGA memory (any alignment)
+ * host_addr: destination address in host memory, passed as an integer
+ * count:     number of bytes to read
+ *
+ * The bytes must lie inside the QWORD that contains dev_addr; a request
+ * that would run past it is rejected instead of reading beyond the local
+ * staging word.
+ *
+ * Returns 0 on success (including count == 0) or -EINVAL on a bad argument.
+ */
+static int ase_read_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
+       uint64_t host_addr, uint32_t count)
+{
+       uint64_t dev_aligned_addr = 0;
+       uint64_t shift = 0;
+       uint64_t val = 0;
+       uintptr_t addr = (uintptr_t)host_addr;  /* transfer to pointer size */
+
+       AFU_MF_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%x)", host_addr,
+               dev_addr, count);
+
+       if (!ctx || (count >= QWORD_BYTES))
+               return -EINVAL;
+
+       if (!count)
+               return 0;
+
+       shift = dev_addr % QWORD_BYTES;
+       /* the requested bytes must stay inside the 8-byte staging word */
+       if (shift + count > QWORD_BYTES)
+               return -EINVAL;
+
+       switch_ase_page(ctx, dev_addr);
+
+       dev_aligned_addr = (dev_addr - shift) & DMA_ASE_WINDOW_MASK;
+       val = rte_read64(ctx->ase_data_addr + dev_aligned_addr);
+       rte_memcpy((void *)addr, ((char *)(&val)) + shift, count);
+
+       return 0;
+}
+
+/*
+ * Copy the DWORD/QWORD-sized part of an FPGA memory region to a host
+ * buffer through the ASE data window.
+ *
+ * ctx:     DMA AFU context
+ * src_ptr: in/out, FPGA source address
+ * dst_ptr: in/out, host destination address (as an integer)
+ * count:   in/out, bytes still to transfer
+ *
+ * At most one leading DWORD is read to reach QWORD alignment, then blocks
+ * of QWORDs that never cross an ASE window boundary, then at most one
+ * trailing DWORD. On success the three in/out values are advanced past
+ * what was read; fewer than DWORD_BYTES bytes may remain for the caller.
+ *
+ * Returns 0 on success, -EINVAL if ctx is NULL or if there is at least one
+ * DWORD to read and src is not DWORD aligned.
+ */
+static int ase_read(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
+       uint64_t *dst_ptr, uint64_t *count)
+{
+       uint64_t src = *src_ptr;
+       uint64_t dst = *dst_ptr;
+       uint64_t align_bytes = *count;
+       uint64_t offset = 0;
+       uint64_t left_in_page = 0;
+       uint64_t size_to_copy = 0;
+
+       AFU_MF_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%"PRIx64")", dst, src,
+               align_bytes);
+
+       if (!ctx)
+               return -EINVAL;
+
+       /*
+        * Nothing for this helper to move. Checked before the alignment
+        * test so that a transfer which already ended in the middle of a
+        * DWORD is not reported as an error.
+        */
+       if (align_bytes < DWORD_BYTES)
+               return 0;
+
+       if (!IS_ALIGNED_DWORD(src))
+               return -EINVAL;
+
+       if (!IS_ALIGNED_QWORD(src)) {
+               /* Read a single DWORD to get QWORD aligned */
+               switch_ase_page(ctx, src);
+               offset = src & DMA_ASE_WINDOW_MASK;
+               *(uint32_t *)(uintptr_t)dst =
+                       rte_read32(ctx->ase_data_addr + offset);
+               src += DWORD_BYTES;
+               dst += DWORD_BYTES;
+               align_bytes -= DWORD_BYTES;
+       }
+
+       /* Read blocks of 64-bit values */
+       while (align_bytes >= QWORD_BYTES) {
+               /*
+                * Room left in the window that contains src. It has to be
+                * recomputed for every block: carrying it over from the
+                * previous iteration shrinks it to zero after two windows
+                * and stalls the transfer.
+                * NOTE(review): assumes DMA_ASE_WINDOW_MASK is
+                * DMA_ASE_WINDOW - 1 -- confirm in n3000_afu.h.
+                */
+               left_in_page = DMA_ASE_WINDOW - (src & DMA_ASE_WINDOW_MASK);
+               size_to_copy =
+                       MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1)));
+               if (size_to_copy < QWORD_BYTES)
+                       break;
+               switch_ase_page(ctx, src);
+               offset = src & DMA_ASE_WINDOW_MASK;
+               blk_read64((uint64_t *)(ctx->ase_data_addr + offset),
+                       (uint64_t *)(uintptr_t)dst, size_to_copy);
+               src += size_to_copy;
+               dst += size_to_copy;
+               align_bytes -= size_to_copy;
+       }
+
+       if (align_bytes >= DWORD_BYTES) {
+               /* Read remaining DWORD */
+               switch_ase_page(ctx, src);
+               offset = src & DMA_ASE_WINDOW_MASK;
+               *(uint32_t *)(uintptr_t)dst =
+                       rte_read32(ctx->ase_data_addr + offset);
+               src += DWORD_BYTES;
+               dst += DWORD_BYTES;
+               align_bytes -= DWORD_BYTES;
+       }
+
+       /* always report progress, even if the head DWORD was all there was */
+       *src_ptr = src;
+       *dst_ptr = dst;
+       *count = align_bytes;
+
+       return 0;
+}
+
+/*
+ * Copy count bytes from FPGA memory to host memory using MMIO through the
+ * ASE window, for any alignment of the source.
+ *
+ * ctx:     DMA AFU context
+ * src_ptr: in/out, FPGA source address
+ * dst_ptr: in/out, host destination address (as an integer)
+ * count:   number of bytes to copy
+ *
+ * The copy is done in three steps: a byte-wise head up to the next QWORD
+ * boundary, a DWORD/QWORD body, and a byte-wise tail. On success *src_ptr
+ * and *dst_ptr are advanced past the copied data.
+ *
+ * Returns 0 on success or the negative error of the failing step.
+ */
+static int ase_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
+       uint64_t *dst_ptr, uint64_t count)
+{
+       uint64_t src = *src_ptr;
+       uint64_t dst = *dst_ptr;
+       uint64_t count_left = count;
+       uint64_t unaligned_size = 0;
+       int ret = 0;
+
+       AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
+               count);
+
+       /* Aligns address to 8 byte using src masking method */
+       if (!IS_ALIGNED_DWORD(src) && !IS_ALIGNED_QWORD(src)) {
+               unaligned_size = QWORD_BYTES - (src % QWORD_BYTES);
+               if (unaligned_size > count_left)
+                       unaligned_size = count_left;
+               ret = ase_read_unaligned(ctx, src, dst, unaligned_size);
+               if (ret)
+                       return ret;
+               count_left -= unaligned_size;
+               dst += unaligned_size;
+               src += unaligned_size;
+       }
+
+       /*
+        * Handles 8/4 byte MMIO transfer. Skipped when less than a DWORD
+        * is left: if the head step consumed everything, src may still be
+        * unaligned and ase_read() would reject it although the copy is
+        * already complete.
+        */
+       if (count_left >= DWORD_BYTES) {
+               ret = ase_read(ctx, &src, &dst, &count_left);
+               if (ret)
+                       return ret;
+       }
+
+       /* Left over unaligned bytes transferred using src masking method */
+       unaligned_size = QWORD_BYTES - (src % QWORD_BYTES);
+       if (unaligned_size > count_left)
+               unaligned_size = count_left;
+
+       ret = ase_read_unaligned(ctx, src, dst, unaligned_size);
+       if (ret)
+               return ret;
+
+       *dst_ptr = dst + unaligned_size;
+       *src_ptr = src + unaligned_size;
+
+       return 0;
+}
+
+/*
+ * Acknowledge a pending DMA interrupt: a status word with only the IRQ bit
+ * set is written to the status register. A NULL ctx is ignored.
+ */
+static void clear_interrupt(struct dma_afu_ctx *ctx)
+{
+       msgdma_status ack;
+
+       if (ctx == NULL)
+               return;
+
+       /* start from an all-zero word so that IRQ is the only bit set */
+       ack.csr = 0;
+       ack.irq = 1;
+
+       rte_write32(ack.csr, CSR_STATUS(ctx->csr_addr));
+}
+
+/*
+ * Wait for the DMA completion interrupt on ctx->event_fd.
+ *
+ * Blocks in poll() for at most DMA_TIMEOUT_MSEC, then reads the 64-bit
+ * counter from the descriptor. The interrupt is acknowledged with
+ * clear_interrupt() on every path that got as far as polling.
+ *
+ * Returns 0 when an event was received, -EINVAL if ctx is NULL or has no
+ * valid event_fd, -EFAULT if poll() failed, -ETIMEDOUT if no event arrived
+ * in time, or -EIO if the counter could not be read.
+ */
+static int poll_interrupt(struct dma_afu_ctx *ctx)
+{
+       struct pollfd pfd = {0};
+       uint64_t count = 0;
+       ssize_t bytes_read = 0;
+       int poll_ret = 0;
+       int ret = 0;
+
+       if (!ctx || (ctx->event_fd < 0))
+               return -EINVAL;
+
+       pfd.fd = ctx->event_fd;
+       pfd.events = POLLIN;
+       poll_ret = poll(&pfd, 1, DMA_TIMEOUT_MSEC);
+       if (poll_ret < 0) {
+               AFU_MF_PMD_ERR("Error %s", strerror(errno));
+               ret = -EFAULT;
+       } else if (poll_ret == 0) {
+               AFU_MF_PMD_ERR("Timeout");
+               ret = -ETIMEDOUT;
+       } else {
+               bytes_read = read(pfd.fd, &count, sizeof(count));
+               if (bytes_read > 0) {
+                       if (ctx->verbose)
+                               AFU_MF_PMD_DEBUG("Successful, ret %d, cnt %"
+                                       PRIu64, poll_ret, count);
+                       ret = 0;
+               } else {
+                       /* errno is only meaningful when read() failed */
+                       AFU_MF_PMD_ERR("Failed %s", bytes_read < 0 ?
+                               strerror(errno) : "zero bytes read");
+                       ret = -EIO;
+               }
+       }
+
+       clear_interrupt(ctx);
+       return ret;
+}
+
+/*
+ * Hand one extended descriptor to the DMA engine.
+ *
+ * Waits until the status register no longer reports a full descriptor
+ * buffer, then copies the descriptor to ctx->desc_addr with 64-bit MMIO
+ * writes. In verbose mode the descriptor fields are logged first.
+ *
+ * NOTE(review): the wait has no upper bound; the retry counter only emits
+ * a debug message and starts over. desc is not checked for NULL.
+ */
+static void send_descriptor(struct dma_afu_ctx *ctx, msgdma_ext_desc *desc)
+{
+       msgdma_status status;
+       uint64_t fpga_queue_full = 0;
+
+       if (!ctx)
+               return;
+
+       if (ctx->verbose) {
+               AFU_MF_PMD_DEBUG("descriptor.rd_address = 0x%x%08x",
+                       desc->rd_address_ext, desc->rd_address);
+               AFU_MF_PMD_DEBUG("descriptor.wr_address = 0x%x%08x",
+                       desc->wr_address_ext, desc->wr_address);
+               AFU_MF_PMD_DEBUG("descriptor.len = %u", desc->len);
+               AFU_MF_PMD_DEBUG("descriptor.wr_burst_count = %u",
+                       desc->wr_burst_count);
+               AFU_MF_PMD_DEBUG("descriptor.rd_burst_count = %u",
+                       desc->rd_burst_count);
+               AFU_MF_PMD_DEBUG("descriptor.wr_stride %u", desc->wr_stride);
+               AFU_MF_PMD_DEBUG("descriptor.rd_stride %u", desc->rd_stride);
+       }
+
+       /* spin while the engine reports its descriptor buffer as full */
+       do {
+               status.csr = rte_read32(CSR_STATUS(ctx->csr_addr));
+               /* periodic trace only; the loop itself keeps going */
+               if (fpga_queue_full++ > 100000000) {
+                       AFU_MF_PMD_DEBUG("DMA queue full retry");
+                       fpga_queue_full = 0;
+               }
+       } while (status.desc_buf_full);
+
+       /* blk_write64() silently does nothing on a misaligned address/size */
+       blk_write64((uint64_t *)ctx->desc_addr, (uint64_t *)desc,
+               sizeof(*desc));
+}
+
+/*
+ * Build and post the descriptor(s) for one DMA transfer.
+ *
+ * ctx:          DMA AFU context; ctx->desc_buf is used as scratch descriptor
+ * dst, src:     destination and source addresses as seen by the DMA engine
+ * count:        transfer length in bytes
+ * is_last_desc: when zero, early_done_en is set in the descriptor
+ * type:         transfer direction
+ * intr_en:      request a completion interrupt
+ *
+ * FPGA_TO_FPGA transfers are posted as a single descriptor with burst
+ * count 4. For the other directions the host-side address is brought to a
+ * CCIP_ALIGN_BYTES boundary first, so up to three descriptors are posted:
+ * a short head (burst 1), an aligned body (burst 4) and a short remainder
+ * (burst 1). The interrupt enable is cleared on every descriptor that is
+ * followed by another one.
+ *
+ * Returns 0 on success or -EINVAL if ctx is NULL or src, dst or count fail
+ * the IS_DMA_ALIGNED check.
+ *
+ * NOTE(review): count is an int while several callers pass 64-bit sizes;
+ * confirm those can never exceed INT_MAX.
+ */
+static int do_dma(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+       int count, int is_last_desc, fpga_dma_type type, int intr_en)
+{
+       msgdma_ext_desc *desc = NULL;
+       int alignment_offset = 0;
+       int segment_size = 0;
+
+       if (!ctx)
+               return -EINVAL;
+
+       /* src, dst and count must be 64-byte aligned */
+       if (!IS_DMA_ALIGNED(src) || !IS_DMA_ALIGNED(dst) ||
+               !IS_DMA_ALIGNED(count))
+               return -EINVAL;
+       memset(ctx->desc_buf, 0, sizeof(msgdma_ext_desc));
+
+       /* these fields are fixed for all DMA transfers */
+       desc = ctx->desc_buf;
+       desc->seq_num = 0;
+       desc->wr_stride = 1;
+       desc->rd_stride = 1;
+       desc->control.go = 1;
+       if (intr_en)
+               desc->control.transfer_irq_en = 1;
+       else
+               desc->control.transfer_irq_en = 0;
+
+       if (!is_last_desc)
+               desc->control.early_done_en = 1;
+       else
+               desc->control.early_done_en = 0;
+
+       if (type == FPGA_TO_FPGA) {
+               /* no host address involved: one descriptor covers it all */
+               desc->rd_address = src & DMA_MASK_32_BIT;
+               desc->wr_address = dst & DMA_MASK_32_BIT;
+               desc->len = count;
+               desc->wr_burst_count = 4;
+               desc->rd_burst_count = 4;
+               desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
+               desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
+               send_descriptor(ctx, desc);
+       } else {
+               /* check CCIP (host) address is aligned to 4CL (256B) */
+               alignment_offset = (type == HOST_TO_FPGA)
+                       ? (src % CCIP_ALIGN_BYTES) : (dst % CCIP_ALIGN_BYTES);
+               /* performing a short transfer to get aligned */
+               if (alignment_offset != 0) {
+                       desc->rd_address = src & DMA_MASK_32_BIT;
+                       desc->wr_address = dst & DMA_MASK_32_BIT;
+                       desc->wr_burst_count = 1;
+                       desc->rd_burst_count = 1;
+                       desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
+                       desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
+                       /* count isn't large enough to hit next 4CL boundary */
+                       if ((CCIP_ALIGN_BYTES - alignment_offset) >= count) {
+                               segment_size = count;
+                               count = 0;
+                       } else {
+                               segment_size = CCIP_ALIGN_BYTES
+                                       - alignment_offset;
+                               src += segment_size;
+                               dst += segment_size;
+                               count -= segment_size;
+                               /* more descriptors follow: no IRQ yet */
+                               desc->control.transfer_irq_en = 0;
+                       }
+                       /* post short transfer to align to a 4CL (256 byte) */
+                       desc->len = segment_size;
+                       send_descriptor(ctx, desc);
+               }
+               /* at this point we are 4CL (256 byte) aligned */
+               if (count >= CCIP_ALIGN_BYTES) {
+                       desc->rd_address = src & DMA_MASK_32_BIT;
+                       desc->wr_address = dst & DMA_MASK_32_BIT;
+                       desc->wr_burst_count = 4;
+                       desc->rd_burst_count = 4;
+                       desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
+                       desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
+                       /* buffer ends on 4CL boundary */
+                       if ((count % CCIP_ALIGN_BYTES) == 0) {
+                               segment_size = count;
+                               count = 0;
+                       } else {
+                               segment_size = count
+                                       - (count % CCIP_ALIGN_BYTES);
+                               src += segment_size;
+                               dst += segment_size;
+                               count -= segment_size;
+                               /* a remainder follows: no IRQ yet */
+                               desc->control.transfer_irq_en = 0;
+                       }
+                       desc->len = segment_size;
+                       send_descriptor(ctx, desc);
+               }
+               /* post short transfer to handle the remainder */
+               if (count > 0) {
+                       desc->rd_address = src & DMA_MASK_32_BIT;
+                       desc->wr_address = dst & DMA_MASK_32_BIT;
+                       desc->len = count;
+                       desc->wr_burst_count = 1;
+                       desc->rd_burst_count = 1;
+                       desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
+                       desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
+                       /* final descriptor: restore the requested IRQ */
+                       if (intr_en)
+                               desc->control.transfer_irq_en = 1;
+                       send_descriptor(ctx, desc);
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Queue the "magic" write-fence transfer: the host magic word is cleared
+ * and a 64-byte FPGA-to-host DMA from DMA_WF_MAGIC_ROM into the magic
+ * buffer is posted with its completion interrupt enabled. wait_magic()
+ * waits for the word to arrive.
+ *
+ * Returns the result of do_dma(), or -EINVAL if ctx is NULL.
+ */
+static int issue_magic(struct dma_afu_ctx *ctx)
+{
+       /* ctx is dereferenced below, before do_dma() gets to check it */
+       if (!ctx)
+               return -EINVAL;
+
+       *(ctx->magic_buf) = 0ULL;
+       return do_dma(ctx, DMA_WF_HOST_ADDR(ctx->magic_iova),
+               DMA_WF_MAGIC_ROM, 64, 1, FPGA_TO_HOST, 1);
+}
+
+/*
+ * Wait for the transfer queued by issue_magic() to land.
+ *
+ * First waits for the completion interrupt, then re-reads the magic word
+ * until it equals DMA_WF_MAGIC, giving up with an error message after
+ * roughly a thousand reads. The magic word is cleared again before
+ * returning. A NULL ctx is ignored.
+ *
+ * NOTE(review): the result of poll_interrupt() is discarded and a timeout
+ * here is only logged, so callers cannot tell that the fence failed.
+ * NOTE(review): the re-read loop relies on magic_buf being volatile --
+ * confirm its declaration.
+ */
+static void wait_magic(struct dma_afu_ctx *ctx)
+{
+       int magic_timeout = 0;
+
+       if (!ctx)
+               return;
+
+       poll_interrupt(ctx);
+       /* the DMA engine, not the CPU, stores DMA_WF_MAGIC into magic_buf */
+       while (*(ctx->magic_buf) != DMA_WF_MAGIC) {
+               if (magic_timeout++ > 1000) {
+                       AFU_MF_PMD_ERR("DMA magic operation timeout");
+                       magic_timeout = 0;
+                       break;
+               }
+       }
+       /* re-arm for the next issue_magic() */
+       *(ctx->magic_buf) = 0ULL;
+}
+
+/*
+ * Send one dma_buf_size chunk of a host-to-FPGA transfer.
+ *
+ * ctx:           DMA AFU context
+ * dst, src:      base addresses of the whole transfer (FPGA / host)
+ * chunk:         index of the chunk to send
+ * is_last_chunk: non-zero for the final chunk
+ * intr_issued:   in/out flag, set once a descriptor with interrupt enabled
+ *                has been posted
+ *
+ * The chunk is copied into bounce buffer chunk % NUM_DMA_BUF and posted
+ * with do_dma(). An interrupt is requested on the last chunk of every
+ * group of HALF_DMA_BUF chunks and on the final chunk; before posting such
+ * a chunk, a previously requested interrupt is waited for.
+ *
+ * Returns 0 on success, -EINVAL on a NULL argument, or the error of
+ * poll_interrupt() / do_dma().
+ */
+static int dma_tx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+       uint64_t chunk, int is_last_chunk, int *intr_issued)
+{
+       int intr_en = 0;
+       int ret = 0;
+
+       if (!ctx || !intr_issued)
+               return -EINVAL;
+
+       /* addresses of this chunk within the whole transfer */
+       src += chunk * ctx->dma_buf_size;
+       dst += chunk * ctx->dma_buf_size;
+
+       if (((chunk % HALF_DMA_BUF) == (HALF_DMA_BUF - 1)) || is_last_chunk) {
+               /* drain the previous interrupt before requesting a new one */
+               if (*intr_issued) {
+                       ret = poll_interrupt(ctx);
+                       if (ret)
+                               return ret;
+               }
+               intr_en = 1;
+       }
+
+       /* from here on chunk is the bounce buffer index */
+       chunk %= NUM_DMA_BUF;
+       rte_memcpy(ctx->dma_buf[chunk], (void *)(uintptr_t)src,
+               ctx->dma_buf_size);
+       ret = do_dma(ctx, dst, DMA_HOST_ADDR(ctx->dma_iova[chunk]),
+                       ctx->dma_buf_size, 0, HOST_TO_FPGA, intr_en);
+       if (intr_en)
+               *intr_issued = 1;
+
+       return ret;
+}
+
+/*
+ * Copy count bytes from host memory (src) to FPGA memory (dst).
+ *
+ * The transfer is split into up to four phases:
+ *   1. if dst fails IS_DMA_ALIGNED, an ASE (MMIO) copy up to the next
+ *      DMA_ALIGN_BYTES boundary, or of the whole request when it is
+ *      shorter than DMA_ALIGN_BYTES;
+ *   2. whole chunks of ctx->dma_buf_size bytes through the bounce buffers;
+ *   3. the remaining multiple of DMA_ALIGN_BYTES as one DMA through
+ *      bounce buffer 0;
+ *   4. the final bytes through ASE.
+ *
+ * Returns 0 on success, -EINVAL if ctx is NULL, or the error of the
+ * failing phase.
+ *
+ * NOTE(review): ctx->dma_buf_size is used as a divisor and must be
+ * non-zero.
+ */
+static int dma_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t 
src,
+       size_t count)
+{
+       uint64_t i = 0;
+       uint64_t count_left = count;
+       uint64_t aligned_addr = 0;
+       uint64_t align_bytes = 0;
+       uint64_t dma_chunks = 0;
+       uint64_t dma_tx_bytes = 0;
+       uint64_t offset = 0;
+       int issued_intr = 0;
+       int ret = 0;
+
+       AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
+               count);
+
+       if (!ctx)
+               return -EINVAL;
+
+       if (!IS_DMA_ALIGNED(dst)) {
+               /* too short to ever reach DMA alignment: ASE does it all */
+               if (count_left < DMA_ALIGN_BYTES)
+                       return ase_host_to_fpga(ctx, &dst, &src, count_left);
+
+               aligned_addr = ((dst / DMA_ALIGN_BYTES) + 1)
+                       * DMA_ALIGN_BYTES;
+               align_bytes = aligned_addr - dst;
+               /* advances dst and src past the bytes it copies */
+               ret = ase_host_to_fpga(ctx, &dst, &src, align_bytes);
+               if (ret)
+                       return ret;
+               count_left = count_left - align_bytes;
+       }
+
+       if (count_left) {
+               dma_chunks = count_left / ctx->dma_buf_size;
+               /* offset: bytes covered by the full chunks */
+               offset = dma_chunks * ctx->dma_buf_size;
+               count_left -= offset;
+               AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
+                       " (%"PRIu64"...0x%"PRIx64")",
+                       src, dst, dma_chunks, count_left);
+               for (i = 0; i < dma_chunks; i++) {
+                       ret = dma_tx_buf(ctx, dst, src, i,
+                               i == (dma_chunks - 1), &issued_intr);
+                       if (ret)
+                               return ret;
+               }
+
+               /* wait for the last interrupt requested by dma_tx_buf() */
+               if (issued_intr) {
+                       ret = poll_interrupt(ctx);
+                       if (ret)
+                               return ret;
+               }
+
+               if (count_left) {
+                       i = count_left / DMA_ALIGN_BYTES;
+                       if (i > 0) {
+                               dma_tx_bytes = i * DMA_ALIGN_BYTES;
+                               AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA",
+                                       dma_tx_bytes);
+                               rte_memcpy(ctx->dma_buf[0],
+                                       (void *)(uintptr_t)(src + offset),
+                                       dma_tx_bytes);
+                               ret = do_dma(ctx, dst + offset,
+                                       DMA_HOST_ADDR(ctx->dma_iova[0]),
+                                       dma_tx_bytes, 1, HOST_TO_FPGA, 1);
+                               if (ret)
+                                       return ret;
+                               ret = poll_interrupt(ctx);
+                               if (ret)
+                                       return ret;
+                       }
+
+                       /* dma_tx_bytes is still 0 if no such DMA was needed */
+                       count_left -= dma_tx_bytes;
+                       if (count_left) {
+                               AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to ASE",
+                                       count_left);
+                               dst += offset + dma_tx_bytes;
+                               src += offset + dma_tx_bytes;
+                               ret = ase_host_to_fpga(ctx, &dst, &src,
+                                       count_left);
+                       }
+               }
+       }
+
+       return ret;
+}
+
+/*
+ * Receive one dma_buf_size chunk of an FPGA-to-host transfer.
+ *
+ * ctx:           DMA AFU context
+ * dst, src:      base addresses of the whole transfer (host / FPGA)
+ * chunk:         index of the chunk to fetch
+ * is_last_chunk: non-zero for the final chunk
+ * rx_count:      in/out, number of chunks already copied out to dst
+ * wf_issued:     in/out flag, non-zero while a magic fence is outstanding
+ *
+ * The chunk is DMA-ed into bounce buffer chunk % NUM_DMA_BUF without an
+ * interrupt. A magic fence is queued once HALF_DMA_BUF chunks are pending.
+ * When all bounce buffers are in use, or on the last chunk, an outstanding
+ * fence is waited for, HALF_DMA_BUF bounce buffers are copied out to dst,
+ * and a new fence is queued.
+ *
+ * Returns 0 on success, -EINVAL if ctx or wf_issued is NULL, or the error
+ * of do_dma() / issue_magic().
+ *
+ * NOTE(review): rx_count is dereferenced in the initializers, before any
+ * argument check, and is never tested for NULL.
+ */
+static int dma_rx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+       uint64_t chunk, int is_last_chunk, uint64_t *rx_count, int *wf_issued)
+{
+       uint64_t i = chunk % NUM_DMA_BUF;
+       uint64_t n = *rx_count;
+       uint64_t num_pending = 0;
+       int ret = 0;
+
+       if (!ctx || !wf_issued)
+               return -EINVAL;
+
+       ret = do_dma(ctx, DMA_HOST_ADDR(ctx->dma_iova[i]),
+               src + chunk * ctx->dma_buf_size,
+               ctx->dma_buf_size, 1, FPGA_TO_HOST, 0);
+       if (ret)
+               return ret;
+
+       /* chunks posted to the engine but not yet copied out to dst */
+       num_pending = chunk - n + 1;
+       if (num_pending == HALF_DMA_BUF) {
+               ret = issue_magic(ctx);
+               if (ret) {
+                       AFU_MF_PMD_DEBUG("Magic issue failed");
+                       return ret;
+               }
+               *wf_issued = 1;
+       }
+
+       if ((num_pending > (NUM_DMA_BUF - 1)) || is_last_chunk) {
+               if (*wf_issued) {
+                       wait_magic(ctx);
+                       /* i is reused here as a plain loop counter */
+                       for (i = 0; i < HALF_DMA_BUF; i++) {
+                               rte_memcpy((void *)(uintptr_t)(dst +
+                                               n * ctx->dma_buf_size),
+                                       ctx->dma_buf[n % NUM_DMA_BUF],
+                                       ctx->dma_buf_size);
+                               n++;
+                       }
+                       *wf_issued = 0;
+                       *rx_count = n;
+               }
+               /* fence the chunks that are still in flight */
+               ret = issue_magic(ctx);
+               if (ret) {
+                       AFU_MF_PMD_DEBUG("Magic issue failed");
+                       return ret;
+               }
+               *wf_issued = 1;
+       }
+
+       return ret;
+}
+
+/*
+ * Copy count bytes from FPGA memory (src) to host memory (dst).
+ *
+ * The transfer is split into up to four phases:
+ *   1. if src fails IS_DMA_ALIGNED, an ASE (MMIO) copy up to the next
+ *      DMA_ALIGN_BYTES boundary, or of the whole request when it is
+ *      shorter than DMA_ALIGN_BYTES;
+ *   2. whole chunks of ctx->dma_buf_size bytes through the bounce buffers,
+ *      fenced with the magic transfer;
+ *   3. the remaining multiple of DMA_ALIGN_BYTES as one DMA through
+ *      bounce buffer 0;
+ *   4. the final bytes through ASE.
+ *
+ * Returns 0 on success, -EINVAL if ctx is NULL, or the error of the
+ * failing phase.
+ *
+ * NOTE(review): ctx->dma_buf_size is used as a divisor and must be
+ * non-zero.
+ */
+static int dma_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t 
src,
+       size_t count)
+{
+       uint64_t i = 0;
+       uint64_t count_left = count;
+       uint64_t aligned_addr = 0;
+       uint64_t align_bytes = 0;
+       uint64_t dma_chunks = 0;
+       uint64_t pending_buf = 0;
+       uint64_t dma_rx_bytes = 0;
+       uint64_t offset = 0;
+       int wf_issued = 0;
+       int ret = 0;
+
+       AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
+               count);
+
+       if (!ctx)
+               return -EINVAL;
+
+       if (!IS_DMA_ALIGNED(src)) {
+               /* too short to ever reach DMA alignment: ASE does it all */
+               if (count_left < DMA_ALIGN_BYTES)
+                       return ase_fpga_to_host(ctx, &src, &dst, count_left);
+
+               aligned_addr = ((src / DMA_ALIGN_BYTES) + 1)
+                        * DMA_ALIGN_BYTES;
+               align_bytes = aligned_addr - src;
+               /* advances src and dst past the bytes it copies */
+               ret = ase_fpga_to_host(ctx, &src, &dst, align_bytes);
+               if (ret)
+                       return ret;
+               count_left = count_left - align_bytes;
+       }
+
+       if (count_left) {
+               dma_chunks = count_left / ctx->dma_buf_size;
+               /* offset: bytes covered by the full chunks */
+               offset = dma_chunks * ctx->dma_buf_size;
+               count_left -= offset;
+               AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
+                       " (%"PRIu64"...0x%"PRIx64")",
+                       src, dst, dma_chunks, count_left);
+               /* pending_buf counts the chunks already copied out to dst */
+               for (i = 0; i < dma_chunks; i++) {
+                       ret = dma_rx_buf(ctx, dst, src, i,
+                               i == (dma_chunks - 1),
+                               &pending_buf, &wf_issued);
+                       if (ret)
+                               return ret;
+               }
+
+               if (wf_issued)
+                       wait_magic(ctx);
+
+               /* clear out final dma memcpy operations */
+               while (pending_buf < dma_chunks) {
+                       /* constant size transfer; no length check required */
+                       rte_memcpy((void *)(uintptr_t)(dst +
+                                       pending_buf * ctx->dma_buf_size),
+                               ctx->dma_buf[pending_buf % NUM_DMA_BUF],
+                               ctx->dma_buf_size);
+                       pending_buf++;
+               }
+
+               if (count_left > 0) {
+                       i = count_left / DMA_ALIGN_BYTES;
+                       if (i > 0) {
+                               dma_rx_bytes = i * DMA_ALIGN_BYTES;
+                               AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA",
+                                       dma_rx_bytes);
+                               ret = do_dma(ctx,
+                                       DMA_HOST_ADDR(ctx->dma_iova[0]),
+                                       src + offset,
+                                       dma_rx_bytes, 1, FPGA_TO_HOST, 0);
+                               if (ret)
+                                       return ret;
+                               /* fence before reading the bounce buffer */
+                               ret = issue_magic(ctx);
+                               if (ret)
+                                       return ret;
+                               wait_magic(ctx);
+                               rte_memcpy((void *)(uintptr_t)(dst + offset),
+                                       ctx->dma_buf[0], dma_rx_bytes);
+                       }
+
+                       /* dma_rx_bytes is still 0 if no such DMA was needed */
+                       count_left -= dma_rx_bytes;
+                       if (count_left) {
+                               AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to ASE",
+                                       count_left);
+                               dst += offset + dma_rx_bytes;
+                               src += offset + dma_rx_bytes;
+                               ret = ase_fpga_to_host(ctx, &src, &dst,
+                                                       count_left);
+                       }
+               }
+       }
+
+       return ret;
+}
+
+/*
+ * Copy count bytes from FPGA address src to FPGA address dst.
+ *
+ * When dst, src and count are all DMA aligned the data is moved with
+ * FPGA_TO_FPGA descriptors, one dma_buf_size chunk at a time; a magic
+ * write is issued and waited for after every NUM_DMA_BUF chunks and after
+ * the last one. Otherwise the data is staged through a temporary host
+ * buffer with dma_fpga_to_host() followed by dma_host_to_fpga().
+ *
+ * Returns 0 on success, -EINVAL on a NULL context, a zero chunk size or an
+ * unaligned copy whose source range overlaps the destination from below,
+ * -ENOMEM when the staging buffer cannot be allocated, or the error of the
+ * failing transfer.
+ */
+static int dma_fpga_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst,
+       uint64_t src, size_t count)
+{
+       uint64_t i = 0;
+       uint64_t count_left = count;
+       uint64_t dma_chunks = 0;
+       uint64_t offset = 0;
+       uint64_t host_addr = 0;
+       uint64_t *tmp_buf = NULL;
+       int ret = 0;
+
+       AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
+               count);
+
+       if (!ctx)
+               return -EINVAL;
+
+       /* Both paths below divide by the chunk size. */
+       if (ctx->dma_buf_size == 0)
+               return -EINVAL;
+
+       if (IS_DMA_ALIGNED(dst) && IS_DMA_ALIGNED(src)
+           && IS_DMA_ALIGNED(count_left)) {
+               dma_chunks = count_left / ctx->dma_buf_size;
+               offset = dma_chunks * ctx->dma_buf_size;
+               count_left -= offset;
+               AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
+                       " (%"PRIu64"...0x%"PRIx64")",
+                       src, dst, dma_chunks, count_left);
+               for (i = 0; i < dma_chunks; i++) {
+                       ret = do_dma(ctx, dst + i * ctx->dma_buf_size,
+                               src + i * ctx->dma_buf_size,
+                               ctx->dma_buf_size, 0, FPGA_TO_FPGA, 0);
+                       if (ret)
+                               return ret;
+                       /* flush after each batch and after the last chunk */
+                       if ((((i + 1) % NUM_DMA_BUF) == 0) ||
+                               (i == (dma_chunks - 1))) {
+                               ret = issue_magic(ctx);
+                               if (ret)
+                                       return ret;
+                               wait_magic(ctx);
+                       }
+               }
+
+               if (count_left > 0) {
+                       AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA",
+                               count_left);
+                       ret = do_dma(ctx, dst + offset, src + offset,
+                               count_left, 1, FPGA_TO_FPGA, 0);
+                       if (ret)
+                               return ret;
+                       ret = issue_magic(ctx);
+                       if (ret)
+                               return ret;
+                       wait_magic(ctx);
+               }
+       } else {
+               if ((src < dst) && (src + count_left > dst)) {
+                       AFU_MF_PMD_ERR("Overlapping: 0x%"PRIx64
+                               " -> 0x%"PRIx64" (0x%"PRIx64")",
+                               src, dst, count_left);
+                       return -EINVAL;
+               }
+               dma_chunks = count_left / ctx->dma_buf_size;
+               offset = dma_chunks * ctx->dma_buf_size;
+               count_left -= offset;
+               AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64
+                       " (%"PRIu64"...0x%"PRIx64")",
+                       src, dst, dma_chunks, count_left);
+               tmp_buf = (uint64_t *)rte_malloc(NULL, ctx->dma_buf_size,
+                       DMA_ALIGN_BYTES);
+               if (!tmp_buf) {
+                       AFU_MF_PMD_ERR("Failed to allocate staging buffer");
+                       return -ENOMEM;
+               }
+               /* go through uintptr_t so the cast is valid on 32-bit too */
+               host_addr = (uint64_t)(uintptr_t)tmp_buf;
+               for (i = 0; i < dma_chunks; i++) {
+                       ret = dma_fpga_to_host(ctx, host_addr,
+                               src + i * ctx->dma_buf_size,
+                               ctx->dma_buf_size);
+                       if (ret)
+                               goto free_buf;
+                       ret = dma_host_to_fpga(ctx,
+                               dst + i * ctx->dma_buf_size,
+                               host_addr, ctx->dma_buf_size);
+                       if (ret)
+                               goto free_buf;
+               }
+
+               if (count_left > 0) {
+                       ret = dma_fpga_to_host(ctx, host_addr,
+                               src + offset, count_left);
+                       if (ret)
+                               goto free_buf;
+                       ret = dma_host_to_fpga(ctx, dst + offset,
+                               host_addr, count_left);
+                       if (ret)
+                               goto free_buf;
+               }
+free_buf:
+               rte_free(tmp_buf);
+       }
+
+       return ret;
+}
+
+/*
+ * Dispatch a blocking transfer of count bytes from src to dst to the
+ * routine matching the requested direction.
+ *
+ * Returns the result of the selected routine, or -EINVAL when ctx is NULL
+ * or type is not one of HOST_TO_FPGA, FPGA_TO_HOST, FPGA_TO_FPGA.
+ */
+static int dma_transfer_sync(struct dma_afu_ctx *ctx, uint64_t dst,
+       uint64_t src, size_t count, fpga_dma_type type)
+{
+       if (ctx == NULL)
+               return -EINVAL;
+
+       if (type == HOST_TO_FPGA)
+               return dma_host_to_fpga(ctx, dst, src, count);
+
+       if (type == FPGA_TO_HOST)
+               return dma_fpga_to_host(ctx, dst, src, count);
+
+       if (type == FPGA_TO_FPGA)
+               return dma_fpga_to_fpga(ctx, dst, src, count);
+
+       /* unknown direction */
+       return -EINVAL;
+}
+
+/*
+ * Return the time elapsed from start to end, in seconds.
+ *
+ * The difference is accumulated in a signed 64-bit nanosecond count so the
+ * multiplication cannot overflow where long is 32 bits wide, and an end
+ * earlier than start yields a negative value.
+ */
+static double getTime(struct timespec start, struct timespec end)
+{
+       int64_t diff_ns = (int64_t)(end.tv_sec - start.tv_sec) * 1000000000LL
+               + (int64_t)(end.tv_nsec - start.tv_nsec);
+
+       return (double)diff_ns / 1e9;
+}
+
+#define SWEEP_ITERS 1
+/*
+ * Time a host-to-FPGA transfer followed by an FPGA-to-host transfer of the
+ * same region and check the data read back.
+ *
+ * The host side starts buf_offset bytes into ctx->data_buf and the
+ * transfer size is length - (buf_offset + size_decrement); the FPGA side
+ * starts at ddr_offset. Each direction is run SWEEP_ITERS times and its
+ * bandwidth is printed. The host buffer is zeroed between the two
+ * directions.
+ *
+ * Returns -EINVAL when the buffers are missing, length is too small for
+ * the requested offsets or the region exceeds ctx->mem_size; the error of
+ * a failing transfer; otherwise the result of dma_afu_buf_verify().
+ */
+static int sweep_test(struct dma_afu_ctx *ctx, uint32_t length,
+       uint64_t ddr_offset, uint64_t buf_offset, uint64_t size_decrement)
+{
+       struct timespec start, end;
+       uint64_t test_size = 0;
+       uint64_t host_addr = 0;
+       uint64_t *dma_buf_ptr = NULL;
+       double throughput, total_time = 0.0;
+       int i = 0;
+       int ret = 0;
+
+       if (!ctx || !ctx->data_buf || !ctx->ref_buf) {
+               AFU_MF_PMD_ERR("Buffer for DMA test is not allocated");
+               return -EINVAL;
+       }
+
+       if (length < (buf_offset + size_decrement)) {
+               AFU_MF_PMD_ERR("Test length does not match unaligned parameter");
+               return -EINVAL;
+       }
+       test_size = length - (buf_offset + size_decrement);
+       if ((ddr_offset + test_size) > ctx->mem_size) {
+               AFU_MF_PMD_ERR("Test is out of DDR memory space");
+               return -EINVAL;
+       }
+
+       dma_buf_ptr = (uint64_t *)((uint8_t *)ctx->data_buf + buf_offset);
+       /* go through uintptr_t so the cast is valid on 32-bit too */
+       host_addr = (uint64_t)(uintptr_t)dma_buf_ptr;
+       printf("Sweep Host %p to FPGA 0x%"PRIx64
+               " with 0x%"PRIx64" bytes ...\n",
+               (void *)dma_buf_ptr, ddr_offset, test_size);
+
+       for (i = 0; i < SWEEP_ITERS; i++) {
+               clock_gettime(CLOCK_MONOTONIC, &start);
+               ret = dma_transfer_sync(ctx, ddr_offset, host_addr,
+                       test_size, HOST_TO_FPGA);
+               clock_gettime(CLOCK_MONOTONIC, &end);
+               if (ret) {
+                       AFU_MF_PMD_ERR("Failed");
+                       return ret;
+               }
+               total_time += getTime(start, end);
+       }
+       throughput = (test_size * SWEEP_ITERS) / (total_time * 1000000);
+       printf("Measured bandwidth = %lf MB/s\n", throughput);
+
+       printf("Sweep FPGA 0x%"PRIx64" to Host %p with 0x%"PRIx64" bytes ...\n",
+               ddr_offset, (void *)dma_buf_ptr, test_size);
+
+       total_time = 0.0;
+       /* clear the host side so the read back cannot pass on stale data */
+       memset((char *)dma_buf_ptr, 0, test_size);
+       for (i = 0; i < SWEEP_ITERS; i++) {
+               clock_gettime(CLOCK_MONOTONIC, &start);
+               ret = dma_transfer_sync(ctx, host_addr, ddr_offset,
+                       test_size, FPGA_TO_HOST);
+               clock_gettime(CLOCK_MONOTONIC, &end);
+               if (ret) {
+                       AFU_MF_PMD_ERR("Failed");
+                       return ret;
+               }
+               total_time += getTime(start, end);
+       }
+       throughput = (test_size * SWEEP_ITERS) / (total_time * 1000000);
+       printf("Measured bandwidth = %lf MB/s\n", throughput);
+
+       printf("Verifying buffer ...\n");
+       return dma_afu_buf_verify(ctx, test_size);
+}
+
+/*
+ * Run the DMA self test described by priv->dma_cfg on the selected DMA
+ * context.
+ *
+ * Sequence: allocate and initialize the test buffers, enable the global
+ * interrupt bit in the control CSR, copy host -> FPGA -> host and verify,
+ * copy FPGA -> FPGA into a second region (when one fits) and verify it,
+ * then run the aligned sweep and, if cfg->unaligned is set, the unaligned
+ * sweeps. The interrupt bit is cleared and the buffers are freed on exit.
+ *
+ * Returns 0 on success, -EINVAL/-ENOENT on a missing device, private data
+ * or an out-of-range DMA index, or the error of the failing step.
+ */
+static int dma_afu_test(struct afu_mf_rawdev *dev)
+{
+       struct n3000_afu_priv *priv = NULL;
+       struct dma_afu_ctx *ctx = NULL;
+       struct rte_pmd_afu_dma_cfg *cfg = NULL;
+       msgdma_ctrl ctrl;
+       uint64_t host_addr = 0;
+       uint64_t start = 0;
+       uint64_t length = 0;
+       uint64_t offset = 0;
+       uint32_t i = 0;
+       int ret = 0;
+
+       if (!dev)
+               return -EINVAL;
+
+       if (!dev->priv)
+               return -ENOENT;
+
+       priv = (struct n3000_afu_priv *)dev->priv;
+       cfg = &priv->dma_cfg;
+       if (cfg->index >= NUM_N3000_DMA)
+               return -EINVAL;
+       ctx = &priv->dma_ctx[cfg->index];
+
+       ctx->pattern = (int)cfg->pattern;
+       ctx->verbose = (int)cfg->verbose;
+       ctx->dma_buf_size = cfg->size;
+
+       ret = dma_afu_buf_alloc(ctx, cfg);
+       if (ret)
+               goto free;
+
+       /* go through uintptr_t so the cast is valid on 32-bit too */
+       host_addr = (uint64_t)(uintptr_t)ctx->data_buf;
+       /* widen once so the range arithmetic below cannot wrap */
+       start = cfg->offset;
+       length = cfg->length;
+
+       printf("Initialize test buffer\n");
+       dma_afu_buf_init(ctx, cfg->length);
+
+       /* enable interrupt */
+       ctrl.csr = 0;
+       ctrl.global_intr_en_mask = 1;
+       rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
+
+       printf("Host %p to FPGA 0x%x with 0x%x bytes\n",
+               (void *)ctx->data_buf, cfg->offset, cfg->length);
+       ret = dma_transfer_sync(ctx, start, host_addr, length,
+               HOST_TO_FPGA);
+       if (ret) {
+               AFU_MF_PMD_ERR("Failed to transfer data from host to FPGA");
+               goto end;
+       }
+       memset(ctx->data_buf, 0, cfg->length);
+
+       printf("FPGA 0x%x to Host %p with 0x%x bytes\n", cfg->offset,
+               (void *)ctx->data_buf, cfg->length);
+       ret = dma_transfer_sync(ctx, host_addr, start, length,
+               FPGA_TO_HOST);
+       if (ret) {
+               AFU_MF_PMD_ERR("Failed to transfer data from FPGA to host");
+               goto end;
+       }
+       ret = dma_afu_buf_verify(ctx, cfg->length);
+       if (ret)
+               goto end;
+
+       /*
+        * Pick the target of the FPGA to FPGA copy: right after the source
+        * region if it fits, else the start of memory if the source lies
+        * beyond one region length, else skip the remaining steps.
+        */
+       if ((start + length * 2) <= ctx->mem_size)
+               offset = start + length;
+       else if (start > length)
+               offset = 0;
+       else
+               goto end;
+
+       printf("FPGA 0x%x to FPGA 0x%"PRIx64" with 0x%x bytes\n",
+               cfg->offset, offset, cfg->length);
+       ret = dma_transfer_sync(ctx, offset, start, length, FPGA_TO_FPGA);
+       if (ret) {
+               AFU_MF_PMD_ERR("Failed to transfer data from FPGA to FPGA");
+               goto end;
+       }
+
+       printf("FPGA 0x%"PRIx64" to Host %p with 0x%x bytes\n", offset,
+               (void *)ctx->data_buf, cfg->length);
+       ret = dma_transfer_sync(ctx, host_addr, offset, length,
+               FPGA_TO_HOST);
+       if (ret) {
+               AFU_MF_PMD_ERR("Failed to transfer data from FPGA to host");
+               goto end;
+       }
+       ret = dma_afu_buf_verify(ctx, cfg->length);
+       if (ret)
+               goto end;
+
+       printf("Sweep with aligned address and size\n");
+       ret = sweep_test(ctx, cfg->length, cfg->offset, 0, 0);
+       if (ret)
+               goto end;
+
+       if (cfg->unaligned) {
+               printf("Sweep with unaligned address and size\n");
+               struct unaligned_set {
+                       uint64_t addr_offset;
+                       uint64_t size_dec;
+               } param[] = {{61, 5}, {3, 0}, {7, 3}, {0, 3}, {0, 61}, {0, 7}};
+               for (i = 0; i < ARRAY_SIZE(param); i++) {
+                       ret = sweep_test(ctx, cfg->length, cfg->offset,
+                               param[i].addr_offset, param[i].size_dec);
+                       if (ret)
+                               break;
+               }
+       }
+
+end:
+       /* disable interrupt */
+       ctrl.global_intr_en_mask = 0;
+       rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
+
+free:
+       dma_afu_buf_free(ctx);
+       return ret;
+}
+
+/*
+ * Resolve the PCI device behind an AFU raw device.
+ *
+ * Walks dev->rawdev->device to the AFU device and from the AFU device's
+ * own rawdev->device to the PCI device. Returns NULL if any link in that
+ * chain is missing.
+ */
+static struct rte_pci_device *n3000_afu_get_pci_dev(struct afu_mf_rawdev *dev)
+{
+       struct rte_afu_device *afu = NULL;
+
+       if (dev == NULL)
+               return NULL;
+       if (dev->rawdev == NULL)
+               return NULL;
+       if (dev->rawdev->device == NULL)
+               return NULL;
+
+       afu = RTE_DEV_TO_AFU(dev->rawdev->device);
+       if (afu->rawdev == NULL)
+               return NULL;
+       if (afu->rawdev->device == NULL)
+               return NULL;
+
+       return RTE_DEV_TO_PCI(afu->rawdev->device);
+}
+
+#ifdef VFIO_PRESENT
+/*
+ * Attach count eventfds to the MSI-X vectors starting at vec_start.
+ *
+ * Builds a vfio_irq_set carrying the eventfds (DATA_EVENTFD with
+ * ACTION_TRIGGER on the MSI-X index) and submits it with
+ * VFIO_DEVICE_SET_IRQS on the device fd taken from the PCI interrupt
+ * handle.
+ *
+ * Returns 0 on success, -EINVAL on bad arguments, -ENODEV when the PCI
+ * device or its device fd is unavailable, -ENOMEM on allocation failure,
+ * or the non-zero result of the ioctl.
+ */
+static int dma_afu_set_irqs(struct afu_mf_rawdev *dev, uint32_t vec_start,
+       uint32_t count, int *efds)
+{
+       struct rte_pci_device *pci_dev = NULL;
+       struct vfio_irq_set *irq_set = NULL;
+       int vfio_dev_fd = 0;
+       size_t sz = 0;
+       int ret = 0;
+
+       if (!dev || !efds || (count == 0) || (count > MAX_MSIX_VEC))
+               return -EINVAL;
+
+       pci_dev = n3000_afu_get_pci_dev(dev);
+       if (!pci_dev)
+               return -ENODEV;
+       vfio_dev_fd = rte_intr_dev_fd_get(pci_dev->intr_handle);
+       if (vfio_dev_fd < 0) {
+               AFU_MF_PMD_ERR("Invalid VFIO device fd");
+               return -ENODEV;
+       }
+
+       /* header followed by one eventfd per vector */
+       sz = sizeof(*irq_set) + sizeof(*efds) * count;
+       irq_set = rte_zmalloc(NULL, sz, 0);
+       if (!irq_set)
+               return -ENOMEM;
+
+       irq_set->argsz = (uint32_t)sz;
+       irq_set->count = count;
+       irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+               VFIO_IRQ_SET_ACTION_TRIGGER;
+       irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+       irq_set->start = vec_start;
+
+       rte_memcpy(&irq_set->data, efds, sizeof(*efds) * count);
+       ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+       if (ret)
+               AFU_MF_PMD_ERR("Error enabling MSI-X interrupts");
+
+       rte_free(irq_set);
+       return ret;
+}
+#endif
+
+/*
+ * Return the mapped address of the port that dev belongs to.
+ *
+ * Reads the attribute register of port dev->port from BAR 0, then uses the
+ * BAR index and offset it reports to locate the port.
+ *
+ * Returns NULL when the PCI device cannot be resolved, a required BAR is
+ * not mapped, the port is not implemented or the reported BAR index is out
+ * of range.
+ */
+static void *n3000_afu_get_port_addr(struct afu_mf_rawdev *dev)
+{
+       struct rte_pci_device *pci_dev = NULL;
+       uint8_t *addr = NULL;
+       uint64_t val = 0;
+       uint32_t bar = 0;
+
+       pci_dev = n3000_afu_get_pci_dev(dev);
+       if (!pci_dev)
+               return NULL;
+
+       addr = (uint8_t *)pci_dev->mem_resource[0].addr;
+       if (!addr) {
+               AFU_MF_PMD_ERR("BAR 0 is not mapped");
+               return NULL;
+       }
+
+       val = rte_read64(addr + PORT_ATTR_REG(dev->port));
+       if (!PORT_IMPLEMENTED(val)) {
+               AFU_MF_PMD_INFO("FIU port %d is not implemented", dev->port);
+               return NULL;
+       }
+
+       bar = PORT_BAR(val);
+       if (bar >= PCI_MAX_RESOURCE) {
+               AFU_MF_PMD_ERR("BAR index %u is out of limit", bar);
+               return NULL;
+       }
+
+       /* an unmapped BAR would otherwise turn the offset into a pointer */
+       addr = (uint8_t *)pci_dev->mem_resource[bar].addr;
+       if (!addr) {
+               AFU_MF_PMD_ERR("BAR %u is not mapped", bar);
+               return NULL;
+       }
+
+       return addr + PORT_OFFSET(val);
+}
+
+/*
+ * Look up the user interrupt feature of the port and report its vector
+ * range.
+ *
+ * Follows the feature header list from the port address until a private
+ * feature with id PORT_FEATURE_UINT_ID is found, then decodes its
+ * capability register. Either output pointer may be NULL.
+ *
+ * Returns 0 when the feature is found, -ENOENT when the port address is
+ * unavailable or the list ends without a match.
+ */
+static int n3000_afu_get_irq_capability(struct afu_mf_rawdev *dev,
+       uint32_t *vec_start, uint32_t *vec_count)
+{
+       uint8_t *dfh = NULL;
+       uint64_t hdr = 0;
+       uint64_t cap = 0;
+       uint64_t step = 0;
+
+       dfh = (uint8_t *)n3000_afu_get_port_addr(dev);
+       if (dfh == NULL)
+               return -ENOENT;
+
+       for (;;) {
+               hdr = rte_read64(dfh);
+               if (DFH_TYPE(hdr) == DFH_TYPE_PRIVATE &&
+                       DFH_FEATURE_ID(hdr) == PORT_FEATURE_UINT_ID) {
+                       cap = rte_read64(dfh + PORT_UINT_CAP_REG);
+                       if (vec_start != NULL)
+                               *vec_start = PORT_VEC_START(cap);
+                       if (vec_count != NULL)
+                               *vec_count = PORT_VEC_COUNT(cap);
+                       return 0;
+               }
+
+               /* stop on an unusable link or on the end-of-list marker */
+               step = DFH_NEXT_OFFSET(hdr);
+               if (step == 0 || (step & 0xffff) == 0xffff)
+                       break;
+               if (DFH_EOL(hdr))
+                       break;
+
+               dfh += step;
+       }
+
+       return -ENOENT;
+}
+
+/*
+ * Free the DSM, source and destination buffers of the NLB context and
+ * clear the pointers that referred to them.
+ *
+ * Returns 0, -EINVAL when dev is NULL or -ENOENT when it has no private
+ * data.
+ */
+static int nlb_afu_ctx_release(struct afu_mf_rawdev *dev)
+{
+       struct n3000_afu_priv *priv = NULL;
+       struct nlb_afu_ctx *nlb = NULL;
+
+       if (dev == NULL)
+               return -EINVAL;
+
+       priv = (struct n3000_afu_priv *)dev->priv;
+       if (priv == NULL)
+               return -ENOENT;
+
+       nlb = &priv->nlb_ctx;
+
+       /* status_ptr is cleared together with the DSM buffer */
+       rte_free(nlb->dsm_ptr);
+       nlb->dsm_ptr = NULL;
+       nlb->status_ptr = NULL;
+
+       rte_free(nlb->src_ptr);
+       nlb->src_ptr = NULL;
+
+       rte_free(nlb->dest_ptr);
+       nlb->dest_ptr = NULL;
+
+       return 0;
+}
+
+/*
+ * Set up the NLB context for the AFU found at addr.
+ *
+ * Allocates the zeroed DSM, source and destination buffers, records the
+ * IOVA of each, and points status_ptr at offset DSM_STATUS inside the DSM
+ * buffer. On any failure everything allocated so far is released.
+ *
+ * Returns 0 on success, -EINVAL on NULL arguments, -ENOENT when dev has no
+ * private data, or -ENOMEM when an allocation or IOVA lookup fails.
+ */
+static int nlb_afu_ctx_init(struct afu_mf_rawdev *dev, uint8_t *addr)
+{
+       struct n3000_afu_priv *priv = NULL;
+       struct nlb_afu_ctx *nlb = NULL;
+
+       if (dev == NULL || addr == NULL)
+               return -EINVAL;
+
+       priv = (struct n3000_afu_priv *)dev->priv;
+       if (priv == NULL)
+               return -ENOENT;
+
+       nlb = &priv->nlb_ctx;
+       nlb->addr = addr;
+
+       /* device status memory */
+       nlb->dsm_ptr = (uint8_t *)rte_zmalloc(NULL, DSM_SIZE, TEST_MEM_ALIGN);
+       if (nlb->dsm_ptr == NULL)
+               goto fail;
+       nlb->dsm_iova = rte_malloc_virt2iova(nlb->dsm_ptr);
+       if (nlb->dsm_iova == RTE_BAD_IOVA)
+               goto fail;
+
+       /* source buffer */
+       nlb->src_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+               TEST_MEM_ALIGN);
+       if (nlb->src_ptr == NULL)
+               goto fail;
+       nlb->src_iova = rte_malloc_virt2iova(nlb->src_ptr);
+       if (nlb->src_iova == RTE_BAD_IOVA)
+               goto fail;
+
+       /* destination buffer */
+       nlb->dest_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+               TEST_MEM_ALIGN);
+       if (nlb->dest_ptr == NULL)
+               goto fail;
+       nlb->dest_iova = rte_malloc_virt2iova(nlb->dest_ptr);
+       if (nlb->dest_iova == RTE_BAD_IOVA)
+               goto fail;
+
+       nlb->status_ptr = (struct nlb_dsm_status *)(nlb->dsm_ptr + DSM_STATUS);
+       return 0;
+
+fail:
+       /* every failure above is reported as out of memory */
+       nlb_afu_ctx_release(dev);
+       return -ENOMEM;
+}
+
+/*
+ * Release the resources of every DMA context of the device.
+ *
+ * Frees the descriptor and magic buffers of all NUM_N3000_DMA contexts and
+ * closes the event fd they hold. The private data is zero-allocated, so an
+ * event_fd of 0 marks a context that was never set up and is not closed;
+ * contexts holding the same fd value cause a single close(). Each event_fd
+ * is set to -1 afterwards so a repeated call does not close it again.
+ *
+ * Returns 0, -EINVAL when dev is NULL or -ENOENT when it has no private
+ * data.
+ */
+static int dma_afu_ctx_release(struct afu_mf_rawdev *dev)
+{
+       struct n3000_afu_priv *priv = NULL;
+       struct dma_afu_ctx *ctx = NULL;
+       int closed_fd = -1;
+       int i = 0;
+
+       if (!dev)
+               return -EINVAL;
+
+       priv = (struct n3000_afu_priv *)dev->priv;
+       if (!priv)
+               return -ENOENT;
+
+       for (i = 0; i < NUM_N3000_DMA; i++) {
+               ctx = &priv->dma_ctx[i];
+
+               rte_free(ctx->desc_buf);
+               ctx->desc_buf = NULL;
+
+               rte_free(ctx->magic_buf);
+               ctx->magic_buf = NULL;
+
+               if ((ctx->event_fd > 0) && (ctx->event_fd != closed_fd)) {
+                       close(ctx->event_fd);
+                       closed_fd = ctx->event_fd;
+               }
+               ctx->event_fd = -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Set up DMA context number index for the DMA engine found at addr.
+ *
+ * Records the CSR, descriptor and ASE register addresses and the size of
+ * the memory behind this engine. For index 0 it also looks up the first
+ * user interrupt vector, creates the eventfd and, when VFIO is present,
+ * binds it to that vector; a binding failure is logged but not fatal.
+ * Every context stores the eventfd kept in the static efds[] slot. Finally
+ * the descriptor and magic buffers are allocated.
+ *
+ * Returns 0 on success, -EINVAL on bad arguments, -ENOENT when dev has no
+ * private data, -EBADF when the eventfd cannot be created, -ENOMEM when a
+ * buffer or its IOVA is unavailable, or the error of the capability
+ * lookup.
+ */
+static int dma_afu_ctx_init(struct afu_mf_rawdev *dev, int index,
+       uint8_t *addr)
+{
+       struct n3000_afu_priv *priv = NULL;
+       struct dma_afu_ctx *ctx = NULL;
+       static const uint64_t mem_sz[] = {
+               0x100000000ULL, 0x100000000ULL, 0x40000000ULL, 0x1000000ULL
+       };
+       static int efds[1] = {0};
+       uint32_t vec_start = 0;
+       int ret = 0;
+
+       if (!dev || (index < 0) || (index >= NUM_N3000_DMA) || !addr)
+               return -EINVAL;
+
+       priv = (struct n3000_afu_priv *)dev->priv;
+       if (!priv)
+               return -ENOENT;
+
+       ctx = &priv->dma_ctx[index];
+       ctx->index = index;
+       ctx->addr = addr;
+       ctx->csr_addr = addr + DMA_CSR;
+       ctx->desc_addr = addr + DMA_DESC;
+       ctx->ase_ctrl_addr = addr + DMA_ASE_CTRL;
+       ctx->ase_data_addr = addr + DMA_ASE_DATA;
+       ctx->mem_size = mem_sz[ctx->index];
+       ctx->cur_ase_page = INVALID_ASE_PAGE;
+       if (ctx->index == 0) {
+               ret = n3000_afu_get_irq_capability(dev, &vec_start, NULL);
+               if (ret)
+                       return ret;
+
+               efds[0] = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+               if (efds[0] < 0) {
+                       AFU_MF_PMD_ERR("eventfd create failed");
+                       return -EBADF;
+               }
+#ifdef VFIO_PRESENT
+               if (dma_afu_set_irqs(dev, vec_start, 1, efds))
+                       AFU_MF_PMD_ERR("DMA interrupt setup failed");
+#endif
+       }
+       ctx->event_fd = efds[0];
+
+       ctx->desc_buf = (msgdma_ext_desc *)rte_zmalloc(NULL,
+               sizeof(msgdma_ext_desc), DMA_ALIGN_BYTES);
+       if (!ctx->desc_buf) {
+               ret = -ENOMEM;
+               goto release;
+       }
+
+       ctx->magic_buf = (uint64_t *)rte_zmalloc(NULL, MAGIC_BUF_SIZE,
+               TEST_MEM_ALIGN);
+       if (!ctx->magic_buf) {
+               ret = -ENOMEM;
+               goto release;
+       }
+       ctx->magic_iova = rte_malloc_virt2iova(ctx->magic_buf);
+       if (ctx->magic_iova == RTE_BAD_IOVA) {
+               ret = -ENOMEM;
+               goto release;
+       }
+
+       return 0;
+
+release:
+       /*
+        * Free this context's buffers here so they are released whichever
+        * contexts dma_afu_ctx_release() covers; clearing the pointers
+        * keeps a later release from freeing them twice.
+        */
+       rte_free(ctx->desc_buf);
+       ctx->desc_buf = NULL;
+       rte_free(ctx->magic_buf);
+       ctx->magic_buf = NULL;
+       dma_afu_ctx_release(dev);
+       return ret;
+}
+
+/*
+ * Walk the feature header list starting at dev->addr and initialize a
+ * context for each supported block found.
+ *
+ * An AFU header carrying the NLB0 UUID initializes the NLB context; a BBB
+ * header carrying the DMA UUID initializes the next DMA context while
+ * fewer than NUM_N3000_DMA have been found. Anything else is only logged.
+ *
+ * Returns 0 when the walk completes, -EINVAL when dev is NULL, -ENOENT
+ * when it has no private data, or the error of a failing context init.
+ */
+static int n3000_afu_ctx_init(struct afu_mf_rawdev *dev)
+{
+       struct n3000_afu_priv *priv = NULL;
+       uint8_t *dfh = NULL;
+       uint64_t hdr = 0;
+       uint64_t guid_l = 0;
+       uint64_t guid_h = 0;
+       uint64_t step = 0;
+       int err = 0;
+
+       if (dev == NULL)
+               return -EINVAL;
+
+       priv = (struct n3000_afu_priv *)dev->priv;
+       if (priv == NULL)
+               return -ENOENT;
+
+       dfh = (uint8_t *)dev->addr;
+       for (;;) {
+               hdr = rte_read64(dfh);
+               guid_l = rte_read64(dfh + DFH_UUID_L_OFFSET);
+               guid_h = rte_read64(dfh + DFH_UUID_H_OFFSET);
+
+               if (DFH_TYPE(hdr) == DFH_TYPE_AFU &&
+                       guid_l == N3000_NLB0_UUID_L &&
+                       guid_h == N3000_NLB0_UUID_H) {
+                       AFU_MF_PMD_INFO("AFU NLB0 found @ %p", (void *)dfh);
+                       err = nlb_afu_ctx_init(dev, dfh);
+                       if (err != 0)
+                               return err;
+               } else if (DFH_TYPE(hdr) == DFH_TYPE_BBB &&
+                       guid_l == N3000_DMA_UUID_L &&
+                       guid_h == N3000_DMA_UUID_H &&
+                       priv->num_dma < NUM_N3000_DMA) {
+                       AFU_MF_PMD_INFO("AFU DMA%d found @ %p",
+                               priv->num_dma, (void *)dfh);
+                       err = dma_afu_ctx_init(dev, priv->num_dma, dfh);
+                       if (err != 0)
+                               return err;
+                       /* count the engine only once it is usable */
+                       priv->num_dma++;
+               } else {
+                       AFU_MF_PMD_DEBUG("DFH: type %"PRIu64
+                               ", uuid %016"PRIx64"%016"PRIx64,
+                               DFH_TYPE(hdr), guid_h, guid_l);
+               }
+
+               /* stop on an unusable link or on the end-of-list marker */
+               step = DFH_NEXT_OFFSET(hdr);
+               if (step == 0 || (step & 0xffff) == 0xffff)
+                       break;
+               if (DFH_EOL(hdr))
+                       break;
+
+               dfh += step;
+       }
+
+       return 0;
+}
+
+/*
+ * Driver init hook: allocate the zeroed private data on first use, then
+ * discover and initialize the AFU contexts.
+ *
+ * Returns -EINVAL when dev is NULL, -ENOMEM when the private data cannot
+ * be allocated, otherwise the result of n3000_afu_ctx_init().
+ */
+static int n3000_afu_init(struct afu_mf_rawdev *dev)
+{
+       if (dev == NULL)
+               return -EINVAL;
+
+       if (dev->priv == NULL) {
+               void *priv = rte_zmalloc(NULL, sizeof(struct n3000_afu_priv),
+                       0);
+
+               if (priv == NULL)
+                       return -ENOMEM;
+               dev->priv = priv;
+       }
+
+       return n3000_afu_ctx_init(dev);
+}
+
+/*
+ * Driver config hook: validate a struct rte_pmd_afu_n3000_cfg and store
+ * the NLB or DMA part of it in the private data.
+ *
+ * NLB settings are checked against the supported mode, channel, cache and
+ * cache-line ranges. DMA settings are checked against the memory size of
+ * the selected engine; for engine 3 the length is rounded down to a
+ * multiple of 64 bytes in the caller's structure before it is stored.
+ *
+ * Returns 0 on success, -ENOENT when dev has no private data, or -EINVAL
+ * for any invalid argument or setting.
+ */
+static int n3000_afu_config(struct afu_mf_rawdev *dev, void *config,
+       size_t config_size)
+{
+       struct n3000_afu_priv *priv = NULL;
+       struct rte_pmd_afu_n3000_cfg *cfg = NULL;
+       int i = 0;
+       uint64_t top = 0;
+
+       if (!dev || !config || !config_size)
+               return -EINVAL;
+
+       priv = (struct n3000_afu_priv *)dev->priv;
+       if (!priv)
+               return -ENOENT;
+
+       if (config_size != sizeof(struct rte_pmd_afu_n3000_cfg))
+               return -EINVAL;
+
+       cfg = (struct rte_pmd_afu_n3000_cfg *)config;
+       if (cfg->type == RTE_PMD_AFU_N3000_NLB) {
+               if (cfg->nlb_cfg.mode != NLB_MODE_LPBK)
+                       return -EINVAL;
+               if ((cfg->nlb_cfg.read_vc > NLB_VC_RANDOM) ||
+                       (cfg->nlb_cfg.write_vc > NLB_VC_RANDOM))
+                       return -EINVAL;
+               if (cfg->nlb_cfg.wrfence_vc > NLB_VC_VH1)
+                       return -EINVAL;
+               if (cfg->nlb_cfg.cache_hint > NLB_RDLINE_MIXED)
+                       return -EINVAL;
+               if (cfg->nlb_cfg.cache_policy > NLB_WRPUSH_I)
+                       return -EINVAL;
+               if ((cfg->nlb_cfg.multi_cl != 1) &&
+                       (cfg->nlb_cfg.multi_cl != 2) &&
+                       (cfg->nlb_cfg.multi_cl != 4))
+                       return -EINVAL;
+               if ((cfg->nlb_cfg.begin < MIN_CACHE_LINES) ||
+                       (cfg->nlb_cfg.begin > MAX_CACHE_LINES))
+                       return -EINVAL;
+               if ((cfg->nlb_cfg.end < cfg->nlb_cfg.begin) ||
+                       (cfg->nlb_cfg.end > MAX_CACHE_LINES))
+                       return -EINVAL;
+               rte_memcpy(&priv->nlb_cfg, &cfg->nlb_cfg,
+                       sizeof(struct rte_pmd_afu_nlb_cfg));
+       } else if (cfg->type == RTE_PMD_AFU_N3000_DMA) {
+               if (cfg->dma_cfg.index >= NUM_N3000_DMA)
+                       return -EINVAL;
+               i = cfg->dma_cfg.index;
+               if (cfg->dma_cfg.length > priv->dma_ctx[i].mem_size)
+                       return -EINVAL;
+               if (cfg->dma_cfg.offset >= priv->dma_ctx[i].mem_size)
+                       return -EINVAL;
+               /* widen before adding so the end address cannot wrap */
+               top = (uint64_t)cfg->dma_cfg.length + cfg->dma_cfg.offset;
+               if ((top == 0) || (top > priv->dma_ctx[i].mem_size))
+                       return -EINVAL;
+               if (i == 3) {  /* QDR connected to DMA3 */
+                       if (cfg->dma_cfg.length & 0x3f) {
+                               cfg->dma_cfg.length &= ~0x3f;
+                               AFU_MF_PMD_INFO("Round size to %x for QDR",
+                                       cfg->dma_cfg.length);
+                       }
+               }
+               rte_memcpy(&priv->dma_cfg, &cfg->dma_cfg,
+                       sizeof(struct rte_pmd_afu_dma_cfg));
+       } else {
+               AFU_MF_PMD_ERR("Invalid type of N3000 AFU");
+               return -EINVAL;
+       }
+
+       priv->cfg_type = cfg->type;
+       return 0;
+}
+
+/*
+ * Driver test hook: run the self test matching the last accepted
+ * configuration.
+ *
+ * Returns the result of nlb_afu_test() or dma_afu_test(), -EINVAL when dev
+ * is NULL or no valid configuration type is stored, or -ENOENT when dev
+ * has no private data.
+ */
+static int n3000_afu_test(struct afu_mf_rawdev *dev)
+{
+       struct n3000_afu_priv *priv = NULL;
+
+       if (dev == NULL)
+               return -EINVAL;
+
+       priv = (struct n3000_afu_priv *)dev->priv;
+       if (priv == NULL)
+               return -ENOENT;
+
+       if (priv->cfg_type == RTE_PMD_AFU_N3000_NLB) {
+               AFU_MF_PMD_INFO("Test NLB");
+               return nlb_afu_test(dev);
+       }
+
+       if (priv->cfg_type == RTE_PMD_AFU_N3000_DMA) {
+               AFU_MF_PMD_INFO("Test DMA%u", priv->dma_cfg.index);
+               return dma_afu_test(dev);
+       }
+
+       AFU_MF_PMD_ERR("Please configure AFU before test");
+       return -EINVAL;
+}
+
+/*
+ * Driver close hook: release the NLB and DMA contexts, then free the
+ * private data and clear the pointer to it.
+ *
+ * Returns 0, or -EINVAL when dev is NULL. The results of the two release
+ * calls are not propagated.
+ */
+static int n3000_afu_close(struct afu_mf_rawdev *dev)
+{
+       if (dev == NULL)
+               return -EINVAL;
+
+       /* contexts first: they are stored inside the private data */
+       (void)nlb_afu_ctx_release(dev);
+       (void)dma_afu_ctx_release(dev);
+
+       rte_free(dev->priv);
+       dev->priv = NULL;
+
+       return 0;
+}
+
+/*
+ * Driver dump hook: print the addresses held by the context selected by
+ * the stored configuration type to f, or to stdout when f is NULL.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL or no valid configuration
+ * type is stored, or -ENOENT when dev has no private data.
+ */
+static int n3000_afu_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+       struct n3000_afu_priv *priv = NULL;
+       FILE *out = f;
+
+       if (dev == NULL)
+               return -EINVAL;
+
+       priv = (struct n3000_afu_priv *)dev->priv;
+       if (priv == NULL)
+               return -ENOENT;
+
+       if (out == NULL)
+               out = stdout;
+
+       if (priv->cfg_type == RTE_PMD_AFU_N3000_NLB) {
+               struct nlb_afu_ctx *nlb = &priv->nlb_ctx;
+
+               fprintf(out, "addr:\t\t%p\n", (void *)nlb->addr);
+               fprintf(out, "dsm_ptr:\t%p\n", (void *)nlb->dsm_ptr);
+               fprintf(out, "dsm_iova:\t0x%"PRIx64"\n", nlb->dsm_iova);
+               fprintf(out, "src_ptr:\t%p\n", (void *)nlb->src_ptr);
+               fprintf(out, "src_iova:\t0x%"PRIx64"\n", nlb->src_iova);
+               fprintf(out, "dest_ptr:\t%p\n", (void *)nlb->dest_ptr);
+               fprintf(out, "dest_iova:\t0x%"PRIx64"\n", nlb->dest_iova);
+               fprintf(out, "status_ptr:\t%p\n", (void *)nlb->status_ptr);
+               return 0;
+       }
+
+       if (priv->cfg_type != RTE_PMD_AFU_N3000_DMA)
+               return -EINVAL;
+
+       struct dma_afu_ctx *dma = &priv->dma_ctx[priv->dma_cfg.index];
+
+       fprintf(out, "index:\t\t%d\n", dma->index);
+       fprintf(out, "addr:\t\t%p\n", (void *)dma->addr);
+       fprintf(out, "csr_addr:\t%p\n", (void *)dma->csr_addr);
+       fprintf(out, "desc_addr:\t%p\n", (void *)dma->desc_addr);
+       fprintf(out, "ase_ctrl_addr:\t%p\n", (void *)dma->ase_ctrl_addr);
+       fprintf(out, "ase_data_addr:\t%p\n", (void *)dma->ase_data_addr);
+       fprintf(out, "desc_buf:\t%p\n", (void *)dma->desc_buf);
+       fprintf(out, "magic_buf:\t%p\n", (void *)dma->magic_buf);
+       fprintf(out, "magic_iova:\t0x%"PRIx64"\n", dma->magic_iova);
+
+       return 0;
+}
+
+/*
+ * Driver reset hook: pulse the soft reset bit of the port control
+ * register, holding it set for 100 microseconds.
+ *
+ * Returns 0, or -ENOENT when the port address cannot be resolved.
+ */
+static int n3000_afu_reset(struct afu_mf_rawdev *dev)
+{
+       uint8_t *port = NULL;
+       uint8_t *ctrl_reg = NULL;
+       uint64_t ctrl = 0;
+
+       port = (uint8_t *)n3000_afu_get_port_addr(dev);
+       if (port == NULL)
+               return -ENOENT;
+
+       ctrl_reg = port + PORT_CTRL_REG;
+       ctrl = rte_read64(ctrl_reg);
+
+       /* assert reset, wait, then deassert; other bits are kept as read */
+       rte_write64(ctrl | PORT_SOFT_RESET, ctrl_reg);
+       rte_delay_us(100);
+       rte_write64(ctrl & ~PORT_SOFT_RESET, ctrl_reg);
+
+       return 0;
+}
+
+/*
+ * Operation table of the N3000 AFU driver. No start or stop hook is
+ * provided; the other entries are the n3000_afu_* functions of this file.
+ */
+static struct afu_mf_ops n3000_afu_ops = {
+       .init = n3000_afu_init,
+       .config = n3000_afu_config,
+       .start = NULL,
+       .stop = NULL,
+       .test = n3000_afu_test,
+       .close = n3000_afu_close,
+       .dump = n3000_afu_dump,
+       .reset = n3000_afu_reset
+};
+
+/*
+ * N3000 AFU driver descriptor: the AFU UUID (low half first, then high
+ * half) paired with the operation table above.
+ */
+struct afu_mf_drv n3000_afu_drv = {
+       .uuid = { N3000_AFU_UUID_L, N3000_AFU_UUID_H },
+       .ops = &n3000_afu_ops
+};
diff --git a/drivers/raw/afu_mf/n3000_afu.h b/drivers/raw/afu_mf/n3000_afu.h
new file mode 100644
index 0000000..4c740da
--- /dev/null
+++ b/drivers/raw/afu_mf/n3000_afu.h
@@ -0,0 +1,333 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _N3000_AFU_H_
+#define _N3000_AFU_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+#define N3000_AFU_UUID_L  0xc000c9660d824272  /* 1st value of n3000_afu_drv.uuid */
+#define N3000_AFU_UUID_H  0x9aeffe5f84570612  /* 2nd value of n3000_afu_drv.uuid */
+#define N3000_NLB0_UUID_L 0xf89e433683f9040b  /* presumably low half of NLB0 UUID */
+#define N3000_NLB0_UUID_H 0xd8424dc4a4a3c413  /* presumably high half of NLB0 UUID */
+#define N3000_DMA_UUID_L  0xa9149a35bace01ea  /* presumably low half of DMA UUID */
+#define N3000_DMA_UUID_H  0xef82def7f6ec40fc  /* presumably high half of DMA UUID */
+
+extern struct afu_mf_drv n3000_afu_drv;  /* defined in n3000_afu.c */
+
+#define NUM_N3000_DMA  4  /* size of n3000_afu_priv.dma_ctx[] */
+#define MAX_MSIX_VEC   7
+
+/* N3000 DFL definitions: feature header (DFH_*) and port (PORT_*) fields */
+#define DFH_UUID_L_OFFSET  8
+#define DFH_UUID_H_OFFSET  16
+#define DFH_TYPE(hdr)  (((hdr) >> 60) & 0xf)  /* bits 63:60 */
+#define DFH_TYPE_AFU  1
+#define DFH_TYPE_BBB  2
+#define DFH_TYPE_PRIVATE  3
+#define DFH_EOL(hdr)  (((hdr) >> 40) & 0x1)  /* bit 40 */
+#define DFH_NEXT_OFFSET(hdr)  (((hdr) >> 16) & 0xffffff)  /* bits 39:16 */
+#define DFH_FEATURE_ID(hdr)  ((hdr) & 0xfff)  /* bits 11:0 */
+#define PORT_ATTR_REG(n)  (((n) << 3) + 0x38)  /* 0x38 + 8 * n */
+#define PORT_IMPLEMENTED(attr)  (((attr) >> 60) & 0x1)  /* bit 60 */
+#define PORT_BAR(attr)  (((attr) >> 32) & 0x7)  /* bits 34:32 */
+#define PORT_OFFSET(attr)  ((attr) & 0xffffff)  /* bits 23:0 */
+#define PORT_FEATURE_UINT_ID  0x12
+#define PORT_UINT_CAP_REG  0x8
+#define PORT_VEC_START(cap)  (((cap) >> 12) & 0xfff)  /* bits 23:12: first vector */
+#define PORT_VEC_COUNT(cap)  ((cap) & 0xfff)  /* bits 11:0: number of vectors */
+#define PORT_CTRL_REG  0x38
+#define PORT_SOFT_RESET  (0x1 << 0)  /* bit 0 of the PORT_CTRL_REG value */
+
+/* NLB register definitions */
+#define CSR_SCRATCHPAD0    0x100
+#define CSR_SCRATCHPAD1    0x108
+#define CSR_AFU_DSM_BASEL  0x110  /* BASEL and BASEH are 4 bytes apart */
+#define CSR_AFU_DSM_BASEH  0x114
+#define CSR_SRC_ADDR       0x120
+#define CSR_DST_ADDR       0x128
+#define CSR_NUM_LINES      0x130
+#define CSR_CTL            0x138
+#define CSR_CFG            0x140
+#define CSR_INACT_THRESH   0x148
+#define CSR_INTERRUPT0     0x150
+#define CSR_SWTEST_MSG     0x158
+#define CSR_STATUS0        0x160
+#define CSR_STATUS1        0x168
+#define CSR_ERROR          0x170
+#define CSR_STRIDE         0x178
+#define CSR_HE_INFO0       0x180
+
+#define DSM_SIZE           0x200000  /* 2 MiB */
+#define DSM_STATUS         0x40
+#define DSM_POLL_INTERVAL  5  /* ms */
+#define DSM_TIMEOUT        1000  /* ms */
+
+#define NLB_BUF_SIZE  0x400000  /* 4 MiB */
+#define TEST_MEM_ALIGN  1024
+
+struct nlb_csr_ctl {  /* 32-bit control value; presumably for CSR_CTL - confirm */
+       union {
+               uint32_t csr;  /* whole value */
+               struct {       /* same 32 bits, field by field */
+                       uint32_t reset:1;
+                       uint32_t start:1;
+                       uint32_t force_completion:1;
+                       uint32_t reserved:29;  /* pads the fields to 32 bits */
+               };
+       };
+};
+
+struct nlb_csr_cfg {  /* 32-bit configuration value; presumably for CSR_CFG - confirm */
+       union {
+               uint32_t csr;  /* whole value */
+               struct {       /* same 32 bits, field by field */
+                       uint32_t wrthru_en:1;
+                       uint32_t cont:1;
+                       uint32_t mode:3;  /* presumably an NLB_MODE_* value - confirm */
+                       uint32_t multicl_len:2;
+                       uint32_t rsvd1:1;
+                       uint32_t delay_en:1;
+                       uint32_t rdsel:2;
+                       uint32_t rsvd2:1;
+                       uint32_t chsel:3;
+                       uint32_t rsvd3:1;
+                       uint32_t wrpush_i:1;
+                       uint32_t wr_chsel:3;
+                       uint32_t rsvd4:3;
+                       uint32_t test_cfg:5;
+                       uint32_t interrupt_on_error:1;
+                       uint32_t interrupt_testmode:1;
+                       uint32_t wrfence_chsel:2;  /* widths above total 32 bits */
+               };
+       };
+};
+
+struct nlb_status0 {  /* 64-bit status value; presumably for CSR_STATUS0 - confirm */
+       union {
+               uint64_t csr;  /* whole value */
+               struct {       /* same 64 bits as two 32-bit fields */
+                       uint32_t num_writes;
+                       uint32_t num_reads;
+               };
+       };
+};
+
+struct nlb_status1 {  /* 64-bit status value; presumably for CSR_STATUS1 - confirm */
+       union {
+               uint64_t csr;  /* whole value */
+               struct {       /* same 64 bits as two 32-bit fields */
+                       uint32_t num_pend_writes;
+                       uint32_t num_pend_reads;
+               };
+       };
+};
+
+struct nlb_dsm_status {  /* pointee type of nlb_afu_ctx.status_ptr */
+       uint32_t test_complete;
+       uint32_t test_error;
+       uint64_t num_clocks;  /* at byte offset 8, so naturally aligned */
+       uint32_t num_reads;
+       uint32_t num_writes;
+       uint32_t start_overhead;
+       uint32_t end_overhead;  /* members add up to 32 bytes */
+};
+
+/* DMA register definitions */
+#define DMA_CSR       0x40
+#define DMA_DESC      0x60
+#define DMA_ASE_CTRL  0x200
+#define DMA_ASE_DATA  0x1000
+
+#define DMA_ASE_WINDOW       4096
+#define DMA_ASE_WINDOW_MASK  ((uint64_t)(DMA_ASE_WINDOW - 1))  /* 0xfff */
+#define INVALID_ASE_PAGE     0xffffffffffffffffULL
+
+#define DMA_WF_MAGIC             0x5772745F53796E63ULL  /* ASCII "Wrt_Sync" */
+#define DMA_WF_MAGIC_ROM         0x1000000000000  /* bit 48 */
+#define DMA_HOST_ADDR(addr)      ((addr) | 0x2000000000000)  /* sets bit 49 */
+#define DMA_WF_HOST_ADDR(addr)   ((addr) | 0x3000000000000)  /* sets bits 49:48 */
+
+#define NUM_DMA_BUF   8  /* size of dma_afu_ctx.dma_buf[] and dma_iova[] */
+#define HALF_DMA_BUF  (NUM_DMA_BUF / 2)  /* 4 */
+
+#define DMA_MASK_32_BIT 0xFFFFFFFF
+
+#define DMA_CSR_BUSY           0x1  /* presumably msgdma_status.busy - confirm */
+#define DMA_DESC_BUFFER_EMPTY  0x2  /* presumably msgdma_status.desc_buf_empty */
+#define DMA_DESC_BUFFER_FULL   0x4  /* presumably msgdma_status.desc_buf_full */
+
+#define DWORD_BYTES 4
+#define IS_ALIGNED_DWORD(addr) (((addr) % DWORD_BYTES) == 0)
+
+#define QWORD_BYTES 8
+#define IS_ALIGNED_QWORD(addr) (((addr) % QWORD_BYTES) == 0)
+
+#define DMA_ALIGN_BYTES 64
+#define IS_DMA_ALIGNED(addr) (((addr) % DMA_ALIGN_BYTES) == 0)
+
+#define CCIP_ALIGN_BYTES (DMA_ALIGN_BYTES << 2)  /* 256 */
+
+#define DMA_TIMEOUT_MSEC  5000
+
+#define MAGIC_BUF_SIZE  64
+#define ERR_CHECK_LIMIT  64
+
+#ifndef MIN
+#define MIN(a, b) ((a) < (b) ? (a) : (b))  /* the selected argument is evaluated twice */
+#endif
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))  /* x must be an array, not a pointer */
+#endif
+
+typedef enum {  /* DMA transfer direction */
+       HOST_TO_FPGA = 0,
+       FPGA_TO_HOST,            /* 1 */
+       FPGA_TO_FPGA,            /* 2 */
+       FPGA_MAX_TRANSFER_TYPE,  /* 3: number of transfer types above */
+} fpga_dma_type;
+
+typedef union {  /* type of msgdma_ext_desc.control */
+       uint32_t csr;  /* whole 32-bit value */
+       struct {       /* same 32 bits, field by field */
+               uint32_t tx_channel:8;
+               uint32_t generate_sop:1;
+               uint32_t generate_eop:1;
+               uint32_t park_reads:1;
+               uint32_t park_writes:1;
+               uint32_t end_on_eop:1;
+               uint32_t reserved_1:1;
+               uint32_t transfer_irq_en:1;
+               uint32_t early_term_irq_en:1;
+               uint32_t trans_error_irq_en:8;
+               uint32_t early_done_en:1;
+               uint32_t reserved_2:6;
+               uint32_t go:1;  /* widths above total 32 bits */
+       };
+} msgdma_desc_ctrl;
+
+typedef struct __rte_packed {  /* pointee type of dma_afu_ctx.desc_buf */
+       uint32_t rd_address;
+       uint32_t wr_address;
+       uint32_t len;
+       uint16_t seq_num;
+       uint8_t rd_burst_count;
+       uint8_t wr_burst_count;
+       uint16_t rd_stride;
+       uint16_t wr_stride;
+       uint32_t rd_address_ext;  /* presumably upper 32 bits of the read address - confirm */
+       uint32_t wr_address_ext;  /* presumably upper 32 bits of the write address - confirm */
+       msgdma_desc_ctrl control; /* last member; members add up to 32 bytes */
+} msgdma_ext_desc;
+
+typedef union {  /* type of msgdma_csr.status */
+       uint32_t csr;  /* whole 32-bit value */
+       struct {       /* same 32 bits, field by field */
+               uint32_t busy:1;
+               uint32_t desc_buf_empty:1;
+               uint32_t desc_buf_full:1;
+               uint32_t rsp_buf_empty:1;
+               uint32_t rsp_buf_full:1;
+               uint32_t stopped:1;
+               uint32_t resetting:1;
+               uint32_t stopped_on_error:1;
+               uint32_t stopped_on_early_term:1;
+               uint32_t irq:1;
+               uint32_t reserved:22;  /* 10 flag bits + 22 = 32 bits */
+       };
+} msgdma_status;
+
+typedef union {  /* type of msgdma_csr.ctrl */
+       uint32_t csr;  /* whole 32-bit value */
+       struct {       /* same 32 bits, field by field */
+               uint32_t stop_dispatcher:1;
+               uint32_t reset_dispatcher:1;
+               uint32_t stop_on_error:1;
+               uint32_t stopped_on_early_term:1;
+               uint32_t global_intr_en_mask:1;
+               uint32_t stop_descriptors:1;
+               uint32_t reserved:26;  /* 6 control bits + 26 = 32 bits */
+       };
+} msgdma_ctrl;
+
+typedef union {  /* type of msgdma_csr.fill_level */
+       uint32_t csr;  /* whole 32-bit value */
+       struct {       /* same 32 bits as two 16-bit fields */
+               uint32_t rd_fill_level:16;
+               uint32_t wr_fill_level:16;
+       };
+} msgdma_fill_level;
+
+typedef union {  /* type of msgdma_csr.rsp */
+       uint32_t csr;  /* whole 32-bit value */
+       struct {       /* same 32 bits as two 16-bit fields */
+               uint32_t rsp_fill_level:16;
+               uint32_t reserved:16;
+       };
+} msgdma_rsp_level;
+
+typedef union {  /* type of msgdma_csr.seq_num */
+       uint32_t csr;  /* whole 32-bit value */
+       struct {       /* same 32 bits as two 16-bit fields */
+               uint32_t rd_seq_num:16;
+               uint32_t wr_seq_num:16;
+       };
+} msgdma_seq_num;
+
+typedef struct __rte_packed {  /* five consecutive 32-bit members, 20 bytes */
+       msgdma_status status;          /* byte offset 0 */
+       msgdma_ctrl ctrl;              /* byte offset 4 */
+       msgdma_fill_level fill_level;  /* byte offset 8 */
+       msgdma_rsp_level rsp;          /* byte offset 12 */
+       msgdma_seq_num seq_num;        /* byte offset 16 */
+} msgdma_csr;
+
+#define CSR_STATUS(csr)   (&(((msgdma_csr *)(csr))->status))  /* yields msgdma_status * */
+#define CSR_CONTROL(csr)  (&(((msgdma_csr *)(csr))->ctrl))  /* yields msgdma_ctrl * */
+
+struct nlb_afu_ctx {  /* NLB state; type of n3000_afu_priv.nlb_ctx */
+       uint8_t *addr;
+       uint8_t *dsm_ptr;   /* presumably the DSM buffer (DSM_SIZE bytes) - confirm */
+       uint64_t dsm_iova;  /* presumably the IOVA matching dsm_ptr - confirm */
+       uint8_t *src_ptr;
+       uint64_t src_iova;  /* presumably the IOVA matching src_ptr - confirm */
+       uint8_t *dest_ptr;
+       uint64_t dest_iova; /* presumably the IOVA matching dest_ptr - confirm */
+       struct nlb_dsm_status *status_ptr;  /* printed by the dump code in n3000_afu.c */
+};
+
+struct dma_afu_ctx {  /* per-controller DMA state; element type of n3000_afu_priv.dma_ctx[] */
+       int index;
+       uint8_t *addr;
+       uint8_t *csr_addr;       /* presumably addr + DMA_CSR - confirm */
+       uint8_t *desc_addr;      /* presumably addr + DMA_DESC - confirm */
+       uint8_t *ase_ctrl_addr;  /* presumably addr + DMA_ASE_CTRL - confirm */
+       uint8_t *ase_data_addr;  /* presumably addr + DMA_ASE_DATA - confirm */
+       uint64_t mem_size;
+       uint64_t cur_ase_page;   /* presumably INVALID_ASE_PAGE when unset - confirm */
+       int event_fd;
+       int verbose;
+       int pattern;
+       void *data_buf;
+       void *ref_buf;
+       msgdma_ext_desc *desc_buf;
+       uint64_t *magic_buf;
+       uint64_t magic_iova;     /* printed as 64-bit hex by the dump code in n3000_afu.c */
+       uint32_t dma_buf_size;
+       uint64_t *dma_buf[NUM_DMA_BUF];
+       uint64_t dma_iova[NUM_DMA_BUF];  /* presumably IOVA of dma_buf[i] - confirm */
+};
+
+struct n3000_afu_priv {  /* holds NLB and DMA configuration together with their contexts */
+       struct rte_pmd_afu_nlb_cfg nlb_cfg;
+       struct rte_pmd_afu_dma_cfg dma_cfg;  /* dma_cfg.index selects the dma_ctx[] entry to dump */
+       struct nlb_afu_ctx nlb_ctx;
+       struct dma_afu_ctx dma_ctx[NUM_N3000_DMA];
+       int num_dma;   /* presumably the number of dma_ctx[] entries in use - confirm */
+       int cfg_type;  /* compared with RTE_PMD_AFU_N3000_DMA by the dump code in n3000_afu.c */
+};
+
+#endif /* _N3000_AFU_H_ */
diff --git a/drivers/raw/afu_mf/rte_pmd_afu.h b/drivers/raw/afu_mf/rte_pmd_afu.h
new file mode 100644
index 0000000..89d866a
--- /dev/null
+++ b/drivers/raw/afu_mf/rte_pmd_afu.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Intel Corporation
+ */
+
+#ifndef __RTE_PMD_AFU_H__
+#define __RTE_PMD_AFU_H__
+
+/**
+ * @file rte_pmd_afu.h
+ *
+ * AFU PMD specific definitions.
+ *
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#define RTE_PMD_AFU_N3000_NLB   1  /* value for rte_pmd_afu_n3000_cfg.type */
+#define RTE_PMD_AFU_N3000_DMA   2  /* value for rte_pmd_afu_n3000_cfg.type */
+
+#define NLB_MODE_LPBK      0  /* NLB_MODE_*: presumably for the cfg "mode" field - confirm */
+#define NLB_MODE_READ      1
+#define NLB_MODE_WRITE     2
+#define NLB_MODE_TRPUT     3
+
+#define NLB_VC_AUTO        0  /* NLB_VC_*: presumably for the *_vc fields - confirm */
+#define NLB_VC_VL0         1
+#define NLB_VC_VH0         2
+#define NLB_VC_VH1         3
+#define NLB_VC_RANDOM      4
+
+#define NLB_WRLINE_M       0  /* presumably for cache_policy - confirm */
+#define NLB_WRLINE_I       1
+#define NLB_WRPUSH_I       2
+
+#define NLB_RDLINE_S       0  /* presumably for cache_hint - confirm */
+#define NLB_RDLINE_I       1
+#define NLB_RDLINE_MIXED   2
+
+#define MIN_CACHE_LINES   1
+#define MAX_CACHE_LINES   1024
+
+#define MIN_DMA_BUF_SIZE  64
+#define MAX_DMA_BUF_SIZE  (1023 * 1024)  /* 1047552 */
+
+/**
+ * NLB AFU configuration data structure.
+ */
+struct rte_pmd_afu_nlb_cfg {
+       uint32_t mode;          /* presumably an NLB_MODE_* value - confirm */
+       uint32_t begin;         /* presumably within MIN/MAX_CACHE_LINES - confirm */
+       uint32_t end;           /* presumably within MIN/MAX_CACHE_LINES - confirm */
+       uint32_t multi_cl;
+       uint32_t cont;
+       uint32_t timeout;       /* unit is not stated in this header */
+       uint32_t cache_policy;  /* presumably NLB_WRLINE_* or NLB_WRPUSH_I - confirm */
+       uint32_t cache_hint;    /* presumably an NLB_RDLINE_* value - confirm */
+       uint32_t read_vc;       /* presumably an NLB_VC_* value - confirm */
+       uint32_t write_vc;      /* presumably an NLB_VC_* value - confirm */
+       uint32_t wrfence_vc;    /* presumably an NLB_VC_* value - confirm */
+       uint32_t freq_mhz;
+};
+
+/**
+ * DMA AFU configuration data structure.
+ */
+struct rte_pmd_afu_dma_cfg {
+       uint32_t index;     /* index of DMA controller; used to index n3000_afu_priv.dma_ctx[] */
+       uint32_t length;    /* total length of data to DMA */
+       uint32_t offset;    /* address offset of target memory */
+       uint32_t size;      /* size of transfer buffer; presumably MIN/MAX_DMA_BUF_SIZE apply */
+       uint32_t pattern;   /* data pattern to fill in test buffer */
+       uint32_t unaligned; /* use unaligned address or length in sweep test */
+       uint32_t verbose;   /* enable verbose error information in test */
+};
+
+/**
+ * N3000 AFU configuration data structure.
+ */
+struct rte_pmd_afu_n3000_cfg {
+       int type;   /* RTE_PMD_AFU_N3000_NLB or RTE_PMD_AFU_N3000_DMA */
+       union {     /* presumably type selects the valid member - confirm */
+               struct rte_pmd_afu_nlb_cfg nlb_cfg;
+               struct rte_pmd_afu_dma_cfg dma_cfg;
+       };
+};
+
+/**
+ * HE-LBK & HE-MEM-LBK AFU configuration data structure.
+ */
+struct rte_pmd_afu_he_lbk_cfg {  /* first six members match rte_pmd_afu_nlb_cfg */
+       uint32_t mode;
+       uint32_t begin;
+       uint32_t end;
+       uint32_t multi_cl;
+       uint32_t cont;
+       uint32_t timeout;  /* unit is not stated in this header */
+       uint32_t trput_interleave;
+       uint32_t freq_mhz;
+};
+
+/**
+ * HE-MEM-TG AFU configuration data structure.
+ */
+struct rte_pmd_afu_he_mem_tg_cfg {
+       uint32_t channel_mask;   /* mask of traffic generator channels */
+};
+
+/**
+ * HE-HSSI AFU configuration data structure.
+ */
+struct rte_pmd_afu_he_hssi_cfg {
+       uint32_t port;
+       uint32_t timeout;  /* unit is not stated in this header */
+       uint32_t num_packets;
+       uint32_t random_length;
+       uint32_t packet_length;
+       uint32_t random_payload;
+       uint32_t rnd_seed[3];  /* three 32-bit seed values */
+       uint64_t src_addr;
+       uint64_t dest_addr;
+       int he_loopback;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PMD_AFU_H__ */
diff --git a/drivers/raw/afu_mf/version.map b/drivers/raw/afu_mf/version.map
new file mode 100644
index 0000000..c2e0723
--- /dev/null
+++ b/drivers/raw/afu_mf/version.map
@@ -0,0 +1,3 @@
+DPDK_22 {
+       local: *;
+};
diff --git a/drivers/raw/meson.build b/drivers/raw/meson.build
index 05e7de1..c3627f7 100644
--- a/drivers/raw/meson.build
+++ b/drivers/raw/meson.build
@@ -6,6 +6,7 @@ if is_windows
 endif
 
 drivers = [
+        'afu_mf',
         'cnxk_bphy',
         'cnxk_gpio',
         'dpaa2_cmdif',
-- 
1.8.3.1

Reply via email to