This driver implements a DPDK poll-mode driver (PMD) providing the same
functionality as the netfront driver in the Linux kernel.

Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>
---
v2 -- no changes

 config/common_linuxapp                |   6 +
 lib/Makefile                          |   1 +
 lib/librte_pmd_xen/Makefile           |  30 ++
 lib/librte_pmd_xen/virt_dev.c         | 400 +++++++++++++++++++++++++
 lib/librte_pmd_xen/virt_dev.h         |  30 ++
 lib/librte_pmd_xen/xen_adapter_info.h |  64 ++++
 lib/librte_pmd_xen/xen_dev.c          | 375 +++++++++++++++++++++++
 lib/librte_pmd_xen/xen_dev.h          |  97 ++++++
 lib/librte_pmd_xen/xen_logs.h         |  23 ++
 lib/librte_pmd_xen/xen_rxtx.c         | 546 ++++++++++++++++++++++++++++++++++
 lib/librte_pmd_xen/xen_rxtx.h         | 110 +++++++
 mk/rte.app.mk                         |   4 +
 12 files changed, 1686 insertions(+)
 create mode 100644 lib/librte_pmd_xen/Makefile
 create mode 100644 lib/librte_pmd_xen/virt_dev.c
 create mode 100644 lib/librte_pmd_xen/virt_dev.h
 create mode 100644 lib/librte_pmd_xen/xen_adapter_info.h
 create mode 100644 lib/librte_pmd_xen/xen_dev.c
 create mode 100644 lib/librte_pmd_xen/xen_dev.h
 create mode 100644 lib/librte_pmd_xen/xen_logs.h
 create mode 100644 lib/librte_pmd_xen/xen_rxtx.c
 create mode 100644 lib/librte_pmd_xen/xen_rxtx.h

diff --git a/config/common_linuxapp b/config/common_linuxapp
index d428f84..668fc8d 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -232,6 +232,12 @@ CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
 CONFIG_RTE_LIBRTE_PMD_XENVIRT=n

 #
+# Compile XEN net-front PMD driver
+#
+CONFIG_RTE_LIBRTE_XEN_PMD=n
+CONFIG_RTE_LIBRTE_XEN_DEBUG_INIT=n
+
+#
 # Do prefetch of packet data within PMD driver receive function
 #
 CONFIG_RTE_PMD_PACKET_PREFETCH=y
diff --git a/lib/Makefile b/lib/Makefile
index d617d81..f405e40 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -52,6 +52,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_AF_PACKET) += librte_pmd_af_packet
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += librte_pmd_virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += librte_pmd_vmxnet3
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += librte_pmd_xenvirt
+DIRS-$(CONFIG_RTE_LIBRTE_XEN_PMD) += librte_pmd_xen
 DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost
 DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
 DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
diff --git a/lib/librte_pmd_xen/Makefile b/lib/librte_pmd_xen/Makefile
new file mode 100644
index 0000000..d294d03
--- /dev/null
+++ b/lib/librte_pmd_xen/Makefile
@@ -0,0 +1,30 @@
+#
+#   Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+#   All rights reserved.
+#
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_xen.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+VPATH += $(RTE_SDK)/lib/librte_pmd_xen
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_XEN_PMD) += virt_dev.c
+SRCS-$(CONFIG_RTE_LIBRTE_XEN_PMD) += xen_dev.c
+SRCS-$(CONFIG_RTE_LIBRTE_XEN_PMD) += xen_rxtx.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_XEN_PMD) += lib/librte_eal lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_XEN_PMD) += lib/librte_mempool lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_XEN_PMD) += lib/librte_net lib/librte_malloc
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_pmd_xen/virt_dev.c b/lib/librte_pmd_xen/virt_dev.c
new file mode 100644
index 0000000..f824977
--- /dev/null
+++ b/lib/librte_pmd_xen/virt_dev.c
@@ -0,0 +1,400 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include <fcntl.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+
+#include "virt_dev.h"
+
+struct uio_map {
+       void *addr;
+       uint64_t offset;
+       uint64_t size;
+       uint64_t phaddr;
+};
+
+struct uio_resource {
+       TAILQ_ENTRY(uio_resource) next;
+       struct rte_pci_addr pci_addr;
+       char path[PATH_MAX];
+       size_t nb_maps;
+       struct uio_map maps[PCI_MAX_RESOURCE];
+};
+
+static int
+virt_parse_sysfs_value(const char *filename, uint64_t *val)
+{
+       FILE *f;
+       char buf[BUFSIZ];
+       char *end = NULL;
+
+       f = fopen(filename, "r");
+       if (f == NULL) {
+               RTE_LOG(ERR, EAL, "cannot open sysfs value %s", filename);
+               return -1;
+       }
+
+       if (fgets(buf, sizeof(buf), f) == NULL) {
+               RTE_LOG(ERR, EAL, "cannot read sysfs value %s", filename);
+               fclose(f);
+               return -1;
+       }
+
+       *val = strtoull(buf, &end, 0);
+       if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
+               RTE_LOG(ERR, EAL, "cannot parse sysfs value %s", filename);
+               fclose(f);
+               return -1;
+       }
+
+       fclose(f);
+       return 0;
+}
+
+#define OFF_MAX ((uint64_t)(off_t)-1)
+static ssize_t
+virt_uio_get_mappings(const char *devname, struct uio_map maps[],
+                     size_t nb_maps)
+{
+       size_t i;
+       char dirname[PATH_MAX];
+       char filename[PATH_MAX];
+       uint64_t offset, size;
+
+       for (i = 0; i != nb_maps; i++) {
+
+               snprintf(dirname, sizeof(dirname),
+                               "%s/maps/map%zu", devname, i);
+
+               if (access(dirname, F_OK) != 0)
+                       break;
+
+               snprintf(filename, sizeof(filename), "%s/offset", dirname);
+               if (virt_parse_sysfs_value(filename, &offset) < 0) {
+                       RTE_LOG(ERR, EAL, "cannot parse offset of %s",
+                                       dirname);
+                       return -1;
+               }
+
+               snprintf(filename, sizeof(filename), "%s/size", dirname);
+               if (virt_parse_sysfs_value(filename, &size) < 0) {
+                       RTE_LOG(ERR, EAL, "cannot parse size of %s", dirname);
+                       return -1;
+               }
+
+               snprintf(filename, sizeof(filename), "%s/addr", dirname);
+               if (virt_parse_sysfs_value(filename, &maps[i].phaddr) < 0) {
+                       RTE_LOG(ERR, EAL, "cannot parse addr of %s", dirname);
+                       return -1;
+               }
+
+               if ((offset > OFF_MAX) || (size > SIZE_MAX)) {
+                       RTE_LOG(ERR, EAL,
+                                       "offset/size exceed system max value");
+                       return -1;
+               }
+
+               maps[i].offset = offset;
+               maps[i].size = size;
+       }
+
+       return i;
+}
+
+static void *
+virt_map_resource(void *requested_addr, const char *devname, off_t offset,
+                 size_t size)
+{
+       int fd;
+       void *mapaddr;
+
+       fd = open(devname, O_RDWR);
+       if (fd < 0) {
+               RTE_LOG(ERR, EAL, "Cannot open %s: %s",
+                               devname, strerror(errno));
+               return NULL;
+       }
+
+       mapaddr = mmap(0, size, PROT_READ | PROT_WRITE,
+                       MAP_SHARED, fd, offset);
+       if (mapaddr == MAP_FAILED || (requested_addr != NULL &&
+                               mapaddr != requested_addr)) {
+               RTE_LOG(ERR, EAL,
+                               "cannot mmap(%s(%d), %p, 0x%lx, 0x%lx): %s 
(%p)",
+                               devname, fd, requested_addr,
+                               (unsigned long)size, (unsigned long)offset,
+                               strerror(errno), mapaddr);
+               close(fd);
+               return NULL;
+       }
+
+       RTE_LOG(DEBUG, EAL, "memory mapped at %p", mapaddr);
+
+       return mapaddr;
+}
+
+void
+virt_uio_unmap_addresses(void **addresses, size_t *lens, int max_addresses)
+{
+       int j;
+
+       for (j = 0; j < max_addresses; j++) {
+               if (addresses[j] && lens[j]) {
+                       munmap(addresses[j], lens[j]);
+                       RTE_LOG(DEBUG, EAL, "memory umnmapped %p %d",
+                                       addresses[j], (int)lens[j]);
+               }
+       }
+}
+
+int
+virt_uio_map_addresses(const char *dirname, void **addresses, size_t *lens,
+                      int max_addresses)
+{
+       int j;
+       DIR *dir;
+       struct dirent *e;
+       char dirname2[PATH_MAX];
+       char devname[PATH_MAX];
+       unsigned uio_num;
+       struct uio_resource *uio_res;
+       struct uio_map *maps;
+       uint64_t pagesz;
+       ssize_t nb_maps;
+       uint64_t offset;
+       void *mapaddr;
+
+       RTE_LOG(DEBUG, EAL, "dirname %s", dirname);
+
+       dir = opendir(dirname);
+
+       if (!dir) {
+               RTE_LOG(ERR, EAL, "Cannot opendir %s", dirname);
+               return -1;
+       }
+
+       while ((e = readdir(dir)) != NULL) {
+
+               int shortprefix_len = sizeof("uio") - 1;
+               char *endptr;
+
+               if (strncmp(e->d_name, "uio", 3) != 0)
+                       continue;
+
+               errno = 0;
+               uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10);
+               if (errno == 0 && endptr != e->d_name) {
+                       snprintf(dirname2, sizeof(dirname2), "%s/uio%u",
+                                       dirname, uio_num);
+                       break;
+               }
+       }
+       closedir(dir);
+
+       if (!e) {
+               RTE_LOG(ERR, EAL, "dirname %s not managed, skipping",
+                               dirname);
+               return -1;
+       }
+
+       uio_res = rte_zmalloc("UIO_RES", sizeof(*uio_res), 0);
+       if (uio_res == NULL) {
+               RTE_LOG(ERR, EAL, "cannot store uio mmap details");
+               return -1;
+       }
+
+       snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
+       snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname);
+
+       nb_maps = virt_uio_get_mappings(dirname2, uio_res->maps,
+                       sizeof(uio_res->maps) / sizeof(uio_res->maps[0]));
+       if (nb_maps < 0)
+               return nb_maps;
+
+       uio_res->nb_maps = nb_maps;
+       pagesz = sysconf(_SC_PAGESIZE);
+       maps = uio_res->maps;
+
+       for (j = 0; j < nb_maps && j < max_addresses; j++) {
+               offset = j * pagesz;
+               mapaddr = virt_map_resource(NULL, devname,
+                               (off_t)offset, (size_t)maps[j].size);
+               if (maps[j].addr || !mapaddr)
+                       return -1;
+               maps[j].addr = mapaddr;
+               maps[j].offset = offset;
+               addresses[j] = mapaddr;
+               lens[j] = (size_t)maps[j].size;
+       }
+
+       return 0;
+}
+
+static struct
+rte_eth_dev *virt_eth_dev_allocate(const char *name,
+                                  struct eth_driver *eth_drv,
+                                  unsigned dev_private_size)
+{
+       struct rte_eth_dev *eth_dev;
+
+       eth_dev = rte_eth_dev_allocate(name);
+       if (!eth_dev) {
+               RTE_LOG(ERR, EAL, "virt eth_dev allocation was failed (%d)",
+                               ENOMEM);
+               return NULL;
+       }
+
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+               eth_dev->data->dev_private =
+                       rte_zmalloc("eth_dev private data structure",
+                                   dev_private_size, RTE_CACHE_LINE_SIZE);
+               if (!eth_dev->data->dev_private)
+                       rte_panic("virt eth_dev private data allocation was 
failed\n");
+       }
+
+       eth_dev->driver = eth_drv;
+       eth_dev->data->rx_mbuf_alloc_failed = 0;
+
+       TAILQ_INIT(&(eth_dev->callbacks));
+
+       return eth_dev;
+}
+
+static int
+virt_eth_dev_init(const char *name,
+                     struct virt_eth_driver *virt_eth_drv,
+                     const char *dirname)
+{
+       int err = -ENOMEM;
+       struct rte_eth_dev *eth_dev;
+       struct eth_driver *eth_drv = &virt_eth_drv->eth_driver;
+       struct rte_pci_device *dev;
+
+       dev = malloc(sizeof(*dev));
+       if (dev == NULL)
+               goto error;
+
+       eth_dev = virt_eth_dev_allocate(name, eth_drv, 
eth_drv->dev_private_size);
+       if (!eth_dev)
+               goto error;
+
+       dev->numa_node = -1;
+       dev->driver = &eth_drv->pci_drv;
+       eth_dev->pci_dev = dev;
+       
+       if (eth_drv->eth_dev_init) {
+               err = (*eth_drv->eth_dev_init)(eth_drv, eth_dev);
+               if (err) {
+                       RTE_LOG(ERR, EAL, "eth_dev_init was failed (%d)", err);
+                       goto error;
+               }
+       }
+
+       if (virt_eth_drv->virt_eth_dev_init) {
+               err = (*virt_eth_drv->virt_eth_dev_init)(virt_eth_drv, eth_dev,
+                                                        dirname);
+               if (err) {
+                       RTE_LOG(ERR, EAL, "virt eth_dev_init was failed (%d)",
+                                       err);
+                       goto error;
+               }
+       }
+
+       return 0;
+error:
+       free(dev);
+       return err;
+}
+
+#define PROC_MODULES "/proc/modules"
+static int
+virt_uio_check_module(const char *module_name)
+{
+       FILE *f;
+       unsigned i;
+       char buf[BUFSIZ];
+
+       if (module_name == NULL)
+               return 0;
+
+       f = fopen(PROC_MODULES, "r");
+       if (f == NULL) {
+               RTE_LOG(ERR, EAL, "Cannot open "PROC_MODULES": %s\n",
+                               strerror(errno));
+               return -1;
+       }
+
+       while (fgets(buf, sizeof(buf), f) != NULL) {
+
+               for (i = 0; i < sizeof(buf) && buf[i] != '\0'; i++) {
+                       if (isspace(buf[i]))
+                               buf[i] = '\0';
+               }
+
+               if (strncmp(buf, module_name, sizeof(buf)) == 0) {
+                       fclose(f);
+                       return 0;
+               }
+       }
+
+       fclose(f);
+       return -1;
+}
+
+int
+virt_eth_driver_register(struct virt_eth_driver *virt_eth_drv)
+{
+       struct dirent *e;
+       DIR *dir;
+       char dirname[PATH_MAX];
+
+       if (virt_eth_drv->module_name) {
+               RTE_LOG(DEBUG, EAL, "module name: \"%s\", driver name: \"%s\"",
+                       virt_eth_drv->module_name,
+                       virt_eth_drv->eth_driver.pci_drv.name);
+
+               if (virt_uio_check_module(virt_eth_drv->module_name) != 0) {
+                       RTE_LOG(ERR, EAL, "The %s is required by %s driver\n",
+                               virt_eth_drv->module_name,
+                               virt_eth_drv->eth_driver.pci_drv.name);
+                       return -1;
+               }
+       }
+
+       dir = opendir(virt_eth_drv->sysfs_unbind_dir);
+       if (dir == NULL) {
+               RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n", __func__,
+                       strerror(errno));
+               return -1;
+       }
+
+       while ((e = readdir(dir)) != NULL) {
+               if (e->d_name[0] == '.')
+                       continue;
+
+               /*create or not*/
+               if (!(virt_eth_drv->is_eth_device_dir(e->d_name)))
+                       continue;
+
+               snprintf(dirname, sizeof(dirname), "%s/%s/uio",
+                        virt_eth_drv->sysfs_unbind_dir, e->d_name);
+               if (virt_eth_dev_init(e->d_name, virt_eth_drv, dirname) < 0)
+                       goto error;
+       }
+       closedir(dir);
+       return 0;
+
+error:
+       closedir(dir);
+       return -1;
+}
diff --git a/lib/librte_pmd_xen/virt_dev.h b/lib/librte_pmd_xen/virt_dev.h
new file mode 100644
index 0000000..73223ee
--- /dev/null
+++ b/lib/librte_pmd_xen/virt_dev.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef _VIRT_ETHDEV_H_
+#define _VIRT_ETHDEV_H_
+
+struct virt_eth_driver;
+
+typedef int (*virt_is_eth_device_dir_t)(const char *dir);
+typedef int (*virt_eth_dev_init_t)(struct virt_eth_driver *virt_eth_drv,
+                                      struct rte_eth_dev *dev, const char 
*dirname);
+
+struct virt_eth_driver {
+       struct eth_driver            eth_driver;
+       const char                   *sysfs_bind_dir;
+       const char                   *sysfs_unbind_dir;
+       virt_is_eth_device_dir_t is_eth_device_dir;
+       virt_eth_dev_init_t      virt_eth_dev_init;
+       const char                   *module_name;
+};
+
+int virt_eth_driver_register(struct virt_eth_driver *virt_eth_drv);
+int virt_uio_map_addresses(const char *dirname, void **addresses,
+                              size_t *lens, int max_addresses);
+void virt_uio_unmap_addresses(void **addresses,
+                                 size_t *lens, int max_addresses);
+
+#endif /* _VIRT_ETHDEV_H_ */
diff --git a/lib/librte_pmd_xen/xen_adapter_info.h b/lib/librte_pmd_xen/xen_adapter_info.h
new file mode 100644
index 0000000..15d71ac
--- /dev/null
+++ b/lib/librte_pmd_xen/xen_adapter_info.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef XEN_ADAPTER_INFO_H_
+#define XEN_ADAPTER_INFO_H_
+
+#define MAX_TARGET 256
+
+#define IOCTL_EVTCHN_NOTIFY_GRANT 7
+
+struct gref_addr {
+       grant_ref_t gref;
+       unsigned long paddr;
+};
+
+struct ioctl_evtchn_notify_grant {
+       unsigned int port;
+       int otherend_id;
+       uint16_t count;
+       uint8_t is_rx;
+       union {
+               struct xen_netif_rx_front_ring *rx_ring;
+               struct xen_netif_tx_front_ring *tx_ring;
+       } u;
+       struct netfront_info *info;
+       uint16_t rel_count;
+       grant_ref_t rel_gref[MAX_TARGET];
+       struct gref_addr s[MAX_TARGET];
+};
+
+#define XEN_PMD_UIO_NAME "xen/pmd_uio"
+
+enum {
+       INFO_MAP = 0,
+       RX_RING_MAP,
+       TX_RING_MAP,
+       XEN_MAP_MAX
+};
+
+struct xen_adapter_info {
+       /*global parameters */
+       struct xen_netif_rx_front_ring *rx_ring;
+       struct xen_netif_tx_front_ring *tx_ring;
+       struct netfront_info *info;
+
+       uint8_t is_connected;
+       uint8_t disconnect_count;
+
+       /*adapter specific data*/
+       int otherend_id;
+       unsigned int rx_evtchn;
+       unsigned int tx_evtchn;
+       u_int8_t mac[6];
+
+       /*params of grefs array*/
+       uint16_t rx_grefs_count;
+       uint16_t tx_grefs_count;
+       /* this field has to be the last */
+       grant_ref_t rxtx_grefs[];
+};
+
+#endif /* XEN_ADAPTER_INFO_H_ */
diff --git a/lib/librte_pmd_xen/xen_dev.c b/lib/librte_pmd_xen/xen_dev.c
new file mode 100644
index 0000000..a098cca
--- /dev/null
+++ b/lib/librte_pmd_xen/xen_dev.c
@@ -0,0 +1,375 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include "xen_dev.h"
+#include "xen_rxtx.h"
+#include "virt_dev.h"
+
+#include <stdio.h>
+
+#include <sys/ioctl.h>
+#include <xen/sys/evtchn.h>
+
+#define XEN_MAX_RX_PKTLEN  0xFFFF
+#define XEN_MIN_RX_BUFSIZE (2 * PAGE_SIZE)
+
+static int xen_evt_fd = -1;
+
+void
+xen_set_rx_ng(struct xen_rx_queue *rxq)
+{
+       rxq->ng_rx.port = rxq->xa->info_page->rx_evtchn;
+       rxq->ng_rx.info = rxq->xa->info_page->info;
+       rxq->ng_rx.u.rx_ring = rxq->xa->info_page->rx_ring;
+       rxq->ng_rx.otherend_id = rxq->xa->info_page->otherend_id;
+}
+
+void
+xen_set_tx_ng(struct xen_tx_queue *txq)
+{
+       txq->ng_tx.port = txq->xa->info_page->tx_evtchn;
+       txq->ng_tx.info = txq->xa->info_page->info;
+       txq->ng_tx.u.tx_ring = txq->xa->info_page->tx_ring;
+       txq->ng_tx.otherend_id = txq->xa->info_page->otherend_id;
+}
+
+static int
+xen_evtchn_notify_grant_rxtx(struct ioctl_evtchn_notify_grant *ng)
+{
+       int rc;
+
+       rc = ioctl(xen_evt_fd, IOCTL_EVTCHN_NOTIFY_GRANT, ng);
+       if (rc)
+               rc = errno;
+
+       return rc;
+}
+
+int
+xen_evtchn_notify_grant_rx(struct xen_rx_queue *rxq)
+{
+       if (likely(xen_evt_fd >= 0)) {
+
+               xen_set_rx_ng(rxq);
+
+               return xen_evtchn_notify_grant_rxtx(&rxq->ng_rx);
+       }
+
+       return -1;
+}
+
+int
+xen_evtchn_notify_grant_tx(struct xen_tx_queue *txq)
+{
+       if (likely(xen_evt_fd >= 0)) {
+
+               xen_set_tx_ng(txq);
+
+               return xen_evtchn_notify_grant_rxtx(&txq->ng_tx);
+
+       }
+
+       return -1;
+}
+
+static int
+xen_evtchn_notify_rxtx(unsigned int evtchn)
+{
+       struct ioctl_evtchn_notify notify = { .port = evtchn };
+
+       if (xen_evt_fd >= 0)
+               return ioctl(xen_evt_fd, IOCTL_EVTCHN_NOTIFY, &notify);
+
+       return -1;
+}
+
+static int
+xen_evtchn_notify(struct xen_adapter *xa)
+{
+       int res = 0;
+
+       res += xen_evtchn_notify_rxtx(xa->info_page->tx_evtchn);
+
+       if (xa->info_page->tx_evtchn != xa->info_page->rx_evtchn)
+               res += xen_evtchn_notify_rxtx(xa->info_page->rx_evtchn);
+
+       return res;
+}
+
+static void
+xen_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+       unsigned i;
+
+       PMD_INIT_FUNC_TRACE();
+
+       for (i = 0; i < dev->data->nb_tx_queues; i++) {
+               struct xen_tx_stats *txs = &((struct xen_tx_queue *)
+                               dev->data->tx_queues[i])->tx_stats;
+               if (NULL != txs) {
+                       stats->opackets += txs->opackets;
+                       stats->obytes += txs->obytes;
+                       stats->oerrors += txs->oerrors;
+               } else {
+                       continue;
+               }
+       }
+
+       for (i = 0; i < dev->data->nb_rx_queues; i++) {
+               struct xen_rx_stats *rxs = &((struct xen_rx_queue *)
+                               dev->data->rx_queues[i])->rx_stats;
+               if (NULL != rxs) {
+                       stats->ipackets += rxs->ipackets;
+                       stats->ierrors += rxs->ierrors;
+                       stats->ibytes += rxs->ibytes;
+               } else {
+                       continue;
+               }
+       }
+}
+
+static void
+xen_dev_stats_reset(struct rte_eth_dev *dev)
+{
+       uint16_t i;
+
+       PMD_INIT_FUNC_TRACE();
+
+       for (i = 0; i < dev->data->nb_tx_queues; i++) {
+               struct xen_tx_stats *txs = &((struct xen_tx_queue *)
+                               dev->data->tx_queues[i])->tx_stats;
+               if (NULL != txs) {
+                       txs->opackets = 0;
+                       txs->obytes = 0;
+                       txs->oerrors = 0;
+               } else {
+                       continue;
+               }
+       }
+
+       for (i = 0; i < dev->data->nb_rx_queues; i++) {
+               struct xen_rx_stats *rxs = &((struct xen_rx_queue *)
+                               dev->data->rx_queues[i])->rx_stats;
+               if (NULL != rxs) {
+                       rxs->ipackets = 0;
+                       rxs->ibytes = 0;
+                       rxs->ierrors = 0;
+               } else {
+                       continue;
+               }
+       }
+}
+
+static void
+xen_dev_info_get(__attribute__((unused)) struct rte_eth_dev *dev,
+               struct rte_eth_dev_info *dev_info)
+{
+       PMD_INIT_FUNC_TRACE();
+
+       dev_info->max_rx_queues = (uint16_t)1;
+       dev_info->max_tx_queues = (uint16_t)1;
+       dev_info->max_mac_addrs = 1;
+       dev_info->min_rx_bufsize = XEN_MIN_RX_BUFSIZE;
+       dev_info->max_rx_pktlen = XEN_MAX_RX_PKTLEN;
+}
+
+static int
+xen_dev_configure(__attribute__((unused)) struct rte_eth_dev *dev)
+{
+       PMD_INIT_FUNC_TRACE();
+
+       return 0;
+}
+
+static void
+xen_dev_close(__attribute__((unused)) struct rte_eth_dev *dev)
+{
+       PMD_INIT_FUNC_TRACE();
+}
+
+static int
+_xen_is_eth_device_dir(const char *dir)
+{
+       int devid;
+
+       return sscanf(dir, "vif-%d", &devid) == 1;
+}
+
+/**
+ * Atomically writes the link status information into global
+ * structure rte_eth_dev.
+ *
+ * @param dev
+ *   - Pointer to the structure rte_eth_dev to read from.
+ *   - Pointer to the buffer to be saved with the link status.
+ *
+ * @return
+ *   - On success, zero.
+ *   - On failure, negative value.
+ */
+static inline int
+xen_dev_atomic_write_link_status(struct rte_eth_dev *dev,
+               struct rte_eth_link *link)
+{
+       struct rte_eth_link *dst = &(dev->data->dev_link);
+       struct rte_eth_link *src = link;
+
+       if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
+                               *(uint64_t *)src) == 0)
+               return -1;
+
+       return 0;
+}
+
+static int
+xen_dev_link_update(struct rte_eth_dev *dev,
+               __attribute__((unused)) int wait_to_complete)
+{
+       struct rte_eth_link link;
+
+       PMD_INIT_FUNC_TRACE();
+
+       link.link_status = 1;
+       link.link_speed = ETH_LINK_SPEED_1000;
+       link.link_duplex = ETH_LINK_FULL_DUPLEX;
+
+       xen_dev_atomic_write_link_status(dev, &link);
+
+       return 0;
+}
+
+static int
+xen_dev_start(struct rte_eth_dev *dev)
+{
+       struct xen_adapter *xa = VA_XA(dev);
+
+       PMD_INIT_FUNC_TRACE();
+
+       xen_dev_link_update(dev, 0);
+
+       xen_evtchn_notify(xa);
+
+       return 0;
+}
+
+static void
+xen_dev_stop(__attribute__((unused)) struct rte_eth_dev *dev)
+{
+       PMD_INIT_FUNC_TRACE();
+}
+
+static int
+wait_uio_init(uint8_t *state, const uint32_t timeout)
+{
+       uint32_t i;
+
+       for (i = 0; i < timeout * 10; i++) {
+               if (*state)
+                       return 0;
+               usleep(100000);
+       }
+
+       return -1;
+}
+
+static struct eth_dev_ops xen_eth_dev_ops = {
+       /*dev*/
+       .dev_configure        = xen_dev_configure,
+       .dev_close            = xen_dev_close,
+       .dev_start            = xen_dev_start,
+       .dev_stop             = xen_dev_stop,
+       .dev_infos_get        = xen_dev_info_get,
+       .link_update          = xen_dev_link_update,
+       /*rxtx*/
+       .stats_get            = xen_dev_stats_get,
+       .stats_reset          = xen_dev_stats_reset,
+       .rx_queue_setup       = xen_dev_rx_queue_setup,
+       .rx_queue_release     = xen_dev_rx_queue_release,
+       .tx_queue_setup       = xen_dev_tx_queue_setup,
+       .tx_queue_release     = xen_dev_tx_queue_release,
+};
+
+static int
+xen_dev_init(struct virt_eth_driver *virt_eth_drv __attribute__((unused)),
+            struct rte_eth_dev *eth_dev, const char *dirname)
+{
+       int err = 0;
+
+       struct xen_adapter *xa = VA_XA(eth_dev);
+
+       PMD_INIT_FUNC_TRACE();
+
+       err = virt_uio_map_addresses(dirname, xa->uio_res, xa->uio_len,
+                                    XEN_MAP_MAX);
+       if (err != 0) {
+               PMD_INIT_LOG(ERR, "virt_uio_map_addresses failed (%d)", err);
+               return -1;
+       }
+
+       eth_dev->dev_ops = &xen_eth_dev_ops;
+
+       xa->info_page =
+               (struct xen_adapter_info *)xa->uio_res[INFO_MAP];
+
+       if (wait_uio_init(&xa->info_page->is_connected, 3)) {
+               PMD_INIT_LOG(ERR, "no connection to xen_netback");
+               virt_uio_unmap_addresses(xa->uio_res, xa->uio_len,
+                                        XEN_MAP_MAX);
+               return -1;
+       }
+
+       PMD_INIT_LOG(DEBUG, "rx: %d,rx_evtchn: %d,tx: %d,tx_evtchn: %d",
+                    (int)xa->info_page->rx_grefs_count,
+                    (int)xa->info_page->rx_evtchn,
+                    (int)xa->info_page->tx_grefs_count,
+                    (int)xa->info_page->tx_evtchn);
+
+       /* copy mac-addr */
+       eth_dev->data->mac_addrs = rte_malloc("xen", ETHER_ADDR_LEN, 0);
+       memcpy(&eth_dev->data->mac_addrs->addr_bytes[0],
+              &xa->info_page->mac[0], ETHER_ADDR_LEN);
+
+       return 0;
+}
+
+static struct virt_eth_driver rte_xen_pmd = {
+       .eth_driver = {
+               .pci_drv = {
+                       .name = "rte_xen_pmd",
+                       .id_table = NULL,
+               },
+               .dev_private_size = sizeof(struct xen_adapter),
+       },
+       .sysfs_unbind_dir = "/sys/bus/xen/devices",
+       .sysfs_bind_dir = "/sys/bus/xen/drivers",
+       .is_eth_device_dir = _xen_is_eth_device_dir,
+       .virt_eth_dev_init = xen_dev_init,
+       .module_name = "xen_uio",
+};
+
+static int
+rte_xen_pmd_init(const char *name __rte_unused,
+                const char *param __rte_unused)
+{
+       PMD_INIT_FUNC_TRACE();
+
+       xen_evt_fd = open("/dev/"XEN_PMD_UIO_NAME, O_RDWR);
+
+       if (xen_evt_fd == -1) {
+               if (errno != ENOENT)
+                       PMD_INIT_LOG(ERR, "cannot open event device %s",
+                                       "/dev/"XEN_PMD_UIO_NAME);
+               return 0;
+       }
+
+       return virt_eth_driver_register(&rte_xen_pmd);
+}
+
+static struct rte_driver rte_xen_driver = {
+       .type = PMD_PDEV,
+       .init = rte_xen_pmd_init,
+};
+
+PMD_REGISTER_DRIVER(rte_xen_driver);
diff --git a/lib/librte_pmd_xen/xen_dev.h b/lib/librte_pmd_xen/xen_dev.h
new file mode 100644
index 0000000..b54287c
--- /dev/null
+++ b/lib/librte_pmd_xen/xen_dev.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef _XEN_ETHDEV_H_
+#define _XEN_ETHDEV_H_
+
+#include <assert.h>
+#include <sys/user.h>
+#include <inttypes.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include <xen/io/netif.h>
+
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_string_fns.h>
+#include <rte_spinlock.h>
+#include <rte_memzone.h>
+#include <rte_dev.h>
+
+#include "xen_logs.h"
+
+#include "xen_adapter_info.h"
+
+typedef uint64_t u64;
+
+#undef  PAGE_SIZE
+#define PAGE_SHIFT            12
+#define PAGE_SIZE             (1 << PAGE_SHIFT)
+
+#define __phys_to_pfn(paddr)  ((unsigned long)((paddr) >> PAGE_SHIFT))
+#define __pfn_to_phys(pfn)    ((phys_addr_t)(pfn) << PAGE_SHIFT)
+
+#define NET_TX_RING_SIZE      __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
+#define NET_RX_RING_SIZE      __CONST_RING_SIZE(netif_rx, PAGE_SIZE)
+
+#define RX_MAX_TARGET         min_t(int, NET_RX_RING_SIZE, 256)
+#define TX_MAX_TARGET         min_t(int, NET_TX_RING_SIZE, 256)
+
+#if __XEN_LATEST_INTERFACE_VERSION__ > 0x0003020a
+
+#define FRONT_RING_ATTACH(_r, _s, __size) do {   \
+       (_r)->sring = (_s);                      \
+       (_r)->req_prod_pvt = (_s)->req_prod;     \
+       (_r)->rsp_cons = (_s)->rsp_prod;         \
+       (_r)->nr_ents = __RING_SIZE(_s, __size); \
+} while (0)
+
+#endif
+
+#define VA_XA(eth_dev) \
+       (struct xen_adapter *)((eth_dev->data->dev_private))
+
+#define min_t(t, x, y) ({   \
+       t _x = (x);         \
+       t _y = (y);         \
+       _x > _y ? _x : _y; })
+
+struct xen_adapter {
+       /* it's a place for all uio resources */
+       void *uio_res[XEN_MAP_MAX];
+       size_t uio_len[XEN_MAP_MAX];
+
+       /*pointer to the info page*/
+       struct xen_adapter_info *info_page;
+
+       void **rx_queues;
+       void **tx_queues;
+};
+
+#include "xen_rxtx.h"
+
+void xen_set_rx_ng(struct xen_rx_queue *rxq);
+void xen_set_tx_ng(struct xen_tx_queue *txq);
+int xen_evtchn_notify_grant_rx(struct xen_rx_queue *rxq);
+int xen_evtchn_notify_grant_tx(struct xen_tx_queue *txq);
+
+/*rx*/
+int xen_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+               uint16_t nb_rx_desc, unsigned int socket_id,
+               const struct rte_eth_rxconf *rx_conf,
+               struct rte_mempool *mb_pool);
+void xen_dev_rx_queue_release(void *rxq);
+uint16_t xen_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+               uint16_t nb_pkts);
+
+/*tx*/
+int xen_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+               uint16_t nb_tx_desc, unsigned int socket_id,
+               const struct rte_eth_txconf *tx_conf);
+void xen_dev_tx_queue_release(void *txq);
+
+#endif /* _XEN_ETHDEV_H_ */
diff --git a/lib/librte_pmd_xen/xen_logs.h b/lib/librte_pmd_xen/xen_logs.h
new file mode 100644
index 0000000..2334db0
--- /dev/null
+++ b/lib/librte_pmd_xen/xen_logs.h
@@ -0,0 +1,23 @@
+#ifndef _XEN_LOGS_H_
+#define _XEN_LOGS_H_
+
+
+#ifdef RTE_LIBRTE_XEN_DEBUG_INIT
+
+/* Debug build: log through DPDK's RTE_LOG, prefixed with the calling
+ * function, and flush immediately so messages are not lost if the
+ * application dies during initialisation. */
+#define PMD_INIT_LOG(level, fmt, args...) \
+       do { \
+               RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args);\
+               fflush(stdout);\
+       } while (0)
+
+#else
+
+/* Non-debug build: logging compiles away entirely. */
+#define PMD_INIT_LOG(level, fmt, args...) \
+       do { } while (0)
+
+#endif
+
+/* Entry-point trace helper for init functions. */
+#define PMD_INIT_FUNC_TRACE() \
+       PMD_INIT_LOG(DEBUG, " >>")
+
+#endif /* _XEN_LOGS_H_ */
diff --git a/lib/librte_pmd_xen/xen_rxtx.c b/lib/librte_pmd_xen/xen_rxtx.c
new file mode 100644
index 0000000..c45e67a
--- /dev/null
+++ b/lib/librte_pmd_xen/xen_rxtx.c
@@ -0,0 +1,546 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include "xen_dev.h"
+#include "xen_rxtx.h"
+#include "virt_dev.h"
+
+/* Physical (DMA) address of the first byte of mbuf packet data. */
+#define RTE_MBUF_DATA_DMA_ADDR(mb)             \
+       (uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
+
+/* (Re)initialise the RX shared ring, attach the front ring to it, prime
+ * the ring with receive requests, and record the current backend
+ * disconnect generation so the RX hot path can detect a reconnect. */
+static void
+xen_rx_ring_init(struct xen_rx_queue *rxq)
+{
+       SHARED_RING_INIT(rxq->rxs);
+       FRONT_RING_ATTACH(&rxq->ring, rxq->rxs, PAGE_SIZE);
+       xen_dev_rx_send_requests(rxq);
+       rxq->rx_disconnect_count = rxq->xa->info_page->disconnect_count;
+       xen_set_rx_ng(rxq);
+}
+
+/* (Re)initialise the TX shared ring, attach the front ring, drain any
+ * stale responses, and record the current backend disconnect generation
+ * so the TX hot path can detect a reconnect. */
+static void
+xen_tx_ring_init(struct xen_tx_queue *txq)
+{
+       SHARED_RING_INIT(txq->txs);
+       FRONT_RING_ATTACH(&txq->ring, txq->txs, PAGE_SIZE);
+       xen_dev_tx_recv_responses(txq);
+       txq->tx_disconnect_count = txq->xa->info_page->disconnect_count;
+       xen_set_tx_ng(txq);
+}
+
+/*
+ * Refill the RX ring: allocate one mbuf per free request slot, align its
+ * data area to a page boundary (the backend grants whole pages), and
+ * publish the corresponding grant references via the notify/grant ioctl.
+ *
+ * @param rxq  RX queue to refill.
+ * @return always 0; allocation failure just stops the refill early.
+ */
+int
+xen_dev_rx_send_requests(struct xen_rx_queue *rxq)
+{
+       uint16_t i;
+       struct netif_rx_request *req;
+       RING_IDX req_prod = rxq->ring.req_prod_pvt;
+       RING_IDX prod = req_prod;
+       uint16_t free_space = RING_FREE_REQUESTS(&rxq->ring);
+
+       xen_set_rx_ng(rxq);
+
+       for (i = 0; i < free_space; i++) {
+               struct rte_mbuf *mbuf;
+
+               /* ring is a power of two; mask gives the slot index */
+               prod = (req_prod + i) & (RING_SIZE(&rxq->ring) - 1);
+
+               req = RING_GET_REQUEST(&rxq->ring, prod);
+
+               mbuf = rte_pktmbuf_alloc(rxq->mb_pool);
+               if (unlikely(!mbuf)) {
+                       PMD_INIT_LOG(ERR, "no mbuf");
+                       break; /*skip*/
+               }
+
+               mbuf->ol_flags |= PKT_RX_IPV4_HDR;
+               rxq->mbuf[prod] = mbuf;
+
+               /* round the data pointer up to the next page boundary so
+                * the granted page starts exactly at the packet data */
+               uint64_t phys_addr = RTE_MBUF_DATA_DMA_ADDR(mbuf);
+               uint64_t phys_addr_shifted =
+                       (phys_addr + PAGE_SIZE - 1) &
+                       (~((uint64_t)PAGE_SIZE - 1));
+               uint64_t shift =  phys_addr_shifted - phys_addr;
+
+               mbuf->data_off += shift;
+               rxq->ng_rx.s[i].gref = rxq->gref[prod];
+
+               rxq->ng_rx.s[i].paddr = __phys_to_pfn(phys_addr_shifted);
+
+               req->gref = rxq->gref[prod];
+               req->id = prod;
+       }
+
+       /* expose the new requests; i is the number actually filled */
+       rxq->ring.req_prod_pvt = (req_prod + i);
+
+       rxq->ng_rx.count = i;
+       xen_evtchn_notify_grant_rx(rxq);
+       rxq->ng_rx.rel_count = 0;
+
+       return 0;
+}
+
+/*
+ * Handle an extra-info response while in the RX_RESP_EXTRA state.
+ * Any malformed input drops the state machine back to RX_RESP_GENERAL.
+ *
+ * BUG FIX: the original logged a NULL 'extra' but then fell through and
+ * dereferenced extra->type — a NULL pointer dereference.  Each invalid
+ * case now returns immediately after resetting the state.
+ */
+static void
+xen_dev_rx_recv_extra(struct xen_rx_queue *rxq, struct netif_extra_info *extra)
+{
+       if (unlikely(!extra)) {
+               PMD_INIT_LOG(ERR, "Invalid rxq state transition: %d",
+                               rxq->state);
+               rxq->state = RX_RESP_GENERAL;
+               return;
+       }
+
+       if (unlikely(!extra->type ||
+                               extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+               PMD_INIT_LOG(WARNING, "Invalid extra type: %d", extra->type);
+               rxq->state = RX_RESP_GENERAL;
+               return;
+       }
+
+       /* no FLAG_MORE: this was the last extra response in the chain */
+       if (!(extra->flags & XEN_NETIF_EXTRA_FLAG_MORE)) {
+               PMD_INIT_LOG(DEBUG, "No XEN_NETIF_EXTRA_FLAG_MORE");
+               rxq->state = RX_RESP_GENERAL;
+       }
+}
+
+/*
+ * Consume RX responses from the shared ring and hand completed packets
+ * to the caller.  A small state machine handles the three response
+ * kinds: RX_RESP_GENERAL (single packet or first segment),
+ * RX_RESP_EXTRA (extra-info responses to be skipped), and
+ * RX_RESP_CONTINUE (additional segments of a chained packet).
+ *
+ * FIX: restores the statement that was line-wrapped by the mail client
+ * ("rxq->rx_stats.ibytes += rxq->first_frag->pkt_len;" had lost its
+ * diff prefix and would not apply/compile).
+ *
+ * @return number of complete packets stored into rx_pkts.
+ */
+static uint16_t
+xen_dev_rx_recv_responses(struct xen_rx_queue *rxq, struct rte_mbuf **rx_pkts,
+               uint16_t nb_pkts)
+{
+       uint16_t nb_rx;
+       uint16_t i;
+       struct netif_rx_response *rsp;
+       struct netif_extra_info *extra = NULL;
+       RING_IDX rsp_cons = rxq->ring.rsp_cons;
+       RING_IDX cons = rsp_cons;
+       uint16_t work_todo;
+
+       nb_rx = 0;
+       work_todo = RING_HAS_UNCONSUMED_RESPONSES(&rxq->ring);
+       for (i = 0; i < work_todo && nb_rx < nb_pkts; i++) {
+               struct rte_mbuf *mbuf;
+
+               cons = (rsp_cons + i) & (RING_SIZE(&rxq->ring) - 1);
+
+               rsp = RING_GET_RESPONSE(&rxq->ring, cons);
+
+               PMD_INIT_LOG(DEBUG, "id:%u status:%u offset:%u flags:%x",
+                               rsp->id, rsp->status, rsp->offset, rsp->flags);
+
+               /* every consumed slot returns its grant reference */
+               rxq->ng_rx.rel_gref[rxq->ng_rx.rel_count] = rxq->gref[cons];
+               rxq->ng_rx.rel_count++;
+
+               if (unlikely(rsp->status < 0)) {
+                       PMD_INIT_LOG(WARNING, "bad rsp->status: %d",
+                                       rsp->status);
+                       rte_pktmbuf_free(rxq->mbuf[cons]);
+                       rxq->mbuf[cons] = NULL;
+                       /* drop any partially assembled chain */
+                       rxq->state = RX_RESP_GENERAL;
+                       rxq->first_frag = rxq->prev_frag = NULL;
+                       continue;
+               }
+
+               switch (rxq->state) {
+               case RX_RESP_GENERAL: /* normal receiving */
+                       if (unlikely(rsp->flags & NETRXF_extra_info)) {
+                               PMD_INIT_LOG(DEBUG,
+                                               "EXTRA_NETRXF_extra_info");
+                               rxq->state = RX_RESP_EXTRA;
+                               rte_pktmbuf_free(rxq->mbuf[cons]);
+                               rxq->mbuf[cons] = NULL;
+                               break;
+                       }
+                       /* normal receive */
+                       if (rxq->mbuf[cons]) {
+                               mbuf = rxq->mbuf[cons];
+                               mbuf->port = rxq->port_id;
+                               /* rsp->status is the segment length */
+                               mbuf->data_len = mbuf->pkt_len = rsp->status;
+                               mbuf->data_off += rsp->offset;
+
+                               if (rsp->flags & NETRXF_more_data) {
+                                       rxq->state = RX_RESP_CONTINUE;
+                                       rxq->first_frag =
+                                               rxq->prev_frag = mbuf;
+                               } else {
+                                       /*send to the upper level*/
+                                       rx_pkts[nb_rx++] = mbuf;
+                                       rxq->rx_stats.ipackets++;
+                                       rxq->rx_stats.ibytes +=
+                                               mbuf->pkt_len;
+                               }
+
+                               rxq->mbuf[cons] = NULL;
+                       } else {
+                               PMD_INIT_LOG(WARNING, "no rxq->mbuf[%d]",
+                                               cons);
+                               rxq->rx_stats.ierrors++;
+                       }
+                       break;
+
+               case RX_RESP_EXTRA: /* extra */
+                       extra = (struct netif_extra_info *)rsp;
+                       xen_dev_rx_recv_extra(rxq, extra);
+                       rte_pktmbuf_free(rxq->mbuf[cons]);
+                       rxq->mbuf[cons] = NULL;
+                       break;
+
+               case RX_RESP_CONTINUE: /* packet is segmented */
+                       if (rxq->mbuf[cons]) {
+                               mbuf = rxq->mbuf[cons];
+                               /* mbuf->in_port = rxq->port_id; */
+                               mbuf->data_len = mbuf->pkt_len =
+                                       rsp->status;
+                               mbuf->data_off += rsp->offset;
+
+                               /* append the segment to the chain */
+                               rxq->first_frag->nb_segs++;
+                               rxq->first_frag->pkt_len += mbuf->data_len;
+                               rxq->prev_frag->next = mbuf;
+
+                               if (rsp->flags & NETRXF_more_data)
+                                       rxq->prev_frag = mbuf;
+                               else {
+                                       rxq->state = RX_RESP_GENERAL;
+                                       /*send to the upper level*/
+                                       rx_pkts[nb_rx++] = rxq->first_frag;
+                                       rxq->rx_stats.ipackets++;
+                                       rxq->rx_stats.ibytes +=
+                                               rxq->first_frag->pkt_len;
+                                       rxq->first_frag = rxq->prev_frag = NULL;
+                               }
+
+                               rxq->mbuf[cons] = NULL;
+                       } else {
+                               PMD_INIT_LOG(WARNING, "no cntn rxq->mbuf[%d]",
+                                               cons);
+                               rxq->rx_stats.ierrors++;
+                       }
+                       break;
+               }
+
+               /* belt-and-braces: every branch above already cleared it */
+               rxq->mbuf[cons] = NULL;
+       }
+       rxq->ring.rsp_cons = (rsp_cons + i);
+
+       return nb_rx;
+}
+
+/* Burst receive entry point registered as dev->rx_pkt_burst.
+ * Returns 0 while the backend is disconnected; otherwise re-attaches the
+ * ring after a reconnect, drains responses, and refills requests. */
+uint16_t
+xen_dev_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+       struct xen_rx_queue *rxq = rx_queue;
+       uint16_t nb_rx = 0;
+
+       if (likely(rxq->xa->info_page->is_connected)) {
+               /* backend went away and came back: rebuild the ring */
+               if (unlikely(rxq->rx_disconnect_count !=
+                               rxq->xa->info_page->disconnect_count)) {
+                       xen_rx_queue_release(rxq);
+                       xen_rx_ring_init(rxq);
+               }
+
+               nb_rx = xen_dev_rx_recv_responses(rxq, rx_pkts, nb_pkts);
+               xen_dev_rx_send_requests(rxq);
+       }
+
+       return nb_rx;
+}
+
+/*
+ * Return every RX grant reference to the backend and free all mbufs still
+ * posted to the ring.  Called on queue teardown and on backend reconnect
+ * (the ring is then rebuilt from scratch by xen_rx_ring_init()).
+ */
+void
+xen_rx_queue_release(struct xen_rx_queue *rxq)
+{
+       uint16_t i;
+
+       rxq->ng_rx.count = 0;
+       rxq->ng_rx.rel_count = 0;
+
+       /* release grefs for ALL slots, mbuf or not */
+       for (i = 0; i < (RING_SIZE(&rxq->ring)); i++) {
+               rxq->ng_rx.rel_gref[rxq->ng_rx.rel_count] =
+                       rxq->gref[i];
+               rxq->ng_rx.rel_count++;
+               if (NULL != rxq->mbuf[i]) {
+                       rte_pktmbuf_free(rxq->mbuf[i]);
+                       rxq->mbuf[i] = NULL;
+               }
+       }
+       xen_evtchn_notify_grant_rx(rxq);
+}
+
+/* rte_ethdev release hook: tear down an RX queue and free its memory. */
+void
+xen_dev_rx_queue_release(void *rxq)
+{
+       struct xen_rx_queue *rx_q = rxq;
+
+       if (rx_q == NULL)
+               return;
+
+       xen_rx_queue_release(rx_q);
+       rte_free(rx_q);
+}
+
+/*
+ * Ethdev rx_queue_setup callback.  Allocates the queue structure, wires
+ * it to the pre-mapped shared ring and grant-reference table from the
+ * info page, and installs the RX burst function.
+ *
+ * nb_desc/socket_id/rx_conf are ignored: the ring geometry is fixed by
+ * the kernel side (NET_RX_RING_SIZE).
+ *
+ * @return 0 on success, -ENOMEM on allocation failure or if the kernel
+ *         exported fewer grant refs than the ring needs.
+ */
+int
+xen_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+               __attribute__((unused)) uint16_t nb_desc,
+               __attribute__((unused)) unsigned int socket_id,
+               __attribute__((unused)) const struct rte_eth_rxconf *rx_conf,
+               struct rte_mempool *mp)
+{
+       struct xen_rx_queue *rxq;
+       struct xen_adapter *xa = VA_XA(dev);
+       struct xen_adapter_info *info = xa->info_page;
+
+       if (NET_RX_RING_SIZE > info->rx_grefs_count) {
+               PMD_INIT_LOG(ERR, "rx ring size greater than rx grefs count");
+               return -ENOMEM;
+       }
+
+       rxq = rte_zmalloc("rx_queue", sizeof(struct xen_rx_queue),
+                         RTE_CACHE_LINE_SIZE);
+       if (NULL == rxq) {
+               PMD_INIT_LOG(ERR, "rte_zmalloc for rxq failed!");
+               return -ENOMEM;
+       }
+
+       rxq->xa = xa;
+       rxq->queue_id = queue_idx;
+       rxq->port_id = dev->data->port_id;
+       rxq->state = RX_RESP_GENERAL;
+       rxq->first_frag = rxq->prev_frag = NULL;
+       rxq->mb_pool = mp;
+       rxq->ng_rx.is_rx = 1;
+       rxq->ng_rx.rel_count = 0;
+       /* RX grefs occupy the first rx_grefs_count entries of the table */
+       rxq->gref = &info->rxtx_grefs[0];
+
+       rxq->rxs = (struct netif_rx_sring *)xa->uio_res[RX_RING_MAP];
+
+       dev->data->rx_queues[queue_idx] = rxq;
+       if (!xa->rx_queues)
+               xa->rx_queues = dev->data->rx_queues;
+
+       xen_rx_ring_init(rxq);
+
+       dev->rx_pkt_burst = xen_dev_recv_pkts;
+
+       return 0;
+}
+
+/*
+ * Fill TX request slot (req_prod_pvt + i) for one page-sized piece of a
+ * packet.
+ *
+ * Note: req->size is the TOTAL packet length when this slot carries the
+ * first segment (txq->mbuf[prod] is set only for the head slot by the
+ * caller), and the piece length 'size' otherwise — matching the netif
+ * protocol's first-request convention.
+ */
+static void
+xen_dev_tx_prepare_request(struct xen_tx_queue *txq, uint16_t i, uint16_t size,
+               uint16_t offset, uint16_t flags, unsigned long paddr)
+{
+       RING_IDX prod = (txq->ring.req_prod_pvt+i) & (RING_SIZE(&txq->ring)-1);
+       struct netif_tx_request *req = RING_GET_REQUEST(&txq->ring, prod);
+
+       txq->ng_tx.s[i].gref = txq->gref[prod];
+       txq->ng_tx.s[i].paddr = paddr;
+
+       req->id = prod;
+       req->flags = flags;
+       req->offset = offset;
+       req->gref = txq->gref[prod];
+       req->size = (txq->mbuf[prod] ? txq->mbuf[prod]->pkt_len : size);
+
+       PMD_INIT_LOG(DEBUG, "id:%u size:%u offset:%u gref:%u flags:%x",
+               req->id, req->size, req->offset, req->gref, req->flags);
+}
+
+/*
+ * Queue up to nb_pkts mbuf chains as TX requests.  Each mbuf segment may
+ * be split into two requests when it straddles a page boundary, so a
+ * packet needs up to nb_segs * 2 free slots.
+ *
+ * BUG FIX: the original stored the mbuf into txq->mbuf[prod] BEFORE the
+ * NULL and free-space checks, so breaking out of the loop left a stale
+ * pointer in the ring shadow array for a packet still owned by the
+ * caller — xen_tx_queue_release() would later free it a second time.
+ * The mbuf is now published only after both checks pass.
+ *
+ * @return number of packets actually queued.
+ */
+static int
+xen_dev_tx_send_requests(struct xen_tx_queue *txq, struct rte_mbuf **tx_pkts,
+               uint16_t nb_pkts)
+{
+       struct rte_mbuf *mbuf;
+       unsigned long paddr;
+       uint16_t offset;
+       uint16_t flags;
+       uint16_t size;
+       uint16_t i = 0;
+       uint16_t nb_tx = 0;
+       uint16_t free_space = RING_FREE_REQUESTS(&txq->ring);
+
+       xen_set_tx_ng(txq);
+
+       while (i < free_space && nb_tx < nb_pkts) {
+
+               RING_IDX prod = (txq->ring.req_prod_pvt + i) &
+                       (RING_SIZE(&txq->ring) - 1);
+               mbuf = tx_pkts[nb_tx];
+
+               if (unlikely(NULL == mbuf)) {
+                       PMD_INIT_LOG(WARNING, "no mbuf for req");
+                       break;
+               }
+
+               /* each segment could be split because of the page offset,
+                * so it must be twice */
+               if (i + (mbuf->nb_segs * 2) > free_space)
+                       break;
+
+               /* publish the head mbuf only once it is sure to be sent;
+                * xen_dev_tx_prepare_request() reads it for req->size */
+               txq->mbuf[prod] = mbuf;
+
+               /* prepare request for each mbuf segment */
+               do {
+                       size = mbuf->data_len;
+                       flags = (mbuf->next ? NETTXF_more_data : 0);
+                       paddr = __phys_to_pfn(RTE_MBUF_DATA_DMA_ADDR(mbuf));
+                       offset = (RTE_MBUF_DATA_DMA_ADDR(mbuf)) &
+                               ((uint64_t)PAGE_SIZE - 1);
+
+                       /* check if additional segmentation is needed */
+                       if (size + offset > PAGE_SIZE) {
+                               size = PAGE_SIZE - offset;
+                               xen_dev_tx_prepare_request(txq, i, size,
+                                       offset, NETTXF_more_data, paddr);
+                               paddr += size;
+                               offset = (offset + size) % PAGE_SIZE;
+                               size = mbuf->data_len - size;
+                               i++;
+                       }
+
+                       xen_dev_tx_prepare_request(txq, i, size,
+                                       offset, flags, paddr);
+                       i++;
+
+               } while ((mbuf = mbuf->next));
+
+               nb_tx++;
+               txq->tx_stats.opackets++;
+               txq->tx_stats.obytes += txq->mbuf[prod]->pkt_len;
+       }
+
+       txq->ring.req_prod_pvt += i;
+       txq->ng_tx.count = i;
+       xen_evtchn_notify_grant_tx(txq);
+       txq->ng_tx.rel_count = 0;
+
+       return nb_tx;
+}
+
+/*
+ * Reap TX completions: walk the unconsumed responses, return each slot's
+ * grant reference, and free the transmitted mbuf (set only on the head
+ * slot of each packet).
+ *
+ * @return always 0.
+ */
+int
+xen_dev_tx_recv_responses(struct xen_tx_queue *txq)
+{
+       uint16_t i;
+       struct netif_tx_response *rsp;
+       RING_IDX rsp_cons = txq->ring.rsp_cons;
+       RING_IDX cons;
+       uint16_t work_todo;
+
+       work_todo = RING_HAS_UNCONSUMED_RESPONSES(&txq->ring);
+       for (i = 0; i < work_todo; i++) {
+               cons = (rsp_cons + i) & (RING_SIZE(&txq->ring) - 1);
+
+               rsp = RING_GET_RESPONSE(&txq->ring, cons);
+
+               if (unlikely(rsp->status == NETIF_RSP_NULL))
+                       PMD_INIT_LOG(WARNING, "NETIF_RSP_NULL");
+
+               txq->ng_tx.rel_gref[txq->ng_tx.rel_count] = txq->gref[cons];
+               txq->ng_tx.rel_count++;
+
+               /* only head slots carry an mbuf pointer */
+               if (likely(txq->mbuf[cons] != NULL)) {
+                       rte_pktmbuf_free(txq->mbuf[cons]);
+                       txq->mbuf[cons] = NULL;
+               }
+       }
+       txq->ring.rsp_cons = (rsp_cons + i);
+
+       return 0;
+}
+
+/* Burst transmit entry point registered as dev->tx_pkt_burst.
+ * Returns 0 while the backend is disconnected; otherwise re-attaches the
+ * ring after a reconnect, reaps completions, and queues new packets. */
+uint16_t
+xen_dev_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+       struct xen_tx_queue *txq = tx_queue;
+       uint16_t nb_tx = 0;
+
+       if (likely(txq->xa->info_page->is_connected)) {
+               /* backend went away and came back: rebuild the ring */
+               if (unlikely(txq->tx_disconnect_count !=
+                               txq->xa->info_page->disconnect_count)) {
+                       xen_tx_queue_release(txq);
+                       xen_tx_ring_init(txq);
+               }
+
+               xen_dev_tx_recv_responses(txq);
+               nb_tx = xen_dev_tx_send_requests(txq, tx_pkts, nb_pkts);
+       }
+
+       return nb_tx;
+}
+
+/*
+ * Free all mbufs still held by the TX ring and return their grant
+ * references.  Called on queue teardown and on backend reconnect.
+ *
+ * NOTE(review): unlike xen_rx_queue_release(), only slots that hold an
+ * mbuf return their gref — presumably intentional since only head slots
+ * own grefs here; confirm against the kernel-side grant accounting.
+ */
+void
+xen_tx_queue_release(struct xen_tx_queue *txq)
+{
+       uint16_t i;
+
+       txq->ng_tx.count = 0;
+       txq->ng_tx.rel_count = 0;
+
+       for (i = 0; i < (RING_SIZE(&txq->ring)); i++) {
+               if (NULL != txq->mbuf[i]) {
+                       rte_pktmbuf_free(txq->mbuf[i]);
+                       txq->mbuf[i] = NULL;
+                       txq->ng_tx.rel_gref[txq->ng_tx.rel_count] =
+                               txq->gref[i];
+                       txq->ng_tx.rel_count++;
+               }
+       }
+       xen_evtchn_notify_grant_tx(txq);
+}
+
+/* rte_ethdev release hook: tear down a TX queue and free its memory. */
+void
+xen_dev_tx_queue_release(void *txq)
+{
+       struct xen_tx_queue *tx_q = txq;
+
+       if (tx_q == NULL)
+               return;
+
+       xen_tx_queue_release(tx_q);
+       rte_free(tx_q);
+}
+
+/*
+ * Ethdev tx_queue_setup callback.  Mirrors xen_dev_rx_queue_setup():
+ * allocates the queue structure, wires it to the pre-mapped TX shared
+ * ring and the TX half of the grant-reference table, and installs the
+ * TX burst function.
+ *
+ * nb_desc/socket_id/tx_conf are ignored: ring geometry is fixed by the
+ * kernel side (NET_TX_RING_SIZE).
+ *
+ * @return 0 on success, -ENOMEM on allocation failure or if the kernel
+ *         exported fewer grant refs than the ring needs.
+ */
+int
+xen_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+               __attribute__((unused)) uint16_t nb_desc,
+               __attribute__((unused)) unsigned int socket_id,
+               __attribute__((unused)) const struct rte_eth_txconf *tx_conf)
+{
+       struct xen_tx_queue *txq;
+       struct xen_adapter *xa = VA_XA(dev);
+       struct xen_adapter_info *info = xa->info_page;
+
+       if (NET_TX_RING_SIZE > info->tx_grefs_count) {
+               PMD_INIT_LOG(ERR, "tx ring size greater than tx grefs count");
+               return -ENOMEM;
+       }
+
+       txq = rte_zmalloc("tx_queue", sizeof(struct xen_tx_queue),
+                         RTE_CACHE_LINE_SIZE);
+       if (NULL == txq) {
+               PMD_INIT_LOG(ERR, "rte_zmalloc for txq failed!");
+               return -ENOMEM;
+       }
+
+       txq->txs = (struct netif_tx_sring *)xa->uio_res[TX_RING_MAP];
+
+       txq->xa = xa;
+       txq->queue_id = queue_idx;
+       txq->port_id = dev->data->port_id;
+       txq->ng_tx.is_rx = 0;
+       txq->ng_tx.rel_count = 0;
+       /* TX grefs follow the RX grefs in the shared table */
+       txq->gref = &info->rxtx_grefs[info->rx_grefs_count];
+
+       dev->data->tx_queues[queue_idx] = txq;
+       if (!xa->tx_queues)
+               xa->tx_queues = dev->data->tx_queues;
+
+       xen_tx_ring_init(txq);
+
+       dev->tx_pkt_burst = xen_dev_xmit_pkts;
+
+       return 0;
+}
diff --git a/lib/librte_pmd_xen/xen_rxtx.h b/lib/librte_pmd_xen/xen_rxtx.h
new file mode 100644
index 0000000..eea41c8
--- /dev/null
+++ b/lib/librte_pmd_xen/xen_rxtx.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef _XEN_RXTX_H_
+#define _XEN_RXTX_H_
+
+#define DEFAULT_RX_FREE_THRESH   0
+#define DEFAULT_TX_FREE_THRESH   512
+
+/* Per-queue TX counters, aggregated into rte_eth_stats by the driver. */
+struct xen_tx_stats {
+       uint64_t opackets;
+       uint64_t obytes;
+       uint64_t oerrors;
+};
+
+/* Per-queue RX counters, aggregated into rte_eth_stats by the driver. */
+struct xen_rx_stats {
+       uint64_t ipackets;
+       uint64_t ibytes;
+       uint64_t ierrors;
+};
+
+/* RX response-processing state machine. */
+enum rx_resp_state {
+       RX_RESP_GENERAL = 0,    /* expecting a normal response */
+       RX_RESP_CONTINUE,       /* in the middle of a segmented packet */
+       RX_RESP_EXTRA           /* expecting extra-info responses */
+};
+
+struct xen_rx_queue {
+       /** RX queue index. */
+       uint16_t                   queue_id;
+       /** Device port identifier. */
+       uint8_t                    port_id;
+       /** mbuf pool to populate RX ring. */
+       struct rte_mempool         *mb_pool;
+       /** Ptr to dev_private data. */
+       struct xen_adapter         *xa;
+
+       /* Xen specific */
+
+       /** Front half of the xen rx ring shared with the other end. */
+       netif_rx_front_ring_t      ring;
+       struct netif_rx_sring      *rxs;
+       /** Grant references for sharing with the other end. */
+       grant_ref_t                *gref;
+       /** mbufs posted to the ring, indexed by ring slot. */
+       struct rte_mbuf            *mbuf[NET_RX_RING_SIZE];
+       /** Response state machine. */
+       enum rx_resp_state         state;
+       /** First segment of the packet being reassembled. */
+       struct rte_mbuf            *first_frag;
+       /** Previous segment of the packet being reassembled. */
+       struct rte_mbuf            *prev_frag;
+       /** Statistics. */
+       struct xen_rx_stats        rx_stats;
+       /** Backend disconnect generation seen at last ring init. */
+       uint8_t                    rx_disconnect_count;
+       /** Notify and gnttab ioctl struct. */
+       struct ioctl_evtchn_notify_grant ng_rx;
+};
+
+struct xen_tx_queue {
+       /** TX queue index. */
+       uint16_t                   queue_id;
+       /** Device port identifier. */
+       uint8_t                    port_id;
+       /** Ptr to dev_private data. */
+       struct xen_adapter         *xa;
+
+       /* Xen specific */
+
+       /** Front half of the xen tx ring shared with the other end. */
+       netif_tx_front_ring_t      ring;
+       struct netif_tx_sring      *txs;
+       /** Grant references for sharing with the other end. */
+       grant_ref_t                *gref;
+       /** mbufs queued on the ring, indexed by ring slot. */
+       struct rte_mbuf            *mbuf[NET_TX_RING_SIZE];
+       /** Statistics. */
+       struct xen_tx_stats        tx_stats;
+       /** Backend disconnect generation seen at last ring init. */
+       uint8_t                    tx_disconnect_count;
+       /** Notify and gnttab ioctl struct. */
+       struct ioctl_evtchn_notify_grant ng_tx;
+};
+
+int xen_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+               uint16_t nb_rx_desc, unsigned int socket_id,
+               const struct rte_eth_rxconf *rx_conf,
+               struct rte_mempool *mb_pool);
+
+int xen_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+               uint16_t nb_tx_desc, unsigned int socket_id,
+               const struct rte_eth_txconf *tx_conf);
+
+void xen_dev_rx_queue_release(void *rxq);
+void xen_dev_tx_queue_release(void *txq);
+void xen_rx_queue_release(struct xen_rx_queue *rxq);
+void xen_tx_queue_release(struct xen_tx_queue *txq);
+
+int xen_dev_rx_send_requests(struct xen_rx_queue *rxq);
+int xen_dev_tx_recv_responses(struct xen_tx_queue *txq);
+
+uint16_t xen_dev_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+               uint16_t nb_pkts);
+uint16_t xen_dev_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+               uint16_t nb_pkts);
+
+#endif /* _XEN_RXTX_H_ */
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 334cb25..5d0927c 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -192,6 +192,10 @@ LDLIBS += -lrte_pmd_xenvirt
 LDLIBS += -lxenstore
 endif

+ifeq ($(CONFIG_RTE_LIBRTE_XEN_PMD),y)
+LDLIBS += -lrte_pmd_xen
+endif
+
 ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),n)
 # plugins (link only if static libraries)

-- 
2.1.4

Reply via email to