[dpdk-dev] [PATCH 2/5] [pktgen] different PCAP per each queue

2019-01-10 Thread Rafal Kozik
In some test scenarios it is necessary to configure a different flow for
each Tx queue. This patch adds the possibility to provide a list of PCAP
files.

If the number of files is smaller than the number of queues, the first
PCAP file is used for the queues whose index is above the number of files.
To achieve uniform throughput, the average packet length in each PCAP
file needs to be the same.
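
For illustration only, a hypothetical invocation that gives each of four Tx
queues on port 0 its own PCAP could look like this (the EAL options, the -m
core mapping and the file names are made up; only the -s P:file0,file1,...
form comes from this patch):

    pktgen -l 0-5 -n 4 -- -P -m "[1:2-5].0" -s 0:q0.pcap,q1.pcap,q2.pcap,q3.pcap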

Signed-off-by: Rafal Kozik 
---
 app/pktgen-main.c | 40 ++--
 app/pktgen-pcap.c | 12 ++--
 app/pktgen-port-cfg.c |  8 +++-
 app/pktgen-port-cfg.h |  1 +
 4 files changed, 44 insertions(+), 17 deletions(-)

diff --git a/app/pktgen-main.c b/app/pktgen-main.c
index b90da0c..68ffa13 100644
--- a/app/pktgen-main.c
+++ b/app/pktgen-main.c
@@ -81,6 +81,7 @@ pktgen_usage(const char *prgname)
printf(
"Usage: %s [EAL options] -- [-h] [-v] [-P] [-G] [-T] [-f 
cmd_file] [-l log_file] [-s P:PCAP_file] [-m ]\n"
"  -s P:filePCAP packet stream file, 'P' is the port 
number\n"
+   "  -s P:file0,file1,... list of PCAP packet stream files per 
queue, 'P' is the port number\n"
"  -f filename  Command file (.pkt) to execute or a Lua script 
(.lua) file\n"
"  -l filename  Write log to filename\n"
"  -P   Enable PROMISCUOUS mode on all ports\n"
@@ -142,10 +143,10 @@ pktgen_usage(const char *prgname)
 static int
 pktgen_parse_args(int argc, char **argv)
 {
-   int opt, ret, port;
+   int opt, ret, port, q;
char **argvopt;
int option_index;
-   char *prgname = argv[0], *p;
+   char *prgname = argv[0], *p, *pc;
static struct option lgopts[] = {
{"crc-strip", 0, 0, 0},
{NULL, 0, 0, 0}
@@ -187,14 +188,27 @@ pktgen_parse_args(int argc, char **argv)
case 's':   /* Read a PCAP packet capture file (stream) */
port = strtol(optarg, NULL, 10);
p = strchr(optarg, ':');
-   if ( (p == NULL) ||
-(pktgen.info[port].pcap =
- _pcap_open(++p, port)) == NULL) {
-   pktgen_log_error(
-   "Invalid PCAP filename (%s) must 
include port number as P:filename",
-   optarg);
-   pktgen_usage(prgname);
-   return -1;
+   pc = strchr(optarg, ',');
+   if (p == NULL)
+   goto pcap_err;
+   if (pc == NULL) {
+   pktgen.info[port].pcap = _pcap_open(++p, port);
+   if (pktgen.info[port].pcap == NULL)
+   goto pcap_err;
+   } else {
+   q = 0;
+   while (p != NULL && q < NUM_Q) {
+   p++;
+   pc = strchr(p, ',');
+   if (pc != NULL)
+   *pc = '\0';
+   pktgen.info[port].pcaps[q] = _pcap_open(p, port);
+   if (pktgen.info[port].pcaps[q] == NULL)
+   goto pcap_err;
+   p = pc;
+   q++;
+   }
+   pktgen.info[port].pcap = pktgen.info[port].pcaps[0];
}
break;
case 'P':   /* Enable promiscuous mode on the ports */
@@ -257,6 +271,12 @@ pktgen_parse_args(int argc, char **argv)
ret = optind - 1;
optind = 1; /* reset getopt lib */
return ret;
+
+pcap_err:
+   pktgen_log_error("Invalid PCAP filename (%s) must include port number as P:filename",
+optarg);
+   pktgen_usage(prgname);
+   return -1;
 }
 
 #define MAX_BACKTRACE  32
diff --git a/app/pktgen-pcap.c b/app/pktgen-pcap.c
index dffdd13..e7ef9a2 100644
--- a/app/pktgen-pcap.c
+++ b/app/pktgen-pcap.c
@@ -343,9 +343,9 @@ pktgen_pcap_parse(pcap_info_t *pcap, port_info_t *info, unsigned qid)
/* If count is greater then zero then we allocate and create the PCAP mbuf pool. */
if (elt_count > 0) {
/* Create the average size packet */
-   info->pcap->pkt_size= (pkt_sizes / elt_count);
-   info->pcap->pkt_count   = elt_count;
-   info->pcap->pkt_idx = 0;
+  

[dpdk-dev] [PATCH 0/5][pktgen] fixes and minor features

2019-01-10 Thread Rafal Kozik
Hello Keith,

This patch set contains two fixes for issues that were noticed when running
DPDK in a debug configuration. There are also three minor features that
may be useful. The patches are independent of each other.

Could you please let me know whether these changes are appropriate
to be merged into Pktgen?

Best regards,
Rafal

Rafal Kozik (5):
  [pktgen] fix race condition in start
  [pktgen] different PCAP per each queue
  [pktgen] use constants for PCAP creation
  [pktgen] expose number of missed Rx packets
  [pktgen] fix cleanup of not sent packets

 app/lpktgenlib.c  |  1 +
 app/pktgen-cmds.c |  4 ++--
 app/pktgen-main.c | 40 ++--
 app/pktgen-pcap.c | 34 +-
 app/pktgen-port-cfg.c |  8 +++-
 app/pktgen-port-cfg.h |  1 +
 app/pktgen.c  |  7 +++
 7 files changed, 61 insertions(+), 34 deletions(-)

-- 
2.7.4



[dpdk-dev] [PATCH 1/5] [pktgen] fix race condition in start

2019-01-10 Thread Rafal Kozik
The SENDING_PACKETS flag must be set last.
When it is set before the SEND_FOREVER flag, Tx lcores start calling
pktgen_send_pkts. But as current_tx_count is 0 and SEND_FOREVER
is not yet set, SENDING_PACKETS will be cleared.
In some cases this causes the start command to have no effect.
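
The interleaving can be reproduced with a small self-contained model. The
flag names mirror pktgen, but the loop body below only follows the
description above; it is not the pktgen source:

    #include <stdatomic.h>
    #include <stdio.h>

    #define SENDING_PACKETS 0x1
    #define SEND_FOREVER    0x2

    static atomic_uint flags;
    static atomic_long current_tx_count;

    /* What a Tx lcore effectively does on each loop iteration. */
    static void tx_loop_iteration(void)
    {
            unsigned int f = atomic_load(&flags);

            if (!(f & SENDING_PACKETS))
                    return;
            if (atomic_load(&current_tx_count) == 0 && !(f & SEND_FOREVER))
                    atomic_fetch_and(&flags, ~SENDING_PACKETS); /* Tx stops */
            /* else: send a burst of packets */
    }

    int main(void)
    {
            /* Buggy order: SENDING_PACKETS is set before SEND_FOREVER. */
            atomic_store(&current_tx_count, 0);
            atomic_fetch_or(&flags, SENDING_PACKETS);
            tx_loop_iteration();            /* a Tx lcore may run right here */
            atomic_fetch_or(&flags, SEND_FOREVER);

            printf("SENDING_PACKETS still set: %s\n",
                   (atomic_load(&flags) & SENDING_PACKETS) ? "yes" : "no");
            return 0;
    }

Setting SENDING_PACKETS after SEND_FOREVER removes the window in which a
Tx lcore can observe the half-initialized state.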

Fixes: f5f901fdf52b ("Update for 1.8 rte_mbuf changes.")

Signed-off-by: Rafal Kozik 
---
 app/pktgen-cmds.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/pktgen-cmds.c b/app/pktgen-cmds.c
index 95f6ef1..655f6bd 100644
--- a/app/pktgen-cmds.c
+++ b/app/pktgen-cmds.c
@@ -1412,10 +1412,10 @@ pktgen_start_transmitting(port_info_t *info)
rte_atomic64_set(&info->current_tx_count,
 rte_atomic64_read(&info->transmit_count));
 
-   pktgen_set_port_flags(info, SENDING_PACKETS);
-
if (rte_atomic64_read(&info->current_tx_count) == 0)
pktgen_set_port_flags(info, SEND_FOREVER);
+
+   pktgen_set_port_flags(info, SENDING_PACKETS);
}
 }
 
-- 
2.7.4



[dpdk-dev] [PATCH 3/5] [pktgen] use constants for PCAP creation

2019-01-10 Thread Rafal Kozik
Instead of magic numbers and DPDK constants, use the constants defined
in Pktgen. This allows the packet size to be easily adjusted to the user's
needs. For example, jumbo packets could be supported by simply increasing
DEFAULT_MBUF_SIZE and MAX_PKT_SIZE.
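
A minimal sketch of the clamping that now uses the Pktgen constants (the
numeric values below are assumptions standing in for MIN_PKT_SIZE and
MAX_PKT_SIZE; raising MAX_PKT_SIZE together with DEFAULT_MBUF_SIZE is what
would make room for jumbo frames):

    #include <stdint.h>
    #include <stdio.h>

    #define MIN_PKT_SIZE 60         /* assumed: ETHER_MIN_LEN - 4 (CRC) */
    #define MAX_PKT_SIZE 1514       /* assumed: ETHER_MAX_LEN - 4 (CRC) */

    static uint32_t clamp_pkt_len(uint32_t len)
    {
            if (len < MIN_PKT_SIZE)
                    return MIN_PKT_SIZE;
            if (len > MAX_PKT_SIZE)
                    return MAX_PKT_SIZE;
            return len;
    }

    int main(void)
    {
            /* prints: 60 512 1514 */
            printf("%u %u %u\n", (unsigned)clamp_pkt_len(32),
                   (unsigned)clamp_pkt_len(512), (unsigned)clamp_pkt_len(9000));
            return 0;
    }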

Signed-off-by: Rafal Kozik 
---
 app/pktgen-pcap.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/app/pktgen-pcap.c b/app/pktgen-pcap.c
index e7ef9a2..7f380f6 100644
--- a/app/pktgen-pcap.c
+++ b/app/pktgen-pcap.c
@@ -43,7 +43,7 @@ pktgen_print_pcap(uint16_t pid)
pcap_info_t *pcap;
pcaprec_hdr_t pcap_hdr;
char buff[64];
-   char pkt_buff[2048];
+   char pkt_buff[DEFAULT_MBUF_SIZE];
 
pktgen_display_set_color("top.page");
display_topline("");
@@ -216,7 +216,7 @@ pktgen_pcap_mbuf_ctor(struct rte_mempool *mp,
uint32_t mbuf_size, buf_len, priv_size = 0;
pcaprec_hdr_t hdr;
ssize_t len = -1;
-   char buffer[2048];
+   char buffer[DEFAULT_MBUF_SIZE];
pcap_info_t *pcap = (pcap_info_t *)opaque_arg;
 
 #if RTE_VERSION >= RTE_VERSION_NUM(16, 7, 0, 0)
@@ -274,10 +274,10 @@ pktgen_pcap_mbuf_ctor(struct rte_mempool *mp,
len = hdr.incl_len;
 
/* Adjust the packet length if not a valid size. */
-   if (len < (ETHER_MIN_LEN - 4) )
-   len = (ETHER_MIN_LEN - 4);
-   else if (len > (ETHER_MAX_LEN - 4) )
-   len = (ETHER_MAX_LEN - 4);
+   if (len < MIN_PKT_SIZE)
+   len = MIN_PKT_SIZE;
+   else if (len > MAX_PKT_SIZE)
+   len = MAX_PKT_SIZE;
 
m->data_len = len;
m->pkt_len  = len;
@@ -309,7 +309,7 @@ pktgen_pcap_parse(pcap_info_t *pcap, port_info_t *info, unsigned qid)
pcaprec_hdr_t hdr;
uint32_t elt_count, data_size, len, i;
uint64_t pkt_sizes = 0;
-   char buffer[2048];
+   char buffer[DEFAULT_MBUF_SIZE];
char name[RTE_MEMZONE_NAMESIZE];
 
if ( (pcap == NULL) || (info == NULL) )
@@ -327,10 +327,10 @@ pktgen_pcap_parse(pcap_info_t *pcap, port_info_t *info, unsigned qid)
/* Skip any jumbo packets or packets that are too small */
len = hdr.incl_len;
 
-   if (len < (ETHER_MIN_LEN - 4) )
-   len = (ETHER_MIN_LEN - 4);
-   else if (len > (ETHER_MAX_LEN - 4) )
-   len = (ETHER_MAX_LEN - 4);
+   if (len < MIN_PKT_SIZE)
+   len = MIN_PKT_SIZE;
+   else if (len > MAX_PKT_SIZE)
+   len = MAX_PKT_SIZE;
 
elt_count++;
 
-- 
2.7.4



[dpdk-dev] [PATCH 4/5] [pktgen] expose number of missed Rx packets

2019-01-10 Thread Rafal Kozik
Expose the number of missed Rx packets from the DPDK NIC statistics to the Lua API.

Signed-off-by: Rafal Kozik 
---
 app/lpktgenlib.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/app/lpktgenlib.c b/app/lpktgenlib.c
index 9a6307c..e9c7241 100644
--- a/app/lpktgenlib.c
+++ b/app/lpktgenlib.c
@@ -2888,6 +2888,7 @@ port_stats(lua_State *L, port_info_t *info, char *type)
setf_integer(L, "ierrors", stats.ierrors);
setf_integer(L, "oerrors", stats.oerrors);
setf_integer(L, "rx_nombuf", stats.rx_nombuf);
+   setf_integer(L, "imissed", stats.imissed);
 
if (strcmp(type, "rate") == 0) {
setf_integer(L, "pkts_rx", stats.ipackets);
-- 
2.7.4



[dpdk-dev] [PATCH 5/5] [pktgen] fix cleanup of not sent packets

2019-01-10 Thread Rafal Kozik
Packets that were not sent are copied to the beginning of the array to be
reused in the next iteration. But as in some cases more than half of the
mbufs could not be sent, the source and destination regions would overlap.
In such a case rte_memcpy cannot be used.
One of the side effects is sending the same mbuf twice and, as a
consequence, returning it to the mempool twice.
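
A self-contained toy (independent of pktgen and DPDK) illustrates the
overlap and the element-wise forward copy the patch switches to:

    #include <stdio.h>

    int main(void)
    {
            int m_table[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
            unsigned int sent = 3, cnt = 8 - sent, i;

            /*
             * memcpy(&m_table[0], &m_table[sent], cnt * sizeof(m_table[0]))
             * would copy between overlapping regions (cnt > sent here) and
             * is undefined behaviour; copying element by element, front to
             * back, is always safe when the destination precedes the source.
             */
            for (i = 0; i < cnt; i++)
                    m_table[i] = m_table[sent + i];

            for (i = 0; i < cnt; i++)
                    printf("%d ", m_table[i]);      /* 4 5 6 7 8 */
            printf("\n");
            return 0;
    }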

Fixes: 2bf1eecea240 ("fixup code for 18.05 and cleanup")

Signed-off-by: Rafal Kozik 
---
 app/pktgen.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/app/pktgen.c b/app/pktgen.c
index f7eee07..c1039a6 100644
--- a/app/pktgen.c
+++ b/app/pktgen.c
@@ -272,11 +272,10 @@ _send_burst_fast(port_info_t *info, uint16_t qid)
 {
struct mbuf_table   *mtab = &info->q[qid].tx_mbufs;
struct rte_mbuf **pkts;
-   uint32_t ret, cnt, sav, retry;
+   uint32_t ret, cnt, retry, i;
 
cnt = mtab->len;
mtab->len = 0;
-   sav = cnt;
 
pkts = mtab->m_table;
 
@@ -299,8 +298,8 @@ _send_burst_fast(port_info_t *info, uint16_t qid)
}
}
if (cnt) {
-   rte_memcpy(&mtab->m_table[0], &mtab->m_table[sav - cnt],
-  sizeof(char *) * cnt);
+   for (i = 0; i < cnt; i++)
+   mtab->m_table[i] = pkts[i];
mtab->len = cnt;
}
 }
-- 
2.7.4



[dpdk-dev] [PATCH] doc: update for ENA supported drivers

2018-10-05 Thread Rafal Kozik
Add instructions on how to bind the ENA device to the VFIO-PCI driver.

Fixes: cf8a122c296a ("ena: introduce documentation")
Cc: sta...@dpdk.org

Signed-off-by: Zorik Machulsky 
Signed-off-by: Rafal Kozik 
Acked-by: Michal Krawczyk 
---
 doc/guides/nics/ena.rst  | 12 +---
 doc/guides/nics/features/ena.ini |  1 +
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/doc/guides/nics/ena.rst b/doc/guides/nics/ena.rst
index d19912e..0dfefc6 100644
--- a/doc/guides/nics/ena.rst
+++ b/doc/guides/nics/ena.rst
@@ -187,11 +187,17 @@ Prerequisites
 -
 
 #. Prepare the system as recommended by DPDK suite.  This includes environment
-   variables, hugepages configuration, tool-chains and configuration
+   variables, hugepages configuration, tool-chains and configuration.
 
-#. Insert igb_uio kernel module using the command 'modprobe igb_uio'
+#. ENA PMD can operate with vfio-pci or igb_uio driver.
 
-#. Bind the intended ENA device to igb_uio module
+#. Insert vfio-pci or igb_uio kernel module using the command
+   'modprobe vfio-pci' or 'modprobe igb_uio' respectively.
+
+#. If vfio-pci driver is used please make sure noiommu mode is enabled:
+   echo 1 > /sys/module/vfio/parameters/enable_unsafe_noiommu_mode
+
+#. Bind the intended ENA device to vfio-pci or igb_uio module.
 
 
 At this point the system should be ready to run DPDK applications. Once the
diff --git a/doc/guides/nics/features/ena.ini b/doc/guides/nics/features/ena.ini
index 691c1e3..aa6f05a 100644
--- a/doc/guides/nics/features/ena.ini
+++ b/doc/guides/nics/features/ena.ini
@@ -23,5 +23,6 @@ Inner L4 checksum= Y
 Basic stats  = Y
 Extended stats   = Y
 Linux UIO= Y
+Linux VFIO   = Y
 x86-32   = Y
 x86-64   = Y
-- 
2.7.4



[dpdk-dev] [PATCH v2] doc: update for ENA supported drivers

2018-10-10 Thread Rafal Kozik
Add instructions on how to bind the ENA device to the VFIO-PCI driver.

Fixes: cf8a122c296a ("ena: introduce documentation")
Cc: sta...@dpdk.org

Signed-off-by: Zorik Machulsky 
Signed-off-by: Rafal Kozik 
Acked-by: Michal Krawczyk 
---
v2:
Describe cases with and without IOMMU.
---
 doc/guides/nics/ena.rst  | 14 +++---
 doc/guides/nics/features/ena.ini |  1 +
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/doc/guides/nics/ena.rst b/doc/guides/nics/ena.rst
index d19912e..77399f4 100644
--- a/doc/guides/nics/ena.rst
+++ b/doc/guides/nics/ena.rst
@@ -187,11 +187,19 @@ Prerequisites
 -
 
 #. Prepare the system as recommended by DPDK suite.  This includes environment
-   variables, hugepages configuration, tool-chains and configuration
+   variables, hugepages configuration, tool-chains and configuration.
 
-#. Insert igb_uio kernel module using the command 'modprobe igb_uio'
+#. ENA PMD can operate with vfio-pci or igb_uio driver.
 
-#. Bind the intended ENA device to igb_uio module
+#. Insert vfio-pci or igb_uio kernel module using the command
+   'modprobe vfio-pci' or 'modprobe igb_uio' respectively.
+
+#. For VFIO-PCI users only:
+   Please make sure that IOMMU is enabled in your system,
+   or use VFIO driver in noiommu mode:
+   echo 1 > /sys/module/vfio/parameters/enable_unsafe_noiommu_mode
+
+#. Bind the intended ENA device to vfio-pci or igb_uio module.
 
 
 At this point the system should be ready to run DPDK applications. Once the
diff --git a/doc/guides/nics/features/ena.ini b/doc/guides/nics/features/ena.ini
index 691c1e3..aa6f05a 100644
--- a/doc/guides/nics/features/ena.ini
+++ b/doc/guides/nics/features/ena.ini
@@ -23,5 +23,6 @@ Inner L4 checksum= Y
 Basic stats  = Y
 Extended stats   = Y
 Linux UIO= Y
+Linux VFIO   = Y
 x86-32   = Y
 x86-64   = Y
-- 
2.7.4



[dpdk-dev] [PATCH v2 1/4] igb_uio: add wc option

2018-06-28 Thread Rafal Kozik
Write combining (WC) increases NIC performance by making better
utilization of the PCI bus, but it cannot be used by all PMDs.

To get internal_addr, the memory needs to be mapped. But as memory cannot
be mapped twice, with and without WC, the mapping should be skipped when
WC is used. [1]

To avoid affecting other drivers that could potentially use internal_addr,
the wc_activate parameter adds the possibility to skip the mapping for
those PMDs that do not use it.

[1] https://www.kernel.org/doc/ols/2008/ols2008v2-pages-135-144.pdf
section 5.3 and 5.4

Signed-off-by: Rafal Kozik 
Acked-by: Bruce Richardson 
---
 kernel/linux/igb_uio/igb_uio.c | 17 ++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/kernel/linux/igb_uio/igb_uio.c b/kernel/linux/igb_uio/igb_uio.c
index b3233f1..3382fb1 100644
--- a/kernel/linux/igb_uio/igb_uio.c
+++ b/kernel/linux/igb_uio/igb_uio.c
@@ -30,6 +30,7 @@ struct rte_uio_pci_dev {
int refcnt;
 };
 
+static int wc_activate;
 static char *intr_mode;
 static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX;
 /* sriov sysfs */
@@ -375,9 +376,13 @@ igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info,
len = pci_resource_len(dev, pci_bar);
if (addr == 0 || len == 0)
return -1;
-   internal_addr = ioremap(addr, len);
-   if (internal_addr == NULL)
-   return -1;
+   if (wc_activate == 0) {
+   internal_addr = ioremap(addr, len);
+   if (internal_addr == NULL)
+   return -1;
+   } else {
+   internal_addr = NULL;
+   }
info->mem[n].name = name;
info->mem[n].addr = addr;
info->mem[n].internal_addr = internal_addr;
@@ -650,6 +655,12 @@ MODULE_PARM_DESC(intr_mode,
 "" RTE_INTR_MODE_LEGACY_NAME " Use Legacy interrupt\n"
 "\n");
 
+module_param(wc_activate, int, 0);
+MODULE_PARM_DESC(wc_activate,
+"Activate support for write combining (WC) (default=0)\n"
+"0 - disable\n"
+"other - enable\n");
+
 MODULE_DESCRIPTION("UIO driver for Intel IGB PCI cards");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Intel Corporation");
-- 
2.7.4



[dpdk-dev] [PATCH v2 0/4] support for write combining

2018-06-28 Thread Rafal Kozik
Support for write combining.

---
v2:
 * Rebased on top of master.
 * Fix typos.
 * Make commit messages more verbose.
 * Add comments.
 * Initialize fd.

Rafal Kozik (4):
  igb_uio: add wc option
  bus/pci: reference driver structure
  eal: enable WC during resources mapping
  net/ena: enable WC

 drivers/bus/pci/linux/pci_uio.c | 41 +
 drivers/bus/pci/pci_common.c| 17 -
 drivers/bus/pci/rte_bus_pci.h   |  2 ++
 drivers/net/ena/ena_ethdev.c|  3 ++-
 kernel/linux/igb_uio/igb_uio.c  | 17 ++---
 5 files changed, 59 insertions(+), 21 deletions(-)

-- 
2.7.4



[dpdk-dev] [PATCH v2 3/4] eal: enable WC during resources mapping

2018-06-28 Thread Rafal Kozik
Write combining (WC) increases NIC performance by making better
utilization of the PCI bus, but it cannot be used by all PMDs.

It will be enabled only if RTE_PCI_DRV_WC_ACTIVATE is set in the driver's
flags. For proper operation the igb_uio driver must also be loaded with
wc_activate set to 1.

When mapping PCI resources, WC is tried first.
If it is not supported, the mapping falls back to normal mode.

Signed-off-by: Rafal Kozik 
Acked-by: Bruce Richardson 
---
 drivers/bus/pci/linux/pci_uio.c | 41 +
 drivers/bus/pci/rte_bus_pci.h   |  2 ++
 2 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
index d423e4b..e3947c2 100644
--- a/drivers/bus/pci/linux/pci_uio.c
+++ b/drivers/bus/pci/linux/pci_uio.c
@@ -282,22 +282,19 @@ int
 pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
struct mapped_pci_resource *uio_res, int map_idx)
 {
-   int fd;
+   int fd = -1;
char devname[PATH_MAX];
void *mapaddr;
struct rte_pci_addr *loc;
struct pci_map *maps;
+   int wc_activate = 0;
+
+   if (dev->driver != NULL)
+   wc_activate = dev->driver->drv_flags & RTE_PCI_DRV_WC_ACTIVATE;
 
loc = &dev->addr;
maps = uio_res->maps;
 
-   /* update devname for mmap  */
-   snprintf(devname, sizeof(devname),
-   "%s/" PCI_PRI_FMT "/resource%d",
-   rte_pci_get_sysfs_path(),
-   loc->domain, loc->bus, loc->devid,
-   loc->function, res_idx);
-
/* allocate memory to keep path */
maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
if (maps[map_idx].path == NULL) {
@@ -309,11 +306,31 @@ pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
/*
 * open resource file, to mmap it
 */
-   fd = open(devname, O_RDWR);
-   if (fd < 0) {
-   RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+   if (wc_activate) {
+   /* update devname for mmap  */
+   snprintf(devname, sizeof(devname),
+   "%s/" PCI_PRI_FMT "/resource%d_wc",
+   rte_pci_get_sysfs_path(),
+   loc->domain, loc->bus, loc->devid,
+   loc->function, res_idx);
+
+   fd = open(devname, O_RDWR);
+   }
+
+   if (!wc_activate || fd < 0) {
+   snprintf(devname, sizeof(devname),
+   "%s/" PCI_PRI_FMT "/resource%d",
+   rte_pci_get_sysfs_path(),
+   loc->domain, loc->bus, loc->devid,
+   loc->function, res_idx);
+
+   /* then try to map resource file */
+   fd = open(devname, O_RDWR);
+   if (fd < 0) {
+   RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
devname, strerror(errno));
-   goto error;
+   goto error;
+   }
}
 
/* try mapping somewhere close to the end of hugepages */
diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
index 458e6d0..828acc5 100644
--- a/drivers/bus/pci/rte_bus_pci.h
+++ b/drivers/bus/pci/rte_bus_pci.h
@@ -135,6 +135,8 @@ struct rte_pci_bus {
 
 /** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */
 #define RTE_PCI_DRV_NEED_MAPPING 0x0001
+/** Device needs PCI BAR mapping with enabled write combining (wc) */
+#define RTE_PCI_DRV_WC_ACTIVATE 0x0002
 /** Device driver supports link state interrupt */
 #define RTE_PCI_DRV_INTR_LSC   0x0008
 /** Device driver supports device removal interrupt */
-- 
2.7.4



[dpdk-dev] [PATCH v2 2/4] bus/pci: reference driver structure

2018-06-28 Thread Rafal Kozik
Add a pointer to the driver structure before calling rte_pci_map_device.
This allows driver flags to be used for adjusting the configuration.

Signed-off-by: Rafal Kozik 
Acked-by: Bruce Richardson 

---
 drivers/bus/pci/pci_common.c | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index d8151b0..8f5d77f 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -158,17 +158,24 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
RTE_LOG(INFO, EAL, "  probe driver: %x:%x %s\n", dev->id.vendor_id,
dev->id.device_id, dr->driver.name);
 
+   /*
+* reference driver structure
+* This need to be before rte_pci_map_device(), as it enable to use
+* driver flags for adjusting configuration.
+*/
+   dev->driver = dr;
+   dev->device.driver = &dr->driver;
+
if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
/* map resources for devices that use igb_uio */
ret = rte_pci_map_device(dev);
-   if (ret != 0)
+   if (ret != 0) {
+   dev->driver = NULL;
+   dev->device.driver = NULL;
return ret;
+   }
}
 
-   /* reference driver structure */
-   dev->driver = dr;
-   dev->device.driver = &dr->driver;
-
/* call the driver probe() function */
ret = dr->probe(dr, dev);
if (ret) {
-- 
2.7.4



[dpdk-dev] [PATCH v2 4/4] net/ena: enable WC

2018-06-28 Thread Rafal Kozik
Write combining (WC) increases NIC performance by making better
utilization of the PCI bus. The ENA PMD may make use of this feature.

To enable it, load the igb_uio driver with wc_activate set to 1.

Signed-off-by: Rafal Kozik 
Acked-by: Bruce Richardson 
---
 drivers/net/ena/ena_ethdev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 9ae73e3..1870edf 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -2210,7 +2210,8 @@ static int eth_ena_pci_remove(struct rte_pci_device *pci_dev)
 
 static struct rte_pci_driver rte_ena_pmd = {
.id_table = pci_id_ena_map,
-   .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+   .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+RTE_PCI_DRV_WC_ACTIVATE,
.probe = eth_ena_pci_probe,
.remove = eth_ena_pci_remove,
 };
-- 
2.7.4



[dpdk-dev] [PATCH v3 0/4] support for write combining

2018-06-29 Thread Rafal Kozik
Support for write combining.

---
v2:
 * Rebased on top of master.
 * Fix typos.
 * Make commit messages more verbose.
 * Add comments.
 * Initialize fd.

---
v3:
 * Log if BAR was mapped with or without support for WC.

Rafal Kozik (4):
  igb_uio: add wc option
  bus/pci: reference driver structure
  eal: enable WC during resources mapping
  net/ena: enable WC

 drivers/bus/pci/linux/pci_uio.c | 44 ++---
 drivers/bus/pci/pci_common.c| 17 +++-
 drivers/bus/pci/rte_bus_pci.h   |  2 ++
 drivers/net/ena/ena_ethdev.c|  3 ++-
 kernel/linux/igb_uio/igb_uio.c  | 17 +---
 5 files changed, 62 insertions(+), 21 deletions(-)

-- 
2.7.4



[dpdk-dev] [PATCH v3 1/4] igb_uio: add wc option

2018-06-29 Thread Rafal Kozik
Write combining (WC) increases NIC performance by making better
utilization of the PCI bus, but it cannot be used by all PMDs.

To get internal_addr, the memory needs to be mapped. But as memory cannot
be mapped twice, with and without WC, the mapping should be skipped when
WC is used. [1]

To avoid affecting other drivers that could potentially use internal_addr,
the wc_activate parameter adds the possibility to skip the mapping for
those PMDs that do not use it.

[1] https://www.kernel.org/doc/ols/2008/ols2008v2-pages-135-144.pdf
section 5.3 and 5.4

Signed-off-by: Rafal Kozik 
Acked-by: Bruce Richardson 
---
 kernel/linux/igb_uio/igb_uio.c | 17 ++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/kernel/linux/igb_uio/igb_uio.c b/kernel/linux/igb_uio/igb_uio.c
index b3233f1..3382fb1 100644
--- a/kernel/linux/igb_uio/igb_uio.c
+++ b/kernel/linux/igb_uio/igb_uio.c
@@ -30,6 +30,7 @@ struct rte_uio_pci_dev {
int refcnt;
 };
 
+static int wc_activate;
 static char *intr_mode;
 static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX;
 /* sriov sysfs */
@@ -375,9 +376,13 @@ igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info,
len = pci_resource_len(dev, pci_bar);
if (addr == 0 || len == 0)
return -1;
-   internal_addr = ioremap(addr, len);
-   if (internal_addr == NULL)
-   return -1;
+   if (wc_activate == 0) {
+   internal_addr = ioremap(addr, len);
+   if (internal_addr == NULL)
+   return -1;
+   } else {
+   internal_addr = NULL;
+   }
info->mem[n].name = name;
info->mem[n].addr = addr;
info->mem[n].internal_addr = internal_addr;
@@ -650,6 +655,12 @@ MODULE_PARM_DESC(intr_mode,
 "" RTE_INTR_MODE_LEGACY_NAME " Use Legacy interrupt\n"
 "\n");
 
+module_param(wc_activate, int, 0);
+MODULE_PARM_DESC(wc_activate,
+"Activate support for write combining (WC) (default=0)\n"
+"0 - disable\n"
+"other - enable\n");
+
 MODULE_DESCRIPTION("UIO driver for Intel IGB PCI cards");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Intel Corporation");
-- 
2.7.4



[dpdk-dev] [PATCH v3 2/4] bus/pci: reference driver structure

2018-06-29 Thread Rafal Kozik
Add a pointer to the driver structure before calling rte_pci_map_device.
This allows driver flags to be used for adjusting the configuration.

Signed-off-by: Rafal Kozik 
Acked-by: Bruce Richardson 
---
 drivers/bus/pci/pci_common.c | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index d8151b0..8f5d77f 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -158,17 +158,24 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
RTE_LOG(INFO, EAL, "  probe driver: %x:%x %s\n", dev->id.vendor_id,
dev->id.device_id, dr->driver.name);
 
+   /*
+* reference driver structure
+* This need to be before rte_pci_map_device(), as it enable to use
+* driver flags for adjusting configuration.
+*/
+   dev->driver = dr;
+   dev->device.driver = &dr->driver;
+
if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
/* map resources for devices that use igb_uio */
ret = rte_pci_map_device(dev);
-   if (ret != 0)
+   if (ret != 0) {
+   dev->driver = NULL;
+   dev->device.driver = NULL;
return ret;
+   }
}
 
-   /* reference driver structure */
-   dev->driver = dr;
-   dev->device.driver = &dr->driver;
-
/* call the driver probe() function */
ret = dr->probe(dr, dev);
if (ret) {
-- 
2.7.4



[dpdk-dev] [PATCH v3 3/4] eal: enable WC during resources mapping

2018-06-29 Thread Rafal Kozik
Write combining (WC) increases NIC performance by making better
utilization of the PCI bus, but it cannot be used by all PMDs.

It will be enabled only if RTE_PCI_DRV_WC_ACTIVATE is set in the driver's
flags. For proper operation the igb_uio driver must also be loaded with
wc_activate set to 1.

When mapping PCI resources, WC is tried first.
If it is not supported, the mapping falls back to normal mode.

Signed-off-by: Rafal Kozik 
Acked-by: Bruce Richardson 
---
 drivers/bus/pci/linux/pci_uio.c | 44 ++---
 drivers/bus/pci/rte_bus_pci.h   |  2 ++
 2 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
index d423e4b..fb02f0a 100644
--- a/drivers/bus/pci/linux/pci_uio.c
+++ b/drivers/bus/pci/linux/pci_uio.c
@@ -282,22 +282,19 @@ int
 pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
struct mapped_pci_resource *uio_res, int map_idx)
 {
-   int fd;
+   int fd = -1;
char devname[PATH_MAX];
void *mapaddr;
struct rte_pci_addr *loc;
struct pci_map *maps;
+   int wc_activate = 0;
+
+   if (dev->driver != NULL)
+   wc_activate = dev->driver->drv_flags & RTE_PCI_DRV_WC_ACTIVATE;
 
loc = &dev->addr;
maps = uio_res->maps;
 
-   /* update devname for mmap  */
-   snprintf(devname, sizeof(devname),
-   "%s/" PCI_PRI_FMT "/resource%d",
-   rte_pci_get_sysfs_path(),
-   loc->domain, loc->bus, loc->devid,
-   loc->function, res_idx);
-
/* allocate memory to keep path */
maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
if (maps[map_idx].path == NULL) {
@@ -309,11 +306,34 @@ pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
/*
 * open resource file, to mmap it
 */
-   fd = open(devname, O_RDWR);
-   if (fd < 0) {
-   RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+   if (wc_activate) {
+   /* update devname for mmap  */
+   snprintf(devname, sizeof(devname),
+   "%s/" PCI_PRI_FMT "/resource%d_wc",
+   rte_pci_get_sysfs_path(),
+   loc->domain, loc->bus, loc->devid,
+   loc->function, res_idx);
+
+   fd = open(devname, O_RDWR);
+   if (fd >= 0)
+   RTE_LOG(INFO, EAL, "%s mapped\n", devname);
+   }
+
+   if (!wc_activate || fd < 0) {
+   snprintf(devname, sizeof(devname),
+   "%s/" PCI_PRI_FMT "/resource%d",
+   rte_pci_get_sysfs_path(),
+   loc->domain, loc->bus, loc->devid,
+   loc->function, res_idx);
+
+   /* then try to map resource file */
+   fd = open(devname, O_RDWR);
+   if (fd < 0) {
+   RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
devname, strerror(errno));
-   goto error;
+   goto error;
+   }
+   RTE_LOG(INFO, EAL, "%s mapped\n", devname);
}
 
/* try mapping somewhere close to the end of hugepages */
diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
index 458e6d0..828acc5 100644
--- a/drivers/bus/pci/rte_bus_pci.h
+++ b/drivers/bus/pci/rte_bus_pci.h
@@ -135,6 +135,8 @@ struct rte_pci_bus {
 
 /** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */
 #define RTE_PCI_DRV_NEED_MAPPING 0x0001
+/** Device needs PCI BAR mapping with enabled write combining (wc) */
+#define RTE_PCI_DRV_WC_ACTIVATE 0x0002
 /** Device driver supports link state interrupt */
 #define RTE_PCI_DRV_INTR_LSC   0x0008
 /** Device driver supports device removal interrupt */
-- 
2.7.4



[dpdk-dev] [PATCH v3 4/4] net/ena: enable WC

2018-06-29 Thread Rafal Kozik
Write combining (WC) increases NIC performance by making better
utilization of the PCI bus. The ENA PMD may make use of this feature.

To enable it, load the igb_uio driver with wc_activate set to 1.

Signed-off-by: Rafal Kozik 
Acked-by: Bruce Richardson 
---
 drivers/net/ena/ena_ethdev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 9ae73e3..1870edf 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -2210,7 +2210,8 @@ static int eth_ena_pci_remove(struct rte_pci_device *pci_dev)
 
 static struct rte_pci_driver rte_ena_pmd = {
.id_table = pci_id_ena_map,
-   .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+   .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+RTE_PCI_DRV_WC_ACTIVATE,
.probe = eth_ena_pci_probe,
.remove = eth_ena_pci_remove,
 };
-- 
2.7.4



[dpdk-dev] [PATCH v4 2/4] bus/pci: reference driver structure

2018-06-29 Thread Rafal Kozik
From: Kozik 

Add a pointer to the driver structure before calling rte_pci_map_device.
This allows driver flags to be used for adjusting the configuration.

Signed-off-by: Rafal Kozik 
Acked-by: Bruce Richardson 
---
 drivers/bus/pci/pci_common.c | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index d8151b0..8f5d77f 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -158,17 +158,24 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
RTE_LOG(INFO, EAL, "  probe driver: %x:%x %s\n", dev->id.vendor_id,
dev->id.device_id, dr->driver.name);
 
+   /*
+* reference driver structure
+* This need to be before rte_pci_map_device(), as it enable to use
+* driver flags for adjusting configuration.
+*/
+   dev->driver = dr;
+   dev->device.driver = &dr->driver;
+
if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
/* map resources for devices that use igb_uio */
ret = rte_pci_map_device(dev);
-   if (ret != 0)
+   if (ret != 0) {
+   dev->driver = NULL;
+   dev->device.driver = NULL;
return ret;
+   }
}
 
-   /* reference driver structure */
-   dev->driver = dr;
-   dev->device.driver = &dr->driver;
-
/* call the driver probe() function */
ret = dr->probe(dr, dev);
if (ret) {
-- 
2.7.4



[dpdk-dev] [PATCH v4 0/4] Support for write combining.

2018-06-29 Thread Rafal Kozik
Support for write combining.

---
v2:
 * Rebased on top of master.
 * Fix typos.
 * Make commit messages more verbose.
 * Add comments.
 * Initialize fd.

---
v3:
 * Log if BAR was mapped with or without support for WC.

---
v4:
 * Before opening PCI resource, check if it supports WC.
 * Log only when WC mapping failed.
 * Log when wc_activate is set in igb_uio driver.

Kozik (4):
  igb_uio: add wc option
  bus/pci: reference driver structure
  eal: enable WC during resources mapping
  net/ena: enable WC

 drivers/bus/pci/linux/pci_uio.c | 47 ++---
 drivers/bus/pci/pci_common.c| 17 ++-
 drivers/bus/pci/rte_bus_pci.h   |  2 ++
 drivers/net/ena/ena_ethdev.c|  3 ++-
 kernel/linux/igb_uio/igb_uio.c  | 18 +---
 5 files changed, 66 insertions(+), 21 deletions(-)

-- 
2.7.4



[dpdk-dev] [PATCH v4 1/4] igb_uio: add wc option

2018-06-29 Thread Rafal Kozik
From: Kozik 

Write combining (WC) increases NIC performance by making better
utilization of the PCI bus, but it cannot be used by all PMDs.

To get internal_addr, the memory needs to be mapped. But as memory cannot
be mapped twice, with and without WC, the mapping should be skipped when
WC is used. [1]

To avoid affecting other drivers that could potentially use internal_addr,
the wc_activate parameter adds the possibility to skip the mapping for
those PMDs that do not use it.

[1] https://www.kernel.org/doc/ols/2008/ols2008v2-pages-135-144.pdf
section 5.3 and 5.4

Signed-off-by: Rafal Kozik 
Acked-by: Bruce Richardson 
---
 kernel/linux/igb_uio/igb_uio.c | 18 +++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/kernel/linux/igb_uio/igb_uio.c b/kernel/linux/igb_uio/igb_uio.c
index b3233f1..e16e760 100644
--- a/kernel/linux/igb_uio/igb_uio.c
+++ b/kernel/linux/igb_uio/igb_uio.c
@@ -30,6 +30,7 @@ struct rte_uio_pci_dev {
int refcnt;
 };
 
+static int wc_activate;
 static char *intr_mode;
 static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX;
 /* sriov sysfs */
@@ -375,9 +376,14 @@ igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info,
len = pci_resource_len(dev, pci_bar);
if (addr == 0 || len == 0)
return -1;
-   internal_addr = ioremap(addr, len);
-   if (internal_addr == NULL)
-   return -1;
+   if (wc_activate == 0) {
+   internal_addr = ioremap(addr, len);
+   if (internal_addr == NULL)
+   return -1;
+   } else {
+   internal_addr = NULL;
+   pr_info("wc_activate is set\n");
+   }
info->mem[n].name = name;
info->mem[n].addr = addr;
info->mem[n].internal_addr = internal_addr;
@@ -650,6 +656,12 @@ MODULE_PARM_DESC(intr_mode,
 "" RTE_INTR_MODE_LEGACY_NAME " Use Legacy interrupt\n"
 "\n");
 
+module_param(wc_activate, int, 0);
+MODULE_PARM_DESC(wc_activate,
+"Activate support for write combining (WC) (default=0)\n"
+"0 - disable\n"
+"other - enable\n");
+
 MODULE_DESCRIPTION("UIO driver for Intel IGB PCI cards");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Intel Corporation");
-- 
2.7.4



[dpdk-dev] [PATCH v4 3/4] eal: enable WC during resources mapping

2018-06-29 Thread Rafal Kozik
From: Kozik 

Write combining (WC) increases NIC performance by making better
utilization of the PCI bus, but it cannot be used by all PMDs.

It will be enabled only if RTE_PCI_DRV_WC_ACTIVATE is set in the driver's
flags. For proper operation the igb_uio driver must also be loaded with
wc_activate set to 1.

When mapping PCI resources, first check whether WC is supported
and try to use it.
In case of failure, the mapping falls back to normal mode.

Signed-off-by: Rafal Kozik 
Acked-by: Bruce Richardson 
---
 drivers/bus/pci/linux/pci_uio.c | 47 ++---
 drivers/bus/pci/rte_bus_pci.h   |  2 ++
 2 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
index d423e4b..a7c1442 100644
--- a/drivers/bus/pci/linux/pci_uio.c
+++ b/drivers/bus/pci/linux/pci_uio.c
@@ -282,22 +282,19 @@ int
 pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
struct mapped_pci_resource *uio_res, int map_idx)
 {
-   int fd;
+   int fd = -1;
char devname[PATH_MAX];
void *mapaddr;
struct rte_pci_addr *loc;
struct pci_map *maps;
+   int wc_activate = 0;
+
+   if (dev->driver != NULL)
+   wc_activate = dev->driver->drv_flags & RTE_PCI_DRV_WC_ACTIVATE;
 
loc = &dev->addr;
maps = uio_res->maps;
 
-   /* update devname for mmap  */
-   snprintf(devname, sizeof(devname),
-   "%s/" PCI_PRI_FMT "/resource%d",
-   rte_pci_get_sysfs_path(),
-   loc->domain, loc->bus, loc->devid,
-   loc->function, res_idx);
-
/* allocate memory to keep path */
maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
if (maps[map_idx].path == NULL) {
@@ -309,11 +306,37 @@ pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
/*
 * open resource file, to mmap it
 */
-   fd = open(devname, O_RDWR);
-   if (fd < 0) {
-   RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+   if (wc_activate) {
+   /* update devname for mmap  */
+   snprintf(devname, sizeof(devname),
+   "%s/" PCI_PRI_FMT "/resource%d_wc",
+   rte_pci_get_sysfs_path(),
+   loc->domain, loc->bus, loc->devid,
+   loc->function, res_idx);
+
+   if (access(devname, R_OK|W_OK) != -1) {
+   fd = open(devname, O_RDWR);
+   if (fd < 0)
+   RTE_LOG(INFO, EAL, "%s cannot be mapped. "
+   "Fall-back to non prefetchable mode.\n",
+   devname);
+   }
+   }
+
+   if (!wc_activate || fd < 0) {
+   snprintf(devname, sizeof(devname),
+   "%s/" PCI_PRI_FMT "/resource%d",
+   rte_pci_get_sysfs_path(),
+   loc->domain, loc->bus, loc->devid,
+   loc->function, res_idx);
+
+   /* then try to map resource file */
+   fd = open(devname, O_RDWR);
+   if (fd < 0) {
+   RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
devname, strerror(errno));
-   goto error;
+   goto error;
+   }
}
 
/* try mapping somewhere close to the end of hugepages */
diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
index 458e6d0..828acc5 100644
--- a/drivers/bus/pci/rte_bus_pci.h
+++ b/drivers/bus/pci/rte_bus_pci.h
@@ -135,6 +135,8 @@ struct rte_pci_bus {
 
 /** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */
 #define RTE_PCI_DRV_NEED_MAPPING 0x0001
+/** Device needs PCI BAR mapping with enabled write combining (wc) */
+#define RTE_PCI_DRV_WC_ACTIVATE 0x0002
 /** Device driver supports link state interrupt */
 #define RTE_PCI_DRV_INTR_LSC   0x0008
 /** Device driver supports device removal interrupt */
-- 
2.7.4



[dpdk-dev] [PATCH v4 4/4] net/ena: enable WC

2018-06-29 Thread Rafal Kozik
From: Kozik 

Write combining (WC) increases NIC performance by making better
utilization of the PCI bus. The ENA PMD may make use of this feature.

To enable it, load the igb_uio driver with wc_activate set to 1.

Signed-off-by: Rafal Kozik 
Acked-by: Bruce Richardson 
---
 drivers/net/ena/ena_ethdev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 9ae73e3..1870edf 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -2210,7 +2210,8 @@ static int eth_ena_pci_remove(struct rte_pci_device *pci_dev)
 
 static struct rte_pci_driver rte_ena_pmd = {
.id_table = pci_id_ena_map,
-   .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+   .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+RTE_PCI_DRV_WC_ACTIVATE,
.probe = eth_ena_pci_probe,
.remove = eth_ena_pci_remove,
 };
-- 
2.7.4



[dpdk-dev] [PATCH v5 0/4] Support for write combining.

2018-06-29 Thread Rafal Kozik
Support for write combining.

---
v2:
 * Rebased on top of master.
 * Fix typos.
 * Make commit messages more verbose.
 * Add comments.
 * Initialize fd.

---
v3:
 * Log if BAR was mapped with or without support for WC.

---
v4:
 * Log only if WC mapping failed.
 * Log if wc_activate is set in igb_uio driver.

---
v5:
 * Log message in igb_uio will be printed only once.

Kozik (4):
  igb_uio: add wc option
  bus/pci: reference driver structure
  eal: enable WC during resources mapping
  net/ena: enable WC

 drivers/bus/pci/linux/pci_uio.c | 47 ++---
 drivers/bus/pci/pci_common.c| 17 ++-
 drivers/bus/pci/rte_bus_pci.h   |  2 ++
 drivers/net/ena/ena_ethdev.c|  3 ++-
 kernel/linux/igb_uio/igb_uio.c  | 20 +++---
 5 files changed, 68 insertions(+), 21 deletions(-)

-- 
2.7.4



[dpdk-dev] [PATCH v5 4/4] net/ena: enable WC

2018-06-29 Thread Rafal Kozik
From: Kozik 

Write combining (WC) increases NIC performance by making better
utilization of the PCI bus. The ENA PMD may make use of this feature.

To enable it, load the igb_uio driver with wc_activate set to 1.

Signed-off-by: Rafal Kozik 
Acked-by: Bruce Richardson 
---
 drivers/net/ena/ena_ethdev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 9ae73e3..1870edf 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -2210,7 +2210,8 @@ static int eth_ena_pci_remove(struct rte_pci_device *pci_dev)
 
 static struct rte_pci_driver rte_ena_pmd = {
.id_table = pci_id_ena_map,
-   .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+   .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+RTE_PCI_DRV_WC_ACTIVATE,
.probe = eth_ena_pci_probe,
.remove = eth_ena_pci_remove,
 };
-- 
2.7.4



[dpdk-dev] [PATCH v5 3/4] eal: enable WC during resources mapping

2018-06-29 Thread Rafal Kozik
From: Kozik 

Write combining (WC) increases NIC performance by making better
utilization of the PCI bus, but it cannot be used by all PMDs.

It will be enabled only if RTE_PCI_DRV_WC_ACTIVATE is set in the driver's
flags. For proper operation the igb_uio driver must also be loaded with
wc_activate set to 1.

When mapping PCI resources, first check whether WC is supported
and then try to use it.
In case of failure, the mapping falls back to normal mode.

Signed-off-by: Rafal Kozik 
Acked-by: Bruce Richardson 
---
 drivers/bus/pci/linux/pci_uio.c | 47 ++---
 drivers/bus/pci/rte_bus_pci.h   |  2 ++
 2 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
index d423e4b..a7c1442 100644
--- a/drivers/bus/pci/linux/pci_uio.c
+++ b/drivers/bus/pci/linux/pci_uio.c
@@ -282,22 +282,19 @@ int
 pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
struct mapped_pci_resource *uio_res, int map_idx)
 {
-   int fd;
+   int fd = -1;
char devname[PATH_MAX];
void *mapaddr;
struct rte_pci_addr *loc;
struct pci_map *maps;
+   int wc_activate = 0;
+
+   if (dev->driver != NULL)
+   wc_activate = dev->driver->drv_flags & RTE_PCI_DRV_WC_ACTIVATE;
 
loc = &dev->addr;
maps = uio_res->maps;
 
-   /* update devname for mmap  */
-   snprintf(devname, sizeof(devname),
-   "%s/" PCI_PRI_FMT "/resource%d",
-   rte_pci_get_sysfs_path(),
-   loc->domain, loc->bus, loc->devid,
-   loc->function, res_idx);
-
/* allocate memory to keep path */
maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
if (maps[map_idx].path == NULL) {
@@ -309,11 +306,37 @@ pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
/*
 * open resource file, to mmap it
 */
-   fd = open(devname, O_RDWR);
-   if (fd < 0) {
-   RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+   if (wc_activate) {
+   /* update devname for mmap  */
+   snprintf(devname, sizeof(devname),
+   "%s/" PCI_PRI_FMT "/resource%d_wc",
+   rte_pci_get_sysfs_path(),
+   loc->domain, loc->bus, loc->devid,
+   loc->function, res_idx);
+
+   if (access(devname, R_OK|W_OK) != -1) {
+   fd = open(devname, O_RDWR);
+   if (fd < 0)
+   RTE_LOG(INFO, EAL, "%s cannot be mapped. "
+   "Fall-back to non prefetchable mode.\n",
+   devname);
+   }
+   }
+
+   if (!wc_activate || fd < 0) {
+   snprintf(devname, sizeof(devname),
+   "%s/" PCI_PRI_FMT "/resource%d",
+   rte_pci_get_sysfs_path(),
+   loc->domain, loc->bus, loc->devid,
+   loc->function, res_idx);
+
+   /* then try to map resource file */
+   fd = open(devname, O_RDWR);
+   if (fd < 0) {
+   RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
devname, strerror(errno));
-   goto error;
+   goto error;
+   }
}
 
/* try mapping somewhere close to the end of hugepages */
diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
index 458e6d0..828acc5 100644
--- a/drivers/bus/pci/rte_bus_pci.h
+++ b/drivers/bus/pci/rte_bus_pci.h
@@ -135,6 +135,8 @@ struct rte_pci_bus {
 
 /** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */
 #define RTE_PCI_DRV_NEED_MAPPING 0x0001
+/** Device needs PCI BAR mapping with enabled write combining (wc) */
+#define RTE_PCI_DRV_WC_ACTIVATE 0x0002
 /** Device driver supports link state interrupt */
 #define RTE_PCI_DRV_INTR_LSC   0x0008
 /** Device driver supports device removal interrupt */
-- 
2.7.4



[dpdk-dev] [PATCH v5 1/4] igb_uio: add wc option

2018-06-29 Thread Rafal Kozik
From: Kozik 

Write combining (WC) increases NIC performance by making better
utilization of the PCI bus, but it cannot be used by all PMDs.

To get internal_addr, the memory needs to be mapped. But as memory cannot
be mapped twice, with and without WC, the mapping should be skipped when
WC is used. [1]

To avoid affecting other drivers that could potentially use internal_addr,
the wc_activate parameter adds the possibility to skip the mapping for
those PMDs that do not use it.

[1] https://www.kernel.org/doc/ols/2008/ols2008v2-pages-135-144.pdf
section 5.3 and 5.4

Signed-off-by: Rafal Kozik 
Acked-by: Bruce Richardson 
Acked-by: Ferruh Yigit 
---
 kernel/linux/igb_uio/igb_uio.c | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/kernel/linux/igb_uio/igb_uio.c b/kernel/linux/igb_uio/igb_uio.c
index b3233f1..3398eac 100644
--- a/kernel/linux/igb_uio/igb_uio.c
+++ b/kernel/linux/igb_uio/igb_uio.c
@@ -30,6 +30,7 @@ struct rte_uio_pci_dev {
int refcnt;
 };
 
+static int wc_activate;
 static char *intr_mode;
 static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX;
 /* sriov sysfs */
@@ -375,9 +376,13 @@ igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info,
len = pci_resource_len(dev, pci_bar);
if (addr == 0 || len == 0)
return -1;
-   internal_addr = ioremap(addr, len);
-   if (internal_addr == NULL)
-   return -1;
+   if (wc_activate == 0) {
+   internal_addr = ioremap(addr, len);
+   if (internal_addr == NULL)
+   return -1;
+   } else {
+   internal_addr = NULL;
+   }
info->mem[n].name = name;
info->mem[n].addr = addr;
info->mem[n].internal_addr = internal_addr;
@@ -626,6 +631,9 @@ igbuio_pci_init_module(void)
return -EINVAL;
}
 
+   if (wc_activate != 0)
+   pr_info("wc_activate is set\n");
+
ret = igbuio_config_intr_mode(intr_mode);
if (ret < 0)
return ret;
@@ -650,6 +658,12 @@ MODULE_PARM_DESC(intr_mode,
 "" RTE_INTR_MODE_LEGACY_NAME " Use Legacy interrupt\n"
 "\n");
 
+module_param(wc_activate, int, 0);
+MODULE_PARM_DESC(wc_activate,
+"Activate support for write combining (WC) (default=0)\n"
+"0 - disable\n"
+"other - enable\n");
+
 MODULE_DESCRIPTION("UIO driver for Intel IGB PCI cards");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Intel Corporation");
-- 
2.7.4



[dpdk-dev] [PATCH v5 2/4] bus/pci: reference driver structure

2018-06-29 Thread Rafal Kozik
From: Kozik 

Add a pointer to the driver structure before calling rte_pci_map_device.
This allows driver flags to be used for adjusting the configuration.

Signed-off-by: Rafal Kozik 
Acked-by: Bruce Richardson 
---
 drivers/bus/pci/pci_common.c | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index d8151b0..8f5d77f 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -158,17 +158,24 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
RTE_LOG(INFO, EAL, "  probe driver: %x:%x %s\n", dev->id.vendor_id,
dev->id.device_id, dr->driver.name);
 
+   /*
+* reference driver structure
+* This need to be before rte_pci_map_device(), as it enable to use
+* driver flags for adjusting configuration.
+*/
+   dev->driver = dr;
+   dev->device.driver = &dr->driver;
+
if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
/* map resources for devices that use igb_uio */
ret = rte_pci_map_device(dev);
-   if (ret != 0)
+   if (ret != 0) {
+   dev->driver = NULL;
+   dev->device.driver = NULL;
return ret;
+   }
}
 
-   /* reference driver structure */
-   dev->driver = dr;
-   dev->device.driver = &dr->driver;
-
/* call the driver probe() function */
ret = dr->probe(dr, dev);
if (ret) {
-- 
2.7.4



[dpdk-dev] [PATCH] net/ena: cleaning HW IO rings configuration

2018-11-14 Thread Rafal Kozik
When queues are stopped, release the Tx buffers.
During start, initialize the arrays of empty
Tx/Rx reqs with default values.

Fixes: df238f84c0a2 ("net/ena: recreate HW IO rings on start and stop")

Signed-off-by: Rafal Kozik 
Acked-by: Michal Krawczyk 
---
 drivers/net/ena/ena_ethdev.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 05a4fbe..3690afe 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -1096,6 +1096,7 @@ static int ena_create_io_queue(struct ena_ring *ring)
{ ENA_ADMIN_PLACEMENT_POLICY_HOST,
  0, 0, 0, 0, 0 };
uint16_t ena_qid;
+   unsigned int i;
int rc;
 
adapter = ring->adapter;
@@ -1106,10 +1107,14 @@ static int ena_create_io_queue(struct ena_ring *ring)
ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
ctx.queue_size = adapter->tx_ring_size;
+   for (i = 0; i < ring->ring_size; i++)
+   ring->empty_tx_reqs[i] = i;
} else {
ena_qid = ENA_IO_RXQ_IDX(ring->id);
ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
ctx.queue_size = adapter->rx_ring_size;
+   for (i = 0; i < ring->ring_size; i++)
+   ring->empty_rx_reqs[i] = i;
}
ctx.qid = ena_qid;
ctx.msix_vector = -1; /* interrupts not used */
@@ -1152,6 +1157,8 @@ static void ena_free_io_queues_all(struct ena_adapter *adapter)
for (i = 0; i < nb_txq; ++i) {
ena_qid = ENA_IO_TXQ_IDX(i);
ena_com_destroy_io_queue(ena_dev, ena_qid);
+
+   ena_tx_queue_release_bufs(&adapter->tx_ring[i]);
}
 
for (i = 0; i < nb_rxq; ++i) {
-- 
2.7.4



[dpdk-dev] [PATCH] net/ena: fix out of order completion

2018-11-20 Thread Rafal Kozik
From: root 

rx_buffer_info should be refilled not linearly but out of order.
The IDs should be taken from the empty_rx_reqs array.

rx_refill_buffer is introduced as temporary storage for the
bulk of mbufs taken from the pool.

In case of error, unused mbufs are put back into the pool.
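
The indexing change can be seen in a self-contained toy model (plain ints
stand in for mbufs; the array names only mirror the driver fields described
above, this is not the driver code):

    #include <stdio.h>

    #define RING_SIZE 8

    int main(void)
    {
            int rx_buffer_info[RING_SIZE];   /* one slot per req_id           */
            int empty_rx_reqs[RING_SIZE];    /* free req_ids, in any order    */
            int rx_refill_buffer[4];         /* staging area for the bulk get */
            unsigned int next_to_use = 5, count = 4, i;

            for (i = 0; i < RING_SIZE; i++) {
                    empty_rx_reqs[i] = (RING_SIZE - 1) - i; /* not linear */
                    rx_buffer_info[i] = -1;
            }
            for (i = 0; i < count; i++)
                    rx_refill_buffer[i] = 100 + i;  /* "mbufs" from one bulk get */

            for (i = 0; i < count; i++) {
                    unsigned int pos = (next_to_use + i) & (RING_SIZE - 1);
                    int req_id = empty_rx_reqs[pos];

                    rx_buffer_info[req_id] = rx_refill_buffer[i]; /* by req_id */
            }

            for (i = 0; i < RING_SIZE; i++)
                    printf("req_id %u -> %d\n", i, rx_buffer_info[i]);
            return 0;
    }

Refilling rx_buffer_info by req_id rather than by ring position is what
keeps the table consistent when completions arrive out of order.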

Fixes: c2034976673d ("net/ena: add Rx out of order completion")
Cc: sta...@dpdk.org

Signed-off-by: Rafal Kozik 
Acked-by: Michal Krawczyk 

---
 drivers/net/ena/ena_ethdev.c | 40 
 drivers/net/ena/ena_ethdev.h |  1 +
 2 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 3690afe..3a5cce9 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -776,6 +776,10 @@ static void ena_rx_queue_release(void *queue)
rte_free(ring->rx_buffer_info);
ring->rx_buffer_info = NULL;
 
+   if (ring->rx_refill_buffer)
+   rte_free(ring->rx_refill_buffer);
+   ring->rx_refill_buffer = NULL;
+
if (ring->empty_rx_reqs)
rte_free(ring->empty_rx_reqs);
ring->empty_rx_reqs = NULL;
@@ -1318,6 +1322,17 @@ static int ena_rx_queue_setup(struct rte_eth_dev *dev,
return -ENOMEM;
}
 
+   rxq->rx_refill_buffer = rte_zmalloc("rxq->rx_refill_buffer",
+   sizeof(struct rte_mbuf *) * nb_desc,
+   RTE_CACHE_LINE_SIZE);
+
+   if (!rxq->rx_refill_buffer) {
+   RTE_LOG(ERR, PMD, "failed to alloc mem for rx refill buffer\n");
+   rte_free(rxq->rx_buffer_info);
+   rxq->rx_buffer_info = NULL;
+   return -ENOMEM;
+   }
+
rxq->empty_rx_reqs = rte_zmalloc("rxq->empty_rx_reqs",
 sizeof(uint16_t) * nb_desc,
 RTE_CACHE_LINE_SIZE);
@@ -1325,6 +1340,8 @@ static int ena_rx_queue_setup(struct rte_eth_dev *dev,
RTE_LOG(ERR, PMD, "failed to alloc mem for empty rx reqs\n");
rte_free(rxq->rx_buffer_info);
rxq->rx_buffer_info = NULL;
+   rte_free(rxq->rx_refill_buffer);
+   rxq->rx_refill_buffer = NULL;
return -ENOMEM;
}
 
@@ -1346,7 +1363,7 @@ static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count)
uint16_t ring_mask = ring_size - 1;
uint16_t next_to_use = rxq->next_to_use;
uint16_t in_use, req_id;
-   struct rte_mbuf **mbufs = &rxq->rx_buffer_info[0];
+   struct rte_mbuf **mbufs = rxq->rx_refill_buffer;
 
if (unlikely(!count))
return 0;
@@ -1354,13 +1371,8 @@ static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count)
in_use = rxq->next_to_use - rxq->next_to_clean;
ena_assert_msg(((in_use + count) < ring_size), "bad ring state");
 
-   count = RTE_MIN(count,
-   (uint16_t)(ring_size - (next_to_use & ring_mask)));
-
/* get resources for incoming packets */
-   rc = rte_mempool_get_bulk(rxq->mb_pool,
- (void **)(&mbufs[next_to_use & ring_mask]),
- count);
+   rc = rte_mempool_get_bulk(rxq->mb_pool, (void **)mbufs, count);
if (unlikely(rc < 0)) {
rte_atomic64_inc(&rxq->adapter->drv_stats->rx_nombuf);
PMD_RX_LOG(DEBUG, "there are no enough free buffers");
@@ -1369,15 +1381,17 @@ static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count)
 
for (i = 0; i < count; i++) {
uint16_t next_to_use_masked = next_to_use & ring_mask;
-   struct rte_mbuf *mbuf = mbufs[next_to_use_masked];
+   struct rte_mbuf *mbuf = mbufs[i];
struct ena_com_buf ebuf;
 
-   rte_prefetch0(mbufs[((next_to_use + 4) & ring_mask)]);
+   if (likely(i + 4 < count))
+   rte_prefetch0(mbufs[i + 4]);
 
req_id = rxq->empty_rx_reqs[next_to_use_masked];
rc = validate_rx_req_id(rxq, req_id);
if (unlikely(rc < 0))
break;
+   rxq->rx_buffer_info[req_id] = mbuf;
 
/* prepare physical address for DMA transaction */
ebuf.paddr = mbuf->buf_iova + RTE_PKTMBUF_HEADROOM;
@@ -1386,17 +1400,19 @@ static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count)
rc = ena_com_add_single_rx_desc(rxq->ena_com_io_sq,
&ebuf, req_id);
if (unlikely(rc)) {
-   rte_mempool_put_bulk(rxq-&

[dpdk-dev] [PATCH v2] net/ena: fix out of order completion

2018-11-20 Thread Rafal Kozik
rx_buffer_info should be refilled not linearly but out of order.
The IDs should be taken from the empty_rx_reqs array.

rx_refill_buffer is introduced as temporary storage for the
bulk of mbufs taken from the pool.

In case of error, unused mbufs are put back into the pool.

Fixes: c2034976673d ("net/ena: add Rx out of order completion")
Cc: sta...@dpdk.org

Signed-off-by: Rafal Kozik 
Acked-by: Michal Krawczyk 

---
Fix commit author.
---
 drivers/net/ena/ena_ethdev.c | 40 
 drivers/net/ena/ena_ethdev.h |  1 +
 2 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 3690afe..3a5cce9 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -776,6 +776,10 @@ static void ena_rx_queue_release(void *queue)
rte_free(ring->rx_buffer_info);
ring->rx_buffer_info = NULL;
 
+   if (ring->rx_refill_buffer)
+   rte_free(ring->rx_refill_buffer);
+   ring->rx_refill_buffer = NULL;
+
if (ring->empty_rx_reqs)
rte_free(ring->empty_rx_reqs);
ring->empty_rx_reqs = NULL;
@@ -1318,6 +1322,17 @@ static int ena_rx_queue_setup(struct rte_eth_dev *dev,
return -ENOMEM;
}
 
+   rxq->rx_refill_buffer = rte_zmalloc("rxq->rx_refill_buffer",
+   sizeof(struct rte_mbuf *) * nb_desc,
+   RTE_CACHE_LINE_SIZE);
+
+   if (!rxq->rx_refill_buffer) {
+   RTE_LOG(ERR, PMD, "failed to alloc mem for rx refill buffer\n");
+   rte_free(rxq->rx_buffer_info);
+   rxq->rx_buffer_info = NULL;
+   return -ENOMEM;
+   }
+
rxq->empty_rx_reqs = rte_zmalloc("rxq->empty_rx_reqs",
 sizeof(uint16_t) * nb_desc,
 RTE_CACHE_LINE_SIZE);
@@ -1325,6 +1340,8 @@ static int ena_rx_queue_setup(struct rte_eth_dev *dev,
RTE_LOG(ERR, PMD, "failed to alloc mem for empty rx reqs\n");
rte_free(rxq->rx_buffer_info);
rxq->rx_buffer_info = NULL;
+   rte_free(rxq->rx_refill_buffer);
+   rxq->rx_refill_buffer = NULL;
return -ENOMEM;
}
 
@@ -1346,7 +1363,7 @@ static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count)
uint16_t ring_mask = ring_size - 1;
uint16_t next_to_use = rxq->next_to_use;
uint16_t in_use, req_id;
-   struct rte_mbuf **mbufs = &rxq->rx_buffer_info[0];
+   struct rte_mbuf **mbufs = rxq->rx_refill_buffer;
 
if (unlikely(!count))
return 0;
@@ -1354,13 +1371,8 @@ static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count)
in_use = rxq->next_to_use - rxq->next_to_clean;
ena_assert_msg(((in_use + count) < ring_size), "bad ring state");
 
-   count = RTE_MIN(count,
-   (uint16_t)(ring_size - (next_to_use & ring_mask)));
-
/* get resources for incoming packets */
-   rc = rte_mempool_get_bulk(rxq->mb_pool,
- (void **)(&mbufs[next_to_use & ring_mask]),
- count);
+   rc = rte_mempool_get_bulk(rxq->mb_pool, (void **)mbufs, count);
if (unlikely(rc < 0)) {
rte_atomic64_inc(&rxq->adapter->drv_stats->rx_nombuf);
PMD_RX_LOG(DEBUG, "there are no enough free buffers");
@@ -1369,15 +1381,17 @@ static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count)
 
for (i = 0; i < count; i++) {
uint16_t next_to_use_masked = next_to_use & ring_mask;
-   struct rte_mbuf *mbuf = mbufs[next_to_use_masked];
+   struct rte_mbuf *mbuf = mbufs[i];
struct ena_com_buf ebuf;
 
-   rte_prefetch0(mbufs[((next_to_use + 4) & ring_mask)]);
+   if (likely(i + 4 < count))
+   rte_prefetch0(mbufs[i + 4]);
 
req_id = rxq->empty_rx_reqs[next_to_use_masked];
rc = validate_rx_req_id(rxq, req_id);
if (unlikely(rc < 0))
break;
+   rxq->rx_buffer_info[req_id] = mbuf;
 
/* prepare physical address for DMA transaction */
ebuf.paddr = mbuf->buf_iova + RTE_PKTMBUF_HEADROOM;
@@ -1386,17 +1400,19 @@ static int ena_populate_rx_queue(struct ena_ring *rxq, 
unsigned int count)
rc = ena_com_add_single_rx_desc(rxq->ena_com_io_sq,
&ebuf, req_id);
if (unlikely(rc)) {
-   rte_mempool_put_bulk(rxq->

[dpdk-dev] [PATCH v3] net/ena: fix out of order completion

2018-11-21 Thread Rafal Kozik
rx_buffer_info should be refilled not linearly, but out of order;
the IDs should be taken from the empty_rx_reqs array.

rx_refill_buffer is introduced as temporary storage for the
bulk of mbufs taken from the pool.

In case of error the unused mbufs are put back to the pool.
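
For illustration, a simplified, self-contained sketch of the out-of-order
refill (not the driver code itself: struct rx_ring_sketch and setup_desc()
are hypothetical stand-ins, while empty_rx_reqs, rx_refill_buffer and
rx_buffer_info mirror the ring fields used in the diff below):

/*
 * Sketch only: refill Rx buffers out of order.  Free descriptor IDs come
 * from empty_rx_reqs; mbufs are first gathered into the temporary
 * rx_refill_buffer and only then written into rx_buffer_info[req_id].
 */
#include <stdint.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>

struct rx_ring_sketch {
	struct rte_mempool *mb_pool;
	struct rte_mbuf **rx_refill_buffer;	/* temporary bulk storage */
	struct rte_mbuf **rx_buffer_info;	/* indexed by req_id      */
	uint16_t *empty_rx_reqs;		/* free descriptor IDs    */
	uint16_t next_to_use;
	uint16_t ring_mask;			/* ring size - 1          */
};

/* Stand-in for programming one hardware Rx descriptor; may fail. */
static int
setup_desc(struct rx_ring_sketch *r, uint16_t req_id, struct rte_mbuf *m)
{
	(void)r; (void)req_id; (void)m;
	return 0;
}

static unsigned int
refill_out_of_order(struct rx_ring_sketch *r, unsigned int count)
{
	struct rte_mbuf **mbufs = r->rx_refill_buffer;
	unsigned int i;

	/* one bulk allocation into the temporary buffer */
	if (rte_mempool_get_bulk(r->mb_pool, (void **)mbufs, count) < 0)
		return 0;

	for (i = 0; i < count; i++) {
		uint16_t req_id = r->empty_rx_reqs[r->next_to_use & r->ring_mask];

		if (i + 4 < count)
			rte_prefetch0(mbufs[i + 4]);

		/* slot chosen by req_id, not by ring position */
		r->rx_buffer_info[req_id] = mbufs[i];
		if (setup_desc(r, req_id, mbufs[i]) != 0)
			break;
		r->next_to_use++;
	}

	/* on a mid-loop failure, return the unused tail to the pool */
	if (i < count)
		rte_mempool_put_bulk(r->mb_pool, (void **)&mbufs[i], count - i);

	return i;
}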

Fixes: c2034976673d ("net/ena: add Rx out of order completion")
Cc: sta...@dpdk.org

Signed-off-by: Rafal Kozik 
Acked-by: Michal Krawczyk 

---
v2:
Fix commit author.

v3:
Add () for readability.
---
 drivers/net/ena/ena_ethdev.c | 40 
 drivers/net/ena/ena_ethdev.h |  1 +
 2 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 3690afe..a07bd2b 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -776,6 +776,10 @@ static void ena_rx_queue_release(void *queue)
rte_free(ring->rx_buffer_info);
ring->rx_buffer_info = NULL;
 
+   if (ring->rx_refill_buffer)
+   rte_free(ring->rx_refill_buffer);
+   ring->rx_refill_buffer = NULL;
+
if (ring->empty_rx_reqs)
rte_free(ring->empty_rx_reqs);
ring->empty_rx_reqs = NULL;
@@ -1318,6 +1322,17 @@ static int ena_rx_queue_setup(struct rte_eth_dev *dev,
return -ENOMEM;
}
 
+   rxq->rx_refill_buffer = rte_zmalloc("rxq->rx_refill_buffer",
+   sizeof(struct rte_mbuf *) * nb_desc,
+   RTE_CACHE_LINE_SIZE);
+
+   if (!rxq->rx_refill_buffer) {
+   RTE_LOG(ERR, PMD, "failed to alloc mem for rx refill buffer\n");
+   rte_free(rxq->rx_buffer_info);
+   rxq->rx_buffer_info = NULL;
+   return -ENOMEM;
+   }
+
rxq->empty_rx_reqs = rte_zmalloc("rxq->empty_rx_reqs",
 sizeof(uint16_t) * nb_desc,
 RTE_CACHE_LINE_SIZE);
@@ -1325,6 +1340,8 @@ static int ena_rx_queue_setup(struct rte_eth_dev *dev,
RTE_LOG(ERR, PMD, "failed to alloc mem for empty rx reqs\n");
rte_free(rxq->rx_buffer_info);
rxq->rx_buffer_info = NULL;
+   rte_free(rxq->rx_refill_buffer);
+   rxq->rx_refill_buffer = NULL;
return -ENOMEM;
}
 
@@ -1346,7 +1363,7 @@ static int ena_populate_rx_queue(struct ena_ring *rxq, 
unsigned int count)
uint16_t ring_mask = ring_size - 1;
uint16_t next_to_use = rxq->next_to_use;
uint16_t in_use, req_id;
-   struct rte_mbuf **mbufs = &rxq->rx_buffer_info[0];
+   struct rte_mbuf **mbufs = rxq->rx_refill_buffer;
 
if (unlikely(!count))
return 0;
@@ -1354,13 +1371,8 @@ static int ena_populate_rx_queue(struct ena_ring *rxq, 
unsigned int count)
in_use = rxq->next_to_use - rxq->next_to_clean;
ena_assert_msg(((in_use + count) < ring_size), "bad ring state");
 
-   count = RTE_MIN(count,
-   (uint16_t)(ring_size - (next_to_use & ring_mask)));
-
/* get resources for incoming packets */
-   rc = rte_mempool_get_bulk(rxq->mb_pool,
- (void **)(&mbufs[next_to_use & ring_mask]),
- count);
+   rc = rte_mempool_get_bulk(rxq->mb_pool, (void **)mbufs, count);
if (unlikely(rc < 0)) {
rte_atomic64_inc(&rxq->adapter->drv_stats->rx_nombuf);
PMD_RX_LOG(DEBUG, "there are no enough free buffers");
@@ -1369,15 +1381,17 @@ static int ena_populate_rx_queue(struct ena_ring *rxq, 
unsigned int count)
 
for (i = 0; i < count; i++) {
uint16_t next_to_use_masked = next_to_use & ring_mask;
-   struct rte_mbuf *mbuf = mbufs[next_to_use_masked];
+   struct rte_mbuf *mbuf = mbufs[i];
struct ena_com_buf ebuf;
 
-   rte_prefetch0(mbufs[((next_to_use + 4) & ring_mask)]);
+   if (likely((i + 4) < count))
+   rte_prefetch0(mbufs[i + 4]);
 
req_id = rxq->empty_rx_reqs[next_to_use_masked];
rc = validate_rx_req_id(rxq, req_id);
if (unlikely(rc < 0))
break;
+   rxq->rx_buffer_info[req_id] = mbuf;
 
/* prepare physical address for DMA transaction */
ebuf.paddr = mbuf->buf_iova + RTE_PKTMBUF_HEADROOM;
@@ -1386,17 +1400,19 @@ static int ena_populate_rx_queue(struct ena_ring *rxq, 
unsigned int count)
rc = ena_com_add_single_rx_desc(rxq->ena_com_io_sq,
&ebuf, req_id);
if (unlikely(rc)) {
-   rte_mempool

[dpdk-dev] [PATCH] fix repopulation of tx_mbufs table

2018-11-23 Thread Rafal Kozik
If the NIC does not send any packet in one Tx cycle, pktgen tries
to allocate 0 mbufs from the pool. In that case DPDK returns an error
and the packets will not be sent. As no packet will be sent in the
next iteration either, the situation repeats indefinitely.

Checking whether more mbufs are actually needed before taking them
prevents this situation.
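
A minimal sketch of the guarded refill (not the pktgen code itself, which
uses its own pg_pktmbuf_alloc_bulk() wrapper; refill_tx_table() below is a
hypothetical helper): the table is topped up only when packets were actually
consumed, so a zero-sized bulk allocation is never requested.

#include <stdint.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>

static int
refill_tx_table(struct rte_mempool *mp, struct rte_mbuf **m_table,
		uint16_t *len, uint16_t tx_burst)
{
	uint16_t saved = *len;			/* mbufs still in the table */
	uint16_t nb_pkts = tx_burst - saved;	/* how many are missing     */
	int rc = 0;

	if (nb_pkts > 0)			/* skip the zero-size request */
		rc = rte_pktmbuf_alloc_bulk(mp, &m_table[saved], nb_pkts);
	if (rc == 0)
		*len = tx_burst;		/* table is full again */
	return rc;
}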

Fixes: f034b381d19f ("cleanup and fix for FVL NIC performance")

Signed-off-by: Rafal Kozik 
---
 app/pktgen.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/app/pktgen.c b/app/pktgen.c
index 2d9ff59..b4d3dfe 100644
--- a/app/pktgen.c
+++ b/app/pktgen.c
@@ -1054,7 +1054,8 @@ pktgen_send_pkts(port_info_t *info, uint16_t qid, struct 
rte_mempool *mp)
uint16_t saved = info->q[qid].tx_mbufs.len;
uint16_t nb_pkts = info->tx_burst - saved;
 
-   rc = pg_pktmbuf_alloc_bulk(mp,
+   if (likely(nb_pkts > 0))
+   rc = pg_pktmbuf_alloc_bulk(mp,
   
&info->q[qid].tx_mbufs.m_table[saved],
   nb_pkts);
if (rc == 0) {
@@ -1070,7 +1071,8 @@ pktgen_send_pkts(port_info_t *info, uint16_t qid, struct 
rte_mempool *mp)
uint16_t saved = info->q[qid].tx_mbufs.len;
uint16_t nb_pkts = txCnt - saved;
 
-   rc = pg_pktmbuf_alloc_bulk(mp,
+   if (likely(nb_pkts > 0))
+   rc = pg_pktmbuf_alloc_bulk(mp,
   
&info->q[qid].tx_mbufs.m_table[saved],
   nb_pkts);
if (rc == 0) {
-- 
2.7.4



[dpdk-dev] [PATCH 1/4] igb_uio: add wc option

2018-04-11 Thread Rafal Kozik
Write combining (WC) increases NIC performance by making better
use of the PCI bus, but it cannot be used by all PMDs.

The wc_activate parameter adds the possibility to enable it for
those PMDs that can support it.

Signed-off-by: Rafal Kozik 
---
 kernel/linux/igb_uio/igb_uio.c | 17 ++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/kernel/linux/igb_uio/igb_uio.c b/kernel/linux/igb_uio/igb_uio.c
index 4cae4dd..42e3b3f 100644
--- a/kernel/linux/igb_uio/igb_uio.c
+++ b/kernel/linux/igb_uio/igb_uio.c
@@ -30,6 +30,7 @@ struct rte_uio_pci_dev {
int refcnt;
 };
 
+static int wc_activate;
 static char *intr_mode;
 static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX;
 /* sriov sysfs */
@@ -375,9 +376,13 @@ igbuio_pci_setup_iomem(struct pci_dev *dev, struct 
uio_info *info,
len = pci_resource_len(dev, pci_bar);
if (addr == 0 || len == 0)
return -1;
-   internal_addr = ioremap(addr, len);
-   if (internal_addr == NULL)
-   return -1;
+   if (wc_activate == 0) {
+   internal_addr = ioremap(addr, len);
+   if (internal_addr == NULL)
+   return -1;
+   } else {
+   internal_addr = NULL;
+   }
info->mem[n].name = name;
info->mem[n].addr = addr;
info->mem[n].internal_addr = internal_addr;
@@ -638,6 +643,12 @@ MODULE_PARM_DESC(intr_mode,
 "" RTE_INTR_MODE_LEGACY_NAME " Use Legacy interrupt\n"
 "\n");
 
+module_param(wc_activate, int, 0);
+MODULE_PARM_DESC(wc_activate,
+"Activate support for write combining (WC) (default=0)\n"
+"0 - disable\n"
+"other - enable\n");
+
 MODULE_DESCRIPTION("UIO driver for Intel IGB PCI cards");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Intel Corporation");
-- 
2.7.4



[dpdk-dev] [PATCH 0/4] support for write combining

2018-04-11 Thread Rafal Kozik
Support for write combining.

Rafal Kozik (4):
  igb_uio: add wc option
  bus/pci: reference driver structure
  eal: enable WC during resources mapping
  net/ena: enable WC

 drivers/bus/pci/linux/pci_uio.c | 39 ---
 drivers/bus/pci/pci_common.c| 13 -
 drivers/bus/pci/rte_bus_pci.h   |  2 ++
 drivers/net/ena/ena_ethdev.c|  3 ++-
 kernel/linux/igb_uio/igb_uio.c  | 17 ++---
 5 files changed, 54 insertions(+), 20 deletions(-)

-- 
2.7.4



[dpdk-dev] [PATCH 2/4] bus/pci: reference driver structure

2018-04-11 Thread Rafal Kozik
Reference the driver structure before calling rte_pci_map_device.
This allows the driver flags to be used for adjusting the mapping
configuration.

Signed-off-by: Rafal Kozik 
---
 drivers/bus/pci/pci_common.c | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index 2a00f36..15e9a47 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -159,17 +159,20 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
RTE_LOG(INFO, EAL, "  probe driver: %x:%x %s\n", dev->id.vendor_id,
dev->id.device_id, dr->driver.name);
 
+   /* reference driver structure */
+   dev->driver = dr;
+   dev->device.driver = &dr->driver;
+
if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
/* map resources for devices that use igb_uio */
ret = rte_pci_map_device(dev);
-   if (ret != 0)
+   if (ret != 0) {
+   dev->driver = NULL;
+   dev->device.driver = NULL;
return ret;
+   }
}
 
-   /* reference driver structure */
-   dev->driver = dr;
-   dev->device.driver = &dr->driver;
-
/* call the driver probe() function */
ret = dr->probe(dr, dev);
if (ret) {
-- 
2.7.4



[dpdk-dev] [PATCH 4/4] net/ena: enable WC

2018-04-11 Thread Rafal Kozik
Write combining (WC) increases NIC performance by making better
use of the PCI bus. ENA supports this feature.

To enable it, load the igb_uio driver with wc_activate set to 1.

Signed-off-by: Rafal Kozik 
---
 drivers/net/ena/ena_ethdev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 34b2a8d..415d89d 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -1889,7 +1889,8 @@ static int eth_ena_pci_remove(struct rte_pci_device 
*pci_dev)
 
 static struct rte_pci_driver rte_ena_pmd = {
.id_table = pci_id_ena_map,
-   .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
+   .drv_flags = RTE_PCI_DRV_NEED_MAPPING |
+RTE_PCI_DRV_WC_ACTIVATE,
.probe = eth_ena_pci_probe,
.remove = eth_ena_pci_remove,
 };
-- 
2.7.4



[dpdk-dev] [PATCH 3/4] eal: enable WC during resources mapping

2018-04-11 Thread Rafal Kozik
Write combining (WC) increases NIC performance by making better
use of the PCI bus, but it cannot be used by all PMDs.

It is enabled only if RTE_PCI_DRV_WC_ACTIVATE is set in the
driver's flags. For proper operation the igb_uio driver must also be
loaded with wc_activate set to 1.

When mapping PCI resources, WC is tried first.
If it is not supported, the mapping falls back to normal mode.

Signed-off-by: Rafal Kozik 
---
 drivers/bus/pci/linux/pci_uio.c | 39 ---
 drivers/bus/pci/rte_bus_pci.h   |  2 ++
 2 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
index d423e4b..a1570c9 100644
--- a/drivers/bus/pci/linux/pci_uio.c
+++ b/drivers/bus/pci/linux/pci_uio.c
@@ -287,17 +287,14 @@ pci_uio_map_resource_by_index(struct rte_pci_device *dev, 
int res_idx,
void *mapaddr;
struct rte_pci_addr *loc;
struct pci_map *maps;
+   int wc_activate = 0;
+
+   if (dev->driver != NULL)
+   wc_activate = dev->driver->drv_flags & RTE_PCI_DRV_WC_ACTIVATE;
 
loc = &dev->addr;
maps = uio_res->maps;
 
-   /* update devname for mmap  */
-   snprintf(devname, sizeof(devname),
-   "%s/" PCI_PRI_FMT "/resource%d",
-   rte_pci_get_sysfs_path(),
-   loc->domain, loc->bus, loc->devid,
-   loc->function, res_idx);
-
/* allocate memory to keep path */
maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
if (maps[map_idx].path == NULL) {
@@ -309,11 +306,31 @@ pci_uio_map_resource_by_index(struct rte_pci_device *dev, 
int res_idx,
/*
 * open resource file, to mmap it
 */
-   fd = open(devname, O_RDWR);
-   if (fd < 0) {
-   RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+   if (wc_activate) {
+   /* update devname for mmap  */
+   snprintf(devname, sizeof(devname),
+   "%s/" PCI_PRI_FMT "/resource%d_wc",
+   rte_pci_get_sysfs_path(),
+   loc->domain, loc->bus, loc->devid,
+   loc->function, res_idx);
+
+   fd = open(devname, O_RDWR);
+   }
+
+   if (!wc_activate || fd < 0) {
+   snprintf(devname, sizeof(devname),
+   "%s/" PCI_PRI_FMT "/resource%d",
+   rte_pci_get_sysfs_path(),
+   loc->domain, loc->bus, loc->devid,
+   loc->function, res_idx);
+
+   /* then try to map resource file */
+   fd = open(devname, O_RDWR);
+   if (fd < 0) {
+   RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
devname, strerror(errno));
-   goto error;
+   goto error;
+   }
}
 
/* try mapping somewhere close to the end of hugepages */
diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
index 357afb9..b7bcce3 100644
--- a/drivers/bus/pci/rte_bus_pci.h
+++ b/drivers/bus/pci/rte_bus_pci.h
@@ -132,6 +132,8 @@ struct rte_pci_bus {
 
 /** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */
 #define RTE_PCI_DRV_NEED_MAPPING 0x0001
+/** Device needs PCI BAR mapping with enabled write combining (wc) */
+#define RTE_PCI_DRV_WC_ACTIVATE 0x0002
 /** Device driver supports link state interrupt */
 #define RTE_PCI_DRV_INTR_LSC   0x0008
 /** Device driver supports device removal interrupt */
-- 
2.7.4



[dpdk-dev] [PATCH 0/2] net/ena: convert to new offloads API

2018-01-16 Thread Rafal Kozik
Ethdev offloads API has changed since:

commit cba7f53b717d ("ethdev: introduce Tx queue offloads API")
commit ce17eddefc20 ("ethdev: introduce Rx queue offloads API")

These patches add support for the new offloads API.

Rafal Kozik (2):
  net/ena: convert to new Tx offloads API
  net/ena: convert to new Rx offloads API

 drivers/net/ena/ena_ethdev.c | 109 ---
 drivers/net/ena/ena_ethdev.h |   5 ++
 2 files changed, 97 insertions(+), 17 deletions(-)

-- 
2.7.4



[dpdk-dev] [PATCH 1/2] net/ena: convert to new Tx offloads API

2018-01-16 Thread Rafal Kozik
Ethdev Tx offloads API has changed since:

commit cba7f53b717d ("ethdev: introduce Tx queue offloads API")

This commit supports the new Tx offloads API. The queue configuration
is stored in ena_ring.offloads. While preparing mbufs for Tx, offloads are
applied only if the appropriate flags in this field are set.
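
The hunk that adds the per-queue validation is truncated from this archive.
As a hedged sketch only (not the driver function itself),
ena_are_tx_queue_offloads_allowed() is expected to follow the same pattern
as the Rx variant shown in patch 2/2: every offload requested for the queue
must also be enabled at port level.

#include <stdbool.h>
#include <stdint.h>

/* Hedged sketch; mirrors the Rx-side check from patch 2/2. */
static bool
queue_offloads_allowed_sketch(uint64_t queue_offloads, uint64_t port_offloads)
{
	/* true if every offload selected for the queue is set for the port */
	return (queue_offloads & port_offloads) == queue_offloads;
}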

Signed-off-by: Rafal Kozik 
---
 drivers/net/ena/ena_ethdev.c | 73 +++-
 drivers/net/ena/ena_ethdev.h |  3 ++
 2 files changed, 61 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 22db895..6473776 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -164,6 +164,14 @@ static const struct ena_stats ena_stats_ena_com_strings[] 
= {
 #define ENA_STATS_ARRAY_RX ARRAY_SIZE(ena_stats_rx_strings)
 #define ENA_STATS_ARRAY_ENA_COMARRAY_SIZE(ena_stats_ena_com_strings)
 
+#define QUEUE_OFFLOADS (DEV_TX_OFFLOAD_TCP_CKSUM |\
+   DEV_TX_OFFLOAD_UDP_CKSUM |\
+   DEV_TX_OFFLOAD_IPV4_CKSUM |\
+   DEV_TX_OFFLOAD_TCP_TSO)
+#define MBUF_OFFLOADS (PKT_TX_L4_MASK |\
+  PKT_TX_IP_CKSUM |\
+  PKT_TX_TCP_SEG)
+
 /** Vendor ID used by Amazon devices */
 #define PCI_VENDOR_ID_AMAZON 0x1D0F
 /** Amazon devices */
@@ -227,6 +235,8 @@ static int ena_rss_reta_query(struct rte_eth_dev *dev,
  struct rte_eth_rss_reta_entry64 *reta_conf,
  uint16_t reta_size);
 static int ena_get_sset_count(struct rte_eth_dev *dev, int sset);
+static bool ena_are_tx_queue_offloads_allowed(struct ena_adapter *adapter,
+ uint64_t offloads);
 
 static const struct eth_dev_ops ena_dev_ops = {
.dev_configure= ena_dev_configure,
@@ -280,21 +290,24 @@ static inline void ena_rx_mbuf_prepare(struct rte_mbuf 
*mbuf,
 }
 
 static inline void ena_tx_mbuf_prepare(struct rte_mbuf *mbuf,
-  struct ena_com_tx_ctx *ena_tx_ctx)
+  struct ena_com_tx_ctx *ena_tx_ctx,
+  uint64_t queue_offloads)
 {
struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
 
-   if (mbuf->ol_flags &
-   (PKT_TX_L4_MASK | PKT_TX_IP_CKSUM | PKT_TX_TCP_SEG)) {
+   if ((mbuf->ol_flags & MBUF_OFFLOADS) &&
+   (queue_offloads & QUEUE_OFFLOADS)) {
/* check if TSO is required */
-   if (mbuf->ol_flags & PKT_TX_TCP_SEG) {
+   if ((mbuf->ol_flags & PKT_TX_TCP_SEG) &&
+   (queue_offloads & DEV_TX_OFFLOAD_TCP_TSO)) {
ena_tx_ctx->tso_enable = true;
 
ena_meta->l4_hdr_len = GET_L4_HDR_LEN(mbuf);
}
 
/* check if L3 checksum is needed */
-   if (mbuf->ol_flags & PKT_TX_IP_CKSUM)
+   if ((mbuf->ol_flags & PKT_TX_IP_CKSUM) &&
+   (queue_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM))
ena_tx_ctx->l3_csum_enable = true;
 
if (mbuf->ol_flags & PKT_TX_IPV6) {
@@ -310,19 +323,17 @@ static inline void ena_tx_mbuf_prepare(struct rte_mbuf 
*mbuf,
}
 
/* check if L4 checksum is needed */
-   switch (mbuf->ol_flags & PKT_TX_L4_MASK) {
-   case PKT_TX_TCP_CKSUM:
+   if ((mbuf->ol_flags & PKT_TX_TCP_CKSUM) &&
+   (queue_offloads & DEV_TX_OFFLOAD_TCP_CKSUM)) {
ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
ena_tx_ctx->l4_csum_enable = true;
-   break;
-   case PKT_TX_UDP_CKSUM:
+   } else if ((mbuf->ol_flags & PKT_TX_UDP_CKSUM) &&
+  (queue_offloads & DEV_TX_OFFLOAD_UDP_CKSUM)) {
ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
ena_tx_ctx->l4_csum_enable = true;
-   break;
-   default:
+   } else {
ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN;
ena_tx_ctx->l4_csum_enable = false;
-   break;
}
 
ena_meta->mss = mbuf->tso_segsz;
@@ -945,7 +956,7 @@ static int ena_tx_queue_setup(struct rte_eth_dev *dev,
  uint16_t queue_idx,
  uint16_t nb_desc,
  __rte_unused unsigned int socket_id,
- __rte_unused const struct rte_eth_txconf *tx_conf)
+ const struct rte_eth_txconf *tx_conf)
 {
struct ena_com_create_io_ctx ctx

[dpdk-dev] [PATCH 2/2] net/ena: convert to new Rx offloads API

2018-01-16 Thread Rafal Kozik
Ethdev Rx offloads API has changed since:

commit ce17eddefc20 ("ethdev: introduce Rx queue offloads API")

This commit supports the new Rx offloads API.

Signed-off-by: Rafal Kozik 
---
 drivers/net/ena/ena_ethdev.c | 36 ++--
 drivers/net/ena/ena_ethdev.h |  2 ++
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 6473776..f069ca8 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -237,6 +237,8 @@ static int ena_rss_reta_query(struct rte_eth_dev *dev,
 static int ena_get_sset_count(struct rte_eth_dev *dev, int sset);
 static bool ena_are_tx_queue_offloads_allowed(struct ena_adapter *adapter,
  uint64_t offloads);
+static bool ena_are_rx_queue_offloads_allowed(struct ena_adapter *adapter,
+ uint64_t offloads);
 
 static const struct eth_dev_ops ena_dev_ops = {
.dev_configure= ena_dev_configure,
@@ -766,7 +768,8 @@ static uint32_t ena_get_mtu_conf(struct ena_adapter 
*adapter)
 {
uint32_t max_frame_len = adapter->max_mtu;
 
-   if (adapter->rte_eth_dev_data->dev_conf.rxmode.jumbo_frame == 1)
+   if (adapter->rte_eth_dev_data->dev_conf.rxmode.offloads &
+   DEV_RX_OFFLOAD_JUMBO_FRAME)
max_frame_len =

adapter->rte_eth_dev_data->dev_conf.rxmode.max_rx_pkt_len;
 
@@ -1065,7 +1068,7 @@ static int ena_rx_queue_setup(struct rte_eth_dev *dev,
  uint16_t queue_idx,
  uint16_t nb_desc,
  __rte_unused unsigned int socket_id,
- __rte_unused const struct rte_eth_rxconf *rx_conf,
+ const struct rte_eth_rxconf *rx_conf,
  struct rte_mempool *mp)
 {
struct ena_com_create_io_ctx ctx =
@@ -1101,6 +1104,11 @@ static int ena_rx_queue_setup(struct rte_eth_dev *dev,
return -EINVAL;
}
 
+   if (!ena_are_rx_queue_offloads_allowed(adapter, rx_conf->offloads)) {
+   RTE_LOG(ERR, PMD, "Unsupported queue offloads\n");
+   return -EINVAL;
+   }
+
ena_qid = ENA_IO_RXQ_IDX(queue_idx);
 
ctx.qid = ena_qid;
@@ -1405,6 +1413,7 @@ static int ena_dev_configure(struct rte_eth_dev *dev)
struct ena_adapter *adapter =
(struct ena_adapter *)(dev->data->dev_private);
uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;
+   uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads;
 
if ((tx_offloads & adapter->tx_supported_offloads) != tx_offloads) {
RTE_LOG(ERR, PMD, "Some Tx offloads are not supported "
@@ -1413,6 +1422,13 @@ static int ena_dev_configure(struct rte_eth_dev *dev)
return -ENOTSUP;
}
 
+   if ((rx_offloads & adapter->rx_supported_offloads) != rx_offloads) {
+   RTE_LOG(ERR, PMD, "Some Rx offloads are not supported "
+   "requested 0x%lx supported 0x%lx\n",
+   rx_offloads, adapter->rx_supported_offloads);
+   return -ENOTSUP;
+   }
+
if (!(adapter->state == ENA_ADAPTER_STATE_INIT ||
  adapter->state == ENA_ADAPTER_STATE_STOPPED)) {
PMD_INIT_LOG(ERR, "Illegal adapter state: %d",
@@ -1434,6 +1450,7 @@ static int ena_dev_configure(struct rte_eth_dev *dev)
}
 
adapter->tx_selected_offloads = tx_offloads;
+   adapter->rx_selected_offloads = rx_offloads;
return 0;
 }
 
@@ -1475,6 +1492,19 @@ static bool ena_are_tx_queue_offloads_allowed(struct 
ena_adapter *adapter,
return true;
 }
 
+static bool ena_are_rx_queue_offloads_allowed(struct ena_adapter *adapter,
+ uint64_t offloads)
+{
+   uint64_t port_offloads = adapter->rx_selected_offloads;
+
+   /* Check if port supports all requested offloads.
+* True if all offloads selected for queue are set for port.
+*/
+   if ((offloads & port_offloads) != offloads)
+   return false;
+   return true;
+}
+
 static void ena_infos_get(struct rte_eth_dev *dev,
  struct rte_eth_dev_info *dev_info)
 {
@@ -1529,6 +1559,7 @@ static void ena_infos_get(struct rte_eth_dev *dev,
 
/* Inform framework about available features */
dev_info->rx_offload_capa = rx_feat;
+   dev_info->rx_queue_offload_capa = rx_feat;
dev_info->tx_offload_capa = tx_feat;
dev_info->tx_queue_offload_capa = tx_feat;
 
@@ -1541,6 +1572,7 @@ static void ena_infos_get(struct rte_eth_dev *dev,
dev_info->reta_size = ENA_RX_RSS_TABLE_SIZE;
 
adapter->tx_s

[dpdk-dev] [PATCH v2 1/2] net/ena: convert to new Tx offloads API

2018-01-17 Thread Rafal Kozik
Ethdev Tx offloads API has changed since:

commit cba7f53b717d ("ethdev: introduce Tx queue offloads API")

This commit supports the new Tx offloads API. The queue configuration
is stored in ena_ring.offloads. While preparing mbufs for Tx, offloads are
applied only if the appropriate flags in this field are set.

Signed-off-by: Rafal Kozik 
---
v2:
 * Check ETH_TXQ_FLAGS_IGNORE flag (see the sketch below).
 * Use PRIx64 in printf.
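
The ena_tx_queue_setup() hunk carrying the ETH_TXQ_FLAGS_IGNORE check noted
above is truncated from this archive. The sketch below only illustrates the
usual transitional-API convention (TXQ_FLAGS_IGNORE_SKETCH and
txq_conf_acceptable_sketch() are stand-ins, not the driver code): per-queue
offloads are validated only when the application asks for txq_flags to be
ignored in favour of the new offloads field.

#include <stdbool.h>
#include <stdint.h>

#define TXQ_FLAGS_IGNORE_SKETCH 0x8000	/* stand-in for ETH_TXQ_FLAGS_IGNORE */

static bool
txq_conf_acceptable_sketch(uint32_t txq_flags, uint64_t queue_offloads,
			   uint64_t port_offloads)
{
	if (!(txq_flags & TXQ_FLAGS_IGNORE_SKETCH))
		return true;	/* legacy txq_flags path, nothing to validate */
	return (queue_offloads & port_offloads) == queue_offloads;
}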

 drivers/net/ena/ena_ethdev.c | 74 +++-
 drivers/net/ena/ena_ethdev.h |  3 ++
 2 files changed, 62 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 22db895..54ccc3d 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -164,6 +164,14 @@ static const struct ena_stats ena_stats_ena_com_strings[] 
= {
 #define ENA_STATS_ARRAY_RX ARRAY_SIZE(ena_stats_rx_strings)
 #define ENA_STATS_ARRAY_ENA_COMARRAY_SIZE(ena_stats_ena_com_strings)
 
+#define QUEUE_OFFLOADS (DEV_TX_OFFLOAD_TCP_CKSUM |\
+   DEV_TX_OFFLOAD_UDP_CKSUM |\
+   DEV_TX_OFFLOAD_IPV4_CKSUM |\
+   DEV_TX_OFFLOAD_TCP_TSO)
+#define MBUF_OFFLOADS (PKT_TX_L4_MASK |\
+  PKT_TX_IP_CKSUM |\
+  PKT_TX_TCP_SEG)
+
 /** Vendor ID used by Amazon devices */
 #define PCI_VENDOR_ID_AMAZON 0x1D0F
 /** Amazon devices */
@@ -227,6 +235,8 @@ static int ena_rss_reta_query(struct rte_eth_dev *dev,
  struct rte_eth_rss_reta_entry64 *reta_conf,
  uint16_t reta_size);
 static int ena_get_sset_count(struct rte_eth_dev *dev, int sset);
+static bool ena_are_tx_queue_offloads_allowed(struct ena_adapter *adapter,
+ uint64_t offloads);
 
 static const struct eth_dev_ops ena_dev_ops = {
.dev_configure= ena_dev_configure,
@@ -280,21 +290,24 @@ static inline void ena_rx_mbuf_prepare(struct rte_mbuf 
*mbuf,
 }
 
 static inline void ena_tx_mbuf_prepare(struct rte_mbuf *mbuf,
-  struct ena_com_tx_ctx *ena_tx_ctx)
+  struct ena_com_tx_ctx *ena_tx_ctx,
+  uint64_t queue_offloads)
 {
struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
 
-   if (mbuf->ol_flags &
-   (PKT_TX_L4_MASK | PKT_TX_IP_CKSUM | PKT_TX_TCP_SEG)) {
+   if ((mbuf->ol_flags & MBUF_OFFLOADS) &&
+   (queue_offloads & QUEUE_OFFLOADS)) {
/* check if TSO is required */
-   if (mbuf->ol_flags & PKT_TX_TCP_SEG) {
+   if ((mbuf->ol_flags & PKT_TX_TCP_SEG) &&
+   (queue_offloads & DEV_TX_OFFLOAD_TCP_TSO)) {
ena_tx_ctx->tso_enable = true;
 
ena_meta->l4_hdr_len = GET_L4_HDR_LEN(mbuf);
}
 
/* check if L3 checksum is needed */
-   if (mbuf->ol_flags & PKT_TX_IP_CKSUM)
+   if ((mbuf->ol_flags & PKT_TX_IP_CKSUM) &&
+   (queue_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM))
ena_tx_ctx->l3_csum_enable = true;
 
if (mbuf->ol_flags & PKT_TX_IPV6) {
@@ -310,19 +323,17 @@ static inline void ena_tx_mbuf_prepare(struct rte_mbuf 
*mbuf,
}
 
/* check if L4 checksum is needed */
-   switch (mbuf->ol_flags & PKT_TX_L4_MASK) {
-   case PKT_TX_TCP_CKSUM:
+   if ((mbuf->ol_flags & PKT_TX_TCP_CKSUM) &&
+   (queue_offloads & DEV_TX_OFFLOAD_TCP_CKSUM)) {
ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
ena_tx_ctx->l4_csum_enable = true;
-   break;
-   case PKT_TX_UDP_CKSUM:
+   } else if ((mbuf->ol_flags & PKT_TX_UDP_CKSUM) &&
+  (queue_offloads & DEV_TX_OFFLOAD_UDP_CKSUM)) {
ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
ena_tx_ctx->l4_csum_enable = true;
-   break;
-   default:
+   } else {
ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN;
ena_tx_ctx->l4_csum_enable = false;
-   break;
}
 
ena_meta->mss = mbuf->tso_segsz;
@@ -945,7 +956,7 @@ static int ena_tx_queue_setup(struct rte_eth_dev *dev,
  uint16_t queue_idx,
  uint16_t nb_desc,
  __rte_unused unsigned int socket_id,
- __rte_unused const struct rte_eth_txconf *tx_conf)
+ const struct rte_eth_txconf *tx_

[dpdk-dev] [PATCH v2 2/2] net/ena: convert to new Rx offloads API

2018-01-17 Thread Rafal Kozik
Ethdev Rx offloads API has changed since:

commit ce17eddefc20 ("ethdev: introduce Rx queue offloads API")

This commit supports the new Rx offloads API.

Signed-off-by: Rafal Kozik 
---
v2:
 * Use PRIx64 in printf.

 drivers/net/ena/ena_ethdev.c | 36 ++--
 drivers/net/ena/ena_ethdev.h |  2 ++
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 54ccc3d..1dfbe39 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -237,6 +237,8 @@ static int ena_rss_reta_query(struct rte_eth_dev *dev,
 static int ena_get_sset_count(struct rte_eth_dev *dev, int sset);
 static bool ena_are_tx_queue_offloads_allowed(struct ena_adapter *adapter,
  uint64_t offloads);
+static bool ena_are_rx_queue_offloads_allowed(struct ena_adapter *adapter,
+ uint64_t offloads);
 
 static const struct eth_dev_ops ena_dev_ops = {
.dev_configure= ena_dev_configure,
@@ -766,7 +768,8 @@ static uint32_t ena_get_mtu_conf(struct ena_adapter 
*adapter)
 {
uint32_t max_frame_len = adapter->max_mtu;
 
-   if (adapter->rte_eth_dev_data->dev_conf.rxmode.jumbo_frame == 1)
+   if (adapter->rte_eth_dev_data->dev_conf.rxmode.offloads &
+   DEV_RX_OFFLOAD_JUMBO_FRAME)
max_frame_len =

adapter->rte_eth_dev_data->dev_conf.rxmode.max_rx_pkt_len;
 
@@ -1066,7 +1069,7 @@ static int ena_rx_queue_setup(struct rte_eth_dev *dev,
  uint16_t queue_idx,
  uint16_t nb_desc,
  __rte_unused unsigned int socket_id,
- __rte_unused const struct rte_eth_rxconf *rx_conf,
+ const struct rte_eth_rxconf *rx_conf,
  struct rte_mempool *mp)
 {
struct ena_com_create_io_ctx ctx =
@@ -1102,6 +1105,11 @@ static int ena_rx_queue_setup(struct rte_eth_dev *dev,
return -EINVAL;
}
 
+   if (!ena_are_rx_queue_offloads_allowed(adapter, rx_conf->offloads)) {
+   RTE_LOG(ERR, PMD, "Unsupported queue offloads\n");
+   return -EINVAL;
+   }
+
ena_qid = ENA_IO_RXQ_IDX(queue_idx);
 
ctx.qid = ena_qid;
@@ -1406,6 +1414,7 @@ static int ena_dev_configure(struct rte_eth_dev *dev)
struct ena_adapter *adapter =
(struct ena_adapter *)(dev->data->dev_private);
uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;
+   uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads;
 
if ((tx_offloads & adapter->tx_supported_offloads) != tx_offloads) {
RTE_LOG(ERR, PMD, "Some Tx offloads are not supported "
@@ -1414,6 +1423,13 @@ static int ena_dev_configure(struct rte_eth_dev *dev)
return -ENOTSUP;
}
 
+   if ((rx_offloads & adapter->rx_supported_offloads) != rx_offloads) {
+   RTE_LOG(ERR, PMD, "Some Rx offloads are not supported "
+   "requested 0x%" PRIx64 " supported 0x%" PRIx64 "\n",
+   rx_offloads, adapter->rx_supported_offloads);
+   return -ENOTSUP;
+   }
+
if (!(adapter->state == ENA_ADAPTER_STATE_INIT ||
  adapter->state == ENA_ADAPTER_STATE_STOPPED)) {
PMD_INIT_LOG(ERR, "Illegal adapter state: %d",
@@ -1435,6 +1451,7 @@ static int ena_dev_configure(struct rte_eth_dev *dev)
}
 
adapter->tx_selected_offloads = tx_offloads;
+   adapter->rx_selected_offloads = rx_offloads;
return 0;
 }
 
@@ -1476,6 +1493,19 @@ static bool ena_are_tx_queue_offloads_allowed(struct 
ena_adapter *adapter,
return true;
 }
 
+static bool ena_are_rx_queue_offloads_allowed(struct ena_adapter *adapter,
+ uint64_t offloads)
+{
+   uint64_t port_offloads = adapter->rx_selected_offloads;
+
+   /* Check if port supports all requested offloads.
+* True if all offloads selected for queue are set for port.
+*/
+   if ((offloads & port_offloads) != offloads)
+   return false;
+   return true;
+}
+
 static void ena_infos_get(struct rte_eth_dev *dev,
  struct rte_eth_dev_info *dev_info)
 {
@@ -1530,6 +1560,7 @@ static void ena_infos_get(struct rte_eth_dev *dev,
 
/* Inform framework about available features */
dev_info->rx_offload_capa = rx_feat;
+   dev_info->rx_queue_offload_capa = rx_feat;
dev_info->tx_offload_capa = tx_feat;
dev_info->tx_queue_offload_capa = tx_feat;
 
@@ -1542,6 +1573,7 @@ static void ena_infos_get(struct rte_eth_dev *dev,
  

[dpdk-dev] [PATCH] net/ena: TX L4 offloads should not be set in RX path

2018-01-25 Thread Rafal Kozik
Information about the received packet type detected by the NIC should
be stored in the packet_type field of rte_mbuf. Tx L4 offload flags
should not be set in the Rx path. The only flags that may be set in
ol_flags while receiving a packet are the indications of whether the
L4 and L3 checksums are correct.

Fixes: 1173fca25af9 ("ena: add polling-mode driver")

Reported-by: Matthew Smith 
Signed-off-by: Rafal Kozik 
---
 drivers/net/ena/ena_ethdev.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 83e0ae2..1e2af80 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -275,16 +275,17 @@ static inline void ena_rx_mbuf_prepare(struct rte_mbuf 
*mbuf,
   struct ena_com_rx_ctx *ena_rx_ctx)
 {
uint64_t ol_flags = 0;
+   uint32_t packet_type = 0;
 
if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP)
-   ol_flags |= PKT_TX_TCP_CKSUM;
+   packet_type |= RTE_PTYPE_L4_TCP;
else if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)
-   ol_flags |= PKT_TX_UDP_CKSUM;
+   packet_type |= RTE_PTYPE_L4_UDP;
 
if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4)
-   ol_flags |= PKT_TX_IPV4;
+   packet_type |= RTE_PTYPE_L3_IPV4;
else if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV6)
-   ol_flags |= PKT_TX_IPV6;
+   packet_type |= RTE_PTYPE_L3_IPV6;
 
if (unlikely(ena_rx_ctx->l4_csum_err))
ol_flags |= PKT_RX_L4_CKSUM_BAD;
@@ -292,6 +293,7 @@ static inline void ena_rx_mbuf_prepare(struct rte_mbuf 
*mbuf,
ol_flags |= PKT_RX_IP_CKSUM_BAD;
 
mbuf->ol_flags = ol_flags;
+   mbuf->packet_type = packet_type;
 }
 
 static inline void ena_tx_mbuf_prepare(struct rte_mbuf *mbuf,
-- 
2.7.4



[dpdk-dev] [PATCH] net/ena: fix jumbo support in Rx offloads flags

2018-02-01 Thread Rafal Kozik
The ENA device supports Rx jumbo frames, and this information needs
to be provided in the offload flags.

Fixes: 7369f88f88c0 ("net/ena: convert to new Rx offloads API")

Signed-off-by: Rafal Kozik 
---
 drivers/net/ena/ena_ethdev.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 83e0ae2..3588384 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -1561,6 +1561,8 @@ static void ena_infos_get(struct rte_eth_dev *dev,
DEV_RX_OFFLOAD_UDP_CKSUM  |
DEV_RX_OFFLOAD_TCP_CKSUM;
 
+   rx_feat |= DEV_RX_OFFLOAD_JUMBO_FRAME;
+
/* Inform framework about available features */
dev_info->rx_offload_capa = rx_feat;
dev_info->rx_queue_offload_capa = rx_feat;
-- 
2.7.4