[dpdk-dev] [PATCH v3 12/12] eal: pci: vfio: fix build error

2016-01-07 Thread Santosh Shukla
From: Rizwan Ansari 

Patch fixes below build error:
/home/mv/work/thunder/santosh/dpdk/santosh/dpdk/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c:
In function ?pci_vfio_set_iommu_type?:
/home/mv/work/thunder/santosh/dpdk/santosh/dpdk/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c:243:2:
error: ?for? loop initial declarations are only allowed in C99 mode
for (unsigned idx = 0; idx < RTE_DIM(iommu_types); idx++) {
^
/home/mv/work/thunder/santosh/dpdk/santosh/dpdk/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c:243:2:
note: use option -std=c99 or -std=gnu99 to compile your code

Signed-off-by: Rizwan Ansari 
Signed-off-by: Santosh Shukla 
---
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c 
b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index c69050d..844ef80 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -240,7 +240,8 @@ pci_vfio_set_bus_master(int dev_fd)
 /* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for error */
 static const struct vfio_iommu_type *
 pci_vfio_set_iommu_type(int vfio_container_fd) {
-   for (unsigned idx = 0; idx < RTE_DIM(iommu_types); idx++) {
+   unsigned idx;
+   for (idx = 0; idx < RTE_DIM(iommu_types); idx++) {
const struct vfio_iommu_type *t = _types[idx];

int ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU,
-- 
1.7.9.5



[dpdk-dev] [PATCH v3 11/12] vfio: Support for no-IOMMU mode

2016-01-07 Thread Santosh Shukla
From: Anatoly Burakov 

This commit is adding a generic mechanism to support multiple IOMMU
types. For now, it's only type 1 (x86 IOMMU) and no-IOMMU (a special
VFIO mode that doesn't use IOMMU at all), but it's easily extended
by adding necessary definitions into eal_pci_init.h and a DMA
mapping function to eal_pci_vfio_dma.c.

Since type 1 IOMMU module is no longer necessary to have VFIO,
we fix the module check to check for vfio-pci instead. It's not
ideal and triggers VFIO checks more often (and thus produces more
error output, which was the reason behind the module check in the
first place), so we compensate for that by providing more verbose
logging, indicating whether VFIO initialization has succeeded or
failed.

Signed-off-by: Anatoly Burakov 
---
 lib/librte_eal/linuxapp/eal/Makefile   |1 +
 lib/librte_eal/linuxapp/eal/eal_pci_init.h |   22 
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c |  142 +++-
 lib/librte_eal/linuxapp/eal/eal_pci_vfio_dma.c |   84 ++
 lib/librte_eal/linuxapp/eal/eal_vfio.h |5 +
 5 files changed, 201 insertions(+), 53 deletions(-)
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_vfio_dma.c

diff --git a/lib/librte_eal/linuxapp/eal/Makefile 
b/lib/librte_eal/linuxapp/eal/Makefile
index 26eced5..5c9e9d9 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -59,6 +59,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_log.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci_uio.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci_vfio.c
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci_vfio_dma.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci_vfio_mp_sync.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_debug.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_lcore.c
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h 
b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
index 3bc592b..068800c 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
@@ -112,6 +112,28 @@ struct vfio_config {
struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
 };

+/* function pointer typedef for DMA mapping functions */
+typedef  int (*vfio_dma_func_t)(int);
+
+/* Structure to hold supported IOMMU types */
+struct vfio_iommu_type {
+   int type_id;
+   const char *name;
+   vfio_dma_func_t dma_map_func;
+};
+
+/* function prototypes for different IOMMU types */
+int vfio_iommu_type1_dma_map(int container_fd);
+int vfio_iommu_noiommu_dma_map(int container_fd);
+
+/* IOMMU types we support */
+static const struct vfio_iommu_type iommu_types[] = {
+   /* x86 IOMMU, otherwise known as type 1 */
+   { VFIO_TYPE1_IOMMU, "Type 1", _iommu_type1_dma_map},
+   /* IOMMU-less mode */
+   { VFIO_NOIOMMU_IOMMU, "No-IOMMU", _iommu_noiommu_dma_map},
+};
+
 #endif

 #endif /* EAL_PCI_INIT_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c 
b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index 7ec203c..c69050d 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -72,6 +72,7 @@ EAL_REGISTER_TAILQ(rte_vfio_tailq)
 #define VFIO_DIR "/dev/vfio"
 #define VFIO_CONTAINER_PATH "/dev/vfio/vfio"
 #define VFIO_GROUP_FMT "/dev/vfio/%u"
+#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
 #define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)

 /* per-process VFIO config */
@@ -236,42 +237,57 @@ pci_vfio_set_bus_master(int dev_fd)
return 0;
 }

-/* set up DMA mappings */
-static int
-pci_vfio_setup_dma_maps(int vfio_container_fd)
-{
-   const struct rte_memseg *ms = rte_eal_get_physmem_layout();
-   int i, ret;
-
-   ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU,
-   VFIO_TYPE1_IOMMU);
-   if (ret) {
-   RTE_LOG(ERR, EAL, "  cannot set IOMMU type, "
-   "error %i (%s)\n", errno, strerror(errno));
-   return -1;
+/* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for error */
+static const struct vfio_iommu_type *
+pci_vfio_set_iommu_type(int vfio_container_fd) {
+   for (unsigned idx = 0; idx < RTE_DIM(iommu_types); idx++) {
+   const struct vfio_iommu_type *t = _types[idx];
+
+   int ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU,
+   t->type_id);
+   if (!ret) {
+   RTE_LOG(NOTICE, EAL, "  using IOMMU type %d (%s)\n",
+   t->type_id, t->name);
+   return t;
+   }
+   /* not an error, there may be more supported IOMMU types */
+   RTE_LOG(DEBUG, EAL, "  set IOMMU type %d (%s) failed, "
+   "error %i (%s)\n", t->type_id, t->name, errno,
+ 

[dpdk-dev] [PATCH v3 10/12] config: armv7/v8: Enable RTE_LIBRTE_VIRTIO_PMD

2016-01-07 Thread Santosh Shukla
Enable RTE_LIBRTE_VIRTIO_PMD for armv7/v8 and setting RTE_VIRTIO_INC_VEC=n.
Builds successfully for armv7/v8.

Signed-off-by: Santosh Shukla 
---
 config/defconfig_arm-armv7a-linuxapp-gcc   |6 +-
 config/defconfig_arm64-armv8a-linuxapp-gcc |6 +-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/config/defconfig_arm-armv7a-linuxapp-gcc 
b/config/defconfig_arm-armv7a-linuxapp-gcc
index cbebd64..d840dc2 100644
--- a/config/defconfig_arm-armv7a-linuxapp-gcc
+++ b/config/defconfig_arm-armv7a-linuxapp-gcc
@@ -43,6 +43,11 @@ CONFIG_RTE_FORCE_INTRINSICS=y
 CONFIG_RTE_TOOLCHAIN="gcc"
 CONFIG_RTE_TOOLCHAIN_GCC=y

+# VIRTIO support for ARM
+CONFIG_RTE_LIBRTE_VIRTIO_PMD=y
+# Disable VIRTIO VECTOR support
+CONFIG_RTE_VIRTIO_INC_VECTOR=n
+
 # ARM doesn't have support for vmware TSC map
 CONFIG_RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT=n

@@ -70,7 +75,6 @@ CONFIG_RTE_LIBRTE_I40E_PMD=n
 CONFIG_RTE_LIBRTE_IXGBE_PMD=n
 CONFIG_RTE_LIBRTE_MLX4_PMD=n
 CONFIG_RTE_LIBRTE_MPIPE_PMD=n
-CONFIG_RTE_LIBRTE_VIRTIO_PMD=n
 CONFIG_RTE_LIBRTE_VMXNET3_PMD=n
 CONFIG_RTE_LIBRTE_PMD_XENVIRT=n
 CONFIG_RTE_LIBRTE_PMD_BNX2X=n
diff --git a/config/defconfig_arm64-armv8a-linuxapp-gcc 
b/config/defconfig_arm64-armv8a-linuxapp-gcc
index 504f3ed..b3a4b28 100644
--- a/config/defconfig_arm64-armv8a-linuxapp-gcc
+++ b/config/defconfig_arm64-armv8a-linuxapp-gcc
@@ -45,8 +45,12 @@ CONFIG_RTE_TOOLCHAIN_GCC=y

 CONFIG_RTE_CACHE_LINE_SIZE=64

+# Enable VIRTIO support for ARM
+CONFIG_RTE_LIBRTE_VIRTIO_PMD=y
+# Disable VIRTIO VECTOR support
+CONFIG_RTE_VIRTIO_INC_VECTOR=n
+
 CONFIG_RTE_IXGBE_INC_VECTOR=n
-CONFIG_RTE_LIBRTE_VIRTIO_PMD=n
 CONFIG_RTE_LIBRTE_IVSHMEM=n
 CONFIG_RTE_LIBRTE_FM10K_PMD=n
 CONFIG_RTE_LIBRTE_I40E_PMD=n
-- 
1.7.9.5



[dpdk-dev] [PATCH v3 09/12] virtio: vfio: Enable RTE_PCI_DRV_NEED_MAPPING flag in driver

2016-01-07 Thread Santosh Shukla
Flag required for vfio case, It helps to update vfio_dev_fd for each
virtio net interface.

Signed-off-by: Santosh Shukla 
---
 drivers/net/virtio/virtio_ethdev.c |4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 9ca99d5..8d55049 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1447,7 +1447,11 @@ static struct eth_driver rte_virtio_pmd = {
.pci_drv = {
.name = "rte_virtio_pmd",
.id_table = pci_id_virtio_map,
+#ifdef RTE_EAL_VFIO
+   .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_DETACHABLE,
+#else
.drv_flags = RTE_PCI_DRV_DETACHABLE,
+#endif
},
.eth_dev_init = eth_virtio_dev_init,
.eth_dev_uninit = eth_virtio_dev_uninit,
-- 
1.7.9.5



[dpdk-dev] [PATCH v3 08/12] virtio: Add capability to initialize driver for vfio interface

2016-01-07 Thread Santosh Shukla
- adding pci_dev member in struct virtio_hw{} to track
vfio_dev_fd per virtio interface, required for ioport
rd/wr operation.

- Masked default VIRTIO_READ/WRITE for vfio case.

Signed-off-by: Santosh Shukla 
Signed-off-by: Rizwan Ansari 
Signed-off-by: Rakesh Krishnamurhty 
---
 drivers/net/virtio/virtio_ethdev.c |   11 ++-
 drivers/net/virtio/virtio_pci.h|9 +
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index d928339..9ca99d5 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1287,9 +1287,18 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)

pci_dev = eth_dev->pci_dev;

+#ifdef RTE_EAL_VFIO
+   hw->pci_dev = pci_dev;
+
+   /* For debug use only */
+   const struct rte_intr_handle *intr_handle __rte_unused = 
_dev->intr_handle;
+   PMD_INIT_LOG(DEBUG, "hw->pci_dev %p intr_handle %p vfio_dev_fd %d\n",
+hw->pci_dev, intr_handle,
+intr_handle->vfio_dev_fd);
+#endif
+
if (virtio_resource_init(pci_dev) < 0)
return -1;
-
hw->use_msix = virtio_has_msix(_dev->addr);
hw->io_base = (uint32_t)(uintptr_t)pci_dev->mem_resource[0].addr;

diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 8b5b031..6dabede 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -47,6 +47,10 @@

 #include 

+#ifdef RTE_EAL_VFIO
+#include "virtio_vfio.h"
+#endif
+
 struct virtqueue;

 /* VirtIO PCI vendor/device ID. */
@@ -176,6 +180,9 @@ struct virtio_hw {
uint8_t use_msix;
uint8_t started;
uint8_t mac_addr[ETHER_ADDR_LEN];
+#ifdef RTE_EAL_VFIO
+   const struct rte_pci_device *pci_dev;
+#endif
 };

 /*
@@ -228,6 +235,7 @@ outl_p(unsigned int data, unsigned int port)
 }
 #endif

+#ifndef RTE_EAL_VFIO
 #define VIRTIO_PCI_REG_ADDR(hw, reg) \
(unsigned short)((hw)->io_base + (reg))

@@ -245,6 +253,7 @@ outl_p(unsigned int data, unsigned int port)
inl((VIRTIO_PCI_REG_ADDR((hw), (reg
 #define VIRTIO_WRITE_REG_4(hw, reg, value) \
outl_p((unsigned int)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg
+#endif

 static inline int
 vtpci_with_feature(struct virtio_hw *hw, uint32_t bit)
-- 
1.7.9.5



[dpdk-dev] [PATCH v3 07/12] virtio: vfio: add api support to rd/wr ioport bar

2016-01-07 Thread Santosh Shukla
For vfio case - Use pread/pwrite api to access virtio
ioport space.

Signed-off-by: Santosh Shukla 
Signed-off-by: Rizwan Ansari 
Signed-off-by: Rakesh Krishnamurthy 
---
 drivers/net/virtio/virtio_vfio.h |  146 ++
 1 file changed, 146 insertions(+)
 create mode 100644 drivers/net/virtio/virtio_vfio.h

diff --git a/drivers/net/virtio/virtio_vfio.h b/drivers/net/virtio/virtio_vfio.h
new file mode 100644
index 000..fd94b30
--- /dev/null
+++ b/drivers/net/virtio/virtio_vfio.h
@@ -0,0 +1,146 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Cavium Networks. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ *THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef _VIRTIO_VFIO_H_
+#define _VIRTIO_VFIO_H_
+
+#ifdef RTE_EAL_VFIO
+#include 
+#include 
+#include 
+
+#include "virtio_logs.h"
+
+static inline void ioport_inb(const struct rte_pci_device *pci_dev,
+ uint8_t reg, uint8_t *val)
+{
+   if (rte_eal_pci_read_bar(pci_dev, (uint8_t *)val, sizeof(uint8_t), reg,
+0) <= 0) {
+   PMD_DRV_LOG(ERR, "Can't read from PCI bar space");
+   return;
+   }
+}
+
+static inline void ioport_inw(const struct rte_pci_device *pci_dev,
+ uint16_t reg, uint16_t *val)
+{
+   if (rte_eal_pci_read_bar(pci_dev, (uint16_t *)val, sizeof(uint16_t),
+reg, 0) <= 0) {
+   PMD_DRV_LOG(ERR, "Can't read from PCI bar space");
+   return;
+   }
+}
+
+static inline void ioport_inl(const struct rte_pci_device *pci_dev,
+ uint32_t reg, uint32_t *val)
+{
+   if (rte_eal_pci_read_bar(pci_dev, (uint32_t *)val, sizeof(uint32_t),
+reg, 0) <= 0) {
+   PMD_DRV_LOG(ERR, "Can't read from PCI bar space");
+   return;
+   }
+}
+
+static inline void ioport_outb_p(const struct rte_pci_device *pci_dev,
+uint8_t reg, uint8_t val)
+{
+   if (rte_eal_pci_write_bar(pci_dev, (uint8_t *), sizeof(uint8_t),
+ reg, 0) <= 0) {
+   PMD_DRV_LOG(ERR, "Can't read from PCI bar space");
+   return;
+   }
+}
+
+
+static inline void ioport_outw_p(const struct rte_pci_device *pci_dev,
+uint16_t reg, uint16_t val)
+{
+   if (rte_eal_pci_write_bar(pci_dev, (uint16_t *), sizeof(uint16_t),
+ reg, 0) <= 0) {
+   PMD_DRV_LOG(ERR, "Can't read from PCI bar space");
+   return;
+   }
+}
+
+
+static inline void ioport_outl_p(const struct rte_pci_device *pci_dev,
+uint32_t reg, uint32_t val)
+{
+   if (rte_eal_pci_write_bar(pci_dev, (uint32_t *), sizeof(uint32_t),
+ reg, 0) <= 0) {
+   PMD_DRV_LOG(ERR, "Can't read from PCI bar space");
+   return;
+   }
+}
+
+#define VIRTIO_READ_REG_1(hw, reg) \
+({ \
+   uint8_t ret;\
+   ioport_inb(((hw)->pci_dev), reg, ); \
+   ret;\
+})
+
+#define VIRTIO_WRITE_REG_1(hw, reg, val)   \
+({  

[dpdk-dev] [PATCH v3 06/12] eal: pci: vfio: add rd/wr func for pci bar space

2016-01-07 Thread Santosh Shukla
Introducing below api for pci bar space rd/wr. Currently used for
pci iobar rd/wr.

Api's are:
- rte_eal_pci_read_bar
- rte_eal_pci_write_bar

virtio when used for vfio-mode then virtio driver will use these api
to do rd/wr operation on ioport pci bar.

Signed-off-by: Santosh Shukla 
---
 lib/librte_eal/common/include/rte_pci.h|   38 
 lib/librte_eal/linuxapp/eal/eal_pci.c  |   28 
 lib/librte_eal/linuxapp/eal/eal_pci_init.h |6 +
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c |   28 
 4 files changed, 100 insertions(+)

diff --git a/lib/librte_eal/common/include/rte_pci.h 
b/lib/librte_eal/common/include/rte_pci.h
index 334c12e..53437cc 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -471,6 +471,44 @@ int rte_eal_pci_read_config(const struct rte_pci_device 
*device,
void *buf, size_t len, off_t offset);

 /**
+ * Read PCI bar space.
+ *
+ * @param device
+ *   A pointer to a rte_pci_device structure describing the device
+ *   to use
+ * @param buf
+ *   A data buffer where the bytes should be read into
+ * @param len
+ *   The length of the data buffer.
+ * @param offset
+ *   The offset into PCI bar space
+ * @param bar_idx
+ *   The pci bar index (valid range is 0..5)
+ */
+int rte_eal_pci_read_bar(const struct rte_pci_device *device,
+void *buf, size_t len, off_t offset, int bar_idx);
+
+/**
+ * Write PCI bar space.
+ *
+ * @param device
+ *   A pointer to a rte_pci_device structure describing the device
+ *   to use
+ * @param buf
+ *   A data buffer containing the bytes should be written
+ * @param len
+ *   The length of the data buffer.
+ * @param offset
+ *   The offset into PCI config space
+ * @param bar_idx
+ *   The pci bar index (valid range is 0..5)
+*/
+int rte_eal_pci_write_bar(const struct rte_pci_device *device,
+ const void *buf, size_t len, off_t offset,
+ int bar_idx);
+
+
+/**
  * Write PCI config space.
  *
  * @param device
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index bc5b5be..22d3321 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -621,6 +621,34 @@ int rte_eal_pci_write_config(const struct rte_pci_device 
*device,
}
 }

+int rte_eal_pci_read_bar(const struct rte_pci_device *device __rte_unused,
+void *buf __rte_unused,
+size_t len __rte_unused,
+off_t offset __rte_unused,
+int bar_idx __rte_unused)
+{
+#ifdef VFIO_PRESENT
+   const struct rte_intr_handle *intr_handle = >intr_handle;
+   return pci_vfio_read_bar(intr_handle, buf, len, offset, bar_idx);
+#else
+   return 0; /* UIO's not applicable */
+#endif
+}
+
+int rte_eal_pci_write_bar(const struct rte_pci_device *device __rte_unused,
+ const void *buf __rte_unused,
+ size_t len __rte_unused,
+ off_t offset __rte_unused,
+ int bar_idx __rte_unused)
+{
+#ifdef VFIO_PRESENT
+   const struct rte_intr_handle *intr_handle = >intr_handle;
+   return pci_vfio_write_bar(intr_handle, buf, len, offset, bar_idx);
+#else
+   return 0; /* UIO's not applicable */
+#endif
+}
+
 /* Init the PCI EAL subsystem */
 int
 rte_eal_pci_init(void)
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h 
b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
index a17c708..3bc592b 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
@@ -68,6 +68,12 @@ int pci_vfio_read_config(const struct rte_intr_handle 
*intr_handle,
 int pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
  const void *buf, size_t len, off_t offs);

+int pci_vfio_read_bar(const struct rte_intr_handle *intr_handle,
+ void *buf, size_t len, off_t offs, int bar_idx);
+
+int pci_vfio_write_bar(const struct rte_intr_handle *intr_handle,
+  const void *buf, size_t len, off_t offs, int bar_idx);
+
 /* map VFIO resource prototype */
 int pci_vfio_map_resource(struct rte_pci_device *dev);
 int pci_vfio_get_group_fd(int iommu_group_fd);
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c 
b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index 4077eb6..7ec203c 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -93,6 +93,34 @@ pci_vfio_write_config(const struct rte_intr_handle 
*intr_handle,
   VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs);
 }

+int
+pci_vfio_read_bar(const struct rte_intr_handle *intr_handle,
+ void *buf, size_t len, off_t offs, int bar_idx)
+{
+   if (bar_idx < VFIO_PCI_BAR0_REGION_INDEX
+  || 

[dpdk-dev] [PATCH v3 05/12] virtio_pci.h: build fix for sys/io.h for non-x86 arch

2016-01-07 Thread Santosh Shukla
make sure sys/io.h used only for x86 archs. This fixes build error
arm64/arm case.

Signed-off-by: Santosh Shukla 
---
 drivers/net/virtio/virtio_pci.h |2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 47f722a..8b5b031 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -40,8 +40,10 @@
 #include 
 #include 
 #else
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686)
 #include 
 #endif
+#endif

 #include 

-- 
1.7.9.5



[dpdk-dev] [PATCH v3 04/12] linuxapp/vfio: ignore mapping for ioport region

2016-01-07 Thread Santosh Shukla
vfio_pci_mmap() try to map all pci bars. ioport region ar not mapped in
vfio/kernel so ignore mmaping for ioport.

Signed-off-by: Santosh Shukla 
---
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c |   20 
 1 file changed, 20 insertions(+)

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c 
b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index 74f91ba..4077eb6 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -760,6 +760,26 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
return -1;
}

+   /* chk for io port region */
+   uint32_t ioport_bar;
+   ret = pread64(vfio_dev_fd, _bar, sizeof(ioport_bar),
+ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
+ + PCI_BASE_ADDRESS_0 + i*4);
+
+   if (ret != sizeof(ioport_bar)) {
+   RTE_LOG(ERR, EAL,
+   "Cannot read command (%x) from PCI config"
+   "space!\n", PCI_BASE_ADDRESS_0 + i*4);
+   return -1;
+   }
+
+   if (ioport_bar & PCI_BASE_ADDRESS_SPACE_IO) {
+   RTE_LOG(INFO, EAL, "\tIgnore mapping since Its a i/o"
+  "port bar (%d) addr : %x\n", i,
+  ioport_bar);
+   continue;
+   }
+
/* skip non-mmapable BARs */
if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
continue;
-- 
1.7.9.5



[dpdk-dev] [PATCH v3 03/12] linuxapp: eal: arm: Always return 0 for rte_eal_iopl_init()

2016-01-07 Thread Santosh Shukla
iopl() syscall not supported in linux-arm/arm64 so always return 0 value.

Signed-off-by: Santosh Shukla 
Acked-by: Jan Viktorin 
---
 lib/librte_eal/linuxapp/eal/eal.c |3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/librte_eal/linuxapp/eal/eal.c 
b/lib/librte_eal/linuxapp/eal/eal.c
index 635ec36..2617037 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -716,6 +716,9 @@ rte_eal_iopl_init(void)
return -1;
return 0;
 #else
+#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
+   return 0; /* iopl syscall not supported for ARM/ARM64 */
+#endif
return -1;
 #endif
 }
-- 
1.7.9.5



[dpdk-dev] [PATCH v3 02/12] config: i686: set RTE_VIRTIO_INC_VECTOR=n

2016-01-07 Thread Santosh Shukla
i686 target config example:
config/defconfig_i686-native-linuxapp-gcc says "Vectorized PMD is not supported
on 32-bit".

So setting RTE_VIRTIO_INC_VECTOR to 'n'.

Signed-off-by: Santosh Shukla 
---
 config/defconfig_i686-native-linuxapp-gcc |1 +
 config/defconfig_i686-native-linuxapp-icc |1 +
 2 files changed, 2 insertions(+)

diff --git a/config/defconfig_i686-native-linuxapp-gcc 
b/config/defconfig_i686-native-linuxapp-gcc
index a90de9b..a4b1c49 100644
--- a/config/defconfig_i686-native-linuxapp-gcc
+++ b/config/defconfig_i686-native-linuxapp-gcc
@@ -49,3 +49,4 @@ CONFIG_RTE_LIBRTE_KNI=n
 # Vectorized PMD is not supported on 32-bit
 #
 CONFIG_RTE_IXGBE_INC_VECTOR=n
+CONFIG_RTE_VIRTIO_INC_VECTOR=n
diff --git a/config/defconfig_i686-native-linuxapp-icc 
b/config/defconfig_i686-native-linuxapp-icc
index c021321..f8eb6ad 100644
--- a/config/defconfig_i686-native-linuxapp-icc
+++ b/config/defconfig_i686-native-linuxapp-icc
@@ -49,3 +49,4 @@ CONFIG_RTE_LIBRTE_KNI=n
 # Vectorized PMD is not supported on 32-bit
 #
 CONFIG_RTE_IXGBE_INC_VECTOR=n
+CONFIG_RTE_VIRTIO_INC_VECTOR=n
-- 
1.7.9.5



[dpdk-dev] [PATCH v3 01/12] virtio: Introduce config RTE_VIRTIO_INC_VECTOR

2016-01-07 Thread Santosh Shukla
virtio_recv_pkts_vec and other virtio vector friend apis are written for sse/avx
instructions. For arm64 in particular, virtio vector implementation does not
exist(todo).

So virtio pmd driver wont build for targets like i686, arm64.  By making
RTE_VIRTIO_INC_VECTOR=n, Driver can build for non-sse/avx targets and will work
in non-vectored virtio mode.

Signed-off-by: Santosh Shukla 
---
 config/common_linuxapp   |1 +
 drivers/net/virtio/Makefile  |2 +-
 drivers/net/virtio/virtio_rxtx.c |7 +++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 74bc515..8677697 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -274,6 +274,7 @@ CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n
 CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n
 CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DRIVER=n
 CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DUMP=n
+CONFIG_RTE_VIRTIO_INC_VECTOR=y

 #
 # Compile burst-oriented VMXNET3 PMD driver
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index 43835ba..25a842d 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -50,7 +50,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtqueue.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_pci.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
-SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c
+SRCS-$(CONFIG_RTE_VIRTIO_INC_VECTOR) += virtio_rxtx_simple.c

 # this lib depends upon:
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 74b39ef..23be1ff 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -438,7 +438,9 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,

dev->data->rx_queues[queue_idx] = vq;

+#ifdef RTE_VIRTIO_INC_VECTOR
virtio_rxq_vec_setup(vq);
+#endif

return 0;
 }
@@ -464,7 +466,10 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
const struct rte_eth_txconf *tx_conf)
 {
uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
+
+#ifdef RTE_VIRTIO_INC_VECTOR
struct virtio_hw *hw = dev->data->dev_private;
+#endif
struct virtqueue *vq;
uint16_t tx_free_thresh;
int ret;
@@ -477,6 +482,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
return -EINVAL;
}

+#ifdef RTE_VIRTIO_INC_VECTOR
/* Use simple rx/tx func if single segment and no offloads */
if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
 !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
@@ -485,6 +491,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
dev->rx_pkt_burst = virtio_recv_pkts_vec;
use_simple_rxtx = 1;
}
+#endif

ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
nb_desc, socket_id, );
-- 
1.7.9.5



[dpdk-dev] [PATCH v3 00/12] Add virtio support for arm/arm64

2016-01-07 Thread Santosh Shukla
This v3 patch uses vfio-noiommu-way to access virtio-net pci interface.
Tested for arm64 thunderX platform. Patch builds for
x86/i386/arm/armv8/thunderX.

The patchset has below dependancy:
- Kernel should be patched with Alex vfio-noiommu patch, available at linuz-next
[1].
- Anatoly's dpdk patch for vfio-noimmu, Included in this patch series but not
  for review :), It exist in this patchset only for user to pick complete blob 
and
  try it out.

Some Caveat:
- Only for x86 platform: Either of the config should be enable RTE_EAL_VFIO or 
IGB_UIO
  Right now by default both enabled. It lead to virtio pmd driver init error, as
  because v3 appraoch depends on RTE_PCI_DRV_NEED_MAPPING flag so ioport
  resource field wont update for IGB_UO case. I am working on below approach
- Introduce new drv_flag which does not do mapping (as v3 vfio
  approach doesn't care for mapping) and only initializes pci interface
  for PCI bars (in particualr ioport pci bar) rd/wr operation. It will
  be useful to rd/wr pci bars, Currently required for vfio/virtio case.
v3:
Major changes:
- Patch set uses vfio-noimmu approach to access virtio ioport bar
- Dropped rte_io.h abstraction changes done in v2 series as because vfio-noimmu
  doesn't do rd/wr access on mmaped memory so wont care for x86 style api ie..
  in/out[b,w,l] Although I am working on other patch series which will get rid
  of ifdef "sys/io.h" clutter.

Patch summary:
- Top 3 patches are old v2 patch
- 4 to 9 patches are vfio/virtio specifics.
- 10th patch is v2 patch
- Last patch is build fix patch of anatoly(s) vfio-noiommu patch.

v2:
- Removed ifdef arm.. clutter from igb_uio / virtio_ethedev files
- Introduced rte_io.h header file in generic/ and arch specifics i.e.. for
  armv7 --> rte_io_32.h, for armv8 --> rte_io_64.h.
- Removed RTE_ARCH_X86 ifdef clutter too and added rte_io.h header which nothing
  but wraps sys/io.h for x86_64 and i686
- Moved all the RTE_ARCH_ARM/64 dependancy for igb_uio case to separate header
  file named igbuio_ioport_misc.h. Now igb_uio.c will call only three function
   - igbuio_iomap
   - igbuio_ioport_register
   - igbuio_ioport_unregister
- Moved ARM/64 specific definition to include/exec-env/rte_virt_ioport.h header
- Included virtio_ioport.c/h; has all private and public api required to map
  iopci bar for non-x86 arch. Tested on thunderX and x86_64 both.
  Private api includes:
- virtio_map_ioport
- virtio_set_ioport_addr
  Public api includes:
- virtio_ioport_init
- virtio_ioport_unmap

- Last patch is the miscllanious format specifier fix identifid for 64bit case
  during regression.


v1:
- First patch adds RTE_VIRTIO_INC_VECTOR config, much needed for archs like
  arm/arm64 as they don't support vectored implementation, also wont able to
  build.
- Second patch is in-general fix for i686.
- Third patch is to emulate x86-style of {in,out}[b,w,l] api support for 
armv7/v8.
  As virtio-net-pci pmd driver uses those apis for port rd/wr {b,w,l}
- Fourth patch to enable VIRTIO_PMD feature in armv7/v8/thunderX config.
- Fifth patch to disable iopl syscall, As arm/arm64 linux kernel doesn't support
  them.
- Sixth patch introduces ioport memdevice called /dev/igb_ioport by which virtio
  pmd driver could able to rd/wr PCI_IOBAR.
  {applicable for arm/arm64 only, tested for arm64 as of now}


[1] https://lwn.net/Articles/660745/

Anatoly Burakov (1):
  vfio: Support for no-IOMMU mode

Rizwan Ansari (1):
  eal: pci: vfio: fix build error

Santosh Shukla (10):
  virtio: Introduce config RTE_VIRTIO_INC_VECTOR
  config: i686: set RTE_VIRTIO_INC_VECTOR=n
  linuxapp: eal: arm: Always return 0 for rte_eal_iopl_init()
  linuxapp/vfio: ignore mapping for ioport region
  virtio_pci.h: build fix for sys/io.h for non-x86 arch
  eal: pci: vfio: add rd/wr func for pci bar space
  virtio: vfio: add api support to rd/wr ioport bar
  virtio: Add capability to initialize driver for vfio interface
  virtio: vfio: Enable RTE_PCI_DRV_NEED_MAPPING flag in driver
  config: armv7/v8: Enable RTE_LIBRTE_VIRTIO_PMD

 config/common_linuxapp |1 +
 config/defconfig_arm-armv7a-linuxapp-gcc   |6 +-
 config/defconfig_arm64-armv8a-linuxapp-gcc |6 +-
 config/defconfig_i686-native-linuxapp-gcc  |1 +
 config/defconfig_i686-native-linuxapp-icc  |1 +
 drivers/net/virtio/Makefile|2 +-
 drivers/net/virtio/virtio_ethdev.c |   15 +-
 drivers/net/virtio/virtio_pci.h|   11 ++
 drivers/net/virtio/virtio_rxtx.c   |7 +
 drivers/net/virtio/virtio_vfio.h   |  146 ++
 lib/librte_eal/common/include/rte_pci.h|   38 +
 lib/librte_eal/linuxapp/eal/Makefile   |1 +
 lib/librte_eal/linuxapp/eal/eal.c  |3 +
 lib/librte_eal/linuxapp/eal/eal_pci.c  |   28 
 lib/librte_eal/linuxapp/eal/eal_pci_init.h |   28 
 

[dpdk-dev] [PATCH v3 06/12] eal: pci: vfio: add rd/wr func for pci bar space

2016-01-07 Thread Wiles, Keith
On 1/7/16, 12:19 PM, "dev on behalf of Stephen Hemminger"  wrote:

>On Thu,  7 Jan 2016 22:03:03 +0530
>Santosh Shukla  wrote:
>
>>  
>> +int rte_eal_pci_read_bar(const struct rte_pci_device *device __rte_unused,
>> + void *buf __rte_unused,
>> + size_t len __rte_unused,
>> + off_t offset __rte_unused,
>> + int bar_idx __rte_unused)
>> +{
>> +#ifdef VFIO_PRESENT
>> +const struct rte_intr_handle *intr_handle = >intr_handle;
>> +return pci_vfio_read_bar(intr_handle, buf, len, offset, bar_idx);
>> +#else
>> +return 0; /* UIO's not applicable */
>> +#endif
>> +}
>
>It seems wrong to declare all the parameters as unused but then use them.
>Maybe there is a way to have a macro for USED(x) in the #else case

I would suggest we create a macro '#define RTE_UNUSED(x)  ((void)x)?, unless we 
have one and I missed it groping though the code.
>


Regards,
Keith






[dpdk-dev] [PATCH v3 12/12] eal: pci: vfio: fix build error

2016-01-07 Thread Burakov, Anatoly
Hi Santosh,

> Patch fixes below build error:
> /home/mv/work/thunder/santosh/dpdk/santosh/dpdk/lib/librte_eal/linuxa
> pp/eal/eal_pci_vfio.c:
> In function ?pci_vfio_set_iommu_type?:
> /home/mv/work/thunder/santosh/dpdk/santosh/dpdk/lib/librte_eal/linuxa
> pp/eal/eal_pci_vfio.c:243:2:
> error: ?for? loop initial declarations are only allowed in C99 mode
> for (unsigned idx = 0; idx < RTE_DIM(iommu_types); idx++) {
>   ^
> /home/mv/work/thunder/santosh/dpdk/santosh/dpdk/lib/librte_eal/linuxa
> pp/eal/eal_pci_vfio.c:243:2:
> note: use option -std=c99 or -std=gnu99 to compile your code
> 
> Signed-off-by: Rizwan Ansari 
> Signed-off-by: Santosh Shukla 

Oops, thanks, I'll submit a v2.

Thanks,
Anatoly


[dpdk-dev] [PATCH v2 4/4] virtio: check if any kernel driver is manipulating the virtio device

2016-01-07 Thread Panu Matilainen
On 01/03/2016 07:56 PM, Huawei Xie wrote:
> v2 changes:
>   change LOG level from ERR to INFO
>
> virtio PMD could use IO port to configure the virtio device without
> using uio driver.
>
> There are two issues with previous implementation:
> 1) virtio PMD will take over each virtio device blindly even if some
> are not intended for DPDK.
> 2) driver conflict between virtio PMD and virtio-net kernel driver.
>
> This patch checks if there is any kernel driver manipulating the virtio
> device before virtio PMD uses IO port to configure the device.
>
> Fixes: da978dfdc43b ("virtio: use port IO to get PCI resource")
>
> Signed-off-by: Huawei Xie 
> ---
>   drivers/net/virtio/virtio_ethdev.c | 7 +++
>   1 file changed, 7 insertions(+)
>
> diff --git a/drivers/net/virtio/virtio_ethdev.c 
> b/drivers/net/virtio/virtio_ethdev.c
> index e815acd..7a50dac 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -1138,6 +1138,13 @@ static int virtio_resource_init_by_ioports(struct 
> rte_pci_device *pci_dev)
>   int found = 0;
>   size_t linesz;
>
> + if (pci_dev->kdrv != RTE_KDRV_NONE) {
> + PMD_INIT_LOG(INFO,
> + "kernel driver is manipulating this device." \
> + " Please unbind the kernel driver.");

At the very least this message needs to be changed.

Like said earlier, I think the message could just as well be dropped 
entirely, but at least it should be something to the tune of "ignoring 
kernel owned device" instead of asking the user to break their 
configuration.

- Panu -


[dpdk-dev] [PATCH v5 1/1] eal/linux: change hugepage sorting to avoid overlapping memcpy

2016-01-07 Thread Ralf Hoffmann
with only one hugepage or already sorted hugepage addresses, the sort
function called memcpy with same src and dst pointer. Debugging with
valgrind will issue a warning about overlapping area. This patch changes
the sort method to qsort to avoid this behavior. The separate sort
function is no longer necessary.

Signed-off-by: Ralf Hoffmann 
Acked-by: Sergio Gonzalez Monroy 
---
v5:
* set commit title to eal/linux

v4:
* add linebreak to qsort statement

v3:
* set commit title to eal/linux

v2:

* incorporate patch from http://dpdk.org/dev/patchwork/patch/2061/
  to use qsort instead of bubble sort,
  original patch by Jay Rolette 

 lib/librte_eal/linuxapp/eal/eal_memory.c | 61 
 1 file changed, 15 insertions(+), 46 deletions(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c 
b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 846fd31..c0e510b 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -701,54 +701,23 @@ error:
return -1;
 }

-/*
- * Sort the hugepg_tbl by physical address (lower addresses first on x86,
- * higher address first on powerpc). We use a slow algorithm, but we won't
- * have millions of pages, and this is only done at init time.
- */
 static int
-sort_by_physaddr(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+cmp_physaddr(const void *a, const void *b)
 {
-   unsigned i, j;
-   int compare_idx;
-   uint64_t compare_addr;
-   struct hugepage_file tmp;
-
-   for (i = 0; i < hpi->num_pages[0]; i++) {
-   compare_addr = 0;
-   compare_idx = -1;
-
-   /*
-* browse all entries starting at 'i', and find the
-* entry with the smallest addr
-*/
-   for (j=i; j< hpi->num_pages[0]; j++) {
-
-   if (compare_addr == 0 ||
-#ifdef RTE_ARCH_PPC_64
-   hugepg_tbl[j].physaddr > compare_addr) {
+#ifndef RTE_ARCH_PPC_64
+   const struct hugepage_file *p1 = (const struct hugepage_file *)a;
+   const struct hugepage_file *p2 = (const struct hugepage_file *)b;
 #else
-   hugepg_tbl[j].physaddr < compare_addr) {
+   /* PowerPC needs memory sorted in reverse order from x86 */
+   const struct hugepage_file *p1 = (const struct hugepage_file *)b;
+   const struct hugepage_file *p2 = (const struct hugepage_file *)a;
 #endif
-   compare_addr = hugepg_tbl[j].physaddr;
-   compare_idx = j;
-   }
-   }
-
-   /* should not happen */
-   if (compare_idx == -1) {
-   RTE_LOG(ERR, EAL, "%s(): error in physaddr sorting\n", 
__func__);
-   return -1;
-   }
-
-   /* swap the 2 entries in the table */
-   memcpy(, _tbl[compare_idx],
-   sizeof(struct hugepage_file));
-   memcpy(_tbl[compare_idx], _tbl[i],
-   sizeof(struct hugepage_file));
-   memcpy(_tbl[i], , sizeof(struct hugepage_file));
-   }
-   return 0;
+   if (p1->physaddr < p2->physaddr)
+   return -1;
+   else if (p1->physaddr > p2->physaddr)
+   return 1;
+   else
+   return 0;
 }

 /*
@@ -1195,8 +1164,8 @@ rte_eal_hugepage_init(void)
goto fail;
}

-   if (sort_by_physaddr(_hp[hp_offset], hpi) < 0)
-   goto fail;
+   qsort(_hp[hp_offset], hpi->num_pages[0],
+ sizeof(struct hugepage_file), cmp_physaddr);

 #ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
/* remap all hugepages into single file segments */
-- 
2.5.0



[dpdk-dev] [PATCH v4 1/1] change hugepage sorting to avoid overlapping memcpy

2016-01-07 Thread Ralf Hoffmann
with only one hugepage or already sorted hugepage addresses, the sort
function called memcpy with same src and dst pointer. Debugging with
valgrind will issue a warning about overlapping area. This patch changes
the sort method to qsort to avoid this behavior. The separate sort
function is no longer necessary.

Signed-off-by: Ralf Hoffmann 
---
v4:
* add linebreak to qsort statement

v3:
* set commit title to eal/linux

v2:

* incorporate patch from http://dpdk.org/dev/patchwork/patch/2061/
  to use qsort instead of bubble sort,
  original patch by Jay Rolette 

 lib/librte_eal/linuxapp/eal/eal_memory.c | 61 
 1 file changed, 15 insertions(+), 46 deletions(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c 
b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 846fd31..c0e510b 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -701,54 +701,23 @@ error:
return -1;
 }

-/*
- * Sort the hugepg_tbl by physical address (lower addresses first on x86,
- * higher address first on powerpc). We use a slow algorithm, but we won't
- * have millions of pages, and this is only done at init time.
- */
 static int
-sort_by_physaddr(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+cmp_physaddr(const void *a, const void *b)
 {
-   unsigned i, j;
-   int compare_idx;
-   uint64_t compare_addr;
-   struct hugepage_file tmp;
-
-   for (i = 0; i < hpi->num_pages[0]; i++) {
-   compare_addr = 0;
-   compare_idx = -1;
-
-   /*
-* browse all entries starting at 'i', and find the
-* entry with the smallest addr
-*/
-   for (j=i; j< hpi->num_pages[0]; j++) {
-
-   if (compare_addr == 0 ||
-#ifdef RTE_ARCH_PPC_64
-   hugepg_tbl[j].physaddr > compare_addr) {
+#ifndef RTE_ARCH_PPC_64
+   const struct hugepage_file *p1 = (const struct hugepage_file *)a;
+   const struct hugepage_file *p2 = (const struct hugepage_file *)b;
 #else
-   hugepg_tbl[j].physaddr < compare_addr) {
+   /* PowerPC needs memory sorted in reverse order from x86 */
+   const struct hugepage_file *p1 = (const struct hugepage_file *)b;
+   const struct hugepage_file *p2 = (const struct hugepage_file *)a;
 #endif
-   compare_addr = hugepg_tbl[j].physaddr;
-   compare_idx = j;
-   }
-   }
-
-   /* should not happen */
-   if (compare_idx == -1) {
-   RTE_LOG(ERR, EAL, "%s(): error in physaddr sorting\n", 
__func__);
-   return -1;
-   }
-
-   /* swap the 2 entries in the table */
-   memcpy(, _tbl[compare_idx],
-   sizeof(struct hugepage_file));
-   memcpy(_tbl[compare_idx], _tbl[i],
-   sizeof(struct hugepage_file));
-   memcpy(_tbl[i], , sizeof(struct hugepage_file));
-   }
-   return 0;
+   if (p1->physaddr < p2->physaddr)
+   return -1;
+   else if (p1->physaddr > p2->physaddr)
+   return 1;
+   else
+   return 0;
 }

 /*
@@ -1195,8 +1164,8 @@ rte_eal_hugepage_init(void)
goto fail;
}

-   if (sort_by_physaddr(_hp[hp_offset], hpi) < 0)
-   goto fail;
+   qsort(_hp[hp_offset], hpi->num_pages[0],
+ sizeof(struct hugepage_file), cmp_physaddr);

 #ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
/* remap all hugepages into single file segments */
-- 
2.5.0



[dpdk-dev] [PATCH v3 1/1] eal/linux: change hugepage sorting to avoid overlapping memcpy

2016-01-07 Thread Ralf Hoffmann
Hi Sergio,

On 07.01.2016 11:38, Sergio Gonzalez Monroy wrote:
>> Forgot to mention that there is a checkpatch warning:
>> WARNING:LONG_LINE: line over 90 characters
>> #113: FILE: lib/librte_eal/linuxapp/eal/eal_memory.c:1167:
>> +   qsort(_hp[hp_offset], hpi->num_pages[0],
>> sizeof(struct hugepage_file), cmp_physaddr);
>>
>> Could you fix that Ralf?
>>
>> Thanks,
>> Sergio
> 
> Just FYI, there is a new DPDK Contributors guide:
> http://dpdk.org/doc/guides/contributing/patches.html

I will send an updated patch right away, fixing that warning.

I was aware of that guide, it's really good. Interestingly I have
already used the checkpatches.sh script as described on that page, but
it just said

"1/1 valid patch"

so I assumed it was ok. But indeed, running the checkpath.pl manually
showed that line size warning, thanks for the hint. The checkpatches.sh
only checks the return code, so it does not see the warning.

Best Regards,

Ralf



[dpdk-dev] [PATCH v4 1/1] change hugepage sorting to avoid overlapping memcpy

2016-01-07 Thread Sergio Gonzalez Monroy
Hi Ralf,

It seems like the title update you did for v3 is missing in this patch.

On 07/01/2016 13:59, Ralf Hoffmann wrote:
> with only one hugepage or already sorted hugepage addresses, the sort
> function called memcpy with same src and dst pointer. Debugging with
> valgrind will issue a warning about overlapping area. This patch changes
> the sort method to qsort to avoid this behavior. The separate sort
> function is no longer necessary.
>
> Signed-off-by: Ralf Hoffmann 
> ---
>
Once you fix that you can add my Acked-by to your commit message.

Acked-by: Sergio Gonzalez Monroy 

By the way, just a reminder to update the status of previous patches in 
patchwork as 'superseded'.

Regards,
Sergio


[dpdk-dev] [PATCH 01/12] ethdev: add API to query what/if packet type is set

2016-01-07 Thread Adrien Mazarguil
On Thu, Jan 07, 2016 at 10:24:19AM +, Ananyev, Konstantin wrote:
> 
> 
> > -Original Message-
> > From: Adrien Mazarguil [mailto:adrien.mazarguil at 6wind.com]
> > Sent: Wednesday, January 06, 2016 5:23 PM
> > To: Ananyev, Konstantin
> > Cc: N?lio Laranjeiro; Tan, Jianfeng; dev at dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH 01/12] ethdev: add API to query what/if 
> > packet type is set
> > 
> > On Wed, Jan 06, 2016 at 04:44:43PM +, Ananyev, Konstantin wrote:
> > >
> > >
> > > > -Original Message-
> > > > From: Adrien Mazarguil [mailto:adrien.mazarguil at 6wind.com]
> > > > Sent: Wednesday, January 06, 2016 3:45 PM
> > > > To: Ananyev, Konstantin
> > > > Cc: N?lio Laranjeiro; Tan, Jianfeng; dev at dpdk.org
> > > > Subject: Re: [dpdk-dev] [PATCH 01/12] ethdev: add API to query what/if 
> > > > packet type is set
> > > >
> > > > On Wed, Jan 06, 2016 at 02:29:07PM +, Ananyev, Konstantin wrote:
> > > > >
> > > > >
> > > > > > -Original Message-
> > > > > > From: Adrien Mazarguil [mailto:adrien.mazarguil at 6wind.com]
> > > > > > Sent: Wednesday, January 06, 2016 10:01 AM
> > > > > > To: Ananyev, Konstantin
> > > > > > Cc: N?lio Laranjeiro; Tan, Jianfeng; dev at dpdk.org
> > > > > > Subject: Re: [dpdk-dev] [PATCH 01/12] ethdev: add API to query 
> > > > > > what/if packet type is set
> > > > > >
> > > > > > On Tue, Jan 05, 2016 at 04:50:31PM +, Ananyev, Konstantin wrote:
> > > > > > [...]
> > > > > > > > -Original Message-
> > > > > > > > From: N?lio Laranjeiro [mailto:nelio.laranjeiro at 6wind.com]
> > > > > > [...]
> > > > > > > > I think we miss a comment here in how those 2/6/4 values are 
> > > > > > > > chosen
> > > > > > > > because, according to the mask, I expect 16 possibilities but I 
> > > > > > > > get
> > > > > > > > less.  It will help a lot anyone who needs to add a new type.
> > > > > > > >
> > > > > > > > Extending the snprintf behavior above, it is best to remove the 
> > > > > > > > mask
> > > > > > > > argument altogether and have rte_eth_dev_get_ptype_info() 
> > > > > > > > return the
> > > > > > > > entire list every time.  Applications need to iterate on the 
> > > > > > > > result in
> > > > > > > > any case.
> > > > > > >
> > > > > > > I think we'd better keep mask argument.
> > > > > > > In many cases upper layer only interested in some particular  
> > > > > > > subset of
> > > > > > > all packet types that HW can recognise.
> > > > > > > Let say l3fwd only cares about  RTE_PTYPE_L3_MASK, it is not 
> > > > > > > interested in L4,
> > > > > > > tunnelling packet types, etc.
> > > > > > > If caller needs to know all recognised ptypes, he can set 
> > > > > > > mask==-1,
> > > > > > > In that case all supported packet types will be returned.
> > > > > >
> > > > > > There are other drawbacks to the mask argument in my opinion. The 
> > > > > > API will
> > > > > > have to be updated again as soon as 32 bits aren't enough to 
> > > > > > represent all
> > > > > > possible masks. We can't predict it will be large enough forever 
> > > > > > but on the
> > > > > > other hand, using uint64_t seems overkill at this point.
> > > > >
> > > > > Inside rte_mbuf packet_type itself is a 32 bit value.
> > > > > These 32 bits are divided into several fields to mark packet types,
> > > > > i.e: bits [0-3] are for all possible L2 types, bits [4-7] for L3 
> > > > > types, etc.
> > > > > As long as packet_type itself is 32bits, 32bit mask is sufficient.
> > > > > If we'll ever run out of 32 bits in packet_type itself, it will be 
> > > > > ABI change anyway.
> > > >
> > > > Sure, however why not do it now this issue has been raised so this 
> > > > function
> > > > doesn't need updating the day it breaks? I know there's a million other
> > > > places with a similar problem but I'm all for making new code future 
> > > > proof.
> > >
> > > If rte_mbuf packet_type will have to be increased to 64bit long, then
> > > this function will have to change anyway (with or without mask parameter).
> > > It will have to become:
> > >
> > > rte_eth_dev_get_ptype_info(uint8_t portid, uint64_t ptypes[], ...)
> > >
> > > So I think we don't have to worry about mask parameter itself.
> > 
> > Well, yes, besides I overlooked ptypes[] itself is 32 bit, working around
> > the type width of the mask wouldn't help much.
> > 
> > > > Perhaps in this particular case there is no way to hit the limit 
> > > > (although
> > > > there are only four unused bits left to extend RTE_PTYPE masks) but 
> > > > we've
> > > > seen this happen too many times with subsequent ABI breakage.
> > >
> > > When ptype was introduced we tried to reserve some free space for each 
> > > layer (L2/L3/L4/...),
> > > so it wouldn't be overrun immediately.
> > > But of course if there would be a new HW that can recognise dozen new 
> > > packet types - it is possible.
> > > Do you have any particular use-case in mind?
> > 
> > No, that was just to illustrate my point.
> > 
> > > > > > I think this use 

[dpdk-dev] [PATCH v2 4/4] virtio: check if any kernel driver is manipulating the virtio device

2016-01-07 Thread Ferruh Yigit
On Mon, Jan 04, 2016 at 05:56:49PM +, Xie, Huawei wrote:
> On 1/5/2016 1:24 AM, Stephen Hemminger wrote:
> > On Mon,  4 Jan 2016 01:56:13 +0800
> > Huawei Xie  wrote:
> >
> >> +  if (pci_dev->kdrv != RTE_KDRV_NONE) {
> >> +  PMD_INIT_LOG(INFO,
> >> +  "kernel driver is manipulating this device." \
> >> +  " Please unbind the kernel driver.");
> > Splitting strings in general is a bad idea since it makes it harder to find 
> > log messages.
> > Also the first clause is lower case and the second is captialized.
> Got it. This is to avoid 80 char warning. Will put it in one line to
> make it friendly for searching.
> The first clause is lower is because it actually follows "%s():".

Indeed checkpatch lets logging functions be longer than 80 chars, so does not 
forces to split message.
But checkpatch detects kernel logging functions only, not ours, that is why we 
are still getting over 80 chars warning.

Should we ignore LONG_LINE_STRING warnings in checkpatch?

Thanks,
ferruh 


[dpdk-dev] [PATCH] pmd/virtio: fix cannot start virtio dev after stop

2016-01-07 Thread Yuanhan Liu
On Tue, Jan 05, 2016 at 09:07:42AM +0800, Jianfeng Tan wrote:
> Fix the issue that virtio device cannot be started after stopped.
> 
> The field, hw->started, should be changed by virtio_dev_start/stop instead
> of virtio_dev_close.
> 
> Signed-off-by: Jianfeng Tan 

Acked-by: Yuanhan Liu 

BTW, if I'm not mistaken, it's an issue reported from Pavel. If so,
you should add following above your Signed-off-by.

Reported-by: Pavel Fedin 

Note that if you send v2, you can carry my ACK, as you just do
trivial changes.

--yliu


[dpdk-dev] [PATCH v3 06/12] eal: pci: vfio: add rd/wr func for pci bar space

2016-01-07 Thread Stephen Hemminger
On Thu, 7 Jan 2016 18:26:30 +
"Wiles, Keith"  wrote:

> On 1/7/16, 12:19 PM, "dev on behalf of Stephen Hemminger"  dpdk.org on behalf of stephen at networkplumber.org> wrote:
> 
> >On Thu,  7 Jan 2016 22:03:03 +0530
> >Santosh Shukla  wrote:
> >
> >>  
> >> +int rte_eal_pci_read_bar(const struct rte_pci_device *device __rte_unused,
> >> +   void *buf __rte_unused,
> >> +   size_t len __rte_unused,
> >> +   off_t offset __rte_unused,
> >> +   int bar_idx __rte_unused)
> >> +{
> >> +#ifdef VFIO_PRESENT
> >> +  const struct rte_intr_handle *intr_handle = >intr_handle;
> >> +  return pci_vfio_read_bar(intr_handle, buf, len, offset, bar_idx);
> >> +#else
> >> +  return 0; /* UIO's not applicable */
> >> +#endif
> >> +}
> >
> >It seems wrong to declare all the parameters as unused but then use them.
> >Maybe there is a way to have a macro for USED(x) in the #else case
> 
> I would suggest we create a macro '#define RTE_UNUSED(x)  ((void)x)?, unless 
> we have one and I missed it groping though the code.
> >
> 
> 
> Regards,
> Keith
> 
> 
> 
> 

Or just move the #ifdef outside the function and have two versions

#ifdef VFIO_PRESENT
int rte_eal_pci_read_bar(const struct rte_pci_device *device,
 void *buf, size_t len,
  off_t offset, int bar_idx)
{
const struct rte_intr_handle *intr_handle = >intr_handle;

return pci_vfio_read_bar(intr_handle, buf, len, offset, bar_idx);
}
}
#else
int rte_eal_pci_read_bar(const struct rte_pci_device *device __rte_unused,
 void *buf __rte_unused,
 size_t len __rte_unused,
 off_t offset __rte_unused,
 int bar_idx __rte_unused)
{
return 0; /* UIO's not applicable */
}
#endif


[dpdk-dev] [PATCH v3 1/1] eal/linux: change hugepage sorting to avoid overlapping memcpy

2016-01-07 Thread Sergio Gonzalez Monroy
On 07/01/2016 09:51, Sergio Gonzalez Monroy wrote:
> On 07/01/2016 09:33, Sergio Gonzalez Monroy wrote:
>> On 05/01/2016 09:37, Ralf Hoffmann wrote:
>>> with only one hugepage or already sorted hugepage addresses, the sort
>>> function called memcpy with same src and dst pointer. Debugging with
>>> valgrind will issue a warning about overlapping area. This patch
>>> changes the sort method to qsort to avoid this behavior, according to
>>> original patch from Jay Rolette . The separate
>>> sort function is no longer necessary.
>>>
>>> Signed-off-by: Ralf Hoffmann 
>>> ---
>>>
>> Acked-by: Sergio Gonzalez Monroy 
> Forgot to mention that there is a checkpatch warning:
> WARNING:LONG_LINE: line over 90 characters
> #113: FILE: lib/librte_eal/linuxapp/eal/eal_memory.c:1167:
> +   qsort(_hp[hp_offset], hpi->num_pages[0], 
> sizeof(struct hugepage_file), cmp_physaddr);
>
> Could you fix that Ralf?
>
> Thanks,
> Sergio

Just FYI, there is a new DPDK Contributors guide:
http://dpdk.org/doc/guides/contributing/patches.html

Regards,
Sergio


[dpdk-dev] [PATCH] vhost: fix leak of fds and mmaps

2016-01-07 Thread Yuanhan Liu
On Tue, Jan 05, 2016 at 02:14:09PM -0800, Rich Lane wrote:
> The common vhost code only supported a single mmap per device. vhost-user
> worked around this by saving the address/length/fd of each mmap after the end
> of the rte_virtio_memory struct. This only works if the vhost-user code frees
> dev->mem, since the common code is unaware of the extra info. The
> VHOST_USER_RESET_OWNER message is one situation where the common code frees
> dev->mem and leaks the fds and mappings. This happens every time I shut down a
> VM.

That is a good catch! But I don't think it needs the complexity your
patch has to fix it. Besides that, your patch has ABI change, which
is not acceptable at this stage.

Back to the issue, the thing is that, mmap/unmap is vhost-user/vhost-cuse
specific, thus we'd better to handle them inside vhost-user/vhost-cuse
differently, but not in the common path, virtio-net.c: let the common
path handle common things only. That would make the logic clear, and
hence less error prone.

Note that we have already handled the mmap inside vhost-user/vhost-cuse,
thinking of that way, there is no reason to handle unmap at virtio-net.c:
it should be a historic issue while we added vhost-cuse first, which will
not be an issue until we added vhost-user.

Another note is that you should not name an internal function with "rte_"
prefix; it should be reserved for public functions.

--yliu


[dpdk-dev] [PATCH 01/12] ethdev: add API to query what/if packet type is set

2016-01-07 Thread Ananyev, Konstantin


> -Original Message-
> From: Adrien Mazarguil [mailto:adrien.mazarguil at 6wind.com]
> Sent: Wednesday, January 06, 2016 5:23 PM
> To: Ananyev, Konstantin
> Cc: N?lio Laranjeiro; Tan, Jianfeng; dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 01/12] ethdev: add API to query what/if packet 
> type is set
> 
> On Wed, Jan 06, 2016 at 04:44:43PM +, Ananyev, Konstantin wrote:
> >
> >
> > > -Original Message-
> > > From: Adrien Mazarguil [mailto:adrien.mazarguil at 6wind.com]
> > > Sent: Wednesday, January 06, 2016 3:45 PM
> > > To: Ananyev, Konstantin
> > > Cc: N?lio Laranjeiro; Tan, Jianfeng; dev at dpdk.org
> > > Subject: Re: [dpdk-dev] [PATCH 01/12] ethdev: add API to query what/if 
> > > packet type is set
> > >
> > > On Wed, Jan 06, 2016 at 02:29:07PM +, Ananyev, Konstantin wrote:
> > > >
> > > >
> > > > > -Original Message-
> > > > > From: Adrien Mazarguil [mailto:adrien.mazarguil at 6wind.com]
> > > > > Sent: Wednesday, January 06, 2016 10:01 AM
> > > > > To: Ananyev, Konstantin
> > > > > Cc: N?lio Laranjeiro; Tan, Jianfeng; dev at dpdk.org
> > > > > Subject: Re: [dpdk-dev] [PATCH 01/12] ethdev: add API to query 
> > > > > what/if packet type is set
> > > > >
> > > > > On Tue, Jan 05, 2016 at 04:50:31PM +, Ananyev, Konstantin wrote:
> > > > > [...]
> > > > > > > -Original Message-
> > > > > > > From: N?lio Laranjeiro [mailto:nelio.laranjeiro at 6wind.com]
> > > > > [...]
> > > > > > > I think we miss a comment here in how those 2/6/4 values are 
> > > > > > > chosen
> > > > > > > because, according to the mask, I expect 16 possibilities but I 
> > > > > > > get
> > > > > > > less.  It will help a lot anyone who needs to add a new type.
> > > > > > >
> > > > > > > Extending the snprintf behavior above, it is best to remove the 
> > > > > > > mask
> > > > > > > argument altogether and have rte_eth_dev_get_ptype_info() return 
> > > > > > > the
> > > > > > > entire list every time.  Applications need to iterate on the 
> > > > > > > result in
> > > > > > > any case.
> > > > > >
> > > > > > I think we'd better keep mask argument.
> > > > > > In many cases upper layer only interested in some particular  
> > > > > > subset of
> > > > > > all packet types that HW can recognise.
> > > > > > Let say l3fwd only cares about  RTE_PTYPE_L3_MASK, it is not 
> > > > > > interested in L4,
> > > > > > tunnelling packet types, etc.
> > > > > > If caller needs to know all recognised ptypes, he can set mask==-1,
> > > > > > In that case all supported packet types will be returned.
> > > > >
> > > > > There are other drawbacks to the mask argument in my opinion. The API 
> > > > > will
> > > > > have to be updated again as soon as 32 bits aren't enough to 
> > > > > represent all
> > > > > possible masks. We can't predict it will be large enough forever but 
> > > > > on the
> > > > > other hand, using uint64_t seems overkill at this point.
> > > >
> > > > Inside rte_mbuf packet_type itself is a 32 bit value.
> > > > These 32 bits are divided into several fields to mark packet types,
> > > > i.e: bits [0-3] are for all possible L2 types, bits [4-7] for L3 types, 
> > > > etc.
> > > > As long as packet_type itself is 32bits, 32bit mask is sufficient.
> > > > If we'll ever run out of 32 bits in packet_type itself, it will be ABI 
> > > > change anyway.
> > >
> > > Sure, however why not do it now this issue has been raised so this 
> > > function
> > > doesn't need updating the day it breaks? I know there's a million other
> > > places with a similar problem but I'm all for making new code future 
> > > proof.
> >
> > If rte_mbuf packet_type will have to be increased to 64bit long, then
> > this function will have to change anyway (with or without mask parameter).
> > It will have to become:
> >
> > rte_eth_dev_get_ptype_info(uint8_t portid, uint64_t ptypes[], ...)
> >
> > So I think we don't have to worry about mask parameter itself.
> 
> Well, yes, besides I overlooked ptypes[] itself is 32 bit, working around
> the type width of the mask wouldn't help much.
> 
> > > Perhaps in this particular case there is no way to hit the limit (although
> > > there are only four unused bits left to extend RTE_PTYPE masks) but we've
> > > seen this happen too many times with subsequent ABI breakage.
> >
> > When ptype was introduced we tried to reserve some free space for each 
> > layer (L2/L3/L4/...),
> > so it wouldn't be overrun immediately.
> > But of course if there would be a new HW that can recognise dozen new 
> > packet types - it is possible.
> > Do you have any particular use-case in mind?
> 
> No, that was just to illustrate my point.
> 
> > > > > I think this use for masks should be avoided when performance does not
> > > > > matter much, as in this case, user application cannot know the number 
> > > > > of
> > > > > entries in advance and must rely on the returned value to iterate.
> > > >
> > > > User doesn't know numbers of entries in advance anyway (with and 
> > > > 

[dpdk-dev] Traffic scheduling in DPDK

2016-01-07 Thread ravulakollu.ku...@wipro.com
Thanks Jasvinder for your quick response.

-Original Message-
From: Singh, Jasvinder [mailto:jasvinder.si...@intel.com]
Sent: Thursday, January 07, 2016 3:44 PM
To: Ravulakollu Udaya Kumar (WT01 - Product Engineering Service)
Cc: dev at dpdk.org
Subject: RE: [dpdk-dev] Traffic scheduling in DPDK

Hi Uday,


> -Original Message-
> From: ravulakollu.kumar at wipro.com [mailto:ravulakollu.kumar at wipro.com]
> Sent: Thursday, January 7, 2016 6:29 AM
> To: Singh, Jasvinder
> Cc: dev at dpdk.org
> Subject: RE: [dpdk-dev] Traffic scheduling in DPDK
>
> Hi Jasvinder,
>
> Below is my system configuration
>
> Hugepages:
> --
> [root at qos_sched]# grep -i huge /proc/meminfo
> AnonHugePages:  4096 kB
> HugePages_Total:8000
> HugePages_Free: 7488
> HugePages_Rsvd:0
> HugePages_Surp:0
> Hugepagesize:   2048 kB
>
> NUMA Nodes:
> --
> NUMA node0 CPU(s): 0,2,4,6,8,10
> NUMA node1 CPU(s): 1,3,5,7,9,11
>
> Ports :
> 
> Two Ethernet 10G 2P X520 Adapter
>
> Note : These two PCI devices are connected to NUMA socket 0
>
> Below is the QoS scheduler command I am running .
>
> ./build/qos_sched -c 0x14 -n 1 --socket-mem 1024,0 -- --pfc "0,1,2,4"
> --cfg ./profile.cfg
>
> After running getting the below error.
>
> APP: EAL core mask not configured properly, must be 16 instead
> of 14
>
> So, changed the command line as below
>
> ./build/qos_sched -c 0x16 -n 1 --socket-mem 1024,0 -- --pfc "0,1,2,4"
> --cfg ./profile.cfg
>
> After running getting a different error as shown below.
>
> PANIC in rte_eth_dev_data_alloc():
> Cannot allocate memzone for ethernet port data
> 10: [./build/qos_sched() [0x4039c9]]
> 9: [/lib64/libc.so.6(__libc_start_main+0xf5) [0x7fba5e95faf5]]
> 8: [./build/qos_sched(main+0x9) [0x403949]]
> 7: [./build/qos_sched(app_parse_args+0x2b) [0x4040eb]]
> 6: [/root/DPDK/x86_64-ivshmem-linuxapp-
> gcc/lib/libintel_dpdk.so(rte_eal_init+0xac2) [0x7fba5f8ba452]]
> 5: [/root/DPDK/x86_64-ivshmem-linuxapp-
> gcc/lib/libintel_dpdk.so(rte_eal_pci_probe+0x11d) [0x7fba5f8e767d]]
> 4: [/root /DPDK/x86_64-ivshmem-linuxapp-
> gcc/lib/libintel_dpdk.so(+0x11cafc) [0x7fba5f982afc]]
> 3: [/root /DPDK/x86_64-ivshmem-linuxapp-
> gcc/lib/libintel_dpdk.so(+0x11caa4) [0x7fba5f982aa4]]
> 2: [/root/DPDK/x86_64-ivshmem-linuxapp-
> gcc/lib/libintel_dpdk.so(__rte_panic+0xcb) [0x7fba5f894438]]
> 1: [/root/DPDK/x86_64-ivshmem-linuxapp-
> gcc/lib/libintel_dpdk.so(rte_dump_stack+0x18) [0x7fba5f8f2128]]
> Aborted
>
> Could you help me in running this app as per my system configuration.


I guess you are reserving less memory using --socket-mem. In qos_sched, each 
mbuf has (1528 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)  bytes and we 
have mempool size equal to 2*1024*1024 mbufs. Altogether it becomes approx. 4 
GB. So, try using default memory allocated using hugepages or in case, if you 
want to use less than default ,   may use --socket-mem  5120,0

Please refer source code as well to get an idea on memory requirements for this 
application.

> Thanks ,
> Uday
>
> Bound two 10G
> -Original Message-
> From: Ravulakollu Udaya Kumar (WT01 - Product Engineering Service)
> Sent: Wednesday, January 06, 2016 6:10 PM
> To: 'Singh, Jasvinder'
> Cc: dev at dpdk.org
> Subject: RE: [dpdk-dev] Traffic scheduling in DPDK
>
> Thanks Jasvinder,
>
> Does this application works on systems with multiple NUMA Nodes ?
>
> Thanks,
> Uday
>
> -Original Message-
> From: Singh, Jasvinder [mailto:jasvinder.singh at intel.com]
> Sent: Tuesday, January 05, 2016 3:40 PM
> To: Ravulakollu Udaya Kumar (WT01 - Product Engineering Service)
> Cc: dev at dpdk.org
> Subject: RE: [dpdk-dev] Traffic scheduling in DPDK
>
> Hi Uday,
>
> >
> > Thanks Jasvinder , I am running the below command
> >
> > ./build/qos_sched -c 0xe -n 1  -- --pfc "0,1,3,2" --cfg
> > ./profile.cfg
> >
> > Bound two 1G physical ports to DPDK , and started running the above
> > command with the default profile mentioned in profile.cfg .
> > I am using lcore 3 and 2 for RX and TX. It was not successful,
> > getting the below error.
> >
> > APP: Initializing port 0... PMD: eth_igb_rx_queue_setup():
> > sw_ring=0x7f5b20ba2240 hw_ring=0x7f5b20ba2680
> dma_addr=0xbf87a2680
> > PMD: eth_igb_tx_queue_setup(): To improve 1G driver performance,
> > consider setting the TX WTHRESH value to 4, 8, or 16.
> > PMD: eth_igb_tx_queue_setup(): sw_ring=0x7f5b20b910c0
> > hw_ring=0x7f5b20b92100 dma_addr=0xbf8792100
> > PMD: eth_igb_start(): <<
> > done:  Link Up - speed 1000 Mbps - full-duplex
> > APP: Initializing port 1... PMD: eth_igb_rx_queue_setup():
> > sw_ring=0x7f5b20b80a40 hw_ring=0x7f5b20b80e80
> dma_addr=0xbf8780e80
> > PMD: eth_igb_tx_queue_setup(): To improve 1G driver performance,
> > consider setting the TX WTHRESH value to 4, 8, or 16.
> > PMD: eth_igb_tx_queue_setup(): sw_ring=0x7f5b20b6f8c0
> > hw_ring=0x7f5b20b70900 dma_addr=0xbf8770900
> > PMD: eth_igb_start(): <<
> > done:  Link 

[dpdk-dev] [PATCH v3 09/12] virtio: vfio: Enable RTE_PCI_DRV_NEED_MAPPING flag in driver

2016-01-07 Thread Stephen Hemminger
On Thu,  7 Jan 2016 22:03:06 +0530
Santosh Shukla  wrote:

> +#ifdef RTE_EAL_VFIO
> + .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_DETACHABLE,
> +#else
>   .drv_flags = RTE_PCI_DRV_DETACHABLE,
> +#endif

Since VFIO is determined at runtime not compile time, the flags should
be updated at runtime not compile time.




[dpdk-dev] [PATCH v3 06/12] eal: pci: vfio: add rd/wr func for pci bar space

2016-01-07 Thread Stephen Hemminger
On Thu,  7 Jan 2016 22:03:03 +0530
Santosh Shukla  wrote:

>  
> +int rte_eal_pci_read_bar(const struct rte_pci_device *device __rte_unused,
> +  void *buf __rte_unused,
> +  size_t len __rte_unused,
> +  off_t offset __rte_unused,
> +  int bar_idx __rte_unused)
> +{
> +#ifdef VFIO_PRESENT
> + const struct rte_intr_handle *intr_handle = >intr_handle;
> + return pci_vfio_read_bar(intr_handle, buf, len, offset, bar_idx);
> +#else
> + return 0; /* UIO's not applicable */
> +#endif
> +}

It seems wrong to declare all the parameters as unused but then use them.
Maybe there is a way to have a macro for USED(x) in the #else case


[dpdk-dev] Traffic scheduling in DPDK

2016-01-07 Thread Singh, Jasvinder
Hi Uday,


> -Original Message-
> From: ravulakollu.kumar at wipro.com [mailto:ravulakollu.kumar at wipro.com]
> Sent: Thursday, January 7, 2016 6:29 AM
> To: Singh, Jasvinder
> Cc: dev at dpdk.org
> Subject: RE: [dpdk-dev] Traffic scheduling in DPDK
> 
> Hi Jasvinder,
> 
> Below is my system configuration
> 
> Hugepages:
> --
> [root at qos_sched]# grep -i huge /proc/meminfo
> AnonHugePages:  4096 kB
> HugePages_Total:8000
> HugePages_Free: 7488
> HugePages_Rsvd:0
> HugePages_Surp:0
> Hugepagesize:   2048 kB
> 
> NUMA Nodes:
> --
> NUMA node0 CPU(s): 0,2,4,6,8,10
> NUMA node1 CPU(s): 1,3,5,7,9,11
> 
> Ports :
> 
> Two Ethernet 10G 2P X520 Adapter
> 
> Note : These two PCI devices are connected to NUMA socket 0
> 
> Below is the QoS scheduler command I am running .
> 
> ./build/qos_sched -c 0x14 -n 1 --socket-mem 1024,0 -- --pfc "0,1,2,4" --cfg
> ./profile.cfg
> 
> After running getting the below error.
> 
> APP: EAL core mask not configured properly, must be 16 instead of 14
> 
> So, changed the command line as below
> 
> ./build/qos_sched -c 0x16 -n 1 --socket-mem 1024,0 -- --pfc "0,1,2,4" --cfg
> ./profile.cfg
> 
> After running getting a different error as shown below.
> 
> PANIC in rte_eth_dev_data_alloc():
> Cannot allocate memzone for ethernet port data
> 10: [./build/qos_sched() [0x4039c9]]
> 9: [/lib64/libc.so.6(__libc_start_main+0xf5) [0x7fba5e95faf5]]
> 8: [./build/qos_sched(main+0x9) [0x403949]]
> 7: [./build/qos_sched(app_parse_args+0x2b) [0x4040eb]]
> 6: [/root/DPDK/x86_64-ivshmem-linuxapp-
> gcc/lib/libintel_dpdk.so(rte_eal_init+0xac2) [0x7fba5f8ba452]]
> 5: [/root/DPDK/x86_64-ivshmem-linuxapp-
> gcc/lib/libintel_dpdk.so(rte_eal_pci_probe+0x11d) [0x7fba5f8e767d]]
> 4: [/root /DPDK/x86_64-ivshmem-linuxapp-
> gcc/lib/libintel_dpdk.so(+0x11cafc) [0x7fba5f982afc]]
> 3: [/root /DPDK/x86_64-ivshmem-linuxapp-
> gcc/lib/libintel_dpdk.so(+0x11caa4) [0x7fba5f982aa4]]
> 2: [/root/DPDK/x86_64-ivshmem-linuxapp-
> gcc/lib/libintel_dpdk.so(__rte_panic+0xcb) [0x7fba5f894438]]
> 1: [/root/DPDK/x86_64-ivshmem-linuxapp-
> gcc/lib/libintel_dpdk.so(rte_dump_stack+0x18) [0x7fba5f8f2128]] Aborted
> 
> Could you help me in running this app as per my system configuration.


I guess you are reserving less memory using --socket-mem. In qos_sched, each 
mbuf has (1528 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)  bytes and we 
have mempool size equal to 2*1024*1024 mbufs. Altogether it becomes approx. 4 
GB. So, try using default memory allocated using hugepages or in case, if you 
want to use less than default ,   may use --socket-mem  5120,0   

Please refer source code as well to get an idea on memory requirements for this 
application.

> Thanks ,
> Uday
> 
> Bound two 10G
> -Original Message-
> From: Ravulakollu Udaya Kumar (WT01 - Product Engineering Service)
> Sent: Wednesday, January 06, 2016 6:10 PM
> To: 'Singh, Jasvinder'
> Cc: dev at dpdk.org
> Subject: RE: [dpdk-dev] Traffic scheduling in DPDK
> 
> Thanks Jasvinder,
> 
> Does this application works on systems with multiple NUMA Nodes ?
> 
> Thanks,
> Uday
> 
> -Original Message-
> From: Singh, Jasvinder [mailto:jasvinder.singh at intel.com]
> Sent: Tuesday, January 05, 2016 3:40 PM
> To: Ravulakollu Udaya Kumar (WT01 - Product Engineering Service)
> Cc: dev at dpdk.org
> Subject: RE: [dpdk-dev] Traffic scheduling in DPDK
> 
> Hi Uday,
> 
> >
> > Thanks Jasvinder , I am running the below command
> >
> > ./build/qos_sched -c 0xe -n 1  -- --pfc "0,1,3,2" --cfg ./profile.cfg
> >
> > Bound two 1G physical ports to DPDK , and started running the above
> > command with the default profile mentioned in profile.cfg .
> > I am using lcore 3 and 2 for RX and TX. It was not successful, getting
> > the below error.
> >
> > APP: Initializing port 0... PMD: eth_igb_rx_queue_setup():
> > sw_ring=0x7f5b20ba2240 hw_ring=0x7f5b20ba2680
> dma_addr=0xbf87a2680
> > PMD: eth_igb_tx_queue_setup(): To improve 1G driver performance,
> > consider setting the TX WTHRESH value to 4, 8, or 16.
> > PMD: eth_igb_tx_queue_setup(): sw_ring=0x7f5b20b910c0
> > hw_ring=0x7f5b20b92100 dma_addr=0xbf8792100
> > PMD: eth_igb_start(): <<
> > done:  Link Up - speed 1000 Mbps - full-duplex
> > APP: Initializing port 1... PMD: eth_igb_rx_queue_setup():
> > sw_ring=0x7f5b20b80a40 hw_ring=0x7f5b20b80e80
> dma_addr=0xbf8780e80
> > PMD: eth_igb_tx_queue_setup(): To improve 1G driver performance,
> > consider setting the TX WTHRESH value to 4, 8, or 16.
> > PMD: eth_igb_tx_queue_setup(): sw_ring=0x7f5b20b6f8c0
> > hw_ring=0x7f5b20b70900 dma_addr=0xbf8770900
> > PMD: eth_igb_start(): <<
> > done:  Link Up - speed 1000 Mbps - full-duplex
> > SCHED: Low level config for pipe profile 0:
> > Token bucket: period = 3277, credits per period = 8, size = 100
> > Traffic classes: period = 500, credits per period = [12207,
> > 12207, 12207, 12207]
> > Traffic class 3 

[dpdk-dev] [PATCH v3 03/12] linuxapp: eal: arm: Always return 0 for rte_eal_iopl_init()

2016-01-07 Thread Stephen Hemminger
On Thu,  7 Jan 2016 22:03:00 +0530
Santosh Shukla  wrote:

>  #else
> +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> + return 0; /* iopl syscall not supported for ARM/ARM64 */
> +#endif
>   return -1;
>  #endif

Minor net why not:

#elif defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
return -1
#else

That way you won't generate two return statements and potentially
trigger warnings from static checkers.


[dpdk-dev] [PATCH v3 1/1] eal/linux: change hugepage sorting to avoid overlapping memcpy

2016-01-07 Thread Sergio Gonzalez Monroy
On 07/01/2016 09:33, Sergio Gonzalez Monroy wrote:
> On 05/01/2016 09:37, Ralf Hoffmann wrote:
>> with only one hugepage or already sorted hugepage addresses, the sort
>> function called memcpy with same src and dst pointer. Debugging with
>> valgrind will issue a warning about overlapping area. This patch
>> changes the sort method to qsort to avoid this behavior, according to
>> original patch from Jay Rolette . The separate
>> sort function is no longer necessary.
>>
>> Signed-off-by: Ralf Hoffmann 
>> ---
>>
> Acked-by: Sergio Gonzalez Monroy 
Forgot to mention that there is a checkpatch warning:
WARNING:LONG_LINE: line over 90 characters
#113: FILE: lib/librte_eal/linuxapp/eal/eal_memory.c:1167:
+   qsort(_hp[hp_offset], hpi->num_pages[0], 
sizeof(struct hugepage_file), cmp_physaddr);

Could you fix that Ralf?

Thanks,
Sergio


[dpdk-dev] [PATCH 12/12] examples/l3fwd: add option to parse ptype

2016-01-07 Thread Ananyev, Konstantin
Hi Jianfeng,

> -Original Message-
> From: Tan, Jianfeng
> Sent: Thursday, January 07, 2016 1:20 AM
> To: Ananyev, Konstantin; dev at dpdk.org
> Cc: Zhang, Helin
> Subject: Re: [PATCH 12/12] examples/l3fwd: add option to parse ptype
> 
> 
> Hi Konstantin,
> 
> On 1/6/2016 12:49 AM, Ananyev, Konstantin wrote:
> > Hi Jianfeng,
> >
>  +static int
>  +check_packet_type_ok(int portid)
>  +{
>  +int i;
>  +int ret;
>  +uint32_t ptypes[RTE_PTYPE_L3_MAX_NUM];
>  +int ptype_l3_ipv4 = 0, ptype_l3_ipv6 = 0;
>  +
>  +ret = rte_eth_dev_get_ptype_info(portid, RTE_PTYPE_L3_MASK,
> >>> ptypes);
>  +for (i = 0; i < ret; ++i) {
>  +if (ptypes[i] & RTE_PTYPE_L3_IPV4)
>  +ptype_l3_ipv4 = 1;
>  +if (ptypes[i] & RTE_PTYPE_L3_IPV6)
>  +ptype_l3_ipv6 = 1;
>  +}
>  +
>  +if (ptype_l3_ipv4 == 0)
>  +printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", 
>  portid);
>  +
>  +if (ptype_l3_ipv6 == 0)
>  +printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", 
>  portid);
>  +
>  +if (ptype_l3_ipv4 || ptype_l3_ipv6)
>  +return 1;
> >
> > Forgot one thing: I think it should be:
> >
> > if (ptype_l3_ipv4 && ptype_l3_ipv6)
> >return 1;
> > return 0;
> >
> > or just:
> >
> > return ptype_l3_ipv4 && ptype_l3_ipv6;
> 
> My original thought is: PMDs, like vmxnet3, fills ptype_l3_ipv4, but not
> ptype_l3_ipv6.
> If we use "&&", then it would add rx callback to parse ptype whether
> ipv4 or ipv6 traffic is comming.

Yes, I think that's how it should be:
If HW can't recognise either IPV4 or IPV6 packets, then SW parsing needs to be 
done.
l3fwd relies on PMD to recognise both IPV4 and IPV6 packets properly.
If it can recognise only IPV4, then IPV6 traffic will not be forwarded 
correctly,
and visa-versa.  
Konstantin

> 
> Thanks,
> Jianfeng
> 
> >
> > Konstantin



[dpdk-dev] tx_burst errors seen with virtual pmds

2016-01-07 Thread Dey, Souvik
So is there no resolution to this issue ?

-Original Message-
From: Tan, Jianfeng [mailto:jianfeng@intel.com] 
Sent: Thursday, January 07, 2016 2:18 PM
To: Dey, Souvik ; dev at dpdk.org
Subject: RE: tx_burst errors seen with virtual pmds


Hi Souvik,

> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Dey, Souvik
> Sent: Thursday, January 7, 2016 2:15 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] tx_burst errors seen with virtual pmds
> 
> Hi All,
> While trying call load with virtio_pmd/vmxnet3_pmd , I 
> am seeing that at around 100Mbps, we are having tx_burst errors. We 
> are currently using DPDK 2.1 and send 1 packet at a time. When we try 
> the same with ixgbe_vf we are not seeing similar drops. Is there any 
> issues with the virtual pmds which is causing  this tx drops. We are 
> using tx descriptor as 1024 and 1 tx queue to send out packets.
> 
> --
> Regards,
> Souvik

IMO, for virtio and vmxnet3, there's a frontend in guest and a backend in host.
And mostly, backend driver does the data copies, which spends more CPU than 
frontend. So the tx descriptor queue could be always full, which leads to tx 
error.

Thanks,
Jianfeng


[dpdk-dev] [PATCH v3 1/1] eal/linux: change hugepage sorting to avoid overlapping memcpy

2016-01-07 Thread Sergio Gonzalez Monroy
On 05/01/2016 09:37, Ralf Hoffmann wrote:
> with only one hugepage or already sorted hugepage addresses, the sort
> function called memcpy with same src and dst pointer. Debugging with
> valgrind will issue a warning about overlapping area. This patch
> changes the sort method to qsort to avoid this behavior, according to
> original patch from Jay Rolette . The separate
> sort function is no longer necessary.
>
> Signed-off-by: Ralf Hoffmann 
> ---
>
Acked-by: Sergio Gonzalez Monroy 


[dpdk-dev] [PATCH 12/12] examples/l3fwd: add option to parse ptype

2016-01-07 Thread Tan, Jianfeng

Hi Konstantin,

On 1/6/2016 12:49 AM, Ananyev, Konstantin wrote:
> Hi Jianfeng,
>
 +static int
 +check_packet_type_ok(int portid)
 +{
 +  int i;
 +  int ret;
 +  uint32_t ptypes[RTE_PTYPE_L3_MAX_NUM];
 +  int ptype_l3_ipv4 = 0, ptype_l3_ipv6 = 0;
 +
 +  ret = rte_eth_dev_get_ptype_info(portid, RTE_PTYPE_L3_MASK,
>>> ptypes);
 +  for (i = 0; i < ret; ++i) {
 +  if (ptypes[i] & RTE_PTYPE_L3_IPV4)
 +  ptype_l3_ipv4 = 1;
 +  if (ptypes[i] & RTE_PTYPE_L3_IPV6)
 +  ptype_l3_ipv6 = 1;
 +  }
 +
 +  if (ptype_l3_ipv4 == 0)
 +  printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);
 +
 +  if (ptype_l3_ipv6 == 0)
 +  printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);
 +
 +  if (ptype_l3_ipv4 || ptype_l3_ipv6)
 +  return 1;
>
> Forgot one thing: I think it should be:
>
> if (ptype_l3_ipv4 && ptype_l3_ipv6)
>return 1;
> return 0;
>
> or just:
>
> return ptype_l3_ipv4 && ptype_l3_ipv6;

My original thought is: PMDs, like vmxnet3, fills ptype_l3_ipv4, but not 
ptype_l3_ipv6.
If we use "&&", then it would add rx callback to parse ptype whether 
ipv4 or ipv6 traffic is comming.

Thanks,
Jianfeng

>
> Konstantin



[dpdk-dev] tx_burst errors seen with virtual pmds

2016-01-07 Thread Tan, Jianfeng

Hi Souvik,

> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Dey, Souvik
> Sent: Thursday, January 7, 2016 2:15 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] tx_burst errors seen with virtual pmds
> 
> Hi All,
> While trying call load with virtio_pmd/vmxnet3_pmd , I am 
> seeing
> that at around 100Mbps, we are having tx_burst errors. We are currently
> using DPDK 2.1 and send 1 packet at a time. When we try the same with
> ixgbe_vf we are not seeing similar drops. Is there any issues with the virtual
> pmds which is causing  this tx drops. We are using tx descriptor as 1024 and 1
> tx queue to send out packets.
> 
> --
> Regards,
> Souvik

IMO, for virtio and vmxnet3, there's a frontend in guest and a backend in host.
And mostly, backend driver does the data copies, which spends more CPU than
frontend. So the tx descriptor queue could be always full, which leads to tx 
error.

Thanks,
Jianfeng


[dpdk-dev] Traffic scheduling in DPDK

2016-01-07 Thread ravulakollu.ku...@wipro.com
Hi Jasvinder,

Below is my system configuration

Hugepages:
--
[root at qos_sched]# grep -i huge /proc/meminfo
AnonHugePages:  4096 kB
HugePages_Total:8000
HugePages_Free: 7488
HugePages_Rsvd:0
HugePages_Surp:0
Hugepagesize:   2048 kB

NUMA Nodes:
--
NUMA node0 CPU(s): 0,2,4,6,8,10
NUMA node1 CPU(s): 1,3,5,7,9,11

Ports :

Two Ethernet 10G 2P X520 Adapter

Note : These two PCI devices are connected to NUMA socket 0

Below is the QoS scheduler command I am running .

./build/qos_sched -c 0x14 -n 1 --socket-mem 1024,0 -- --pfc "0,1,2,4" --cfg 
./profile.cfg

After running getting the below error.

APP: EAL core mask not configured properly, must be 16 instead of 14

So, changed the command line as below

./build/qos_sched -c 0x16 -n 1 --socket-mem 1024,0 -- --pfc "0,1,2,4" --cfg 
./profile.cfg

After running getting a different error as shown below.

PANIC in rte_eth_dev_data_alloc():
Cannot allocate memzone for ethernet port data
10: [./build/qos_sched() [0x4039c9]]
9: [/lib64/libc.so.6(__libc_start_main+0xf5) [0x7fba5e95faf5]]
8: [./build/qos_sched(main+0x9) [0x403949]]
7: [./build/qos_sched(app_parse_args+0x2b) [0x4040eb]]
6: 
[/root/DPDK/x86_64-ivshmem-linuxapp-gcc/lib/libintel_dpdk.so(rte_eal_init+0xac2)
 [0x7fba5f8ba452]]
5: 
[/root/DPDK/x86_64-ivshmem-linuxapp-gcc/lib/libintel_dpdk.so(rte_eal_pci_probe+0x11d)
 [0x7fba5f8e767d]]
4: [/root /DPDK/x86_64-ivshmem-linuxapp-gcc/lib/libintel_dpdk.so(+0x11cafc) 
[0x7fba5f982afc]]
3: [/root /DPDK/x86_64-ivshmem-linuxapp-gcc/lib/libintel_dpdk.so(+0x11caa4) 
[0x7fba5f982aa4]]
2: 
[/root/DPDK/x86_64-ivshmem-linuxapp-gcc/lib/libintel_dpdk.so(__rte_panic+0xcb) 
[0x7fba5f894438]]
1: 
[/root/DPDK/x86_64-ivshmem-linuxapp-gcc/lib/libintel_dpdk.so(rte_dump_stack+0x18)
 [0x7fba5f8f2128]]
Aborted

Could you help me in running this app as per my system configuration.

Thanks ,
Uday

Bound two 10G
-Original Message-
From: Ravulakollu Udaya Kumar (WT01 - Product Engineering Service)
Sent: Wednesday, January 06, 2016 6:10 PM
To: 'Singh, Jasvinder'
Cc: dev at dpdk.org
Subject: RE: [dpdk-dev] Traffic scheduling in DPDK

Thanks Jasvinder,

Does this application works on systems with multiple NUMA Nodes ?

Thanks,
Uday

-Original Message-
From: Singh, Jasvinder [mailto:jasvinder.si...@intel.com]
Sent: Tuesday, January 05, 2016 3:40 PM
To: Ravulakollu Udaya Kumar (WT01 - Product Engineering Service)
Cc: dev at dpdk.org
Subject: RE: [dpdk-dev] Traffic scheduling in DPDK

Hi Uday,

>
> Thanks Jasvinder , I am running the below command
>
> ./build/qos_sched -c 0xe -n 1  -- --pfc "0,1,3,2" --cfg ./profile.cfg
>
> Bound two 1G physical ports to DPDK , and started running the above
> command with the default profile mentioned in profile.cfg .
> I am using lcore 3 and 2 for RX and TX. It was not successful, getting
> the below error.
>
> APP: Initializing port 0... PMD: eth_igb_rx_queue_setup():
> sw_ring=0x7f5b20ba2240 hw_ring=0x7f5b20ba2680 dma_addr=0xbf87a2680
> PMD: eth_igb_tx_queue_setup(): To improve 1G driver performance,
> consider setting the TX WTHRESH value to 4, 8, or 16.
> PMD: eth_igb_tx_queue_setup(): sw_ring=0x7f5b20b910c0
> hw_ring=0x7f5b20b92100 dma_addr=0xbf8792100
> PMD: eth_igb_start(): <<
> done:  Link Up - speed 1000 Mbps - full-duplex
> APP: Initializing port 1... PMD: eth_igb_rx_queue_setup():
> sw_ring=0x7f5b20b80a40 hw_ring=0x7f5b20b80e80 dma_addr=0xbf8780e80
> PMD: eth_igb_tx_queue_setup(): To improve 1G driver performance,
> consider setting the TX WTHRESH value to 4, 8, or 16.
> PMD: eth_igb_tx_queue_setup(): sw_ring=0x7f5b20b6f8c0
> hw_ring=0x7f5b20b70900 dma_addr=0xbf8770900
> PMD: eth_igb_start(): <<
> done:  Link Up - speed 1000 Mbps - full-duplex
> SCHED: Low level config for pipe profile 0:
> Token bucket: period = 3277, credits per period = 8, size = 100
> Traffic classes: period = 500, credits per period = [12207,
> 12207, 12207, 12207]
> Traffic class 3 oversubscription: weight = 0
> WRR cost: [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]
> EAL: Error - exiting with code: 1
>   Cause: Unable to config sched subport 0, err=-2


In default profile.cfg, It is assumed that all the nic ports have 10 Gbps rate. 
The above error occurs when subport's  tb_rate (10Gbps) is found more than NIC 
port's capacity (1 Gbps). Therefore, you need to use either 10 Gbps ports in 
your application or have to amend the profile.cfg to work with 1 Gbps port.  
Please refer to  DPDK QoS framework document for more details on various 
parameters - http://dpdk.org/doc/guides/prog_guide/qos_framework.html


> -Original Message-
> From: Singh, Jasvinder [mailto:jasvinder.singh at intel.com]
> Sent: Monday, January 04, 2016 9:26 PM
> To: Ravulakollu Udaya Kumar (WT01 - Product Engineering Service);
> dev at dpdk.org
> Subject: RE: [dpdk-dev] Traffic scheduling in DPDK
>
> Hi Uday,
>
>
> > I have an issue in running qos_sched 

[dpdk-dev] tx_burst errors seen with virtual pmds

2016-01-07 Thread Dey, Souvik
Hi All,
While trying call load with virtio_pmd/vmxnet3_pmd , I am 
seeing  that at around 100Mbps, we are having tx_burst errors. We are currently 
using DPDK 2.1 and send 1 packet at a time. When we try the same with ixgbe_vf 
we are not seeing similar drops. Is there any issues with the virtual pmds 
which is causing  this tx drops. We are using tx descriptor as 1024 and 1 tx 
queue to send out packets.

--
Regards,
Souvik