[dpdk-dev] [PATCH] ixgbe:Add write memory barrier for recv pkts.

2015-04-16 Thread David Marchand
On Thu, Apr 16, 2015 at 5:14 PM, Ananyev, Konstantin <
konstantin.ananyev at intel.com> wrote:

> > Yes, a new set of macros should be introduced first, then we can update
> > the PMD code. Is anyone working on it now?
>
> As far as I know, no one is working on it right now.
> So, I suppose, you are welcome to start :)
>

Not working on it, so yes if you volunteer, you are welcome.


-- 
David Marchand


[dpdk-dev] [PATCH 2/2] pci: allow const for rte_pci_addr

2015-04-16 Thread Stephen Hemminger
probe and close both don't modify the rte_pci_addr structure
that is passed.

Signed-off-by: Stephen Hemminger 
---
 lib/librte_eal/common/eal_common_pci.c  | 4 ++--
 lib/librte_eal/common/include/rte_pci.h | 7 ---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_pci.c 
b/lib/librte_eal/common/eal_common_pci.c
index 808b87b..4229aaf 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -158,7 +158,7 @@ pci_close_all_drivers(struct rte_pci_device *dev)
  * the driver of the devive.
  */
 int
-rte_eal_pci_probe_one(struct rte_pci_addr *addr)
+rte_eal_pci_probe_one(const struct rte_pci_addr *addr)
 {
struct rte_pci_device *dev = NULL;
int ret = 0;
@@ -189,7 +189,7 @@ err_return:
  * the driver of the devive.
  */
 int
-rte_eal_pci_close_one(struct rte_pci_addr *addr)
+rte_eal_pci_close_one(const struct rte_pci_addr *addr)
 {
struct rte_pci_device *dev = NULL;
int ret = 0;
diff --git a/lib/librte_eal/common/include/rte_pci.h 
b/lib/librte_eal/common/include/rte_pci.h
index 62449d7..223d3cd 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -292,7 +292,8 @@ eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr 
*dev_addr)
  * Negative on addr is less than addr2, or error.
  */
 static inline int
-rte_eal_compare_pci_addr(struct rte_pci_addr *addr, struct rte_pci_addr *addr2)
+rte_eal_compare_pci_addr(const struct rte_pci_addr *addr,
+const struct rte_pci_addr *addr2)
 {
uint64_t dev_addr, dev_addr2;

@@ -348,7 +349,7 @@ int rte_eal_pci_probe(void);
  *   - 0 on success.
  *   - Negative on error.
  */
-int rte_eal_pci_probe_one(struct rte_pci_addr *addr);
+int rte_eal_pci_probe_one(const struct rte_pci_addr *addr);

 /**
  * Close the single PCI device.
@@ -363,7 +364,7 @@ int rte_eal_pci_probe_one(struct rte_pci_addr *addr);
  *   - 0 on success.
  *   - Negative on error.
  */
-int rte_eal_pci_close_one(struct rte_pci_addr *addr);
+int rte_eal_pci_close_one(const struct rte_pci_addr *addr);
 #endif /* RTE_LIBRTE_EAL_HOTPLUG */

 /**
-- 
2.1.4



[dpdk-dev] [PATCH 1/2] pci: make device_id tables const

2015-04-16 Thread Stephen Hemminger
The PCI device id table is immutable and should be made const
in all drivers. The pseudo drivers can initialize their local
copy as necessary.

Signed-off-by: Stephen Hemminger 
---
 app/test/virtual_pmd.c  |  3 +--
 lib/librte_eal/common/include/rte_pci.h |  2 +-
 lib/librte_eal/linuxapp/eal/eal_pci.c   |  8 
 lib/librte_pmd_bond/rte_eth_bond_api.c  | 12 +---
 lib/librte_pmd_e1000/em_ethdev.c|  2 +-
 lib/librte_pmd_e1000/igb_ethdev.c   |  4 ++--
 lib/librte_pmd_enic/enic_ethdev.c   |  2 +-
 lib/librte_pmd_fm10k/fm10k_ethdev.c |  2 +-
 lib/librte_pmd_i40e/i40e_ethdev.c   |  2 +-
 lib/librte_pmd_i40e/i40e_ethdev_vf.c|  2 +-
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c |  4 ++--
 lib/librte_pmd_mlx4/mlx4.c  |  2 +-
 lib/librte_pmd_virtio/virtio_ethdev.c   |  2 +-
 lib/librte_pmd_vmxnet3/vmxnet3_ethdev.c |  2 +-
 14 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/app/test/virtual_pmd.c b/app/test/virtual_pmd.c
index 9581892..a538c8a 100644
--- a/app/test/virtual_pmd.c
+++ b/app/test/virtual_pmd.c
@@ -562,6 +562,7 @@ virtual_ethdev_create(const char *name, struct ether_addr 
*mac_addr,
id_table = rte_zmalloc_socket(name, sizeof(*id_table), 0, socket_id);
if (id_table == NULL)
goto err;
+   id_table->device_id = 0xBEEF;

dev_private = rte_zmalloc_socket(name, sizeof(*dev_private), 0, 
socket_id);
if (dev_private == NULL)
@@ -627,8 +628,6 @@ virtual_ethdev_create(const char *name, struct ether_addr 
*mac_addr,
eth_dev->pci_dev = pci_dev;
eth_dev->pci_dev->driver = _drv->pci_drv;

-   eth_dev->pci_dev->driver->id_table->device_id = 0xBEEF;
-
eth_dev->rx_pkt_burst = virtual_ethdev_rx_burst_success;
eth_dev->tx_pkt_burst = virtual_ethdev_tx_burst_success;

diff --git a/lib/librte_eal/common/include/rte_pci.h 
b/lib/librte_eal/common/include/rte_pci.h
index 785852d..62449d7 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -204,7 +204,7 @@ struct rte_pci_driver {
const char *name;   /**< Driver name. */
pci_devinit_t *devinit; /**< Device init. function. */
pci_devuninit_t *devuninit; /**< Device uninit function. */
-   struct rte_pci_id *id_table;/**< ID table, NULL terminated. 
*/
+   const struct rte_pci_id *id_table;  /**< ID table, NULL terminated. 
*/
uint32_t drv_flags; /**< Flags contolling handling 
of device. */
 };

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 9cb0ffd..d2adc66 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -624,9 +624,9 @@ int
 rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device 
*dev)
 {
int ret;
-   struct rte_pci_id *id_table;
+   const struct rte_pci_id *id_table;

-   for (id_table = dr->id_table ; id_table->vendor_id != 0; id_table++) {
+   for (id_table = dr->id_table; id_table->vendor_id != 0; id_table++) {

/* check if device's identifiers match the driver's ones */
if (id_table->vendor_id != dev->id.vendor_id &&
@@ -696,12 +696,12 @@ int
 rte_eal_pci_close_one_driver(struct rte_pci_driver *dr,
struct rte_pci_device *dev)
 {
-   struct rte_pci_id *id_table;
+   const struct rte_pci_id *id_table;

if ((dr == NULL) || (dev == NULL))
return -EINVAL;

-   for (id_table = dr->id_table ; id_table->vendor_id != 0; id_table++) {
+   for (id_table = dr->id_table; id_table->vendor_id != 0; id_table++) {

/* check if device's identifiers match the driver's ones */
if (id_table->vendor_id != dev->id.vendor_id &&
diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c 
b/lib/librte_pmd_bond/rte_eth_bond_api.c
index f594fe1..dc11f47 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_api.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_api.c
@@ -237,14 +237,12 @@ rte_eth_bond_create(const char *name, uint8_t mode, 
uint8_t socket_id)
RTE_BOND_LOG(ERR, "Unable to malloc pci_id_table on socket");
goto err;
}
-
+   pci_id_table->device_id = PCI_ANY_ID;
+   pci_id_table->subsystem_device_id = PCI_ANY_ID;
+   pci_id_table->vendor_id = PCI_ANY_ID;
+   pci_id_table->subsystem_vendor_id = PCI_ANY_ID;
+   
pci_drv->id_table = pci_id_table;
-
-   pci_drv->id_table->device_id = PCI_ANY_ID;
-   pci_drv->id_table->subsystem_device_id = PCI_ANY_ID;
-   pci_drv->id_table->vendor_id = PCI_ANY_ID;
-   pci_drv->id_table->subsystem_vendor_id = PCI_ANY_ID;
-
pci_drv->drv_flags = RTE_PCI_DRV_INTR_LSC;

internals = rte_zmalloc_socket(name, sizeof(*internals), 0, socket_id);
diff --git a/lib/librte_pmd_e1000/em_ethdev.c 

[dpdk-dev] [PATCH 0/2] PCI cleanups

2015-04-16 Thread Stephen Hemminger
More places where PCI code should be using const but wasn't

Stephen Hemminger (2):
  pci: make device_id tables const
  pci: allow const for rte_pci_addr

 app/test/virtual_pmd.c  |  3 +--
 lib/librte_eal/common/eal_common_pci.c  |  4 ++--
 lib/librte_eal/common/include/rte_pci.h |  9 +
 lib/librte_eal/linuxapp/eal/eal_pci.c   |  8 
 lib/librte_pmd_bond/rte_eth_bond_api.c  | 12 +---
 lib/librte_pmd_e1000/em_ethdev.c|  2 +-
 lib/librte_pmd_e1000/igb_ethdev.c   |  4 ++--
 lib/librte_pmd_enic/enic_ethdev.c   |  2 +-
 lib/librte_pmd_fm10k/fm10k_ethdev.c |  2 +-
 lib/librte_pmd_i40e/i40e_ethdev.c   |  2 +-
 lib/librte_pmd_i40e/i40e_ethdev_vf.c|  2 +-
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c |  4 ++--
 lib/librte_pmd_mlx4/mlx4.c  |  2 +-
 lib/librte_pmd_virtio/virtio_ethdev.c   |  2 +-
 lib/librte_pmd_vmxnet3/vmxnet3_ethdev.c |  2 +-
 15 files changed, 29 insertions(+), 31 deletions(-)

-- 
2.1.4



[dpdk-dev] [PATCH v5 2/8] Move common functions in eal.c

2015-04-16 Thread Ravi Kerur
Thomas, Thanks for the review so far, I will make necessary changes and
send out v6.

Thanks, Ravi

On Thu, Apr 16, 2015 at 3:08 PM, Ravi Kerur  wrote:

>
>
> On Thu, Apr 16, 2015 at 1:24 AM, Thomas Monjalon <
> thomas.monjalon at 6wind.com> wrote:
>
>> Hi Ravi,
>>
>> I think this patch is too complex and moves too many different things:
>> - sysfs
>> - mem_cfg
>> - proc_type
>> - application_usage
>> Please split them up.
>>
>> I'm not sure the classification in eal_common_runtime.c and
>> eal_common_system.c
>> new file is clear.
>>
>
> I chose most system related functions (hugepages, lcore sockets, sysfs)
> into eal_common_system.c file. Rest I moved them to eal_common_runtime.c
> file for lack of better name. You want more fine grained control and divide
> them into 4 files??
>
>>
>> 2015-04-09 12:40, Ravi Kerur:
>> > + mem_config = (struct rte_mem_config *) mmap(NULL,
>> sizeof(*mem_config),
>>
>> Why is this cast needed?
>>
>
> Linux has it, BSD doesn't. I just chose Linux version.
>
>


[dpdk-dev] [PATCH v5 1/8] Move common functions in eal_thread.c

2015-04-16 Thread Ravi Kerur
On Tue, Apr 14, 2015 at 2:35 PM, Ravi Kerur  wrote:

>
>
> On Tue, Apr 14, 2015 at 6:59 AM, Thomas Monjalon <
> thomas.monjalon at 6wind.com> wrote:
>
>> Hi Ravi,
>>
>> 2015-04-09 12:40, Ravi Kerur:
>> > --- a/lib/librte_eal/common/eal_common_thread.c
>> > +++ b/lib/librte_eal/common/eal_common_thread.c
>> [...]
>> > +#ifdef RTE_EXEC_ENV_BSDAPP
>> > +#include 
>> > +#include 
>> > +#else /* RTE_EXEC_ENV_BSDAPP */
>> >  #include 
>> > +#endif /* RTE_EXEC_ENV_BSDAPP */
>> [...]
>> > +#ifdef RTE_EXEC_ENV_BSDAPP
>> > + RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
>> > + lcore_id, thread_id, cpuset, ret == 0 ? "" : "...");
>> > +#else /* RTE_EXEC_ENV_BSDAPP */
>> > + RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%x;cpuset=[%s%s])\n",
>> > + lcore_id, (int)thread_id, cpuset, ret == 0 ? "" : "...");
>> > +#endif /* RTE_EXEC_ENV_BSDAPP */
>>
>> These lines should stay in bsdapp and linuxapp directory.
>> You can add a new function to eal_thread.h to format the thread id,
>> so you'll be able to use %s in generic log above.
>>
>
> Thomas, sure will make the changes. I will wait for additional comments if
> any for other patches and send v6 together.
>
> Thanks.
>
>
I plan to use pthread_setname_np after pthread_create and use
pthread_getname_np in RTE_LOG. Both these library calls are available in
BSD and Linux. Let me know if this is ok??


[dpdk-dev] [PATCH] ixgbe:Add write memory barrier for recv pkts.

2015-04-16 Thread Ananyev, Konstantin


> -Original Message-
> From: outlook_739db8e1c4bc6fae at outlook.com 
> [mailto:outlook_739db8e1c4bc6fae at outlook.com] On Behalf Of Wang Dong
> Sent: Thursday, April 16, 2015 12:36 PM
> To: Ananyev, Konstantin; dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] ixgbe:Add write memory barrier for recv pkts.
> 
> >
> >
> >> -Original Message-
> >> From: outlook_739db8e1c4bc6fae at outlook.com 
> >> [mailto:outlook_739db8e1c4bc6fae at outlook.com] On Behalf Of Dong.Wang
> >> Sent: Wednesday, April 15, 2015 2:46 PM
> >> To: Ananyev, Konstantin; dev at dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH] ixgbe:Add write memory barrier for recv 
> >> pkts.
> >>
> >>
> >>
> >>> Hi,
> >>>
>  -Original Message-
>  From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of WangDong
>  Sent: Saturday, April 11, 2015 4:34 PM
>  To: dev at dpdk.org
>  Subject: [dpdk-dev] [PATCH] ixgbe:Add write memory barrier for recv pkts.
> 
>  Like transmit packets, before update receive descriptor's tail pointer, 
>  rte_wmb() should be added after writing recv descriptor.
> 
>  Signed-off-by: Dong Wang 
>  ---
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 5 +
> 1 file changed, 5 insertions(+)
> 
>  diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c 
>  b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>  index 9da2c7e..d504688 100644
>  --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>  +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>  @@ -1338,6 +1338,9 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf 
>  **rx_pkts,
>    */
>   rx_pkts[nb_rx++] = rxm;
>   }
>  +
>  +rte_wmb();
>  +
> >>>
> >>> Why do you think it is necessary?
> >>> I can't see any good reason to put wmb() here.
> >>> I would understand if, at least you'll try to insert it just before 
> >>> updating RDT:
> >>>rx_id = (uint16_t) ((rx_id == 0) ?
> >>>(rxq->nb_rx_desc - 1) : (rx_id - 
> >>> 1));
> >>> + rte_wmb();
> >>> IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
> >>>
> >>> That is not needed on IA with the current implementation, but would make sense 
> >>> for machines with relaxed memory ordering.
> >>> Though right now DPDK IXGBE PMD is supported only on IA,  anyway.
> >>> Same for ixgbe_recv_scattered_pkts().
> >>>
> >>> Konstantin
> >>
> >> Yes, current implementation works well with IA, and the transmit packets
> >> function's rte_wmb() is also unnecessary.
> >>
> >> But there are two reasons for adding rte_wmb() in recv pkts function:
> >> 1) The memory barrier in recv pkts function and xmit pkts function are
> >> inconsistent, rte_wmb() should be added to recv pkts function or be
> >> removed from xmit pkts function.
> >> 2) DPDK will support PowerPC processor (Other developers are working on
> >> it), I check the memory ordering of PowerPC, there was no mention of
> >> store-store instruction's principle in MPC8544 Reference Manual, only
> >> said it is weak memory ordering.
> >>
> >> So, I think it is necessary to add rte_wmb() to recv pkts function.
> >>
> >> Dong
> >
> > What I was trying to say:
> >
> > 1. I think you put barrier in a wrong place.
> > Even for machines with weak memory ordering, we need a barrier only when we 
> > are going to update RDT, i.e:
> > if (nb_hold > rxq->rx_free_thresh) { ... ; barrier; 
> > IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, ...); }
> Yes, I put it in the wrong place; it will reduce performance. It's better
> to place it where you suggested.
> >
> > 2. Even with putting wmb() here, you wouldn't fix  ixgbe_recv_pkts() to 
> > work on machines with weak memory ordering.
> > I think that to make it work properly, you'll need an rmb() between 
> > reading DD bit and rest of RXD:
> >
> > rxdp = _ring[rx_id];
> >   staterr = rxdp->wb.upper.status_error;
> > + rte_rmb();
> >   if (! (staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
> >  break;
> >   rxd = *rxdp;
> Yes, it seems wmb is not enough for weak memory ordering processor. Both
> rmb and wmb are needed.
> >
> > 3. As Stephen pointed in his mail, we shouldn't penalise IA implementation 
> > with unnecessary barriers
> > As was discussed at that thread:  
> > http://dpdk.org/ml/archives/dev/2015-March/015202.html
> > probably the best is to introduce new macros: rte_smp_*mb (or something) 
> > that would be architecture dependent:
> > compiler_barrier on IA, proper HW barrier on machines with weak memory 
> > ordering and update the code to use it.
> >
> > So, if you like to fix that issue, please do that in  a proper way.
> >
> > BTW, I think that for PPC support even before touching ixgbe or any other 
> > PMD,
> > step 3 (or similar) need to be done on rte_ring enqueue/dequeue code.
> >
> > Konstantin
> Yes, a new set of macros should be introduced first, then we can update
> the PMD code. Is anyone working on it now?

As far as I know, no one is working on it right now.
So, I suppose, you are welcome to start :)

[dpdk-dev] [PATCH v5 6/8] Move common functions in eal_pci.c

2015-04-16 Thread Ravi Kerur
On Thu, Apr 16, 2015 at 2:02 AM, Thomas Monjalon 
wrote:

> This patch is very sensitive and difficult to follow.
> I'm really afraid that some nasty bugs could be hidden.
> Please could you try to split it in several steps?
> Thanks
>

Sure, will drop eal_pci_uio.c and eal_pci_vfio.c  changes and will send out
new version.

>
> 2015-04-09 12:40, Ravi Kerur:
> > Changes in v5
> > Rebase to latest code.
> > Removed RTE_EXEC_ENV_BSDAPP from earlier changes.
> >
> > Changes in v4
> > Move common functions in eal_pci.c to librte_eal/common/
> > eal_common_pci.c file.
> >
> > Following functions are moved to eal_common_pci.c file.
> >
> > void *pci_map_resource(void *requested_addr, const int vfio_fd,
> >   const char *devname, off_t offset, size_t size);
> > int pci_addr_comparison(struct rte_pci_addr *addr,
> > struct rte_pci_addr *addr2);
> > int rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr,
> > struct rte_pci_device *dev);
> >
> > Use RTE_EXEC_ENV_BSDAPP to differentiate minor differences in
> > common function.
> > Fix checkpatch warnings and errors.
> >
> > Changes in v3
> > N/A
> >
> > Changes in v2
> > N/A
> >
> > Changes in v1
> > N/A
> >
> > Signed-off-by: Ravi Kerur 
> > ---
> >  lib/librte_eal/bsdapp/eal/eal_pci.c| 122
> ---
> >  lib/librte_eal/common/eal_common_pci.c | 130
> -
> >  lib/librte_eal/common/eal_private.h|  48 +++
> >  lib/librte_eal/linuxapp/eal/eal_pci.c  | 100 +-
> >  lib/librte_eal/linuxapp/eal/eal_pci_init.h |   6 --
> >  lib/librte_eal/linuxapp/eal/eal_pci_uio.c  |  36 ++--
> >  lib/librte_eal/linuxapp/eal/eal_pci_vfio.c |  17 ++--
> >  7 files changed, 212 insertions(+), 247 deletions(-)
>
>


[dpdk-dev] [PATCH v5 4/8] Move common functions in eal_timer.c

2015-04-16 Thread Ravi Kerur
On Thu, Apr 16, 2015 at 1:46 AM, Thomas Monjalon 
wrote:

> 2015-04-09 12:40, Ravi Kerur:
> > Changes in v5
> > Rebase to latest code.
> >
> > Changes in v4
> > Removed extern declaration of eal_tsc_resolution_hz,
> > instead provided _set_ API.
> > Make set_tsc_freq_from_clock as wrapper function for BSD.
> >
> > Changes in v3
> > Changed subject to be more explicit on file name inclusion.
> >
> > Changes in v2
> > Use common function name set_tsc_freq_from_sysctl for BSD and Linux.
> > Update comments about its actuality in function declaration.
> >
> > Changes in v1
> > Move common functions in eal_timer.c to librte_eal/common/
> > eal_common_timer.c file.
> >
> > Following functions are  moved to eal_common_timer.c file
> >
> > void rte_delay_us(unsigned us);
> > uint64_t rte_get_tsc_hz(void);
> > static void set_tsc_freq_fallback(void);
> > void set_tsc_freq(void);
> >
> > Makefile changes to reflect new file added.
> > Fix checkpatch warnings and errors.
> >
> > Signed-off-by: Ravi Kerur 
>
> Seems good.
> Acked-by: Thomas Monjalon 
>
> Minor nit, a blank line is missing:
>

Will fix this in next version.


> > + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> > + */
> > +#include 
>


[dpdk-dev] [PATCH] hash: update jhash function with the latest available

2015-04-16 Thread Bruce Richardson
On Thu, Apr 16, 2015 at 02:26:59PM +0100, Pablo de Lara wrote:
> Jenkins hash function was developed originally in 1996,
> and was integrated in first versions of DPDK.
> The function has been improved in 2006,
> achieving up to 60% better performance, compared to the original one.
> 
> Check out: http://burtleburtle.net/bob/c/lookup3.c
> 
> This patch integrates that code in the rte_jhash library,
> adding also a new function rte_jhash_word2,
> that returns two different hash values, for a single key.
> 

Should the addition of the new functionality not be a separate patch from the
update to the existing code?
Also, do the new functions return the exact same values as the previous 
versions,
just faster?

> Signed-off-by: Pablo de Lara 
> ---
>  lib/librte_hash/rte_jhash.h |  407 
> ---
>  1 files changed, 347 insertions(+), 60 deletions(-)
> 
> diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
> index a4bf5a1..3de006d 100644
> --- a/lib/librte_hash/rte_jhash.h
> +++ b/lib/librte_hash/rte_jhash.h
> @@ -1,7 +1,7 @@
>  /*-
>   *   BSD LICENSE
>   *
> - *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
>   *   All rights reserved.
>   *
>   *   Redistribution and use in source and binary forms, with or without
> @@ -45,38 +45,51 @@ extern "C" {
>  #endif
>  
>  #include 
> +#include 
>  
>  /* jhash.h: Jenkins hash support.
>   *
> - * Copyright (C) 1996 Bob Jenkins (bob_jenkins at burtleburtle.net)
> + * Copyright (C) 2006 Bob Jenkins (bob_jenkins at burtleburtle.net)
>   *
>   * http://burtleburtle.net/bob/hash/
>   *
>   * These are the credits from Bob's sources:
>   *
> - * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
> - * hash(), hash2(), hash3, and mix() are externally useful functions.
> - * Routines to test the hash are included if SELF_TEST is defined.
> - * You can use this free for any purpose.  It has no warranty.
> + * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
> + *
> + * These are functions for producing 32-bit hashes for hash table lookup.
> + * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
> + * are externally useful functions.  Routines to test the hash are included
> + * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
> + * the public domain.  It has no warranty.
>   *
>   * $FreeBSD$
>   */
>  
> +#define rot(x, k) (((x)<<(k)) | ((x)>>(32-(k))))
> +
>  /** @internal Internal function. NOTE: Arguments are modified. */
>  #define __rte_jhash_mix(a, b, c) do { \
> - a -= b; a -= c; a ^= (c>>13); \
> - b -= c; b -= a; b ^= (a<<8); \
> - c -= a; c -= b; c ^= (b>>13); \
> - a -= b; a -= c; a ^= (c>>12); \
> - b -= c; b -= a; b ^= (a<<16); \
> - c -= a; c -= b; c ^= (b>>5); \
> - a -= b; a -= c; a ^= (c>>3); \
> - b -= c; b -= a; b ^= (a<<10); \
> - c -= a; c -= b; c ^= (b>>15); \
> + a -= c; a ^= rot(c, 4); c += b; \
> + b -= a; b ^= rot(a, 6); a += c; \
> + c -= b; c ^= rot(b, 8); b += a; \
> + a -= c; a ^= rot(c, 16); c += b; \
> + b -= a; b ^= rot(a, 19); a += c; \
> + c -= b; c ^= rot(b, 4); b += a; \
> +} while (0)
> +
> +#define __rte_jhash_final(a, b, c) do { \
> + c ^= b; c -= rot(b, 14); \
> + a ^= c; a -= rot(c, 11); \
> + b ^= a; b -= rot(a, 25); \
> + c ^= b; c -= rot(b, 16); \
> + a ^= c; a -= rot(c, 4);  \
> + b ^= a; b -= rot(a, 14); \
> + c ^= b; c -= rot(b, 24); \
>  } while (0)
>  
>  /** The golden ratio: an arbitrary value. */
> -#define RTE_JHASH_GOLDEN_RATIO  0x9e3779b9
> +#define RTE_JHASH_GOLDEN_RATIO  0xdeadbeef
>  
>  /**
>   * The most generic version, hashes an arbitrary sequence
> @@ -95,42 +108,256 @@ extern "C" {
>  static inline uint32_t
>  rte_jhash(const void *key, uint32_t length, uint32_t initval)
>  {
> - uint32_t a, b, c, len;
> - const uint8_t *k = (const uint8_t *)key;
> - const uint32_t *k32 = (const uint32_t *)key;
> + uint32_t a, b, c;
> + union {
> + const void *ptr;
> + size_t i;
> + } u;
>  
> - len = length;
> - a = b = RTE_JHASH_GOLDEN_RATIO;
> - c = initval;
> + /* Set up the internal state */
> + a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;
>  
> - while (len >= 12) {
> - a += k32[0];
> - b += k32[1];
> - c += k32[2];
> + u.ptr = key;
>  
> - __rte_jhash_mix(a,b,c);
> + if ((u.i & 0x3) == 0) {
> + const uint32_t *k = (const uint32_t *)key;
>  
> - k += (3 * sizeof(uint32_t)), k32 += 3;
> - len -= (3 * sizeof(uint32_t));
> - }
> + while (length > 12) {
> + a += k[0];
> + b += k[1];
> + c += k[2];
>  
> - c += length;
> - switch (len) {
> - case 11: c += 

[dpdk-dev] [PATCH v2 2/2] use simple zero initializers

2015-04-16 Thread Thomas Monjalon
2015-04-16 12:12, Olivier MATZ:
> On 04/15/2015 10:49 PM, Thomas Monjalon wrote:
> > To initialize a structure with zeros, one field was explicitly set
> > to avoid "missing initializer" bug with old GCC (e.g. 4.4).
> > This warning is now disabled (commit ) for old versions of GCC,
> > so the workarounds may be removed.
> >
> > These initializers should not be needed for static variables but they
> > are still used to workaround an ICC bug (see commit b2595c4aa92d).
> >
> > There is one remaining exception where {0} initializer doesn't work cleanly,
> > even with recent GCC:
> > lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c:735:9:
> > error: missing braces around initializer [-Werror=missing-braces]
> >struct rte_mbuf mb_def = {0}; /* zeroed mbuf */
> >
> > Tested with GCC 4.4.7 (CentOS), 4.7.2 (Debian) and 4.9.2 (Arch).
> >
> > Signed-off-by: Thomas Monjalon 
> 
> I'm trying to compile the head of dpdk (without this patch applied),
> and I have this error with clang:
> 
>  ixgbe_rxtx.c:2509:41: error: missing field 'driver_name' initializer
>   [-Werror,-Wmissing-field-initializers]
>  struct rte_eth_dev_info dev_info = { 0 };
> 
> I'm wondering if adding more {0} would compile on clang, at least with
> the current clang flags.

It's fixed by adding -Wno-missing-field-initializers to clang flags.

Someone to test with ICC?

Thanks


[dpdk-dev] [PATCH v7] Restore support for virtio on FreeBSD

2015-04-16 Thread Raz Amir
Fixes: 8a312224bcde ("eal/bsd: fix fd leak")

Closing /dev/io fd causes SIGBUS in inb/outb instructions
as the process loses the IOPL privileges once the fd is closed:
(gdb) bt
0  0x00492f2c in outb (port=49170, data=0 '\000')
at /usr/include/machine/cpufunc.h:244
1  0x00492f7a in outb_p (data=0 '\000', port=49170)
at /dpdk/dpdk-2.0.0/lib/librte_pmd_virtio/virtio_pci.h:211
2  0x0049328d in vtpci_set_status (hw=0x80331f380, status=0 '\000')
at /dpdk/dpdk-2.0.0/lib/librte_pmd_virtio/virtio_pci.c:130
3  0x004931fe in vtpci_reset (hw=0x80331f380)
at /dpdk/dpdk-2.0.0/lib/librte_pmd_virtio/virtio_pci.c:108
4  0x004a175e in eth_virtio_dev_init (eth_dev=0x831b80 
)
at /dpdk/dpdk-2.0.0/lib/librte_pmd_virtio/virtio_ethdev.c:1150
5  0x00462c09 in rte_eth_dev_init (pci_drv=0x79d1a0 ,
pci_dev=0x802417560) at /dpdk/dpdk-2.0.0/lib/librte_ether/rte_ethdev.c:326
6  0x0046f03f in rte_eal_pci_probe_one_driver (dr=0x79d1a0 
,
dev=0x802417560) at /dpdk/dpdk-2.0.0/lib/librte_eal/bsdapp/eal/eal_pci.c:487
7  0x00475b06 in pci_probe_all_drivers (dev=0x802417560)
at /dpdk/dpdk-2.0.0/lib/librte_eal/common/eal_common_pci.c:116
8  0x00475bb9 in rte_eal_pci_probe ()
at /dpdk/dpdk-2.0.0/lib/librte_eal/common/eal_common_pci.c:246
9  0x0046cd63 in rte_eal_init (argc=5, argv=0x7fffeaf0)
at /dpdk/dpdk-2.0.0/lib/librte_eal/bsdapp/eal/eal.c:554
10 0x00404544 in main ()

Signed-off-by: Raz Amir 
---
 lib/librte_eal/bsdapp/eal/eal.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index 871d5f4..43e8a47 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -421,12 +421,12 @@ int rte_eal_has_hugepages(void)
 int
 rte_eal_iopl_init(void)
 {
-   int fd;
+   static int fd;

fd = open("/dev/io", O_RDWR);
if (fd < 0)
return -1;
-   close(fd);
+   /* keep fd open for iopl */
return 0;
 }

-- 
2.1.2



[dpdk-dev] [PATCH v5 0/5] Refactor module `eventfd_link'

2015-04-16 Thread Pavel Boldin
This patchset contains refactoring steps for the `eventfd_link' module
of the DPDK's `librte_vhost' part.

The commit messages are updated to include `Signed-off-by'.

Pavel Boldin (5):
  vhost: eventfd_link: moving ioctl to a function
  vhost: eventfd_link: add function fget_from_files
  vhost: eventfd_link: fix ioctl return values
  vhost: eventfd_link: replace copy-pasted sys_close
  vhost: eventfd_link: removing extra #includes

 lib/librte_vhost/eventfd_link/eventfd_link.c | 181 +--
 1 file changed, 87 insertions(+), 94 deletions(-)

-- 
1.9.1



[dpdk-dev] [PATCH v6] Restore support for virtio on FreeBSD

2015-04-16 Thread Raz Amir
Will do

-Original Message-
From: Bruce Richardson [mailto:bruce.richard...@intel.com] 
Sent: 16 April 2015 12:39
To: Raz Amir
Cc: dev at dpdk.org
Subject: Re: [dpdk-dev] [PATCH v6] Restore support for virtio on FreeBSD

On Thu, Apr 16, 2015 at 11:02:03AM +0300, Raz Amir wrote:
> Fixes: 8a312224bcde ("eal/bsd: fix fd leak")
> 
> Closing /dev/io fd causes SIGBUS in inb/outb instructions as the 
> process loses the IOPL privileges once the fd is closed:
> (gdb) bt
> 0  0x00492f2c in outb (port=49170, data=0 '\000') at 
> /usr/include/machine/cpufunc.h:244
> 1  0x00492f7a in outb_p (data=0 '\000', port=49170) at 
> /dpdk/dpdk-2.0.0/lib/librte_pmd_virtio/virtio_pci.h:211
> 2  0x0049328d in vtpci_set_status (hw=0x80331f380, status=0 
> '\000') at /dpdk/dpdk-2.0.0/lib/librte_pmd_virtio/virtio_pci.c:130
> 3  0x004931fe in vtpci_reset (hw=0x80331f380) at 
> /dpdk/dpdk-2.0.0/lib/librte_pmd_virtio/virtio_pci.c:108
> 4  0x004a175e in eth_virtio_dev_init (eth_dev=0x831b80 
> ) at 
> /dpdk/dpdk-2.0.0/lib/librte_pmd_virtio/virtio_ethdev.c:1150
> 5  0x00462c09 in rte_eth_dev_init (pci_drv=0x79d1a0 
> ,
> pci_dev=0x802417560) at 
> /dpdk/dpdk-2.0.0/lib/librte_ether/rte_ethdev.c:326
> 6  0x0046f03f in rte_eal_pci_probe_one_driver (dr=0x79d1a0 
> ,
> dev=0x802417560) at 
> /dpdk/dpdk-2.0.0/lib/librte_eal/bsdapp/eal/eal_pci.c:487
> 7  0x00475b06 in pci_probe_all_drivers (dev=0x802417560) at 
> /dpdk/dpdk-2.0.0/lib/librte_eal/common/eal_common_pci.c:116
> 8  0x00475bb9 in rte_eal_pci_probe () at 
> /dpdk/dpdk-2.0.0/lib/librte_eal/common/eal_common_pci.c:246
> 9  0x0046cd63 in rte_eal_init (argc=5, argv=0x7fffeaf0) at 
> /dpdk/dpdk-2.0.0/lib/librte_eal/bsdapp/eal/eal.c:554
> 10 0x00404544 in main ()
> 
> Signed-off-by: Raz Amir 

It does look the cleanest solution, though I don't like the idea of leaking
the file handle. Can you perhaps just change things a little so that the fd
variable is static - even locally in the function will do, as it helps
indicate that the fd is persistent.

/Bruce

> ---
>  lib/librte_eal/bsdapp/eal/eal.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/lib/librte_eal/bsdapp/eal/eal.c 
> b/lib/librte_eal/bsdapp/eal/eal.c index 871d5f4..e20f915 100644
> --- a/lib/librte_eal/bsdapp/eal/eal.c
> +++ b/lib/librte_eal/bsdapp/eal/eal.c
> @@ -426,7 +426,7 @@ rte_eal_iopl_init(void)
>   fd = open("/dev/io", O_RDWR);
>   if (fd < 0)
>   return -1;
> - close(fd);
> + /* keep fd open for iopl */
>   return 0;
>  }
>  
> --
> 2.1.2
> 



[dpdk-dev] [PATCH] hash: update jhash function with the latest available

2015-04-16 Thread Pablo de Lara
Jenkins hash function was developed originally in 1996,
and was integrated in first versions of DPDK.
The function has been improved in 2006,
achieving up to 60% better performance, compared to the original one.

Check out: http://burtleburtle.net/bob/c/lookup3.c

This patch integrates that code in the rte_jhash library,
adding also a new function rte_jhash_word2,
that returns two different hash values, for a single key.

Signed-off-by: Pablo de Lara 
---
 lib/librte_hash/rte_jhash.h |  407 ---
 1 files changed, 347 insertions(+), 60 deletions(-)

diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index a4bf5a1..3de006d 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -45,38 +45,51 @@ extern "C" {
 #endif

 #include 
+#include 

 /* jhash.h: Jenkins hash support.
  *
- * Copyright (C) 1996 Bob Jenkins (bob_jenkins at burtleburtle.net)
+ * Copyright (C) 2006 Bob Jenkins (bob_jenkins at burtleburtle.net)
  *
  * http://burtleburtle.net/bob/hash/
  *
  * These are the credits from Bob's sources:
  *
- * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
- * hash(), hash2(), hash3, and mix() are externally useful functions.
- * Routines to test the hash are included if SELF_TEST is defined.
- * You can use this free for any purpose.  It has no warranty.
+ * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * These are functions for producing 32-bit hashes for hash table lookup.
+ * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+ * are externally useful functions.  Routines to test the hash are included
+ * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
+ * the public domain.  It has no warranty.
  *
  * $FreeBSD$
  */

+#define rot(x, k) (((x)<<(k)) | ((x)>>(32-(k))))
+
 /** @internal Internal function. NOTE: Arguments are modified. */
 #define __rte_jhash_mix(a, b, c) do { \
-   a -= b; a -= c; a ^= (c>>13); \
-   b -= c; b -= a; b ^= (a<<8); \
-   c -= a; c -= b; c ^= (b>>13); \
-   a -= b; a -= c; a ^= (c>>12); \
-   b -= c; b -= a; b ^= (a<<16); \
-   c -= a; c -= b; c ^= (b>>5); \
-   a -= b; a -= c; a ^= (c>>3); \
-   b -= c; b -= a; b ^= (a<<10); \
-   c -= a; c -= b; c ^= (b>>15); \
+   a -= c; a ^= rot(c, 4); c += b; \
+   b -= a; b ^= rot(a, 6); a += c; \
+   c -= b; c ^= rot(b, 8); b += a; \
+   a -= c; a ^= rot(c, 16); c += b; \
+   b -= a; b ^= rot(a, 19); a += c; \
+   c -= b; c ^= rot(b, 4); b += a; \
+} while (0)
+
+#define __rte_jhash_final(a, b, c) do { \
+   c ^= b; c -= rot(b, 14); \
+   a ^= c; a -= rot(c, 11); \
+   b ^= a; b -= rot(a, 25); \
+   c ^= b; c -= rot(b, 16); \
+   a ^= c; a -= rot(c, 4);  \
+   b ^= a; b -= rot(a, 14); \
+   c ^= b; c -= rot(b, 24); \
 } while (0)

 /** The golden ratio: an arbitrary value. */
-#define RTE_JHASH_GOLDEN_RATIO  0x9e3779b9
+#define RTE_JHASH_GOLDEN_RATIO  0xdeadbeef

 /**
  * The most generic version, hashes an arbitrary sequence
@@ -95,42 +108,256 @@ extern "C" {
 static inline uint32_t
 rte_jhash(const void *key, uint32_t length, uint32_t initval)
 {
-   uint32_t a, b, c, len;
-   const uint8_t *k = (const uint8_t *)key;
-   const uint32_t *k32 = (const uint32_t *)key;
+   uint32_t a, b, c;
+   union {
+   const void *ptr;
+   size_t i;
+   } u;

-   len = length;
-   a = b = RTE_JHASH_GOLDEN_RATIO;
-   c = initval;
+   /* Set up the internal state */
+   a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;

-   while (len >= 12) {
-   a += k32[0];
-   b += k32[1];
-   c += k32[2];
+   u.ptr = key;

-   __rte_jhash_mix(a,b,c);
+   if ((u.i & 0x3) == 0) {
+   const uint32_t *k = (const uint32_t *)key;

-   k += (3 * sizeof(uint32_t)), k32 += 3;
-   len -= (3 * sizeof(uint32_t));
-   }
+   while (length > 12) {
+   a += k[0];
+   b += k[1];
+   c += k[2];

-   c += length;
-   switch (len) {
-   case 11: c += ((uint32_t)k[10] << 24);
-   case 10: c += ((uint32_t)k[9] << 16);
-   case 9 : c += ((uint32_t)k[8] << 8);
-   case 8 : b += ((uint32_t)k[7] << 24);
-   case 7 : b += ((uint32_t)k[6] << 16);
-   case 6 : b += ((uint32_t)k[5] << 8);
-   case 5 : b += k[4];
-   case 4 : a += ((uint32_t)k[3] << 24);
-   case 3 : a += ((uint32_t)k[2] << 16);
-

[dpdk-dev] KNI automatic IP configuration

2015-04-16 Thread Olivier Deme
Hi, sorry to resurrect this question.
I am now using a virtio network interface in a qemu VM.
Is it possible to use ifplugd to automatically setup the IP 
configuration of a KNI based interface?
Here is the output of lspci:
00:03.0 Ethernet controller: Red Hat, Inc Virtio network device
00:04.0 Ethernet controller: Red Hat, Inc Virtio network device
00:05.0 Ethernet controller: Red Hat, Inc Virtio network device
00:06.0 Ethernet controller: Red Hat, Inc Virtio network device

Thanks for your help.
Olivier.


On 05/04/15 02:08, Zhou, Danny wrote:
> e1000 driver supports many different NIC devices.
>
> Do you know your NIC device type or device_id?
>
>> -Original Message-
>> From: Olivier Deme [mailto:odeme at druidsoftware.com]
>> Sent: Sunday, April 05, 2015 1:03 AM
>> To: Zhou, Danny
>> Cc: dev at dpdk.org
>> Subject: Re: [dpdk-dev] KNI automatic IP configuration
>>
>> Currently I'm using the e1000 PMD driver.
>>
>> Thanks,
>> Olivier.
>>
>>> On 4 Apr 2015, at 12:51, Zhou, Danny  wrote:
>>>
>>> Which NIC device are you using now?
>>>
>>> KNI does not provide Ethtool support for all NICs supported by the 
>>> Poll-Mode Drivers.
>>>
 -Original Message-
 From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Olivier Deme
 Sent: Friday, April 03, 2015 10:03 PM
 To: dev at dpdk.org
 Subject: [dpdk-dev] KNI automatic IP configuration

 Hi all,

 Is it possible on Linux to get the Linux network manager to automatically 
 configure a KNI virtual network interface as soon as it
 comes up?
 I haven't been able to get Fedora to apply the configuration under 
 /etc/sysconfig/network-scripts/ifcfg-veth0 file upon
>> starting
 my DPDK application.

 Executing ifup veth0 manually works but somehow the fedora network manager 
 seems to have an issue to apply the if cfg
>> config
 automatically.
 In the system logs I can see something along the lines "kni doesn't 
 support ethtool". Is this related?

 I also tried ifplugd but without success.

 Many thanks for your help,

 Olivier.

-- 
*Olivier Dem?*
*Druid Software Ltd.*
*Tel: +353 1 202 1831*
*Email: odeme at druidsoftware.com *
*URL: http://www.druidsoftware.com*
Druid Software: Monetising enterprise small cells solutions.


Druid_Footer_Logo


[dpdk-dev] [PATCH v2 1/2] ixgbe: fix build with gcc 4.4

2015-04-16 Thread Vlad Zolotarov


On 04/15/15 23:49, Thomas Monjalon wrote:
> With GCC 4.4.7 from CentOS 6.5, the following errors arise:
>
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c: In function 'ixgbe_dev_rx_queue_setup':
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c:2509: error: missing initializer
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c:2509: error: (near initialization for 
> 'dev_info.driver_name')
>
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c: In function 'ixgbe_set_rsc':
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c:4072: error: missing initializer
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c:4072: error: (near initialization for 
> 'dev_info.driver_name')
>
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c: In function 
> 'ixgbe_recv_pkts_lro_single_alloc':
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c:1479: error: 'next_rsc_entry' may be used 
> uninitialized in this function
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c:1480: error: 'next_rxe' may be used 
> uninitialized in this function
>
> The "missing initializer" warning is a GCC bug which seems fixed in 4.7.
> The "may be used uninitialized" warning seems to be another GCC bug and is
> worked around with NULL initialization.
>
> Fixes: 8eecb3295aed ("ixgbe: add LRO support")
>
> Signed-off-by: Thomas Monjalon 
> ---
> changes in v2:
> - option -Wno-missing-field-initializers for old GCC instead of code 
> workaround
>
>   lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 4 ++--
>   mk/toolchain/gcc/rte.vars.mk  | 5 +
>   2 files changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c 
> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> index f1da9ec..6475c44 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> @@ -1476,8 +1476,8 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf 
> **rx_pkts, uint16_t nb_pkts,
>   bool eop;
>   struct ixgbe_rx_entry *rxe;
>   struct ixgbe_rsc_entry *rsc_entry;
> - struct ixgbe_rsc_entry *next_rsc_entry;
> - struct ixgbe_rx_entry *next_rxe;
> + struct ixgbe_rsc_entry *next_rsc_entry = NULL;
> + struct ixgbe_rx_entry *next_rxe = NULL;

-Wno-maybe-uninitialized ?

>   struct rte_mbuf *first_seg;
>   struct rte_mbuf *rxm;
>   struct rte_mbuf *nmb;
> diff --git a/mk/toolchain/gcc/rte.vars.mk b/mk/toolchain/gcc/rte.vars.mk
> index 88f235c..208cddd 100644
> --- a/mk/toolchain/gcc/rte.vars.mk
> +++ b/mk/toolchain/gcc/rte.vars.mk
> @@ -80,5 +80,10 @@ WERROR_FLAGS += -Wundef -Wwrite-strings
>   # process cpu flags
>   include $(RTE_SDK)/mk/toolchain/$(RTE_TOOLCHAIN)/rte.toolchain-compat.mk
>   
> +# workaround GCC bug with warning "missing initializer" for "= {0}"
> +ifeq ($(shell test $(GCC_VERSION) -lt 47 && echo 1), 1)
> +WERROR_FLAGS += -Wno-missing-field-initializers
> +endif
> +
>   export CC AS AR LD OBJCOPY OBJDUMP STRIP READELF
>   export TOOLCHAIN_CFLAGS TOOLCHAIN_LDFLAGS TOOLCHAIN_ASFLAGS



[dpdk-dev] [PATCH RFC 10/10] examples/tep_termination:add the configuration for encapsulation and the decapsulation

2015-04-16 Thread Jijiang Liu
The two flags are enabled by default, but sometimes we want to measure the
performance impact of the encapsulation and decapsulation operations, so
this patch adds two options to turn them on or off.

Signed-off-by: Jijiang Liu 
---
 examples/tep_termination/main.c|   36 ++-
 examples/tep_termination/vxlan_setup.c |8 +-
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/examples/tep_termination/main.c b/examples/tep_termination/main.c
index 8ce78ee..7d021f9 100644
--- a/examples/tep_termination/main.c
+++ b/examples/tep_termination/main.c
@@ -125,6 +125,12 @@ uint8_t tx_checksum;
 /* TSO segment size */
 uint16_t tso_segsz = 0;

+/* enable/disable decapsulation */
+uint8_t rx_decap = 1;
+
+/* enable/disable encapsulation */
+uint8_t tx_encap = 1;
+
 /* RX filter type for tunneling packet */
 uint8_t filter_idx;

@@ -250,6 +256,8 @@ vep_termination_usage(const char *prgname)
"   --nb-devices: number of virtIO device\n"
"   --tx-checksum [0|1]: inner Tx checksum offload\n"
"   --tso-segsz [0-N]: TSO segment size\n"
+   "   --decap [0|1]: Decapsulation for tunneling packet\n"
+   "   --encap [0|1]: Encapsulation for tunneling packet\n"
"   --filter-type[1-3]: filter type for tunneling packet\n"
"   1: Inner MAC and tenent ID\n"
"   2: Inner MAC and tenent ID\n"
@@ -276,6 +284,8 @@ tep_parse_args(int argc, char **argv)
{"nb-devices", required_argument, NULL, 0},
{"tx-checksum", required_argument, NULL, 0},
{"tso-segsz", required_argument, NULL, 0},
+   {"decap", required_argument, NULL, 0},
+   {"encap", required_argument, NULL, 0},
{"filter-type", required_argument, NULL, 0},
{"stats", required_argument, NULL, 0},
{"dev-basename", required_argument, NULL, 0},
@@ -325,8 +335,30 @@ tep_parse_args(int argc, char **argv)
return -1;
}
}
-   
-   if (!strncmp(long_option[option_index].name, 
"tx-checksum", MAX_LONG_OPT_SZ)) {
+
+   /* Enable/disable decapsulation on RX. */
+   if (!strncmp(long_option[option_index].name, "decap", 
MAX_LONG_OPT_SZ)) {
+   ret = parse_num_opt(optarg, 1);
+   if (ret == -1) {
+   RTE_LOG(INFO, VHOST_CONFIG, "Invalid 
argument for decapsulation [0|1]\n");
+   vep_termination_usage(prgname);
+   return -1;
+   } else
+   rx_decap = ret;
+   }
+
+   /* Enable/disable encapsulation on TX. */
+   if (!strncmp(long_option[option_index].name, "encap", 
MAX_LONG_OPT_SZ)) {
+   ret = parse_num_opt(optarg, 1);
+   if (ret == -1) {
+   RTE_LOG(INFO, VHOST_CONFIG, "Invalid 
argument for encapsulation [0|1]\n");
+   vep_termination_usage(prgname);
+   return -1;
+   } else
+   tx_encap = ret;
+   }
+
+if (!strncmp(long_option[option_index].name, 
"tx-checksum", MAX_LONG_OPT_SZ)) {
ret = parse_num_opt(optarg, 1);
if (ret == -1) {
RTE_LOG(INFO, VHOST_CONFIG, "Invalid 
argument for tx-checksum [0|1]\n");
diff --git a/examples/tep_termination/vxlan_setup.c 
b/examples/tep_termination/vxlan_setup.c
index 312a878..b4e8fbc 100644
--- a/examples/tep_termination/vxlan_setup.c
+++ b/examples/tep_termination/vxlan_setup.c
@@ -83,6 +83,8 @@ extern uint16_t num_devices;
 extern uint16_t udp_port;
 extern uint8_t ports[RTE_MAX_ETHPORTS];
 extern uint8_t filter_idx;
+extern uint8_t rx_decap;
+extern uint8_t tx_encap;

 /* ethernet addresses of ports */
 extern struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
@@ -237,7 +239,8 @@ vxlan_rx_process(struct rte_mbuf *pkt)
| PKT_RX_TUNNEL_IPV6_HDR)) == 0)
return -1;

-   ret = decapsulation(pkt);
+   if(rx_decap)
+   ret = decapsulation(pkt);

return ret;
 }
@@ -252,7 +255,8 @@ vxlan_tx_process(uint8_t vport_id, struct rte_mbuf *pkt)
return -1;
}

-   ret = encapsulation(pkt, vport_id);
+   if (tx_encap)
+   ret = encapsulation(pkt, vport_id);

return ret;
 }
-- 
1.7.7.6



[dpdk-dev] [PATCH RFC 09/10] examples/tep_termination:add TSO offload configuration

2015-04-16 Thread Jijiang Liu
If 'tso-segsz' is not 0, TSO offload is enabled.

Signed-off-by: Jijiang Liu 
---
 examples/tep_termination/main.c  |   19 +--
 examples/tep_termination/vxlan.c |4 
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/examples/tep_termination/main.c b/examples/tep_termination/main.c
index 7c69a82..8ce78ee 100644
--- a/examples/tep_termination/main.c
+++ b/examples/tep_termination/main.c
@@ -122,6 +122,9 @@ uint16_t udp_port;
 /* enable/disable inner TX checksum */
 uint8_t tx_checksum;

+/* TSO segment size */
+uint16_t tso_segsz = 0;
+
 /* RX filter type for tunneling packet */
 uint8_t filter_idx;

@@ -246,6 +249,7 @@ vep_termination_usage(const char *prgname)
"   --udp-port: UDP destination port for VXLAN packet\n"
"   --nb-devices: number of virtIO device\n"
"   --tx-checksum [0|1]: inner Tx checksum offload\n"
+   "   --tso-segsz [0-N]: TSO segment size\n"
"   --filter-type[1-3]: filter type for tunneling packet\n"
"   1: Inner MAC and tenent ID\n"
"   2: Inner MAC and tenent ID\n"
@@ -271,6 +275,7 @@ tep_parse_args(int argc, char **argv)
{"udp-port", required_argument, NULL, 0},
{"nb-devices", required_argument, NULL, 0},
{"tx-checksum", required_argument, NULL, 0},
+   {"tso-segsz", required_argument, NULL, 0},
{"filter-type", required_argument, NULL, 0},
{"stats", required_argument, NULL, 0},
{"dev-basename", required_argument, NULL, 0},
@@ -301,6 +306,16 @@ tep_parse_args(int argc, char **argv)
} else 
num_devices = ret;
}
+
+   if (!strncmp(long_option[option_index].name, 
"tso-segsz", MAX_LONG_OPT_SZ)) {
+   ret = parse_num_opt(optarg, INT16_MAX);
+   if (ret == -1) {
+   RTE_LOG(INFO, VHOST_CONFIG, "Invalid 
argument for TCP segment size [0-N]\n");
+   vep_termination_usage(prgname);
+   return -1;
+   } else 
+   tso_segsz = ret;
+   }

if (!strncmp(long_option[option_index].name, 
"udp-port", MAX_LONG_OPT_SZ)) {
ret = parse_num_opt(optarg, INT16_MAX);
@@ -310,8 +325,8 @@ tep_parse_args(int argc, char **argv)
return -1;
}
}
-
-if (!strncmp(long_option[option_index].name, 
"tx-checksum", MAX_LONG_OPT_SZ)) {
+   
+   if (!strncmp(long_option[option_index].name, 
"tx-checksum", MAX_LONG_OPT_SZ)) {
ret = parse_num_opt(optarg, 1);
if (ret == -1) {
RTE_LOG(INFO, VHOST_CONFIG, "Invalid 
argument for tx-checksum [0|1]\n");
diff --git a/examples/tep_termination/vxlan.c b/examples/tep_termination/vxlan.c
index 6fc75ee..e3ef832 100644
--- a/examples/tep_termination/vxlan.c
+++ b/examples/tep_termination/vxlan.c
@@ -48,6 +48,7 @@ extern struct vxlan_conf vxdev;
 extern struct ipv4_hdr app_ip_hdr[VXLAN_N_PORTS];
 extern struct ether_hdr app_l2_hdr[VXLAN_N_PORTS];
 extern uint16_t udp_port;
+extern uint16_t tso_segsz;

 static uint16_t
 get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags)
@@ -149,6 +150,8 @@ process_inner_cksums(struct ether_hdr *eth_hdr, struct 
offload_info *info)
ol_flags |= PKT_TX_TCP_CKSUM;
tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
ol_flags);
+   if (tso_segsz != 0)
+   ol_flags |= PKT_TX_TCP_SEG;

} else if (info->l4_proto == IPPROTO_SCTP) {
sctp_hdr = (struct sctp_hdr *)((char *)l3_hdr + info->l3_len);
@@ -219,6 +222,7 @@ int encapsulation(struct rte_mbuf *m, uint8_t vport_id)
m->outer_l3_len = sizeof(struct ipv4_hdr);

m->ol_flags |= ol_flags;
+   m->tso_segsz = tso_segsz;

/*VXLAN HEADER*/
vxlan->vx_flags = VXLAN_FLAGS;
-- 
1.7.7.6



[dpdk-dev] [PATCH RFC 08/10] examples/tep_termination:add Tx checksum offload configuration for inner header

2015-04-16 Thread Jijiang Liu
For a VXLAN packet, inner Tx checksum offload covers the inner IPv4 header
and the inner L4 header (TCP/UDP/SCTP).

Signed-off-by: Jijiang Liu 
---
 examples/tep_termination/main.c  |   15 +++
 examples/tep_termination/vxlan.c |   77 ++
 2 files changed, 92 insertions(+), 0 deletions(-)

diff --git a/examples/tep_termination/main.c b/examples/tep_termination/main.c
index e8142e0..7c69a82 100644
--- a/examples/tep_termination/main.c
+++ b/examples/tep_termination/main.c
@@ -119,6 +119,9 @@ uint16_t num_devices;
 /* VXLAN UDP destination port */
 uint16_t udp_port;

+/* enable/disable inner TX checksum */
+uint8_t tx_checksum;
+
 /* RX filter type for tunneling packet */
 uint8_t filter_idx;

@@ -242,6 +245,7 @@ vep_termination_usage(const char *prgname)
RTE_LOG(INFO, VHOST_CONFIG, "%s [EAL options] -- -p PORTMASK\n"
"   --udp-port: UDP destination port for VXLAN packet\n"
"   --nb-devices: number of virtIO device\n"
+   "   --tx-checksum [0|1]: inner Tx checksum offload\n"
"   --filter-type[1-3]: filter type for tunneling packet\n"
"   1: Inner MAC and tenent ID\n"
"   2: Inner MAC and tenent ID\n"
@@ -266,6 +270,7 @@ tep_parse_args(int argc, char **argv)
static struct option long_option[] = {
{"udp-port", required_argument, NULL, 0},
{"nb-devices", required_argument, NULL, 0},
+   {"tx-checksum", required_argument, NULL, 0},
{"filter-type", required_argument, NULL, 0},
{"stats", required_argument, NULL, 0},
{"dev-basename", required_argument, NULL, 0},
@@ -305,6 +310,16 @@ tep_parse_args(int argc, char **argv)
return -1;
}
}
+
+if (!strncmp(long_option[option_index].name, 
"tx-checksum", MAX_LONG_OPT_SZ)) {
+   ret = parse_num_opt(optarg, 1);
+   if (ret == -1) {
+   RTE_LOG(INFO, VHOST_CONFIG, "Invalid 
argument for tx-checksum [0|1]\n");
+   vep_termination_usage(prgname);
+   return -1;
+   } else
+   tx_checksum = ret;
+   }

if (!strncmp(long_option[option_index].name, 
"filter-type", MAX_LONG_OPT_SZ)) {
ret = parse_num_opt(optarg, 3);
diff --git a/examples/tep_termination/vxlan.c b/examples/tep_termination/vxlan.c
index 942eb10..6fc75ee 100644
--- a/examples/tep_termination/vxlan.c
+++ b/examples/tep_termination/vxlan.c
@@ -43,11 +43,21 @@
 #include "main.h"
 #include "vxlan.h"

+extern uint8_t tx_checksum;
 extern struct vxlan_conf vxdev;
 extern struct ipv4_hdr app_ip_hdr[VXLAN_N_PORTS];
 extern struct ether_hdr app_l2_hdr[VXLAN_N_PORTS];
 extern uint16_t udp_port;

+static uint16_t
+get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags)
+{
+   if (ethertype == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
+   return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
+   else /* assume ethertype == ETHER_TYPE_IPv6 */
+   return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
+}
+
 /*
  * Parse an ethernet header to fill the ethertype, l2_len, l3_len and
  * ipproto. This function is able to recognize IPv4/IPv6 with one optional vlan
@@ -87,6 +97,67 @@ parse_ethernet(struct ether_hdr *eth_hdr, struct 
offload_info *info)
}

 }
+/* if possible, calculate the checksum of a packet in hw or sw,
+ *  * depending on the testpmd command line configuration */
+static uint64_t
+process_inner_cksums(struct ether_hdr *eth_hdr, struct offload_info *info)
+{
+   void *l3_hdr = NULL;
+   struct ipv4_hdr *ipv4_hdr;
+   struct ipv6_hdr *ipv6_hdr;
+   struct udp_hdr *udp_hdr;
+   struct tcp_hdr *tcp_hdr;
+   struct sctp_hdr *sctp_hdr;
+   uint64_t ol_flags = 0;
+   
+   info->l2_len = sizeof(struct ether_hdr);
+   info->ethertype = eth_hdr->ether_type;
+
+   if (info->ethertype == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) {
+   struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
+   info->l2_len  += sizeof(struct vlan_hdr);
+   info->ethertype = vlan_hdr->eth_proto;
+   }
+
+   l3_hdr = (char *)eth_hdr + info->l2_len;
+
+   if (info->ethertype == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
+   ipv4_hdr = l3_hdr;
+   ipv4_hdr->hdr_checksum = 0;
+   ol_flags |= PKT_TX_IPV4;
+   ol_flags |= PKT_TX_IP_CKSUM;
+   info->l3_len = sizeof(struct ipv4_hdr);
+   info->l4_proto = ipv4_hdr->next_proto_id;
+   } else if (info->ethertype == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) {
+

[dpdk-dev] [PATCH RFC 07/10] examples/tep_termination:add tunnel filter type configuration

2015-04-16 Thread Jijiang Liu
The following filter types are supported for VXLAN:

1> Inner MAC and tenent ID

2> Inner MAC and tenent ID, and Outer MAC

3> Inner MAC and tenent ID

Signed-off-by: Jijiang Liu 
---
 examples/tep_termination/main.c|   21 +
 examples/tep_termination/vxlan_setup.c |   50 ++-
 2 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/examples/tep_termination/main.c b/examples/tep_termination/main.c
index 68d1706..e8142e0 100644
--- a/examples/tep_termination/main.c
+++ b/examples/tep_termination/main.c
@@ -72,6 +72,9 @@
 #define MAX_PKT_BURST 32   /* Max burst size for RX/TX */
 #define BURST_TX_DRAIN_US 100  /* TX drain every ~100us */

+/* Defines how long we wait between retries on RX */
+#define BURST_RX_WAIT_US 15
+
 #define BURST_RX_RETRIES 4/* Number of retries on RX. */

 #define JUMBO_FRAME_MAX_SIZE0x2600
@@ -116,6 +119,9 @@ uint16_t num_devices;
 /* VXLAN UDP destination port */
 uint16_t udp_port;

+/* RX filter type for tunneling packet */
+uint8_t filter_idx;
+
 /* overlay packet operation */
 struct ol_switch_ops overlay_options = {
.port_configure = vxlan_port_init,
@@ -236,6 +242,10 @@ vep_termination_usage(const char *prgname)
RTE_LOG(INFO, VHOST_CONFIG, "%s [EAL options] -- -p PORTMASK\n"
"   --udp-port: UDP destination port for VXLAN packet\n"
"   --nb-devices: number of virtIO device\n"
+   "   --filter-type[1-3]: filter type for tunneling packet\n"
+   "   1: Inner MAC and tenent ID\n"
+   "   2: Inner MAC and tenent ID\n"
+   "   3: Outer MAC, Inner MAC and tenent ID\n"
"   --dev-basename \n"
"   -p PORTMASK: Set mask for ports to be used by 
application\n"
"   --stats [0-N]: 0: Disable stats, N: Time in seconds to 
print stats\n"
@@ -256,6 +266,7 @@ tep_parse_args(int argc, char **argv)
static struct option long_option[] = {
{"udp-port", required_argument, NULL, 0},
{"nb-devices", required_argument, NULL, 0},
+   {"filter-type", required_argument, NULL, 0},
{"stats", required_argument, NULL, 0},
{"dev-basename", required_argument, NULL, 0},
{NULL, 0, 0, 0},
@@ -294,6 +305,16 @@ tep_parse_args(int argc, char **argv)
return -1;
}
}
+   
+   if (!strncmp(long_option[option_index].name, 
"filter-type", MAX_LONG_OPT_SZ)) {
+   ret = parse_num_opt(optarg, 3);
+   if (ret == -1) {
+   RTE_LOG(INFO, VHOST_CONFIG, "Invalid 
argument for filter type [1-3]\n");
+   vep_termination_usage(prgname);
+   return -1;
+   } else
+   filter_idx = ret - 1;
+   }

/* Enable/disable stats. */
if (!strncmp(long_option[option_index].name, "stats", 
MAX_LONG_OPT_SZ)) {
diff --git a/examples/tep_termination/vxlan_setup.c 
b/examples/tep_termination/vxlan_setup.c
index fbffbc8..312a878 100644
--- a/examples/tep_termination/vxlan_setup.c
+++ b/examples/tep_termination/vxlan_setup.c
@@ -82,6 +82,7 @@
 extern uint16_t num_devices;
 extern uint16_t udp_port;
 extern uint8_t ports[RTE_MAX_ETHPORTS];
+extern uint8_t filter_idx;

 /* ethernet addresses of ports */
 extern struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
@@ -237,10 +238,12 @@ vxlan_rx_process(struct rte_mbuf *pkt)
return -1;

ret = decapsulation(pkt);
+
return ret;
 }

-static int vxlan_tx_process(uint8_t vport_id, struct rte_mbuf *pkt)
+static int
+vxlan_tx_process(uint8_t vport_id, struct rte_mbuf *pkt)
 {
int ret = 0;

@@ -260,11 +263,12 @@ static int vxlan_tx_process(uint8_t vport_id, struct 
rte_mbuf *pkt)
 int
 vxlan_link(struct vhost_dev *vdev, struct rte_mbuf *m)
 {
-   int i;
+   int i, ret;
struct ether_hdr *pkt_hdr;
struct virtio_net_data_ll *dev_ll;
struct virtio_net *dev = vdev->dev;
uint64_t portid = dev->device_fh;
+   struct rte_eth_tunnel_filter_conf tunnel_filter_conf;

dev_ll = ll_root_used;

@@ -290,6 +294,28 @@ vxlan_link(struct vhost_dev *vdev, struct rte_mbuf *m)
vxdev.port[portid].peermac.addr_bytes[i] = peer_mac[i];
}

+   memset(_filter_conf, 0, sizeof(struct 
rte_eth_tunnel_filter_conf));
+
+   tunnel_filter_conf.outer_mac = _eth_addr[0];
+   tunnel_filter_conf.filter_type = tep_filter_type[filter_idx];
+
+   /* inner MAC */
+   tunnel_filter_conf.inner_mac = >mac_address;
+
+   tunnel_filter_conf.queue_id = 

[dpdk-dev] [PATCH RFC 06/10] examples/tep_termination:add UDP port configuration for UDP tunneling packet

2015-04-16 Thread Jijiang Liu
The UDP port number used for tunneling packets is configurable; i40e supports
16 such port entries in total.

Signed-off-by: Jijiang Liu 
---
 examples/tep_termination/main.c|   14 ++
 examples/tep_termination/vxlan.c   |3 ++-
 examples/tep_termination/vxlan_setup.c |   17 +
 3 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/examples/tep_termination/main.c b/examples/tep_termination/main.c
index 60a825e..68d1706 100644
--- a/examples/tep_termination/main.c
+++ b/examples/tep_termination/main.c
@@ -113,6 +113,9 @@ struct vpool {
 /* number of devices */
 uint16_t num_devices;

+/* VXLAN UDP destination port */
+uint16_t udp_port;
+
 /* overlay packet operation */
 struct ol_switch_ops overlay_options = {
.port_configure = vxlan_port_init,
@@ -231,6 +234,7 @@ static void
 vep_termination_usage(const char *prgname)
 {
RTE_LOG(INFO, VHOST_CONFIG, "%s [EAL options] -- -p PORTMASK\n"
+   "   --udp-port: UDP destination port for VXLAN packet\n"
"   --nb-devices: number of virtIO device\n"
"   --dev-basename \n"
"   -p PORTMASK: Set mask for ports to be used by 
application\n"
@@ -250,6 +254,7 @@ tep_parse_args(int argc, char **argv)
unsigned i;
const char *prgname = argv[0];
static struct option long_option[] = {
+   {"udp-port", required_argument, NULL, 0},
{"nb-devices", required_argument, NULL, 0},
{"stats", required_argument, NULL, 0},
{"dev-basename", required_argument, NULL, 0},
@@ -280,6 +285,15 @@ tep_parse_args(int argc, char **argv)
} else 
num_devices = ret;
}
+   
+   if (!strncmp(long_option[option_index].name, 
"udp-port", MAX_LONG_OPT_SZ)) {
+   ret = parse_num_opt(optarg, INT16_MAX);
+   if (ret == -1) {
+   RTE_LOG(INFO, VHOST_CONFIG, "Invalid 
argument for UDP port [0-N]\n");
+   vep_termination_usage(prgname);
+   return -1;
+   }
+   }

/* Enable/disable stats. */
if (!strncmp(long_option[option_index].name, "stats", 
MAX_LONG_OPT_SZ)) {
diff --git a/examples/tep_termination/vxlan.c b/examples/tep_termination/vxlan.c
index 9d86616..942eb10 100644
--- a/examples/tep_termination/vxlan.c
+++ b/examples/tep_termination/vxlan.c
@@ -46,6 +46,7 @@
 extern struct vxlan_conf vxdev;
 extern struct ipv4_hdr app_ip_hdr[VXLAN_N_PORTS];
 extern struct ether_hdr app_l2_hdr[VXLAN_N_PORTS];
+extern uint16_t udp_port;

 /*
  * Parse an ethernet header to fill the ethertype, l2_len, l3_len and
@@ -100,7 +101,7 @@ int decapsulation(struct rte_mbuf *pkt)
struct udp_hdr *udp_hdr;
udp_hdr = (struct udp_hdr *)((char *)phdr +
info.outer_l2_len + info.outer_l3_len);
-   if (udp_hdr->dst_port != rte_cpu_to_be_16(4789))
+   if (udp_hdr->dst_port != rte_cpu_to_be_16(udp_port))
return -1;
}
outer_header_len = info.outer_l2_len + info.outer_l3_len
diff --git a/examples/tep_termination/vxlan_setup.c 
b/examples/tep_termination/vxlan_setup.c
index 7cb2660..fbffbc8 100644
--- a/examples/tep_termination/vxlan_setup.c
+++ b/examples/tep_termination/vxlan_setup.c
@@ -80,6 +80,7 @@
 #define RTE_TEST_TX_DESC_DEFAULT 512

 extern uint16_t num_devices;
+extern uint16_t udp_port;
 extern uint8_t ports[RTE_MAX_ETHPORTS];

 /* ethernet addresses of ports */
@@ -156,10 +157,12 @@ vxlan_port_init(uint8_t port, struct rte_mempool 
*mbuf_pool)
const uint16_t tx_ring_size = RTE_TEST_TX_DESC_DEFAULT;
int retval;
uint16_t num_queues, q;
-//struct vxlan_conf *pconf = 
+struct vxlan_conf *pconf = 
+   struct rte_eth_udp_tunnel tunnel_udp;
struct rte_eth_rxconf *rxconf;
struct rte_eth_txconf *txconf;

+   pconf->vxport = udp_port;
rte_eth_dev_info_get (port, _info);

dev_info.max_rx_queues = num_devices;
@@ -204,6 +207,13 @@ vxlan_port_init(uint8_t port, struct rte_mempool 
*mbuf_pool)
if (retval < 0)
return retval;

+   /* Configure UDP port for VXLAN */
+   tunnel_udp.udp_port = udp_port;
+   tunnel_udp.prot_type = RTE_TUNNEL_TYPE_VXLAN;
+   retval = rte_eth_dev_udp_tunnel_add(port, _udp);
+   if (retval < 0)
+   return retval;
+
rte_eth_macaddr_get(port, _eth_addr[port]);
RTE_LOG(INFO, PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
@@ -230,8 +240,7 @@ vxlan_rx_process(struct rte_mbuf *pkt)
return ret;
 

[dpdk-dev] [PATCH RFC 05/10] examples/tep_termination:implement the APIs of encapsulation and decapsulation for VXLAN

2015-04-16 Thread Jijiang Liu
Implement the encapsulation and decapsulation APIs for VXLAN packets; IPv6 is
not yet supported for the encapsulation operation.

Signed-off-by: Jijiang Liu 
Signed-off-by: Thomas Long 
---
 examples/tep_termination/Makefile  |4 +-
 examples/tep_termination/vxlan.c   |  160 
 examples/tep_termination/vxlan_setup.c |7 +-
 3 files changed, 167 insertions(+), 4 deletions(-)
 create mode 100644 examples/tep_termination/vxlan.c

diff --git a/examples/tep_termination/Makefile 
b/examples/tep_termination/Makefile
index ed4fab2..03ba865 100644
--- a/examples/tep_termination/Makefile
+++ b/examples/tep_termination/Makefile
@@ -1,6 +1,6 @@
 #   BSD LICENSE
 #
-#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 #   All rights reserved.
 #
 #   Redistribution and use in source and binary forms, with or without
@@ -48,7 +48,7 @@ else
 APP = tep_termination

 # all source are stored in SRCS-y
-SRCS-y :=  main.c vxlan_setup.c
+SRCS-y :=  main.c vxlan_setup.c vxlan.c

 CFLAGS += -O2 -D_FILE_OFFSET_BITS=64
 CFLAGS += $(WERROR_FLAGS)
diff --git a/examples/tep_termination/vxlan.c b/examples/tep_termination/vxlan.c
new file mode 100644
index 000..9d86616
--- /dev/null
+++ b/examples/tep_termination/vxlan.c
@@ -0,0 +1,160 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "main.h"
+#include "vxlan.h"
+
+extern struct vxlan_conf vxdev;
+extern struct ipv4_hdr app_ip_hdr[VXLAN_N_PORTS];
+extern struct ether_hdr app_l2_hdr[VXLAN_N_PORTS];
+
+/*
+ * Parse an ethernet header to fill the ethertype, l2_len, l3_len and
+ * ipproto. This function is able to recognize IPv4/IPv6 with one optional vlan
+ * header. 
+ */
+static void
+parse_ethernet(struct ether_hdr *eth_hdr, struct offload_info *info)
+{
+struct ipv4_hdr *ipv4_hdr;
+struct ipv6_hdr *ipv6_hdr;
+ 
+info->outer_l2_len = sizeof(struct ether_hdr);
+info->ethertype = eth_hdr->ether_type;
+ 
+if (info->ethertype == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) {
+struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
+info->outer_l2_len  += sizeof(struct vlan_hdr);
+info->ethertype = vlan_hdr->eth_proto;
+}
+ 
+switch (rte_cpu_to_be_16(info->ethertype)) {
+case ETHER_TYPE_IPv4:
+ipv4_hdr = (struct ipv4_hdr *) ((char *)eth_hdr + 
info->l2_len);
+info->outer_l3_len = sizeof(struct ipv4_hdr);
+info->l4_proto = ipv4_hdr->next_proto_id;
+break;
+case ETHER_TYPE_IPv6:
+ipv6_hdr = (struct ipv6_hdr *) ((char *)eth_hdr + 
info->l2_len);
+info->outer_l3_len = sizeof(struct ipv6_hdr);
+info->l4_proto = ipv6_hdr->proto;
+break;
+default:
+info->outer_l2_len = 0;
+info->outer_l3_len = 0;
+info->l4_proto = 0;
+break;
+   }
+
+}
+
+int decapsulation(struct rte_mbuf *pkt)
+{
+   struct offload_info info;
+   uint16_t outer_header_len;
+   
+   memset(, 0, 

[dpdk-dev] [PATCH RFC 04/10] examples/tep_termination:implement VXLAN packet processing

2015-04-16 Thread Jijiang Liu
Implement the following functions:

1> VXLAN port configuration

2> VXLAN tunnel setup

3> VXLAN tunnel destroying

4> VXLAN packet processing for Rx side

5> VXLAN packet processing for Tx side


Signed-off-by: Jijiang Liu 
Signed-off-by: Thomas Long 
---
 examples/tep_termination/Makefile  |2 +-
 examples/tep_termination/main.c|   26 ++-
 examples/tep_termination/vxlan_setup.c |  391 
 3 files changed, 415 insertions(+), 4 deletions(-)
 create mode 100644 examples/tep_termination/vxlan_setup.c

diff --git a/examples/tep_termination/Makefile 
b/examples/tep_termination/Makefile
index 23fb647..ed4fab2 100644
--- a/examples/tep_termination/Makefile
+++ b/examples/tep_termination/Makefile
@@ -48,7 +48,7 @@ else
 APP = tep_termination

 # all source are stored in SRCS-y
-SRCS-y :=  main.c
+SRCS-y :=  main.c vxlan_setup.c

 CFLAGS += -O2 -D_FILE_OFFSET_BITS=64
 CFLAGS += $(WERROR_FLAGS)
diff --git a/examples/tep_termination/main.c b/examples/tep_termination/main.c
index f846053..60a825e 100644
--- a/examples/tep_termination/main.c
+++ b/examples/tep_termination/main.c
@@ -113,6 +113,16 @@ struct vpool {
 /* number of devices */
 uint16_t num_devices;

+/* overlay packet operation */
+struct ol_switch_ops overlay_options = {
+   .port_configure = vxlan_port_init,
+   .tunnel_setup = vxlan_link,
+   .tunnel_destroy = vxlan_unlink,
+   .tx_handle = vxlan_tx_pkts,
+   .rx_handle = vxlan_rx_pkts,
+   .param_handle = NULL,
+};
+
 /* Enable stats. */
 static uint32_t enable_stats = 0;

@@ -350,7 +360,7 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, 
__attribute__((unuse
 {
struct mbuf_table *tx_q;
struct rte_mbuf **m_table;
-   unsigned len, ret = 0;
+   unsigned len, ret;

const uint16_t lcore_id = rte_lcore_id();
struct virtio_net *dev = vdev->dev;
@@ -369,6 +379,8 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, 
__attribute__((unuse

if (unlikely(len == MAX_PKT_BURST)) {
m_table = (struct rte_mbuf **)tx_q->m_table;
+   ret = overlay_options.tx_handle(ports[0], 
(uint16_t)tx_q->txq_id,
+   (struct rte_mbuf **)tx_q->m_table, (uint16_t)tx_q->len);
/* Free any buffers not handled by TX and update the port 
stats. */
if (unlikely(ret < len)) {
do {
@@ -401,7 +413,7 @@ switch_worker(__attribute__((unused)) void *arg)
volatile struct lcore_ll_info *lcore_ll;
const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S 
* BURST_TX_DRAIN_US;
uint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0;
-   unsigned ret = 0, i;
+   unsigned ret, i;
const uint16_t lcore_id = rte_lcore_id();
const uint16_t num_cores = (uint16_t)rte_lcore_count();
uint16_t rx_count = 0;
@@ -429,6 +441,9 @@ switch_worker(__attribute__((unused)) void *arg)

if (tx_q->len) {
LOG_DEBUG(VHOST_DATA, "TX queue drained after 
timeout with burst size %u \n", tx_q->len);
+   ret = overlay_options.tx_handle(ports[0], 
(uint16_t)tx_q->txq_id,
+  
(struct rte_mbuf **)tx_q->m_table,
+  
(uint16_t)tx_q->len);
if (unlikely(ret < tx_q->len)) {
do {

rte_pktmbuf_free(tx_q->m_table[ret]);
@@ -462,6 +477,7 @@ switch_worker(__attribute__((unused)) void *arg)

if (unlikely(vdev->remove)) {
dev_ll = dev_ll->next;
+   overlay_options.tunnel_destroy(vdev);
vdev->ready = DEVICE_SAFE_REMOVE;
continue;
}
@@ -472,6 +488,7 @@ switch_worker(__attribute__((unused)) void *arg)
vdev->rx_q, pkts_burst, MAX_PKT_BURST);

if (rx_count) {
+   ret_count = 
overlay_options.rx_handle(dev, pkts_burst, rx_count);
if (enable_stats) {
rte_atomic64_add(

_statistics[dev_ll->vdev->dev->device_fh].rx_total_atomic,
@@ -492,7 +509,7 @@ switch_worker(__attribute__((unused)) void *arg)
tx_count = rte_vhost_dequeue_burst(dev, 
VIRTIO_TXQ, mbuf_pool, pkts_burst, MAX_PKT_BURST);
/* If this is the first received packet we need 
to learn the MAC */
if (unlikely(vdev->ready == 
DEVICE_MAC_LEARNING) && tx_count) {
- 

[dpdk-dev] [PATCH RFC 03/10] examples/tep_termination:add the pluggable structures for VXLAN packet processing

2015-04-16 Thread Jijiang Liu
We are trying to create a framework for tunneling packet processing, so some 
common APIs are added here, which include 
1> tunnel port configuration

2> tunnel setup

3> tunnel destroying

4> tunneling packet processing for Rx side

5> tunneling packet processing for Tx side

6> tunnel parameter processing

Signed-off-by: Jijiang Liu 
Signed-off-by: Thomas Long 

---
 examples/tep_termination/vxlan_setup.h |   76 
 1 files changed, 76 insertions(+), 0 deletions(-)
 create mode 100644 examples/tep_termination/vxlan_setup.h

diff --git a/examples/tep_termination/vxlan_setup.h 
b/examples/tep_termination/vxlan_setup.h
new file mode 100644
index 000..b79a987
--- /dev/null
+++ b/examples/tep_termination/vxlan_setup.h
@@ -0,0 +1,76 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef VXLAN_SETUP_H_
+#define VXLAN_SETUP_H_
+
+typedef int (*ol_port_configure_t)(uint8_t port, struct rte_mempool 
*mbuf_pool);
+
+typedef int (*ol_tunnel_setup_t)(struct vhost_dev *vdev, struct rte_mbuf *m);
+
+typedef void (*ol_tunnel_destroy_t)(struct vhost_dev *vdev);
+
+typedef int (*ol_tx_handle_t)(uint8_t port_id, uint16_t queue_id,
+  struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+
+typedef int (*ol_rx_handle_t)(struct virtio_net *dev, struct rte_mbuf **pkts,
+   uint32_t count);
+
+typedef int (*ol_param_handle)(struct virtio_net *dev);
+
+struct ol_switch_ops {
+   ol_port_configure_tport_configure;
+   ol_tunnel_setup_t  tunnel_setup;
+   ol_tunnel_destroy_ttunnel_destroy;
+   ol_tx_handle_t tx_handle;
+   ol_rx_handle_t rx_handle;
+   ol_param_handleparam_handle;
+};
+
+int
+vxlan_port_init(uint8_t port, struct rte_mempool *mbuf_pool);
+
+int
+vxlan_link(struct vhost_dev *vdev, struct rte_mbuf *m);
+
+void
+vxlan_unlink(struct vhost_dev *vdev);
+
+int 
+vxlan_tx_pkts (uint8_t port_id, uint16_t queue_id,
+   struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+int
+vxlan_rx_pkts (struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count);
+
+#endif /* VXLAN_SETUP_H_ */
+
-- 
1.7.7.6



[dpdk-dev] [PATCH RFC 02/10] examples/tep_termination:define VXLAN device information and APIs

2015-04-16 Thread Jijiang Liu
Some basic VXLAN definitions and APIs are added in this file, including the 
VXLAN device structure and the APIs for encapsulation and decapsulation.

Signed-off-by: Jijiang Liu 
Signed-off-by: Thomas Long 

---
 examples/tep_termination/main.c  |1 +
 examples/tep_termination/vxlan.h |   81 ++
 2 files changed, 82 insertions(+), 0 deletions(-)
 create mode 100644 examples/tep_termination/vxlan.h

diff --git a/examples/tep_termination/main.c b/examples/tep_termination/main.c
index 2e36ad0..e58726f 100644
--- a/examples/tep_termination/main.c
+++ b/examples/tep_termination/main.c
@@ -52,6 +52,7 @@
 #include 

 #include "main.h"
+#include "vxlan.h"

 /* the maximum number of external ports supported */
 #define MAX_SUP_PORTS 1
diff --git a/examples/tep_termination/vxlan.h b/examples/tep_termination/vxlan.h
new file mode 100644
index 000..96c68fb
--- /dev/null
+++ b/examples/tep_termination/vxlan.h
@@ -0,0 +1,81 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VXLAN_H_
+#define _VXLAN_H_
+
+#define PORT_MIN   49152
+#define PORT_MAX   65535
+#define PORT_RANGE (PORT_MAX - PORT_MIN) + 1
+
+#define VXLAN_N_PORTS  2
+#define VXLAN_FLAGS 0x0008
+#define OUTER_VLAN_ID  100
+
+struct vxlan_port
+{
+   uint32_t portid;/* VirtIO port id */
+   uint32_t peerip;/* remote VTEP IP address */
+   struct ether_addr peermac;  /* remote VTEP MAC address */
+   struct ether_addr portmac;  /* VM MAC address */
+};
+
+struct vxlan_conf
+{
+   uint16_t vxport;/* VXLAN UDP destination port */
+   uint32_t portip;/* DPDK port IP address*/
+   uint32_t in_key;/* VLAN  ID */
+   uint32_t out_key;   /* VXLAN VNI */
+   uint32_t portid;/* DPDK port id */
+   struct vxlan_port port[VXLAN_N_PORTS]; /* VXLAN configuration */
+} __rte_cache_aligned;
+
+/* structure that caches offload info for the current packet */
+struct offload_info
+{
+   uint16_t ethertype; 
+   uint8_t l2_len;
+   uint16_t l3_len;
+   uint8_t l4_len;
+   uint8_t l4_proto;
+   uint16_t outer_ethertype;
+   uint8_t outer_l2_len;
+   uint16_t outer_l3_len;
+   uint8_t outer_l4_len;
+   uint8_t outer_l4_proto;
+}__rte_cache_aligned;
+
+int decapsulation(struct rte_mbuf *pkt);
+int encapsulation(struct rte_mbuf *m, uint8_t portid);
+
+#endif /* _MAIN_H_ */
-- 
1.7.7.6



[dpdk-dev] [PATCH RFC 01/10] examples/tep_termination:initialize the VXLAN example

2015-04-16 Thread Jijiang Liu
This example uses the basic virtio device management functions from the vHost 
example, which include virtio device creation, destruction and maintenance.

Signed-off-by: Jijiang Liu 
---
 examples/Makefile |1 +
 examples/tep_termination/Makefile |   58 +++
 examples/tep_termination/main.c   |  998 +
 examples/tep_termination/main.h   |  113 +
 4 files changed, 1170 insertions(+), 0 deletions(-)
 create mode 100644 examples/tep_termination/Makefile
 create mode 100644 examples/tep_termination/main.c
 create mode 100644 examples/tep_termination/main.h

diff --git a/examples/Makefile b/examples/Makefile
index d549026..3f08954 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -73,5 +73,6 @@ DIRS-$(CONFIG_RTE_LIBRTE_XEN_DOM0) += vhost_xen
 DIRS-y += vmdq
 DIRS-y += vmdq_dcb
 DIRS-y += vm_power_manager
+DIRS-y += tep_termination 

 include $(RTE_SDK)/mk/rte.extsubdir.mk
diff --git a/examples/tep_termination/Makefile 
b/examples/tep_termination/Makefile
new file mode 100644
index 000..23fb647
--- /dev/null
+++ b/examples/tep_termination/Makefile
@@ -0,0 +1,58 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# * Neither the name of Intel Corporation nor the names of its
+#   contributors may be used to endorse or promote products derived
+#   from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overriden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(info This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+all:
+else
+
+# binary name
+APP = tep_termination
+
+# all source are stored in SRCS-y
+SRCS-y :=  main.c
+
+CFLAGS += -O2 -D_FILE_OFFSET_BITS=64
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
+
+endif
diff --git a/examples/tep_termination/main.c b/examples/tep_termination/main.c
new file mode 100644
index 000..2e36ad0
--- /dev/null
+++ b/examples/tep_termination/main.c
@@ -0,0 +1,998 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 

[dpdk-dev] [PATCH RFC 00/10] Add a VXLAN sample

2015-04-16 Thread Jijiang Liu
This VXLAN example simulates VXLAN Tunnel Endpoint (VTEP) termination in DPDK; 
it is used to demonstrate the offload and filtering capabilities of the i40e NIC 
for VXLAN packets.

And this example uses the basic virtio devices management function from vHost 
example, and it uses us-Vhost interface and tunnel filtering mechanism to 
direct the traffic to/from a specific VM.

In addition, this sample is also designed to show how tunneling protocols can 
be handled. For the vHost interface, we do not need to support zero copy/inter 
VM packet transfer etc. The approach that we take would be of benefit to you in 
that we put a pluggable structure in place so that the application could be 
easily extended to support a new tunneling protocol.

The software framework is as follows:

       |-------------------|   |-------------------|
       | VM-1(VNI:100)     |   |  VM-2(VNI:200)    |
       | |------| |------| |   | |------| |------| |
       | |vport0| |vport1| |   | |vport0| |vport1| |
       |-|------|-|------|-|   |-|------|-|------|-|  Guests
                      \         /
         |-------------\-------/--------|
         |     us-vHost interface       |
         |          |-|----|--|         |
         |     decap| | TEP|  | encap   |   DPDK App
         |          |-|----|--|         |
         |            |    |            |
         |            |    |            |
                      |    |
        |-------------|----|---------------|
        |tunnel filter|    | IP/L4 Tx csum |
        |IP/L4 csum   |    | TSO           |
        |packet type  |    |               |   NIC
        |CRC strip    |    |               |
        |-------------|----|---------------|
                      |    |
                      |    |
                      |    |
                     /-------\
                    VXLAN Tunnel

The sample will support the following:
1> Tunneling packet recognition.

2> The port of UDP tunneling is configurable

3> Directing of incoming traffic to the correct queue based on the tunnel 
filter type such as inner MAC address and VNI.
The VNI will be assigned from a static internal table based on the 
us-vhost device ID. Each device will receive a unique device ID. The inner MAC 
will be learned from the first packet transmitted from a device.

4> Decapsulation of Rx VXLAN traffic. This is a software only operation(will 
use HW header split instead later)

5> Encapsulation of Tx VXLAN traffic. This is a software only operation

6> Tx outer IP, inner IP and L4 checksum offload

7> TSO support for tunneling packet

Limitations:
1. No ARP support
2. There is some duplicated source code, because the sample reuses the basic virtio 
device management functions from the vhost sample. However, the current vhost 
sample is already quite large and complicated, so I think we should have a 
separate sample for tunneling packet processing.
3. Currently, only the i40e NIC is supported in the sample, but other types of 
NICs also can be supported later if those NICs are able to support tunneling 
packet filter.


Jijiang Liu (10):
  create VXLAN sample framework using virtio device management function 
  add basic VXLAN structures 
  add VXLAN operation APIs
  support overlay operations
  Add encapsulation and decapsulation function
  add udp port configuration
  add filter type configuration
  add tx checksum offload configuration
  add TSO offload configuration
  add encapsulation and decapsulation flags

 examples/Makefile  |1 +
 examples/tep_termination/Makefile  |   58 ++
 examples/tep_termination/main.c| 1117 
 examples/tep_termination/main.h|  113 
 examples/tep_termination/vxlan.c   |  242 +++
 examples/tep_termination/vxlan.h   |   81 +++
 examples/tep_termination/vxlan_setup.c |  453 +
 examples/tep_termination/vxlan_setup.h |   76 +++
 8 files changed, 2141 insertions(+), 0 deletions(-)
 create mode 100644 examples/tep_termination/Makefile
 create mode 100644 examples/tep_termination/main.c
 create mode 100644 examples/tep_termination/main.h
 create mode 100644 examples/tep_termination/vxlan.c
 create mode 100644 examples/tep_termination/vxlan.h
 create mode 100644 examples/tep_termination/vxlan_setup.c
 create mode 100644 examples/tep_termination/vxlan_setup.h

-- 
1.7.7.6



[dpdk-dev] mempool deleting and cache_size

2015-04-16 Thread Marc Sune


On 16/04/15 11:03, Gonzalez Monroy, Sergio wrote:
> On 15/04/2015 20:24, Stephen Hemminger wrote:
>> On Wed, 15 Apr 2015 20:15:18 +0100
>> Zoltan Kiss  wrote:
>>
>>> Hi,
>>>
>>> I have two questions regarding mempools:
>>>
>>> - the first is trivial: how do you delete them? Can you? I can't see a
>>> function to do that, and none of the examples are doing such thing. 
>>> When
>>> exactly it get deleted?
>> You can't delete them. They live in hugepage area and are persistent.
>> Correctly written code looks for them by name and reuses existing pool
>> if it is big enough.
>>
> FYI, I'm looking into such functionality and also delete/destroy 
> mempools (although still no plan on implementation).
>

What about the memzones behind them: will they also be freed, or will they be 
"lost/leaked" after a mempool destruction?

Marc

> Sergio



[dpdk-dev] [PATCH v5 6/8] Move common functions in eal_pci.c

2015-04-16 Thread Thomas Monjalon
This patch is very sensitive and difficult to follow.
I'm really afraid that some nasty bugs could be hidden.
Please could you try to split it in several steps?
Thanks

2015-04-09 12:40, Ravi Kerur:
> Changes in v5
> Rebase to latest code.
> Removed RTE_EXEC_ENV_BSDAPP from earlier changes.
> 
> Changes in v4
> Move common functions in eal_pci.c to librte_eal/common/
> eal_common_pci.c file.
> 
> Following functions are moved to eal_common_pci.c file.
> 
> void *pci_map_resource(void *requested_addr, const int vfio_fd,
>   const char *devname, off_t offset, size_t size);
> int pci_addr_comparison(struct rte_pci_addr *addr,
> struct rte_pci_addr *addr2);
> int rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr,
> struct rte_pci_device *dev);
> 
> Use RTE_EXEC_ENV_BSDAPP to differentiate minor differences in
> common function.
> Fix checkpatch warnings and errors.
> 
> Changes in v3
> N/A
> 
> Changes in v2
> N/A
> 
> Changes in v1
> N/A
> 
> Signed-off-by: Ravi Kerur 
> ---
>  lib/librte_eal/bsdapp/eal/eal_pci.c| 122 ---
>  lib/librte_eal/common/eal_common_pci.c | 130 
> -
>  lib/librte_eal/common/eal_private.h|  48 +++
>  lib/librte_eal/linuxapp/eal/eal_pci.c  | 100 +-
>  lib/librte_eal/linuxapp/eal/eal_pci_init.h |   6 --
>  lib/librte_eal/linuxapp/eal/eal_pci_uio.c  |  36 ++--
>  lib/librte_eal/linuxapp/eal/eal_pci_vfio.c |  17 ++--
>  7 files changed, 212 insertions(+), 247 deletions(-)



[dpdk-dev] [PATCH v6] Restore support for virtio on FreeBSD

2015-04-16 Thread Raz Amir
Fixes: 8a312224bcde ("eal/bsd: fix fd leak")

Closing /dev/io fd causes SIGBUS in inb/outb instructions
as the process loses the IOPL privileges once the fd is closed:
(gdb) bt
0  0x00492f2c in outb (port=49170, data=0 '\000')
at /usr/include/machine/cpufunc.h:244
1  0x00492f7a in outb_p (data=0 '\000', port=49170)
at /dpdk/dpdk-2.0.0/lib/librte_pmd_virtio/virtio_pci.h:211
2  0x0049328d in vtpci_set_status (hw=0x80331f380, status=0 '\000')
at /dpdk/dpdk-2.0.0/lib/librte_pmd_virtio/virtio_pci.c:130
3  0x004931fe in vtpci_reset (hw=0x80331f380)
at /dpdk/dpdk-2.0.0/lib/librte_pmd_virtio/virtio_pci.c:108
4  0x004a175e in eth_virtio_dev_init (eth_dev=0x831b80 
)
at /dpdk/dpdk-2.0.0/lib/librte_pmd_virtio/virtio_ethdev.c:1150
5  0x00462c09 in rte_eth_dev_init (pci_drv=0x79d1a0 ,
pci_dev=0x802417560) at /dpdk/dpdk-2.0.0/lib/librte_ether/rte_ethdev.c:326
6  0x0046f03f in rte_eal_pci_probe_one_driver (dr=0x79d1a0 
,
dev=0x802417560) at /dpdk/dpdk-2.0.0/lib/librte_eal/bsdapp/eal/eal_pci.c:487
7  0x00475b06 in pci_probe_all_drivers (dev=0x802417560)
at /dpdk/dpdk-2.0.0/lib/librte_eal/common/eal_common_pci.c:116
8  0x00475bb9 in rte_eal_pci_probe ()
at /dpdk/dpdk-2.0.0/lib/librte_eal/common/eal_common_pci.c:246
9  0x0046cd63 in rte_eal_init (argc=5, argv=0x7fffeaf0)
at /dpdk/dpdk-2.0.0/lib/librte_eal/bsdapp/eal/eal.c:554
10 0x00404544 in main ()

Signed-off-by: Raz Amir 
---
 lib/librte_eal/bsdapp/eal/eal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index 871d5f4..e20f915 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -426,7 +426,7 @@ rte_eal_iopl_init(void)
fd = open("/dev/io", O_RDWR);
if (fd < 0)
return -1;
-   close(fd);
+   /* keep fd open for iopl */
return 0;
 }

-- 
2.1.2



[dpdk-dev] [PATCH v5 5/8] Move common functions in eal_memory.c

2015-04-16 Thread Thomas Monjalon
2015-04-09 12:40, Ravi Kerur:
> Changes in v5
> Rebase to latest code.
> 
> Changes in v4
> Make rte_eal_hugepage_init and rte_eal_hugepage_attach as
> wrapper functions for BSD.
> 
> Changes in v3
> Changed subject to be more explicit on file name inclusion.
> 
> Changes in v2
> Use common function names rte_eal_hugepage_init and
> rte_eal_hugepage_attach for BSD and Linux. Update comments about its
> actuality in function declaration.
> 
> Changes in v1
> Move common functions in eal_memory.c to librte_eal/common/
> eal_common_memory.c file.
> 
> Following functions are moved to eal_common_memory.c file
> 
> static int rte_eal_memdevice_init(void); int rte_eal_memory_init(void);
> 
> Fix checkpatch warnings and errors.
> 
> Signed-off-by: Ravi Kerur 
[...]
> +/**
> + * This function prepares physical memory mapping
> + * i.e. hugepages on Linux and
> + *  contigmem on BSD.

OK

> + * It is a wrapper function for BSD which will
> + * internally call contigmem_init.

Please avoid such comments, which are difficult to maintain and not really useful.

> + * It is a wrapper function for BSD which will
> + * internally call contigmem_attach.

Same comment.

Except for the above comments, it seems good.
Acked-by: Thomas Monjalon 

Thanks


[dpdk-dev] [PATCH v5] Restore support for virtio on FreeBSD

2015-04-16 Thread Raz Amir
Hi,

From both running and reading the code, rte_virtio_pmd_init is called
only once, from: rte_eal_init -> rte_eal_dev_init.
But the uninit won't be called, since uninit is called only for PMD_VDEV
driver types, while virtio is PMD_PDEV.
Based on that, I am going to submit the original patch again, in which the fd
won't be closed, and without handling the close at uninit, as it isn't called
and since the fd will be closed anyway when the process exits.

Thanks,
Raz.

-Original Message-
From: Ouyang, Changchun [mailto:changchun.ouy...@intel.com] 
Sent: 16 April 2015 06:31
To: Ananyev, Konstantin; Raz Amir; dev at dpdk.org
Cc: Ouyang, Changchun
Subject: RE: [dpdk-dev] [PATCH v5] Restore support for virtio on FreeBSD



> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Ananyev, 
> Konstantin
> Sent: Wednesday, April 15, 2015 6:22 AM
> To: Raz Amir; dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v5] Restore support for virtio on 
> FreeBSD
> 
> Hi,
> 
> > -Original Message-
> > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Raz Amir
> > Sent: Tuesday, April 14, 2015 5:23 PM
> > To: dev at dpdk.org
> > Cc: Raz Amir
> > Subject: [dpdk-dev] [PATCH v5] Restore support for virtio on FreeBSD
> >
> > Fixes: 8a312224bcde ("eal/bsd: fix fd leak")
> >
> > Closing /dev/io fd causes SIGBUS in inb/outb instructions as the 
> > process loses the IOPL privileges once the fd is closed:
> > (gdb) bt
> > 0  0x00492f2c in outb (port=49170, data=0 '\000')
> > at /usr/include/machine/cpufunc.h:244
> > 1  0x00492f7a in outb_p (data=0 '\000', port=49170)
> > at /dpdk/dpdk-2.0.0/lib/librte_pmd_virtio/virtio_pci.h:211
> > 2  0x0049328d in vtpci_set_status (hw=0x80331f380, status=0
> '\000')
> > at /dpdk/dpdk-2.0.0/lib/librte_pmd_virtio/virtio_pci.c:130
> > 3  0x004931fe in vtpci_reset (hw=0x80331f380)
> > at /dpdk/dpdk-2.0.0/lib/librte_pmd_virtio/virtio_pci.c:108
> > 4  0x004a175e in eth_virtio_dev_init (eth_dev=0x831b80
> )
> > at /dpdk/dpdk-2.0.0/lib/librte_pmd_virtio/virtio_ethdev.c:1150
> > 5  0x00462c09 in rte_eth_dev_init (pci_drv=0x79d1a0
> ,
> > pci_dev=0x802417560) at
> > /dpdk/dpdk-2.0.0/lib/librte_ether/rte_ethdev.c:326
> > 6  0x0046f03f in rte_eal_pci_probe_one_driver (dr=0x79d1a0
> ,
> > dev=0x802417560) at
> > /dpdk/dpdk-2.0.0/lib/librte_eal/bsdapp/eal/eal_pci.c:487
> > 7  0x00475b06 in pci_probe_all_drivers (dev=0x802417560)
> > at /dpdk/dpdk-2.0.0/lib/librte_eal/common/eal_common_pci.c:116
> > 8  0x00475bb9 in rte_eal_pci_probe ()
> > at /dpdk/dpdk-2.0.0/lib/librte_eal/common/eal_common_pci.c:246
> > 9  0x0046cd63 in rte_eal_init (argc=5, argv=0x7fffeaf0)
> > at /dpdk/dpdk-2.0.0/lib/librte_eal/bsdapp/eal/eal.c:554
> > 10 0x00404544 in main ()
> >
> > Signed-off-by: Raz Amir 
> > ---
> >  lib/librte_eal/bsdapp/eal/eal.c | 19 ++-
> >  lib/librte_eal/common/include/rte_eal.h | 10 ++
> >  lib/librte_eal/linuxapp/eal/eal.c   |  5 +
> >  lib/librte_pmd_virtio/virtio_ethdev.c   |  9 +
> >  4 files changed, 38 insertions(+), 5 deletions(-)
> >
> > diff --git a/lib/librte_eal/bsdapp/eal/eal.c 
> > b/lib/librte_eal/bsdapp/eal/eal.c index 871d5f4..687dd83 100644
> > --- a/lib/librte_eal/bsdapp/eal/eal.c
> > +++ b/lib/librte_eal/bsdapp/eal/eal.c
> > @@ -112,6 +112,9 @@ struct internal_config internal_config;
> >  /* used by rte_rdtsc() */
> >  int rte_cycles_vmware_tsc_map;
> >
> > +/* fd to keep open for iopl */
> > +static int iopl_fd = -1;
> > +
> >  /* Return a pointer to the configuration structure */  struct 
> > rte_config *
> >  rte_eal_get_configuration(void)
> > @@ -421,15 +424,21 @@ int rte_eal_has_hugepages(void)  int
> >  rte_eal_iopl_init(void)
> >  {
> > -   int fd;
> > -
> > -   fd = open("/dev/io", O_RDWR);
> > -   if (fd < 0)
> > +   iopl_fd = open("/dev/io", O_RDWR);
> > +   if (iopl_fd < 0)
> > return -1;
> > -   close(fd);
> > +   /* keep fd open for iopl */
> > return 0;
> >  }
> >
> > +void
> > +rte_eal_iopl_uninit(void)
> > +{
> > +   if (iopl_fd != -1)
> > +   close(iopl_fd);
> > +   iopl_fd = -1;
> > +}
> 
> Did I get it right: that function would be invoked for at dev_detach()?
> And after we invoked it, we still we can have other multiple virtio 
> devices attached and active?
> If so, then I suppose you'll hit the same problem again.
> Konstantin
> 

Yes, we need to verify this issue.
If it is true, could we use a reference counter to resolve it?
Thanks
Changchun

> > +
> >  /* Launch threads, called at application init(). */  int 
> > rte_eal_init(int argc, char **argv) diff --git 
> > a/lib/librte_eal/common/include/rte_eal.h
> > b/lib/librte_eal/common/include/rte_eal.h
> > index 1385a73..9151e08 100644
> > --- a/lib/librte_eal/common/include/rte_eal.h
> > +++ b/lib/librte_eal/common/include/rte_eal.h
> > @@ -127,6 +127,16 @@ enum rte_proc_type_t
> 

[dpdk-dev] [PATCH v5 4/8] Move common functions in eal_timer.c

2015-04-16 Thread Thomas Monjalon
2015-04-09 12:40, Ravi Kerur:
> Changes in v5
> Rebase to latest code.
> 
> Changes in v4
> Removed extern declaration of eal_tsc_resolution_hz,
> instead provided _set_ API.
> Make set_tsc_freq_from_clock as wrapper function for BSD.
> 
> Changes in v3
> Changed subject to be more explicit on file name inclusion.
> 
> Changes in v2
> Use common function name set_tsc_freq_from_sysctl for BSD and Linux.
> Update comments about its actuality in function declaration.
> 
> Changes in v1
> Move common functions in eal_timer.c to librte_eal/common/
> eal_common_timer.c file.
> 
> Following functions are  moved to eal_common_timer.c file
> 
> void rte_delay_us(unsigned us);
> uint64_t rte_get_tsc_hz(void);
> static void set_tsc_freq_fallback(void);
> void set_tsc_freq(void);
> 
> Makefile changes to reflect new file added.
> Fix checkpatch warnings and errors.
> 
> Signed-off-by: Ravi Kerur 

Seems good.
Acked-by: Thomas Monjalon 

Minor nit, a blank line is missing:
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +#include 


[dpdk-dev] [PATCH v5 3/8] Move common functions in eal_lcore.c

2015-04-16 Thread Thomas Monjalon
2015-04-09 12:40, Ravi Kerur:
> Changes in v5
> Rebase to latest code.
> 
> Changes in v4
> Implement cpu_detected() for BSD.
> Have common RTE_LOG for Linux and BSD in rte_eal_cpu_init().
> Remove RTE_EXEC_ENV_BSDAPP in common file.
> 
> Changes in v3
> Changed subject to be more explicit on file name inclusion.
> 
> Changes in v2
> None
> 
> Changes in v1
> Move common function in eal_lcore.c to librte_eal/common/
> eal_common_lcore.c file.
> 
> Following function is  moved to eal_common_lcore.c file
> 
> int rte_eal_cpu_init(void);
> 
> Use RTE_EXEC_ENV_BSDAPP to differentiate minor differences in
> common function.
> Makefile changes to reflect new file added.
> Fix checkpatch warnings and errors.
> 
> Signed-off-by: Ravi Kerur 
[...]
> --- a/lib/librte_eal/bsdapp/eal/eal_lcore.c
> +++ b/lib/librte_eal/bsdapp/eal/eal_lcore.c
[...]
>  static int
> -get_ncpus(void)
> +eal_get_ncpus(void)

Why not keep it static as it seems to be specific to BSD handling?

>  {
>   int mib[2] = {CTL_HW, HW_NCPU};
>   int ncpu;
> @@ -59,63 +62,18 @@ get_ncpus(void)
>   return ncpu;
>  }
[...]
> -unsigned
> -eal_cpu_socket_id(__rte_unused unsigned cpu_id)
> +/* Check if a cpu is present by the presence of the
> + * cpu information for it.
> + */
> +int
> +eal_cpu_detected(unsigned lcore_id)
>  {
> - return cpu_socket_id(cpu_id);
> + const unsigned ncpus = eal_get_ncpus();
> + return (lcore_id < ncpus);
>  }





[dpdk-dev] [PATCH 4/5] virtio: fix ring size negotiation

2015-04-16 Thread Stephen Hemminger
On Thu, 16 Apr 2015 06:26:02 +
"Ouyang, Changchun"  wrote:

> 
> 
> From: Stephen Hemminger [mailto:stephen at networkplumber.org]
> Sent: Thursday, April 16, 2015 1:48 PM
> To: Ouyang, Changchun
> Cc: dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 4/5] virtio: fix ring size negotiation
> 
> No warning is needed, it just works.
> 
> I know it works, but the upper user don't know the descriptor number is 
> reduced.
> I concern it is not so user-friendly here.
> 
> 
> On Wed, Apr 15, 2015 at 8:39 PM, Ouyang, Changchun  intel.com> wrote:
> 
> 
> > -Original Message-
> > From: dev [mailto:dev-bounces at dpdk.org] 
> > On Behalf Of Stephen
> > Hemminger
> > Sent: Wednesday, April 15, 2015 11:20 PM
> > To: dev at dpdk.org
> > Subject: [dpdk-dev] [PATCH 4/5] virtio: fix ring size negotiation
> >
> > This fixes another of the issues with running virtio on non-KVM
> > envirionments. For example, Google Compute Engine reports a ring size of
> > 16K.
> >
> > If guest virtio requests more slots than available then the queue should 
> > just
> 
> I suspect 'more' here should be 'less'?
> 
> > be initialized to the smaller value.
> >
> > Conversely, if the number of descriptors requested exceeds the virtio host
> > queue size, then just silently use the smaller host size.
> >
> > Signed-off-by: Stephen Hemminger  > networkplumber.org>
> > ---
> >  lib/librte_pmd_virtio/virtio_ethdev.c | 18 +-
> >  1 file changed, 13 insertions(+), 5 deletions(-)
> >
> > diff --git a/lib/librte_pmd_virtio/virtio_ethdev.c
> > b/lib/librte_pmd_virtio/virtio_ethdev.c
> > index 3cb9c6a..db0232e 100644
> > --- a/lib/librte_pmd_virtio/virtio_ethdev.c
> > +++ b/lib/librte_pmd_virtio/virtio_ethdev.c
> > @@ -267,13 +267,21 @@ int virtio_dev_queue_setup(struct rte_eth_dev
> > *dev,
> >   if (vq_size == 0) {
> >   PMD_INIT_LOG(ERR, "%s: virtqueue does not exist",
> > __func__);
> >   return -EINVAL;
> > - } else if (!rte_is_power_of_2(vq_size)) {
> > + }
> > +
> > + if (!rte_is_power_of_2(vq_size)) {
> >   PMD_INIT_LOG(ERR, "%s: virtqueue size is not powerof 2",
> > __func__);
> >   return -EINVAL;
> > - } else if (nb_desc != vq_size) {
> > - PMD_INIT_LOG(ERR, "Warning: nb_desc(%d) is not equal to
> > vq size (%d), fall to vq size",
> > - nb_desc, vq_size);
> > - nb_desc = vq_size;
> > + }
> > +
> > + if (nb_desc < vq_size) {
> > + if (!rte_is_power_of_2(nb_desc)) {
> > + PMD_INIT_LOG(ERR,
> > +  "nb_desc(%u) size is not powerof 2",
> > +  nb_desc);
> > + return -EINVAL;
> > + }
> > + vq_size = nb_desc;
> Don't we need a warning when nb_desc > vq_size?


No warning is needed. This will actually be a common case
for many applications.  

IMHO application should not have to worry about
what type of network device it is running on and therefore would likely
pass a reasonably large number of receive descriptors (say 512) and since
the default KVM/QEMU ring size is 256, the receive queue would be limited
by host not application.

The whole idea of application passing number of receive descriptors to
DPDK is bogus because each device driver has different timing, and may
use different number of receive descriptors per packet.



[dpdk-dev] [PATCH 4/5] virtio: fix ring size negotiation

2015-04-16 Thread Stephen Hemminger
On Thu, 16 Apr 2015 09:38:46 +0200
Thomas Monjalon  wrote:

> Guys, this is an example of what should not be done with emails formatting.

Sorry, Google mail client for Android seems to encourage
bad formatting.


[dpdk-dev] mempool deleting and cache_size

2015-04-16 Thread Gonzalez Monroy, Sergio
On 16/04/2015 10:22, Marc Sune wrote:
>
>
> On 16/04/15 11:03, Gonzalez Monroy, Sergio wrote:
>> On 15/04/2015 20:24, Stephen Hemminger wrote:
>>> On Wed, 15 Apr 2015 20:15:18 +0100
>>> Zoltan Kiss  wrote:
>>>
>>>> Hi,
>>>>
>>>> I have two questions regarding mempools:
>>>>
>>>> - the first is trivial: how do you delete them? Can you? I can't see a
>>>> function to do that, and none of the examples are doing such thing. 
>>>> When
>>>> exactly it get deleted?
>>> You can't delete them. They live in hugepage area and are persistent.
>>> Correctly written code looks for them by name and reuses existing pool
>>> if it is big enough.
>>>
>> FYI, I'm looking into such functionality and also delete/destroy 
>> mempools (although still no plan on implementation).
>>
>
> Also the memzones behind, or will be "lost/leaked" after a mempool 
> destruction?
>
> Marc
>
>> Sergio
>
Sorry, my bad.
I did mean to say 'delete/destroy memzones' :)

Sergio


[dpdk-dev] [PATCH v5 2/8] Move common functions in eal.c

2015-04-16 Thread Thomas Monjalon
Hi Ravi,

I think this patch is too complex and moves too many different things:
- sysfs
- mem_cfg
- proc_type
- application_usage
Please split them up.

I'm not sure the classification into the new files eal_common_runtime.c and
eal_common_system.c is clear.

2015-04-09 12:40, Ravi Kerur:
> + mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config),

Why is this cast needed?



[dpdk-dev] mempool deleting and cache_size

2015-04-16 Thread Gonzalez Monroy, Sergio
On 16/04/2015 10:03, Gonzalez Monroy, Sergio wrote:
> On 15/04/2015 20:24, Stephen Hemminger wrote:
>> On Wed, 15 Apr 2015 20:15:18 +0100
>> Zoltan Kiss  wrote:
>>
>>> Hi,
>>>
>>> I have two questions regarding mempools:
>>>
>>> - the first is trivial: how do you delete them? Can you? I can't see a
>>> function to do that, and none of the examples are doing such thing. 
>>> When
>>> exactly it get deleted?
>> You can't delete them. They live in hugepage area and are persistent.
>> Correctly written code looks for them by name and reuses existing pool
>> if it is big enough.
>>
> FYI, I'm looking into such functionality and also delete/destroy 
> mempools (although still no plan on implementation).
>
> Sergio
Forgot to say, suggestions/ideas are more than welcome.

Sergio


[dpdk-dev] mempool deleting and cache_size

2015-04-16 Thread Gonzalez Monroy, Sergio
On 15/04/2015 20:24, Stephen Hemminger wrote:
> On Wed, 15 Apr 2015 20:15:18 +0100
> Zoltan Kiss  wrote:
>
>> Hi,
>>
>> I have two questions regarding mempools:
>>
>> - the first is trivial: how do you delete them? Can you? I can't see a
>> function to do that, and none of the examples are doing such thing. When
>> exactly it get deleted?
> You can't delete them. They live in hugepage area and are persistent.
> Correctly written code looks for them by name and reuses existing pool
> if it is big enough.
>
FYI, I'm looking into such functionality and also delete/destroy 
mempools (although still no plan on implementation).

Sergio


[dpdk-dev] [PATCH 4/5] virtio: fix ring size negotiation

2015-04-16 Thread Thomas Monjalon
Guys, this is an example of what should not be done with email formatting.

2015-04-16 06:26, Ouyang, Changchun:
> 
> From: Stephen Hemminger [mailto:stephen at networkplumber.org]
> Sent: Thursday, April 16, 2015 1:48 PM
> To: Ouyang, Changchun
> Cc: dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 4/5] virtio: fix ring size negotiation

This header is partly useless.

> 
> No warning is needed, it just works.

Stephen, please do not top post. It makes it harder to find what you are 
commenting on.

> 
> I know it works, but the upper user don't know the descriptor number is 
> reduced.
> I concern it is not so user-friendly here.

Changchun, please be sure the above text is quoted.
How are we supposed to understand who is speaking here?

All, please, take care of readers.
Thanks

> On Wed, Apr 15, 2015 at 8:39 PM, Ouyang, Changchun  intel.com> wrote:
> 
> 
> > -Original Message-
> > From: dev [mailto:dev-bounces at dpdk.org] 
> > On Behalf Of Stephen
> > Hemminger
> > Sent: Wednesday, April 15, 2015 11:20 PM
> > To: dev at dpdk.org
> > Subject: [dpdk-dev] [PATCH 4/5] virtio: fix ring size negotiation
> >
> > This fixes another of the issues with running virtio on non-KVM
> > envirionments. For example, Google Compute Engine reports a ring size of
> > 16K.
> >
> > If guest virtio requests more slots than available then the queue should 
> > just
> 
> I suspect 'more' here should be 'less'?
> 
> > be initialized to the smaller value.
> >
> > Conversely, if the number of descriptors requested exceeds the virtio host
> > queue size, then just silently use the smaller host size.
> >
> > Signed-off-by: Stephen Hemminger  > networkplumber.org>
> > ---
> >  lib/librte_pmd_virtio/virtio_ethdev.c | 18 +-
> >  1 file changed, 13 insertions(+), 5 deletions(-)
> >
> > diff --git a/lib/librte_pmd_virtio/virtio_ethdev.c
> > b/lib/librte_pmd_virtio/virtio_ethdev.c
> > index 3cb9c6a..db0232e 100644
> > --- a/lib/librte_pmd_virtio/virtio_ethdev.c
> > +++ b/lib/librte_pmd_virtio/virtio_ethdev.c
> > @@ -267,13 +267,21 @@ int virtio_dev_queue_setup(struct rte_eth_dev
> > *dev,
> >   if (vq_size == 0) {
> >   PMD_INIT_LOG(ERR, "%s: virtqueue does not exist",
> > __func__);
> >   return -EINVAL;
> > - } else if (!rte_is_power_of_2(vq_size)) {
> > + }
> > +
> > + if (!rte_is_power_of_2(vq_size)) {
> >   PMD_INIT_LOG(ERR, "%s: virtqueue size is not powerof 2",
> > __func__);
> >   return -EINVAL;
> > - } else if (nb_desc != vq_size) {
> > - PMD_INIT_LOG(ERR, "Warning: nb_desc(%d) is not equal to
> > vq size (%d), fall to vq size",
> > - nb_desc, vq_size);
> > - nb_desc = vq_size;
> > + }
> > +
> > + if (nb_desc < vq_size) {
> > + if (!rte_is_power_of_2(nb_desc)) {
> > + PMD_INIT_LOG(ERR,
> > +  "nb_desc(%u) size is not powerof 2",
> > +  nb_desc);
> > + return -EINVAL;
> > + }
> > + vq_size = nb_desc;
> Don't we need a warning when nb_desc > vq_size?
> 
> >   }
> >
> >   if (queue_type == VTNET_RQ) {
> > --
> > 2.1.4
> 
> 




[dpdk-dev] [PATCH] librte_pmd_fm10k: Fix max_vfs issue in fm10k PMD

2015-04-16 Thread Chen, Jing D
Hi, 

> -Original Message-
> From: Michael Qiu [mailto:qiudayu at cn.ibm.com]
> Sent: Tuesday, April 14, 2015 5:25 PM
> To: dev at dpdk.org
> Cc: Chen, Jing D; Qiu, Michael
> Subject: [PATCH] librte_pmd_fm10k: Fix max_vfs issue in fm10k PMD
> 
> From: Michael Qiu 
> 
> In DPDK, max_vfs means vf numbers created, not the max number vfs
> the device supported.
> 
> Signed-off-by: Michael Qiu 
> ---
>  lib/librte_pmd_fm10k/fm10k_ethdev.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/lib/librte_pmd_fm10k/fm10k_ethdev.c
> b/lib/librte_pmd_fm10k/fm10k_ethdev.c
> index 0312fad..297ff88 100644
> --- a/lib/librte_pmd_fm10k/fm10k_ethdev.c
> +++ b/lib/librte_pmd_fm10k/fm10k_ethdev.c
> @@ -770,7 +770,7 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev,
>   dev_info->max_tx_queues  = hw->mac.max_queues;
>   dev_info->max_mac_addrs  = 1;
>   dev_info->max_hash_mac_addrs = 0;
> - dev_info->max_vfs= FM10K_MAX_VF_NUM;
> + dev_info->max_vfs= dev->pci_dev->max_vfs;
>   dev_info->max_vmdq_pools = ETH_64_POOLS;
>   dev_info->rx_offload_capa =
>   DEV_RX_OFFLOAD_IPV4_CKSUM |
> --
> 1.9.3

Acked-by: Jing Chen 



[dpdk-dev] [PATCH v2 1/2] ixgbe: fix build with gcc 4.4

2015-04-16 Thread Zhang, Helin


> -Original Message-
> From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com]
> Sent: Thursday, April 16, 2015 4:49 AM
> To: dev at dpdk.org
> Cc: Vlad Zolotarov; Ananyev, Konstantin; Zhang, Helin
> Subject: [PATCH v2 1/2] ixgbe: fix build with gcc 4.4
> 
> With GCC 4.4.7 from CentOS 6.5, the following errors arise:
> 
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c: In function 'ixgbe_dev_rx_queue_setup':
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c:2509: error: missing initializer
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c:2509: error: (near initialization for
> 'dev_info.driver_name')
> 
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c: In function 'ixgbe_set_rsc':
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c:4072: error: missing initializer
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c:4072: error: (near initialization for
> 'dev_info.driver_name')
> 
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c: In function
> 'ixgbe_recv_pkts_lro_single_alloc':
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c:1479: error: 'next_rsc_entry' may be used
> uninitialized in this function
> lib/librte_pmd_ixgbe/ixgbe_rxtx.c:1480: error: 'next_rxe' may be used
> uninitialized in this function
> 
> The "missing initializer" warning is a GCC bug which seems fixed in 4.7.
> The "may be used uninitialized" warning seems to be another GCC bug and is
> workarounded with NULL initialization.
> 
> Fixes: 8eecb3295aed ("ixgbe: add LRO support")
> 
> Signed-off-by: Thomas Monjalon 
Acked-by: Helin Zhang 


[dpdk-dev] [PATCH 4/5] virtio: fix ring size negotiation

2015-04-16 Thread Ouyang, Changchun


From: Stephen Hemminger [mailto:step...@networkplumber.org]
Sent: Thursday, April 16, 2015 1:48 PM
To: Ouyang, Changchun
Cc: dev at dpdk.org
Subject: Re: [dpdk-dev] [PATCH 4/5] virtio: fix ring size negotiation

No warning is needed, it just works.

I know it works, but the upper user doesn't know the descriptor number is reduced.
I am concerned it is not so user-friendly here.


On Wed, Apr 15, 2015 at 8:39 PM, Ouyang, Changchun mailto:changchun.ouyang at intel.com>> wrote:


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On 
> Behalf Of Stephen
> Hemminger
> Sent: Wednesday, April 15, 2015 11:20 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH 4/5] virtio: fix ring size negotiation
>
> This fixes another of the issues with running virtio on non-KVM
> envirionments. For example, Google Compute Engine reports a ring size of
> 16K.
>
> If guest virtio requests more slots than available then the queue should just

I suspect 'more' here should be 'less'?

> be initialized to the smaller value.
>
> Conversely, if the number of descriptors requested exceeds the virtio host
> queue size, then just silently use the smaller host size.
>
> Signed-off-by: Stephen Hemminger  networkplumber.org>
> ---
>  lib/librte_pmd_virtio/virtio_ethdev.c | 18 +-
>  1 file changed, 13 insertions(+), 5 deletions(-)
>
> diff --git a/lib/librte_pmd_virtio/virtio_ethdev.c
> b/lib/librte_pmd_virtio/virtio_ethdev.c
> index 3cb9c6a..db0232e 100644
> --- a/lib/librte_pmd_virtio/virtio_ethdev.c
> +++ b/lib/librte_pmd_virtio/virtio_ethdev.c
> @@ -267,13 +267,21 @@ int virtio_dev_queue_setup(struct rte_eth_dev
> *dev,
>   if (vq_size == 0) {
>   PMD_INIT_LOG(ERR, "%s: virtqueue does not exist",
> __func__);
>   return -EINVAL;
> - } else if (!rte_is_power_of_2(vq_size)) {
> + }
> +
> + if (!rte_is_power_of_2(vq_size)) {
>   PMD_INIT_LOG(ERR, "%s: virtqueue size is not powerof 2",
> __func__);
>   return -EINVAL;
> - } else if (nb_desc != vq_size) {
> - PMD_INIT_LOG(ERR, "Warning: nb_desc(%d) is not equal to
> vq size (%d), fall to vq size",
> - nb_desc, vq_size);
> - nb_desc = vq_size;
> + }
> +
> + if (nb_desc < vq_size) {
> + if (!rte_is_power_of_2(nb_desc)) {
> + PMD_INIT_LOG(ERR,
> +  "nb_desc(%u) size is not powerof 2",
> +  nb_desc);
> + return -EINVAL;
> + }
> + vq_size = nb_desc;
Don't we need a warning when nb_desc > vq_size?

>   }
>
>   if (queue_type == VTNET_RQ) {
> --
> 2.1.4



[dpdk-dev] [PATCH 3/5] virtio: don't set mac table unless negotiated

2015-04-16 Thread Ouyang, Changchun


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Stephen
> Hemminger
> Sent: Wednesday, April 15, 2015 11:20 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH 3/5] virtio: don't set mac table unless negotiated
> 
> Don't attempt to set the MAC address table unless the host allows it.
> Also, don't return a value from mac_table_set since all callers ignore the
> return value.
> 
> Signed-off-by: Stephen Hemminger 

Acked-by: Changchun Ouyang 


[dpdk-dev] [PATCH 2/5] virtio: don't enable/disable rx modes unless supported

2015-04-16 Thread Ouyang, Changchun


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Stephen
> Hemminger
> Sent: Wednesday, April 15, 2015 11:20 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH 2/5] virtio: don't enable/disable rx modes unless
> supported
> 
> Don't try to set features related to receiving unless the appropriate feature
> bit has been negotiated with the host.
> 
> This solves some of the issues when using virtio on non-KVM/QEMU
> hypervisors.
> 
> Signed-off-by: Stephen Hemminger 

Acked-by: Changchun Ouyang 


[dpdk-dev] [PATCH 1/5] virtio: remove useless new lines

2015-04-16 Thread Ouyang, Changchun


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Stephen
> Hemminger
> Sent: Wednesday, April 15, 2015 11:20 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH 1/5] virtio: remove useless new lines
> 
> There are several places in virtio with extra newlines between calling a
> function and checking the result.
> Remove them to improve readability.
> 
> Signed-off-by: Stephen Hemminger 

Acked-by: Changchun Ouyang 


[dpdk-dev] [PATCH 4/5] virtio: fix ring size negotiation

2015-04-16 Thread Ouyang, Changchun


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Stephen
> Hemminger
> Sent: Wednesday, April 15, 2015 11:20 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH 4/5] virtio: fix ring size negotiation
> 
> This fixes another of the issues with running virtio on non-KVM
> envirionments. For example, Google Compute Engine reports a ring size of
> 16K.
> 
> If guest virtio requests more slots than available then the queue should just

I suspect 'more' here should be 'less'?

> be initialized to the smaller value.
> 
> Conversely, if the number of descriptors requested exceeds the virtio host
> queue size, then just silently use the smaller host size.
> 
> Signed-off-by: Stephen Hemminger 
> ---
>  lib/librte_pmd_virtio/virtio_ethdev.c | 18 +-
>  1 file changed, 13 insertions(+), 5 deletions(-)
> 
> diff --git a/lib/librte_pmd_virtio/virtio_ethdev.c
> b/lib/librte_pmd_virtio/virtio_ethdev.c
> index 3cb9c6a..db0232e 100644
> --- a/lib/librte_pmd_virtio/virtio_ethdev.c
> +++ b/lib/librte_pmd_virtio/virtio_ethdev.c
> @@ -267,13 +267,21 @@ int virtio_dev_queue_setup(struct rte_eth_dev
> *dev,
>   if (vq_size == 0) {
>   PMD_INIT_LOG(ERR, "%s: virtqueue does not exist",
> __func__);
>   return -EINVAL;
> - } else if (!rte_is_power_of_2(vq_size)) {
> + }
> +
> + if (!rte_is_power_of_2(vq_size)) {
>   PMD_INIT_LOG(ERR, "%s: virtqueue size is not powerof 2",
> __func__);
>   return -EINVAL;
> - } else if (nb_desc != vq_size) {
> - PMD_INIT_LOG(ERR, "Warning: nb_desc(%d) is not equal to
> vq size (%d), fall to vq size",
> - nb_desc, vq_size);
> - nb_desc = vq_size;
> + }
> +
> + if (nb_desc < vq_size) {
> + if (!rte_is_power_of_2(nb_desc)) {
> + PMD_INIT_LOG(ERR,
> +  "nb_desc(%u) size is not powerof 2",
> +  nb_desc);
> + return -EINVAL;
> + }
> + vq_size = nb_desc;

Don't we need a warning when nb_desc > vq_size?

>   }
> 
>   if (queue_type == VTNET_RQ) {
> --
> 2.1.4



[dpdk-dev] [PATCH v5] Restore support for virtio on FreeBSD

2015-04-16 Thread Ouyang, Changchun


> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Ananyev,
> Konstantin
> Sent: Wednesday, April 15, 2015 6:22 AM
> To: Raz Amir; dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v5] Restore support for virtio on FreeBSD
> 
> Hi,
> 
> > -Original Message-
> > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Raz Amir
> > Sent: Tuesday, April 14, 2015 5:23 PM
> > To: dev at dpdk.org
> > Cc: Raz Amir
> > Subject: [dpdk-dev] [PATCH v5] Restore support for virtio on FreeBSD
> >
> > Fixes: 8a312224bcde ("eal/bsd: fix fd leak")
> >
> > Closing /dev/io fd causes SIGBUS in inb/outb instructions as the
> > process loses the IOPL privileges once the fd is closed:
> > (gdb) bt
> > 0  0x00492f2c in outb (port=49170, data=0 '\000')
> > at /usr/include/machine/cpufunc.h:244
> > 1  0x00492f7a in outb_p (data=0 '\000', port=49170)
> > at /dpdk/dpdk-2.0.0/lib/librte_pmd_virtio/virtio_pci.h:211
> > 2  0x0049328d in vtpci_set_status (hw=0x80331f380, status=0
> '\000')
> > at /dpdk/dpdk-2.0.0/lib/librte_pmd_virtio/virtio_pci.c:130
> > 3  0x004931fe in vtpci_reset (hw=0x80331f380)
> > at /dpdk/dpdk-2.0.0/lib/librte_pmd_virtio/virtio_pci.c:108
> > 4  0x004a175e in eth_virtio_dev_init (eth_dev=0x831b80
> )
> > at /dpdk/dpdk-2.0.0/lib/librte_pmd_virtio/virtio_ethdev.c:1150
> > 5  0x00462c09 in rte_eth_dev_init (pci_drv=0x79d1a0
> ,
> > pci_dev=0x802417560) at
> > /dpdk/dpdk-2.0.0/lib/librte_ether/rte_ethdev.c:326
> > 6  0x0046f03f in rte_eal_pci_probe_one_driver (dr=0x79d1a0
> ,
> > dev=0x802417560) at
> > /dpdk/dpdk-2.0.0/lib/librte_eal/bsdapp/eal/eal_pci.c:487
> > 7  0x00475b06 in pci_probe_all_drivers (dev=0x802417560)
> > at /dpdk/dpdk-2.0.0/lib/librte_eal/common/eal_common_pci.c:116
> > 8  0x00475bb9 in rte_eal_pci_probe ()
> > at /dpdk/dpdk-2.0.0/lib/librte_eal/common/eal_common_pci.c:246
> > 9  0x0046cd63 in rte_eal_init (argc=5, argv=0x7fffeaf0)
> > at /dpdk/dpdk-2.0.0/lib/librte_eal/bsdapp/eal/eal.c:554
> > 10 0x00404544 in main ()
> >
> > Signed-off-by: Raz Amir 
> > ---
> >  lib/librte_eal/bsdapp/eal/eal.c | 19 ++-
> >  lib/librte_eal/common/include/rte_eal.h | 10 ++
> >  lib/librte_eal/linuxapp/eal/eal.c   |  5 +
> >  lib/librte_pmd_virtio/virtio_ethdev.c   |  9 +
> >  4 files changed, 38 insertions(+), 5 deletions(-)
> >
> > diff --git a/lib/librte_eal/bsdapp/eal/eal.c
> > b/lib/librte_eal/bsdapp/eal/eal.c index 871d5f4..687dd83 100644
> > --- a/lib/librte_eal/bsdapp/eal/eal.c
> > +++ b/lib/librte_eal/bsdapp/eal/eal.c
> > @@ -112,6 +112,9 @@ struct internal_config internal_config;
> >  /* used by rte_rdtsc() */
> >  int rte_cycles_vmware_tsc_map;
> >
> > +/* fd to keep open for iopl */
> > +static int iopl_fd = -1;
> > +
> >  /* Return a pointer to the configuration structure */  struct
> > rte_config *
> >  rte_eal_get_configuration(void)
> > @@ -421,15 +424,21 @@ int rte_eal_has_hugepages(void)  int
> >  rte_eal_iopl_init(void)
> >  {
> > -   int fd;
> > -
> > -   fd = open("/dev/io", O_RDWR);
> > -   if (fd < 0)
> > +   iopl_fd = open("/dev/io", O_RDWR);
> > +   if (iopl_fd < 0)
> > return -1;
> > -   close(fd);
> > +   /* keep fd open for iopl */
> > return 0;
> >  }
> >
> > +void
> > +rte_eal_iopl_uninit(void)
> > +{
> > +   if (iopl_fd != -1)
> > +   close(iopl_fd);
> > +   iopl_fd = -1;
> > +}
> 
> Did I get it right: that function would be invoked for at dev_detach()?
> And after we invoked it, we still we can have other multiple virtio devices
> attached and active?
> If so, then I suppose you'll hit the same problem again.
> Konstantin
> 

Yes, we need to verify this issue.
If it is true, could we use a reference counter to resolve it?
Thanks
Changchun

> > +
> >  /* Launch threads, called at application init(). */  int
> > rte_eal_init(int argc, char **argv) diff --git
> > a/lib/librte_eal/common/include/rte_eal.h
> > b/lib/librte_eal/common/include/rte_eal.h
> > index 1385a73..9151e08 100644
> > --- a/lib/librte_eal/common/include/rte_eal.h
> > +++ b/lib/librte_eal/common/include/rte_eal.h
> > @@ -127,6 +127,16 @@ enum rte_proc_type_t
> rte_eal_process_type(void);
> > int rte_eal_iopl_init(void);
> >
> >  /**
> > + * Release iopl priviledge - currently relevant only for FreeBSD.
> > + *
> > + * This function should be called by pmds which need access to ioports.
> > +
> > + * @return
> > + *   void
> > + */
> > +void rte_eal_iopl_uninit(void);
> > +
> > +/**
> >   * Initialize the Environment Abstraction Layer (EAL).
> >   *
> >   * This function is to be executed on the MASTER lcore only, as soon
> > diff --git a/lib/librte_eal/linuxapp/eal/eal.c
> > b/lib/librte_eal/linuxapp/eal/eal.c
> > index bd770cf..687cebf 100644
> > --- a/lib/librte_eal/linuxapp/eal/eal.c
> > +++ b/lib/librte_eal/linuxapp/eal/eal.c
> > @@ -695,6 +695,11 @@ rte_eal_iopl_init(void)  #endif  }

[dpdk-dev] freeze with dpdk-2.0.0

2015-04-16 Thread Ouyang, Changchun
Hi Olivier

From: Olivier Deme [mailto:od...@druidsoftware.com]
Sent: Wednesday, April 15, 2015 5:49 PM
To: Ouyang, Changchun; dev at dpdk.org
Subject: Re: [dpdk-dev] freeze with dpdk-2.0.0

Hi Changchun,

I confirm that blacklisting the first two network interfaces solves the problem.
Just to be clear, is it the case that it is not possible to have a DPDK 
application acquiring selectively virtio network devices without using the 
blacklisting (-b) option?

It seems a bit clumsy to have the application being configured with all network 
interfaces to be bound to DPDK.

This applies only to virtio devices; other device types don't have this issue.
Since the virtio PMD doesn't rely on the uio/igb_uio module any more,
you need to use -b to blacklist whatever devices you want to exclude, because
by default all devices are on the whitelist.
Thanks
Changchun



[dpdk-dev] vhost-switch TX corrupted packet with Fedora 21 VM

2015-04-16 Thread Choi, Sy Jong
Hi

Sending packets out from virtio to DPDK vhost to phy will cause packet
corruption.
I have tested this using 
 - DPDK 2.0.0 using vhost-cuse, 
 - qemu 1.6.1. 
 - DPDK vhost-switch sample app.

Fedora 21 (kernel 3.19), packets corrupted.
Debian Wheezy (kernel 3.2), packets are ok.

Sometimes the packets were not even sent out, and I keep getting this message:
VHOST_DATA: (0) WARNING: This device is using an existing MAC address and has 
not been registered
VHOST_DATA: (0) WARNING: This device is using an existing MAC address and has 
not been registered

I think DPDK vhost-switch filters out non-registered MAC addresses.
Occasionally, packets were being forwarded by vhost-switch.

The setup is very simple: just configure an IP address (1.1.1.1/24) on the VM eth0,
and ping an IP address (1.1.1.2) on the same subnet with an ARP table entry added.
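                                                __
+--------------------------------------------+   |
|   guest                                    |   |
|   +-------------------+                    |   |
|   |   Ping  ICMP      |                    |   |
|   +-------------------+                    |   |
|             :                              |   |
|             |                              |   |  guest
|             v                              |   |
|   +-------------------+                    |   |
|   |   Kernel Virtio   |                    |   |
+---+-------------------+--------------------+ __|
              :
              |
              v                                 __
+---+-------------------+--------------------+   |
|   |   vhost net0      |                    |   |
|   +-------------------+                    |   |
|             :                              |   |
|             |                              |   |  host
|             V                              |   |
|   +-------------------+                    |   |
|   |   phy port        |  vhost-switch      |   |
+---+-------------------+--------------------+ __|
              :
              |
              V
+--------------------------------------------+
|                                            |
|             traffic generator              |
|                                            |
+--------------------------------------------+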

ICMP packet has become:-
0010: 10001050b95a8100 00881050b95a00882050b95a
0020: 00882050b95a00883050b95a 00883050b95a00884050b95a
0030: 00884050b95a00885050b95a 00885050b95a00886050b95a
0040: 00886050b95a

Regards,
Choi, Sy Jong
Platform Application Engineer



[dpdk-dev] [PATCH v2 1/2] ixgbe: fix build with gcc 4.4

2015-04-16 Thread Thomas Monjalon
2015-04-16 12:14, Vlad Zolotarov:
> On 04/15/15 23:49, Thomas Monjalon wrote:
> > The "may be used uninitialized" warning seems to be another GCC bug and is
> > worked around with NULL initialization.
> > --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > @@ -1476,8 +1476,8 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf 
> > **rx_pkts, uint16_t nb_pkts,
> > bool eop;
> > struct ixgbe_rx_entry *rxe;
> > struct ixgbe_rsc_entry *rsc_entry;
> > -   struct ixgbe_rsc_entry *next_rsc_entry;
> > -   struct ixgbe_rx_entry *next_rxe;
> > +   struct ixgbe_rsc_entry *next_rsc_entry = NULL;
> > +   struct ixgbe_rx_entry *next_rxe = NULL;
> 
> -Wno-maybe-uninitialized ?

I prefer avoiding this flag for 2 reasons:
- It's not supported in every GCC version (needs special handling)
- NULL assignment doesn't hurt