[dpdk-dev] [PATCH v3 13/13] testpmd: add a verbose mode csum forward engine

2014-11-20 Thread Olivier Matz
If the user specifies 'set verbose 1' in testpmd command line,
the csum forward engine will dump some information about received
and transmitted packets, especially which flags are set and what
values are assigned to l2_len, l3_len, l4_len and tso_segsz.

This can help someone implementing TSO or hardware checksum offload to
understand how to configure the mbufs.

Example of output for one packet:

 --
 rx: l2_len=14 ethertype=800 l3_len=20 l4_proto=6 l4_len=20
 tx: m->l2_len=14 m->l3_len=20 m->l4_len=20
 tx: m->tso_segsz=800
 tx: flags=PKT_TX_IP_CKSUM PKT_TX_TCP_SEG
 --

Signed-off-by: Olivier Matz 
---
 app/test-pmd/csumonly.c | 51 +
 1 file changed, 51 insertions(+)

diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index ec9555f..72b984c 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -467,6 +467,57 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
m->tso_segsz = tso_segsz;
m->ol_flags = ol_flags;

+   /* if verbose mode is enabled, dump debug info */
+   if (verbose_level > 0) {
+   struct {
+   uint64_t flag;
+   uint64_t mask;
+   } tx_flags[] = {
+   { PKT_TX_IP_CKSUM, PKT_TX_IP_CKSUM },
+   { PKT_TX_UDP_CKSUM, PKT_TX_L4_MASK },
+   { PKT_TX_TCP_CKSUM, PKT_TX_L4_MASK },
+   { PKT_TX_SCTP_CKSUM, PKT_TX_L4_MASK },
+   { PKT_TX_VXLAN_CKSUM, PKT_TX_VXLAN_CKSUM },
+   { PKT_TX_TCP_SEG, PKT_TX_TCP_SEG },
+   };
+   unsigned j;
+   const char *name;
+
+   printf("-\n");
+   /* dump rx parsed packet info */
+   printf("rx: l2_len=%d ethertype=%x l3_len=%d "
+   "l4_proto=%d l4_len=%d\n",
+   l2_len, rte_be_to_cpu_16(ethertype),
+   l3_len, l4_proto, l4_len);
+   if (tunnel == 1)
+   printf("rx: outer_l2_len=%d outer_ethertype=%x "
+   "outer_l3_len=%d\n", outer_l2_len,
+   rte_be_to_cpu_16(outer_ethertype),
+   outer_l3_len);
+   /* dump tx packet info */
+   if ((testpmd_ol_flags & (TESTPMD_TX_OFFLOAD_IP_CKSUM |
+   TESTPMD_TX_OFFLOAD_UDP_CKSUM |
+   TESTPMD_TX_OFFLOAD_TCP_CKSUM |
+   TESTPMD_TX_OFFLOAD_SCTP_CKSUM)) 
||
+   tso_segsz != 0)
+   printf("tx: m->l2_len=%d m->l3_len=%d "
+   "m->l4_len=%d\n",
+   m->l2_len, m->l3_len, m->l4_len);
+   if ((tunnel == 1) &&
+   (testpmd_ol_flags & 
TESTPMD_TX_OFFLOAD_VXLAN_CKSUM))
+   printf("tx: m->inner_l2_len=%d 
m->inner_l3_len=%d\n",
+   m->inner_l2_len, m->inner_l3_len);
+   if (tso_segsz != 0)
+   printf("tx: m->tso_segsz=%d\n", m->tso_segsz);
+   printf("tx: flags=");
+   for (j = 0; j < sizeof(tx_flags)/sizeof(*tx_flags); 
j++) {
+   name = 
rte_get_tx_ol_flag_name(tx_flags[j].flag);
+   if ((m->ol_flags & tx_flags[j].mask) ==
+   tx_flags[j].flag)
+   printf("%s ", name);
+   }
+   printf("\n");
+   }
}
nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
fs->tx_packets += nb_tx;
-- 
2.1.0



[dpdk-dev] [PATCH v3 12/13] testpmd: support TSO in csum forward engine

2014-11-20 Thread Olivier Matz
Add two new commands in testpmd:

- tso set <segsize> <portid>
- tso show <portid>

These commands can be used to enable TSO when transmitting TCP packets in
the csum forward engine. Ex:

  set fwd csum
  tx_checksum set ip hw 0
  tso set 800 0
  start

Signed-off-by: Olivier Matz 
---
 app/test-pmd/cmdline.c  | 92 +
 app/test-pmd/csumonly.c | 64 --
 app/test-pmd/testpmd.h  |  1 +
 3 files changed, 139 insertions(+), 18 deletions(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 61e4340..fe2ee41 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -323,6 +323,14 @@ static void cmd_help_long_parsed(void *parsed_result,
"tx_checksum show (port_id)\n"
"Display tx checksum offload configuration\n\n"

+   "tso set (segsize) (portid)\n"
+   "Enable TCP Segmentation Offload in csum forward"
+   " engine.\n"
+   "Please check the NIC datasheet for HW limits.\n\n"
+
+   "tso show (portid)"
+   "Display the status of TCP Segmentation 
Offload.\n\n"
+
"set fwd (%s)\n"
"Set packet forwarding mode.\n\n"

@@ -2867,6 +2875,88 @@ cmdline_parse_inst_t cmd_tx_cksum_show = {
},
 };

+/* *** ENABLE HARDWARE SEGMENTATION IN TX PACKETS *** */
+struct cmd_tso_set_result {
+   cmdline_fixed_string_t tso;
+   cmdline_fixed_string_t mode;
+   uint16_t tso_segsz;
+   uint8_t port_id;
+};
+
+static void
+cmd_tso_set_parsed(void *parsed_result,
+  __attribute__((unused)) struct cmdline *cl,
+  __attribute__((unused)) void *data)
+{
+   struct cmd_tso_set_result *res = parsed_result;
+   struct rte_eth_dev_info dev_info;
+
+   if (port_id_is_invalid(res->port_id))
+   return;
+
+   if (!strcmp(res->mode, "set"))
+   ports[res->port_id].tso_segsz = res->tso_segsz;
+
+   if (ports[res->port_id].tso_segsz == 0)
+   printf("TSO is disabled\n");
+   else
+   printf("TSO segment size is %d\n",
+   ports[res->port_id].tso_segsz);
+
+   /* display warnings if configuration is not supported by the NIC */
+   rte_eth_dev_info_get(res->port_id, &dev_info);
+   if ((ports[res->port_id].tso_segsz != 0) &&
+   (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO) == 0) {
+   printf("Warning: TSO enabled but not "
+   "supported by port %d\n", res->port_id);
+   }
+}
+
+cmdline_parse_token_string_t cmd_tso_set_tso =
+   TOKEN_STRING_INITIALIZER(struct cmd_tso_set_result,
+   tso, "tso");
+cmdline_parse_token_string_t cmd_tso_set_mode =
+   TOKEN_STRING_INITIALIZER(struct cmd_tso_set_result,
+   mode, "set");
+cmdline_parse_token_num_t cmd_tso_set_tso_segsz =
+   TOKEN_NUM_INITIALIZER(struct cmd_tso_set_result,
+   tso_segsz, UINT16);
+cmdline_parse_token_num_t cmd_tso_set_portid =
+   TOKEN_NUM_INITIALIZER(struct cmd_tso_set_result,
+   port_id, UINT8);
+
+cmdline_parse_inst_t cmd_tso_set = {
+   .f = cmd_tso_set_parsed,
+   .data = NULL,
+   .help_str = "Set TSO segment size for csum engine (0 to disable): "
+   "tso set  ",
+   .tokens = {
+   (void *)&cmd_tso_set_tso,
+   (void *)&cmd_tso_set_mode,
+   (void *)&cmd_tso_set_tso_segsz,
+   (void *)&cmd_tso_set_portid,
+   NULL,
+   },
+};
+
+cmdline_parse_token_string_t cmd_tso_show_mode =
+   TOKEN_STRING_INITIALIZER(struct cmd_tso_set_result,
+   mode, "show");
+
+
+cmdline_parse_inst_t cmd_tso_show = {
+   .f = cmd_tso_set_parsed,
+   .data = NULL,
+   .help_str = "Show TSO segment size for csum engine: "
+   "tso show ",
+   .tokens = {
+   (void *)&cmd_tso_set_tso,
+   (void *)&cmd_tso_show_mode,
+   (void *)&cmd_tso_set_portid,
+   NULL,
+   },
+};
+
 /* *** ENABLE/DISABLE FLUSH ON RX STREAMS *** */
 struct cmd_set_flush_rx {
cmdline_fixed_string_t set;
@@ -7880,6 +7970,8 @@ cmdline_parse_ctx_t main_ctx[] = {
(cmdline_parse_inst_t *)&cmd_tx_vlan_set_pvid,
(cmdline_parse_inst_t *)&cmd_tx_cksum_set,
(cmdline_parse_inst_t *)&cmd_tx_cksum_show,
+   (cmdline_parse_inst_t *)&cmd_tso_set,
+   (cmdline_parse_inst_t *)&cmd_tso_show,
(cmdline_parse_inst_t *)&cmd_link_flow_control_set,
(cmdline_parse_inst_t *)&cmd_link_flow_control_set_rx,
(cmdline_parse_inst_t *)&cmd_link_flow_control_set_tx,
diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 37d4129..ec9555f 100644
--- a/app/test-pmd/

[dpdk-dev] [PATCH v3 11/13] ixgbe: support TCP segmentation offload

2014-11-20 Thread Olivier Matz
Implement TSO (TCP segmentation offload) in ixgbe driver. The driver is
now able to use PKT_TX_TCP_SEG mbuf flag and mbuf hardware offload infos
(l2_len, l3_len, l4_len, tso_segsz) to configure the hardware support of
TCP segmentation.

In ixgbe, when doing TSO, the IP length must not be included in the TCP
pseudo header checksum. A new function ixgbe_fix_tcp_phdr_cksum() is
used to fix the pseudo header checksum of the packet before giving it to
the hardware.

In the patch, the tx_desc_cksum_flags_to_olinfo() and
tx_desc_ol_flags_to_cmdtype() functions have been reworked to make them
clearer. This should not impact performance as gcc (version 4.8 in my
case) is smart enough to convert the tests into a code that does not
contain any branch instruction.

Signed-off-by: Olivier Matz 
Acked-by: Konstantin Ananyev 
---
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c |   3 +-
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c   | 170 ++--
 lib/librte_pmd_ixgbe/ixgbe_rxtx.h   |  19 ++--
 3 files changed, 118 insertions(+), 74 deletions(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c 
b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
index 2eb609c..2c2ecc0 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
@@ -1964,7 +1964,8 @@ ixgbe_dev_info_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *dev_info)
DEV_TX_OFFLOAD_IPV4_CKSUM  |
DEV_TX_OFFLOAD_UDP_CKSUM   |
DEV_TX_OFFLOAD_TCP_CKSUM   |
-   DEV_TX_OFFLOAD_SCTP_CKSUM;
+   DEV_TX_OFFLOAD_SCTP_CKSUM  |
+   DEV_TX_OFFLOAD_TCP_TSO;

dev_info->default_rxconf = (struct rte_eth_rxconf) {
.rx_thresh = {
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c 
b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index 2df3385..63216fa 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -2,6 +2,7 @@
  *   BSD LICENSE
  *
  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright 2014 6WIND S.A.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -94,7 +95,8 @@
 #define IXGBE_TX_OFFLOAD_MASK ( \
PKT_TX_VLAN_PKT |\
PKT_TX_IP_CKSUM |\
-   PKT_TX_L4_MASK)
+   PKT_TX_L4_MASK | \
+   PKT_TX_TCP_SEG)

 static inline struct rte_mbuf *
 rte_rxmbuf_alloc(struct rte_mempool *mp)
@@ -363,59 +365,84 @@ ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf 
**tx_pkts,
 static inline void
 ixgbe_set_xmit_ctx(struct igb_tx_queue* txq,
volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
-   uint64_t ol_flags, uint32_t vlan_macip_lens)
+   uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
 {
uint32_t type_tucmd_mlhl;
-   uint32_t mss_l4len_idx;
+   uint32_t mss_l4len_idx = 0;
uint32_t ctx_idx;
-   uint32_t cmp_mask;
+   uint32_t vlan_macip_lens;
+   union ixgbe_tx_offload tx_offload_mask;

ctx_idx = txq->ctx_curr;
-   cmp_mask = 0;
+   tx_offload_mask.data = 0;
type_tucmd_mlhl = 0;

+   /* Specify which HW CTX to upload. */
+   mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
+
if (ol_flags & PKT_TX_VLAN_PKT) {
-   cmp_mask |= TX_VLAN_CMP_MASK;
+   tx_offload_mask.vlan_tci = ~0;
}

-   if (ol_flags & PKT_TX_IP_CKSUM) {
-   type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
-   cmp_mask |= TX_MACIP_LEN_CMP_MASK;
-   }
+   /* check if TCP segmentation required for this packet */
+   if (ol_flags & PKT_TX_TCP_SEG) {
+   /* implies IP cksum and TCP cksum */
+   type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
+   IXGBE_ADVTXD_TUCMD_L4T_TCP |
+   IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
+
+   tx_offload_mask.l2_len = ~0;
+   tx_offload_mask.l3_len = ~0;
+   tx_offload_mask.l4_len = ~0;
+   tx_offload_mask.tso_segsz = ~0;
+   mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
+   mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
+   } else { /* no TSO, check if hardware checksum is needed */
+   if (ol_flags & PKT_TX_IP_CKSUM) {
+   type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
+   tx_offload_mask.l2_len = ~0;
+   tx_offload_mask.l3_len = ~0;
+   }

-   /* Specify which HW CTX to upload. */
-   mss_l4len_idx = (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
-   switch (ol_flags & PKT_TX_L4_MASK) {
-   case PKT_TX_UDP_CKSUM:
-   type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
+   switch (ol_flags & PKT_TX_L4_MASK) {
+   case PKT_TX_UDP_CKSU

[dpdk-dev] [PATCH v3 10/13] mbuf: generic support for TCP segmentation offload

2014-11-20 Thread Olivier Matz
Some of the NICs supported by DPDK have a possibility to accelerate TCP
traffic by using segmentation offload. The application prepares a packet
with a valid TCP header with size up to 64K and delegates the
segmentation to the NIC.

Implement the generic part of TCP segmentation offload in rte_mbuf. It
introduces 2 new fields in rte_mbuf: l4_len (length of L4 header in bytes)
and tso_segsz (MSS of packets).

To delegate the TCP segmentation to the hardware, the user has to:

- set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
  PKT_TX_TCP_CKSUM)
- set PKT_TX_IP_CKSUM if it's IPv4, and set the IP checksum to 0 in
  the packet
- fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
- calculate the pseudo header checksum without taking ip_len into account,
  and set it in the TCP header, for instance by using
  rte_ipv4_phdr_cksum(ip_hdr, ol_flags)

The API is inspired from ixgbe hardware (the next commit adds the
support for ixgbe), but it seems generic enough to be used for other
hw/drivers in the future.

This commit also reworks the way l2_len and l3_len are used in igb
and ixgbe drivers as the l2_l3_len is not available anymore in mbuf.

Signed-off-by: Mirek Walukiewicz 
Signed-off-by: Olivier Matz 
Acked-by: Konstantin Ananyev 
---
 app/test-pmd/testpmd.c|  2 +-
 examples/ipv4_multicast/main.c|  2 +-
 lib/librte_mbuf/rte_mbuf.c|  1 +
 lib/librte_mbuf/rte_mbuf.h| 45 +++
 lib/librte_net/rte_ip.h   | 39 +++--
 lib/librte_pmd_e1000/igb_rxtx.c   | 11 +-
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 11 +-
 7 files changed, 82 insertions(+), 29 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 12adafa..632a993 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -408,7 +408,7 @@ testpmd_mbuf_ctor(struct rte_mempool *mp,
mb->ol_flags = 0;
mb->data_off = RTE_PKTMBUF_HEADROOM;
mb->nb_segs  = 1;
-   mb->l2_l3_len   = 0;
+   mb->tx_offload   = 0;
mb->vlan_tci = 0;
mb->hash.rss = 0;
 }
diff --git a/examples/ipv4_multicast/main.c b/examples/ipv4_multicast/main.c
index 590d11a..80c5140 100644
--- a/examples/ipv4_multicast/main.c
+++ b/examples/ipv4_multicast/main.c
@@ -302,7 +302,7 @@ mcast_out_pkt(struct rte_mbuf *pkt, int use_clone)
/* copy metadata from source packet*/
hdr->port = pkt->port;
hdr->vlan_tci = pkt->vlan_tci;
-   hdr->l2_l3_len = pkt->l2_l3_len;
+   hdr->tx_offload = pkt->tx_offload;
hdr->hash = pkt->hash;

hdr->ol_flags = pkt->ol_flags;
diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index 9b57b3a..87c2963 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -241,6 +241,7 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask)
case PKT_TX_UDP_CKSUM: return "PKT_TX_UDP_CKSUM";
case PKT_TX_IEEE1588_TMST: return "PKT_TX_IEEE1588_TMST";
case PKT_TX_VXLAN_CKSUM: return "PKT_TX_VXLAN_CKSUM";
+   case PKT_TX_TCP_SEG: return "PKT_TX_TCP_SEG";
default: return NULL;
}
 }
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 10ddd93..bc6c363 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -2,6 +2,7 @@
  *   BSD LICENSE
  *
  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright 2014 6WIND S.A.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -125,6 +126,20 @@ extern "C" {

 #define PKT_TX_VLAN_PKT  (1ULL << 55) /**< TX packet is a 802.1q VLAN 
packet. */

+/**
+ * TCP segmentation offload. To enable this offload feature for a
+ * packet to be transmitted on hardware supporting TSO:
+ *  - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
+ *PKT_TX_TCP_CKSUM)
+ *  - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum
+ *to 0 in the packet
+ *  - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
+ *  - calculate the pseudo header checksum without taking ip_len into account,
+ *and set it in the TCP header. Refer to rte_ipv4_phdr_cksum() and
+ *rte_ipv6_phdr_cksum() that can be used as helpers.
+ */
+#define PKT_TX_TCP_SEG   (1ULL << 49)
+
 /* Use final bit of flags to indicate a control mbuf */
 #define CTRL_MBUF_FLAG   (1ULL << 63) /**< Mbuf contains control data */

@@ -226,22 +241,18 @@ struct rte_mbuf {

/* fields to support TX offloads */
union {
-   uint16_t l2_l3_len; /**< combined l2/l3 lengths as single var */
+   uint64_t tx_offload;   /**< combined for easy fetch */
struct {
-   uint16_t l3_len:9;  /**< L3 (IP) Header Length. */
-   uint16_t l2_len:7;  /**< L2 (MAC) Header Length. */
-   };
-  

[dpdk-dev] [PATCH v3 09/13] mbuf: introduce new checksum API

2014-11-20 Thread Olivier Matz
Introduce new functions to calculate checksums. These new functions
are derived from the ones provided in csumonly.c but slightly reworked.
There is still some room for future optimization of these functions
(maybe SSE/AVX, ...).

This API will be modified in the next commits by the introduction of
TSO that requires a different pseudo header checksum to be set in the
packet.

Signed-off-by: Olivier Matz 
Acked-by: Konstantin Ananyev 
---
 app/test-pmd/csumonly.c| 133 ++--
 lib/librte_mbuf/rte_mbuf.h |   3 +-
 lib/librte_net/rte_ip.h| 183 +
 3 files changed, 193 insertions(+), 126 deletions(-)

diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 4d6f1ee..37d4129 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -87,137 +87,22 @@
 #define _htons(x) (x)
 #endif

-static inline uint16_t
-get_16b_sum(uint16_t *ptr16, uint32_t nr)
-{
-   uint32_t sum = 0;
-   while (nr > 1)
-   {
-   sum +=*ptr16;
-   nr -= sizeof(uint16_t);
-   ptr16++;
-   if (sum > UINT16_MAX)
-   sum -= UINT16_MAX;
-   }
-
-   /* If length is in odd bytes */
-   if (nr)
-   sum += *((uint8_t*)ptr16);
-
-   sum = ((sum & 0x) >> 16) + (sum & 0x);
-   sum &= 0x0;
-   return (uint16_t)sum;
-}
-
-static inline uint16_t
-get_ipv4_cksum(struct ipv4_hdr *ipv4_hdr)
-{
-   uint16_t cksum;
-   cksum = get_16b_sum((uint16_t*)ipv4_hdr, sizeof(struct ipv4_hdr));
-   return (uint16_t)((cksum == 0x)?cksum:~cksum);
-}
-
-
-static inline uint16_t
-get_ipv4_psd_sum(struct ipv4_hdr *ip_hdr)
-{
-   /* Pseudo Header for IPv4/UDP/TCP checksum */
-   union ipv4_psd_header {
-   struct {
-   uint32_t src_addr; /* IP address of source host. */
-   uint32_t dst_addr; /* IP address of destination 
host(s). */
-   uint8_t  zero; /* zero. */
-   uint8_t  proto;/* L4 protocol type. */
-   uint16_t len;  /* L4 length. */
-   } __attribute__((__packed__));
-   uint16_t u16_arr[0];
-   } psd_hdr;
-
-   psd_hdr.src_addr = ip_hdr->src_addr;
-   psd_hdr.dst_addr = ip_hdr->dst_addr;
-   psd_hdr.zero = 0;
-   psd_hdr.proto= ip_hdr->next_proto_id;
-   psd_hdr.len  = 
rte_cpu_to_be_16((uint16_t)(rte_be_to_cpu_16(ip_hdr->total_length)
-   - sizeof(struct ipv4_hdr)));
-   return get_16b_sum(psd_hdr.u16_arr, sizeof(psd_hdr));
-}
-
-static inline uint16_t
-get_ipv6_psd_sum(struct ipv6_hdr *ip_hdr)
-{
-   /* Pseudo Header for IPv6/UDP/TCP checksum */
-   union ipv6_psd_header {
-   struct {
-   uint8_t src_addr[16]; /* IP address of source host. */
-   uint8_t dst_addr[16]; /* IP address of destination 
host(s). */
-   uint32_t len; /* L4 length. */
-   uint32_t proto;   /* L4 protocol - top 3 bytes must 
be zero */
-   } __attribute__((__packed__));
-
-   uint16_t u16_arr[0]; /* allow use as 16-bit values with safe 
aliasing */
-   } psd_hdr;
-
-   rte_memcpy(&psd_hdr.src_addr, ip_hdr->src_addr,
-   sizeof(ip_hdr->src_addr) + sizeof(ip_hdr->dst_addr));
-   psd_hdr.len   = ip_hdr->payload_len;
-   psd_hdr.proto = (ip_hdr->proto << 24);
-
-   return get_16b_sum(psd_hdr.u16_arr, sizeof(psd_hdr));
-}
-
 static uint16_t
 get_psd_sum(void *l3_hdr, uint16_t ethertype)
 {
if (ethertype == _htons(ETHER_TYPE_IPv4))
-   return get_ipv4_psd_sum(l3_hdr);
+   return rte_ipv4_phdr_cksum(l3_hdr);
else /* assume ethertype == ETHER_TYPE_IPv6 */
-   return get_ipv6_psd_sum(l3_hdr);
-}
-
-static inline uint16_t
-get_ipv4_udptcp_checksum(struct ipv4_hdr *ipv4_hdr, uint16_t *l4_hdr)
-{
-   uint32_t cksum;
-   uint32_t l4_len;
-
-   l4_len = rte_be_to_cpu_16(ipv4_hdr->total_length) - sizeof(struct 
ipv4_hdr);
-
-   cksum = get_16b_sum(l4_hdr, l4_len);
-   cksum += get_ipv4_psd_sum(ipv4_hdr);
-
-   cksum = ((cksum & 0x) >> 16) + (cksum & 0x);
-   cksum = (~cksum) & 0x;
-   if (cksum == 0)
-   cksum = 0x;
-   return (uint16_t)cksum;
-}
-
-static inline uint16_t
-get_ipv6_udptcp_checksum(struct ipv6_hdr *ipv6_hdr, uint16_t *l4_hdr)
-{
-   uint32_t cksum;
-   uint32_t l4_len;
-
-   l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len);
-
-   cksum = get_16b_sum(l4_hdr, l4_len);
-   cksum += get_ipv6_psd_sum(ipv6_hdr);
-
-   cksum = ((cksum & 0x) >> 16) + (cksum & 0x);
-   cksum = (~cksum) & 0x;
-   if (cksum == 0)
-   cksum = 0x;
-
-   return (uint16_t)cksum;
+ 

[dpdk-dev] [PATCH v3 08/13] testpmd: rework csum forward engine

2014-11-20 Thread Olivier Matz
The csum forward engine was becoming too complex to be used and
extended (the next commits want to add the support of TSO):

- no explanation of what the code does
- code is not factorized, lots of code duplicated, especially between
  ipv4/ipv6
- user command line api: use of bitmasks that need to be calculated by
  the user
- the user flags don't have the same semantics:
  - for legacy IP/UDP/TCP/SCTP, it selects software or hardware checksum
  - for other (vxlan), it selects between hardware checksum or no
checksum
- the code relies too much on flags set by the driver without software
  alternative (ex: PKT_RX_TUNNEL_IPV4_HDR). It is nice to be able to
  compare a software implementation with the hardware offload.

This commit tries to fix these issues, and provide a simple definition
of what is done by the forward engine:

 * Receive a burst of packets, and for supported packet types:
 *  - modify the IPs
 *  - reprocess the checksum in SW or HW, depending on testpmd command line
 *configuration
 * Then packets are transmitted on the output port.
 *
 * Supported packets are:
 *   Ether / (vlan) / IP|IP6 / UDP|TCP|SCTP .
 *   Ether / (vlan) / IP|IP6 / UDP / VxLAN / Ether / IP|IP6 / UDP|TCP|SCTP
 *
 * The network parser supposes that the packet is contiguous, which may
 * not be the case in real life.

Signed-off-by: Olivier Matz 
---
 app/test-pmd/cmdline.c  | 156 ---
 app/test-pmd/config.c   |  13 +-
 app/test-pmd/csumonly.c | 676 ++--
 app/test-pmd/testpmd.h  |  17 +-
 4 files changed, 437 insertions(+), 425 deletions(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 4c3fc76..61e4340 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -310,19 +310,19 @@ static void cmd_help_long_parsed(void *parsed_result,
"Disable hardware insertion of a VLAN header in"
" packets sent on a port.\n\n"

-   "tx_checksum set (mask) (port_id)\n"
-   "Enable hardware insertion of checksum offload with"
-   " the 8-bit mask, 0~0xff, in packets sent on a port.\n"
-   "bit 0 - insert ip   checksum offload if set\n"
-   "bit 1 - insert udp  checksum offload if set\n"
-   "bit 2 - insert tcp  checksum offload if set\n"
-   "bit 3 - insert sctp checksum offload if set\n"
-   "bit 4 - insert inner ip  checksum offload if 
set\n"
-   "bit 5 - insert inner udp checksum offload if 
set\n"
-   "bit 6 - insert inner tcp checksum offload if 
set\n"
-   "bit 7 - insert inner sctp checksum offload if 
set\n"
+   "tx_cksum set (ip|udp|tcp|sctp|vxlan) (hw|sw) 
(port_id)\n"
+   "Select hardware or software calculation of the"
+   " checksum with when transmitting a packet using the"
+   " csum forward engine.\n"
+   "ip|udp|tcp|sctp always concern the inner layer.\n"
+   "vxlan concerns the outer IP and UDP layer (in"
+   " case the packet is recognized as a vxlan packet by"
+   " the forward engine)\n"
"Please check the NIC datasheet for HW limits.\n\n"

+   "tx_checksum show (port_id)\n"
+   "Display tx checksum offload configuration\n\n"
+
"set fwd (%s)\n"
"Set packet forwarding mode.\n\n"

@@ -2738,48 +2738,131 @@ cmdline_parse_inst_t cmd_tx_vlan_reset = {


 /* *** ENABLE HARDWARE INSERTION OF CHECKSUM IN TX PACKETS *** */
-struct cmd_tx_cksum_set_result {
+struct cmd_tx_cksum_result {
cmdline_fixed_string_t tx_cksum;
-   cmdline_fixed_string_t set;
-   uint8_t cksum_mask;
+   cmdline_fixed_string_t mode;
+   cmdline_fixed_string_t proto;
+   cmdline_fixed_string_t hwsw;
uint8_t port_id;
 };

 static void
-cmd_tx_cksum_set_parsed(void *parsed_result,
+cmd_tx_cksum_parsed(void *parsed_result,
   __attribute__((unused)) struct cmdline *cl,
   __attribute__((unused)) void *data)
 {
-   struct cmd_tx_cksum_set_result *res = parsed_result;
+   struct cmd_tx_cksum_result *res = parsed_result;
+   int hw = 0;
+   uint16_t ol_flags, mask = 0;
+   struct rte_eth_dev_info dev_info;
+
+   if (port_id_is_invalid(res->port_id)) {
+   printf("invalid port %d\n", res->port_id);
+   return;
+   }

-   tx_cksum_set(res->port_id, res->cksum_mask);
+   if (!strcmp(res->mode, "set")) {
+
+   if (!strcmp(res->hwsw, "hw"))
+   hw = 1;
+
+   if (!strcmp(res->

[dpdk-dev] [PATCH v3 07/13] testpmd: fix use of offload flags in testpmd

2014-11-20 Thread Olivier Matz
In testpmd the rte_port->tx_ol_flags flag was used in 2 incompatible
manners:
- sometimes used with testpmd specific flags (0xff for checksums, and
  bit 11 for vlan)
- sometimes assigned to m->ol_flags directly, which is wrong in case
  of checksum flags

This commit replaces the hardcoded values by named definitions, which
are not compatible with mbuf flags. The testpmd forward engines are
fixed to use the flags properly.

Signed-off-by: Olivier Matz 
---
 app/test-pmd/config.c   |  4 ++--
 app/test-pmd/csumonly.c | 40 +++-
 app/test-pmd/macfwd.c   |  5 -
 app/test-pmd/macswap.c  |  5 -
 app/test-pmd/testpmd.h  | 28 +---
 app/test-pmd/txonly.c   |  9 ++---
 6 files changed, 60 insertions(+), 31 deletions(-)

diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index b102b72..34b6fdb 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -1670,7 +1670,7 @@ tx_vlan_set(portid_t port_id, uint16_t vlan_id)
return;
if (vlan_id_is_invalid(vlan_id))
return;
-   ports[port_id].tx_ol_flags |= PKT_TX_VLAN_PKT;
+   ports[port_id].tx_ol_flags |= TESTPMD_TX_OFFLOAD_INSERT_VLAN;
ports[port_id].tx_vlan_id = vlan_id;
 }

@@ -1679,7 +1679,7 @@ tx_vlan_reset(portid_t port_id)
 {
if (port_id_is_invalid(port_id))
return;
-   ports[port_id].tx_ol_flags &= ~PKT_TX_VLAN_PKT;
+   ports[port_id].tx_ol_flags &= ~TESTPMD_TX_OFFLOAD_INSERT_VLAN;
 }

 void
diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 8d10bfd..743094a 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -322,7 +322,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
/* Do not delete, this is required by HW*/
ipv4_hdr->hdr_checksum = 0;

-   if (tx_ol_flags & 0x1) {
+   if (tx_ol_flags & TESTPMD_TX_OFFLOAD_IP_CKSUM) {
/* HW checksum */
ol_flags |= PKT_TX_IP_CKSUM;
}
@@ -336,7 +336,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
if (l4_proto == IPPROTO_UDP) {
udp_hdr = (struct udp_hdr*) 
(rte_pktmbuf_mtod(mb,
unsigned char *) + l2_len + 
l3_len);
-   if (tx_ol_flags & 0x2) {
+   if (tx_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) 
{
/* HW Offload */
ol_flags |= PKT_TX_UDP_CKSUM;
if (ipv4_tunnel)
@@ -358,7 +358,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
uint16_t len;

/* Check if inner L3/L4 checkum flag is 
set */
-   if (tx_ol_flags & 0xF0)
+   if (tx_ol_flags & 
TESTPMD_TX_OFFLOAD_INNER_CKSUM_MASK)
ol_flags |= PKT_TX_VXLAN_CKSUM;

inner_l2_len  = sizeof(struct 
ether_hdr);
@@ -381,7 +381,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
unsigned char 
*) + len);
inner_l4_proto = 
inner_ipv4_hdr->next_proto_id;

-   if (tx_ol_flags & 0x10) {
+   if (tx_ol_flags & 
TESTPMD_TX_OFFLOAD_INNER_IP_CKSUM) {

/* Do not delete, this 
is required by HW*/

inner_ipv4_hdr->hdr_checksum = 0;
@@ -394,7 +394,8 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
unsigned char 
*) + len);
inner_l4_proto = 
inner_ipv6_hdr->proto;
}
-   if ((inner_l4_proto == IPPROTO_UDP) && 
(tx_ol_flags & 0x20)) {
+   if ((inner_l4_proto == IPPROTO_UDP) &&
+   (tx_ol_flags & 
TESTPMD_TX_OFFLOAD_INNER_UDP_CKSUM)) {

/* HW Offload */
ol_flags |= PKT_TX_UDP_CKSUM;
@@ -405,7 +406,8 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
else if (eth_type == 
ETHER_TYPE_IPv6)

inner_udp_hdr->dgram_cksum = get_ipv6_psd_sum(inner_ipv6_hdr);

-   } else if ((inner_l4_proto == 
IPPROTO_TCP) &&

[dpdk-dev] [PATCH v3 06/13] mbuf: add functions to get the name of an ol_flag

2014-11-20 Thread Olivier Matz
In test-pmd (rxonly.c), the code is able to dump the list of ol_flags.
The issue is that the list of flags in the application has to be
synchronized with the flags defined in rte_mbuf.h.

This patch introduces 2 new functions rte_get_rx_ol_flag_name()
and rte_get_tx_ol_flag_name() that return the name of a flag from
its mask. It also fixes rxonly.c to use these new functions and to
display the proper flags.

Signed-off-by: Olivier Matz 
---
 app/test-pmd/rxonly.c  | 36 ++
 lib/librte_mbuf/rte_mbuf.c | 48 ++
 lib/librte_mbuf/rte_mbuf.h | 25 
 3 files changed, 83 insertions(+), 26 deletions(-)

diff --git a/app/test-pmd/rxonly.c b/app/test-pmd/rxonly.c
index 9ad1df6..51a530a 100644
--- a/app/test-pmd/rxonly.c
+++ b/app/test-pmd/rxonly.c
@@ -71,26 +71,6 @@

 #include "testpmd.h"

-#define MAX_PKT_RX_FLAGS 13
-static const char *pkt_rx_flag_names[MAX_PKT_RX_FLAGS] = {
-   "VLAN_PKT",
-   "RSS_HASH",
-   "PKT_RX_FDIR",
-   "IP_CKSUM",
-   "IP_CKSUM_BAD",
-
-   "IPV4_HDR",
-   "IPV4_HDR_EXT",
-   "IPV6_HDR",
-   "IPV6_HDR_EXT",
-
-   "IEEE1588_PTP",
-   "IEEE1588_TMST",
-
-   "TUNNEL_IPV4_HDR",
-   "TUNNEL_IPV6_HDR",
-};
-
 static inline void
 print_ether_addr(const char *what, struct ether_addr *eth_addr)
 {
@@ -214,12 +194,16 @@ pkt_burst_receive(struct fwd_stream *fs)
printf(" - Receive queue=0x%x", (unsigned) fs->rx_queue);
printf("\n");
if (ol_flags != 0) {
-   int rxf;
-
-   for (rxf = 0; rxf < MAX_PKT_RX_FLAGS; rxf++) {
-   if (ol_flags & (1 << rxf))
-   printf("  PKT_RX_%s\n",
-  pkt_rx_flag_names[rxf]);
+   unsigned rxf;
+   const char *name;
+
+   for (rxf = 0; rxf < sizeof(mb->ol_flags) * 8; rxf++) {
+   if ((ol_flags & (1ULL << rxf)) == 0)
+   continue;
+   name = rte_get_rx_ol_flag_name(1ULL << rxf);
+   if (name == NULL)
+   continue;
+   printf("  %s\n", name);
}
}
rte_pktmbuf_free(mb);
diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index 52e7574..9b57b3a 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -2,6 +2,7 @@
  *   BSD LICENSE
  *
  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright 2014 6WIND S.A.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -196,3 +197,50 @@ rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, 
unsigned dump_len)
nb_segs --;
}
 }
+
+/*
+ * Get the name of a RX offload flag. Must be kept synchronized with flag
+ * definitions in rte_mbuf.h.
+ */
+const char *rte_get_rx_ol_flag_name(uint64_t mask)
+{
+   switch (mask) {
+   case PKT_RX_VLAN_PKT: return "PKT_RX_VLAN_PKT";
+   case PKT_RX_RSS_HASH: return "PKT_RX_RSS_HASH";
+   case PKT_RX_FDIR: return "PKT_RX_FDIR";
+   case PKT_RX_L4_CKSUM_BAD: return "PKT_RX_L4_CKSUM_BAD";
+   case PKT_RX_IP_CKSUM_BAD: return "PKT_RX_IP_CKSUM_BAD";
+   /* case PKT_RX_EIP_CKSUM_BAD: return "PKT_RX_EIP_CKSUM_BAD"; */
+   /* case PKT_RX_OVERSIZE: return "PKT_RX_OVERSIZE"; */
+   /* case PKT_RX_HBUF_OVERFLOW: return "PKT_RX_HBUF_OVERFLOW"; */
+   /* case PKT_RX_RECIP_ERR: return "PKT_RX_RECIP_ERR"; */
+   /* case PKT_RX_MAC_ERR: return "PKT_RX_MAC_ERR"; */
+   case PKT_RX_IPV4_HDR: return "PKT_RX_IPV4_HDR";
+   case PKT_RX_IPV4_HDR_EXT: return "PKT_RX_IPV4_HDR_EXT";
+   case PKT_RX_IPV6_HDR: return "PKT_RX_IPV6_HDR";
+   case PKT_RX_IPV6_HDR_EXT: return "PKT_RX_IPV6_HDR_EXT";
+   case PKT_RX_IEEE1588_PTP: return "PKT_RX_IEEE1588_PTP";
+   case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST";
+   case PKT_RX_TUNNEL_IPV4_HDR: return "PKT_RX_TUNNEL_IPV4_HDR";
+   case PKT_RX_TUNNEL_IPV6_HDR: return "PKT_RX_TUNNEL_IPV6_HDR";
+   default: return NULL;
+   }
+}
+
+/*
+ * Get the name of a TX offload flag. Must be kept synchronized with flag
+ * definitions in rte_mbuf.h.
+ */
+const char *rte_get_tx_ol_flag_name(uint64_t mask)
+{
+   switch (mask) {
+   case PKT_TX_VLAN_PKT: return "PKT_TX_VLAN_PKT";
+   case PKT_TX_IP_CKSUM: return "PKT_TX_IP_CKSUM";
+   case PKT_TX_TCP_CKSUM: return "PKT_TX_TCP_CKSUM";
+   case PKT_TX_SCTP_CKSUM: return "PKT_TX_SCTP_CKSUM";
+   case PKT_TX_UDP_CKSUM: return "PKT_TX_UDP_CKSUM";
+   case PKT_TX_IEEE1588_TMST: return "PKT_TX_IEEE1588_TMST";
+   case PKT_TX_VXLAN_CKSUM: return "PKT_TX_VXLAN_CKSUM";

[dpdk-dev] [PATCH v3 05/13] mbuf: remove too specific PKT_TX_OFFLOAD_MASK definition

2014-11-20 Thread Olivier Matz
This definition is specific to Intel PMD drivers, and its description
("indicate what bits required for building TX context") shows that it
should not be in the generic rte_mbuf.h but in the PMD driver.

Signed-off-by: Olivier Matz 
Acked-by: Bruce Richardson 
---
 lib/librte_mbuf/rte_mbuf.h| 5 -
 lib/librte_pmd_e1000/igb_rxtx.c   | 8 +++-
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 8 +++-
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 0c96b00..62d952d 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -124,11 +124,6 @@ extern "C" {
 /* Use final bit of flags to indicate a control mbuf */
 #define CTRL_MBUF_FLAG   (1ULL << 63) /**< Mbuf contains control data */

-/**
- * Bit Mask to indicate what bits required for building TX context
- */
-#define PKT_TX_OFFLOAD_MASK (PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM | 
PKT_TX_L4_MASK)
-
 /* define a set of marker types that can be used to refer to set points in the
  * mbuf */
 typedef void*MARKER[0];   /**< generic marker for a point in a structure */
diff --git a/lib/librte_pmd_e1000/igb_rxtx.c b/lib/librte_pmd_e1000/igb_rxtx.c
index b406397..433c616 100644
--- a/lib/librte_pmd_e1000/igb_rxtx.c
+++ b/lib/librte_pmd_e1000/igb_rxtx.c
@@ -84,6 +84,12 @@
ETH_RSS_IPV6_UDP | \
ETH_RSS_IPV6_UDP_EX)

+/* Bit Mask to indicate what bits required for building TX context */
+#define IGB_TX_OFFLOAD_MASK (   \
+   PKT_TX_VLAN_PKT |\
+   PKT_TX_IP_CKSUM |\
+   PKT_TX_L4_MASK)
+
 static inline struct rte_mbuf *
 rte_rxmbuf_alloc(struct rte_mempool *mp)
 {
@@ -400,7 +406,7 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
ol_flags = tx_pkt->ol_flags;
vlan_macip_lens.f.vlan_tci = tx_pkt->vlan_tci;
vlan_macip_lens.f.l2_l3_len = tx_pkt->l2_l3_len;
-   tx_ol_req = ol_flags & PKT_TX_OFFLOAD_MASK;
+   tx_ol_req = ol_flags & IGB_TX_OFFLOAD_MASK;

/* If a Context Descriptor need be built . */
if (tx_ol_req) {
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c 
b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index 7e470ce..ca35db2 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -90,6 +90,12 @@
ETH_RSS_IPV6_UDP | \
ETH_RSS_IPV6_UDP_EX)

+/* Bit Mask to indicate what bits required for building TX context */
+#define IXGBE_TX_OFFLOAD_MASK ( \
+   PKT_TX_VLAN_PKT |\
+   PKT_TX_IP_CKSUM |\
+   PKT_TX_L4_MASK)
+
 static inline struct rte_mbuf *
 rte_rxmbuf_alloc(struct rte_mempool *mp)
 {
@@ -580,7 +586,7 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
ol_flags = tx_pkt->ol_flags;

/* If hardware offload required */
-   tx_ol_req = ol_flags & PKT_TX_OFFLOAD_MASK;
+   tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
if (tx_ol_req) {
vlan_macip_lens.f.vlan_tci = tx_pkt->vlan_tci;
vlan_macip_lens.f.l2_l3_len = tx_pkt->l2_l3_len;
-- 
2.1.0



[dpdk-dev] [PATCH v3 04/13] mbuf: add help about TX checksum flags

2014-11-20 Thread Olivier Matz
Describe how to use hardware checksum API.

Signed-off-by: Olivier Matz 
Acked-by: Bruce Richardson 
---
 lib/librte_mbuf/rte_mbuf.h | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index d3eba44..0c96b00 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -98,14 +98,17 @@ extern "C" {
 /* add new TX flags here */
 #define PKT_TX_VXLAN_CKSUM   (1ULL << 50) /**< TX checksum of VXLAN computed 
by NIC */
 #define PKT_TX_IEEE1588_TMST (1ULL << 51) /**< TX IEEE1588 packet to 
timestamp. */
-/*
- * Bits 52+53 used for L4 packet type with checksum enabled.
- * 00: Reserved
- * 01: TCP checksum
- * 10: SCTP checksum
- * 11: UDP checksum
+
+/**
+ * Bits 52+53 used for L4 packet type with checksum enabled: 00: Reserved,
+ * 01: TCP checksum, 10: SCTP checksum, 11: UDP checksum. To use hardware
+ * L4 checksum offload, the user needs to:
+ *  - fill l2_len and l3_len in mbuf
+ *  - set the flags PKT_TX_TCP_CKSUM, PKT_TX_SCTP_CKSUM or PKT_TX_UDP_CKSUM
+ *  - calculate the pseudo header checksum and set it in the L4 header (only
+ *for TCP or UDP). For SCTP, set the crc field to 0.
  */
-#define PKT_TX_L4_NO_CKSUM   (0ULL << 52) /**< Disable L4 cksum of TX pkt. */
+#define PKT_TX_L4_NO_CKSUM   (0ULL << 52) /* Disable L4 cksum of TX pkt. */
 #define PKT_TX_TCP_CKSUM (1ULL << 52) /**< TCP cksum of TX pkt. computed 
by NIC. */
 #define PKT_TX_SCTP_CKSUM(2ULL << 52) /**< SCTP cksum of TX pkt. computed 
by NIC. */
 #define PKT_TX_UDP_CKSUM (3ULL << 52) /**< UDP cksum of TX pkt. computed 
by NIC. */
-- 
2.1.0



[dpdk-dev] [PATCH v3 03/13] mbuf: reorder tx ol_flags

2014-11-20 Thread Olivier Matz
The tx mbuf flags are now ordered from the lowest value to the
highest. Add comments to explain where to add new flags.

By the way, move the PKT_TX_VXLAN_CKSUM at the right place.

Signed-off-by: Olivier Matz 
---
 lib/librte_mbuf/rte_mbuf.h | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index f5f8658..d3eba44 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -93,14 +93,11 @@ extern "C" {
 #define PKT_RX_IEEE1588_TMST (1ULL << 10) /**< RX IEEE1588 L2/L4 timestamped 
packet.*/
 #define PKT_RX_TUNNEL_IPV4_HDR (1ULL << 11) /**< RX tunnel packet with IPv4 
header.*/
 #define PKT_RX_TUNNEL_IPV6_HDR (1ULL << 12) /**< RX tunnel packet with IPv6 
header. */
+/* add new RX flags here */

-#define PKT_TX_VLAN_PKT  (1ULL << 55) /**< TX packet is a 802.1q VLAN 
packet. */
-#define PKT_TX_IP_CKSUM  (1ULL << 54) /**< IP cksum of TX pkt. computed by 
NIC. */
+/* add new TX flags here */
 #define PKT_TX_VXLAN_CKSUM   (1ULL << 50) /**< TX checksum of VXLAN computed 
by NIC */
-#define PKT_TX_IPV4_CSUM PKT_TX_IP_CKSUM /**< Alias of PKT_TX_IP_CKSUM. */
-#define PKT_TX_IPV4  PKT_RX_IPV4_HDR /**< IPv4 with no IP checksum 
offload. */
-#define PKT_TX_IPV6  PKT_RX_IPV6_HDR /**< IPv6 packet */
-
+#define PKT_TX_IEEE1588_TMST (1ULL << 51) /**< TX IEEE1588 packet to 
timestamp. */
 /*
  * Bits 52+53 used for L4 packet type with checksum enabled.
  * 00: Reserved
@@ -114,8 +111,12 @@ extern "C" {
 #define PKT_TX_UDP_CKSUM (3ULL << 52) /**< UDP cksum of TX pkt. computed 
by NIC. */
 #define PKT_TX_L4_MASK   (3ULL << 52) /**< Mask for L4 cksum offload 
request. */

-/* Bit 51 - IEEE1588*/
-#define PKT_TX_IEEE1588_TMST (1ULL << 51) /**< TX IEEE1588 packet to 
timestamp. */
+#define PKT_TX_IP_CKSUM  (1ULL << 54) /**< IP cksum of TX pkt. computed by 
NIC. */
+#define PKT_TX_IPV4_CSUM PKT_TX_IP_CKSUM /**< Alias of PKT_TX_IP_CKSUM. */
+#define PKT_TX_IPV4  PKT_RX_IPV4_HDR /**< IPv4 with no IP checksum 
offload. */
+#define PKT_TX_IPV6  PKT_RX_IPV6_HDR /**< IPv6 packet */
+
+#define PKT_TX_VLAN_PKT  (1ULL << 55) /**< TX packet is a 802.1q VLAN 
packet. */

 /* Use final bit of flags to indicate a control mbuf */
 #define CTRL_MBUF_FLAG   (1ULL << 63) /**< Mbuf contains control data */
-- 
2.1.0



[dpdk-dev] [PATCH v3 02/13] ixgbe: fix remaining pkt_flags variable size to 64 bits

2014-11-20 Thread Olivier Matz
Since commit 4332beee9 "mbuf: expand ol_flags field to 64-bits", the
packet flags are now 64 bits wide. Some occurrences were forgotten in
the ixgbe driver.

Signed-off-by: Olivier Matz 
Acked-by: Bruce Richardson 
---
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c 
b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index ecebbf6..7e470ce 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -817,7 +817,7 @@ end_of_tx:
 static inline uint64_t
 rx_desc_hlen_type_rss_to_pkt_flags(uint32_t hl_tp_rs)
 {
-   uint16_t pkt_flags;
+   uint64_t pkt_flags;

static uint64_t ip_pkt_types_map[16] = {
0, PKT_RX_IPV4_HDR, PKT_RX_IPV4_HDR_EXT, PKT_RX_IPV4_HDR_EXT,
@@ -834,7 +834,7 @@ rx_desc_hlen_type_rss_to_pkt_flags(uint32_t hl_tp_rs)
};

 #ifdef RTE_LIBRTE_IEEE1588
-   static uint32_t ip_pkt_etqf_map[8] = {
+   static uint64_t ip_pkt_etqf_map[8] = {
0, 0, 0, PKT_RX_IEEE1588_PTP,
0, 0, 0, 0,
};
@@ -903,7 +903,7 @@ ixgbe_rx_scan_hw_ring(struct igb_rx_queue *rxq)
struct igb_rx_entry *rxep;
struct rte_mbuf *mb;
uint16_t pkt_len;
-   uint16_t pkt_flags;
+   uint64_t pkt_flags;
int s[LOOK_AHEAD], nb_dd;
int i, j, nb_rx = 0;

@@ -1335,7 +1335,7 @@ ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf 
**rx_pkts,
uint16_t nb_rx;
uint16_t nb_hold;
uint16_t data_len;
-   uint16_t pkt_flags;
+   uint64_t pkt_flags;

nb_rx = 0;
nb_hold = 0;
@@ -1511,9 +1511,9 @@ ixgbe_recv_scattered_pkts(void *rx_queue, struct rte_mbuf 
**rx_pkts,
first_seg->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(hlen_type_rss);
-   pkt_flags = (uint16_t)(pkt_flags |
+   pkt_flags = (pkt_flags |
rx_desc_status_to_pkt_flags(staterr));
-   pkt_flags = (uint16_t)(pkt_flags |
+   pkt_flags = (pkt_flags |
rx_desc_error_to_pkt_flags(staterr));
first_seg->ol_flags = pkt_flags;

-- 
2.1.0



[dpdk-dev] [PATCH v3 01/13] igb/ixgbe: fix IP checksum calculation

2014-11-20 Thread Olivier Matz
According to Intel® 82599 10 GbE Controller Datasheet (Table 7-38), both
L2 and L3 lengths are needed to offload the IP checksum.

Note that the e1000 driver does not need to be patched as it already
contains the fix.

Signed-off-by: Olivier Matz 
Acked-by: Konstantin Ananyev 
---
 lib/librte_pmd_e1000/igb_rxtx.c   | 2 +-
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/librte_pmd_e1000/igb_rxtx.c b/lib/librte_pmd_e1000/igb_rxtx.c
index 0dca7b7..b406397 100644
--- a/lib/librte_pmd_e1000/igb_rxtx.c
+++ b/lib/librte_pmd_e1000/igb_rxtx.c
@@ -262,7 +262,7 @@ igbe_set_xmit_ctx(struct igb_tx_queue* txq,

if (ol_flags & PKT_TX_IP_CKSUM) {
type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4;
-   cmp_mask |= TX_MAC_LEN_CMP_MASK;
+   cmp_mask |= TX_MACIP_LEN_CMP_MASK;
}

/* Specify which HW CTX to upload. */
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c 
b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index f9b3fe3..ecebbf6 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -374,7 +374,7 @@ ixgbe_set_xmit_ctx(struct igb_tx_queue* txq,

if (ol_flags & PKT_TX_IP_CKSUM) {
type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
-   cmp_mask |= TX_MAC_LEN_CMP_MASK;
+   cmp_mask |= TX_MACIP_LEN_CMP_MASK;
}

/* Specify which HW CTX to upload. */
-- 
2.1.0



[dpdk-dev] [PATCH v3 00/13] add TSO support

2014-11-20 Thread Olivier Matz
This series adds TSO support in ixgbe DPDK driver. This is a rework
of the series sent earlier this week [1]. This work is based on
another version [2] that was posted several months ago and
which included a mbuf rework that is now in mainline.

Changes in v3:

- indicate that rte_get_rx_ol_flag_name() and rte_get_tx_ol_flag_name()
  should be kept synchronized with flags definition
- use sizeof() when appropriate in rte_raw_cksum()
- remove double semicolon in ixgbe driver
- reorder tx ol_flags as requested by Thomas
- add missing copyrights when big modifications are made
- enhance the help of tx_cksum command in testpmd
- enhance the description of csumonly (comments)

Changes in v2:

- move rte_get_rx_ol_flag_name() and rte_get_tx_ol_flag_name() in
  rte_mbuf.c, and fix comments
- use IGB_TX_OFFLOAD_MASK and IXGBE_TX_OFFLOAD_MASK to replace
  PKT_TX_OFFLOAD_MASK
- fix inner_l2_len and inner_l3_len bitfields: use uint64_t instead
  of uint16_t
- replace assignation of l2_len and l3_len by assignation of tx_offload.
  It now includes inner_l2_len and inner_l3_len at the same time.
- introduce a new cksum api in rte_ip.h following discussion with
  Konstantin
- reorder commits to have all TSO commits at the end of the series
- use ol_flags for phdr checksum calculation (this now matches ixgbe
  API: standard pseudo hdr cksum for TCP cksum offload, pseudo hdr
  cksum without ip paylen for TSO). This will probably be changed
  with a dev_prep_tx() like function for 2.0 release.
- rebase on latest head


This series first fixes some bugs that were discovered during the
development, adds some changes to the mbuf API (new l4_len and
tso_segsz fields), adds TSO support in ixgbe, reworks testpmd
csum forward engine, and finally adds TSO support in testpmd so it
can be validated.

The new fields added in mbuf try to be generic enough to apply to
other hardware in the future. To delegate the TCP segmentation to the
hardware, the user has to:

  - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
PKT_TX_TCP_CKSUM)
  - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum
to 0 in the packet
  - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
  - calculate the pseudo header checksum and set it in the TCP header,
as required when doing hardware TCP checksum offload

The test report will be added as an answer to this cover letter and
could be linked in the concerned commits.

[1] http://dpdk.org/ml/archives/dev/2014-November/007953.html
[2] http://dpdk.org/ml/archives/dev/2014-May/002537.html

Olivier Matz (13):
  igb/ixgbe: fix IP checksum calculation
  ixgbe: fix remaining pkt_flags variable size to 64 bits
  mbuf: reorder tx ol_flags
  mbuf: add help about TX checksum flags
  mbuf: remove too specific PKT_TX_OFFLOAD_MASK definition
  mbuf: add functions to get the name of an ol_flag
  testpmd: fix use of offload flags in testpmd
  testpmd: rework csum forward engine
  mbuf: introduce new checksum API
  mbuf: generic support for TCP segmentation offload
  ixgbe: support TCP segmentation offload
  testpmd: support TSO in csum forward engine
  testpmd: add a verbose mode csum forward engine

 app/test-pmd/cmdline.c  | 248 +--
 app/test-pmd/config.c   |  17 +-
 app/test-pmd/csumonly.c | 814 
 app/test-pmd/macfwd.c   |   5 +-
 app/test-pmd/macswap.c  |   5 +-
 app/test-pmd/rxonly.c   |  36 +-
 app/test-pmd/testpmd.c  |   2 +-
 app/test-pmd/testpmd.h  |  24 +-
 app/test-pmd/txonly.c   |   9 +-
 examples/ipv4_multicast/main.c  |   2 +-
 lib/librte_mbuf/rte_mbuf.c  |  49 +++
 lib/librte_mbuf/rte_mbuf.h  | 102 +++--
 lib/librte_net/rte_ip.h | 208 +
 lib/librte_pmd_e1000/igb_rxtx.c |  21 +-
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c |   3 +-
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c   | 179 +---
 lib/librte_pmd_ixgbe/ixgbe_rxtx.h   |  19 +-
 17 files changed, 1094 insertions(+), 649 deletions(-)

-- 
2.1.0



[dpdk-dev] DPDK Community Conference Call - Friday 31st October

2014-11-20 Thread O'driscoll, Tim
The video is now accessible at: http://youtu.be/AbHQ4YaWY90. Thomas may want to 
add a link to this from somewhere on the dpdk.org page.


Tim

> From: Kevin Wilson [mailto:wkevils at gmail.com]
> 
> Hi,
> >I'll post a recording of it soon.
> Great idea! most welcomed!
> 
> Kevin
> 
> 
> On Thu, Nov 20, 2014 at 3:13 PM, O'driscoll, Tim 
> wrote:
> > Hi Kevin,
> >
> >> From: Kevin Wilson [mailto:wkevils at gmail.com]
> >> > We'll schedule a follow-up call for 2 weeks' time
> >> Just a short question - is this still intended to be held ?
> >
> > We had our second call earlier this week, on Tuesday. I'll post a recording 
> > of
> it soon.
> >
> > The next call will be in 2 weeks' time, probably on Tuesday December 2nd. I
> just need to finalise the time before confirming this. We have had a couple of
> requests to alternate between a time that's suitable for USA/Europe, and
> one that's more suitable for Asia. So, the next call will probably be early 
> in the
> morning in Europe, afternoon in Asia, and the middle of the night in the USA.
> >
> >
> > Tim


[dpdk-dev] versioning and maintenance

2014-11-20 Thread Thomas Monjalon
2014-11-20 13:25, Neil Horman:
> On Thu, Nov 20, 2014 at 06:09:10PM +0100, Thomas Monjalon wrote:
> > 2014-11-19 10:13, Neil Horman:
> > > On Wed, Nov 19, 2014 at 11:35:08AM +, Bruce Richardson wrote:
> > > > On Wed, Nov 19, 2014 at 12:22:14PM +0100, Thomas Monjalon wrote:
> > > > > Following the discussion we had with Neil during the conference call,
> > > > > I suggest this plan, starting with the next release (2.0):
> > > > >   - add version numbers to libraries
> > > > >   - add version numbers to functions inside .map files
> > > > >   - create a git tree dedicated to maintenance and API 
> > > > > compatibility
> > > > > 
> > > > > It means these version numbers must be incremented when breaking the 
> > > > > API.
> > > > > Though the old code paths will be maintained and tested separately by 
> > > > > volunteers.
> > > > > A mailing list for maintenance purpose could be created if needed.
> > > > >
> > > > Hi Thomas,
> > > > 
> > > > I really think that the versionning is best handled inside the main 
> > > > repository
> > > > itself. Given that the proposed deprecation policy is over two releases 
> > > > i.e. an
> > > > API is marked deprecated in release X and then removed in X+1, I don't 
> > > > see the
> > > > maintaining of old code paths to be particularly onerous.
> > > > 
> > > > /Bruce
> > > 
> > > I agree with Bruce, even if it is on occasion an added workload, its not 
> > > the
> > > sort of thing that can or should be placed on an alternate developer.  
> > > Backwards
> > > compatibility is the sort of thing that has to be on the mind of the 
> > > developer
> > > when modifying an API, and on the mind of the reviewer when reviewing 
> > > code.  To
> > > shunt that responsibility elsewhere invites the opportunity for backwards
> > > compatibilty to be a second class citizen who's goal will never be 
> > > reached,
> > > because developers instituting ABI changes will never care about the
> > > consequences, and anyone worrying about backwards compatibility will 
> > > always be
> > > playing catch up, possibly allowing ABI breaks to slip through.
> > > 
> > > Neil
> >  
> > Before taking a decision, we should detail every concern.
> > 
> > 1/
> > Currently there are not a lot of API refactoring because DPDK is well 
> > tailored
> > for x86 and Intel NICs. But we are seeing that new CPU and new NICs to 
> > support
> > would require some adaptations.
> > 
> Yes, you're absolutely right here.  I had hoped that, during my presentation
> that this would happen occasionaly, and that we would need to deal with it.
> What I think you are implying here (correct me if I'm wrong), is that you 
> would
> advocate that we wait to introduce ABI versioning until after such refactoring
> is, for lack of a better term "complete".  The problem here is that, software
> that is growing in user base is never "complete".  What you are effectively
> saying is that you want to wait until the API is in a state in which no (or
> almost no) more changes are required, then fixate it.  Thats quite simply 
> never
> going to happen.  And if it does, it obviates the need for versioning at all.

I agree Neil. This point is not about how long we should wait but how the
overhead could be estimated for coming releases.

> > 2/
> > I'm curious to know how you would handle a big change like the recent mbuf 
> > rework.
> > Should we duplicate the structure and all the functions using mbuf?
> 
> Several ways, what you suggest above is one way, although thats what I would
> consider to be a pessimal case.  Ideally such large changes are extreemely 
> rare
> (a search of the git history I think confirms this).  Much more common are
> small, limited changes to various API's for which providing multiple versions 
> of
> a function is a much more reasonable approach.
> 
> In the event that we do decide to do a refactor that is so far reaching that 
> we
> simply don't feel like multi-versioning is feasible, the recourse is then to
> deprecate the old API, publish that information on the deprecation schedule,
> wait for a release, then replace it wholesale.  When the API is released, we
> bump the DSO version number.  Note the versioning policy never guarantees that
> backwards compatibility will always be available, nor does it stipulate that a
> newer version of the API is available prior to removing the old one. The goal
> here is to give distributors and application vendors advanced notice of ABI
> breaking changes so that they can adapt appropriately before they are caught 
> off
> guard.  If the new ABI can't be packaged alongside the old, then so be it,
> downstream vendors will have to use the upstream git head to test and 
> validate,
> rather than a newer distribution release

Seems reasonable.

> Ideally though, that shouldn't happen, because it causes downstream headaches,
> and we would really like to avoid that.  Thats why I feel its so important to
> keep this work in the main tree.  If we s

[dpdk-dev] versioning and maintenance

2014-11-20 Thread Neil Horman
On Thu, Nov 20, 2014 at 10:08:25PM +0100, Thomas Monjalon wrote:
> 2014-11-20 13:25, Neil Horman:
> > On Thu, Nov 20, 2014 at 06:09:10PM +0100, Thomas Monjalon wrote:
> > > 2014-11-19 10:13, Neil Horman:
> > > > On Wed, Nov 19, 2014 at 11:35:08AM +, Bruce Richardson wrote:
> > > > > On Wed, Nov 19, 2014 at 12:22:14PM +0100, Thomas Monjalon wrote:
> > > > > > Following the discussion we had with Neil during the conference 
> > > > > > call,
> > > > > > I suggest this plan, starting with the next release (2.0):
> > > > > > - add version numbers to libraries
> > > > > > - add version numbers to functions inside .map files
> > > > > > - create a git tree dedicated to maintenance and API 
> > > > > > compatibility
> > > > > > 
> > > > > > It means these version numbers must be incremented when breaking 
> > > > > > the API.
> > > > > > Though the old code paths will be maintained and tested separately 
> > > > > > by volunteers.
> > > > > > A mailing list for maintenance purpose could be created if needed.
> > > > > >
> > > > > Hi Thomas,
> > > > > 
> > > > > I really think that the versionning is best handled inside the main 
> > > > > repository
> > > > > itself. Given that the proposed deprecation policy is over two 
> > > > > releases i.e. an
> > > > > API is marked deprecated in release X and then removed in X+1, I 
> > > > > don't see the
> > > > > maintaining of old code paths to be particularly onerous.
> > > > > 
> > > > > /Bruce
> > > > 
> > > > I agree with Bruce, even if it is on occasion an added workload, its 
> > > > not the
> > > > sort of thing that can or should be placed on an alternate developer.  
> > > > Backwards
> > > > compatibility is the sort of thing that has to be on the mind of the 
> > > > developer
> > > > when modifying an API, and on the mind of the reviewer when reviewing 
> > > > code.  To
> > > > shunt that responsibility elsewhere invites the opportunity for 
> > > > backwards
> > > > compatibilty to be a second class citizen who's goal will never be 
> > > > reached,
> > > > because developers instituting ABI changes will never care about the
> > > > consequences, and anyone worrying about backwards compatibility will 
> > > > always be
> > > > playing catch up, possibly allowing ABI breaks to slip through.
> > > > 
> > > > Neil
> > >  
> > > Before taking a decision, we should detail every concern.
> > > 
> > > 1/
> > > Currently there are not a lot of API refactoring because DPDK is well 
> > > tailored
> > > for x86 and Intel NICs. But we are seeing that new CPU and new NICs to 
> > > support
> > > would require some adaptations.
> > > 
> > Yes, you're absolutely right here.  I had hoped that, during my presentation
> > that this would happen occasionaly, and that we would need to deal with it.
> > What I think you are implying here (correct me if I'm wrong), is that you 
> > would
> > advocate that we wait to introduce ABI versioning until after such 
> > refactoring
> > is, for lack of a better term "complete".  The problem here is that, 
> > software
> > that is growing in user base is never "complete".  What you are effectively
> > saying is that you want to wait until the API is in a state in which no (or
> > almost no) more changes are required, then fixate it.  Thats quite simply 
> > never
> > going to happen.  And if it does, it obviates the need for versioning at 
> > all.
> 
> I agree Neil. This point is not about how long we should wait but how the
> overhead could be estimate for coming releases.
> 
Well, I understand the desire, but I'm not sure how it can be accomplished.  For
a given release, the overhead will be dependent on two factors:

1) The number of ABI changes in a given release

2) The extent of the ABI changes that were made.

If we have a way to predict those, then we can estimate the overhead, but
without that information, you're kinda stuck.  That said, if we all concur that
this is a necessary effort to undertake, then the overhead is not overly
important.  What's more important is allotting enough time to
do the work for a given project.  That is to say, when undertaking a large
refactoring, or other project that promises to make significant ABI changes,
the developer needs to factor in time to design and implement backwards
compatibility.  Put another way, if the developer does their job right, and
takes backwards compatibility seriously, the overhead to you as a maintainer is
nil.  The onus to handle this extra effort needs to be on the developer.

> > > 2/
> > > I'm curious to know how you would handle a big change like the recent 
> > > mbuf rework.
> > > Should we duplicate the structure and all the functions using mbuf?
> > 
> > Several ways, what you suggest above is one way, although thats what I would
> > consider to be a pessimal case.  Ideally such large changes are extreemely 
> > rare
> > (a search of the git history I think confirms this).  Much more common are
> > small, limited cha

[dpdk-dev] DPDK Community Conference Call - Friday 31st October

2014-11-20 Thread Kevin Wilson
Hi,
>I'll post a recording of it soon.
Great idea! most welcomed!

Kevin


On Thu, Nov 20, 2014 at 3:13 PM, O'driscoll, Tim
 wrote:
> Hi Kevin,
>
>> From: Kevin Wilson [mailto:wkevils at gmail.com]
>> > We'll schedule a follow-up call for 2 weeks' time
>> Just a short question - is this still intended to be held ?
>
> We had our second call earlier this week, on Tuesday. I'll post a recording 
> of it soon.
>
> The next call will be in 2 weeks' time, probably on Tuesday December 2nd. I 
> just need to finalise the time before confirming this. We have had a couple 
> of requests to alternate between a time that's suitable for USA/Europe, and 
> one that's more suitable for Asia. So, the next call will probably be early 
> in the morning in Europe, afternoon in Asia, and the middle of the night in 
> the USA.
>
>
> Tim


[dpdk-dev] [PATCH] testpmd: Add port hotplug support

2014-11-20 Thread Tetsuya Mukawa
The patch introduces following commands.
- port [attach|detach] [p|v] [ident]
 - attach: attaching a port
 - detach: detaching a port
 - p: physical port
 - v: virtual port
 - ident: pci address of physical device.
  Or device name and parameters of virtual device.
 (ex. 0000:02:00.0, eth_pcap0,iface=eth0)

Signed-off-by: Tetsuya Mukawa 
---
 app/test-pmd/cmdline.c| 139 +++-
 app/test-pmd/config.c |  54 +++
 app/test-pmd/parameters.c |  21 +++--
 app/test-pmd/testpmd.c| 226 --
 app/test-pmd/testpmd.h|  17 
 5 files changed, 366 insertions(+), 91 deletions(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 4c3fc76..980bd34 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -550,6 +550,12 @@ static void cmd_help_long_parsed(void *parsed_result,
"port close (port_id|all)\n"
"Close all ports or port_id.\n\n"

+   "port add (p|a) (ident)\n"
+   "Add physical or virtual dev by pci address or 
virtual device name\n\n"
+
+   "port del (p|a) (port_id)\n"
+   "Del physical or virtual dev by port_id\n\n"
+
"port config (port_id|all)"
" speed (10|100|1000|1|4|auto)"
" duplex (half|full|auto)\n"
@@ -796,6 +802,101 @@ cmdline_parse_inst_t cmd_operate_specific_port = {
},
 };

+/* *** attach a specificied port *** */
+struct cmd_operate_attach_port_result {
+   cmdline_fixed_string_t port;
+   cmdline_fixed_string_t keyword;
+   cmdline_fixed_string_t type;
+   cmdline_fixed_string_t identifier;
+};
+
+static void cmd_operate_attach_port_parsed(void *parsed_result,
+   __attribute__((unused)) struct cmdline *cl,
+   __attribute__((unused)) void *data)
+{
+   struct cmd_operate_attach_port_result *res = parsed_result;
+
+   if (!strcmp(res->keyword, "attach"))
+   attach_port(res->type, res->identifier);
+   else
+   printf("Unknown parameter\n");
+}
+
+cmdline_parse_token_string_t cmd_operate_attach_port_port =
+   TOKEN_STRING_INITIALIZER(struct cmd_operate_attach_port_result,
+   port, "port");
+cmdline_parse_token_string_t cmd_operate_attach_port_keyword =
+   TOKEN_STRING_INITIALIZER(struct cmd_operate_attach_port_result,
+   keyword, "attach");
+cmdline_parse_token_string_t cmd_operate_attach_port_type =
+   TOKEN_STRING_INITIALIZER(struct cmd_operate_attach_port_result,
+   type, "p#v");
+
+cmdline_parse_token_string_t cmd_operate_attach_port_identifier =
+   TOKEN_STRING_INITIALIZER(struct cmd_operate_attach_port_result,
+   identifier, NULL);
+
+cmdline_parse_inst_t cmd_operate_attach_port = {
+   .f = cmd_operate_attach_port_parsed,
+   .data = NULL,
+   .help_str = "port attach p|v ident p: physical, v: virtual, "
+   "ident: pci address or virtual dev name",
+   .tokens = {
+   (void *)&cmd_operate_attach_port_port,
+   (void *)&cmd_operate_attach_port_keyword,
+   (void *)&cmd_operate_attach_port_type,
+   (void *)&cmd_operate_attach_port_identifier,
+   NULL,
+   },
+};
+
+/* *** detach a specificied port *** */
+struct cmd_operate_detach_port_result {
+   cmdline_fixed_string_t port;
+   cmdline_fixed_string_t keyword;
+   cmdline_fixed_string_t type;
+   uint8_t port_id;
+};
+
+static void cmd_operate_detach_port_parsed(void *parsed_result,
+   __attribute__((unused)) struct cmdline *cl,
+   __attribute__((unused)) void *data)
+{
+   struct cmd_operate_detach_port_result *res = parsed_result;
+
+   if (!strcmp(res->keyword, "detach"))
+   detach_port(res->type, res->port_id);
+   else
+   printf("Unknown parameter\n");
+}
+
+cmdline_parse_token_string_t cmd_operate_detach_port_port =
+   TOKEN_STRING_INITIALIZER(struct cmd_operate_detach_port_result,
+   port, "port");
+cmdline_parse_token_string_t cmd_operate_detach_port_keyword =
+   TOKEN_STRING_INITIALIZER(struct cmd_operate_detach_port_result,
+   keyword, "detach");
+cmdline_parse_token_string_t cmd_operate_detach_port_type =
+   TOKEN_STRING_INITIALIZER(struct cmd_operate_detach_port_result,
+   type, "p#v");
+
+cmdline_parse_token_num_t cmd_operate_detach_port_port_id =
+   TOKEN_NUM_INITIALIZER(struct cmd_operate_detach_port_result,
+   port_id, UINT8);
+
+cmdline_parse_inst_t cmd_operate_detach_port = {
+   .f = cmd_operate_detach_port_parsed,
+   .data = NULL,
+   .help_st

[dpdk-dev] [PATCH] librte_pmd_pcap: Add port hotplug support

2014-11-20 Thread Tetsuya Mukawa
This patch adds finalization code to free resources allocated by the
PMD.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_pmd_pcap/rte_eth_pcap.c | 33 +
 1 file changed, 33 insertions(+)

diff --git a/lib/librte_pmd_pcap/rte_eth_pcap.c 
b/lib/librte_pmd_pcap/rte_eth_pcap.c
index f12d1e7..df8498f 100644
--- a/lib/librte_pmd_pcap/rte_eth_pcap.c
+++ b/lib/librte_pmd_pcap/rte_eth_pcap.c
@@ -499,6 +499,13 @@ static struct eth_dev_ops ops = {
.stats_reset = eth_stats_reset,
 };

+static struct eth_driver rte_pcap_pmd = {
+   .pci_drv = {
+   .name = "rte_pcap_pmd",
+   .drv_flags = RTE_PCI_DRV_DETACHABLE,
+   },
+};
+
 /*
  * Function handler that opens the pcap file for reading a stores a
  * reference of it for use it later on.
@@ -739,10 +746,13 @@ rte_pmd_init_internals(const char *name, const unsigned 
nb_rx_queues,
data->nb_tx_queues = (uint16_t)nb_tx_queues;
data->dev_link = pmd_link;
data->mac_addrs = &eth_addr;
+   strncpy(data->name,
+   (*eth_dev)->data->name, strlen((*eth_dev)->data->name));

(*eth_dev)->data = data;
(*eth_dev)->dev_ops = &ops;
(*eth_dev)->pci_dev = pci_dev;
+   (*eth_dev)->driver = &rte_pcap_pmd;

return 0;

@@ -927,10 +937,33 @@ rte_pmd_pcap_devinit(const char *name, const char *params)

 }

+static int
+rte_pmd_pcap_devclose(const char *name, const char *params __rte_unused)
+{
+   struct rte_eth_dev *eth_dev = NULL;
+
+   RTE_LOG(INFO, PMD, "Closing pcap ethdev on numa socket %u\n",
+   rte_socket_id());
+
+   /* reserve an ethdev entry */
+   eth_dev = rte_eth_dev_allocated(name);
+   if (eth_dev == NULL)
+   return -1;
+
+   rte_free(eth_dev->data->dev_private);
+   rte_free(eth_dev->data);
+   rte_free(eth_dev->pci_dev);
+
+   rte_eth_dev_free(name);
+
+   return 0;
+}
+
 static struct rte_driver pmd_pcap_drv = {
.name = "eth_pcap",
.type = PMD_VDEV,
.init = rte_pmd_pcap_devinit,
+   .close = rte_pmd_pcap_devclose,
 };

 PMD_REGISTER_DRIVER(pmd_pcap_drv);
-- 
1.9.1



[dpdk-dev] versioning and maintenance

2014-11-20 Thread Thomas Monjalon
Hi,

2014-11-19 10:13, Neil Horman:
> On Wed, Nov 19, 2014 at 11:35:08AM +, Bruce Richardson wrote:
> > On Wed, Nov 19, 2014 at 12:22:14PM +0100, Thomas Monjalon wrote:
> > > Following the discussion we had with Neil during the conference call,
> > > I suggest this plan, starting with the next release (2.0):
> > >   - add version numbers to libraries
> > >   - add version numbers to functions inside .map files
> > >   - create a git tree dedicated to maintenance and API compatibility
> > > 
> > > It means these version numbers must be incremented when breaking the API.
> > > Though the old code paths will be maintained and tested separately by 
> > > volunteers.
> > > A mailing list for maintenance purpose could be created if needed.
> > >
> > Hi Thomas,
> > 
> > I really think that the versionning is best handled inside the main 
> > repository
> > itself. Given that the proposed deprecation policy is over two releases 
> > i.e. an
> > API is marked deprecated in release X and then removed in X+1, I don't see 
> > the
> > maintaining of old code paths to be particularly onerous.
> > 
> > /Bruce
> 
> I agree with Bruce, even if it is on occasion an added workload, its not the
> sort of thing that can or should be placed on an alternate developer.  
> Backwards
> compatibility is the sort of thing that has to be on the mind of the developer
> when modifying an API, and on the mind of the reviewer when reviewing code.  
> To
> shunt that responsibility elsewhere invites the opportunity for backwards
> compatibilty to be a second class citizen who's goal will never be reached,
> because developers instituting ABI changes will never care about the
> consequences, and anyone worrying about backwards compatibility will always be
> playing catch up, possibly allowing ABI breaks to slip through.
> 
> Neil

Before taking a decision, we should detail every concern.

1/
Currently there is not a lot of API refactoring because DPDK is well tailored
for x86 and Intel NICs. But we are seeing that supporting new CPUs and new NICs
would require some adaptations.

2/
I'm curious to know how you would handle a big change like the recent mbuf 
rework.
Should we duplicate the structure and all the functions using mbuf?

3/
Should we add new fields at the end of its structure to avoid ABI breaking?

4/
Developers contribute because they need some changes. So when breaking
an API, their application is already ready for the new version.
I mean the author of such a patch is probably not really motivated to keep ABI
compatibility and duplicate the code path.

5/
Instead of simply modifying an API function, the change would appear as a whole
new function with some differences compared to the old one. Such a change is
really not convenient to review.

6/
Testing ABI compatibility could be tricky. We would need a tool to check it's
mostly OK. The good place for such a tool is in app/test. It was designed to be
the unit tests of the API.

7/
This system would allow application developers to upgrade DPDK to n+1 without
rebuilding. But when upgrading to n+2, they should have adapted their
application to comply with n+1 API (because n will be removed).
So this solution offers a delay between the upgrade decision and the
app work. Note that they could prepare their application before upgrading.
Anyway, an upgrade should be tested before actually doing it. The behaviour
of the application could change and require some adaptations.

8/
How to handle a change in the implementation of a PMD which severely impacts
the application? Example: an ol_flag was misused and the application has
a workaround to interpret this flag but it's now incompatible with the fix.

9/
When we don't want to adapt an application, it means the development is
finished and we don't care about the new features of the library.
So I wonder if it wouldn't be more appropriate to provide stable releases
with true maintenance to such users. I understand that this is what Red Hat
provides to their customers.

Hope this discussion will bring a clear idea of what should be done with
which implications.
Thanks
-- 
Thomas


[dpdk-dev] [PATCH 25/25] eal: Enable port hotplug framework in Linux

2014-11-20 Thread Tetsuya Mukawa
The patch enables CONFIG_RTE_LIBRTE_EAL_HOTPLUG in Linux configuration.

Signed-off-by: Tetsuya Mukawa 
---
 config/common_linuxapp | 5 +
 1 file changed, 5 insertions(+)

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 57b61c9..a2b1e36 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -144,6 +144,11 @@ CONFIG_RTE_LIBRTE_EAL_LINUXAPP=y
 CONFIG_RTE_LIBRTE_EAL_BAREMETAL=n

 #
+# Compile Environment Abstraction Layer to support hotplug
+#
+CONFIG_RTE_LIBRTE_EAL_HOTPLUG=y
+
+#
 # Compile Environment Abstraction Layer to support Vmware TSC map
 #
 CONFIG_RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT=y
-- 
1.9.1



[dpdk-dev] [PATCH 24/25] eal/pci: Add port hotplug functions for physical devices.

2014-11-20 Thread Tetsuya Mukawa
The patch adds rte_eal_dev_attach_pdev() and rte_eal_dev_detach_pdev().

rte_eal_dev_attach_pdev() receives a PCI address of the device and
returns an attached port number.
rte_eal_dev_detach_pdev() receives a port number, and returns a PCI
address actually detached.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_eal/common/eal_common_dev.c  | 58 +
 lib/librte_eal/common/include/rte_dev.h | 25 ++
 2 files changed, 83 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_dev.c 
b/lib/librte_eal/common/eal_common_dev.c
index 0518e3c..c2d5cd6 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -181,6 +181,64 @@ rte_eal_dev_close_one(const char *name)
return rte_eal_dev_find_and_invoke(name, INVOKE_CLOSE);
 }

+/* attach the new physical device, then store port_id of the device */
+int
+rte_eal_dev_attach_pdev(struct rte_pci_addr *addr, uint8_t *port_id)
+{
+   uint8_t new_port_id;
+   struct rte_eth_dev devs[RTE_MAX_ETHPORTS];
+
+   /* save current port status */
+   rte_eth_dev_save(devs);
+   /* re-construct pci_device_list */
+   if (rte_eal_pci_scan())
+   goto err;
+   /* invoke probe func of the driver can handle the new device */
+   if (rte_eal_pci_probe_one(addr))
+   goto err;
+   /* get port_id enabled by above procedures */
+   if (rte_eth_dev_get_changed_port(devs, &new_port_id))
+   goto err;
+
+   *port_id = new_port_id;
+   return 0;
+err:
+   RTE_LOG(ERR, EAL, "Drver, cannot attach the device\n");
+   return -1;
+}
+
+/* detach the new physical device, then store pci_addr of the device */
+int
+rte_eal_dev_detach_pdev(uint8_t port_id, struct rte_pci_addr *addr)
+{
+   struct rte_pci_addr freed_addr;
+   struct rte_pci_addr vp;
+
+   /* check whether the driver supports detach feature, or not */
+   if (!rte_eth_dev_check_detachable(port_id))
+   goto err;
+
+   /* get pci address by port id */
+   if (rte_eth_dev_get_addr_by_port(port_id, &freed_addr))
+   goto err;
+
+   /* Zerod pci addr means the port comes from virtual device */
+   vp.domain = vp.bus = vp.devid = vp.function = 0;
+   if (eal_compare_pci_addr(&vp, &freed_addr) == 0)
+   goto err;
+
+   /* invoke close func of the driver,
+* also remove the device from pci_device_list */
+   if (rte_eal_pci_close_one(&freed_addr))
+   goto err;
+
+   *addr = freed_addr;
+   return 0;
+err:
+   RTE_LOG(ERR, EAL, "Drver, cannot detach the device\n");
+   return -1;
+}
+
 static void
 get_vdev_name(char *vdevargs)
 {
diff --git a/lib/librte_eal/common/include/rte_dev.h 
b/lib/librte_eal/common/include/rte_dev.h
index 159d5a5..26d7526 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -101,6 +101,19 @@ void rte_eal_driver_unregister(struct rte_driver *driver);
 #if defined(RTE_LIBRTE_EAL_HOTPLUG) && defined(RTE_LIBRTE_EAL_LINUXAPP)

 /**
+ * Attach a new physical device.
+ *
+ * @param addr
+ *   A pointer to a pci address structure describing the new
+ *   device to be attached.
+ * @param port_id
+ *  A pointer to a port identifier actually attached.
+ * @return
+ *  0 on success and port_id is filled, negative on error
+ */
+int rte_eal_dev_attach_pdev(struct rte_pci_addr *addr, uint8_t *port_id);
+
+/**
  * Attach a new virtual device.
  *
  * @param vdevargs
@@ -114,6 +127,18 @@ void rte_eal_driver_unregister(struct rte_driver *driver);
 int rte_eal_dev_attach_vdev(const char *vdevargs, uint8_t *port_id);

 /**
+ * Detach a physical device.
+ *
+ * @param port_id
+ *   The port identifier of the physical device to detach.
+ * @param addr
+ *  A pointer to a pci address structure actually detached.
+ * @return
+ *  0 on success and addr is filled, negative on error
+ */
+int rte_eal_dev_detach_pdev(uint8_t port_id, struct rte_pci_addr *addr);
+
+/**
  * Detach a virtual device.
  *
  * @param port_id
-- 
1.9.1



[dpdk-dev] [PATCH 23/25] eal/pci: Add rte_eal_pci_probe_one and rte_eal_pci_close_one

2014-11-20 Thread Tetsuya Mukawa
These functions are used to probe and close a device.
First, the function tries to find a device that has the specified PCI address.
Then it probes or closes that device.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_eal/common/eal_common_pci.c  | 58 +
 lib/librte_eal/common/include/rte_pci.h | 26 +++
 2 files changed, 84 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_pci.c 
b/lib/librte_eal/common/eal_common_pci.c
index b404ee0..5ff7b49 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -152,6 +152,64 @@ pci_close_all_drivers(struct rte_pci_device *dev)
 {
return pci_invoke_all_drivers(dev, INVOKE_CLOSE);
 }
+
+static int
+rte_eal_pci_invoke_one(struct rte_pci_addr *addr, int type)
+{
+   struct rte_pci_device *dev = NULL;
+   int ret = 0;
+
+   TAILQ_FOREACH(dev, &pci_device_list, next) {
+   if (eal_compare_pci_addr(&dev->addr, addr))
+   continue;
+
+   switch (type) {
+   case INVOKE_PROBE:
+   ret = pci_probe_all_drivers(dev);
+   break;
+   case INVOKE_CLOSE:
+   ret = pci_close_all_drivers(dev);
+   break;
+   }
+   if (ret < 0)
+   goto invoke_err_return;
+   if (type == INVOKE_CLOSE)
+   goto remove_dev;
+   return 0;
+   }
+
+   return -1;
+
+invoke_err_return:
+   RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT
+   " cannot be used\n", dev->addr.domain, dev->addr.bus,
+   dev->addr.devid, dev->addr.function);
+   return -1;
+
+remove_dev:
+   TAILQ_REMOVE(&pci_device_list, dev, next);
+   return 0;
+}
+
+/*
+ * Find the pci device specified by pci address, then invoke probe function of
+ * the driver of the devive.
+ */
+int
+rte_eal_pci_probe_one(struct rte_pci_addr *addr)
+{
+   return rte_eal_pci_invoke_one(addr, INVOKE_PROBE);
+}
+
+/*
+ * Find the pci device specified by pci address, then invoke close function of
+ * the driver of the devive.
+ */
+int
+rte_eal_pci_close_one(struct rte_pci_addr *addr)
+{
+   return rte_eal_pci_invoke_one(addr, INVOKE_CLOSE);
+}
 #endif /* RTE_LIBRTE_EAL_HOTPLUG & RTE_LIBRTE_EAL_LINUXAPP */

 /*
diff --git a/lib/librte_eal/common/include/rte_pci.h 
b/lib/librte_eal/common/include/rte_pci.h
index 74720d1..33300be 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -311,6 +311,32 @@ eal_compare_pci_addr(struct rte_pci_addr *addr, struct 
rte_pci_addr *addr2)
 int rte_eal_pci_probe(void);

 /**
+ * Probe the single PCI device.
+ *
+ * Scan the content of the PCI bus, and find the pci device specified by pci
+ * addrrss, then call the probe() function for registered driver that has a
+ * matching entry in its id_table for discovered device.
+ *
+ * @return
+ *   - 0 on success.
+ *   - Negative on error.
+ */
+int rte_eal_pci_probe_one(struct rte_pci_addr *addr);
+
+/**
+ * Close the single PCI device.
+ *
+ * Scan the content of the PCI bus, and find the pci device specified by pci
+ * addrrss, then call the close() function for registered driver that has a
+ * matching entry in its id_table for discovered device.
+ *
+ * @return
+ *   - 0 on success.
+ *   - Negative on error.
+ */
+int rte_eal_pci_close_one(struct rte_pci_addr *addr);
+
+/**
  * Dump the content of the PCI bus.
  *
  * @param f
-- 
1.9.1



[dpdk-dev] [PATCH 22/25] eal/pci: Add pci_close_all_drivers

2014-11-20 Thread Tetsuya Mukawa
The function tries to find a driver for the specified device, and then
closes the driver.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_eal/common/eal_common_pci.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_pci.c 
b/lib/librte_eal/common/eal_common_pci.c
index 1e3efea..b404ee0 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -100,6 +100,7 @@ static struct rte_devargs *pci_devargs_lookup(struct 
rte_pci_device *dev)
 }

 #define INVOKE_PROBE   (0)
+#define INVOKE_CLOSE   (1)

 static int
 pci_invoke_all_drivers(struct rte_pci_device *dev, int type)
@@ -112,6 +113,11 @@ pci_invoke_all_drivers(struct rte_pci_device *dev, int 
type)
case INVOKE_PROBE:
rc = rte_eal_pci_probe_one_driver(dr, dev);
break;
+#if defined(RTE_LIBRTE_EAL_HOTPLUG) && defined(RTE_LIBRTE_EAL_LINUXAPP)
+   case INVOKE_CLOSE:
+   rc = rte_eal_pci_close_one_driver(dr, dev);
+   break;
+#endif
}
if (rc < 0)
/* negative value is an error */
@@ -135,6 +141,19 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
return pci_invoke_all_drivers(dev, INVOKE_PROBE);
 }

+#if defined(RTE_LIBRTE_EAL_HOTPLUG) && defined(RTE_LIBRTE_EAL_LINUXAPP)
+/*
+ * If vendor/device ID match, call the devclose() function of all
+ * registered driver for the given device. Return -1 if initialization
+ * failed, return 1 if no driver is found for this device.
+ */
+static int
+pci_close_all_drivers(struct rte_pci_device *dev)
+{
+   return pci_invoke_all_drivers(dev, INVOKE_CLOSE);
+}
+#endif /* RTE_LIBRTE_EAL_HOTPLUG & RTE_LIBRTE_EAL_LINUXAPP */
+
 /*
  * Scan the content of the PCI bus, and call the devinit() function for
  * all registered drivers that have a matching entry in its id_table
-- 
1.9.1



[dpdk-dev] [PATCH 21/25] eal/pci: Fix pci_probe_all_drivers to share code with closing function

2014-11-20 Thread Tetsuya Mukawa
pci_close_all_drivers() will be implemented after this patch.
To share part of the code between these 2 functions, this patch reworks
pci_probe_all_drivers() first.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_eal/common/eal_common_pci.c | 28 
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_pci.c 
b/lib/librte_eal/common/eal_common_pci.c
index f01f258..1e3efea 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -99,19 +99,20 @@ static struct rte_devargs *pci_devargs_lookup(struct 
rte_pci_device *dev)
return NULL;
 }

-/*
- * If vendor/device ID match, call the devinit() function of all
- * registered driver for the given device. Return -1 if initialization
- * failed, return 1 if no driver is found for this device.
- */
+#define INVOKE_PROBE   (0)
+
 static int
-pci_probe_all_drivers(struct rte_pci_device *dev)
+pci_invoke_all_drivers(struct rte_pci_device *dev, int type)
 {
struct rte_pci_driver *dr = NULL;
-   int rc;
+   int rc = 0;

TAILQ_FOREACH(dr, &pci_driver_list, next) {
-   rc = rte_eal_pci_probe_one_driver(dr, dev);
+   switch (type) {
+   case INVOKE_PROBE:
+   rc = rte_eal_pci_probe_one_driver(dr, dev);
+   break;
+   }
if (rc < 0)
/* negative value is an error */
return -1;
@@ -124,6 +125,17 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
 }

 /*
+ * If vendor/device ID match, call the devinit() function of all
+ * registered driver for the given device. Return -1 if initialization
+ * failed, return 1 if no driver is found for this device.
+ */
+static int
+pci_probe_all_drivers(struct rte_pci_device *dev)
+{
+   return pci_invoke_all_drivers(dev, INVOKE_PROBE);
+}
+
+/*
  * Scan the content of the PCI bus, and call the devinit() function for
  * all registered drivers that have a matching entry in its id_table
  * for discovered devices.
-- 
1.9.1



[dpdk-dev] [PATCH 20/25] eal/pci: Add rte_eal_pci_close_one_driver

2014-11-20 Thread Tetsuya Mukawa
The function is used for closing the specified driver and device.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_eal/common/include/eal_private.h | 11 ++
 lib/librte_eal/linuxapp/eal/eal_pci.c   | 58 +
 2 files changed, 69 insertions(+)

diff --git a/lib/librte_eal/common/include/eal_private.h 
b/lib/librte_eal/common/include/eal_private.h
index a1127ab..b2776cc 100644
--- a/lib/librte_eal/common/include/eal_private.h
+++ b/lib/librte_eal/common/include/eal_private.h
@@ -176,6 +176,17 @@ int rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr,
struct rte_pci_device *dev);

 /**
+ * Munmap memory for single PCI device
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ *   0 on success, negative on error
+ */
+int rte_eal_pci_close_one_driver(struct rte_pci_driver *dr,
+   struct rte_pci_device *dev);
+
+/**
  * Init tail queues for non-EAL library structures. This is to allow
  * the rings, mempools, etc. lists to be shared among multiple processes
  *
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 0317366..fbfef40 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -599,6 +599,64 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, 
struct rte_pci_device *d
return 1;
 }

+#if defined(RTE_LIBRTE_EAL_HOTPLUG) && defined(RTE_LIBRTE_EAL_LINUXAPP)
+/*
+ * If vendor/device ID match, call the devshutdown() function of the
+ * driver.
+ */
+int
+rte_eal_pci_close_one_driver(struct rte_pci_driver *dr,
+   struct rte_pci_device *dev)
+{
+   struct rte_pci_id *id_table;
+
+   for (id_table = dr->id_table ; id_table->vendor_id != 0; id_table++) {
+
+   /* check if device's identifiers match the driver's ones */
+   if (id_table->vendor_id != dev->id.vendor_id &&
+   id_table->vendor_id != PCI_ANY_ID)
+   continue;
+   if (id_table->device_id != dev->id.device_id &&
+   id_table->device_id != PCI_ANY_ID)
+   continue;
+   if (id_table->subsystem_vendor_id !=
+   dev->id.subsystem_vendor_id &&
+   id_table->subsystem_vendor_id != PCI_ANY_ID)
+   continue;
+   if (id_table->subsystem_device_id !=
+   dev->id.subsystem_device_id &&
+   id_table->subsystem_device_id != PCI_ANY_ID)
+   continue;
+
+   struct rte_pci_addr *loc = &dev->addr;
+
+   RTE_LOG(DEBUG, EAL,
+   "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
+   loc->domain, loc->bus, loc->devid,
+   loc->function, dev->numa_node);
+
+   RTE_LOG(DEBUG, EAL, "  remove driver: %x:%x %s\n",
+   dev->id.vendor_id, dev->id.device_id,
+   dr->name);
+
+   /* call the driver devshutdown() function */
+   if (dr->devshutdown && (dr->devshutdown(dr, dev) < 0))
+   return -1;  /* negative value is an error */
+
+   /* clear driver structure */
+   dev->driver = NULL;
+
+   if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)
+   /* unmap resources for devices that use igb_uio */
+   pci_unmap_device(dev);
+
+   return 0;
+   }
+   /* return positive value if driver is not found */
+   return 1;
+}
+#endif /* RTE_LIBRTE_EAL_HOTPLUG & RTE_LIBRTE_EAL_LINUXAPP */
+
 /* Init the PCI EAL subsystem */
 int
 rte_eal_pci_init(void)
-- 
1.9.1



[dpdk-dev] [PATCH 19/25] eal/pci: Change scope of rte_eal_pci_scan to global

2014-11-20 Thread Tetsuya Mukawa
The function is called by port hotplug framework, so change scope of the
function to global.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_eal/common/include/eal_private.h | 11 +++
 lib/librte_eal/linuxapp/eal/eal_pci.c   |  6 +++---
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/lib/librte_eal/common/include/eal_private.h 
b/lib/librte_eal/common/include/eal_private.h
index 232fcec..a1127ab 100644
--- a/lib/librte_eal/common/include/eal_private.h
+++ b/lib/librte_eal/common/include/eal_private.h
@@ -154,6 +154,17 @@ struct rte_pci_driver;
 struct rte_pci_device;

 /**
+ * Scan the content of the PCI bus, and the devices in the devices
+ * list
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ *  0 on success, negative on error
+ */
+int rte_eal_pci_scan(void);
+
+/**
  * Mmap memory for single PCI device
  *
  * This function is private to EAL.
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 85cdd27..0317366 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -361,8 +361,8 @@ error:
  * Scan the content of the PCI bus, and the devices in the devices
  * list
  */
-static int
-pci_scan(void)
+int
+rte_eal_pci_scan(void)
 {
struct dirent *e;
DIR *dir;
@@ -612,7 +612,7 @@ rte_eal_pci_init(void)
if (internal_config.no_pci)
return 0;

-   if (pci_scan() < 0) {
+   if (rte_eal_pci_scan() < 0) {
RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__);
return -1;
}
-- 
1.9.1



[dpdk-dev] [PATCH 18/25] eal/pci: Prevent double registrations for pci_device_list

2014-11-20 Thread Tetsuya Mukawa
The patch fixes pci_scan_one() not to register same pci devices twice.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_eal/linuxapp/eal/eal_pci.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index d7293c7..85cdd27 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -292,14 +292,17 @@ pci_scan_one(const char *dirname, uint16_t domain, 
uint8_t bus,
}
else {
struct rte_pci_device *dev2 = NULL;
+   int ret;

TAILQ_FOREACH(dev2, &pci_device_list, next) {
-   if (eal_compare_pci_addr(&dev->addr, &dev2->addr) != 0)
+   ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
+   if (ret > 0)
continue;
-   else {
+   else if (ret < 0) {
TAILQ_INSERT_BEFORE(dev2, dev, next);
return 0;
-   }
+   } else  /* already registered */
+   return 0;
}
TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
}
-- 
1.9.1



[dpdk-dev] [PATCH 17/25] eal/linux/pci: Add functions for unmapping igb_uio resources

2014-11-20 Thread Tetsuya Mukawa
The patch adds functions for unmapping igb_uio resources. The patch is only
for Linux and igb_uio environment. VFIO and BSD are not supported.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_eal/linuxapp/eal/eal_pci.c  | 32 +
 lib/librte_eal/linuxapp/eal/eal_pci_uio.c  | 56 ++
 lib/librte_eal/linuxapp/eal/include/eal_pci_init.h |  7 +++
 3 files changed, 95 insertions(+)

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 78df974..d7293c7 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -123,6 +123,22 @@ fail:
return NULL;
 }

+#if defined(RTE_LIBRTE_EAL_HOTPLUG) && defined(RTE_LIBRTE_EAL_LINUXAPP)
+/* unmap a particular resource */
+void
+pci_unmap_resource(void *requested_addr, size_t size)
+{
+   /* Unmap the PCI memory resource of device */
+   if (munmap(requested_addr, size)) {
+   RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n",
+   __func__, requested_addr, (unsigned long)size,
+   strerror(errno));
+   } else
+   RTE_LOG(DEBUG, EAL, "  PCI memory mapped at %p\n",
+   requested_addr);
+}
+#endif /* RTE_LIBRTE_EAL_HOTPLUG & RTE_LIBRTE_EAL_LINUXAPP */
+
 /* parse the "resource" sysfs file */
 #define IORESOURCE_MEM  0x0200

@@ -493,6 +509,22 @@ pci_map_device(struct rte_pci_device *dev)
return 0;
 }

+#if defined(RTE_LIBRTE_EAL_HOTPLUG) && defined(RTE_LIBRTE_EAL_LINUXAPP)
+static void
+pci_unmap_device(struct rte_pci_device *dev)
+{
+   /* try unmapping the NIC resources using VFIO if it exists */
+#ifdef VFIO_PRESENT
+   if (pci_vfio_is_enabled()) {
+   RTE_LOG(ERR, EAL, "%s() doesn't support vfio yet.\n",
+   __func__);
+   return;
+   }
+#endif
+   pci_uio_unmap_resource(dev);
+}
+#endif /* RTE_LIBRTE_EAL_HOTPLUG & RTE_LIBRTE_EAL_LINUXAPP */
+
 /*
  * If vendor/device ID match, call the devinit() function of the
  * driver.
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c 
b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
index f0deeba..ff48eb9 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
@@ -395,6 +395,62 @@ pci_uio_map_resource(struct rte_pci_device *dev)
return 0;
 }

+#if defined(RTE_LIBRTE_EAL_HOTPLUG) && defined(RTE_LIBRTE_EAL_LINUXAPP)
+static void
+pci_uio_unmap(struct mapped_pci_resource *uio_res)
+{
+   int i;
+
+   for (i = 0; i != uio_res->nb_maps; i++)
+   pci_unmap_resource(uio_res->maps[i].addr,
+   (size_t)uio_res->maps[i].size);
+}
+
+static struct mapped_pci_resource *
+pci_uio_find_resource(struct rte_pci_device *dev)
+{
+   struct mapped_pci_resource *uio_res;
+
+   TAILQ_FOREACH(uio_res, pci_res_list, next) {
+
+   /* skip this element if it doesn't match our PCI address */
+   if (!eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
+   return uio_res;
+   }
+   return NULL;
+}
+
+/* unmap the PCI resource of a PCI device in virtual memory */
+void
+pci_uio_unmap_resource(struct rte_pci_device *dev)
+{
+   struct mapped_pci_resource *uio_res;
+
+   /* find an entry for the device */
+   uio_res = pci_uio_find_resource(dev);
+   if (uio_res == NULL)
+   return;
+
+   /* secondary processes - just free maps */
+   if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+   return pci_uio_unmap(uio_res);
+
+   TAILQ_REMOVE(pci_res_list, uio_res, next);
+
+   /* unmap all resources */
+   pci_uio_unmap(uio_res);
+
+   /* free uio resource */
+   rte_free(uio_res);
+
+   /* close fd if in primary process */
+   close(dev->intr_handle.fd);
+
+   dev->intr_handle.fd = -1;
+   dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+}
+#endif /* RTE_LIBRTE_EAL_HOTPLUG & RTE_LIBRTE_EAL_LINUXAPP */
+
 /*
  * parse a sysfs file containing one integer value
  * different to the eal version, as it needs to work with 64-bit values
diff --git a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h 
b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
index d758bee..ab9c16b 100644
--- a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
+++ b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
@@ -65,6 +65,13 @@ void *pci_map_resource(void *requested_addr, int fd, off_t 
offset,
 /* map IGB_UIO resource prototype */
 int pci_uio_map_resource(struct rte_pci_device *dev);

+#if defined(RTE_LIBRTE_EAL_HOTPLUG) && defined(RTE_LIBRTE_EAL_LINUXAPP)
+void pci_unmap_resource(void *requested_addr, size_t size);
+
+/* unmap IGB_UIO resource prototype */
+void pci_uio_unmap_resource(struct rte_pci_device *dev);
+#endif /* RTE_LIBRTE_EAL_HOTPLUG & RTE_LIBRTE_EAL_LINUXAPP */
+
 #ifdef VFIO_PRESENT

 #define VFIO_MAX_GROUPS 64

[dpdk-dev] [PATCH 16/25] eal/pci: Add port hotplug functions for virtual devices.

2014-11-20 Thread Tetsuya Mukawa
The patch adds rte_eal_dev_attach_vdev() and rte_eal_dev_detach_vdev().

rte_eal_dev_attach_vdev() receives virtual device name and parameters,
and returns an attached port number.
rte_eal_dev_detach_vdev() receives a port number, and returns device
name actually detached.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_eal/common/eal_common_dev.c  | 76 +
 lib/librte_eal/common/include/rte_dev.h | 29 +
 2 files changed, 105 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_dev.c 
b/lib/librte_eal/common/eal_common_dev.c
index 183d65b..0518e3c 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -180,4 +180,80 @@ rte_eal_dev_close_one(const char *name)
 {
return rte_eal_dev_find_and_invoke(name, INVOKE_CLOSE);
 }
+
+static void
+get_vdev_name(char *vdevargs)
+{
+   char *sep;
+
+   /* set the first ',' to '\0' to split name and arguments */
+   sep = strchr(vdevargs, ',');
+   if (sep != NULL)
+   sep[0] = '\0';
+}
+
+/* attach the new virtual device, then store port_id of the device */
+int
+rte_eal_dev_attach_vdev(const char *vdevargs, uint8_t *port_id)
+{
+   char *args;
+   uint8_t new_port_id;
+   struct rte_eth_dev devs[RTE_MAX_ETHPORTS];
+
+   args = strdup(vdevargs);
+   if (args == NULL)
+   return -1;
+
+   /* save current port status */
+   rte_eth_dev_save(devs);
+   /* add the vdevargs to devargs_list */
+   if (rte_eal_devargs_add(RTE_DEVTYPE_VIRTUAL, args))
+   goto err0;
+   /* parse vdevargs, then retrieve device name */
+   get_vdev_name(args);
+   /* walk around dev_driver_list to find the driver of the device,
+* then invoke probe function o the driver */
+   if (rte_eal_dev_init_one(args))
+   goto err1;
+   /* get port_id enabled by above procedures */
+   if (rte_eth_dev_get_changed_port(devs, &new_port_id))
+   goto err1;
+
+   free(args);
+   *port_id = new_port_id;
+   return 0;
+err1:
+   rte_eal_devargs_remove(RTE_DEVTYPE_VIRTUAL, args);
+err0:
+   RTE_LOG(ERR, EAL, "Drver, cannot detach the device\n");
+   free(args);
+   return -1;
+}
+
+/* detach the new virtual device, then store the name of the device */
+int
+rte_eal_dev_detach_vdev(uint8_t port_id, char *vdevname)
+{
+   char name[RTE_ETH_NAME_MAX_LEN];
+
+   /* check whether the driver supports detach feature, or not */
+   if (!rte_eth_dev_check_detachable(port_id))
+   goto err;
+
+   /* get device name by port id */
+   if (rte_eth_dev_get_name_by_port(port_id, name))
+   goto err;
+   /* walk around dev_driver_list to find the driver of the device,
+* then invoke close function o the driver */
+   if (rte_eal_dev_close_one(name))
+   goto err;
+   /* remove the vdevname from devargs_list */
+   rte_eal_devargs_remove(RTE_DEVTYPE_VIRTUAL, name);
+
+   strncpy(vdevname, name, sizeof(name));
+   return 0;
+err:
+   RTE_LOG(ERR, EAL, "Drver, cannot detach the device\n");
+   return -1;
+}
 #endif /* RTE_LIBRTE_EAL_HOTPLUG & RTE_LIBRTE_EAL_LINUXAPP */
diff --git a/lib/librte_eal/common/include/rte_dev.h 
b/lib/librte_eal/common/include/rte_dev.h
index 71d40c3..159d5a5 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -98,6 +98,35 @@ void rte_eal_driver_register(struct rte_driver *driver);
  */
 void rte_eal_driver_unregister(struct rte_driver *driver);

+#if defined(RTE_LIBRTE_EAL_HOTPLUG) && defined(RTE_LIBRTE_EAL_LINUXAPP)
+
+/**
+ * Attach a new virtual device.
+ *
+ * @param vdevargs
+ *   A pointer to a strings array describing the new device
+ *   to be attached.
+ * @param port_id
+ *  A pointer to a port identifier actually attached.
+ * @return
+ *  0 on success and port_id is filled, negative on error
+ */
+int rte_eal_dev_attach_vdev(const char *vdevargs, uint8_t *port_id);
+
+/**
+ * Detach a virtual device.
+ *
+ * @param port_id
+ *   The port identifier of the virtual device to detach.
+ * @param addr
+ *  A pointer to a virtual device name actually detached.
+ * @return
+ *  0 on success and vdevname is filled, negative on error
+ */
+int rte_eal_dev_detach_vdev(uint8_t port_id, char *vdevname);
+
+#endif /* RTE_LIBRTE_EAL_HOTPLUG & RTE_LIBRTE_EAL_LINUXAPP */
+
 /**
  * Initalize all the registered drivers in this process
  */
-- 
1.9.1



[dpdk-dev] [PATCH 15/25] eal/pci: Add probe and close function for virtual drivers

2014-11-20 Thread Tetsuya Mukawa
The patch adds rte_eal_dev_init_one() and rte_eal_dev_close_one().
These are used for attaching and detaching virtual devices.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_eal/common/eal_common_dev.c  | 74 +
 lib/librte_eal/common/include/rte_dev.h |  6 +++
 lib/librte_eal/linuxapp/eal/Makefile|  1 +
 3 files changed, 81 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_dev.c 
b/lib/librte_eal/common/eal_common_dev.c
index eae5656..183d65b 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -32,10 +32,13 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

+#include 
+#include 
 #include 
 #include 
 #include 

+#include 
 #include 
 #include 
 #include 
@@ -107,3 +110,74 @@ rte_eal_dev_init(void)
}
return 0;
 }
+
+/* So far, only Linux supports the DPDK hotplug function. */
+#if defined(RTE_LIBRTE_EAL_HOTPLUG) && defined(RTE_LIBRTE_EAL_LINUXAPP)
+
+#define INVOKE_PROBE   (0)
+#define INVOKE_CLOSE   (1)
+
+static void
+rte_eal_dev_invoke(struct rte_driver *driver,
+   struct rte_devargs *devargs, int type)
+{
+   switch (type) {
+   case INVOKE_PROBE:
+   driver->init(devargs->virtual.drv_name, devargs->args);
+   break;
+   case INVOKE_CLOSE:
+   driver->close(devargs->virtual.drv_name, devargs->args);
+   break;
+   }
+}
+
+static int
+rte_eal_dev_find_and_invoke(const char *name, int type)
+{
+   struct rte_devargs *devargs;
+   struct rte_driver *driver;
+
+   /* invoke the requested operation on the first matching virtual device */
+   TAILQ_FOREACH(devargs, &devargs_list, next) {
+
+   if (devargs->type != RTE_DEVTYPE_VIRTUAL)
+   continue;
+
+   if (strncmp(name, devargs->virtual.drv_name, strlen(name)))
+   continue;
+
+   TAILQ_FOREACH(driver, &dev_driver_list, next) {
+   if (driver->type != PMD_VDEV)
+   continue;
+
+   /* search a driver prefix in virtual device name */
+   if (!strncmp(driver->name, devargs->virtual.drv_name,
+   strlen(driver->name))) {
+   rte_eal_dev_invoke(driver, devargs, type);
+   break;
+   }
+   }
+
+   if (driver == NULL) {
+   RTE_LOG(WARNING, EAL, "no driver found for %s\n",
+ devargs->virtual.drv_name);
+   }
+   return 0;
+   }
+   return 1;
+}
+
+/* find and initialize the driver of specified virtual device */
+static int
+rte_eal_dev_init_one(const char *name)
+{
+   return rte_eal_dev_find_and_invoke(name, INVOKE_PROBE);
+}
+
+/* find and finalize the driver of specified virtual device */
+static int
+rte_eal_dev_close_one(const char *name)
+{
+   return rte_eal_dev_find_and_invoke(name, INVOKE_CLOSE);
+}
+#endif /* RTE_LIBRTE_EAL_HOTPLUG & RTE_LIBRTE_EAL_LINUXAPP */
diff --git a/lib/librte_eal/common/include/rte_dev.h 
b/lib/librte_eal/common/include/rte_dev.h
index f7e3a10..71d40c3 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -57,6 +57,11 @@ TAILQ_HEAD(rte_driver_list, rte_driver);
 typedef int (rte_dev_init_t)(const char *name, const char *args);

 /**
+ * Close function called for each device driver once.
+ */
+typedef int (rte_dev_close_t)(const char *name, const char *args);
+
+/**
  * Driver type enumeration
  */
 enum pmd_type {
@@ -72,6 +77,7 @@ struct rte_driver {
enum pmd_type type;/**< PMD Driver type */
const char *name;   /**< Driver name. */
rte_dev_init_t *init;  /**< Device init. function. */
+   rte_dev_close_t *close;/**< Device close. function. */
 };

 /**
diff --git a/lib/librte_eal/linuxapp/eal/Makefile 
b/lib/librte_eal/linuxapp/eal/Makefile
index c99433e..27e9b48 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -40,6 +40,7 @@ CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
 CFLAGS += -I$(RTE_SDK)/lib/librte_ring
 CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
 CFLAGS += -I$(RTE_SDK)/lib/librte_malloc
+CFLAGS += -I$(RTE_SDK)/lib/librte_mbuf
 CFLAGS += -I$(RTE_SDK)/lib/librte_ether
 CFLAGS += -I$(RTE_SDK)/lib/librte_ivshmem
 CFLAGS += -I$(RTE_SDK)/lib/librte_pmd_ring
-- 
1.9.1



[dpdk-dev] [PATCH 14/25] eal/pci: Add rte_eal_devargs_remove

2014-11-20 Thread Tetsuya Mukawa
The function removes a specified devargs from devargs_list.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_eal/common/eal_common_devargs.c  | 13 +
 lib/librte_eal/common/include/rte_devargs.h | 18 ++
 2 files changed, 31 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_devargs.c 
b/lib/librte_eal/common/eal_common_devargs.c
index aaa6901..0916cf8 100644
--- a/lib/librte_eal/common/eal_common_devargs.c
+++ b/lib/librte_eal/common/eal_common_devargs.c
@@ -137,6 +137,19 @@ rte_eal_devargs_add(enum rte_devtype devtype, const char 
*devargs_str)
return 0;
 }

+/* remove it from the devargs_list */
+void
+rte_eal_devargs_remove(enum rte_devtype devtype, void *args)
+{
+   struct rte_devargs *devargs;
+
+   devargs = rte_eal_devargs_find(devtype, args);
+   if (devargs == NULL)
+   return;
+
+   TAILQ_REMOVE(&devargs_list, devargs, next);
+}
+
 /* count the number of devices of a specified type */
 unsigned int
 rte_eal_devargs_type_count(enum rte_devtype devtype)
diff --git a/lib/librte_eal/common/include/rte_devargs.h 
b/lib/librte_eal/common/include/rte_devargs.h
index 9f9c98f..57842b3 100644
--- a/lib/librte_eal/common/include/rte_devargs.h
+++ b/lib/librte_eal/common/include/rte_devargs.h
@@ -123,6 +123,24 @@ extern struct rte_devargs_list devargs_list;
 int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str);

 /**
+ * Remove a device from the user device list
+ *
+ * For PCI devices, the format of arguments string is "PCI_ADDR". It shouldn't
+ * involve parameters for the device. Example: "08:00.1".
+ *
+ * For virtual devices, the format of arguments string is "DRIVER_NAME*". It
+ * shouldn't involve parameters for the device. Example: "eth_ring". The
+ * validity of the driver name is not checked by this function, it is done
+ * when closing the drivers.
+ *
+ * @param devtype
+ *   The type of the device.
+ * @param args
+ *   The PCI address or the name of the device.
+ */
+void rte_eal_devargs_remove(enum rte_devtype devtype, void *args);
+
+/**
  * Count the number of user devices of a specified type
  *
  * @param devtype
-- 
1.9.1



[dpdk-dev] [PATCH 13/25] eal/pci: Prevent double registration for devargs_list

2014-11-20 Thread Tetsuya Mukawa
The patch changes rte_eal_devargs_add() so that it does not register the same device twice.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_eal/common/eal_common_devargs.c | 32 ++
 1 file changed, 32 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_devargs.c 
b/lib/librte_eal/common/eal_common_devargs.c
index 4c7d11a..aaa6901 100644
--- a/lib/librte_eal/common/eal_common_devargs.c
+++ b/lib/librte_eal/common/eal_common_devargs.c
@@ -44,6 +44,32 @@
 struct rte_devargs_list devargs_list =
TAILQ_HEAD_INITIALIZER(devargs_list);

+
+/* find an entry specified by pci address or device name */
+static struct rte_devargs *
+rte_eal_devargs_find(enum rte_devtype devtype, void *args)
+{
+   struct rte_devargs *devargs;
+
+   TAILQ_FOREACH(devargs, &devargs_list, next) {
+   switch (devtype) {
+   case RTE_DEVTYPE_WHITELISTED_PCI:
+   case RTE_DEVTYPE_BLACKLISTED_PCI:
+   if (eal_compare_pci_addr(&devargs->pci.addr, args) == 0)
+   goto found;
+   break;
+   case RTE_DEVTYPE_VIRTUAL:
+   if (memcmp(&devargs->virtual.drv_name, args,
+   strlen((char *)args)) == 0)
+   goto found;
+   break;
+   }
+   }
+   return NULL;
+found:
+   return devargs;
+}
+
 /* store a whitelist parameter for later parsing */
 int
 rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str)
@@ -101,6 +127,12 @@ rte_eal_devargs_add(enum rte_devtype devtype, const char 
*devargs_str)
break;
}

+   /* make sure there is no same entry */
+   if (rte_eal_devargs_find(devtype, &devargs->pci.addr)) {
+   RTE_LOG(ERR, EAL, "device already registered: <%s>\n", buf);
+   return -1;
+   }
+
TAILQ_INSERT_TAIL(&devargs_list, devargs, next);
return 0;
 }
-- 
1.9.1



[dpdk-dev] [PATCH 12/25] ethdev: Change scope of rte_eth_dev_allocated to global

2014-11-20 Thread Tetsuya Mukawa
This function is used by virtual PMDs to support the port hotplug framework,
so change the scope of the function to global.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_ether/rte_ethdev.c |  2 +-
 lib/librte_ether/rte_ethdev.h | 10 ++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 0d2397b..c697d83 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -196,7 +196,7 @@ rte_eth_dev_data_alloc(void)
RTE_MAX_ETHPORTS * sizeof(*rte_eth_dev_data));
 }

-static struct rte_eth_dev *
+struct rte_eth_dev *
 rte_eth_dev_allocated(const char *name)
 {
unsigned i;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 49b4e9b..0846bff 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1708,6 +1708,16 @@ extern int rte_eth_dev_get_name_by_port(uint8_t port_id, 
char *name);
 extern int rte_eth_dev_check_detachable(uint8_t port_id);

 /**
+ * Function for internal use by port hotplug functions.
+ * Returns a ethdev slot specified by the unique identifier name.
+ * @param  name
+ *  The pointer to the Unique identifier name for each Ethernet device
+ * @return
+ *   - The pointer to the ethdev slot, on success. NULL on error
+ */
+extern struct rte_eth_dev *rte_eth_dev_allocated(const char *name);
+
+/**
  * Function for internal use by dummy drivers primarily, e.g. ring-based
  * driver.
  * Allocates a new ethdev slot for an ethernet device and returns the pointer
-- 
1.9.1



[dpdk-dev] [PATCH 11/25] ethdev: Add rte_eth_dev_check_detachable

2014-11-20 Thread Tetsuya Mukawa
The function returns whether a PMD supports detach function, or not.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_ether/rte_ethdev.c |  9 +
 lib/librte_ether/rte_ethdev.h | 11 +++
 2 files changed, 20 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 372ab7d..0d2397b 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -466,6 +466,15 @@ rte_eth_dev_get_name_by_port(uint8_t port_id, char *name)
return 0;
 }

+int
+rte_eth_dev_check_detachable(uint8_t port_id)
+{
+   uint32_t drv_flags;
+
+   drv_flags = rte_eth_devices[port_id].driver->pci_drv.drv_flags;
+   return !!(drv_flags & RTE_PCI_DRV_DETACHABLE);
+}
+
 static int
 rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
 {
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 2087daf..49b4e9b 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1697,6 +1697,17 @@ extern int rte_eth_dev_get_port_by_addr(
 extern int rte_eth_dev_get_name_by_port(uint8_t port_id, char *name);

 /**
+ * Function for internal use by port hotplug functions.
+ * Check whether the PMD that is handling the ethdev specified by the port
+ * identifier supports the detach function.
+ * @param  port_id
+ *   The port identifier
+ * @return
+ *   - 1 if the PMD supports the detach function, 0 otherwise
+ */
+extern int rte_eth_dev_check_detachable(uint8_t port_id);
+
+/**
  * Function for internal use by dummy drivers primarily, e.g. ring-based
  * driver.
  * Allocates a new ethdev slot for an ethernet device and returns the pointer
-- 
1.9.1



[dpdk-dev] [PATCH 10/25] ethdev: Add rte_eth_dev_get_name_by_port

2014-11-20 Thread Tetsuya Mukawa
The function returns the unique identifier name of an ethdev specified by
its port identifier.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_ether/rte_ethdev.c | 17 +
 lib/librte_ether/rte_ethdev.h | 12 
 2 files changed, 29 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index fa5b928..372ab7d 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -449,6 +449,23 @@ rte_eth_dev_get_port_by_addr(struct rte_pci_addr *addr, 
uint8_t *port_id)
return -1;
 }

+int
+rte_eth_dev_get_name_by_port(uint8_t port_id, char *name)
+{
+   char *tmp;
+
+   if (rte_eth_dev_validate_port(port_id)) {
+   PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+   return -EINVAL;
+   }
+
+   /* shouldn't check 'rte_eth_devices[i].data',
+* because it might be overwritten by VDEV PMD */
+   tmp = rte_eth_dev_data[port_id].name;
+   strncpy(name, tmp, strlen(tmp) + 1);
+   return 0;
+}
+
 static int
 rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
 {
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 8bdc8ae..2087daf 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1685,6 +1685,18 @@ extern int rte_eth_dev_get_port_by_addr(
struct rte_pci_addr *addr, uint8_t *port_id);

 /**
+ * Function for internal use by port hotplug functions.
+ * Returns a unique identifier name of a ethdev specified by port identifier.
+ * @param  port_id
+ *   The port identifier.
+ * @param  name
+ *  The pointer to the Unique identifier name for each Ethernet device
+ * @return
+ *   - 0 on success, negative on error
+ */
+extern int rte_eth_dev_get_name_by_port(uint8_t port_id, char *name);
+
+/**
  * Function for internal use by dummy drivers primarily, e.g. ring-based
  * driver.
  * Allocates a new ethdev slot for an ethernet device and returns the pointer
-- 
1.9.1



[dpdk-dev] [PATCH 09/25] ethdev: Add rte_eth_dev_get_port_by_addr

2014-11-20 Thread Tetsuya Mukawa
The function returns the port identifier of an ethdev specified by its PCI
address.

v3:
- Fix if-condition bug while comparing pci addresses.
- Add error checking codes.
Reported-by: Mark Enright 

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_ether/rte_ethdev.c | 17 +
 lib/librte_ether/rte_ethdev.h | 13 +
 2 files changed, 30 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index fa75ea9..fa5b928 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -432,6 +432,23 @@ rte_eth_dev_get_addr_by_port(uint8_t port_id, struct 
rte_pci_addr *addr)
return 0;
 }

+int
+rte_eth_dev_get_port_by_addr(struct rte_pci_addr *addr, uint8_t *port_id)
+{
+   struct rte_pci_addr *tmp;
+
+   for (*port_id = 0; *port_id < RTE_MAX_ETHPORTS; (*port_id)++) {
+   if (!rte_eth_devices[*port_id].attached)
+   continue;
+   if (!rte_eth_devices[*port_id].pci_dev)
+   continue;
+   tmp = &rte_eth_devices[*port_id].pci_dev->addr;
+   if (eal_compare_pci_addr(tmp, addr) == 0)
+   return 0;
+   }
+   return -1;
+}
+
 static int
 rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
 {
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index fd4aa5a..8bdc8ae 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1672,6 +1672,19 @@ extern int rte_eth_dev_get_addr_by_port(
uint8_t port_id, struct rte_pci_addr *addr);

 /**
+ * Function for internal use by port hotplug functions.
+ * Returns a port identifier of a ethdev specified by pci address.
+ * @param  addr
+ *   The pointer to the pci address of the Ethernet device.
+ * @param  port_id
+ *   The pointer to the port identifier
+ * @return
+ *   - 0 on success, negative on error
+ */
+extern int rte_eth_dev_get_port_by_addr(
+   struct rte_pci_addr *addr, uint8_t *port_id);
+
+/**
  * Function for internal use by dummy drivers primarily, e.g. ring-based
  * driver.
  * Allocates a new ethdev slot for an ethernet device and returns the pointer
-- 
1.9.1



[dpdk-dev] [PATCH 08/25] ethdev: Add rte_eth_dev_get_addr_by_port

2014-11-20 Thread Tetsuya Mukawa
The function returns the PCI address of an ethdev specified by its port
identifier.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_ether/rte_ethdev.c | 12 
 lib/librte_ether/rte_ethdev.h | 13 +
 2 files changed, 25 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 5e1b6b9..fa75ea9 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -420,6 +420,18 @@ rte_eth_dev_get_changed_port(struct rte_eth_dev *devs, 
uint8_t *port_id)
return 1;
 }

+int
+rte_eth_dev_get_addr_by_port(uint8_t port_id, struct rte_pci_addr *addr)
+{
+   if (rte_eth_dev_validate_port(port_id)) {
+   PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+   return -EINVAL;
+   }
+
+   *addr = rte_eth_devices[port_id].pci_dev->addr;
+   return 0;
+}
+
 static int
 rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
 {
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 683b79c..fd4aa5a 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1659,6 +1659,19 @@ extern int rte_eth_dev_get_changed_port(
struct rte_eth_dev *devs, uint8_t *port_id);

 /**
+ * Function for internal use by port hotplug functions.
+ * Returns a pci address of a ethdev specified by port identifier.
+ * @param  port_id
+ *   The port identifier of the Ethernet device
+ * @param  addr
+ *   The pointer to the pci address
+ * @return
+ *   - 0 on success, negative on error
+ */
+extern int rte_eth_dev_get_addr_by_port(
+   uint8_t port_id, struct rte_pci_addr *addr);
+
+/**
  * Function for internal use by dummy drivers primarily, e.g. ring-based
  * driver.
  * Allocates a new ethdev slot for an ethernet device and returns the pointer
-- 
1.9.1



[dpdk-dev] [PATCH 07/25] ethdev: Add functions to know which port is attached or detached

2014-11-20 Thread Tetsuya Mukawa
The patch adds rte_eth_dev_save() and rte_eth_dev_get_changed_port().
rte_eth_dev_save() is used for saving the current rte_eth_dev structures.
rte_eth_dev_get_changed_port() receives the saved rte_eth_dev structures, then
compares them with the current values to find out which port was actually
attached or detached.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_ether/rte_ethdev.c | 18 ++
 lib/librte_ether/rte_ethdev.h | 21 +
 2 files changed, 39 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index c9f82d9..5e1b6b9 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -402,6 +402,24 @@ rte_eth_dev_count(void)
return (nb_ports);
 }

+void
+rte_eth_dev_save(struct rte_eth_dev *devs)
+{
+   /* save current rte_eth_devices */
+   memcpy(devs, rte_eth_devices,
+   sizeof(struct rte_eth_dev) * RTE_MAX_ETHPORTS);
+}
+
+int
+rte_eth_dev_get_changed_port(struct rte_eth_dev *devs, uint8_t *port_id)
+{
+   /* check which port was attached or detached */
+   for (*port_id = 0; *port_id < RTE_MAX_ETHPORTS; (*port_id)++, devs++)
+   if (rte_eth_devices[*port_id].attached ^ devs->attached)
+   return 0;
+   return 1;
+}
+
 static int
 rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
 {
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 558d4d3..683b79c 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1638,6 +1638,27 @@ extern struct rte_eth_dev rte_eth_devices[];
 extern uint8_t rte_eth_dev_count(void);

 /**
+ * Function for internal use by port hotplug functions.
+ * Copies current ethdev structures to the specified pointer.
+ *
+ * @param  devs    The pointer to the ethdev structures
+ */
+extern void rte_eth_dev_save(struct rte_eth_dev *devs);
+
+/**
+ * Function for internal use by port hotplug functions.
+ * Compare the specified ethdev structures with the current ones. If there
+ * is a port whose attached status has changed, fill the specified pointer
+ * with the port id of that port.
+ * @param  devs    The pointer to the ethdev structures
+ * @param  port_id The pointer to the port id
+ * @return
+ *   - 0 if a changed port was found, 1 if no port has changed
+ */
+extern int rte_eth_dev_get_changed_port(
+   struct rte_eth_dev *devs, uint8_t *port_id);
+
+/**
  * Function for internal use by dummy drivers primarily, e.g. ring-based
  * driver.
  * Allocates a new ethdev slot for an ethernet device and returns the pointer
-- 
1.9.1



[dpdk-dev] [PATCH 06/25] ethdev: Add rte_eth_dev_shutdown for closing PCI devices.

2014-11-20 Thread Tetsuya Mukawa
rte_eth_dev_shutdown() is called when a PCI device is closed.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_ether/rte_ethdev.c | 37 +
 1 file changed, 37 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index f217e14..c9f82d9 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -321,6 +321,42 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv,
return diag;
 }

+static int
+rte_eth_dev_shutdown(struct rte_pci_driver *pci_drv,
+struct rte_pci_device *pci_dev)
+{
+   struct eth_driver *eth_drv;
+   struct rte_eth_dev *eth_dev;
+   char ethdev_name[RTE_ETH_NAME_MAX_LEN];
+
+   /* Create unique Ethernet device name using PCI address */
+   snprintf(ethdev_name, RTE_ETH_NAME_MAX_LEN, "%d:%d.%d",
+   pci_dev->addr.bus, pci_dev->addr.devid,
+   pci_dev->addr.function);
+
+   eth_dev = rte_eth_dev_free(ethdev_name);
+   if (eth_dev == NULL)
+   return -ENODEV;
+
+   eth_drv = (struct eth_driver *)pci_drv;
+
+   /* Invoke PMD device shutdown function */
+   if (*eth_drv->eth_dev_shutdown)
+   (*eth_drv->eth_dev_shutdown)(eth_drv, eth_dev);
+
+   /* init user callbacks */
+   TAILQ_INIT(&(eth_dev->callbacks));
+
+   if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+   rte_free(eth_dev->data->dev_private);
+
+   eth_dev->pci_dev = NULL;
+   eth_dev->driver = NULL;
+   eth_dev->data = NULL;
+
+   return 0;
+}
+
 /**
  * Register an Ethernet [Poll Mode] driver.
  *
@@ -339,6 +375,7 @@ void
 rte_eth_driver_register(struct eth_driver *eth_drv)
 {
eth_drv->pci_drv.devinit = rte_eth_dev_init;
+   eth_drv->pci_drv.devshutdown = rte_eth_dev_shutdown;
rte_eal_pci_register(&eth_drv->pci_drv);
 }

-- 
1.9.1



[dpdk-dev] [PATCH 05/25] eal, ethdev: Add function pointer for closing a device

2014-11-20 Thread Tetsuya Mukawa
The patch adds a shutdown function pointer to the rte_pci_driver and eth_driver
structures. These function pointers are used when ports are detached.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_eal/common/include/rte_pci.h | 7 +++
 lib/librte_ether/rte_ethdev.h   | 4 
 2 files changed, 11 insertions(+)

diff --git a/lib/librte_eal/common/include/rte_pci.h 
b/lib/librte_eal/common/include/rte_pci.h
index fe374a8..74720d1 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -181,12 +181,19 @@ struct rte_pci_driver;
 typedef int (pci_devinit_t)(struct rte_pci_driver *, struct rte_pci_device *);

 /**
+ * Shutdown function for the driver called during hotplugging.
+ */
+typedef int (pci_devshutdown_t)(
+   struct rte_pci_driver *, struct rte_pci_device *);
+
+/**
  * A structure describing a PCI driver.
  */
 struct rte_pci_driver {
TAILQ_ENTRY(rte_pci_driver) next;   /**< Next in list. */
const char *name;   /**< Driver name. */
pci_devinit_t *devinit; /**< Device init. function. */
+   pci_devshutdown_t *devshutdown; /**< Device shutdown function. 
*/
struct rte_pci_id *id_table;/**< ID table, NULL terminated. 
*/
uint32_t drv_flags; /**< Flags contolling handling 
of device. */
 };
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 548467c..558d4d3 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1696,6 +1696,9 @@ struct eth_driver;
 typedef int (*eth_dev_init_t)(struct eth_driver  *eth_drv,
  struct rte_eth_dev *eth_dev);

+typedef int (*eth_dev_shutdown_t)(struct eth_driver  *eth_drv,
+ struct rte_eth_dev *eth_dev);
+
 /**
  * @internal
  * The structure associated with a PMD Ethernet driver.
@@ -1712,6 +1715,7 @@ typedef int (*eth_dev_init_t)(struct eth_driver  *eth_drv,
 struct eth_driver {
struct rte_pci_driver pci_drv;/**< The PMD is also a PCI driver. */
eth_dev_init_t eth_dev_init;  /**< Device init function. */
+   eth_dev_shutdown_t eth_dev_shutdown;/**< Device shutdown function. */
unsigned int dev_private_size;/**< Size of device private data. */
 };

-- 
1.9.1



[dpdk-dev] [PATCH 04/25] ethdev: Add rte_eth_dev_free to free specified device

2014-11-20 Thread Tetsuya Mukawa
This patch adds rte_eth_dev_free(). The function is used for clearing the
attached status of the device that has the specified name.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_ether/rte_ethdev.c | 17 +
 lib/librte_ether/rte_ethdev.h | 11 +++
 2 files changed, 28 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 446c53a..f217e14 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -249,6 +249,23 @@ rte_eth_dev_allocate(const char *name)
return eth_dev;
 }

+struct rte_eth_dev *
+rte_eth_dev_free(const char *name)
+{
+   struct rte_eth_dev *eth_dev;
+
+   eth_dev = rte_eth_dev_allocated(name);
+   if (eth_dev == NULL) {
+   PMD_DEBUG_TRACE("Ethernet Device with name %s doesn't exist!\n",
+   name);
+   return NULL;
+   }
+
+   eth_dev->attached = 0;
+   nb_ports--;
+   return eth_dev;
+}
+
 static int
 rte_eth_dev_init(struct rte_pci_driver *pci_drv,
 struct rte_pci_device *pci_dev)
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 2cd1c43..548467c 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1649,6 +1649,17 @@ extern uint8_t rte_eth_dev_count(void);
  */
 struct rte_eth_dev *rte_eth_dev_allocate(const char *name);

+/**
+ * Function for internal use by dummy drivers primarily, e.g. ring-based
+ * driver.
+ * Frees the specified ethdev and returns the pointer to that slot.
+ *
+ * @param  name    Unique identifier name for each Ethernet device
+ * @return
+ *   - Slot in rte_eth_devices for the freed device, or NULL if not found
+ */
+struct rte_eth_dev *rte_eth_dev_free(const char *name);
+
 struct eth_driver;
 /**
  * @internal
-- 
1.9.1



[dpdk-dev] [PATCH 03/25] eal/pci: Replace pci address comparison code by eal_compare_pci_addr

2014-11-20 Thread Tetsuya Mukawa
This patch replaces pci_addr_comparison() and memcmp() of pci addresses by
eal_compare_pci_addr().

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_eal/bsdapp/eal/eal_pci.c   | 16 +---
 lib/librte_eal/common/eal_common_pci.c|  2 +-
 lib/librte_eal/common/include/rte_pci.h   | 29 +
 lib/librte_eal/linuxapp/eal/eal_pci.c | 16 +---
 lib/librte_eal/linuxapp/eal/eal_pci_uio.c |  2 +-
 5 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c 
b/lib/librte_eal/bsdapp/eal/eal_pci.c
index 74ecce7..7eda513 100644
--- a/lib/librte_eal/bsdapp/eal/eal_pci.c
+++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
@@ -270,20 +270,6 @@ pci_uio_map_resource(struct rte_pci_device *dev)
return (0);
 }

-/* Compare two PCI device addresses. */
-static int
-pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr *addr2)
-{
-   uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + 
(addr->devid << 8) + addr->function;
-   uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + 
(addr2->devid << 8) + addr2->function;
-
-   if (dev_addr > dev_addr2)
-   return 1;
-   else
-   return 0;
-}
-
-
 /* Scan one pci sysfs entry, and fill the devices list from it. */
 static int
 pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
@@ -358,7 +344,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
struct rte_pci_device *dev2 = NULL;

TAILQ_FOREACH(dev2, &pci_device_list, next) {
-   if (pci_addr_comparison(&dev->addr, &dev2->addr))
+   if (eal_compare_pci_addr(&dev->addr, &dev2->addr))
continue;
else {
TAILQ_INSERT_BEFORE(dev2, dev, next);
diff --git a/lib/librte_eal/common/eal_common_pci.c 
b/lib/librte_eal/common/eal_common_pci.c
index f3c7f71..f01f258 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -93,7 +93,7 @@ static struct rte_devargs *pci_devargs_lookup(struct 
rte_pci_device *dev)
if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
continue;
-   if (!memcmp(&dev->addr, &devargs->pci.addr, sizeof(dev->addr)))
+   if (eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
return devargs;
}
return NULL;
diff --git a/lib/librte_eal/common/include/rte_pci.h 
b/lib/librte_eal/common/include/rte_pci.h
index b819539..fe374a8 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -261,6 +261,35 @@ eal_parse_pci_DomBDF(const char *input, struct 
rte_pci_addr *dev_addr)
 }
 #undef GET_PCIADDR_FIELD

+/* Compare two PCI device addresses. */
+/**
+ * Utility function to compare two PCI device addresses.
+ *
+ * @param addr
+ * The PCI Bus-Device-Function address to compare
+ * @param addr2
+ * The PCI Bus-Device-Function address to compare
+ * @return
+ *  0 on equal PCI address.
+ *  Positive on addr is greater than addr2.
+ *  Negative on addr is less than addr2.
+ */
+static inline int
+eal_compare_pci_addr(struct rte_pci_addr *addr, struct rte_pci_addr *addr2)
+{
+   uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) +
+   (addr->devid << 8) + addr->function;
+   uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) +
+   (addr2->devid << 8) + addr2->function;
+
+   if (dev_addr > dev_addr2)
+   return 1;
+   else if (dev_addr < dev_addr2)
+   return -1;
+   else
+   return 0;
+}
+
 /**
  * Probe the PCI bus for registered drivers.
  *
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index ddb0535..78df974 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -186,20 +186,6 @@ error:
return -1;
 }

-/* Compare two PCI device addresses. */
-static int
-pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr *addr2)
-{
-   uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + 
(addr->devid << 8) + addr->function;
-   uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + 
(addr2->devid << 8) + addr2->function;
-
-   if (dev_addr > dev_addr2)
-   return 1;
-   else
-   return 0;
-}
-
-
 /* Scan one pci sysfs entry, and fill the devices list from it. */
 static int
 pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
@@ -292,7 +278,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t 
bus,
struct rte_pci_device *dev2 = NULL;

TAILQ_FOREACH(dev2, &pci_device_list, next) {
-

[dpdk-dev] [PATCH 02/25] ethdev: Remove assumption that port will not be detached

2014-11-20 Thread Tetsuya Mukawa
To remove this assumption, make the following changes:

- Add an 'attached' member to the rte_eth_dev structure.
  This member indicates whether or not the port is attached.
- Add rte_eth_dev_allocate_new_port().
  This function is used for allocating a new port.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_ether/rte_ethdev.c | 236 +++---
 lib/librte_ether/rte_ethdev.h |   5 +
 2 files changed, 136 insertions(+), 105 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 8c65d72..446c53a 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -201,19 +201,33 @@ rte_eth_dev_allocated(const char *name)
 {
unsigned i;

-   for (i = 0; i < nb_ports; i++) {
-   if (strcmp(rte_eth_devices[i].data->name, name) == 0)
+   for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+   if (rte_eth_devices[i].attached && strcmp(
+   rte_eth_devices[i].data->name, name) == 0)
return &rte_eth_devices[i];
}
return NULL;
 }

+static uint8_t
+rte_eth_dev_allocate_new_port(void)
+{
+   unsigned i;
+
+   for (i = 0; i < RTE_MAX_ETHPORTS; i++)
+   if (!rte_eth_devices[i].attached)
+   return i;
+   return RTE_MAX_ETHPORTS;
+}
+
 struct rte_eth_dev *
 rte_eth_dev_allocate(const char *name)
 {
+   uint8_t port_id;
struct rte_eth_dev *eth_dev;

-   if (nb_ports == RTE_MAX_ETHPORTS) {
+   port_id = rte_eth_dev_allocate_new_port();
+   if (port_id == RTE_MAX_ETHPORTS) {
PMD_DEBUG_TRACE("Reached maximum number of Ethernet ports\n");
return NULL;
}
@@ -226,10 +240,12 @@ rte_eth_dev_allocate(const char *name)
return NULL;
}

-   eth_dev = &rte_eth_devices[nb_ports];
-   eth_dev->data = &rte_eth_dev_data[nb_ports];
+   eth_dev = &rte_eth_devices[port_id];
+   eth_dev->data = &rte_eth_dev_data[port_id];
snprintf(eth_dev->data->name, sizeof(eth_dev->data->name), "%s", name);
-   eth_dev->data->port_id = nb_ports++;
+   eth_dev->data->port_id = port_id;
+   eth_dev->attached = 1;
+   nb_ports++;
return eth_dev;
 }

@@ -283,6 +299,7 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv,
(unsigned) pci_dev->id.device_id);
if (rte_eal_process_type() == RTE_PROC_PRIMARY)
rte_free(eth_dev->data->dev_private);
+   eth_dev->attached = 0;
nb_ports--;
return diag;
 }
@@ -308,10 +325,19 @@ rte_eth_driver_register(struct eth_driver *eth_drv)
rte_eal_pci_register(&eth_drv->pci_drv);
 }

+static int
+rte_eth_dev_validate_port(uint8_t port_id)
+{
+   if (port_id >= RTE_MAX_ETHPORTS)
+   return 1;
+
+   return !rte_eth_devices[port_id].attached;
+}
+
 int
 rte_eth_dev_socket_id(uint8_t port_id)
 {
-   if (port_id >= nb_ports)
+   if (rte_eth_dev_validate_port(port_id))
return -1;
return rte_eth_devices[port_id].pci_dev->numa_node;
 }
@@ -369,7 +395,7 @@ rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t 
rx_queue_id)
 * in a multi-process setup*/
PROC_PRIMARY_OR_ERR_RET(-E_RTE_SECONDARY);

-   if (port_id >= nb_ports) {
+   if (rte_eth_dev_validate_port(port_id)) {
PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
return -EINVAL;
}
@@ -395,7 +421,7 @@ rte_eth_dev_rx_queue_stop(uint8_t port_id, uint16_t 
rx_queue_id)
 * in a multi-process setup*/
PROC_PRIMARY_OR_ERR_RET(-E_RTE_SECONDARY);

-   if (port_id >= nb_ports) {
+   if (rte_eth_dev_validate_port(port_id)) {
PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
return -EINVAL;
}
@@ -421,7 +447,7 @@ rte_eth_dev_tx_queue_start(uint8_t port_id, uint16_t 
tx_queue_id)
 * in a multi-process setup*/
PROC_PRIMARY_OR_ERR_RET(-E_RTE_SECONDARY);

-   if (port_id >= nb_ports) {
+   if (rte_eth_dev_validate_port(port_id)) {
PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
return -EINVAL;
}
@@ -447,7 +473,7 @@ rte_eth_dev_tx_queue_stop(uint8_t port_id, uint16_t 
tx_queue_id)
 * in a multi-process setup*/
PROC_PRIMARY_OR_ERR_RET(-E_RTE_SECONDARY);

-   if (port_id >= nb_ports) {
+   if (rte_eth_dev_validate_port(port_id)) {
PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
return -EINVAL;
}
@@ -662,7 +688,7 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, 
uint16_t nb_tx_q,
 * in a multi-process setup*/
PROC_PRIMARY_OR_ERR_RET(-E_RTE_SECONDARY);

-   if (port_id >= nb_ports || port_id >= RTE_MAX_ETHPORTS) {
+   if (rte_eth_dev_validate_port(port_id)) {
PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
return (-EINVAL);
   

[dpdk-dev] [PATCH 01/25] eal/pci: Add a new flag indicating a driver can detach devices at runtime.

2014-11-20 Thread Tetsuya Mukawa
This patch adds "RTE_PCI_DRV_DETACHABLE" to drv_flags of rte_pci_driver
structure. The flag indicates that the driver can detach devices at runtime.

Signed-off-by: Tetsuya Mukawa 
---
 lib/librte_eal/common/include/rte_pci.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/librte_eal/common/include/rte_pci.h 
b/lib/librte_eal/common/include/rte_pci.h
index 66ed793..b819539 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -199,6 +199,8 @@ struct rte_pci_driver {
 #define RTE_PCI_DRV_FORCE_UNBIND 0x0004
 /** Device driver supports link state interrupt */
 #define RTE_PCI_DRV_INTR_LSC   0x0008
+/** Device driver supports detaching capability */
+#define RTE_PCI_DRV_DETACHABLE 0x0010

 /**< Internal use only - Macro used by pci addr parsing functions **/
 #define GET_PCIADDR_FIELD(in, fd, lim, dlm)   \
-- 
1.9.1



[dpdk-dev] [PATCH 00/25] Port Hotplug Framework

2014-11-20 Thread Tetsuya Mukawa
This patch series adds a dynamic port hotplug framework to DPDK.
With the patches, DPDK apps can attach or detach ports at runtime.

The basic concept of the port hotplug is as follows.
- DPDK apps must take responsibility for managing ports.
  DPDK apps only know which ports are attached or detached at the moment.
  The port hotplug framework is implemented to allow DPDK apps to manage ports.
  For example, when DPDK apps call port attach function, attached port number
  will be returned. Also DPDK apps can detach port by port number.
- Kernel support is needed for attaching or detaching physical device ports.
  To attach new device, the device will be recognized by kernel at first and
  controlled by kernel driver. Then user can bind the device to igb_uio
  by 'dpdk_nic_bind.py'. Finally, DPDK apps can call the port hotplug
  functions to attach ports.
  For detaching, steps are vice versa.
- Before detaching ports, the ports must be stopped and closed.
  A DPDK application must call rte_eth_dev_stop() and rte_eth_dev_close() before
  detaching ports. These functions will call the finalization code of the PMDs.
  But so far, no PMD frees all resources allocated by initialization.
  This means PMDs need to be fixed to support the port hotplug.
  'RTE_PCI_DRV_DETACHABLE' is a new flag indicating that a PMD supports detaching.
  Without this flag, detaching will fail.
- Mustn't affect legacy DPDK apps.
  No DPDK EAL behavior is changed if the port hotplug functions aren't called.
  So all legacy DPDK apps can still work without modifications.

And a few limitations.
- The port hotplug functions are not thread safe.
  DPDK apps should handle it.
- Only Linux and igb_uio are supported so far.
  BSD and VFIO are not supported. I will send VFIO patches at least, but I don't
  have a plan to submit a BSD patch so far.


Here are the port hotplug APIs.
---
/**
 * Attach a new physical device.
 *
 * @param addr
 *   A pointer to a pci address structure describing the new
 *   device to be attached.
 * @param port_id
 *  A pointer to a port identifier actually attached.
 * @return
 *  0 on success and port_id is filled, negative on error
 */
int rte_eal_dev_attach_pdev(struct rte_pci_addr *addr, uint8_t *port_id);

/**
 * Attach a new virtual device.
 *
 * @param vdevargs
 *   A pointer to a strings array describing the new device
 *   to be attached.
 * @param port_id
 *  A pointer to a port identifier actually attached.
 * @return
 *  0 on success and port_id is filled, negative on error
 */
int rte_eal_dev_attach_vdev(const char *vdevargs, uint8_t *port_id);

/**
 * Detach a physical device.
 *
 * @param port_id
 *   The port identifier of the physical device to detach.
 * @param addr
 *  A pointer to a pci address structure actually detached.
 * @return
 *  0 on success and addr is filled, negative on error
 */
int rte_eal_dev_detach_pdev(uint8_t port_id, struct rte_pci_addr *addr);

/**
 * Detach a virtual device.
 *
 * @param port_id
 *   The port identifier of the virtual device to detach.
 * @param vdevname
 *  A pointer to a virtual device name actually detached.
 * @return
 *  0 on success and vdevname is filled, negative on error
 */
int rte_eal_dev_detach_vdev(uint8_t port_id, char *vdevname);
---

This patch series is for DPDK EAL. To use the port hotplug functions from DPDK apps,
each PMD should be fixed to support 'RTE_PCI_DRV_DETACHABLE' flag. Please check
a patch for pcap PMD.

Also please check testpmd patch. It will show you how to fix your legacy
applications to support port hotplug feature.


PATCH v1 Changes:
 - Fix error checking code of librte_eth APIs.
 - Fix issue that port from pcap PMD cannot be detached correctly.
 - Fix issue that testpmd could hang after forwarding, if attaching and
   detaching are repeated.
 - Fix if-condition of rte_eth_dev_get_port_by_addr().
   (Thanks to Mark Enright)

RFC PATCH v2 Changes:
- remove 'rte_eth_dev_validate_port()', and cleanup codes.


Tetsuya Mukawa (25):
  eal/pci: Add a new flag indicating a driver can detach devices at
runtime.
  ethdev: Remove assumption that port will not be detached
  eal/pci: Replace pci address comparison code by eal_compare_pci_addr
  ethdev: Add rte_eth_dev_free to free specified device
  eal,ethdev: Add function pointer for closing a device
  ethdev: Add rte_eth_dev_shutdown for closing PCI devices.
  ethdev: Add functions to know which port is attached or detached
  ethdev: Add rte_eth_dev_get_addr_by_port
  ethdev: Add rte_eth_dev_get_port_by_addr
  ethdev: Add rte_eth_dev_get_name_by_port
  ethdev: Add rte_eth_dev_check_detachable
  ethdev: Change scope of rte_eth_dev_allocated to global
  eal/pci: Prevent double registration for devargs_list
  eal/pci: Add rte_eal_devargs_remove
  eal/pci: Add probe and close function for virtual drivers
  eal/pci: Add port hotplug functions for virtual devices.
  

[dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload

2014-11-20 Thread Olivier MATZ
Hi Jijiang,

On 11/20/2014 08:28 AM, Liu, Jijiang wrote:
>> The original behavior (without your vxlan patches), which still works today, 
>> is to
>> select inner or outer using the m->l2_len field:
>>
>>- checksum outer IP + UDP
>>  m->l2_len=14 m->l3_len=20
>>  flags=PKT_TX_IP_CKSUM PKT_TX_UDP_CKSUM
>>
>>- checksum inner IP + UDP
>>  m->l2_len=64 m->l3_len=20
>>  flags=PKT_TX_IP_CKSUM PKT_TX_UDP_CKSUM
>>  of course, the packet is valid only if the outer IP checksum is
>>  already correct and outer UDP checksum is 0
>>
>> If i40e does not act like this, it does not follow the previous API.
>
> No,  i40e follows this.

OK. This is assumption (A):
To calculate the inner IP + UDP checksum, you don't need VXLAN flag.
You just acked it.

>>> 2. only set inner L3/L4 header TX checksum
>>>  tx_checksum set 0x30 0
>>>In this case, the PKT_TX_VXLAN_CKSUM flag is set, so driver think it is 
>>> VXLAN
>> packet, and we don't need to change outer ones because we don't set outer 
>> flags
>> here (PKT_TX_IPV4_CSUM, PKT_TX_UDP_CKSUM).

Assumption (B):
To calculate the inner IP + UDP checksum (this is what you wrote "only
set inner L3/L4 header TX checksum"), you say you set the VXLAN flag.
This is the opposite of (A).

>> As explained above, there is no need to set the PKT_TX_VXLAN_CKSUM if you
>> only want to set the inner L3/L4 checksum.
>> This was already working like this
>> before your patches, as long as l2_len and l3_len are set properly in the 
>> mbuf
>> (l2_len should include the outer headers).
>
> Does VXLAN TX checksum offload or ordinary L2 packet TX checksum offload work?
> Have you ever tested it on a NIC that supports VXLAN.

You don't answer the question: which between (A) or (B) is correct.

I'm sorry I don't understand your question above.

I have done no test on i40e, because I don't have access to
this hardware.

> The PKT_TX_VXLAN_CKSUM flag meaning just tell driver this is encapsulation 
> packet, so driver should set TX checksum offload for the packet using outer 
> l2/l3 len, inner l2/l3 len and tunneling header length.
>
> If you don't like this flag name, I can change it for  PKT_TX_TUNNEL_CKSUM, 
> which have more generic meaning.

The problem is not only the name. After tens of mails, I'm still not
able to understand the VxLAN checksum API.

I wanted to rework the csum forward engine code, because it is not
understandable today. I wanted to clarify the API. But sorry I think
I'll give up now.

>> Moreover, PKT_TX_IPV4_CSUM, PKT_TX_UDP_CKSUM, ... are not "outer flags".
>> They are hardware checksum flags, and before your vxland patch, they 
>> concerned
>> the headers referenced by m->l2_len and m->l3_len.
>
> Actually, the  key point of debate is that you still think the l2_len filed 
> and the l3_len filed  in mbuf are inner part in the case of tunneling, right? 
>  If yes, let me explain what I thought.

This is not the only key point of debate. The very first key point is
that the VxLAN checksum offload API is not documented and I'm not
able to rework the csum code to use it.

> As you know, NIC itself is not responsible for packet decapsulation / 
> encapsulation at all. It sends and receives the whole packet, not only for 
> inner part in the case of tunneling. The translation from receive descriptor 
> to mbuf structure is also for the whole packet. And these fields defined in 
> mbuf structure are also for the whole packet, no matter it is tunneling or 
> non-tunneling.
>
> 1) We assume that a NIC can't  recognize VXLAN packet, when a packet  with 
> the format  outer IP / outer UDP / VxLAN / Ether / inner IP / inner UDP / 
> data is received,
>   do you think whether l2  header and l3 header length of this packet is 
> outer or inner,  according to my understanding, I think it is outer, and 
> m->l2_len and m->l3_len is also outer. Do you agree?

The l2_len and l3_len are never set up by any driver on rx side. Your
example does not apply.

These fields are set by the application (a network stack for instance)
to indicate to the driver and hardware where to find the l3 and l4
headers whose checksum need to be calculated.

The l2_len and l3_len do not refer to the inner or outer header. They refer
to the header that has to be checksum'd in hardware when the flag is
set. It can be inner or outer. At least, this was the case before the
addition of the VxLAN offload feature.


> 2) We also assume that a NIC can  recognize VXLAN packet,  but there is no 
> difference between 1)  and 2) on data in mbuf before patching my VXLAN patch, 
> so I also think  m->l2_len and m->l3_len is outer.  Do you agree?
> After patching my VXLAN, the inner_l2_len and inner_l3_len were used to stand 
> for inner header part.

Your argumentation would make sense if l2_len and l3_len were filled by
a NIC in RX functions. But that's not the case. Today, these fields are
only used in TX when a checksum flag is also set. And I think that a
flag should always refer to the same length fie

[dpdk-dev] [PATCH] cmdline: Fix broken functionality in FreeBSD

2014-11-20 Thread Bruce Richardson
On Thu, Nov 20, 2014 at 12:03:40PM -0500, Neil Horman wrote:
> On Thu, Nov 20, 2014 at 04:42:23PM +, Gonzalez Monroy, Sergio wrote:
> > > From: Neil Horman [mailto:nhorman at tuxdriver.com]
> > > Sent: Thursday, November 20, 2014 2:21 PM
> > > On Thu, Nov 20, 2014 at 02:17:13PM +, Sergio Gonzalez Monroy wrote:
> > > > Some features of the cmdline were broken in FreeBSD as a result of
> > > > termios not being compiled.
> > > >
> > > > Signed-off-by: Sergio Gonzalez Monroy
> > > > 
> > > 
> > > Not sure I understand the changelog above.  You're removing ifdefs below
> > > which makes sense, but are you now assuming that BSD will be built with
> > > termios support, or do you need to add some alternate dependency check
> > > during the configuration of DPDK?
> > > Neil
> > > 
> > Yes, I was assuming that BSD has termios support.
> > Is it not a fair assumption?
> > 
> No, I think its a perfectly fair assumption.  I was just trying to understand
> the history of the ifdefs there.  Sounds like it was a dumb idea to intiially
> ifdef the termios stuff out way back when.
> 

The ifdef probably dates from when there were just two versions of DPDK: linux
and baremetal. Guess which one didn't have the termios support :-)

/Bruce

> Acked-by: Neil Horman 
> 
> > Sergio
> > 


[dpdk-dev] one worker reading multiple ports

2014-11-20 Thread Newman Poborsky
Thank you for your answer.

I just realized that the reason the rte_eth_rx_burst() returns 0 is because
inside ixgbe_recv_pkts() this fails:
nmb = rte_rxmbuf_alloc(rxq->mb_pool);  => nmb is NULL

Does this mean that every RX queue should have its own rte_mempool?  If so,
are there any optimal values for: number of RX descriptors, per-queue
rte_mempool size, number of hugepages (from what I understand, these 3 are
correlated)?

If I'm wrong, please explain why.

Thanks!

BR,
Newman

On Thu, Nov 20, 2014 at 9:56 AM, De Lara Guarch, Pablo <
pablo.de.lara.guarch at intel.com> wrote:

> Hi Newman,
>
> > -Original Message-
> > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Newman Poborsky
> > Sent: Thursday, November 20, 2014 8:34 AM
> > To: dev at dpdk.org
> > Subject: [dpdk-dev] one worker reading multiple ports
> >
> > Hi,
> >
> > is it possible to use one worker thread (one lcore) to read packets from
> > multiple ports?
> >
> > When I start 2 workers and assign each one  to read from different ports
> > (with  rte_eth_rx_burst()) everything works fine, but if I assign one
> > worker to read packets from 2 ports, rte_eth_rx_burst() returns 0 as if
> no
> > packets are read.
>
> Yes, it is totally possible. The only problem would be if you try to use
> multiple threads
> to read/write on one port, in which case you should use multiple queues.
> Look at l3fwd app for instance. You can use just a single core to handle
> packets on multiple ports.
>
> Pablo
> >
> > Is there any reason for this kind of behaviour?
> >
> > Thanks!
> >
> > Br,
> > Newman P.
>


[dpdk-dev] [PATCH v2] i40e: link flow control support

2014-11-20 Thread zhida zang
From: zzang 

Add link flow control support for i40e

Signed-off-by: zhida zang 
---
 lib/librte_pmd_i40e/i40e_ethdev.c | 155 +-
 lib/librte_pmd_i40e/i40e_ethdev.h |  10 +++
 2 files changed, 162 insertions(+), 3 deletions(-)

diff --git a/lib/librte_pmd_i40e/i40e_ethdev.c 
b/lib/librte_pmd_i40e/i40e_ethdev.c
index a860af7..183b0be 100644
--- a/lib/librte_pmd_i40e/i40e_ethdev.c
+++ b/lib/librte_pmd_i40e/i40e_ethdev.c
@@ -69,6 +69,18 @@
 #define I40E_DEFAULT_TX_WTHRESH  0
 #define I40E_DEFAULT_TX_RSBIT_THRESH 32

+/* Flow control default timer */
+#define I40E_DEFAULT_PAUSE_TIME 0xFFFFU
+
+/* Flow control default high water */
+#define I40E_DEFAULT_HIGH_WATER 0x1C40
+
+/* Flow control default low water */
+#define I40E_DEFAULT_LOW_WATER  0x1A40
+
+/* Flow control enable fwd bit */
+#define I40E_PRTMAC_FWD_CTRL   0x0001
+
 /* Maximun number of MAC addresses */
 #define I40E_NUM_MACADDR_MAX   64
 #define I40E_CLEAR_PXE_WAIT_MS 200
@@ -98,6 +110,12 @@

 #define I40E_PRE_TX_Q_CFG_WAIT_US   10 /* 10 us */

+/* Receive Packet Buffer size */
+#define I40E_RXPBSIZE (968 * 1024)
+
+/* Receive Average Packet Size in Byte*/
+#define I40E_PACKET_AVERAGE_SIZE 128
+
 static int eth_i40e_dev_init(\
__attribute__((unused)) struct eth_driver *eth_drv,
struct rte_eth_dev *eth_dev);
@@ -131,6 +149,8 @@ static void i40e_vlan_strip_queue_set(struct rte_eth_dev 
*dev,
 static int i40e_vlan_pvid_set(struct rte_eth_dev *dev, uint16_t pvid, int on);
 static int i40e_dev_led_on(struct rte_eth_dev *dev);
 static int i40e_dev_led_off(struct rte_eth_dev *dev);
+static int i40e_flow_ctrl_get(struct rte_eth_dev *dev,
+ struct rte_eth_fc_conf *fc_conf);
 static int i40e_flow_ctrl_set(struct rte_eth_dev *dev,
  struct rte_eth_fc_conf *fc_conf);
 static int i40e_priority_flow_ctrl_set(struct rte_eth_dev *dev,
@@ -237,6 +257,7 @@ static struct eth_dev_ops i40e_eth_dev_ops = {
.tx_queue_release = i40e_dev_tx_queue_release,
.dev_led_on   = i40e_dev_led_on,
.dev_led_off  = i40e_dev_led_off,
+   .flow_ctrl_get= i40e_flow_ctrl_get,
.flow_ctrl_set= i40e_flow_ctrl_set,
.priority_flow_ctrl_set   = i40e_priority_flow_ctrl_set,
.mac_addr_add = i40e_macaddr_add,
@@ -358,6 +379,9 @@ eth_i40e_dev_init(__rte_unused struct eth_driver *eth_drv,
pf->adapter = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
pf->adapter->eth_dev = dev;
pf->dev_data = dev->data;
+   pf->fc_conf.pause_time = I40E_DEFAULT_PAUSE_TIME;
+   pf->fc_conf.high_water[0] = I40E_DEFAULT_HIGH_WATER;
+   pf->fc_conf.low_water[0] = I40E_DEFAULT_LOW_WATER;

hw->back = I40E_PF_TO_ADAPTER(pf);
hw->hw_addr = (uint8_t *)(pci_dev->mem_resource[0].addr);
@@ -1516,12 +1540,137 @@ i40e_dev_led_off(struct rte_eth_dev *dev)
 }

 static int
-i40e_flow_ctrl_set(__rte_unused struct rte_eth_dev *dev,
-  __rte_unused struct rte_eth_fc_conf *fc_conf)
+i40e_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
 {
+   struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+   struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+
+   fc_conf->pause_time = pf->fc_conf.pause_time;
+   fc_conf->high_water = (pf->fc_conf.high_water[0] *
+   I40E_PACKET_AVERAGE_SIZE) >> 10;
+   fc_conf->low_water = (pf->fc_conf.low_water[0] *
+   I40E_PACKET_AVERAGE_SIZE) >> 10;
+
+   /*
+* Return current mode according to actual setting
+*/
+   switch (hw->fc.current_mode) {
+   case I40E_FC_FULL:
+   fc_conf->mode = RTE_FC_FULL;
+   break;
+   case I40E_FC_TX_PAUSE:
+   fc_conf->mode = I40E_FC_TX_PAUSE;
+   break;
+   case I40E_FC_RX_PAUSE:
+   fc_conf->mode = I40E_FC_RX_PAUSE;
+   break;
+   case I40E_FC_NONE:
+   fc_conf->mode = RTE_FC_NONE;
+   break;
+   default:
+   break;
+   };
+
+   return 0;
+}
+
+static int
+i40e_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
+{
+   uint32_t mflcn_reg, fctrl_reg, reg;
+   uint32_t max_high_water;
+   uint8_t i, aq_failure;
+   int err;
+   enum i40e_fc_mode rte_fcmode_2_i40e_fcmode[] = {
+   I40E_FC_NONE,
+   I40E_FC_RX_PAUSE,
+   I40E_FC_TX_PAUSE,
+   I40E_FC_FULL
+   };
+
+   max_high_water = I40E_RXPBSIZE >> 10;
+   if ((fc_conf->high_water > max_high_water) ||
+   (fc_conf->high_water < fc_conf->low_water)) {
+   PMD_INIT_LOG(ERR, "Invalid high/low water setup value in KB, "
+   "High

[dpdk-dev] [PATCH] cmdline: Fix broken functionality in FreeBSD

2014-11-20 Thread Gonzalez Monroy, Sergio
> From: Neil Horman [mailto:nhorman at tuxdriver.com]
> Sent: Thursday, November 20, 2014 2:21 PM
> On Thu, Nov 20, 2014 at 02:17:13PM +, Sergio Gonzalez Monroy wrote:
> > Some features of the cmdline were broken in FreeBSD as a result of
> > termios not being compiled.
> >
> > Signed-off-by: Sergio Gonzalez Monroy
> > 
> 
> Not sure I understand the changelog above.  You're removing ifdefs below
> which makes sense, but are you now assuming that BSD will be built with
> termios support, or do you need to add some alternate dependency check
> during the configuration of DPDK?
> Neil
> 
Yes, I was assuming that BSD has termios support.
Is it not a fair assumption?

Sergio


[dpdk-dev] [PATCH v2] i40e: fixed tx packets stats bug

2014-11-20 Thread zhida zang
From: zzang 

i40e only counts tx packets that are sent by the pf. But in some conditions packets can be 
sent by the NIC without being counted by the pf. So count vsi tx packets instead.

Signed-off-by: zhida zang 
---
 lib/librte_pmd_i40e/i40e_ethdev.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/lib/librte_pmd_i40e/i40e_ethdev.c 
b/lib/librte_pmd_i40e/i40e_ethdev.c
index 4b7a827..c1ea37e 100644
--- a/lib/librte_pmd_i40e/i40e_ethdev.c
+++ b/lib/librte_pmd_i40e/i40e_ethdev.c
@@ -1102,6 +1102,7 @@ i40e_dev_stats_get(struct rte_eth_dev *dev, struct 
rte_eth_stats *stats)
struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
struct i40e_hw_port_stats *ns = &pf->stats; /* new stats */
struct i40e_hw_port_stats *os = &pf->stats_offset; /* old stats */
+   struct i40e_eth_stats *ves = &pf->main_vsi->eth_stats; /* vsi stats */

/* Get statistics of struct i40e_eth_stats */
i40e_stat_update_48(hw, I40E_GLPRT_GORCH(hw->port),
@@ -1277,15 +1278,18 @@ i40e_dev_stats_get(struct rte_eth_dev *dev, struct 
rte_eth_stats *stats)
/* GLPRT_MSPDC not supported */
/* GLPRT_XEC not supported */

-   pf->offset_loaded = true;
-
if (pf->main_vsi)
i40e_update_vsi_stats(pf->main_vsi);

stats->ipackets = ns->eth.rx_unicast + ns->eth.rx_multicast +
ns->eth.rx_broadcast;
-   stats->opackets = ns->eth.tx_unicast + ns->eth.tx_multicast +
+   if (pf->offset_loaded)
+   stats->opackets = ves->tx_unicast + ves->tx_multicast +
+   ves->tx_broadcast;
+   else
+   stats->opackets = ns->eth.tx_unicast + ns->eth.tx_multicast +
ns->eth.tx_broadcast;
+   pf->offset_loaded = true;
stats->ibytes   = ns->eth.rx_bytes;
stats->obytes   = ns->eth.tx_bytes;
stats->oerrors  = ns->eth.tx_errors;
-- 
1.9.3



[dpdk-dev] [PULL REQUEST] doc: Various document fixes in testpmd UG

2014-11-20 Thread Bernard Iremonger
These changes are DPDK 1.8 modifications and some corrections to the TestPMD 
Application User Guide.

The following changes since commit fc1f2750a3ec6da919e3c86e59d56f34ec97154b:

  doc: programmers guide (2014-11-18 14:49:54 +0100)

are available in the git repository at:
  git://dpdk.org/next/dpdk-doc  master

Pablo de Lara (4):
  doc: Added new commands in testpmd UG
  doc: Corrected info for tx_checksum set mask function, in testpmd UG
  doc: Moved commands in testpmd UG to match testpmd command help order
  doc: Various document fixes in testpmd UG

 doc/guides/testpmd_app_ug/testpmd_funcs.rst | 1070 ++-
 1 files changed, 566 insertions(+), 504 deletions(-)


[dpdk-dev] Newbie question: compiling virtio-net-pmd v1.2 fails with dpdk 1.7 on a VM

2014-11-20 Thread Tapio Tallgren
On Thu, Nov 20, 2014 at 1:33 PM, Thomas Monjalon 
wrote:

> 2014-11-20 06:17, Neil Horman:
> > On Thu, Nov 20, 2014 at 09:14:35AM +0200, Tapio Tallgren wrote:
> > > Looks like a version conflict? Dpdk 1.7 should support virtio-pmd so
> what
> > > am I doing wrong?
> >
> > Nothing, its a bug.  DPDK changed the API here with commit
> > 9aaccf1abdb2894ec23870e1d2199a657f85850e but it never got changed in the
> > virtio_user pmd.  You need to modify the pmd like the other call sites
> have been
> > in the above commit.
> >
> > This is why those external pmds need to be merged into the dpdk tree.
>
> Yes, exactly.
> We'll see during 2.0 cycle how to merge them. The ideal plan is to keep
> only
> 1 implementation.
>
> In the meantime, patches are welcome.
>
> --
> Thomas
>

This looks like a purely cosmetic change? So, if the code compiles, then it
will also run correctly?


-- 
-Tapio


[dpdk-dev] [PATCH] cmdline: Fix broken functionality in FreeBSD

2014-11-20 Thread Sergio Gonzalez Monroy
Some features of the cmdline were broken in FreeBSD as a result of
termios not being compiled.

Signed-off-by: Sergio Gonzalez Monroy 
---
 lib/librte_cmdline/cmdline.h|  2 --
 lib/librte_cmdline/cmdline_socket.c | 10 +-
 2 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/lib/librte_cmdline/cmdline.h b/lib/librte_cmdline/cmdline.h
index 4c28d37..06ae086 100644
--- a/lib/librte_cmdline/cmdline.h
+++ b/lib/librte_cmdline/cmdline.h
@@ -71,9 +71,7 @@ struct cmdline {
cmdline_parse_ctx_t *ctx;
struct rdline rdl;
char prompt[RDLINE_PROMPT_SIZE];
-#ifdef RTE_EXEC_ENV_LINUXAPP
struct termios oldterm;
-#endif
 };

 struct cmdline *cmdline_new(cmdline_parse_ctx_t *ctx, const char *prompt, int 
s_in, int s_out);
diff --git a/lib/librte_cmdline/cmdline_socket.c 
b/lib/librte_cmdline/cmdline_socket.c
index b51b537..6820b6d 100644
--- a/lib/librte_cmdline/cmdline_socket.c
+++ b/lib/librte_cmdline/cmdline_socket.c
@@ -93,7 +93,6 @@ struct cmdline *
 cmdline_stdin_new(cmdline_parse_ctx_t *ctx, const char *prompt)
 {
struct cmdline *cl;
-#ifdef RTE_EXEC_ENV_LINUXAPP
struct termios oldterm, term;

tcgetattr(0, &oldterm);
@@ -101,14 +100,12 @@ cmdline_stdin_new(cmdline_parse_ctx_t *ctx, const char 
*prompt)
term.c_lflag &= ~(ICANON | ECHO | ISIG);
tcsetattr(0, TCSANOW, &term);
setbuf(stdin, NULL);
-#endif

cl = cmdline_new(ctx, prompt, 0, 1);

-#ifdef RTE_EXEC_ENV_LINUXAPP
if (cl)
memcpy(&cl->oldterm, &oldterm, sizeof(term));
-#endif
+
return cl;
 }

@@ -118,10 +115,5 @@ cmdline_stdin_exit(struct cmdline *cl)
if (!cl)
return;

-#ifdef RTE_EXEC_ENV_LINUXAPP
tcsetattr(fileno(stdin), TCSANOW, &cl->oldterm);
-#else
-   /* silent the compiler */
-   (void)cl;
-#endif
 }
-- 
2.1.0



[dpdk-dev] [PATCH 2/2] eal: Remove unused HPET support from FreeBSD

2014-11-20 Thread Sergio Gonzalez Monroy
The HPET support in the BSD EAL was copied directly from the Linux version,
but did not actually work on FreeBSD. We replace this old code with a simple
compiler message that informs the user that we don't support HPET on BSD if
they enable such support in the build-time configuration file.

Signed-off-by: Sergio Gonzalez Monroy 
---
 lib/librte_eal/bsdapp/eal/eal_timer.c | 187 ++
 1 file changed, 7 insertions(+), 180 deletions(-)

diff --git a/lib/librte_eal/bsdapp/eal/eal_timer.c 
b/lib/librte_eal/bsdapp/eal/eal_timer.c
index 67da167..3163496 100644
--- a/lib/librte_eal/bsdapp/eal/eal_timer.c
+++ b/lib/librte_eal/bsdapp/eal/eal_timer.c
@@ -30,17 +30,10 @@
  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
-
 #include 
-#include 
 #include 
-#include 
 #include 
-#include 
 #include 
-#include 
-#include 
-#include 
 #include 
 #include 
 #include 
@@ -57,111 +50,14 @@
 #include "eal_private.h"
 #include "eal_internal_cfg.h"

-enum timer_source eal_timer_source = EAL_TIMER_HPET;
-
-/* The frequency of the RDTSC timer resolution */
-static uint64_t eal_tsc_resolution_hz = 0;
-
 #ifdef RTE_LIBEAL_USE_HPET
-
-#define DEV_HPET "/dev/hpet"
-
-/* Maximum number of counters. */
-#define HPET_TIMER_NUM 3
-
-/* General capabilities register */
-#define CLK_PERIOD_SHIFT 32 /* Clock period shift. */
-#define CLK_PERIOD_MASK  0xffffffff00000000ULL /* Clock period mask. */
-
-/**
- * HPET timer registers. From the Intel IA-PC HPET (High Precision Event
- * Timers) Specification.
- */
-struct eal_hpet_regs {
-   /* Memory-mapped, software visible registers */
-   uint64_t capabilities;  /**< RO General Capabilities Register. */
-   uint64_t reserved0; /**< Reserved for future use. */
-   uint64_t config;/**< RW General Configuration Register. */
-   uint64_t reserved1; /**< Reserved for future use. */
-   uint64_t isr;   /**< RW Clear General Interrupt Status. */
-   uint64_t reserved2[25]; /**< Reserved for future use. */
-   union {
-   uint64_t counter;   /**< RW Main Counter Value Register. */
-   struct {
-   uint32_t counter_l; /**< RW Main Counter Low. */
-   uint32_t counter_h; /**< RW Main Counter High. */
-   };
-   };
-   uint64_t reserved3; /**< Reserved for future use. */
-   struct {
-   uint64_t config;/**< RW Timer Config and Capability Reg. */
-   uint64_t comp;  /**< RW Timer Comparator Value Register. */
-   uint64_t fsb;   /**< RW FSB Interrupt Route Register. */
-   uint64_t reserved4; /**< Reserved for future use. */
-   } timers[HPET_TIMER_NUM]; /**< Set of HPET timers. */
-};
-
-/* Mmap'd hpet registers */
-static volatile struct eal_hpet_regs *eal_hpet = NULL;
-
-/* Period at which the HPET counter increments in
- * femtoseconds (10^-15 seconds). */
-static uint32_t eal_hpet_resolution_fs = 0;
-
-/* Frequency of the HPET counter in Hz */
-static uint64_t eal_hpet_resolution_hz = 0;
-
-/* Incremented 4 times during one 32bits hpet full count */
-static uint32_t eal_hpet_msb;
-
-static pthread_t msb_inc_thread_id;
-
-/*
- * This function runs on a specific thread to update a global variable
- * containing used to process MSB of the HPET (unfortunatelly, we need
- * this because hpet is 32 bits by default under linux).
- */
-static void
-hpet_msb_inc(__attribute__((unused)) void *arg)
-{
-   uint32_t t;
-
-   while (1) {
-   t = (eal_hpet->counter_l >> 30);
-   if (t != (eal_hpet_msb & 3))
-   eal_hpet_msb ++;
-   sleep(10);
-   }
-}
-
-uint64_t
-rte_get_hpet_hz(void)
-{
-   if(internal_config.no_hpet)
-   rte_panic("Error, HPET called, but no HPET present\n");
-
-   return eal_hpet_resolution_hz;
-}
-
-uint64_t
-rte_get_hpet_cycles(void)
-{
-   uint32_t t, msb;
-   uint64_t ret;
-
-   if(internal_config.no_hpet)
-   rte_panic("Error, HPET called, but no HPET present\n");
-
-   t = eal_hpet->counter_l;
-   msb = eal_hpet_msb;
-   ret = (msb + 2 - (t >> 30)) / 4;
-   ret <<= 32;
-   ret += t;
-   return ret;
-}
-
+#warning HPET is not supported in FreeBSD
 #endif

+enum timer_source eal_timer_source = EAL_TIMER_TSC;
+
+/* The frequency of the RDTSC timer resolution */
+static uint64_t eal_tsc_resolution_hz = 0;

 void
 rte_delay_us(unsigned us)
@@ -178,70 +74,6 @@ rte_get_tsc_hz(void)
return eal_tsc_resolution_hz;
 }

-
-#ifdef RTE_LIBEAL_USE_HPET
-/*
- * Open and mmap /dev/hpet (high precision event timer) that will
- * provide our time reference.
- */
-int
-rte_eal_hpet_init(int make_default)
-{
-   int fd, ret;
-
-   if (internal_config.no_hpet) {
-   RTE_L

[dpdk-dev] [PATCH 1/2] eal: use sysctl in BSD to set TSC freq

2014-11-20 Thread Sergio Gonzalez Monroy
BSD provides the TSC frequency value through sysctl.

Signed-off-by: Sergio Gonzalez Monroy 
---
 lib/librte_eal/bsdapp/eal/eal_timer.c | 46 ---
 1 file changed, 27 insertions(+), 19 deletions(-)

diff --git a/lib/librte_eal/bsdapp/eal/eal_timer.c 
b/lib/librte_eal/bsdapp/eal/eal_timer.c
index fd800b9..67da167 100644
--- a/lib/librte_eal/bsdapp/eal/eal_timer.c
+++ b/lib/librte_eal/bsdapp/eal/eal_timer.c
@@ -41,6 +41,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 

 #include 
@@ -241,29 +243,34 @@ rte_eal_hpet_init(int make_default)
 #endif

 static int
-set_tsc_freq_from_clock(void)
+set_tsc_freq_from_sysctl(void)
 {
-#ifdef CLOCK_MONOTONIC_RAW
-#define NS_PER_SEC 1E9
+   size_t sz;
+   int tmp;

-   struct timespec sleeptime = {.tv_nsec = 5E8 }; /* 1/2 second */
+   sz = sizeof(tmp);
+   tmp = 0;

-   struct timespec t_start, t_end;
+   if (sysctlbyname("kern.timecounter.smp_tsc", &tmp, &sz, NULL, 0))
+   RTE_LOG(WARNING, EAL, "%s\n", strerror(errno));
+   else if (tmp != 1)
+   RTE_LOG(WARNING, EAL, "TSC is not safe to use in SMP mode\n");

-   if (clock_gettime(CLOCK_MONOTONIC_RAW, &t_start) == 0) {
-   uint64_t ns, end, start = rte_rdtsc();
-   nanosleep(&sleeptime,NULL);
-   clock_gettime(CLOCK_MONOTONIC_RAW, &t_end);
-   end = rte_rdtsc();
-   ns = ((t_end.tv_sec - t_start.tv_sec) * NS_PER_SEC);
-   ns += (t_end.tv_nsec - t_start.tv_nsec);
+   tmp = 0;

-   double secs = (double)ns/NS_PER_SEC;
-   eal_tsc_resolution_hz = (uint64_t)((end - start)/secs);
-   return 0;
+   if (sysctlbyname("kern.timecounter.invariant_tsc", &tmp, &sz, NULL, 0))
+   RTE_LOG(WARNING, EAL, "%s\n", strerror(errno));
+   else if (tmp != 1)
+   RTE_LOG(WARNING, EAL, "TSC is not invariant\n");
+
+   sz = sizeof(eal_tsc_resolution_hz);
+
+   if (sysctlbyname("machdep.tsc_freq", &eal_tsc_resolution_hz, &sz, NULL, 
0)) {
+   RTE_LOG(WARNING, EAL, "%s\n", strerror(errno));
+   return -1;
}
-#endif
-   return -1;
+
+   return 0;
 }

 static void
@@ -277,10 +284,11 @@ set_tsc_freq_fallback(void)
sleep(1);
eal_tsc_resolution_hz = rte_rdtsc() - start;
 }
+
 /*
  * This function measures the TSC frequency. It uses a variety of approaches.
  *
- * 1. If kernel provides CLOCK_MONOTONIC_RAW we use that to tune the TSC value
+ * 1. Read the TSC frequency value provided by the kernel
  * 2. If kernel does not provide that, and we have HPET support, tune using 
HPET
  * 3. Lastly, if neither of the above can be used, just sleep for 1 second and
  * tune off that, printing a warning about inaccuracy of timing
@@ -288,7 +296,7 @@ set_tsc_freq_fallback(void)
 static void
 set_tsc_freq(void)
 {
-   if (set_tsc_freq_from_clock() < 0)
+   if (set_tsc_freq_from_sysctl() < 0)
set_tsc_freq_fallback();

RTE_LOG(INFO, EAL, "TSC frequency is ~%"PRIu64" KHz\n",
-- 
2.1.0



[dpdk-dev] [PATCH 0/2] BSD timer cleanup/update

2014-11-20 Thread Sergio Gonzalez Monroy
This patch set is a quick cleanup/update of the timer in BSD.
Since BSD exposes TSC freq in sysctl, we avoid other less accurate methods.
We still provide a fallback in case we fail to obtain the value, as it is
x86 specific.

The HPET device is not exposed in BSD, therefore the current code does not work
and we remove it altogether.

Sergio Gonzalez Monroy (2):
  eal: use sysctl in BSD to set TSC freq
  eal: BSD timer cleanup - remove HPET support

 lib/librte_eal/bsdapp/eal/eal_timer.c | 225 +-
 1 file changed, 30 insertions(+), 195 deletions(-)

-- 
2.1.0



[dpdk-dev] one worker reading multiple ports

2014-11-20 Thread Matthew Hall
On Thu, Nov 20, 2014 at 05:10:51PM +0100, Newman Poborsky wrote:
> Thank you for your answer.
> 
> I just realized that the reason the rte_eth_rx_burst() returns 0 is because
> inside ixgbe_recv_pkts() this fails:
> nmb = rte_rxmbuf_alloc(rxq->mb_pool);  => nmb is NULL
> 
> Does this mean that every RX queue should have its own rte_mempool?  If so,
> are there any optimal values for: number of RX descriptors, per-queue
> rte_mempool size, number of hugepages (from what I understand, these 3 are
> correlated)?
> 
> If I'm wrong, please explain why.
> 
> Thanks!
> 
> BR,
> Newman

Newman,

Mempools are created per NUMA node (ordinarily this means per processor socket 
if sockets > 1).

When doing Tx / Rx Queue Setup, one should determine the socket which owns the 
given PCI NIC, and try to use memory on that same socket to handle traffic for 
that NIC and Queues.

So, for N cards with Q * N Tx / Rx queues, you only need S mempools.

Then each of the Q * N queues will use the mempool from the socket closest to 
the card.

Matthew.


[dpdk-dev] [PATCH] cmdline: Fix broken functionality in FreeBSD

2014-11-20 Thread Neil Horman
On Thu, Nov 20, 2014 at 05:19:19PM +, Bruce Richardson wrote:
> On Thu, Nov 20, 2014 at 12:03:40PM -0500, Neil Horman wrote:
> > On Thu, Nov 20, 2014 at 04:42:23PM +, Gonzalez Monroy, Sergio wrote:
> > > > From: Neil Horman [mailto:nhorman at tuxdriver.com]
> > > > Sent: Thursday, November 20, 2014 2:21 PM
> > > > On Thu, Nov 20, 2014 at 02:17:13PM +, Sergio Gonzalez Monroy wrote:
> > > > > Some features of the cmdline were broken in FreeBSD as a result of
> > > > > termios not being compiled.
> > > > >
> > > > > Signed-off-by: Sergio Gonzalez Monroy
> > > > > 
> > > > 
> > > > Not sure I understand the changelog above.  You're removing ifdefs below
> > > > which makes sense, but are you now assuming that BSD will be built with
> > > > termios support, or do you need to add some alternate dependency check
> > > > during the configuration of DPDK?
> > > > Neil
> > > > 
> > > Yes, I was assuming that BSD has termios support.
> > > Is it not a fair assumption?
> > > 
> > No, I think its a perfectly fair assumption.  I was just trying to 
> > understand
> > the history of the ifdefs there.  Sounds like it was a dumb idea to 
> > intiially
> > ifdef the termios stuff out way back when.
> > 
> 
> The ifdef probably dates from when there were just two versions of DPDK: linux
> and baremetal. Guess which one didn't have the termios support :-)
> 
> /Bruce
> 
Ah, thanks for the history Bruce :)
Neil

> > Acked-by: Neil Horman 
> > 
> > > Sergio
> > > 
> 


[dpdk-dev] versioning and maintenance

2014-11-20 Thread Neil Horman
On Thu, Nov 20, 2014 at 06:09:10PM +0100, Thomas Monjalon wrote:
> Hi,
> 
> 2014-11-19 10:13, Neil Horman:
> > On Wed, Nov 19, 2014 at 11:35:08AM +, Bruce Richardson wrote:
> > > On Wed, Nov 19, 2014 at 12:22:14PM +0100, Thomas Monjalon wrote:
> > > > Following the discussion we had with Neil during the conference call,
> > > > I suggest this plan, starting with the next release (2.0):
> > > > - add version numbers to libraries
> > > > - add version numbers to functions inside .map files
> > > > - create a git tree dedicated to maintenance and API 
> > > > compatibility
> > > > 
> > > > It means these version numbers must be incremented when breaking the 
> > > > API.
> > > > Though the old code paths will be maintained and tested separately by 
> > > > volunteers.
> > > > A mailing list for maintenance purpose could be created if needed.
> > > >
> > > Hi Thomas,
> > > 
> > > I really think that the versionning is best handled inside the main 
> > > repository
> > > itself. Given that the proposed deprecation policy is over two releases 
> > > i.e. an
> > > API is marked deprecated in release X and then removed in X+1, I don't 
> > > see the
> > > maintaining of old code paths to be particularly onerous.
> > > 
> > > /Bruce
> > 
> > I agree with Bruce, even if it is on occasion an added workload, its not the
> > sort of thing that can or should be placed on an alternate developer.  
> > Backwards
> > compatibility is the sort of thing that has to be on the mind of the 
> > developer
> > when modifying an API, and on the mind of the reviewer when reviewing code. 
> >  To
> > shunt that responsibility elsewhere invites the opportunity for backwards
> > compatibilty to be a second class citizen who's goal will never be reached,
> > because developers instituting ABI changes will never care about the
> > consequences, and anyone worrying about backwards compatibility will always 
> > be
> > playing catch up, possibly allowing ABI breaks to slip through.
> > 
> > Neil
>  
> Before taking a decision, we should detail every concern.
> 
> 1/
> Currently there are not a lot of API refactoring because DPDK is well tailored
> for x86 and Intel NICs. But we are seeing that new CPU and new NICs to support
> would require some adaptations.
> 
Yes, you're absolutely right here.  I had hoped that, during my presentation
that this would happen occasionally, and that we would need to deal with it.
What I think you are implying here (correct me if I'm wrong), is that you would
advocate that we wait to introduce ABI versioning until after such refactoring
is, for lack of a better term "complete".  The problem here is that, software
that is growing in user base is never "complete".  What you are effectively
saying is that you want to wait until the API is in a state in which no (or
almost no) more changes are required, then fixate it.  That's quite simply never
going to happen.  And if it does, it obviates the need for versioning at all.

> 2/
> I'm curious to know how you would handle a big change like the recent mbuf 
> rework.
> Should we duplicate the structure and all the functions using mbuf?
> 

Several ways, what you suggest above is one way, although that's what I would
consider to be a pessimal case.  Ideally such large changes are extremely rare
(a search of the git history I think confirms this).  Much more common are
small, limited changes to various APIs for which providing multiple versions of
a function is a much more reasonable approach.

In the event that we do decide to do a refactor that is so far reaching that we
simply don't feel like multi-versioning is feasible, the recourse is then to
deprecate the old API, publish that information on the deprecation schedule,
wait for a release, then replace it wholesale.  When the API is released, we
bump the DSO version number.  Note the versioning policy never guarantees that
backwards compatibility will always be available, nor does it stipulate that a
newer version of the API is available prior to removing the old one. The goal
here is to give distributors and application vendors advanced notice of ABI
breaking changes so that they can adapt appropriately before they are caught off
guard.  If the new ABI can't be packaged alongside the old, then so be it,
downstream vendors will have to use the upstream git head to test and validate,
rather than a newer distribution release

Ideally though, that shouldn't happen, because it causes downstream headaches,
and we would really like to avoid that.  That's why I feel it's so important to
keep this work in the main tree.  If we segregate it to a separate location it
will make it all too easy for developers to ignore these needs and just assume we
constantly drop old ABI versions without providing backwards compatibility.

> 3/
> Should we add new fields at the end of its structure to avoid ABI breaking?
> 
In the common case yes, this usually avoids ABI breakage, though it can't always
be relied u

[dpdk-dev] Community conference call - Tuesday 18th November

2014-11-20 Thread O'driscoll, Tim
Hi Mirek,

> From: Walukiewicz, Miroslaw
> Is it a possibility to create some place on dpdk.org available to everyone
> where the slides and recording will be available?

Yes, I've been talking with Thomas about this. I have a recording of the 
meeting, and I'll work with Thomas to find the best way to post it on the 
dpdk.org site so people can access it.


Tim


[dpdk-dev] DPDK Community Conference Call - Friday 31st October

2014-11-20 Thread O'driscoll, Tim
Hi Kevin,

> From: Kevin Wilson [mailto:wkevils at gmail.com]
> > We'll schedule a follow-up call for 2 weeks' time
> Just a short question - is this still intended to be held ?

We had our second call earlier this week, on Tuesday. I'll post a recording of 
it soon.

The next call will be in 2 weeks' time, probably on Tuesday December 2nd. I 
just need to finalise the time before confirming this. We have had a couple of 
requests to alternate between a time that's suitable for USA/Europe, and one 
that's more suitable for Asia. So, the next call will probably be early in the 
morning in Europe, afternoon in Asia, and the middle of the night in the USA.


Tim


[dpdk-dev] Newbie question: compiling virtio-net-pmd v1.2 fails with dpdk 1.7 on a VM

2014-11-20 Thread Thomas Monjalon
2014-11-20 06:17, Neil Horman:
> On Thu, Nov 20, 2014 at 09:14:35AM +0200, Tapio Tallgren wrote:
> > Looks like a version conflict? Dpdk 1.7 should support virtio-pmd so what
> > am I doing wrong?
> 
> Nothing, its a bug.  DPDK changed the API here with commit
> 9aaccf1abdb2894ec23870e1d2199a657f85850e but it never got changed in the
> virtio_user pmd.  You need to modify the pmd like the other call sites have 
> been
> in the above commit.
> 
> This is why those external pmds need to be merged into the dpdk tree.

Yes, exactly.
We'll see during 2.0 cycle how to merge them. The ideal plan is to keep only
1 implementation.

In the meantime, patches are welcome.

-- 
Thomas


[dpdk-dev] [PATCH] cmdline: Fix broken functionality in FreeBSD

2014-11-20 Thread Neil Horman
On Thu, Nov 20, 2014 at 04:42:23PM +, Gonzalez Monroy, Sergio wrote:
> > From: Neil Horman [mailto:nhorman at tuxdriver.com]
> > Sent: Thursday, November 20, 2014 2:21 PM
> > On Thu, Nov 20, 2014 at 02:17:13PM +, Sergio Gonzalez Monroy wrote:
> > > Some features of the cmdline were broken in FreeBSD as a result of
> > > termios not being compiled.
> > >
> > > Signed-off-by: Sergio Gonzalez Monroy
> > > 
> > 
> > Not sure I understand the changelog above.  You're removing ifdefs below
> > which makes sense, but are you now assuming that BSD will be built with
> > termios support, or do you need to add some alternate dependency check
> > during the configuration of DPDK?
> > Neil
> > 
> Yes, I was assuming that BSD has termios support.
> Is it not a fair assumption?
> 
No, I think it's a perfectly fair assumption.  I was just trying to understand
the history of the ifdefs there.  Sounds like it was a dumb idea to initially
ifdef the termios stuff out way back when.

Acked-by: Neil Horman 

> Sergio
> 


[dpdk-dev] [PATCH v5 5/7] hash: add fallback to software CRC32 implementation

2014-11-20 Thread Yerden Zhumabekov
Initially, SSE4.2 support is detected via CPUID instruction via
the constructor function.

Added rte_hash_crc_set_alg() function to detect and set CRC32
implementation if necessary. SSE4.2 is allowed by default.

rte_hash_crc_*byte() functions reworked so they choose available
CRC32 implementation in the runtime.

Signed-off-by: Yerden Zhumabekov 
---
 lib/librte_hash/rte_hash_crc.h |   61 ++--
 1 file changed, 59 insertions(+), 2 deletions(-)

diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
index 2c8ec99..469b4f5 100644
--- a/lib/librte_hash/rte_hash_crc.h
+++ b/lib/librte_hash/rte_hash_crc.h
@@ -45,6 +45,8 @@ extern "C" {
 #endif

 #include 
+#include 
+#include 

 /* Lookup tables for software implementation of CRC32C */
 static uint32_t crc32c_tables[8][256] = {{
@@ -396,8 +398,52 @@ crc32c_sse42_u64_mimic(uint64_t data, uint64_t init_val)
return init_val;
 }

+#define CRC32_SW(1U << 0)
+#define CRC32_SSE42 (1U << 1)
+#define CRC32_x64   (1U << 2)
+#define CRC32_SSE42_x64 (CRC32_x64|CRC32_SSE42)
+
+static uint8_t crc32_alg = CRC32_SW;
+
+/**
+ * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash
+ * calculation.
+ *
+ * @param flag
+ *   An OR of following flags:
+ *   - (CRC32_SW) Don't use SSE4.2 intrinsics
+ *   - (CRC32_SSE42) Use SSE4.2 intrinsics if available
+ *   - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default)
+ *
+ */
+static inline void
+rte_hash_crc_set_alg(uint8_t alg)
+{
+   switch (alg) {
+   case CRC32_SSE42_x64:
+   if (! rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
+   alg = CRC32_SSE42;
+   case CRC32_SSE42:
+   if (! rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2))
+   alg = CRC32_SW;
+   case CRC32_SW:
+   crc32_alg = alg;
+   default:
+   break;
+   }
+}
+
+/* Setting the best available algorithm */
+static inline void __attribute__((constructor))
+rte_hash_crc_init_alg(void)
+{
+   rte_hash_crc_set_alg(CRC32_SSE42_x64);
+}
+
 /**
  * Use single crc32 instruction to perform a hash on a 4 byte value.
+ * Fall back to software crc32 implementation in case SSE4.2 is
+ * not supported
  *
  * @param data
  *   Data to perform hash on.
@@ -409,11 +455,16 @@ crc32c_sse42_u64_mimic(uint64_t data, uint64_t init_val)
 static inline uint32_t
 rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
 {
-   return crc32c_sse42_u32(data, init_val);
+   if (likely(crc32_alg & CRC32_SSE42))
+   return crc32c_sse42_u32(data, init_val);
+
+   return crc32c_1word(data, init_val);
 }

 /**
  * Use single crc32 instruction to perform a hash on a 8 byte value.
+ * Fall back to software crc32 implementation in case SSE4.2 is
+ * not supported
  *
  * @param data
  *   Data to perform hash on.
@@ -425,7 +476,13 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
 static inline uint32_t
 rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
 {
-   return crc32c_sse42_u64(data, init_val);
+   if (likely(crc32_alg == CRC32_SSE42_x64))
+   return crc32c_sse42_u64(data, init_val);
+
+   if (likely(crc32_alg & CRC32_SSE42))
+   return crc32c_sse42_u64_mimic(data, init_val);
+
+   return crc32c_2words(data, init_val);
 }

 /**
-- 
1.7.9.5



[dpdk-dev] [PATCH v5 7/7] test: remove redundant compile checks

2014-11-20 Thread Yerden Zhumabekov
Since rte_hash_crc() can now be run regardless of SSE4.2 support,
we can safely remove compile checks for RTE_MACHINE_CPUFLAG_SSE4_2
in test utilities.

Signed-off-by: Yerden Zhumabekov 
---
 app/test/test_hash.c  |7 ---
 app/test/test_hash_perf.c |   11 ---
 2 files changed, 18 deletions(-)

diff --git a/app/test/test_hash.c b/app/test/test_hash.c
index 178ec3f..76b1b8f 100644
--- a/app/test/test_hash.c
+++ b/app/test/test_hash.c
@@ -55,10 +55,7 @@
 #include 
 #include 
 #include 
-
-#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
 #include 
-#endif

 
/***
  * Hash function performance test configuration section. Each performance test
@@ -67,11 +64,7 @@
  * The five arrays below control what tests are performed. Every combination
  * from the array entries is tested.
  */
-#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
 static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc};
-#else
-static rte_hash_function hashtest_funcs[] = {rte_jhash};
-#endif
 static uint32_t hashtest_initvals[] = {0};
 static uint32_t hashtest_key_lens[] = {0, 2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 
21, 31, 32, 33, 63, 64};
 
/**/
diff --git a/app/test/test_hash_perf.c b/app/test/test_hash_perf.c
index be34957..05a88ec 100644
--- a/app/test/test_hash_perf.c
+++ b/app/test/test_hash_perf.c
@@ -56,10 +56,7 @@
 #include 
 #include 
 #include 
-
-#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
 #include 
-#endif

 /* Types of hash table performance test that can be performed */
 enum hash_test_t {
@@ -97,11 +94,7 @@ struct tbl_perf_test_params {
  */
 #define HASHTEST_ITERATIONS 100

-#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
 static rte_hash_function hashtest_funcs[] = {rte_jhash, rte_hash_crc};
-#else
-static rte_hash_function hashtest_funcs[] = {rte_jhash};
-#endif
 static uint32_t hashtest_initvals[] = {0};
 static uint32_t hashtest_key_lens[] = {2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, 
31, 32, 33, 63, 64};
 
/**/
@@ -243,7 +236,6 @@ struct tbl_perf_test_params tbl_perf_params[] =
 {   LOOKUP,  ITERATIONS,  1048576,   4,  64,rte_jhash,   
0},
 {   LOOKUP,  ITERATIONS,  1048576,   8,  64,rte_jhash,   
0},
 {   LOOKUP,  ITERATIONS,  1048576,  16,  64,rte_jhash,   
0},
-#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
 /* Small table, add */
 /*  Test type | Iterations | Entries | BucketSize | KeyLen |HashFunc | 
InitVal */
 { ADD_ON_EMPTY,1024, 1024,   1,  16, rte_hash_crc,   
0},
@@ -376,7 +368,6 @@ struct tbl_perf_test_params tbl_perf_params[] =
 {   LOOKUP,  ITERATIONS,  1048576,   4,  64, rte_hash_crc,   
0},
 {   LOOKUP,  ITERATIONS,  1048576,   8,  64, rte_hash_crc,   
0},
 {   LOOKUP,  ITERATIONS,  1048576,  16,  64, rte_hash_crc,   
0},
-#endif
 };

 
/**/
@@ -423,10 +414,8 @@ static const char *get_hash_name(rte_hash_function f)
if (f == rte_jhash)
return "jhash";

-#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
if (f == rte_hash_crc)
return "rte_hash_crc";
-#endif

return "UnknownHash";
 }
-- 
1.7.9.5



[dpdk-dev] [PATCH v5 6/7] hash: rte_hash_crc() slices data into 8-byte pieces

2014-11-20 Thread Yerden Zhumabekov
Calculating hash for data of variable length is more efficient
when that data is sliced into 8-byte pieces. The remaining part of the data
is hashed using CRC32 functions with either 8- or 4-byte operands.

Signed-off-by: Yerden Zhumabekov 
---
 lib/librte_hash/rte_hash_crc.h |   33 -
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
index 469b4f5..39d0569 100644
--- a/lib/librte_hash/rte_hash_crc.h
+++ b/lib/librte_hash/rte_hash_crc.h
@@ -486,7 +486,7 @@ rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
 }

 /**
- * Use crc32 instruction to perform a hash.
+ * Calculate CRC32 hash on user-supplied byte array.
  *
  * @param data
  *   Data to perform hash on.
@@ -501,23 +501,38 @@ static inline uint32_t
 rte_hash_crc(const void *data, uint32_t data_len, uint32_t init_val)
 {
unsigned i;
-   uint32_t temp = 0;
-   const uint32_t *p32 = (const uint32_t *)data;
+   uint64_t temp = 0;
+   const uint64_t *p64 = (const uint64_t *)data;

-   for (i = 0; i < data_len / 4; i++) {
-   init_val = rte_hash_crc_4byte(*p32++, init_val);
+   for (i = 0; i < data_len / 8; i++) {
+   init_val = rte_hash_crc_8byte(*p64++, init_val);
}

-   switch (3 - (data_len & 0x03)) {
+   switch (7 - (data_len & 0x07)) {
case 0:
-   temp |= *((const uint8_t *)p32 + 2) << 16;
+   temp |= (uint64_t) *((const uint8_t *)p64 + 6) << 48;
/* Fallthrough */
case 1:
-   temp |= *((const uint8_t *)p32 + 1) << 8;
+   temp |= (uint64_t) *((const uint8_t *)p64 + 5) << 40;
/* Fallthrough */
case 2:
-   temp |= *((const uint8_t *)p32);
+   temp |= (uint64_t) *((const uint8_t *)p64 + 4) << 32;
+   temp |= *((const uint32_t *)p64);
+   init_val = rte_hash_crc_8byte(temp, init_val);
+   break;
+   case 3:
+   init_val = rte_hash_crc_4byte(*(const uint32_t *)p64, init_val);
+   break;
+   case 4:
+   temp |= *((const uint8_t *)p64 + 2) << 16;
+   /* Fallthrough */
+   case 5:
+   temp |= *((const uint8_t *)p64 + 1) << 8;
+   /* Fallthrough */
+   case 6:
+   temp |= *((const uint8_t *)p64);
init_val = rte_hash_crc_4byte(temp, init_val);
+   /* Fallthrough */
default:
break;
}
-- 
1.7.9.5



[dpdk-dev] [PATCH v5 4/7] hash: add rte_hash_crc_8byte function

2014-11-20 Thread Yerden Zhumabekov
SSE4.2 provides CRC32 intrinsic with 8-byte operand.

Signed-off-by: Yerden Zhumabekov 
---
 lib/librte_hash/rte_hash_crc.h |   16 
 1 file changed, 16 insertions(+)

diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
index cd28833..2c8ec99 100644
--- a/lib/librte_hash/rte_hash_crc.h
+++ b/lib/librte_hash/rte_hash_crc.h
@@ -413,6 +413,22 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
 }

 /**
+ * Use single crc32 instruction to perform a hash on a 8 byte value.
+ *
+ * @param data
+ *   Data to perform hash on.
+ * @param init_val
+ *   Value to initialise hash generator.
+ * @return
+ *   32bit calculated hash value.
+ */
+static inline uint32_t
+rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
+{
+   return crc32c_sse42_u64(data, init_val);
+}
+
+/**
  * Use crc32 instruction to perform a hash.
  *
  * @param data
-- 
1.7.9.5



[dpdk-dev] [PATCH v5 3/7] hash: replace built-in functions implementing SSE4.2

2014-11-20 Thread Yerden Zhumabekov
Give up using built-in intrinsics and use our own assembly
implementation. Remove #include entry as well.

Signed-off-by: Yerden Zhumabekov 
---
 lib/librte_hash/rte_hash_crc.h |3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
index 9bd0cf6..cd28833 100644
--- a/lib/librte_hash/rte_hash_crc.h
+++ b/lib/librte_hash/rte_hash_crc.h
@@ -45,7 +45,6 @@ extern "C" {
 #endif

 #include 
-#include 

 /* Lookup tables for software implementation of CRC32C */
 static uint32_t crc32c_tables[8][256] = {{
@@ -410,7 +409,7 @@ crc32c_sse42_u64_mimic(uint64_t data, uint64_t init_val)
 static inline uint32_t
 rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
 {
-   return _mm_crc32_u32(init_val, data);
+   return crc32c_sse42_u32(data, init_val);
 }

 /**
-- 
1.7.9.5



[dpdk-dev] [PATCH v5 2/7] hash: add assembly implementation of CRC32 intrinsics

2014-11-20 Thread Yerden Zhumabekov
Added:
- crc32c_sse42_u32() emits 'crc32l' asm instruction;
- crc32c_sse42_u64() emits 'crc32q' asm instruction;
- crc32c_sse42_u64_mimic(), wrapper in case of run on 32-bit platform.

Signed-off-by: Yerden Zhumabekov 
---
 lib/librte_hash/rte_hash_crc.h |   34 ++
 1 file changed, 34 insertions(+)

diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
index 4d7532a..9bd0cf6 100644
--- a/lib/librte_hash/rte_hash_crc.h
+++ b/lib/librte_hash/rte_hash_crc.h
@@ -363,6 +363,40 @@ crc32c_2words(uint64_t data, uint32_t init_val)
return crc;
 }

+static inline uint32_t
+crc32c_sse42_u32(uint32_t data, uint32_t init_val)
+{
+   __asm__ volatile(
+   "crc32l %[data], %[init_val];"
+   : [init_val] "+r" (init_val)
+   : [data] "rm" (data));
+   return init_val;
+}
+
+static inline uint32_t
+crc32c_sse42_u64(uint64_t data, uint64_t init_val)
+{
+   __asm__ volatile(
+   "crc32q %[data], %[init_val];"
+   : [init_val] "+r" (init_val)
+   : [data] "rm" (data));
+   return init_val;
+}
+
+static inline uint32_t
+crc32c_sse42_u64_mimic(uint64_t data, uint64_t init_val)
+{
+   union {
+   uint32_t u32[2];
+   uint64_t u64;
+   } d;
+
+   d.u64 = data;
+   init_val = crc32c_sse42_u32(d.u32[0], init_val);
+   init_val = crc32c_sse42_u32(d.u32[1], init_val);
+   return init_val;
+}
+
 /**
  * Use single crc32 instruction to perform a hash on a 4 byte value.
  *
-- 
1.7.9.5



[dpdk-dev] [PATCH v5 1/7] hash: add software CRC32 implementation

2014-11-20 Thread Yerden Zhumabekov
Add lookup tables for CRC32 algorithm, crc32c_1word() and crc32c_2words()
functions returning hash of 32-bit and 64-bit operand.

Signed-off-by: Yerden Zhumabekov 
---
 lib/librte_hash/rte_hash_crc.h |  316 
 1 file changed, 316 insertions(+)

diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
index b48b0db..4d7532a 100644
--- a/lib/librte_hash/rte_hash_crc.h
+++ b/lib/librte_hash/rte_hash_crc.h
@@ -47,6 +47,322 @@ extern "C" {
 #include 
 #include 

+/* Lookup tables for software implementation of CRC32C */
+static uint32_t crc32c_tables[8][256] = {{
+ 0x, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 
0x26A1E7E8, 0xD4CA64EB,
+ 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 
0xAC78BF27, 0x5E133C24,
+ 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 
0x36FF2087, 0xC494A384,
+ 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC, 
0xBC267848, 0x4E4DFB4B,
+ 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, 0xE72719C1, 0x154C9AC2, 
0x061C6936, 0xF477EA35,
+ 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, 0x9F95C20D, 
0x8CC531F9, 0x7EAEB2FA,
+ 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD, 
0x1642AE59, 0xE4292D5A,
+ 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, 0x7DA08661, 0x8FCB0562, 
0x9C9BF696, 0x6EF07595,
+ 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 
0x67DAFA54, 0x95B17957,
+ 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F, 
0xED03A29B, 0x1F682198,
+ 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, 0x96BF4DCC, 0x64D4CECF, 
0x77843D3B, 0x85EFBE38,
+ 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 
0xFD5D65F4, 0x0F36E6F7,
+ 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0xA65C047D, 0x5437877E, 
0x4767748A, 0xB50CF789,
+ 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, 0x2C855CB2, 0xDEEEDFB1, 
0xCDBE2C45, 0x3FD5AF46,
+ 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, 0xB602C312, 0x44694011, 
0x5739B3E5, 0xA55230E6,
+ 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE, 
0xDDE0EB2A, 0x2F8B6829,
+ 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, 0x456CAC67, 0xB7072F64, 
0xA457DC90, 0x563C5F93,
+ 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 
0x2E8E845F, 0xDCE5075C,
+ 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, 
0xB4091BFF, 0x466298FC,
+ 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, 0xDFEB33C7, 0x2D80B0C4, 
0x3ED04330, 0xCCBBC033,
+ 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 
0x84EA524E, 0x7681D14D,
+ 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975, 
0x0E330A81, 0xFC588982,
+ 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, 0x758FE5D6, 0x87E466D5, 
0x94B49521, 0x66DF1622,
+ 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, 
0x1E6DCDEE, 0xEC064EED,
+ 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8, 
0xE52CC12C, 0x1747422F,
+ 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x8ECEE914, 0x7CA56A17, 
0x6FF599E3, 0x9D9E1AE0,
+ 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 
0xF5720643, 0x07198540,
+ 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78, 
0x7FAB5E8C, 0x8DC0DD8F,
+ 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, 
0xC5914FF2, 0x37FACCF1,
+ 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 
0x4F48173D, 0xBD23943E,
+ 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69, 
0xD5CF889D, 0x27A40B9E,
+ 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 
0x5F16D052, 0xAD7D5351
+},
+{
+ 0x, 0x13A29877, 0x274530EE, 0x34E7A899, 0x4E8A61DC, 0x5D28F9AB, 
0x69CF5132, 0x7A6DC945,
+ 0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21, 0xD39EA264, 0xC03C3A13, 
0xF4DB928A, 0xE7790AFD,
+ 0x3FC5F181, 0x2C6769F6, 0x1880C16F, 0x0B225918, 0x714F905D, 0x62ED082A, 
0x560AA0B3, 0x45A838C4,
+ 0xA2D13239, 0xB173AA4E, 0x859402D7, 0x96369AA0, 0xEC5B53E5, 0xFFF9CB92, 
0xCB1E630B, 0xD8BCFB7C,
+ 0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B, 0x310182DE, 0x22A31AA9, 
0x1644B230, 0x05E62A47,
+ 0xE29F20BA, 0xF13DB8CD, 0xC5DA1054, 0xD6788823, 0xAC154166, 0xBFB7D911, 
0x8B507188, 0x98F2E9FF,
+ 0x404E1283, 0x53EC8AF4, 0x670B226D, 0x74A9BA1A, 0x0EC4735F, 0x1D66EB28, 
0x298143B1, 0x3A23DBC6,
+ 0xDD5AD13B, 0xCEF8494C, 0xFA1FE1D5, 0xE9BD79A2, 0x93D0B0E7, 0x80722890, 
0xB4958009, 0xA737187E,
+ 0xFF17C604, 0xECB55E73, 0xD852F6EA, 0xCBF06E9D, 0xB19DA7D8, 0xA23F3FAF, 
0x96D89736, 0x857A0F41,
+ 0x620305BC, 0x71A19DCB, 0x45463552, 0x56E4AD25, 0x2C896460, 0x3F2BFC17, 
0x0BCC548E, 0x186ECCF9,
+ 0xC0D23785, 0xD370AFF2, 0xE797076B, 0xF4359F1C, 0x8E585659, 0x9DFACE2E, 
0xA91D66B7, 0xBABFFEC0,
+ 0x5DC6F43D, 0x4E646C4A, 0x7A83C4D3, 0x69215CA4, 0x134C95E1, 0x00EE0D96, 
0x3409A50F, 0x27AB3D78,
+ 0x809C2506,

[dpdk-dev] [PATCH v5 0/7] rte_hash_crc reworked to be platform-independent

2014-11-20 Thread Yerden Zhumabekov
These patches bring a fallback mechanism to ensure that CRC32 hash is 
calculated regardless of hardware support from CPU (i.e. SSE4.2 intrinsics).
Performance is also improved by slicing data in 8 bytes.

Patches were tested on machines both with and without SSE4.2 support.

Software implementation seems to be about 4-5 times slower than SSE4.2-enabled 
one. Of course, they return identical results.

Summary of changes:
* added CRC32 software implementation, which is used as a fallback in case 
SSE4.2 is not available, or if SSE4.2 is intentionally disabled.
* added rte_hash_crc_set_alg() function to control availability of SSE4.2.
* added rte_hash_crc_8byte() function to calculate CRC32 on 8-byte operand.
* reworked rte_hash_crc() function which leverages both versions of CRC32 hash 
calculation functions with 4 and 8-byte operands.
* removed compile-time checks from test_hash_perf and test_hash.
* the default algorithm implementation is now set by a constructor at 
application startup.
* SSE4.2 intrinsics are implemented through inline assembly code.
* added additional run-time check for 64-bit support.

Yerden Zhumabekov (7):
  hash: add software CRC32 implementation
  hash: add assembly implementation of CRC32 intrinsics
  hash: replace built-in functions implementing SSE4.2
  hash: add rte_hash_crc_8byte function
  hash: add fallback to software CRC32 implementation
  hash: rte_hash_crc() slices data into 8-byte pieces
  test: remove redundant compile checks

 app/test/test_hash.c   |7 -
 app/test/test_hash_perf.c  |   11 -
 lib/librte_hash/rte_hash_crc.h |  459 +++-
 3 files changed, 448 insertions(+), 29 deletions(-)

-- 
1.7.9.5



[dpdk-dev] [PATCH] examples/dpdk_qat: Fix reference to old mbuf field "data"

2014-11-20 Thread Pablo de Lara
DPDK QAT app was using the old "data" field in mbuf,
so it has been substituted with rte_pktmbuf_mtod function.

Signed-off-by: Pablo de Lara 
---
 examples/dpdk_qat/crypto.c |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/dpdk_qat/crypto.c b/examples/dpdk_qat/crypto.c
index 318d47c..f49408a 100644
--- a/examples/dpdk_qat/crypto.c
+++ b/examples/dpdk_qat/crypto.c
@@ -183,7 +183,7 @@ struct glob_keys g_crypto_hash_keys = {
  *
  */
 #define PACKET_DATA_START_PHYS(p) \
-   ((p)->buf_physaddr + ((char *)p->data - (char *)p->buf_addr))
+   ((p)->buf_physaddr + (p)->data_off)

 /*
  * A fixed offset to where the crypto is to be performed, which is the first
@@ -773,7 +773,7 @@ enum crypto_result
 crypto_encrypt(struct rte_mbuf *rte_buff, enum cipher_alg c, enum hash_alg h)
 {
CpaCySymDpOpData *opData =
-   (CpaCySymDpOpData *) ((char *) (rte_buff->data)
+   (CpaCySymDpOpData *) (rte_pktmbuf_mtod(rte_buff, char *)
+ CRYPTO_OFFSET_TO_OPDATA);
uint32_t lcore_id;

@@ -848,7 +848,7 @@ enum crypto_result
 crypto_decrypt(struct rte_mbuf *rte_buff, enum cipher_alg c, enum hash_alg h)
 {

-   CpaCySymDpOpData *opData = (void*) (((char *) rte_buff->data)
+   CpaCySymDpOpData *opData = (void*) (rte_pktmbuf_mtod(rte_buff, char *)
+ CRYPTO_OFFSET_TO_OPDATA);
uint32_t lcore_id;

-- 
1.7.4.1



[dpdk-dev] one worker reading multiple ports

2014-11-20 Thread Newman Poborsky
Hi,

is it possible to use one worker thread (one lcore) to read packets from
multiple ports?

When I start 2 workers and assign each one  to read from different ports
(with  rte_eth_rx_burst()) everything works fine, but if I assign one
worker to read packets from 2 ports, rte_eth_rx_burst() returns 0 as if no
packets are read.

Is there any reason for this kind of behaviour?

Thanks!

Br,
Newman P.


[dpdk-dev] Community conference call - Tuesday 18th November

2014-11-20 Thread Walukiewicz, Miroslaw
Tim, 

Is it possible to create some place on dpdk.org, accessible to everyone, 
where the slides and recording will be available?

Regards,

Mirek

> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of O'driscoll, Tim
> Sent: Wednesday, November 19, 2014 1:34 PM
> To: 'dev at dpdk.org'
> Subject: Re: [dpdk-dev] Community conference call - Tuesday 18th
> November
> 
> Thanks again to those who attended the call yesterday. There was a good
> discussion, and it was more interactive than the previous call, which is what
> we were aiming for. We'll schedule a follow-up call for 2 weeks' time.
> 
> Thanks again to those who presented on the following features:
> - ABI Versioning (Neil Horman )
> - Uio_pci_generic (Danny.Zhou at intel.com)
> - Integrated Qemu Userspace vHost (Huawei.Xie at intel.com)
> 
> I have a recording of the session, and will send a link to it soon for anybody
> who missed the call.
> 
> 
> Tim
> 
> > -Original Message-
> > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of O'driscoll, Tim
> > Sent: Tuesday, November 18, 2014 12:56 PM
> > To: 'dev at dpdk.org'
> > Subject: Re: [dpdk-dev] Community conference call - Tuesday 18th
> > November
> >
> > This is just a reminder of our call later today, which is at 17:00 GMT. The
> time
> > in a variety of timezones is included below.
> >
> > We're going to use GoToMeeting this time. If you follow the meeting link
> > (https://global.gotomeeting.com/join/843960205), the GoToMeeting web
> > viewer will start. You then have two options for audio:
> >
> > 1. You can join using a phone via one of the numbers listed below. The
> > Access Code is 843-960-205. You'll also be asked for an Audio PIN, which is
> > accessible by clicking the phone icon in the GoToMeeting web viewer after
> > you've joined the meeting.
> >
> > 2. To use your computer's audio via a headset, you need to switch to the
> > desktop version of GoToMeeting. You can do this by clicking the
> > GoToMeeting icon on the top right hand side of the web viewer, and then
> > selecting "Switch to the desktop version". The desktop version will need to
> > download and install, so if you plan to use this you may want to get it set 
> > up
> > in advance. Once it starts, under the Audio section, you can select "Mic &
> > Speakers". The desktop version is only available for Windows and Mac, so if
> > you're using Linux then you need to use option 1 above.
> >
> > Info on downloading the desktop app is available at:
> >
> http://support.citrixonline.com/en_US/meeting/help_files/G2M010002?title
> > =Download%7D
> > Info on the web viewer is available at:
> >
> http://support.citrixonline.com/en_US/GoToMeeting/help_files/GTM13001
> > 9?title=Web+Viewer+FAQs
> >
> > I plan to record the session in GoToMeeting, and make that recording
> > available for anybody who can't attend today.
> >
> >
> > Tim
> >
> > > -Original Message-
> > > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of O'driscoll, Tim
> > > Sent: Friday, November 14, 2014 10:53 AM
> > > To: 'dev at dpdk.org'
> > > Subject: Re: [dpdk-dev] Community conference call - Tuesday 18th
> > > November
> > >
> > > Firstly, due to some conflicts, we're going to move next Tuesday's
> > > meeting to
> > > 1 hour later. Apologies for the short notice on the change. Here's the
> > > new meeting time in a variety of timezones:
> > >
> > > Dublin (Ireland)  Tuesday, November 18, 2014 at
> > 5:00:00 PMGMT UTC
> > > San Francisco (U.S.A. - California)   Tuesday, November 18, 2014 at
> > 9:00:00 AMPST UTC-8 hours
> > > Phoenix (U.S.A. - Arizona)Tuesday, November 18, 2014 at
> > 10:00:00 AM   MST UTC-7 hours
> > > New York (U.S.A. - New York)  Tuesday, November 18, 2014
> > at 12:00:00 Noon EST UTC-5 hours
> > > Ottawa (Canada - Ontario) Tuesday, November 18, 2014 at
> > 12:00:00 Noon EST UTC-5 hours
> > > Paris (France)Tuesday, November 18, 2014
> at
> > 6:00:00 PMCET UTC+1 hour
> > > Tel Aviv (Israel) Tuesday, November 18, 2014 at
> > 7:00:00 PMIST UTC+2 hours
> > > Moscow (Russia)   Tuesday, November 18, 2014 at
> > 8:00:00 PMMSK UTC+3 hours
> > > Beijing (China - Beijing Municipality)Wednesday, November 19,
> 2014 at
> > 1:00:00 AM  CST UTC+8 hours
> > > Tokyo (Japan) Wednesday, November 19, 2014 at
> > 2:00:00 AM  JST UTC+9 hours
> > > Corresponding UTC (GMT)   Tuesday, November 18, 2014 at
> > 17:00:00
> > >
> > >
> > > Secondly, we're going to try using GoToMeeting this time. Here are the
> > > details:
> > > 1. Please join my meeting from your computer, tablet or smartphone on
> > > Tue, Nov 18, 5:00 PM GMT Standard Time
> > > https://global.gotomeeting.com/join/843960205
> > >
> > > 2. Use your microphone and speakers (VOIP) for audio. You'll sound
> > > best with a headset. You can also call in using your telephone.
> > 

[dpdk-dev] [PATCH] cmdline: Fix broken functionality in FreeBSD

2014-11-20 Thread Neil Horman
On Thu, Nov 20, 2014 at 02:17:13PM +0000, Sergio Gonzalez Monroy wrote:
> Some features of the cmdline were broken in FreeBSD as a result of
> termios not being compiled.
> 
> Signed-off-by: Sergio Gonzalez Monroy 

Not sure I understand the changelog above.  You're removing ifdefs below which
makes sense, but are you now assuming that BSD will be built with termios
support, or do you need to add some alternate dependency check during the
configuration of DPDK?
Neil

> ---
>  lib/librte_cmdline/cmdline.h|  2 --
>  lib/librte_cmdline/cmdline_socket.c | 10 +-
>  2 files changed, 1 insertion(+), 11 deletions(-)
> 
> diff --git a/lib/librte_cmdline/cmdline.h b/lib/librte_cmdline/cmdline.h
> index 4c28d37..06ae086 100644
> --- a/lib/librte_cmdline/cmdline.h
> +++ b/lib/librte_cmdline/cmdline.h
> @@ -71,9 +71,7 @@ struct cmdline {
>   cmdline_parse_ctx_t *ctx;
>   struct rdline rdl;
>   char prompt[RDLINE_PROMPT_SIZE];
> -#ifdef RTE_EXEC_ENV_LINUXAPP
>   struct termios oldterm;
> -#endif
>  };
>  
>  struct cmdline *cmdline_new(cmdline_parse_ctx_t *ctx, const char *prompt, 
> int s_in, int s_out);
> diff --git a/lib/librte_cmdline/cmdline_socket.c 
> b/lib/librte_cmdline/cmdline_socket.c
> index b51b537..6820b6d 100644
> --- a/lib/librte_cmdline/cmdline_socket.c
> +++ b/lib/librte_cmdline/cmdline_socket.c
> @@ -93,7 +93,6 @@ struct cmdline *
>  cmdline_stdin_new(cmdline_parse_ctx_t *ctx, const char *prompt)
>  {
>   struct cmdline *cl;
> -#ifdef RTE_EXEC_ENV_LINUXAPP
>   struct termios oldterm, term;
>  
>   tcgetattr(0, &oldterm);
> @@ -101,14 +100,12 @@ cmdline_stdin_new(cmdline_parse_ctx_t *ctx, const char 
> *prompt)
>   term.c_lflag &= ~(ICANON | ECHO | ISIG);
>   tcsetattr(0, TCSANOW, &term);
>   setbuf(stdin, NULL);
> -#endif
>  
>   cl = cmdline_new(ctx, prompt, 0, 1);
>  
> -#ifdef RTE_EXEC_ENV_LINUXAPP
>   if (cl)
>   memcpy(&cl->oldterm, &oldterm, sizeof(term));
> -#endif
> +
>   return cl;
>  }
>  
> @@ -118,10 +115,5 @@ cmdline_stdin_exit(struct cmdline *cl)
>   if (!cl)
>   return;
>  
> -#ifdef RTE_EXEC_ENV_LINUXAPP
>   tcsetattr(fileno(stdin), TCSANOW, &cl->oldterm);
> -#else
> - /* silent the compiler */
> - (void)cl;
> -#endif
>  }
> -- 
> 2.1.0
> 
> 


[dpdk-dev] DPDK Community Conference Call - Friday 31st October

2014-11-20 Thread Kevin Wilson
Hi, Tim,

> We'll schedule a follow-up call for 2 weeks' time
Just a short question - is this still intended to be held ?

Kevin

On Fri, Oct 31, 2014 at 7:36 PM, O'driscoll, Tim
 wrote:
> Thanks again to those who attended the call earlier. Hopefully people found 
> it useful.
>
> We'll schedule a follow-up call for 2 weeks' time. One thing that we do want 
> to look into is an easy way to allow screen sharing, so that we can use some 
> slides to guide the discussion. Internally within Intel we use MS Lync. We 
> can try that, but it's not always very user-friendly for external 
> participants, and doesn't have a Linux client. Other options would include 
> GoToMeeting or WebEx. If anybody has input on a good tool for this, let me 
> know.
>
> We covered the following features from our 2.0 list today, and will discuss 
> the remainder on the next call. I've called out below who on our side was 
> describing each of the features, and included their email addresses. If 
> anybody has further questions on these, feel free to either ask openly on the 
> mailing list, or else contact the relevant person directly.
>
> Bifurcated Driver (Danny.Zhou at intel.com)
> Packet Reordering/Packet Distributor (Bruce.Richardson at intel.com)
> New Hardware Support (Walter.E.Gilmore at intel.com)
> Fortville features (Heqing.Zhu at intel.com)
> Support Multiple Threads per Core (Venky.Venkatesan at intel.com)
> Cuckoo Hash (Bruce.Richardson at intel.com, Venky.Venkatesan at intel.com)
>
> The Cuckoo Hash paper that was mentioned is available at: 
> http://www.cs.cmu.edu/~dongz/papers/cuckooswitch.pdf.
>
> Finally, if anybody has suggestions for topics for future calls, please let 
> me know.
>
>
> Thanks,
> Tim
>
>> -Original Message-
>> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of O'driscoll, Tim
>> Sent: Friday, October 31, 2014 3:35 PM
>> To: 'dev at dpdk.org'
>> Subject: Re: [dpdk-dev] DPDK Community Conference Call - Friday 31st
>> October
>>
>> This is just a reminder for anybody who's interested that this will be on in 
>> 30
>> minutes, and that we'll be discussing the feature list for the DPDK 2.0 
>> release
>> in March 2015.
>>
>> Audio bridge details are:
>> France:   +33 1588 77298
>> Germany:  +49 8999 143191
>> Israel:   +972 2589 6577
>> Russia:   +7 495 641 4663
>> UK:   +44 1793 402663
>> USA/Canada:   +1 916 356 2663 or +1-888-875-9370
>>
>> Bridge: 5
>> Conference ID: 1264677285
>>
>>
>> Tim
>>
>> > -Original Message-
>> > From: O'driscoll, Tim
>> > Sent: Friday, October 24, 2014 10:22 AM
>> > To: dev at dpdk.org
>> > Subject: DPDK Community Conference Call - Friday 31st October
>> >
>> > We're planning to hold our first community conference call on Friday
>> > 31st October. It's impossible to find a time that suits everybody, so
>> > we've chosen to do this in the afternoon/evening in Europe, which is
>> > the morning in the USA. This does unfortunately limit participation
>> > from PRC, Japan and other parts of the world. Here's the time and date in a
>> variety of time zones:
>> >
>> > Dublin (Ireland)Friday, October 31, 2014 at
>> > 4:00:00 PMGMT UTC
>> > Paris (France)  Friday, October 31, 2014 at 
>> > 5:00:00
>> > PMCET UTC+1 hour
>> > San Francisco (U.S.A. - California) Friday, October 31, 2014 at 9:00:00
>> > AMPDT UTC-7 hours
>> > New York (U.S.A. - New York)Friday, October 31, 2014 at
>> 12:00:00
>> > Noon EDT UTC-4 hours
>> > Tel Aviv (Israel)   Friday, October 31, 2014 at
>> 6:00:00
>> > PMIST UTC+2 hours
>> > Moscow (Russia) Friday, October 31, 2014 at 7:00:00
>> > PMMSK UTC+3 hours
>> >
>> >
>> > Audio bridge details are:
>> > France: +33 1588 77298
>> > Germany:+49 8999 143191
>> > Israel: +972 2589 6577
>> > Russia: +7 495 641 4663
>> > UK: +44 1793 402663
>> > USA:+1 916 356 2663
>> >
>> > Bridge: 5
>> > Conference ID: 1264677285
>> >
>> > If anybody needs an access number for another country, let me know.
>> >
>> >
>> > Agenda:
>> > Discuss feature list for DPDK 2.0 (Q1 2015).
>> > Suggestions for topics for future calls.
>> >
>> >
>> > Thanks,
>> > Tim


[dpdk-dev] Newbie question: compiling virtio-net-pmd v1.2 fails with dpdk 1.7 on a VM

2014-11-20 Thread Neil Horman
On Thu, Nov 20, 2014 at 02:37:27PM +0200, Tapio Tallgren wrote:
> On Thu, Nov 20, 2014 at 1:33 PM, Thomas Monjalon  6wind.com>
> wrote:
> 
> > 2014-11-20 06:17, Neil Horman:
> > > On Thu, Nov 20, 2014 at 09:14:35AM +0200, Tapio Tallgren wrote:
> > > > Looks like a version conflict? Dpdk 1.7 should support virtio-pmd so
> > what
> > > > am I doing wrong?
> > >
> > > Nothing, its a bug.  DPDK changed the API here with commit
> > > 9aaccf1abdb2894ec23870e1d2199a657f85850e but it never got changed in the
> > > virtio_user pmd.  You need to modify the pmd like the other call sites
> > have been
> > > in the above commit.
> > >
> > > This is why those external pmds need to be merged into the dpdk tree.
> >
> > Yes, exactly.
> > We'll see during 2.0 cycle how to merge them. The ideal plan is to keep
> > only
> > 1 implementation.
> >
> > In the meantime, patches are welcome.
> >
> > --
> > Thomas
> >
> 
> This looks like a purely cosmetic change? So, if the code compiles, then it
> will also run correctly?
> 
Not sure what you mean by that.  It looks to me as though the second argument
was removed.  so if you remove that, yes the code should compile and run
correctly.  But you can't just do any old thing to make the code compile (e.g
ifdef out the macro entirely, or some such), you still have to make the right
change.
Neil

> 
> -- 
> -Tapio


[dpdk-dev] Newbie question: compiling virtio-net-pmd v1.2 fails with dpdk 1.7 on a VM

2014-11-20 Thread Tapio Tallgren
Hi,

I am probably doing something obviously wrong but I cannot figure it out
right now..
I have dpdk version 1.7.1 installed and compiled (from the git repository)
and virtio-net-pmd driver version v1.2 from git.dpdk.org. When I try to
compile virtio-net-pmd with

make RTE_INCLUDE=../dpdk/build/include

I get errors like this:

virtio_user.c: In function rte_rmxbuf_alloc:
virtio_user.c:208:44: error: macro "__rte_mbuf_sanity_check" passed 3
arguments, but takes just 2

Looks like a version conflict? Dpdk 1.7 should support virtio-pmd so what
am I doing wrong?

-- 
-Tapio


[dpdk-dev] [PATCH v4 3/5] hash: add fallback to software CRC32 implementation

2014-11-20 Thread Yerden Zhumabekov

19.11.2014 21:07, Neil Horman wrote:
> On Wed, Nov 19, 2014 at 05:35:51PM +0600, Yerden Zhumabekov wrote:
>> static inline uint32_t
>> crc32_sse42_u32(uint32_t data, uint32_t init_val)
>> {
>> /*  __asm__ volatile(
>> "crc32l %[data], %[init_val];"
>> : [init_val] "+r" (init_val)
>> : [data] "rm" (data));
>> return init_val;*/
>>
>> But wait, will __builtin_ia32_crc32si and __builtin_ia32_crc32di
>> functions do the trick? ICC has them?
> If builtins work on both icc and gcc, yes, that would be a solution as it
> creates non sse instructions when the target cpu doesn't support it.

Can anyone acknowledge?

>
>> What about prototyping functions and extracting their bodies to separate
>> module? Does it break anything?
>>
> That would be a variant on the asm inline idea, but yes, I think that would 
> work
> too

No luck. Performance degrades up to 30-50 percent if extracting
functions to separate module.

-- 
Sincerely,

Yerden Zhumabekov
State Technical Service
Astana, KZ




[dpdk-dev] one worker reading multiple ports

2014-11-20 Thread De Lara Guarch, Pablo
Hi Newman,

> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Newman Poborsky
> Sent: Thursday, November 20, 2014 8:34 AM
> To: dev at dpdk.org
> Subject: [dpdk-dev] one worker reading multiple ports
> 
> Hi,
> 
> is it possible to use one worker thread (one lcore) to read packets from
> multiple ports?
> 
> When I start 2 workers and assign each one  to read from different ports
> (with  rte_eth_rx_burst()) everything works fine, but if I assign one
> worker to read packets from 2 ports, rte_eth_rx_burst() returns 0 as if no
> packets are read.

Yes, it is totally possible. The only problem would be if you try to use 
multiple threads 
to read/write on one port, in which case you should use multiple queues.
Look at l3fwd app for instance. You can use just a single core to handle 
packets on multiple ports.

Pablo
> 
> Is there any reason for this kind of behaviour?
> 
> Thanks!
> 
> Br,
> Newman P.


[dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum offload

2014-11-20 Thread Liu, Jijiang
Hi,

> -Original Message-
> From: Olivier MATZ [mailto:olivier.matz at 6wind.com]
> Sent: Monday, November 17, 2014 7:22 PM
> To: Liu, Jijiang; Thomas Monjalon
> Cc: dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v8 10/10] app/testpmd:test VxLAN Tx checksum
> offload
> 
> Hi Jijiang,
> 
> On 11/17/2014 07:52 AM, Liu, Jijiang wrote:
> > Anyway, I explain the checksum mechanism here again.
> >
> > In my VXLAN patch set, for an VXLAN packet TX checksum offload,  the logics
> below:
> >
> > 1. only set outer L3/L4 header TX checksum
> > tx_checksum set 0x3(or 0x1) 0
> >   In this case, the PKT_TX_VXLAN_CKSUM flag is not set as we don't set
> > inner flags(PKT_TX_IPV4_CSUM, PKT_TX_UDP_CKSUM), so we don't need to
> change inner ones, driver think it is the ordinary packet, HW will do outer 
> L3/L4
> checksum offload.
> 
> Let's take an example with the following packet:
> Ether / IP / UDP / VxLAN / Ether / IP / UDP / data
> 
> The original behavior (without your vxlan patches), which still works today, 
> is to
> select inner or outer using the m->l2_len field:
> 
>   - checksum outer IP + UDP
> m->l2_len=14 m->l3_len=20
> flags=PKT_TX_IP_CKSUM PKT_TX_UDP_CKSUM
> 
>   - checksum inner IP + UDP
> m->l2_len=64 m->l3_len=20
> flags=PKT_TX_IP_CKSUM PKT_TX_UDP_CKSUM
> of course, the packet is valid only if the outer IP checksum is
> already correct and outer UDP checksum is 0
> 
> If i40e does not act like this, it does not follow the previous API.

No,  i40e follows this.

> > 2. only set inner L3/L4 header TX checksum
> > tx_checksum set 0x30 0
> >   In this case, the PKT_TX_VXLAN_CKSUM flag is set, so driver think it is 
> > VXLAN
> packet, and we don't need to change outer ones because we don't set outer 
> flags
> here (PKT_TX_IPV4_CSUM, PKT_TX_UDP_CKSUM).


> As explained above, there is no need to set the PKT_TX_VXLAN_CKSUM if you
> only want to set the inner L3/L4 checksum.
> This was already working like this
> before your patches, as long as l2_len and l3_len are set properly in the mbuf
> (l2_len should include the outer headers).

Does VXLAN TX checksum offload or ordinary L2 packet TX checksum offload work?
Have you ever tested it on a NIC that supports VXLAN. 

The PKT_TX_VXLAN_CKSUM flag just tells the driver that this is an encapsulated 
packet, so the driver should set TX checksum offload for the packet using the 
outer l2/l3 len, inner l2/l3 len and tunneling header length.

If you don't like this flag name, I can change it to PKT_TX_TUNNEL_CKSUM, 
which has a more generic meaning.

> Moreover, PKT_TX_IPV4_CSUM, PKT_TX_UDP_CKSUM, ... are not "outer flags".
> They are hardware checksum flags, and before your vxlan patch, they concerned
> the headers referenced by m->l2_len and m->l3_len.

Actually, the key point of debate is that you still think the l2_len field and 
the l3_len field in mbuf refer to the inner part in the case of tunneling, right?  If 
yes, let me explain what I thought.

As you know, NIC itself is not responsible for packet decapsulation / 
encapsulation at all. It sends and receives the whole packet, not only for 
inner part in the case of tunneling. The translation from receive descriptor to 
mbuf structure is also for the whole packet. And these fields defined in mbuf 
structure are also for the whole packet, no matter it is tunneling or 
non-tunneling.

1) We assume that a NIC can't  recognize VXLAN packet, when a packet  with the 
format  outer IP / outer UDP / VxLAN / Ether / inner IP / inner UDP / data is 
received, 
 do you think whether l2  header and l3 header length of this packet is outer 
or inner,  according to my understanding, I think it is outer, and m->l2_len 
and m->l3_len is also outer. Do you agree?

2) We also assume that a NIC can  recognize VXLAN packet,  but there is no 
difference between 1)  and 2) on data in mbuf before patching my VXLAN patch, 
so I also think  m->l2_len and m->l3_len is outer.  Do you agree?
After patching my VXLAN, the inner_l2_len and inner_l3_len were used to stand 
for inner header part.


> With your vxlan patch, it changed without being documented. These flags use
> either (m->l2_len, m->l3_len) or (m->inner_l2_len,
> m->inner_l3_len), which is not a good idea in my opinion.

The PKT_RX_IPV4_HDR  definition is listed below,
#define PKT_RX_IPV4_HDR  (1ULL << 5)  /**< RX packet with IPv4 header. */
I don't think it just stand for inner IP TX checksum offload, I just extend its 
usage scope in the case of tunneling.  

> > 3. set outer L3/L4 TX checksum and inner L3&L4 TX checksum tx_checksum
> > set 0x31(0x33) 0 in this case, the PKT_TX_VXLAN_CKSUM flag is set,
> > driver think it is VXLAN packet, and we need to change outer and inner as 
> > both
> outer and inner flags are set.
> 
> Here you are talking about test pmd flags. You do not describe the mbuf API:
> PKT_TX_* flags and lengths values that need to be set (l2_len, l3_len, ...) 
> and to
> what they refer to.
> 
> I think if you want to explai

[dpdk-dev] Newbie question: compiling virtio-net-pmd v1.2 fails with dpdk 1.7 on a VM

2014-11-20 Thread Neil Horman
On Thu, Nov 20, 2014 at 09:14:35AM +0200, Tapio Tallgren wrote:
> Hi,
> 
> I am probably doing something obviously wrong but I cannot figure it out
> right now..
> I have dpdk version 1.7.1 installed and compiled (from the git repository)
> and virtio-net-pmd driver version v1.2 from git.dpdk.org. When I try to
> compile virtio-net-pmd with
> 
> make RTE_INCLUDE=../dpdk/build/include
> 
> I get errors like this:
> 
> virtio_user.c: In function rte_rmxbuf_alloc:
> virtio_user.c:208:44: error: macro "__rte_mbuf_sanity_check" passed 3
> arguments, but takes just 2
> 
> Looks like a version conflict? Dpdk 1.7 should support virtio-pmd so what
> am I doing wrong?
> 
> -- 
> -Tapio
> 

Nothing, its a bug.  DPDK changed the API here with commit
9aaccf1abdb2894ec23870e1d2199a657f85850e but it never got changed in the
virtio_user pmd.  You need to modify the pmd like the other call sites have been
in the above commit.

This is why those external pmds need to be merged into the dpdk tree.

Neil



[dpdk-dev] Enhance KNI DPDK-app-side to be Multi-Producer/Consumer

2014-11-20 Thread Zhou, Danny
Robert, I roughly reviewed the code below for the lockless KNI fifo with MP/MC 
support, and it looks good to me. Compared to the SP/SC implementation, I think it 
will introduce a little performance degradation, but it will make your 
multi-threaded DPDK application easier to program.  Do you plan to 
support MP/MC in the kernel part of KNI as well?

From: Robert Sanford [mailto:rsanfo...@gmail.com]
Sent: Thursday, November 20, 2014 4:49 AM
To: Zhou, Danny; dev at dpdk.org
Subject: Re: [dpdk-dev] Enhance KNI DPDK-app-side to be Multi-Producer/Consumer

Hi Danny,

On Fri, Nov 14, 2014 at 7:04 PM, Zhou, Danny mailto:danny.zhou at intel.com>> wrote:
It will be always good if you can submit the RFC patch in terms of KNI 
optimization.

On the other hand, do you have any perf. data to prove that your patchset could 
improve
KNI performance which is the concern that most customers care about? We 
introduced
multi-threaded KNI kernel support last year and, if I remember correctly, the 
key performance
bottleneck we found is the skb alloc/free and memcpy between skb and mbuf. 
Would be
very happy if your patchset can prove I am wrong.


This is not an attempt to improve raw performance. Our modest goal is to make 
librte_kni's RX/TX burst APIs multithreaded, without changing rte_kni.ko. In 
this RFC patch, we make it possible for multiple cores to concurrently invoke 
rte_kni_tx_burst (or rte_kni_rx_burst) for the same KNI device.

At the moment, multiple cores invoking rte_kni_tx_burst for the same device 
cannot function correctly, because the rte_kni_fifo structures (memory shared 
between app and kernel driver) are single-producer, single-consumer. The 
following patch supplements the rte_kni_fifo structure with an additional 
structure that is private to the application, and we borrow librte_ring's MP/MC 
enqueue/dequeue logic.


Here is a patch for 1.8. We have only tested a 1.7.1 version. Please have a 
look and let us know whether you think something like this would be useful.

--
Thanks,
Robert


Signed-off-by: Robert Sanford mailto:rsanford at 
akamai.com>>

---
 lib/librte_kni/rte_kni.c  |   21 +-
 lib/librte_kni/rte_kni_fifo.h |  131 +
 2 files changed, 148 insertions(+), 4 deletions(-)

diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index fdb7509..8009173 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -76,6 +76,11 @@ struct rte_kni {
  struct rte_kni_fifo *alloc_q;   /**< Allocated mbufs queue */
  struct rte_kni_fifo *free_q;/**< To be freed mbufs queue */

+  struct rte_kni_fifo_multi tx_q_mc;  /**< Make tx_q multi-consumer */
+  struct rte_kni_fifo_multi alloc_q_mp;/**< Make alloc_q multi-producer */
+  struct rte_kni_fifo_multi rx_q_mp;  /**< Make rx_q multi-producer */
+  struct rte_kni_fifo_multi free_q_mc;/**< Make free_q multi-consumer */
+
  /* For request & response */
  struct rte_kni_fifo *req_q; /**< Request queue */
  struct rte_kni_fifo *resp_q;/**< Response queue */
@@ -414,6 +419,11 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
  kni_fifo_init(ctx->free_q, KNI_FIFO_COUNT_MAX);
  dev_info.free_phys = mz->phys_addr;

+  kni_fifo_multi_init(&ctx->tx_q_mc, KNI_FIFO_COUNT_MAX);
+  kni_fifo_multi_init(&ctx->alloc_q_mp, KNI_FIFO_COUNT_MAX);
+  kni_fifo_multi_init(&ctx->rx_q_mp, KNI_FIFO_COUNT_MAX);
+  kni_fifo_multi_init(&ctx->free_q_mc, KNI_FIFO_COUNT_MAX);
+
  /* Request RING */
  mz = slot->m_req_q;
  ctx->req_q = mz->addr;
@@ -557,7 +567,8 @@ rte_kni_handle_request(struct rte_kni *kni)
 unsigned
 rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num)
 {
-  unsigned ret = kni_fifo_put(kni->rx_q, (void **)mbufs, num);
+  unsigned ret = kni_fifo_put_mp(kni->rx_q, &kni->rx_q_mp, (void **)mbufs,
+ num);

  /* Get mbufs from free_q and then free them */
  kni_free_mbufs(kni);
@@ -568,7 +579,8 @@ rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf 
**mbufs, unsigned num)
 unsigned
 rte_kni_rx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num)
 {
-  unsigned ret = kni_fifo_get(kni->tx_q, (void **)mbufs, num);
+  unsigned ret = kni_fifo_get_mc(kni->tx_q, &kni->tx_q_mc,
+ (void **)mbufs, num);

  /* Allocate mbufs and then put them into alloc_q */
  kni_allocate_mbufs(kni);
@@ -582,7 +594,8 @@ kni_free_mbufs(struct rte_kni *kni)
  int i, ret;
  struct rte_mbuf *pkts[MAX_MBUF_BURST_NUM];

-  ret = kni_fifo_get(kni->free_q, (void **)pkts, MAX_MBUF_BURST_NUM);
+  ret = kni_fifo_get_mc(kni->free_q, &kni->free_q_mc, (void **)pkts,
+ MAX_MBUF_BURST_NUM);
  if (likely(ret > 0)) {
 for (i = 0; i < ret; i++)
 rte_pktmbuf_free(pkts[i]);
@@ -629,7 +642,7 @@ kni_allocate_mbufs(struct rte_kni *kni)
  if (i <= 0)
 return;

-  ret = kni_fifo_put(kni->alloc_q, (void **)pkts, i);
+  ret = kni_fifo_put_mp(kni->alloc_q, &kni->alloc_q_mp, (void **)pkts, i);

  /* Check if any mbufs not put into alloc_q, and then fre

[dpdk-dev] [PATCH] i40e: fixed tx stats bug

2014-11-20 Thread Zang, Zhida
Is it OK if I send another patch with the details directly?

-Original Message-
From: Qiu, Michael 
Sent: Wednesday, November 19, 2014 17:27
To: Zang, Zhida; dev at dpdk.org
Subject: Re: [dpdk-dev] [PATCH] i40e: fixed tx stats bug

On 11/19/2014 4:43 PM, zhida zang wrote:
> From: zzang 
>
> Fixed tx stats bug in i40e


Would you mind to give more details about this bug in the commit log?

 This can really help reviewers get an initial idea about your patch without 
going through the code itself.

Thanks,
Michael
>
> Signed-off-by: zzang 
> ---
>  lib/librte_pmd_i40e/i40e_ethdev.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/lib/librte_pmd_i40e/i40e_ethdev.c 
> b/lib/librte_pmd_i40e/i40e_ethdev.c
> index 4b7a827..e01590c 100644
> --- a/lib/librte_pmd_i40e/i40e_ethdev.c
> +++ b/lib/librte_pmd_i40e/i40e_ethdev.c
> @@ -1102,6 +1102,7 @@ i40e_dev_stats_get(struct rte_eth_dev *dev, struct 
> rte_eth_stats *stats)
>   struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>   struct i40e_hw_port_stats *ns = &pf->stats; /* new stats */
>   struct i40e_hw_port_stats *os = &pf->stats_offset; /* old stats */
> + struct i40e_eth_stats *ves = &pf->main_vsi->eth_stats; /* vsi stats 
> +*/
>  
>   /* Get statistics of struct i40e_eth_stats */
>   i40e_stat_update_48(hw, I40E_GLPRT_GORCH(hw->port), @@ -1284,8 
> +1285,8 @@ i40e_dev_stats_get(struct rte_eth_dev *dev, struct 
> rte_eth_stats *stats)
>  
>   stats->ipackets = ns->eth.rx_unicast + ns->eth.rx_multicast +
>   ns->eth.rx_broadcast;
> - stats->opackets = ns->eth.tx_unicast + ns->eth.tx_multicast +
> - ns->eth.tx_broadcast;
> + stats->opackets = ves->tx_unicast + ves->tx_multicast +
> + ves->tx_broadcast;
>   stats->ibytes   = ns->eth.rx_bytes;
>   stats->obytes   = ns->eth.tx_bytes;
>   stats->oerrors  = ns->eth.tx_errors;