[dpdk-dev] [PATCH] mbuf:rearrange mbuf to be more mbuf chain friendly

2016-06-25 Thread Wiles, Keith

On 6/25/16, 10:29 AM, "dev on behalf of Keith Wiles"  wrote:

>Move the next pointer to the first cacheline of the rte_mbuf structure
>and move the offload values to the second cacheline to give better
>performance to applications using chained mbufs.
>
>The change is enabled by the configuration option
>CONFIG_RTE_MBUF_CHAIN_FRIENDLY, which defaults to n (disabled).
>
>Signed-off-by: Keith Wiles 

NAK. I thought I had based these changes on the current master?



[dpdk-dev] [PATCH] mbuf:rearrange mbuf to be more mbuf chain friendly

2016-06-25 Thread Keith Wiles
Move the next pointer to the first cacheline of the rte_mbuf structure
and move the offload values to the second cacheline to give better
performance to applications using chained mbufs.

The change is enabled by the configuration option
CONFIG_RTE_MBUF_CHAIN_FRIENDLY, which defaults to n (disabled).

Signed-off-by: Keith Wiles 
---
 config/common_base |  2 +
 .../linuxapp/eal/include/exec-env/rte_kni_common.h |  8 +++
 lib/librte_mbuf/rte_mbuf.h | 67 +++---
 3 files changed, 56 insertions(+), 21 deletions(-)

diff --git a/config/common_base b/config/common_base
index 3a04fba..bdde2e7 100644
--- a/config/common_base
+++ b/config/common_base
@@ -402,6 +402,8 @@ CONFIG_RTE_LIBRTE_MBUF=y
 CONFIG_RTE_LIBRTE_MBUF_DEBUG=n
 CONFIG_RTE_MBUF_REFCNT_ATOMIC=y
 CONFIG_RTE_PKTMBUF_HEADROOM=128
+# Set to y if needing to be mbuf chain friendly.
+CONFIG_RTE_MBUF_CHAIN_FRIENDLY=n

 #
 # Compile librte_timer
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
index 2acdfd9..44d65cd 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
@@ -120,11 +120,19 @@ struct rte_kni_mbuf {
char pad2[4];
uint32_t pkt_len;   /**< Total pkt len: sum of all segment 
data_len. */
uint16_t data_len;  /**< Amount of data in segment buffer. */
+#ifdef RTE_MBUF_CHAIN_FRIENDLY
+   char pad3[8];
+   void *next;

/* fields on second cache line */
+   char pad4[16] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
+   void *pool;
+#else
+   /* fields on second cache line */
char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
void *pool;
void *next;
+#endif
 };

 /*
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 8798c41..d02ca28 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -758,6 +758,28 @@ typedef uint8_t  MARKER8[0];  /**< generic marker with 1B alignment */
 typedef uint64_t MARKER64[0]; /**< marker that allows us to overwrite 8 bytes
* with a single assignment */

+typedef union {
+   uint32_t rss; /**< RSS hash result if RSS enabled */
+   struct {
+   union {
+   struct {
+   uint16_t hash;
+   uint16_t id;
+   };
+   uint32_t lo;
+   /**< Second 4 flexible bytes */
+   };
+   uint32_t hi;
+   /**< First 4 flexible bytes or FD ID, dependent on
+   PKT_RX_FDIR_* flag in ol_flags. */
+   } fdir;   /**< Filter identifier if FDIR enabled */
+   struct {
+   uint32_t lo;
+   uint32_t hi;
+   } sched;  /**< Hierarchical scheduler */
+   uint32_t usr; /**< User defined tags. See rte_distributor_process() 
*/
+} rss_hash_t;
+
 /**
  * The generic rte_mbuf, containing a packet mbuf.
  */
@@ -817,28 +839,31 @@ struct rte_mbuf {
uint16_t data_len;/**< Amount of data in segment buffer. */
/** VLAN TCI (CPU order), valid if PKT_RX_VLAN_STRIPPED is set. */
uint16_t vlan_tci;
+#ifdef RTE_MBUF_CHAIN_FRIENDLY
+   /*
+* Move offload into the second cache line and next in the first.
+* Better performance for applications using chained mbufs to have
+* the next pointer in the first cache line.
+* If you change this structure, you must change the user-mode
+* version in rte_kni_common.h to match the new layout.
+*/
+   uint32_t seqn; /**< Sequence number. See also rte_reorder_insert() */
+   uint16_t vlan_tci_outer;  /**< Outer VLAN Tag Control Identifier (CPU 
order) */
+   struct rte_mbuf *next;/**< Next segment of scattered packet. */
+
+   /* second cache line - fields only used in slow path or on TX */
+   MARKER cacheline1 __rte_cache_min_aligned;
+
+   rss_hash_t hash;  /**< hash information */

union {
-   uint32_t rss; /**< RSS hash result if RSS enabled */
-   struct {
-   union {
-   struct {
-   uint16_t hash;
-   uint16_t id;
-   };
-   uint32_t lo;
-   /**< Second 4 flexible bytes */
-   };
-   uint32_t hi;
-   /**< First 4 flexible bytes or FD ID, dependent on
-PKT_RX_FDIR_* flag in ol_flags. */
-   } fdir;   /**< Filter identifier if FDIR enabled */
-   struct {
-   uint32_t lo;