[dpdk-dev] [PATCH v2 1/6] net/mlx5: rework hardware structures

2016-09-20 Thread Nélio Laranjeiro
On Mon, Sep 19, 2016 at 05:14:26PM +0100, Bruce Richardson wrote:
> On Wed, Sep 14, 2016 at 02:18:02PM +0200, Nelio Laranjeiro wrote:
> > Rework Work Queue Element (aka WQE) structures to fit PMD needs.
> > A WQE is an aggregation of 16-byte elements known as "data segments"
> > (aka dseg).
> > 
> > Signed-off-by: Nelio Laranjeiro 
> 
> Hi Nelio,
> 
> Can you give a bit more detail in the commit message here? What are the "PMD
> needs" that must be met, and how is the WQE reworked to meet them?
> 
> Thanks,
> /Bruce

Sure, I will try to explain it better in v3.

Regards,

-- 
Nélio Laranjeiro
6WIND


[dpdk-dev] [PATCH v2 1/6] net/mlx5: rework hardware structures

2016-09-19 Thread Bruce Richardson
On Wed, Sep 14, 2016 at 02:18:02PM +0200, Nelio Laranjeiro wrote:
> Rework Work Queue Element (aka WQE) structures to fit PMD needs.
> A WQE is an aggregation of 16-byte elements known as "data segments"
> (aka dseg).
> 
> Signed-off-by: Nelio Laranjeiro 

Hi Nelio,

Can you give a bit more detail in the commit message here? What are the "PMD
needs" that must be met, and how is the WQE reworked to meet them?

Thanks,
/Bruce


[dpdk-dev] [PATCH v2 1/6] net/mlx5: rework hardware structures

2016-09-14 Thread Nelio Laranjeiro
Rework Work Queue Element (aka WQE) structures to fit PMD needs.
A WQE is an aggregation of 16-byte elements known as "data segments"
(aka dseg).
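
As a rough illustration of that layout (not part of the patch), the sketch
below mirrors the new struct mlx5_wqe / struct mlx5_wqe64 definitions and the
MLX5_WQE_DS() macro introduced in mlx5_prm.h further down, but swaps the
DPDK/verbs types for plain stand-ins (eth_seg_small, wqe and wqe64 are
illustrative names) so it compiles on its own; the Ethernet-segment fields
ahead of mss do not appear in the hunk and are assumed here.

/*
 * Standalone sketch of the reworked WQE layout: a WQE is built from
 * 16-byte "data segments" (DS); the common header takes two of them and
 * a full ring slot is four (64 bytes).
 */
#include <stdint.h>
#include <stdio.h>

#define MLX5_WQE_DWORD_SIZE 16
#define MLX5_WQE_SIZE (4 * MLX5_WQE_DWORD_SIZE)
/* Number of 16-byte data segments needed to hold n bytes (rounds up). */
#define MLX5_WQE_DS(n) \
        (((n) + MLX5_WQE_DWORD_SIZE - 1) / MLX5_WQE_DWORD_SIZE)

/* Stand-in for struct mlx5_wqe_eth_seg_small (16 bytes); the fields before
 * mss are assumed from the unchanged part of the structure. */
struct eth_seg_small {
        uint32_t rsvd0;
        uint8_t cs_flags;
        uint8_t rsvd1;
        uint16_t mss;
        uint32_t rsvd2;
        uint16_t inline_hdr_sz;
        uint8_t inline_hdr[2];
};

/* Small common part of every WQE: one control DS plus the Ethernet segment. */
struct wqe {
        uint32_t ctrl[4];
        struct eth_seg_small eseg;
};

/* One 64-byte ring slot: the common header plus room for two more DS. */
struct wqe64 {
        struct wqe hdr;
        uint8_t raw[32];
};

int main(void)
{
        /* The common header spans two data segments (32 bytes)... */
        printf("sizeof(struct wqe)   = %zu\n", sizeof(struct wqe));
        /* ...and a full slot is exactly MLX5_WQE_SIZE (64) bytes. */
        printf("sizeof(struct wqe64) = %zu\n", sizeof(struct wqe64));
        /* MLX5_WQE_DS() rounds up: 1..16 bytes -> 1 DS, 17..32 -> 2 DS. */
        printf("MLX5_WQE_DS(16) = %u\n", (unsigned int)MLX5_WQE_DS(16));
        printf("MLX5_WQE_DS(17) = %u\n", (unsigned int)MLX5_WQE_DS(17));
        return 0;
}

With a single common header type and sizes counted in uniform 16-byte units,
the TX path in the diff below can address any WQE through struct mlx5_wqe
instead of the old per-type union, which seems to be the intent behind
"fit PMD needs".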

Signed-off-by: Nelio Laranjeiro 
---
 drivers/net/mlx5/mlx5_prm.h  |  70 ++
 drivers/net/mlx5/mlx5_rxtx.c | 167 ++-
 drivers/net/mlx5/mlx5_rxtx.h |   2 +-
 drivers/net/mlx5/mlx5_txq.c  |   4 +-
 4 files changed, 111 insertions(+), 132 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 5db219b..042562c 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -65,8 +65,15 @@
 /* Maximum number of packets a multi-packet WQE can handle. */
 #define MLX5_MPW_DSEG_MAX 5

-/* Room for inline data in regular work queue element. */
-#define MLX5_WQE64_INL_DATA 12
+/* WQE DWORD size */
+#define MLX5_WQE_DWORD_SIZE 16
+
+/* WQE size */
+#define MLX5_WQE_SIZE (4 * MLX5_WQE_DWORD_SIZE)
+
+/* Compute the number of DS. */
+#define MLX5_WQE_DS(n) \
+   (((n) + MLX5_WQE_DWORD_SIZE - 1) / MLX5_WQE_DWORD_SIZE)

 /* Room for inline data in multi-packet WQE. */
 #define MLX5_MWQE64_INL_DATA 28
@@ -79,59 +86,26 @@ struct mlx5_wqe_eth_seg_small {
uint16_t mss;
uint32_t rsvd2;
uint16_t inline_hdr_sz;
+   uint8_t inline_hdr[2];
 };

-/* Regular WQE. */
-struct mlx5_wqe_regular {
-   union {
-   struct mlx5_wqe_ctrl_seg ctrl;
-   uint32_t data[4];
-   } ctrl;
-   struct mlx5_wqe_eth_seg eseg;
-   struct mlx5_wqe_data_seg dseg;
-} __rte_aligned(64);
-
-/* Inline WQE. */
-struct mlx5_wqe_inl {
-   union {
-   struct mlx5_wqe_ctrl_seg ctrl;
-   uint32_t data[4];
-   } ctrl;
-   struct mlx5_wqe_eth_seg eseg;
+struct mlx5_wqe_inl_small {
uint32_t byte_cnt;
-   uint8_t data[MLX5_WQE64_INL_DATA];
-} __rte_aligned(64);
+   uint8_t raw;
+};

-/* Multi-packet WQE. */
-struct mlx5_wqe_mpw {
-   union {
-   struct mlx5_wqe_ctrl_seg ctrl;
-   uint32_t data[4];
-   } ctrl;
+/* Small common part of the WQE. */
+struct mlx5_wqe {
+   uint32_t ctrl[4];
struct mlx5_wqe_eth_seg_small eseg;
-   struct mlx5_wqe_data_seg dseg[2];
-} __rte_aligned(64);
+};

-/* Multi-packet WQE with inline. */
-struct mlx5_wqe_mpw_inl {
-   union {
-   struct mlx5_wqe_ctrl_seg ctrl;
-   uint32_t data[4];
-   } ctrl;
-   struct mlx5_wqe_eth_seg_small eseg;
-   uint32_t byte_cnt;
-   uint8_t data[MLX5_MWQE64_INL_DATA];
+/* WQE. */
+struct mlx5_wqe64 {
+   struct mlx5_wqe hdr;
+   uint8_t raw[32];
 } __rte_aligned(64);

-/* Union of all WQE types. */
-union mlx5_wqe {
-   struct mlx5_wqe_regular wqe;
-   struct mlx5_wqe_inl inl;
-   struct mlx5_wqe_mpw mpw;
-   struct mlx5_wqe_mpw_inl mpw_inl;
-   uint8_t data[64];
-};
-
 /* MPW session status. */
 enum mlx5_mpw_state {
MLX5_MPW_STATE_OPENED,
@@ -145,7 +119,7 @@ struct mlx5_mpw {
unsigned int pkts_n;
unsigned int len;
unsigned int total_len;
-   volatile union mlx5_wqe *wqe;
+   volatile struct mlx5_wqe *wqe;
union {
volatile struct mlx5_wqe_data_seg *dseg[MLX5_MPW_DSEG_MAX];
volatile uint8_t *raw;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index ecc76ad..5feeb3f 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -173,7 +173,7 @@ txq_complete(struct txq *txq)
uint16_t elts_tail;
uint16_t cq_ci = txq->cq_ci;
volatile struct mlx5_cqe64 *cqe = NULL;
-   volatile union mlx5_wqe *wqe;
+   volatile struct mlx5_wqe *wqe;

do {
volatile struct mlx5_cqe64 *tmp;
@@ -199,8 +199,8 @@ txq_complete(struct txq *txq)
} while (1);
if (unlikely(cqe == NULL))
return;
-   wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)];
-   elts_tail = wqe->wqe.ctrl.data[3];
+   wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)].hdr;
+   elts_tail = wqe->ctrl[3];
assert(elts_tail < txq->wqe_n);
/* Free buffers. */
while (elts_free != elts_tail) {
@@ -302,33 +302,33 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
  *   Number of DS elements consumed.
  */
 static inline unsigned int
-mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe,
+mlx5_wqe_write(struct txq *txq, volatile struct mlx5_wqe *wqe,
   struct rte_mbuf *buf, uint32_t length)
 {
-   uintptr_t raw = (uintptr_t)&wqe->wqe.eseg.inline_hdr_start;
+   uint8_t *raw = (uint8_t *)(uintptr_t)&wqe->eseg.inline_hdr[0];
uint16_t ds;
-   uint16_t pkt_inline_sz = 16;
+   uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE;
uintptr_t addr = rte_pktmbuf_mtod(buf, uintptr_t);
struct mlx5_wqe_data_seg *dseg = NULL;

-   assert(length >= 16);
+   assert(length >= MLX5_WQE_DWORD_SIZE);