Re: [PATCH net-next 10/10] bnxt_en: Add support for XDP_TX action.

2017-01-30 Thread Michael Chan
On Mon, Jan 30, 2017 at 9:27 PM, Jakub Kicinski  wrote:
> On Mon, 30 Jan 2017 20:49:35 -0500, Michael Chan wrote:
>> +static int bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_napi *bnapi,
>> +  struct page *page, dma_addr_t mapping, u32 offset,
>> +  u32 len)
>> +{
>> + struct bnxt_tx_ring_info *txr = bnapi->tx_ring;
>> + struct bnxt_sw_tx_bd *tx_buf;
>> + struct tx_bd_ext *txbd1;
>> + struct tx_bd *txbd;
>> + u32 flags;
>> + u16 prod;
>> +
>> + if (bnxt_tx_avail(bp, txr) < 2)
>> + return -ENOSPC;
>> +
>> + prod = txr->tx_prod;
>> + txbd = >tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
>> +
>> + tx_buf = >tx_buf_ring[prod];
>> + tx_buf->page = page;
>> + dma_unmap_addr_set(tx_buf, mapping, mapping);
>> + flags = (len << TX_BD_LEN_SHIFT) | TX_BD_TYPE_LONG_TX_BD |
>> + (2 << TX_BD_FLAGS_BD_CNT_SHIFT) | TX_BD_FLAGS_PACKET_END |
>> + bnxt_lhint_arr[len >> 9];
>> + txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
>> + txbd->tx_bd_opaque = prod;
>> + txbd->tx_bd_haddr = cpu_to_le64(mapping + offset);
>> +
>> + prod = NEXT_TX(prod);
>> + txbd1 = (struct tx_bd_ext *)
>> + >tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
>> +
>> + txbd1->tx_bd_hsize_lflags = cpu_to_le32(0);
>> + txbd1->tx_bd_mss = cpu_to_le32(0);
>> + txbd1->tx_bd_cfa_action = cpu_to_le32(0);
>> + txbd1->tx_bd_cfa_meta = cpu_to_le32(0);
>> +
>> + prod = NEXT_TX(prod);
>> + txr->tx_prod = prod;
>> + return 0;
>> +}
>
> Are you not lacking DMA syncs in general?  You map the buffers
> bidirectionally, but I failed to find any dma_syncs.  I would expect
> one before you run xdp and one before you TX because packet could have
> been modified.  What am I missing?

You are right.  I missed the DMA syncs.


Re: [PATCH net-next 10/10] bnxt_en: Add support for XDP_TX action.

2017-01-30 Thread Jakub Kicinski
On Mon, 30 Jan 2017 20:49:35 -0500, Michael Chan wrote:
> +static int bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_napi *bnapi,
> +  struct page *page, dma_addr_t mapping, u32 offset,
> +  u32 len)
> +{
> + struct bnxt_tx_ring_info *txr = bnapi->tx_ring;
> + struct bnxt_sw_tx_bd *tx_buf;
> + struct tx_bd_ext *txbd1;
> + struct tx_bd *txbd;
> + u32 flags;
> + u16 prod;
> +
> + if (bnxt_tx_avail(bp, txr) < 2)
> + return -ENOSPC;
> +
> + prod = txr->tx_prod;
> + txbd = >tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
> +
> + tx_buf = >tx_buf_ring[prod];
> + tx_buf->page = page;
> + dma_unmap_addr_set(tx_buf, mapping, mapping);
> + flags = (len << TX_BD_LEN_SHIFT) | TX_BD_TYPE_LONG_TX_BD |
> + (2 << TX_BD_FLAGS_BD_CNT_SHIFT) | TX_BD_FLAGS_PACKET_END |
> + bnxt_lhint_arr[len >> 9];
> + txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
> + txbd->tx_bd_opaque = prod;
> + txbd->tx_bd_haddr = cpu_to_le64(mapping + offset);
> +
> + prod = NEXT_TX(prod);
> + txbd1 = (struct tx_bd_ext *)
> + >tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
> +
> + txbd1->tx_bd_hsize_lflags = cpu_to_le32(0);
> + txbd1->tx_bd_mss = cpu_to_le32(0);
> + txbd1->tx_bd_cfa_action = cpu_to_le32(0);
> + txbd1->tx_bd_cfa_meta = cpu_to_le32(0);
> +
> + prod = NEXT_TX(prod);
> + txr->tx_prod = prod;
> + return 0;
> +}

Are you not lacking DMA syncs in general?  You map the buffers
bidirectionally, but I failed to find any dma_syncs.  I would expect
one before you run xdp and one before you TX because packet could have
been modified.  What am I missing?


[PATCH net-next 10/10] bnxt_en: Add support for XDP_TX action.

2017-01-30 Thread Michael Chan
Add dedicated transmit function and transmit completion handler for
XDP which are a lot simpler than the original functions for SKB.

Signed-off-by: Michael Chan 
Tested-by: Andy Gospodarek 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 34 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt.h | 14 +
 drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 90 +++
 drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h |  2 +
 4 files changed, 125 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index ae1f400..826915c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -212,16 +212,7 @@ static bool bnxt_vf_pciid(enum board_idx idx)
 #define BNXT_CP_DB_IRQ_DIS(db) \
writel(DB_CP_IRQ_DIS_FLAGS, db)
 
-static inline u32 bnxt_tx_avail(struct bnxt *bp, struct bnxt_tx_ring_info *txr)
-{
-   /* Tell compiler to fetch tx indices from memory. */
-   barrier();
-
-   return bp->tx_ring_size -
-   ((txr->tx_prod - txr->tx_cons) & bp->tx_ring_mask);
-}
-
-static const u16 bnxt_lhint_arr[] = {
+const u16 bnxt_lhint_arr[] = {
TX_BD_FLAGS_LHINT_512_AND_SMALLER,
TX_BD_FLAGS_LHINT_512_TO_1023,
TX_BD_FLAGS_LHINT_1024_TO_2047,
@@ -613,9 +604,8 @@ static inline u8 *__bnxt_alloc_rx_data(struct bnxt *bp, 
dma_addr_t *mapping,
return data;
 }
 
-static inline int bnxt_alloc_rx_data(struct bnxt *bp,
-struct bnxt_rx_ring_info *rxr,
-u16 prod, gfp_t gfp)
+int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
+  u16 prod, gfp_t gfp)
 {
struct rx_bd *rxbd = >rx_desc_ring[RX_RING(prod)][RX_IDX(prod)];
struct bnxt_sw_rx_bd *rx_buf = >rx_buf_ring[prod];
@@ -1771,6 +1761,17 @@ static int bnxt_poll_work(struct bnxt *bp, struct 
bnxt_napi *bnapi, int budget)
if (tx_pkts)
bnapi->tx_int(bp, bnapi, tx_pkts);
 
+   if (event & BNXT_TX_EVENT) {
+   struct bnxt_tx_ring_info *txr = bnapi->tx_ring;
+   void __iomem *db = txr->tx_doorbell;
+   u16 prod = txr->tx_prod;
+
+   /* Sync BD data before updating doorbell */
+   wmb();
+
+   writel(DB_KEY_TX | prod, db);
+   writel(DB_KEY_TX | prod, db);
+   }
if (event & BNXT_RX_EVENT) {
struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
 
@@ -3056,9 +3057,12 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool 
irq_re_init)
bp->tx_ring[i].bnapi = bp->bnapi[j];
bp->bnapi[j]->tx_ring = >tx_ring[i];
bp->tx_ring_map[i] = bp->tx_nr_rings_xdp + i;
-   if (i < bp->tx_nr_rings_xdp)
+   if (i < bp->tx_nr_rings_xdp) {
bp->bnapi[j]->flags |= BNXT_NAPI_FLAG_XDP;
-   bp->bnapi[j]->tx_int = bnxt_tx_int;
+   bp->bnapi[j]->tx_int = bnxt_tx_int_xdp;
+   } else {
+   bp->bnapi[j]->tx_int = bnxt_tx_int;
+   }
}
 
rc = bnxt_alloc_stats(bp);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 007e779..cf7d33f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -513,6 +513,7 @@ struct rx_tpa_end_cmp_ext {
 
 #define BNXT_RX_EVENT  1
 #define BNXT_AGG_EVENT 2
+#define BNXT_TX_EVENT  4
 
 struct bnxt_sw_tx_bd {
union {
@@ -1190,6 +1191,19 @@ struct bnxt {
 #define SFF_MODULE_ID_QSFP28   0x11
 #define BNXT_MAX_PHY_I2C_RESP_SIZE 64
 
+static inline u32 bnxt_tx_avail(struct bnxt *bp, struct bnxt_tx_ring_info *txr)
+{
+   /* Tell compiler to fetch tx indices from memory. */
+   barrier();
+
+   return bp->tx_ring_size -
+   ((txr->tx_prod - txr->tx_cons) & bp->tx_ring_mask);
+}
+
+extern const u16 bnxt_lhint_arr[];
+
+int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
+  u16 prod, gfp_t gfp);
 void bnxt_reuse_rx_data(struct bnxt_rx_ring_info *rxr, u16 cons, void *data);
 void bnxt_set_tpa_flags(struct bnxt *bp);
 void bnxt_set_ring_params(struct bnxt *);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 133e515..a9770ed 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -8,6 +8,7 @@
  */
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -18,6 +19,69 @@
 #include "bnxt_xdp.h"
 
 #ifdef CONFIG_BNXT_XDP
+static int bnxt_xmit_xdp(struct