The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
per TX WQE. Exceeding this limit can cause TX failures.
Add an ndo_features_check() callback to validate the SKB layout before
transmission. For GSO SKBs that would exceed the hardware SGE limit, clear
NETIF_F_GSO_MASK to enforce software segmentation in the stack.
Add a fallback in mana_start_xmit() to linearize non-GSO SKBs that still
exceed the SGE limit, and add the GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE driver
capability flag to indicate that the driver linearizes such SKBs.

Return NETDEV_TX_BUSY only when mana_gd_post_work_request() fails with
-ENOSPC. Route all other errors to the free_sgl_ptr label so that the
resources are freed and the TX drop is recorded.

Co-developed-by: Dipayaan Roy <[email protected]>
Signed-off-by: Dipayaan Roy <[email protected]>
Signed-off-by: Aditya Garg <[email protected]>
---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 48 +++++++++++++++++--
 include/net/mana/gdma.h                       |  6 ++-
 include/net/mana/mana.h                       |  1 +
 3 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 0142fd98392c..1f95b644eba1 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -11,6 +11,7 @@
 #include <linux/mm.h>
 #include <linux/pci.h>
 #include <linux/export.h>
+#include <linux/skbuff.h>
 
 #include <net/checksum.h>
 #include <net/ip6_checksum.h>
@@ -289,6 +290,21 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
        cq = &apc->tx_qp[txq_idx].tx_cq;
        tx_stats = &txq->stats;
 
+       if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
+           skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
+               /* GSO skb with Hardware SGE limit exceeded is not expected here
+                * as they are handled in mana_features_check() callback
+                */
+               if (skb_is_gso(skb))
+                       netdev_warn_once(ndev, "GSO enabled skb exceeds max SGE limit\n");
+               if (skb_linearize(skb)) {
+                       netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n",
+                                        skb_shinfo(skb)->nr_frags,
+                                        skb_is_gso(skb));
+                       goto tx_drop_count;
+               }
+       }
+
        pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
        pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
 
@@ -402,8 +418,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
                }
        }
 
-       WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
-
        if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
                pkg.wqe_req.sgl = pkg.sgl_array;
        } else {
@@ -438,9 +452,13 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 
        if (err) {
                (void)skb_dequeue_tail(&txq->pending_skbs);
+               mana_unmap_skb(skb, apc);
                netdev_warn(ndev, "Failed to post TX OOB: %d\n", err);
-               err = NETDEV_TX_BUSY;
-               goto tx_busy;
+               if (err == -ENOSPC) {
+                       err = NETDEV_TX_BUSY;
+                       goto tx_busy;
+               }
+               goto free_sgl_ptr;
        }
 
        err = NETDEV_TX_OK;
@@ -478,6 +496,25 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
        return NETDEV_TX_OK;
 }
 
+static netdev_features_t mana_features_check(struct sk_buff *skb,
+                                            struct net_device *ndev,
+                                            netdev_features_t features)
+{
+       if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
+           skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
+               /* nr_frags + 2 exceeds the HW SGE limit (the "+ 2" presumably
+                * covers the SGEs used for the linear area - TODO confirm).
+                * GSO: clear the GSO feature bits so the stack
+                *   software-segments the skb into skbs that fit the budget.
+                * Non-GSO: features are returned unchanged; the xmit path
+                *   will attempt skb_linearize() as a fallback.
+                */
+               if (skb_is_gso(skb))
+                       features &= ~NETIF_F_GSO_MASK;
+       }
+       return features;
+}
+
 static void mana_get_stats64(struct net_device *ndev,
                             struct rtnl_link_stats64 *st)
 {
@@ -838,6 +875,7 @@ static const struct net_device_ops mana_devops = {
        .ndo_open               = mana_open,
        .ndo_stop               = mana_close,
        .ndo_select_queue       = mana_select_queue,
+       .ndo_features_check     = mana_features_check,
        .ndo_start_xmit         = mana_start_xmit,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_get_stats64        = mana_get_stats64,
@@ -1606,7 +1644,7 @@ static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units)
        return 0;
 }
 
-static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
+void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
 {
        struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
        struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 57df78cfbf82..b35ecc58fbab 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -591,6 +591,9 @@ enum {
 /* Driver can self reset on FPGA Reconfig EQE notification */
 #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
 
+/* Driver supports linearizing the skb when num_sge exceeds hardware limit */
+#define GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE BIT(20)
+
 #define GDMA_DRV_CAP_FLAGS1 \
        (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
         GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
@@ -599,7 +602,8 @@ enum {
         GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \
         GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
         GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
-        GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE)
+        GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \
+        GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE)
 
 #define GDMA_DRV_CAP_FLAGS2 0
 
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 0921485565c0..330e1bb088bb 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -580,6 +580,7 @@ int mana_set_bw_clamp(struct mana_port_context *apc, u32 speed,
 void mana_query_phy_stats(struct mana_port_context *apc);
 int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues);
 void mana_pre_dealloc_rxbufs(struct mana_port_context *apc);
+void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc);
 
 extern const struct ethtool_ops mana_ethtool_ops;
 extern struct dentry *mana_debugfs_root;
-- 
2.43.0


Reply via email to