> -----Original Message----- > From: Intel-wired-lan <intel-wired-lan-boun...@osuosl.org> On Behalf Of > Maciej Fijalkowski > Sent: Tuesday, April 29, 2025 5:52 PM > To: intel-wired-...@lists.osuosl.org > Cc: net...@vger.kernel.org; Nguyen, Anthony L > <anthony.l.ngu...@intel.com>; Karlsson, Magnus > <magnus.karls...@intel.com>; Kubiak, Michal <michal.kub...@intel.com>; > Fijalkowski, Maciej <maciej.fijalkow...@intel.com>; Tobias Böhm > <tobias.bo...@hetzner-cloud.de>; Marcus Wichelmann > <marcus.wichelm...@hetzner-cloud.de> > Subject: [Intel-wired-lan] [PATCH iwl-net] ixgbe: fix ndo_xdp_xmit() workloads > > Currently ixgbe driver checks periodically in its watchdog subtask if there is > anything to be transmitted (considering both Tx and XDP rings) under state > of carrier not being 'ok'. Such event is interpreted as Tx hang and therefore > results in interface reset. > > This is currently problematic for ndo_xdp_xmit() as it is allowed to produce > descriptors when interface is going through reset or its carrier is turned > off. > > Furthermore, XDP rings should not really be objects of Tx hang detection. This > mechanism is rather a matter of ndo_tx_timeout() being called from > dev_watchdog against Tx rings exposed to networking stack. > > Taking into account issues described above, let us have a twofold fix - do > not > respect XDP rings in local ixgbe watchdog and do not produce Tx descriptors in > ndo_xdp_xmit callback when there is some problem with carrier currently. For > now, keep the Tx hang checks in clean Tx irq routine, but adjust it to not > execute it for XDP rings. 
> > Cc: Tobias Böhm <tobias.bo...@hetzner-cloud.de> > Reported-by: Marcus Wichelmann <marcus.wichelm...@hetzner-cloud.de> > Closes: https://lore.kernel.org/netdev/eca1880f-253a-4955-afe6- > 732d7c692...@hetzner-cloud.de/ > Fixes: 6453073987ba ("ixgbe: add initial support for xdp redirect") > Fixes: 33fdc82f0883 ("ixgbe: add support for XDP_TX action") > Signed-off-by: Maciej Fijalkowski <maciej.fijalkow...@intel.com> Reviewed-by: Aleksandr Loktionov <aleksandr.loktio...@intel.com>
> --- > drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 34 ++++++------------- > 1 file changed, 11 insertions(+), 23 deletions(-) > > diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c > b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c > index 467f81239e12..21bfea8aeb67 100644 > --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c > +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c > @@ -966,10 +966,6 @@ static void ixgbe_update_xoff_rx_lfc(struct > ixgbe_adapter *adapter) > for (i = 0; i < adapter->num_tx_queues; i++) > clear_bit(__IXGBE_HANG_CHECK_ARMED, > &adapter->tx_ring[i]->state); > - > - for (i = 0; i < adapter->num_xdp_queues; i++) > - clear_bit(__IXGBE_HANG_CHECK_ARMED, > - &adapter->xdp_ring[i]->state); > } > > static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter) @@ - > 1263,10 +1259,13 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector > *q_vector, > total_bytes); > adapter->tx_ipsec += total_ipsec; > > + if (ring_is_xdp(tx_ring)) > + return !!budget; > + > if (check_for_tx_hang(tx_ring) && ixgbe_check_tx_hang(tx_ring)) { > /* schedule immediate reset if we believe we hung */ > struct ixgbe_hw *hw = &adapter->hw; > - e_err(drv, "Detected Tx Unit Hang %s\n" > + e_err(drv, "Detected Tx Unit Hang\n" > " Tx Queue <%d>\n" > " TDH, TDT <%x>, <%x>\n" > " next_to_use <%x>\n" > @@ -1274,16 +1273,14 @@ static bool ixgbe_clean_tx_irq(struct > ixgbe_q_vector *q_vector, > "tx_buffer_info[next_to_clean]\n" > " time_stamp <%lx>\n" > " jiffies <%lx>\n", > - ring_is_xdp(tx_ring) ? 
"(XDP)" : "", > tx_ring->queue_index, > IXGBE_READ_REG(hw, IXGBE_TDH(tx_ring->reg_idx)), > IXGBE_READ_REG(hw, IXGBE_TDT(tx_ring->reg_idx)), > tx_ring->next_to_use, i, > tx_ring->tx_buffer_info[i].time_stamp, jiffies); > > - if (!ring_is_xdp(tx_ring)) > - netif_stop_subqueue(tx_ring->netdev, > - tx_ring->queue_index); > + netif_stop_subqueue(tx_ring->netdev, > + tx_ring->queue_index); > > e_info(probe, > "tx hang %d detected on queue %d, resetting adapter\n", > @@ -1296,9 +1293,6 @@ static bool ixgbe_clean_tx_irq(struct > ixgbe_q_vector *q_vector, > return true; > } > > - if (ring_is_xdp(tx_ring)) > - return !!budget; > - > #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) > txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index); > if (!__netif_txq_completed_wake(txq, total_packets, total_bytes, @@ > -7791,12 +7785,9 @@ static void ixgbe_check_hang_subtask(struct > ixgbe_adapter *adapter) > return; > > /* Force detection of hung controller */ > - if (netif_carrier_ok(adapter->netdev)) { > + if (netif_carrier_ok(adapter->netdev)) > for (i = 0; i < adapter->num_tx_queues; i++) > set_check_for_tx_hang(adapter->tx_ring[i]); > - for (i = 0; i < adapter->num_xdp_queues; i++) > - set_check_for_tx_hang(adapter->xdp_ring[i]); > - } > > if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) { > /* > @@ -8011,13 +8002,6 @@ static bool ixgbe_ring_tx_pending(struct > ixgbe_adapter *adapter) > return true; > } > > - for (i = 0; i < adapter->num_xdp_queues; i++) { > - struct ixgbe_ring *ring = adapter->xdp_ring[i]; > - > - if (ring->next_to_use != ring->next_to_clean) > - return true; > - } > - > return false; > } > > @@ -10742,6 +10726,10 @@ static int ixgbe_xdp_xmit(struct net_device > *dev, int n, > if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state))) > return -ENETDOWN; > > + if (!netif_carrier_ok(adapter->netdev) || > + !netif_running(adapter->netdev)) > + return -ENETDOWN; > + > if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) > return -EINVAL; > > -- > 2.43.0