The current implementation in the igb_xsk_wakeup expects the Rx and Tx queues
to share the same irq. This would lead to triggering of incorrect irq
in split irq configuration.
This patch addresses this issue which could impact environments
with 2 active cpu cores
or when the number of queues is reduced to 2 or less

cat /proc/interrupts | grep eno2
 167:          0          0          0          0 IR-PCI-MSIX-0000:08:00.0
 0-edge      eno2
 168:          0          0          0          0 IR-PCI-MSIX-0000:08:00.0
 1-edge      eno2-rx-0
 169:          0          0          0          0 IR-PCI-MSIX-0000:08:00.0
 2-edge      eno2-rx-1
 170:          0          0          0          0 IR-PCI-MSIX-0000:08:00.0
 3-edge      eno2-tx-0
 171:          0          0          0          0 IR-PCI-MSIX-0000:08:00.0
 4-edge      eno2-tx-1

Furthermore it uses the flags input argument to trigger either rx, tx or
both rx and tx irqs as specified in the ndo_xsk_wakeup api documentation

Fixes: 80f6ccf9f116 ("igb: Introduce XSK data structures and helpers")
Signed-off-by: Vivek Behera <[email protected]>
Reviewed-by: Aleksandr Loktionov <[email protected]>
---
v1: 
https://lore.kernel.org/intel-wired-lan/[email protected]/
v2: 
https://lore.kernel.org/intel-wired-lan/[email protected]/
v3: 
https://lore.kernel.org/intel-wired-lan/[email protected]/
v4: 
https://lore.kernel.org/intel-wired-lan/[email protected]/

changelog:
v1
- Inital description of the Bug and fixes made in the patch

v1 -> v2
- Handling of RX and TX Wakeup in igc_xsk_wakeup for a split IRQ configuration
- Review suggestions by Aleksander: Modified sequence to complete all
  error checks for rx and tx before updating napi states and triggering irqs
- Corrected trigger of TX and RX interrupts over E1000_ICS (non msix use case)
- Added define for Tx interrupt trigger bit mask for E1000_ICS

v2 -> v3
- Included applicable feedback and suggestions from igc patch
- Fixed logic in updating eics value when  both TX and RX need wakeup

v3 -> v4
- Added comments to explain trigerring of both TX and RX with active queue pairs
- Fixed check of xsk pools in if statement

v4 -> v5
- Introduced a simplified logic for sequential check for RX and TX
---
 .../net/ethernet/intel/igb/e1000_defines.h    |  1 +
 drivers/net/ethernet/intel/igb/igb_xsk.c      | 75 +++++++++++++++----
 2 files changed, 61 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h 
b/drivers/net/ethernet/intel/igb/e1000_defines.h
index fa028928482f..9357564a2d58 100644
--- a/drivers/net/ethernet/intel/igb/e1000_defines.h
+++ b/drivers/net/ethernet/intel/igb/e1000_defines.h
@@ -443,6 +443,7 @@
 #define E1000_ICS_LSC       E1000_ICR_LSC       /* Link Status Change */
 #define E1000_ICS_RXDMT0    E1000_ICR_RXDMT0    /* rx desc min. threshold */
 #define E1000_ICS_DRSTA     E1000_ICR_DRSTA     /* Device Reset Aserted */
+#define E1000_ICS_TXDW      E1000_ICR_TXDW     /* Transmit desc written back */
 
 /* Extended Interrupt Cause Set */
 /* E1000_EITR_CNT_IGNR is only for 82576 and newer */
diff --git a/drivers/net/ethernet/intel/igb/igb_xsk.c 
b/drivers/net/ethernet/intel/igb/igb_xsk.c
index 30ce5fbb5b77..6e51b5b6f131 100644
--- a/drivers/net/ethernet/intel/igb/igb_xsk.c
+++ b/drivers/net/ethernet/intel/igb/igb_xsk.c
@@ -529,6 +529,13 @@ int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 
flags)
        struct igb_adapter *adapter = netdev_priv(dev);
        struct e1000_hw *hw = &adapter->hw;
        struct igb_ring *ring;
+       struct igb_q_vector *q_vector;
+       struct napi_struct *rx_napi;
+       struct napi_struct *tx_napi;
+       bool trigger_irq_tx = false;
+       bool trigger_irq_rx = false;
+       u32 eics_tx = 0;
+       u32 eics_rx = 0;
        u32 eics = 0;
 
        if (test_bit(__IGB_DOWN, &adapter->state))
@@ -536,27 +543,65 @@ int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 
flags)
 
        if (!igb_xdp_is_enabled(adapter))
                return -EINVAL;
-
-       if (qid >= adapter->num_tx_queues)
+       /* Check if queue_id is valid. Tx and Rx queue numbers are always same 
*/
+       if (qid >= adapter->num_rx_queues)
                return -EINVAL;
-
-       ring = adapter->tx_ring[qid];
-
-       if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags))
-               return -ENETDOWN;
-
-       if (!READ_ONCE(ring->xsk_pool))
+       /* Check if flags are valid */
+       if (!(flags & XDP_WAKEUP_RX) && !(flags & XDP_WAKEUP_TX))
                return -EINVAL;
-
-       if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
-               /* Cause software interrupt */
+       if (flags & XDP_WAKEUP_RX) {
+               /* IRQ trigger preparation for Rx */
+               ring = adapter->rx_ring[qid];
+               if (!READ_ONCE(ring->xsk_pool))
+                       return -ENXIO;
+               q_vector = ring->q_vector;
+               rx_napi = &q_vector->napi;
+               /* Extend the BIT mask for eics */
+               eics_rx = ring->q_vector->eims_value;
+               trigger_irq_rx = true;
+       }
+       if (flags & XDP_WAKEUP_TX) {
+               if (adapter->flags & IGB_FLAG_QUEUE_PAIRS) {
+               /* In queue-pair mode, rx_ring and tx_ring share the same 
q_vector,
+                * so a single IRQ trigger will wake both RX and TX processing
+                */
+               } else {
+                       /* IRQ trigger preparation for Tx */
+                       ring = adapter->tx_ring[qid];
+                       if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags))
+                               return -ENETDOWN;
+
+                       if (!READ_ONCE(ring->xsk_pool))
+                               return -ENXIO;
+                       q_vector = ring->q_vector;
+                       tx_napi = &q_vector->napi;
+                       /* Extend the BIT mask for eics */
+                       eics_tx = ring->q_vector->eims_value;
+                       trigger_irq_tx = true;
+               }
+       }
+       /* All error checks are finished. Check and update napi states for rx 
and tx */
+       if (trigger_irq_rx) {
+               if (!napi_if_scheduled_mark_missed(rx_napi))
+                       eics |= eics_rx;
+       }
+       if (trigger_irq_tx) {
+               if (!napi_if_scheduled_mark_missed(tx_napi))
+                       eics |= eics_tx;
+       }
+       /* Now we trigger the required irqs for Rx and Tx */
+       if ((trigger_irq_rx) || (trigger_irq_tx)) {
                if (adapter->flags & IGB_FLAG_HAS_MSIX) {
-                       eics |= ring->q_vector->eims_value;
                        wr32(E1000_EICS, eics);
                } else {
-                       wr32(E1000_ICS, E1000_ICS_RXDMT0);
+                       if ((trigger_irq_rx) && (trigger_irq_tx))
+                               wr32(E1000_ICS,
+                                    E1000_ICS_RXDMT0 | E1000_ICS_TXDW);
+                       else if (trigger_irq_rx)
+                               wr32(E1000_ICS, E1000_ICS_RXDMT0);
+                       else
+                               wr32(E1000_ICS, E1000_ICS_TXDW);
                }
        }
-
        return 0;
 }
-- 
2.34.1

Reply via email to