On Thu, Jan 15, 2026 at 11:05:37AM +0000, Behera, VIVEK wrote:
> Hi Maciej
> 
> > -----Original Message-----
> > From: Maciej Fijalkowski <[email protected]>
> > Sent: Wednesday, January 14, 2026 7:21 PM
> > To: Behera, Vivek (DI FA DSP ICC PRC1) <[email protected]>
> > Cc: [email protected]; [email protected];
> > [email protected]; [email protected];
> > [email protected]; [email protected]; intel-wired-
> > [email protected]; [email protected]
> > Subject: Re: AW: [PATCH iwl-net v5] igb: Fix trigger of incorrect irq in
> > igb_xsk_wakeup
> > 
> > On Wed, Jan 14, 2026 at 08:19:37AM +0000, Behera, VIVEK wrote:
> > 
> > (...)
> > 
> > > > >
> > > > > diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h
> > > > > b/drivers/net/ethernet/intel/igb/e1000_defines.h
> > > > > index fa028928482f..9357564a2d58 100644
> > > > > --- a/drivers/net/ethernet/intel/igb/e1000_defines.h
> > > > > +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h
> > > > > @@ -443,6 +443,7 @@
> > > > >  #define E1000_ICS_LSC       E1000_ICR_LSC       /* Link Status 
> > > > > Change */
> > > > >  #define E1000_ICS_RXDMT0    E1000_ICR_RXDMT0    /* rx desc min.
> > threshold
> > > > */
> > > > >  #define E1000_ICS_DRSTA     E1000_ICR_DRSTA     /* Device Reset
> > Aserted */
> > > > > +#define E1000_ICS_TXDW      E1000_ICR_TXDW   /* Transmit desc
> > written
> > > > back */
> > > > >
> > > > >  /* Extended Interrupt Cause Set */
> > > > >  /* E1000_EITR_CNT_IGNR is only for 82576 and newer */ diff --git
> > > > > a/drivers/net/ethernet/intel/igb/igb_xsk.c
> > > > > b/drivers/net/ethernet/intel/igb/igb_xsk.c
> > > > > index 30ce5fbb5b77..6e51b5b6f131 100644
> > > > > --- a/drivers/net/ethernet/intel/igb/igb_xsk.c
> > > > > +++ b/drivers/net/ethernet/intel/igb/igb_xsk.c
> > > > > @@ -529,6 +529,13 @@ int igb_xsk_wakeup(struct net_device *dev,
> > > > > u32 qid, u32
> > > > flags)
> > > > >       struct igb_adapter *adapter = netdev_priv(dev);
> > > > >       struct e1000_hw *hw = &adapter->hw;
> > > > >       struct igb_ring *ring;
> > > > > +     struct igb_q_vector *q_vector;
> > > > > +     struct napi_struct *rx_napi;
> > > > > +     struct napi_struct *tx_napi;
> > > > > +     bool trigger_irq_tx = false;
> > > > > +     bool trigger_irq_rx = false;
> > > > > +     u32 eics_tx = 0;
> > > > > +     u32 eics_rx = 0;
> > > > >       u32 eics = 0;
> > > > >
> > > > >       if (test_bit(__IGB_DOWN, &adapter->state)) @@ -536,27 +543,65 @@
> > > > > int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
> > > > >
> > > > >       if (!igb_xdp_is_enabled(adapter))
> > > > >               return -EINVAL;
> > > > > -
> > > > > -     if (qid >= adapter->num_tx_queues)
> > > > > +     /* Check if queue_id is valid. Tx and Rx queue numbers are 
> > > > > always same */
> > > > > +     if (qid >= adapter->num_rx_queues)
> > > > >               return -EINVAL;
> > > > > -
> > > > > -     ring = adapter->tx_ring[qid];
> > > > > -
> > > > > -     if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags))
> > > > > -             return -ENETDOWN;
> > > > > -
> > > > > -     if (!READ_ONCE(ring->xsk_pool))
> > > > > +     /* Check if flags are valid */
> > > > > +     if (!(flags & XDP_WAKEUP_RX) && !(flags & XDP_WAKEUP_TX))
> > > > >               return -EINVAL;
> > > > > -
> > > > > -     if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
> > > > > -             /* Cause software interrupt */
> > > > > +     if (flags & XDP_WAKEUP_RX) {
> > > > > +             /* IRQ trigger preparation for Rx */
> > > > > +             ring = adapter->rx_ring[qid];
> > > > > +             if (!READ_ONCE(ring->xsk_pool))
> > > > > +                     return -ENXIO;
> > > > > +             q_vector = ring->q_vector;
> > > > > +             rx_napi = &q_vector->napi;
> > > > > +             /* Extend the BIT mask for eics */
> > > > > +             eics_rx = ring->q_vector->eims_value;
> > > > > +             trigger_irq_rx = true;
> > > > > +     }
> > > > > +     if (flags & XDP_WAKEUP_TX) {
> > > > > +             if (adapter->flags & IGB_FLAG_QUEUE_PAIRS) {
> > > > > +             /* In queue-pair mode, rx_ring and tx_ring share the 
> > > > > same
> > q_vector,
> > > > > +              * so a single IRQ trigger will wake both RX and TX 
> > > > > processing
> > > > > +              */
> > > > > +             } else {
> > > > > +                     /* IRQ trigger preparation for Tx */
> > > > > +                     ring = adapter->tx_ring[qid];
> > > > > +                     if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring-
> > >flags))
> > > > > +                             return -ENETDOWN;
> > > > > +
> > > > > +                     if (!READ_ONCE(ring->xsk_pool))
> > > > > +                             return -ENXIO;
> > > > > +                     q_vector = ring->q_vector;
> > > > > +                     tx_napi = &q_vector->napi;
> > > > > +                     /* Extend the BIT mask for eics */
> > > > > +                     eics_tx = ring->q_vector->eims_value;
> > > > > +                     trigger_irq_tx = true;
> > > > > +             }
> > > > > +     }
> > > > > +     /* All error checks are finished. Check and update napi states 
> > > > > for rx and tx
> > */
> > > > > +     if (trigger_irq_rx) {
> > > > > +             if (!napi_if_scheduled_mark_missed(rx_napi))
> > > > > +                     eics |= eics_rx;
> > > > > +     }
> > > > > +     if (trigger_irq_tx) {
> > > > > +             if (!napi_if_scheduled_mark_missed(tx_napi))
> > > > > +                     eics |= eics_tx;
> > > > > +     }
> > > > > +     /* Now we trigger the required irqs for Rx and Tx */
> > > > > +     if ((trigger_irq_rx) || (trigger_irq_tx)) {
> > > > >               if (adapter->flags & IGB_FLAG_HAS_MSIX) {
> > > > > -                     eics |= ring->q_vector->eims_value;
> > > > >                       wr32(E1000_EICS, eics);
> > > > >               } else {
> > > > > -                     wr32(E1000_ICS, E1000_ICS_RXDMT0);
> > > > > +                     if ((trigger_irq_rx) && (trigger_irq_tx))
> > > > > +                             wr32(E1000_ICS,
> > > > > +                                  E1000_ICS_RXDMT0 | E1000_ICS_TXDW);
> > > > > +                     else if (trigger_irq_rx)
> > > > > +                             wr32(E1000_ICS, E1000_ICS_RXDMT0);
> > > > > +                     else
> > > > > +                             wr32(E1000_ICS, E1000_ICS_TXDW);
> > > >
> > > > My understanding is something below would be sufficient. Bits set on
> > > > E1000_ICS are not handled in any way so we don't have to distinguish
> > > > between rx/tx, it's just the matter of irq trigger and napi schedule.
> > > >
> > > Hi see my comments below
> > > > -----------------8<-----------------
> > > >
> > > > diff --git a/drivers/net/ethernet/intel/igb/igb_xsk.c
> > > > b/drivers/net/ethernet/intel/igb/igb_xsk.c
> > > > index 30ce5fbb5b77..0aba7afd6a03 100644
> > > > --- a/drivers/net/ethernet/intel/igb/igb_xsk.c
> > > > +++ b/drivers/net/ethernet/intel/igb/igb_xsk.c
> > > > @@ -524,12 +524,26 @@ bool igb_xmit_zc(struct igb_ring *tx_ring,
> > > > struct xsk_buff_pool *xsk_pool)
> > > >         return nb_pkts < budget;
> > > >  }
> > > >
> > > > +static void igb_sw_irq(struct igb_q_vector *q_vector) {
> > > > +       u32 eics = 0;
> > > > +
> > > > +       if (!napi_if_scheduled_mark_missed(&q_vector->napi)) {
> > > > +               /* Cause software interrupt */
> > > > +               if (adapter->flags & IGB_FLAG_HAS_MSIX) {
> > > > +                       eics |= ring->q_vector->eims_value;
> > > > +                       wr32(E1000_EICS, eics);
> > > > +               } else {
> > > > +                       wr32(E1000_ICS, E1000_ICS_RXDMT0);
> > > So here it is sufficient to rely on the E1000_ICS_RXDMT0 bit to trigger 
> > > the
> > correct irq (Tx and Rx)?
> > >  I remember I received a review comment from Intel point to E1000_ICS_TXDW
> > as being the correct bit of triggering TX for non MSIX case.
> > > I can't really evaluate this since I don't have a setup to test this.
> > > But okay
> > 
> > I don't see in irq handlers that we do any specific handling for txdw vs 
> > rxdmt0. It's
> > rather a matter of getting an irq here.
> Yes amongst the interrupt cause checks implemented in the msi interrupt 
> handler
> there is no interest in either E1000_ICR_RXDMT0 E1000_ICR_TXDW as events.
> All that matters in this in this case is a softirq trigerring napi_schedule  
> . So we can stick
> to E1000_ICS_RXDMT0 irrespective of the flag in the xsk wakeup function. 
> Although
> I must mention that all other usages of E1000_ICS_RXDMT0 in the kernel are in 
> combination
> with rx_ring so from the code perspective this usage looks strange to someone 
> without deeper knowledge of the system. 
> > 
> > > > +               }
> > > > +       }
> > > > +}
> > > > +
> > > >  int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)  {
> > > >         struct igb_adapter *adapter = netdev_priv(dev);
> > > >         struct e1000_hw *hw = &adapter->hw;
> > > >         struct igb_ring *ring;
> > > > -       u32 eics = 0;
> > > >
> > > >         if (test_bit(__IGB_DOWN, &adapter->state))
> > > >                 return -ENETDOWN;
> > > > @@ -548,14 +562,15 @@ int igb_xsk_wakeup(struct net_device *dev, u32
> > > > qid, u32
> > > > flags)
> > > >         if (!READ_ONCE(ring->xsk_pool))
> > > >                 return -EINVAL;
> > > >
> > > > -       if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
> > > > -               /* Cause software interrupt */
> > > > -               if (adapter->flags & IGB_FLAG_HAS_MSIX) {
> > > > -                       eics |= ring->q_vector->eims_value;
> > > > -                       wr32(E1000_EICS, eics);
> > > > -               } else {
> > > > -                       wr32(E1000_ICS, E1000_ICS_RXDMT0);
> > > > -               }
> > > > +       if (flags & XDP_WAKEUP_TX)
> > > > +               igb_sw_irq(ring->q_vector);
> > > > +
> > > > +       if (flags & XDP_WAKEUP_RX) {
> > > > +               ring = adapter->rx_ring[qid];
> > > > +               /* for !IGB_FLAG_QUEUE_PAIRS, this will be NOP as NAPI 
> > > > has
> > > > +                * been already marked with NAPIF_STATE_MISSED
> > > > +                */
> > > I think you meant for the case IGB_FLAG_QUEUE_PAIRS. Since when the
> > > queue pairs are not active the Tx AND Rx queues don't share the same
> > > qvector and consequently not the same NAPI
> > 
> > yes, correct
> > 
> > > > +               igb_sw_irq(ring->q_vector);
> > > Okay so you would be triggering soft irq's in two steps if both TX and RX 
> > > flags
> > are set.
> > > Honestly, I have tried to avoid doing this in my patch.  Which is the
> > > reason why I wait to finish all the error checks, Napi updates before 
> > > triggering the
> > required irq vectors by writing to eics with a single write.
> > > But okay the other approach also works
> > 
> > 
> > 
> > >
> > > >         }
> > > >
> > > >         return 0;
> > > >
> > > > ----------------->8-----------------
> > > >
> > > > >               }
> > > > >       }
> > > > > -
> > > > >       return 0;
> > > > >  }
> > > > > --
> > > > > 2.34.1
> > > > >
> > > I think the strategy of triggering interrupts in one step after 
> > > performing all the
> > necessary checks is what might make this approach look complex.
> > > IMHO the one step strategy is better and more intuitive.
> > > Unfortunately, there isn't a reference here to go by since none of the
> > xsk_wakeup hooks implemented in the kernel care about the flags
> > > I can submit a v6 of the patch based the one step approach with further
> > simplifications. v6 would also include review suggestions I received for v5.
> > > Like this I can also submit the next version to the igc patch. It follows 
> > > the same
> > logic as the igb
> > > I have our regression tests with RTC testbench and our Siemens Profinet RT
> > tester running with these patches with I210 and I226
> > >
> > > Alternatively, you could submit patches following the igb and igc 
> > > following the
> > two-step logic.
> > 
> > How about we meet the half way and something below? that would include
> > your request of having a single write to E1000_ICS.
> > 
> Yes it sounds reasonable. However I would like to make some observations
> > 
> > diff --git a/drivers/net/ethernet/intel/igb/igb_xsk.c
> > b/drivers/net/ethernet/intel/igb/igb_xsk.c
> > index 30ce5fbb5b77..432b4c7c1850 100644
> > --- a/drivers/net/ethernet/intel/igb/igb_xsk.c
> > +++ b/drivers/net/ethernet/intel/igb/igb_xsk.c
> > @@ -524,6 +524,17 @@ bool igb_xmit_zc(struct igb_ring *tx_ring, struct
> > xsk_buff_pool *xsk_pool)
> >     return nb_pkts < budget;
> >  }
> > 
> > +static u32 igb_sw_irq_prep(struct igb_q_vector *q_vector)
> > +{
> > +   u32 eics = 0;
> > +
> > +   if (!napi_if_scheduled_mark_missed(&q_vector->napi))
> > +           eics = adapter->flags & IGB_FLAG_HAS_MSIX ?
> > +                   q_vector->eims_value : 1;
> > +
> > +   return eics;
> > +}
> > +
> >  int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
> >  {
> >     struct igb_adapter *adapter = netdev_priv(dev);
> > @@ -548,14 +559,23 @@ int igb_xsk_wakeup(struct net_device *dev, u32 qid, 
> > u32
> > flags)
> >     if (!READ_ONCE(ring->xsk_pool))
> >             return -EINVAL;
> If we want to implement proper usage of flags then I would move everything
> related to a ring's internal checks inside the if case of the corresponding 
> if case.
> Use the correct adapter ring (rx or tx) to perform the checks.
> Even though the functionality wise this code is correct why just randomly 
> pick the tx  ring 
> right at beginning of the function and do two checks with it? And same 
> question would 
> pop up to the reviewers mind for the igc driver where rx_ring is used. For me 
> correct
> usage is more important than saving some lines of code in the patch

And for me code that will be easy to maintain is important.

We could move IGB_RING_FLAG_TX_DISABLED to XDP_WAKEUP_TX branch but rest
is generic. AF_XDP works on pairs of queues so pool is loaded on both tx
and rx pointed by given queue id. IOW xsk_pool will be present on both rx
and tx rings, queue id validation can stay as is as well as igb works on
'combined' channels.

FWIW I had some PoC in the past where I implemented in AF_XDP core support
for async sockets - where pool was loaded *only* on rx or tx side. Not
sure if TSN workloads would benefit from that?

> > 
> > -   if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
> > -           /* Cause software interrupt */
> > -           if (adapter->flags & IGB_FLAG_HAS_MSIX) {
> > -                   eics |= ring->q_vector->eims_value;
> > +   if (flags & XDP_WAKEUP_TX)
> > +           eics |= igb_sw_irq_prep(ring->q_vector);
> > +
> > +   if (flags & XDP_WAKEUP_RX) {
> > +           ring = adapter->rx_ring[qid];
> > +           /* for IGB_FLAG_QUEUE_PAIRS, this will be NOP as NAPI has
> > +            * been already marked with NAPIF_STATE_MISSED
> > +            */
> > +           eics |= igb_sw_irq_prep(ring->q_vector);
> > +   }
> > +
> > +   /* Cause software interrupt */
> > +   if (eics) {
> > +           if (adapter->flags & IGB_FLAG_HAS_MSIX)
> >                     wr32(E1000_EICS, eics);
> > -           } else {
> > +           else
> >                     wr32(E1000_ICS, E1000_ICS_RXDMT0);
> > -           }
> >     }
> > 
> >     return 0;
> > 
> > >
> > > Regards
> > >
> > > Vivek
> 
> I would like to know your view about the next steps. Would you like me
> to include the changes we discussed in next version of the patch I
> submitted? 
> Or would you like to submit the patch you have prepared?

Hmm...up to you. You can take my suggestions and add some tag, such as
Suggested-by.

Thanks,
Maciej

> 
> Regards
> 
> Vivek

Reply via email to