Fix a race condition between ice_free_tx_tstamp_ring() and ice_tx_map()
that can cause a NULL pointer dereference.

ice_free_tx_tstamp_ring() currently clears the ICE_TX_FLAGS_TXTIME flag
after NULLing tx_ring->tstamp_ring. A concurrent ice_tx_map() call on
another CPU can therefore still see the flag set, load the already-NULL
tstamp_ring pointer, and dereference it.

  CPU A:ice_free_tx_tstamp_ring() | CPU B:ice_tx_map()
  --------------------------------|---------------------------------
  tx_ring->tstamp_ring = NULL     |
                                  | ice_is_txtime_cfg() -> true
                                  | tstamp_ring = tx_ring->tstamp_ring
                                  | tstamp_ring->count  // NULL deref!
  flags &= ~ICE_TX_FLAGS_TXTIME   |

Fix this by:
1. ice_free_tx_tstamp_ring(): Clear the flag before NULLing the pointer.
   Use WRITE_ONCE() to prevent store tearing, and smp_wmb() to keep the
   flag store ordered before the pointer store.
2. ice_tx_map(): Add smp_rmb() after the flag check to order the flag
   read before the pointer read, use READ_ONCE() for the pointer, and
   add a NULL check. If tstamp_ring is NULL, fall back to the regular
   TX ring kick to avoid leaving packets stuck in the ring.

Fixes: ccde82e90946 ("ice: add E830 Earliest TxTime First Offload support")
Signed-off-by: Keita Morisaki <[email protected]>
---
 drivers/net/ethernet/intel/ice/ice_txrx.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index ad76768a42323..d48740f2b626a 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -190,9 +190,10 @@ void ice_free_tstamp_ring(struct ice_tx_ring *tx_ring)
 void ice_free_tx_tstamp_ring(struct ice_tx_ring *tx_ring)
 {
        ice_free_tstamp_ring(tx_ring);
+       WRITE_ONCE(tx_ring->flags, tx_ring->flags & ~ICE_TX_FLAGS_TXTIME);
+       smp_wmb();      /* order flag clear before pointer NULL; pairs with ice_tx_map() */
        kfree_rcu(tx_ring->tstamp_ring, rcu);
-       tx_ring->tstamp_ring = NULL;
-       tx_ring->flags &= ~ICE_TX_FLAGS_TXTIME;
+       WRITE_ONCE(tx_ring->tstamp_ring, NULL);
 }
 
 /**
@@ -1519,13 +1520,20 @@ ice_tx_map(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first,
                return;
 
        if (ice_is_txtime_cfg(tx_ring)) {
-               struct ice_tstamp_ring *tstamp_ring = tx_ring->tstamp_ring;
-               u32 tstamp_count = tstamp_ring->count;
-               u32 j = tstamp_ring->next_to_use;
+               struct ice_tstamp_ring *tstamp_ring;
+               u32 tstamp_count, j;
                struct ice_ts_desc *ts_desc;
                struct timespec64 ts;
                u32 tstamp;
 
+               smp_rmb();      /* order flag read before pointer read */
+               tstamp_ring = READ_ONCE(tx_ring->tstamp_ring);
+               if (unlikely(!tstamp_ring))
+                       goto ring_kick;
+
+               tstamp_count = tstamp_ring->count;
+               j = tstamp_ring->next_to_use;
+
                ts = ktime_to_timespec64(first->skb->tstamp);
                tstamp = ts.tv_nsec >> ICE_TXTIME_CTX_RESOLUTION_128NS;
 
@@ -1553,6 +1561,7 @@ ice_tx_map(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first,
                tstamp_ring->next_to_use = j;
                writel_relaxed(j, tstamp_ring->tail);
        } else {
+ring_kick:
                writel_relaxed(i, tx_ring->tail);
        }
        return;

base-commit: 18f7fcd5e69a04df57b563360b88be72471d6b62
-- 
2.34.1

Reply via email to