From: Paolo Abeni <pab...@redhat.com>

[ Upstream commit d518d2ed8640c1cbbbb6f63939e3e65471817367 ]

The test implemented by some_qdisc_is_busy() is somewhat loosy for
NOLOCK qdisc, as we may hit the following scenario:

CPU1                                            CPU2
// in net_tx_action()
clear_bit(__QDISC_STATE_SCHED...);
                                                // in some_qdisc_is_busy()
                                                val = (qdisc_is_running(q) ||
                                                       
test_bit(__QDISC_STATE_SCHED,
                                                                &q->state));
                                                // here val is 0 but...
qdisc_run(q)
// ... CPU1 is going to run the qdisc next

As a conseguence qdisc_run() in net_tx_action() can race with qdisc_reset()
in dev_qdisc_reset(). Such race is not possible for !NOLOCK qdisc as
both the above bit operations are under the root qdisc lock().

After commit 021a17ed796b ("pfifo_fast: drop unneeded additional lock on 
dequeue")
the race can cause use after free and/or null ptr dereference, but the root
cause is likely older.

This patch addresses the issue explicitly checking for deactivation under
the seqlock for NOLOCK qdisc, so that the qdisc_run() in the critical
scenario becomes a no-op.

Note that the enqueue() op can still execute concurrently with 
dev_qdisc_reset(),
but that is safe due to the skb_array() locking, and we can't avoid that
for NOLOCK qdiscs.

Fixes: 021a17ed796b ("pfifo_fast: drop unneeded additional lock on dequeue")
Reported-by: Li Shuang <shu...@redhat.com>
Reported-and-tested-by: Davide Caratti <dcara...@redhat.com>
Signed-off-by: Paolo Abeni <pab...@redhat.com>
Signed-off-by: David S. Miller <da...@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gre...@linuxfoundation.org>
---
 include/net/pkt_sched.h |    7 ++++++-
 net/core/dev.c          |   16 ++++++++++------
 2 files changed, 16 insertions(+), 7 deletions(-)

--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -118,7 +118,12 @@ void __qdisc_run(struct Qdisc *q);
 static inline void qdisc_run(struct Qdisc *q)
 {
        if (qdisc_run_begin(q)) {
-               __qdisc_run(q);
+               /* NOLOCK qdisc must check 'state' under the qdisc seqlock
+                * to avoid racing with dev_qdisc_reset()
+                */
+               if (!(q->flags & TCQ_F_NOLOCK) ||
+                   likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
+                       __qdisc_run(q);
                qdisc_run_end(q);
        }
 }
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3467,18 +3467,22 @@ static inline int __dev_xmit_skb(struct
        qdisc_calculate_pkt_len(skb, q);
 
        if (q->flags & TCQ_F_NOLOCK) {
-               if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
-                       __qdisc_drop(skb, &to_free);
-                       rc = NET_XMIT_DROP;
-               } else if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty &&
-                          qdisc_run_begin(q)) {
+               if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty &&
+                   qdisc_run_begin(q)) {
+                       if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
+                                             &q->state))) {
+                               __qdisc_drop(skb, &to_free);
+                               rc = NET_XMIT_DROP;
+                               goto end_run;
+                       }
                        qdisc_bstats_cpu_update(q, skb);
 
+                       rc = NET_XMIT_SUCCESS;
                        if (sch_direct_xmit(skb, q, dev, txq, NULL, true))
                                __qdisc_run(q);
 
+end_run:
                        qdisc_run_end(q);
-                       rc = NET_XMIT_SUCCESS;
                } else {
                        rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
                        qdisc_run(q);


Reply via email to