Use the previously added shadow refcount for packets that are in the Rx/Tx
ring so that the ring itself only ever holds a single ref on the skbuff.

This allows skb_cow_data() to be used by the recvmsg code to make the data
modifiable for in-place decryption without triggering the assertion in
pskb_expand_head:

        BUG_ON(skb_shared(skb));

This *should* be okay as:

 (1) Once rxrpc_input_data() starts attaching the sk_buff to the ring, it
     no longer looks inside the packet (all the parsing was done previously
     and notes were taken in struct rxrpc_skb_priv).

 (2) rxrpc_recvmsg_data() may not run in parallel for a particular call.

 (3) rxrpc_recvmsg_data() cow's the sk_buff the first time it sees it and
     then steps through each pointer from the buffer in order, unpinning as
     it goes.

     Each subpacket is individually and sequentially decrypted in place in
     the sk_buff, hence the need for skb_cow_data().

 (4) No one else can be looking in a packet in the Rx ring once it's there.

The problem was occurring because the softirq handler may be holding a ref
or the ring may be holding multiple refs when skb_cow_data() is called in
rxkad_verify_packet(), and so skb_shared() returns true and
__pskb_pull_tail() dislikes that.  If this occurs, something like the
following report will be generated.

        kernel BUG at net/core/skbuff.c:1463!
        ...
        RIP: 0010:pskb_expand_head+0x253/0x2b0
        ...
        Call Trace:
         __pskb_pull_tail+0x49/0x460
         skb_cow_data+0x6f/0x300
         rxkad_verify_packet+0x18b/0xb10 [rxrpc]
         rxrpc_recvmsg_data.isra.11+0x4a8/0xa10 [rxrpc]
         rxrpc_kernel_recv_data+0x126/0x240 [rxrpc]
         afs_extract_data+0x51/0x2d0 [kafs]
         afs_deliver_fs_fetch_data+0x188/0x400 [kafs]
         afs_deliver_to_call+0xac/0x430 [kafs]
         afs_wait_for_call_to_complete+0x22f/0x3d0 [kafs]
         afs_make_call+0x282/0x3f0 [kafs]
         afs_fs_fetch_data+0x164/0x300 [kafs]
         afs_fetch_data+0x54/0x130 [kafs]
         afs_readpages+0x20d/0x340 [kafs]
         read_pages+0x66/0x180
         __do_page_cache_readahead+0x188/0x1a0
         ondemand_readahead+0x17d/0x2e0
         generic_file_read_iter+0x740/0xc10
         __vfs_read+0x145/0x1a0
         vfs_read+0x8c/0x140
         ksys_read+0x4a/0xb0
         do_syscall_64+0x43/0xf0
         entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code")
Reported-by: Julian Wollrath <jwollr...@web.de>
Signed-off-by: David Howells <dhowe...@redhat.com>
---

 net/rxrpc/call_object.c |    2 +-
 net/rxrpc/input.c       |   22 ++++++++++------------
 net/rxrpc/recvmsg.c     |    2 +-
 net/rxrpc/sendmsg.c     |    1 +
 4 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 014548c259ce..830b6152dfa3 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -429,7 +429,7 @@ static void rxrpc_cleanup_ring(struct rxrpc_call *call)
        int i;
 
        for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) {
-               rxrpc_free_skb(call->rxtx_buffer[i], rxrpc_skb_cleaned);
+               rxrpc_unpin_skb(call->rxtx_buffer[i], rxrpc_skb_cleaned);
                call->rxtx_buffer[i] = NULL;
        }
 }
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 31090bdf1fae..660b7eed39b7 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -258,7 +258,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
                skb = list;
                list = skb->next;
                skb_mark_not_on_list(skb);
-               rxrpc_free_skb(skb, rxrpc_skb_freed);
+               rxrpc_unpin_skb(skb, rxrpc_skb_unpin);
        }
 
        return rot_last;
@@ -447,6 +447,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
                return;
        }
 
+       atomic_set(&sp->nr_ring_pins, 1);
+
        if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST) {
                unsigned long timo = READ_ONCE(call->next_req_timo);
                unsigned long now, expect_req_by;
@@ -550,6 +552,12 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
                        ack_serial = serial;
                }
 
+               /* Each insertion into the rxtx_buffer holds a ring pin.  This
+                * allows a single ref on the buffer to be shared, thereby
+                * allowing skb_cow_data() to be used.
+                */
+               rxrpc_pin_skb(skb, rxrpc_skb_pin);
+
                /* Queue the packet.  We use a couple of memory barriers here as need
                 * to make sure that rx_top is perceived to be set after the buffer
                 * pointer and that the buffer pointer is set after the annotation and
@@ -558,8 +566,6 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
                 * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window()
                 * and also rxrpc_fill_out_ack().
                 */
-               if (!terminal)
-                       rxrpc_get_skb(skb, rxrpc_skb_got);
                call->rxtx_annotations[ix] = annotation;
                smp_wmb();
                call->rxtx_buffer[ix] = skb;
@@ -574,14 +580,6 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
                        immediate_ack = true;
                }
 
-               if (terminal) {
-                       /* From this point on, we're not allowed to touch the
-                        * packet any longer as its ref now belongs to the Rx
-                        * ring.
-                        */
-                       skb = NULL;
-               }
-
                if (last) {
                        set_bit(RXRPC_CALL_RX_LAST, &call->flags);
                        if (!ack) {
@@ -620,7 +618,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
 
 unlock:
        spin_unlock(&call->input_lock);
-       rxrpc_free_skb(skb, rxrpc_skb_freed);
+       rxrpc_unpin_skb(skb, rxrpc_skb_unpin);
        _leave(" [queued]");
 }
 
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 3b0becb12041..82bb48d96526 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -205,7 +205,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
        /* Barrier against rxrpc_input_data(). */
        smp_store_release(&call->rx_hard_ack, hard_ack);
 
-       rxrpc_free_skb(skb, rxrpc_skb_freed);
+       rxrpc_unpin_skb(skb, rxrpc_skb_unpin);
 
        trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack);
        if (last) {
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 6a1547b270fe..ba0e2aa268b1 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -175,6 +175,7 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
         */
        skb->tstamp = ktime_get_real();
 
+       atomic_set(&sp->nr_ring_pins, 1);
        ix = seq & RXRPC_RXTX_BUFF_MASK;
        rxrpc_get_skb(skb, rxrpc_skb_got);
        call->rxtx_annotations[ix] = annotation;

Reply via email to