When the BPF sk_msg verdict sets apply_bytes to fewer bytes than the
current open record holds, tls_push_record() splits ctx->open_rec into
the record being encrypted and a remainder record. The synchronous path
reattaches the remainder to ctx->open_rec before continuing.

If the selected AEAD provider completes asynchronously,
tls_do_encryption() returns -EINPROGRESS after unhooking ctx->open_rec.
tls_push_record() currently returns immediately in that case, before
the split remainder is reattached. The remainder is no longer reachable
through ctx->open_rec or ctx->tx_list, which can silently drop data
queued for transmission and leak the unreachable tls_rec.

Keep the split remainder rooted even when encryption of the first record
is pending asynchronously, and continue the BPF verdict drain loop after
an async record has been queued. Re-rooting alone is insufficient: the
final split remainder can otherwise remain as ctx->open_rec until close,
where it is freed instead of transmitted.

Fixes: d3b18ad31f93 ("tls: add bpf support to sk_msg handling")
Cc: [email protected] # 4.20+
Signed-off-by: Christopher Lusk <[email protected]>
Assisted-by: Codex:gpt-5.5
Assisted-by: Claude:claude-opus-4-7
---
 net/tls/tls_sw.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 964ebc268..6d3df74dd 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -840,16 +840,19 @@ static int tls_push_record(struct sock *sk, int flags,
        rc = tls_do_encryption(sk, tls_ctx, ctx, req,
                               msg_pl->sg.size + prot->tail_size, i);
        if (rc < 0) {
-               if (rc != -EINPROGRESS) {
-                       tls_err_abort(sk, -EBADMSG);
-                       if (split) {
-                               tls_ctx->pending_open_record_frags = true;
-                               tls_merge_open_record(sk, rec, tmp, orig_end);
-                       }
+               if (rc == -EINPROGRESS)
+                       goto split_done;
+
+               tls_err_abort(sk, -EBADMSG);
+               if (split) {
+                       tls_ctx->pending_open_record_frags = true;
+                       tls_merge_open_record(sk, rec, tmp, orig_end);
                }
                ctx->async_capable = 1;
                return rc;
-       } else if (split) {
+       }
+split_done:
+       if (split) {
                msg_pl = &tmp->msg_plaintext;
                msg_en = &tmp->msg_encrypted;
                sk_msg_trim(sk, msg_en, msg_pl->sg.size + prot->overhead_size);
@@ -857,6 +860,11 @@ static int tls_push_record(struct sock *sk, int flags,
                ctx->open_rec = tmp;
        }
 
+       if (rc < 0) {
+               ctx->async_capable = 1;
+               return rc;
+       }
+
        return tls_tx_records(sk, flags);
 }
 
@@ -871,6 +879,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
        struct sock *sk_redir;
        struct tls_rec *rec;
        bool enospc, policy, redir_ingress;
+       bool async = false;
        int err = 0, send;
        u32 delta = 0;
 
@@ -920,6 +929,10 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
        switch (psock->eval) {
        case __SK_PASS:
                err = tls_push_record(sk, flags, record_type);
+               if (err == -EINPROGRESS) {
+                       async = true;
+                       err = 0;
+               }
                if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) {
                        *copied -= sk_msg_free(sk, msg);
                        tls_free_open_rec(sk);
@@ -989,7 +1002,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
        }
  out_err:
        sk_psock_put(sk, psock);
-       return err;
+       return err ?: (async ? -EINPROGRESS : 0);
 }
 
 static int tls_sw_push_pending_record(struct sock *sk, int flags)
-- 
2.54.0


Reply via email to