This fixes a problem which was first pointed out on
http://www.mail-archive.com/[email protected]/msg00354.html
The problem occurs on a listening DCCPv4/6 socket that has sent a Response in
reply to a Request; it is triggered at the time the subsequent ACK is received.
The cause is that sk_receive_skb locks sk and then calls the AF-specific
backlog function (dccp_v{4,6}_do_rcv). This ends up calling
dccp_v{4,6}_request_recv_sock, which calls dccp_create_openreq_child and,
from there, sk_clone via inet_csk_clone.
In sk_clone, a lock is taken on newsk, which makes the lock-checking mechanism
think that the same lock was acquired twice.
Call graph:
**********
dccp_v4_rcv(skb)
|
sk_receive_skb(sk, skb)
|
bh_lock_sock(sk)
rc = sk->backlog_rcv(sk, skb)
|
dccp_v4_do_rcv(sk, skb) /* state == LISTEN */
|
dccp_v4_hnd_req(sk, skb)
|
dccp_check_req(sk, skb, req, prev)
|
child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk,
skb, req, NULL);
| /* syn_recv_sock translates to the
following: */
child = dccp_v4_request_recv_sock(sk, skb, req, NULL);
| /* contains the following code: */
newsk = dccp_create_openreq_child(sk, req, skb)
| /* calls: */
newsk = inet_csk_clone(sk, req, GFP_ATOMIC)
| /* in turn calls: */
newsk = sk_clone(sk, priority) /* newsk !=
NULL */
| /* has this code segment */
sk_node_init(&newsk->sk_node)
sock_lock_init(newsk)
bh_lock_sock(newsk) /* <===
putative recursive lock */
/* ..... returns here ..... */
bh_unlock_sock(sk)
/* return of dccp_v4_rcv(): sk is unlocked, whereas newsk is still locked */
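This patch fixes the problem by adapting the corresponding code from
net/ipv4/tcp_ipv4.c and net/ipv6/tcp_ipv6.c.
Effectively, the body of the short sk_receive_skb function is unfolded in place,
and bh_lock_sock_nested is used instead of bh_lock_sock, so that the lock taken
on the listening socket is marked as nested with respect to the lock that
sk_clone takes on the newly created child socket.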
Signed-off-by: Gerrit Renker <[EMAIL PROTECTED]>
---
ipv4.c | 22 +++++++++++++++++++---
ipv6.c | 27 ++++++++++++++++++++-------
2 files changed, 39 insertions(+), 10 deletions(-)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index f87b0c0..f86ebcc 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -897,6 +897,7 @@ static int dccp_v4_rcv(struct sk_buff *s
{
const struct dccp_hdr *dh;
struct sock *sk;
+ int ret = 0;
/* Step 1: Check header basics: */
@@ -970,7 +971,23 @@ static int dccp_v4_rcv(struct sk_buff *s
goto discard_and_relse;
nf_reset(skb);
- return sk_receive_skb(sk, skb);
+ if (sk_filter(sk, skb))
+ goto discard_and_relse;
+
+ bh_lock_sock_nested(sk); /* sk might be cloned later */
+ if (!sock_owned_by_user(sk)) {
+ /*
+ * lock semantics stolen from sk_receive_skb
+ */
+ mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
+ ret = dccp_v4_do_rcv(sk, skb);
+ mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
+ } else
+ sk_add_backlog(sk, skb);
+ bh_unlock_sock(sk);
+
+ sock_put(sk);
+ return ret;
no_dccp_socket:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
@@ -987,9 +1004,8 @@ no_dccp_socket:
}
discard_it:
- /* Discard frame. */
kfree_skb(skb);
- return 0;
+ return ret;
discard_and_relse:
sock_put(sk);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 91e7b12..49690ee 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1035,6 +1035,7 @@ static int dccp_v6_rcv(struct sk_buff **
const struct dccp_hdr *dh;
struct sk_buff *skb = *pskb;
struct sock *sk;
+ int ret = 0;
/* Step 1: Check header basics: */
@@ -1078,7 +1079,23 @@ static int dccp_v6_rcv(struct sk_buff **
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
- return sk_receive_skb(sk, skb) ? -1 : 0;
+ if (sk_filter(sk, skb))
+ goto discard_and_relse;
+
+ bh_lock_sock_nested(sk); /* sk might be cloned later */
+ if (!sock_owned_by_user(sk)) {
+ /*
+ * lock semantics stolen from sk_receive_skb
+ */
+ mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
+ ret = dccp_v6_do_rcv(sk, skb);
+ mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
+ } else
+ sk_add_backlog(sk, skb);
+ bh_unlock_sock(sk);
+
+ sock_put(sk);
+ return ret? -1 : 0;
no_dccp_socket:
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
@@ -1093,14 +1110,10 @@ no_dccp_socket:
DCCP_RESET_CODE_NO_CONNECTION;
dccp_v6_ctl_send_reset(skb);
}
-discard_it:
-
- /*
- * Discard frame
- */
+discard_it:
kfree_skb(skb);
- return 0;
+ return ret;
discard_and_relse:
sock_put(sk);
-
To unsubscribe from this list: send the line "unsubscribe dccp" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html