Re: [PATCH net-next v3 2/3] trace: tcp: fully support trace_tcp_send_reset

2024-03-29 Thread Eric Dumazet
On Fri, Mar 29, 2024 at 11:23 AM Jason Xing  wrote:
>
> On Fri, Mar 29, 2024 at 5:07 PM Eric Dumazet  wrote:
> >
> > On Fri, Mar 29, 2024 at 4:43 AM Jason Xing  
> > wrote:
> > >
> > > From: Jason Xing 
> > >
> > > Prior to this patch, what we can see by enabling trace_tcp_send is
> > > only happening under two circumstances:
> > > 1) active rst mode
> > > 2) non-active rst mode and based on the full socket
> > >
> > > That means the inconsistency occurs if we use tcpdump and trace
> > > simultaneously to see how rst happens.
> > >
> > > It's necessary that we should take into other cases into considerations,
> > > say:
> > > 1) time-wait socket
> > > 2) no socket
> > > ...
> > >
> > > By parsing the incoming skb and reversing its 4-tuple can
> > > we know the exact 'flow' which might not exist.
> > >
> > > Samples after applied this patch:
> > > 1. tcp_send_reset: skbaddr=XXX skaddr=XXX src=ip:port dest=ip:port
> > > state=TCP_ESTABLISHED
> > > 2. tcp_send_reset: skbaddr=000...000 skaddr=XXX src=ip:port dest=ip:port
> > > state=UNKNOWN
> > > Note:
> > > 1) UNKNOWN means we cannot extract the right information from skb.
> > > 2) skbaddr/skaddr could be 0
> > >
> > > Signed-off-by: Jason Xing 
> > > ---
> > >  include/trace/events/tcp.h | 39 --
> > >  net/ipv4/tcp_ipv4.c|  4 ++--
> > >  net/ipv6/tcp_ipv6.c|  3 ++-
> > >  3 files changed, 41 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
> > > index 194425f69642..289438c54227 100644
> > > --- a/include/trace/events/tcp.h
> > > +++ b/include/trace/events/tcp.h
> > > @@ -78,11 +78,46 @@ DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb,
> > >   * skb of trace_tcp_send_reset is the skb that caused RST. In case of
> > >   * active reset, skb should be NULL
> > >   */
> > > -DEFINE_EVENT(tcp_event_sk_skb, tcp_send_reset,
> > > +TRACE_EVENT(tcp_send_reset,
> > >
> > > TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
> > >
> > > -   TP_ARGS(sk, skb)
> > > +   TP_ARGS(sk, skb),
> > > +
> > > +   TP_STRUCT__entry(
> > > +   __field(const void *, skbaddr)
> > > +   __field(const void *, skaddr)
> > > +   __field(int, state)
> > > +   __array(__u8, saddr, sizeof(struct sockaddr_in6))
> > > +   __array(__u8, daddr, sizeof(struct sockaddr_in6))
> > > +   ),
> > > +
> > > +   TP_fast_assign(
> > > +   __entry->skbaddr = skb;
> > > +   __entry->skaddr = sk;
> > > +   /* Zero means unknown state. */
> > > +   __entry->state = sk ? sk->sk_state : 0;
> > > +
> > > +   memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
> > > +   memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
> > > +
> > > +   if (sk && sk_fullsock(sk)) {
> > > +   const struct inet_sock *inet = inet_sk(sk);
> > > +
> > > +   TP_STORE_ADDR_PORTS(__entry, inet, sk);
> > > +   } else {
> >
> > To be on the safe side, I would test if (skb) here.
> > We have one caller with skb == NULL, we might have more in the future.
>
> Thanks for the review.
>
> How about changing '} else {' to '} else if (skb) {', then if we go
> into this else-if branch, we will print nothing, right? I'll test it
> in this case.

Right, the fields are cleared before this else

+   memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
+   memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));

>
> >
> > > +   /*
> > > +* We should reverse the 4-tuple of skb, so later
> > > +* it can print the right flow direction of rst.
> > > +*/
> > > +   TP_STORE_ADDR_PORTS_SKB(skb, entry->daddr, 
> > > entry->saddr);
> > > +   }
> > > +   ),
> > > +
> > > +   TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s",
> > > + __entry->skbaddr, __entry->skaddr,
> > > + __entry->saddr, __entry->daddr,
> > > + __entry->state ? show_tcp_state_name(__entry->state) : 
> > > "UNKNOWN")
> > >  );
> > >
> > >  /*
> > > diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> > > index a22ee5838751..d5c4a969c066 100644
> > > --- a/net/ipv4/tcp_ipv4.c
> > > +++ b/net/ipv4/tcp_ipv4.c
> > > @@ -868,10 +868,10 @@ static void tcp_v4_send_reset(const struct sock 
> > > *sk, struct sk_buff *skb)
> > >  */
> > > if (sk) {
> > > arg.bound_dev_if = sk->sk_bound_dev_if;
> > > -   if (sk_fullsock(sk))
> > > -   trace_tcp_send_reset(sk, skb);
> > > }
> >
> > Remove the { } ?
>
> Yes, I forgot to remove them.

No problem.



Re: [PATCH net-next v3 2/3] trace: tcp: fully support trace_tcp_send_reset

2024-03-29 Thread Jason Xing
On Fri, Mar 29, 2024 at 5:07 PM Eric Dumazet  wrote:
>
> On Fri, Mar 29, 2024 at 4:43 AM Jason Xing  wrote:
> >
> > From: Jason Xing 
> >
> > Prior to this patch, what we can see by enabling trace_tcp_send is
> > only happening under two circumstances:
> > 1) active rst mode
> > 2) non-active rst mode and based on the full socket
> >
> > That means the inconsistency occurs if we use tcpdump and trace
> > simultaneously to see how rst happens.
> >
> > It's necessary that we should take into other cases into considerations,
> > say:
> > 1) time-wait socket
> > 2) no socket
> > ...
> >
> > By parsing the incoming skb and reversing its 4-tuple can
> > we know the exact 'flow' which might not exist.
> >
> > Samples after applied this patch:
> > 1. tcp_send_reset: skbaddr=XXX skaddr=XXX src=ip:port dest=ip:port
> > state=TCP_ESTABLISHED
> > 2. tcp_send_reset: skbaddr=000...000 skaddr=XXX src=ip:port dest=ip:port
> > state=UNKNOWN
> > Note:
> > 1) UNKNOWN means we cannot extract the right information from skb.
> > 2) skbaddr/skaddr could be 0
> >
> > Signed-off-by: Jason Xing 
> > ---
> >  include/trace/events/tcp.h | 39 --
> >  net/ipv4/tcp_ipv4.c|  4 ++--
> >  net/ipv6/tcp_ipv6.c|  3 ++-
> >  3 files changed, 41 insertions(+), 5 deletions(-)
> >
> > diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
> > index 194425f69642..289438c54227 100644
> > --- a/include/trace/events/tcp.h
> > +++ b/include/trace/events/tcp.h
> > @@ -78,11 +78,46 @@ DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb,
> >   * skb of trace_tcp_send_reset is the skb that caused RST. In case of
> >   * active reset, skb should be NULL
> >   */
> > -DEFINE_EVENT(tcp_event_sk_skb, tcp_send_reset,
> > +TRACE_EVENT(tcp_send_reset,
> >
> > TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
> >
> > -   TP_ARGS(sk, skb)
> > +   TP_ARGS(sk, skb),
> > +
> > +   TP_STRUCT__entry(
> > +   __field(const void *, skbaddr)
> > +   __field(const void *, skaddr)
> > +   __field(int, state)
> > +   __array(__u8, saddr, sizeof(struct sockaddr_in6))
> > +   __array(__u8, daddr, sizeof(struct sockaddr_in6))
> > +   ),
> > +
> > +   TP_fast_assign(
> > +   __entry->skbaddr = skb;
> > +   __entry->skaddr = sk;
> > +   /* Zero means unknown state. */
> > +   __entry->state = sk ? sk->sk_state : 0;
> > +
> > +   memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
> > +   memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
> > +
> > +   if (sk && sk_fullsock(sk)) {
> > +   const struct inet_sock *inet = inet_sk(sk);
> > +
> > +   TP_STORE_ADDR_PORTS(__entry, inet, sk);
> > +   } else {
>
> To be on the safe side, I would test if (skb) here.
> We have one caller with skb == NULL, we might have more in the future.

Thanks for the review.

How about changing '} else {' to '} else if (skb) {', then if we go
into this else-if branch, we will print nothing, right? I'll test it
in this case.

>
> > +   /*
> > +* We should reverse the 4-tuple of skb, so later
> > +* it can print the right flow direction of rst.
> > +*/
> > +   TP_STORE_ADDR_PORTS_SKB(skb, entry->daddr, 
> > entry->saddr);
> > +   }
> > +   ),
> > +
> > +   TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s",
> > + __entry->skbaddr, __entry->skaddr,
> > + __entry->saddr, __entry->daddr,
> > + __entry->state ? show_tcp_state_name(__entry->state) : 
> > "UNKNOWN")
> >  );
> >
> >  /*
> > diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> > index a22ee5838751..d5c4a969c066 100644
> > --- a/net/ipv4/tcp_ipv4.c
> > +++ b/net/ipv4/tcp_ipv4.c
> > @@ -868,10 +868,10 @@ static void tcp_v4_send_reset(const struct sock *sk, 
> > struct sk_buff *skb)
> >  */
> > if (sk) {
> > arg.bound_dev_if = sk->sk_bound_dev_if;
> > -   if (sk_fullsock(sk))
> > -   trace_tcp_send_reset(sk, skb);
> > }
>
> Remove the { } ?

Yes, I forgot to remove them.

Thanks,
Jason

>
>
> >
> > +   trace_tcp_send_reset(sk, skb);
> > +
> > BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
> >  offsetof(struct inet_timewait_sock, tw_bound_dev_if));
> >
> > diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> > index 3f4cba49e9ee..8e9c59b6c00c 100644
> > --- a/net/ipv6/tcp_ipv6.c
> > +++ b/net/ipv6/tcp_ipv6.c
> > @@ -1113,7 +1113,6 @@ static void tcp_v6_send_reset(const struct sock *sk, 
> > struct sk_buff *skb)
> > if (sk) {
> > oif = sk->sk_bound_dev_if;
> > if (sk_fullsock(sk)) {
> > -

Re: [PATCH net-next v3 2/3] trace: tcp: fully support trace_tcp_send_reset

2024-03-29 Thread Eric Dumazet
On Fri, Mar 29, 2024 at 4:43 AM Jason Xing  wrote:
>
> From: Jason Xing 
>
> Prior to this patch, what we can see by enabling trace_tcp_send is
> only happening under two circumstances:
> 1) active rst mode
> 2) non-active rst mode and based on the full socket
>
> That means the inconsistency occurs if we use tcpdump and trace
> simultaneously to see how rst happens.
>
> It's necessary that we should take into other cases into considerations,
> say:
> 1) time-wait socket
> 2) no socket
> ...
>
> By parsing the incoming skb and reversing its 4-tuple can
> we know the exact 'flow' which might not exist.
>
> Samples after applied this patch:
> 1. tcp_send_reset: skbaddr=XXX skaddr=XXX src=ip:port dest=ip:port
> state=TCP_ESTABLISHED
> 2. tcp_send_reset: skbaddr=000...000 skaddr=XXX src=ip:port dest=ip:port
> state=UNKNOWN
> Note:
> 1) UNKNOWN means we cannot extract the right information from skb.
> 2) skbaddr/skaddr could be 0
>
> Signed-off-by: Jason Xing 
> ---
>  include/trace/events/tcp.h | 39 --
>  net/ipv4/tcp_ipv4.c|  4 ++--
>  net/ipv6/tcp_ipv6.c|  3 ++-
>  3 files changed, 41 insertions(+), 5 deletions(-)
>
> diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
> index 194425f69642..289438c54227 100644
> --- a/include/trace/events/tcp.h
> +++ b/include/trace/events/tcp.h
> @@ -78,11 +78,46 @@ DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb,
>   * skb of trace_tcp_send_reset is the skb that caused RST. In case of
>   * active reset, skb should be NULL
>   */
> -DEFINE_EVENT(tcp_event_sk_skb, tcp_send_reset,
> +TRACE_EVENT(tcp_send_reset,
>
> TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
>
> -   TP_ARGS(sk, skb)
> +   TP_ARGS(sk, skb),
> +
> +   TP_STRUCT__entry(
> +   __field(const void *, skbaddr)
> +   __field(const void *, skaddr)
> +   __field(int, state)
> +   __array(__u8, saddr, sizeof(struct sockaddr_in6))
> +   __array(__u8, daddr, sizeof(struct sockaddr_in6))
> +   ),
> +
> +   TP_fast_assign(
> +   __entry->skbaddr = skb;
> +   __entry->skaddr = sk;
> +   /* Zero means unknown state. */
> +   __entry->state = sk ? sk->sk_state : 0;
> +
> +   memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
> +   memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
> +
> +   if (sk && sk_fullsock(sk)) {
> +   const struct inet_sock *inet = inet_sk(sk);
> +
> +   TP_STORE_ADDR_PORTS(__entry, inet, sk);
> +   } else {

To be on the safe side, I would test if (skb) here.
We have one caller with skb == NULL, we might have more in the future.

> +   /*
> +* We should reverse the 4-tuple of skb, so later
> +* it can print the right flow direction of rst.
> +*/
> +   TP_STORE_ADDR_PORTS_SKB(skb, entry->daddr, 
> entry->saddr);
> +   }
> +   ),
> +
> +   TP_printk("skbaddr=%p skaddr=%p src=%pISpc dest=%pISpc state=%s",
> + __entry->skbaddr, __entry->skaddr,
> + __entry->saddr, __entry->daddr,
> + __entry->state ? show_tcp_state_name(__entry->state) : 
> "UNKNOWN")
>  );
>
>  /*
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index a22ee5838751..d5c4a969c066 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -868,10 +868,10 @@ static void tcp_v4_send_reset(const struct sock *sk, 
> struct sk_buff *skb)
>  */
> if (sk) {
> arg.bound_dev_if = sk->sk_bound_dev_if;
> -   if (sk_fullsock(sk))
> -   trace_tcp_send_reset(sk, skb);
> }

Remove the { } ?


>
> +   trace_tcp_send_reset(sk, skb);
> +
> BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
>  offsetof(struct inet_timewait_sock, tw_bound_dev_if));
>
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index 3f4cba49e9ee..8e9c59b6c00c 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -1113,7 +1113,6 @@ static void tcp_v6_send_reset(const struct sock *sk, 
> struct sk_buff *skb)
> if (sk) {
> oif = sk->sk_bound_dev_if;
> if (sk_fullsock(sk)) {
> -   trace_tcp_send_reset(sk, skb);
> if (inet6_test_bit(REPFLOW, sk))
> label = ip6_flowlabel(ipv6h);
> priority = READ_ONCE(sk->sk_priority);
> @@ -1129,6 +1128,8 @@ static void tcp_v6_send_reset(const struct sock *sk, 
> struct sk_buff *skb)
> label = ip6_flowlabel(ipv6h);
> }
>
> +   trace_tcp_send_reset(sk, skb);
> +
> tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
>