With this newly introduced TRACE_EVENT, it becomes very easy to monitor
TCP/IPv4 state transitions.

A new TRACE_SYSTEM named tcp is added, under which other TCP events can
be traced as well.

Two helpers are added:
static inline void __tcp_set_state(struct sock *sk, int state)
static inline void __sk_state_store(struct sock *sk, int newstate)

When performing a TCP/IPv4 state transition, use these two helpers or
tcp_set_state() instead of assigning a value to sk_state directly.

Signed-off-by: Yafang Shao <laoar.s...@gmail.com>
---
 include/net/tcp.h               | 16 ++++++++++++
 include/trace/events/tcp.h      | 58 +++++++++++++++++++++++++++++++++++++++++
 net/ipv4/inet_connection_sock.c |  9 ++++---
 net/ipv4/inet_hashtables.c      |  2 +-
 net/ipv4/tcp.c                  |  2 +-
 5 files changed, 82 insertions(+), 5 deletions(-)
 create mode 100644 include/trace/events/tcp.h

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 89974c5..a8336d3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -49,6 +49,7 @@
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/bpf-cgroup.h>
+#include <trace/events/tcp.h>
 
 extern struct inet_hashinfo tcp_hashinfo;
 
@@ -1284,6 +1285,21 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb)
 #endif
 void tcp_set_state(struct sock *sk, int state);
 
+/*
+ * Helpers that fire the tcp_set_state tracepoint before updating sk_state.
+ */
+static inline void __tcp_set_state(struct sock *sk, int state)
+{
+       trace_tcp_set_state(sk, sk->sk_state, state);
+       sk->sk_state = state;
+}
+
+static inline void __sk_state_store(struct sock *sk, int newstate)
+{
+       trace_tcp_set_state(sk, sk->sk_state, newstate);
+       sk_state_store(sk, newstate);
+}
+
 void tcp_done(struct sock *sk);
 
 int tcp_abort(struct sock *sk, int err);
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
new file mode 100644
index 0000000..abf65af
--- /dev/null
+++ b/include/trace/events/tcp.h
@@ -0,0 +1,58 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM tcp
+
+#if !defined(_TRACE_TCP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_TCP_H
+
+#include <linux/tracepoint.h>
+#include <net/sock.h>
+#include <net/inet_timewait_sock.h>
+#include <net/request_sock.h>
+#include <net/inet_sock.h>
+#include <net/tcp_states.h>
+
+TRACE_EVENT(tcp_set_state,
+       TP_PROTO(struct sock *sk, int oldstate, int newstate),
+       TP_ARGS(sk, oldstate, newstate),
+
+       TP_STRUCT__entry(
+               __field(__be32, dst)
+               __field(__be32, src)
+               __field(__u16, dport)
+               __field(__u16, sport)
+               __field(int, oldstate)
+               __field(int, newstate)
+       ),
+
+       TP_fast_assign(
+               if (oldstate == TCP_TIME_WAIT) {
+                       __entry->dst = inet_twsk(sk)->tw_daddr;
+                       __entry->src = inet_twsk(sk)->tw_rcv_saddr;
+                       __entry->dport = ntohs(inet_twsk(sk)->tw_dport);
+                       __entry->sport = ntohs(inet_twsk(sk)->tw_sport);
+               } else if (oldstate == TCP_NEW_SYN_RECV) {
+                       __entry->dst = inet_rsk(inet_reqsk(sk))->ir_rmt_addr;
+                       __entry->src = inet_rsk(inet_reqsk(sk))->ir_loc_addr;
+                       __entry->dport =
+                               ntohs(inet_rsk(inet_reqsk(sk))->ir_rmt_port);
+                       __entry->sport = inet_rsk(inet_reqsk(sk))->ir_num;
+               } else {
+                       __entry->dst = inet_sk(sk)->inet_daddr;
+                       __entry->src = inet_sk(sk)->inet_rcv_saddr;
+                       __entry->dport = ntohs(inet_sk(sk)->inet_dport);
+                       __entry->sport = ntohs(inet_sk(sk)->inet_sport);
+               }
+
+               __entry->oldstate = oldstate;
+               __entry->newstate = newstate;
+       ),
+
+       TP_printk("%08X:%04X %08X:%04X, %02x %02x",
+               __entry->src, __entry->sport, __entry->dst, __entry->dport,
+               __entry->oldstate, __entry->newstate)
+);
+
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index c039c93..307a046 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -27,6 +27,9 @@
 #include <net/sock_reuseport.h>
 #include <net/addrconf.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/tcp.h>
+
 #ifdef INET_CSK_DEBUG
 const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
 EXPORT_SYMBOL(inet_csk_timer_bug_msg);
@@ -786,7 +789,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
        if (newsk) {
                struct inet_connection_sock *newicsk = inet_csk(newsk);
 
-               newsk->sk_state = TCP_SYN_RECV;
+               __tcp_set_state(newsk, TCP_SYN_RECV);
                newicsk->icsk_bind_hash = NULL;
 
                inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
@@ -880,7 +883,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
         * It is OK, because this socket enters to hash table only
         * after validation is complete.
         */
-       sk_state_store(sk, TCP_LISTEN);
+       __sk_state_store(sk, TCP_LISTEN);
        if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
                inet->inet_sport = htons(inet->inet_num);
 
@@ -891,7 +894,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
                        return 0;
        }
 
-       sk->sk_state = TCP_CLOSE;
+       __tcp_set_state(sk, TCP_CLOSE);
        return err;
 }
 EXPORT_SYMBOL_GPL(inet_csk_listen_start);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 597bb4c..0f45d456 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -430,7 +430,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
        } else {
                percpu_counter_inc(sk->sk_prot->orphan_count);
-               sk->sk_state = TCP_CLOSE;
+               __tcp_set_state(sk, TCP_CLOSE);
                sock_set_flag(sk, SOCK_DEAD);
                inet_csk_destroy_sock(sk);
        }
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5091402..984dce6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2040,7 +2040,7 @@ void tcp_set_state(struct sock *sk, int state)
        /* Change state AFTER socket is unhashed to avoid closed
         * socket sitting in hash tables.
         */
-       sk_state_store(sk, state);
+       __sk_state_store(sk, state);
 
 #ifdef STATE_TRACE
        SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
-- 
1.8.3.1

Reply via email to