Eelco Chaudron <[email protected]> writes:
> On 18 May 2026, at 19:13, Aaron Conole wrote:
>
>> Refactored TCP module to use a new ct private storage area rather than
>> an compatible extended conn struct so that future modules will have
>> access to the TCP state details. This will be needed when getting the
>> actual tcp state of the connection for offload.
>>
>> Signed-off-by: Aaron Conole <[email protected]>
>
> Thanks Aaron,
>
> Find some comments below.
>
> //Eelco
>
>> ---
>> lib/automake.mk | 1 +
>> lib/conntrack-tcp.c | 64 +++++++++++++++++++--------------------------
>> lib/conntrack-tcp.h | 61 ++++++++++++++++++++++++++++++++++++++++++
>> lib/conntrack.c | 2 ++
>> 4 files changed, 91 insertions(+), 37 deletions(-)
>> create mode 100644 lib/conntrack-tcp.h
>>
>> diff --git a/lib/automake.mk b/lib/automake.mk
>> index 933b71226b..027dd986ba 100644
>> --- a/lib/automake.mk
>> +++ b/lib/automake.mk
>> @@ -90,6 +90,7 @@ lib_libopenvswitch_la_SOURCES = \
>> lib/conntrack-icmp.c \
>> lib/conntrack-private.h \
>> lib/conntrack-tcp.c \
>> + lib/conntrack-tcp.h \
>> lib/conntrack-tftp.c \
>> lib/conntrack-tp.c \
>> lib/conntrack-tp.h \
>> diff --git a/lib/conntrack-tcp.c b/lib/conntrack-tcp.c
>> index 8a7c98cc45..696fd5c109 100644
>> --- a/lib/conntrack-tcp.c
>> +++ b/lib/conntrack-tcp.c
>> @@ -3,6 +3,7 @@
>> * Copyright (c) 2002 - 2008 Henning Brauer
>> * Copyright (c) 2012 Gleb Smirnoff <[email protected]>
>> * Copyright (c) 2015, 2016 Nicira, Inc.
>> + * Copyright (c) 2026 Red Hat, Inc.
>> * All rights reserved.
>> *
>> * Redistribution and use in source and binary forms, with or without
>> @@ -39,6 +40,7 @@
>> #include <config.h>
>>
>> #include "conntrack-private.h"
>> +#include "conntrack-tcp.h"
>> #include "conntrack-tp.h"
>> #include "coverage.h"
>> #include "ct-dpif.h"
>> @@ -49,18 +51,7 @@ COVERAGE_DEFINE(conntrack_tcp_seq_chk_bypass);
>> COVERAGE_DEFINE(conntrack_tcp_seq_chk_failed);
>> COVERAGE_DEFINE(conntrack_invalid_tcp_flags);
>>
>> -struct tcp_peer {
>> - uint32_t seqlo; /* Max sequence number sent
>> */
>> - uint32_t seqhi; /* Max the other end ACKd + win
>> */
>> - uint16_t max_win; /* largest window (pre scaling)
>> */
>> - uint8_t wscale; /* window scaling factor
>> */
>> - enum ct_dpif_tcp_state state;
>> -};
>> -
>> -struct conn_tcp {
>> - struct conn up;
>> - struct tcp_peer peer[2]; /* 'conn' lock protected. */
>> -};
>> +ct_private_id_t conntrack_tcp_private_id = CT_PRIVATE_ID_INVALID;
>>
>> enum {
>> TCPOPT_EOL,
>> @@ -79,12 +70,6 @@ enum {
>> #define SEQ_MIN(a, b) INT_MOD_MIN(a, b)
>> #define SEQ_MAX(a, b) INT_MOD_MAX(a, b)
>>
>> -static struct conn_tcp*
>> -conn_tcp_cast(const struct conn* conn)
>> -{
>> - return CONTAINER_OF(conn, struct conn_tcp, up);
>> -}
>> -
>> /* pf does this in in pf_normalize_tcp(), and it is called only if scrub
>> * is enabled. We're not scrubbing, but this check seems reasonable. */
>> static bool
>> @@ -113,9 +98,6 @@ tcp_invalid_flags(uint16_t flags)
>> }
>>
>> #define TCP_MAX_WSCALE 14
>> -#define CT_WSCALE_FLAG 0x80
>> -#define CT_WSCALE_UNKNOWN 0x40
>> -#define CT_WSCALE_MASK 0xf
>>
>> static uint8_t
>> tcp_get_wscale(const struct tcp_header *tcp)
>> @@ -164,7 +146,7 @@ static enum ct_update_res
>> tcp_conn_update(struct conntrack *ct, struct conn *conn_,
>> struct dp_packet *pkt, bool reply, long long now)
>> {
>> - struct conn_tcp *conn = conn_tcp_cast(conn_);
>> + struct conn_tcp_state *conn = conn_tcp_state_get(conn_);
>
> This function can return NULL so we should check, or else
> static analyzers go crazy :)
ACK
>> struct tcp_header *tcp = dp_packet_l4(pkt);
>> /* The peer that sent 'pkt' */
>> struct tcp_peer *src = &conn->peer[reply ? 1 : 0];
>> @@ -189,7 +171,7 @@ tcp_conn_update(struct conntrack *ct, struct conn *conn_,
>> return CT_UPDATE_NEW;
>> } else if (src->state <= CT_DPIF_TCPS_SYN_SENT) {
>> src->state = CT_DPIF_TCPS_SYN_SENT;
>> - conn_update_expiration(ct, &conn->up, CT_TM_TCP_FIRST_PACKET,
>> now);
>> + conn_update_expiration(ct, conn_, CT_TM_TCP_FIRST_PACKET, now);
>> return CT_UPDATE_VALID_NEW;
>> }
>> }
>> @@ -340,18 +322,18 @@ tcp_conn_update(struct conntrack *ct, struct conn
>> *conn_,
>>
>> if (src->state >= CT_DPIF_TCPS_FIN_WAIT_2
>> && dst->state >= CT_DPIF_TCPS_FIN_WAIT_2) {
>> - conn_update_expiration(ct, &conn->up, CT_TM_TCP_CLOSED, now);
>> + conn_update_expiration(ct, conn_, CT_TM_TCP_CLOSED, now);
>> } else if (src->state >= CT_DPIF_TCPS_CLOSING
>> && dst->state >= CT_DPIF_TCPS_CLOSING) {
>> - conn_update_expiration(ct, &conn->up, CT_TM_TCP_FIN_WAIT, now);
>> + conn_update_expiration(ct, conn_, CT_TM_TCP_FIN_WAIT, now);
>> } else if (src->state < CT_DPIF_TCPS_ESTABLISHED
>> || dst->state < CT_DPIF_TCPS_ESTABLISHED) {
>> - conn_update_expiration(ct, &conn->up, CT_TM_TCP_OPENING, now);
>> + conn_update_expiration(ct, conn_, CT_TM_TCP_OPENING, now);
>> } else if (src->state >= CT_DPIF_TCPS_CLOSING
>> || dst->state >= CT_DPIF_TCPS_CLOSING) {
>> - conn_update_expiration(ct, &conn->up, CT_TM_TCP_CLOSING, now);
>> + conn_update_expiration(ct, conn_, CT_TM_TCP_CLOSING, now);
>> } else {
>> - conn_update_expiration(ct, &conn->up, CT_TM_TCP_ESTABLISHED,
>> now);
>> + conn_update_expiration(ct, conn_, CT_TM_TCP_ESTABLISHED, now);
>> }
>> } else if ((dst->state < CT_DPIF_TCPS_SYN_SENT
>> || dst->state >= CT_DPIF_TCPS_FIN_WAIT_2
>> @@ -439,15 +421,14 @@ static struct conn *
>> tcp_new_conn(struct conntrack *ct, struct dp_packet *pkt, long long now,
>> uint32_t tp_id)
>> {
>> - struct conn_tcp* newconn = NULL;
>> struct tcp_header *tcp = dp_packet_l4(pkt);
>> + struct conn_tcp_state *tcp_state;
>> struct tcp_peer *src, *dst;
>> uint16_t tcp_flags = TCP_FLAGS(tcp->tcp_ctl);
>>
>> - newconn = xzalloc(sizeof *newconn);
>> -
>> - src = &newconn->peer[0];
>> - dst = &newconn->peer[1];
>> + tcp_state = xzalloc(sizeof *tcp_state);
>> + src = &tcp_state->peer[0];
>> + dst = &tcp_state->peer[1];
>>
>> src->seqlo = ntohl(get_16aligned_be32(&tcp->tcp_seq));
>> src->seqhi = src->seqlo + dp_packet_get_tcp_payload_length(pkt) + 1;
>> @@ -473,10 +454,12 @@ tcp_new_conn(struct conntrack *ct, struct dp_packet
>> *pkt, long long now,
>> src->state = CT_DPIF_TCPS_SYN_SENT;
>> dst->state = CT_DPIF_TCPS_CLOSED;
>>
>> - newconn->up.tp_id = tp_id;
>> - conn_init_expiration(ct, &newconn->up, CT_TM_TCP_FIRST_PACKET, now);
>> + struct conn *newconn = xzalloc(sizeof *newconn);
>> + newconn->tp_id = tp_id;
>> + conn_private_set(newconn, conntrack_tcp_private_id, tcp_state);
>> + conn_init_expiration(ct, newconn, CT_TM_TCP_FIRST_PACKET, now);
>>
>> - return &newconn->up;
>> + return newconn;
>> }
>>
>> static uint8_t
>> @@ -499,7 +482,7 @@ static void
>> tcp_conn_get_protoinfo(const struct conn *conn_,
>> struct ct_dpif_protoinfo *protoinfo)
>> {
>> - const struct conn_tcp *conn = conn_tcp_cast(conn_);
>> + const struct conn_tcp_state *conn = conn_tcp_state_get(conn_);
>
> NULL check.
ACK.
>>
>> protoinfo->proto = IPPROTO_TCP;
>> protoinfo->tcp.state_orig = conn->peer[0].state;
>> @@ -518,3 +501,10 @@ struct ct_l4_proto ct_proto_tcp = {
>> .conn_update = tcp_conn_update,
>> .conn_get_protoinfo = tcp_conn_get_protoinfo,
>> };
>> +
>> +
>> +void
>> +conntrack_tcp_init(void)
>> +{
>> + conntrack_tcp_private_id = conn_private_id_alloc(free);
>
> We need to check the return value here, or assert. If we do
> assert, we might need proper handling in the
Comment got cut off, but I get the idea and will fix it.
>> +}
>> diff --git a/lib/conntrack-tcp.h b/lib/conntrack-tcp.h
>> new file mode 100644
>> index 0000000000..519993874c
>> --- /dev/null
>> +++ b/lib/conntrack-tcp.h
>> @@ -0,0 +1,61 @@
>> +/*
>> + * Copyright (c) 2026 Red Hat, Inc.
>> + *
>> + * Licensed under the Apache License, Version 2.0 (the "License");
>> + * you may not use this file except in compliance with the License.
>> + * You may obtain a copy of the License at:
>> + *
>> + * http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + * Unless required by applicable law or agreed to in writing, software
>> + * distributed under the License is distributed on an "AS IS" BASIS,
>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>> + * See the License for the specific language governing permissions and
>> + * limitations under the License.
>> + */
>> +
>> +#ifndef CONNTRACK_TCP_H
>> +#define CONNTRACK_TCP_H
>> +
>> +#include "conntrack.h"
>> +#include "ct-dpif.h"
>> +
>> +/* wscale field flags stored in tcp_peer.wscale. */
>
> Add wscale to the checkpatch_dict.txt file.
okay.
>> +#define CT_WSCALE_FLAG 0x80 /* Negotiated window scaling is in use. */
>> +#define CT_WSCALE_UNKNOWN 0x40 /* Scale factor not yet known. */
>> +#define CT_WSCALE_MASK 0x0f /* Actual scale factor (0-14). */
>> +
>> +/* Per-direction TCP state tracked by the conntrack TCP module. */
>> +struct tcp_peer {
>> + uint32_t seqlo; /* Max sequence number sent. */
>> + uint32_t seqhi; /* Max the other end ACKd + win. */
>
> nit: "ACKd" -> "ACKed" to keep checkpatch happy, since this
> is moving to a new header anyway.
Okay.
>> + uint16_t max_win; /* Largest window (pre-scaling). */
>> + uint8_t wscale; /* Window scaling factor + flags. */
>> + enum ct_dpif_tcp_state state;
>> +};
>> +
>> +/* TCP-specific connection state stored in the conntrack private data slot.
>> + * Access via conn_tcp_state_get(). */
>> +struct conn_tcp_state {
>> + struct tcp_peer peer[2]; /* peer[0]=original, peer[1]=reply. */
>> +};
>> +
>> +/* Private slot ID for TCP state; valid after conntrack_tcp_init(). */
>> +extern ct_private_id_t conntrack_tcp_private_id;
>> +
>> +/* Must be called once at module initialization before any connections are
>> + * created (called internally by conntrack_init()). */
>> +void conntrack_tcp_init(void);
>> +
>> +/* Returns the TCP state for 'conn', or NULL if not a TCP connection or
>> + * conntrack_tcp_init() has not been called. */
>> +static inline struct conn_tcp_state *
>> +conn_tcp_state_get(const struct conn *conn)
>> +{
>> + if (conntrack_tcp_private_id == CT_PRIVATE_ID_INVALID) {
>> + return NULL;
>> + }
>> + return conn_private_get(conn, conntrack_tcp_private_id);
>> +}
>> +
>> +#endif /* CONNTRACK_TCP_H */
>> diff --git a/lib/conntrack.c b/lib/conntrack.c
>> index 62251caa92..7eca4abf6b 100644
>> --- a/lib/conntrack.c
>> +++ b/lib/conntrack.c
>> @@ -24,6 +24,7 @@
>>
>> #include "conntrack.h"
>> #include "conntrack-private.h"
>> +#include "conntrack-tcp.h"
>> #include "conntrack-tp.h"
>> #include "coverage.h"
>> #include "crc32c.h"
>> @@ -223,6 +224,7 @@ conntrack_init(void)
>> l4_protos[IPPROTO_ICMP] = &ct_proto_icmp4;
>> l4_protos[IPPROTO_ICMPV6] = &ct_proto_icmp6;
>>
>> + conntrack_tcp_init();
>> conntrack_ftp_init();
>> conntrack_tftp_init();
>>
>> --
>> 2.53.0
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev