Eelco Chaudron <[email protected]> writes:
> On 18 May 2026, at 19:13, Aaron Conole wrote:
>
>> Conntrack has a number of places where it would be useful to monitor
>> changes. Currently, these are hard coded spots for things like alg
>> handlers that need to fire when connections are added and transition
>> so that we can monitor packets. This leads to very difficult to read
>> code, with messy branches all over the place, and a bunch of unrelated
>> functions mixed together.
>>
>> Rename the conn_update_state_alg() function to conn_update_state_dist()
>> and abstract away the FTP specific hook logic. The original function
>> required manual modification to add additional handlers, which we want
>> to avoid and make more generic for future additions (which can include
>> observers for hardware offloads). The hooks are priority based so
>> that some high priority hooks can run early, while later hooks that
>> consume the event can run later.
>>
>> This infrastructure relies on the fact that there is only one global
>> conntrack instance. If the conntrack ever returns to allowing for
>> multiple instances, this will need to be re-abstracted.
>>
>> Signed-off-by: Aaron Conole <[email protected]>
>
> Hi Aaron,
>
> See some comments below.
>
> //Eelco
>
>> ---
>> lib/conntrack-private.h | 55 +++++++++++++++
>> lib/conntrack.c | 148 +++++++++++++++++++++++++++-------------
>> 2 files changed, 157 insertions(+), 46 deletions(-)
>>
>> diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
>> index bd095277cd..a5bf1bb519 100644
>> --- a/lib/conntrack-private.h
>> +++ b/lib/conntrack-private.h
>> @@ -252,6 +252,17 @@ struct conntrack {
>> * 3. 'resources_lock'
>> */
>>
>> +/* ALG control type identifiers. These determine which application-layer
>> + * gateway helper applies to a given connection. */
>> +enum ct_alg_ctl_type {
>> + CT_ALG_CTL_NONE,
>> + CT_ALG_CTL_FTP,
>> + CT_ALG_CTL_TFTP,
>> + /* SIP is not enabled through OpenFlow and is present only as an example
>> + * of an ALG that allows a wildcard source IP address. */
>> + CT_ALG_CTL_SIP,
>> +};
>> +
>> extern struct ct_l4_proto ct_proto_tcp;
>> extern struct ct_l4_proto ct_proto_other;
>> extern struct ct_l4_proto ct_proto_icmp4;
>> @@ -268,6 +279,50 @@ struct ct_l4_proto {
>> struct ct_dpif_protoinfo *);
>> };
>>
>> +/* Transient lookup context built for each packet; private to conntrack.c
>> and
>> + * the ALG helper modules. */
>> +struct conn_lookup_ctx {
>> + struct conn_key key;
>> + struct conn *conn;
>> + uint32_t hash;
>> + bool reply;
>> + bool icmp_related;
>> +};
>> +
>> +/* conn_update_state_dist() hook
>> + *
>> + * Modules may register a hook to intercept connection state transitions.
>> + * conn_update_state_dist() calls registered hooks in ascending priority
>> order
>> + * until one returns true (meaning the hook handled the update, including
>> any
>> + * call to conn_update_state() it needed to make). If no hook claims the
>> + * packet the caller falls through to the default conn_update_state() path.
>> + *
>> + * Priority: lower numerical value -> higher precedence -> called first.
>> + * Named constants below cover the common cases; any int value is accepted.
>> + *
>> + * conn->lock is NOT held on entry; hooks must acquire it as needed
>> following
>> + * the lock-ordering rules.
>> + *
>> + * Registration must happen before any connection is processed (i.e. at
>> module
>> + * initialisation time, under ovsthread_once).
>
>
> Maybe add that this is not thread-safe and must only be
> called during single-threaded initialization? Something
> like:
>
> * Registration is not thread-safe and must happen before any
> * connection is processed (i.e. during single-threaded
> * module initialisation, under ovsthread_once).
ACK.
>> + */
>> +typedef bool (*conn_update_state_hook_fn)(
>> + struct conntrack *ct, struct dp_packet *pkt,
>> + struct conn_lookup_ctx *ctx, struct conn *conn,
>> + const struct nat_action_info_t *nat_action_info,
>> + enum ct_alg_ctl_type ct_alg_ctl, long long now,
>> + bool *create_new_conn);
>> +
>> +enum conn_update_state_hook_priority {
>> + CT_HOOK_PRI_HIGH = 50, /* Before built-in ALG handlers. */
>> + CT_HOOK_PRI_NORMAL = 100, /* Default; used by built-in ALG handlers. */
>> + CT_HOOK_PRI_LOW = 150, /* After built-in ALG handlers. */
>> +};
>> +
>> +void conn_update_state_hook_register(int priority,
>> + conn_update_state_hook_fn);
>> +void conn_update_state_hook_unregister(conn_update_state_hook_fn);
>> +
>> /* conn_private_get() / conn_private_set()
>> *
>> * Fast-path accessors for per-connection private storage slots. Both
>> diff --git a/lib/conntrack.c b/lib/conntrack.c
>> index 04f6b4b77c..cbf268e5ee 100644
>> --- a/lib/conntrack.c
>> +++ b/lib/conntrack.c
>> @@ -55,14 +55,6 @@ COVERAGE_DEFINE(conntrack_l4csum_err);
>> COVERAGE_DEFINE(conntrack_lookup_natted_miss);
>> COVERAGE_DEFINE(conntrack_zone_full);
>>
>> -struct conn_lookup_ctx {
>> - struct conn_key key;
>> - struct conn *conn;
>> - uint32_t hash;
>> - bool reply;
>> - bool icmp_related;
>> -};
>> -
>> enum ftp_ctl_pkt {
>> /* Control packets with address and/or port specifiers. */
>> CT_FTP_CTL_INTEREST,
>> @@ -77,15 +69,6 @@ enum ct_alg_mode {
>> CT_TFTP_MODE,
>> };
>>
>> -enum ct_alg_ctl_type {
>> - CT_ALG_CTL_NONE,
>> - CT_ALG_CTL_FTP,
>> - CT_ALG_CTL_TFTP,
>> - /* SIP is not enabled through Openflow and presently only used as
>> - * an example of an alg that allows a wildcard src ip. */
>> - CT_ALG_CTL_SIP,
>> -};
>> -
>> struct zone_limit {
>> struct cmap_node node;
>> struct conntrack_zone_limit czl;
>> @@ -170,6 +153,29 @@ struct ct_private_slot {
>> static struct ct_private_slot ct_private_slots[CT_CONN_PRIVATE_MAX];
>> static atomic_uint32_t ct_private_next_id = 0;
>>
>> +/* conn_update_state_dist() hook registry.
>> + *
>> + * Written once at module initialisation (under ovsthread_once), then
>> + * read-only during packet processing, so no additional locking is needed.
>> + * Entries are kept sorted by ascending priority so conn_update_state_dist()
>> + * can iterate in order without extra bookkeeping.
>
> Is this accurate? conn_update_state_hook_unregister()
> exists and mutates the array with memmove(), but there is no
> locking to prevent it from racing with the iteration in
> conn_update_state_dist() on the fast path. Would it be
> better to remove the unregister function if it has no
> callers? Or if it is needed, this comment is incorrect and
> some kind of locking is required.
Okay, I can probably remove it.
>> + */
>> +#define CT_UPDATE_STATE_HOOKS_MAX 8
>> +
>> +struct ct_update_hook {
>> + int priority;
>> + conn_update_state_hook_fn fn;
>> +};
>> +
>> +static struct ct_update_hook ct_update_hooks[CT_UPDATE_STATE_HOOKS_MAX];
>> +static size_t n_ct_update_hooks;
>> +
>> +static bool ftp_conn_update_state_hook(struct conntrack *, struct
>> dp_packet *,
>
> OVS_EXCLUDED(conn->lock, ct->ct_lock, ct->resources_lock) ?
>
>> + struct conn_lookup_ctx *, struct
>> conn *,
>> + const struct nat_action_info_t *,
>> + enum ct_alg_ctl_type, long long,
>> + bool *);
>> +
>> static void
>> handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
>> struct dp_packet *pkt, struct conn *ec, long long now,
>> @@ -305,6 +311,9 @@ conntrack_init(void)
>> l4_protos[IPPROTO_ICMP] = &ct_proto_icmp4;
>> l4_protos[IPPROTO_ICMPV6] = &ct_proto_icmp6;
>>
>> + conn_update_state_hook_register(CT_HOOK_PRI_NORMAL,
>> + ftp_conn_update_state_hook);
>> +
>> ovsthread_once_done(&setup_l4_once);
>> }
>> return ct;
>> @@ -1301,38 +1310,56 @@ check_orig_tuple(struct conntrack *ct, struct
>> dp_packet *pkt,
>> }
>>
>> static bool
>> -conn_update_state_alg(struct conntrack *ct, struct dp_packet *pkt,
>> - struct conn_lookup_ctx *ctx, struct conn *conn,
>> - const struct nat_action_info_t *nat_action_info,
>> - enum ct_alg_ctl_type ct_alg_ctl, long long now,
>> - bool *create_new_conn)
>> -{
>> - if (is_ftp_ctl(ct_alg_ctl)) {
>> - /* Keep sequence tracking in sync with the source of the
>> - * sequence skew. */
>> +ftp_conn_update_state_hook(struct conntrack *ct, struct dp_packet *pkt,
>> + struct conn_lookup_ctx *ctx, struct conn *conn,
>> + const struct nat_action_info_t *nat_action_info,
>> + enum ct_alg_ctl_type ct_alg_ctl, long long now,
>> + bool *create_new_conn)
>> +{
>> + if (!is_ftp_ctl(ct_alg_ctl)) {
>> + return false;
>> + }
>> +
>> + /* Keep sequence tracking in sync with the source of the sequence skew.
>> */
>> + ovs_mutex_lock(&conn->lock);
>> + if (ctx->reply != conn->seq_skew_dir) {
>> + handle_ftp_ctl(ct, ctx, pkt, conn, now, CT_FTP_CTL_OTHER,
>> + !!nat_action_info);
>> + /* conn_update_state acquires conn->lock for unrelated fields. */
>> + ovs_mutex_unlock(&conn->lock);
>> + *create_new_conn = conn_update_state(ct, pkt, ctx, conn, now);
>> + } else {
>> + ovs_mutex_unlock(&conn->lock);
>> + *create_new_conn = conn_update_state(ct, pkt, ctx, conn, now);
>> ovs_mutex_lock(&conn->lock);
>> - if (ctx->reply != conn->seq_skew_dir) {
>> + if (!*create_new_conn) {
>> handle_ftp_ctl(ct, ctx, pkt, conn, now, CT_FTP_CTL_OTHER,
>> !!nat_action_info);
>> - /* conn_update_state locks for unrelated fields, so unlock. */
>> - ovs_mutex_unlock(&conn->lock);
>> - *create_new_conn = conn_update_state(ct, pkt, ctx, conn, now);
>> - } else {
>> - /* conn_update_state locks for unrelated fields, so unlock. */
>> - ovs_mutex_unlock(&conn->lock);
>> - *create_new_conn = conn_update_state(ct, pkt, ctx, conn, now);
>> - ovs_mutex_lock(&conn->lock);
>> - if (*create_new_conn == false) {
>> - handle_ftp_ctl(ct, ctx, pkt, conn, now, CT_FTP_CTL_OTHER,
>> - !!nat_action_info);
>> - }
>> - ovs_mutex_unlock(&conn->lock);
>> }
>> - return true;
>> + ovs_mutex_unlock(&conn->lock);
>> }
>> - return false;
>> + return true;
>> }
>>
>> +/* Distribute a connection state-transition event to registered hooks.
>> + * Returns true if a hook handled the update (and set *create_new_conn),
>> + * false if the caller should fall through to default conn_update_state().
>> */
>> +static bool
>> +conn_update_state_dist(struct conntrack *ct, struct dp_packet *pkt,
>> + struct conn_lookup_ctx *ctx, struct conn *conn,
>
> The header comment says conn->lock is NOT held on entry.
> Should we add OVS_EXCLUDED instrumentation here to enforce
> that at compile time? Per the documented lock acquisition
> order in conntrack-private.h, none of the three locks should
> be held on entry, so this could be:
>
> OVS_EXCLUDED(conn->lock, ct->ct_lock, ct->resources_lock)
ACK
>> + const struct nat_action_info_t *nat_action_info,
>> + enum ct_alg_ctl_type ct_alg_ctl, long long now,
>> + bool *create_new_conn)
>> +{
>> + for (size_t i = 0; i < n_ct_update_hooks; i++) {
>> + if (ct_update_hooks[i].fn(ct, pkt, ctx, conn, nat_action_info,
>> + ct_alg_ctl, now, create_new_conn)) {
>> + return true;
>> + }
>> + }
>> + return false;
>> + }
>
> Remove extra space before two closing braces, and return.
oof, ack.
>> +
>> static void
>> set_cached_conn(const struct nat_action_info_t *nat_action_info,
>> const struct conn_lookup_ctx *ctx, struct conn *conn,
>> @@ -1437,10 +1464,10 @@ process_one(struct conntrack *ct, struct dp_packet
>> *pkt,
>> enum ct_alg_ctl_type ct_alg_ctl = get_alg_ctl_type(pkt, helper);
>>
>> if (OVS_LIKELY(conn)) {
>> - if (OVS_LIKELY(!conn_update_state_alg(ct, pkt, ctx, conn,
>> - nat_action_info,
>> - ct_alg_ctl, now,
>> - &create_new_conn))) {
>> + if (OVS_LIKELY(!conn_update_state_dist(ct, pkt, ctx, conn,
>> + nat_action_info,
>> + ct_alg_ctl, now,
>> + &create_new_conn))) {
>> create_new_conn = conn_update_state(ct, pkt, ctx, conn, now);
>> }
>> if (nat_action_info && !create_new_conn) {
>> @@ -3743,6 +3770,35 @@ adj_seqnum(ovs_16aligned_be32 *val, int32_t inc)
>> put_16aligned_be32(val, htonl(ntohl(get_16aligned_be32(val)) + inc));
>> }
>>
>> +void
>> +conn_update_state_hook_register(int priority, conn_update_state_hook_fn fn)
>> +{
>> + ovs_assert(n_ct_update_hooks < CT_UPDATE_STATE_HOOKS_MAX);
>> +
>> + /* Insert in sorted order (ascending priority = higher precedence). */
>> + size_t i = n_ct_update_hooks;
>> + while (i > 0 && ct_update_hooks[i - 1].priority > priority) {
>> + ct_update_hooks[i] = ct_update_hooks[i - 1];
>> + i--;
>> + }
>> + ct_update_hooks[i].priority = priority;
>> + ct_update_hooks[i].fn = fn;
>> + n_ct_update_hooks++;
>> +}
>> +
>> +void
>> +conn_update_state_hook_unregister(conn_update_state_hook_fn fn)
>> +{
>> + for (size_t i = 0; i < n_ct_update_hooks; i++) {
>> + if (ct_update_hooks[i].fn == fn) {
>> + memmove(&ct_update_hooks[i], &ct_update_hooks[i + 1],
>> + (n_ct_update_hooks - i - 1) * sizeof
>> ct_update_hooks[0]);
>> + n_ct_update_hooks--;
>> + return;
>> + }
>> + }
>> +}
>> +
>> static void
>> handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
>> struct dp_packet *pkt, struct conn *ec, long long now,
>> --
>> 2.53.0
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev