Eelco Chaudron <[email protected]> writes:

> On 18 May 2026, at 19:13, Aaron Conole wrote:
>
>> Conntrack has a number of places where it would be useful to monitor
>> changes.  Currently, these are hard coded spots for things like alg
>> handlers that need to fire when connections are added and transition
>> so that we can monitor packets.  This leads to very difficult to read
>> code, with messy branches all over the place, and a bunch of unrelated
>> functions mixed together.
>>
>> Rename the conn_update_state_alg() function to conn_update_state_dist()
>> and abstract away the FTP specific hook logic.  The original function
>> required manual modification to add additional handlers, which we want
>> to avoid and make more generic for future additions (which can include
>> observers for hardware offloads).  The hooks are priority based so
>> that some high priority hooks can run early, while later hooks that
>> consume the event can run later.
>>
>> This infrastructure relies on the fact that there is only one global
>> conntrack instance.  If the conntrack ever returns to allowing for
>> multiple instances, this will need to be re-abstracted.
>>
>> Signed-off-by: Aaron Conole <[email protected]>
>
> Hi Aaron,
>
> See some comments below.
>
> //Eelco
>
>> ---
>>  lib/conntrack-private.h |  55 +++++++++++++++
>>  lib/conntrack.c         | 148 +++++++++++++++++++++++++++-------------
>>  2 files changed, 157 insertions(+), 46 deletions(-)
>>
>> diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
>> index bd095277cd..a5bf1bb519 100644
>> --- a/lib/conntrack-private.h
>> +++ b/lib/conntrack-private.h
>> @@ -252,6 +252,17 @@ struct conntrack {
>>   *    3. 'resources_lock'
>>   */
>>
>> +/* ALG control type identifiers.  These determine which application-layer
>> + * gateway helper applies to a given connection. */
>> +enum ct_alg_ctl_type {
>> +    CT_ALG_CTL_NONE,
>> +    CT_ALG_CTL_FTP,
>> +    CT_ALG_CTL_TFTP,
>> +    /* SIP is not enabled through OpenFlow and is present only as an example
>> +     * of an ALG that allows a wildcard source IP address. */
>> +    CT_ALG_CTL_SIP,
>> +};
>> +
>>  extern struct ct_l4_proto ct_proto_tcp;
>>  extern struct ct_l4_proto ct_proto_other;
>>  extern struct ct_l4_proto ct_proto_icmp4;
>> @@ -268,6 +279,50 @@ struct ct_l4_proto {
>>                                 struct ct_dpif_protoinfo *);
>>  };
>>
>> +/* Transient lookup context built for each packet; private to conntrack.c 
>> and
>> + * the ALG helper modules. */
>> +struct conn_lookup_ctx {
>> +    struct conn_key key;
>> +    struct conn *conn;
>> +    uint32_t hash;
>> +    bool reply;
>> +    bool icmp_related;
>> +};
>> +
>> +/* conn_update_state_dist() hook
>> + *
>> + * Modules may register a hook to intercept connection state transitions.
>> + * conn_update_state_dist() calls registered hooks in ascending priority 
>> order
>> + * until one returns true (meaning the hook handled the update, including 
>> any
>> + * call to conn_update_state() it needed to make).  If no hook claims the
>> + * packet the caller falls through to the default conn_update_state() path.
>> + *
>> + * Priority: lower numerical value -> higher precedence -> called first.
>> + * Named constants below cover the common cases; any int value is accepted.
>> + *
>> + * conn->lock is NOT held on entry; hooks must acquire it as needed 
>> following
>> + * the lock-ordering rules.
>> + *
>> + * Registration must happen before any connection is processed (i.e. at 
>> module
>> + * initialisation time, under ovsthread_once).
>
>
> Maybe add that this is not thread-safe and must only be
> called during single-threaded initialization?  Something
> like:
>
>  * Registration is not thread-safe and must happen before any
>  * connection is processed (i.e. during single-threaded
>  * module initialisation, under ovsthread_once).

ACK.

>> + */
>> +typedef bool (*conn_update_state_hook_fn)(
>> +    struct conntrack *ct, struct dp_packet *pkt,
>> +    struct conn_lookup_ctx *ctx, struct conn *conn,
>> +    const struct nat_action_info_t *nat_action_info,
>> +    enum ct_alg_ctl_type ct_alg_ctl, long long now,
>> +    bool *create_new_conn);
>> +
>> +enum conn_update_state_hook_priority {
>> +    CT_HOOK_PRI_HIGH   =  50,  /* Before built-in ALG handlers. */
>> +    CT_HOOK_PRI_NORMAL = 100,  /* Default; used by built-in ALG handlers. */
>> +    CT_HOOK_PRI_LOW    = 150,  /* After built-in ALG handlers. */
>> +};
>> +
>> +void conn_update_state_hook_register(int priority,
>> +                                     conn_update_state_hook_fn);
>> +void conn_update_state_hook_unregister(conn_update_state_hook_fn);
>> +
>>  /* conn_private_get() / conn_private_set()
>>   *
>>   * Fast-path accessors for per-connection private storage slots.  Both
>> diff --git a/lib/conntrack.c b/lib/conntrack.c
>> index 04f6b4b77c..cbf268e5ee 100644
>> --- a/lib/conntrack.c
>> +++ b/lib/conntrack.c
>> @@ -55,14 +55,6 @@ COVERAGE_DEFINE(conntrack_l4csum_err);
>>  COVERAGE_DEFINE(conntrack_lookup_natted_miss);
>>  COVERAGE_DEFINE(conntrack_zone_full);
>>
>> -struct conn_lookup_ctx {
>> -    struct conn_key key;
>> -    struct conn *conn;
>> -    uint32_t hash;
>> -    bool reply;
>> -    bool icmp_related;
>> -};
>> -
>>  enum ftp_ctl_pkt {
>>      /* Control packets with address and/or port specifiers. */
>>      CT_FTP_CTL_INTEREST,
>> @@ -77,15 +69,6 @@ enum ct_alg_mode {
>>      CT_TFTP_MODE,
>>  };
>>
>> -enum ct_alg_ctl_type {
>> -    CT_ALG_CTL_NONE,
>> -    CT_ALG_CTL_FTP,
>> -    CT_ALG_CTL_TFTP,
>> -    /* SIP is not enabled through Openflow and presently only used as
>> -     * an example of an alg that allows a wildcard src ip. */
>> -    CT_ALG_CTL_SIP,
>> -};
>> -
>>  struct zone_limit {
>>      struct cmap_node node;
>>      struct conntrack_zone_limit czl;
>> @@ -170,6 +153,29 @@ struct ct_private_slot {
>>  static struct ct_private_slot ct_private_slots[CT_CONN_PRIVATE_MAX];
>>  static atomic_uint32_t ct_private_next_id = 0;
>>
>> +/* conn_update_state_dist() hook registry.
>> + *
>> + * Written once at module initialisation (under ovsthread_once), then
>> + * read-only during packet processing, so no additional locking is needed.
>> + * Entries are kept sorted by ascending priority so conn_update_state_dist()
>> + * can iterate in order without extra bookkeeping.
>
> Is this accurate?  conn_update_state_hook_unregister()
> exists and mutates the array with memmove(), but there is no
> locking to prevent it from racing with the iteration in
> conn_update_state_dist() on the fast path.  Would it be
> better to remove the unregister function if it has no
> callers?  Or if it is needed, this comment is incorrect and
> some kind of locking is required.

Okay, I can probably remove it.

>> + */
>> +#define CT_UPDATE_STATE_HOOKS_MAX 8
>> +
>> +struct ct_update_hook {
>> +    int priority;
>> +    conn_update_state_hook_fn fn;
>> +};
>> +
>> +static struct ct_update_hook ct_update_hooks[CT_UPDATE_STATE_HOOKS_MAX];
>> +static size_t n_ct_update_hooks;
>> +
>> +static bool ftp_conn_update_state_hook(struct conntrack *, struct
>> dp_packet *,
>
>  OVS_EXCLUDED(conn->lock, ct->ct_lock, ct->resources_lock) ?
>
>> +                                       struct conn_lookup_ctx *, struct 
>> conn *,
>> +                                       const struct nat_action_info_t *,
>> +                                       enum ct_alg_ctl_type, long long,
>> +                                       bool *);
>> +
>>  static void
>>  handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
>>                 struct dp_packet *pkt, struct conn *ec, long long now,
>> @@ -305,6 +311,9 @@ conntrack_init(void)
>>          l4_protos[IPPROTO_ICMP] = &ct_proto_icmp4;
>>          l4_protos[IPPROTO_ICMPV6] = &ct_proto_icmp6;
>>
>> +        conn_update_state_hook_register(CT_HOOK_PRI_NORMAL,
>> +                                        ftp_conn_update_state_hook);
>> +
>>          ovsthread_once_done(&setup_l4_once);
>>      }
>>      return ct;
>> @@ -1301,38 +1310,56 @@ check_orig_tuple(struct conntrack *ct, struct 
>> dp_packet *pkt,
>>  }
>>
>>  static bool
>> -conn_update_state_alg(struct conntrack *ct, struct dp_packet *pkt,
>> -                      struct conn_lookup_ctx *ctx, struct conn *conn,
>> -                      const struct nat_action_info_t *nat_action_info,
>> -                      enum ct_alg_ctl_type ct_alg_ctl, long long now,
>> -                      bool *create_new_conn)
>> -{
>> -    if (is_ftp_ctl(ct_alg_ctl)) {
>> -        /* Keep sequence tracking in sync with the source of the
>> -         * sequence skew. */
>> +ftp_conn_update_state_hook(struct conntrack *ct, struct dp_packet *pkt,
>> +                           struct conn_lookup_ctx *ctx, struct conn *conn,
>> +                           const struct nat_action_info_t *nat_action_info,
>> +                           enum ct_alg_ctl_type ct_alg_ctl, long long now,
>> +                           bool *create_new_conn)
>> +{
>> +    if (!is_ftp_ctl(ct_alg_ctl)) {
>> +        return false;
>> +    }
>> +
>> +    /* Keep sequence tracking in sync with the source of the sequence skew. 
>> */
>> +    ovs_mutex_lock(&conn->lock);
>> +    if (ctx->reply != conn->seq_skew_dir) {
>> +        handle_ftp_ctl(ct, ctx, pkt, conn, now, CT_FTP_CTL_OTHER,
>> +                       !!nat_action_info);
>> +        /* conn_update_state acquires conn->lock for unrelated fields. */
>> +        ovs_mutex_unlock(&conn->lock);
>> +        *create_new_conn = conn_update_state(ct, pkt, ctx, conn, now);
>> +    } else {
>> +        ovs_mutex_unlock(&conn->lock);
>> +        *create_new_conn = conn_update_state(ct, pkt, ctx, conn, now);
>>          ovs_mutex_lock(&conn->lock);
>> -        if (ctx->reply != conn->seq_skew_dir) {
>> +        if (!*create_new_conn) {
>>              handle_ftp_ctl(ct, ctx, pkt, conn, now, CT_FTP_CTL_OTHER,
>>                             !!nat_action_info);
>> -            /* conn_update_state locks for unrelated fields, so unlock. */
>> -            ovs_mutex_unlock(&conn->lock);
>> -            *create_new_conn = conn_update_state(ct, pkt, ctx, conn, now);
>> -        } else {
>> -            /* conn_update_state locks for unrelated fields, so unlock. */
>> -            ovs_mutex_unlock(&conn->lock);
>> -            *create_new_conn = conn_update_state(ct, pkt, ctx, conn, now);
>> -            ovs_mutex_lock(&conn->lock);
>> -            if (*create_new_conn == false) {
>> -                handle_ftp_ctl(ct, ctx, pkt, conn, now, CT_FTP_CTL_OTHER,
>> -                               !!nat_action_info);
>> -            }
>> -            ovs_mutex_unlock(&conn->lock);
>>          }
>> -        return true;
>> +        ovs_mutex_unlock(&conn->lock);
>>      }
>> -    return false;
>> +    return true;
>>  }
>>
>> +/* Distribute a connection state-transition event to registered hooks.
>> + * Returns true if a hook handled the update (and set *create_new_conn),
>> + * false if the caller should fall through to default conn_update_state(). 
>> */
>> +static bool
>> +conn_update_state_dist(struct conntrack *ct, struct dp_packet *pkt,
>> +                       struct conn_lookup_ctx *ctx, struct conn *conn,
>
> The header comment says conn->lock is NOT held on entry.
> Should we add OVS_EXCLUDED instrumentation here to enforce
> that at compile time?  Per the documented lock acquisition
> order in conntrack-private.h, none of the three locks should
> be held on entry, so this could be:
>
>     OVS_EXCLUDED(conn->lock, ct->ct_lock, ct->resources_lock)

ACK

>> +                       const struct nat_action_info_t *nat_action_info,
>> +                       enum ct_alg_ctl_type ct_alg_ctl, long long now,
>> +                       bool *create_new_conn)
>> +{
>> +    for (size_t i = 0; i < n_ct_update_hooks; i++) {
>> +        if (ct_update_hooks[i].fn(ct, pkt, ctx, conn, nat_action_info,
>> +                                  ct_alg_ctl, now, create_new_conn)) {
>> +            return true;
>> +        }
>> +     }
>> +     return false;
>> + }
>
> Remove extra space before two closing braces, and return.

oof, ack.

>> +
>>  static void
>>  set_cached_conn(const struct nat_action_info_t *nat_action_info,
>>                  const struct conn_lookup_ctx *ctx, struct conn *conn,
>> @@ -1437,10 +1464,10 @@ process_one(struct conntrack *ct, struct dp_packet 
>> *pkt,
>>      enum ct_alg_ctl_type ct_alg_ctl = get_alg_ctl_type(pkt, helper);
>>
>>      if (OVS_LIKELY(conn)) {
>> -        if (OVS_LIKELY(!conn_update_state_alg(ct, pkt, ctx, conn,
>> -                                              nat_action_info,
>> -                                              ct_alg_ctl, now,
>> -                                              &create_new_conn))) {
>> +        if (OVS_LIKELY(!conn_update_state_dist(ct, pkt, ctx, conn,
>> +                                               nat_action_info,
>> +                                               ct_alg_ctl, now,
>> +                                               &create_new_conn))) {
>>              create_new_conn = conn_update_state(ct, pkt, ctx, conn, now);
>>          }
>>          if (nat_action_info && !create_new_conn) {
>> @@ -3743,6 +3770,35 @@ adj_seqnum(ovs_16aligned_be32 *val, int32_t inc)
>>      put_16aligned_be32(val, htonl(ntohl(get_16aligned_be32(val)) + inc));
>>  }
>>
>> +void
>> +conn_update_state_hook_register(int priority, conn_update_state_hook_fn fn)
>> +{
>> +    ovs_assert(n_ct_update_hooks < CT_UPDATE_STATE_HOOKS_MAX);
>> +
>> +    /* Insert in sorted order (ascending priority = higher precedence). */
>> +    size_t i = n_ct_update_hooks;
>> +    while (i > 0 && ct_update_hooks[i - 1].priority > priority) {
>> +        ct_update_hooks[i] = ct_update_hooks[i - 1];
>> +        i--;
>> +    }
>> +    ct_update_hooks[i].priority = priority;
>> +    ct_update_hooks[i].fn = fn;
>> +    n_ct_update_hooks++;
>> +}
>> +
>> +void
>> +conn_update_state_hook_unregister(conn_update_state_hook_fn fn)
>> +{
>> +    for (size_t i = 0; i < n_ct_update_hooks; i++) {
>> +        if (ct_update_hooks[i].fn == fn) {
>> +            memmove(&ct_update_hooks[i], &ct_update_hooks[i + 1],
>> +                    (n_ct_update_hooks - i - 1) * sizeof 
>> ct_update_hooks[0]);
>> +            n_ct_update_hooks--;
>> +            return;
>> +        }
>> +    }
>> +}
>> +
>>  static void
>>  handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
>>                 struct dp_packet *pkt, struct conn *ec, long long now,
>> -- 
>> 2.53.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to