Eelco Chaudron <[email protected]> writes:

> On 18 May 2026, at 19:13, Aaron Conole wrote:
>
>> Currently, if a conntrack submodule wants to add per-connection
>> private details, the pattern looks like:
>>
>>    struct private_conn {
>>        struct conn conn_;
>>        ... private data ...
>>    }
>>
>>    ...
>>
>>    new_conn = xalloc(sizeof struct private_conn);
>>    ...
>>    return &new_conn->conn_;
>>
>>    ...
>>
>>    struct private_conn *module_conn = (struct private_conn *)conn_;
>>
>> This is a common pattern where the underlying allocations are
>> delegated to the submodule areas, and the main processing module
>> always assumes that each module allocates a conn_ storage area at the
>> head of the connection struct anyway.
>>
>> However, this means that some storage details can't be shared in a
>> conenient way between modules without leaking details about the
>
> conenien -> convinient

convenient :)

>> underlying implementations of the module.  For example, TCP based
>> connections may want to share some TCP block details, but not want to
>> expose the full private TCP connection module internals.
>>
>> To facilitate this, introduce a private storage section into
>> connection objects.  This will allow storing pre-defined details that
>> each module can fill and guarantee some kind of compatibility without
>> needing to completely expose the internals.  It will be used in
>> upcoming commits.
>>
>> Signed-off-by: Aaron Conole <[email protected]>
>
> Hi Aaron,
>
> Thanks for your work on the series. Unfortunately, I was not able to
> review it during the RFC timeframe, but here we go...
>
> First, I’ll send some feedback on the first four general conntrack
> patches and will continue reviewing the remaining ones. Thanks for
> adding such detailed comments, as they really helped with the review.
>
> See some comments on this patch below.
>
> Cheers,
>
> Eelco
>
>> ---
>>  lib/conntrack-private.h |  26 ++++++++
>>  lib/conntrack.c         |  44 ++++++++++++++
>>  lib/conntrack.h         |  39 ++++++++++++
>>  tests/library.at        |  18 ++++++
>>  tests/test-conntrack.c  | 130 +++++++++++++++++++++++++++++++++++++++-
>>  5 files changed, 256 insertions(+), 1 deletion(-)
>>
>> diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
>> index f1132e8aa8..bd095277cd 100644
>> --- a/lib/conntrack-private.h
>> +++ b/lib/conntrack-private.h
>> @@ -156,6 +156,10 @@ struct conn {
>>      bool alg_related; /* True if alg data connection. */
>>
>>      uint32_t tp_id; /* Timeout policy ID. */
>> +
>> + /* Private per-module storage.  Indexed by ct_private_id_t values
> obtained
>> +     * via conn_private_id_alloc().  Access is protected by conn->lock. */
>> +    void *private[CT_CONN_PRIVATE_MAX];
>>  };
>>
>>  enum ct_update_res {
>> @@ -264,4 +268,26 @@ struct ct_l4_proto {
>>                                 struct ct_dpif_protoinfo *);
>>  };
>>
>> +/* conn_private_get() / conn_private_set()
>> + *
>> + * Fast-path accessors for per-connection private storage slots.  Both
>> + * functions are static inlines so they compile to a single load/store with
>> + * bounds-checking asserts that disappear in release builds.
>
> I do not think the last part holds up in most of the release
> builds, built by distributions. They do not build with
> ovs_asserts() being removed, and hence will incur the extra
> bounds-check costs. I guess you can just remove the specific
> part of the comment.

Makes sense.  I'll drop it.

>> + *
>> + * The caller must hold conn->lock when accessing the pointer.
>> + */
>> +static inline void *
>> +conn_private_get(const struct conn *conn, ct_private_id_t id)
>> +{
>> +    ovs_assert(id < CT_CONN_PRIVATE_MAX);
>> +    return conn->private[id];
>> +}
>> +
>> +static inline void
>> +conn_private_set(struct conn *conn, ct_private_id_t id, void *data)
>> +{
>> +    ovs_assert(id < CT_CONN_PRIVATE_MAX);
>> +    conn->private[id] = data;
>> +}
>> +
>>  #endif /* conntrack-private.h */
>> diff --git a/lib/conntrack.c b/lib/conntrack.c
>> index f84cdd216a..04f6b4b77c 100644
>> --- a/lib/conntrack.c
>> +++ b/lib/conntrack.c
>> @@ -157,6 +157,19 @@ expectation_clean(struct conntrack *ct, const
> struct conn_key *parent_key);
>>
>>  static struct ct_l4_proto *l4_protos[UINT8_MAX + 1];
>>
>> +/* Private per-connection storage slot registry.
>> + *
>> + * ct_private_slots[] is written once per slot at module initialization (via
>> + * conn_private_id_alloc()) and then read-only for the lifetime of
> the process,
>> + * so no additional locking is required to read the destructor pointer.
>> + */
>> +struct ct_private_slot {
>> +    void (*destructor)(void *); /* NULL means no cleanup required. */
>> +};
>> +
>> +static struct ct_private_slot ct_private_slots[CT_CONN_PRIVATE_MAX];
>> +static atomic_uint32_t ct_private_next_id = 0;
>> +
>>  static void
>>  handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
>>                 struct dp_packet *pkt, struct conn *ec, long long now,
>> @@ -607,6 +620,27 @@ conn_force_expire(struct conn *conn)
>>      atomic_store_relaxed(&conn->expiration, 0);
>>  }
>>
>> +ct_private_id_t
>> +conn_private_id_alloc(void (*destructor)(void *))
>> +{
>> +    uint32_t id;
>
> ct_private_id_t is unsigned int but the atomic counter and
> the local variable are uint32_t.  Would it make sense to
> define ct_private_id_t as uint32_t so both sides stay in
> sync? Or the other way around?

The sizes should be the same, but I agree we can make them the same.

>> +
>> +    atomic_add(&ct_private_next_id, 1u, &id);
>> +    if (id >= CT_CONN_PRIVATE_MAX) {
>> +        /* Undo the increment so the counter doesn't overflow.
>> +         * Because we are not suppoed to call this after ct initialization,
>
> suppoed -> supposed

ack

>> +         * there shouldn't be an access race here. */
>> +        atomic_sub(&ct_private_next_id, 1u, &id);
>> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>> + VLOG_ERR_RL(&rl, "conntrack: all %d private storage slots are in
> use; "
>> +                    "cannot allocate a new one", CT_CONN_PRIVATE_MAX);
>> +        return CT_PRIVATE_ID_INVALID;
>> +    }
>> +
>> +    ct_private_slots[id].destructor = destructor;
>> +    return id;
>> +}
>> +
>>  /* Destroys the connection tracker 'ct' and frees all the allocated memory.
>>   * The caller of this function must already have shut down packet input
>>   * and PMD threads (which would have been quiesced).  */
>> @@ -2716,6 +2750,16 @@ new_conn(struct conntrack *ct, struct
> dp_packet *pkt, struct conn_key *key,
>>  static void
>>  delete_conn__(struct conn *conn)
>>  {
>> +    uint32_t n;
>> +
>> +    /* Invoke registered destructors for any non-NULL private slots. */
>> +    atomic_read_relaxed(&ct_private_next_id, &n);
>> +    for (uint32_t i = 0; i < n; i++) {
>> +        if (ct_private_slots[i].destructor && conn->private[i]) {
>> +            ct_private_slots[i].destructor(conn->private[i]);
>> +        }
>> +    }
>> +
>>      free(conn->alg);
>>      free(conn);
>>  }
>> diff --git a/lib/conntrack.h b/lib/conntrack.h
>> index c3136e9554..e5ca1528bf 100644
>> --- a/lib/conntrack.h
>> +++ b/lib/conntrack.h
>> @@ -91,6 +91,45 @@ struct nat_action_info_t {
>>      uint16_t nat_flags;
>>  };
>>
>> +/* Private per-connection storage slots.
>> + *
>> + * Modules (protocol handlers, offload interfaces, etc.) can reserve a slot
>> + * at initialization time and use it to attach private data to every tracked
>> + * connection.  Slot IDs are small integers that index directly into a 
>> fixed-
>> + * size array inside struct conn, so get/set operations are O(1) and branch-
>> + * free, safe to call on the datapath fast path.
>> + *
>> + * Usage
>> + * -----
>> + *   // At module initialization, allocate and store the returned id.
>> + *   static ct_private_id_t my_id;
>> + *   my_id = conn_private_id_alloc(my_conn_data_free);
>
> Could the example show checking the return value against
> CT_PRIVATE_ID_INVALID?

ACK, sure thing.

>> + *   // On the fast path (no lock needed beyond conn->lock for the pointer).
>> + *   conn_private_set(conn, my_id, my_data);
>> + *   my_data = conn_private_get(conn, my_id);
>> + *
>> + * Thread-safety
>> + * -------------
>> + * The pointer slot itself is protected by conn->lock.  The pointed-to data
>> + * is the responsibility of the registering module.
>> + */
>> +
>> +/* Maximum number of private storage slots available per connection. */
>> +#define CT_CONN_PRIVATE_MAX 8
>> +
>> +typedef unsigned int ct_private_id_t;
>> +
>> +/* Returned by conn_private_id_alloc() when no slots remain. */
>> +#define CT_PRIVATE_ID_INVALID UINT_MAX
>> +
>> +/* Allocate a private storage slot.  'destructor' (may be NULL) is
> called with
>> + * the stored pointer when a connection is freed; it must be safe
> to call with
>> + * a NULL argument.  Returns CT_PRIVATE_ID_INVALID on failure (all slots
>> + * taken).  Must be called before any connection is created that should 
>> carry
>> + * this slot (i.e. at module initialization time). */
>
> The comment says the destructor must be safe to call with a
> NULL argument, but delete_conn__() skips the call when
> conn->private[i] is NULL, so any reason why we have this comment?

Ahh, I went back through and decided to shortcut the call when NULL got
encountered and didn't update the comment here.

>> +ct_private_id_t conn_private_id_alloc(void (*destructor)(void *));
>> +
>>  struct conntrack *conntrack_init(void);
>>  void conntrack_destroy(struct conntrack *);
>>
>> diff --git a/tests/library.at b/tests/library.at
>> index 449f15fd5a..6c5b55f045 100644
>> --- a/tests/library.at
>> +++ b/tests/library.at
>> @@ -307,3 +307,21 @@ AT_CLEANUP
>>  AT_SETUP([Conntrack Library - FTP ALG parsing])
>>  AT_CHECK([ovstest test-conntrack ftp-alg-large-payload])
>>  AT_CLEANUP
>> +
>> +AT_SETUP([conntrack private storage - id alloc])
>> +AT_KEYWORDS([conntrack])
>> +AT_CHECK([ovstest test-conntrack private-id-alloc], [0], [.
>> +])
>> +AT_CLEANUP
>> +
>> +AT_SETUP([conntrack private storage - slot exhaustion])
>> +AT_KEYWORDS([conntrack])
>> +AT_CHECK([ovstest test-conntrack private-id-exhaustion], [0], [.........
>> +], [ignore])
>> +AT_CLEANUP
>> +
>> +AT_SETUP([conntrack private storage - destructor])
>> +AT_KEYWORDS([conntrack])
>> +AT_CHECK([ovstest test-conntrack private-destructor], [0], [.
>> +])
>> +AT_CLEANUP
>> diff --git a/tests/test-conntrack.c b/tests/test-conntrack.c
>> index 22db95f914..7f42adbb55 100644
>> --- a/tests/test-conntrack.c
>> +++ b/tests/test-conntrack.c
>> @@ -16,6 +16,7 @@
>>
>>  #include <config.h>
>>  #include "conntrack.h"
>> +#include "conntrack-private.h"
>>
>>  #include "dp-packet.h"
>>  #include "fatal-signal.h"
>> @@ -492,7 +493,7 @@ test_pcap(struct ovs_cmdl_context *ctx)
>>      ovs_pcap_close(pcap);
>>  }
>>  
>> -/* ALG related testing. */
>> +/* Conntrack functional testing. */
>>
>>  /* FTP IPv4 PORT payload for testing. */
>>  #define FTP_PORT_CMD_STR  "PORT 192,168,123,2,113,42\r\n"
>> @@ -572,6 +573,124 @@ test_ftp_alg_large_payload(struct
> ovs_cmdl_context *ctx OVS_UNUSED)
>>      conntrack_destroy(ct);
>>  }
>>
>> +/* Verify that conn_private_id_alloc() returns a valid slot ID and that the
>> + * idiomatic "store the ID in a static variable at module init"
> pattern works.
>> + */
>> +static void
>> +test_private_id_alloc(struct ovs_cmdl_context *ctx OVS_UNUSED)
>> +{
>> + /* Mirrors the real-world pattern: a module stores its slot ID in
> a static
>> +     * so it is initialised once and available everywhere in the translation
>> +     * unit. */
>> +    static ct_private_id_t my_id = CT_PRIVATE_ID_INVALID;
>> +
>> +    my_id = conn_private_id_alloc(NULL);
>> +
>> +    ovs_assert(my_id != CT_PRIVATE_ID_INVALID);
>> +
>> +    ovs_assert(my_id < CT_CONN_PRIVATE_MAX);
>> +
>> +    /* The first allocation must yield slot 0. */
>> +    ovs_assert(my_id == 0);
>> +    printf(".\n");
>> +}
>> +
>> +/* Allocate every available slot and confirm that the next request returns
>> + * CT_PRIVATE_ID_INVALID.  Each successful allocation prints one dot so the
>> + * .at test can verify both the count and the error behaviour.
>> + */
>> +static void
>> +test_private_id_exhaustion(struct ovs_cmdl_context *ctx OVS_UNUSED)
>> +{
>> +    ct_private_id_t ids[CT_CONN_PRIVATE_MAX];
>> +
>> +    /* Fill all CT_CONN_PRIVATE_MAX slots. */
>> +    for (unsigned int i = 0; i < CT_CONN_PRIVATE_MAX; i++) {
>> +        ids[i] = conn_private_id_alloc(NULL);
>> +        ovs_assert(ids[i] != CT_PRIVATE_ID_INVALID);
>> +
>> +        ovs_assert(ids[i] == i);
>> +        printf(".");
>> +    }
>> +
>> +    /* The very next allocation must fail. */
>> +    ct_private_id_t extra = conn_private_id_alloc(NULL);
>> +    ovs_assert(extra == CT_PRIVATE_ID_INVALID);
>> +    printf(".\n");
>> +}
>> +
>> +/* Globals written by the destructor callback used in test 3. */
>> +static int   dtor_call_count = 0;
>> +static void *dtor_last_ptr   = NULL;
>> +
>> +static void
>> +record_destructor(void *data)
>> +{
>> +    dtor_call_count++;
>> +    dtor_last_ptr = data;
>> +}
>> +
>> +/* Register a destructor, commit a real connection, attach a sentinel 
>> pointer
>> + * as private data, then destroy the conntrack instance.  After draining the
>> + * RCU queue (ovsrcu_exit) the destructor must have been called exactly
>> + * once with the sentinel value.
>> + */
>> +static uintptr_t ERRPTR;
>> +
>> +static void
>> +test_private_destructor(struct ovs_cmdl_context *ctx OVS_UNUSED)
>> +{
>> +    /* Sentinel: a non-NULL pointer value we can identify unambiguously.
>> +     * ERRPTR is defined above in case we want to use it in the future as
>> +     * a platform-agnostic and portable sentinel value rather than some
>> +     * hardcoded hex. */
>> +    void *sentinel = (void *)(uintptr_t)&ERRPTR;
>> +
>> +    static ct_private_id_t dtor_id = CT_PRIVATE_ID_INVALID;
>> +    dtor_id = conn_private_id_alloc(record_destructor);
>> +    ovs_assert(dtor_id != CT_PRIVATE_ID_INVALID);
>> +
>> +    /* Create a conntrack instance and commit one UDP connection. */
>> +    struct conntrack *lct = conntrack_init();
>> +    ovs_be16 dl_type;
>> +    struct dp_packet *pkt = build_packet(1, 2, &dl_type);
>> +    struct dp_packet_batch batch;
>> +    dp_packet_batch_init(&batch);
>> +    dp_packet_batch_add(&batch, pkt);
>> +
>> +    long long now = time_msec();
>> +    conntrack_execute(lct, &batch, dl_type, false, true, 0,
>> +                      NULL, NULL, NULL, NULL, now, 0);
>> +
>> +    /* After a committed execute the packet carries a cached conn pointer. 
>> */
>> +    struct conn *conn = pkt->md.conn;
>> +    ovs_assert(conn != NULL);
>> +
>> +    /* Attach the sentinel as private data for our slot. */
>> +    ovs_mutex_lock(&conn->lock);
>> +    conn_private_set(conn, dtor_id, sentinel);
>> +    ovs_mutex_unlock(&conn->lock);
>> +
>> +    /* Destroying the tracker flushes all connections, queuing delete_conn()
>> +     * callbacks via ovsrcu_postpone().  The destructor fires once those
>> +     * callbacks are processed. */
>> +    conntrack_destroy(lct);
>> +
>> + /* ovsrcu_exit() stops the urcu background thread and
> synchronously drains
>
> Maybe add urcu to our ./utilities/checkpatch_dict.txt file?

Sure thing.

>> +     * all pending postponed callbacks (including delete_conn__ / destructor
>> + * chain) before returning.  ovsrcu_synchronize() is insufficient
> here: it
>> +     * only waits for threads to quiesce, not for the urcu thread to have
>> +     * actually executed the queued callbacks. */
>> +    ovsrcu_exit();
>> +
>> +    ovs_assert(dtor_call_count == 1);
>> +
>> +    ovs_assert(dtor_last_ptr == sentinel);
>> +
>> +    dp_packet_delete_batch(&batch, true);
>> +    printf(".\n");
>> +}
>> +
>>  
> Should we add a test that exercises multiple slots on the
> same connection to make sure we are not overwriting or
> reusing pointers or IDs?

ACK, will do.

>>  static const struct ovs_cmdl_command commands[] = {
>>      /* Connection tracker tests. */
>> @@ -597,6 +716,15 @@ static const struct ovs_cmdl_command commands[] = {
>>       * is rewritten to the SNAT target rather than causing a crash. */
>>      {"ftp-alg-large-payload", "", 0, 0,
>>          test_ftp_alg_large_payload, OVS_RO},
>> +    /* Private per-connection storage registry tests.
>> + * Each MUST be run as a separate ovstest invocation so the
> process-global
>> +     * slot counter is fresh (starts at 0). */
>> +    {"private-id-alloc", "", 0, 0,
>> +     test_private_id_alloc, OVS_RO},
>> +    {"private-id-exhaustion", "", 0, 0,
>> +     test_private_id_exhaustion, OVS_RO},
>> +    {"private-destructor", "", 0, 0,
>> +     test_private_destructor, OVS_RO},
>>
>>      {NULL, NULL, 0, 0, NULL, OVS_RO},
>>  };
>> -- 
>> 2.53.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to