On 8 Jun 2026, at 20:37, Aaron Conole wrote:
> Eelco Chaudron <[email protected]> writes:
>
>> On 18 May 2026, at 19:13, Aaron Conole wrote:
>>
>>> This allows configuring hardware offload on/off. The ct_sweep
>>> expiration is always trying to fill the ops batch anyway, so it
>>> doesn't need an actual check for enabled / disabled. Wrapping that
>>> code in a check may also be harmful in the event that offload is
>>> disabled with offloaded connections.
>>>
>>> Signed-off-by: Aaron Conole <[email protected]>
>>
>> Hi Aaron,
>>
>> See some comments below.
>>
>> //Eelco
>>
>>> ---
>>> lib/conntrack.c | 25 +++++++++++++----------
>>> lib/ct-offload.c | 51 +++++++++++++++++++++++++++++++++++++++++++++-
>>> lib/ct-offload.h | 8 ++++++++
>>> lib/dpif-offload.c | 13 ++++++++++++
>>> lib/dpif-offload.h | 1 +
>>> vswitchd/bridge.c | 4 ++++
>>> 6 files changed, 90 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/lib/conntrack.c b/lib/conntrack.c
>>> index 6ebae26411..83dac97b89 100644
>>> --- a/lib/conntrack.c
>>> +++ b/lib/conntrack.c
>>> @@ -534,12 +534,14 @@ conn_clean(struct conntrack *ct, struct conn *conn)
>>> atomic_count_dec(&zl->czl.count);
>>> }
>>>
>>> - struct ct_offload_ctx offload_ctx = {
>>> - .conn = conn,
>>> - .netdev_in = NULL,
>>> - .input_port_id = ODPP_NONE,
>>> - };
>>> - ct_offload_conn_del(&offload_ctx);
>>> + if (ct_offload_conn_is_offloaded(conn)) {
>>> + struct ct_offload_ctx offload_ctx = {
>>> + .conn = conn,
>>> + .netdev_in = NULL,
>>> + .input_port_id = ODPP_NONE,
>>> + };
>>> + ct_offload_conn_del(&offload_ctx);
>>> + }
>>>
>>> ovsrcu_postpone(delete_conn, conn);
>>> atomic_count_dec(&ct->n_conn);
>>> @@ -1392,7 +1394,7 @@ process_one(struct conntrack *ct, struct dp_packet
>>> *pkt,
>>> }
>>> ovs_mutex_unlock(&ct->ct_lock);
>>>
>>> - if (conn) {
>>> + if (conn && ct_offload_enabled()) {
>>> struct ct_offload_ctx offload_ctx = {
>>> .conn = conn,
>>> .netdev_in = NULL,
>>> @@ -1404,6 +1406,7 @@ process_one(struct conntrack *ct, struct dp_packet
>>> *pkt,
>>>
>>> if (!create_new_conn && conn && ctx->reply &&
>>> (pkt->md.ct_state & CS_ESTABLISHED) &&
>>> + ct_offload_enabled() &&
>>> ct_offload_conn_is_offloaded(conn) &&
>>> !ct_offload_conn_is_established(conn)) {
>>> /* Notify offload providers that the connection is established.
>>> @@ -1568,7 +1571,6 @@ ct_sweep(struct conntrack *ct, struct rculist *list,
>>> long long now,
>>> size_t cleaned = 0;
>>> size_t count = 0;
>>>
>>> -
>>> ct_offload_op_batch_init(&batch);
>>>
>>> RCULIST_FOR_EACH (conn, node, list) {
>>> @@ -1582,7 +1584,6 @@ ct_sweep(struct conntrack *ct, struct rculist *list,
>>> long long now,
>>> .netdev_in = NULL,
>>> .input_port_id = ODPP_NONE,
>>> };
>>> -
>>> ct_offload_op_batch_add(&batch, CT_OFFLOAD_OP_UPD,
>>> &offload_ctx);
>>> }
>>> @@ -1590,12 +1591,14 @@ ct_sweep(struct conntrack *ct, struct rculist
>>> *list, long long now,
>>> count++;
>>> }
>>>
>>> - /* Run the batch. */
>>> + /* Run the batch: providers that can supply a hw last-used timestamp
>>> + * return error==0, allowing us to extend the expiration. A non-zero
>>> + * error (typically ENODATA) means the connection has no hw activity
>>> + * and should be expired normally. */
>>> ct_offload_op_batch_submit(&batch);
>>>
>>> CT_OFFLOAD_BATCH_OP_FOR_EACH (idx, op, &batch) {
>>> struct conn *c = CONST_CAST(struct conn *, op->ctx.conn);
>>> -
>>> if (op->error) {
>>> conn_clean(ct, c);
>>> cleaned++;
>>> diff --git a/lib/ct-offload.c b/lib/ct-offload.c
>>> index 979b5b13db..1b4d230b80 100644
>>> --- a/lib/ct-offload.c
>>> +++ b/lib/ct-offload.c
>>> @@ -20,8 +20,10 @@
>>> #include "conntrack.h"
>>> #include "conntrack-private.h"
>>> #include "ct-offload.h"
>>> +#include "dpif-offload.h"
>>> #include "ovs-thread.h"
>>> #include "util.h"
>>> +#include "vswitch-idl.h"
>>>
>>> #include "openvswitch/list.h"
>>> #include "openvswitch/vlog.h"
>>> @@ -51,6 +53,12 @@ static struct ovs_list ct_offload_classes
>>> OVS_GUARDED_BY(ct_offload_mutex)
>>> = OVS_LIST_INITIALIZER(&ct_offload_classes);
>>>
>>> +/* Built-in CT offload provider classes. Only those whose name matches a
>>> + * registered dpif offload class will be activated by
>>> ct_offload_module_init().
>>> + */
>>> +static const struct ct_offload_class *base_ct_offload_classes[] = {
>>> +};
>>> +
>>>
>>> /* ct_offload_register() - register a CT offload provider class.
>>> *
>>> @@ -140,11 +148,52 @@ ct_offload_alloc_private_slot(void)
>>>
>>> /* ct_offload_module_init() - register built-in CT offload providers.
>>> *
>>> - * Must be called once before any connections are created. */
>>> + * Only registers providers whose name matches a currently-registered dpif
>>> + * offload class, so CT offload is automatically tied to the active
>>> hardware
>>> + * offload provider. Safe to call multiple times; subsequent calls are
>>> + * no-ops (duplicate registration is detected and skipped). */
>>> void
>>> ct_offload_module_init(void)
>>> {
>>> ct_offload_alloc_private_slot();
>>> +
>>> + for (size_t i = 0; i < ARRAY_SIZE(base_ct_offload_classes); i++) {
>>> + const struct ct_offload_class *class = base_ct_offload_classes[i];
>>> +
>>> + if (dpif_offload_class_is_registered(class->name)) {
>>> + ct_offload_register(class);
>>> + }
>>> + }
>>> +}
>>> +
>>> +/* ct_offload_enabled() - returns true when hardware offload is active.
>>> + *
>>> + * Delegates to dpif_offload_enabled() so CT offload shares the same global
>>> + * enable/disable knob as datapath hardware offload. */
>>> +bool
>>> +ct_offload_enabled(void)
>>> +{
>>> + return dpif_offload_enabled();
>>> +}
>>> +
>>> +/* ct_offload_set_global_cfg() - configure CT offload from OVSDB.
>>> + *
>>> + * Must be called alongside dpif_offload_set_global_cfg() so that CT
>>> offload
>>> + * providers are registered once hardware offload has been enabled and the
>>> + * appropriate dpif offload classes are known. */
>>> +void
>>> +ct_offload_set_global_cfg(const struct ovsrec_open_vswitch *cfg OVS_UNUSED)
>>> +{
>>> + static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
>>> +
>>> + if (!dpif_offload_enabled()) {
>>> + return;
>>> + }
>>> +
>>> + if (ovsthread_once_start(&once)) {
>>> + ct_offload_module_init();
>>> + ovsthread_once_done(&once);
>>> + }
>>> }
>>
>> This whole part is a bit unclear to me. The offload classes
>> exist, but they might not be in use, meaning not tied to a
>> dpif-offload-provider. They get dynamically created on dpif
>> creation. It looks like the conntrack infrastructure is
>> global, not per dpif (this might need to change, but I guess
>> not part of this effort).
>>
>> For example, a ct offload provider could be registered and
>> initialized even if the corresponding dpif type is not in
>> use by any bridge.
>>
>> I am mainly confused how the specific offload provider
>> instance gets linked to the offload provider's specific
>> data. I assumed the dpif-offload-provider would register
>> the ct-offload at init time (and destruct on destroy).
>
> I think the dpif-offload-provider should have this responsibility. The
> reason for implementing it this way is to avoid
>
> dpif -> ct -> dpif
>
> cascaded calls.
I think what we need as part of this series is an implementation using
the dummy offload provider so that we can see how this API all fits
together, as at this stage it's unclear. We can just add simple UDP
offload in the simulator, it should not be that hard. But at least we
get an idea about the API and initialization flow, and exchange of data
between the different layers.
>> Maybe this will become more clear in subsequent patches.
>>
>>>
>>> /* ct_offload_conn_add_() - notify all eligible providers of a new
>>> connection.
>>> diff --git a/lib/ct-offload.h b/lib/ct-offload.h
>>> index fcb3170fa1..fe4ecd33b8 100644
>>> --- a/lib/ct-offload.h
>>> +++ b/lib/ct-offload.h
>>> @@ -21,6 +21,7 @@
>>>
>>> struct conn;
>>> struct netdev;
>>> +struct ovsrec_open_vswitch;
>>>
>>> /* Context for offload as part of the callbacks that all connection
>>> * offload APIs receive.
>>> @@ -91,6 +92,13 @@ void ct_offload_unregister(const struct ct_offload_class
>>> *);
>>> void ct_offload_alloc_private_slot(void);
>>> /* Module initialization (register built-in providers). */
>>> void ct_offload_module_init(void);
>>> +/* Global configuration: call alongside dpif_offload_set_global_cfg() to
>>> + * enable CT offload when hardware offload is active. */
>>> +void ct_offload_set_global_cfg(const struct ovsrec_open_vswitch *);
>>> +
>>> +/* Returns true when CT offload is enabled (delegates to
>>> dpif_offload_enabled).
>>> + */
>>> +bool ct_offload_enabled(void);
>>>
>>> /* Per-connection offload API that dispatches to all registered providers.
>>> */
>>> int ct_offload_conn_add(const struct ct_offload_ctx *);
>>> diff --git a/lib/dpif-offload.c b/lib/dpif-offload.c
>>> index bb2feced9e..30cc4fd271 100644
>>> --- a/lib/dpif-offload.c
>>> +++ b/lib/dpif-offload.c
>>> @@ -516,6 +516,19 @@ dpif_offload_enabled(void)
>>> return enabled;
>>> }
>>>
>>> +/* dpif_offload_class_is_registered() - returns true if a dpif offload
>>> class
>>> + * with the given name has been successfully registered. */
>>> +bool
>>> +dpif_offload_class_is_registered(const char *name)
>>> +{
>>> + bool found;
>>> +
>>> + ovs_mutex_lock(&dpif_offload_mutex);
>>> + found = shash_find(&dpif_offload_classes, name) != NULL;
>>> + ovs_mutex_unlock(&dpif_offload_mutex);
>>> + return found;
>>> +}
>>> +
>>> bool
>>> dpif_offload_rebalance_policy_enabled(void)
>>> {
>>> diff --git a/lib/dpif-offload.h b/lib/dpif-offload.h
>>> index 7fad3ebee3..d95e4b9463 100644
>>> --- a/lib/dpif-offload.h
>>> +++ b/lib/dpif-offload.h
>>> @@ -45,6 +45,7 @@ enum dpif_offload_impl_type {
>>> void dpif_offload_set_global_cfg(const struct ovsrec_open_vswitch *);
>>> bool dpif_offload_enabled(void);
>>> bool dpif_offload_rebalance_policy_enabled(void);
>>> +bool dpif_offload_class_is_registered(const char *name);
>>>
>>>
>>> /* Per dpif specific functions. */
>>> diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
>>> index 7a68e19ac3..91ffe76648 100644
>>> --- a/vswitchd/bridge.c
>>> +++ b/vswitchd/bridge.c
>>> @@ -28,6 +28,7 @@
>>> #include "daemon.h"
>>> #include "dirs.h"
>>> #include "dpif.h"
>>> +#include "ct-offload.h"
>>
>> Alphabetical order?
>>
>>> #include "dpif-offload.h"
>>> #include "dpdk.h"
>>> #include "hash.h"
>>> @@ -543,6 +544,8 @@ bridge_init(const char *remote)
>>> void
>>> bridge_exit(bool delete_datapath)
>>> {
>>> + ct_offload_flush();
>>
>> Calling this low-level operation from bridge_exit() seems
>> like a layer violation. Should this not be called as part
>> of the dpif or conntrack teardown instead?
>
> Yes, I don't remember why I put it here, but it may have been the
> result of some earlier version where I had some kind of memory leak.
>
>> conntrack_destroy() already cleans all connections via
>> conn_clean() which calls ct_offload_conn_del() for each
>> offloaded connection.
>>
>>> if_notifier_manual_set_cb(NULL);
>>> if_notifier_destroy(ifnotifier);
>>> seq_destroy(ifaces_changed);
>>> @@ -3396,6 +3399,7 @@ bridge_run(void)
>>>
>>> if (cfg && ovsdb_idl_get_seqno(idl) != idl_seqno) {
>>> dpif_offload_set_global_cfg(cfg);
>>> + ct_offload_set_global_cfg(cfg);
>>
>> Same layering concern as ct_offload_flush() above. Should
>> dpif_offload_set_global_cfg() call this internally rather
>> than having bridge.c know about ct-offload?
>
> That makes sense to me.
>
>>> }
>>>
>>> if (cfg) {
>>> --
>>> 2.53.0
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev