On Mon Jun 1, 2026 at 11:48 AM CEST, Eelco Chaudron via dev wrote:
> On 18 May 2026, at 19:13, Aaron Conole wrote:
>
> > This adds the basic primitives, initialization, and operations that
> > conntrack offload providers will need to implement in order to
> > offer a path to offloading.
> >
> > Signed-off-by: Aaron Conole <[email protected]>
>
> Thanks Aaron for the series, my next emails will review the
> remainder of this series.
>
> See comments below.
>
> //Eelco
>
> > ---
> > lib/automake.mk | 2 +
> > lib/ct-offload.c | 257 +++++++++++++++++++++++++++++++++++++++++++++++
> > lib/ct-offload.h | 97 ++++++++++++++++++
> > 3 files changed, 356 insertions(+)
> > create mode 100644 lib/ct-offload.c
> > create mode 100644 lib/ct-offload.h
> >
> > diff --git a/lib/automake.mk b/lib/automake.mk
> > index 027dd986ba..f11e3de27c 100644
> > --- a/lib/automake.mk
> > +++ b/lib/automake.mk
> > @@ -97,6 +97,8 @@ lib_libopenvswitch_la_SOURCES = \
> > lib/conntrack-other.c \
> > lib/conntrack.c \
> > lib/conntrack.h \
> > + lib/ct-offload.c \
> > + lib/ct-offload.h \
> > lib/cooperative-multitasking.c \
> > lib/cooperative-multitasking.h \
> > lib/cooperative-multitasking-private.h \
> > diff --git a/lib/ct-offload.c b/lib/ct-offload.c
> > new file mode 100644
> > index 0000000000..3bd6200e37
> > --- /dev/null
> > +++ b/lib/ct-offload.c
> > @@ -0,0 +1,257 @@
> > +/*
> > + * Copyright (c) 2026 Red Hat, Inc.
> > + *
> > + * Licensed under the Apache License, Version 2.0 (the "License");
> > + * you may not use this file except in compliance with the License.
> > + * You may obtain a copy of the License at:
> > + *
> > + * http://www.apache.org/licenses/LICENSE-2.0
> > + *
> > + * Unless required by applicable law or agreed to in writing, software
> > + * distributed under the License is distributed on an "AS IS" BASIS,
> > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> > + * See the License for the specific language governing permissions and
> > + * limitations under the License.
> > + */
> > +
> > +#include <config.h>
> > +#include <errno.h>
>
> Include order does not match the style guide. The module's
> own header should come right after config.h, then system
> headers, then OVS headers:
>
> #include <config.h>
>
> #include "ct-offload.h"
>
> #include <errno.h>
>
> #include "ovs-thread.h"
> #include "util.h"
>
> #include "openvswitch/list.h"
> #include "openvswitch/vlog.h"
>
> > +
> > +#include "ct-offload.h"
> > +#include "ovs-thread.h"
> > +#include "util.h"
> > +
> > +#include "openvswitch/list.h"
> > +#include "openvswitch/vlog.h"
> > +
> > +VLOG_DEFINE_THIS_MODULE(ct_offload);
> > +
> > +/* Node in the registered-provider list. */
> > +struct ct_offload_class_node {
> > + const struct ct_offload_class *class;
> > + struct ovs_list list_node;
> > +};
> > +
> > +/* Global list of registered CT offload classes and a mutex to protect it.
> > + * Providers are expected to be registered at module init time and
> > + * unregistered only at module teardown, so contention is minimal. */
>
> See comment below @ct_offload_conn_add().
>
> > +static struct ovs_mutex ct_offload_mutex = OVS_MUTEX_INITIALIZER;
> > +static struct ovs_list ct_offload_classes
> > + OVS_GUARDED_BY(ct_offload_mutex)
> > + = OVS_LIST_INITIALIZER(&ct_offload_classes);
> > +
> > +
> > +/* ct_offload_register() - register a CT offload provider class.
> > + *
> > + * Calls class->init() if provided. Returns 0 on success or a positive
> > + * errno value on failure. Attempting to register the same class twice
> > + * returns EEXIST. */
> > +int
> > +ct_offload_register(const struct ct_offload_class *class)
> > +{
> > + struct ct_offload_class_node *node;
> > + int error = 0;
> > +
> > + ovs_assert(class);
> > + ovs_assert(class->name);
>
> Should we assert on any mandatory callbacks here? Not all
> callbacks need to be mandatory, but to support offload some
> must be (e.g. conn_add, conn_del). Asserting at
> registration time saves NULL checks on every fast-path
> call.
>
Agreed.
init / deinit might be optional depending on providers,
but without conn_add an implementation has little use.
I'd suggest conn_add, conn_del, can_offload as minimum set.
> > +
> > + ovs_mutex_lock(&ct_offload_mutex);
> > +
> > + /* Detect duplicate registrations. */
> > + LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
> > + if (!strcmp(node->class->name, class->name)) {
> > + VLOG_WARN("attempted to register duplicate ct offload class:
> > %s",
> > + class->name);
> > + error = EEXIST;
> > + goto out;
> > + }
> > + }
> > +
> > + error = class->init ? class->init() : 0;
> > + if (error) {
> > + VLOG_WARN("failed to initialize ct offload class %s: %s",
> > + class->name, ovs_strerror(error));
> > + goto out;
> > + }
> > +
> > + node = xmalloc(sizeof *node);
> > + node->class = class;
> > + ovs_list_push_back(&ct_offload_classes, &node->list_node);
> > + VLOG_DBG("registered ct offload class: %s", class->name);
> > +
> > +out:
> > + ovs_mutex_unlock(&ct_offload_mutex);
> > + return error;
> > +}
> > +
> > +/* ct_offload_unregister() - unregister a previously registered class.
> > + *
> > + * Safe to call even if the class was never registered (no-op in that
> > + * case). */
> > +void
> > +ct_offload_unregister(const struct ct_offload_class *class)
> > +{
> > + struct ct_offload_class_node *node;
> > +
> > + ovs_assert(class);
> > +
> > + ovs_mutex_lock(&ct_offload_mutex);
> > + LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
> > + if (node->class == class) {
> > + ovs_list_remove(&node->list_node);
> > + free(node);
> > + VLOG_DBG("unregistered ct offload class: %s", class->name);
> > + goto out;
> > + }
> > + }
> > + VLOG_WARN("attempted to unregister unknown ct offload class: %s",
> > + class->name);
> > +
> > +out:
> > + ovs_mutex_unlock(&ct_offload_mutex);
> > +}
> > +
> > +/* ct_offload_module_init() - register built-in CT offload providers.
> > + *
> > + * Must be called once before any connections are created. */
> > +void
> > +ct_offload_module_init(void)
> > +{
> > + /* No built-in providers yet; third parties call ct_offload_register()
> > + * directly from their own module-init routines. */
> > +}
> > +
> > +/* ct_offload_conn_add() - notify all eligible providers of a new
> > connection.
> > + *
> > + * Iterates over registered providers and calls conn_add() on each one that
> > + * reports can_offload() == true for this context. Returns the first
> > non-zero
> > + * error encountered, but continues notifying remaining providers. This
> > allows
> > + * the underlying hardware conntrack details across providers function. */
> > +int
> > +ct_offload_conn_add(const struct ct_offload_ctx *ctx)
> > +{
> > + struct ct_offload_class_node *node;
> > + int ret = 0;
> > +
> > + ovs_mutex_lock(&ct_offload_mutex);
>
> This mutex is a bottleneck as multiple PMD threads will call
> this in parallel. The fast-path operations only iterate the
> provider list without modifying it. Should this be an
> ovs_rwlock instead?
>
Yes this is a concern, although a rwlock would also poorly scale.
As it executes within a PMD thread, could we use the same approach
to safety? E.g. a PMD thread does not lock a port but builds a TX cache,
and reloads / refreshes it on datapath configure.
Maybe we can keep a list of providers in the DPIF implementation, that
would be stable between datapath_reconfigure?
During earlier discussion, we also touched on a lazy offload approach
similar to the kernel: a ct-add would only be done on the netdev that
received the packet that triggers it, and future netdev that might
operate the same connection would only offload it on their own time.
In this context, the netdev itself can only work with a single provider:
can we register the ct-offload provider to the netdev, and it would
remain there for the whole life of that netdev? Then we could call
only that netdev's provider to avoid proactive offloading.
> > + LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
> > + const struct ct_offload_class *class = node->class;
> > +
> > + if (class->can_offload && !class->can_offload(ctx)) {
> > + continue;
> > + }
> > + if (class->conn_add) {
> > + int error = class->conn_add(ctx);
> > +
> > + if (error && !ret) {
> > + ret = error;
> > + }
> > + }
> > + }
> > + ovs_mutex_unlock(&ct_offload_mutex);
> > +
> > + return ret;
> > +}
> > +
> > +/* ct_offload_conn_del() - notify all providers that a connection was
> > removed.
> > + *
> > + * Called unconditionally on all providers so that each can clean up any
> > + * state it may have installed. */
> > +void
> > +ct_offload_conn_del(const struct ct_offload_ctx *ctx)
> > +{
> > + struct ct_offload_class_node *node;
> > +
> > + ovs_mutex_lock(&ct_offload_mutex);
> > + LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
> > + const struct ct_offload_class *class = node->class;
> > +
> > + if (class->conn_del) {
> > + class->conn_del(ctx);
> > + }
> > + }
> > + ovs_mutex_unlock(&ct_offload_mutex);
> > +}
> > +
> > +void
> > +ct_offload_conn_established(const struct ct_offload_ctx *ctx)
> > +{
> > + struct ct_offload_class_node *node;
> > +
> > + ovs_mutex_lock(&ct_offload_mutex);
> > + LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
> > + const struct ct_offload_class *class = node->class;
> > +
> > + if (class->conn_established) {
> > + class->conn_established(ctx);
> > + }
> > + }
> > + ovs_mutex_unlock(&ct_offload_mutex);
> > +}
> > +
> > +/* ct_offload_conn_update() - query the hardware last-used timestamp.
> > + *
> > + * Iterates over providers and returns the first non-zero timestamp
> > returned
> > + * by a provider's conn_update() callback. Returns 0 if no provider
> > + * supplies a timestamp. */
> > +long long
> > +ct_offload_conn_update(const struct ct_offload_ctx *ctx)
> > +{
> > + struct ct_offload_class_node *node;
> > + long long last_used = 0;
> > +
> > + ovs_mutex_lock(&ct_offload_mutex);
> > + LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
> > + const struct ct_offload_class *class = node->class;
> > +
> > + if (class->conn_update) {
> > + long long ts = class->conn_update(ctx);
> > +
> > + if (ts) {
> > + last_used = ts;
> > + break;
> > + }
> > + }
> > + }
> > + ovs_mutex_unlock(&ct_offload_mutex);
> > +
> > + return last_used;
> > +}
> > +
> > +/* ct_offload_can_offload() - returns true if any provider can offload
> > ctx. */
> > +bool
> > +ct_offload_can_offload(const struct ct_offload_ctx *ctx)
> > +{
> > + struct ct_offload_class_node *node;
> > + bool result = false;
> > +
> > + ovs_mutex_lock(&ct_offload_mutex);
> > + LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
> > + const struct ct_offload_class *class = node->class;
> > +
> > + if (class->can_offload && class->can_offload(ctx)) {
> > + result = true;
> > + break;
> > + }
> > + }
> > + ovs_mutex_unlock(&ct_offload_mutex);
> > +
> > + return result;
> > +}
> > +
> > +/* ct_offload_flush() - flush all offloaded connections from every
> > provider. */
There is a global flush needed for datapath close, but also very common
is netdev flush on port del.
> > +void
> > +ct_offload_flush(void)
> > +{
> > + struct ct_offload_class_node *node;
> > +
> > + ovs_mutex_lock(&ct_offload_mutex);
> > + LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
> > + const struct ct_offload_class *class = node->class;
> > +
> > + if (class->flush) {
> > + class->flush();
> > + }
> > + }
> > + ovs_mutex_unlock(&ct_offload_mutex);
> > +}
> > diff --git a/lib/ct-offload.h b/lib/ct-offload.h
> > new file mode 100644
> > index 0000000000..824b94a5c1
> > --- /dev/null
> > +++ b/lib/ct-offload.h
> > @@ -0,0 +1,97 @@
> > +/*
> > + * Copyright (c) 2026 Red Hat, Inc.
> > + *
> > + * Licensed under the Apache License, Version 2.0 (the "License");
> > + * you may not use this file except in compliance with the License.
> > + * You may obtain a copy of the License at:
> > + *
> > + * http://www.apache.org/licenses/LICENSE-2.0
> > + *
> > + * Unless required by applicable law or agreed to in writing, software
> > + * distributed under the License is distributed on an "AS IS" BASIS,
> > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> > + * See the License for the specific language governing permissions and
> > + * limitations under the License.
> > + */
> > +
> > +#ifndef CT_OFFLOAD_H
> > +#define CT_OFFLOAD_H
> > +
> > +#include "openvswitch/types.h"
> > +
> > +struct conn;
> > +struct netdev;
> > +
> > +/* Context for offload as part of the callbacks that all connection
> > + * offload APIs receive.
> > + */
> > +struct ct_offload_ctx {
> > + const struct conn *conn; /* Connection object being offloaded.
> > */
> > + struct netdev *netdev_in; /* Input netdev. */
> > + odp_port_t input_port_id; /* ODP port number. */
> > +};
> > +
> > +enum ct_offload_op_type {
> > + CT_OFFLOAD_OP_ADD, /* Add operation. */
> > + CT_OFFLOAD_OP_DEL, /* Del operation. */
> > + CT_OFFLOAD_OP_UPD, /* Update operation. */
> > + CT_OFFLOAD_OP_POLICY, /* Policy check operation. */
> > + CT_OFFLOAD_OP_FLUSH, /* Flush. */
> > + CT_OFFLOAD_OP_EST, /* Established - notify that a
> > connection
> > + * has a reply seen. */
> > +};
> > +
> > +struct ct_offload_op {
> > + enum ct_offload_op_type type;
> > + struct ct_offload_ctx ctx;
> > + int error;
> > +};
> > +
> > +/* Batched set of offload contexts and operations.*/
> > +struct ct_offload_op_batch {
> > + struct ct_offload_op *ops;
> > + size_t n_ops;
> > + size_t allocated;
> > +};
> >
>
> This struct is not used in this patch. Should it move to
> the next patch where the batch API is introduced?
>
> > +
> > +/* CT offload class describes a conntrack offload provider implementation.
> > */
> > +struct ct_offload_class {
> > + const char *name;
> > +
> > + /* Initialization routine for the provider. */
>
> Should this be 'Optional initialization routine...'?
> If not we should assert in ct_offload_register.
>
> > + int (*init)(void);
> > +
> > + /* Per-connection operation callbacks get called for individual
> > operations
> > + * on the fast path or when batching is not in use. */
> > + int (*conn_add)(const struct ct_offload_ctx *);
> > + void (*conn_del)(const struct ct_offload_ctx *);
>
> Could we add a blank line between conn_del and the next
> comment block, as is done for other provider structures?
>
> > + /* Populate the last-used timestamp for the connection. Returns the
> > + * last-used time in milliseconds since epoch, or 0 on failure. */
>
> Should this also mention that the caller will only update
> the expiration if the returned timestamp is newer than the
> current value?
>
> Should the callback just return a bool instead, as the
> returned value is not used?
>
> What should a provider return if it has not offloaded the
> flow? Returning 0 causes conn_clean() at the next sweep
> after the connection's expiration passes.
>
> > + long long (*conn_update)(const struct ct_offload_ctx *);
>
> > + /* Called exactly once when the first reply-direction packet is seen
> > + * for an offloaded connection. */
> > + void (*conn_established)(const struct ct_offload_ctx *);
>
> > + /* Check whether this provider can offload a connection. */
> > + bool (*can_offload)(const struct ct_offload_ctx *);
>
> > + /* Flush all offloaded connections. */
> > + void (*flush)(void);
> > +};
> > +
> > +/* Register/unregister a provider. Must be called at module init, before
> > + * any connections are created. */
> > +int ct_offload_register(const struct ct_offload_class *);
> > +void ct_offload_unregister(const struct ct_offload_class *);
> > +
> > +/* Module initialization (register built-in providers). */
> > +void ct_offload_module_init(void);
> > +
> > +/* Per-connection offload API that dispatches to all registered providers.
> > */
> > +int ct_offload_conn_add(const struct ct_offload_ctx *);
> > +void ct_offload_conn_del(const struct ct_offload_ctx *);
> > +long long ct_offload_conn_update(const struct ct_offload_ctx *);
> > +void ct_offload_conn_established(const struct ct_offload_ctx *);
> > +bool ct_offload_can_offload(const struct ct_offload_ctx *);
> > +void ct_offload_flush(void);
> > +
> > +#endif /* CT_OFFLOAD_H */
> > --
> > 2.53.0
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev