On 18 May 2026, at 19:13, Aaron Conole wrote:

> The CT offload operations API currently considers operating on a
> single connection at a time.  However, there may be reason to
> accumulate offload API operations and execute them as a single large
> batch of operations.  Provide a basic batch abstraction that allows
> for accumulating operations and then executing them all at once.  This
> will be used in an upcoming commit, especially with the ct expiration
> logic.  The provider also may have a batched abstraction that lets it
> do a better provider based optimization.
>
> As part of this extension, move the lock management up a level for the
> batching system to have a single bulk operations lock.
>
> Signed-off-by: Aaron Conole <[email protected]>

Hi Aaron,

See comments below.

//Eelco

> ---
>  lib/ct-offload.c | 254 +++++++++++++++++++++++++++++++++++++++++++----
>  lib/ct-offload.h |  54 ++++++++++
>  2 files changed, 286 insertions(+), 22 deletions(-)
>
> diff --git a/lib/ct-offload.c b/lib/ct-offload.c
> index 3bd6200e37..3aaf1809be 100644
> --- a/lib/ct-offload.c
> +++ b/lib/ct-offload.c
> @@ -121,25 +121,33 @@ ct_offload_module_init(void)
>       * directly from their own module-init routines. */
>  }
>
> -/* ct_offload_conn_add() - notify all eligible providers of a new connection.
> +/* ct_offload_conn_add_() - notify all eligible providers of a new 
> connection.

Per the coding guidelines, internal functions should use a
double underscore suffix, i.e. ct_offload_conn_add__().

>   *
>   * Iterates over registered providers and calls conn_add() on each one that
>   * reports can_offload() == true for this context.  Returns the first 
> non-zero
>   * error encountered, but continues notifying remaining providers.  This 
> allows
> - * the underlying hardware conntrack details across providers function. */
> -int
> -ct_offload_conn_add(const struct ct_offload_ctx *ctx)
> + * the underlying hardware conntrack details across providers function.
> + */

The closing */ could go on the previous line since it fits
within 79 characters.

> +static int
> +ct_offload_conn_add_(const struct ct_offload_ctx *ctx, bool batched)
>  {
>      struct ct_offload_class_node *node;
>      int ret = 0;
>
> -    ovs_mutex_lock(&ct_offload_mutex);
>      LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
>          const struct ct_offload_class *class = node->class;
>
> +        if (batched && class->batch_submit) {
> +            /* Called via the batched path - skip the providers
> +             * that support batched submits since they already processed
> +             * this. */
> +            continue;
> +        }
> +
>          if (class->can_offload && !class->can_offload(ctx)) {
>              continue;
>          }
> +
>          if (class->conn_add) {
>              int error = class->conn_add(ctx);
>
> @@ -148,44 +156,83 @@ ct_offload_conn_add(const struct ct_offload_ctx *ctx)
>              }
>          }
>      }
> +
> +    return ret;
> +}
> +
> +int
> +ct_offload_conn_add(const struct ct_offload_ctx *ctx)
> +{
> +    int ret;
> +
> +    ovs_mutex_lock(&ct_offload_mutex);
> +    ret = ct_offload_conn_add_(ctx, false);
>      ovs_mutex_unlock(&ct_offload_mutex);
>
>      return ret;
>  }
>
> -/* ct_offload_conn_del() - notify all providers that a connection was 
> removed.
> +/* ct_offload_conn_del_() - notify all providers that a connection was 
> removed.

Per the coding guidelines, internal functions should use a
double underscore suffix. Guess this is true for all the remaining
functions.

>   *
>   * Called unconditionally on all providers so that each can clean up any
>   * state it may have installed. */
> -void
> -ct_offload_conn_del(const struct ct_offload_ctx *ctx)
> +static void
> +ct_offload_conn_del_(const struct ct_offload_ctx *ctx, bool batched)
>  {
>      struct ct_offload_class_node *node;
>
> -    ovs_mutex_lock(&ct_offload_mutex);
>      LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
>          const struct ct_offload_class *class = node->class;
>
> +        if (batched && class->batch_submit) {
> +            /* Called via the batched path - skip the providers
> +             * that support batched submits since they already processed
> +             * this. */
> +            continue;
> +        }
> +
>          if (class->conn_del) {
>              class->conn_del(ctx);
>          }
>      }
> -    ovs_mutex_unlock(&ct_offload_mutex);
>  }
>
>  void
> -ct_offload_conn_established(const struct ct_offload_ctx *ctx)
> +ct_offload_conn_del(const struct ct_offload_ctx *ctx)
> +{
> +    ovs_mutex_lock(&ct_offload_mutex);
> +    ct_offload_conn_del_(ctx, false);
> +    ovs_mutex_unlock(&ct_offload_mutex);
> +}
> +
> +static int
> +ct_offload_conn_established_(const struct ct_offload_ctx *ctx, bool batched)
>  {
>      struct ct_offload_class_node *node;
>
> -    ovs_mutex_lock(&ct_offload_mutex);
>      LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
>          const struct ct_offload_class *class = node->class;
>
> +        if (batched && class->batch_submit) {
> +            /* Called via the batched path - skip the providers
> +             * that support batched submits since they already processed
> +             * this. */
> +            continue;
> +        }
> +
>          if (class->conn_established) {
>              class->conn_established(ctx);
>          }
>      }
> +
> +    return 0;
> +}
> +
> +void
> +ct_offload_conn_established(const struct ct_offload_ctx *ctx)
> +{
> +    ovs_mutex_lock(&ct_offload_mutex);
> +    (void) ct_offload_conn_established_(ctx, false);
>      ovs_mutex_unlock(&ct_offload_mutex);
>  }
>
> @@ -194,16 +241,22 @@ ct_offload_conn_established(const struct ct_offload_ctx 
> *ctx)
>   * Iterates over providers and returns the first non-zero timestamp returned
>   * by a provider's conn_update() callback.  Returns 0 if no provider
>   * supplies a timestamp. */
> -long long
> -ct_offload_conn_update(const struct ct_offload_ctx *ctx)
> +static long long
> +ct_offload_conn_update_(const struct ct_offload_ctx *ctx, bool batched)
>  {
>      struct ct_offload_class_node *node;
>      long long last_used = 0;
>
> -    ovs_mutex_lock(&ct_offload_mutex);
>      LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
>          const struct ct_offload_class *class = node->class;
>
> +        if (batched && class->batch_submit) {
> +            /* Called via the batched path - skip the providers
> +             * that support batched submits since they already processed
> +             * this. */
> +            continue;
> +        }
> +
>          if (class->conn_update) {
>              long long ts = class->conn_update(ctx);
>
> @@ -213,45 +266,202 @@ ct_offload_conn_update(const struct ct_offload_ctx 
> *ctx)
>              }
>          }
>      }
> +    return last_used;
> +}
> +
> +long long
> +ct_offload_conn_update(const struct ct_offload_ctx *ctx)
> +{
> +    long long ret;
> +
> +    ovs_mutex_lock(&ct_offload_mutex);
> +    ret = ct_offload_conn_update_(ctx, false);
>      ovs_mutex_unlock(&ct_offload_mutex);
>
> -    return last_used;
> +    return ret;
>  }
>
>  /* ct_offload_can_offload() - returns true if any provider can offload ctx. 
> */
> -bool
> -ct_offload_can_offload(const struct ct_offload_ctx *ctx)
> +static bool
> +ct_offload_can_offload_(const struct ct_offload_ctx *ctx, bool batched)
>  {
>      struct ct_offload_class_node *node;
>      bool result = false;
>
> -    ovs_mutex_lock(&ct_offload_mutex);
>      LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
>          const struct ct_offload_class *class = node->class;
>
> +        if (batched && class->batch_submit) {
> +            /* Called via the batched path - skip the providers
> +             * that support batched submits since they already processed
> +             * this. */
> +            continue;
> +        }
> +
>          if (class->can_offload && class->can_offload(ctx)) {
>              result = true;
>              break;
>          }
>      }
> -    ovs_mutex_unlock(&ct_offload_mutex);
>
>      return result;
>  }
>
> +bool
> +ct_offload_can_offload(const struct ct_offload_ctx *ctx)
> +{
> +    bool can_offload;
> +
> +    ovs_mutex_lock(&ct_offload_mutex);
> +    can_offload = ct_offload_can_offload_(ctx, false);
> +    ovs_mutex_unlock(&ct_offload_mutex);
> +
> +    return can_offload;
> +}
> +
>  /* ct_offload_flush() - flush all offloaded connections from every provider. 
> */
> +static void
> +ct_offload_flush_(bool batched)
> +{
> +    struct ct_offload_class_node *node;
> +
> +    LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
> +        const struct ct_offload_class *class = node->class;
> +
> +        if (batched && class->batch_submit) {
> +            /* Called via the batched path - skip the providers
> +             * that support batched submits since they already processed
> +             * this. */
> +            continue;
> +        }
> +
> +        if (class->flush) {
> +            class->flush();
> +        }
> +    }
> +}
> +
>  void
>  ct_offload_flush(void)
> +{
> +    ovs_mutex_lock(&ct_offload_mutex);
> +    ct_offload_flush_(false);
> +    ovs_mutex_unlock(&ct_offload_mutex);
> +}
> +
> +
> +/* Batch API
> + * =========
> + *
> + * The default implementation serialises each operation in the batch through
> + * the individual per-connection dispatch functions above.  All provider
> + * callbacks are invoked under the ct_offload_mutex, so the per-operation
> + * lock/unlock overhead of the single-op path is avoided across the batch.
> + */
> +
> +#define CT_OFFLOAD_BATCH_INITIAL_SIZE 8
> +
> +/* ct_offload_op_batch_init() - prepare an empty batch for use. */

Should this be a static inline in ct-offload.h?
dp_packet_batch_init() uses the same pattern for a similar
trivial initializer. Same for ct_offload_op_batch_destroy()?

> +void
> +ct_offload_op_batch_init(struct ct_offload_op_batch *batch)
> +{
> +    batch->ops      = NULL;
> +    batch->n_ops    = 0;
> +    batch->allocated = 0;
> +}
> +
> +/* ct_offload_op_batch_add() - append one operation to the batch.
> + *
> + * The batch grows dynamically; callers need not pre-size it. */
> +void
> +ct_offload_op_batch_add(struct ct_offload_op_batch *batch,
> +                        enum ct_offload_op_type type,
> +                        const struct ct_offload_ctx *ctx)
> +{
> +    if (batch->n_ops == batch->allocated) {
> +        batch->allocated = batch->allocated
> +                           ? batch->allocated * 2
> +                           : CT_OFFLOAD_BATCH_INITIAL_SIZE;
> +        batch->ops = xrealloc(batch->ops,
> +                              batch->allocated * sizeof *batch->ops);
> +    }
> +
> +    struct ct_offload_op *op = &batch->ops[batch->n_ops++];

Nit: could we add a blank line before this?

> +    op->type     = type;
> +    op->ctx      = *ctx;
> +    op->error    = 0;
> +}
> +
> +/* ct_offload_op_batch_submit() - execute every operation in the batch.
> + *
> + * Each op's 'error' field is set to the result of the corresponding
> + * per-connection dispatch.  The mutex is held for the duration of each
> + * operation; providers are invoked directly rather than through the
> + * public single-op wrappers to avoid repeated lock/unlock cycles. */
> +void
> +ct_offload_op_batch_submit(struct ct_offload_op_batch *batch)
>  {
>      struct ct_offload_class_node *node;
> +    struct ct_offload_op *op;
>
>      ovs_mutex_lock(&ct_offload_mutex);

See earlier comment on read/write lock.  We should only have
a global read/write lock for the providers.  Maybe we should
rename it to make it clear what it protects, e.g.
ct_offload_classes_rwlock.

>      LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
>          const struct ct_offload_class *class = node->class;
>
> -        if (class->flush) {
> -            class->flush();
> +        if (class->batch_submit) {
> +            class->batch_submit(batch);
> +        }
> +    }
> +
> +    CT_OFFLOAD_BATCH_OP_FOR_EACH (idx, op, batch) {

This adds quite some overhead, as it will walk all entries
in the batch, and then for each operation all the offload
providers.  We should optimize this somehow, as in most
cases each offload provider will support batch mode, and
hence we do this work for nothing.

Maybe we can record if all classes support batch mode and
skip this block.  Maybe we should force the batch API as
mandatory?  Which makes the code more straightforward.

WDYT?

> +        switch (op->type) {
> +        case CT_OFFLOAD_OP_ADD:
> +            op->error = ct_offload_conn_add_(&op->ctx, true);
> +            break;
> +
> +        case CT_OFFLOAD_OP_DEL:
> +            ct_offload_conn_del_(&op->ctx, true);
> +            op->error = 0;
> +            break;
> +
> +        case CT_OFFLOAD_OP_UPD: {
> +            long long ts = ct_offload_conn_update_(&op->ctx, true);
> +
> +            op->error = ts ? 0 : EIO;
> +            break;
> +        }
> +
> +        case CT_OFFLOAD_OP_POLICY:
> +            op->error = ct_offload_can_offload_(&op->ctx, true) ? 0 : EPERM;
> +            break;
> +
> +        case CT_OFFLOAD_OP_FLUSH:
> +            ct_offload_flush_(true);
> +            op->error = 0;
> +            break;
> +
> +        case CT_OFFLOAD_OP_EST:
> +            op->error = ct_offload_conn_established_(&op->ctx, true);

The non-batch wrapper ct_offload_conn_established() returns
void, and the provider callback also returns void.  Should
we just set op->error to 0 here for consistency?  See also
comments on the return codes in later patches.

> +            break;
> +
> +        default:
> +            op->error = EINVAL;
> +            break;
>          }
>      }
>      ovs_mutex_unlock(&ct_offload_mutex);
>  }
> +
> +/* ct_offload_op_batch_destroy() - release memory held by the batch.
> + *
> + * The batch may be re-initialised with ct_offload_op_batch_init() after
> + * this call. */
> +void
> +ct_offload_op_batch_destroy(struct ct_offload_op_batch *batch)
> +{
> +    free(batch->ops);
> +    batch->ops       = NULL;
> +    batch->n_ops     = 0;
> +    batch->allocated = 0;
> +}
> diff --git a/lib/ct-offload.h b/lib/ct-offload.h
> index 824b94a5c1..36871d12cb 100644
> --- a/lib/ct-offload.h
> +++ b/lib/ct-offload.h
> @@ -62,6 +62,10 @@ struct ct_offload_class {
>      /* Initialization routine for the provider. */
>      int (*init)(void);
>
> +    /* Interface to allow offload providers to operate in bulk.  This
> +     * will be called as part of the batch processing process.  If a provider
> +     * doesn't implemented this the fallback is each individual call. */

The last sentence reads odd, maybe something like:

  If a provider does not implement this, the fallback is to
  dispatch each operation individually.

See also my earlier comment on making this mandatory.

> +    void (*batch_submit)(struct ct_offload_op_batch *);

Blank line before the next comment block.

>      /* Per-connection operation callbacks get called for individual 
> operations
>       * on the fast path or when batching is not in use. */
>      int  (*conn_add)(const struct ct_offload_ctx *);
> @@ -94,4 +98,54 @@ void      ct_offload_conn_established(const struct 
> ct_offload_ctx *);
>  bool      ct_offload_can_offload(const struct ct_offload_ctx *);
>  void      ct_offload_flush(void);
>
> +/* Batch offload API.
> + *
> + * The default implementation dispatches each operation individually using 
> the
> + * per-connection API above.  Providers that can handle a native batch may do
> + * so by implementing a batch_submit callback in struct ct_offload_class in 
> the
> + * future.
> + *
> + * Typical usage:
> + *
> + *   struct ct_offload_op_batch batch;
> + *   ct_offload_op_batch_init(&batch);
> + *
> + *   ct_offload_op_batch_add(&batch, CT_OFFLOAD_OP_ADD, &ctx_a);
> + *   ct_offload_op_batch_add(&batch, CT_OFFLOAD_OP_ADD, &ctx_b);
> + *
> + *   ct_offload_op_batch_submit(&batch);
> + *   for_each_op inspect batch.ops[i].error
> + *
> + *   ct_offload_op_batch_destroy(&batch);
> + *

Should we document that the conn objects referenced by each
ctx must remain valid until ct_offload_op_batch_submit()
returns?  The ctx struct is copied by value, but the conn
pointers it contains are not.

Should we document this on the struct itself, so when new
pointer members are added people know?

> + * For CT_OFFLOAD_OP_UPD, op->error is set to 0 when the hardware returned a
> + * valid last-used timestamp (expiration was refreshed by the provider), or 
> to
> + * ENODATA when no hardware record was found.

The header comment says CT_OFFLOAD_OP_UPD sets op->error to ENODATA on
failure, but the implementation in ct_offload_op_batch_submit() sets it
to EIO:

    case CT_OFFLOAD_OP_UPD: {
        long long ts = ct_offload_conn_update_(&op->ctx, true);
        op->error = ts ? 0 : EIO;
        break;
    }

> + * For CT_OFFLOAD_OP_POLICY, op->error is set to 0 when the connection is
> + * eligible for offload, or EPERM when no provider will accept it.
> + */
> +void ct_offload_op_batch_init(struct ct_offload_op_batch *);
> +void ct_offload_op_batch_add(struct ct_offload_op_batch *,
> +                             enum ct_offload_op_type,
> +                             const struct ct_offload_ctx *);
> +void ct_offload_op_batch_submit(struct ct_offload_op_batch *);
> +void ct_offload_op_batch_destroy(struct ct_offload_op_batch *);
> +
> +static inline
> +size_t ct_offload_op_batch_len(struct ct_offload_op_batch *batch)
> +{
> +    return batch->n_ops;
> +}
> +
> +static inline
> +size_t ct_offload_op_batch_size(struct ct_offload_op_batch *batch)
> +{
> +    return batch->allocated;
> +}
> +
> +#define CT_OFFLOAD_BATCH_OP_FOR_EACH(IDX, OP, BATCH) \
> +    for (size_t IDX = 0; IDX < ct_offload_op_batch_len(BATCH); IDX++) \
> +        if (OP = &((BATCH)->ops[IDX]), true)
> +
>  #endif /* CT_OFFLOAD_H */
> -- 
> 2.53.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to