Re: [ovs-dev] [RFC 06/12] ct-offload: Add batching support.

Aaron Conole via dev Mon, 20 Apr 2026 17:13:18 -0700

Aaron Conole via dev <[email protected]> writes:

> The CT offload operations API currently considers operating on a
> single connection at a time.  However, there may be reason to
> accumulate offload API operations and execute them as a single large
> batch of operations.  Provide a basic batch abstraction that allows
> for accumulating operations and then executing them all at once.  This
> will be used in an upcoming commit, especially with the ct expiration
> logic.  The provider also may have a batched abstraction that lets it
> do a better provider based optimization.
>
> As part of this extension, move the lock management up a level for the
> batching system to have a single bulk operations lock.
>
> Signed-off-by: Aaron Conole <[email protected]>
> ---
>  lib/ct-offload.c | 254 +++++++++++++++++++++++++++++++++++++++++++----
>  lib/ct-offload.h |  54 ++++++++++
>  2 files changed, 286 insertions(+), 22 deletions(-)
>
> diff --git a/lib/ct-offload.c b/lib/ct-offload.c
> index 3bd6200e37..97c922dde1 100644
> --- a/lib/ct-offload.c
> +++ b/lib/ct-offload.c
> @@ -121,25 +121,33 @@ ct_offload_module_init(void)
>       * directly from their own module-init routines. */
>  }
>  
> -/* ct_offload_conn_add() - notify all eligible providers of a new connection.
> +/* ct_offload_conn_add_() - notify all eligible providers of a new 
> connection.
>   *
>   * Iterates over registered providers and calls conn_add() on each one that
>   * reports can_offload() == true for this context.  Returns the first 
> non-zero
>   * error encountered, but continues notifying remaining providers.  This 
> allows
> - * the underlying hardware conntrack details across providers function. */
> -int
> -ct_offload_conn_add(const struct ct_offload_ctx *ctx)
> + * the underlying hardware conntrack details across providers function.
> + */
> +static int
> +ct_offload_conn_add_(const struct ct_offload_ctx *ctx, bool batched)
>  {
>      struct ct_offload_class_node *node;
>      int ret = 0;
>  
> -    ovs_mutex_lock(&ct_offload_mutex);
>      LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
>          const struct ct_offload_class *class = node->class;
>  
> +        if (batched && class->batch_submit) {
> +            /* Called via the batched path - skip the providers
> +             * that support batched submits since they already processed
> +             * this. */
> +            continue;
> +        }
> +
>          if (class->can_offload && !class->can_offload(ctx)) {
>              continue;
>          }
> +
>          if (class->conn_add) {
>              int error = class->conn_add(ctx);
>  
> @@ -148,44 +156,83 @@ ct_offload_conn_add(const struct ct_offload_ctx *ctx)
>              }
>          }
>      }
> +
> +    return ret;
> +}
> +
> +int
> +ct_offload_conn_add(const struct ct_offload_ctx *ctx)
> +{
> +    int ret;
> +
> +    ovs_mutex_lock(&ct_offload_mutex);
> +    ret = ct_offload_conn_add_(ctx, false);
>      ovs_mutex_unlock(&ct_offload_mutex);
>  
>      return ret;
>  }
>  
> -/* ct_offload_conn_del() - notify all providers that a connection was 
> removed.
> +/* ct_offload_conn_del_() - notify all providers that a connection was 
> removed.
>   *
>   * Called unconditionally on all providers so that each can clean up any
>   * state it may have installed. */
> -void
> -ct_offload_conn_del(const struct ct_offload_ctx *ctx)
> +static void
> +ct_offload_conn_del_(const struct ct_offload_ctx *ctx, bool batched)
>  {
>      struct ct_offload_class_node *node;
>  
> -    ovs_mutex_lock(&ct_offload_mutex);
>      LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
>          const struct ct_offload_class *class = node->class;
>  
> +        if (batched && class->batch_submit) {
> +            /* Called via the batched path - skip the providers
> +             * that support batched submits since they already processed
> +             * this. */
> +            continue;
> +        }
> +
>          if (class->conn_del) {
>              class->conn_del(ctx);
>          }
>      }
> -    ovs_mutex_unlock(&ct_offload_mutex);
>  }
>  
>  void
> -ct_offload_conn_established(const struct ct_offload_ctx *ctx)
> +ct_offload_conn_del(const struct ct_offload_ctx *ctx)
> +{
> +    ovs_mutex_lock(&ct_offload_mutex);
> +    ct_offload_conn_del_(ctx, false);
> +    ovs_mutex_unlock(&ct_offload_mutex);
> +}
> +
> +static int
> +ct_offload_conn_established_(const struct ct_offload_ctx *ctx, bool batched)
>  {
>      struct ct_offload_class_node *node;
>  
> -    ovs_mutex_lock(&ct_offload_mutex);
>      LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
>          const struct ct_offload_class *class = node->class;
>  
> +        if (batched && class->batch_submit) {
> +            /* Called via the batched path - skip the providers
> +             * that support batched submits since they already processed
> +             * this. */
> +            continue;
> +        }
> +
>          if (class->conn_established) {
>              class->conn_established(ctx);
>          }
>      }
> +
> +    return 0;
> +}
> +
> +void
> +ct_offload_conn_established(const struct ct_offload_ctx *ctx)
> +{
> +    ovs_mutex_lock(&ct_offload_mutex);
> +    (void) ct_offload_conn_established_(ctx, false);
>      ovs_mutex_unlock(&ct_offload_mutex);
>  }
>  
> @@ -194,16 +241,22 @@ ct_offload_conn_established(const struct ct_offload_ctx 
> *ctx)
>   * Iterates over providers and returns the first non-zero timestamp returned
>   * by a provider's conn_update() callback.  Returns 0 if no provider
>   * supplies a timestamp. */
> -long long
> -ct_offload_conn_update(const struct ct_offload_ctx *ctx)
> +static long long
> +ct_offload_conn_update_(const struct ct_offload_ctx *ctx, bool batched)
>  {
>      struct ct_offload_class_node *node;
>      long long last_used = 0;
>  
> -    ovs_mutex_lock(&ct_offload_mutex);
>      LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
>          const struct ct_offload_class *class = node->class;
>  
> +        if (batched && class->batch_submit) {
> +            /* Called via the batched path - skip the providers
> +             * that support batched submits since they already processed
> +             * this. */
> +            continue;
> +        }
> +
>          if (class->conn_update) {
>              long long ts = class->conn_update(ctx);
>  
> @@ -213,45 +266,202 @@ ct_offload_conn_update(const struct ct_offload_ctx 
> *ctx)
>              }
>          }
>      }
> +    return last_used;
> +}
> +
> +long long
> +ct_offload_conn_update(const struct ct_offload_ctx *ctx)
> +{
> +    long long ret;
> +
> +    ovs_mutex_lock(&ct_offload_mutex);
> +    ret = ct_offload_conn_update_(ctx, false);
>      ovs_mutex_unlock(&ct_offload_mutex);
>  
> -    return last_used;
> +    return ret;
>  }
>  
>  /* ct_offload_can_offload() - returns true if any provider can offload ctx. 
> */
> -bool
> -ct_offload_can_offload(const struct ct_offload_ctx *ctx)
> +static bool
> +ct_offload_can_offload_(const struct ct_offload_ctx *ctx, bool batched)
>  {
>      struct ct_offload_class_node *node;
>      bool result = false;
>  
> -    ovs_mutex_lock(&ct_offload_mutex);
>      LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
>          const struct ct_offload_class *class = node->class;
>  
> +        if (batched && class->batch_submit) {
> +            /* Called via the batched path - skip the providers
> +             * that support batched submits since they already processed
> +             * this. */
> +            continue;
> +        }
> +
>          if (class->can_offload && class->can_offload(ctx)) {
>              result = true;
>              break;
>          }
>      }
> -    ovs_mutex_unlock(&ct_offload_mutex);
>  
>      return result;
>  }
>  
> +bool
> +ct_offload_can_offload(const struct ct_offload_ctx *ctx)
> +{
> +    bool can_offload;
> +
> +    ovs_mutex_lock(&ct_offload_mutex);
> +    can_offload = ct_offload_can_offload_(ctx, false);
> +    ovs_mutex_unlock(&ct_offload_mutex);
> +
> +    return can_offload;
> +}
> +
>  /* ct_offload_flush() - flush all offloaded connections from every provider. 
> */
> +static void
> +ct_offload_flush_(bool batched)
> +{
> +    struct ct_offload_class_node *node;
> +
> +    LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
> +        const struct ct_offload_class *class = node->class;
> +
> +        if (batched && class->batch_submit) {
> +            /* Called via the batched path - skip the providers
> +             * that support batched submits since they already processed
> +             * this. */
> +            continue;
> +        }
> +
> +        if (class->flush) {
> +            class->flush();
> +        }
> +    }
> +}
> +
>  void
>  ct_offload_flush(void)
> +{
> +    ovs_mutex_lock(&ct_offload_mutex);
> +    ct_offload_flush_(false);
> +    ovs_mutex_unlock(&ct_offload_mutex);
> +}
> +
> +
> +/* Batch API
> + * =========
> + *
> + * The default implementation serialises each operation in the batch through
> + * the individual per-connection dispatch functions above.  All provider
> + * callbacks are invoked under the ct_offload_mutex, so the per-operation
> + * lock/unlock overhead of the single-op path is avoided across the batch.
> + */
> +
> +#define CT_OFFLOAD_BATCH_INITIAL_SIZE 8
> +
> +/* ct_offload_op_batch_init() - prepare an empty batch for use. */
> +void
> +ct_offload_op_batch_init(struct ct_offload_op_batch *batch)
> +{
> +    batch->ops      = NULL;
> +    batch->n_ops    = 0;
> +    batch->allocated = 0;
> +}
> +
> +/* ct_offload_op_batch_add() - append one operation to the batch.
> + *
> + * The batch grows dynamically; callers need not pre-size it. */
> +void
> +ct_offload_op_batch_add(struct ct_offload_op_batch *batch,
> +                        enum ct_offload_op_type type,
> +                        const struct ct_offload_ctx *ctx)
> +{
> +    if (batch->n_ops == batch->allocated) {
> +        batch->allocated = batch->allocated
> +                           ? batch->allocated * 2
> +                           : CT_OFFLOAD_BATCH_INITIAL_SIZE;
> +        batch->ops = xrealloc(batch->ops,
> +                              batch->allocated * sizeof *batch->ops);
> +    }
> +
> +    struct ct_offload_op *op = &batch->ops[batch->n_ops++];
> +    op->type     = type;
> +    op->ctx      = *ctx;
> +    op->error    = 0;
> +}
> +
> +/* ct_offload_op_batch_submit() - execute every operation in the batch.
> + *
> + * Each op's 'error' field is set to the result of the corresponding
> + * per-connection dispatch.  The mutex is held for the duration of each
> + * operation; providers are invoked directly rather than through the
> + * public single-op wrappers to avoid repeated lock/unlock cycles. */
> +void
> +ct_offload_op_batch_submit(struct ct_offload_op_batch *batch)
>  {
>      struct ct_offload_class_node *node;
> +    struct ct_offload_op *op;
>  
>      ovs_mutex_lock(&ct_offload_mutex);
>      LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
>          const struct ct_offload_class *class = node->class;
>  
> -        if (class->flush) {
> -            class->flush();
> +        if (class->batch_submit) {
> +            class->batch_submit(batch);
> +        }
> +    }
> +
> +    CT_OFFLOAD_BATCH_OP_FOR_EACH (idx, op, batch) {
> +
> +        switch (op->type) {
> +        case CT_OFFLOAD_OP_ADD:
> +            op->error = ct_offload_conn_add_(&op->ctx, true);
> +            break;
> +
> +        case CT_OFFLOAD_OP_DEL:
> +            ct_offload_conn_del_(&op->ctx, true);
> +            op->error = 0;
> +            break;
> +
> +        case CT_OFFLOAD_OP_UPD: {
> +            long long ts = ct_offload_conn_update_(&op->ctx, true);
> +
> +            op->error = ts ? 0 : ENODATA;


This should return something else - it seems ENODATA isn't available
under FreeBSD.  I guess it may be better to use EIO or ENODEV.

> +            break;
> +        }
> +
> +        case CT_OFFLOAD_OP_POLICY:
> +            op->error = ct_offload_can_offload_(&op->ctx, true) ? 0 : EPERM;
> +            break;
> +
> +        case CT_OFFLOAD_OP_FLUSH:
> +            ct_offload_flush_(true);
> +            op->error = 0;
> +            break;
> +
> +        case CT_OFFLOAD_OP_EST:
> +            op->error = ct_offload_conn_established_(&op->ctx, true);
> +            break;
> +
> +        default:
> +            op->error = EINVAL;
> +            break;
>          }
>      }
>      ovs_mutex_unlock(&ct_offload_mutex);
>  }
> +
> +/* ct_offload_op_batch_destroy() - release memory held by the batch.
> + *
> + * The batch may be re-initialised with ct_offload_op_batch_init() after
> + * this call. */
> +void
> +ct_offload_op_batch_destroy(struct ct_offload_op_batch *batch)
> +{
> +    free(batch->ops);
> +    batch->ops       = NULL;
> +    batch->n_ops     = 0;
> +    batch->allocated = 0;
> +}
> diff --git a/lib/ct-offload.h b/lib/ct-offload.h
> index 824b94a5c1..36871d12cb 100644
> --- a/lib/ct-offload.h
> +++ b/lib/ct-offload.h
> @@ -62,6 +62,10 @@ struct ct_offload_class {
>      /* Initialization routine for the provider. */
>      int (*init)(void);
>  
> +    /* Interface to allow offload providers to operate in bulk.  This
> +     * will be called as part of the batch processing process.  If a provider
> +     * doesn't implemented this the fallback is each individual call. */
> +    void (*batch_submit)(struct ct_offload_op_batch *);
>      /* Per-connection operation callbacks get called for individual 
> operations
>       * on the fast path or when batching is not in use. */
>      int  (*conn_add)(const struct ct_offload_ctx *);
> @@ -94,4 +98,54 @@ void      ct_offload_conn_established(const struct 
> ct_offload_ctx *);
>  bool      ct_offload_can_offload(const struct ct_offload_ctx *);
>  void      ct_offload_flush(void);
>  
> +/* Batch offload API.
> + *
> + * The default implementation dispatches each operation individually using 
> the
> + * per-connection API above.  Providers that can handle a native batch may do
> + * so by implementing a batch_submit callback in struct ct_offload_class in 
> the
> + * future.
> + *
> + * Typical usage:
> + *
> + *   struct ct_offload_op_batch batch;
> + *   ct_offload_op_batch_init(&batch);
> + *
> + *   ct_offload_op_batch_add(&batch, CT_OFFLOAD_OP_ADD, &ctx_a);
> + *   ct_offload_op_batch_add(&batch, CT_OFFLOAD_OP_ADD, &ctx_b);
> + *
> + *   ct_offload_op_batch_submit(&batch);
> + *   for_each_op inspect batch.ops[i].error
> + *
> + *   ct_offload_op_batch_destroy(&batch);
> + *
> + * For CT_OFFLOAD_OP_UPD, op->error is set to 0 when the hardware returned a
> + * valid last-used timestamp (expiration was refreshed by the provider), or 
> to
> + * ENODATA when no hardware record was found.
> + *
> + * For CT_OFFLOAD_OP_POLICY, op->error is set to 0 when the connection is
> + * eligible for offload, or EPERM when no provider will accept it.
> + */
> +void ct_offload_op_batch_init(struct ct_offload_op_batch *);
> +void ct_offload_op_batch_add(struct ct_offload_op_batch *,
> +                             enum ct_offload_op_type,
> +                             const struct ct_offload_ctx *);
> +void ct_offload_op_batch_submit(struct ct_offload_op_batch *);
> +void ct_offload_op_batch_destroy(struct ct_offload_op_batch *);
> +
> +static inline
> +size_t ct_offload_op_batch_len(struct ct_offload_op_batch *batch)
> +{
> +    return batch->n_ops;
> +}
> +
> +static inline
> +size_t ct_offload_op_batch_size(struct ct_offload_op_batch *batch)
> +{
> +    return batch->allocated;
> +}
> +
> +#define CT_OFFLOAD_BATCH_OP_FOR_EACH(IDX, OP, BATCH) \
> +    for (size_t IDX = 0; IDX < ct_offload_op_batch_len(BATCH); IDX++) \
> +        if (OP = &((BATCH)->ops[IDX]), true)
> +
>  #endif /* CT_OFFLOAD_H */

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Re: [ovs-dev] [RFC 06/12] ct-offload: Add batching support.

Reply via email to