Aaron Conole via dev <[email protected]> writes:
> The CT offload operations API currently considers operating on a
> single connection at a time. However, there may be reason to
> accumulate offload API operations and execute them as a single large
> batch of operations. Provide a basic batch abstraction that allows
> for accumulating operations and then executing them all at once. This
> will be used in an upcoming commit, especially with the ct expiration
> logic. The provider also may have a batched abstraction that lets it
> do a better provider based optimization.
>
> As part of this extension, move the lock management up a level for the
> batching system to have a single bulk operations lock.
>
> Signed-off-by: Aaron Conole <[email protected]>
> ---
> lib/ct-offload.c | 254 +++++++++++++++++++++++++++++++++++++++++++----
> lib/ct-offload.h | 54 ++++++++++
> 2 files changed, 286 insertions(+), 22 deletions(-)
>
> diff --git a/lib/ct-offload.c b/lib/ct-offload.c
> index 3bd6200e37..97c922dde1 100644
> --- a/lib/ct-offload.c
> +++ b/lib/ct-offload.c
> @@ -121,25 +121,33 @@ ct_offload_module_init(void)
> * directly from their own module-init routines. */
> }
>
> -/* ct_offload_conn_add() - notify all eligible providers of a new connection.
> +/* ct_offload_conn_add_() - notify all eligible providers of a new
> connection.
> *
> * Iterates over registered providers and calls conn_add() on each one that
> * reports can_offload() == true for this context. Returns the first
> non-zero
> * error encountered, but continues notifying remaining providers. This
> allows
> - * the underlying hardware conntrack details across providers function. */
> -int
> -ct_offload_conn_add(const struct ct_offload_ctx *ctx)
> + * the underlying hardware conntrack details across providers function.
> + */
> +static int
> +ct_offload_conn_add_(const struct ct_offload_ctx *ctx, bool batched)
> {
> struct ct_offload_class_node *node;
> int ret = 0;
>
> - ovs_mutex_lock(&ct_offload_mutex);
> LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
> const struct ct_offload_class *class = node->class;
>
> + if (batched && class->batch_submit) {
> + /* Called via the batched path - skip the providers
> + * that support batched submits since they already processed
> + * this. */
> + continue;
> + }
> +
> if (class->can_offload && !class->can_offload(ctx)) {
> continue;
> }
> +
> if (class->conn_add) {
> int error = class->conn_add(ctx);
>
> @@ -148,44 +156,83 @@ ct_offload_conn_add(const struct ct_offload_ctx *ctx)
> }
> }
> }
> +
> + return ret;
> +}
> +
> +int
> +ct_offload_conn_add(const struct ct_offload_ctx *ctx)
> +{
> + int ret;
> +
> + ovs_mutex_lock(&ct_offload_mutex);
> + ret = ct_offload_conn_add_(ctx, false);
> ovs_mutex_unlock(&ct_offload_mutex);
>
> return ret;
> }
>
> -/* ct_offload_conn_del() - notify all providers that a connection was
> removed.
> +/* ct_offload_conn_del_() - notify all providers that a connection was
> removed.
> *
> * Called unconditionally on all providers so that each can clean up any
> * state it may have installed. */
> -void
> -ct_offload_conn_del(const struct ct_offload_ctx *ctx)
> +static void
> +ct_offload_conn_del_(const struct ct_offload_ctx *ctx, bool batched)
> {
> struct ct_offload_class_node *node;
>
> - ovs_mutex_lock(&ct_offload_mutex);
> LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
> const struct ct_offload_class *class = node->class;
>
> + if (batched && class->batch_submit) {
> + /* Called via the batched path - skip the providers
> + * that support batched submits since they already processed
> + * this. */
> + continue;
> + }
> +
> if (class->conn_del) {
> class->conn_del(ctx);
> }
> }
> - ovs_mutex_unlock(&ct_offload_mutex);
> }
>
> void
> -ct_offload_conn_established(const struct ct_offload_ctx *ctx)
> +ct_offload_conn_del(const struct ct_offload_ctx *ctx)
> +{
> + ovs_mutex_lock(&ct_offload_mutex);
> + ct_offload_conn_del_(ctx, false);
> + ovs_mutex_unlock(&ct_offload_mutex);
> +}
> +
> +static int
> +ct_offload_conn_established_(const struct ct_offload_ctx *ctx, bool batched)
> {
> struct ct_offload_class_node *node;
>
> - ovs_mutex_lock(&ct_offload_mutex);
> LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
> const struct ct_offload_class *class = node->class;
>
> + if (batched && class->batch_submit) {
> + /* Called via the batched path - skip the providers
> + * that support batched submits since they already processed
> + * this. */
> + continue;
> + }
> +
> if (class->conn_established) {
> class->conn_established(ctx);
> }
> }
> +
> + return 0;
> +}
> +
> +void
> +ct_offload_conn_established(const struct ct_offload_ctx *ctx)
> +{
> + ovs_mutex_lock(&ct_offload_mutex);
> + (void) ct_offload_conn_established_(ctx, false);
> ovs_mutex_unlock(&ct_offload_mutex);
> }
>
> @@ -194,16 +241,22 @@ ct_offload_conn_established(const struct ct_offload_ctx
> *ctx)
> * Iterates over providers and returns the first non-zero timestamp returned
> * by a provider's conn_update() callback. Returns 0 if no provider
> * supplies a timestamp. */
> -long long
> -ct_offload_conn_update(const struct ct_offload_ctx *ctx)
> +static long long
> +ct_offload_conn_update_(const struct ct_offload_ctx *ctx, bool batched)
> {
> struct ct_offload_class_node *node;
> long long last_used = 0;
>
> - ovs_mutex_lock(&ct_offload_mutex);
> LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
> const struct ct_offload_class *class = node->class;
>
> + if (batched && class->batch_submit) {
> + /* Called via the batched path - skip the providers
> + * that support batched submits since they already processed
> + * this. */
> + continue;
> + }
> +
> if (class->conn_update) {
> long long ts = class->conn_update(ctx);
>
> @@ -213,45 +266,202 @@ ct_offload_conn_update(const struct ct_offload_ctx
> *ctx)
> }
> }
> }
> + return last_used;
> +}
> +
> +long long
> +ct_offload_conn_update(const struct ct_offload_ctx *ctx)
> +{
> + long long ret;
> +
> + ovs_mutex_lock(&ct_offload_mutex);
> + ret = ct_offload_conn_update_(ctx, false);
> ovs_mutex_unlock(&ct_offload_mutex);
>
> - return last_used;
> + return ret;
> }
>
> /* ct_offload_can_offload() - returns true if any provider can offload ctx.
> */
> -bool
> -ct_offload_can_offload(const struct ct_offload_ctx *ctx)
> +static bool
> +ct_offload_can_offload_(const struct ct_offload_ctx *ctx, bool batched)
> {
> struct ct_offload_class_node *node;
> bool result = false;
>
> - ovs_mutex_lock(&ct_offload_mutex);
> LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
> const struct ct_offload_class *class = node->class;
>
> + if (batched && class->batch_submit) {
> + /* Called via the batched path - skip the providers
> + * that support batched submits since they already processed
> + * this. */
> + continue;
> + }
> +
> if (class->can_offload && class->can_offload(ctx)) {
> result = true;
> break;
> }
> }
> - ovs_mutex_unlock(&ct_offload_mutex);
>
> return result;
> }
>
> +bool
> +ct_offload_can_offload(const struct ct_offload_ctx *ctx)
> +{
> + bool can_offload;
> +
> + ovs_mutex_lock(&ct_offload_mutex);
> + can_offload = ct_offload_can_offload_(ctx, false);
> + ovs_mutex_unlock(&ct_offload_mutex);
> +
> + return can_offload;
> +}
> +
> /* ct_offload_flush() - flush all offloaded connections from every provider.
> */
> +static void
> +ct_offload_flush_(bool batched)
> +{
> + struct ct_offload_class_node *node;
> +
> + LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
> + const struct ct_offload_class *class = node->class;
> +
> + if (batched && class->batch_submit) {
> + /* Called via the batched path - skip the providers
> + * that support batched submits since they already processed
> + * this. */
> + continue;
> + }
> +
> + if (class->flush) {
> + class->flush();
> + }
> + }
> +}
> +
> void
> ct_offload_flush(void)
> +{
> + ovs_mutex_lock(&ct_offload_mutex);
> + ct_offload_flush_(false);
> + ovs_mutex_unlock(&ct_offload_mutex);
> +}
> +
> +
> +/* Batch API
> + * =========
> + *
> + * The default implementation serialises each operation in the batch through
> + * the individual per-connection dispatch functions above. All provider
> + * callbacks are invoked under the ct_offload_mutex, so the per-operation
> + * lock/unlock overhead of the single-op path is avoided across the batch.
> + */
> +
> +#define CT_OFFLOAD_BATCH_INITIAL_SIZE 8
> +
> +/* ct_offload_op_batch_init() - prepare an empty batch for use. */
> +void
> +ct_offload_op_batch_init(struct ct_offload_op_batch *batch)
> +{
> + batch->ops = NULL;
> + batch->n_ops = 0;
> + batch->allocated = 0;
> +}
> +
> +/* ct_offload_op_batch_add() - append one operation to the batch.
> + *
> + * The batch grows dynamically; callers need not pre-size it. */
> +void
> +ct_offload_op_batch_add(struct ct_offload_op_batch *batch,
> + enum ct_offload_op_type type,
> + const struct ct_offload_ctx *ctx)
> +{
> + if (batch->n_ops == batch->allocated) {
> + batch->allocated = batch->allocated
> + ? batch->allocated * 2
> + : CT_OFFLOAD_BATCH_INITIAL_SIZE;
> + batch->ops = xrealloc(batch->ops,
> + batch->allocated * sizeof *batch->ops);
> + }
> +
> + struct ct_offload_op *op = &batch->ops[batch->n_ops++];
> + op->type = type;
> + op->ctx = *ctx;
> + op->error = 0;
> +}
> +
> +/* ct_offload_op_batch_submit() - execute every operation in the batch.
> + *
> + * Each op's 'error' field is set to the result of the corresponding
> + * per-connection dispatch. The mutex is held for the duration of each
> + * operation; providers are invoked directly rather than through the
> + * public single-op wrappers to avoid repeated lock/unlock cycles. */
> +void
> +ct_offload_op_batch_submit(struct ct_offload_op_batch *batch)
> {
> struct ct_offload_class_node *node;
> + struct ct_offload_op *op;
>
> ovs_mutex_lock(&ct_offload_mutex);
> LIST_FOR_EACH (node, list_node, &ct_offload_classes) {
> const struct ct_offload_class *class = node->class;
>
> - if (class->flush) {
> - class->flush();
> + if (class->batch_submit) {
> + class->batch_submit(batch);
> + }
> + }
> +
> + CT_OFFLOAD_BATCH_OP_FOR_EACH (idx, op, batch) {
> +
> + switch (op->type) {
> + case CT_OFFLOAD_OP_ADD:
> + op->error = ct_offload_conn_add_(&op->ctx, true);
> + break;
> +
> + case CT_OFFLOAD_OP_DEL:
> + ct_offload_conn_del_(&op->ctx, true);
> + op->error = 0;
> + break;
> +
> + case CT_OFFLOAD_OP_UPD: {
> + long long ts = ct_offload_conn_update_(&op->ctx, true);
> +
> + op->error = ts ? 0 : ENODATA;
This should return something else - it seems ENODATA isn't available
under FreeBSD. I guess it may be better to use EIO or ENODEV.
> + break;
> + }
> +
> + case CT_OFFLOAD_OP_POLICY:
> + op->error = ct_offload_can_offload_(&op->ctx, true) ? 0 : EPERM;
> + break;
> +
> + case CT_OFFLOAD_OP_FLUSH:
> + ct_offload_flush_(true);
> + op->error = 0;
> + break;
> +
> + case CT_OFFLOAD_OP_EST:
> + op->error = ct_offload_conn_established_(&op->ctx, true);
> + break;
> +
> + default:
> + op->error = EINVAL;
> + break;
> }
> }
> ovs_mutex_unlock(&ct_offload_mutex);
> }
> +
> +/* ct_offload_op_batch_destroy() - release memory held by the batch.
> + *
> + * The batch may be re-initialised with ct_offload_op_batch_init() after
> + * this call. */
> +void
> +ct_offload_op_batch_destroy(struct ct_offload_op_batch *batch)
> +{
> + free(batch->ops);
> + batch->ops = NULL;
> + batch->n_ops = 0;
> + batch->allocated = 0;
> +}
> diff --git a/lib/ct-offload.h b/lib/ct-offload.h
> index 824b94a5c1..36871d12cb 100644
> --- a/lib/ct-offload.h
> +++ b/lib/ct-offload.h
> @@ -62,6 +62,10 @@ struct ct_offload_class {
> /* Initialization routine for the provider. */
> int (*init)(void);
>
> + /* Interface to allow offload providers to operate in bulk. This
> + * will be called as part of the batch processing process. If a provider
> + * doesn't implemented this the fallback is each individual call. */
> + void (*batch_submit)(struct ct_offload_op_batch *);
> /* Per-connection operation callbacks get called for individual
> operations
> * on the fast path or when batching is not in use. */
> int (*conn_add)(const struct ct_offload_ctx *);
> @@ -94,4 +98,54 @@ void ct_offload_conn_established(const struct
> ct_offload_ctx *);
> bool ct_offload_can_offload(const struct ct_offload_ctx *);
> void ct_offload_flush(void);
>
> +/* Batch offload API.
> + *
> + * The default implementation dispatches each operation individually using
> the
> + * per-connection API above. Providers that can handle a native batch may do
> + * so by implementing a batch_submit callback in struct ct_offload_class in
> the
> + * future.
> + *
> + * Typical usage:
> + *
> + * struct ct_offload_op_batch batch;
> + * ct_offload_op_batch_init(&batch);
> + *
> + * ct_offload_op_batch_add(&batch, CT_OFFLOAD_OP_ADD, &ctx_a);
> + * ct_offload_op_batch_add(&batch, CT_OFFLOAD_OP_ADD, &ctx_b);
> + *
> + * ct_offload_op_batch_submit(&batch);
> + * for_each_op inspect batch.ops[i].error
> + *
> + * ct_offload_op_batch_destroy(&batch);
> + *
> + * For CT_OFFLOAD_OP_UPD, op->error is set to 0 when the hardware returned a
> + * valid last-used timestamp (expiration was refreshed by the provider), or
> to
> + * ENODATA when no hardware record was found.
> + *
> + * For CT_OFFLOAD_OP_POLICY, op->error is set to 0 when the connection is
> + * eligible for offload, or EPERM when no provider will accept it.
> + */
> +void ct_offload_op_batch_init(struct ct_offload_op_batch *);
> +void ct_offload_op_batch_add(struct ct_offload_op_batch *,
> + enum ct_offload_op_type,
> + const struct ct_offload_ctx *);
> +void ct_offload_op_batch_submit(struct ct_offload_op_batch *);
> +void ct_offload_op_batch_destroy(struct ct_offload_op_batch *);
> +
> +static inline
> +size_t ct_offload_op_batch_len(struct ct_offload_op_batch *batch)
> +{
> + return batch->n_ops;
> +}
> +
> +static inline
> +size_t ct_offload_op_batch_size(struct ct_offload_op_batch *batch)
> +{
> + return batch->allocated;
> +}
> +
> +#define CT_OFFLOAD_BATCH_OP_FOR_EACH(IDX, OP, BATCH) \
> + for (size_t IDX = 0; IDX < ct_offload_op_batch_len(BATCH); IDX++) \
> + if (OP = &((BATCH)->ops[IDX]), true)
> +
> #endif /* CT_OFFLOAD_H */
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev