Am Fri, May 29, 2026 at 12:21:59PM -0400 schrieb Aaron Conole via dev:
> Hi Felix,
>
> Felix Huettner via dev <[email protected]> writes:
>
> > When "commit" is false then conn_not_found will never write to
> > ct->conns. This means that the guarantee the mutex provides for us is not
> > necessary.
> > If "commit" is true then the mutex is still necessary. We need to be sure
> > that we do not insert a connection that is added in parallel somewhere
> > else.
> >
> > When doing a loadtest with OVN and natting across two LRs we increased
> > our packet rate from 1 Mio pps to 2.5 Mio pps. This mainly comes from
> > the UNSNAT and UNDNAT stages of OVN that generally do not match anything
> > as long as we are in not specific cases.
> >
> > Co-Authored-by: Florian Werner <[email protected]>
> > Signed-off-by: Florian Werner <[email protected]>
> > Co-Authored-by: Sebastian Riese <[email protected]>
> > Signed-off-by: Sebastian Riese <[email protected]>
> > Signed-off-by: Felix Huettner <[email protected]>
> > ---
>
> Some quick comments for the series.
Hi Aaron,
thanks for the review.
The small topics will all be in v2.
>
> > lib/conntrack.c | 225 ++++++++++++++++++++++++++++--------------------
> > 1 file changed, 132 insertions(+), 93 deletions(-)
> >
> > diff --git a/lib/conntrack.c b/lib/conntrack.c
> > index f84cdd216..bcce71808 100644
> > --- a/lib/conntrack.c
> > +++ b/lib/conntrack.c
> > @@ -1022,115 +1022,102 @@ ct_verify_helper(const char *helper, enum
> > ct_alg_ctl_type ct_alg_ctl)
> > }
> >
> > static struct conn *
> > -conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
> > - struct conn_lookup_ctx *ctx, bool commit, long long now,
> > - const struct nat_action_info_t *nat_action_info,
> > - const char *helper, const struct alg_exp_node *alg_exp,
> > - enum ct_alg_ctl_type ct_alg_ctl, uint32_t tp_id)
> > +conn_insert(struct conntrack *ct, struct dp_packet *pkt,
> > + struct conn_lookup_ctx *ctx, long long now,
> > + const struct nat_action_info_t *nat_action_info,
> > + const char *helper, const struct alg_exp_node *alg_exp,
> > + enum ct_alg_ctl_type ct_alg_ctl, uint32_t tp_id)
> > OVS_REQUIRES(ct->ct_lock)
> > {
> > struct conn *nc = NULL;
> >
> > - if (!valid_new(pkt, &ctx->key)) {
> > - pkt->md.ct_state = CS_INVALID;
> > + int64_t czl_limit;
> > + struct conn_key_node *fwd_key_node, *rev_key_node;
> > + struct zone_limit *zl = zone_limit_lookup_or_default(ct,
> > + ctx->key.zone);
> > + if (zl) {
> > + czl_limit = zone_limit_get_limit(ct, &zl->czl);
> > + if (czl_limit >= 0 &&
> > + atomic_count_get(&zl->czl.count) >= czl_limit) {
> > + COVERAGE_INC(conntrack_zone_full);
> > + return nc;
> > + }
> > + }
> > +
> > + unsigned int n_conn_limit;
> > + atomic_read_relaxed(&ct->n_conn_limit, &n_conn_limit);
> > + if (atomic_count_get(&ct->n_conn) >= n_conn_limit) {
> > + COVERAGE_INC(conntrack_full);
> > return nc;
> > }
> >
> > - pkt->md.ct_state = CS_NEW;
> > + nc = new_conn(ct, pkt, &ctx->key, now, tp_id);
> > + fwd_key_node = &nc->key_node[CT_DIR_FWD];
> > + rev_key_node = &nc->key_node[CT_DIR_REV];
> > + memcpy(&fwd_key_node->key, &ctx->key, sizeof fwd_key_node->key);
> > + memcpy(&rev_key_node->key, &fwd_key_node->key,
> > + sizeof rev_key_node->key);
> > + conn_key_reverse(&rev_key_node->key);
> > +
> > + if (ct_verify_helper(helper, ct_alg_ctl)) {
> > + nc->alg = nullable_xstrdup(helper);
> > + }
> >
> > if (alg_exp) {
> > - pkt->md.ct_state |= CS_RELATED;
> > + nc->alg_related = true;
> > + nc->mark = alg_exp->parent_mark;
> > + nc->label = alg_exp->parent_label;
> > + nc->parent_key = alg_exp->parent_key;
> > }
> >
> > - if (commit) {
> > - int64_t czl_limit;
> > - struct conn_key_node *fwd_key_node, *rev_key_node;
> > - struct zone_limit *zl = zone_limit_lookup_or_default(ct,
> > -
> > ctx->key.zone);
> > - if (zl) {
> > - czl_limit = zone_limit_get_limit(ct, &zl->czl);
> > - if (czl_limit >= 0 &&
> > - atomic_count_get(&zl->czl.count) >= czl_limit) {
> > - COVERAGE_INC(conntrack_zone_full);
> > - return nc;
> > - }
> > - }
> > + ovs_mutex_init_adaptive(&nc->lock);
> > + atomic_flag_clear(&nc->reclaimed);
> > + fwd_key_node->dir = CT_DIR_FWD;
> > + rev_key_node->dir = CT_DIR_REV;
> >
> > - unsigned int n_conn_limit;
> > - atomic_read_relaxed(&ct->n_conn_limit, &n_conn_limit);
> > - if (atomic_count_get(&ct->n_conn) >= n_conn_limit) {
> > - COVERAGE_INC(conntrack_full);
> > - return nc;
> > - }
> > + if (zl) {
> > + nc->admit_zone = zl->czl.zone;
> > + nc->zone_limit_seq = zl->czl.zone_limit_seq;
> > + } else {
> > + nc->admit_zone = INVALID_ZONE;
> > + }
> >
> > - nc = new_conn(ct, pkt, &ctx->key, now, tp_id);
> > - fwd_key_node = &nc->key_node[CT_DIR_FWD];
> > - rev_key_node = &nc->key_node[CT_DIR_REV];
> > - memcpy(&fwd_key_node->key, &ctx->key, sizeof fwd_key_node->key);
> > - memcpy(&rev_key_node->key, &fwd_key_node->key,
> > - sizeof rev_key_node->key);
> > - conn_key_reverse(&rev_key_node->key);
> > -
> > - if (ct_verify_helper(helper, ct_alg_ctl)) {
> > - nc->alg = nullable_xstrdup(helper);
> > - }
> > + if (nat_action_info) {
> > + nc->nat_action = nat_action_info->nat_action;
> >
> > if (alg_exp) {
> > - nc->alg_related = true;
> > - nc->mark = alg_exp->parent_mark;
> > - nc->label = alg_exp->parent_label;
> > - nc->parent_key = alg_exp->parent_key;
> > - }
> > -
> > - ovs_mutex_init_adaptive(&nc->lock);
> > - atomic_flag_clear(&nc->reclaimed);
> > - fwd_key_node->dir = CT_DIR_FWD;
> > - rev_key_node->dir = CT_DIR_REV;
> > -
> > - if (zl) {
> > - nc->admit_zone = zl->czl.zone;
> > - nc->zone_limit_seq = zl->czl.zone_limit_seq;
> > - } else {
> > - nc->admit_zone = INVALID_ZONE;
> > - }
> > -
> > - if (nat_action_info) {
> > - nc->nat_action = nat_action_info->nat_action;
> > -
> > - if (alg_exp) {
> > - if (alg_exp->nat_rpl_dst) {
> > - rev_key_node->key.dst.addr =
> > alg_exp->alg_nat_repl_addr;
> > - nc->nat_action = NAT_ACTION_SRC;
> > - } else {
> > - rev_key_node->key.src.addr =
> > alg_exp->alg_nat_repl_addr;
> > - nc->nat_action = NAT_ACTION_DST;
> > - }
> > + if (alg_exp->nat_rpl_dst) {
> > + rev_key_node->key.dst.addr = alg_exp->alg_nat_repl_addr;
> > + nc->nat_action = NAT_ACTION_SRC;
> > } else {
> > - bool nat_res = nat_get_unique_tuple(ct, nc,
> > nat_action_info);
> > - if (!nat_res) {
> > - goto nat_res_exhaustion;
> > - }
> > + rev_key_node->key.src.addr = alg_exp->alg_nat_repl_addr;
> > + nc->nat_action = NAT_ACTION_DST;
> > + }
> > + } else {
> > + bool nat_res = nat_get_unique_tuple(ct, nc, nat_action_info);
> > + if (!nat_res) {
> > + goto nat_res_exhaustion;
> > }
> > -
> > - nat_packet(pkt, nc, false, ctx->icmp_related);
> > - uint32_t rev_hash = conn_key_hash(&rev_key_node->key,
> > - ct->hash_basis);
> > - cmap_insert(&ct->conns[ctx->key.zone],
> > - &rev_key_node->cm_node, rev_hash);
> > }
> >
> > + nat_packet(pkt, nc, false, ctx->icmp_related);
> > + uint32_t rev_hash = conn_key_hash(&rev_key_node->key,
> > + ct->hash_basis);
> > cmap_insert(&ct->conns[ctx->key.zone],
> > - &fwd_key_node->cm_node, ctx->hash);
> > - conn_expire_push_front(ct, nc);
> > - atomic_count_inc(&ct->n_conn);
> > -
> > - if (zl) {
> > - atomic_count_inc(&zl->czl.count);
> > - }
> > -
> > - ctx->conn = nc; /* For completeness. */
> > + &rev_key_node->cm_node, rev_hash);
> > }
> >
> > + cmap_insert(&ct->conns[ctx->key.zone],
> > + &fwd_key_node->cm_node, ctx->hash);
> > + conn_expire_push_front(ct, nc);
> > + atomic_count_inc(&ct->n_conn);
> > +
> > + if (zl) {
> > + atomic_count_inc(&zl->czl.count);
> > + }
> > +
> > + ctx->conn = nc; /* For completeness. */
> > +
> > return nc;
> >
> > /* This would be a user error or a DOS attack. A user error is
> > prevented
> > @@ -1146,6 +1133,62 @@ nat_res_exhaustion:
> > return NULL;
> > }
> >
> > +static bool
> > +pkt_set_new_ct_state(struct dp_packet *pkt, struct conn_lookup_ctx *ctx,
> > + const struct alg_exp_node *alg_exp)
> > +{
> > + if (!valid_new(pkt, &ctx->key)) {
> > + pkt->md.ct_state = CS_INVALID;
> > + return false;
> > + }
> > +
> > + pkt->md.ct_state = CS_NEW;
> > +
> > + if (alg_exp) {
> > + pkt->md.ct_state |= CS_RELATED;
> > + }
> > +
> > + return true;
> > +}
> > +
> > +static struct conn *
> > +conn_maybe_not_found(struct conntrack *ct, struct dp_packet *pkt,
> > + struct conn_lookup_ctx *ctx, bool commit, long long
> > now,
> > + const struct nat_action_info_t *nat_action_info,
> > + const char *helper, const struct alg_exp_node
> > *alg_exp,
> > + enum ct_alg_ctl_type ct_alg_ctl, uint32_t tp_id)
> > + {
>
> ^ Whitespace
>
> > + struct conn *nc = NULL;
> > +
> > + /* Note that we only insert a connection during if commit=true. In this
>
> omit the 'during' I think.
>
> > + * case we must ensure that the connection is not already part of
> > + * ct->conns. This means the lock needs to be held across the loockup
> > and
>
> lookup
>
> > + * insert.
> > + * Since we do not need this sequencing if we do not insert we can
> > skip the
> > + * lock completely.
> > + * We do not use multiple small if's as this confused clangs locking
> > + * analysis. */
> > + if (commit) {
> > + ovs_mutex_lock(&ct->ct_lock);
> > + bool found = conn_lookup(ct, &ctx->key, now, NULL, NULL);
> > + if (!found) {
> > + if (!pkt_set_new_ct_state(pkt, ctx, alg_exp)) {
> > + ovs_mutex_unlock(&ct->ct_lock);
> > + return nc;
> > + }
> > + nc = conn_insert(ct, pkt, ctx, now, nat_action_info,
> > + helper, alg_exp, ct_alg_ctl, tp_id);
> > + }
> > + ovs_mutex_unlock(&ct->ct_lock);
> > + } else {
> > + bool found = conn_lookup(ct, &ctx->key, now, NULL, NULL);
> > + if (!found) {
> > + pkt_set_new_ct_state(pkt, ctx, alg_exp);
> > + }
> > + }
> > + return nc;
> > +}
> > +
> > static bool
> > conn_update_state(struct conntrack *ct, struct dp_packet *pkt,
> > struct conn_lookup_ctx *ctx, struct conn *conn,
> > @@ -1440,12 +1483,8 @@ process_one(struct conntrack *ct, struct dp_packet
> > *pkt,
> > }
> > ovs_rwlock_unlock(&ct->resources_lock);
> >
> > - ovs_mutex_lock(&ct->ct_lock);
> > - if (!conn_lookup(ct, &ctx->key, now, NULL, NULL)) {
> > - conn = conn_not_found(ct, pkt, ctx, commit, now,
> > nat_action_info,
> > - helper, alg_exp, ct_alg_ctl, tp_id);
> > - }
> > - ovs_mutex_unlock(&ct->ct_lock);
> > + conn = conn_maybe_not_found(ct, pkt, ctx, commit, now,
> > nat_action_info,
> > + helper, alg_exp, ct_alg_ctl, tp_id);
> > }
> >
> > write_ct_md(pkt, zone, conn, &ctx->key, alg_exp);
> >
> > base-commit: 57a9016751af1f9bf9af36d57c978838c00c0948
>
> Not sure what this line is (I guess git generated it?)
Yes this comes from git
https://git-scm.com/docs/git-format-patch#_base_tree_information
I think i enabled it sometime ago in order to try and convince patchwork
to test patches that where based on some older version of the main
branch.
Not sure if it actually helped in any way, but i just left it enabled.
Thanks a lot,
Felix
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev