Am Thu, Jun 18, 2026 at 04:24:59PM +0200 schrieb Felix Huettner:
> Previously modifications to the list of conntrack entries required
> taking a global lock. As zones are generally separate from each other
> there is no need for such a global lock. This allows multiple pmds to
> handle changes to different zones in parallel.
>
> During the removal of the global lock we also moved the zone limits
> into the individual zones. This removes a further need for
> global locks.
>
> In addition, this required a restructuring of conntrack_cleanup to no
> longer use rculists and instead iterate over each zone. This also allows
> such operations to be parallelized in the future.
>
> The new "ovstest test-conntrack benchmark-tcp" shows the benefits
> nicely. They mostly exist for connections that are set up and torn down
> all the time (maybe as part of some port scanning or similar).
>
> Using the same testcases as in the previous commits, the table below
> lists the total time for the test run with the various configs.
> | | Without this patch | With this patch |
> | Config 1 | 70.5 s | 17.9 s |
> | Config 2 | 80.7 s | 22.9 s |
> | Config 3 | 65.9 s | 19.2 s |
> | Config 4 | 71.4 s | 18.5 s |
> | Config 5 | 43.3 s | 47.0 s |
> | Config 6 | 46.6 s | 51.1 s |
>
> Signed-off-by: Felix Huettner <[email protected]>
> ---
> lib/conntrack-private.h | 36 ++--
> lib/conntrack.c | 445 ++++++++++++++--------------------------
> 2 files changed, 166 insertions(+), 315 deletions(-)
Something strange happened in the CI with regard to the installation of
dependencies.
Recheck-request: github-robot-_Build_and_Test
>
> diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
> index 0141afd4a..8282bbe5a 100644
> --- a/lib/conntrack-private.h
> +++ b/lib/conntrack-private.h
> @@ -134,9 +134,6 @@ struct conn {
> * True as soon as a thread has started freeing
> * its memory. */
>
> - /* Inserted once by a PMD, then managed by the 'ct_clean' thread. */
> - struct rculist node;
> -
> /* Mutable data. */
> struct ovs_mutex lock; /* Guards all mutable fields. */
> ovs_u128 label;
> @@ -144,10 +141,6 @@ struct conn {
> uint32_t mark;
> int seq_skew;
>
> - /* Immutable data. */
> - int32_t admit_zone; /* The zone for managing zone limit counts. */
> - uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */
> -
> /* Mutable data. */
> bool seq_skew_dir; /* TCP sequence skew direction due to NATTing of FTP
> * control messages; true if reply direction. */
> @@ -198,32 +191,33 @@ enum ct_ephemeral_range {
> #define FOR_EACH_PORT_IN_RANGE(curr, min, max) \
> FOR_EACH_PORT_IN_RANGE__(curr, min, max, OVS_JOIN(idx, __COUNTER__))
>
> -#define ZONE_LIMIT_CONN_DEFAULT -1
> +#define CONN_LIMIT_NONE -1
> +#define CONN_LIMIT_USE_DEFAULT -2
>
> -struct conntrack_zone_limit {
> +struct conntrack_zone {
> int32_t zone;
> - atomic_int64_t limit;
> - atomic_count count;
> - uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */
> +
> + struct ovs_mutex zone_lock; /* Protects the following fields. */
> + struct cmap conns;
> +
> + /* Limits */
> + atomic_int64_t limit; /* Currently active limit. */
> + atomic_int64_t requested_limit; /* User requested limit. May be
> + * CONN_LIMIT_USE_DEFAULT if it should use
> + * the default limit. */
> + atomic_count count; /* Number of connections currently tracked. */
> };
>
> struct conntrack {
> struct ovs_mutex ct_lock; /* Protects the following fields. */
> - struct cmap conns[UINT16_MAX + 1];
> - struct rculist exp_lists[N_EXP_LISTS];
> - struct cmap zone_limits;
> + struct conntrack_zone zones[UINT16_MAX + 1];
> struct cmap timeout_policies;
> - uint32_t zone_limit_seq OVS_GUARDED; /* Used to disambiguate zone limit
> - * counts. */
> atomic_uint32_t default_zone_limit;
>
> uint32_t hash_basis; /* Salt for hashing a connection key. */
> pthread_t clean_thread; /* Periodically cleans up connection tracker. */
> struct latch clean_thread_exit; /* To destroy the 'clean_thread'. */
> - unsigned int next_list; /* Next list where the newly created connection
> - * gets inserted. */
> - unsigned int next_sweep; /* List from which the gc thread will resume
> - * the sweeping. */
> + unsigned int next_clean_zone; /* Next zone where the clean should run. */
>
> /* Counting connections. */
> atomic_count n_conn; /* Number of connections currently tracked. */
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index ffff4111f..6d28cffe3 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -41,7 +41,6 @@
> #include "ovs-thread.h"
> #include "openvswitch/poll-loop.h"
> #include "random.h"
> -#include "rculist.h"
> #include "timeval.h"
> #include "unaligned.h"
>
> @@ -89,11 +88,6 @@ enum ct_alg_ctl_type {
> CT_ALG_CTL_SIP,
> };
>
> -struct zone_limit {
> - struct cmap_node node;
> - struct conntrack_zone_limit czl;
> -};
> -
> static bool conn_key_extract(struct conntrack *, struct dp_packet *,
> ovs_be16 dl_type, struct conn_lookup_ctx *,
> uint16_t zone);
> @@ -111,7 +105,6 @@ static enum ct_update_res conn_update(struct conntrack
> *ct, struct conn *conn,
> long long now);
> static long long int conn_expiration(const struct conn *);
> static bool conn_expired(const struct conn *, long long now);
> -static void conn_expire_push_front(struct conntrack *ct, struct conn *conn);
> static void set_mark(struct dp_packet *, struct conn *,
> uint32_t val, uint32_t mask);
> static void set_label(struct dp_packet *, struct conn *,
> @@ -267,14 +260,12 @@ conntrack_init(void)
>
> ovs_mutex_init_adaptive(&ct->ct_lock);
> ovs_mutex_lock(&ct->ct_lock);
> - for (unsigned i = 0; i < ARRAY_SIZE(ct->conns); i++) {
> - cmap_init(&ct->conns[i]);
> + for (unsigned i = 0; i < ARRAY_SIZE(ct->zones); i++) {
> + ovs_mutex_init_adaptive(&ct->zones[i].zone_lock);
> + cmap_init(&ct->zones[i].conns);
> + ct->zones[i].limit = CONN_LIMIT_NONE;
> + ct->zones[i].requested_limit = CONN_LIMIT_USE_DEFAULT;
> }
> - for (unsigned i = 0; i < ARRAY_SIZE(ct->exp_lists); i++) {
> - rculist_init(&ct->exp_lists[i]);
> - }
> - cmap_init(&ct->zone_limits);
> - ct->zone_limit_seq = 0;
> timeout_policy_init(ct);
> ovs_mutex_unlock(&ct->ct_lock);
>
> @@ -282,7 +273,7 @@ conntrack_init(void)
> atomic_init(&ct->n_conn_limit, DEFAULT_N_CONN_LIMIT);
> atomic_init(&ct->tcp_seq_chk, true);
> atomic_init(&ct->sweep_ms, 20000);
> - atomic_init(&ct->default_zone_limit, 0);
> + atomic_init(&ct->default_zone_limit, CONN_LIMIT_NONE);
> latch_init(&ct->clean_thread_exit);
> ct->clean_thread = ovs_thread_create("ct_clean", clean_thread_main, ct);
> ct->ipf = ipf_init();
> @@ -302,114 +293,28 @@ conntrack_init(void)
> return ct;
> }
>
> -static uint32_t
> -zone_key_hash(int32_t zone, uint32_t basis)
> -{
> - size_t hash = hash_int((OVS_FORCE uint32_t) zone, basis);
> - return hash;
> -}
> -
> static int64_t
> -zone_limit_get_limit__(struct conntrack_zone_limit *czl)
> +zone_read_limit(struct conntrack_zone *cz)
> {
> int64_t limit;
> - atomic_read_relaxed(&czl->limit, &limit);
> -
> + atomic_read_relaxed(&cz->limit, &limit);
> return limit;
> }
>
> -static int64_t
> -zone_limit_get_limit(struct conntrack *ct, struct conntrack_zone_limit *czl)
> +static struct conntrack_zone *
> +zone_lookup(struct conntrack *ct, int32_t zone)
> {
> - int64_t limit = zone_limit_get_limit__(czl);
> -
> - if (limit == ZONE_LIMIT_CONN_DEFAULT) {
> - atomic_read_relaxed(&ct->default_zone_limit, &limit);
> - limit = limit ? limit : -1;
> + if (zone < MIN_ZONE || zone > MAX_ZONE) {
> + return NULL;
> }
>
> - return limit;
> -}
> -
> -static struct zone_limit *
> -zone_limit_lookup_protected(struct conntrack *ct, int32_t zone)
> - OVS_REQUIRES(ct->ct_lock)
> -{
> - uint32_t hash = zone_key_hash(zone, ct->hash_basis);
> - struct zone_limit *zl;
> - CMAP_FOR_EACH_WITH_HASH_PROTECTED (zl, node, hash, &ct->zone_limits) {
> - if (zl->czl.zone == zone) {
> - return zl;
> - }
> - }
> - return NULL;
> -}
> -
> -static struct zone_limit *
> -zone_limit_lookup(struct conntrack *ct, int32_t zone)
> -{
> - uint32_t hash = zone_key_hash(zone, ct->hash_basis);
> - struct zone_limit *zl;
> - CMAP_FOR_EACH_WITH_HASH (zl, node, hash, &ct->zone_limits) {
> - if (zl->czl.zone == zone) {
> - return zl;
> - }
> - }
> - return NULL;
> -}
> -
> -static struct zone_limit *
> -zone_limit_create__(struct conntrack *ct, int32_t zone, int64_t limit)
> - OVS_REQUIRES(ct->ct_lock)
> -{
> - struct zone_limit *zl = NULL;
> -
> - if (zone > DEFAULT_ZONE && zone <= MAX_ZONE) {
> - zl = xmalloc(sizeof *zl);
> - atomic_init(&zl->czl.limit, limit);
> - atomic_count_init(&zl->czl.count, 0);
> - zl->czl.zone = zone;
> - zl->czl.zone_limit_seq = ct->zone_limit_seq++;
> - uint32_t hash = zone_key_hash(zone, ct->hash_basis);
> - cmap_insert(&ct->zone_limits, &zl->node, hash);
> - }
> -
> - return zl;
> -}
> -
> -static struct zone_limit *
> -zone_limit_create(struct conntrack *ct, int32_t zone, int64_t limit)
> - OVS_REQUIRES(ct->ct_lock)
> -{
> - struct zone_limit *zl = zone_limit_lookup_protected(ct, zone);
> -
> - if (zl) {
> - return zl;
> - }
> -
> - return zone_limit_create__(ct, zone, limit);
> -}
> -
> -/* Lazily creates a new entry in the zone_limits cmap if default limit
> - * is set and there's no entry for the zone. */
> -static struct zone_limit *
> -zone_limit_lookup_or_default(struct conntrack *ct, int32_t zone)
> - OVS_REQUIRES(ct->ct_lock)
> -{
> - struct zone_limit *zl = zone_limit_lookup_protected(ct, zone);
> -
> - if (!zl) {
> - uint32_t limit;
> - atomic_read_relaxed(&ct->default_zone_limit, &limit);
> -
> - if (limit) {
> - zl = zone_limit_create__(ct, zone, ZONE_LIMIT_CONN_DEFAULT);
> - }
> - }
> -
> - return zl;
> + return &ct->zones[zone];
> }
>
> +/* Returns the conntrack_zone_info for the requested zone.
> + * Note: to be compatible with the kernel implementation we return empty
> + * entries if the default limit is to be used.
> + * This can be improved upon later. */
> struct conntrack_zone_info
> zone_limit_get(struct conntrack *ct, int32_t zone)
> {
> @@ -418,82 +323,66 @@ zone_limit_get(struct conntrack *ct, int32_t zone)
> .limit = 0,
> .count = 0,
> };
> - struct zone_limit *zl = zone_limit_lookup(ct, zone);
> - if (zl) {
> - int64_t czl_limit = zone_limit_get_limit__(&zl->czl);
> - if (czl_limit > ZONE_LIMIT_CONN_DEFAULT) {
> - czl.zone = zl->czl.zone;
> - czl.limit = czl_limit;
> - } else {
> - atomic_read_relaxed(&ct->default_zone_limit, &czl.limit);
> - }
> -
> - czl.count = atomic_count_get(&zl->czl.count);
> - } else {
> + if (zone == DEFAULT_ZONE) {
> atomic_read_relaxed(&ct->default_zone_limit, &czl.limit);
> + } else {
> + struct conntrack_zone *cz = zone_lookup(ct, zone);
> + uint64_t req_limit;
> + atomic_read_relaxed(&cz->requested_limit, &req_limit);
> + if (req_limit != CONN_LIMIT_USE_DEFAULT) {
> + czl.zone = zone;
> + czl.limit = zone_read_limit(cz);
> + czl.count = atomic_count_get(&cz->count);
> + }
> + }
> +
> + if (czl.limit == CONN_LIMIT_NONE) {
> + czl.limit = 0;
> }
>
> return czl;
> }
>
> -static void
> -zone_limit_clean__(struct conntrack *ct, struct zone_limit *zl)
> - OVS_REQUIRES(ct->ct_lock)
> +static bool
> +zone_limit_reset(struct conntrack *ct, struct conntrack_zone *cz)
> {
> - uint32_t hash = zone_key_hash(zl->czl.zone, ct->hash_basis);
> - cmap_remove(&ct->zone_limits, &zl->node, hash);
> - ovsrcu_postpone(free, zl);
> -}
> -
> -static void
> -zone_limit_clean(struct conntrack *ct, struct zone_limit *zl)
> - OVS_REQUIRES(ct->ct_lock)
> -{
> - uint32_t limit;
> + int64_t limit;
> + atomic_read_relaxed(&cz->requested_limit, &limit);
> + if (limit == CONN_LIMIT_USE_DEFAULT) {
> + return false;
> + }
>
> atomic_read_relaxed(&ct->default_zone_limit, &limit);
> - /* Do not remove the entry if the default limit is enabled, but
> - * simply move the limit to default. */
> - if (limit) {
> - atomic_store_relaxed(&zl->czl.limit, ZONE_LIMIT_CONN_DEFAULT);
> - } else {
> - zone_limit_clean__(ct, zl);
> - }
> + atomic_store_relaxed(&cz->limit, limit);
> + atomic_store_relaxed(&cz->requested_limit, CONN_LIMIT_USE_DEFAULT);
> + return true;
> }
>
> +
> static void
> -zone_limit_clean_default(struct conntrack *ct)
> - OVS_REQUIRES(ct->ct_lock)
> +zone_limit_update_default(struct conntrack *ct, uint32_t limit)
> {
> - struct zone_limit *zl;
> - int64_t czl_limit;
> + int64_t cz_req_limit;
>
> - atomic_store_relaxed(&ct->default_zone_limit, 0);
> + atomic_store_relaxed(&ct->default_zone_limit, limit);
>
> - CMAP_FOR_EACH (zl, node, &ct->zone_limits) {
> - atomic_read_relaxed(&zl->czl.limit, &czl_limit);
> - if (zone_limit_get_limit__(&zl->czl) == ZONE_LIMIT_CONN_DEFAULT) {
> - zone_limit_clean__(ct, zl);
> + for (unsigned i = 0; i < ARRAY_SIZE(ct->zones); i++) {
> + atomic_read_relaxed(&ct->zones[i].requested_limit, &cz_req_limit);
> + if (cz_req_limit == CONN_LIMIT_USE_DEFAULT) {
> + atomic_store_relaxed(&ct->zones[i].limit, limit);
> }
> }
> }
>
> static bool
> zone_limit_delete__(struct conntrack *ct, int32_t zone)
> - OVS_REQUIRES(ct->ct_lock)
> {
> - struct zone_limit *zl = NULL;
> -
> if (zone == DEFAULT_ZONE) {
> - zone_limit_clean_default(ct);
> + zone_limit_update_default(ct, 0);
> + return false;
> } else {
> - zl = zone_limit_lookup_protected(ct, zone);
> - if (zl) {
> - zone_limit_clean(ct, zl);
> - }
> + return zone_limit_reset(ct, zone_lookup(ct, zone));
> }
> -
> - return zl != NULL;
> }
>
> int
> @@ -501,9 +390,7 @@ zone_limit_delete(struct conntrack *ct, int32_t zone)
> {
> bool deleted;
>
> - ovs_mutex_lock(&ct->ct_lock);
> deleted = zone_limit_delete__(ct, zone);
> - ovs_mutex_unlock(&ct->ct_lock);
>
> if (zone != DEFAULT_ZONE) {
> VLOG_INFO(deleted
> @@ -515,45 +402,27 @@ zone_limit_delete(struct conntrack *ct, int32_t zone)
> return 0;
> }
>
> -static void
> -zone_limit_update_default(struct conntrack *ct, int32_t zone, uint32_t limit)
> -{
> - /* limit zero means delete default. */
> - if (limit == 0) {
> - ovs_mutex_lock(&ct->ct_lock);
> - zone_limit_delete__(ct, zone);
> - ovs_mutex_unlock(&ct->ct_lock);
> - } else {
> - atomic_store_relaxed(&ct->default_zone_limit, limit);
> - }
> -}
> -
> int
> zone_limit_update(struct conntrack *ct, int32_t zone, uint32_t limit)
> {
> - struct zone_limit *zl;
> + struct conntrack_zone *cz;
> int err = 0;
>
> if (zone == DEFAULT_ZONE) {
> - zone_limit_update_default(ct, zone, limit);
> + zone_limit_update_default(ct, limit);
> VLOG_INFO("Set default zone limit to %u", limit);
> return err;
> }
>
> - zl = zone_limit_lookup(ct, zone);
> - if (zl) {
> - atomic_store_relaxed(&zl->czl.limit, limit);
> - VLOG_INFO("Changed zone limit of %u for zone %d", limit, zone);
> + cz = zone_lookup(ct, zone);
> + if (cz) {
> + atomic_store_relaxed(&cz->limit, limit);
> + atomic_store_relaxed(&cz->requested_limit, limit);
> + VLOG_INFO("Set zone limit of %u for zone %d", limit, zone);
> } else {
> - ovs_mutex_lock(&ct->ct_lock);
> - err = zone_limit_create(ct, zone, limit) == NULL;
> - ovs_mutex_unlock(&ct->ct_lock);
> - if (!err) {
> - VLOG_INFO("Created zone limit of %u for zone %d", limit, zone);
> - } else {
> - VLOG_WARN("Request to create zone limit for invalid zone %d",
> - zone);
> - }
> + VLOG_WARN("Request to create zone limit for invalid zone %d",
> + zone);
> + err = 1;
> }
>
> return err;
> @@ -561,47 +430,47 @@ zone_limit_update(struct conntrack *ct, int32_t zone,
> uint32_t limit)
>
> static void
> conn_clean__(struct conntrack *ct, struct conn *conn)
> - OVS_REQUIRES(ct->ct_lock)
> {
> + uint16_t fwd_zone;
> + struct conntrack_zone *cz;
> uint32_t hash;
>
> if (conn->alg) {
> expectation_clean(ct, &conn->key_node[CT_DIR_FWD].key);
> }
>
> + fwd_zone = conn->key_node[CT_DIR_FWD].key.zone;
> + cz = zone_lookup(ct, fwd_zone);
> hash = conn_key_hash(&conn->key_node[CT_DIR_FWD].key, ct->hash_basis);
> - cmap_remove(&ct->conns[conn->key_node[CT_DIR_FWD].key.zone],
> + ovs_mutex_lock(&cz->zone_lock);
> + cmap_remove(&cz->conns,
> &conn->key_node[CT_DIR_FWD].cm_node, hash);
> + atomic_count_dec(&cz->count);
>
> if (conn->nat_action) {
> + ovs_assert(fwd_zone == conn->key_node[CT_DIR_REV].key.zone);
> hash = conn_key_hash(&conn->key_node[CT_DIR_REV].key,
> ct->hash_basis);
> - cmap_remove(&ct->conns[conn->key_node[CT_DIR_REV].key.zone],
> +
> + cmap_remove(&cz->conns,
> &conn->key_node[CT_DIR_REV].cm_node, hash);
> }
>
> - rculist_remove(&conn->node);
> + ovs_mutex_unlock(&cz->zone_lock);
> }
>
> /* Also removes the associated nat 'conn' from the lookup
> datastructures. */
> static void
> conn_clean(struct conntrack *ct, struct conn *conn)
> - OVS_EXCLUDED(conn->lock, ct->ct_lock)
> + OVS_EXCLUDED(conn->lock)
> {
> if (atomic_flag_test_and_set(&conn->reclaimed)) {
> return;
> }
>
> COVERAGE_INC(conntrack_remove);
> - ovs_mutex_lock(&ct->ct_lock);
> conn_clean__(ct, conn);
> - ovs_mutex_unlock(&ct->ct_lock);
> -
> - struct zone_limit *zl = zone_limit_lookup(ct, conn->admit_zone);
> - if (zl && zl->czl.zone_limit_seq == conn->zone_limit_seq) {
> - atomic_count_dec(&zl->czl.count);
> - }
>
> ovsrcu_postpone(delete_conn, conn);
> atomic_count_dec(&ct->n_conn);
> @@ -619,26 +488,10 @@ conn_force_expire(struct conn *conn)
> void
> conntrack_destroy(struct conntrack *ct)
> {
> - struct conn *conn;
> -
> latch_set(&ct->clean_thread_exit);
> pthread_join(ct->clean_thread, NULL);
> latch_destroy(&ct->clean_thread_exit);
>
> - for (unsigned i = 0; i < N_EXP_LISTS; i++) {
> - RCULIST_FOR_EACH (conn, node, &ct->exp_lists[i]) {
> - conn_clean(ct, conn);
> - }
> - }
> -
> - struct zone_limit *zl;
> - CMAP_FOR_EACH (zl, node, &ct->zone_limits) {
> - uint32_t hash = zone_key_hash(zl->czl.zone, ct->hash_basis);
> -
> - cmap_remove(&ct->zone_limits, &zl->node, hash);
> - ovsrcu_postpone(free, zl);
> - }
> -
> struct timeout_policy *tp;
> CMAP_FOR_EACH (tp, node, &ct->timeout_policies) {
> uint32_t hash = hash_int(tp->policy.id, ct->hash_basis);
> @@ -649,14 +502,14 @@ conntrack_destroy(struct conntrack *ct)
>
> conntrack_flush(ct, NULL);
>
> - ovs_mutex_lock(&ct->ct_lock);
> - for (unsigned i = 0; i < ARRAY_SIZE(ct->conns); i++) {
> - cmap_destroy(&ct->conns[i]);
> + for (unsigned i = 0; i < ARRAY_SIZE(ct->zones); i++) {
> + ovs_mutex_lock(&ct->zones[i].zone_lock);
> + cmap_destroy(&ct->zones[i].conns);
> + ovs_mutex_unlock(&ct->zones[i].zone_lock);
> + ovs_mutex_destroy(&ct->zones[i].zone_lock);
> }
> - cmap_destroy(&ct->zone_limits);
> cmap_destroy(&ct->timeout_policies);
>
> - ovs_mutex_unlock(&ct->ct_lock);
> ovs_mutex_destroy(&ct->ct_lock);
>
> ovs_mutex_lock(&ct->resources_lock);
> @@ -672,7 +525,7 @@ conntrack_destroy(struct conntrack *ct)
>
>
> static bool
> -conn_key_lookup(struct conntrack *ct, const struct conn_key *key,
> +conn_key_lookup(struct conntrack_zone *cz, const struct conn_key *key,
> uint32_t hash, long long now, struct conn **conn_out,
> bool *reply)
> {
> @@ -680,7 +533,8 @@ conn_key_lookup(struct conntrack *ct, const struct
> conn_key *key,
> struct conn *conn = NULL;
> bool found = false;
>
> - CMAP_FOR_EACH_WITH_HASH (keyn, cm_node, hash, &ct->conns[key->zone]) {
> + CMAP_FOR_EACH_WITH_HASH (keyn, cm_node, hash,
> + &cz->conns) {
> if (keyn->dir == CT_DIR_FWD) {
> conn = CONTAINER_OF(keyn, struct conn, key_node[CT_DIR_FWD]);
> } else {
> @@ -712,12 +566,21 @@ out_found:
> return found;
> }
>
> +static bool
> +conn_lookup_zone(struct conntrack *ct, struct conntrack_zone *cz,
> + const struct conn_key *key, long long now,
> + struct conn **conn_out, bool *reply)
> +{
> + uint32_t hash = conn_key_hash(key, ct->hash_basis);
> + return conn_key_lookup(cz, key, hash, now, conn_out, reply);
> +}
> +
> static bool
> conn_lookup(struct conntrack *ct, const struct conn_key *key,
> long long now, struct conn **conn_out, bool *reply)
> {
> - uint32_t hash = conn_key_hash(key, ct->hash_basis);
> - return conn_key_lookup(ct, key, hash, now, conn_out, reply);
> + struct conntrack_zone *cz = &ct->zones[key->zone];
> + return conn_lookup_zone(ct, cz, key, now, conn_out, reply);
> }
>
> static void
> @@ -1026,27 +889,25 @@ ct_verify_helper(const char *helper, enum
> ct_alg_ctl_type ct_alg_ctl)
> }
>
> static struct conn *
> -conn_insert(struct conntrack *ct, struct dp_packet *pkt,
> +conn_insert(struct conntrack *ct, struct conntrack_zone *cz,
> + struct dp_packet *pkt,
> struct conn_lookup_ctx *ctx, long long now,
> const struct nat_action_info_t *nat_action_info,
> const char *helper, const struct alg_exp_node *alg_exp,
> enum ct_alg_ctl_type ct_alg_ctl, uint32_t tp_id)
> - OVS_REQUIRES(ct->ct_lock)
> + OVS_REQUIRES(cz->zone_lock)
> {
> COVERAGE_INC(conntrack_insert);
> struct conn *nc = NULL;
>
> int64_t czl_limit;
> struct conn_key_node *fwd_key_node, *rev_key_node;
> - struct zone_limit *zl = zone_limit_lookup_or_default(ct,
> - ctx->key.zone);
> - if (zl) {
> - czl_limit = zone_limit_get_limit(ct, &zl->czl);
> - if (czl_limit >= 0 &&
> - atomic_count_get(&zl->czl.count) >= czl_limit) {
> - COVERAGE_INC(conntrack_zone_full);
> - return nc;
> - }
> +
> + czl_limit = zone_read_limit(cz);
> + if (czl_limit != CONN_LIMIT_NONE &&
> + atomic_count_get(&cz->count) >= czl_limit) {
> + COVERAGE_INC(conntrack_zone_full);
> + return nc;
> }
>
> unsigned int n_conn_limit;
> @@ -1080,13 +941,6 @@ conn_insert(struct conntrack *ct, struct dp_packet *pkt,
> fwd_key_node->dir = CT_DIR_FWD;
> rev_key_node->dir = CT_DIR_REV;
>
> - if (zl) {
> - nc->admit_zone = zl->czl.zone;
> - nc->zone_limit_seq = zl->czl.zone_limit_seq;
> - } else {
> - nc->admit_zone = INVALID_ZONE;
> - }
> -
> if (nat_action_info) {
> nc->nat_action = nat_action_info->nat_action;
>
> @@ -1108,18 +962,14 @@ conn_insert(struct conntrack *ct, struct dp_packet
> *pkt,
> nat_packet(pkt, nc, false, ctx->icmp_related);
> uint32_t rev_hash = conn_key_hash(&rev_key_node->key,
> ct->hash_basis);
> - cmap_insert(&ct->conns[ctx->key.zone],
> + cmap_insert(&ct->zones[ctx->key.zone].conns,
> &rev_key_node->cm_node, rev_hash);
> }
>
> - cmap_insert(&ct->conns[ctx->key.zone],
> + cmap_insert(&ct->zones[ctx->key.zone].conns,
> &fwd_key_node->cm_node, ctx->hash);
> - conn_expire_push_front(ct, nc);
> atomic_count_inc(&ct->n_conn);
> -
> - if (zl) {
> - atomic_count_inc(&zl->czl.count);
> - }
> + atomic_count_inc(&cz->count);
>
> ctx->conn = nc; /* For completeness. */
>
> @@ -1165,6 +1015,7 @@ conn_maybe_not_found(struct conntrack *ct, struct
> dp_packet *pkt,
> {
> COVERAGE_INC(conntrack_maybe_not_found);
> struct conn *nc = NULL;
> + struct conntrack_zone *cz = zone_lookup(ct, ctx->key.zone);
>
> /* Note that we only insert a connection if commit=true. In this
> * case we must ensure that the connection is not already part of
> @@ -1175,19 +1026,19 @@ conn_maybe_not_found(struct conntrack *ct, struct
> dp_packet *pkt,
> * We do not use multiple small if's as this confused clangs locking
> * analysis. */
> if (commit) {
> - ovs_mutex_lock(&ct->ct_lock);
> - bool found = conn_lookup(ct, &ctx->key, now, NULL, NULL);
> + ovs_mutex_lock(&cz->zone_lock);
> + bool found = conn_lookup_zone(ct, cz, &ctx->key, now, NULL, NULL);
> if (!found) {
> if (!pkt_set_new_ct_state(pkt, ctx, alg_exp)) {
> - ovs_mutex_unlock(&ct->ct_lock);
> + ovs_mutex_unlock(&cz->zone_lock);
> return nc;
> }
> - nc = conn_insert(ct, pkt, ctx, now, nat_action_info,
> + nc = conn_insert(ct, cz, pkt, ctx, now, nat_action_info,
> helper, alg_exp, ct_alg_ctl, tp_id);
> }
> - ovs_mutex_unlock(&ct->ct_lock);
> + ovs_mutex_unlock(&cz->zone_lock);
> } else {
> - bool found = conn_lookup(ct, &ctx->key, now, NULL, NULL);
> + bool found = conn_lookup_zone(ct, cz, &ctx->key, now, NULL, NULL);
> if (!found) {
> pkt_set_new_ct_state(pkt, ctx, alg_exp);
> }
> @@ -1399,7 +1250,8 @@ initial_conn_lookup(struct conntrack *ct, struct
> conn_lookup_ctx *ctx,
> conn_key_reverse(&ctx->key);
> }
>
> - conn_key_lookup(ct, &ctx->key, ctx->hash, now, &ctx->conn, &ctx->reply);
> + struct conntrack_zone *cz = &ct->zones[ctx->key.zone];
> + conn_key_lookup(cz, &ctx->key, ctx->hash, now, &ctx->conn, &ctx->reply);
>
> if (natted) {
> if (OVS_LIKELY(ctx->conn)) {
> @@ -1426,6 +1278,8 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
> const struct nat_action_info_t *nat_action_info,
> const char *helper, uint32_t tp_id)
> {
> + ovs_assert(ctx->key.zone == zone);
> +
> /* Reset ct_state whenever entering a new zone. */
> if (pkt->md.ct_state && pkt->md.ct_zone != zone) {
> pkt->md.ct_state = 0;
> @@ -1630,28 +1484,34 @@ conntrack_get_sweep_interval(struct conntrack *ct)
> }
>
> static size_t
> -ct_sweep(struct conntrack *ct, struct rculist *list, long long now,
> - size_t *cleaned_count)
> +ct_sweep_zone(struct conntrack *ct, uint16_t zone, long long now,
> + size_t *cleaned_count)
> OVS_NO_THREAD_SAFETY_ANALYSIS
> {
> + struct conn_key_node *keyn;
> + struct conntrack_zone *cz;
> + unsigned int conn_count = 0;
> + unsigned int cleaned = 0;
> struct conn *conn;
> - size_t cleaned = 0;
> - size_t count = 0;
> + long long expiration;
>
> - RCULIST_FOR_EACH (conn, node, list) {
> - if (conn_expired(conn, now)) {
> - conn_clean(ct, conn);
> - cleaned++;
> + cz = zone_lookup(ct, zone);
> + CMAP_FOR_EACH (keyn, cm_node, &cz->conns) {
> + if (keyn->dir != CT_DIR_FWD) {
> + continue;
> }
>
> - count++;
> - }
> + conn = CONTAINER_OF(keyn, struct conn, key_node[keyn->dir]);
> + expiration = conn_expiration(conn);
> + if (now >= expiration) {
> + conn_clean(ct, conn);
> + cleaned++;
> + }
>
> - if (cleaned_count) {
> - *cleaned_count = cleaned;
> + conn_count++;
> }
> -
> - return count;
> + *cleaned_count = cleaned;
> + return conn_count;
> }
>
> /* Cleans up old connection entries from 'ct'. Returns the time
> @@ -1664,23 +1524,28 @@ conntrack_clean(struct conntrack *ct, long long now)
> unsigned int n_conn_limit, i;
> size_t clean_end, count = 0;
> size_t total_cleaned = 0;
> + uint16_t current_zone = ct->next_clean_zone;
>
> atomic_read_relaxed(&ct->n_conn_limit, &n_conn_limit);
> clean_end = n_conn_limit / 64;
>
> - for (i = ct->next_sweep; i < N_EXP_LISTS; i++) {
> - size_t cleaned;
> + for (i = 0; i < ARRAY_SIZE(ct->zones); i++) {
> + size_t cleaned = 0;
>
> if (count > clean_end) {
> next_wakeup = 0;
> break;
> }
>
> - count += ct_sweep(ct, &ct->exp_lists[i], now, &cleaned);
> + count += ct_sweep_zone(ct, current_zone, now, &cleaned);
> total_cleaned += cleaned;
> +
> + /* This will overflow and thereby allow us to iterate through all
> + * zones. */
> + current_zone++;
> }
>
> - ct->next_sweep = (i < N_EXP_LISTS) ? i : 0;
> + ct->next_clean_zone = current_zone + 1;
>
> VLOG_DBG("conntrack cleaned %"PRIuSIZE" entries out of %"PRIuSIZE
> " entries in %lld msec", total_cleaned, count,
> @@ -2717,16 +2582,6 @@ conn_update(struct conntrack *ct, struct conn *conn,
> struct dp_packet *pkt,
> return update_res;
> }
>
> -static void
> -conn_expire_push_front(struct conntrack *ct, struct conn *conn)
> - OVS_REQUIRES(ct->ct_lock)
> -{
> - unsigned int curr = ct->next_list;
> -
> - ct->next_list = (ct->next_list + 1) % N_EXP_LISTS;
> - rculist_push_front(&ct->exp_lists[curr], &conn->node);
> -}
> -
> static long long int
> conn_expiration(const struct conn *conn)
> {
> @@ -2914,7 +2769,8 @@ conntrack_dump_start(struct conntrack *ct, struct
> conntrack_dump *dump,
>
> dump->ct = ct;
> *ptot_bkts = 1; /* Need to clean up the callers. */
> - dump->cursor = cmap_cursor_start(&dump->ct->conns[dump->current_zone]);
> + dump->cursor = cmap_cursor_start(
> + &dump->ct->zones[dump->current_zone].conns);
> return 0;
> }
>
> @@ -2945,7 +2801,8 @@ conntrack_dump_next(struct conntrack_dump *dump, struct
> ct_dpif_entry *entry)
> break;
> }
> dump->current_zone++;
> - dump->cursor =
> cmap_cursor_start(&dump->ct->conns[dump->current_zone]);
> + dump->cursor = cmap_cursor_start(
> + &dump->ct->zones[dump->current_zone].conns);
> }
>
> return EOF;
> @@ -3015,7 +2872,7 @@ conntrack_flush_zone(struct conntrack *ct, const
> uint16_t zone)
> struct conn_key_node *keyn;
> struct conn *conn;
>
> - CMAP_FOR_EACH (keyn, cm_node, &ct->conns[zone]) {
> + CMAP_FOR_EACH (keyn, cm_node, &ct->zones[zone].conns) {
> if (keyn->dir != CT_DIR_FWD) {
> continue;
> }
> @@ -3033,7 +2890,7 @@ conntrack_flush(struct conntrack *ct, const uint16_t
> *zone)
> return conntrack_flush_zone(ct, *zone);
> }
>
> - for (unsigned i = 0; i < ARRAY_SIZE(ct->conns); i++) {
> + for (unsigned i = 0; i < ARRAY_SIZE(ct->zones); i++) {
> conntrack_flush_zone(ct, i);
> }
>
> --
> 2.43.0
>
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev