Hi Paolo, many thanks for the patch. I'll try it asap... Regards
Michael -----Ursprüngliche Nachricht----- Von: Paolo Valerio <pvale...@redhat.com> Gesendet: Dienstag, 4. April 2023 21:51 An: ovs-discuss@openvswitch.org Cc: Plato, Michael <michael.pl...@tu-berlin.de>; mrxlazuar...@gmail.com Betreff: Re: Re: [ovs-discuss] ovs-vswitchd crashes serveral times a day Hello, thanks for reporting this. I had a look at it, and, although this needs to be confirmed, I suspect it's related to nat (CT_CONN_TYPE_UN_NAT) and expired connections (but not yet reclaimed). The nat part does not necessarily perform any actual translation, but could still be triggered by ct(nat(src)...) which is the all-zero binding to avoid collisions, if any. Is there any chance to test the following patch (targeted for ovs 2.17)? This should help to confirm. -- >8 -- diff --git a/lib/conntrack.c b/lib/conntrack.c index 08da4ddf7..ba334afb0 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -94,9 +94,8 @@ static bool valid_new(struct dp_packet *pkt, struct conn_key *); static struct conn *new_conn(struct conntrack *ct, struct dp_packet *pkt, struct conn_key *, long long now, uint32_t tp_id); -static void delete_conn_cmn(struct conn *); +static void delete_conn__(struct conn *); static void delete_conn(struct conn *); -static void delete_conn_one(struct conn *conn); static enum ct_update_res conn_update(struct conntrack *ct, struct conn *conn, struct dp_packet *pkt, struct conn_lookup_ctx *ctx, @@ -444,14 +443,13 @@ zone_limit_delete(struct conntrack *ct, uint16_t zone) } static void -conn_clean_cmn(struct conntrack *ct, struct conn *conn) +conn_clean_cmn(struct conntrack *ct, struct conn *conn, uint32_t hash) OVS_REQUIRES(ct->ct_lock) { if (conn->alg) { expectation_clean(ct, &conn->key); } - uint32_t hash = conn_key_hash(&conn->key, ct->hash_basis); cmap_remove(&ct->conns, &conn->cm_node, hash); struct zone_limit *zl = zone_limit_lookup(ct, conn->admit_zone); @@ -467,11 +465,14 @@ conn_clean(struct conntrack *ct, struct conn *conn) OVS_REQUIRES(ct->ct_lock) { ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT); + uint32_t conn_hash = conn_key_hash(&conn->key, ct->hash_basis); - conn_clean_cmn(ct, conn); + conn_clean_cmn(ct, conn, conn_hash); if (conn->nat_conn) { uint32_t hash = conn_key_hash(&conn->nat_conn->key, ct->hash_basis); - cmap_remove(&ct->conns, &conn->nat_conn->cm_node, hash); + if (conn_hash != hash) { + cmap_remove(&ct->conns, &conn->nat_conn->cm_node, hash); + } } ovs_list_remove(&conn->exp_node); conn->cleaned = true; @@ -479,19 +480,6 @@ conn_clean(struct conntrack *ct, struct conn *conn) atomic_count_dec(&ct->n_conn); } -static void -conn_clean_one(struct conntrack *ct, struct conn *conn) - OVS_REQUIRES(ct->ct_lock) -{ - conn_clean_cmn(ct, conn); - if (conn->conn_type == CT_CONN_TYPE_DEFAULT) { - ovs_list_remove(&conn->exp_node); - conn->cleaned = true; - atomic_count_dec(&ct->n_conn); - } - ovsrcu_postpone(delete_conn_one, conn); -} - /* Destroys the connection tracker 'ct' and frees all the allocated memory. * The caller of this function must already have shut down packet input * and PMD threads (which would have been quiesced). */ @@ -505,7 +493,10 @@ conntrack_destroy(struct conntrack *ct) ovs_mutex_lock(&ct->ct_lock); CMAP_FOR_EACH (conn, cm_node, &ct->conns) { - conn_clean_one(ct, conn); + if (conn->conn_type == CT_CONN_TYPE_UN_NAT) { + continue; + } + conn_clean(ct, conn); } cmap_destroy(&ct->conns); @@ -1052,7 +1043,10 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, nat_conn->alg = NULL; nat_conn->nat_conn = NULL; uint32_t nat_hash = conn_key_hash(&nat_conn->key, ct->hash_basis); - cmap_insert(&ct->conns, &nat_conn->cm_node, nat_hash); + + if (nat_hash != ctx->hash) { + cmap_insert(&ct->conns, &nat_conn->cm_node, nat_hash); + } } nc->nat_conn = nat_conn; @@ -1080,7 +1074,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, nat_res_exhaustion: free(nat_conn); ovs_list_remove(&nc->exp_node); - delete_conn_cmn(nc); + delete_conn__(nc); static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); VLOG_WARN_RL(&rl, "Unable to NAT due to tuple space exhaustion - " "if DoS attack, use firewalling and/or zone partitioning."); @@ -2549,7 +2543,7 @@ new_conn(struct conntrack *ct, struct dp_packet *pkt, struct conn_key *key, } static void -delete_conn_cmn(struct conn *conn) +delete_conn__(struct conn *conn) { free(conn->alg); free(conn); @@ -2561,17 +2555,7 @@ delete_conn(struct conn *conn) ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT); ovs_mutex_destroy(&conn->lock); free(conn->nat_conn); - delete_conn_cmn(conn); -} - -/* Only used by conn_clean_one(). */ -static void -delete_conn_one(struct conn *conn) -{ - if (conn->conn_type == CT_CONN_TYPE_DEFAULT) { - ovs_mutex_destroy(&conn->lock); - } - delete_conn_cmn(conn); + delete_conn__(conn); } /* Convert a conntrack address 'a' into an IP address 'b' based on 'dl_type'. @@ -2747,8 +2731,12 @@ conntrack_flush(struct conntrack *ct, const uint16_t *zone) ovs_mutex_lock(&ct->ct_lock); CMAP_FOR_EACH (conn, cm_node, &ct->conns) { + if (conn->conn_type == CT_CONN_TYPE_UN_NAT) { + continue; + } + if (!zone || *zone == conn->key.zone) { - conn_clean_one(ct, conn); + conn_clean(ct, conn); } } ovs_mutex_unlock(&ct->ct_lock); _______________________________________________ discuss mailing list disc...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-discuss