Hi Paolo,
many thanks for the patch. I'll try it asap...

Regards

Michael

-----Ursprüngliche Nachricht-----
Von: Paolo Valerio <pvale...@redhat.com> 
Gesendet: Dienstag, 4. April 2023 21:51
An: ovs-discuss@openvswitch.org
Cc: Plato, Michael <michael.pl...@tu-berlin.de>; mrxlazuar...@gmail.com
Betreff: Re: Re: [ovs-discuss] ovs-vswitchd crashes serveral times a day

Hello,

thanks for reporting this.
I had a look at it, and, although this needs to be confirmed, I suspect it's 
related to nat (CT_CONN_TYPE_UN_NAT) and expired connections (but not yet 
reclaimed).

The nat part does not necessarily perform any actual translation, but could 
still be triggered by ct(nat(src)...) which is the all-zero binding to avoid 
collisions, if any.

Is there any chance to test the following patch (targeted for ovs 2.17)?
This should help to confirm.

-- >8 --
diff --git a/lib/conntrack.c b/lib/conntrack.c index 08da4ddf7..ba334afb0 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -94,9 +94,8 @@ static bool valid_new(struct dp_packet *pkt, struct conn_key 
*);  static struct conn *new_conn(struct conntrack *ct, struct dp_packet *pkt,
                              struct conn_key *, long long now,
                              uint32_t tp_id);
-static void delete_conn_cmn(struct conn *);
+static void delete_conn__(struct conn *);
 static void delete_conn(struct conn *); -static void delete_conn_one(struct 
conn *conn);  static enum ct_update_res conn_update(struct conntrack *ct, 
struct conn *conn,
                                       struct dp_packet *pkt,
                                       struct conn_lookup_ctx *ctx,
@@ -444,14 +443,13 @@ zone_limit_delete(struct conntrack *ct, uint16_t zone)  }

 static void
-conn_clean_cmn(struct conntrack *ct, struct conn *conn)
+conn_clean_cmn(struct conntrack *ct, struct conn *conn, uint32_t hash)
     OVS_REQUIRES(ct->ct_lock)
 {
     if (conn->alg) {
         expectation_clean(ct, &conn->key);
     }

-    uint32_t hash = conn_key_hash(&conn->key, ct->hash_basis);
     cmap_remove(&ct->conns, &conn->cm_node, hash);

     struct zone_limit *zl = zone_limit_lookup(ct, conn->admit_zone); @@ 
-467,11 +465,14 @@ conn_clean(struct conntrack *ct, struct conn *conn)
     OVS_REQUIRES(ct->ct_lock)
 {
     ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
+    uint32_t conn_hash = conn_key_hash(&conn->key, ct->hash_basis);

-    conn_clean_cmn(ct, conn);
+    conn_clean_cmn(ct, conn, conn_hash);
     if (conn->nat_conn) {
         uint32_t hash = conn_key_hash(&conn->nat_conn->key, ct->hash_basis);
-        cmap_remove(&ct->conns, &conn->nat_conn->cm_node, hash);
+        if (conn_hash != hash) {
+            cmap_remove(&ct->conns, &conn->nat_conn->cm_node, hash);
+        }
     }
     ovs_list_remove(&conn->exp_node);
     conn->cleaned = true;
@@ -479,19 +480,6 @@ conn_clean(struct conntrack *ct, struct conn *conn)
     atomic_count_dec(&ct->n_conn);
 }

-static void
-conn_clean_one(struct conntrack *ct, struct conn *conn)
-    OVS_REQUIRES(ct->ct_lock)
-{
-    conn_clean_cmn(ct, conn);
-    if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
-        ovs_list_remove(&conn->exp_node);
-        conn->cleaned = true;
-        atomic_count_dec(&ct->n_conn);
-    }
-    ovsrcu_postpone(delete_conn_one, conn);
-}
-
 /* Destroys the connection tracker 'ct' and frees all the allocated memory.
  * The caller of this function must already have shut down packet input
  * and PMD threads (which would have been quiesced).  */ @@ -505,7 +493,10 @@ 
conntrack_destroy(struct conntrack *ct)

     ovs_mutex_lock(&ct->ct_lock);
     CMAP_FOR_EACH (conn, cm_node, &ct->conns) {
-        conn_clean_one(ct, conn);
+        if (conn->conn_type == CT_CONN_TYPE_UN_NAT) {
+            continue;
+        }
+        conn_clean(ct, conn);
     }
     cmap_destroy(&ct->conns);

@@ -1052,7 +1043,10 @@ conn_not_found(struct conntrack *ct, struct dp_packet 
*pkt,
             nat_conn->alg = NULL;
             nat_conn->nat_conn = NULL;
             uint32_t nat_hash = conn_key_hash(&nat_conn->key, ct->hash_basis);
-            cmap_insert(&ct->conns, &nat_conn->cm_node, nat_hash);
+
+            if (nat_hash != ctx->hash) {
+                cmap_insert(&ct->conns, &nat_conn->cm_node, nat_hash);
+            }
         }

         nc->nat_conn = nat_conn;
@@ -1080,7 +1074,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet 
*pkt,
 nat_res_exhaustion:
     free(nat_conn);
     ovs_list_remove(&nc->exp_node);
-    delete_conn_cmn(nc);
+    delete_conn__(nc);
     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
     VLOG_WARN_RL(&rl, "Unable to NAT due to tuple space exhaustion - "
                  "if DoS attack, use firewalling and/or zone partitioning."); 
@@ -2549,7 +2543,7 @@ new_conn(struct conntrack *ct, struct dp_packet *pkt, 
struct conn_key *key,  }

 static void
-delete_conn_cmn(struct conn *conn)
+delete_conn__(struct conn *conn)
 {
     free(conn->alg);
     free(conn);
@@ -2561,17 +2555,7 @@ delete_conn(struct conn *conn)
     ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
     ovs_mutex_destroy(&conn->lock);
     free(conn->nat_conn);
-    delete_conn_cmn(conn);
-}
-
-/* Only used by conn_clean_one(). */
-static void
-delete_conn_one(struct conn *conn)
-{
-    if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
-        ovs_mutex_destroy(&conn->lock);
-    }
-    delete_conn_cmn(conn);
+    delete_conn__(conn);
 }

 /* Convert a conntrack address 'a' into an IP address 'b' based on 'dl_type'.
@@ -2747,8 +2731,12 @@ conntrack_flush(struct conntrack *ct, const uint16_t 
*zone)

     ovs_mutex_lock(&ct->ct_lock);
     CMAP_FOR_EACH (conn, cm_node, &ct->conns) {
+        if (conn->conn_type == CT_CONN_TYPE_UN_NAT) {
+            continue;
+        }
+
         if (!zone || *zone == conn->key.zone) {
-            conn_clean_one(ct, conn);
+            conn_clean(ct, conn);
         }
     }
     ovs_mutex_unlock(&ct->ct_lock);

_______________________________________________
discuss mailing list
disc...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-discuss

Reply via email to