Hi Paolo,
I installed the patch for 2.17 on april 6th in our test environment and can 
confirm that it works. We haven't had any crashes since then. Many thanks for 
the quick solution!

Best regards

Michael

-----Ursprüngliche Nachricht-----
Von: Paolo Valerio <pvale...@redhat.com> 
Gesendet: Montag, 17. April 2023 10:36
An: Lazuardi Nasution <mrxlazuar...@gmail.com>
Cc: ovs-discuss@openvswitch.org; Plato, Michael <michael.pl...@tu-berlin.de>
Betreff: Re: Re: [ovs-discuss] ovs-vswitchd crashes serveral times a day

Lazuardi Nasution <mrxlazuar...@gmail.com> writes:

> Hi Paolo,
>
> I'm interested in your statement of "expired connections (but not yet 
> reclaimed)". Do you think that shortening conntrack timeout policy will help?
> Or, should we make it larger so there will be fewer conntrack table 
> update and flush attempts?
>

it's hard to say as it depends on the specific use case.
Probably making it larger for the specific case could help, but in general, I 
would not rely on that.
Of course, an actual fix is needed. It would be great if the patch sent could 
tested, but in any case, I'll work on a formal patch.

> Best regards.
>
> On Wed, Apr 5, 2023 at 2:51 AM Paolo Valerio <pvale...@redhat.com> wrote:
>
>     Hello,
>
>     thanks for reporting this.
>     I had a look at it, and, although this needs to be confirmed, I suspect
>     it's related to nat (CT_CONN_TYPE_UN_NAT) and expired connections (but
>     not yet reclaimed).
>
>     The nat part does not necessarily perform any actual translation, but
>     could still be triggered by ct(nat(src)...) which is the all-zero binding
>     to avoid collisions, if any.
>
>     Is there any chance to test the following patch (targeted for ovs 2.17)?
>     This should help to confirm.
>
>     -- >8 --
>     diff --git a/lib/conntrack.c b/lib/conntrack.c
>     index 08da4ddf7..ba334afb0 100644
>     --- a/lib/conntrack.c
>     +++ b/lib/conntrack.c
>     @@ -94,9 +94,8 @@ static bool valid_new(struct dp_packet *pkt, struct
>     conn_key *);
>      static struct conn *new_conn(struct conntrack *ct, struct dp_packet *pkt,
>                                   struct conn_key *, long long now,
>                                   uint32_t tp_id);
>     -static void delete_conn_cmn(struct conn *);
>     +static void delete_conn__(struct conn *);
>      static void delete_conn(struct conn *);
>     -static void delete_conn_one(struct conn *conn);
>      static enum ct_update_res conn_update(struct conntrack *ct, struct conn
>     *conn,
>                                            struct dp_packet *pkt,
>                                            struct conn_lookup_ctx *ctx,
>     @@ -444,14 +443,13 @@ zone_limit_delete(struct conntrack *ct, uint16_t
>     zone)
>      }
>
>      static void
>     -conn_clean_cmn(struct conntrack *ct, struct conn *conn)
>     +conn_clean_cmn(struct conntrack *ct, struct conn *conn, uint32_t hash)
>          OVS_REQUIRES(ct->ct_lock)
>      {
>          if (conn->alg) {
>              expectation_clean(ct, &conn->key);
>          }
>
>     -    uint32_t hash = conn_key_hash(&conn->key, ct->hash_basis);
>          cmap_remove(&ct->conns, &conn->cm_node, hash);
>
>          struct zone_limit *zl = zone_limit_lookup(ct, conn->admit_zone);
>     @@ -467,11 +465,14 @@ conn_clean(struct conntrack *ct, struct conn *conn)
>          OVS_REQUIRES(ct->ct_lock)
>      {
>          ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
>     +    uint32_t conn_hash = conn_key_hash(&conn->key, 
> ct->hash_basis);
>
>     -    conn_clean_cmn(ct, conn);
>     +    conn_clean_cmn(ct, conn, conn_hash);
>          if (conn->nat_conn) {
>              uint32_t hash = conn_key_hash(&conn->nat_conn->key, ct->
>     hash_basis);
>     -        cmap_remove(&ct->conns, &conn->nat_conn->cm_node, hash);
>     +        if (conn_hash != hash) {
>     +            cmap_remove(&ct->conns, &conn->nat_conn->cm_node, hash);
>     +        }
>          }
>          ovs_list_remove(&conn->exp_node);
>          conn->cleaned = true;
>     @@ -479,19 +480,6 @@ conn_clean(struct conntrack *ct, struct conn *conn)
>          atomic_count_dec(&ct->n_conn);
>      }
>
>     -static void
>     -conn_clean_one(struct conntrack *ct, struct conn *conn)
>     -    OVS_REQUIRES(ct->ct_lock)
>     -{
>     -    conn_clean_cmn(ct, conn);
>     -    if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
>     -        ovs_list_remove(&conn->exp_node);
>     -        conn->cleaned = true;
>     -        atomic_count_dec(&ct->n_conn);
>     -    }
>     -    ovsrcu_postpone(delete_conn_one, conn);
>     -}
>     -
>      /* Destroys the connection tracker 'ct' and frees all the allocated
>     memory.
>       * The caller of this function must already have shut down packet input
>       * and PMD threads (which would have been quiesced).  */
>     @@ -505,7 +493,10 @@ conntrack_destroy(struct conntrack *ct)
>
>          ovs_mutex_lock(&ct->ct_lock);
>          CMAP_FOR_EACH (conn, cm_node, &ct->conns) {
>     -        conn_clean_one(ct, conn);
>     +        if (conn->conn_type == CT_CONN_TYPE_UN_NAT) {
>     +            continue;
>     +        }
>     +        conn_clean(ct, conn);
>          }
>          cmap_destroy(&ct->conns);
>
>     @@ -1052,7 +1043,10 @@ conn_not_found(struct conntrack *ct, struct
>     dp_packet *pkt,
>                  nat_conn->alg = NULL;
>                  nat_conn->nat_conn = NULL;
>                  uint32_t nat_hash = conn_key_hash(&nat_conn->key, ct->
>     hash_basis);
>     -            cmap_insert(&ct->conns, &nat_conn->cm_node, nat_hash);
>     +
>     +            if (nat_hash != ctx->hash) {
>     +                cmap_insert(&ct->conns, &nat_conn->cm_node, nat_hash);
>     +            }
>              }
>
>              nc->nat_conn = nat_conn;
>     @@ -1080,7 +1074,7 @@ conn_not_found(struct conntrack *ct, struct 
> dp_packet
>     *pkt,
>      nat_res_exhaustion:
>          free(nat_conn);
>          ovs_list_remove(&nc->exp_node);
>     -    delete_conn_cmn(nc);
>     +    delete_conn__(nc);
>          static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
>          VLOG_WARN_RL(&rl, "Unable to NAT due to tuple space exhaustion - "
>                       "if DoS attack, use firewalling and/or zone
>     partitioning.");
>     @@ -2549,7 +2543,7 @@ new_conn(struct conntrack *ct, struct dp_packet 
> *pkt,
>     struct conn_key *key,
>      }
>
>      static void
>     -delete_conn_cmn(struct conn *conn)
>     +delete_conn__(struct conn *conn)
>      {
>          free(conn->alg);
>          free(conn);
>     @@ -2561,17 +2555,7 @@ delete_conn(struct conn *conn)
>          ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
>          ovs_mutex_destroy(&conn->lock);
>          free(conn->nat_conn);
>     -    delete_conn_cmn(conn);
>     -}
>     -
>     -/* Only used by conn_clean_one(). */
>     -static void
>     -delete_conn_one(struct conn *conn)
>     -{
>     -    if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
>     -        ovs_mutex_destroy(&conn->lock);
>     -    }
>     -    delete_conn_cmn(conn);
>     +    delete_conn__(conn);
>      }
>
>      /* Convert a conntrack address 'a' into an IP address 'b' based on
>     'dl_type'.
>     @@ -2747,8 +2731,12 @@ conntrack_flush(struct conntrack *ct, const 
> uint16_t
>     *zone)
>
>          ovs_mutex_lock(&ct->ct_lock);
>          CMAP_FOR_EACH (conn, cm_node, &ct->conns) {
>     +        if (conn->conn_type == CT_CONN_TYPE_UN_NAT) {
>     +            continue;
>     +        }
>     +
>              if (!zone || *zone == conn->key.zone) {
>     -            conn_clean_one(ct, conn);
>     +            conn_clean(ct, conn);
>              }
>          }
>          ovs_mutex_unlock(&ct->ct_lock);

_______________________________________________
discuss mailing list
disc...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-discuss

Reply via email to