Re: [ovs-dev] [PATCH v1 5/9] conntrack: Inverse conn and ct lock precedence

2021-02-23 Thread William Tu
On Wed, Feb 17, 2021 at 8:34 AM Gaetan Rivet  wrote:
>
> The lock priority order is for the global 'ct_lock' to be taken first
> and then 'conn->lock'. This is an issue, as multiple operations on
> connections are thus blocked between threads contending on the
> global 'ct_lock'.
>
> This was previously necessary due to how the expiration lists, timeout
> policies and zone limits were managed. They are now using RCU-friendly
> structures that allow concurrent readers. The mutual exclusion now only
> needs to happen during writes.
>
> This allows reducing the 'ct_lock' precedence, and to only take it
> when writing the relevant structures. This will reduce contention on
> 'ct_lock', which impairs scalability when the connection tracker is
> used by many threads.
>
> Signed-off-by: Gaetan Rivet 
> Reviewed-by: Eli Britstein 
> ---

Thanks! it's pretty cool to see locks being removed or optimized.
The changes make sense to me. But maybe others should also
take a second look.


William
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v1 5/9] conntrack: Inverse conn and ct lock precedence

2021-02-17 Thread Gaetan Rivet
The lock priority order is for the global 'ct_lock' to be taken first
and then 'conn->lock'. This is an issue, as multiple operations on
connections are thus blocked between threads contending on the
global 'ct_lock'.

This was previously necessary due to how the expiration lists, timeout
policies and zone limits were managed. They are now using RCU-friendly
structures that allow concurrent readers. The mutual exclusion now only
needs to happen during writes.

This allows reducing the 'ct_lock' precedence, and to only take it
when writing the relevant structures. This will reduce contention on
'ct_lock', which impairs scalability when the connection tracker is
used by many threads.

Signed-off-by: Gaetan Rivet 
Reviewed-by: Eli Britstein 
---
 lib/conntrack-private.h | 16 +---
 lib/conntrack-tp.c  | 29 ++---
 lib/conntrack.c | 58 +
 3 files changed, 57 insertions(+), 46 deletions(-)

diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
index 7eb555092..e8ffb5bf9 100644
--- a/lib/conntrack-private.h
+++ b/lib/conntrack-private.h
@@ -140,6 +140,9 @@ struct conn {
 struct nat_action_info_t *nat_info;
 char *alg;
 struct conn *nat_conn; /* The NAT 'conn' context, if there is one. */
+atomic_flag reclaimed; /* False during the lifetime of the connection,
+* True as soon as a thread has started freeing
+* its memory. */
 
 /* Mutable data. */
 struct ovs_mutex lock; /* Guards all mutable fields. */
@@ -200,8 +203,8 @@ struct conntrack {
 };
 
 /* Lock acquisition order:
- *1. 'ct_lock'
- *2. 'conn->lock'
+ *1. 'conn->lock'
+ *2. 'ct_lock'
  *3. 'resources_lock'
  */
 
@@ -222,11 +225,18 @@ struct ct_l4_proto {
 };
 
 static inline void
-conn_expire_remove(struct conn_expire *exp)
+conn_expire_remove(struct conn_expire *exp, bool need_ct_lock)
+OVS_NO_THREAD_SAFETY_ANALYSIS
 {
 if (!atomic_flag_test_and_set(>remove_once)
 && rculist_next(>node)) {
+if (need_ct_lock) {
+ovs_mutex_lock(>ct->ct_lock);
+}
 rculist_remove(>node);
+if (need_ct_lock) {
+ovs_mutex_unlock(>ct->ct_lock);
+}
 }
 }
 
diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c
index a376857ec..01ffa30df 100644
--- a/lib/conntrack-tp.c
+++ b/lib/conntrack-tp.c
@@ -238,6 +238,7 @@ tm_to_ct_dpif_tp(enum ct_timeout tm)
 
 static void
 conn_expire_init(struct conn *conn, struct conntrack *ct)
+OVS_REQUIRES(conn->lock)
 {
 struct conn_expire *exp = >exp;
 
@@ -257,20 +258,22 @@ conn_expire_insert(struct conn *conn)
 {
 struct conn_expire *exp = >exp;
 
-ovs_mutex_lock(>ct->ct_lock);
 ovs_mutex_lock(>lock);
 
+ovs_mutex_lock(>ct->ct_lock);
 rculist_push_back(>ct->exp_lists[exp->tm], >node);
+ovs_mutex_unlock(>ct->ct_lock);
+
 atomic_flag_clear(>insert_once);
 atomic_flag_clear(>remove_once);
 
 ovs_mutex_unlock(>lock);
-ovs_mutex_unlock(>ct->ct_lock);
 }
 
 static void
 conn_schedule_expiration(struct conntrack *ct, struct conn *conn,
  enum ct_timeout tm, long long now, uint32_t tp_value)
+OVS_REQUIRES(conn->lock)
 {
 conn_expire_init(conn, ct);
 conn->expiration = now + tp_value * 1000;
@@ -285,19 +288,12 @@ conn_update_expiration__(struct conntrack *ct, struct 
conn *conn,
  enum ct_timeout tm, long long now,
  uint32_t tp_value)
 OVS_REQUIRES(conn->lock)
+OVS_EXCLUDED(ct->ct_lock)
 {
-ovs_mutex_unlock(>lock);
-
-ovs_mutex_lock(>ct_lock);
-ovs_mutex_lock(>lock);
 if (!conn->cleaned) {
-conn_expire_remove(>exp);
+conn_expire_remove(>exp, true);
 conn_schedule_expiration(ct, conn, tm, now, tp_value);
 }
-ovs_mutex_unlock(>lock);
-ovs_mutex_unlock(>ct_lock);
-
-ovs_mutex_lock(>lock);
 }
 
 /* The conn entry lock must be held on entry and exit. */
@@ -309,20 +305,12 @@ conn_update_expiration(struct conntrack *ct, struct conn 
*conn,
 struct timeout_policy *tp;
 uint32_t val;
 
-ovs_mutex_unlock(>lock);
-
-ovs_mutex_lock(>ct_lock);
-ovs_mutex_lock(>lock);
 tp = timeout_policy_lookup(ct, conn->tp_id);
 if (tp) {
 val = tp->policy.attrs[tm_to_ct_dpif_tp(tm)];
 } else {
 val = ct_dpif_netdev_tp_def[tm_to_ct_dpif_tp(tm)];
 }
-ovs_mutex_unlock(>lock);
-ovs_mutex_unlock(>ct_lock);
-
-ovs_mutex_lock(>lock);
 VLOG_DBG_RL(, "Update timeout %s zone=%u with policy id=%d "
 "val=%u sec.",
 ct_timeout_str[tm], conn->key.zone, conn->tp_id, val);
@@ -330,11 +318,10 @@ conn_update_expiration(struct conntrack *ct, struct conn 
*conn,
 conn_update_expiration__(ct, conn, tm, now, val);
 }
 
-/* ct_lock must be held. */
 void
 conn_init_expiration(struct conntrack *ct, struct conn *conn,
  enum