The lock priority order is for the global 'ct_lock' to be taken first
and then 'conn->lock'. This is an issue, as multiple operations on
connections are thus blocked between threads contending on the
global 'ct_lock'.
This was previously necessary due to how the expiration lists, timeout
policies and zone limits were managed. They are now using RCU-friendly
structures that allow concurrent readers. The mutual exclusion now only
needs to happen during writes.
This allows reducing the 'ct_lock' precedence, and to only take it
when writing the relevant structures. This will reduce contention on
'ct_lock', which impairs scalability when the connection tracker is
used by many threads.
Signed-off-by: Gaetan Rivet
Reviewed-by: Eli Britstein
---
lib/conntrack-private.h | 16 +---
lib/conntrack-tp.c | 29 ++---
lib/conntrack.c | 58 +
3 files changed, 57 insertions(+), 46 deletions(-)
diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
index 7eb555092..e8ffb5bf9 100644
--- a/lib/conntrack-private.h
+++ b/lib/conntrack-private.h
@@ -140,6 +140,9 @@ struct conn {
struct nat_action_info_t *nat_info;
char *alg;
struct conn *nat_conn; /* The NAT 'conn' context, if there is one. */
+atomic_flag reclaimed; /* False during the lifetime of the connection,
+* True as soon as a thread has started freeing
+* its memory. */
/* Mutable data. */
struct ovs_mutex lock; /* Guards all mutable fields. */
@@ -200,8 +203,8 @@ struct conntrack {
};
/* Lock acquisition order:
- *1. 'ct_lock'
- *2. 'conn->lock'
+ *1. 'conn->lock'
+ *2. 'ct_lock'
*3. 'resources_lock'
*/
@@ -222,11 +225,18 @@ struct ct_l4_proto {
};
static inline void
-conn_expire_remove(struct conn_expire *exp)
+conn_expire_remove(struct conn_expire *exp, bool need_ct_lock)
+OVS_NO_THREAD_SAFETY_ANALYSIS
{
if (!atomic_flag_test_and_set(>remove_once)
&& rculist_next(>node)) {
+if (need_ct_lock) {
+ovs_mutex_lock(>ct->ct_lock);
+}
rculist_remove(>node);
+if (need_ct_lock) {
+ovs_mutex_unlock(>ct->ct_lock);
+}
}
}
diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c
index a376857ec..01ffa30df 100644
--- a/lib/conntrack-tp.c
+++ b/lib/conntrack-tp.c
@@ -238,6 +238,7 @@ tm_to_ct_dpif_tp(enum ct_timeout tm)
static void
conn_expire_init(struct conn *conn, struct conntrack *ct)
+OVS_REQUIRES(conn->lock)
{
struct conn_expire *exp = >exp;
@@ -257,20 +258,22 @@ conn_expire_insert(struct conn *conn)
{
struct conn_expire *exp = >exp;
-ovs_mutex_lock(>ct->ct_lock);
ovs_mutex_lock(>lock);
+ovs_mutex_lock(>ct->ct_lock);
rculist_push_back(>ct->exp_lists[exp->tm], >node);
+ovs_mutex_unlock(>ct->ct_lock);
+
atomic_flag_clear(>insert_once);
atomic_flag_clear(>remove_once);
ovs_mutex_unlock(>lock);
-ovs_mutex_unlock(>ct->ct_lock);
}
static void
conn_schedule_expiration(struct conntrack *ct, struct conn *conn,
enum ct_timeout tm, long long now, uint32_t tp_value)
+OVS_REQUIRES(conn->lock)
{
conn_expire_init(conn, ct);
conn->expiration = now + tp_value * 1000;
@@ -285,19 +288,12 @@ conn_update_expiration__(struct conntrack *ct, struct
conn *conn,
enum ct_timeout tm, long long now,
uint32_t tp_value)
OVS_REQUIRES(conn->lock)
+OVS_EXCLUDED(ct->ct_lock)
{
-ovs_mutex_unlock(>lock);
-
-ovs_mutex_lock(>ct_lock);
-ovs_mutex_lock(>lock);
if (!conn->cleaned) {
-conn_expire_remove(>exp);
+conn_expire_remove(>exp, true);
conn_schedule_expiration(ct, conn, tm, now, tp_value);
}
-ovs_mutex_unlock(>lock);
-ovs_mutex_unlock(>ct_lock);
-
-ovs_mutex_lock(>lock);
}
/* The conn entry lock must be held on entry and exit. */
@@ -309,20 +305,12 @@ conn_update_expiration(struct conntrack *ct, struct conn
*conn,
struct timeout_policy *tp;
uint32_t val;
-ovs_mutex_unlock(>lock);
-
-ovs_mutex_lock(>ct_lock);
-ovs_mutex_lock(>lock);
tp = timeout_policy_lookup(ct, conn->tp_id);
if (tp) {
val = tp->policy.attrs[tm_to_ct_dpif_tp(tm)];
} else {
val = ct_dpif_netdev_tp_def[tm_to_ct_dpif_tp(tm)];
}
-ovs_mutex_unlock(>lock);
-ovs_mutex_unlock(>ct_lock);
-
-ovs_mutex_lock(>lock);
VLOG_DBG_RL(, "Update timeout %s zone=%u with policy id=%d "
"val=%u sec.",
ct_timeout_str[tm], conn->key.zone, conn->tp_id, val);
@@ -330,11 +318,10 @@ conn_update_expiration(struct conntrack *ct, struct conn
*conn,
conn_update_expiration__(ct, conn, tm, now, val);
}
-/* ct_lock must be held. */
void
conn_init_expiration(struct conntrack *ct, struct conn *conn,
enum