The hierarchical slowpath is needed for locks that experience
sustained cross-node contention. Enabling it unconditionally is
undesirable for lightly contended locks and may decrease their performance.

Add a simple contention detection scheme that tracks remote handoffs
separately from overall handoff activity and enables HQ mode only when
the observed handoff pattern indicates that cross-node contention is
high enough to benefit from NUMA-aware queueing.

The lock is switched to HQ mode when remote_handoffs exceeds
`hqlock_remote_handoffs_turn_numa`.
While in HQ mode, the lock is switched back to QSPINLOCK mode when the
number of remote handoffs does not exceed `hqlock_remote_handoffs_keep_numa`.

The remote handoffs counter is incremented only when the number of local
handoffs since the previous increment is at least
`hqlock_local_handoffs_to_increase_remotes`.

Additional locktorture reruns showed no degradation in low-contention
configurations after adding contention-based switching, while maintaining
practically the same performance improvement in high-contention cases.

Co-developed-by: Anatoly Stepanov <[email protected]>
Signed-off-by: Anatoly Stepanov <[email protected]>
Co-developed-by: Nikita Fedorov <[email protected]>
Signed-off-by: Nikita Fedorov <[email protected]>
---
 kernel/locking/hqlock_core.h  | 57 +++++++++++++++++++++++++++++++++--
 kernel/locking/hqlock_meta.h  |  4 +++
 kernel/locking/hqlock_types.h |  8 +++--
 3 files changed, 65 insertions(+), 4 deletions(-)

diff --git a/kernel/locking/hqlock_core.h b/kernel/locking/hqlock_core.h
index 7322199228..e2ba09d758 100644
--- a/kernel/locking/hqlock_core.h
+++ b/kernel/locking/hqlock_core.h
@@ -450,6 +450,23 @@ static inline void hqlock_handoff(struct qspinlock *lock,
                                         struct mcs_spinlock *next, u32 tail,
                                         int handoff_info);
 
+/*
+ * In low_contention_mcs_lock_handoff the previous contender stored its NUMA
+ * node in our prev_numa_node instead of reading our qnode->numa_node. That
+ * avoids an extra transfer of our cacheline and helps the processor combine
+ * its writes, so we have to update the remote_handoffs counter ourselves here.
+ */
+static __always_inline void update_counters_qspinlock(struct numa_qnode *qnode)
+{
+       if (qnode->numa_node != qnode->prev_numa_node) {
+               if ((qnode->general_handoffs - qnode->prev_general_handoffs)
+                   > hqlock_local_handoffs_to_increase_remotes) {
+                       qnode->remote_handoffs++;
+               }
+
+               qnode->prev_general_handoffs = qnode->general_handoffs;
+       }
+}
 
 /*
  * Chech if contention has risen and if we need to set NUMA-aware mode
@@ -458,8 +475,13 @@ static __always_inline bool 
determine_contention_qspinlock_mode(struct mcs_spinl
 {
        struct numa_qnode *qnode = (void *)node;
 
-       if (qnode->general_handoffs > 
READ_ONCE(hqlock_general_handoffs_turn_numa))
+       unsigned long general_handoffs = (unsigned long) 
qnode->general_handoffs;
+       unsigned long remote_handoffs = (unsigned long) qnode->remote_handoffs;
+
+       if ((general_handoffs > hqlock_general_handoffs_turn_numa) &&
+               (remote_handoffs > hqlock_remote_handoffs_turn_numa))
                return true;
+
        return false;
 }
 
@@ -485,7 +507,14 @@ static __always_inline bool 
low_contention_try_clear_tail(struct qspinlock *lock
        else
                update_val |= _Q_LOCK_INVALID_TAIL;
 
-       return atomic_try_cmpxchg_relaxed(&lock->val, &val, update_val);
+       bool ret = atomic_try_cmpxchg_relaxed(&lock->val, &val, update_val);
+
+#ifdef CONFIG_HQSPINLOCKS_DEBUG
+       if (ret && high_contention)
+               atomic_inc(&transitions_from_qspinlock_to_hq);
+#endif
+
+       return ret;
 }
 
 static __always_inline void low_contention_mcs_lock_handoff(struct 
mcs_spinlock *node,
@@ -502,6 +531,17 @@ static __always_inline void 
low_contention_mcs_lock_handoff(struct mcs_spinlock
                general_handoffs++;
 
        qnext->general_handoffs = general_handoffs;
+       qnext->remote_handoffs = qnode->remote_handoffs;
+       qnext->prev_general_handoffs = qnode->prev_general_handoffs;
+
+       /*
+        * Publish our NUMA node to the next contender and let it update the
+        * remote_handoffs counter itself in update_counters_qspinlock,
+        * instead of reading its numa_node and updating remote_handoffs here.
+        * This avoids an extra cacheline transfer and helps the processor
+        * combine the writes above.
+        */
+       qnext->prev_numa_node = qnode->numa_node;
+
        arch_mcs_spin_unlock_contended(&next->locked);
 }
 
@@ -557,6 +597,10 @@ static inline void hqlock_init_node(struct mcs_spinlock 
*node)
        qnode->numa_node = numa_node_id() + 1;
        qnode->lock_id = 0;
        qnode->wrong_fallback_tail = 0;
+
+       qnode->remote_handoffs = 0;
+       qnode->prev_numa_node = 0;
+       qnode->prev_general_handoffs = 0;
 }
 
 static inline void reset_handoff_counter(struct numa_qnode *qnode)
@@ -580,6 +624,8 @@ static inline void handoff_local(struct mcs_spinlock *node,
 
        qnext->general_handoffs = general_handoffs;
 
+       qnext->remote_handoffs = qnode->remote_handoffs;
+
        u16 wrong_fallback_tail = qnode->wrong_fallback_tail;
 
        if (wrong_fallback_tail != 0 && wrong_fallback_tail != (tail >> 
_Q_TAIL_OFFSET)) {
@@ -641,6 +687,13 @@ static inline void handoff_remote(struct qspinlock *lock,
 
        mcs_head = (void *) qhead;
 
+       u16 remote_handoffs = qnode->remote_handoffs;
+
+       if (qnode->general_handoffs > hqlock_local_handoffs_to_increase_remotes)
+               remote_handoffs++;
+
+       qhead->remote_handoffs = remote_handoffs;
+
        /* arch_mcs_spin_unlock_contended implies smp-barrier */
        arch_mcs_spin_unlock_contended(&mcs_head->locked);
 }
diff --git a/kernel/locking/hqlock_meta.h b/kernel/locking/hqlock_meta.h
index 5b54801326..561d5a5fd0 100644
--- a/kernel/locking/hqlock_meta.h
+++ b/kernel/locking/hqlock_meta.h
@@ -307,6 +307,10 @@ static inline void release_lock_meta(struct qspinlock 
*lock,
                        goto do_rollback;
        }
 
+       if (qnode->remote_handoffs < hqlock_remote_handoffs_keep_numa) {
+               upd_val |= _Q_LOCK_MODE_QSPINLOCK_VAL;
+       }
+
        /*
         * We need wait until pending is gone.
         * Otherwise, clearing pending can erase a mode we will set here
diff --git a/kernel/locking/hqlock_types.h b/kernel/locking/hqlock_types.h
index 32d06f2755..40061f11a1 100644
--- a/kernel/locking/hqlock_types.h
+++ b/kernel/locking/hqlock_types.h
@@ -37,9 +37,13 @@ struct numa_qnode {
 
        u16 lock_id;
        u16 wrong_fallback_tail;
-       u16 general_handoffs;
-
        u16 numa_node;
+
+
+       u16 general_handoffs;
+       u16 remote_handoffs;
+       u16 prev_general_handoffs;
+       u16 prev_numa_node;
 };
 
 struct numa_queue {
-- 
2.34.1


Reply via email to