Prohibit moving certain threads (e.g., in irq and nmi contexts, or
real-time tasks) to the secondary queue. Those prioritized threads will
always stay in the primary queue, and so will have a shorter wait time
for the lock.
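
In effect, cna_order_queue() will splice a waiter onto the secondary
queue only if it is neither running on the preferred NUMA node nor
prioritized. A sketch of the resulting condition (illustration, not the
literal code in the diff below):

	if (next_numa_node != numa_node &&
	    next_numa_node != CNA_PRIORITY_NODE) {
		/* detach @next and splice it onto the secondary queue */
	}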

Signed-off-by: Alex Kogan <alex.ko...@oracle.com>
Reviewed-by: Steve Sistare <steven.sist...@oracle.com>
Reviewed-by: Waiman Long <long...@redhat.com>
---
 kernel/locking/qspinlock_cna.h | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)
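
For reviewers, a self-contained userspace sketch of the decision this
patch changes (names mirror the patch; may_move_to_secondary() is a
hypothetical helper for illustration only, not part of the kernel code):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define CNA_PRIORITY_NODE	0xffff

	/*
	 * Return true when the waiter carrying @next_numa_node may be
	 * detached from the primary queue, given the preferred node
	 * @numa_node of the waiter at the head.
	 */
	static bool may_move_to_secondary(uint16_t numa_node,
					  uint16_t next_numa_node)
	{
		if (next_numa_node == numa_node)
			return false;	/* same node: keep in primary queue */
		if (next_numa_node == CNA_PRIORITY_NODE)
			return false;	/* prioritized: never moved */
		return true;
	}

	int main(void)
	{
		printf("%d\n", may_move_to_secondary(0, 1));	/* 1: moved */
		printf("%d\n", may_move_to_secondary(0, 0));	/* 0: same node */
		printf("%d\n", may_move_to_secondary(0, CNA_PRIORITY_NODE)); /* 0 */
		return 0;
	}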

diff --git a/kernel/locking/qspinlock_cna.h b/kernel/locking/qspinlock_cna.h
index d3e27549c769..ac3109ab0a84 100644
--- a/kernel/locking/qspinlock_cna.h
+++ b/kernel/locking/qspinlock_cna.h
@@ -4,6 +4,7 @@
 #endif
 
 #include <linux/topology.h>
+#include <linux/sched/rt.h>
 
 /*
  * Implement a NUMA-aware version of MCS (aka CNA, or compact NUMA-aware lock).
@@ -35,7 +36,8 @@
 * running on the same NUMA node. If it is not, that waiter is detached from the
  * main queue and moved into the tail of the secondary queue. This way, we
  * gradually filter the primary queue, leaving only waiters running on the same
- * preferred NUMA node.
+ * preferred NUMA node. Note that certain prioritized waiters (e.g., in
+ * irq and nmi contexts) are excluded from being moved to the secondary queue.
  *
  * We change the NUMA node preference after a waiter at the head of the
  * secondary queue spins for a certain amount of time (10ms, by default).
@@ -49,6 +51,8 @@
  *          Dave Dice <dave.d...@oracle.com>
  */
 
+#define CNA_PRIORITY_NODE      0xffff
+
 struct cna_node {
        struct mcs_spinlock     mcs;
        u16                     numa_node;
@@ -121,9 +125,10 @@ static int __init cna_init_nodes(void)
 
 static __always_inline void cna_init_node(struct mcs_spinlock *node)
 {
+       bool priority = !in_task() || irqs_disabled() || rt_task(current);
        struct cna_node *cn = (struct cna_node *)node;
 
-       cn->numa_node = cn->real_numa_node;
+       cn->numa_node = priority ? CNA_PRIORITY_NODE : cn->real_numa_node;
        cn->start_time = 0;
 }
 
@@ -262,11 +267,13 @@ static u32 cna_order_queue(struct mcs_spinlock *node)
        next_numa_node = ((struct cna_node *)next)->numa_node;
 
        if (next_numa_node != numa_node) {
-               struct mcs_spinlock *nnext = READ_ONCE(next->next);
+               if (next_numa_node != CNA_PRIORITY_NODE) {
+                       struct mcs_spinlock *nnext = READ_ONCE(next->next);
 
-               if (nnext) {
-                       cna_splice_next(node, next, nnext);
-                       next = nnext;
+                       if (nnext) {
+                               cna_splice_next(node, next, nnext);
+                               next = nnext;
+                       }
                }
                /*
                 * Inherit NUMA node id of primary queue, to maintain the
@@ -284,6 +291,13 @@ static __always_inline u32 cna_wait_head_or_lock(struct qspinlock *lock,
        struct cna_node *cn = (struct cna_node *)node;
 
        if (!cn->start_time || !intra_node_threshold_reached(cn)) {
+               /*
+                * We are at the head of the wait queue, no need to use
+                * the fake NUMA node ID.
+                */
+               if (cn->numa_node == CNA_PRIORITY_NODE)
+                       cn->numa_node = cn->real_numa_node;
+
                /*
                 * Try and put the time otherwise spent spin waiting on
                 * _Q_LOCKED_PENDING_MASK to use by sorting our lists.
-- 
2.24.3 (Apple Git-128)
