This patch enables the accumulation of unfair qspinlock statistics
when the CONFIG_QUEUED_LOCK_STAT configuration option is set.

The accumulated lock statistics will be reported in debugfs under
the unfair-qspinlock directory.
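
As an illustration (not part of the patch itself), a minimal userspace
sketch for reading one of the new counters; it assumes debugfs is
mounted at the conventional /sys/kernel/debug:

#include <stdio.h>

int main(void)
{
        /* trylock_cnt is exposed by debugfs_create_u32() as decimal text */
        FILE *f = fopen("/sys/kernel/debug/unfair-qspinlock/trylock_cnt", "r");
        unsigned int cnt;

        if (!f) {
                perror("fopen");
                return 1;
        }
        if (fscanf(f, "%u", &cnt) == 1)
                printf("trylock_cnt = %u\n", cnt);
        fclose(f);
        return 0;
}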

On a KVM guest with 32 vCPUs, the statistics counts after bootup were:

lsteal_cnts = 172219 2377 425 118 33 8 5 12 14 0 0 0
trylock_cnt = 1495372

So most of the lock stealing happened in the initial trylock before
entering the queue. Once a vCPU is in the queue, its chance of stealing
the lock drops off significantly the further it is from the queue head.
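
For reference, the bucketing that maps a lock stealing period to an
lsteal_cnts[] slot can be sketched in plain userspace C as below; the
LPERIOD_MIN_SHIFT and NR_LPERIOD_CNTS values are illustrative
assumptions, not the kernel's actual constants:

/*
 * Userspace sketch of the ustat_inc() bucketing: the most significant
 * bit of a node's lock stealing period selects the lsteal_cnts[] slot.
 */
#include <stdio.h>

#define LPERIOD_MIN_SHIFT       4       /* assumed, for illustration */
#define NR_LPERIOD_CNTS         12      /* matches the 12 counts above */

/* like the kernel's fls(): MSB position + 1, or 0 for an input of 0 */
static int fls32(unsigned int v)
{
        return v ? 32 - __builtin_clz(v) : 0;
}

static int lsteal_bucket(unsigned int period)
{
        int idx = fls32(period) - LPERIOD_MIN_SHIFT;

        if (idx >= NR_LPERIOD_CNTS)
                idx = NR_LPERIOD_CNTS - 1;      /* clamp long periods */
        return idx;
}

int main(void)
{
        unsigned int p;

        for (p = 1u << LPERIOD_MIN_SHIFT; p <= 1u << 19; p <<= 1)
                printf("period %7u -> lsteal_cnts[%d]\n",
                       p, lsteal_bucket(p));
        return 0;
}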

Signed-off-by: Waiman Long <waiman.l...@hp.com>
---
 arch/x86/Kconfig                  |    7 ++-
 kernel/locking/qspinlock.c        |    2 +-
 kernel/locking/qspinlock_unfair.h |   89 +++++++++++++++++++++++++++++++++++++
 3 files changed, 94 insertions(+), 4 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 299a1c4..aee6236 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -680,11 +680,12 @@ config PARAVIRT_SPINLOCKS
          If you are unsure how to answer this question, answer Y.
 
 config QUEUED_LOCK_STAT
-       bool "Paravirt queued lock statistics"
-       depends on PARAVIRT && DEBUG_FS && QUEUED_SPINLOCKS
+       bool "Paravirt/Unfair queued lock statistics"
+       depends on DEBUG_FS && QUEUED_SPINLOCKS
        ---help---
          Enable the collection of statistical data on the behavior of
-         paravirtualized queued spinlocks and report them on debugfs.
+         paravirtualized and unfair queued spinlocks and report them
+         on debugfs.
 
 source "arch/x86/xen/Kconfig"
 
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 65dead9..12e2e89 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -538,7 +538,7 @@ EXPORT_SYMBOL(queued_spin_lock_slowpath);
 #ifdef queued_spin_trylock
 #undef queued_spin_trylock
 #endif
-#define queued_spin_trylock            queued_spin_trylock_unfair
+#define queued_spin_trylock            __queued_spin_trylock_unfair
 
 /*
  * The unfair lock code is used internally and so doesn't need to be exported
diff --git a/kernel/locking/qspinlock_unfair.h b/kernel/locking/qspinlock_unfair.h
index 0e8a40f..fc94578 100644
--- a/kernel/locking/qspinlock_unfair.h
+++ b/kernel/locking/qspinlock_unfair.h
@@ -44,6 +44,93 @@ struct uf_node {
        u32                     prev_tail;      /* Previous node tail code */
 };
 
+#ifdef CONFIG_QUEUED_LOCK_STAT
+
+#include <linux/debugfs.h>
+
+/*
+ * Unfair qspinlock statistics
+ *
+ * All spinning CPUs are grouped into buckets depending on the most
+ * significant bit in their lock stealing period. The first entry in
+ * the array is for the queue head.
+ */
+#define NR_LPERIOD_CNTS        (LPERIOD_THRESHOLD_SHIFT - LPERIOD_MIN_SHIFT + 6)
+static atomic_t lsteal_cnts[NR_LPERIOD_CNTS];
+
+/*
+ * # of successful trylocks at beginning of slowpath
+ */
+static atomic_t trylock_cnt;
+
+/*
+ * Counts reset flag
+ */
+static u32 reset_cnts __read_mostly;
+
+/*
+ * Initialize debugfs for the unfair qspinlock statistics
+ */
+static int __init unfair_qspinlock_debugfs(void)
+{
+       struct dentry *d_ufqlock = debugfs_create_dir("unfair-qspinlock", NULL);
+
+       if (!d_ufqlock)
+               printk(KERN_WARNING
+                 "Could not create 'unfair-qspinlock' debugfs directory\n");
+
+       debugfs_create_u32_array("lsteal_cnts", 0444, d_ufqlock,
+                               (u32 *)lsteal_cnts, NR_LPERIOD_CNTS);
+       debugfs_create_u32("trylock_cnt", 0444, d_ufqlock, (u32 *)&trylock_cnt);
+       debugfs_create_bool("reset_cnts", 0644, d_ufqlock, &reset_cnts);
+       return 0;
+}
+fs_initcall(unfair_qspinlock_debugfs);
+
+/*
+ * Reset all the statistics counts
+ */
+static noinline void reset_counts(void)
+{
+       int idx;
+
+       reset_cnts = 0;
+       atomic_set(&trylock_cnt, 0);
+       for (idx = 0; idx < NR_LPERIOD_CNTS; idx++)
+               atomic_set(&lsteal_cnts[idx], 0);
+}
+
+/*
+ * Increment the unfair qspinlock statistic count
+ */
+static inline void ustat_inc(struct uf_node *pn)
+{
+       /*
+        * fls() returns the most significant 1 bit position + 1
+        */
+       int idx = fls(pn->lsteal_period) - LPERIOD_MIN_SHIFT;
+
+       if (idx >= NR_LPERIOD_CNTS)
+               idx = NR_LPERIOD_CNTS - 1;
+       atomic_inc(&lsteal_cnts[idx]);
+       if (unlikely(reset_cnts))
+               reset_counts();
+}
+
+static inline bool __queued_spin_trylock_unfair(struct qspinlock *lock)
+{
+       bool ret = queued_spin_trylock_unfair(lock);
+
+       if (ret)
+               atomic_inc(&trylock_cnt);
+       return ret;
+}
+
+#else /* CONFIG_QUEUED_LOCK_STAT */
+static inline void ustat_inc(struct uf_node *pn) { }
+#define __queued_spin_trylock_unfair   queued_spin_trylock_unfair
+#endif /* CONFIG_QUEUED_LOCK_STAT */
+
 /**
  * cmpxchg_tail - Put in the new tail code if it matches the old one
  * @lock : Pointer to queue spinlock structure
@@ -125,6 +212,7 @@ static inline bool unfair_wait_node(struct qspinlock *lock,
                if (queued_spin_trylock_unfair(lock))
                        break;  /* Got the lock */
        }
+       ustat_inc(pn);
 
        /*
         * Have stolen the lock, need to remove itself from the wait queue.
@@ -220,6 +308,7 @@ unfair_wait_head(struct qspinlock *lock, struct mcs_spinlock *node, u32 tail)
        pn->lsteal_period = LPERIOD_QHEAD;
        while (!queued_spin_trylock_unfair(lock))
                cpu_relax();
+       ustat_inc(pn);
 
        /*
         * Remove tail code in the lock if it is the only one in the queue
-- 
1.7.1
