[PATCH tip/core/rcu 01/12] rcu: Remove restrictions on no-CBs CPUs

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" <paul...@linux.vnet.ibm.com>

Currently, CPU 0 is constrained to not be a no-CBs CPU, and furthermore
at least one no-CBs CPU must remain online at any given time.  These
restrictions are problematic in some situations, such as cases where
all CPUs must run a real-time workload that needs to be insulated from
OS jitter and latencies due to RCU callback invocation.  This commit
therefore provides no-CBs CPUs a way to start and to wait for grace
periods independently of the normal RCU callback mechanisms.  This
approach allows any or all of the CPUs to be designated as no-CBs CPUs,
and allows any proper subset of the CPUs (whether no-CBs CPUs or not)
to be offlined.

This commit also provides event tracing, as well as a fix for a locking
bug spotted by Xie ChanglongX.

Signed-off-by: Paul E. McKenney <paul.mcken...@linaro.org>
Signed-off-by: Paul E. McKenney <paul...@linux.vnet.ibm.com>
---
 include/trace/events/rcu.h |   55 +
 init/Kconfig   |4 +-
 kernel/rcutree.c   |   18 ++--
 kernel/rcutree.h   |   20 ++--
 kernel/rcutree_plugin.h|  276 +++-
 5 files changed, 250 insertions(+), 123 deletions(-)

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 1918e83..cdfed6d 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -72,6 +72,58 @@ TRACE_EVENT(rcu_grace_period,
 );
 
 /*
+ * Tracepoint for no-callbacks grace-period events.  The caller should
+ * pull the data from the rcu_node structure, other than rcuname, which
+ * comes from the rcu_state structure, and event, which is one of the
+ * following:
+ *
+ * "Startleaf": Request a nocb grace period based on leaf-node data.
+ * "Startedleaf": Leaf-node start proved sufficient.
+ * "Startedleafroot": Leaf-node start proved sufficient after checking root.
+ * "Startedroot": Requested a nocb grace period based on root-node data.
+ * "StartWait": Start waiting for the requested grace period.
+ * "ResumeWait": Resume waiting after signal.
+ * "EndWait": Complete wait.
+ * "Cleanup": Clean up rcu_node structure after previous GP.
+ * "CleanupMore": Clean up, and another no-CB GP is needed.
+ */
+TRACE_EVENT(rcu_nocb_grace_period,
+
+   TP_PROTO(char *rcuname, unsigned long gpnum, unsigned long completed,
+unsigned long c, u8 level, int grplo, int grphi,
+char *gpevent),
+
+   TP_ARGS(rcuname, gpnum, completed, c, level, grplo, grphi, gpevent),
+
+   TP_STRUCT__entry(
+   __field(char *, rcuname)
+   __field(unsigned long, gpnum)
+   __field(unsigned long, completed)
+   __field(unsigned long, c)
+   __field(u8, level)
+   __field(int, grplo)
+   __field(int, grphi)
+   __field(char *, gpevent)
+   ),
+
+   TP_fast_assign(
+   __entry->rcuname = rcuname;
+   __entry->gpnum = gpnum;
+   __entry->completed = completed;
+   __entry->c = c;
+   __entry->level = level;
+   __entry->grplo = grplo;
+   __entry->grphi = grphi;
+   __entry->gpevent = gpevent;
+   ),
+
+   TP_printk("%s %lu %lu %lu %u %d %d %s",
+ __entry->rcuname, __entry->gpnum, __entry->completed,
+ __entry->c, __entry->level, __entry->grplo, __entry->grphi,
+ __entry->gpevent)
+);
+
+/*
  * Tracepoint for grace-period-initialization events.  These are
  * distinguished by the type of RCU, the new grace-period number, the
  * rcu_node structure level, the starting and ending CPU covered by the
@@ -601,6 +653,9 @@ TRACE_EVENT(rcu_barrier,
 #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
 #define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \
qsmask) do { } while (0)
+#define trace_rcu_nocb_grace_period(rcuname, gpnum, completed, c, \
+   level, grplo, grphi, event) \
+   do { } while (0)
 #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0)
 #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
 #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \
diff --git a/init/Kconfig b/init/Kconfig
index fb19b46..97fc178 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -665,7 +665,7 @@ config RCU_BOOST_DELAY
  Accept the default if unsure.
 
 config RCU_NOCB_CPU
-   bool "Offload RCU callback processing from boot-selected CPUs"
+   bool "Offload RCU callback processing from boot-selected CPUs (EXPERIMENTAL"
depends on TREE_RCU || TREE_PREEMPT_RCU
default n
help
@@ -683,7 +683,7 @@ config RCU_NOCB_CPU
  callback, and (2) affinity or cgroups can be used to force
  the kthreads to run on whatever set of CPUs is desired.
 
- Say Y here if you want reduced OS jitter on selected CPUs.

[PATCH tip/core/rcu 01/12] rcu: Remove restrictions on no-CBs CPUs

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" <paul...@linux.vnet.ibm.com>

Currently, CPU 0 is constrained to not be a no-CBs CPU, and furthermore
at least one no-CBs CPU must remain online at any given time.  These
restrictions are problematic in some situations, such as cases where
all CPUs must run a real-time workload that needs to be insulated from
OS jitter and latencies due to RCU callback invocation.  This commit
therefore provides no-CBs CPUs a way to start and to wait for grace
periods independently of the normal RCU callback mechanisms.  This
approach allows any or all of the CPUs to be designated as no-CBs CPUs,
and allows any proper subset of the CPUs (whether no-CBs CPUs or not)
to be offlined.

This commit also provides event tracing, as well as a fix for a locking
bug spotted by Xie ChanglongX <changlongx@intel.com>.

Signed-off-by: Paul E. McKenney <paul.mcken...@linaro.org>
Signed-off-by: Paul E. McKenney <paul...@linux.vnet.ibm.com>
---
 include/trace/events/rcu.h |   55 +
 init/Kconfig   |4 +-
 kernel/rcutree.c   |   18 ++--
 kernel/rcutree.h   |   20 ++--
 kernel/rcutree_plugin.h|  276 +++-
 5 files changed, 250 insertions(+), 123 deletions(-)

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 1918e83..cdfed6d 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -72,6 +72,58 @@ TRACE_EVENT(rcu_grace_period,
 );
 
 /*
+ * Tracepoint for no-callbacks grace-period events.  The caller should
+ * pull the data from the rcu_node structure, other than rcuname, which
+ * comes from the rcu_state structure, and event, which is one of the
+ * following:
+ *
+ * "Startleaf": Request a nocb grace period based on leaf-node data.
+ * "Startedleaf": Leaf-node start proved sufficient.
+ * "Startedleafroot": Leaf-node start proved sufficient after checking root.
+ * "Startedroot": Requested a nocb grace period based on root-node data.
+ * "StartWait": Start waiting for the requested grace period.
+ * "ResumeWait": Resume waiting after signal.
+ * "EndWait": Complete wait.
+ * "Cleanup": Clean up rcu_node structure after previous GP.
+ * "CleanupMore": Clean up, and another no-CB GP is needed.
+ */
+TRACE_EVENT(rcu_nocb_grace_period,
+
+   TP_PROTO(char *rcuname, unsigned long gpnum, unsigned long completed,
+unsigned long c, u8 level, int grplo, int grphi,
+char *gpevent),
+
+   TP_ARGS(rcuname, gpnum, completed, c, level, grplo, grphi, gpevent),
+
+   TP_STRUCT__entry(
+   __field(char *, rcuname)
+   __field(unsigned long, gpnum)
+   __field(unsigned long, completed)
+   __field(unsigned long, c)
+   __field(u8, level)
+   __field(int, grplo)
+   __field(int, grphi)
+   __field(char *, gpevent)
+   ),
+
+   TP_fast_assign(
+   __entry->rcuname = rcuname;
+   __entry->gpnum = gpnum;
+   __entry->completed = completed;
+   __entry->c = c;
+   __entry->level = level;
+   __entry->grplo = grplo;
+   __entry->grphi = grphi;
+   __entry->gpevent = gpevent;
+   ),
+
+   TP_printk("%s %lu %lu %lu %u %d %d %s",
+ __entry->rcuname, __entry->gpnum, __entry->completed,
+ __entry->c, __entry->level, __entry->grplo, __entry->grphi,
+ __entry->gpevent)
+);
+
+/*
  * Tracepoint for grace-period-initialization events.  These are
  * distinguished by the type of RCU, the new grace-period number, the
  * rcu_node structure level, the starting and ending CPU covered by the
@@ -601,6 +653,9 @@ TRACE_EVENT(rcu_barrier,
 #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
 #define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \
qsmask) do { } while (0)
+#define trace_rcu_nocb_grace_period(rcuname, gpnum, completed, c, \
+   level, grplo, grphi, event) \
+   do { } while (0)
 #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0)
 #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
 #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \
diff --git a/init/Kconfig b/init/Kconfig
index fb19b46..97fc178 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -665,7 +665,7 @@ config RCU_BOOST_DELAY
  Accept the default if unsure.
 
 config RCU_NOCB_CPU
-   bool "Offload RCU callback processing from boot-selected CPUs"
+   bool "Offload RCU callback processing from boot-selected CPUs (EXPERIMENTAL"
depends on TREE_RCU || TREE_PREEMPT_RCU
default n
help
@@ -683,7 +683,7 @@ config RCU_NOCB_CPU
  callback, and (2) affinity or cgroups can be used to force
  the kthreads to run on whatever set of CPUs is desired.
 
-