Currently, reader, writer, and kfree threads set their CPU affinity to
their id % nr_cpu_ids.  The IDs of all three thread types start from 0,
so readers, writers, and kfree threads may be scheduled on the same CPU.
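For illustration only (this helper is a sketch, not code added by the
patch; it just mirrors the (offset + me) % nr_cpu_ids expression used in
the diff below), the CPU a scale thread lands on is a modular mapping of
its id, so threads of different types that share an id collide on the
same CPU unless their starting indexes are offset:

	/* Sketch of the affinity mapping; today every thread type uses offset 0. */
	static int scale_thread_cpu(int offset, long me, unsigned int nr_cpu_ids)
	{
		return (int)((offset + me) % nr_cpu_ids);
	}

	/*
	 * Example with nr_cpu_ids = 4 and nreaders = nwriters = 1:
	 *   reader_cpu_offset=0 writer_cpu_offset=0 -> reader and writer both on CPU 0
	 *   reader_cpu_offset=0 writer_cpu_offset=1 -> reader on CPU 0, writer on CPU 1
	 */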
This patch adds module parameters to offset the CPU affinity of each
thread type.  As the experiments below show, the writer-observed
grace-period durations differ markedly between offset 0 and offset 1.
The experiments were carried out on a 256C/512T machine running a
PREEMPT=n kernel.

Experiment: nreaders=1 nwriters=1 reader_cpu_offset=0 writer_cpu_offset=0

Average grace-period duration: 108376 microseconds
Minimum grace-period duration: 13000.4
50th percentile grace-period duration: 115000
90th percentile grace-period duration: 121000
99th percentile grace-period duration: 121004
Maximum grace-period duration: 219000
Grace periods: 101 Batches: 1 Ratio: 101

Experiment: nreaders=1 nwriters=1 reader_cpu_offset=0 writer_cpu_offset=1

Average grace-period duration: 185950 microseconds
Minimum grace-period duration: 8999.84
50th percentile grace-period duration: 217946
90th percentile grace-period duration: 218003
99th percentile grace-period duration: 218018
Maximum grace-period duration: 272195
Grace periods: 101 Batches: 1 Ratio: 101

Signed-off-by: Yuzhuo Jing <[email protected]>
---
 .../admin-guide/kernel-parameters.txt         | 19 +++++++++++++++++++
 kernel/rcu/rcuscale.c                         | 16 +++++++++++-----
 2 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 5e233e511f81..f68651c103a4 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1,3 +1,4 @@
+# vim: noet:sw=8:sts=8:
 	accept_memory=	[MM]
 			Format: { eager | lazy }
 			default: lazy
@@ -5513,6 +5514,12 @@
 			test until boot completes in order to avoid
 			interference.
 
+	rcuscale.kfree_cpu_offset= [KNL]
+			Set the starting CPU affinity index of kfree threads.
+			CPU affinity is assigned sequentially from
+			kfree_cpu_offset to kfree_cpu_offset+kfree_nthreads,
+			modulo the number of CPUs.  Negative values are reset to 0.
+
 	rcuscale.kfree_by_call_rcu= [KNL]
 			In kernels built with CONFIG_RCU_LAZY=y, test
 			call_rcu() instead of kfree_rcu().
@@ -5567,6 +5574,12 @@
 			the same as for rcuscale.nreaders.
 			N, where N is the number of CPUs
 
+	rcuscale.reader_cpu_offset= [KNL]
+			Set the starting CPU affinity index of reader threads.
+			CPU affinity is assigned sequentially from
+			reader_cpu_offset to reader_cpu_offset+nreaders,
+			modulo the number of CPUs.  Negative values are reset to 0.
+
 	rcuscale.scale_type= [KNL]
 			Specify the RCU implementation to test.
 
@@ -5578,6 +5591,12 @@
 	rcuscale.verbose= [KNL]
 			Enable additional printk() statements.
 
+	rcuscale.writer_cpu_offset= [KNL]
+			Set the starting CPU affinity index of writer threads.
+			CPU affinity is assigned sequentially from
+			writer_cpu_offset to writer_cpu_offset+nwriters,
+			modulo the number of CPUs.  Negative values are reset to 0.
+
 	rcuscale.writer_holdoff= [KNL]
 			Write-side holdoff between grace periods,
 			in microseconds.  The default of zero says
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 43bcaeac457f..1208169be15e 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -95,12 +95,15 @@ torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
 torture_param(int, minruntime, 0, "Minimum run time (s)");
 torture_param(int, nreaders, -1, "Number of RCU reader threads");
 torture_param(int, nwriters, -1, "Number of RCU updater threads");
+torture_param(int, reader_cpu_offset, 0, "Offset of reader CPU affinity");
 torture_param(bool, shutdown, RCUSCALE_SHUTDOWN,
 	      "Shutdown at end of scalability tests.");
 torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
+torture_param(int, writer_cpu_offset, 0, "Offset of writer CPU affinity");
 torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
 torture_param(int, writer_holdoff_jiffies, 0, "Holdoff (jiffies) between GPs, zero to disable");
 torture_param(bool, writer_no_print, false, "Do not print writer durations to ring buffer");
+torture_param(int, kfree_cpu_offset, 0, "Offset of kfree CPU affinity");
 torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() scale test?");
 torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");
 torture_param(int, kfree_by_call_rcu, 0, "Use call_rcu() to emulate kfree_rcu()?");
@@ -495,7 +498,7 @@ rcu_scale_reader(void *arg)
 	long me = (long)arg;
 
 	VERBOSE_SCALEOUT_STRING("rcu_scale_reader task started");
-	set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
+	set_cpus_allowed_ptr(current, cpumask_of((reader_cpu_offset + me) % nr_cpu_ids));
 	set_user_nice(current, MAX_NICE);
 	atomic_inc(&n_rcu_scale_reader_started);
 
@@ -585,7 +588,7 @@ rcu_scale_writer(void *arg)
 
 	VERBOSE_SCALEOUT_STRING("rcu_scale_writer task started");
 	WARN_ON(!wdpp);
-	set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
+	set_cpus_allowed_ptr(current, cpumask_of((writer_cpu_offset + me) % nr_cpu_ids));
 	current->flags |= PF_NO_SETAFFINITY;
 	sched_set_fifo_low(current);
 
@@ -719,8 +722,8 @@ static void
 rcu_scale_print_module_parms(struct rcu_scale_ops *cur_ops, const char *tag)
 {
 	pr_alert("%s" SCALE_FLAG
-		 "--- %s: gp_async=%d gp_async_max=%d gp_exp=%d holdoff=%d minruntime=%d nreaders=%d nwriters=%d writer_holdoff=%d writer_holdoff_jiffies=%d verbose=%d shutdown=%d\n",
-		 scale_type, tag, gp_async, gp_async_max, gp_exp, holdoff, minruntime, nrealreaders, nrealwriters, writer_holdoff, writer_holdoff_jiffies, verbose, shutdown);
+		 "--- %s: gp_async=%d gp_async_max=%d gp_exp=%d holdoff=%d minruntime=%d nreaders=%d nwriters=%d reader_cpu_offset=%d writer_cpu_offset=%d writer_holdoff=%d writer_holdoff_jiffies=%d kfree_cpu_offset=%d verbose=%d shutdown=%d\n",
+		 scale_type, tag, gp_async, gp_async_max, gp_exp, holdoff, minruntime, nrealreaders, nrealwriters, reader_cpu_offset, writer_cpu_offset, writer_holdoff, writer_holdoff_jiffies, kfree_cpu_offset, verbose, shutdown);
 }
 
 /*
@@ -785,7 +788,7 @@ kfree_scale_thread(void *arg)
 	DEFINE_TORTURE_RANDOM(tr);
 
 	VERBOSE_SCALEOUT_STRING("kfree_scale_thread task started");
-	set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
+	set_cpus_allowed_ptr(current, cpumask_of((kfree_cpu_offset + me) % nr_cpu_ids));
 	set_user_nice(current, MAX_NICE);
 
 	kfree_rcu_test_both = (kfree_rcu_test_single == kfree_rcu_test_double);
@@ -1446,6 +1449,9 @@ rcu_scale_init(void)
 	atomic_set(&n_rcu_scale_reader_started, 0);
 	atomic_set(&n_rcu_scale_writer_started, 0);
 	atomic_set(&n_rcu_scale_writer_finished, 0);
+	reader_cpu_offset = max(reader_cpu_offset, 0);
+	writer_cpu_offset = max(writer_cpu_offset, 0);
+	kfree_cpu_offset = max(kfree_cpu_offset, 0);
 	rcu_scale_print_module_parms(cur_ops, "Start of test");
 
 	if (!block_start)
-- 
2.50.1.552.g942d659e1b-goog
