On Fri, Jun 17, 2016 at 12:43:22PM -0500, Josh Poimboeuf wrote:
> NOTE: I didn't include any performance numbers because I wasn't able to
> get consistent results.  I tried the following on a Xeon E5-2420 v2 CPU:
>
>   $ for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do echo -n performance > $i; done
>   $ echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
>   $ echo 100 > /sys/devices/system/cpu/intel_pstate/min_perf_pct
>   $ echo 0 > /proc/sys/kernel/nmi_watchdog
>   $ taskset 0x10 perf stat -n -r10 perf bench sched pipe -l 1000000
>
> I was going to post the numbers from that, both with and without
> SCHEDSTATS, but then when I tried to repeat the test on a different day,
> the results were surprisingly different, with different conclusions.
>
> So any advice on measuring scheduler performance would be appreciated...
Yeah, it's a bit of a pain in general...

A) perf stat --null --repeat 50 -- perf bench sched messaging -g 50 -l 5000 | grep "seconds time elapsed"
B) perf stat --null --repeat 50 -- taskset 1 perf bench sched pipe | grep "seconds time elapsed"

1) tip/master + patches 1-4
2) tip/master + patches 1-5
3) tip/master + patches 1-5 + the reorder patch below

         1                2                3

A)  4.627767855      4.650429917      4.646208062
    4.633921933      4.641424424      4.612021058
    4.649536375      4.663144144      4.636815948
    4.630165619      4.649053552      4.613022902

B)  1.770732957      1.789534273      1.773334291
    1.761740716      1.795618428      1.773338681
    1.763761666      1.822316496      1.774385589

From this it looks like patch 5 does hurt a wee bit, but we can get most
of that back by reordering the structure a bit.

The results seem 'stable' across rebuilds and reboots (at the end I popped
all the patches, rebuilt, rebooted and re-benched configuration 1 again,
and obtained similar results).

Although it's possible that if we reorder first and then apply patch 5,
we'd just see a bigger regression. I've not bothered to check.

---
 include/linux/sched.h | 33 +++++++++++++++------------------
 kernel/sched/core.c   |  4 ++--
 kernel/sched/debug.c  |  6 +++---
 3 files changed, 20 insertions(+), 23 deletions(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1220,7 +1220,7 @@ struct uts_namespace;
 struct load_weight {
	unsigned long weight;
	u32 inv_weight;
-};
+} __packed;
 
 /*
  * The load_avg/util_avg accumulates an infinite geometric series
@@ -1315,44 +1315,40 @@ struct sched_statistics {
 
 struct sched_entity {
	struct load_weight	load;		/* for load-balancing */
+	unsigned int		on_rq;
	struct rb_node		run_node;
	struct list_head	group_node;
-	unsigned int		on_rq;
 
-	u64			exec_start;
+	u64			exec_start ____cacheline_aligned_in_smp;
	u64			sum_exec_runtime;
	u64			vruntime;
	u64			prev_sum_exec_runtime;
-
-	u64			nr_migrations;
-
	u64			wait_start;
	u64			sleep_start;
	u64			block_start;
+#ifdef CONFIG_SMP
+	/*
+	 * Per entity load average tracking.
+	 */
+	struct sched_avg	avg ____cacheline_aligned_in_smp;
+#endif
 
 #ifdef CONFIG_SCHEDSTATS
	struct sched_statistics statistics;
 #endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-	int			depth;
+	/*
+	 * mostly constant values, separate from modifications above
+	 */
+	int			depth ____cacheline_aligned_in_smp;
	struct sched_entity	*parent;
	/* rq on which this entity is (to be) queued: */
	struct cfs_rq		*cfs_rq;
	/* rq "owned" by this entity/group: */
	struct cfs_rq		*my_q;
 #endif
-
-#ifdef CONFIG_SMP
-	/*
-	 * Per entity load average tracking.
-	 *
-	 * Put into separate cache line so it does not
-	 * collide with read-mostly values above.
-	 */
-	struct sched_avg	avg ____cacheline_aligned_in_smp;
-#endif
-};
+} ____cacheline_aligned_in_smp;
 
 struct sched_rt_entity {
	struct list_head run_list;
@@ -1475,6 +1471,7 @@ struct task_struct {
	int prio, static_prio, normal_prio;
	unsigned int rt_priority;
	const struct sched_class *sched_class;
+	u64 nr_migrations;
	struct sched_entity se;
	struct sched_rt_entity rt;
 #ifdef CONFIG_CGROUP_SCHED
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1239,7 +1239,7 @@ void set_task_cpu(struct task_struct *p,
	if (task_cpu(p) != new_cpu) {
		if (p->sched_class->migrate_task_rq)
			p->sched_class->migrate_task_rq(p);
-		p->se.nr_migrations++;
+		p->nr_migrations++;
		perf_event_task_migrate(p);
	}
 
@@ -2167,7 +2167,7 @@ static void __sched_fork(unsigned long c
	p->se.exec_start		= 0;
	p->se.sum_exec_runtime		= 0;
	p->se.prev_sum_exec_runtime	= 0;
-	p->se.nr_migrations		= 0;
+	p->nr_migrations		= 0;
	p->se.vruntime			= 0;
	INIT_LIST_HEAD(&p->se.group_node);
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -885,7 +885,7 @@ void proc_sched_show_task(struct task_st
 
	nr_switches = p->nvcsw + p->nivcsw;
 
-	P(se.nr_migrations);
+	P(nr_migrations);
 
	PN(se.wait_start);
	PN(se.sleep_start);
@@ -926,9 +926,9 @@ void proc_sched_show_task(struct task_st
		avg_atom = -1LL;
 
	avg_per_cpu = p->se.sum_exec_runtime;
-	if (p->se.nr_migrations) {
+	if (p->nr_migrations) {
		avg_per_cpu = div64_u64(avg_per_cpu,
-					p->se.nr_migrations);
+					p->nr_migrations);
	} else {
		avg_per_cpu = -1LL;
	}
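For reference, the point of the ____cacheline_aligned_in_smp games above is
plain false-sharing avoidance: the per-entity load average gets written from
remote CPUs, so it wants its own cache line, away from the read-mostly fields.
A minimal userspace sketch of the same layout trick (the struct, the field
names and the 64-byte line size are illustrative assumptions, not the kernel
definitions):

/* layout.c - sketch of cache-line separation.
 * Build: gcc -std=c11 -O2 layout.c -o layout && ./layout
 */
#include <stdalign.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define CACHELINE 64	/* assumed L1 line size */

struct entity {
	/* read-mostly: stays clean and shared across CPU caches */
	unsigned long	weight;
	uint32_t	inv_weight;

	/* hot, frequently written: alignas() starts a new cache line, so
	 * stores here do not invalidate the read-mostly line above */
	alignas(CACHELINE) uint64_t	load_sum;
	uint32_t	util_sum;
};

int main(void)
{
	/* verify the written fields landed on their own line */
	printf("weight   @ offset %zu\n", offsetof(struct entity, weight));
	printf("load_sum @ offset %zu\n", offsetof(struct entity, load_sum));
	printf("sizeof(struct entity) = %zu\n", sizeof(struct entity));
	return 0;
}

The flip side is visible in the numbers above: every such split rounds the
structure up to a multiple of the line size, so over-doing it bloats
task_struct and can regress just as easily; presumably that's also what the
__packed on load_weight (16 -> 12 bytes on 64-bit) is clawing back.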