FYI, we noticed the following changes on git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master, commit 10359213d05acf804558bda7cc9b8422a828d1cd ("mm: incorporate read-only pages into transparent huge pages"):
testbox/testcase/testparams: xps2/pigz/performance-100%-512K

ba4877b9ca51f80b  10359213d05acf804558bda7cc
----------------  --------------------------
       fail:runs  %reproduction    fail:runs
           |             |             |
         %stddev     %change         %stddev
             \          |                \
     25068 ±  3%      +8.1%      27104 ±  3%  pigz.time.maximum_resident_set_size
     27037 ±  0%      -3.1%      26197 ±  1%  pigz.time.minor_page_faults
     12558 ±  2%     +31.3%      16493 ±  3%  meminfo.AnonHugePages
      0.92 ± 30%     -45.1%       0.51 ± 25%  perf-profile.cpu-cycles.update_curr.task_tick_fair.scheduler_tick.update_process_times.tick_sched_handle
      1.12 ±  3%     +42.1%       1.59 ± 15%  perf-profile.cpu-cycles.ret_from_fork
      2.46 ± 21%     -29.4%       1.74 ± 18%  perf-profile.cpu-cycles.get_page_from_freelist.__alloc_pages_nodemask.alloc_pages_current.pipe_write.new_sync_write
    595245 ± 11%     -26.6%     436928 ± 23%  sched_debug.cpu#3.avg_idle
    602523 ±  9%     -25.1%     451132 ± 15%  sched_debug.cpu#5.avg_idle
     16.26 ±  9%     -16.0%      13.66 ±  4%  perf-profile.cpu-cycles.tick_program_event.hrtimer_interrupt.hpet_interrupt_handler.handle_irq_event_percpu.handle_irq_event
      0.81 ± 24%     +30.2%       1.05 ±  5%  perf-profile.cpu-cycles.__do_softirq.irq_exit.smp_apic_timer_interrupt.apic_timer_interrupt
      1.68 ±  5%     -22.2%       1.30 ± 22%  perf-profile.cpu-cycles.copy_page_from_iter_iovec.copy_page_from_iter.pipe_write.new_sync_write.vfs_write
       150 ±  2%     -11.8%        132 ±  8%  sched_debug.cfs_rq[3]:/.load
      7785 ±  6%     +11.4%       8675 ±  6%  slabinfo.kmalloc-64.active_objs
      1986 ±  3%     -12.4%       1739 ±  4%  slabinfo.kmalloc-256.active_objs
      8048 ±  4%      +9.7%       8826 ±  5%  slabinfo.kmalloc-64.num_objs
       321 ± 28%     +61.6%        520 ± 28%  sched_debug.cfs_rq[2]:/.blocked_load_avg
       139 ±  3%     +14.0%        159 ±  7%  sched_debug.cpu#1.load
       121 ±  2%      +8.2%        131 ±  3%  sched_debug.cpu#0.cpu_load[4]
       122 ±  2%      +9.2%        134 ±  3%  sched_debug.cpu#0.cpu_load[3]
   1014549 ±  8%      -9.8%     914927 ±  4%  cpuidle.C1E-NHM.time
     25068 ±  3%      +8.1%      27104 ±  3%  time.maximum_resident_set_size
    143231 ±  9%     +11.4%     159600 ±  2%  softirqs.RCU
       150 ±  2%     -12.1%        132 ±  8%  sched_debug.cpu#3.load

testbox/testcase/testparams: lkp-sbx04/will-it-scale/performance-malloc1

ba4877b9ca51f80b  10359213d05acf804558bda7cc
----------------  --------------------------
   3954711 ±  4%     +11.8%    4420591 ±  8%  will-it-scale.time.voluntary_context_switches
   7468699 ±  1%      +3.4%    7722740 ±  1%  will-it-scale.time.minor_page_faults
      0.02 ±  0%      -3.7%       0.02 ±  0%  will-it-scale.scalability
     37784 ±  0%      +1.7%      38421 ±  0%  will-it-scale.per_thread_ops
      0.00 ±  0%  +5.8e+12%   58403.14 ± 29%  sched_debug.cfs_rq[49]:/.MIN_vruntime
      0.00 ±  0%  +5.8e+12%   58403.14 ± 29%  sched_debug.cfs_rq[49]:/.max_vruntime
         5 ± 24%    +340.0%         22 ± 13%  sched_debug.cpu#59.cpu_load[1]
         4 ± 17%    +310.5%         19 ± 17%  sched_debug.cpu#59.cpu_load[0]
        34 ± 29%    +198.5%        101 ± 31%  sched_debug.cfs_rq[33]:/.tg_load_contrib
         5 ± 24%    +280.0%         19 ± 15%  sched_debug.cpu#59.cpu_load[2]
        19 ± 41%     -79.5%          4 ± 50%  sched_debug.cfs_rq[26]:/.load
        19 ± 41%     -79.5%          4 ± 50%  sched_debug.cpu#26.load
     33117 ± 39%    +159.0%      85783 ± 15%  sched_debug.cpu#4.sched_goidle
         4 ± 19%    +211.1%         14 ± 14%  sched_debug.cpu#59.cpu_load[3]
     34216 ± 41%    +175.2%      94151 ± 18%  sched_debug.cpu#4.ttwu_count
         7 ± 34%    +160.0%         19 ± 17%  sched_debug.cfs_rq[49]:/.load
       124 ± 21%     -78.8%         26 ± 43%  sched_debug.cfs_rq[48]:/.blocked_load_avg
     66389 ± 39%    +158.7%     171750 ± 15%  sched_debug.cpu#4.nr_switches
       135 ± 23%     -74.1%         35 ± 28%  sched_debug.cfs_rq[48]:/.tg_load_contrib
      0.00 ±  1%    +166.2%       0.00 ± 34%  sched_debug.rt_rq[63]:/.rt_time
         5 ± 47%     -63.6%          2 ±  0%  sched_debug.cpu#11.cpu_load[2]
     67305 ± 38%    +155.4%     171876 ± 15%  sched_debug.cpu#4.sched_count
        47 ± 40%     -68.3%         15 ± 20%  sched_debug.cfs_rq[12]:/.tg_load_contrib
        54 ± 20%    +149.8%        135 ± 44%  sched_debug.cfs_rq[45]:/.blocked_load_avg
      0.00 ±  7%     -15.3%       0.00 ±  4%  sched_debug.rt_rq[7]:/.rt_time
         5 ± 37%     -65.2%          2 ±  0%  sched_debug.cpu#3.cpu_load[1]
         1 ± 34%    +100.0%          2 ± 20%  sched_debug.cfs_rq[19]:/.nr_spread_over
       136 ± 42%     -62.3%         51 ±  8%  sched_debug.cfs_rq[43]:/.tg_load_contrib
        67 ± 16%    +124.6%        150 ± 41%  sched_debug.cfs_rq[45]:/.tg_load_contrib
        69 ± 36%    +126.1%        156 ± 21%  sched_debug.cfs_rq[61]:/.blocked_load_avg
         4 ± 10%    +135.3%         10 ± 10%  sched_debug.cpu#59.cpu_load[4]
         7 ± 14%     -74.2%          2 ± 50%  sched_debug.cpu#49.nr_uninterruptible
       877 ± 32%     +64.3%       1442 ± 30%  sched_debug.cpu#22.curr->pid
         7 ± 29%    +151.6%         19 ± 28%  sched_debug.cpu#50.load
         4 ± 22%     -57.9%          2 ±  0%  sched_debug.cpu#3.cpu_load[2]
    347606 ± 36%     -58.7%     143557 ±  1%  sched_debug.cfs_rq[16]:/.min_vruntime
      0.00 ± 39%     -47.9%       0.00 ±  0%  sched_debug.rt_rq[55]:/.rt_time
      7757 ± 45%     -61.5%       2985 ±  5%  sched_debug.cfs_rq[16]:/.exec_clock
        15 ± 36%     -42.9%          9 ±  0%  sched_debug.cpu#0.cpu_load[0]
        77 ± 32%    +120.7%        170 ± 16%  sched_debug.cfs_rq[61]:/.tg_load_contrib
      8767 ±  9%     +70.0%      14901 ± 33%  sched_debug.cfs_rq[1]:/.exec_clock
       406 ± 36%     +35.5%        550 ± 29%  sched_debug.cpu#46.ttwu_local
     97714 ± 37%     +80.7%     176528 ± 19%  sched_debug.cpu#6.nr_switches
     98697 ± 36%     +79.0%     176658 ± 19%  sched_debug.cpu#6.sched_count
        88 ± 14%     -60.0%         35 ± 23%  sched_debug.cfs_rq[62]:/.blocked_load_avg
      0.00 ± 36%     -48.5%       0.00 ± 10%  sched_debug.rt_rq[54]:/.rt_time
         1 ±  0%    +100.0%          2 ±  0%  sched_debug.cfs_rq[53]:/.nr_spread_over
         3 ± 11%     -46.7%          2 ±  0%  sched_debug.cpu#3.cpu_load[3]
      5714 ± 27%     -37.4%       3577 ±  4%  sched_debug.cpu#28.sched_goidle
       745 ± 26%     +48.1%       1104 ± 20%  sched_debug.cpu#15.ttwu_local
     11686 ± 26%     -37.0%       7366 ±  4%  sched_debug.cpu#28.nr_switches
     90749 ± 11%     +54.3%     140005 ± 29%  sched_debug.cpu#32.sched_goidle
    181994 ± 11%     +54.1%     280377 ± 29%  sched_debug.cpu#32.nr_switches
    182550 ± 10%     +53.8%     280771 ± 29%  sched_debug.cpu#32.sched_count
     48790 ± 37%     +80.7%      88182 ± 19%  sched_debug.cpu#6.sched_goidle
  48101669 ± 16%     +58.7%   76337780 ± 27%  cpuidle.C1-SNB.time
     11390 ± 16%     +62.0%      18455 ± 28%  sched_debug.cpu#11.ttwu_count
     50734 ± 38%     +79.4%      91038 ± 16%  sched_debug.cpu#6.ttwu_count
        15 ± 21%     -34.4%         10 ±  0%  sched_debug.cpu#0.cpu_load[1]
        63 ± 39%     +96.1%        125 ± 12%  sched_debug.cfs_rq[58]:/.blocked_load_avg
       409 ±  9%     +47.9%        605 ± 26%  sched_debug.cpu#59.ttwu_local
      1267 ± 23%     +22.4%       1552 ± 25%  numa-meminfo.node3.AnonHugePages
        72 ± 25%     +92.4%        140 ±  3%  sched_debug.cfs_rq[58]:/.tg_load_contrib
      0.00 ± 32%     -43.6%       0.00 ±  1%  sched_debug.rt_rq[32]:/.rt_time
      1147 ± 24%     +44.2%       1654 ± 10%  sched_debug.cpu#25.ttwu_local
         5 ± 14%     -60.0%          2 ± 50%  sched_debug.cpu#51.nr_uninterruptible
        12 ± 42%     +95.9%         24 ±  4%  sched_debug.cfs_rq[50]:/.load
     14738 ±  6%     +53.2%      22584 ±  7%  sched_debug.cpu#12.sched_goidle
     29667 ±  6%     +52.8%      45321 ±  7%  sched_debug.cpu#12.nr_switches
     14586 ±  8%     +54.0%      22463 ±  7%  sched_debug.cpu#12.ttwu_count
      1025 ± 17%     -41.3%        601 ± 10%  sched_debug.cpu#12.ttwu_local
     26504 ± 33%     +57.3%      41688 ± 19%  sched_debug.cpu#9.sched_count
        73 ± 38%     +68.7%        124 ±  3%  sched_debug.cfs_rq[29]:/.tg_load_contrib
       707 ± 20%     +63.1%       1154 ±  8%  numa-meminfo.node3.Shmem
        94 ± 12%     -50.0%         47 ± 34%  sched_debug.cfs_rq[62]:/.tg_load_contrib
       176 ± 20%     +63.2%        288 ±  9%  numa-vmstat.node3.nr_shmem
      1378 ± 15%     +15.9%       1597 ± 21%  sched_debug.cpu#1.curr->pid
        69 ± 48%     +77.5%        122 ±  2%  sched_debug.cfs_rq[29]:/.blocked_load_avg
       692 ±  2%     +25.0%        865 ± 21%  sched_debug.cpu#30.curr->pid
        17 ± 33%     +52.1%         27 ± 18%  sched_debug.cpu#46.load
         3 ±  0%     -33.3%          2 ±  0%  sched_debug.cpu#3.cpu_load[4]
    390329 ±  0%     +24.1%     484415 ± 19%  sched_debug.cfs_rq[1]:/.min_vruntime
       635 ± 20%     +65.9%       1054 ±  5%  numa-meminfo.node3.Inactive(anon)
       158 ± 20%     +66.2%        263 ±  5%  numa-vmstat.node3.nr_inactive_anon
       140 ±  4%     +26.7%        178 ± 16%  sched_debug.cfs_rq[1]:/.tg_runnable_contrib
      6553 ± 19%     -31.8%       4467 ± 17%  sched_debug.cpu#26.sched_goidle
      6476 ±  4%     +26.5%       8195 ± 16%  sched_debug.cfs_rq[1]:/.avg->runnable_avg_sum
       719 ± 31%     +46.9%       1056 ±  3%  sched_debug.cpu#16.ttwu_local
    278733 ± 11%     +28.3%     357584 ± 11%  cpuidle.C1E-SNB.usage
     12547 ± 10%     +35.0%      16933 ±  6%  sched_debug.cpu#58.sched_count
      6341 ± 10%     +35.5%       8592 ±  2%  sched_debug.cpu#58.ttwu_count
    139557 ± 14%     +18.4%     165256 ± 10%  sched_debug.cfs_rq[59]:/.spread0
      1767 ±  3%     +29.4%       2286 ±  3%  sched_debug.cpu#61.curr->pid
    136520 ± 13%     +16.7%     159351 ±  9%  sched_debug.cfs_rq[63]:/.spread0
    142974 ±  4%     +16.3%     166215 ± 14%  sched_debug.cfs_rq[25]:/.min_vruntime
     13231 ± 23%     +29.8%      17174 ± 12%  sched_debug.cpu#16.sched_count
       113 ± 35%     -34.5%         74 ±  0%  sched_debug.cfs_rq[25]:/.tg_load_contrib
        23 ± 19%     +38.3%         32 ± 10%  sched_debug.cfs_rq[37]:/.load
      1359 ± 21%     -40.1%        814 ± 16%  sched_debug.cpu#16.curr->pid
    141008 ± 16%     +16.6%     164377 ±  9%  sched_debug.cfs_rq[61]:/.spread0
       107 ± 32%     -31.8%         73 ±  1%  sched_debug.cfs_rq[25]:/.blocked_load_avg
     39693 ±  7%     -20.7%      31477 ±  0%  sched_debug.cpu#44.ttwu_count
       960 ± 14%     +38.2%       1327 ±  1%  sched_debug.cpu#22.ttwu_local
       827 ± 11%     +20.2%        995 ±  9%  sched_debug.cpu#23.curr->pid
      1819 ±  5%     +18.8%       2162 ±  7%  sched_debug.cpu#60.curr->pid
      6671 ± 13%     +24.4%       8297 ±  8%  sched_debug.cpu#60.sched_goidle
     13517 ± 13%     +24.2%      16793 ±  8%  sched_debug.cpu#60.nr_switches
     13658 ± 12%     +24.0%      16934 ±  8%  sched_debug.cpu#60.sched_count
     39297 ±  8%     -19.8%      31521 ±  1%  sched_debug.cpu#44.sched_goidle
       835 ± 13%     +30.6%       1090 ±  1%  sched_debug.cpu#18.ttwu_local
     78817 ±  8%     -19.8%      63241 ±  1%  sched_debug.cpu#44.nr_switches
     79478 ±  8%     -20.0%      63595 ±  1%  sched_debug.cpu#44.sched_count
        14 ± 10%     -25.4%         11 ±  9%  sched_debug.cpu#0.cpu_load[2]
      1.43 ±  1%     -20.6%       1.14 ±  2%  perf-profile.cpu-cycles.up_write.vm_munmap.sys_munmap.system_call_fastpath
      6092 ± 10%     +28.1%       7803 ±  0%  sched_debug.cpu#58.sched_goidle
       348 ± 13%     +18.7%        413 ± 10%  sched_debug.cpu#6.ttwu_local
     12410 ± 10%     +27.3%      15796 ±  0%  sched_debug.cpu#58.nr_switches
      7012 ± 15%     +25.7%       8813 ±  7%  sched_debug.cpu#60.ttwu_count
       377 ± 11%     +13.5%        428 ±  7%  sched_debug.cpu#60.ttwu_local
      9050 ±  6%     +15.8%      10476 ±  1%  sched_debug.cfs_rq[4]:/.exec_clock
      1762 ±  2%      +8.1%       1906 ±  8%  sched_debug.cpu#62.curr->pid
   3954711 ±  4%     +11.8%    4420591 ±  8%  time.voluntary_context_switches
      2045 ±  5%     +11.9%       2290 ±  7%  numa-meminfo.node3.KernelStack
        14 ±  8%     -14.3%         12 ±  0%  sched_debug.cpu#43.cpu_load[4]
       371 ±  1%     -17.0%        308 ±  5%  sched_debug.cpu#44.ttwu_local
        15 ± 15%     -20.6%         12 ±  4%  sched_debug.cpu#43.cpu_load[3]
        23 ±  7%     -14.0%         20 ±  0%  sched_debug.cpu#34.cpu_load[3]
      0.00 ±  4%     -22.2%       0.00 ± 14%  sched_debug.rt_rq[50]:/.rt_time
    374746 ± 13%     +18.8%     445099 ±  3%  sched_debug.cfs_rq[50]:/.spread0
    650306 ±  5%     +13.4%     737674 ±  2%  sched_debug.cpu#33.avg_idle
      2796 ±  5%      +3.9%       2905 ±  6%  sched_debug.cpu#36.curr->pid
       388 ±  7%     +14.6%        444 ±  3%  sched_debug.cpu#63.ttwu_local
    641680 ±  2%      +9.2%     700581 ±  5%  sched_debug.cpu#39.avg_idle
      9736 ±  8%     +13.1%      11014 ±  2%  sched_debug.cfs_rq[6]:/.exec_clock
      3356 ±  2%      +8.6%       3646 ±  3%  sched_debug.cfs_rq[29]:/.avg->runnable_avg_sum
        71 ±  3%      +8.7%         78 ±  3%  sched_debug.cfs_rq[29]:/.tg_runnable_contrib
     16875 ±  3%      +9.5%      18477 ±  2%  slabinfo.vm_area_struct.active_objs
     16875 ±  3%      +9.5%      18477 ±  2%  slabinfo.vm_area_struct.num_objs
      1.17 ±  4%      -9.0%       1.06 ±  1%  perf-profile.cpu-cycles.perf_event_mmap.mmap_region.do_mmap_pgoff.vm_mmap_pgoff.sys_mmap_pgoff
     44305 ±  4%     -16.1%      37164 ±  8%  sched_debug.cpu#43.ttwu_count
      6430 ±  1%      +6.4%       6840 ±  5%  sched_debug.cfs_rq[14]:/.avg->runnable_avg_sum
      2768 ±  3%      +3.7%       2870 ±  5%  sched_debug.cpu#38.curr->pid
      9224 ±  2%      +9.8%      10130 ±  1%  slabinfo.anon_vma.num_objs
      9224 ±  2%      +9.8%      10130 ±  1%  slabinfo.anon_vma.active_objs
      1816 ±  4%     +10.1%       2000 ±  2%  sched_debug.cpu#57.curr->pid
     85823 ±  4%     -15.5%      72483 ±  7%  sched_debug.cpu#43.nr_switches
     86191 ±  4%     -15.5%      72848 ±  7%  sched_debug.cpu#43.sched_count
     44120 ± 12%     +18.1%      52091 ±  4%  sched_debug.cpu#1.nr_load_updates
     42455 ±  5%     -14.9%      36139 ±  7%  sched_debug.cpu#43.sched_goidle
     27096 ±  3%     +10.6%      29973 ±  8%  vmstat.system.cs
     18715 ±  0%      +7.0%      20016 ±  5%  vmstat.system.in

testbox/testcase/testparams: snb-drag/phoronix-test-suite/performance-encode-ogg-1.4.1

ba4877b9ca51f80b  10359213d05acf804558bda7cc
----------------  --------------------------
     29266 ±  0%     -13.0%      25474 ±  1%  phoronix-test-suite.time.minor_page_faults
       320 ± 35%    +367.6%       1496 ± 44%  sched_debug.cfs_rq[2]:/.tg_load_contrib
        63 ± 35%    +103.1%        129 ± 28%  sched_debug.cpu#2.cpu_load[1]
        68 ± 37%    +148.7%        171 ± 26%  sched_debug.cpu#2.cpu_load[0]
       126 ± 37%     +73.5%        219 ± 38%  sched_debug.cfs_rq[2]:/.runnable_load_avg
       298 ±  3%     -25.5%        222 ± 19%  sched_debug.cfs_rq[3]:/.load
        35 ±  8%     +21.8%         43 ± 10%  sched_debug.cfs_rq[1]:/.nr_spread_over
     29266 ±  0%     -13.0%      25474 ±  1%  time.minor_page_faults

xps2: Nehalem
Memory: 4G

lkp-sbx04: Sandy Bridge-EX
Memory: 64G

snb-drag: Sandy Bridge
Memory: 6G

pigz.time.minor_page_faults:

  [ASCII time-series plot, y-axis 25400-27400 minor page faults: bisect-good
  samples ([*], parent ba4877b9ca51f80b) cluster around 26800-27200; bisect-bad
  samples ([O], 10359213d05a) fall around 25400-26500]

        [*] bisect-good sample
        [O] bisect-bad sample

To reproduce:

        apt-get install ruby
        git clone git://git.kernel.org/pub/scm/linux/kernel/git/wfg/lkp-tests.git
        cd lkp-tests
        bin/setup-local job.yaml  # the job file attached in this email
        bin/run-local   job.yaml

Disclaimer:
Results have been estimated based on internal Intel analysis and are provided
for informational purposes only. Any difference in system hardware or software
design or configuration may affect actual performance.

Thanks,
Ying Huang
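P.S. The meminfo.AnonHugePages delta (+31.3% on xps2) is the easiest of the
changes above to confirm by hand. Outside the LKP harness, a generic sampling
loop over /proc/meminfo (just a sketch, nothing job-specific) shows whether the
kernel under test collapses more anonymous memory into huge pages while the
benchmark runs:

        # sample THP usage once per second for the duration of the run
        while :; do
                grep AnonHugePages /proc/meminfo
                sleep 1
        done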
---
testcase: pigz
default-monitors:
  wait: pre-test
  uptime:
  iostat:
  vmstat:
  numa-numastat:
  numa-vmstat:
  numa-meminfo:
  proc-vmstat:
  proc-stat:
  meminfo:
  slabinfo:
  interrupts:
  lock_stat:
  latency_stats:
  softirqs:
  bdi_dev_mapping:
  diskstats:
  nfsstat:
  cpuidle:
  cpufreq-stats:
  turbostat:
  pmeter:
  sched_debug:
    interval: 10
default_watchdogs:
  watch-oom:
  watchdog:
cpufreq_governor: performance
commit: ea2bbe3b9bf930408db205344fe10c8f719ba738
model: Nehalem
memory: 4G
nr_ssd_partitions: 1
ssd_partitions: "/dev/disk/by-id/ata-INTEL_SSDSC2CW240A3_CVCV204303XB240CGN-part1"
swap_partitions:
rootfs_partition: "/dev/disk/by-id/ata-INTEL_SSDSC2CW240A3_CVCV204303XB240CGN-part2"
netconsole_port: 6665
perf-profile:
  freq: 800
nr_threads: 100%
pigz:
  blocksize: 512K
testbox: xps2
tbox_group: xps2
kconfig: x86_64-rhel
enqueue_time: 2015-02-15 01:37:17.922838823 +08:00
head_commit: ea2bbe3b9bf930408db205344fe10c8f719ba738
base_commit: bfa76d49576599a4b9f9b7a71f23d73d6dcff735
branch: linux-devel/devel-hourly-2015021423
kernel: "/kernel/x86_64-rhel/ea2bbe3b9bf930408db205344fe10c8f719ba738/vmlinuz-3.19.0-gea2bbe3"
user: lkp
queue: cyclic
rootfs: debian-x86_64-2015-02-07.cgz
result_root: "/result/xps2/pigz/performance-100%-512K/debian-x86_64-2015-02-07.cgz/x86_64-rhel/ea2bbe3b9bf930408db205344fe10c8f719ba738/0"
job_file: "/lkp/scheduled/xps2/cyclic_pigz-performance-100%-512K-x86_64-rhel-HEAD-ea2bbe3b9bf930408db205344fe10c8f719ba738-0-20150215-58738-yzfujf.yaml"
dequeue_time: 2015-02-15 01:49:47.299361589 +08:00
nr_cpu: "$(nproc)"
job_state: finished
loadavg: 6.66 5.14 2.31 1/132 5309
start_time: '1423936212'
end_time: '1423936513'
version: "/lkp/lkp/.src-20150213-094846"
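# Note on the job file above: with nr_threads: 100% and blocksize: 512K, the
# pigz test should be roughly equivalent to the shell invocation below. This
# is an assumption about what the harness drives, not a quote of the actual
# test script, and /path/to/input is a hypothetical placeholder:
#
#       pigz -p "$(nproc)" -b 512 < /path/to/input > /dev/null
#
# (pigz's -b option takes the block size in KiB, so -b 512 means 512 KiB
# blocks; -p sets the number of compression threads.)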