This sysfs control, if enabled, allows memory migrations on the first NUMA hinting page fault.
If disabled it forbids it and requires a confirmation through the last_nid logic. By default, the first fault is allowed to migrate memory. Disabling it may increase the time it takes to converge, but it reduces some initial thrashing in case of NUMA false sharing. Signed-off-by: Andrea Arcangeli <aarca...@redhat.com> --- include/linux/autonuma_flags.h | 20 ++++++++++++++++++++ mm/autonuma.c | 7 +++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/include/linux/autonuma_flags.h b/include/linux/autonuma_flags.h index f53203a..28756ca 100644 --- a/include/linux/autonuma_flags.h +++ b/include/linux/autonuma_flags.h @@ -85,6 +85,20 @@ enum autonuma_flag { * Default not set. */ AUTONUMA_MIGRATE_DEFER_FLAG, + /* + * If not set, a page must successfully pass a last_nid check + * before it can be migrated even if it's the very first NUMA + * hinting page fault occurring on the page. If set, the + * first NUMA hinting page fault of a newly allocated page + * will always pass the last_nid check. + * + * If set a newly started workload can converge quicker, + * but it may incur more false positive migrations before + * reaching convergence. + * + * Default set. 
+ */ + AUTONUMA_MIGRATE_ALLOW_FIRST_FAULT_FLAG, }; extern unsigned long autonuma_flags; @@ -126,4 +140,10 @@ static inline bool autonuma_migrate_defer(void) return test_bit(AUTONUMA_MIGRATE_DEFER_FLAG, &autonuma_flags); } +static inline bool autonuma_migrate_allow_first_fault(void) +{ + return test_bit(AUTONUMA_MIGRATE_ALLOW_FIRST_FAULT_FLAG, + &autonuma_flags); +} + #endif /* _LINUX_AUTONUMA_FLAGS_H */ diff --git a/mm/autonuma.c b/mm/autonuma.c index 4b7c744..e7570df 100644 --- a/mm/autonuma.c +++ b/mm/autonuma.c @@ -28,7 +28,7 @@ unsigned long autonuma_flags __read_mostly = #ifdef CONFIG_HAVE_ARCH_AUTONUMA_SCAN_PMD |(1<<AUTONUMA_SCAN_PMD_FLAG) #endif - ; + |(1<<AUTONUMA_MIGRATE_ALLOW_FIRST_FAULT_FLAG); static DEFINE_MUTEX(knumad_mm_mutex); @@ -345,7 +345,8 @@ static inline bool last_nid_set(struct page *page, int this_nid) int autonuma_last_nid = ACCESS_ONCE(page_autonuma->autonuma_last_nid); VM_BUG_ON(this_nid < 0); VM_BUG_ON(this_nid >= MAX_NUMNODES); - if (autonuma_last_nid >= 0 && autonuma_last_nid != this_nid) { + if ((!autonuma_migrate_allow_first_fault() || + autonuma_last_nid >= 0) && autonuma_last_nid != this_nid) { int migrate_nid; migrate_nid = ACCESS_ONCE(page_autonuma->autonuma_migrate_nid); if (migrate_nid >= 0) @@ -1311,6 +1312,7 @@ SYSFS_ENTRY(debug, AUTONUMA_DEBUG_FLAG); SYSFS_ENTRY(load_balance_strict, AUTONUMA_SCHED_LOAD_BALANCE_STRICT_FLAG); SYSFS_ENTRY(defer, AUTONUMA_MIGRATE_DEFER_FLAG); SYSFS_ENTRY(reset, AUTONUMA_SCHED_RESET_FLAG); +SYSFS_ENTRY(allow_first_fault, AUTONUMA_MIGRATE_ALLOW_FIRST_FAULT_FLAG); #endif /* CONFIG_DEBUG_VM */ #undef SYSFS_ENTRY @@ -1419,6 +1421,7 @@ static struct attribute *knuma_migrated_attr[] = { &pages_migrated_attr.attr, #ifdef CONFIG_DEBUG_VM &defer_attr.attr, + &allow_first_fault_attr.attr, #endif NULL, }; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read 
the FAQ at http://www.tux.org/lkml/