A large number of hugepage splits in the linear mapping gives
admins a signal that helps narrow down sluggishness caused by TLB
miss/reload.
To help with debugging, we introduce monotonic lifetime hugepage
split event counts since SYSTEM_RUNNING, to be displayed as part of
/proc/vmstat on x86 servers.
The lifetime split event information will be displayed at the bottom of
/proc/vmstat:
swap_ra 0
swap_ra_hit 0
direct_map_2M_splits 139
direct_map_4M_splits 0
direct_map_1G_splits 7
nr_unstable 0
Ancillary debugfs split event counts are exported to userspace via read-write
endpoints: /sys/kernel/debug/x86/direct_map_[2M|4M|1G]_split
The dmesg log when a user resets the debugfs split event count for
debugging:
[ 232.470531] debugfs 2M Pages split event count(128) reset to 0
One of the many lasting (as we don't coalesce back) sources of huge page
splits is tracing, as the granular page attribute/permission changes
force the kernel to split code segments mapped to huge pages into smaller
ones, thereby increasing the probability of TLB miss/reload even after
tracing has been stopped.
Signed-off-by: Saravanan D
---
arch/x86/mm/pat/set_memory.c | 117 ++
include/linux/vm_event_item.h | 8 +++
mm/vmstat.c | 8 +++
3 files changed, 133 insertions(+)
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 16f878c26667..97b6ef8dbd12 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -16,6 +16,8 @@
#include
#include
#include
+#include
+#include
#include
#include
@@ -76,6 +78,104 @@ static inline pgprot_t cachemode2pgprot(enum
page_cache_mode pcm)
#ifdef CONFIG_PROC_FS
static unsigned long direct_pages_count[PG_LEVEL_NUM];
+static unsigned long split_page_event_count[PG_LEVEL_NUM];
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+/* debugfs write handler: clear the 2M split count; only 0 is accepted. */
+static int direct_map_2M_split_set(void *data, u64 val)
+{
+	/* The counter can only be reset, so any non-zero write is -EINVAL. */
+	if (val != 0)
+		return -EINVAL;
+
+	/* Log the count being discarded; "\n" terminates the log record. */
+	pr_info("debugfs 2M Pages split event count(%lu) reset to 0\n",
+		split_page_event_count[PG_LEVEL_2M]);
+	split_page_event_count[PG_LEVEL_2M] = 0;
+
+	return 0;
+}
+
+/* debugfs read handler: store the current 2M split count in *val. */
+static int direct_map_2M_split_get(void *data, u64 *val)
+{
+	*val = split_page_event_count[PG_LEVEL_2M];
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_direct_map_2M_split, direct_map_2M_split_get,
+			 direct_map_2M_split_set, "%llu\n");
+#else
+/* Clear the 4M split count (PG_LEVEL_2M slot); only a write of 0 is valid. */
+static int direct_map_4M_split_set(void *data, u64 val)
+{
+	/* The counter can only be reset, so any non-zero write is -EINVAL. */
+	if (val != 0)
+		return -EINVAL;
+
+	/* Log the count being discarded; "\n" terminates the log record. */
+	pr_info("debugfs 4M Pages split event count(%lu) reset to 0\n",
+		split_page_event_count[PG_LEVEL_2M]);
+	split_page_event_count[PG_LEVEL_2M] = 0;
+
+	return 0;
+}
+
+/* Store the current 4M split count (PG_LEVEL_2M slot) in *val. */
+static int direct_map_4M_split_get(void *data, u64 *val)
+{
+	*val = split_page_event_count[PG_LEVEL_2M];
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_direct_map_4M_split, direct_map_4M_split_get,
+			 direct_map_4M_split_set, "%llu\n");
+#endif
+
+/* debugfs write handler: clear the 1G split count; only 0 is accepted. */
+static int direct_map_1G_split_set(void *data, u64 val)
+{
+	/* The counter can only be reset, so any non-zero write is -EINVAL. */
+	if (val != 0)
+		return -EINVAL;
+
+	/* Log the count being discarded; "\n" terminates the log record. */
+	pr_info("debugfs 1G Pages split event count(%lu) reset to 0\n",
+		split_page_event_count[PG_LEVEL_1G]);
+	split_page_event_count[PG_LEVEL_1G] = 0;
+
+	return 0;
+}
+
+/* debugfs read handler: store the current 1G split count in *val. */
+static int direct_map_1G_split_get(void *data, u64 *val)
+{
+	*val = split_page_event_count[PG_LEVEL_1G];
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_direct_map_1G_split, direct_map_1G_split_get,
+			 direct_map_1G_split_set, "%llu\n");
+
+/* Create the split-count debugfs files; 1G only if direct_gbpages is set. */
+static __init int direct_map_split_debugfs_init(void)
+{
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+	const char *name = "direct_map_2M_split";
+	const struct file_operations *fops = &fops_direct_map_2M_split;
+#else
+	const char *name = "direct_map_4M_split";
+	const struct file_operations *fops = &fops_direct_map_4M_split;
+#endif
+
+	debugfs_create_file(name, 0600, arch_debugfs_dir, NULL, fops);
+	if (direct_gbpages)
+		debugfs_create_file("direct_map_1G_split", 0600, arch_debugfs_dir,
+				    NULL, &fops_direct_map_1G_split);
+	return 0;
+}
+
+late_initcall(direct_map_split_debugfs_init);
void update_page_count(int level, unsigned long pages)
{
@@ -85,12 +185,29 @@ void update_page_count(int level, unsigned long pages)
spin_unlock(&pgd_lock);
}
+void update_split_page_event_count(int level)
+{
+ if (system_state == SYSTEM_RUNNING) {
+ split_page_event_count[level]++;
+ if (level == PG_LEVEL_2M) {
+#if def