To help with debugging the sluggishness caused by TLB misses/reloads,
introduce monotonic, lifetime hugepage split event counters that start
counting once system_state reaches SYSTEM_RUNNING and are displayed as
part of /proc/vmstat on x86 servers.

The lifetime split event information will be displayed at the bottom of
/proc/vmstat:
....
swap_ra 0
swap_ra_hit 0
direct_map_2M_splits 167
direct_map_1G_splits 6
nr_unstable 0
....

One of the many lasting sources of huge page splits (lasting because split
pages are never coalesced back) is tracing: its granular page
attribute/permission changes force the kernel to split code segments
mapped to huge pages into smaller ones, thereby increasing the probability
of TLB misses/reloads even after tracing has been stopped.

Signed-off-by: Saravanan D <saravan...@fb.com>
---
 arch/x86/mm/pat/set_memory.c  | 18 ++++++++++++++++++
 include/linux/vm_event_item.h |  8 ++++++++
 mm/vmstat.c                   |  8 ++++++++
 3 files changed, 34 insertions(+)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 16f878c26667..3ea6316df089 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -16,6 +16,8 @@
 #include <linux/pci.h>
 #include <linux/vmalloc.h>
 #include <linux/libnvdimm.h>
+#include <linux/vmstat.h>
+#include <linux/kernel.h>
 
 #include <asm/e820/api.h>
 #include <asm/processor.h>
@@ -85,12 +87,28 @@ void update_page_count(int level, unsigned long pages)
        spin_unlock(&pgd_lock);
 }
 
+/* Count a post-boot (SYSTEM_RUNNING) split of a @level direct-map page. */
+static void update_split_page_event_count(int level)
+{
+       if (system_state != SYSTEM_RUNNING)
+               return;
+       if (level == PG_LEVEL_1G)
+               count_vm_event(DIRECT_MAP_1G_SPLIT);
+       else if (level == PG_LEVEL_2M)
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+               count_vm_event(DIRECT_MAP_2M_SPLIT);
+#else /* neither 64-bit nor PAE: the PG_LEVEL_2M event is named 4M */
+               count_vm_event(DIRECT_MAP_4M_SPLIT);
+#endif
+}
+
 static void split_page_count(int level)
 {
        if (direct_pages_count[level] == 0)
                return;
 
        direct_pages_count[level]--;
+       update_split_page_event_count(level); /* bump lifetime vmstat split event */
        direct_pages_count[level - 1] += PTRS_PER_PTE;
 }
 
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 18e75974d4e3..439742d2435e 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -120,6 +120,14 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 #ifdef CONFIG_SWAP
                SWAP_RA,
                SWAP_RA_HIT,
+#endif
+#if defined(__x86_64__)
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+               DIRECT_MAP_2M_SPLIT,
+#else
+               DIRECT_MAP_4M_SPLIT,
+#endif
+               DIRECT_MAP_1G_SPLIT,
 #endif
                NR_VM_EVENT_ITEMS
 };
diff --git a/mm/vmstat.c b/mm/vmstat.c
index f8942160fc95..beaa2bb4f9dc 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1350,6 +1350,14 @@ const char * const vmstat_text[] = {
        "swap_ra",
        "swap_ra_hit",
 #endif
+#if defined(__x86_64__)
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+       "direct_map_2M_splits",
+#else
+       "direct_map_4M_splits",
+#endif
+       "direct_map_1G_splits",
+#endif
 #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
 };
 #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */
-- 
2.24.1

Reply via email to