The mm_struct always contains a cpumask bitmap, regardless of
CONFIG_CPUMASK_OFFSTACK. That means the first step can be to
simplify things, and have just one bitmask at the end of the
mm_struct for the mm_cpumask.
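
To illustrate, the pattern being moved to looks roughly like the
self-contained userspace sketch below (struct demo_mm and the DEMO_*
macros are invented for the example, and the local nr_cpu_ids stands in
for the kernel's boot-time value; this is not the kernel code itself):

  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  #define DEMO_BITS_PER_LONG    (8 * sizeof(unsigned long))
  #define DEMO_BITS_TO_LONGS(n) \
          (((n) + DEMO_BITS_PER_LONG - 1) / DEMO_BITS_PER_LONG)

  /* Stand-in for mm_struct: the bitmap is a flexible array member,
   * so it must remain the last field. */
  struct demo_mm {
          int other_fields;
          unsigned long cpu_bitmap[];
  };

  int main(void)
  {
          unsigned int nr_cpu_ids = 8; /* in the kernel: boot-time count */
          size_t mask_bytes = DEMO_BITS_TO_LONGS(nr_cpu_ids) *
                              sizeof(unsigned long);
          struct demo_mm *mm = malloc(sizeof(*mm) + mask_bytes);

          if (!mm)
                  return 1;
          /* The equivalent of mm_init_cpumask(): clear the bitmap. */
          memset(mm->cpu_bitmap, 0, mask_bytes);
          printf("allocated %zu bytes for the mask\n", mask_bytes);
          free(mm);
          return 0;
  }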

The second step is to determine the correct size for the
mm_struct slab object from the size of the mm_struct
(excluding the cpu bitmap) and the size of the cpumask.
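
Since sizeof() on a structure with a flexible array member does not
count the array, the arithmetic is just "struct size plus mask size".
A hedged sketch of that computation (struct fake_mm and the CPU count
are assumptions for the example; in the kernel the mask size comes
from cpumask_size()):

  #include <stdio.h>

  #define BITS_PER_LONG    (8 * sizeof(unsigned long))
  #define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

  struct fake_mm {
          long a, b;                  /* placeholder for the real fields */
          unsigned long cpu_bitmap[]; /* not counted by sizeof() */
  };

  int main(void)
  {
          unsigned int nr_cpu_ids = 64; /* assumed boot-detected count */
          size_t mask_size = BITS_TO_LONGS(nr_cpu_ids) *
                             sizeof(unsigned long);
          size_t mm_size = sizeof(struct fake_mm) + mask_size;

          printf("slab object size: %zu (struct %zu + mask %zu)\n",
                 mm_size, sizeof(struct fake_mm), mask_size);
          return 0;
  }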

For init_mm we can simply allocate the maximum size this
kernel is compiled for, since there is only one init_mm
in the system anyway.
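
Statically reserving storage for a flexible array member relies on a
GCC extension: a designated initializer makes the compiler allocate
the static object large enough for the highest index named. A small
sketch of the idea (the struct and the NR_CPUS value are stand-ins,
not the kernel definitions):

  #include <stdio.h>

  #define NR_CPUS          256 /* stand-in for the kernel config value */
  #define BITS_PER_LONG    (8 * sizeof(unsigned long))
  #define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

  struct demo_mm {
          int field;
          unsigned long cpu_bitmap[];
  };

  /* Naming index BITS_TO_LONGS(NR_CPUS) reserves that many longs plus
   * one in the static object, enough to cover NR_CPUS bits. */
  static struct demo_mm demo_init_mm = {
          .field      = 0,
          .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0 },
  };

  int main(void)
  {
          demo_init_mm.cpu_bitmap[0] = 1; /* the storage really exists */
          printf("longs reserved: %zu\n", BITS_TO_LONGS(NR_CPUS) + 1);
          return 0;
  }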

Signed-off-by: Rik van Riel <r...@surriel.com>
Tested-by: Song Liu <songliubrav...@fb.com>
---
 include/linux/mm_types.h | 18 ++++++++----------
 kernel/fork.c            | 14 ++++++++------
 mm/init-mm.c             | 10 ++++++++++
 3 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 21612347d311..8e91632958f3 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -427,8 +427,6 @@ struct mm_struct {
 
        struct linux_binfmt *binfmt;
 
-       cpumask_var_t cpu_vm_mask_var;
-
        /* Architecture-specific MM context */
        mm_context_t context;
 
@@ -465,9 +463,6 @@ struct mm_struct {
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
        pgtable_t pmd_huge_pte; /* protected by page_table_lock */
 #endif
-#ifdef CONFIG_CPUMASK_OFFSTACK
-       struct cpumask cpumask_allocation;
-#endif
 #ifdef CONFIG_NUMA_BALANCING
        /*
         * numa_next_scan is the next time that the PTEs will be marked
@@ -502,22 +497,25 @@ struct mm_struct {
        /* HMM needs to track a few things per mm */
        struct hmm *hmm;
 #endif
+
+       /*
+        * The mm_cpumask needs to be at the end of mm_struct, because it
+        * is dynamically sized based on nr_cpu_ids.
+        */
+       unsigned long cpu_bitmap[];
 } __randomize_layout;
 
 extern struct mm_struct init_mm;
 
 static inline void mm_init_cpumask(struct mm_struct *mm)
 {
-#ifdef CONFIG_CPUMASK_OFFSTACK
-       mm->cpu_vm_mask_var = &mm->cpumask_allocation;
-#endif
-       cpumask_clear(mm->cpu_vm_mask_var);
+       cpumask_clear((struct cpumask *)&mm->cpu_bitmap);
 }
 
 /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
 static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 {
-       return mm->cpu_vm_mask_var;
+       return (struct cpumask *)&mm->cpu_bitmap;
 }
 
 struct mmu_gather;
diff --git a/kernel/fork.c b/kernel/fork.c
index a5d21c42acfc..c6a20bc78102 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2242,6 +2242,8 @@ static void sighand_ctor(void *data)
 
 void __init proc_caches_init(void)
 {
+       unsigned int mm_size;
+
        sighand_cachep = kmem_cache_create("sighand_cache",
                        sizeof(struct sighand_struct), 0,
                        SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
@@ -2258,15 +2260,15 @@ void __init proc_caches_init(void)
                        sizeof(struct fs_struct), 0,
                        SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
                        NULL);
+
        /*
-        * FIXME! The "sizeof(struct mm_struct)" currently includes the
-        * whole struct cpumask for the OFFSTACK case. We could change
-        * this to *only* allocate as much of it as required by the
-        * maximum number of CPU's we can ever have.  The cpumask_allocation
-        * is at the end of the structure, exactly for that reason.
+        * The mm_cpumask is located at the end of mm_struct, and is
+        * dynamically sized based on nr_cpu_ids.
         */
+       mm_size = sizeof(struct mm_struct) + cpumask_size();
+
        mm_cachep = kmem_cache_create_usercopy("mm_struct",
-                       sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
+                       mm_size, ARCH_MIN_MMSTRUCT_ALIGN,
                        SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
                        offsetof(struct mm_struct, saved_auxv),
                        sizeof_field(struct mm_struct, saved_auxv),
diff --git a/mm/init-mm.c b/mm/init-mm.c
index f94d5d15ebc0..20fe222fe4c0 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -15,6 +15,15 @@
 #define INIT_MM_CONTEXT(name)
 #endif
 
+/*
+ * For dynamically allocated mm_structs, there is a dynamically sized cpumask
+ * at the end of the structure, the size of which depends on nr_cpu_ids. That
+ * way we allocate only as much memory for mm_cpumask() as needed for the
+ * hundreds, or thousands of processes that a system typically runs.
+ *
+ * Since there is only one init_mm in the entire system, keep it simple
+ * and size this cpu_bitmap to NR_CPUS.
+ */
 struct mm_struct init_mm = {
        .mm_rb          = RB_ROOT,
        .pgd            = swapper_pg_dir,
@@ -24,5 +33,6 @@ struct mm_struct init_mm = {
        .page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
        .mmlist         = LIST_HEAD_INIT(init_mm.mmlist),
        .user_ns        = &init_user_ns,
+       .cpu_bitmap     = { [BITS_TO_LONGS(NR_CPUS)] = 0},
        INIT_MM_CONTEXT(init_mm)
 };
-- 
2.14.4
